// RowPerformanceTest.java

package tech.tablesaw.perf;

import static java.lang.System.out;

import com.google.common.base.Stopwatch;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.RandomUtils;
import org.apache.commons.text.RandomStringGenerator;
import tech.tablesaw.api.DateTimeColumn;
import tech.tablesaw.api.DoubleColumn;
import tech.tablesaw.api.Row;
import tech.tablesaw.api.StringColumn;
import tech.tablesaw.api.Table;
import tech.tablesaw.columns.datetimes.PackedLocalDateTime;
import tech.tablesaw.columns.numbers.NumberColumnFormatter;

/**
 * Ad-hoc timing harness for iterating a large {@link Table} with a {@link Row} cursor.
 *
 * <p>The program builds a four-column table (one string, one date-time and two double columns),
 * fills it with generated data and then prints how long a full pass over the table takes while
 * calling various {@code Row} accessors, first addressing columns by name and then by index.
 *
 * <p>NOTE(review): there is no warm-up phase and the accessor results are discarded, so the
 * printed figures should be treated as rough indications rather than rigorous measurements.
 */
public class RowPerformanceTest {

  /** Number of distinct random strings in the concept pool. */
  private static final int CONCEPT_COUNT = 1000;

  /** Number of rows generated for the benchmark table. */
  private static final int RECORD_COUNT = 5_000_000;

  // Column names; the index constants below must follow the order used in defineSchema().
  private static final String CONCEPT = "concept";
  private static final String DATE = "date";
  private static final String LOW_VALUE = "lowValue";
  private static final String HIGH_VALUE = "highValue";

  private static final int CONCEPT_INDEX = 0;
  private static final int DATE_INDEX = 1;
  private static final int LOW_VALUE_INDEX = 2;
  private static final int HIGH_VALUE_INDEX = 3;

  // pools to get random test data from
  private static final List<String> concepts = new ArrayList<>(CONCEPT_COUNT);
  private static final LongArrayList dates = new LongArrayList(RECORD_COUNT);

  /**
   * Builds the test table, prints its structure and shape, then runs every timing pass.
   *
   * @param args ignored
   */
  public static void main(String[] args) {

    Table t = defineSchema();
    generateTestData(t, RECORD_COUNT);
    out.println();

    out.println("Test table info: ");
    out.println(t.structure());
    out.println();
    out.println(t.shape());
    out.println();
    out.println();

    Stopwatch stopwatch = Stopwatch.createUnstarted();

    // Every timed loop below is deliberately written inline rather than routed through a shared
    // helper taking a callback: a single shared call site would add dispatch overhead to each
    // iteration and distort the numbers being compared.

    // run with no operations
    Row row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
    }
    stopAndReport("No op test (iteration only): ", stopwatch);

    out.println();
    out.println("Tests getting columns by name");
    timeAccessByName(t, stopwatch);

    out.println();
    out.println("Tests getting columns by index");
    timeAccessByIndex(t, stopwatch);

    out.println("Done");
  }

  /**
   * Times full passes over {@code t} while reading values through column names.
   *
   * @param t the populated test table
   * @param stopwatch stopwatch reused for every pass; it is reset before each one
   */
  private static void timeAccessByName(Table t, Stopwatch stopwatch) {
    // run with one getDouble()
    Row row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDouble(LOW_VALUE);
    }
    stopAndReport("one getDouble(): ", stopwatch);

    // Second getDouble() pass. It used to be reported as "getInt()", but the schema has no
    // integer column and the loop has always called getDouble(); the label now says so.
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDouble(LOW_VALUE);
    }
    stopAndReport("one getDouble() (second pass): ", stopwatch);

    // run with one getPackedDateTime()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getPackedDateTime(DATE);
    }
    stopAndReport("one getPackedDateTime(): ", stopwatch);

    // run with one getDateTime()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDateTime(DATE);
    }
    stopAndReport("one getDateTime(): ", stopwatch);

    // run with one getString()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getString(CONCEPT);
    }
    stopAndReport("one getString(): ", stopwatch);

    // run with a packedDateTime, a String, and two doubles
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getString(CONCEPT);
      row.getPackedDateTime(DATE);
      row.getDouble(LOW_VALUE);
      row.getDouble(HIGH_VALUE);
    }
    stopAndReport("Getting four values: ", stopwatch);
  }

  /**
   * Times full passes over {@code t} while reading values through column indexes.
   *
   * @param t the populated test table
   * @param stopwatch stopwatch reused for every pass; it is reset before each one
   */
  private static void timeAccessByIndex(Table t, Stopwatch stopwatch) {
    // run with one getDouble()
    Row row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDouble(LOW_VALUE_INDEX);
    }
    stopAndReport("one getDouble(): ", stopwatch);

    // Second getDouble() pass (previously mislabelled as "getInt()"; see the by-name variant).
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDouble(LOW_VALUE_INDEX);
    }
    stopAndReport("one getDouble() (second pass): ", stopwatch);

    // run with one getPackedDateTime()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getPackedDateTime(DATE_INDEX);
    }
    stopAndReport("one getPackedDateTime(): ", stopwatch);

    // run with one getDateTime()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getDateTime(DATE_INDEX);
    }
    stopAndReport("one getDateTime(): ", stopwatch);

    // run with one getString()
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getString(CONCEPT_INDEX);
    }
    stopAndReport("one getString(): ", stopwatch);

    // run with a packedDateTime, a String, and two doubles
    row = new Row(t);
    restart(stopwatch);
    while (row.hasNext()) {
      row.next();
      row.getString(CONCEPT_INDEX);
      row.getPackedDateTime(DATE_INDEX);
      row.getDouble(LOW_VALUE_INDEX);
      row.getDouble(HIGH_VALUE_INDEX);
    }
    stopAndReport("Getting four values: ", stopwatch);
  }

  /** Zeroes {@code stopwatch} and starts it again. */
  private static void restart(Stopwatch stopwatch) {
    stopwatch.reset();
    stopwatch.start();
  }

  /** Stops {@code stopwatch} and prints {@code label} followed by the elapsed milliseconds. */
  private static void stopAndReport(String label, Stopwatch stopwatch) {
    stopwatch.stop();
    out.println(label + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
  }

  /**
   * Creates the empty "Observations" table.
   *
   * <p>Columns are added in the order concept, date, lowValue, highValue; the index-based timing
   * passes rely on that order.
   *
   * @return a table with the four empty columns attached
   */
  private static Table defineSchema() {
    StringColumn conceptId = StringColumn.create(CONCEPT);
    DateTimeColumn date = DateTimeColumn.create(DATE);
    DoubleColumn lowValues = DoubleColumn.create(LOW_VALUE);
    DoubleColumn highValues = DoubleColumn.create(HIGH_VALUE);
    highValues.setPrintFormatter(NumberColumnFormatter.ints());
    lowValues.setPrintFormatter(NumberColumnFormatter.ints());

    Table t = Table.create("Observations");
    t.addColumns(conceptId, date, lowValues, highValues);
    return t;
  }

  /**
   * Fills {@code t} with generated rows and prints progress messages around the work.
   *
   * @param t table produced by {@link #defineSchema()}
   * @param numberOfRecordsInTable number of rows to append
   */
  private static void generateTestData(Table t, int numberOfRecordsInTable) {
    out.println("Generating test data");
    LocalDateTime startDateTime = LocalDateTime.of(2008, 1, 1, 0, 0, 0);
    generateData(numberOfRecordsInTable, startDateTime, t);
    out.println("Done ");
  }

  /**
   * Tops up the shared value pools and appends {@code observationCount} rows to {@code table}.
   *
   * <p>Row {@code i} receives the {@code i}-th pooled date-time, a randomly chosen pooled concept
   * string and two random doubles drawn from {@code [0, 1_000_000)}.
   *
   * @param observationCount number of rows to append
   * @param dateTime instant preceding the first generated date-time; each pooled value is one
   *     minute later than the previous one
   * @param table table whose "concept", "date", "lowValue" and "highValue" columns are filled
   */
  private static void generateData(int observationCount, LocalDateTime dateTime, Table table) {
    // create pools of random values

    RandomStringGenerator generator =
        new RandomStringGenerator.Builder().withinRange(32, 127).build();

    // Strict '<' so the pool holds exactly CONCEPT_COUNT entries (its pre-sized capacity).
    while (concepts.size() < CONCEPT_COUNT) {
      concepts.add(generator.generate(30));
    }

    // LocalDateTime is immutable: plusMinutes() returns a new value, so the result has to be kept
    // for the timestamps to advance. The pool is sized from observationCount because the append
    // loop below reads exactly that many entries.
    LocalDateTime next = dateTime;
    while (dates.size() < observationCount) {
      next = next.plusMinutes(1);
      dates.add(PackedLocalDateTime.pack(next));
    }

    DateTimeColumn dateColumn = table.dateTimeColumn(DATE);
    StringColumn conceptColumn = table.stringColumn(CONCEPT);
    DoubleColumn lowValues = table.doubleColumn(LOW_VALUE);
    DoubleColumn highValues = table.doubleColumn(HIGH_VALUE);

    // sample from the pools to write the data
    for (int i = 0; i < observationCount; i++) {
      dateColumn.appendInternal(dates.getLong(i));
      conceptColumn.append(concepts.get(RandomUtils.nextInt(0, concepts.size())));
      lowValues.append(RandomUtils.nextDouble(0, 1_000_000));
      highValues.append(RandomUtils.nextDouble(0, 1_000_000));
    }
  }
}