SawStorageTest.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.io.saw;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static tech.tablesaw.api.ColumnType.INSTANT;

import java.time.Instant;
import java.time.LocalDate;
import java.util.function.Supplier;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import tech.tablesaw.api.BooleanColumn;
import tech.tablesaw.api.DateColumn;
import tech.tablesaw.api.DoubleColumn;
import tech.tablesaw.api.FloatColumn;
import tech.tablesaw.api.InstantColumn;
import tech.tablesaw.api.IntColumn;
import tech.tablesaw.api.LongColumn;
import tech.tablesaw.api.StringColumn;
import tech.tablesaw.api.Table;

/**
 * Round-trip tests for the saw storage format.
 *
 * <p>Each test writes a {@link Table} with {@link SawWriter}, reads it back with {@link SawReader}
 * and compares the restored table (or the reader's metadata) with the original. Small synthetic
 * tables are written below the JVM temp directory; the CSV-backed tables are read from {@code
 * ../data} and written below {@code ../testoutput}.
 */
class SawStorageTest {

  private final Table empty = Table.create("empty table");

  private final Table noData =
      Table.create("no data", IntColumn.create("empty int"), DoubleColumn.create("empty double"));

  private final Table intsOnly =
      Table.create(
          "Ints only",
          IntColumn.indexColumn("index1", 100, 1),
          IntColumn.indexColumn("index2", 100, 1));

  private final Table boolsOnly = Table.create("Bools only", BooleanColumn.create("bc1", 1000));

  // Receives a string copy of "index1" in setUp().
  private final Table intsAndStrings =
      Table.create("Ints and strings", IntColumn.indexColumn("index1", 100, 300));

  // Not referenced by any test in this class at present.
  private final Table intsAndText =
      Table.create("Ints and text", IntColumn.indexColumn("index1", 100, 300));

  // The instant column is filled in setUp().
  private final Table instants =
      Table.create(
          "Instants",
          IntColumn.indexColumn("index1", 100, 300),
          InstantColumn.create("Instants", 100));

  /** Number of rows appended to each column of {@link #table} in {@link #setUp()}. */
  private static final int COUNT = 5;

  private static final String tempDir = System.getProperty("java.io.tmpdir");

  // Mixed-type table named "t"; the columns below are populated and attached in setUp().
  private final Table table = Table.create("t");
  private final FloatColumn floatColumn = FloatColumn.create("float");
  private final StringColumn categoryColumn = StringColumn.create("string");
  private final DateColumn localDateColumn = DateColumn.create("date");
  private final LongColumn longColumn = LongColumn.create("long");
  private final BooleanColumn booleanColumn = BooleanColumn.create("bool");

  // Loaded once for the whole class; the tests only read from it.
  private static Table baseball;

  /** Loads the baseball data set shared by several tests. */
  @BeforeAll
  static void readTables() {
    baseball = Table.read().csv("../data/baseball.csv");
  }

  /** Populates the instance-level fixture tables before every test. */
  @BeforeEach
  void setUp() {
    intsAndStrings.addColumns(intsAndStrings.intColumn("index1").asStringColumn());

    // The cast selects the Supplier overload of fillWith, as done for the instant column below.
    boolsOnly.booleanColumn(0).fillWith((Supplier<Boolean>) () -> true);

    // Evaluate the date once so every row holds the same value.
    LocalDate today = LocalDate.now();
    for (int i = 0; i < COUNT; i++) {
      floatColumn.append((float) i);
      localDateColumn.append(today);
      categoryColumn.append("Category " + i);
      longColumn.append(i);
      booleanColumn.append(i % 2 == 0);
    }
    table.addColumns(floatColumn, localDateColumn, categoryColumn, longColumn, booleanColumn);

    instants.instantColumn(1).fillWith((Supplier<Instant>) Instant::now);
  }

  /** Writes {@code source} below {@code folder} with default options and reads it back. */
  private static Table roundTrip(String folder, Table source) {
    String path = new SawWriter(folder, source).write();
    return new SawReader(path).read();
  }

  /**
   * Reads only the OBP, SLG and BA columns of the baseball table stored at {@code path} and checks
   * that exactly those columns come back, with every row of the original table.
   */
  private static void assertSelectedBaseballColumns(String path) {
    Table selected =
        new SawReader(path, new SawReadOptions().selectedColumns("OBP", "SLG", "BA")).read();
    assertEquals(3, selected.columnCount());
    assertTrue(selected.columnNames().contains("OBP"));
    assertTrue(selected.columnNames().contains("SLG"));
    assertTrue(selected.columnNames().contains("BA"));
    assertEquals(baseball.rowCount(), selected.rowCount());
  }

  /** Round-trips the mixed-type table and compares the string column value by value. */
  @Test
  void testWriteTable() {
    new SawWriter(tempDir + "/zeta", table).write();
    Table t = new SawReader(tempDir + "/zeta/t.saw").read();
    assertEquals(table.columnCount(), t.columnCount());
    assertEquals(table.rowCount(), t.rowCount());
    for (int i = 0; i < table.rowCount(); i++) {
      assertEquals(categoryColumn.get(i), t.stringColumn("string").get(i));
    }
    t.sortOn("string"); // exercise the column a bit
  }

  /** Round-trips the mixed-type table and compares the boolean column value by value. */
  @Test
  void testWriteTable2() {
    new SawWriter(tempDir + "/zeta", table).write();
    Table t = new SawReader(tempDir + "/zeta/t.saw").read();
    assertEquals(table.columnCount(), t.columnCount());
    assertEquals(table.rowCount(), t.rowCount());
    for (int i = 0; i < table.rowCount(); i++) {
      assertEquals(booleanColumn.get(i), t.booleanColumn("bool").get(i));
    }
    t.sortOn("string"); // exercise the column a bit
  }

  /** Writes the same table to the same location twice and reads the second copy back. */
  @Test
  void testWriteTableTwice() {
    String folder = tempDir + "/mytables2";

    new SawWriter(folder, table).write();
    Table t = new SawReader(folder + "/t.saw").read();
    // Renames a column of the in-memory copy only; the original table is what gets re-written.
    t.floatColumn("float").setName("a float column");

    // The second write must actually run; constructing the writer alone stores nothing.
    new SawWriter(folder, table).write();
    t = new SawReader(folder + "/t.saw").read();

    assertEquals(table.name(), t.name());
    assertEquals(table.rowCount(), t.rowCount());
    assertEquals(table.columnCount(), t.columnCount());
    assertEquals(table.columnNames(), t.columnNames());
  }

  /** A table without any columns can be stored and restored. */
  @Test
  void saveEmptyTable() {
    Table restored = roundTrip(tempDir, empty);
    assertNotNull(restored);
  }

  /** A table with columns but no rows keeps its columns and stays empty. */
  @Test
  void saveNoDataTable() {
    Table restored = roundTrip(tempDir, noData);
    assertNotNull(restored);
    assertTrue(restored.columnCount() > 0);
    assertTrue(restored.isEmpty());
  }

  @Test
  void saveIntsOnly() {
    Table restored = roundTrip(tempDir, intsOnly);
    assertNotNull(restored);
    assertEquals(intsOnly.rowCount(), restored.rowCount());
  }

  @Test
  void saveBooleansOnly() {
    Table restored = roundTrip(tempDir, boolsOnly);
    assertNotNull(restored);
    assertEquals(boolsOnly.rowCount(), restored.rowCount());
  }

  @Test
  void saveIntsAndStrings() {
    Table restored = roundTrip(tempDir, intsAndStrings);
    assertNotNull(restored);
    assertEquals(intsAndStrings.rowCount(), restored.rowCount());
  }

  /** Instant columns keep their type, size and values. */
  @Test
  void saveInstants() {
    Table restored = roundTrip(tempDir, instants);
    assertEquals(100, restored.column(0).size());
    assertEquals(INSTANT, restored.column(1).type());
    assertEquals(instants.rowCount(), restored.rowCount());
    assertEquals(instants.instantColumn(1).get(20), restored.instantColumn(1).get(20));
  }

  @Test
  void bush() {
    Table bush = Table.read().csv("../data/bush.csv");
    Table restored = roundTrip("../testoutput/bush", bush);
    assertEquals(bush.rowCount(), restored.column(1).size());
  }

  @Test
  void tornado() {
    Table tornado = Table.read().csv("../data/tornadoes_1950-2014.csv");
    Table restored = roundTrip("../testoutput/tornadoes_1950-2014", tornado);
    assertTrue(restored.column(1).size() > 0);
    assertEquals(tornado.columnCount(), restored.columnCount());
    assertEquals(tornado.rowCount(), restored.rowCount());
  }

  @Test
  void baseball() {
    Table restored = roundTrip("../testoutput/baseball", baseball);
    // Check the restored table, not the source, so the assertion covers the round trip.
    assertTrue(restored.column(1).size() > 0);
    assertEquals(baseball.columnCount(), restored.columnCount());
    assertEquals(baseball.rowCount(), restored.rowCount());
  }

  /** The reader exposes shape, counts, column names and structure of a stored table. */
  @Test
  void metadata() {
    String path = new SawWriter("../testoutput/baseball", baseball).write();
    assertEquals("baseball.csv: 1232 rows X 15 cols", new SawReader(path).shape());
    assertEquals(1232, new SawReader(path).rowCount());
    assertEquals(15, new SawReader(path).columnCount());
    assertEquals(baseball.columnNames(), new SawReader(path).columnNames());
    assertEquals(baseball.structure().printAll(), new SawReader(path).structure().printAll());
  }

  /** Reading a subset of columns from a table written with default options. */
  @Test
  void selectedColumns() {
    String path = new SawWriter("../testoutput/baseball", baseball).write();
    assertSelectedBaseballColumns(path);
  }

  /** Reading a subset of columns from a table written without compression. */
  @Test
  void noCompression() {
    SawWriteOptions options = new SawWriteOptions().compressionType(CompressionType.NONE);
    String path = new SawWriter("../testoutput/baseball", baseball, options).write();
    assertSelectedBaseballColumns(path);
  }

  /** Reading a subset of columns from a table written with LZ4 compression. */
  @Test
  void lz4Compression() {
    SawWriteOptions options = new SawWriteOptions().compressionType(CompressionType.LZ4);
    String path = new SawWriter("../testoutput/baseball", baseball, options).write();
    assertSelectedBaseballColumns(path);
  }

  @Test
  void bostonRobberies() {
    Table robberies = Table.read().csv("../data/boston-robberies.csv");
    Table restored = roundTrip("../testoutput/boston_robberies", robberies);
    assertEquals(robberies.columnCount(), restored.columnCount());
    assertEquals(robberies.rowCount(), restored.rowCount());
  }

  @Test
  void sacramento() {
    Table sacramento = Table.read().csv("../data/sacramento_real_estate_transactions.csv");
    Table restored = roundTrip("../testoutput/sacramento", sacramento);
    assertEquals(sacramento.columnCount(), restored.columnCount());
    assertEquals(sacramento.rowCount(), restored.rowCount());
  }

  /** The string dictionary survives a first round trip and a second one of the restored table. */
  @Test
  void test_wines() {
    String folder = "../testoutput/test_wines";
    Table wines = Table.read().csv("../data/test_wines.csv");

    Table restored = roundTrip(folder, wines);
    assertEquals(wines.columnCount(), restored.columnCount());
    assertEquals(wines.rowCount(), restored.rowCount());
    assertEquals(
        wines.stringColumn("name").getDictionary(), restored.stringColumn("name").getDictionary());

    // Write the restored table again and read from the path that second write reports.
    Table restoredAgain = roundTrip(folder, restored);
    assertEquals(
        wines.stringColumn("name").getDictionary(),
        restoredAgain.stringColumn("name").getDictionary());
  }

  /**
   * Round-trips three 10,000-row string columns holding 10,000, 100 and 1,000 distinct values
   * respectively, then round-trips the restored table once more.
   */
  @Test
  void saveStrings() {
    StringColumn index2 = StringColumn.create("index2");
    for (int j = 0; j < 100; j++) {
      for (int i = 0; i < 100; i++) {
        index2.append(String.valueOf(i));
      }
    }
    StringColumn index3 = StringColumn.create("index3");
    for (int j = 0; j < 10; j++) {
      for (int i = 0; i < 1000; i++) {
        index3.append(String.valueOf(i));
      }
    }
    final Table strings =
        Table.create(
            "million ints",
            IntColumn.indexColumn("index1", 10_000, 1).asStringColumn().setName("index1"),
            index2,
            index3);

    Table restored = roundTrip(tempDir, strings);
    assertEquals(strings.columnCount(), restored.columnCount());
    assertEquals(strings.rowCount(), restored.rowCount());
    assertEquals(
        strings.stringColumn("index2").getDictionary(),
        restored.stringColumn("index2").getDictionary());

    // Write the restored table again and read from the path that second write reports.
    Table restoredAgain = roundTrip(tempDir, restored);
    assertEquals(
        strings.stringColumn("index1").getDictionary(),
        restoredAgain.stringColumn("index1").getDictionary());
    assertEquals(
        strings.stringColumn("index2").getDictionary(),
        restoredAgain.stringColumn("index2").getDictionary());
  }
}