FixedWidthReaderTest.java

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.io.fixed;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static tech.tablesaw.api.ColumnType.FLOAT;
import static tech.tablesaw.api.ColumnType.SHORT;
import static tech.tablesaw.api.ColumnType.SKIP;
import static tech.tablesaw.api.ColumnType.STRING;

import com.google.common.collect.ImmutableMap;
import com.univocity.parsers.fixed.FixedWidthFields;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Arrays;
import java.util.Locale;
import org.junit.jupiter.api.Test;
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.api.Table;

/** Tests for CSV Reading */
public class FixedWidthReaderTest {

  private final FixedWidthFields car_fields_specs = new FixedWidthFields(4, 5, 40, 40, 8);
  private final ColumnType[] car_types = {SHORT, STRING, STRING, STRING, SHORT};
  private final ColumnType[] car_types_with_SKIP = {SHORT, STRING, STRING, SKIP, FLOAT};

  @Test
  public void testWithCarsData() throws Exception {

    Table table =
        Table.read()
            .usingOptions(
                FixedWidthReadOptions.builder("../data/fixed_width_cars_test.txt")
                    .header(true)
                    .columnTypes(car_types)
                    .columnSpecs(car_fields_specs)
                    .padding('_')
                    .systemLineEnding()
                    .build());

    String[] expected = new String[] {"Year", "Make", "Model", "Description", "Price"};
    assertArrayEquals(expected, table.columnNames().toArray());

    table = table.sortDescendingOn("Year");
    table.removeColumns("Description");

    expected = new String[] {"Year", "Make", "Model", "Price"};
    assertArrayEquals(expected, table.columnNames().toArray());
  }

  @Test
  public void testWithColumnSKIP() throws Exception {

    Table table =
        Table.read()
            .usingOptions(
                FixedWidthReadOptions.builder("../data/fixed_width_cars_test.txt")
                    .header(true)
                    .columnTypes(car_types_with_SKIP)
                    .columnSpecs(car_fields_specs)
                    .padding('_')
                    .systemLineEnding()
                    .build());

    assertEquals(4, table.columnCount());

    String[] expected = {"Year", "Make", "Model", "Price"};
    assertArrayEquals(expected, table.columnNames().toArray());
  }

  @Test
  public void testWithColumnSKIPWithoutHeader() throws Exception {

    Table table =
        Table.read()
            .usingOptions(
                FixedWidthReadOptions.builder("../data/fixed_width_cars_no_header_test.txt")
                    .header(false)
                    .columnTypes(car_types_with_SKIP)
                    .columnSpecs(car_fields_specs)
                    .padding('_')
                    .systemLineEnding()
                    .skipTrailingCharsUntilNewline(true)
                    .build());

    assertEquals(4, table.columnCount());

    String[] expected = new String[] {"C0", "C1", "C2", "C4"};
    assertArrayEquals(expected, table.columnNames().toArray());
  }

  @Test
  public void testDataTypeDetection() throws Exception {

    InputStream stream = new FileInputStream(new File("../data/fixed_width_cars_test.txt"));
    FixedWidthReadOptions options =
        FixedWidthReadOptions.builder(stream)
            .header(true)
            .columnSpecs(car_fields_specs)
            .padding('_')
            .systemLineEnding()
            .sample(false)
            .locale(Locale.getDefault())
            .minimizeColumnSizes()
            .build();

    Reader reader = new FileReader("../data/fixed_width_missing_values.txt");
    ColumnType[] columnTypes = new FixedWidthReader().detectColumnTypes(reader, options);
    assertArrayEquals(car_types, columnTypes);
  }

  @Test
  public void testWithMissingValue() throws Exception {

    Reader reader = new FileReader("../data/fixed_width_missing_values.txt");
    FixedWidthReadOptions options =
        FixedWidthReadOptions.builder(reader)
            .header(true)
            .columnSpecs(car_fields_specs)
            .padding('_')
            .systemLineEnding()
            .missingValueIndicator("null")
            .minimizeColumnSizes()
            .sample(false)
            .build();

    Table t = Table.read().usingOptions(options);

    assertEquals(2, t.shortColumn(0).countMissing());
    assertEquals(2, t.stringColumn(1).countMissing());
    assertEquals(1, t.stringColumn(2).countMissing());
    assertEquals(3, t.stringColumn(3).countMissing());
  }

  @Test
  public void testWithSkipTrailingCharsUntilNewline() throws Exception {

    Table table =
        Table.read()
            .usingOptions(
                FixedWidthReadOptions.builder("../data/fixed_width_wrong_line_length.txt")
                    .header(true)
                    .columnTypes(car_types)
                    .columnSpecs(car_fields_specs)
                    .padding('_')
                    .systemLineEnding()
                    .skipTrailingCharsUntilNewline(true)
                    .build());

    String[] expected = new String[] {"Year", "Make", "Model", "Description", "Price"};
    assertArrayEquals(expected, table.columnNames().toArray());

    table = table.sortDescendingOn("Year");
    table.removeColumns("Price");

    expected = new String[] {"Year", "Make", "Model", "Description"};
    assertArrayEquals(expected, table.columnNames().toArray());
  }

  @Test
  public void testCustomizedColumnTypesMixedWithDetection() throws Exception {
    InputStream stream = new FileInputStream(new File("../data/fixed_width_cars_test.txt"));
    FixedWidthReadOptions options =
        FixedWidthReadOptions.builder(stream)
            .header(true)
            .columnSpecs(car_fields_specs)
            .padding('_')
            .systemLineEnding()
            .sample(false)
            .locale(Locale.getDefault())
            .minimizeColumnSizes()
            .columnTypesPartial(ImmutableMap.of("Year", STRING))
            .build();

    ColumnType[] columnTypes = new FixedWidthReader().read(options).typeArray();

    ColumnType[] expectedTypes = Arrays.copyOf(car_types, car_types.length);
    car_types[0] = STRING; // Year
    assertArrayEquals(expectedTypes, columnTypes);
  }

  @Test
  public void testCustomizedColumnTypeAllCustomized() throws IOException {
    InputStream stream = new FileInputStream("../data/fixed_width_cars_test.txt");
    FixedWidthReadOptions options =
        FixedWidthReadOptions.builder(stream)
            .header(true)
            .columnSpecs(car_fields_specs)
            .padding('_')
            .systemLineEnding()
            .sample(false)
            .locale(Locale.getDefault())
            .minimizeColumnSizes()
            .columnTypes(columnName -> STRING)
            .build();

    ColumnType[] columnTypes = new FixedWidthReader().read(options).typeArray();

    assertTrue(Arrays.stream(columnTypes).allMatch(columnType -> columnType.equals(STRING)));
  }
}