XlsxReaderTest.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tech.tablesaw.io.xlsx;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static tech.tablesaw.api.ColumnType.BOOLEAN;
import static tech.tablesaw.api.ColumnType.DOUBLE;
import static tech.tablesaw.api.ColumnType.FLOAT;
import static tech.tablesaw.api.ColumnType.INTEGER;
import static tech.tablesaw.api.ColumnType.LOCAL_DATE_TIME;
import static tech.tablesaw.api.ColumnType.LONG;
import static tech.tablesaw.api.ColumnType.STRING;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Test;
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.api.Table;
import tech.tablesaw.columns.Column;
public class XlsxReaderTest {
private List<Table> readN(String name, int expectedCount) {
try {
String fileName = name + ".xlsx";
List<Table> tables =
new XlsxReader().readMultiple(XlsxReadOptions.builder("../data/" + fileName).build());
assertNotNull(tables, "No tables read from " + fileName);
assertEquals(expectedCount, tables.size(), "Wrong number of tables in " + fileName);
return tables;
} catch (final IOException e) {
fail(e.getMessage());
}
return null;
}
private Table read1(String name, int size, String... columnNames) {
Table table = readN(name, 1).get(0);
int colNum = 0;
for (final Column<?> column : table.columns()) {
assertEquals(columnNames[colNum], column.name(), "Wrong column name");
assertEquals(size, column.size(), "Wrong size for column " + columnNames[colNum]);
colNum++;
}
return table;
}
@SafeVarargs
private final <T> void assertColumnValues(Column<T> column, T... ts) {
for (int i = 0; i < column.size(); i++) {
if (ts[i] == null) {
assertTrue(
column.isMissing(i),
"Should be missing value in row "
+ i
+ " of column "
+ column.name()
+ ", but it was "
+ column.get(i));
} else {
assertEquals(
ts[i], column.get(i), "Wrong value in row " + i + " of column " + column.name());
}
}
}
@Test
public void testColumns() {
Table table =
read1(
"columns",
3,
"stringcol",
"shortcol",
"intcol",
"longcol",
"doublecol",
"booleancol",
"datecol",
"formulacol",
"mixed",
"mixed2",
"intcol2");
// stringcol shortcol intcol longcol doublecol booleancol datecol
// formulacol
// Hallvard 123 12345678 12345678900 12,34 TRUE 22/02/2019 20:54:09 135.34
// Marit 124 12345679 12345678901 13,35 FALSE 23/03/2020 21:55:10 137.35
assertColumnValues(table.stringColumn("stringcol"), "Hallvard", "Marit", "Quentin");
assertColumnValues(table.intColumn("shortcol"), 123, 124, 125);
assertColumnValues(table.intColumn("intcol"), 12345678, 12345679, 12345679);
assertColumnValues(table.longColumn("longcol"), 12345678900L, 12345678901L, 12345678901L);
assertColumnValues(table.doubleColumn("doublecol"), 12.34, 13.35, 13.35);
assertColumnValues(table.booleanColumn("booleancol"), true, false, false);
assertColumnValues(
table.dateTimeColumn("datecol"),
LocalDateTime.of(2019, 2, 22, 20, 54, 9),
LocalDateTime.of(2020, 3, 23, 21, 55, 10),
LocalDateTime.of(2020, 3, 23, 21, 55, 10));
assertColumnValues(table.doubleColumn("formulacol"), 135.34, 137.35, 138.35);
assertEquals(table.column("mixed").asList(), Lists.newArrayList("123.00", "abc", ""));
assertEquals(table.column("mixed2").asList(), Lists.newArrayList("abc", "123", ""));
assertEquals(table.column("intcol2").asList(), Lists.newArrayList(null, 1234, 1234));
}
@Test
public void testColumnsWithMissingValues() {
Table table =
read1(
"columns-with-missing-values",
2,
"stringcol",
"shortcol",
"intcol",
"longcol",
"doublecol",
"booleancol",
"datecol",
"formulacol");
// stringcol shortcol intcol longcol doublecol booleancol
// datecol
// Hallvard 12345678 12345678900 TRUE 22/02/2019
// 20:54:09
// 124 12345679 13,35
assertColumnValues(table.stringColumn("stringcol"), "Hallvard", null);
assertColumnValues(table.intColumn("shortcol"), null, 124);
assertColumnValues(table.intColumn("intcol"), 12345678, 12345679);
assertColumnValues(table.longColumn("longcol"), 12345678900L, null);
assertColumnValues(table.doubleColumn("doublecol"), null, 13.35);
assertColumnValues(table.booleanColumn("booleancol"), true, null);
assertColumnValues(
table.dateTimeColumn("datecol"), LocalDateTime.of(2019, 2, 22, 20, 54, 9), null);
assertColumnValues(table.doubleColumn("formulacol"), null, 137.35);
}
@Test
public void testSheetIndex() throws IOException {
Table table =
new XlsxReader()
.read(XlsxReadOptions.builder("../data/multiplesheets.xlsx").sheetIndex(1).build());
assertNotNull(table, "No table read from multiplesheets.xlsx");
assertColumnValues(table.stringColumn("stringcol"), "John", "Doe");
assertEquals("multiplesheets.xlsx#Sheet2", table.name(), "table name is different");
Table tableImplicit =
new XlsxReader().read(XlsxReadOptions.builder("../data/multiplesheets.xlsx").build());
// the table from the 2nd sheet should be picked up
assertNotNull(tableImplicit, "No table read from multiplesheets.xlsx");
try {
new XlsxReader()
.read(XlsxReadOptions.builder("../data/multiplesheets.xlsx").sheetIndex(0).build());
fail("First sheet is empty, no table should be found");
} catch (IllegalArgumentException iae) {
// expected
}
try {
new XlsxReader()
.read(XlsxReadOptions.builder("../data/multiplesheets.xlsx").sheetIndex(5).build());
fail("Only 2 sheets exist, no sheet 5");
} catch (IndexOutOfBoundsException iobe) {
// expected
}
}
@Test
public void testEmptyFileThrowsIllegalArgumentNoTableFound() throws IOException {
assertThrows(
IllegalArgumentException.class,
() -> {
new XlsxReader().read(XlsxReadOptions.builder("../data/empty.xlsx").build());
});
}
@Test
public void testCustomizedColumnTypesMixedWithDetection() throws IOException {
Table table =
new XlsxReader()
.read(
XlsxReadOptions.builder("../data/columns.xlsx")
.columnTypesPartial(
ImmutableMap.of("shortcol", DOUBLE, "intcol", LONG, "formulacol", FLOAT))
.build());
ColumnType[] columnTypes = table.typeArray();
assertArrayEquals(
columnTypes,
new ColumnType[] {
STRING,
DOUBLE,
LONG,
LONG,
DOUBLE,
BOOLEAN,
LOCAL_DATE_TIME,
FLOAT,
STRING,
STRING,
INTEGER
});
assertEquals(table.column("mixed").asList(), Lists.newArrayList("123.00", "abc", ""));
assertEquals(table.column("mixed2").asList(), Lists.newArrayList("abc", "123", ""));
assertEquals(table.column("intcol2").asList(), Lists.newArrayList(null, 1234, 1234));
}
@Test
public void testCustomizedColumnTypeAllCustomized() throws IOException {
Table table =
new XlsxReader()
.read(
XlsxReadOptions.builder("../data/columns.xlsx")
.columnTypes(columName -> STRING)
.build());
ColumnType[] columnTypes = table.typeArray();
assertTrue(Arrays.stream(columnTypes).allMatch(columnType -> columnType.equals(STRING)));
}
@Test
public void testCustomizedEmptyColumnsArePreserved() throws IOException {
Table table =
new XlsxReader()
.read(
XlsxReadOptions.builder("../data/columns.xlsx")
.columnTypes(columName -> STRING)
.build());
assertEquals(
table.column("empty").type(),
STRING,
"Empty column must be preserved as it's type is specified");
}
@Test
public void testCustomizedColumnStringShouldTryToPreserveValuesFromOtherExcelTypes()
throws IOException {
Table table =
new XlsxReader()
.read(
XlsxReadOptions.builder("../data/columns.xlsx")
.columnTypes(columName -> STRING)
.build());
assertEquals(
table.column("stringcol").asList(), Lists.newArrayList("Hallvard", "Marit", "Quentin"));
assertEquals(
table.column("intcol").asList(), Lists.newArrayList("12345678", "12345679", "12345679"));
// Not ideal, format viewed in excel is without E+10 notation
assertEquals(
table.column("longcol").asList(),
Lists.newArrayList("1.23457E+10", "1.23457E+10", "1.23457E+10"));
assertEquals(table.column("doublecol").asList(), Lists.newArrayList("12.34", "13.35", "13.35"));
assertEquals(table.column("booleancol").asList(), Lists.newArrayList("TRUE", "FALSE", "FALSE"));
assertEquals(
table.column("datecol").asList(),
Lists.newArrayList("22/02/2019 20:54:09", "23/03/2020 21:55:10", "23/03/2020 21:55:10"));
assertEquals(
table.column("formulacol").asList(), Lists.newArrayList("135.34", "137.35", "138.35"));
assertEquals(table.column("empty").asList(), Lists.newArrayList("", "", ""));
assertEquals(table.column("mixed").asList(), Lists.newArrayList("123.00", "abc", ""));
assertEquals(table.column("mixed2").asList(), Lists.newArrayList("abc", "123", ""));
assertEquals(table.column("intcol2").asList(), Lists.newArrayList("", "1234", "1234"));
}
}