// FixedWidthParserTest.java
/*******************************************************************************
* Copyright 2014 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.fixed;
import com.univocity.parsers.*;
import com.univocity.parsers.annotations.*;
import com.univocity.parsers.common.processor.*;
import org.testng.annotations.*;
import java.io.*;
import java.util.*;
import static org.testng.Assert.*;
public class FixedWidthParserTest extends ParserTestCase {
/**
 * Supplies the (file suffix, line separator) pairs consumed by the tests that
 * declare {@code dataProvider = "fileProvider"}.
 * Every suffix appears twice: once with an explicit separator sequence and once
 * with {@code null}, for which {@code newSettings} turns on line separator detection.
 *
 * @return rows of {@code {String suffix, char[] separatorOrNull}}
 */
@DataProvider(name = "fileProvider")
public Object[][] csvProvider() {
	final char[] unixSeparator = {'\n'};
	final char[] dosSeparator = {'\r', '\n'};
	final char[] macSeparator = {'\r'};
	final char[] detectSeparator = null;

	return new Object[][]{
		{".txt", unixSeparator},
		{"-dos.txt", dosSeparator},
		{"-mac.txt", macSeparator},
		{".txt", detectSeparator},
		{"-dos.txt", detectSeparator},
		{"-mac.txt", detectSeparator}
	};
}
/**
 * Returns the field layout used by the file-based tests in this class:
 * four fields of 11, 38, 20 and 8 characters.
 *
 * @return a new {@link FixedWidthFields} instance with those lengths
 */
protected FixedWidthFields getFieldLengths() {
	int[] widths = {11, 38, 20, 8};
	return new FixedWidthFields(widths);
}
/**
 * Builds parser settings for the given field layout and line separator.
 *
 * @param lengths       the field layout the parser should use
 * @param lineSeparator the line separator sequence to configure, or {@code null}
 *                      to enable automatic line separator detection instead
 * @return the configured settings
 */
private FixedWidthParserSettings newSettings(FixedWidthFields lengths, char[] lineSeparator) {
	// Honour the layout handed in by the caller instead of silently
	// re-deriving it from getFieldLengths(), which left the parameter unused.
	FixedWidthParserSettings settings = new FixedWidthParserSettings(lengths);
	if (lineSeparator == null) {
		settings.setLineSeparatorDetectionEnabled(true);
	} else {
		settings.getFormat().setLineSeparator(lineSeparator);
	}
	return settings;
}
/**
 * Parses {@code /fixed/essential<suffix>} with header extraction enabled and
 * verifies the headers and rows collected by the row processor.
 *
 * @param fileExtension suffix appended to the resource name
 * @param lineSeparator explicit line separator, or {@code null} for detection
 */
@Test(enabled = true, dataProvider = "fileProvider")
public void testFixedWidthParser(String fileExtension, char[] lineSeparator) throws Exception {
	FixedWidthParserSettings config = newSettings(getFieldLengths(), lineSeparator);
	config.setHeaderExtractionEnabled(true);
	config.setRowProcessor(processor);

	new FixedWidthParser(config).parse(newReader("/fixed/essential" + fileExtension));

	String[] expectedHeaders = {"DATE", "NAME", "OWED", "INTEREST"};
	String[][] expectedRows = {
		{"2013-FEB-28", "Harry Dong", "15000.99", "8.786"},
		{"2013-JAN-1", "Billy Rubin", "15100.99", "5"},
		{"2012-SEP-1", "Willie Stroker", "15000.00", "6"},
		{"2012-JAN-11", "Mike Litoris", "15000", "4.86"},
		{"2010-JUL-01", "Gaye Males", "1", "8.6"}
	};
	assertHeadersAndValuesMatch(expectedHeaders, expectedRows);
}
/**
 * Overrides the inherited factory method to supply a plain {@link RowListProcessor}.
 *
 * @return a new {@link RowListProcessor}
 */
@Override
protected RowListProcessor newRowListProcessor() {
	RowListProcessor rowCollector = new RowListProcessor();
	return rowCollector;
}
/**
 * Parses {@code /fixed/essential_1<suffix>} with headers supplied explicitly
 * (header extraction disabled), records ending on a newline and trailing
 * characters skipped until the newline. The third expected row carries only
 * two values.
 *
 * @param fileExtension suffix appended to the resource name
 * @param lineSeparator explicit line separator, or {@code null} for detection
 */
@Test(enabled = true, dataProvider = "fileProvider")
public void testFixedWidthParserSkippingUntilNewLine(String fileExtension, char[] lineSeparator) throws Exception {
	String[] expectedHeaders = {"DATE", "NAME", "OWED", "INTEREST"};

	FixedWidthParserSettings config = newSettings(getFieldLengths(), lineSeparator);
	config.setSkipTrailingCharsUntilNewline(true);
	config.setRecordEndsOnNewline(true);
	config.setHeaderExtractionEnabled(false);
	config.setRowProcessor(processor);
	config.setHeaders(expectedHeaders);

	new FixedWidthParser(config).parse(newReader("/fixed/essential_1" + fileExtension));

	String[][] expectedRows = {
		{"2013-FEB-28", "Harry Dong", "15000.99", "8.786"},
		{"2013-JAN-1", "Billy Rubin", "15100.99", "5"},
		{"2012-SEP-1", "Willie Stroker"},
		{"2012-JAN-11", "Mike Litoris", "15000", "4.86"},
		{"2010-JUL-01", "Gaye Males", "1", "8.6"}
	};
	assertHeadersAndValuesMatch(expectedHeaders, expectedRows);
}
/**
 * Parses {@code /fixed/essential_3<suffix>} using {@code '_'} as the padding
 * character, with headers supplied explicitly, records ending on a newline and
 * trailing characters skipped until the newline.
 *
 * @param fileExtension suffix appended to the resource name
 * @param lineSeparator explicit line separator, or {@code null} for detection
 */
@Test(enabled = true, dataProvider = "fileProvider")
public void testFixedWidthParserWithPadding(String fileExtension, char[] lineSeparator) throws Exception {
	String[] expectedHeaders = {"DATE", "NAME", "OWED", "INTEREST"};

	FixedWidthParserSettings config = newSettings(getFieldLengths(), lineSeparator);
	config.getFormat().setPadding('_');
	config.setSkipTrailingCharsUntilNewline(true);
	config.setRecordEndsOnNewline(true);
	config.setHeaderExtractionEnabled(false);
	config.setRowProcessor(processor);
	config.setHeaders(expectedHeaders);

	new FixedWidthParser(config).parse(newReader("/fixed/essential_3" + fileExtension));

	String[][] expectedRows = {
		{"2013-FEB-28", "Harry Dong", "15000.99", "8.786"},
		{"2013-JAN-1", "Billy Rubin", "15100.99", "5"},
		{"2012-SEP-1", "Willie Stroker"},
		{"2012-JAN-11", "Mike Litoris", "15000", "4.86"},
		{"2010-JUL-01", "Gaye Males", "1", "8.6"}
	};
	assertHeadersAndValuesMatch(expectedHeaders, expectedRows);
}
/**
 * Parses {@code /fixed/essential_2<suffix>} using {@code '_'} as the padding
 * character while leaving leading and trailing whitespace in place, so the
 * expected values keep their surrounding spaces.
 *
 * @param fileExtension suffix appended to the resource name
 * @param lineSeparator explicit line separator, or {@code null} for detection
 */
@Test(enabled = true, dataProvider = "fileProvider")
public void testFixedWidthParserWithPaddingAndNoTrimming(String fileExtension, char[] lineSeparator) throws Exception {
	String[] expectedHeaders = {"DATE", "NAME", "OWED", "INTEREST"};

	FixedWidthParserSettings config = newSettings(getFieldLengths(), lineSeparator);
	config.getFormat().setPadding('_');
	config.setSkipTrailingCharsUntilNewline(true);
	// Whitespace trimming is switched off on both sides of each value.
	config.setIgnoreLeadingWhitespaces(false);
	config.setIgnoreTrailingWhitespaces(false);
	config.setRecordEndsOnNewline(true);
	config.setHeaderExtractionEnabled(false);
	config.setRowProcessor(processor);
	config.setHeaders(expectedHeaders);

	new FixedWidthParser(config).parse(newReader("/fixed/essential_2" + fileExtension));

	String[][] expectedRows = {
		{"2013-FEB-28", " Harry Dong ", "15000.99", " 8.786"},
		{"2013-JAN-1", "Billy Rubin ", "15100.99", "5"},
		{"2012-SEP-1", " Willie Stroker"},
		{"2012-JAN-11", "Mike Litoris ", "15000", "4.86"},
		{"2010-JUL-01", " Gaye Males ", " 1 ", "8.6 "}
	};
	assertHeadersAndValuesMatch(expectedHeaders, expectedRows);
}
/**
 * Parses three lines with a format-wide padding of {@code '_'} and, for the
 * field at index 1, a field-specific padding of {@code '0'} with right
 * alignment. The first line is consumed as the header row; the headers are
 * read from the parsing context after the two data rows have been checked.
 */
@Test
public void testParsingWithPaddingPerField() {
	FixedWidthFields layout = new FixedWidthFields(20, 8);
	layout.setPadding('0', 1);
	layout.setAlignment(FieldAlignment.RIGHT, 1);

	FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(layout);
	parserSettings.getFormat().setPadding('_');
	parserSettings.getFormat().setLineSeparator("\n");
	parserSettings.setHeaderExtractionEnabled(true);

	String input = "ziel____________________plzV\nziel0_______________00000000\nziel1_______________00000001\n";

	FixedWidthParser parser = new FixedWidthParser(parserSettings);
	parser.beginParsing(new StringReader(input));

	String[] firstRow = parser.parseNext();
	assertEquals(firstRow, new String[]{"ziel0", null});

	String[] secondRow = parser.parseNext();
	assertEquals(secondRow, new String[]{"ziel1", "1"});

	String[] headers = parser.getContext().headers();
	assertEquals(headers, new String[]{"ziel", "plzV"});
}
/**
 * Feeds twelve characters with no line breaks to a parser configured with
 * three 2-character fields and checks the values of the first two records
 * returned by {@code parseNext()}.
 */
@Test
public void testParsingWithoutRecordBreaks() {
	FixedWidthFields layout = new FixedWidthFields(2, 2, 2);
	FixedWidthParser parser = new FixedWidthParser(new FixedWidthParserSettings(layout));
	parser.beginParsing(new StringReader("abcdefghijkl"));

	String[] firstRecord = parser.parseNext();
	assertEquals(firstRecord[0], "ab");
	assertEquals(firstRecord[1], "cd");
	assertEquals(firstRecord[2], "ef");

	String[] secondRecord = parser.parseNext();
	assertEquals(secondRecord[0], "gh");
	assertEquals(secondRecord[1], "ij");
	assertEquals(secondRecord[2], "kl");
}
/**
 * Same scenario as the no-record-break test, but with four 2-character fields
 * and input containing spaces after each group of letters; the fourth value of
 * each record is expected to be {@code null}.
 */
@Test
public void testParsingWithoutRecordBreaksButTrailingSpaces() {
	FixedWidthFields layout = new FixedWidthFields(2, 2, 2, 2);
	FixedWidthParser parser = new FixedWidthParser(new FixedWidthParserSettings(layout));
	parser.beginParsing(new StringReader("abcdef ghijkl "));

	String[] firstRecord = parser.parseNext();
	assertEquals(firstRecord[0], "ab");
	assertEquals(firstRecord[1], "cd");
	assertEquals(firstRecord[2], "ef");
	assertEquals(firstRecord[3], null);

	String[] secondRecord = parser.parseNext();
	assertEquals(secondRecord[0], "gh");
	assertEquals(secondRecord[1], "ij");
	assertEquals(secondRecord[2], "kl");
	assertEquals(secondRecord[3], null);
}
/**
 * Parses four single lines with two 3-character fields, {@code '_'} padding
 * and {@code skipBitsAsWhitespace} disabled, checking which of the
 * {@code \0}, {@code \1} and {@code \2} characters survive in the parsed values.
 */
@Test
public void testBitsAreNotDiscardedWhenParsing() {
	FixedWidthParserSettings config = new FixedWidthParserSettings(new FixedWidthFields(3, 3));
	config.getFormat().setPadding('_');
	config.setSkipBitsAsWhitespace(false);

	FixedWidthParser parser = new FixedWidthParser(config);

	String[] row1 = parser.parseLine("\0 a_b_");
	assertEquals(row1.length, 2);
	assertEquals(row1[0], "\0 a");
	assertEquals(row1[1], "b");

	String[] row2 = parser.parseLine("\1_ab \0");
	assertEquals(row2.length, 2);
	assertEquals(row2[0], "\1_a");
	assertEquals(row2[1], "b \0");

	String[] row3 = parser.parseLine("_\2ab\1_");
	assertEquals(row3.length, 2);
	assertEquals(row3[0], "a");
	assertEquals(row3[1], "b\1");

	String[] row4 = parser.parseLine("\2_ab\1_");
	assertEquals(row4.length, 2);
	assertEquals(row4[0], "a");
	assertEquals(row4[1], "b\1");
}
/**
 * Bean used by {@code testFixedWidthAnnotation}. Both fields are marked
 * {@code @Parsed} and carry a {@code @FixedWidth} annotation: {@code a} with a
 * value of 4, {@code b} with the range {@code from = 5, to = 14}.
 */
public static class X {

	@Parsed
	@FixedWidth(4)
	int a;

	@Parsed
	@FixedWidth(from = 5, to = 14)
	String b;

	/** No-argument constructor. */
	public X() {
	}

	/**
	 * Creates a bean with both fields set.
	 *
	 * @param a value for field {@code a}
	 * @param b value for field {@code b}
	 */
	public X(int a, String b) {
		this.a = a;
		this.b = b;
	}
}
/**
 * Parses two lines into {@link X} beans using settings created without an
 * explicit field layout, then checks the {@code a} and {@code b} values of
 * both beans.
 */
@Test
public void testFixedWidthAnnotation() throws Exception {
	BeanListProcessor<X> beanCollector = new BeanListProcessor<X>(X.class);

	FixedWidthParserSettings config = new FixedWidthParserSettings();
	config.getFormat().setLineSeparator("\n");
	config.setProcessor(beanCollector);

	new FixedWidthParser(config).parse(new StringReader("12 some text \n71 more text "));

	List<X> parsed = beanCollector.getBeans();
	assertEquals(parsed.size(), 2);

	X first = parsed.get(0);
	assertEquals(first.a, 12);
	assertEquals(first.b, "ome text");

	X second = parsed.get(1);
	assertEquals(second.a, 71);
	assertEquals(second.b, "ore text");
}
/**
 * Defines two fields by position range ({@code 5..7} and {@code 10..14}) and
 * checks the two values extracted from a single 15-character line.
 */
@Test
public void testFieldRanges() throws Exception {
	FixedWidthFields ranges = new FixedWidthFields();
	ranges.addField(5, 7).addField(10, 14);

	FixedWidthParser parser = new FixedWidthParser(new FixedWidthParserSettings(ranges));
	String[] values = parser.parseLine("123456789012345");

	assertEquals(values.length, 2);
	assertEquals(values[0], "67");
	assertEquals(values[1], "1234");
}
/**
 * Registers two lookahead formats, one for lines starting with {@code "1"}
 * (fields of length 1 and 5) and one for lines starting with {@code "2"}
 * (fields of length 1, 2 and 3), then checks that {@code parseLine} splits
 * each input according to the matching format.
 */
@Test
public void testSingleLineParsingWithLookAhead() {
	FixedWidthFields layoutForOne = new FixedWidthFields();
	layoutForOne.addField(1).addField(5);

	FixedWidthFields layoutForTwo = new FixedWidthFields();
	layoutForTwo.addField(1).addField(2).addField(3);

	FixedWidthParserSettings config = new FixedWidthParserSettings();
	config.addFormatForLookahead("1", layoutForOne);
	config.addFormatForLookahead("2", layoutForTwo);

	FixedWidthParser parser = new FixedWidthParser(config);

	String[] firstLine = parser.parseLine("1ABCDE");
	assertEquals(firstLine.length, 2);
	assertEquals(firstLine[0], "1");
	assertEquals(firstLine[1], "ABCDE");

	String[] secondLine = parser.parseLine("2ABCDE");
	assertEquals(secondLine.length, 3);
	assertEquals(secondLine[0], "2");
	assertEquals(secondLine[1], "AB");
	assertEquals(secondLine[2], "CDE");
}
}