// FixedWidthParserExamples.java
/*******************************************************************************
* Copyright 2014 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.examples;
import com.univocity.parsers.common.*;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.conversions.*;
import com.univocity.parsers.fixed.*;
import org.testng.annotations.*;
import java.math.*;
import java.util.*;
public class FixedWidthParserExamples extends Example {
/**
 * Parses the complete content of {@code /examples/example.txt} with a single call to
 * {@code parseAll} and hands the resulting rows to {@code printAndValidate}.
 */
@Test
public void example001ParseAll() throws Exception {
    //##CODE_START
    // the settings are built from the length of each field, in the order the fields appear in a record
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8));

    // "\n" is used as the line separator
    parserSettings.getFormat().setLineSeparator("\n");

    // '_' is the character used for padding unwritten spaces in the file
    parserSettings.getFormat().setPadding('_');

    // builds a fixed-width parser from these settings
    FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);

    // reads every row of the input in one go
    List<String[]> parsedRows = fixedWidthParser.parseAll(getReader("/examples/example.txt"));
    //##CODE_END

    // no headers are passed here, only the parsed rows
    printAndValidate(null, parsedRows);
}
/**
 * Reads {@code /examples/example.txt} one record at a time using
 * {@code beginParsing}/{@code parseNext}, printing each row into a buffer that is
 * finally passed to {@code printAndValidate}.
 */
@Test
public void example002ReadSimpleFixedWidth() throws Exception {
    StringBuilder output = new StringBuilder();

    //##CODE_START
    // the settings are built from the length of each field, in the order the fields appear in a record
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8));

    // '_' is the character used for padding unwritten spaces in the file
    parserSettings.getFormat().setPadding('_');
    parserSettings.getFormat().setLineSeparator("\n");

    // builds a fixed-width parser from these settings
    FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);

    // beginParsing prepares the parser so records can be read one by one, iterator-style.
    fixedWidthParser.beginParsing(getReader("/examples/example.txt"));

    // parseNext returns null once there are no more records
    for (String[] record = fixedWidthParser.parseNext(); record != null; record = fixedWidthParser.parseNext()) {
        println(output, Arrays.toString(record));
    }

    // Resources are closed automatically when the end of the input is reached,
    // so stopParsing() is only required when the input is not read to the end.
    // Calling it anyway does no harm.
    fixedWidthParser.stopParsing();
    //##CODE_END

    printAndValidate(output);
}
/**
 * Parses {@code /examples/example.txt} through a {@code RowListProcessor}, then reads the
 * headers and rows collected by that processor and passes them to {@code printAndValidate}.
 */
@Test
public void example003ReadFixedWidthWithRowProcessor() throws Exception {
    //##CODE_START
    FixedWidthParserSettings settings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8));
    settings.getFormat().setPadding('_');

    // the first parsed row is taken as the headers of each column in the file.
    settings.setHeaderExtractionEnabled(true);

    // let the parser detect by itself which line separator sequence the input uses
    settings.setLineSeparatorDetectionEnabled(true);

    // A RowListProcessor stores each parsed row in a List.
    // Any RowProcessor can be used here: your own implementation or one of the pre-defined
    // ones in the 'com.univocity.parsers.common.processor' package
    RowListProcessor listProcessor = new RowListProcessor();
    settings.setProcessor(listProcessor);

    // creates a parser instance with the given settings
    FixedWidthParser fixedWidthParser = new FixedWidthParser(settings);

    // 'parse' reads the input and delegates each parsed row to the configured RowProcessor
    fixedWidthParser.parse(getReader("/examples/example.txt"));

    // the parsed records are now available from the RowListProcessor.
    // Other RowProcessor implementations provide different sets of functionalities.
    List<String[]> parsedRows = listProcessor.getRows();
    String[] columnHeaders = listProcessor.getHeaders();
    //##CODE_END

    printAndValidate(columnHeaders, parsedRows);
}
/**
 * Parses {@code /examples/example.txt} with an {@code ObjectRowProcessor} that applies
 * conversions to selected columns, printing every converted row into a buffer that is
 * then passed to {@code printAndValidate}.
 */
@Test
public void example004ReadFixedWidthAndConvertValues() throws Exception {
    final StringBuilder output = new StringBuilder();

    //##CODE_START
    // ObjectRowProcessor converts the parsed values and gives you the resulting row.
    ObjectRowProcessor conversionProcessor = new ObjectRowProcessor() {
        @Override
        public void rowProcessed(Object[] row, ParsingContext context) {
            // the converted row arrives here; this example simply prints it.
            println(output, Arrays.toString(row));
        }
    };

    // values of the "Price" column (index 4) become BigDecimal
    conversionProcessor.convertIndexes(Conversions.toBigDecimal()).set(4);

    // values of the "Make", "Model" and "Description" columns are lower-cased, and the value "chevy" becomes null.
    conversionProcessor.convertFields(Conversions.toLowerCase(), Conversions.toNull("chevy")).set("Make", "Model", "Description");

    // values of the "year" column become BigInteger. Nulls are converted to BigInteger.ZERO.
    conversionProcessor.convertFields(new BigIntegerConversion(BigInteger.ZERO, "0")).set("year");

    FixedWidthParserSettings settings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8));
    settings.getFormat().setPadding('_');
    settings.getFormat().setLineSeparator("\n");
    settings.setHeaderExtractionEnabled(true);
    settings.setProcessor(conversionProcessor);

    FixedWidthParser fixedWidthParser = new FixedWidthParser(settings);

    // the row processor is executed while the input is parsed.
    fixedWidthParser.parse(getReader("/examples/example.txt"));
    //##CODE_END

    printAndValidate(output);
}
/**
 * Parses {@code /examples/bean_test.txt} into {@code TestBean} instances with a
 * {@code BeanListProcessor} and passes their string form to {@code printAndValidate}.
 */
@Test
public void example005UsingAnnotations() throws Exception {
    //##CODE_START
    FixedWidthParserSettings settings = new FixedWidthParserSettings(new FixedWidthFields(11, 15, 10, 10, 20));
    settings.getFormat().setLineSeparator("\n");
    settings.getFormat().setPadding('_');
    settings.setHeaderExtractionEnabled(true);

    // BeanListProcessor converts each parsed row to an instance of a given class, then stores each instance into a list.
    BeanListProcessor<TestBean> beanProcessor = new BeanListProcessor<TestBean>(TestBean.class);
    settings.setProcessor(beanProcessor);

    FixedWidthParser fixedWidthParser = new FixedWidthParser(settings);
    fixedWidthParser.parse(getReader("/examples/bean_test.txt"));

    // The BeanListProcessor provides a list of objects extracted from the input.
    List<TestBean> parsedBeans = beanProcessor.getBeans();
    //##CODE_END

    printAndValidate(parsedBeans.toString());
}
/**
 * Parses {@code /examples/master_detail.txt} with a {@code MasterDetailListProcessor} and
 * passes the master row and detail rows of the first record to {@code printAndValidate}.
 */
@Test
public void example006MasterDetail() throws Exception {
    //##CODE_START
    // 1st, Create a RowProcessor to process all "detail" elements
    ObjectRowListProcessor detailProcessor = new ObjectRowListProcessor();

    // converts values in the "Amount" column (position 1 in the file) to integer.
    detailProcessor.convertIndexes(Conversions.toInteger()).set(1);

    // 2nd, Create MasterDetailProcessor to identify whether or not a row is the master row.
    // the row placement argument indicates whether the master detail row occurs before or after a sequence of "detail" rows.
    MasterDetailListProcessor masterRowProcessor = new MasterDetailListProcessor(RowPlacement.BOTTOM, detailProcessor) {
        @Override
        protected boolean isMasterRecord(String[] row, ParsingContext context) {
            //Returns true if the parsed row is the master row.
            //In this example, rows that have "Total" in the first column are master rows.
            return "Total".equals(row[0]);
        }
    };

    // We want our master rows to store BigIntegers in the "Amount" column
    masterRowProcessor.convertIndexes(Conversions.toBigInteger()).set(1);

    FixedWidthFields lengths = new FixedWidthFields(12, 7);
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(lengths);

    // Set the line separator explicitly, as the other examples in this class do,
    // so the result does not depend on the platform's default line separator.
    parserSettings.getFormat().setLineSeparator("\n");
    parserSettings.setHeaderExtractionEnabled(true);

    // Set the RowProcessor to the masterRowProcessor.
    parserSettings.setProcessor(masterRowProcessor);

    FixedWidthParser parser = new FixedWidthParser(parserSettings);
    parser.parse(getReader("/examples/master_detail.txt"));

    // Only the first master/detail record is inspected by this example.
    List<MasterDetailRecord> rows = masterRowProcessor.getRecords();
    MasterDetailRecord masterRecord = rows.get(0);

    // The master record has one master row and multiple detail rows.
    Object[] masterRow = masterRecord.getMasterRow();
    List<Object[]> detailRows = masterRecord.getDetailRows();
    //##CODE_END

    printAndValidate(masterRow, detailRows);
}
/**
 * Parses {@code /examples/example.txt} with a {@code BatchedColumnProcessor} using a batch
 * size of 3, printing the column values of each batch into a buffer that is then passed to
 * {@code printAndValidate}.
 */
@Test
public void example007BatchedColumns() throws Exception {
    final StringBuilder output = new StringBuilder();

    FixedWidthParserSettings settings = new FixedWidthParserSettings(new FixedWidthFields(4, 5, 40, 40, 8));
    settings.getFormat().setLineSeparator("\n");
    settings.getFormat().setPadding('_');
    settings.setHeaderExtractionEnabled(true);

    //##CODE_START
    //To process larger inputs, we can use a batched column processor.
    //A batch size of 3 means each batch holds the column values of at most 3 rows.
    settings.setProcessor(new BatchedColumnProcessor(3) {
        @Override
        public void batchProcessed(int rowsInThisBatch) {
            println(output, "Batch " + getBatchesProcessed() + ":");

            // one list of values per column, printed with the column's position
            List<List<String>> valuesPerColumn = getColumnValuesAsList();
            for (int columnIndex = 0; columnIndex < valuesPerColumn.size(); columnIndex++) {
                println(output, "Column " + columnIndex + ":" + valuesPerColumn.get(columnIndex));
            }
        }
    });

    FixedWidthParser fixedWidthParser = new FixedWidthParser(settings);
    fixedWidthParser.parse(getReader("/examples/example.txt"));
    //##CODE_END

    printAndValidate(output);
}
/**
 * Loads {@code TestBean} instances from {@code /examples/bean_test.txt}, then uses a
 * {@code FixedWidthWriter} without an underlying {@code java.io.Writer} to turn the headers
 * and each bean into a fixed-width {@code String}, printing every one of them.
 */
@Test
public void example008BeanListToStringList() throws Exception {
    // Same steps as the annotation example: load a list of TestBeans from the input
    FixedWidthParserSettings readSettings = new FixedWidthParserSettings(new FixedWidthFields(11, 15, 10, 10, 20));
    readSettings.getFormat().setLineSeparator("\n");
    readSettings.getFormat().setPadding('_');
    readSettings.setHeaderExtractionEnabled(true);

    BeanListProcessor<TestBean> beanProcessor = new BeanListProcessor<TestBean>(TestBean.class);
    readSettings.setProcessor(beanProcessor);

    FixedWidthParser fixedWidthParser = new FixedWidthParser(readSettings);
    fixedWidthParser.parse(getReader("/examples/bean_test.txt"));

    List<TestBean> beans = beanProcessor.getBeans();

    //##CODE_START
    // field names mapped to their lengths, in the order they are inserted
    LinkedHashMap<String, Integer> lengthByField = new LinkedHashMap<String, Integer>();
    lengthByField.put("amount", 15);
    lengthByField.put("date", 11);
    lengthByField.put("pending", 10);
    lengthByField.put("quantity", 10);
    lengthByField.put("comments", 20);

    FixedWidthWriterSettings writeSettings = new FixedWidthWriterSettings(new FixedWidthFields(lengthByField));
    writeSettings.getFormat().setLineSeparator("\n");
    writeSettings.getFormat().setPadding('_');
    writeSettings.setRowWriterProcessor(new BeanWriterProcessor<TestBean>(TestBean.class));

    //note that we are not passing in an instance of java.io.Writer here.
    FixedWidthWriter fixedWidthWriter = new FixedWidthWriter(writeSettings);

    //let's see how the headers will appear
    println(fixedWidthWriter.writeHeadersToString());

    //beans is just a List of TestBean; each resulting row should have data of a TestBean
    List<String> writtenRows = fixedWidthWriter.processRecordsToString(beans);
    for (String writtenRow : writtenRows) {
        println(writtenRow);
    }
    //##CODE_END

    printAndValidate();
}
/**
 * Parses {@code /examples/multi_schema.txt}, where the field format of each record is
 * selected by a lookahead value ("C#" or "A#"), and passes the rows to {@code printAndValidate}.
 */
@Test
public void example009ParseWithLookahead() throws Exception {
    //##CODE_START
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings();
    parserSettings.getFormat().setLineSeparator("\n");

    //We are going to parse the multi_schema.txt file, with a lookahead value in front of each record

    //Format of the clients' records
    FixedWidthFields clientFormat = new FixedWidthFields();
    clientFormat.addField("Lookahead", 2); //the look ahead value is stored in a column of its own
    clientFormat.addField("ClientID", 7, FieldAlignment.RIGHT, '0');
    clientFormat.addField("Name", 20);

    //Format of the client accounts
    FixedWidthFields accountFormat = new FixedWidthFields();
    accountFormat.addField("ID", 7, FieldAlignment.RIGHT, '0'); //here the account ID will be prefixed by the lookahead value
    accountFormat.addField("Bank", 4);
    accountFormat.addField("AccountNumber", 10);
    accountFormat.addField("Swift", 7);

    //A record that starts with C# is a client record, so "C#" is associated with the client format
    parserSettings.addFormatForLookahead("C#", clientFormat);

    //Likewise, "A#" is associated with the account format
    parserSettings.addFormatForLookahead("A#", accountFormat);

    //All rows can now be parsed
    FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);
    List<String[]> parsedRows = fixedWidthParser.parseAll(getReader("/examples/multi_schema.txt"));
    //##CODE_END

    printAndValidate(parsedRows);
}
/**
 * Parses {@code /examples/multi_schema2.txt} using the account format as the default field
 * format and a lookahead pattern that selects the client format, then passes the rows to
 * {@code printAndValidate}.
 */
@Test
public void example010ParseWithDefaultAndLookahead() throws Exception {
    //Format of the client accounts
    FixedWidthFields accountFormat = new FixedWidthFields();
    accountFormat.addField("ID", 5, FieldAlignment.RIGHT, '0'); //now, the account fields won't have a lookahead value.
    accountFormat.addField("Bank", 4);
    accountFormat.addField("AccountNumber", 10);
    accountFormat.addField("Swift", 7);

    //##CODE_START
    //In some cases the input records might not have a lookahead value. On the multi_schema2.txt file,
    //only client records have a lookahead. If no other lookahead is matched, the parser will switch back to
    //the default field format. Here, the format used by account records will be used as default.
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings(accountFormat);
    parserSettings.getFormat().setLineSeparator("\n");

    //Format of the clients' records, defined once more
    FixedWidthFields clientFormat = new FixedWidthFields();
    clientFormat.addField("Lookahead", 2); //the look ahead value is stored in a column of its own
    clientFormat.addField("ClientID", 7, FieldAlignment.RIGHT, '0');
    clientFormat.addField("Name", 20);

    //Client records start with "C#"; the pattern associated with the client format here is "?#".
    //NOTE(review): presumably '?' acts as a wildcard for the first character - confirm against the library documentation.
    //Any other record will be parsed using the default format
    parserSettings.addFormatForLookahead("?#", clientFormat);

    //All rows can now be parsed
    FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);
    List<String[]> parsedRows = fixedWidthParser.parseAll(getReader("/examples/multi_schema2.txt"));
    //##CODE_END

    printAndValidate(parsedRows);
}
/**
 * Parses {@code /examples/multi_schema2.txt} selecting the client format with a lookahead
 * pattern and the account format with a lookbehind pattern, then passes the rows to
 * {@code printAndValidate}.
 */
@Test
public void example011ParseWithLookbehind() throws Exception {
    FixedWidthParserSettings parserSettings = new FixedWidthParserSettings();
    parserSettings.getFormat().setLineSeparator("\n");

    //Format of the clients' records
    FixedWidthFields clientFormat = new FixedWidthFields();
    clientFormat.addField("Lookahead", 2); //the look ahead value is stored in a column of its own
    clientFormat.addField("ClientID", 7, FieldAlignment.RIGHT, '0');
    clientFormat.addField("Name", 20);

    //Format of the client accounts
    FixedWidthFields accountFormat = new FixedWidthFields();
    accountFormat.addField("ID", 5, FieldAlignment.RIGHT, '0'); //the account fields won't have a lookahead value.
    accountFormat.addField("Bank", 4);
    accountFormat.addField("AccountNumber", 10);
    accountFormat.addField("Swift", 7);

    //##CODE_START
    //We can also specify a lookbehind value to determine which format to use when parsing the input.

    //A record that starts with C# is a client record, so "C#" is associated with the client format.
    parserSettings.addFormatForLookahead("C#", clientFormat);

    //If a record parsed previously has a C#, but the current doesn't, then we are processing accounts. Let's use the account format.
    //NOTE(review): the lookbehind pattern is "?#", not "C#" - presumably '?' acts as a wildcard; confirm against the library documentation.
    parserSettings.addFormatForLookbehind("?#", accountFormat);

    //All rows can now be parsed
    FixedWidthParser fixedWidthParser = new FixedWidthParser(parserSettings);
    List<String[]> parsedRows = fixedWidthParser.parseAll(getReader("/examples/multi_schema2.txt"));
    //##CODE_END

    printAndValidate(parsedRows);
}
}