TsvParserExamples.java

/*******************************************************************************
 * Copyright 2014 Univocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.examples;

import com.univocity.parsers.common.*;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.conversions.*;
import com.univocity.parsers.tsv.*;
import org.testng.annotations.*;

import java.math.*;
import java.util.*;
import java.util.Map.*;

public class TsvParserExamples extends Example {

	@Test
	public void example001ParseAll() throws Exception {
		//##CODE_START

		TsvParserSettings settings = new TsvParserSettings();
		settings.getFormat().setLineSeparator("\n");

		// creates a TSV parser
		TsvParser parser = new TsvParser(settings);

		// parses all rows in one go.
		List<String[]> allRows = parser.parseAll(getReader("/examples/example.tsv"));

		//##CODE_END
		printAndValidate(null, allRows);
	}

	@Test
	public void example002ReadSimpleTsv() throws Exception {
		StringBuilder out = new StringBuilder();

		TsvParserSettings settings = new TsvParserSettings();
		//the file used in the example uses '\n' as the line separator sequence.
		//the line separator sequence is defined here to ensure systems such as MacOS and Windows
		//are able to process this file correctly (MacOS uses '\r'; and Windows uses '\r\n').
		settings.getFormat().setLineSeparator("\n");

		//##CODE_START

		// creates a TSV parser
		TsvParser parser = new TsvParser(settings);

		// call beginParsing to read records one by one, iterator-style.
		parser.beginParsing(getReader("/examples/example.tsv"));

		String[] row;
		while ((row = parser.parseNext()) != null) {
			println(out, Arrays.toString(row));
		}

		// The resources are closed automatically when the end of the input is reached,
		// or when an error happens, but you can call stopParsing() at any time.

		// You only need to use this if you are not parsing the entire content.
		// But it doesn't hurt if you call it anyway.
		parser.stopParsing();

		//##CODE_END

		printAndValidate(out);
	}

	@Test
	public void example003ReadTsvWithRowProcessor() throws Exception {
		//##CODE_START

		// The settings object provides many configuration options
		TsvParserSettings parserSettings = new TsvParserSettings();

		//You can configure the parser to automatically detect what line separator sequence is in the input
		parserSettings.setLineSeparatorDetectionEnabled(true);

		// A RowListProcessor stores each parsed row in a List.
		RowListProcessor rowProcessor = new RowListProcessor();

		// You can configure the parser to use a RowProcessor to process the values of each parsed row.
		// You will find more RowProcessors in the 'com.univocity.parsers.common.processor' package, but you can also create your own.
		parserSettings.setProcessor(rowProcessor);

		// Let's consider the first parsed row as the headers of each column in the file.
		parserSettings.setHeaderExtractionEnabled(true);

		// creates a parser instance with the given settings
		TsvParser parser = new TsvParser(parserSettings);

		// the 'parse' method will parse the file and delegate each parsed row to the RowProcessor you defined
		parser.parse(getReader("/examples/example.tsv"));

		// get the parsed records from the RowListProcessor here.
		// Note that different implementations of RowProcessor will provide different sets of functionalities.
		String[] headers = rowProcessor.getHeaders();
		List<String[]> rows = rowProcessor.getRows();

		//##CODE_END

		printAndValidate(headers, rows);
	}

	@Test
	public void example004ReadTsvAndConvertValues() throws Exception {
		final StringBuilder out = new StringBuilder();

		//##CODE_START

		// ObjectRowProcessor converts the parsed values and gives you the resulting row.
		ObjectRowProcessor rowProcessor = new ObjectRowProcessor() {
			@Override
			public void rowProcessed(Object[] row, ParsingContext context) {
				//here is the row. Let's just print it.
				println(out, Arrays.toString(row));
			}
		};

		// converts values in the "Price" column (index 4) to BigDecimal
		rowProcessor.convertIndexes(Conversions.toBigDecimal()).set(4);

		// converts the values in columns "Make, Model and Description" to lower case, and sets the value "chevy" to null.
		rowProcessor.convertFields(Conversions.toLowerCase(), Conversions.toNull("chevy")).set("Make", "Model", "Description");

		// converts the values at index 0 (year) to BigInteger. Nulls are converted to BigInteger.ZERO.
		rowProcessor.convertFields(new BigIntegerConversion(BigInteger.ZERO, "0")).set("year");

		TsvParserSettings parserSettings = new TsvParserSettings();
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setProcessor(rowProcessor);
		parserSettings.setHeaderExtractionEnabled(true);

		TsvParser parser = new TsvParser(parserSettings);

		//the rowProcessor will be executed here.
		parser.parse(getReader("/examples/example.tsv"));

		//##CODE_END

		printAndValidate(out);
	}

	@Test
	public void example005UsingAnnotations() throws Exception {
		//##CODE_START
		// BeanListProcessor converts each parsed row to an instance of a given class, then stores each instance into a list.
		BeanListProcessor<TestBean> rowProcessor = new BeanListProcessor<TestBean>(TestBean.class);

		TsvParserSettings parserSettings = new TsvParserSettings();
		parserSettings.setProcessor(rowProcessor);
		parserSettings.setHeaderExtractionEnabled(true);

		TsvParser parser = new TsvParser(parserSettings);
		parser.parse(getReader("/examples/bean_test.tsv"));

		// The BeanListProcessor provides a list of objects extracted from the input.
		List<TestBean> beans = rowProcessor.getBeans();

		//##CODE_END

		printAndValidate(beans.toString());
	}

	@Test
	public void example006MasterDetail() throws Exception {
		//##CODE_START
		// 1st, Create a RowProcessor to process all "detail" elements
		ObjectRowListProcessor detailProcessor = new ObjectRowListProcessor();

		// converts values at in the "Amount" column (position 1 in the file) to integer.
		detailProcessor.convertIndexes(Conversions.toInteger()).set(1);

		// 2nd, Create MasterDetailProcessor to identify whether or not a row is the master row.
		// the row placement argument indicates whether the master detail row occurs before or after a sequence of "detail" rows.
		MasterDetailListProcessor masterRowProcessor = new MasterDetailListProcessor(RowPlacement.BOTTOM, detailProcessor) {
			@Override
			protected boolean isMasterRecord(String[] row, ParsingContext context) {
				//Returns true if the parsed row is the master row.
				//In this example, rows that have "Total" in the first column are master rows.
				return "Total".equals(row[0]);
			}
		};
		// We want our master rows to store BigIntegers in the "Amount" column
		masterRowProcessor.convertIndexes(Conversions.toBigInteger()).set(1);

		TsvParserSettings parserSettings = new TsvParserSettings();
		parserSettings.setHeaderExtractionEnabled(true);

		// Set the RowProcessor to the masterRowProcessor.
		parserSettings.setProcessor(masterRowProcessor);

		TsvParser parser = new TsvParser(parserSettings);
		parser.parse(getReader("/examples/master_detail.tsv"));

		// Here we get the MasterDetailRecord elements.
		List<MasterDetailRecord> rows = masterRowProcessor.getRecords();
		MasterDetailRecord masterRecord = rows.get(0);

		// The master record has one master row and multiple detail rows.
		Object[] masterRow = masterRecord.getMasterRow();
		List<Object[]> detailRows = masterRecord.getDetailRows();
		//##CODE_END
		printAndValidate(masterRow, detailRows);
	}

	@Test
	public void example007ConvertColumns() throws Exception {
		TsvParserSettings parserSettings = new TsvParserSettings();
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setHeaderExtractionEnabled(true);

		//##CODE_START

		// ObjectColumnProcessor converts the parsed values and stores them in columns
		// Use BatchedObjectColumnProcessor to process columns in batches
		ObjectColumnProcessor rowProcessor = new ObjectColumnProcessor();

		// converts values in the "Price" column (index 4) to BigDecimal
		rowProcessor.convertIndexes(Conversions.toBigDecimal()).set(4);

		// converts the values in columns "Make, Model and Description" to lower case, and sets the value "chevy" to null.
		rowProcessor.convertFields(Conversions.toLowerCase(), Conversions.toNull("chevy")).set("Make", "Model", "Description");

		// converts the values at index 0 (year) to BigInteger. Nulls are converted to BigInteger.ZERO.
		rowProcessor.convertFields(new BigIntegerConversion(BigInteger.ZERO, "0")).set("year");

		parserSettings.setProcessor(rowProcessor);

		TsvParser parser = new TsvParser(parserSettings);

		//the rowProcessor will be executed here.
		parser.parse(getReader("/examples/example.tsv"));

		//Let's get the column values:
		Map<Integer, List<Object>> columnValues = rowProcessor.getColumnValuesAsMapOfIndexes();

		//##CODE_END
		StringBuilder out = new StringBuilder();
		for (Entry<Integer, List<Object>> e : columnValues.entrySet()) {
			List<Object> values = e.getValue();
			Integer columnIndex = e.getKey();
			println(out, columnIndex + " -> " + values);
		}

		printAndValidate(out);
	}

	@Test
	public void example008ParseLine() throws Exception {
		StringBuilder out = new StringBuilder();
		//##CODE_START
		// creates a TSV parser
		TsvParser parser = new TsvParser(new TsvParserSettings());

		String[] line;
		line = parser.parseLine("A	B	C");
		println(out, Arrays.toString(line));

		line = parser.parseLine("1	2	3	4");
		println(out, Arrays.toString(line));
		//##CODE_END
		printAndValidate(out);
	}

	@Test
	public void example009ParseJoinedLines() throws Exception {
		//##CODE_START

		//Let's write 3 values to a TSV, one of them has a line break.
		String []values = new String[]{"Value 1",	"Breaking [\n] here", "Value 3"};

		TsvWriterSettings writerSettings = new TsvWriterSettings();
		writerSettings.getFormat().setLineSeparator("\n");

		// In TSV, we can have line separators escaped with a slash before a line break. In this case the current
		// line will be joined with the next line.
		writerSettings.setLineJoiningEnabled(true);

		//Let's write the values and see how the data looks like:
		String writtenLine = new TsvWriter(writerSettings).writeRowToString(values);
		println("Written data\n------------\n" + writtenLine);

		// To parse, we just use the same confiuration:
		TsvParserSettings parserSettings = new TsvParserSettings();
		parserSettings.setLineJoiningEnabled(true);
		parserSettings.getFormat().setLineSeparator("\n");

		TsvParser parser = new TsvParser(parserSettings);

		//Let's parse the contents we've just written:
		values = parser.parseLine(writtenLine);

		println("\nParsed elements\n---------------");
		println("First: " + values[0]);
		println("Second: " + values[1]);
		println("Third: " + values[2]);

		//##CODE_END
		printAndValidate();
	}


}