// SettingsExamples.java

/*******************************************************************************
 * Copyright 2014 Univocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.examples;

import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.csv.*;
import com.univocity.parsers.fixed.*;
import org.testng.annotations.*;

import java.util.*;

/**
 * Runnable examples showing how parser settings influence the rows produced when
 * reading the example CSV and fixed-width input files.
 *
 * Each example hands the parsed rows to {@code printAndValidate}. The code between the
 * {@code ##CODE_START} and {@code ##CODE_END} markers is the part each example demonstrates.
 */
public class SettingsExamples extends Example {

	@Test
	public void example001ColumnSelection() {
		CsvParserSettings settings = newLineFeedSettings();

		//##CODE_START
		// Restrict parsing to the "Price", "Year" and "Make" columns.
		// Every other field in the input is simply skipped by the parser.
		settings.selectFields("Price", "Year", "Make");

		// Run the parser with this configuration and collect the rows it produces.
		List<String[]> rows = parseWithSettings(settings);
		//##CODE_END
		printAndValidate(rows);
	}

	@Test
	public void example002ColumnSelectionWithNoReordering() {
		CsvParserSettings settings = newLineFeedSettings();

		//##CODE_START
		// Restrict parsing to the "Price", "Year" and "Make" columns.
		// Every other field in the input is simply skipped by the parser.
		settings.selectFields("Price", "Year", "Make");

		// Column reordering is on by default. With it switched off, each row keeps
		// the column order of the input file, and the positions of fields that were
		// not selected hold null because the parser never processes them.
		settings.setColumnReorderingEnabled(false);

		// Run the parser with this configuration and collect the rows it produces.
		List<String[]> rows = parseWithSettings(settings);
		//##CODE_END
		printAndValidate(rows);
	}

	@Test
	public void example003ColumnSelectionByIndex() {
		CsvParserSettings settings = newLineFeedSettings();

		//##CODE_START
		// Columns can also be selected by position instead of by name.
		// Values found in any other column are skipped by the parser.
		settings.selectIndexes(4, 0, 1);

		// Run the parser with this configuration and collect the rows it produces.
		List<String[]> rows = parseWithSettings(settings);
		//##CODE_END
		printAndValidate(rows);
	}

	@Test
	public void example004LotsOfDifferentSettings() {
		CsvParserSettings settings = new CsvParserSettings();

		//##CODE_START
		// Let the parser work out which line separator sequence the input uses.
		settings.setLineSeparatorDetectionEnabled(true);

		// Replacement used whenever a parsed value is null.
		settings.setNullValue("<NULL>");

		// Replacement used whenever a parsed value is empty (CSV only).
		settings.setEmptyValue("<EMPTY>");

		// Explicit headers for the parsed file. Once headers are provided,
		// 'setHeaderExtractionEnabled(true)' only makes the parser ignore the first input row.
		settings.setHeaders("a", "b", "c", "d", "e");

		// Produce the columns in reverse order.
		// NOTE: with a field selection in place, every row produced has exactly the same number of columns.
		settings.selectFields("e", "d", "c", "b", "a");

		// Keep whitespace at the start of each value...
		settings.setIgnoreLeadingWhitespaces(false);

		// ...and at the end of each value.
		settings.setIgnoreTrailingWhitespaces(false);

		// Empty lines are not skipped.
		settings.setSkipEmptyLines(false);

		// Stop and close any resources after this many records have been read.
		settings.setNumberOfRecordsToRead(9);

		// Hard limit on the number of columns a single input row may have (the default is 512).
		// Like the per-column character limit below, it guards against input with an invalid format.
		settings.setMaxColumns(10);

		// Upper bound on the characters read into a single column (the default is 4096).
		// Without a limit, a file with an invalid format could make the parser keep reading
		// until the end of the input or until memory runs out. The bound avoids
		// OutOfMemoryErrors and unwanted JVM crashes.
		settings.setMaxCharsPerColumn(100);

		// Number of characters the parser's buffer holds at any given time.
		settings.setInputBufferSize(1000);

		// Turn off the separate thread that fills the input buffer. By default the input is loaded
		// incrementally on its own thread when more than one processor is available. Leave that
		// enabled for better performance on big files (> 100 Mb).
		settings.setReadInputOnSeparateThread(false);

		// Run the parser with this configuration and collect the rows it produces.
		List<String[]> rows = parseWithSettings(settings);
		//##CODE_END
		printAndValidate(rows);
	}

	@Test
	public void example005FixedWidthSettings() {
		//##CODE_START
		// For the sake of the example, the last 8 characters (the Year column) are left out of
		// the field definition. The padding character is also NOT set to '_', so the output is
		// easier to read and shows which characters are being processed.
		FixedWidthFields fieldLengths = new FixedWidthFields(4, 5, 40, 40 /*, 8*/);
		FixedWidthParserSettings settings = new FixedWidthParserSettings(fieldLengths);

		// The example file separates lines with '\n'. Declaring that explicitly lets systems with
		// a different native separator (classic Mac OS uses '\r', Windows uses '\r\n') process it correctly.
		settings.getFormat().setLineSeparator("\n");

		// Fixed-width settings share most of the options available for CSV.
		// The two below are the only extra ones needed here.

		// When a row holds more characters than the fields define, skip the rest of the line.
		settings.setSkipTrailingCharsUntilNewline(true);

		// When a new line is found before a record has all its expected characters, the record
		// is considered parsed. Data in the next row is parsed as a new record.
		settings.setRecordEndsOnNewline(true);

		RowListProcessor collector = new RowListProcessor();
		settings.setProcessor(collector);
		settings.setHeaderExtractionEnabled(true);

		new FixedWidthParser(settings).parse(getReader("/examples/example.txt"));

		List<String[]> parsed = collector.getRows();
		//##CODE_END
		printAndValidate(parsed);
	}

	/**
	 * Creates CSV parser settings whose line separator is fixed to '\n'.
	 *
	 * The example file separates lines with '\n'. Declaring that explicitly lets systems with
	 * a different native separator (classic Mac OS uses '\r', Windows uses '\r\n') process it correctly.
	 *
	 * @return a new settings object with the line separator already configured
	 */
	private CsvParserSettings newLineFeedSettings() {
		CsvParserSettings settings = new CsvParserSettings();
		settings.getFormat().setLineSeparator("\n");
		return settings;
	}

	/**
	 * Parses the example input file (/examples/example.csv) using the supplied configuration.
	 * Header extraction is enabled and a {@link RowListProcessor} is installed on the given
	 * settings before parsing.
	 *
	 * @param parserSettings settings used to parse the example.csv file
	 * @return a list with all parsed rows.
	 */
	private List<String[]> parseWithSettings(CsvParserSettings parserSettings) {
		RowListProcessor collector = new RowListProcessor();
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setProcessor(collector);

		new CsvParser(parserSettings).parse(getReader("/examples/example.csv"));
		return collector.getRows();
	}
}