CsvParserTest.java

/*******************************************************************************
 * Copyright 2014 Univocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.csv;

import com.univocity.parsers.*;
import com.univocity.parsers.common.*;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.common.record.*;
import org.testng.annotations.*;

import java.io.*;
import java.util.*;
import java.util.concurrent.*;

import static org.testng.Assert.*;

public class CsvParserTest extends ParserTestCase {

	/**
	 * Supplies the {@code /csv/test.csv} resource twice: once paired with an explicit line feed
	 * separator and once with {@code null} in place of the separator.
	 *
	 * @return rows of {@code {resource path, line separator or null}}
	 */
	@DataProvider(name = "testProvider")
	public Object[][] testProvider() {
		final String resource = "/csv/test.csv";
		final Object[][] parameters = {
				{resource, new char[]{'\n'}},
				{resource, null}
		};
		return parameters;
	}

	/**
	 * Supplies the three "essential" CSV resources, first with an explicit line separator for each
	 * and then again with {@code null} in place of the separator.
	 *
	 * @return rows of {@code {resource path, line separator or null}}
	 */
	@DataProvider(name = "csvProvider")
	public Object[][] csvProvider() {
		final String unixFile = "/csv/essential.csv";
		final String dosFile = "/csv/essential-dos.csv";
		final String macFile = "/csv/essential-mac.csv";

		final Object[][] parameters = {
				// explicit line separators
				{unixFile, new char[]{'\n'}},
				{dosFile, new char[]{'\r', '\n'}},
				{macFile, new char[]{'\r'}},
				// no line separator given
				{unixFile, null},
				{dosFile, null},
				{macFile, null}
		};
		return parameters;
	}

	/**
	 * Parses the given resource with leading/trailing whitespace trimming and comment collection
	 * enabled, then checks the extracted headers, all collected rows and the single collected comment.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parseIgnoringWhitespaces(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setCommentCollectionEnabled(true);
		parserSettings.setRowProcessor(processor);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setIgnoreLeadingWhitespaces(true);
		parserSettings.setIgnoreTrailingWhitespaces(true);

		CsvParser csvParser = new CsvParser(parserSettings);
		csvParser.parse(newReader(csvFile));

		String[] headers = {"Year", "Make", "Model", "Description", "Price"};

		String[][] rows = {
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"},
				{"1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"},
				{"1999", "Chevy", "Venture \"Extended Edition, Very Large\"", null, "5000.00"},
				{null, null, "Venture \"Extended Edition\"", null, "4900.00"},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{null, null, "5", null, null},
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"19 97", "Fo rd", "E350", " ac, abs, moon ", "3000.00"},
				{null, " ", null, "  ", "30 00.00"},
				{"1997", "Ford", "E350", " \" ac, abs, moon \" ", "3000.00"},
				{"1997", "Ford", "E350", "\" ac, abs, moon \" ", "3000.00"},
		};

		assertHeadersAndValuesMatch(headers, rows);

		// Exactly one comment must have been collected, stored under key 6.
		Map<Long, String> collectedComments = csvParser.getContext().comments();
		assertEquals(collectedComments.size(), 1);

		Map.Entry<Long, String> onlyComment = collectedComments.entrySet().iterator().next();
		assertEquals(onlyComment.getKey().longValue(), 6L);
		assertEquals(onlyComment.getValue(), csvParser.getContext().lastComment());
		assertEquals(csvParser.getContext().lastComment(), "this is a comment and should be ignored");
	}

	/**
	 * Creates the parser settings shared by the tests in this class.
	 *
	 * @param lineSeparator line separator to set on the format; when {@code null}, line separator
	 *                      detection is enabled instead
	 * @return new settings whose format has its normalized newline set to {@code '\n'}
	 */
	protected CsvParserSettings newCsvInputSettings(char[] lineSeparator) {
		final CsvParserSettings settings = new CsvParserSettings();
		if (lineSeparator != null) {
			settings.getFormat().setLineSeparator(lineSeparator);
		} else {
			settings.setLineSeparatorDetectionEnabled(true);
		}
		settings.getFormat().setNormalizedNewline('\n');
		return settings;
	}

	/**
	 * Parses the given resource with whitespace trimming disabled, using {@code "?????"} as the null
	 * value and {@code "XXXXX"} as the empty value, then checks the extracted headers and all rows.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parseUsingWhitespaces(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setRowProcessor(processor);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setNullValue("?????");
		parserSettings.setEmptyValue("XXXXX");
		parserSettings.setIgnoreLeadingWhitespaces(false);
		parserSettings.setIgnoreTrailingWhitespaces(false);

		new CsvParser(parserSettings).parse(newReader(csvFile));

		String[] headers = {"Year", "Make", "Model", "Description", "Price"};

		String[][] rows = {
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1999", "Chevy", "Venture \"Extended Edition\"", "XXXXX", "4900.00"},
				{"1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"},
				{"1999", "Chevy", "Venture \"Extended Edition, Very Large\"", "?????", "5000.00"},
				{"?????", "?????", "Venture \"Extended Edition\"", "XXXXX", "4900.00"},
				{"?????", "?????", "?????", "?????", "?????"},
				{" ", " ", " ", " ", " "},
				{"?????", "?????", " 5 ", "?????", "?????"},
				{"  "},
				{"1997 ", " Ford ", "E350", "ac, abs, moon", " \"3000.00\" \t"},
				{"1997", " Ford ", "E350", " ac, abs, moon ", "3000.00  \t"},
				{"  1997", " Ford ", "E350", " ac, abs, moon ", "3000.00"},
				{"    19 97 ", " Fo rd ", "E350", " ac, abs, moon ", "3000.00"},
				{"\t\t", " ", "  ", " \"  \"\t", "30 00.00\t"},
				{"1997", "Ford", "E350", " \" ac, abs, moon \" ", "3000.00"},
				{"1997", "Ford", "E350", "\" ac, abs, moon \" ", "3000.00"},
		};

		assertHeadersAndValuesMatch(headers, rows);
	}

	/**
	 * Parses the given resource selecting only the {@code "Year"} field with column reordering
	 * disabled, and expects five-column rows in which every column other than the first is {@code null}.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parseColumns(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setRowProcessor(processor);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setIgnoreLeadingWhitespaces(true);
		parserSettings.setIgnoreTrailingWhitespaces(true);
		parserSettings.selectFields("Year");
		parserSettings.setColumnReorderingEnabled(false);

		new CsvParser(parserSettings).parse(newReader(csvFile));

		String[] headers = {"Year", "Make", "Model", "Description", "Price"};

		String[][] rows = {
				{"1997", null, null, null, null},
				{"1999", null, null, null, null},
				{"1996", null, null, null, null},
				{"1999", null, null, null, null},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{"1997", null, null, null, null},
				{"1997", null, null, null, null},
				{"1997", null, null, null, null},
				{"19 97", null, null, null, null},
				{null, null, null, null, null},
				{"1997", null, null, null, null},
				{"1997", null, null, null, null},
		};

		assertHeadersAndValuesMatch(headers, rows);
	}

	/**
	 * Parses {@code input} with {@code parseLine} after applying at most one column selection.
	 * Header extraction is enabled only when a name-based selection is given. When several selections
	 * are non-null, only the first one in this order is applied: excluded indexes, excluded fields,
	 * selected indexes, selected fields.
	 *
	 * @param input            text handed to {@code parseLine}
	 * @param indexesToExclude column indexes to exclude, or {@code null}
	 * @param indexesToSelect  column indexes to select, or {@code null}
	 * @param fieldsToExclude  field names to exclude, or {@code null}
	 * @param fieldsToSelect   field names to select, or {@code null}
	 * @return the values returned by {@code parseLine}
	 */
	private String[] process(String input, Integer[] indexesToExclude, Integer[] indexesToSelect, String[] fieldsToExclude, String[] fieldsToSelect) {
		final boolean selectingByName = fieldsToExclude != null || fieldsToSelect != null;

		final CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setHeaderExtractionEnabled(selectingByName);

		if (indexesToExclude != null) {
			parserSettings.excludeIndexes(indexesToExclude);
		} else if (fieldsToExclude != null) {
			parserSettings.excludeFields(fieldsToExclude);
		} else if (indexesToSelect != null) {
			parserSettings.selectIndexes(indexesToSelect);
		} else if (fieldsToSelect != null) {
			parserSettings.selectFields(fieldsToSelect);
		}

		return new CsvParser(parserSettings).parseLine(input);
	}

	/**
	 * Exercises the four selection modes of {@code process}: excluding and selecting columns by
	 * index on header-less input, then excluding and selecting by field name on input with a header row.
	 */
	@Test
	public void columnSelectionTest() {
		final String headerless = "a,b,c,d,e";

		assertEquals(process(headerless, new Integer[]{0, 4}, null, null, null), new String[]{"b", "c", "d"});
		assertEquals(process(headerless, null, new Integer[]{0, 4}, null, null), new String[]{"a", "e"});

		final String withHeaders = "ha,hb,hc,hd,he\na,b,c,d,e";

		assertEquals(process(withHeaders, null, null, new String[]{"hb", "hd"}, null), new String[]{"a", "c", "e"});
		assertEquals(process(withHeaders, null, null, null, new String[]{"hb", "hd"}), new String[]{"b", "d"});
	}

	/**
	 * Builds the row processor used by this test class. On start it asks the context to skip 2
	 * lines, and after processing a row while the context reports current line 9 it asks to skip 1 more.
	 *
	 * @return a {@link RowListProcessor} with the skipping behaviour described above
	 */
	@Override
	protected RowListProcessor newRowListProcessor() {
		return new RowListProcessor() {
			@Override
			public void processStarted(ParsingContext context) {
				super.processStarted(context);
				context.skipLines(2);
			}

			@Override
			public void rowProcessed(String[] row, ParsingContext context) {
				super.rowProcessed(row, context);

				final boolean atLineNine = context.currentLine() == 9;
				if (atLineNine) {
					context.skipLines(1);
				}
			}
		};
	}

	/**
	 * Reads the given resource row by row with {@code beginParsing}/{@code parseNext}, keeping only
	 * rows with exactly five columns, with user-provided headers set alongside header extraction.
	 * Verifies the headers reported by the processor and each retained row.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parseOneByOne(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setRowProcessor(processor);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setIgnoreLeadingWhitespaces(true);
		parserSettings.setIgnoreTrailingWhitespaces(true);
		parserSettings.setHeaders("YR", "MK", "MDL", "DSC", "PRC");

		List<Object[]> fiveColumnRows = new ArrayList<Object[]>();
		CsvParser csvParser = new CsvParser(parserSettings);
		try {
			csvParser.beginParsing(newReader(csvFile));

			for (Object[] row = csvParser.parseNext(); row != null; row = csvParser.parseNext()) {
				if (row.length != 5) {
					continue;
				}
				fiveColumnRows.add(row);
			}
		} finally {
			// always release the input, even when an assertion or parsing error interrupts the loop
			csvParser.stopParsing();
		}

		String[] expectedHeaders = {"YR", "MK", "MDL", "DSC", "PRC"};

		String[][] expectedRows = {
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"},
				{"1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"},
				{"1999", "Chevy", "Venture \"Extended Edition, Very Large\"", null, "5000.00"},
				{null, null, "Venture \"Extended Edition\"", null, "4900.00"},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{null, null, "5", null, null},
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"19 97", "Fo rd", "E350", " ac, abs, moon ", "3000.00"},
				{null, " ", null, "  ", "30 00.00"},
				{"1997", "Ford", "E350", " \" ac, abs, moon \" ", "3000.00"},
				{"1997", "Ford", "E350", "\" ac, abs, moon \" ", "3000.00"},
		};

		Object[] actualHeaders = processor.getHeaders();
		TestUtils.assertEquals(actualHeaders, expectedHeaders);

		assertEquals(fiveColumnRows.size(), expectedRows.length);

		int index = 0;
		for (String[] expectedRow : expectedRows) {
			Object[] actualRow = fiveColumnRows.get(index++);
			assertEquals(actualRow, expectedRow);
		}
	}

	/**
	 * Parses the given resource with the number of records to read limited to 3 and checks that
	 * the headers and exactly the first three rows were collected.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parse3Records(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setRowProcessor(processor);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setIgnoreLeadingWhitespaces(true);
		parserSettings.setIgnoreTrailingWhitespaces(true);
		parserSettings.setNumberOfRecordsToRead(3);

		new CsvParser(parserSettings).parse(newReader(csvFile));

		String[] headers = {"Year", "Make", "Model", "Description", "Price"};

		String[][] rows = {
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"},
				{"1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"},
		};

		assertHeadersAndValuesMatch(headers, rows);
	}

	/**
	 * Parses four lines containing unescaped quotes inside quoted values, with
	 * {@code parseUnescapedQuotesUntilDelimiter} disabled, and checks the values read from each line.
	 */
	@Test
	public void parseBrokenQuoteEscape() {
		CsvParserSettings parserSettings = newCsvInputSettings(new char[]{'\n'});
		parserSettings.setParseUnescapedQuotesUntilDelimiter(false);
		parserSettings.setHeaderExtractionEnabled(false);
		CsvParser csvParser = new CsvParser(parserSettings);

		String input = ""
				+ "something,\"a quoted value \"with unescaped quotes\" can be parsed\", something\n"
				+ "1997 , Ford ,E350,\"s, m\"\"\"	, \"3000.00\"\n"
				+ "1997 , Ford ,E350,\"ac, abs, moon\"	, \"3000.00\" \n"
				+ "something,\"a \"quoted\" \"\"value\"\" \"\"with unescaped quotes\"\" can be parsed\" , something\n";
		csvParser.beginParsing(new StringReader(input));

		// line 1: unescaped quotes in the middle of a quoted value
		String[] first = csvParser.parseNext();
		assertEquals(first[0], "something");
		assertEquals(first[2], "something");
		assertEquals(first[1], "a quoted value \"with unescaped quotes\" can be parsed");

		// line 2: escaped quote right before the closing quote
		String[] second = csvParser.parseNext();
		assertEquals(second, new String[]{"1997", "Ford", "E350", "s, m\"", "3000.00"});

		// line 3: plain quoted values followed by whitespace
		String[] third = csvParser.parseNext();
		assertEquals(third, new String[]{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"});

		// line 4: mix of unescaped and escaped quotes
		String[] fourth = csvParser.parseNext();
		assertEquals(fourth[0], "something");
		assertEquals(fourth[2], "something");
		assertEquals(fourth[1], "a \"quoted\" \"value\" \"with unescaped quotes\" can be parsed");
	}

	/**
	 * With the empty value set to {@code ""}, expects a quoted empty field to be read as
	 * {@code ""} while unquoted empty fields are read as {@code null}.
	 */
	@Test
	public void testReadEmptyValue() {
		CsvParserSettings parserSettings = newCsvInputSettings(new char[]{'\n'});
		parserSettings.setEmptyValue("");
		parserSettings.setHeaderExtractionEnabled(false);

		CsvParser csvParser = new CsvParser(parserSettings);
		csvParser.beginParsing(new StringReader("a,b,,c,\"\",\r\n"));
		String[] values = csvParser.parseNext();

		assertEquals(values[0], "a");
		assertEquals(values[1], "b");
		assertNull(values[2]);
		assertEquals(values[3], "c");
		assertEquals(values[4], "");
		assertNull(values[5]);
	}

	/**
	 * Supplies {@code {keepEscape, escapeUnquoted, expected values}} combinations for
	 * {@code testHandlingOfEscapeSequences}.
	 *
	 * @return one row per combination of the two boolean flags
	 */
	@DataProvider
	public Object[][] escapeHandlingProvider() {
		// All rows describe the result of parsing this line: ||,|| |"," |" B |" "," |" ||"
		final Object[][] combinations = {
				// escapes processed on quoted values only
				{false, false, new String[]{"||", "|| |\"", " \" B \" ", " \" |"}},
				// escapes processed on quoted and unquoted values
				{false, true, new String[]{"|", "| \"", " \" B \" ", " \" |"}},
				// escape sequences kept, unquoted values not escaped
				{true, false, new String[]{"||", "|| |\"", " |\" B |\" ", " |\" ||"}},
				// escape sequences kept everywhere
				{true, true, new String[]{"||", "|| |\"", " |\" B |\" ", " |\" ||"}}
		};
		return combinations;
	}

	/**
	 * Parses a fixed line using {@code '|'} both as the quote escape and as the character that
	 * escapes the quote escape, and compares the result with the expectation for the given flags.
	 *
	 * @param keepEscape     value passed to {@code setKeepEscapeSequences}
	 * @param escapeUnquoted value passed to {@code setEscapeUnquotedValues}
	 * @param expected       values the parsed line must equal
	 */
	@Test(dataProvider = "escapeHandlingProvider")
	public void testHandlingOfEscapeSequences(boolean keepEscape, boolean escapeUnquoted, String[] expected) throws Exception {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setKeepEscapeSequences(keepEscape);
		parserSettings.setEscapeUnquotedValues(escapeUnquoted);
		parserSettings.getFormat().setCharToEscapeQuoteEscaping('|');
		parserSettings.getFormat().setQuoteEscape('|');

		// raw content: ||,|| |"," |" B |" "," |" ||"
		String[] actual = new CsvParser(parserSettings).parseLine("||,|| |\",\" |\" B |\" \",\" |\" ||\"");
		assertEquals(actual, expected);
	}

	/**
	 * With normalization of line endings within quotes disabled and {@code "\r\n"} as the line
	 * separator, expects the mixed line endings inside quoted values to come back unchanged.
	 */
	@Test
	public void testEscapedLineEndingsAreNotModified() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setNormalizeLineEndingsWithinQuotes(false);
		parserSettings.getFormat().setLineSeparator("\r\n");

		String input = "1,\" Line1 \r\n Line2 \r Line3 \n Line4 \n\r \"\r\n" +
				"2,\" Line10 \r\n Line11 \"";

		List<String[]> rows = new CsvParser(parserSettings).parseAll(new StringReader(input));
		assertEquals(rows.size(), 2);

		String[] firstRow = rows.get(0);
		String[] secondRow = rows.get(1);
		assertEquals(firstRow.length, 2);
		assertEquals(secondRow.length, 2);

		assertEquals(firstRow, new String[]{"1", " Line1 \r\n Line2 \r Line3 \n Line4 \n\r "});
		assertEquals(secondRow, new String[]{"2", " Line10 \r\n Line11 "});
	}

	/**
	 * Line separator used by the tests in this class that parse inline input.
	 *
	 * @return a new single-element array holding the line feed character
	 */
	public char[] getLineSeparator() {
		final char[] lineFeed = {'\n'};
		return lineFeed;
	}

	/**
	 * With parsing of unescaped quotes disabled, expects a {@link TextParsingException} mentioning
	 * "Unescaped quote character" when a quoted value contains an unescaped quote.
	 */
	@Test
	public void shouldNotAllowParseUnescapedQuotes() throws UnsupportedEncodingException {
		CsvParserSettings parserSettings = newCsvInputSettings(getLineSeparator());
		// a plain processor is used because the default one of this test class skips 2 lines
		parserSettings.setRowProcessor(new RowListProcessor());
		// forces the exception
		parserSettings.setParseUnescapedQuotes(false);

		CsvParser csvParser = new CsvParser(parserSettings);
		try {
			csvParser.parse(new StringReader("1997,\"TV 29\"LED\"\n"));
			fail("Expected exception to be thrown here");
		} catch (TextParsingException expected) {
			String message = expected.getMessage();
			assertTrue(message.contains("Unescaped quote character"));
		}
	}

	/**
	 * With unescaped quotes allowed and {@code parseUnescapedQuotesUntilDelimiter} disabled, expects
	 * a quoted value containing a quote followed by a space to be read as a single value.
	 */
	@Test
	public void parseQuotedStringFollowedByBlankSpace() throws UnsupportedEncodingException {
		// a plain processor is used because the default one of this test class skips 2 lines
		RowListProcessor rowCollector = new RowListProcessor();

		CsvParserSettings parserSettings = newCsvInputSettings(getLineSeparator());
		parserSettings.setRowProcessor(rowCollector);
		parserSettings.setParseUnescapedQuotes(true);
		parserSettings.setParseUnescapedQuotesUntilDelimiter(false);

		new CsvParser(parserSettings).parse(new StringReader("1997,\"TV 29\" LED\"\n"));

		List<String[]> parsedRows = rowCollector.getRows();
		assertEquals(parsedRows.size(), 1);

		String[] onlyRow = parsedRows.get(0);
		assertEquals(onlyRow[0], "1997");
		assertEquals(onlyRow[1], "TV 29\" LED");
	}

	/**
	 * With parsing of unescaped quotes disabled, expects a {@link TextParsingException} when a
	 * character follows the closing quote of a quoted value.
	 *
	 * @param csvFile       supplied by the data provider; not read by this test
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(dataProvider = "testProvider")
	public void shouldNotAllowUnexpectedCharacterAfterQuotedValue(String csvFile, char[] lineSeparator) throws UnsupportedEncodingException {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setParseUnescapedQuotes(false);

		CsvParser csvParser = new CsvParser(parserSettings);
		try {
			csvParser.parseLine("1997,\"value\"x");
			fail("Expected exception to be thrown here");
		} catch (TextParsingException expected) {
			String message = expected.getMessage();
			assertTrue(message.contains("Unescaped quote character '\"' inside quoted value of CSV field"));
		}
	}


	/**
	 * Parses an unquoted value that starts with the quote escape followed by the character that
	 * escapes it, keeping escape sequences and trailing whitespace, and checks the value read.
	 */
	@Test
	public void parseValueProcessingEscapeNotIgnoringWhitespace() {
		// a plain processor is used because the default one of this test class skips 2 lines
		RowListProcessor rowCollector = new RowListProcessor();

		CsvParserSettings parserSettings = newCsvInputSettings(getLineSeparator());
		parserSettings.setRowProcessor(rowCollector);
		parserSettings.setKeepEscapeSequences(true);
		parserSettings.setIgnoreTrailingWhitespaces(false);
		parserSettings.setEscapeUnquotedValues(true);

		CsvFormat escapeFormat = new CsvFormat();
		escapeFormat.setQuoteEscape('\'');
		escapeFormat.setCharToEscapeQuoteEscaping('\\');
		parserSettings.setFormat(escapeFormat);

		// per the original author: goes into the else statement of CsvParser.parseValueProcessingEscape()
		new CsvParser(parserSettings).parse(new StringReader("'\\\"a\n"));

		List<String[]> parsedRows = rowCollector.getRows();
		assertEquals(parsedRows.size(), 1);

		String[] onlyRow = parsedRows.get(0);
		assertEquals(onlyRow[0], "\\\"a");
	}

	/**
	 * Parses an unquoted value made of the escape-of-escape character followed by the quote escape,
	 * keeping escape sequences and trailing whitespace, and checks the value read.
	 */
	@Test
	public void parseValueProcessingEscapeNotIgnoringWhitespacePrevQuoteEscape2() {
		// a plain processor is used because the default one of this test class skips 2 lines
		RowListProcessor rowCollector = new RowListProcessor();

		CsvParserSettings parserSettings = newCsvInputSettings(getLineSeparator());
		parserSettings.setRowProcessor(rowCollector);
		parserSettings.setKeepEscapeSequences(true);
		parserSettings.setIgnoreTrailingWhitespaces(false);
		parserSettings.setEscapeUnquotedValues(true);

		CsvFormat escapeFormat = new CsvFormat();
		escapeFormat.setQuoteEscape('\'');
		escapeFormat.setCharToEscapeQuoteEscaping('\\');
		parserSettings.setFormat(escapeFormat);

		new CsvParser(parserSettings).parse(new StringReader("\\\'\n"));

		List<String[]> parsedRows = rowCollector.getRows();
		assertEquals(parsedRows.size(), 1);

		String[] onlyRow = parsedRows.get(0);
		assertEquals(onlyRow[0], "\\\\'");
	}

	/**
	 * Parses an unquoted value made of the quote escape followed by a quote, keeping escape
	 * sequences and trailing whitespace, and expects the value to be returned unchanged.
	 */
	@Test
	public void parseValueProcessingEscapeNotIgnoringWhitespacePrevQuoteEscape() {
		// a plain processor is used because the default one of this test class skips 2 lines
		RowListProcessor rowCollector = new RowListProcessor();

		CsvParserSettings parserSettings = newCsvInputSettings(getLineSeparator());
		parserSettings.setRowProcessor(rowCollector);
		parserSettings.setKeepEscapeSequences(true);
		parserSettings.setIgnoreTrailingWhitespaces(false);
		parserSettings.setEscapeUnquotedValues(true);

		CsvFormat escapeFormat = new CsvFormat();
		escapeFormat.setQuoteEscape('\'');
		escapeFormat.setCharToEscapeQuoteEscaping('\\');
		parserSettings.setFormat(escapeFormat);

		new CsvParser(parserSettings).parse(new StringReader("'\"a\n"));

		List<String[]> parsedRows = rowCollector.getRows();
		assertEquals(parsedRows.size(), 1);

		String[] onlyRow = parsedRows.get(0);
		assertEquals(onlyRow[0], "'\"a");
	}

	/**
	 * Supplies {@code {rows to skip, expected concatenated output}} pairs for {@code testSkipLines}.
	 * The {@code "BOOM"} entry is a sentinel that the test replaces with {@code null}.
	 *
	 * @return one row per number of rows to skip, from 0 to 5
	 */
	@DataProvider
	public Object[][] skipLinesProvider() {
		final Object[][] cases = {
				{0, "1234"},
				{1, "234"},
				{2, "34"},
				{3, "4"},
				{4, null},
				{5, "BOOM"},
		};
		return cases;
	}

	/**
	 * Parses the four single-column rows {@code 1..4} after skipping the given number of rows and
	 * compares the concatenation of what remains with the expected text. When nothing is parsed
	 * the concatenation is {@code null}.
	 *
	 * @param rowsToSkip     value passed to {@code setNumberOfRowsToSkip}
	 * @param expectedResult expected concatenation; the sentinel {@code "BOOM"} stands for {@code null}
	 */
	@Test(dataProvider = "skipLinesProvider")
	public void testSkipLines(int rowsToSkip, String expectedResult) {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setNumberOfRowsToSkip(rowsToSkip);

		List<String[]> rows = new CsvParser(parserSettings).parseAll(new StringReader("1\n2\n3\n4\n"));

		// stays null when no row is returned, mirroring the null expectation of the provider
		StringBuilder joined = rows.isEmpty() ? null : new StringBuilder();
		for (String[] row : rows) {
			assertEquals(row.length, 1);
			joined.append(row[0]);
		}

		String actual = joined == null ? null : joined.toString();
		String expected = "BOOM".equals(expectedResult) ? null : expectedResult;
		assertEquals(actual, expected);
	}

	/**
	 * With {@code parseUnescapedQuotesUntilDelimiter} enabled, checks the five values read from a
	 * line that mixes an unescaped inner quote with properly quoted values containing delimiters.
	 */
	@Test
	public void testParseUnescapedQuotesWithStop() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setParseUnescapedQuotesUntilDelimiter(true);
		parserSettings.getFormat().setLineSeparator("\n");

		String line = "field1,\"inner quote\" field2,\"12,34\",\",5\",";
		String[] parsed = new CsvParser(parserSettings).parseLine(line);

		assertEquals(parsed[0], "field1");
		assertEquals(parsed[1], "\"inner quote\" field2");
		assertEquals(parsed[2], "12,34");
		assertEquals(parsed[3], ",5");
		assertNull(parsed[4]);
	}

	/**
	 * With trailing whitespace trimming enabled, expects {@code "b "} to be read as {@code "b"}.
	 */
	@Test
	public void parseIgnoreTrailingWhitespace() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.getFormat().setLineSeparator("\n");
		parserSettings.setIgnoreTrailingWhitespaces(true);

		String[] parsed = new CsvParser(parserSettings).parseLine("b ");
		assertEquals(parsed[0], "b");
	}

	/**
	 * Parses one line of 100000 digits, with a comma after every 10000th position starting at 0,
	 * using a maximum column length of -1, and checks that re-joining the values with commas
	 * reproduces the input.
	 */
	@Test
	public void parseWithAutoExpansion() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setMaxCharsPerColumn(-1);

		StringBuilder input = new StringBuilder(100000);
		int position = 0;
		while (position < 100000) {
			input.append(position % 10);
			if (position % 10000 == 0) {
				input.append(',');
			}
			position++;
		}

		String[] values = new CsvParser(parserSettings).parseLine(input.toString());

		StringBuilder rebuilt = new StringBuilder();
		for (int i = 0; i < values.length; i++) {
			if (rebuilt.length() > 0) {
				rebuilt.append(',');
			}
			rebuilt.append(values[i]);
		}

		assertEquals(rebuilt.toString(), input.toString());
	}

	/**
	 * Triggers a {@link TextParsingException} by exceeding a 3-character column limit and checks how
	 * the error content length setting affects the message and the parsed content of the exception:
	 * with length 0 neither exposes the content, with length 2 the message contains {@code "...bc"}.
	 */
	@Test
	public void testErrorMessageRestrictions() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setMaxCharsPerColumn(3);

		// error content length 0: nothing of the input may show up
		parserSettings.setErrorContentLength(0);
		try {
			new CsvParser(parserSettings).parseLine("abcde");
			fail("Expecting an exception here");
		} catch (TextParsingException hidden) {
			assertFalse(hidden.getMessage().contains("abc"));
			assertNull(hidden.getParsedContent());
		}

		// error content length 2: the message is restricted, the parsed content is available
		parserSettings.setErrorContentLength(2);
		try {
			new CsvParser(parserSettings).parseLine("abcde");
			fail("Expecting an exception here");
		} catch (TextParsingException restricted) {
			assertTrue(restricted.getMessage().contains("...bc"));
			assertEquals(restricted.getParsedContent(), "abc");
		}
	}

	/**
	 * With {@code keepQuotes} enabled and {@code '} used as both quote and quote escape, checks the
	 * first six values read from a line of quoted values.
	 */
	@Test
	public void testKeepQuotes() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setKeepQuotes(true);
		parserSettings.getFormat().setQuote('\'');
		parserSettings.getFormat().setQuoteEscape('\'');

		String[] parsed = new CsvParser(parserSettings).parseLine("a,'b', '', '' c '', '' ' '', ''''");

		String[] expected = {"a", "'b'", "''", "'' c ''", "'' ' ''", "'''"};
		for (int i = 0; i < expected.length; i++) {
			assertEquals(parsed[i], expected[i]);
		}
	}

	/**
	 * With the null value set to {@code "NULL"}, leading whitespace kept and trailing whitespace
	 * trimmed, expects all three values of {@code ", ,"} to be read as {@code "NULL"}.
	 */
	@Test
	public void testNullValue() {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setIgnoreLeadingWhitespaces(false);
		parserSettings.setIgnoreTrailingWhitespaces(true);
		parserSettings.setNullValue("NULL");

		String[] parsed = new CsvParser(parserSettings).parseLine(", ,");

		assertEquals(parsed.length, 3);
		for (int i = 0; i < 3; i++) {
			assertEquals(parsed[i], "NULL");
		}
	}

	/**
	 * With user-provided headers, column reordering disabled and fields {@code a} and {@code c}
	 * selected, expects the unselected middle column to be returned as {@code null}.
	 */
	@Test
	public void testColumnReorderingWithUserProvidedHeaders() throws Exception {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setHeaders("a", "b", "c");
		parserSettings.setColumnReorderingEnabled(false);
		parserSettings.selectFields("a", "c");

		CsvParser csvParser = new CsvParser(parserSettings);
		String[] parsed = csvParser.parseLine("1,2,3");

		assertEquals(parsed, new String[]{"1", null, "3"});
	}

	/**
	 * With {@code '/'} as the quote escape, expects both a single and a doubled escape character
	 * inside a quoted value to be read as a single {@code '/'}.
	 */
	@Test
	public void testEscapeCharacter() {
		CsvParserSettings settings = new CsvParserSettings();
		settings.getFormat().setQuoteEscape('/');
		CsvParser csvParser = new CsvParser(settings);

		// single escape characters not followed by a quote
		String[] single = csvParser.parseLine("\"a ,/,b/,\",c");
		assertEquals(single.length, 2);
		assertEquals(single[0], "a ,/,b/,");
		assertEquals(single[1], "c");

		// doubled escape characters
		String[] doubled = csvParser.parseLine("\"a ,//,b//,\",c");
		assertEquals(doubled.length, 2);
		assertEquals(doubled[0], "a ,/,b/,");
		assertEquals(doubled[1], "c");
	}

	/**
	 * With {@code skipBitsAsWhitespace} disabled, checks which low-valued control characters
	 * ({@code \0}, {@code \1}, {@code \2}) are kept in the parsed values of three sample lines.
	 */
	@Test
	public void testBitsAreNotDiscardedWhenParsing() {
		CsvParserSettings settings = new CsvParserSettings();
		settings.setSkipBitsAsWhitespace(false);
		CsvParser csvParser = new CsvParser(settings);

		String[] first = csvParser.parseLine("\0 a, b");
		assertEquals(first.length, 2);
		assertEquals(first[0], "\0 a");
		assertEquals(first[1], "b");

		String[] second = csvParser.parseLine("\1 a, b \0");
		assertEquals(second.length, 2);
		assertEquals(second[0], "\1 a");
		assertEquals(second[1], "b \0");

		String[] third = csvParser.parseLine("\2 a, \"b, \1\"");
		assertEquals(third.length, 2);
		assertEquals(third[0], "a");
		assertEquals(third[1], "b, \1");
	}

	/**
	 * Iterates over {@code /csv/iterating_test.csv} both as raw rows and as {@link Record}s and
	 * checks every row against the expected content, then checks that iterating over empty input
	 * yields nothing.
	 *
	 * <p>Each iteration also verifies the number of rows produced, so that an iterator returning
	 * fewer rows than expected (or none at all) cannot pass unnoticed.
	 */
	@Test
	public void testParserIteratorOnFile() throws Exception {
		CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setLineSeparatorDetectionEnabled(true);

		CsvParser parser = new CsvParser(parserSettings);

		String[][] correctRows = {
				{"a", "b", "c"},
				{"d", "e", "f"},
				{"g", "h", "i"},
				{"j", null},
				{"k", "l"},
				{"m", "n", "o", "p", "q", "r"}
		};

		Reader input = newReader("/csv/iterating_test.csv");
		int i = 0;
		for (String[] row : parser.iterate(input)) {
			// fail with a readable message instead of an ArrayIndexOutOfBoundsException
			assertTrue(i < correctRows.length, "Unexpected extra row: " + Arrays.toString(row));
			assertEquals(row, correctRows[i++]);
		}
		assertEquals(i, correctRows.length);

		input = newReader("/csv/iterating_test.csv");
		i = 0;
		for (Record row : parser.iterateRecords(input)) {
			assertTrue(i < correctRows.length, "Unexpected extra record: " + Arrays.toString(row.getValues()));
			assertEquals(row.getValues(), correctRows[i++]);
		}
		assertEquals(i, correctRows.length);

		for (Record row : parser.iterateRecords(new StringReader(""))) {
			fail("Empty input, should not get here");
		}
	}

	/**
	 * Appends 200 lines of the form {@code <epoch millis>,10000\n} to {@code sb}. The timestamps
	 * start 200 seconds before the current time and advance by one second per line.
	 *
	 * @param sb builder that receives the generated lines
	 */
	private static void append4000Symbols(StringBuilder sb) {
		final long oneSecond = TimeUnit.SECONDS.toMillis(1);
		long timestamp = System.currentTimeMillis() - TimeUnit.SECONDS.toMillis(200);
		for (int line = 0; line < 200; line++) {
			sb.append(timestamp).append(",10000\n");
			timestamp += oneSecond;
		}
	}

	/**
	 * Builds an input in which a long comment line follows the header and 200 generated data lines,
	 * then checks that exactly one comment is collected and that it equals the comment line without
	 * its first two characters ({@code "# "}).
	 */
	@Test
	public void testCollectCommentOnBufferUpdate() {
		final StringBuilder commentLine = new StringBuilder("#");
		for (int i = 0; i < 100; i++) {
			commentLine.append(' ').append(i);
		}
		final StringBuilder csv = new StringBuilder("time,value\n");
		append4000Symbols(csv);
		// now the comment processing will meet buffer update operation (if buffer size is 4096)
		csv.append(commentLine);

		final CsvParserSettings csvParserSettings = new CsvParserSettings();
		csvParserSettings.setCommentCollectionEnabled(true);
		csvParserSettings.setReadInputOnSeparateThread(false);
		final CsvParser csvParser = new CsvParser(csvParserSettings);
		csvParser.parseAll(new StringReader(csv.toString()));
		final Map<Long, String> comments = csvParser.getContext().comments();

		// TestNG's assertEquals takes (actual, expected); keeping that order makes failure messages accurate
		assertEquals(comments.size(), 1);
		assertEquals(comments.values().iterator().next(), commentLine.substring(2));
	}

	/**
	 * With header extraction disabled and headers provided by the user, checks that the headers
	 * seen through the {@link ParsingContext} in {@code processStarted} equal the user-defined ones.
	 */
	@Test
	public void shouldPrintUserDefinedHeaders() {
		final String[] userDefinedHeader = {"timestamp", "memory_used"};
		final CsvParserSettings settings = new CsvParserSettings();
		settings.setHeaderExtractionEnabled(false);
		settings.setHeaders(userDefinedHeader);

		// single-element holder so the anonymous processor can hand the headers back to the test
		final String[][] headersFromContext = new String[][]{null};
		settings.setProcessor(new AbstractRowProcessor() {
			@Override
			public void processStarted(ParsingContext context) {
				// captured for the assertion below; no console output is needed in a test
				headersFromContext[0] = context.headers();
			}
		});
		settings.setReadInputOnSeparateThread(false);
		final CsvParser csvParser = new CsvParser(settings);
		final String csv = "2018-11-22T17:53:19.446Z,1493984088\n" +
				"2018-11-22T17:53:34.447Z,865556632\n" +
				"2018-11-22T17:53:49.447Z,600667192";
		csvParser.parse(new StringReader(csv));
		assertEquals(headersFromContext[0], userDefinedHeader);
	}

	/**
	 * With delimiter detection enabled for {@code ','}, {@code ' '} and {@code ';'}, expects the
	 * space to be detected as delimiter for input whose columns are separated by spaces.
	 */
	@Test
	public void shouldResolveSpaceAsColumnSeparator() {
		final String content = "<TICKER> <PER> <DATE> <TIME> <OPEN> <HIGH> <LOW> <CLOSE> <VOL>\n" +
				"MOEX 15 12/03/18 10:15:00 90.0900000 90.9500000 90.0700000 90.8200000 468,730\n" +
				"MOEX 15 12/03/18 10:30:00 90.8200000 90.8600000 90.6000000 90.7100000 136,040\n" +
				"MOEX 15 12/03/18 10:45:00 90.7000000 91.3000000 90.6600000 90.9900000 278,580";
		final Reader input = new StringReader(content);

		final CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setReadInputOnSeparateThread(false);
		parserSettings.setLineSeparatorDetectionEnabled(true);
		parserSettings.setDelimiterDetectionEnabled(true, ',', ' ', ';');

		final CsvParser parser = new CsvParser(parserSettings);
		parser.beginParsing(input);

		final char detectedDelimiter = parser.getDetectedFormat().getDelimiter();
		assertEquals(detectedDelimiter, ' ');
	}

	/**
	 * With delimiter detection enabled for {@code ' '}, {@code ','} and {@code ';'}, expects the
	 * comma to be detected as delimiter for comma-separated input whose header names contain spaces.
	 */
	@Test
	public void shouldNotResolveSpaceAsColumnSeparator() {
		final String content = "<COL TICKER>,<COL PER>,<COL DATE>,<COL TIME>,<COL OPEN>,<COL HIGH>,<COL LOW>,<COL CLOSE>,<COL VOL>\n" +
				"MOEX,15,12/03/18,10:15:00,90.0900000,90.9500000,90.0700000,90.8200000,468,730\n" +
				"MOEX,15,12/03/18,10:30:00,90.8200000,90.8600000,90.6000000,90.7100000,136,040\n" +
				"MOEX,15,12/03/18,10:45:00,90.7000000,91.3000000,90.6600000,90.9900000,278,580";
		final Reader input = new StringReader(content);

		final CsvParserSettings parserSettings = new CsvParserSettings();
		parserSettings.setReadInputOnSeparateThread(false);
		parserSettings.setLineSeparatorDetectionEnabled(true);
		parserSettings.setDelimiterDetectionEnabled(true, ' ', ',', ';');

		final CsvParser parser = new CsvParser(parserSettings);
		parser.beginParsing(input);

		final char detectedDelimiter = parser.getDetectedFormat().getDelimiter();
		assertEquals(detectedDelimiter, ',');
	}

	/**
	 * Parses the given resource with comment processing disabled (comment collection stays
	 * enabled) and expects the comment line to come back as a regular single-column row, with no
	 * comments collected by the context.
	 *
	 * @param csvFile       classpath resource to parse
	 * @param lineSeparator line separator handed to {@code newCsvInputSettings}; may be {@code null}
	 */
	@Test(enabled = true, dataProvider = "csvProvider")
	public void parseDisablingCommentLineCheck(String csvFile, char[] lineSeparator) throws Exception {
		CsvParserSettings parserSettings = newCsvInputSettings(lineSeparator);
		parserSettings.setCommentCollectionEnabled(true);
		parserSettings.setRowProcessor(processor);
		parserSettings.setCommentProcessingEnabled(false);
		parserSettings.setHeaderExtractionEnabled(true);
		parserSettings.setIgnoreLeadingWhitespaces(true);
		parserSettings.setIgnoreTrailingWhitespaces(true);

		CsvParser csvParser = new CsvParser(parserSettings);
		csvParser.parse(newReader(csvFile));

		String[] headers = {"Year", "Make", "Model", "Description", "Price"};

		String[][] rows = {
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1999", "Chevy", "Venture \"Extended Edition\"", null, "4900.00"},
				{"#this is a comment and should be ignored"},
				{"1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"},
				{"1999", "Chevy", "Venture \"Extended Edition, Very Large\"", null, "5000.00"},
				{null, null, "Venture \"Extended Edition\"", null, "4900.00"},
				{null, null, null, null, null},
				{null, null, null, null, null},
				{null, null, "5", null, null},
				{"1997", "Ford", "E350", "ac, abs, moon", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"1997", "Ford", "E350", " ac, abs, moon ", "3000.00"},
				{"19 97", "Fo rd", "E350", " ac, abs, moon ", "3000.00"},
				{null, " ", null, "  ", "30 00.00"},
				{"1997", "Ford", "E350", " \" ac, abs, moon \" ", "3000.00"},
				{"1997", "Ford", "E350", "\" ac, abs, moon \" ", "3000.00"},
		};

		assertHeadersAndValuesMatch(headers, rows);

		// nothing may be collected as a comment when comment processing is off
		Map<Long, String> collectedComments = csvParser.getContext().comments();
		assertEquals(collectedComments.size(), 0);
		assertEquals(csvParser.getContext().lastComment(), null);
	}

}