Github_271.java
/*******************************************************************************
* Copyright 2018 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.issues.github;
import com.univocity.parsers.csv.*;
import org.testng.annotations.*;
import java.io.*;
import java.util.*;
import static org.testng.Assert.*;
/**
 * Regression tests for https://github.com/univocity/univocity-parsers/issues/271
 *
 * Every test configures the CSV parser with {@link UnescapedQuoteHandling#BACK_TO_DELIMITER} and feeds it
 * input in which a quoted value contains an unescaped quote (or is missing its closing quote), then checks
 * the values produced for each record.
 *
 * @author Univocity Software Pty Ltd - <a href="mailto:dev@univocity.com">dev@univocity.com</a>
 */
public class Github_271 {

    /**
     * Pipe-separated input shared by the detection-based tests: one header line followed by two records,
     * the first of which carries an unescaped quote at the end of its second value.
     */
    private static final String PIPE_SEPARATED_INPUT = "" +
            "\"name\"|\"description\"|\"digit\"|\"other\"\n" +
            " \"test one\"|\"test description with \"\"|\"1\"|\"other one\"\n" +
            " \"test two\"|\"test description without a quote\"|\"2\"|\"other two\"\n";

    /**
     * Supplies the delimiters used by the parameterized tests: a single character plus several
     * multi-character sequences.
     *
     * @return one single-element argument array per delimiter
     */
    @DataProvider
    public Object[][] delimiterProvider() {
        String[] delimiters = {",", "#|#", "##", ". .", ". "};
        Object[][] arguments = new Object[delimiters.length][];
        for (int i = 0; i < delimiters.length; i++) {
            arguments[i] = new Object[]{delimiters[i]};
        }
        return arguments;
    }

    /**
     * Parses three fully quoted lines in one go, with value trimming enabled and the input read on a
     * separate thread. The second line has a leading space and an unescaped quote right before a delimiter.
     *
     * @param delimiter the field delimiter placed between the values of the generated input
     */
    @Test(dataProvider = "delimiterProvider")
    public void testUnescapedHandling(String delimiter) {
        CsvParserSettings settings = newBackToDelimiterSettings();
        settings.getFormat().setLineSeparator("\n");
        settings.getFormat().setDelimiter(delimiter);
        settings.setReadInputOnSeparateThread(true);
        settings.trimValues(true);

        // The last line deliberately has no trailing line separator.
        String input = joinFields(delimiter, "\"name\"", "\"description\"", "\"digit\"", "\"other\"") + "\n"
                + joinFields(delimiter, " \"test one\"", "\"test description with \"\"", "\"1\"", "\"other one\"") + "\n"
                + joinFields(delimiter, "\"test two\"", "\"test description without a quote\"", "\"2\"", "\"other two\"");

        List<String[]> records = new CsvParser(settings).parseAll(new StringReader(input));

        assertEquals(records.size(), 3);
        assertRow(records.get(0), "name", "description", "digit", "other");
        assertRow(records.get(1), "test one", "test description with \"", "1", "other one");
        assertRow(records.get(2), "test two", "test description without a quote", "2", "other two");
    }

    /**
     * Reads three lines record by record. The second line opens a quoted value that is never closed,
     * and the test expects that value to end at the next delimiter so the following line is unaffected.
     *
     * @param delimiter the field delimiter placed between the values of the generated input
     */
    @Test(dataProvider = "delimiterProvider")
    public void testBackToDelimiter(String delimiter) {
        CsvParserSettings settings = newBackToDelimiterSettings();
        settings.getFormat().setDelimiter(delimiter);
        settings.getFormat().setLineSeparator("\n");

        String content = joinFields(delimiter, "Example Line 1", "some data", "\"good line\"", "processes fine", "happy") + "\n"
                + joinFields(delimiter, "Example Line 2", "some data", "\"bad line", "processes poorly", "unhappy") + "\n"
                + joinFields(delimiter, "Example Line 3", "some data", "\"good line\"", "dies before here", "unhappy");

        CsvParser parser = new CsvParser(settings);
        parser.beginParsing(new StringReader(content));

        assertRow(parser.parseNext(), "Example Line 1", "some data", "good line", "processes fine", "happy");
        assertRow(parser.parseNext(), "Example Line 2", "some data", "bad line", "processes poorly", "unhappy");
        assertRow(parser.parseNext(), "Example Line 3", "some data", "good line", "dies before here", "unhappy");
    }

    /**
     * Parses the pipe-separated input with line separator, delimiter and quote detection enabled and the
     * first line extracted as headers; the two remaining records are expected without their quotes.
     */
    @Test
    public void testBackToDelimiterWithAutoDetection() {
        CsvParserSettings settings = newDetectionSettings();

        List<String[]> records = new CsvParser(settings).parseAll(new StringReader(PIPE_SEPARATED_INPUT));

        assertEquals(records.size(), 2);
        assertRow(records.get(0), "test one", "test description with \"", "1", "other one");
        assertRow(records.get(1), "test two", "test description without a quote", "2", "other two");
    }

    /**
     * Same scenario as the auto-detection test, but with {@code keepQuotes} enabled: every value is
     * expected to retain its enclosing quotes.
     */
    @Test
    public void testBackToDelimiterWithKeepQuotes() {
        CsvParserSettings settings = newDetectionSettings();
        settings.setKeepQuotes(true);

        List<String[]> records = new CsvParser(settings).parseAll(new StringReader(PIPE_SEPARATED_INPUT));

        assertEquals(records.size(), 2);
        assertRow(records.get(0), "\"test one\"", "\"test description with \"\"", "\"1\"", "\"other one\"");
        assertRow(records.get(1), "\"test two\"", "\"test description without a quote\"", "\"2\"", "\"other two\"");
    }

    /**
     * Creates parser settings whose only customization is the
     * {@link UnescapedQuoteHandling#BACK_TO_DELIMITER} strategy.
     */
    private static CsvParserSettings newBackToDelimiterSettings() {
        CsvParserSettings settings = new CsvParserSettings();
        settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.BACK_TO_DELIMITER);
        return settings;
    }

    /**
     * Creates {@code BACK_TO_DELIMITER} settings with header extraction switched on and the line
     * separator, delimiter and quote all left to automatic detection.
     */
    private static CsvParserSettings newDetectionSettings() {
        CsvParserSettings settings = newBackToDelimiterSettings();
        settings.setLineSeparatorDetectionEnabled(true);
        settings.setHeaderExtractionEnabled(true);
        settings.setDelimiterDetectionEnabled(true);
        settings.setQuoteDetectionEnabled(true);
        return settings;
    }

    /**
     * Concatenates the given raw field texts, inserting the delimiter between consecutive fields.
     * No quoting or escaping is applied; the fields are emitted exactly as given.
     */
    private static String joinFields(String delimiter, String... fields) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                line.append(delimiter);
            }
            line.append(fields[i]);
        }
        return line.toString();
    }

    /**
     * Asserts that a parsed record has as many values as expected, then compares them one by one in order.
     */
    private static void assertRow(String[] actual, String... expected) {
        assertEquals(actual.length, expected.length);
        for (int i = 0; i < expected.length; i++) {
            assertEquals(actual[i], expected[i]);
        }
    }
}