// CsvSearchExample.java
/*******************************************************************************
* Copyright 2016 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.examples.samples;
import com.univocity.parsers.common.*;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.csv.*;
import java.io.*;
import java.util.*;
/**
* Example to demonstrate how to implement a basic search over a CSV file.
*
* @author Univocity Software Pty Ltd - <a href="mailto:dev@univocity.com">dev@univocity.com</a>
*/
public class CsvSearchExample {

	/** Input file used when no path is supplied on the command line. */
	private static final String DEFAULT_INPUT = "/Users/jbax/dev/data/worldcitiespop.txt";

	/**
	 * A row processor that keeps only the rows whose value in one column contains a given
	 * piece of text (case-insensitive). Matching rows are accumulated by the parent
	 * {@link RowListProcessor}; every other row is discarded.
	 *
	 * The column can be identified either by header name (resolved against the parsing
	 * context when the first row arrives) or directly by its zero-based position.
	 */
	static class CsvSearch extends RowListProcessor {

		/** Marker meaning "the column position has not been looked up yet". */
		private static final int UNRESOLVED = -1;

		// Search text, stored in lower case so each row only needs one conversion.
		private final String stringToMatch;
		// Header name to search in; null when the column was given by position.
		private final String columnToMatch;
		private int indexToMatch = UNRESOLVED;

		/**
		 * Searches a column identified by its header name.
		 *
		 * @param columnToMatch name of the header whose values will be tested
		 * @param stringToMatch text to look for, matched case-insensitively
		 * @throws NullPointerException if {@code stringToMatch} is null
		 */
		public CsvSearch(String columnToMatch, String stringToMatch) {
			if (stringToMatch == null) {
				throw new NullPointerException("stringToMatch must not be null");
			}
			this.columnToMatch = columnToMatch;
			// Locale.ROOT keeps the comparison independent of the JVM's default locale.
			this.stringToMatch = stringToMatch.toLowerCase(Locale.ROOT);
		}

		/**
		 * Searches a column identified by its zero-based position.
		 *
		 * @param columnToMatch zero-based position of the column whose values will be tested
		 * @param stringToMatch text to look for, matched case-insensitively
		 * @throws IllegalArgumentException if {@code columnToMatch} is negative
		 * @throws NullPointerException if {@code stringToMatch} is null
		 */
		public CsvSearch(int columnToMatch, String stringToMatch) {
			// No header name in this mode; the search text goes in the second position.
			this((String) null, stringToMatch);
			if (columnToMatch < 0) {
				throw new IllegalArgumentException("Column index must not be negative: " + columnToMatch);
			}
			indexToMatch = columnToMatch;
		}

		/**
		 * Forwards the row to the parent processor only if the searched column contains the
		 * search text; otherwise the row is dropped.
		 *
		 * @param row the values of the record just parsed
		 * @param context the current parsing context, used to translate the header name into a position
		 * @throws IllegalArgumentException if the configured header name cannot be found
		 */
		@Override
		public void rowProcessed(String[] row, ParsingContext context) {
			if (indexToMatch == UNRESOLVED) {
				// Look the header up once and reuse the position for every following row.
				indexToMatch = context.indexOf(columnToMatch);
				if (indexToMatch < 0) {
					throw new IllegalArgumentException("Column '" + columnToMatch + "' not found in the input headers");
				}
			}

			// A record may have fewer values than the searched position; it cannot match.
			if (indexToMatch >= row.length) {
				return;
			}

			String value = row[indexToMatch];
			if (value != null && value.toLowerCase(Locale.ROOT).contains(stringToMatch)) {
				super.rowProcessed(row, context);
			}
			// else skip the row.
		}
	}

	/**
	 * Runs the search example: finds every row whose "City" column contains "paris" and
	 * prints the matches together with a rough timing.
	 *
	 * @param args optional; {@code args[0]} is the path of the CSV file to search. When
	 *             absent, the original sample file location is used.
	 */
	public static void main(String... args) {
		String inputPath = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT;

		// let's measure the time roughly
		long start = System.currentTimeMillis();

		CsvParserSettings settings = new CsvParserSettings();
		settings.setHeaderExtractionEnabled(true);

		CsvSearch search = new CsvSearch("City", "Paris"); //searching for cities with "paris" in the name

		//We instruct the parser to send all rows parsed to our custom RowProcessor.
		settings.setProcessor(search);

		//Finally, we create a parser
		CsvParser parser = new CsvParser(settings);

		//And parse! All rows are sent to our custom RowProcessor (CsvSearch)
		//The original sample input is a 150MB CSV file with 1.3 million rows.
		parser.parse(new File(inputPath), "ISO-8859-1");

		//Nothing else to do once parsing returns. Let's just get the results:
		List<String[]> results = search.getRows();

		System.out.println("Rows matched: " + results.size());
		System.out.println("Time taken: " + (System.currentTimeMillis() - start) + " ms");

		System.out.println("Matched rows");
		for (String[] row : results) {
			System.out.println(Arrays.toString(row));
		}
	}
}