SPARQLResultsTSVMappingStrategy.java
/*******************************************************************************
* Copyright (c) 2018 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.query.resultio.text.tsv;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.impl.ListBindingSet;
import org.eclipse.rdf4j.query.resultio.text.SPARQLResultsXSVMappingStrategy;
import org.eclipse.rdf4j.rio.helpers.NTriplesUtil;
import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvValidationException;
/**
* Implements a {@link com.opencsv.bean.MappingStrategy} to allow opencsv to work in parallel. This is where the input
* is converted into {@link org.eclipse.rdf4j.query.BindingSet}s.
*
* @author Andrew Rucker Jones
*/
public class SPARQLResultsTSVMappingStrategy extends SPARQLResultsXSVMappingStrategy {
public SPARQLResultsTSVMappingStrategy(ValueFactory valueFactory) {
super(valueFactory);
}
@Override
public void captureHeader(CSVReader reader) throws IOException {
try {
// header is mandatory in SPARQL TSV
bindingNames = Stream.of(reader.readNext())
.map(s -> StringUtils.removeStart(s, "?"))
.collect(Collectors.toList());
} catch (CsvValidationException ex) {
throw new IOException(ex);
}
}
@Override
public BindingSet populateNewBean(String[] line) {
// process solution
List<Value> values = new ArrayList<>(line.length);
for (String valueString : line) {
values.add(parseValue(valueString));
}
return new ListBindingSet(bindingNames, values.toArray(new Value[values.size()]));
}
protected Value parseValue(String valueString) {
Value v = null;
if (valueString.startsWith("<<")) {
v = NTriplesUtil.parseTriple(valueString, valueFactory);
} else if (valueString.startsWith("_:")) {
v = valueFactory.createBNode(valueString.substring(2));
} else if (valueString.startsWith("<") && valueString.endsWith(">")) {
try {
v = valueFactory.createIRI(valueString.substring(1, valueString.length() - 1));
} catch (IllegalArgumentException e) {
v = valueFactory.createLiteral(valueString);
}
} else if (valueString.startsWith("\"")) {
v = parseLiteral(valueString);
} else if (!"".equals(valueString)) {
if (numberPattern.matcher(valueString).matches()) {
v = parseNumberPatternMatch(valueString);
} else {
v = valueFactory.createLiteral(valueString);
}
}
return v;
}
/**
* Parses a literal, creates an object for it and returns this object.
*
* @param literal The literal to parse.
* @return An object representing the parsed literal.
* @throws IllegalArgumentException If the supplied literal could not be parsed correctly.
*/
protected Literal parseLiteral(String literal) throws IllegalArgumentException {
if (literal.startsWith("\"")) {
// Find string separation points
int endLabelIdx = findEndOfLabel(literal);
if (endLabelIdx != -1) {
int startLangIdx = literal.indexOf('@', endLabelIdx);
int startDtIdx = literal.indexOf("^^", endLabelIdx);
if (startLangIdx != -1 && startDtIdx != -1) {
throw new IllegalArgumentException("Literals can not have both a language and a datatype");
}
// Get label
String label = literal.substring(1, endLabelIdx);
label = decodeString(label);
if (startLangIdx != -1) {
// Get language
String language = literal.substring(startLangIdx + 1);
return valueFactory.createLiteral(label, language);
} else if (startDtIdx != -1) {
// Get datatype
String datatype = literal.substring(startDtIdx + 2);
datatype = datatype.substring(1, datatype.length() - 1);
IRI dtURI = valueFactory.createIRI(datatype);
return valueFactory.createLiteral(label, dtURI);
} else {
return valueFactory.createLiteral(label);
}
}
}
throw new IllegalArgumentException("Not a legal literal: " + literal);
}
/**
* Finds the end of the label in a literal string.
*
* @return The index of the double quote ending the label.
*/
private int findEndOfLabel(String literal) {
// we just look for the last occurrence of a double quote
return literal.lastIndexOf('"');
}
/**
* Decodes an encoded Turtle string. Any \-escape sequences are substituted with their decoded value.
*
* @param s An encoded Turtle string.
* @return The unencoded string.
* @throws IllegalArgumentException If the supplied string is not a correctly encoded Turtle string.
**/
protected static String decodeString(String s) {
int backSlashIdx = s.indexOf('\\');
if (backSlashIdx == -1) {
// No escaped characters found
return s;
}
int startIdx = 0;
int sLength = s.length();
StringBuilder sb = new StringBuilder(sLength);
while (backSlashIdx != -1) {
sb.append(s.substring(startIdx, backSlashIdx));
if (backSlashIdx + 1 >= sLength) {
throw new IllegalArgumentException("Unescaped backslash in: " + s);
}
char c = s.charAt(backSlashIdx + 1);
switch (c) {
case 't':
sb.append('\t');
startIdx = backSlashIdx + 2;
break;
case 'r':
sb.append('\r');
startIdx = backSlashIdx + 2;
break;
case 'n':
sb.append('\n');
startIdx = backSlashIdx + 2;
break;
case '"':
sb.append('"');
startIdx = backSlashIdx + 2;
break;
case '>':
sb.append('>');
startIdx = backSlashIdx + 2;
break;
case '\\':
sb.append('\\');
startIdx = backSlashIdx + 2;
break;
case 'u': {
// \\uxxxx
if (backSlashIdx + 5 >= sLength) {
throw new IllegalArgumentException("Incomplete Unicode escape sequence in: " + s);
}
String xx = s.substring(backSlashIdx + 2, backSlashIdx + 6);
try {
c = (char) Integer.parseInt(xx, 16);
sb.append(c);
startIdx = backSlashIdx + 6;
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Illegal Unicode escape sequence '\\u" + xx + "' in: " + s);
}
break;
}
case 'U': {
// \\Uxxxxxxxx
if (backSlashIdx + 9 >= sLength) {
throw new IllegalArgumentException("Incomplete Unicode escape sequence in: " + s);
}
String xx = s.substring(backSlashIdx + 2, backSlashIdx + 10);
try {
c = (char) Integer.parseInt(xx, 16);
sb.append(c);
startIdx = backSlashIdx + 10;
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Illegal Unicode escape sequence '\\U" + xx + "' in: " + s);
}
break;
}
default:
throw new IllegalArgumentException("Unescaped backslash in: " + s);
}
backSlashIdx = s.indexOf('\\', startIdx);
}
sb.append(s.substring(startIdx));
return sb.toString();
}
}