SPARQLResultsSAXParser.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.query.resultio.sparqlxml;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.BINDING_NAME_ATT;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.BINDING_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.BNODE_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.BOOLEAN_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.LITERAL_DATATYPE_ATT;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.LITERAL_LANG_ATT;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.LITERAL_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.OBJECT_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.O_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.PREDICATE_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.P_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.RESULT_SET_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.RESULT_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.STATEMENT_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.SUBJECT_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.S_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.TRIPLE_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.URI_TAG;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.VAR_NAME_ATT;
import static org.eclipse.rdf4j.query.resultio.sparqlxml.SPARQLResultsXMLConstants.VAR_TAG;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.rdf4j.common.xml.SimpleSAXAdapter;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.query.QueryResultHandler;
import org.eclipse.rdf4j.query.TupleQueryResultHandlerException;
import org.eclipse.rdf4j.query.impl.MapBindingSet;
import org.eclipse.rdf4j.query.resultio.QueryResultParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
/**
 * SAX event handler that reads a tuple result document in the SPARQL Query Results XML format and reports it to a
 * {@link QueryResultHandler}: the variable names are collected from the header, every result element is turned into a
 * {@link MapBindingSet}, and (possibly nested) RDF-star triples are assembled with the help of a stack.
 * <p>
 * Structural errors in the document (missing attributes, missing values, triple components outside of a triple,
 * incomplete triples) are reported as {@link SAXException}s.
 * <p>
 * Instances keep per-document parsing state in fields; that state is reset in {@link #startDocument()}.
 */
class SPARQLResultsSAXParser extends SimpleSAXAdapter {

	private final Logger logger = LoggerFactory.getLogger(this.getClass());

	/*-----------*
	 * Variables *
	 *-----------*/

	/**
	 * The variable names that are specified in the header.
	 */
	private List<String> bindingNames;

	/**
	 * The name of the binding element that is currently open, or {@code null} if none is open.
	 */
	private String currentBindingName;

	/**
	 * The most recently parsed value that has not been consumed yet by a binding or a triple component.
	 */
	private Value currentValue;

	/**
	 * The bound variables for the current result, or {@code null} if no result element is open.
	 */
	private MapBindingSet currentSolution;

	private final ValueFactory valueFactory;

	/**
	 * Receiver of the parsed query result; may be {@code null}, in which case nothing is reported.
	 */
	private final QueryResultHandler handler;

	/**
	 * stack for handling nested RDF-star triples
	 */
	private final Deque<TripleContainer> tripleStack = new ArrayDeque<>();

	/**
	 * Creates a new parser.
	 *
	 * @param valueFactory the factory used to create IRIs, blank nodes, literals and triples
	 * @param handler      the handler that receives the parsed result; {@code null} is tolerated
	 */
	public SPARQLResultsSAXParser(ValueFactory valueFactory, QueryResultHandler handler) {
		this.valueFactory = valueFactory;
		this.handler = handler;
	}

	/*---------*
	 * Methods *
	 *---------*/

	/**
	 * Resets all per-document state so that leftovers of a previous (possibly aborted) parse cannot leak into the
	 * document that is about to be processed.
	 */
	@Override
	public void startDocument() throws SAXException {
		bindingNames = new ArrayList<>();
		currentBindingName = null;
		currentValue = null;
		currentSolution = null;
		tripleStack.clear();
	}

	/**
	 * Notifies the handler, if any, that the query result is complete.
	 *
	 * @throws SAXException wrapping a {@link TupleQueryResultHandlerException} thrown by the handler
	 */
	@Override
	public void endDocument() throws SAXException {
		try {
			if (handler != null) {
				handler.endQueryResult();
			}
		} catch (TupleQueryResultHandlerException e) {
			throw new SAXException(e);
		}
	}

	/**
	 * Processes the start of an element together with its text content.
	 *
	 * @param tagName the name of the element
	 * @param atts    the attributes of the element
	 * @param text    the text content of the element
	 * @throws SAXException if a required attribute is missing, an IRI is malformed, the handler fails, or a boolean
	 *                      result is encountered
	 */
	@Override
	public void startTag(String tagName, Map<String, String> atts, String text) throws SAXException {
		if (BINDING_TAG.equals(tagName)) {
			currentBindingName = atts.get(BINDING_NAME_ATT);
			if (currentBindingName == null) {
				throw new SAXException(BINDING_NAME_ATT + " attribute missing for " + BINDING_TAG + " element");
			}
			// a binding must supply its own value; never pick up a stray value parsed earlier
			currentValue = null;
		} else if (TRIPLE_TAG.equals(tagName) || STATEMENT_TAG.equals(tagName)) {
			tripleStack.push(new TripleContainer());
		} else if (URI_TAG.equals(tagName)) {
			try {
				currentValue = valueFactory.createIRI(text);
			} catch (IllegalArgumentException e) {
				// Malformed URI
				throw new SAXException(e.getMessage(), e);
			}
		} else if (BNODE_TAG.equals(tagName)) {
			currentValue = valueFactory.createBNode(text);
		} else if (LITERAL_TAG.equals(tagName)) {
			currentValue = parseLiteral(atts, text);
		} else if (RESULT_TAG.equals(tagName)) {
			currentSolution = new MapBindingSet(bindingNames.size());
		} else if (VAR_TAG.equals(tagName)) {
			String varName = atts.get(VAR_NAME_ATT);
			if (varName == null) {
				throw new SAXException(VAR_NAME_ATT + " missing for " + VAR_TAG + " element");
			}
			bindingNames.add(varName);
		} else if (RESULT_SET_TAG.equals(tagName)) {
			try {
				if (handler != null) {
					handler.startQueryResult(bindingNames);
				}
			} catch (TupleQueryResultHandlerException e) {
				throw new SAXException(e);
			}
		} else if (BOOLEAN_TAG.equals(tagName)) {
			QueryResultParseException realException = new QueryResultParseException(
					"Found boolean results in tuple parser");
			throw new SAXException(realException);
		}
	}

	/**
	 * Processes the end of an element: completes bindings, triple components, triples and results.
	 *
	 * @param tagName the name of the element that is being closed
	 * @throws SAXException if a value is missing or of the wrong type, a triple component is defined twice or
	 *                      appears outside of a triple, a triple is incomplete, or the handler fails
	 */
	@Override
	public void endTag(String tagName) throws SAXException {
		TripleContainer currentTriple;
		switch (tagName) {
		case BINDING_TAG:
			if (currentValue == null) {
				throw new SAXException("Value missing for " + BINDING_TAG + " element");
			}
			if (currentSolution == null) {
				throw new SAXException(BINDING_TAG + " element found outside of " + RESULT_TAG + " element");
			}
			currentSolution.addBinding(currentBindingName, currentValue);
			currentBindingName = null;
			currentValue = null;
			break;
		case SUBJECT_TAG:
		case S_TAG:
			currentTriple = requireOpenTriple(tagName);
			if (currentTriple.getSubject() != null) {
				throw new SAXException("RDF-star triple subject defined twice");
			}
			if (currentValue instanceof Resource) {
				currentTriple.setSubject((Resource) currentValue);
			} else {
				throw new SAXException("unexpected value type for subject: " + currentValue);
			}
			// the value has been consumed; an empty follow-up component must not reuse it
			currentValue = null;
			break;
		case PREDICATE_TAG:
		case P_TAG:
			currentTriple = requireOpenTriple(tagName);
			if (currentTriple.getPredicate() != null) {
				throw new SAXException("RDF-star triple predicate defined twice");
			}
			if (currentValue instanceof IRI) {
				currentTriple.setPredicate((IRI) currentValue);
			} else {
				throw new SAXException("unexpected value type for predicate: " + currentValue);
			}
			currentValue = null;
			break;
		case OBJECT_TAG:
		case O_TAG:
			currentTriple = requireOpenTriple(tagName);
			if (currentTriple.getObject() != null) {
				throw new SAXException("RDF-star triple object defined twice");
			}
			if (currentValue == null) {
				throw new SAXException("Value missing for " + tagName + " element");
			}
			currentTriple.setObject(currentValue);
			currentValue = null;
			break;
		case TRIPLE_TAG:
		case STATEMENT_TAG:
			// poll() takes the same element pop() would, but yields null instead of throwing on an empty stack
			currentTriple = tripleStack.poll();
			if (currentTriple == null) {
				throw new SAXException("Unexpected end of " + tagName + " element");
			}
			if (currentTriple.getSubject() == null || currentTriple.getPredicate() == null
					|| currentTriple.getObject() == null) {
				throw new SAXException("Incomplete RDF-star triple: subject, predicate and object are required");
			}
			currentValue = valueFactory.createTriple(currentTriple.getSubject(), currentTriple.getPredicate(),
					currentTriple.getObject());
			break;
		case RESULT_TAG:
			try {
				if (handler != null) {
					handler.handleSolution(currentSolution);
				}
				currentSolution = null;
			} catch (TupleQueryResultHandlerException e) {
				throw new SAXException(e);
			}
			break;
		}
	}

	/**
	 * Creates the literal described by a literal element.
	 *
	 * @param atts the attributes of the literal element
	 * @param text the lexical value of the literal
	 * @return a language-tagged literal if a language attribute is present, otherwise a datatyped literal if a
	 *         datatype attribute is present, otherwise a literal created from the text alone
	 * @throws SAXException if the datatype is not a legal IRI
	 */
	private Value parseLiteral(Map<String, String> atts, String text) throws SAXException {
		String xmlLang = atts.get(LITERAL_LANG_ATT);
		if (xmlLang != null) {
			return valueFactory.createLiteral(text, xmlLang);
		}

		String datatype = atts.get(LITERAL_DATATYPE_ATT);
		if (datatype == null) {
			return valueFactory.createLiteral(text);
		}

		IRI datatypeIri;
		try {
			datatypeIri = valueFactory.createIRI(datatype);
		} catch (IllegalArgumentException e) {
			// Illegal datatype URI
			throw new SAXException(e.getMessage(), e);
		}

		// For broken SPARQL endpoints which return LANGSTRING without a language, fall back
		// to using STRING as the datatype (no language tag is present on this code path)
		if (RDF.LANGSTRING.equals(datatypeIri)) {
			logger.debug(
					"rdf:langString typed literal missing language tag: '{}'. Falling back to xsd:string.",
					StringUtils.abbreviate(text, 10)
			);
			datatypeIri = XSD.STRING;
		}
		return valueFactory.createLiteral(text, datatypeIri);
	}

	/**
	 * Returns the innermost triple that is currently being assembled.
	 *
	 * @param tagName the name of the triple component element, used for the error message
	 * @return the triple container on top of the stack
	 * @throws SAXException if no triple is open
	 */
	private TripleContainer requireOpenTriple(String tagName) throws SAXException {
		TripleContainer openTriple = tripleStack.peek();
		if (openTriple == null) {
			throw new SAXException(tagName + " element found outside of an RDF-star triple");
		}
		return openTriple;
	}

	/**
	 * Mutable holder for the components of an RDF-star triple while it is being parsed.
	 */
	private static class TripleContainer {

		private Resource subject;

		private IRI predicate;

		private Value object;

		/**
		 * @return the subject
		 */
		public Resource getSubject() {
			return subject;
		}

		/**
		 * @param subject the subject to set
		 */
		public void setSubject(Resource subject) {
			this.subject = subject;
		}

		/**
		 * @return the predicate
		 */
		public IRI getPredicate() {
			return predicate;
		}

		/**
		 * @param predicate the predicate to set
		 */
		public void setPredicate(IRI predicate) {
			this.predicate = predicate;
		}

		/**
		 * @return the object
		 */
		public Value getObject() {
			return object;
		}

		/**
		 * @param object the object to set
		 */
		public void setObject(Value object) {
			this.object = object;
		}
	}
}