AbstractSPARQLJSONParser.java

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.query.resultio.sparqljson;

import static org.eclipse.rdf4j.query.resultio.sparqljson.SPARQLStarResultsJSONConstants.OBJECT;
import static org.eclipse.rdf4j.query.resultio.sparqljson.SPARQLStarResultsJSONConstants.PREDICATE;
import static org.eclipse.rdf4j.query.resultio.sparqljson.SPARQLStarResultsJSONConstants.SUBJECT;
import static org.eclipse.rdf4j.query.resultio.sparqljson.SPARQLStarResultsJSONConstants.TRIPLE_STARDOG;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Triple;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryResultHandlerException;
import org.eclipse.rdf4j.query.impl.MapBindingSet;
import org.eclipse.rdf4j.query.resultio.AbstractQueryResultParser;
import org.eclipse.rdf4j.query.resultio.QueryResultParseException;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.JSONSettings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonFactoryBuilder;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.StreamReadFeature;
import com.fasterxml.jackson.core.StreamWriteFeature;
import com.fasterxml.jackson.core.json.JsonReadFeature;

/**
 * Abstract base class for SPARQL Results JSON Parsers. Provides a common implementation of both boolean and tuple
 * parsing.
 * <p>
 * The document is read as a token stream. Events are reported to the configured handler (if any) as soon as they are
 * known. When the {@code results} member appears before the {@code head} member, solutions are buffered (in document
 * order, duplicates preserved) until the variable list has been reported, so that {@code startQueryResult} is always
 * invoked before {@code handleSolution} and {@code endQueryResult}.
 *
 * @author Peter Ansell
 * @author Sebastian Schaffert
 */
public abstract class AbstractSPARQLJSONParser extends AbstractQueryResultParser {

	private final Logger logger = LoggerFactory.getLogger(this.getClass());

	/** Top-level member holding the variable list and links. */
	public static final String HEAD = "head";

	/** Member of {@link #HEAD} holding an array of links. */
	public static final String LINK = "link";

	/** Member of {@link #HEAD} holding the array of variable names. */
	public static final String VARS = "vars";

	/** Top-level member holding a boolean result. */
	public static final String BOOLEAN = "boolean";

	/** Top-level member holding the tuple results object. */
	public static final String RESULTS = "results";

	/** Member of {@link #RESULTS} holding the array of solutions. */
	public static final String BINDINGS = "bindings";

	/** Member of a binding value object naming the kind of value. */
	public static final String TYPE = "type";

	/** Member of a binding value object holding the lexical value. */
	public static final String VALUE = "value";

	/** Member of a binding value object holding the language tag of a literal. */
	public static final String XMLLANG = "xml:lang";

	/** Member of a binding value object holding the datatype IRI of a literal. */
	public static final String DATATYPE = "datatype";

	/** {@link #TYPE} value for literals. */
	public static final String LITERAL = "literal";

	/** {@link #TYPE} value for typed literals; handled identically to {@link #LITERAL}. */
	public static final String TYPED_LITERAL = "typed-literal";

	/** {@link #TYPE} value for blank nodes. */
	public static final String BNODE = "bnode";

	/** {@link #TYPE} value for IRIs. */
	public static final String URI = "uri";

	/**
	 * Backwards compatibility with very early version of original SPARQL spec.
	 */
	private static final String DISTINCT = "distinct";

	/**
	 * Backwards compatibility with very early version of original SPARQL spec.
	 */
	private static final String ORDERED = "ordered";

	/**
	 * Mutable state shared between the {@code head} and {@code results} sections of a single document.
	 */
	private static final class TupleParseState {

		/** Variable names collected from {@code head.vars}. */
		final List<String> vars = new ArrayList<>();

		/**
		 * Solutions read before the variable list was seen. A list is used so that the order of the solutions and
		 * any duplicate solutions are preserved.
		 */
		final List<BindingSet> buffered = new ArrayList<>();

		/** Whether {@code head.vars} has been read (and {@code startQueryResult} reported). */
		boolean varsFound;

		/** Whether a bindings array was completely read while {@link #varsFound} was still false. */
		boolean endPending;
	}

	/**
	 * Creates a parser by delegating to the no-argument super constructor.
	 */
	protected AbstractSPARQLJSONParser() {
		super();
	}

	/**
	 * Creates a parser by delegating to the super constructor with the given value factory.
	 *
	 * @param valueFactory the factory passed on to the super class; it is used to create the parsed values
	 */
	protected AbstractSPARQLJSONParser(ValueFactory valueFactory) {
		super(valueFactory);
	}

	@Override
	public void parseQueryResult(InputStream in)
			throws IOException, QueryResultParseException, QueryResultHandlerException {
		parseQueryResultInternal(in, true, true);
	}

	/**
	 * Parses a SPARQL Results JSON document from the given stream, reporting events to the handler if one is set.
	 *
	 * @param in                  the stream to read the JSON document from
	 * @param attemptParseBoolean whether a {@code boolean} member is acceptable
	 * @param attemptParseTuple   whether {@code head.vars} and {@code results} members are acceptable
	 * @return the value of the last {@code boolean} member found, or {@code false} if there was none
	 * @throws IllegalArgumentException    if both {@code attemptParseBoolean} and {@code attemptParseTuple} are false
	 * @throws IOException                 if reading from the stream fails
	 * @throws QueryResultParseException   if the document is not valid SPARQL Results JSON or contains a kind of
	 *                                     result that was not requested
	 * @throws QueryResultHandlerException if the handler fails to process an event
	 */
	protected boolean parseQueryResultInternal(InputStream in, boolean attemptParseBoolean, boolean attemptParseTuple)
			throws IOException, QueryResultParseException, QueryResultHandlerException {
		if (!attemptParseBoolean && !attemptParseTuple) {
			throw new IllegalArgumentException(
					"Internal error: Did not specify whether to parse as either boolean and/or tuple");
		}

		boolean result = false;
		try {
			final JsonParser jp = configureNewJsonFactory().createParser(in);

			if (jp.nextToken() != JsonToken.START_OBJECT) {
				throw parseError(jp, "Expected SPARQL Results JSON document to start with an Object");
			}

			final TupleParseState state = new TupleParseState();

			while (jp.nextToken() != JsonToken.END_OBJECT) {
				final String baseStr = jp.getCurrentName();

				if (HEAD.equals(baseStr)) {
					parseHead(jp, attemptParseTuple, state);
				} else if (RESULTS.equals(baseStr)) {
					if (!attemptParseTuple) {
						throw parseError(jp,
								"Found tuple results bindings when attempting to parse SPARQL Results JSON to boolean result");
					}
					parseResults(jp, state);
				} else if (BOOLEAN.equals(baseStr)) {
					if (!attemptParseBoolean) {
						throw parseError(jp,
								"Found boolean results when attempting to parse SPARQL Results JSON to tuple results");
					}
					final JsonToken valueToken = jp.nextToken();
					// A structured value here would desynchronise the token stream, so reject it explicitly.
					if (valueToken == null || !valueToken.isScalarValue()) {
						throw parseError(jp, "Expected a scalar value for the " + BOOLEAN + " field");
					}

					result = Boolean.parseBoolean(jp.getText());
					if (handler != null) {
						handler.handleBoolean(result);
					}
				} else {
					logger.debug("Found unexpected object in top level {} field #{}.{}", baseStr,
							jp.getCurrentLocation().getLineNr(), jp.getCurrentLocation().getColumnNr());
					// Consume the value of the unexpected field. skipChildren() advances to the matching end
					// token when the value is an array or an object and does nothing for scalar values.
					jp.nextToken();
					jp.skipChildren();
				}
			}

			if (state.endPending) {
				// Degenerate document: solutions were found but no variable list ever was. The buffered
				// solutions cannot be reported without a preceding startQueryResult; the end of the result is
				// still signalled, as this parser has always done for such documents.
				logger.debug("Discarding {} buffered solution(s): no {} found in {}", state.buffered.size(), VARS,
						HEAD);
				if (handler != null) {
					handler.endQueryResult();
				}
			}
		} catch (JsonProcessingException e) {
			// The location of a Jackson exception is optional; use -1 when it is not available.
			final long line = e.getLocation() != null ? e.getLocation().getLineNr() : -1;
			final long column = e.getLocation() != null ? e.getLocation().getColumnNr() : -1;
			throw new QueryResultParseException("Could not parse SPARQL/JSON", e, line, column);
		}

		return result;
	}

	/**
	 * Parses the value of the {@code head} member. The parser must be positioned on the {@code head} field name.
	 */
	private void parseHead(JsonParser jp, boolean attemptParseTuple, TupleParseState state)
			throws IOException, QueryResultHandlerException {
		if (jp.nextToken() != JsonToken.START_OBJECT) {
			throw parseError(jp, "Did not find object under " + HEAD + " field");
		}

		while (jp.nextToken() != JsonToken.END_OBJECT) {
			final String headStr = jp.getCurrentName();

			if (VARS.equals(headStr)) {
				if (!attemptParseTuple) {
					throw parseError(jp,
							"Found tuple results variables when attempting to parse SPARQL Results JSON to boolean result");
				}

				if (jp.nextToken() != JsonToken.START_ARRAY) {
					throw parseError(jp, "Expected variable labels to be an array");
				}

				while (jp.nextToken() != JsonToken.END_ARRAY) {
					state.vars.add(jp.getText());
				}

				if (handler != null) {
					handler.startQueryResult(state.vars);
				}
				state.varsFound = true;

				flushBufferedSolutions(state);
			} else if (LINK.equals(headStr)) {
				if (jp.nextToken() != JsonToken.START_ARRAY) {
					throw parseError(jp, "Expected links to be an array");
				}

				final List<String> linksList = new ArrayList<>();
				while (jp.nextToken() != JsonToken.END_ARRAY) {
					linksList.add(jp.getText());
				}

				if (handler != null) {
					handler.handleLinks(linksList);
				}
			} else {
				throw parseError(jp, "Found unexpected object in head field: " + headStr);
			}
		}
	}

	/**
	 * Reports solutions that were read before the variable list, followed by a single end-of-result event. Does
	 * nothing if no bindings array was read before the variable list.
	 */
	private void flushBufferedSolutions(TupleParseState state) throws QueryResultHandlerException {
		if (!state.endPending) {
			return;
		}
		if (handler != null) {
			for (BindingSet solution : state.buffered) {
				handler.handleSolution(solution);
			}
			// The end of the result is reported once, after all buffered solutions.
			handler.endQueryResult();
		}
		state.buffered.clear();
		state.endPending = false;
	}

	/**
	 * Parses the value of the {@code results} member. The parser must be positioned on the {@code results} field
	 * name.
	 */
	private void parseResults(JsonParser jp, TupleParseState state) throws IOException, QueryResultHandlerException {
		if (jp.nextToken() != JsonToken.START_OBJECT) {
			throw parseError(jp, "Found unexpected token in results object: " + jp.getCurrentName());
		}

		while (jp.nextToken() != JsonToken.END_OBJECT) {
			final String resultsStr = jp.getCurrentName();

			if (BINDINGS.equals(resultsStr)) {
				if (jp.nextToken() != JsonToken.START_ARRAY) {
					throw parseError(jp, "Found unexpected token in bindings object");
				}

				while (jp.nextToken() != JsonToken.END_ARRAY) {
					final BindingSet solution = parseSolution(jp);
					if (!state.varsFound) {
						// Buffer the solution to fit with the QueryResultHandler contract so that
						// startQueryResult is always called before handleSolution
						state.buffered.add(solution);
					} else if (handler != null) {
						handler.handleSolution(solution);
					}
				}

				if (!state.varsFound) {
					// The end of the result can only be reported after startQueryResult; defer it.
					state.endPending = true;
				} else if (handler != null) {
					handler.endQueryResult();
				}
			}
			// Backwards compatibility with very old draft of the original SPARQL spec
			else if (DISTINCT.equals(resultsStr) || ORDERED.equals(resultsStr)) {
				jp.nextToken();
			} else {
				throw parseError(jp, "Found unexpected field in results: " + resultsStr);
			}
		}
	}

	/**
	 * Parses a single solution. The parser must be positioned on the first token of an element of the bindings
	 * array.
	 */
	private BindingSet parseSolution(JsonParser jp) throws IOException {
		if (jp.getCurrentToken() != JsonToken.START_OBJECT) {
			throw parseError(jp, "Did not find object in bindings array: " + jp.getCurrentName());
		}

		final MapBindingSet solution = new MapBindingSet();
		while (jp.nextToken() != JsonToken.END_OBJECT) {
			if (jp.getCurrentToken() != JsonToken.FIELD_NAME) {
				throw parseError(jp, "Did not find binding name");
			}

			final String bindingStr = jp.getCurrentName();
			solution.addBinding(bindingStr, parseValue(jp, bindingStr));
		}
		return solution;
	}

	/**
	 * Creates a parse exception carrying the current line and column of the given parser.
	 */
	private static QueryResultParseException parseError(JsonParser jp, String message) {
		return new QueryResultParseException(message, jp.getCurrentLocation().getLineNr(),
				jp.getCurrentLocation().getColumnNr());
	}

	/**
	 * Parses the value object of a binding. The parser must be positioned on the token preceding the start of the
	 * value object; on return it is positioned on the end of that object.
	 *
	 * @param jp         the parser to read from
	 * @param bindingStr the name of the binding, used in error messages
	 * @return the parsed value, never {@code null}
	 * @throws IOException               if reading from the underlying stream fails
	 * @throws QueryResultParseException if the value object is malformed, lacks its type or value, or has an
	 *                                   unsupported type
	 */
	protected Value parseValue(JsonParser jp, String bindingStr) throws IOException {
		if (jp.nextToken() != JsonToken.START_OBJECT) {
			throw parseError(jp, "Did not find object for binding value");
		}

		String lang = null;
		String type = null;
		String datatype = null;
		String value = null;

		Triple triple = null;

		while (jp.nextToken() != JsonToken.END_OBJECT) {
			if (jp.getCurrentToken() != JsonToken.FIELD_NAME) {
				throw parseError(jp, "Did not find value attribute under " + bindingStr + " field");
			}
			String fieldName = jp.getCurrentName();

			// set the appropriate state variable
			if (TYPE.equals(fieldName)) {
				type = jp.nextTextValue();
				if (TRIPLE_STARDOG.equals(type)) {
					// Stardog RDF-star serialization dialect does not wrap the triple in a value object
					triple = parseStardogTripleValue(jp, type);
					// avoid reading away the next end-of-object token by jumping out of the loop.
					break;
				}
			} else if (XMLLANG.equals(fieldName)) {
				lang = jp.nextTextValue();
			} else if (DATATYPE.equals(fieldName)) {
				datatype = jp.nextTextValue();
			} else if (VALUE.equals(fieldName)) {
				if (jp.nextToken() == JsonToken.START_OBJECT) {
					triple = parseTripleValue(jp, fieldName);
					if (jp.getCurrentToken() != JsonToken.END_OBJECT) {
						throw parseError(jp, "Unexpected token: " + jp.getCurrentName());
					}
				} else {
					value = jp.getText();
				}
			} else {
				throw parseError(jp, "Unexpected field name: " + fieldName);
			}
		}
		if (triple != null && checkTripleType(jp, type)) {
			return triple;
		}

		// Report incomplete value objects as parse errors instead of failing later on a null reference.
		if (type == null) {
			throw parseError(jp, "Did not find type attribute under " + bindingStr + " field");
		}
		if (value == null) {
			throw parseError(jp, "Did not find value attribute under " + bindingStr + " field");
		}

		final Value result = parseValue(type, value, lang, datatype);
		if (result == null) {
			throw parseError(jp, "Unsupported value type '" + type + "' under " + bindingStr + " field");
		}
		return result;
	}

	/**
	 * Parses a triple in the Stardog dialect, whose subject, predicate and object members are siblings of the type
	 * member. Reads up to and including the end of the enclosing object.
	 *
	 * @param jp        the parser, positioned inside the value object
	 * @param fieldName prefix used to label the triple components in error messages
	 * @return the parsed triple
	 * @throws QueryResultParseException if a component is repeated, missing or of an invalid kind, or if an
	 *                                   unexpected member is found
	 */
	private Triple parseStardogTripleValue(JsonParser jp, String fieldName) throws IOException {
		Value subject = null, predicate = null, object = null;

		while (jp.nextToken() != JsonToken.END_OBJECT) {
			if (jp.getCurrentToken() != JsonToken.FIELD_NAME) {
				throw parseError(jp, "Did not find triple attribute in triple value");
			}
			String posName = jp.getCurrentName();
			if (SUBJECT.equals(posName)) {
				if (subject != null) {
					throw parseError(jp, posName + " field encountered twice in triple value: ");
				}
				subject = parseValue(jp, fieldName + ":" + posName);
			} else if (PREDICATE.equals(posName)) {
				if (predicate != null) {
					throw parseError(jp, posName + " field encountered twice in triple value: ");
				}
				predicate = parseValue(jp, fieldName + ":" + posName);
			} else if (OBJECT.equals(posName)) {
				if (object != null) {
					throw parseError(jp, posName + " field encountered twice in triple value: ");
				}
				object = parseValue(jp, fieldName + ":" + posName);
			} else if ("g".equals(posName)) {
				// silently ignore named graph field in Stardog dialect
				parseValue(jp, fieldName + ":" + posName);
			} else {
				throw parseError(jp, "Unexpected field name in triple value: " + posName);
			}
		}

		if (subject instanceof Resource && predicate instanceof IRI && object != null) {
			return valueFactory.createTriple((Resource) subject, (IRI) predicate, object);
		} else {
			throw parseError(jp, "Incomplete or invalid triple value");
		}
	}

	/**
	 * Parses a triple given as an object under the value member. This base implementation always rejects such
	 * values; it is a hook for subclasses.
	 *
	 * @param jp        the parser, positioned on the start of the object
	 * @param fieldName the name of the member holding the object
	 * @return never returns normally in this implementation
	 * @throws QueryResultParseException always, in this implementation
	 */
	protected Triple parseTripleValue(JsonParser jp, String fieldName) throws IOException {
		throw parseError(jp, "Unexpected object as value");
	}

	/**
	 * Checks the declared type of a value for which a triple was parsed. This base implementation always throws; it
	 * is a hook for subclasses.
	 *
	 * @param jp   the parser the triple was read from
	 * @param type the declared type of the value, possibly {@code null}
	 * @return never returns normally in this implementation
	 * @throws IllegalStateException always, in this implementation
	 */
	protected boolean checkTripleType(JsonParser jp, String type) {
		throw new IllegalStateException();
	}

	/**
	 * Parse a value out of the elements for a binding.
	 *
	 * @param type     {@link #LITERAL}, {@link #TYPED_LITERAL}, {@link #BNODE} or {@link #URI}
	 * @param value    actual value text
	 * @param language language tag, if applicable
	 * @param datatype datatype tag, if applicable
	 * @return the value corresponding to the given parameters, or {@code null} if the type is not one of the
	 *         supported types
	 */
	private Value parseValue(String type, String value, String language, String datatype) {
		logger.trace("type: {}", type);
		logger.trace("value: {}", value);
		logger.trace("language: {}", language);
		logger.trace("datatype: {}", datatype);

		Value result = null;

		if (LITERAL.equals(type) || TYPED_LITERAL.equals(type)) {
			if (language != null) {
				// A language tag takes precedence over a datatype.
				result = valueFactory.createLiteral(value, language);
			} else if (datatype != null) {
				IRI datatypeIri = valueFactory.createIRI(datatype);

				// For broken SPARQL endpoints which return LANGSTRING without a language, fall back
				// to using STRING as the datatype
				if (RDF.LANGSTRING.equals(datatypeIri)) {
					logger.debug(
							"rdf:langString typed literal missing language tag: '{}'. Falling back to xsd:string.",
							StringUtils.abbreviate(value, 10)
					);
					datatypeIri = XSD.STRING;
				}

				result = valueFactory.createLiteral(value, datatypeIri);
			} else {
				result = valueFactory.createLiteral(value);
			}
		} else if (BNODE.equals(type)) {
			result = valueFactory.createBNode(value);
		} else if (URI.equals(type)) {
			result = valueFactory.createIRI(value);
		}

		logger.debug("result value: {}", result);

		return result;
	}

	@Override
	public Collection<RioSetting<?>> getSupportedSettings() {
		Collection<RioSetting<?>> result = new HashSet<>(super.getSupportedSettings());

		result.add(JSONSettings.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
		result.add(JSONSettings.ALLOW_COMMENTS);
		result.add(JSONSettings.ALLOW_NON_NUMERIC_NUMBERS);
		result.add(JSONSettings.ALLOW_NUMERIC_LEADING_ZEROS);
		result.add(JSONSettings.ALLOW_SINGLE_QUOTES);
		result.add(JSONSettings.ALLOW_UNQUOTED_CONTROL_CHARS);
		result.add(JSONSettings.ALLOW_UNQUOTED_FIELD_NAMES);
		result.add(JSONSettings.ALLOW_YAML_COMMENTS);
		result.add(JSONSettings.ALLOW_TRAILING_COMMA);
		result.add(JSONSettings.INCLUDE_SOURCE_IN_LOCATION);
		result.add(JSONSettings.STRICT_DUPLICATE_DETECTION);

		return result;
	}

	/**
	 * Get an instance of JsonFactory configured using the settings from {@link #getParserConfig()}.
	 *
	 * @return A newly configured JsonFactory based on the currently enabled settings
	 */
	private JsonFactory configureNewJsonFactory() {
		final JsonFactoryBuilder builder = new JsonFactoryBuilder();
		// Disable features that may work for most JSON where the field names are
		// in limited supply,
		// but does not work for SPARQL/JSON where a wide range of URIs are used for
		// subjects and predicates
		builder.disable(JsonFactory.Feature.INTERN_FIELD_NAMES);
		builder.disable(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES);
		// NOTE(review): this is a StreamWriteFeature, while the factory is only used here to create parsers -
		// confirm whether a read-side feature was intended.
		builder.disable(StreamWriteFeature.AUTO_CLOSE_TARGET);

		// Only settings that were explicitly set are applied; all others keep the builder's defaults.
		configureIfSet(builder, JsonReadFeature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
				JSONSettings.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
		configureIfSet(builder, JsonReadFeature.ALLOW_JAVA_COMMENTS, JSONSettings.ALLOW_COMMENTS);
		configureIfSet(builder, JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS, JSONSettings.ALLOW_NON_NUMERIC_NUMBERS);
		configureIfSet(builder, JsonReadFeature.ALLOW_LEADING_ZEROS_FOR_NUMBERS,
				JSONSettings.ALLOW_NUMERIC_LEADING_ZEROS);
		configureIfSet(builder, JsonReadFeature.ALLOW_SINGLE_QUOTES, JSONSettings.ALLOW_SINGLE_QUOTES);
		configureIfSet(builder, JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS,
				JSONSettings.ALLOW_UNQUOTED_CONTROL_CHARS);
		configureIfSet(builder, JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES, JSONSettings.ALLOW_UNQUOTED_FIELD_NAMES);
		configureIfSet(builder, JsonReadFeature.ALLOW_YAML_COMMENTS, JSONSettings.ALLOW_YAML_COMMENTS);
		configureIfSet(builder, JsonReadFeature.ALLOW_TRAILING_COMMA, JSONSettings.ALLOW_TRAILING_COMMA);
		configureIfSet(builder, StreamReadFeature.INCLUDE_SOURCE_IN_LOCATION,
				JSONSettings.INCLUDE_SOURCE_IN_LOCATION);
		configureIfSet(builder, StreamReadFeature.STRICT_DUPLICATE_DETECTION,
				JSONSettings.STRICT_DUPLICATE_DETECTION);

		return builder.build();
	}

	/**
	 * Applies the value of the given setting to the given JSON read feature, if the setting was explicitly set.
	 */
	private void configureIfSet(JsonFactoryBuilder builder, JsonReadFeature feature, RioSetting<Boolean> setting) {
		if (getParserConfig().isSet(setting)) {
			builder.configure(feature, getParserConfig().get(setting));
		}
	}

	/**
	 * Applies the value of the given setting to the given stream read feature, if the setting was explicitly set.
	 */
	private void configureIfSet(JsonFactoryBuilder builder, StreamReadFeature feature, RioSetting<Boolean> setting) {
		if (getParserConfig().isSet(setting)) {
			builder.configure(feature, getParserConfig().get(setting));
		}
	}
}