QueryEvaluationUtility.java
/*******************************************************************************
* Copyright (c) 2022 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.query.algebra.evaluation.util;
import java.util.Objects;
import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.Duration;
import javax.xml.datatype.XMLGregorianCalendar;
import org.eclipse.rdf4j.common.annotation.InternalUseOnly;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.datatypes.XMLDatatypeUtil;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.query.algebra.Compare.CompareOp;
/**
* This class will take over for QueryEvaluationUtil. Currently marked as InternalUseOnly because there may still be
* changes to how this class works.
*
* @author Arjohn Kampman
* @author H��vard M. Ottestad
*/
@InternalUseOnly()
public class QueryEvaluationUtility {
/**
* Determines the effective boolean value (EBV) of the supplied value as defined in the
* <a href="http://www.w3.org/TR/rdf-sparql-query/#ebv">SPARQL specification</a>:
* <ul>
* <li>The EBV of any literal whose type is CoreDatatype.XSD:boolean or numeric is false if the lexical form is not
* valid for that datatype (e.g. "abc"^^xsd:integer).
* <li>If the argument is a typed literal with a datatype of CoreDatatype.XSD:boolean, the EBV is the value of that
* argument.
* <li>If the argument is a plain literal or a typed literal with a datatype of CoreDatatype.XSD:string, the EBV is
* false if the operand value has zero length; otherwise the EBV is true.
* <li>If the argument is a numeric type or a typed literal with a datatype derived from a numeric type, the EBV is
* false if the operand value is NaN or is numerically equal to zero; otherwise the EBV is true.
* <li>All other arguments, including unbound arguments, produce a type error.
* </ul>
*
* @param value Some value.
* @return The EBV of <var>value</var>.
*/
public static Result getEffectiveBooleanValue(Value value) {
if (value.isLiteral()) {
Literal literal = (Literal) value;
String label = literal.getLabel();
CoreDatatype.XSD datatype = literal.getCoreDatatype().asXSDDatatypeOrNull();
if (datatype == CoreDatatype.XSD.STRING) {
return Result.fromBoolean(!label.isEmpty());
} else if (datatype == CoreDatatype.XSD.BOOLEAN) {
// also false for illegal values
return Result.fromBoolean("true".equals(label) || "1".equals(label));
} else if (datatype == CoreDatatype.XSD.DECIMAL) {
try {
String normDec = XMLDatatypeUtil.normalizeDecimal(label);
return Result.fromBoolean(!normDec.equals("0.0"));
} catch (IllegalArgumentException e) {
return Result.fromBoolean(false);
}
} else if (datatype != null && datatype.isIntegerDatatype()) {
try {
String normInt = XMLDatatypeUtil.normalize(label, datatype);
return Result.fromBoolean(!normInt.equals("0"));
} catch (IllegalArgumentException e) {
return Result.fromBoolean(false);
}
} else if (datatype != null && datatype.isFloatingPointDatatype()) {
try {
String normFP = XMLDatatypeUtil.normalize(label, datatype);
return Result.fromBoolean(!normFP.equals("0.0E0") && !normFP.equals("NaN"));
} catch (IllegalArgumentException e) {
return Result.fromBoolean(false);
}
}
}
return Result.incompatibleValueExpression;
}
public static Result compare(Value leftVal, Value rightVal, CompareOp operator) {
return compare(leftVal, rightVal, operator, true);
}
public static Result compare(Value leftVal, Value rightVal, CompareOp operator, boolean strict) {
if (leftVal.isLiteral() && rightVal.isLiteral()) {
// Both left and right argument is a Literal
return compareLiterals((Literal) leftVal, (Literal) rightVal, operator, strict);
} else {
// All other value combinations
switch (operator) {
case EQ:
return Result.fromBoolean(Objects.equals(leftVal, rightVal));
case NE:
return Result.fromBoolean(!Objects.equals(leftVal, rightVal));
default:
return Result.incompatibleValueExpression;
}
}
}
/**
* Compares the supplied {@link Literal} arguments using the supplied operator, using strict (minimally-conforming)
* SPARQL 1.1 operator behavior.
*
* @param leftLit the left literal argument of the comparison.
* @param rightLit the right literal argument of the comparison.
* @param operator the comparison operator to use.
* @return {@code true} if execution of the supplied operator on the supplied arguments succeeds, {@code false}
* otherwise.
*/
public static Result compareLiterals(Literal leftLit, Literal rightLit, CompareOp operator) {
return compareLiterals(leftLit, rightLit, operator, true);
}
public static Order compareLiterals(Literal leftLit, Literal rightLit, boolean strict) {
// type precendence:
// - simple literal
// - numeric
// - CoreDatatype.XSD:boolean
// - CoreDatatype.XSD:dateTime
// - CoreDatatype.XSD:string
// - RDF term (equal and unequal only)
CoreDatatype leftCoreDatatype = leftLit.getCoreDatatype();
CoreDatatype rightCoreDatatype = rightLit.getCoreDatatype();
boolean leftLangLit = leftCoreDatatype == CoreDatatype.RDF.LANGSTRING;
boolean rightLangLit = rightCoreDatatype == CoreDatatype.RDF.LANGSTRING;
CoreDatatype.XSD leftXSDDatatype = leftCoreDatatype.asXSDDatatype().orElse(null);
CoreDatatype.XSD rightXSDDatatype = rightCoreDatatype.asXSDDatatype().orElse(null);
// for purposes of query evaluation in SPARQL, simple literals and string-typed literals with the same lexical
// value are considered equal.
if (leftCoreDatatype == CoreDatatype.XSD.STRING && rightCoreDatatype == CoreDatatype.XSD.STRING) {
return Order.from(leftLit.getLabel().compareTo(rightLit.getLabel()));
} else if (!(leftLangLit || rightLangLit)) {
CoreDatatype.XSD commonDatatype = getCommonDatatype(strict, leftXSDDatatype, rightXSDDatatype);
if (commonDatatype != null) {
try {
Order order = handleCommonDatatype(leftLit, rightLit, strict, leftXSDDatatype, rightXSDDatatype,
leftLangLit, rightLangLit, commonDatatype);
if (order == Order.illegalArgument) {
if (leftLit.equals(rightLit)) {
return Order.equal;
}
}
if (order != null) {
return order;
}
} catch (IllegalArgumentException e) {
if (leftLit.equals(rightLit)) {
return Order.equal;
}
}
}
}
// All other cases, e.g. literals with languages, unequal or
// unordered datatypes, etc. These arguments can only be compared
// using the operators 'EQ' and 'NE'. See SPARQL's RDFterm-equal
// operator
return otherCases(leftLit, rightLit, leftXSDDatatype, rightXSDDatatype, leftLangLit, rightLangLit);
}
/**
* Compares the supplied {@link Literal} arguments using the supplied operator.
*
* @param leftLit the left literal argument of the comparison.
* @param rightLit the right literal argument of the comparison.
* @param operator the comparison operator to use.
* @param strict boolean indicating whether comparison should use strict (minimally-conforming) SPARQL 1.1
* operator behavior, or extended behavior.
* @return {@code true} if execution of the supplied operator on the supplied arguments succeeds, {@code false}
* otherwise.
*/
public static Result compareLiterals(Literal leftLit, Literal rightLit, CompareOp operator, boolean strict) {
Order order = compareLiterals(leftLit, rightLit, strict);
return order.toResult(operator);
}
private static Order handleCommonDatatype(Literal leftLit, Literal rightLit, boolean strict,
CoreDatatype.XSD leftCoreDatatype, CoreDatatype.XSD rightCoreDatatype, boolean leftLangLit,
boolean rightLangLit, CoreDatatype.XSD commonDatatype) {
if (commonDatatype == CoreDatatype.XSD.DOUBLE) {
return Order.from(Double.compare(leftLit.doubleValue(), rightLit.doubleValue()));
} else if (commonDatatype == CoreDatatype.XSD.FLOAT) {
return Order.from(Float.compare(leftLit.floatValue(), rightLit.floatValue()));
} else if (commonDatatype == CoreDatatype.XSD.DECIMAL) {
return Order.from(leftLit.decimalValue().compareTo(rightLit.decimalValue()));
} else if (commonDatatype.isIntegerDatatype()) {
return Order.from(leftLit.integerValue().compareTo(rightLit.integerValue()));
} else if (commonDatatype == CoreDatatype.XSD.BOOLEAN) {
return Order.from(Boolean.compare(leftLit.booleanValue(), rightLit.booleanValue()));
} else if (commonDatatype.isCalendarDatatype()) {
XMLGregorianCalendar left = leftLit.calendarValue();
XMLGregorianCalendar right = rightLit.calendarValue();
int compare = left.compare(right);
// Note: XMLGregorianCalendar.compare() returns compatible values (-1, 0, 1) but INDETERMINATE
// needs special treatment
if (compare == DatatypeConstants.INDETERMINATE) {
// If we compare two CoreDatatype.XSD:dateTime we should use the specific comparison specified in SPARQL
// 1.1
if (leftCoreDatatype == CoreDatatype.XSD.DATETIME && rightCoreDatatype == CoreDatatype.XSD.DATETIME) {
return Order.incompatibleValueExpression;
}
} else {
return Order.from(compare);
}
} else if (!strict && commonDatatype.isDurationDatatype()) {
Duration left = XMLDatatypeUtil.parseDuration(leftLit.getLabel());
Duration right = XMLDatatypeUtil.parseDuration(rightLit.getLabel());
int compare = left.compare(right);
if (compare != DatatypeConstants.INDETERMINATE) {
return Order.from(compare);
} else {
return otherCases(leftLit, rightLit, leftCoreDatatype, rightCoreDatatype, leftLangLit, rightLangLit);
}
} else if (commonDatatype == CoreDatatype.XSD.STRING) {
return Order.from(leftLit.getLabel().compareTo(rightLit.getLabel()));
}
return null;
}
private static Order otherCases(Literal leftLit, Literal rightLit, CoreDatatype.XSD leftCoreDatatype,
CoreDatatype.XSD rightCoreDatatype, boolean leftLangLit, boolean rightLangLit) {
boolean literalsEqual = leftLit.equals(rightLit);
if (!literalsEqual) {
if (!leftLangLit && !rightLangLit && isSupportedDatatype(leftCoreDatatype)
&& isSupportedDatatype(rightCoreDatatype)) {
// left and right arguments have incompatible but supported datatypes
// we need to check that the lexical-to-value mapping for both datatypes succeeds
if (!XMLDatatypeUtil.isValidValue(leftLit.getLabel(), leftCoreDatatype)) {
return Order.incompatibleValueExpression;
}
if (!XMLDatatypeUtil.isValidValue(rightLit.getLabel(), rightCoreDatatype)) {
return Order.incompatibleValueExpression;
}
boolean leftString = leftCoreDatatype == CoreDatatype.XSD.STRING;
boolean leftNumeric = leftCoreDatatype.isNumericDatatype();
boolean leftDate = leftCoreDatatype.isCalendarDatatype();
boolean rightString = rightCoreDatatype == CoreDatatype.XSD.STRING;
boolean rightNumeric = rightCoreDatatype.isNumericDatatype();
boolean rightDate = rightCoreDatatype.isCalendarDatatype();
if (leftString != rightString) {
return Order.incompatibleValueExpression;
}
if (leftNumeric != rightNumeric) {
return Order.incompatibleValueExpression;
}
if (leftDate != rightDate) {
return Order.incompatibleValueExpression;
}
} else if (!leftLangLit && !rightLangLit) {
// For literals with unsupported datatypes we don't know if their values are equal
return Order.incompatibleValueExpression;
}
}
if (literalsEqual) {
return Order.equal;
}
return Order.notEqual;
}
private static CoreDatatype.XSD getCommonDatatype(boolean strict, CoreDatatype.XSD leftCoreDatatype,
CoreDatatype.XSD rightCoreDatatype) {
if (leftCoreDatatype != null && rightCoreDatatype != null) {
if (leftCoreDatatype == rightCoreDatatype) {
return leftCoreDatatype;
} else if (leftCoreDatatype.isNumericDatatype() && rightCoreDatatype.isNumericDatatype()) {
// left and right arguments have different datatypes, try to find a more general, shared datatype
if (leftCoreDatatype == CoreDatatype.XSD.DOUBLE || rightCoreDatatype == CoreDatatype.XSD.DOUBLE) {
return CoreDatatype.XSD.DOUBLE;
} else if (leftCoreDatatype == CoreDatatype.XSD.FLOAT || rightCoreDatatype == CoreDatatype.XSD.FLOAT) {
return CoreDatatype.XSD.FLOAT;
} else if (leftCoreDatatype == CoreDatatype.XSD.DECIMAL
|| rightCoreDatatype == CoreDatatype.XSD.DECIMAL) {
return CoreDatatype.XSD.DECIMAL;
} else {
return CoreDatatype.XSD.INTEGER;
}
} else if (!strict && leftCoreDatatype.isCalendarDatatype() && rightCoreDatatype.isCalendarDatatype()) {
// We're not running in strict eval mode so we use extended datatype comparsion.
return CoreDatatype.XSD.DATETIME;
} else if (!strict && leftCoreDatatype.isDurationDatatype() && rightCoreDatatype.isDurationDatatype()) {
return CoreDatatype.XSD.DURATION;
}
}
return null;
}
/**
* Checks whether the supplied value is a "plain literal". A "plain literal" is a literal with no datatype and
* optionally a language tag.
*
* @see <a href="http://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal">RDF Literal
* Documentation</a>
*/
public static boolean isPlainLiteral(Value v) {
if (v.isLiteral()) {
return isPlainLiteral((Literal) v);
}
return false;
}
public static boolean isPlainLiteral(Literal l) {
assert l.getLanguage().isEmpty() || l.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING;
return l.getCoreDatatype() == CoreDatatype.XSD.STRING || l.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING;
}
/**
* Checks whether the supplied value is a "simple literal". A "simple literal" is a literal with no language tag nor
* datatype.
*
* @see <a href="http://www.w3.org/TR/sparql11-query/#simple_literal">SPARQL Simple Literal Documentation</a>
*/
public static boolean isSimpleLiteral(Value v) {
if (v.isLiteral()) {
return isSimpleLiteral((Literal) v);
}
return false;
}
// public static boolean isPlainLiteral(Literal l) {
// return l.getCoreDatatype().filter(d -> d == CoreDatatype.XSD.STRING).isPresent();
//// return l.getCoreDatatype().orElse(null) == CoreDatatype.XSD.STRING;
// }
/**
* Checks whether the supplied literal is a "simple literal". A "simple literal" is a literal with no language tag
* and the datatype {@link CoreDatatype.XSD#STRING}.
*
* @see <a href="http://www.w3.org/TR/sparql11-query/#simple_literal">SPARQL Simple Literal Documentation</a>
*/
public static boolean isSimpleLiteral(Literal l) {
return l.getCoreDatatype() == CoreDatatype.XSD.STRING;
}
/**
* Checks whether the supplied literal is a "simple literal". A "simple literal" is a literal with no language tag
* and the datatype {@link CoreDatatype.XSD#STRING}.
*
* @see <a href="http://www.w3.org/TR/sparql11-query/#simple_literal">SPARQL Simple Literal Documentation</a>
*/
public static boolean isSimpleLiteral(boolean isLang, CoreDatatype datatype) {
return datatype == CoreDatatype.XSD.STRING;
}
/**
* Checks whether the supplied literal is a "string literal". A "string literal" is either a simple literal, a plain
* literal with language tag, or a literal with datatype CoreDatatype.XSD:string.
*
* @see <a href="http://www.w3.org/TR/sparql11-query/#func-string">SPARQL Functions on Strings Documentation</a>
*/
public static boolean isStringLiteral(Value v) {
if (v.isLiteral()) {
return isStringLiteral((Literal) v);
}
return false;
}
/**
* Checks whether the supplied two literal arguments are 'argument compatible' according to the SPARQL definition.
*
* @param arg1 the first argument
* @param arg2 the second argument
* @return true iff the two supplied arguments are argument compatible, false otherwise
* @see <a href="http://www.w3.org/TR/sparql11-query/#func-arg-compatibility">SPARQL Argument Compatibility
* Rules</a>
*/
public static boolean compatibleArguments(Literal arg1, Literal arg2) {
// 1. The arguments are literals typed as CoreDatatype.XSD:string
// 2. The arguments are language literals with identical language tags
// 3. The first argument is a language literal and the second
// argument is a literal typed as CoreDatatype.XSD:string
return isSimpleLiteral(arg1) && isSimpleLiteral(arg2)
|| Literals.isLanguageLiteral(arg1) && Literals.isLanguageLiteral(arg2)
&& arg1.getLanguage().equals(arg2.getLanguage())
|| Literals.isLanguageLiteral(arg1) && isSimpleLiteral(arg2);
}
/**
* Checks whether the supplied literal is a "string literal". A "string literal" is either a simple literal, a plain
* literal with language tag, or a literal with datatype CoreDatatype.XSD:string.
*
* @see <a href="http://www.w3.org/TR/sparql11-query/#func-string">SPARQL Functions on Strings Documentation</a>
*/
public static boolean isStringLiteral(Literal l) {
return l.getCoreDatatype() == CoreDatatype.XSD.STRING || l.getCoreDatatype() == CoreDatatype.RDF.LANGSTRING;
}
private static boolean isSupportedDatatype(CoreDatatype.XSD datatype) {
return datatype != null && (datatype == CoreDatatype.XSD.STRING || datatype.isNumericDatatype()
|| datatype.isCalendarDatatype());
}
public enum Result {
_true(true),
_false(false),
incompatibleValueExpression(),
illegalArgument();
static Result fromBoolean(boolean b) {
if (b) {
return _true;
}
return _false;
}
private final boolean value;
private final boolean isIncompatible;
Result(boolean value) {
this.value = value;
isIncompatible = false;
}
Result() {
isIncompatible = true;
value = false;
}
public boolean orElse(boolean alternative) {
if (this == incompatibleValueExpression) {
return alternative;
} else if (this == illegalArgument) {
throw new IllegalStateException("IllegalArgument needs to be handled");
}
return value;
}
}
enum Order {
smaller(-1),
greater(1),
equal(0),
notEqual(0),
incompatibleValueExpression(0),
illegalArgument(0);
private final int value;
Order(int value) {
this.value = value;
}
public static Order from(int value) {
if (value < 0) {
return smaller;
}
if (value > 0) {
return greater;
}
return equal;
}
public int asInt() {
if (!isValid() && this != notEqual) {
throw new IllegalStateException();
}
return value;
}
public boolean isValid() {
return !(this == incompatibleValueExpression || this == illegalArgument);
}
public Result toResult(CompareOp operator) {
if (!isValid()) {
if (this == incompatibleValueExpression) {
return Result.incompatibleValueExpression;
}
if (this == illegalArgument) {
return Result.illegalArgument;
}
}
if (this == notEqual) {
switch (operator) {
case EQ:
return Result._false;
case NE:
return Result._true;
case LT:
case LE:
case GE:
case GT:
return Result.incompatibleValueExpression;
default:
return Result.illegalArgument;
}
}
switch (operator) {
case LT:
return Result.fromBoolean(value < 0);
case LE:
return Result.fromBoolean(value <= 0);
case EQ:
return Result.fromBoolean(value == 0);
case NE:
return Result.fromBoolean(value != 0);
case GE:
return Result.fromBoolean(value >= 0);
case GT:
return Result.fromBoolean(value > 0);
default:
return Result.illegalArgument;
}
}
}
}