EvaluationStatistics.java

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.query.algebra.evaluation.impl;

import java.util.Collection;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;

import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode;
import org.eclipse.rdf4j.query.algebra.ArbitraryLengthPath;
import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator;
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
import org.eclipse.rdf4j.query.algebra.EmptySet;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.LeftJoin;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.QueryRoot;
import org.eclipse.rdf4j.query.algebra.Service;
import org.eclipse.rdf4j.query.algebra.SingletonSet;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TripleRef;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.UnaryTupleOperator;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.ZeroLengthPath;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractSimpleQueryModelVisitor;

/**
 * Supplies various query model statistics to the query engine/optimizer.
 *
 * @author Arjohn Kampman
 * @author James Leigh
 */
public class EvaluationStatistics {

	// static UUID as prefix together with a thread safe incrementing long ensures a unique identifier.
	private final static String uniqueIdPrefix = UUID.randomUUID().toString().replace("-", "");
	private final static AtomicLong uniqueIdSuffix = new AtomicLong();

	private CardinalityCalculator calculator;

	public double getCardinality(TupleExpr expr) {
		if (calculator == null) {
			calculator = createCardinalityCalculator();
			assert calculator != null;
		}

		if (expr instanceof AbstractQueryModelNode && ((AbstractQueryModelNode) expr).isCardinalitySet()) {
			return ((AbstractQueryModelNode) expr).getCardinality();
		}

		expr.visit(calculator);
		return calculator.getCardinality();
	}

	protected CardinalityCalculator createCardinalityCalculator() {
		return new CardinalityCalculator();
	}

	/*-----------------------------------*
	 * Inner class CardinalityCalculator *
	 *-----------------------------------*/

	protected static class CardinalityCalculator extends AbstractQueryModelVisitor<RuntimeException> {

		private static final double VAR_CARDINALITY = 10;
		private static final double UNBOUND_SERVICE_CARDINALITY = 100000;

		protected double cardinality;

		public double getCardinality() {
			return cardinality;
		}

		@Override
		public void meet(EmptySet node) {
			// no binding sets
			cardinality = 0.0;
		}

		@Override
		public void meet(SingletonSet node) {
			// one empty binding set
			cardinality = 1.0;
		}

		@Override
		public void meet(BindingSetAssignment node) {
			cardinality = getCardinalityInternal(node);
		}

		@Override
		public void meet(ZeroLengthPath node) {
			Var subjVar = node.getSubjectVar();
			Var objVar = node.getObjectVar();
			if (subjVar != null && subjVar.hasValue() || objVar != null && objVar.hasValue()) {
				// subj = obj
				cardinality = 1.0;
			} else {
				// actual cardinality = count(union(subjs, objs))
				// but cost is equivalent to ?s ?p ?o ?c (impl scans all statements)
				// so due to the lower actual cardinality we value it in preference to a fully unbound statement
				// pattern.
				cardinality = getSubjectCardinality(subjVar) * getObjectCardinality(objVar)
						* getContextCardinality(node.getContextVar());
			}
		}

		@Override
		public void meet(ArbitraryLengthPath node) {
			final Var pathVar = new Var("_anon_" + uniqueIdPrefix + uniqueIdSuffix.incrementAndGet(), true);
			// cardinality of ALP is determined based on the cost of a
			// single ?s ?p ?o ?c pattern where ?p is unbound, compensating for the fact that
			// the length of the path is unknown but expected to be _at least_ twice that of a normal
			// statement pattern.
			cardinality = 2.0 * getCardinalityInternal(new StatementPattern(node.getSubjectVar().clone(), pathVar,
					node.getObjectVar().clone(), node.getContextVar() != null ? node.getContextVar().clone() : null));
		}

		@Override
		public void meet(Service node) {
			if (!node.getServiceRef().hasValue()) {
				// the IRI is not available, may be computed in the course of the
				// query
				// => use high cost to order the SERVICE node late in the query plan
				cardinality = UNBOUND_SERVICE_CARDINALITY;
			} else {
				ServiceNodeAnalyzer serviceAnalyzer = new ServiceNodeAnalyzer();
				node.visitChildren(serviceAnalyzer);
				int count = serviceAnalyzer.getStatementCount();

				// more than one free variable in a single triple pattern
				if (count == 1 && node.getServiceVars().size() > 1) {
					cardinality = 100 + node.getServiceVars().size(); // TODO (should
					// be higher
					// than other
					// simple
					// stmts)
				} else {
					// only very selective statements should be better than this
					// => evaluate service expressions first
					cardinality = 1 + node.getServiceVars().size() * 0.1;
				}
			}
		}

		@Override
		public void meet(StatementPattern sp) {
			cardinality = getCardinalityInternal(sp);
		}

		@Override
		public void meet(TripleRef tripleRef) {
			cardinality = getCardinalityInternal(tripleRef);
		}

		private double getCardinalityInternal(StatementPattern node) {
			if (!node.isCardinalitySet()) {
				node.setCardinality(getCardinality(node));
			}
			return node.getCardinality();
		}

		private double getCardinalityInternal(TripleRef node) {
			if (!node.isCardinalitySet()) {
				node.setCardinality(getCardinality(node));
			}
			return node.getCardinality();
		}

		private double getCardinalityInternal(BindingSetAssignment node) {
			if (!node.isCardinalitySet()) {
				node.setCardinality(getCardinality(node));
			}
			return node.getCardinality();
		}

		protected double getCardinality(StatementPattern sp) {
			return getSubjectCardinality(sp) * getPredicateCardinality(sp) * getObjectCardinality(sp)
					* getContextCardinality(sp);
		}

		protected double getCardinality(BindingSetAssignment bindingSetAssignment) {
			// actual cardinality is node.getBindingSets().size() binding sets
			// but cost is cheap as we don't need to query the triple store
			// so effective cardinality is 1 or a very slowly increasing function of node.getBindingSets().size().
			return 1.0;
		}

		protected double getCardinality(TripleRef tripleRef) {
			return getSubjectCardinality(tripleRef.getSubjectVar())
					* getPredicateCardinality(tripleRef.getPredicateVar())
					* getObjectCardinality(tripleRef.getObjectVar());
		}

		/**
		 * Override this if you are able to determine the cardinality based not only on the subjectVar itself but also
		 * the other vars (e.g. the predicate value might determine a subject subset).
		 */
		protected double getSubjectCardinality(StatementPattern sp) {
			return getSubjectCardinality(sp.getSubjectVar());
		}

		protected double getSubjectCardinality(Var var) {
			return getCardinality(VAR_CARDINALITY, var);
		}

		/**
		 * Override this if you are able to determine the cardinality based not only on the predicateVar itself but also
		 * the other vars (e.g. the subject value might determine a predicate subset).
		 */
		protected double getPredicateCardinality(StatementPattern sp) {
			return getPredicateCardinality(sp.getPredicateVar());
		}

		protected double getPredicateCardinality(Var var) {
			return getCardinality(VAR_CARDINALITY, var);
		}

		/**
		 * Override this if you are able to determine the cardinality based not only on the objectVar itself but also
		 * the other vars (e.g. the predicate value might determine an object subset).
		 */
		protected double getObjectCardinality(StatementPattern sp) {
			return getObjectCardinality(sp.getObjectVar());
		}

		protected double getObjectCardinality(Var var) {
			return getCardinality(VAR_CARDINALITY, var);
		}

		/**
		 * Override this if you are able to determine the cardinality based not only on the contextVar itself but also
		 * the other vars (e.g. the subject value might determine a context subset).
		 */
		protected double getContextCardinality(StatementPattern sp) {
			return getContextCardinality(sp.getContextVar());
		}

		protected double getContextCardinality(Var var) {
			return getCardinality(VAR_CARDINALITY, var);
		}

		protected double getCardinality(double varCardinality, Var var) {
			return var == null || var.hasValue() ? 1.0 : varCardinality;
		}

		protected double getCardinality(double varCardinality, Collection<Var> vars) {
			int constantVarCount = countConstantVars(vars);
			double unboundVarFactor = vars.size() - constantVarCount;
			return Math.pow(varCardinality, unboundVarFactor);
		}

		protected int countConstantVars(Iterable<Var> vars) {
			int constantVarCount = 0;

			for (Var var : vars) {
				if (var.hasValue()) {
					constantVarCount++;
				}
			}

			return constantVarCount;
		}

		@Override
		public void meet(Join node) {
			node.getLeftArg().visit(this);
			double leftArgCost = this.cardinality;

			node.getRightArg().visit(this);
			cardinality *= leftArgCost;
		}

		@Override
		public void meet(LeftJoin node) {
			node.getLeftArg().visit(this);
			double leftArgCost = this.cardinality;

			node.getRightArg().visit(this);
			cardinality *= leftArgCost;
		}

		@Override
		protected void meetBinaryTupleOperator(BinaryTupleOperator node) {
			node.getLeftArg().visit(this);
			double leftArgCost = this.cardinality;

			node.getRightArg().visit(this);
			cardinality += leftArgCost;
		}

		@Override
		protected void meetUnaryTupleOperator(UnaryTupleOperator node) {
			node.getArg().visit(this);
		}

		@Override
		public void meet(QueryRoot node) {
			node.getArg().visit(this);
		}

		@Override
		protected void meetNode(QueryModelNode node) {
			throw new IllegalArgumentException("Unhandled node type: " + node.getClass());
		}
	}

	// count the number of triple patterns
	private static class ServiceNodeAnalyzer extends AbstractSimpleQueryModelVisitor<RuntimeException> {

		private int count = 0;

		private ServiceNodeAnalyzer() {
			super(true);
		}

		public int getStatementCount() {
			return count;
		}

		@Override
		public void meet(StatementPattern node) throws RuntimeException {
			count++;
		}
	}

}