ControlledWorkerBoundJoin.java

/*******************************************************************************
 * Copyright (c) 2019 Eclipse RDF4J contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.federated.evaluation.join;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Phaser;
import java.util.concurrent.TimeUnit;

import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.federated.FedXConfig;
import org.eclipse.rdf4j.federated.algebra.BoundJoinTupleExpr;
import org.eclipse.rdf4j.federated.algebra.CheckStatementPattern;
import org.eclipse.rdf4j.federated.algebra.FedXService;
import org.eclipse.rdf4j.federated.algebra.StatementTupleExpr;
import org.eclipse.rdf4j.federated.evaluation.FederationEvalStrategy;
import org.eclipse.rdf4j.federated.evaluation.concurrent.ControlledWorkerScheduler;
import org.eclipse.rdf4j.federated.evaluation.concurrent.ParallelExecutor;
import org.eclipse.rdf4j.federated.evaluation.concurrent.ParallelTask;
import org.eclipse.rdf4j.federated.structures.QueryInfo;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Execute the nested loop join in an asynchronous fashion, using grouped requests, i.e. group bindings into one SPARQL
 * request using a VALUES clause.
 *
 * The number of concurrent threads is controlled by a {@link ControlledWorkerScheduler} which works according to the
 * FIFO principle and uses worker threads.
 *
 * This join cursor blocks until all scheduled tasks are finished, however the result iteration can be accessed from
 * different threads to allow for pipelining.
 *
 * @author Andreas Schwarte
 *
 * @deprecated replaced with {@link ControlledWorkerBindJoin}l
 */
@Deprecated(forRemoval = true)
public class ControlledWorkerBoundJoin extends ControlledWorkerJoin {

	private static final Logger log = LoggerFactory.getLogger(ControlledWorkerBoundJoin.class);

	/**
	 * Whether to submit the first intermediate result immediately in a non-bound way
	 */
	private boolean submitFirstResultImmediately = false;

	public ControlledWorkerBoundJoin(ControlledWorkerScheduler<BindingSet> scheduler, FederationEvalStrategy strategy,
			CloseableIteration<BindingSet> leftIter,
			TupleExpr rightArg, BindingSet bindings, QueryInfo queryInfo)
			throws QueryEvaluationException {
		super(scheduler, strategy, leftIter, rightArg, bindings, queryInfo);
	}

	protected void setSubmitFirstResultImmediately(boolean flag) {
		this.submitFirstResultImmediately = flag;
	}

	@Override
	protected void handleBindings() throws Exception {
		if (!(canApplyVectoredEvaluation(rightArg))) {
			log.debug(
					"Right argument is not an applicable BoundJoinTupleExpr. Fallback on ControlledWorkerJoin implementation: "
							+ rightArg.getClass().getCanonicalName());
			super.handleBindings(); // fallback
			return;
		}

		int nBindingsCfg = this.queryInfo.getFederationContext().getConfig().getBoundJoinBlockSize();
		int totalBindings = 0; // the total number of bindings
		TupleExpr expr = rightArg;

		TaskCreator taskCreator = null;
		Phaser currentPhaser = phaser;

		if (submitFirstResultImmediately) {
			// first item is always sent in a non-bound way
			if (!isClosed() && leftIter.hasNext()) {
				BindingSet b = leftIter.next();
				totalBindings++;
				taskCreator = determineTaskCreator(expr, b);
				currentPhaser.register();
				scheduler.schedule(
						new ParallelJoinTask(new PhaserHandlingParallelExecutor(this, currentPhaser), strategy, expr,
								b));
			}
		}

		int nBindings;
		List<BindingSet> bindings;
		while (!isClosed() && leftIter.hasNext()) {

			// create a new phaser if there are more than 10000 parties
			// note: a phaser supports only up to 65535 registered parties
			if (currentPhaser.getRegisteredParties() >= 10000) {
				currentPhaser = new Phaser(currentPhaser);
			}

			// determine the bind join block size
			nBindings = getNextBindJoinSize(nBindingsCfg, totalBindings);

			bindings = new ArrayList<>(nBindings);

			int count = 0;
			while (!isClosed() && count < nBindings && leftIter.hasNext()) {
				var bs = leftIter.next();
				if (taskCreator == null) {
					taskCreator = determineTaskCreator(expr, bs);
				}
				bindings.add(bs);
				count++;
			}

			totalBindings += count;

			currentPhaser.register();
			scheduler.schedule(taskCreator.getTask(new PhaserHandlingParallelExecutor(this, currentPhaser), bindings));
		}

		leftIter.close();

		scheduler.informFinish(this);

		if (log.isDebugEnabled()) {
			log.debug("JoinStats: left iter of " + getDisplayId() + " had " + totalBindings + " results.");
		}

		phaser.awaitAdvanceInterruptibly(phaser.arrive(), queryInfo.getMaxRemainingTimeMS(), TimeUnit.MILLISECONDS);
	}

	@Override
	public void handleClose() throws QueryEvaluationException {
		try {
			super.handleClose();
		} finally {
			// signal the phaser to close (if currently being blocked)
			phaser.forceTermination();
		}
	}

	protected TaskCreator determineTaskCreator(TupleExpr expr, BindingSet bs) {
		final TaskCreator taskCreator;
		if (expr instanceof StatementTupleExpr) {
			StatementTupleExpr stmt = (StatementTupleExpr) expr;
			if (stmt.hasFreeVarsFor(bs)) {
				taskCreator = new BoundJoinTaskCreator(strategy, stmt);
			} else {
				expr = new CheckStatementPattern(stmt, queryInfo);
				taskCreator = new CheckJoinTaskCreator(strategy, (CheckStatementPattern) expr);
			}
		} else if (expr instanceof FedXService) {
			taskCreator = new FedXServiceJoinTaskCreator(strategy, (FedXService) expr);
		} else {
			throw new RuntimeException("Expr is of unexpected type: " + expr.getClass().getCanonicalName()
					+ ". Please report this problem.");
		}
		return taskCreator;
	}

	/**
	 * Return the size of the next bind join block.
	 *
	 * @param configuredBindJoinSize the configured bind join size
	 * @param totalBindings          the current process bindings from the intermediate result set
	 * @return
	 */
	protected int getNextBindJoinSize(int configuredBindJoinSize, int totalBindings) {

		/*
		 * XXX idea:
		 *
		 * make nBindings dependent on the number of intermediate results of the left argument.
		 *
		 * If many intermediate results, increase the number of bindings. This will result in less remote SPARQL
		 * requests.
		 *
		 */

		return configuredBindJoinSize;
	}

	/**
	 * Returns true if the vectored evaluation can be applied for the join argument, i.e. there is no fallback to
	 * {@link ControlledWorkerJoin#handleBindings()}. This is
	 *
	 * a) if the expr is a {@link BoundJoinTupleExpr} (Mind the special handling for {@link FedXService} as defined in
	 * b) b) if the expr is a {@link FedXService} and {@link FedXConfig#getEnableServiceAsBoundJoin()}
	 *
	 * @return
	 */
	private boolean canApplyVectoredEvaluation(TupleExpr expr) {
		if (expr instanceof BoundJoinTupleExpr) {
			if (expr instanceof FedXService) {
				return this.queryInfo.getFederationContext().getConfig().getEnableServiceAsBoundJoin();
			}
			return true;
		}
		return false;
	}

	protected interface TaskCreator {
		ParallelTask<BindingSet> getTask(ParallelExecutor<BindingSet> control, List<BindingSet> bindings);
	}

	protected class BoundJoinTaskCreator implements TaskCreator {
		protected final FederationEvalStrategy _strategy;
		protected final StatementTupleExpr _expr;

		public BoundJoinTaskCreator(
				FederationEvalStrategy strategy, StatementTupleExpr expr) {
			super();
			_strategy = strategy;
			_expr = expr;
		}

		@Override
		public ParallelTask<BindingSet> getTask(ParallelExecutor<BindingSet> control, List<BindingSet> bindings) {
			return new ParallelBoundJoinTask(control, _strategy, _expr, bindings);
		}
	}

	protected class CheckJoinTaskCreator implements TaskCreator {
		protected final FederationEvalStrategy _strategy;
		protected final CheckStatementPattern _expr;

		public CheckJoinTaskCreator(
				FederationEvalStrategy strategy, CheckStatementPattern expr) {
			super();
			_strategy = strategy;
			_expr = expr;
		}

		@Override
		public ParallelTask<BindingSet> getTask(ParallelExecutor<BindingSet> control, List<BindingSet> bindings) {
			return new ParallelCheckJoinTask(control, _strategy, _expr, bindings);
		}
	}

	protected class FedXServiceJoinTaskCreator implements TaskCreator {
		protected final FederationEvalStrategy _strategy;
		protected final FedXService _expr;

		public FedXServiceJoinTaskCreator(
				FederationEvalStrategy strategy, FedXService expr) {
			super();
			_strategy = strategy;
			_expr = expr;
		}

		@Override
		public ParallelTask<BindingSet> getTask(ParallelExecutor<BindingSet> control, List<BindingSet> bindings) {
			return new ParallelServiceJoinTask(control, _strategy, _expr, bindings);
		}
	}

}