SPARQLMinusIteration.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.query.algebra.evaluation.iterator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.common.iteration.FilterIteration;
import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.query.BindingSet;
/**
* An Iteration that returns the results of an Iteration (the left argument) MINUS any results that are compatible with
* results of another Iteration (the right argument) or that have no shared variables. This iteration uses the formal
* definition of the SPARQL 1.1 MINUS operator to determine which BindingSets to return.
*
* @author Jeen
* @see <a href="http://www.w3.org/TR/sparql11-query/#sparqlAlgebra">SPARQL Algebra Documentation</a>
*/
public class SPARQLMinusIteration extends FilterIteration<BindingSet> {
/*-----------*
* Variables *
*-----------*/
private final CloseableIteration<BindingSet> rightArg;
private boolean initialized;
private Set<BindingSet> excludeSet;
private Set<String> excludeSetBindingNames;
private boolean excludeSetBindingNamesAreAllTheSame;
private BindingSet[] excludeSetList;
// Index of rightArg binding sets by (variable name, value) to quickly find candidates
private Map<String, Map<Value, BindingSet[]>> rightIndex;
/*--------------*
* Constructors *
*--------------*/
/**
* Creates a new MinusIteration that returns the results of the left argument minus the results of the right
* argument. By default, duplicates are <em>not</em> filtered from the results.
*
* @param leftArg An Iteration containing the main set of elements.
* @param rightArg An Iteration containing the set of elements that should be filtered from the main set.
*/
public SPARQLMinusIteration(CloseableIteration<BindingSet> leftArg, CloseableIteration<BindingSet> rightArg) {
super(leftArg);
assert rightArg != null;
this.rightArg = rightArg;
this.initialized = false;
}
/*--------------*
* Constructors *
*--------------*/
// implements LookAheadIteration.getNextElement()
@Override
protected boolean accept(BindingSet bindingSet) {
if (!initialized) {
// Build set of elements-to-exclude from right argument
excludeSet = makeSet(getRightArg());
excludeSetList = excludeSet.toArray(new BindingSet[0]);
excludeSetBindingNames = excludeSet.stream()
.map(BindingSet::getBindingNames)
.flatMap(Set::stream)
.collect(Collectors.toSet());
excludeSetBindingNamesAreAllTheSame = excludeSet.stream().allMatch(b -> {
Set<String> bindingNames = b.getBindingNames();
if (bindingNames.size() == excludeSetBindingNames.size()) {
return bindingNames.containsAll(excludeSetBindingNames);
}
return false;
});
// Build right-side index by (name,value) -> list of rows
HashMap<String, HashMap<Value, ArrayList<BindingSet>>> tmpIndex = new HashMap<>();
for (BindingSet bs : excludeSetList) {
for (String name : bs.getBindingNames()) {
Value v = bs.getValue(name);
if (v != null) {
tmpIndex
.computeIfAbsent(name, k -> new HashMap<>())
.computeIfAbsent(v, k -> new ArrayList<>())
.add(bs);
}
}
}
var built = new HashMap<String, Map<Value, BindingSet[]>>(tmpIndex.size() * 2);
for (var e : tmpIndex.entrySet()) {
var inner = new HashMap<Value, BindingSet[]>(e.getValue().size() * 2);
for (var e2 : e.getValue().entrySet()) {
inner.put(e2.getKey(), e2.getValue().toArray(new BindingSet[0]));
}
built.put(e.getKey(), inner);
}
this.rightIndex = built;
initialized = true;
}
Set<String> bindingNames = bindingSet.getBindingNames();
boolean hasSharedBindings = false;
// Fast union check: if no variable is shared with the union of right variables, accept immediately
if (!excludeSetBindingNames.isEmpty()) {
final Set<String> left = bindingNames;
final Set<String> rightUnion = excludeSetBindingNames;
if (left.size() <= rightUnion.size()) {
for (String name : left) {
if (rightUnion.contains(name)) {
hasSharedBindings = true;
break;
}
}
} else {
for (String name : rightUnion) {
if (left.contains(name)) {
hasSharedBindings = true;
break;
}
}
}
}
if (!hasSharedBindings) {
return true;
}
// Use right-side index to find only candidates that match on at least one shared (name,value)
if (rightIndex != null && !rightIndex.isEmpty()) {
for (String name : bindingNames) {
Value leftVal = bindingSet.getValue(name);
if (leftVal == null) {
continue; // unbound on left does not participate in compatibility
}
Map<Value, BindingSet[]> byValue = rightIndex.get(name);
if (byValue == null) {
continue;
}
BindingSet[] candidates = byValue.get(leftVal);
if (candidates == null) {
continue;
}
for (int j = 0; j < candidates.length; j++) {
BindingSet excluded = candidates[j];
if (excluded.isCompatible(bindingSet)) {
return false;
}
}
}
return true;
}
// Fallback: scan all (should be rare or small)
for (BindingSet excluded : excludeSetList) {
if (!excludeSetBindingNamesAreAllTheSame) {
hasSharedBindings = false;
final Set<String> excludedNames = excluded.getBindingNames();
if (bindingNames.size() <= excludedNames.size()) {
for (String name : bindingNames) {
if (excludedNames.contains(name)) {
hasSharedBindings = true;
break;
}
}
} else {
for (String name : excludedNames) {
if (bindingNames.contains(name)) {
hasSharedBindings = true;
break;
}
}
}
}
if (hasSharedBindings && excluded.isCompatible(bindingSet)) {
return false;
}
}
return true;
}
protected Set<BindingSet> makeSet() {
return new LinkedHashSet<>();
}
protected Set<String> makeSet(Set<String> set) {
return new HashSet<>(set);
}
protected Set<BindingSet> makeSet(CloseableIteration<BindingSet> rightArg) {
return Iterations.asSet(rightArg);
}
@Override
protected void handleClose() {
if (rightArg != null) {
rightArg.close();
}
}
/**
* @return Returns the rightArg.
*/
protected CloseableIteration<BindingSet> getRightArg() {
return rightArg;
}
protected long clearExcludeSet() {
int size = excludeSet.size();
excludeSet.clear();
return size;
}
}