CollectionFactory.java
/*******************************************************************************
* Copyright (c) 2022 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.collection.factory.api;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.function.ToIntBiFunction;
import java.util.function.ToIntFunction;
import org.eclipse.rdf4j.common.annotation.Experimental;
import org.eclipse.rdf4j.common.annotation.InternalUseOnly;
import org.eclipse.rdf4j.common.exception.RDF4JException;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.MutableBindingSet;
import org.eclipse.rdf4j.query.impl.MapBindingSet;
/**
* A Factory that may generate optimised and/or disk based collections
*
* Factories like this should not be cached but created a new everytime as the closing is important if they are disk
* based.
*/
@InternalUseOnly
public interface CollectionFactory extends AutoCloseable {
@Override
void close() throws RDF4JException;
/**
* @param <T> of the list
* @return a list that may be optimised and/or disk based
*/
public <T> List<T> createList();
/**
* @return a list that may be optimised and/or disk based for Values only
*/
public List<Value> createValueList();
/**
* @param <T> of the set
* @return a set that may be optimised and/or disk based
*/
public <T> Set<T> createSet();
/**
* @return a set that may be optimised and/or disk based
*/
public default Set<BindingSet> createSetOfBindingSets() {
// note the odd lambda returning a lambda
Function<String, Predicate<BindingSet>> gethas = (n) -> (b) -> b.hasBinding(n);
Function<String, Function<BindingSet, Value>> getget = (n) -> (b) -> b.getValue(n);
Function<String, BiConsumer<Value, MutableBindingSet>> getSet = (n) -> (v, b) -> b.setBinding(n, v);
return createSetOfBindingSets(MapBindingSet::new, gethas, getget, getSet);
}
/**
* Allows optimizations beyond what would otherwise be possible, regarding disk access and storage.
*
* @param create a supplier that makes bindingsets
* @param a supplier used to create prebound hasBinding predicates
* @param a supplier used to create prebound getValue functions
* @param a supplier used to create prebound setValue functions
* @return a set that may be optimised and/or disk based
*/
public Set<BindingSet> createSetOfBindingSets(Supplier<MutableBindingSet> create,
Function<String, Predicate<BindingSet>> getHas, Function<String, Function<BindingSet, Value>> getGet,
Function<String, BiConsumer<Value, MutableBindingSet>> getSet);
/**
* @return a set that may be optimised and/or disk based for Values
*/
public Set<Value> createValueSet();
/**
* @param <K> key type
* @param <V> value type
* @return a map
*/
public <K, V> Map<K, V> createMap();
/**
* @param <V> value type
* @return a map
*/
public <V> Map<Value, V> createValueKeyedMap();
/**
* @param <T> of the contents of the queue
* @return a new queue
*/
public <T> Queue<T> createQueue();
/**
* @return a new queue
*/
public Queue<Value> createValueQueue();
/**
* @return a new queue that may be optimized and may use the functions passed in.
*/
@Experimental
public default Queue<BindingSet> createBindingSetQueue(Supplier<MutableBindingSet> create,
Function<String, Predicate<BindingSet>> getHas, Function<String, Function<BindingSet, Value>> getget,
Function<String, BiConsumer<Value, MutableBindingSet>> getSet) {
return createQueue();
}
/**
* @return a new queue optimized for bindingsets
*/
@Experimental
public default Queue<BindingSet> createBindingSetQueue() {
Function<String, Predicate<BindingSet>> gethas = (n) -> (b) -> b.hasBinding(n);
Function<String, Function<BindingSet, Value>> getget = (n) -> (b) -> b.getValue(n);
Function<String, BiConsumer<Value, MutableBindingSet>> getSet = (n) -> (v, b) -> b.setBinding(n, v);
return createBindingSetQueue(MapBindingSet::new, gethas, getget, getSet);
}
@InternalUseOnly
public <E> Map<BindingSetKey, E> createGroupByMap();
@InternalUseOnly
public BindingSetKey createBindingSetKey(BindingSet bindingSet, List<Function<BindingSet, Value>> getValues,
ToIntFunction<BindingSet> hashOfBindingSetCalculator);
@InternalUseOnly
@Experimental
private byte[] valueIntoByteArray(Value value) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
oos.writeObject(value);
} catch (IOException e) {
throw new IllegalStateException(e);
}
return baos.toByteArray();
}
@InternalUseOnly
@Experimental
private void valueIntoObjectOutputStream(Value value, ObjectOutputStream oos) throws IOException {
oos.writeObject(value);
}
@InternalUseOnly
@Experimental
private Value valueFromObjectInputStream(ObjectInputStream ois) throws ClassNotFoundException, IOException {
return (Value) ois.readObject();
}
/**
* Hashes a value that complies with the hashCode/equals conception but only in context of this collection/factory
* storage layer. Potentially also only valid during a single transaction scope.
*
* @param getValue the function to extract the value to hash
* @param nextHash any previously calculated hash value for earlier values in the BindingSet
* @param bs the bindingset to take the value from
* @return a hash function
*/
@InternalUseOnly
default int hashAValue(Function<BindingSet, Value> getValue, int nextHash, BindingSet bs) {
Value value = getValue.apply(bs);
if (value != null) {
return 31 * nextHash + value.hashCode();
} else {
return nextHash;
}
}
/**
* Generate a method that calculates a hash code that is valid in context of a single store implementation and
* QueryExecutionContext.
*
* @param getValues that should be considered in the hash
* @return a hash function
*
* @implNote this method is unlikely to require overriding, check if overriding the hashAValue method is not
* sufficient first
*/
@InternalUseOnly
public default ToIntFunction<BindingSet> hashOfBindingSetFuntion(List<Function<BindingSet, Value>> getValues) {
if (!getValues.isEmpty()) {
// Special case the getValues to remove a loop if we know there is only one
// value
Iterator<Function<BindingSet, Value>> iterator = getValues.iterator();
Function<BindingSet, Value> getFirstValue = iterator.next();
ToIntFunction<BindingSet> hashFirstValue = (bs) -> {
Value value = getFirstValue.apply(bs);
if (value != null) {
return 31 + value.hashCode();
}
return 1;
};
if (!iterator.hasNext()) {
// There is only one value to hash so no loop no multiplication.
return hashFirstValue;
} else {
// There are multiple values so we collect a set of functions
// Note that we reuse the hashFirstValue function created before so the size of
// the array is one smaller;
@SuppressWarnings("unchecked")
ToIntBiFunction<BindingSet, Integer>[] hashOtherValues = new ToIntBiFunction[getValues.size() - 1];
for (int i = 0; iterator.hasNext(); i++) {
Function<BindingSet, Value> getValue = iterator.next();
hashOtherValues[i] = (bs, nextHash) -> hashAValue(getValue, nextHash, bs);
}
// Again a set of special cased hashcode methods which avoid an array value and
// length checks;
switch (hashOtherValues.length) {
case 1: {
ToIntBiFunction<BindingSet, Integer> hashSecondValue = hashOtherValues[0];
return (bs) -> {
// Take the hash of the first value
int nextHash = hashFirstValue.applyAsInt(bs);
return hashSecondValue.applyAsInt(bs, nextHash);
};
}
case 2: {
ToIntBiFunction<BindingSet, Integer> hashSecondValue = hashOtherValues[0];
ToIntBiFunction<BindingSet, Integer> hashThirdValue = hashOtherValues[1];
return (bs) -> {
int nextHash = hashFirstValue.applyAsInt(bs);
nextHash = hashSecondValue.applyAsInt(bs, nextHash);
nextHash = hashThirdValue.applyAsInt(bs, nextHash);
return nextHash;
};
}
case 3: {
ToIntBiFunction<BindingSet, Integer> hashSecondValue = hashOtherValues[0];
ToIntBiFunction<BindingSet, Integer> hashThirdValue = hashOtherValues[1];
ToIntBiFunction<BindingSet, Integer> hashFourthValue = hashOtherValues[2];
return (bs) -> {
int nextHash = hashFirstValue.applyAsInt(bs);
nextHash = hashSecondValue.applyAsInt(bs, nextHash);
nextHash = hashThirdValue.applyAsInt(bs, nextHash);
nextHash = hashFourthValue.applyAsInt(bs, nextHash);
return nextHash;
};
}
default: {
return (bs) -> {
// Take the hash of the first value
int nextHash = hashFirstValue.applyAsInt(bs);
for (int i = 0; i < hashOtherValues.length; i++) {
// hash the next values in order.
nextHash = hashOtherValues[i].applyAsInt(bs, nextHash);
}
return nextHash;
};
}
}
}
} else {
// If the values is empty hash is always one.
return (bs) -> 1;
}
}
}