EvaluationStatisticsTest.java

/*******************************************************************************
 * Copyright (c) 2019 Eclipse RDF4J contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.sail.extensiblestore.evaluationstatistics;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.io.InputStream;
import java.util.stream.IntStream;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.sail.extensiblestore.valuefactory.ExtensibleStatementHelper;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class EvaluationStatisticsTest {

	private static final Logger logger = LoggerFactory.getLogger(EvaluationStatisticsTest.class);
	private static final SimpleValueFactory vf = SimpleValueFactory.getInstance();
	private static final ExtensibleStatementHelper ex = ExtensibleStatementHelper.getDefaultImpl();

	Model parse;

	{
		try {
			parse = Rio.parse(getResourceAsStream("bsbm-100.ttl"), "", RDFFormat.TURTLE);
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	@Test
	public void testStaleStats() throws InterruptedException {

		ExtensibleDynamicEvaluationStatistics extensibleDynamicEvaluationStatistics = new ExtensibleDynamicEvaluationStatistics(
				null);

		parse.forEach(s -> extensibleDynamicEvaluationStatistics.add(ex.fromStatement(s, false)));
		extensibleDynamicEvaluationStatistics.waitForQueue();
		double staleness1 = extensibleDynamicEvaluationStatistics.staleness(parse.size());
		roundedAssert(0, staleness1);

		parse.forEach(s -> extensibleDynamicEvaluationStatistics.remove(ex.fromStatement(s, false)));
		extensibleDynamicEvaluationStatistics.waitForQueue();
		double staleness2 = extensibleDynamicEvaluationStatistics.staleness(0);
		roundedAssert(0, staleness2);

		IntStream.range(0, 100).forEach(i -> {
			extensibleDynamicEvaluationStatistics
					.add(ex.fromStatement(vf.createStatement(RDF.TYPE, RDFS.LABEL, vf.createLiteral(i + "a")), false));
		});
		parse.forEach(s -> extensibleDynamicEvaluationStatistics.add(ex.fromStatement(s, false)));
		extensibleDynamicEvaluationStatistics.waitForQueue();

		double staleness3 = extensibleDynamicEvaluationStatistics.staleness(100 + parse.size());
		roundedAssert(1, staleness3);

		IntStream.range(0, 100000).forEach(i -> {
			extensibleDynamicEvaluationStatistics
					.add(ex.fromStatement(vf.createStatement(RDF.TYPE, RDFS.LABEL, vf.createLiteral(i + "b")), false));
		});

		extensibleDynamicEvaluationStatistics.waitForQueue();

		double staleness4 = extensibleDynamicEvaluationStatistics.staleness(100000 + 100 + parse.size());

		roundedAssert(0.3, staleness4);

	}

	@Test
	public void stalenessCalculationTest() throws InterruptedException {
		SimpleValueFactory vf = SimpleValueFactory.getInstance();

		ExtensibleDynamicEvaluationStatistics extensibleDynamicEvaluationStatistics = new ExtensibleDynamicEvaluationStatistics(
				null);

		parse.forEach(s -> extensibleDynamicEvaluationStatistics.add(ex.fromStatement(s, false)));
		extensibleDynamicEvaluationStatistics.waitForQueue();

		double staleness1 = extensibleDynamicEvaluationStatistics.staleness(parse.size());
		roundedAssert(0, staleness1);

		double staleness2 = extensibleDynamicEvaluationStatistics.staleness(parse.size() * 3);
		roundedAssert(0.7, staleness2);

		double staleness3 = extensibleDynamicEvaluationStatistics.staleness(parse.size() / 3);
		roundedAssert(0.7, staleness3);

	}

	@Test
	public void testAcurracy() throws InterruptedException {
		SimpleValueFactory vf = SimpleValueFactory.getInstance();

		ExtensibleDynamicEvaluationStatistics extensibleDynamicEvaluationStatistics = new ExtensibleDynamicEvaluationStatistics(
				null);

		parse.forEach(s -> extensibleDynamicEvaluationStatistics.add(ex.fromStatement(s, false)));
		extensibleDynamicEvaluationStatistics.waitForQueue();

		ExtensibleDynamicEvaluationStatistics.ExtensibleDynamicEvaluationStatisticsCardinalityCalculator cardinalityCalculator = (ExtensibleDynamicEvaluationStatistics.ExtensibleDynamicEvaluationStatisticsCardinalityCalculator) extensibleDynamicEvaluationStatistics
				.createCardinalityCalculator();

		IRI bdbmProductType = vf.createIRI("http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/", "ProductType");
		IRI dataFromProducer1Product31 = vf
				.createIRI("http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/", "Product31");

		StatementPattern null_rdfType_bsbmProductType = new StatementPattern(
				new Var("a", null),
				new Var("b", RDF.TYPE),
				new Var("c", bdbmProductType));

		checkPattern(cardinalityCalculator, null_rdfType_bsbmProductType, 5);

		StatementPattern null_null_null = new StatementPattern(
				new Var("a", null),
				new Var("b", null),
				new Var("c", null));

		checkPattern(cardinalityCalculator, null_null_null, 5);

		StatementPattern null_rdfType_null = new StatementPattern(
				new Var("a", null),
				new Var("b", RDF.TYPE),
				new Var("c", null));

		checkPattern(cardinalityCalculator, null_rdfType_null, 5);

		StatementPattern nonExistent = new StatementPattern(
				new Var("a", null),
				new Var("b", vf.createIRI("http://example.com/fhjerhf2uhfjkdsbf32o")),
				new Var("c", null));

		checkPattern(cardinalityCalculator, nonExistent, 5);

		// this last pattern isn't very accurate, it's actually 46 statements, but the estimate is 100.4
		StatementPattern bsbmProductType_null_null = new StatementPattern(
				new Var("a", dataFromProducer1Product31),
				new Var("b", null),
				new Var("c", null));

		checkPattern(cardinalityCalculator, bsbmProductType_null_null, 120);

	}

	private void checkPattern(
			ExtensibleDynamicEvaluationStatistics.ExtensibleDynamicEvaluationStatisticsCardinalityCalculator cardinalityCalculator,
			StatementPattern pattern, int percentage) {
		double estimatedCardinality = cardinalityCalculator.getCardinality(pattern);
		int actual = parse
				.filter((Resource) pattern.getSubjectVar().getValue(), (IRI) pattern.getPredicateVar().getValue(),
						pattern.getObjectVar().getValue())
				.size();

		fuzzyAssert(actual, estimatedCardinality, percentage);
	}

	private void fuzzyAssert(int expected, double estimated, int percentage) {

		double diff = Math.abs(expected - estimated);
		if (100.0 / expected * diff > percentage) {
			assertEquals(expected, estimated,
					"Estimated cardinality should be within " + percentage + "% of expected cardinality");
		}

	}

	private void roundedAssert(double expected, double actual) {
		assertEquals(expected, Math.round(actual * 10) / 10.0);
	}

	private static InputStream getResourceAsStream(String name) {
		return EvaluationStatisticsTest.class.getClassLoader().getResourceAsStream(name);
	}

}