GeneralCompareBench.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.query.algebra.evaluation.benchmark;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.query.algebra.Compare.CompareOp;
import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException;
import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtil;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 6)
@Measurement(iterations = 10)
@Fork(2)
public class GeneralCompareBench {
@State(Scope.Benchmark)
public static class DataSet {
/**
 * Number of operand pairs to generate; every array below is allocated with this length.
 */
@Param({ "65536" }) // large enough to avoid cache re-use patterns
public int size;
/**
 * Seed of the {@link Random} that drives all data generation, so a given parameter set always yields the same
 * data.
 */
@Param({ "42" })
public long seed;
/**
 * Percentage (0..100) of items that are intentionally error cases (e.g., incompatible supported types in strict
 * mode, unsupported datatypes, indeterminate dateTime).
 */
@Param({ "3" })
public int errorRatePercent;
/**
 * Distribution profile:
 * <ul>
 * <li>"balanced": a bit of everything</li>
 * <li>"numericHeavy": more numbers</li>
 * <li>"stringHeavy": more strings</li>
 * </ul>
 * Any value other than "numericHeavy" or "stringHeavy" selects the balanced weights.
 */
@Param({ "balanced" })
public String mix;
// Left-hand operands, one per item.
Value[] a;
// Right-hand operands, one per item.
Value[] b;
// Comparison operator chosen for each item.
CompareOp[] op;
// Strictness flag passed to the comparison for each item.
boolean[] strict;
// Factory used to create every literal and IRI in the data set.
final SimpleValueFactory vf = SimpleValueFactory.getInstance();
// Created in setup(); parses the dateTime lexical forms into XMLGregorianCalendar values.
DatatypeFactory df;
// Datatype IRI (http://example.com/dt#unknown, assigned in setup()) used for the "unsupported datatype" literals.
IRI unknownDT;
/**
 * Builds the randomized comparison workload.
 * <p>
 * For every index a pair of operands is generated according to the weights selected by {@link #mix}, followed by
 * a random {@link CompareOp} and a strictness flag. Afterwards, with a probability of {@link #errorRatePercent}
 * percent, the entry is overwritten with a pair that is intended to make the comparison fail. All randomness
 * comes from one {@link Random} seeded with {@link #seed}, so the generated data is reproducible.
 *
 * @throws IllegalArgumentException if {@link #size} is not positive
 * @throws IllegalStateException    if no {@link DatatypeFactory} implementation can be instantiated
 */
@Setup
public void setup() {
    if (size <= 0) {
        // An empty data set would otherwise only fail later, inside the measured method, with an index error.
        throw new IllegalArgumentException("size must be positive but was " + size);
    }
    try {
        df = DatatypeFactory.newInstance();
    } catch (DatatypeConfigurationException e) {
        throw new IllegalStateException("Unable to instantiate a DatatypeFactory", e);
    }
    unknownDT = vf.createIRI("http://example.com/dt#unknown");
    a = new Value[size];
    b = new Value[size];
    op = new CompareOp[size];
    strict = new boolean[size];

    final Random rnd = new Random(seed);
    final int[] weights = weightsFor(mix);
    int total = 0;
    for (int weight : weights) {
        total += weight;
    }
    // values() clones the enum's backing array on each call, so fetch it once instead of twice per item.
    final CompareOp[] operators = CompareOp.values();

    for (int i = 0; i < size; i++) {
        // Generate a pair (a[i], b[i]) of some type
        final boolean isDuration = generatePair(i, rnd.nextInt(total), weights, rnd);
        // Choose operator
        op[i] = operators[rnd.nextInt(operators.length)];
        // Choose strictness; duration items are forced to non-strict (and consume no random number here) so
        // that the duration code path is actually exercised.
        strict[i] = !isDuration && rnd.nextInt(100) >= 15;
        // Inject a small fraction of explicit error cases (overrides everything above)
        if (rnd.nextInt(100) < errorRatePercent) {
            injectErrorCase(i, rnd);
        }
    }
}

/**
 * Returns the relative weights for the given mix, in the order numeric, string, boolean, dateTime, duration,
 * unsupported datatype, incompatible supported types. Unknown mix names fall back to the balanced profile.
 */
private static int[] weightsFor(String mix) {
    switch (mix) {
    case "numericHeavy":
        return new int[] { 55, 10, 5, 15, 5, 5, 5 };
    case "stringHeavy":
        return new int[] { 15, 55, 5, 10, 5, 5, 5 };
    default:
        return new int[] { 35, 25, 10, 15, 5, 5, 5 };
    }
}

/**
 * Maps {@code pick} (a value in {@code [0, sum(weights))}) to a weighted category and fills slot {@code i} with
 * a pair of that category.
 *
 * @return {@code true} if a duration pair was generated
 */
private boolean generatePair(int i, int pick, int[] weights, Random rnd) {
    // Walk the cumulative weights until pick falls into a bucket; the last bucket catches the remainder.
    int kind = 0;
    int remaining = pick - weights[0];
    while (remaining >= 0 && kind < weights.length - 1) {
        kind++;
        remaining -= weights[kind];
    }
    switch (kind) {
    case 0:
        genNumeric(i, rnd);
        return false;
    case 1:
        genString(i, rnd);
        return false;
    case 2:
        genBoolean(i, rnd);
        return false;
    case 3:
        genDateTime(i, rnd);
        return false;
    case 4:
        genDuration(i, rnd);
        return true; // this type requires non-strict to hit the duration path
    case 5:
        genUnsupported(i, rnd);
        return false;
    default:
        genIncompatibleSupported(i, rnd);
        return false;
    }
}

/**
 * Overwrites slot {@code i} with one of three strict-mode pairs that are intended to make the comparison fail.
 */
private void injectErrorCase(int i, Random rnd) {
    switch (rnd.nextInt(3)) {
    case 0: // string vs boolean under strict EQ/NE -> intended to be a strict type error
        a[i] = vf.createLiteral("foo");
        b[i] = vf.createLiteral(rnd.nextBoolean());
        op[i] = rnd.nextBoolean() ? CompareOp.EQ : CompareOp.NE;
        break;
    case 1: // dateTime without timezone vs Z under strict -> intended to be indeterminate
        a[i] = vf.createLiteral(df.newXMLGregorianCalendar("2020-01-01T00:00:00"));
        b[i] = vf.createLiteral(df.newXMLGregorianCalendar("2020-01-01T00:00:00Z"));
        op[i] = CompareOp.EQ;
        break;
    default: // unsupported datatypes
        a[i] = vf.createLiteral("x", unknownDT);
        b[i] = vf.createLiteral("y", unknownDT);
        op[i] = CompareOp.EQ;
        break;
    }
    strict[i] = true;
}
/**
 * Fills slot {@code i} with a pair of numeric literals of one randomly chosen kind: double, float, integer or
 * decimal. In one case out of ten the second operand is the same value as the first; otherwise it is derived
 * from the first by a small offset (which can be zero for the integer kind).
 *
 * @param i   index of the slot to fill in {@code a} and {@code b}
 * @param rnd source of randomness
 */
private void genNumeric(int i, Random rnd) {
    int subtype = rnd.nextInt(4); // 0:double, 1:float, 2:integer, 3:decimal
    switch (subtype) {
    case 0: {
        double x = rnd.nextDouble() * 1e6 - 5e5;
        double y = rnd.nextInt(10) == 0 ? x : x + (rnd.nextBoolean() ? 1 : -1) * rnd.nextDouble();
        a[i] = vf.createLiteral(x);
        b[i] = vf.createLiteral(y);
        break;
    }
    case 1: {
        float x = (float) (rnd.nextGaussian() * 100.0);
        float y = rnd.nextInt(10) == 0 ? x : x + (rnd.nextBoolean() ? 1 : -1) * (float) rnd.nextGaussian();
        a[i] = vf.createLiteral(x);
        b[i] = vf.createLiteral(y);
        break;
    }
    case 2: {
        BigInteger x = new BigInteger(64, rnd);
        BigInteger y = rnd.nextInt(10) == 0 ? x : x.add(BigInteger.valueOf(rnd.nextInt(3) - 1));
        a[i] = vf.createLiteral(x);
        b[i] = vf.createLiteral(y);
        break;
    }
    default: {
        // Decimal with two fraction digits, built directly from its unscaled value: this yields the same
        // value and scale as parsing "<whole>.<hundredths>" but does not depend on the default locale's
        // number formatting and skips the string round trip.
        long whole = rnd.nextInt(1000);
        long hundredths = rnd.nextInt(100);
        BigDecimal x = BigDecimal.valueOf(whole * 100 + hundredths, 2);
        BigDecimal y = rnd.nextInt(10) == 0 ? x : x.add(new BigDecimal("0.01"));
        a[i] = vf.createLiteral(x);
        b[i] = vf.createLiteral(y);
        break;
    }
    }
}
/**
 * Fills slot {@code i} with two plain string literals drawn from a small fixed pool. One time in ten the second
 * literal reuses the first string; otherwise it is drawn independently (and may still coincide).
 */
private void genString(int i, Random rnd) {
    final String[] candidates = { "a", "b", "foo", "bar", "lorem", "ipsum", "" };
    final String left = candidates[rnd.nextInt(candidates.length)];
    final String right;
    if (rnd.nextInt(10) == 0) {
        right = left;
    } else {
        right = candidates[rnd.nextInt(candidates.length)];
    }
    a[i] = vf.createLiteral(left);
    b[i] = vf.createLiteral(right);
}
/**
 * Fills slot {@code i} with two boolean literals: one time in ten both carry the same value, otherwise the
 * second is the negation of the first.
 */
private void genBoolean(int i, Random rnd) {
    final boolean left = rnd.nextBoolean();
    boolean right = left;
    if (rnd.nextInt(10) != 0) {
        right = !left;
    }
    a[i] = vf.createLiteral(left);
    b[i] = vf.createLiteral(right);
}
/**
 * Fills slot {@code i} with a pair of dateTime literals on 2020-01-01 that share random minutes and seconds.
 * One of three variants is chosen at random:
 * <ol start="0">
 * <li>Z vs Z, identical lexical form</li>
 * <li>+01:00 vs Z denoting the same instant (12:mm:ss+01:00 and 11:mm:ssZ)</li>
 * <li>no timezone vs Z (intended to exercise the indeterminate case under strict comparison)</li>
 * </ol>
 *
 * @param i   index of the slot to fill in {@code a} and {@code b}
 * @param rnd source of randomness
 */
private void genDateTime(int i, Random rnd) {
    int m = rnd.nextInt(60), s = rnd.nextInt(60);
    String xLex, yLex;
    switch (rnd.nextInt(3)) {
    case 0:
        xLex = dateTimeLex(12, m, s, "Z");
        yLex = xLex;
        break;
    case 1:
        xLex = dateTimeLex(12, m, s, "+01:00");
        yLex = dateTimeLex(11, m, s, "Z"); // same instant, valid time
        break;
    default:
        xLex = dateTimeLex(12, m, s, ""); // no tz
        yLex = dateTimeLex(12, m, s, "Z"); // Z
        break;
    }
    XMLGregorianCalendar x = df.newXMLGregorianCalendar(xLex);
    XMLGregorianCalendar y = df.newXMLGregorianCalendar(yLex);
    a[i] = vf.createLiteral(x);
    b[i] = vf.createLiteral(y);
}

/**
 * Formats a dateTime lexical form on 2020-01-01 with the given time and timezone suffix. Formatting is pinned
 * to {@link Locale#ROOT}: with the default locale, {@code %d} may emit non-ASCII digits, which would not be a
 * valid lexical form for {@code newXMLGregorianCalendar}.
 */
private static String dateTimeLex(int hour, int minute, int second, String tz) {
    return String.format(Locale.ROOT, "2020-01-01T%02d:%02d:%02d%s", hour, minute, second, tz);
}
/**
 * Fills slot {@code i} with two duration-typed literals: "P1D" on the left and, chosen by a coin flip, either
 * "PT24H" or "PT24H30M" on the right. The strictness flag is not touched here; the caller forces it to
 * {@code false} for duration items.
 */
private void genDuration(int i, Random rnd) {
    final String rightLex = rnd.nextBoolean() ? "PT24H" : "PT24H30M";
    final IRI durationType = CoreDatatype.XSD.DURATION.getIri();
    a[i] = vf.createLiteral("P1D", durationType);
    b[i] = vf.createLiteral(rightLex, durationType);
}
/**
 * Fills slot {@code i} with the literals "x" and "y", both typed with {@code unknownDT}. The random source is
 * accepted for signature parity with the other generators but is not used.
 */
private void genUnsupported(int i, Random rnd) {
    final IRI datatype = unknownDT;
    a[i] = vf.createLiteral("x", datatype);
    b[i] = vf.createLiteral("y", datatype);
}
/**
 * Fills slot {@code i} with a string literal ("foo") on the left and a random boolean literal on the right,
 * i.e. two literal kinds that are each supported but not comparable with one another.
 */
private void genIncompatibleSupported(int i, Random rnd) {
    final Literal text = vf.createLiteral("foo");
    final Literal flag = vf.createLiteral(rnd.nextBoolean());
    a[i] = text;
    b[i] = flag;
}
}
/**
 * Per-thread read position that cycles through the indices of the data set.
 */
@State(Scope.Thread)
public static class Cursor {
    int idx = 0;
    boolean pow2;
    int mask;

    /**
     * Resets the position before every iteration and records whether the data set size is a power of two, in
     * which case wrapping can be done with a bit mask.
     */
    @Setup(Level.Iteration)
    public void setup(DataSet ds) {
        final int length = ds.size;
        mask = length - 1;
        pow2 = (length & mask) == 0;
        idx = 0;
    }

    /**
     * Returns the current index and advances the position by one, wrapping around at {@code n}.
     *
     * @param n number of items to cycle through; only consulted when the size is not a power of two
     */
    int next(int n) {
        final int current = idx;
        final int advanced = current + 1;
        if (!pow2) {
            // Avoid an expensive % in the hot loop: wrap manually.
            idx = advanced >= n ? advanced - n : advanced;
            return current >= n ? current - n : current;
        }
        idx = advanced & mask;
        return current & mask;
    }
}
/**
 * Measures {@code QueryEvaluationUtil.compare} on the next entry of the data set, using that entry's operator
 * and strictness flag. Both the result (or {@code false} when the comparison throws) and, on failure, the
 * exception class are handed to the blackhole.
 */
@Benchmark
public void general_dispatch_compare(DataSet ds, Cursor cur, Blackhole bh) {
    final int slot = cur.next(ds.size);
    boolean outcome;
    try {
        outcome = QueryEvaluationUtil.compare(ds.a[slot], ds.b[slot], ds.op[slot], ds.strict[slot]);
    } catch (ValueExprEvaluationException failure) {
        bh.consume(failure.getClass());
        outcome = false;
    }
    bh.consume(outcome);
}
/**
 * Measures {@code QueryEvaluationUtil.compareLiteralsEQ} on the next entry of the data set. The entry's operator
 * is ignored; only its operands and strictness flag are used. Both the result (or {@code false} when the
 * comparison throws) and, on failure, the exception class are handed to the blackhole.
 */
@Benchmark
public void general_literal_EQ_fastpath(DataSet ds, Cursor cur, Blackhole bh) {
    final int i = cur.next(ds.size);
    boolean r = false;
    try {
        // Every generated operand is created through the value factory's createLiteral, so the casts are safe.
        r = QueryEvaluationUtil.compareLiteralsEQ((Literal) ds.a[i], (Literal) ds.b[i], ds.strict[i]);
    } catch (ValueExprEvaluationException ex) {
        // Only evaluation errors are part of the workload. Anything else (including Errors) must surface
        // instead of being counted as an ordinary failed comparison.
        bh.consume(ex.getClass());
    }
    bh.consume(r);
}
}