// ThemeDataSetGeneratorRandomizationTest.java
/*******************************************************************************
* Copyright (c) 2025 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
// Some portions generated by Codex
package org.eclipse.rdf4j.benchmark.rio.util;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.Random;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.junit.jupiter.api.Test;
/**
 * Verifies that {@code ThemeDataSetGenerator.generateMedicalRecords} yields the same model when invoked twice with the
 * same config, and that the number of subjects per medical type equals the totals obtained by replaying the jitter
 * draws locally.
 */
class ThemeDataSetGeneratorRandomizationTest {

	/** Namespace prefix from which the counted type IRIs are built. */
	private static final String BASE = "http://example.com/theme/";

	/**
	 * Constant XOR-ed with the config seed to seed the local jitter {@link Random}. NOTE(review): presumably mirrors
	 * the generator's own seeding; confirm against {@code ThemeDataSetGenerator}.
	 */
	private static final long JITTER_SEED_XOR = 0x9E3779B97F4A7C15L;

	// Base counts handed to the config; the same values drive the local replay of the jitter sequence.
	private static final int PATIENT_COUNT = 10;
	private static final int ENCOUNTERS_PER_PATIENT = 4;
	private static final int CONDITIONS_PER_ENCOUNTER = 3;
	private static final int MEDICATIONS_PER_PATIENT = 2;
	private static final int OBSERVATIONS_PER_ENCOUNTER = 2;
	private static final int PRACTITIONER_COUNT = 6;

	/**
	 * Generates the medical dataset twice with seed 123, checks both models are equal, then compares the per-type
	 * subject counts with totals computed from a {@link Random} seeded with {@code seed ^ JITTER_SEED_XOR}.
	 */
	@Test
	void medicalConfigCountsAreJitteredDeterministically() {
		final long seed = 123L;
		ThemeDataSetGenerator.MedicalConfig config = ThemeDataSetGenerator.medicalConfig()
				.withPatientCount(PATIENT_COUNT)
				.withEncountersPerPatient(ENCOUNTERS_PER_PATIENT)
				.withConditionsPerEncounter(CONDITIONS_PER_ENCOUNTER)
				.withMedicationsPerPatient(MEDICATIONS_PER_PATIENT)
				.withObservationsPerEncounter(OBSERVATIONS_PER_ENCOUNTER)
				.withPractitionerCount(PRACTITIONER_COUNT)
				.withSeed(seed);

		Model generated = ThemeDataSetGenerator.generateMedicalRecords(config);
		Model regenerated = ThemeDataSetGenerator.generateMedicalRecords(config);
		assertEquals(generated, regenerated, "Dataset should be stable for the same seed and config");

		// Replay the draws. The order matters: practitioners, patients, then per patient medications and
		// encounters, then per encounter conditions and observations.
		Random replay = new Random(seed ^ JITTER_SEED_XOR);
		int practitionerTotal = jitterInt(replay, PRACTITIONER_COUNT, 1);
		int patientTotal = jitterInt(replay, PATIENT_COUNT, 1);
		long medicationTotal = 0;
		long encounterTotal = 0;
		long conditionTotal = 0;
		long observationTotal = 0;
		for (int patient = 0; patient < patientTotal; patient++) {
			medicationTotal += jitterInt(replay, MEDICATIONS_PER_PATIENT, 1);
			int encountersForPatient = jitterInt(replay, ENCOUNTERS_PER_PATIENT, 1);
			encounterTotal += encountersForPatient;
			for (int encounter = 0; encounter < encountersForPatient; encounter++) {
				conditionTotal += jitterInt(replay, CONDITIONS_PER_ENCOUNTER, 1);
				observationTotal += jitterInt(replay, OBSERVATIONS_PER_ENCOUNTER, 1);
			}
		}

		assertEquals(patientTotal, countType(generated, Values.iri(BASE, "medical/Patient")),
				"Unexpected patient count");
		assertEquals(practitionerTotal, countType(generated, Values.iri(BASE, "medical/Practitioner")),
				"Unexpected practitioner count");
		assertEquals(encounterTotal, countType(generated, Values.iri(BASE, "medical/Encounter")),
				"Unexpected encounter count");
		assertEquals(conditionTotal, countType(generated, Values.iri(BASE, "medical/Condition")),
				"Unexpected condition count");
		assertEquals(medicationTotal, countType(generated, Values.iri(BASE, "medical/Medication")),
				"Unexpected medication count");
		assertEquals(observationTotal, countType(generated, Values.iri(BASE, "medical/Observation")),
				"Unexpected observation count");
	}

	/**
	 * Counts the distinct subjects that have an {@code rdf:type} statement with the given type.
	 *
	 * @param model the model to inspect
	 * @param type  the type IRI to match as the object of {@code rdf:type}
	 * @return the size of the subject set of the matching statements
	 */
	private static long countType(Model model, IRI type) {
		Model typed = model.filter(null, RDF.TYPE, type);
		return typed.subjects().size();
	}

	/**
	 * Picks a value in the inclusive range {@code [max(minValue, base - base/2), base + base/2]}, consuming one draw
	 * from {@code random} unless the range collapses to a single value.
	 *
	 * @param random   source of randomness; advanced by one {@code nextInt} call when the range has more than one value
	 * @param base     nominal value around which to jitter
	 * @param minValue lower clamp for the result
	 * @return the jittered value
	 */
	private static int jitterInt(Random random, int base, int minValue) {
		int spread = base / 2;
		int lower = Math.max(minValue, base - spread);
		int upper = Math.max(lower, base + spread);
		// No draw is consumed for a degenerate range, which keeps the random sequence aligned.
		return lower == upper ? lower : lower + random.nextInt(upper - lower + 1);
	}
}