RepositoryUtil.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.repository.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryException;
/**
* Utility methods for comparing sets of statements (graphs) with each other. The supplied comparison operations map
* bnodes in the two supplied models on to each other and thus define a graph isomorphism.
*
* @author jeen
* @author Arjohn Kampman
*/
public class RepositoryUtil {
/**
* Compares the models in the default contexts of the two supplied repositories and returns true if they are equal.
* Models are equal if they contain the same set of statements. bNodes IDs are not relevant for model equality, they
* are mapped from one model to the other by using the attached properties. Note that the method pulls the entire
* default context of both repositories into main memory. Use with caution.
*/
public static boolean equals(Repository rep1, Repository rep2) throws RepositoryException {
// Fetch statements from rep1 and rep2
Set<Statement> model1, model2;
try (RepositoryConnection con1 = rep1.getConnection()) {
model1 = Iterations.asSet(con1.getStatements(null, null, null, true));
}
try (RepositoryConnection con2 = rep2.getConnection()) {
model2 = Iterations.asSet(con2.getStatements(null, null, null, true));
}
return Models.isomorphic(model1, model2);
}
/**
* Compares the models of the default context of two repositories and returns true if rep1 is a subset of rep2. Note
* that the method pulls the entire default context of both repositories into main memory. Use with caution.
*/
public static boolean isSubset(Repository rep1, Repository rep2) throws RepositoryException {
Set<Statement> model1, model2;
try (RepositoryConnection con1 = rep1.getConnection()) {
model1 = Iterations.asSet(con1.getStatements(null, null, null, true));
}
try (RepositoryConnection con2 = rep2.getConnection()) {
model2 = Iterations.asSet(con2.getStatements(null, null, null, true));
}
return Models.isSubset(model1, model2);
}
/**
* Compares two models defined by the default context of two repositories and returns the difference between the
* first and the second model (that is, all statements that are present in rep1 but not in rep2). Blank node IDs are
* not relevant for model equality, they are mapped from one model to the other by using the attached properties.
* Note that the method pulls the entire default context of both repositories into main memory. Use with caution.
* <p>
* <b>NOTE: this algorithm is currently broken; it doesn't actually map blank nodes between the two models.</b>
*
* @return The collection of statements that is the difference between rep1 and rep2.
*/
public static Collection<? extends Statement> difference(Repository rep1, Repository rep2)
throws RepositoryException {
Collection<Statement> model1;
Collection<Statement> model2;
try (RepositoryConnection con1 = rep1.getConnection()) {
model1 = Iterations.asSet(con1.getStatements(null, null, null, false));
}
try (RepositoryConnection con2 = rep2.getConnection()) {
model2 = Iterations.asSet(con2.getStatements(null, null, null, false));
}
return difference(model1, model2);
}
/**
* Compares two models, defined by two statement collections, and returns the difference between the first and the
* second model (that is, all statements that are present in model1 but not in model2). Blank node IDs are not
* relevant for model equality, they are mapped from one model to the other by using the attached properties. *
* <p>
* <b>NOTE: this algorithm is currently broken; it doesn't actually map blank nodes between the two models.</b>
*
* @return The collection of statements that is the difference between model1 and model2.
*/
public static Collection<? extends Statement> difference(Collection<? extends Statement> model1,
Collection<? extends Statement> model2) {
// Create working copies
LinkedList<Statement> copy1 = new LinkedList<>(model1);
LinkedList<Statement> copy2 = new LinkedList<>(model2);
Collection<Statement> result = new ArrayList<>();
// Compare statements that don't contain bNodes
Iterator<Statement> iter1 = copy1.iterator();
while (iter1.hasNext()) {
Statement st = iter1.next();
if (st.getSubject() instanceof BNode || st.getObject() instanceof BNode) {
// One or more of the statement's components is a bNode,
// these statements are handled later
continue;
}
// Try to remove the statement from model2
boolean removed = copy2.remove(st);
if (!removed) {
// statement was not present in model2 and is part of the difference
result.add(st);
}
iter1.remove();
}
// FIXME: this algorithm is broken: bNodeMapping is assumed to contain a
// bnode mapping while in reallity it is an empty map
HashMap<BNode, BNode> bNodeMapping = new HashMap<>();
// mapBlankNodes(copy1, copy2, bNodeMapping, 0);
for (Statement st1 : copy1) {
boolean foundMatch = false;
for (Statement st2 : copy2) {
if (statementsMatch(st1, st2, bNodeMapping)) {
// Found a matching statement
foundMatch = true;
break;
}
}
if (!foundMatch) {
// No statement matching st1 was found in model2, st1 is part of
// the difference.
result.add(st1);
}
}
return result;
}
private static boolean statementsMatch(Statement st1, Statement st2, Map<BNode, BNode> bNodeMapping) {
IRI pred1 = st1.getPredicate();
IRI pred2 = st2.getPredicate();
if (!pred1.equals(pred2)) {
// predicates don't match
return false;
}
Resource subj1 = st1.getSubject();
Resource subj2 = st2.getSubject();
if (!(subj1 instanceof BNode)) {
if (!subj1.equals(subj2)) {
// subjects are not bNodes and don't match
return false;
}
} else { // subj1 instanceof BNode
BNode mappedBNode = bNodeMapping.get(subj1);
if (mappedBNode != null) {
// bNode 'subj1' was already mapped to some other bNode
if (!subj2.equals(mappedBNode)) {
// 'subj1' and 'subj2' do not match
return false;
}
} else {
// 'subj1' was not yet mapped. we need to check if 'subj2' is a
// possible mapping candidate
if (bNodeMapping.containsValue(subj2)) {
// 'subj2' is already mapped to some other value.
return false;
}
}
}
Value obj1 = st1.getObject();
Value obj2 = st2.getObject();
if (!(obj1 instanceof BNode)) {
if (!obj1.equals(obj2)) {
// objects are not bNodes and don't match
return false;
}
} else { // obj1 instanceof BNode
BNode mappedBNode = bNodeMapping.get(obj1);
if (mappedBNode != null) {
// bNode 'obj1' was already mapped to some other bNode
if (!obj2.equals(mappedBNode)) {
// 'obj1' and 'obj2' do not match
return false;
}
} else {
// 'obj1' was not yet mapped. we need to check if 'obj2' is a
// possible mapping candidate
if (bNodeMapping.containsValue(obj2)) {
// 'obj2' is already mapped to some other value.
return false;
}
}
}
return true;
}
}