// DataStore.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf.datastore;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.zip.CRC32;
import org.eclipse.rdf4j.common.io.ByteArrayUtil;
/**
 * Class that provides indexed storage and retrieval of arbitrary length data.
 * <p>
 * A store consists of three files that share a common prefix inside one directory:
 * <ul>
 * <li><var>prefix</var>.dat - the data file holding the stored byte arrays,</li>
 * <li><var>prefix</var>.id - the ID file mapping each value ID to an offset in the data file,</li>
 * <li><var>prefix</var>.hash - the hash file mapping a hash of the data to candidate value IDs.</li>
 * </ul>
 *
 * @author Arjohn Kampman
 */
public class DataStore implements Closeable {

    /*-----------*
     * Variables *
     *-----------*/

    private final DataFile dataFile;

    private final IDFile idFile;

    private final HashFile hashFile;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Opens the data store with the given file prefix in the given directory, without forced syncing.
     *
     * @param dataDir    The directory that contains (or will contain) the store's files.
     * @param filePrefix The common name prefix of the store's files.
     * @throws IOException If an I/O error occurred while opening one of the files.
     */
    public DataStore(File dataDir, String filePrefix) throws IOException {
        this(dataDir, filePrefix, false);
    }

    /**
     * Opens the data store with the given file prefix in the given directory.
     * <p>
     * If opening one of the files fails, the files that were already opened are closed again before the failure is
     * propagated, so that no file handles leak from a half-constructed store.
     *
     * @param dataDir    The directory that contains (or will contain) the store's files.
     * @param filePrefix The common name prefix of the store's files.
     * @param forceSync  Flag that is passed on unchanged to each of the three underlying files.
     * @throws IOException If an I/O error occurred while opening one of the files.
     */
    public DataStore(File dataDir, String filePrefix, boolean forceSync) throws IOException {
        DataFile openedDataFile = null;
        IDFile openedIdFile = null;
        HashFile openedHashFile;

        try {
            openedDataFile = new DataFile(new File(dataDir, filePrefix + ".dat"), forceSync);
            openedIdFile = new IDFile(new File(dataDir, filePrefix + ".id"), forceSync);
            openedHashFile = new HashFile(new File(dataDir, filePrefix + ".hash"), forceSync);
        } catch (IOException | RuntimeException e) {
            // Release whatever was opened so far, in reverse order. A failure to close must not
            // hide the original problem, so it is attached as a suppressed exception instead.
            if (openedIdFile != null) {
                try {
                    openedIdFile.close();
                } catch (IOException | RuntimeException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            if (openedDataFile != null) {
                try {
                    openedDataFile.close();
                } catch (IOException | RuntimeException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw e;
        }

        dataFile = openedDataFile;
        idFile = openedIdFile;
        hashFile = openedHashFile;
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Gets the value for the specified ID.
     *
     * @param id A value ID, should be larger than 0.
     * @return The value for the ID, or <var>null</var> if no such value could be found.
     * @throws IOException If an I/O error occurred.
     */
    public byte[] getData(int id) throws IOException {
        assert id > 0 : "id must be larger than 0, is: " + id;

        // An offset of 0 in the ID file is treated as "no value stored for this ID".
        long offset = idFile.getOffset(id);

        if (offset != 0L) {
            return dataFile.getData(offset);
        }

        return null;
    }

    /**
     * Gets the ID for the specified value.
     * <p>
     * The hash of the value is used to look up candidate IDs in the hash file; each candidate's data is then read
     * and compared byte-for-byte, since different values may share the same hash.
     *
     * @param queryData The value to get the ID for, must not be <var>null</var>.
     * @return The ID for the specified value, or <var>-1</var> if no such ID could be found.
     * @throws IOException If an I/O error occurred.
     */
    public int getID(byte[] queryData) throws IOException {
        assert queryData != null : "queryData must not be null";

        int id;

        int hash = getDataHash(queryData);
        HashFile.IDIterator iter = hashFile.getIDIterator(hash);
        try {
            // The iterator signals exhaustion with a negative value, which then doubles as the
            // "not found" result of this method.
            while ((id = iter.next()) >= 0) {
                long offset = idFile.getOffset(id);
                byte[] data = dataFile.getData(offset);

                if (Arrays.equals(queryData, data)) {
                    // Matching data found
                    break;
                }
            }
        } finally {
            iter.close();
        }

        return id;
    }

    /**
     * Returns the maximum value-ID that is in use.
     *
     * @return The largest ID, or <var>0</var> if the store does not contain any values.
     */
    public int getMaxID() {
        return idFile.getMaxID();
    }

    /**
     * Stores the supplied value and returns the ID that has been assigned to it. In case the data to store is already
     * present, the ID of this existing data is returned.
     *
     * @param data The data to store, must not be <var>null</var>.
     * @return The ID that has been assigned to the value.
     * @throws IOException If an I/O error occurred.
     */
    public int storeData(byte[] data) throws IOException {
        assert data != null : "data must not be null";

        int id = getID(data);

        if (id == -1) {
            // Data not stored yet: append it to the data file, register its offset under a new ID
            // and make that ID findable through the data's hash.
            long offset = dataFile.storeData(data);
            id = idFile.storeOffset(offset);
            hashFile.storeID(getDataHash(data), id);
        }

        return id;
    }

    /**
     * Synchronizes any recent changes to the data to disk. The hash file is synced first, then the ID file, then the
     * data file.
     *
     * @throws IOException If an I/O error occurred.
     */
    public void sync() throws IOException {
        hashFile.sync();
        idFile.sync();
        dataFile.sync();
    }

    /**
     * Removes all values from the DataStore. Each of the three underlying files is cleared even if clearing an
     * earlier one fails.
     *
     * @throws IOException If an I/O error occurred.
     */
    public void clear() throws IOException {
        try {
            hashFile.clear();
        } finally {
            try {
                idFile.clear();
            } finally {
                dataFile.clear();
            }
        }
    }

    /**
     * Closes the DataStore by closing its hash, ID and data files. Each file is closed even if closing an earlier
     * one fails. Once closed, the DataStore can no longer be used.
     *
     * @throws IOException If an I/O error occurred.
     */
    @Override
    public void close() throws IOException {
        try {
            hashFile.close();
        } finally {
            try {
                idFile.close();
            } finally {
                dataFile.close();
            }
        }
    }

    /**
     * Gets a hash code for the supplied data: its CRC-32 checksum, truncated to an <var>int</var>.
     *
     * @param data The data to calculate the hash code for.
     * @return A hash code for the supplied data.
     */
    private static int getDataHash(byte[] data) {
        CRC32 crc32 = new CRC32();
        crc32.update(data);
        return (int) crc32.getValue();
    }

    /*--------------------*
     * Test/debug methods *
     *--------------------*/

    /**
     * Debug entry point that prints every value in a data store as a hexadecimal string, one value per line.
     *
     * @param args The data directory followed by the file prefix of the store to dump.
     * @throws Exception If the store could not be opened or read.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Derive the class name instead of hard-coding it, so the usage text cannot go stale
            // when the class is moved to another package.
            System.err.println("Usage: java " + DataStore.class.getName() + " <data-dir> <file-prefix>");
            return;
        }

        System.out.println("Dumping DataStore contents...");
        File dataDir = new File(args[0]);

        try (DataStore dataStore = new DataStore(dataDir, args[1])) {
            // The iterator reads from the store's data file, so it must not outlive the store.
            DataFile.DataIterator iter = dataStore.dataFile.iterator();
            while (iter.hasNext()) {
                byte[] data = iter.next();
                System.out.println(ByteArrayUtil.toHexString(data));
            }
        }
    }
}