IDFile.java
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.nativerdf.datastore;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.math.RoundingMode;
import java.util.Arrays;
import org.eclipse.rdf4j.common.io.NioFile;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.math.LongMath;
/**
* Class supplying access to an ID file. An ID file maps IDs (integers >= 1) to file pointers (long integers). There
* is a direct correlation between IDs and the position at which the file pointers are stored; the file pointer for ID X
* is stored at position 8*X.
* <p>
* This class supports parallel reads but not parallel writes.
*
* @author Arjohn Kampman
*/
public class IDFile implements Closeable {
/*-----------*
* Constants *
*-----------*/
/**
* Magic number "Native ID File" to detect whether the file is actually an ID file. The first three bytes of the
* file should be equal to this magic number.
*/
private static final byte[] MAGIC_NUMBER = new byte[] { 'n', 'i', 'f' };
/**
* File format version, stored as the fourth byte in ID files.
*/
private static final byte FILE_FORMAT_VERSION = 1;
/**
* The size of the file header in bytes. The file header contains the following data: magic number (3 bytes) file
* format version (1 byte) and 4 dummy bytes to align data at 8-byte offsets.
*/
private static final long HEADER_LENGTH = 8;
private static final long ITEM_SIZE = 8L;
/*-----------*
* Variables *
*-----------*/
private final NioFile nioFile;
private final boolean forceSync;
// A cache for lines read from the file index. This cache is unlimited size and uses soft values to allow GC of
// unreferenced lines when under memory pressure.
private final Cache<Integer, Long[]> cache = CacheBuilder.newBuilder().softValues().build();
// We choose a cacheLineSize of 4KB since this is a typical file system block size.
private final int blockSize = 4 * 1024; // 4KB
private final int cacheLineSize = (int) (blockSize / ITEM_SIZE);
private final int cacheLineShift = LongMath.log2(cacheLineSize, RoundingMode.UNNECESSARY);
// keeping a reference of the last created cache line here should stop GC from removing it
private int gcReducingCacheIndex;
private Long[] gcReducingCache;
// cached file size
private volatile long nioFileSize;
/*--------------*
* Constructors *
*--------------*/
public IDFile(File file) throws IOException {
this(file, false);
nioFileSize = nioFile.size();
}
public IDFile(File file, boolean forceSync) throws IOException {
this.nioFile = new NioFile(file);
this.forceSync = forceSync;
try {
if (nioFile.size() == 0L) {
// Empty file, write header
nioFile.writeBytes(MAGIC_NUMBER, 0);
nioFile.writeByte(FILE_FORMAT_VERSION, 3);
nioFile.writeBytes(new byte[] { 0, 0, 0, 0 }, 4);
sync();
} else if (nioFile.size() < HEADER_LENGTH) {
throw new IOException("File too small to be a compatible ID file");
} else {
// Verify file header
if (!Arrays.equals(MAGIC_NUMBER, nioFile.readBytes(0, MAGIC_NUMBER.length))) {
throw new IOException("File doesn't contain compatible ID records");
}
byte version = nioFile.readByte(MAGIC_NUMBER.length);
if (version > FILE_FORMAT_VERSION) {
throw new IOException("Unable to read ID file; it uses a newer file format");
} else if (version != FILE_FORMAT_VERSION) {
throw new IOException("Unable to read ID file; invalid file format version: " + version);
}
}
} catch (IOException e) {
this.nioFile.close();
throw e;
}
nioFileSize = nioFile.size();
}
/*---------*
* Methods *
*---------*/
public final File getFile() {
return nioFile.getFile();
}
/**
* Gets the largest ID that is stored in this ID file.
*
* @return The largest ID, or <var>0</var> if the file does not contain any data.
*/
public int getMaxID() {
return (int) (nioFileSize / ITEM_SIZE) - 1;
}
/**
* Stores the offset of a new data entry, returning the ID under which is stored.
*/
public int storeOffset(long offset) throws IOException {
long fileSize = nioFileSize;
nioFile.writeLong(offset, fileSize);
nioFileSize += ITEM_SIZE;
return (int) (fileSize / ITEM_SIZE);
}
/**
* Sets or updates the stored offset for the specified ID.
*
* @param id The ID to set the offset for, must be larger than 0.
* @param offset The (new) offset for the specified ID.
*/
public void setOffset(int id, long offset) throws IOException {
assert id > 0 : "id must be larger than 0, is: " + id;
nioFile.writeLong(offset, ITEM_SIZE * id);
// We need to update the cache after writing to file (not before) so that if anyone refreshes the cache it will
// include the write above.
// The scenario is as follows:
// 1. there is nothing in the cache, everything is fine
// 2. the relevant cache line is from before the writeLong operation above, in which case we update it
// 3. the relevant cache line is from right after the write in which case updating it doesnt matter
int cacheLookupIndex = id >> cacheLineShift;
int cacheLineLookupIndex = id % cacheLineSize;
Long[] cacheLine = getCacheLine(cacheLookupIndex);
if (cacheLine != null) {
cacheLine[cacheLineLookupIndex] = offset;
}
}
/**
* Gets the offset of the data entry with the specified ID.
*
* @param id The ID to get the offset for, must be larger than 0.
* @return The offset for the ID.
*/
public long getOffset(int id) throws IOException {
assert id > 0 : "id must be larger than 0, is: " + id;
// the index used to lookup the cache line
int cacheLookupIndex = id >> cacheLineShift;
// the index used to lookup the actual value inside the cache line
int cacheLineLookupIndex = id % cacheLineSize;
// the cache line which is of size cacheLineSize
Long[] cacheLine = getCacheLine(cacheLookupIndex);
if (cacheLine != null) {
return cacheLine[cacheLineLookupIndex];
}
// We only cache complete lines of size cacheLineSize. This means that the last line in the file will almost
// never be cached. This simplifies the code since we don't have to deal with partial lines.
if (getMaxID() > cacheLineSize && id < getMaxID() - cacheLineSize) {
// doing one big read is considerably faster than doing a single read per id
byte[] bytes = nioFile.readBytes(ITEM_SIZE * (cacheLookupIndex << cacheLineShift),
(int) (ITEM_SIZE * cacheLineSize));
cacheLine = convertBytesToLongs(bytes);
synchronized (this) {
// we try not to overwrite an existing cache line
if (cache.getIfPresent(cacheLineLookupIndex) == null) {
cache.put(cacheLookupIndex, cacheLine);
}
gcReducingCache = cacheLine;
gcReducingCacheIndex = cacheLookupIndex;
}
return cacheLine[cacheLineLookupIndex];
}
// we did not find a cached value and we did not create a new cache line
return nioFile.readLong(ITEM_SIZE * id);
}
/**
* Discards all stored data.
*
* @throws IOException If an I/O error occurred.
*/
public void clear() throws IOException {
nioFile.truncate(HEADER_LENGTH);
nioFileSize = nioFile.size();
clearCache();
}
/**
* Syncs any unstored data to the hash file.
*/
public void sync() throws IOException {
if (forceSync) {
nioFile.force(false);
}
}
public void sync(boolean force) throws IOException {
nioFile.force(false);
}
/**
* Closes the ID file, releasing any file locks that it might have.
*
* @throws IOException
*/
@Override
public void close() throws IOException {
nioFile.close();
}
synchronized private Long[] getCacheLine(int cacheLookupIndex) {
if (cacheLookupIndex == gcReducingCacheIndex) {
return gcReducingCache;
} else {
return cache.getIfPresent(cacheLookupIndex);
}
}
private Long[] convertBytesToLongs(byte[] bytes) {
Long[] cacheLine;
cacheLine = new Long[cacheLineSize];
for (int i = 0; i < bytes.length; i += ITEM_SIZE) {
long l = ((long) (bytes[i + 0] & 0xff) << 56)
| ((long) bytes[i + 1] & 0xff) << 48
| ((long) bytes[i + 2] & 0xff) << 40
| ((long) bytes[i + 3] & 0xff) << 32
| ((long) bytes[i + 4] & 0xff) << 24
| ((long) bytes[i + 5] & 0xff) << 16
| ((long) bytes[i + 6] & 0xff) << 8
| ((long) bytes[i + 7] & 0xff);
cacheLine[(int) (i / ITEM_SIZE)] = l;
}
return cacheLine;
}
synchronized public void clearCache() {
cache.invalidateAll();
gcReducingCacheIndex = -1;
gcReducingCache = null;
}
}