BitSetUtil.java
package org.roaringbitmap;
import static java.lang.Long.numberOfTrailingZeros;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.BitSet;
/***
*
* This class provides convenience functions to manipulate BitSet and RoaringBitmap objects.
*
*/
public class BitSetUtil {
// a block consists has a maximum of 1024 words, each representing 64 bits,
// thus representing at maximum 65536 bits
public static final int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; //
// 64-bit
// word
/**
* Convert a {@link RoaringBitmap} to a {@link BitSet}.
* <p>
* Equivalent to calling {@code BitSet.valueOf(BitSetUtil.toLongArray(bitmap))}.
*/
public static BitSet bitsetOf(RoaringBitmap bitmap) {
return BitSet.valueOf(toLongArray(bitmap));
}
/**
* Convert a {@link RoaringBitmap} to a {@link BitSet} without copying to an intermediate array.
*/
public static BitSet bitsetOfWithoutCopy(RoaringBitmap bitmap) {
if (bitmap.isEmpty()) {
return new BitSet(0);
}
int last = bitmap.last();
if (last < 0) {
throw new IllegalArgumentException("bitmap has negative bits set");
}
BitSet bitSet = new BitSet(last);
bitmap.forEach((IntConsumer) bitSet::set);
return bitSet;
}
/**
* Returns an array of little-endian ordered bytes, given a {@link RoaringBitmap}.
* <p>
* See {@link BitSet#toByteArray()}.
*/
public static byte[] toByteArray(RoaringBitmap bitmap) {
long[] words = toLongArray(bitmap);
ByteBuffer buffer =
ByteBuffer.allocate(words.length * Long.SIZE).order(ByteOrder.LITTLE_ENDIAN);
buffer.asLongBuffer().put(words);
return buffer.array();
}
/**
* Returns an array of long, given a {@link RoaringBitmap}.
* <p>
* See {@link BitSet#toLongArray()}.
*/
public static long[] toLongArray(RoaringBitmap bitmap) {
if (bitmap.isEmpty()) {
return new long[0];
}
int last = bitmap.last();
if (last < 0) {
throw new IllegalArgumentException("bitmap has negative bits set");
}
int lastBit = Math.max(last, Long.SIZE);
int remainder = lastBit % Long.SIZE;
int numBits = remainder > 0 ? lastBit - remainder : lastBit;
int wordsInUse = numBits / Long.SIZE + 1;
long[] words = new long[wordsInUse];
ContainerPointer pointer = bitmap.getContainerPointer();
int numContainers = Math.max(words.length / BLOCK_LENGTH, 1);
int position = 0;
for (int i = 0; i <= numContainers; i++) {
char key = Util.lowbits(i);
if (key == pointer.key()) {
Container container = pointer.getContainer();
int remaining = wordsInUse - position;
int length = Math.min(BLOCK_LENGTH, remaining);
if (container instanceof BitmapContainer) {
((BitmapContainer) container).copyBitmapTo(words, position, length);
} else {
container.copyBitmapTo(words, position);
}
position += length;
pointer.advance();
if (pointer.getContainer() == null) {
break;
}
} else {
position += BLOCK_LENGTH;
}
}
assert pointer.getContainer() == null;
assert position == wordsInUse;
return words;
}
/**
* Creates array container's content char buffer.
*
* @param from first value of the range
* @param to last value of the range
* @param cardinality new buffer cardinality, expected to be less than 4096 and more than present
* values in given bitmap
* @param words bitmap
* @return array container's content char buffer
*/
public static char[] arrayContainerBufferOf(
final int from, final int to, final int cardinality, final long[] words) {
return arrayContainerBufferOf(from, to, new char[cardinality], words);
}
/**
* Creates array container's content char buffer.
*
* @param from first value of the range
* @param to last value of the range
* @param buffer new buffer, expected to have size less than 4096 and more than present
* values in given bitmap
* @param words bitmap
* @return array container's content char buffer - the same as {@code buffer}
*/
public static char[] arrayContainerBufferOf(
final int from, final int to, final char[] buffer, final long[] words) {
// precondition: cardinality is max 4096
int base = 0;
int pos = 0;
for (int i = from; i < to; i++) {
long word = words[i];
while (word != 0L) {
buffer[pos++] = (char) (base + numberOfTrailingZeros(word));
word &= (word - 1);
}
base += 64;
}
return buffer;
}
private static ArrayContainer arrayContainerOf(
final int from, final int to, final int cardinality, final long[] words) {
return new ArrayContainer(arrayContainerBufferOf(from, to, cardinality, words));
}
/**
* Generate a RoaringBitmap out of a BitSet
*
* @param bitSet original bitset (will not be modified)
* @return roaring bitmap equivalent to BitSet
*/
public static RoaringBitmap bitmapOf(final BitSet bitSet) {
return bitmapOf(bitSet.toLongArray());
}
/**
* Generate a RoaringBitmap out of a long[], each long using little-endian representation of its
* bits
*
* @see BitSet#toLongArray() for an equivalent
* @param words array of longs (will not be modified)
* @return roaring bitmap
*/
public static RoaringBitmap bitmapOf(final long[] words) {
// split long[] into blocks.
// each block becomes a single container, if any bit is set
final RoaringBitmap ans = new RoaringBitmap();
int containerIndex = 0;
for (int from = 0; from < words.length; from += BLOCK_LENGTH) {
final int to = Math.min(from + BLOCK_LENGTH, words.length);
final int blockCardinality = cardinality(from, to, words);
if (blockCardinality > 0) {
ans.highLowContainer.insertNewKeyValueAt(
containerIndex++,
Util.highbits(from * Long.SIZE),
BitSetUtil.containerOf(from, to, blockCardinality, words));
}
}
return ans;
}
/**
* Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer
* This method tries to minimise all kinds of memory allocation
*
* @param bb the uncompressed bitmap
* @param fastRank if set, returned bitmap is of type
* {@link org.roaringbitmap.FastRankRoaringBitmap}
* @return roaring bitmap
*/
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) {
return bitmapOf(bb, fastRank, new long[BLOCK_LENGTH]);
}
/**
* Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer
* This method tries to minimise all kinds of memory allocation
* <br>
* You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call
* No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal
*
* @param bb the uncompressed bitmap
* @param fastRank if set, returned bitmap is of type
* {@link org.roaringbitmap.FastRankRoaringBitmap}
* @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH}
* @return roaring bitmap
*/
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank, long[] wordsBuffer) {
if (wordsBuffer.length != BLOCK_LENGTH) {
throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH);
}
bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
final RoaringBitmap ans = fastRank ? new FastRankRoaringBitmap() : new RoaringBitmap();
// split buffer into blocks of long[]
int containerIndex = 0;
int blockLength = 0, blockCardinality = 0, offset = 0;
long word;
while (bb.remaining() >= 8) {
word = bb.getLong();
// Add read long to block
wordsBuffer[blockLength++] = word;
blockCardinality += Long.bitCount(word);
// When block is full, add block to bitmap
if (blockLength == BLOCK_LENGTH) {
// Each block becomes a single container, if any bit is set
if (blockCardinality > 0) {
ans.highLowContainer.insertNewKeyValueAt(
containerIndex++,
Util.highbits(offset),
BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer));
}
/*
Offset can overflow when bitsets size is more than Integer.MAX_VALUE - 64
It's harmless though, as it will happen after the last block is added
*/
offset += (BLOCK_LENGTH * Long.SIZE);
blockLength = blockCardinality = 0;
}
}
if (bb.remaining() > 0) {
// Read remaining (less than 8) bytes
// We can do this in while loop also, it will probably slow things down a bit though
word = 0;
for (int remaining = bb.remaining(), j = 0; j < remaining; j++) {
word |= (bb.get() & 0xffL) << (8 * j);
}
// Add last word to block, only if any bit is set
if (word != 0) {
wordsBuffer[blockLength++] = word;
blockCardinality += Long.bitCount(word);
}
}
// Add block to map, if any bit is set
if (blockCardinality > 0) {
ans.highLowContainer.insertNewKeyValueAt(
containerIndex,
Util.highbits(offset),
BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer));
}
return ans;
}
private static int cardinality(final int from, final int to, final long[] words) {
int sum = 0;
for (int i = from; i < to; i++) {
sum += Long.bitCount(words[i]);
}
return sum;
}
private static Container containerOf(
final int from, final int to, final int blockCardinality, final long[] words) {
// find the best container available
if (blockCardinality <= ArrayContainer.DEFAULT_MAX_SIZE) {
// containers with DEFAULT_MAX_SIZE or less integers should be
// ArrayContainers
return arrayContainerOf(from, to, blockCardinality, words);
} else {
// otherwise use bitmap container
long[] container = new long[BLOCK_LENGTH];
System.arraycopy(words, from, container, 0, to - from);
return new BitmapContainer(container, blockCardinality);
}
}
/**
* Compares a RoaringBitmap and a BitSet. They are equal if and only if they contain the same set
* of integers.
*
* @param bitset first object to be compared
* @param bitmap second object to be compared
* @return whether they are equals
*/
public static boolean equals(final BitSet bitset, final RoaringBitmap bitmap) {
if (bitset.cardinality() != bitmap.getCardinality()) {
return false;
}
final IntIterator it = bitmap.getIntIterator();
while (it.hasNext()) {
int val = it.next();
if (!bitset.get(val)) {
return false;
}
}
return true;
}
}