StateSet.java
/*
* Copyright (C) 1998-2018 Gerwin Klein <lsf@jflex.de>
* Copyright (C) 2021 Google LLC
* SPDX-License-Identifier: BSD-3-Clause
*/
package jflex.state;
import java.util.Iterator;
import javax.annotation.Nullable;
import jflex.logging.Out;
/**
* A set of NFA states (= ints).
*
* <p>Similar to {@link java.util.BitSet}, but tuned for sets of states. Can hold at most {@code
* 2^64} elements, but is only useful for much smaller sets (ideally not more than tens of
* thousands).
*
* <p>Provides an Integer iterator and a native int enumerator.
*
* @author Gerwin Klein
* @version JFlex 1.10.0-SNAPSHOT
* @see StateSetEnumerator
*/
public final class StateSet implements Iterable<Integer> {
/** Compile time {@code DEBUG} setting, local to {@code StateSet} */
private final boolean DEBUG = false;
/** {@code 2^BITS} per word */
static final int BITS = 6;
static final int MASK = (1 << BITS) - 1;
/**
* Content of the {@code StateSet}, one bit per int, i.e. bit 0 of {@code bits[0]} stands for 0,
* bit 1 of {@code bits[0]} stands for 1, bit 1 of {@code bits[1]} stands for 65, etc.
*/
long[] bits;
/** Construct an empty StateSet with default memory backing. */
public StateSet() {
this(256);
}
/**
* Construct an empty StateSet with specified memory backing.
*
* @param size an int specifying the largest number this set will store. The StateSet will
* automatically resize of a larger number is added; specifying the size avoids re-allocation.
*/
public StateSet(int size) {
bits = new long[size2nbits(size)];
}
/**
* Construct an StateSet with specified initial element and memory backing.
*
* @param size an int specifying the largest number this set will store. The StateSet will
* automatically resize of a larger number is added; specifying the size avoids re-allocation.
* @param state the element the set should contain.
*/
public StateSet(int size, int state) {
this(size);
addState(state);
}
/**
* Copy the specified StateSet to create a new one.
*
* @param set the {@link StateSet} object to copy.
*/
public StateSet(StateSet set) {
bits = new long[set.bits.length];
System.arraycopy(set.bits, 0, bits, 0, set.bits.length);
}
/** Return a new StateSet of the specified length. */
public static StateSet emptySet(int length) {
return new StateSet(nbits2size(length));
}
/**
* Add an element (a state) to the set. Will automatically resize the set representation if
* necessary.
*
* @param state the element to add.
*/
public void addState(int state) {
if (DEBUG) Out.debug("StateSet.addState(" + state + ") start to " + this);
int index = state >> BITS;
if (index >= bits.length) resize(state);
bits[index] |= (1L << (state & MASK));
if (DEBUG) Out.debug("StateSet.addState(" + state + ") result: " + this);
}
/**
* Compute the array size for a given set size.
*
* @param size the desired size of the set.
* @return an array size such that the set can hold at least {@code size} elements.
*/
static int size2nbits(int size) {
return (size >> BITS) + 1;
}
/**
* Compute a set size that will lead to an array of the given length.
*
* <p>Precondition: length > 0 && length <= 2^58 (58=64-BITS)
*
* @param length desired length of the StateSet array
* @return an int {@code val} such that {@code size2nbits(val) = length}
*/
static int nbits2size(int length) {
// size2nbits((length - 1) << BITS)
// = (((length - 1) << BITS) >> BITS) + 1
// = length, if (length - 1) << BITS has no overflow
return (length - 1) << BITS;
}
/**
* Resize this set so it can hold at least {@code size} elements.
*
* @param size new maximum element
*/
private void resize(int size) {
int needed = size2nbits(size);
// grow at least by a factor of 4 to reduce number of re-allocations
long[] newbits = new long[Math.max(bits.length * 4, needed)];
System.arraycopy(bits, 0, newbits, 0, bits.length);
bits = newbits;
}
/** Remove all elements from this set. */
public void clear() {
int l = bits.length;
for (int i = 0; i < l; i++) bits[i] = 0;
}
/**
* Determine if a given state is an element of the set.
*
* @param state the element to check for.
* @return true iff this set has the element {@code state}.
*/
public boolean hasElement(int state) {
int index = state >> BITS;
if (index >= bits.length) return false;
return (bits[index] & (1L << (state & MASK))) != 0;
}
/**
* Returns an element of the set and removes it.
*
* <p>Precondition: the set is not empty.
*
* @return an element of the set.
*/
public int getAndRemoveElement() {
int i = 0;
int o = 0;
long m = 1;
while (bits[i] == 0) i++;
while ((bits[i] & m) == 0) {
m <<= 1;
o++;
}
bits[i] &= ~m;
return (i << BITS) + o;
}
/**
* Remove a given state from the set.
*
* @param state the element to remove.
*/
public void remove(int state) {
int index = state >> BITS;
if (index >= bits.length) return;
bits[index] &= ~(1L << (state & MASK));
}
/**
* Remove all states from {@code this} that are not contained in the provided {@link StateSet}.
*
* @param set the {@link StateSet} object to intersect with.
*/
public void intersect(StateSet set) {
if (set == null) {
clear();
} else {
int l = Math.min(bits.length, set.bits.length);
for (int i = 0; i < l; i++) bits[i] &= set.bits[i];
for (int i = l; i < bits.length; i++) bits[i] = 0;
}
}
/**
* Returns the complement of this set with respect to the specified set, that is, the set of
* elements that are contained in the specified set but are not contained in this set.
*
* <p>Does not change this set.
*
* @param univ the {@link StateSet} that determines which elements can at most be returned.
* @return the {@link StateSet} that contains all elements of {@code univ} that are not in this
* set.
*/
@Nullable
public StateSet complement(StateSet univ) {
if (univ == null) {
return null;
}
StateSet result = emptySet(univ.bits.length);
int i;
int m = Math.min(bits.length, univ.bits.length);
for (i = 0; i < m; i++) result.bits[i] = ~bits[i] & univ.bits[i];
if (bits.length < univ.bits.length)
System.arraycopy(univ.bits, m, result.bits, m, result.bits.length - m);
if (DEBUG) {
Out.debug("Complement of " + this + Out.NL + "and " + univ + Out.NL + " is :" + result);
}
return result;
}
/**
* Add all elements of the specified StateSet to this one.
*
* @param set a {@link StateSet} object to be added.
*/
public void add(StateSet set) {
if (DEBUG) Out.debug("StateSet.add(" + set + "), this = " + this);
if (set == null) return;
long[] this_bits;
long[] add_bits = set.bits;
int add_bits_length = add_bits.length;
if (bits.length < add_bits_length) {
this_bits = new long[add_bits_length];
System.arraycopy(bits, 0, this_bits, 0, bits.length);
} else {
this_bits = this.bits;
}
for (int i = 0; i < add_bits_length; i++) this_bits[i] |= add_bits[i];
this.bits = this_bits;
if (DEBUG) Out.debug("StateSet.add() result: " + this);
}
@Override
public boolean equals(@Nullable Object b) {
if (!(b instanceof StateSet)) {
return false;
}
int i = 0;
int l1, l2;
StateSet set = (StateSet) b;
l1 = bits.length;
l2 = set.bits.length;
if (l1 <= l2) {
while (i < l1) {
if (bits[i] != set.bits[i]) return false;
i++;
}
while (i < l2) if (set.bits[i++] != 0) return false;
} else {
while (i < l2) {
if (bits[i] != set.bits[i]) return false;
i++;
}
while (i < l1) if (bits[i++] != 0) return false;
}
return true;
}
@Override
public int hashCode() {
long h = 1234;
long[] _bits = bits;
int i = bits.length - 1;
// ignore zero high bits
while (i >= 0 && _bits[i] == 0) i--;
while (i >= 0) h ^= _bits[i--] * i;
return (int) ((h >> 32) ^ h);
}
/**
* Construct an enumerator for this set.
*
* @return a {@link StateSetEnumerator} object for this set.
*/
public StateSetEnumerator states() {
return new StateSetEnumerator(this);
}
/**
* Determine if the State set contains elements.
*
* @return true iff the set is not empty.
*/
public boolean containsElements() {
for (long bit : bits) if (bit != 0) return true;
return false;
}
/**
* Determine if the given set is a subset of this set.
*
* @param set the set to check containment of
* @return true iff {@code set} is contained in this set.
*/
public boolean contains(StateSet set) {
if (set.bits.length > bits.length) {
for (int i = bits.length; i < set.bits.length; i++) {
if (set.bits[i] != 0) return false;
}
}
for (int i = 0; i < Math.min(bits.length, set.bits.length); i++) {
if ((bits[i] | set.bits[i]) != bits[i]) return false;
}
return true;
}
/**
* Return a copy of this StateSet.
*
* @return a {@link StateSet} object with the same content as this.
*/
public StateSet copy() {
StateSet set = emptySet(bits.length);
System.arraycopy(bits, 0, set.bits, 0, bits.length);
return set;
}
/**
* Copy specified StateSet into this.
*
* @param set the state set to copy.
*/
public void copy(StateSet set) {
if (set == null) clear();
else {
if (bits.length < set.bits.length) bits = new long[set.bits.length];
else for (int i = set.bits.length; i < bits.length; i++) bits[i] = 0;
System.arraycopy(set.bits, 0, bits, 0, Math.min(bits.length, set.bits.length));
}
}
@Override
public String toString() {
StateSetEnumerator set = states();
StringBuilder result = new StringBuilder("{");
if (set.hasMoreElements()) {
result.append(set.nextElement());
}
while (set.hasMoreElements()) {
int i = set.nextElement();
result.append(", ").append(i);
}
result.append("}");
return result.toString();
}
/**
* Construct an Integer iterator for this StateSet.
*
* @return an iterator for this StateSet.
*/
@Override
public Iterator<Integer> iterator() {
return states();
}
/**
* Provide the max value that can be stored without a resize
*
* @return an int of the max value
*/
public int getCurrentMaxState() {
return (bits.length << BITS) | ~(0xFFFFFFFF << BITS);
}
}