TestSymbolTable.java
package wstxtest.util;
import java.util.HashSet;
import java.util.Set;
import com.ctc.wstx.util.SymbolTable;
import org.junit.jupiter.api.Test;
/**
* Unit tests for {@link SymbolTable}, focused on the seeded hashing
* introduced for woodstox issue #12 (hash-flooding defense).
*/
public class TestSymbolTable
extends wstxtest.BaseJUnit4Test
{
@Test
public void testSeedsDifferAcrossInstances()
{
// 32-bit random seeds should be effectively unique across a small
// number of fresh instances; if many of these collide, ThreadLocalRandom
// is broken.
Set<Integer> seen = new HashSet<>();
for (int i = 0; i < 32; ++i) {
seen.add(new SymbolTable().getHashSeed());
}
assertTrue("Expected SymbolTable seeds to vary across instances, got: " + seen,
seen.size() > 1);
}
@Test
public void testChildInheritsParentSeed()
{
SymbolTable master = new SymbolTable();
SymbolTable child = master.makeChild();
assertEquals("Child must reuse parent's seed so it can read parent's buckets",
master.getHashSeed(), child.getHashSeed());
}
@Test
public void testSeededHashMatchesInlineComputation()
{
SymbolTable table = new SymbolTable();
int seed = table.getHashSeed();
String key = "elementName";
// Recreate what StreamScanner does inline while parsing a name:
int inline = seed ^ key.charAt(0);
for (int i = 1, len = key.length(); i < len; ++i) {
inline = (inline * 31) + key.charAt(i);
}
inline = SymbolTable.finalizeHash(inline);
char[] buf = key.toCharArray();
int viaCharBuf = SymbolTable.calcHash(buf, 0, buf.length, seed);
int viaString = SymbolTable.calcHash(key, seed);
assertEquals("char[] calcHash must equal inline compute + finalizer",
inline, viaCharBuf);
assertEquals("String calcHash must equal inline compute + finalizer",
inline, viaString);
}
@Test
public void testBucketIndexVariesWithSeed()
{
// Hash-flooding defense rests on the attacker not being able to predict
// which bucket a given key lands in. Verify that for one fixed key, the
// low-bit "bucket index" reached across many independently-seeded
// tables actually varies ��� i.e. the seed is genuinely folded into the
// bits an attacker would target.
final String key = "elementName";
final int mask = 0xFF;
Set<Integer> indexes = new HashSet<>();
for (int i = 0; i < 64; ++i) {
int seed = new SymbolTable().getHashSeed();
indexes.add(SymbolTable.calcHash(key, seed) & mask);
}
assertTrue("Bucket index must depend on seed; got: " + indexes,
indexes.size() > 1);
}
@Test
public void testSeedSeparatesLegacyCollisionPair()
{
// "Aa" and "BB" share the legacy multiply-31 hash. The XOR-with-seed
// scheme can't break the collision for every seed (low-bit collapses
// survive), but it must break it for the *majority* of seeds ���
// empirically ~75%. Verify the rate stays above a safe floor.
final int trials = 2000;
int separated = 0;
java.util.Random rng = new java.util.Random(42);
for (int i = 0; i < trials; ++i) {
int seed = rng.nextInt();
if (SymbolTable.calcHash("Aa", seed) != SymbolTable.calcHash("BB", seed)) {
++separated;
}
}
// Expected rate is 3/4 (1500); leave generous headroom for variance.
assertTrue("Seed should break the Aa/BB collision for most seeds, but only "
+ separated + " of " + trials + " did", separated > 1200);
}
@Test
public void testLookupRoundTripWithRandomSeed()
{
SymbolTable table = new SymbolTable(false);
int seed = table.getHashSeed();
String[] keys = {
"alpha", "beta", "gamma", "delta", "epsilon",
"Aa", "BB", "x", "longer-element-name", "ns:prefixed"
};
String[] interned = new String[keys.length];
for (int i = 0; i < keys.length; ++i) {
char[] buf = keys[i].toCharArray();
int hash = SymbolTable.calcHash(buf, 0, buf.length, seed);
interned[i] = table.findSymbol(buf, 0, buf.length, hash);
assertEquals(keys[i], interned[i]);
}
// Second pass: same hash + chars must return identity-equal String.
for (int i = 0; i < keys.length; ++i) {
char[] buf = keys[i].toCharArray();
int hash = SymbolTable.calcHash(buf, 0, buf.length, seed);
assertSame("Second lookup must return the interned instance for " + keys[i],
interned[i], table.findSymbol(buf, 0, buf.length, hash));
}
}
@Test
public void testRehashPreservesLookups()
{
// Force well past the default fill threshold (96 entries) so rehash runs.
SymbolTable table = new SymbolTable(false, 16);
final int n = 500;
String[] keys = new String[n];
for (int i = 0; i < n; ++i) {
keys[i] = "name-" + i;
table.findSymbol(keys[i]);
}
// All entries must still be retrievable after multiple rehashes.
for (int i = 0; i < n; ++i) {
assertEquals(keys[i], table.findSymbol(keys[i]));
}
assertEquals(n, table.size());
}
}