TestCrcUtil.java
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.util.Objects;
import java.util.Random;
import java.util.function.ToIntFunction;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* Unittests for CrcUtil.
*/
@Timeout(10)
public class TestCrcUtil {
private static final Random RANDOM = new Random();
@Test
public void testComposeCrc32() {
byte[] data = new byte[64 * 1024];
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024 - 1, false);
}
@Test
public void testComposeCrc32c() {
byte[] data = new byte[64 * 1024];
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024 - 1, false);
}
@Test
public void testComposeCrc32WithMonomial() {
byte[] data = new byte[64 * 1024];
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024 - 1, true);
}
@Test
public void testComposeCrc32cWithMonomial() {
byte[] data = new byte[64 * 1024];
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024 - 1, true);
}
@Test
public void testComposeCrc32ZeroLength() {
doTestComposeCrcZerolength(DataChecksum.Type.CRC32);
}
@Test
public void testComposeCrc32CZeroLength() {
doTestComposeCrcZerolength(DataChecksum.Type.CRC32C);
}
/**
* Helper method to compare a DataChecksum-computed end-to-end CRC against
* a piecewise-computed CRC that uses CrcUtil.compose on "chunk CRCs"
* corresponding to ever {@code chunkSize} bytes.
*/
private static void doTestComposeCrc(
byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial) {
final ToIntFunction<Long> mod = DataChecksum.getModFunction(type);
// Get full end-to-end CRC in a single shot first.
DataChecksum checksum = DataChecksum.newDataChecksum(
type, Integer.MAX_VALUE);
Objects.requireNonNull(checksum, "checksum");
checksum.update(data, 0, data.length);
int fullCrc = (int) checksum.getValue();
// Now compute CRCs of each chunk individually first, and compose them in a
// second pass to compare to the end-to-end CRC.
int compositeCrc = 0;
int crcMonomial =
useMonomial ? CrcUtil.getMonomial(chunkSize, mod) : 0;
for (int offset = 0;
offset + chunkSize <= data.length;
offset += chunkSize) {
checksum.reset();
checksum.update(data, offset, chunkSize);
int partialCrc = (int) checksum.getValue();
if (useMonomial) {
compositeCrc = CrcUtil.composeWithMonomial(
compositeCrc, partialCrc, crcMonomial, mod);
} else {
compositeCrc = CrcUtil.compose(
compositeCrc, partialCrc, chunkSize, mod);
}
}
// There may be a final partial chunk smaller than chunkSize.
int partialChunkSize = data.length % chunkSize;
if (partialChunkSize > 0) {
checksum.reset();
checksum.update(data, data.length - partialChunkSize, partialChunkSize);
int partialCrc = (int) checksum.getValue();
compositeCrc = CrcUtil.compose(
compositeCrc, partialCrc, partialChunkSize, mod);
}
assertEquals(fullCrc, compositeCrc, String.format(
"Using CRC type '%s' and chunkSize '%d', expected '0x%08x', got '0x%08x'",
type, chunkSize, fullCrc, compositeCrc));
}
/**
* Helper method for testing the behavior of composing a CRC with a
* zero-length second CRC.
*/
private static void doTestComposeCrcZerolength(DataChecksum.Type type) {
// Without loss of generality, we can pick any integer as our fake crcA
// even if we don't happen to know the preimage.
int crcA = 0xCAFEBEEF;
final ToIntFunction<Long> mod = DataChecksum.getModFunction(type);
DataChecksum checksum = DataChecksum.newDataChecksum(
type, Integer.MAX_VALUE);
Objects.requireNonNull(checksum, "checksum");
int crcB = (int) checksum.getValue();
assertEquals(crcA, CrcUtil.compose(crcA, crcB, 0, mod));
int monomial = CrcUtil.getMonomial(0, mod);
assertEquals(
crcA, CrcUtil.composeWithMonomial(crcA, crcB, monomial, mod));
}
@Test
public void testIntSerialization() {
byte[] bytes = CrcUtil.intToBytes(0xCAFEBEEF);
assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0));
bytes = new byte[8];
CrcUtil.writeInt(bytes, 0, 0xCAFEBEEF);
assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0));
CrcUtil.writeInt(bytes, 4, 0xABCDABCD);
assertEquals(0xABCDABCD, CrcUtil.readInt(bytes, 4));
// Assert big-endian format for general Java consistency.
assertEquals(0xBEEFABCD, CrcUtil.readInt(bytes, 2));
}
@Test
public void testToSingleCrcStringBadLength()
throws Exception {
LambdaTestUtils.intercept(
IllegalArgumentException.class,
"length",
() -> CrcUtil.toSingleCrcString(new byte[8]));
}
@Test
public void testToSingleCrcString() {
byte[] buf = CrcUtil.intToBytes(0xcafebeef);
assertEquals(
"0xcafebeef", CrcUtil.toSingleCrcString(buf));
}
@Test
public void testToMultiCrcStringBadLength()
throws Exception {
LambdaTestUtils.intercept(
IllegalArgumentException.class,
"length",
() -> CrcUtil.toMultiCrcString(new byte[6]));
}
@Test
public void testToMultiCrcStringMultipleElements() {
byte[] buf = new byte[12];
CrcUtil.writeInt(buf, 0, 0xcafebeef);
CrcUtil.writeInt(buf, 4, 0xababcccc);
CrcUtil.writeInt(buf, 8, 0xddddefef);
assertEquals(
"[0xcafebeef, 0xababcccc, 0xddddefef]",
CrcUtil.toMultiCrcString(buf));
}
@Test
public void testToMultiCrcStringSingleElement() {
byte[] buf = new byte[4];
CrcUtil.writeInt(buf, 0, 0xcafebeef);
assertEquals(
"[0xcafebeef]",
CrcUtil.toMultiCrcString(buf));
}
@Test
public void testToMultiCrcStringNoElements() {
assertEquals(
"[]",
CrcUtil.toMultiCrcString(new byte[0]));
}
@Test
public void testMultiplyMod() {
runTestMultiplyMod(10_000_000, DataChecksum.Type.CRC32);
runTestMultiplyMod(10_000_000, DataChecksum.Type.CRC32C);
}
private static long[] runTestMultiplyMod(int n, DataChecksum.Type type) {
System.out.printf("Run %s with %d computations%n", type, n);
final int polynomial = getCrcPolynomialForType(type);
final ToIntFunction<Long> mod = DataChecksum.getModFunction(type);
final int[] p = new int[n];
final int[] q = new int[n];
for (int i = 0; i < n; i++) {
p[i] = RANDOM.nextInt();
q[i] = RANDOM.nextInt();
}
final int[] expected = new int[n];
final long[] times = new long[2];
final long t0 = System.currentTimeMillis();
for (int i = 0; i < n; i++) {
expected[i] = galoisFieldMultiply(p[i], q[i], polynomial);
}
times[0] = System.currentTimeMillis() - t0;
final double ops0 = n * 1000.0 / times[0];
System.out.printf("galoisFieldMultiply: %.3fs (%.2f ops)%n", times[0] / 1000.0, ops0);
final int[] computed = new int[n];
final long t1 = System.currentTimeMillis();
for (int i = 0; i < n; i++) {
computed[i] = CrcUtil.multiplyMod(p[i], q[i], mod);
}
times[1] = System.currentTimeMillis() - t1;
final double ops1 = n * 1000.0 / times[1];
System.out.printf("multiplyCrc32 : %.3fs (%.2f ops)%n", times[1] / 1000.0, ops1);
System.out.printf("multiplyCrc32 is %.2f%% faster%n", (ops1 - ops0) * 100.0 / ops0);
for (int i = 0; i < n; i++) {
if (expected[i] != computed[i]) {
System.out.printf("expected %08X%n", expected[i]);
System.out.printf("computed %08X%n", computed[i]);
throw new IllegalStateException();
}
}
return times;
}
/**
* getCrcPolynomialForType.
*
* @param type type.
* @return the int representation of the polynomial associated with the
* CRC {@code type}, suitable for use with further CRC arithmetic.
*/
private static int getCrcPolynomialForType(DataChecksum.Type type) {
switch (type) {
case CRC32:
return CrcUtil.GZIP_POLYNOMIAL;
case CRC32C:
return CrcUtil.CASTAGNOLI_POLYNOMIAL;
default:
throw new IllegalArgumentException("Unexpected type: " + type);
}
}
/**
* Galois field multiplication of {@code p} and {@code q} with the
* generator polynomial {@code m} as the modulus.
*
* @param m The little-endian polynomial to use as the modulus when
* multiplying p and q, with implicit "1" bit beyond the bottom bit.
*/
private static int galoisFieldMultiply(int p, int q, int m) {
int summation = 0;
// Top bit is the x^0 place; each right-shift increments the degree of the
// current term.
int curTerm = CrcUtil.MULTIPLICATIVE_IDENTITY;
// Iteratively multiply p by x mod m as we go to represent the q[i] term
// (of degree x^i) times p.
int px = p;
while (curTerm != 0) {
if ((q & curTerm) != 0) {
summation ^= px;
}
// Bottom bit represents highest degree since we're little-endian; before
// we multiply by "x" for the next term, check bottom bit to know whether
// the resulting px will thus have a term matching the implicit "1" term
// of "m" and thus will need to subtract "m" after mutiplying by "x".
boolean hasMaxDegree = ((px & 1) != 0);
px >>>= 1;
if (hasMaxDegree) {
px ^= m;
}
curTerm >>>= 1;
}
return summation;
}
/** For running benchmarks. */
public static class Benchmark {
/**
* Usages: java {@link Benchmark} [m] [n] [type]
* m: the number of iterations
* n: the number of multiplication
* type: the CRC type, either CRC32 or CRC32C.
*/
public static void main(String[] args) throws Exception {
final int m = args.length >= 1? Integer.parseInt(args[0]) : 10;
final int n = args.length >= 2? Integer.parseInt(args[1]) : 100_000_000;
final DataChecksum.Type type = args.length >= 3? DataChecksum.Type.valueOf(args[2])
: DataChecksum.Type.CRC32;
final int warmUpIterations = 2;
System.out.printf("%nStart warming up with %d iterations ...%n", warmUpIterations);
for (int i = 0; i < 2; i++) {
runTestMultiplyMod(n, type);
}
System.out.printf("%nStart benchmark with %d iterations ...%n", m);
final long[] times = new long[2];
for (int i = 0; i < m; i++) {
System.out.printf("%d) ", i);
final long[] t = runTestMultiplyMod(n, type);
times[0] += t[0];
times[1] += t[1];
}
System.out.printf("%nResult) %d x %d computations:%n", m, n);
final double ops0 = n * 1000.0 / times[0];
System.out.printf("galoisFieldMultiply: %.3fs (%.2f ops)%n", times[0] / 1000.0, ops0);
final double ops1 = n * 1000.0 / times[1];
System.out.printf("multiplyCrc32 : %.3fs (%.2f ops)%n", times[1] / 1000.0, ops1);
System.out.printf("multiplyCrc32 is %.2f%% faster%n", (ops1 - ops0) * 100.0 / ops0);
}
}
}