/src/abseil-cpp/absl/crc/internal/crc.cc

Source
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Implementation of CRCs (aka Rabin Fingerprints).
// Treats the input as a polynomial with coefficients in Z(2),
// and finds the remainder when divided by an irreducible polynomial
// of the appropriate length.
// It handles all CRC sizes from 8 to 128 bits.
// It's somewhat complicated by having separate implementations optimized for
// CRC's <=32 bits, <= 64 bits, and <= 128 bits.
// The input string is prefixed with a "1" bit, and has "degree" "0" bits
// appended to it before the remainder is found.   This ensures that
// short strings are scrambled somewhat and that strings consisting
// of all nulls have a non-zero CRC.
//
// Uses the "interleaved word-by-word" method from
// "Everything we know about CRC but afraid to forget" by Andrew Kadatch
// and Bob Jenkins,
// http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
//
// The idea is to compute kStride CRCs simultaneously, allowing the
// processor to more effectively use multiple execution units. Each of
// the CRCs is calculated on one word of data followed by kStride - 1
// words of zeroes; the CRC starting points are staggered by one word.
// Assuming a stride of 4 with data words "ABCDABCDABCD", the first
// CRC is over A000A000A, the second over 0B000B000B, and so on.
// The CRC of the whole data is then calculated by properly aligning the
// CRCs by appending zeroes until the data lengths agree then XORing
// the CRCs.

#include "absl/crc/internal/crc.h"

#include <cstdint>

#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/prefetch.h"
#include "absl/crc/internal/crc_internal.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {

namespace {

// Constants
#if defined(__i386__) || defined(__x86_64__)
constexpr bool kNeedAlignedLoads = false;
#else
constexpr bool kNeedAlignedLoads = true;
#endif

// We express the number of zeroes as a number in base ZEROES_BASE. By
// pre-computing the zero extensions for all possible components of such an
// expression (numbers in a form a*ZEROES_BASE**b), we can calculate the
// resulting extension by multiplying the extensions for individual components
// using log_{ZEROES_BASE}(num_zeroes) polynomial multiplications. The tables of
// zero extensions contain (ZEROES_BASE - 1) * (log_{ZEROES_BASE}(64)) entries.
constexpr int ZEROES_BASE_LG = 4;                   // log_2(ZEROES_BASE)
constexpr int ZEROES_BASE = (1 << ZEROES_BASE_LG);  // must be a power of 2

constexpr uint32_t kCrc32cPoly = 0x82f63b78;

uint32_t ReverseBits(uint32_t bits) {
  bits = (bits & 0xaaaaaaaau) >> 1 | (bits & 0x55555555u) << 1;
  bits = (bits & 0xccccccccu) >> 2 | (bits & 0x33333333u) << 2;
  bits = (bits & 0xf0f0f0f0u) >> 4 | (bits & 0x0f0f0f0fu) << 4;
  return absl::gbswap_32(bits);
}

// Polynomial long multiplication mod the polynomial of degree 32.
void PolyMultiply(uint32_t* val, uint32_t m, uint32_t poly) {
  uint32_t l = *val;
  uint32_t result = 0;
  auto onebit = uint32_t{0x80000000u};
  for (uint32_t one = onebit; one != 0; one >>= 1) {
    if ((l & one) != 0) {
      result ^= m;
    }
    if (m & 1) {
      m = (m >> 1) ^ poly;
    } else {
      m >>= 1;
    }
  }
  *val = result;
}
}  // namespace

void CRCImpl::FillWordTable(uint32_t poly, uint32_t last, int word_size,
                            Uint32By256* t) {
  for (int j = 0; j != word_size; j++) {  // for each byte of extension....
    t[j][0] = 0;                          // a zero has no effect
    for (int i = 128; i != 0; i >>= 1) {  // fill in entries for powers of 2
      if (j == 0 && i == 128) {
        t[j][i] = last;  // top bit in last byte is given
      } else {
        // each successive power of two is derived from the previous
        // one, either in this table, or the last table
        uint32_t pred;
        if (i == 128) {
          pred = t[j - 1][1];
        } else {
          pred = t[j][i << 1];
        }
        // Advance the CRC by one bit (multiply by X, and take remainder
        // through one step of polynomial long division)
        if (pred & 1) {
          t[j][i] = (pred >> 1) ^ poly;
        } else {
          t[j][i] = pred >> 1;
        }
      }
    }
    // CRCs have the property that CRC(a xor b) == CRC(a) xor CRC(b)
    // so we can make all the tables for non-powers of two by
    // xoring previously created entries.
    for (int i = 2; i != 256; i <<= 1) {
      for (int k = i + 1; k != (i << 1); k++) {
        t[j][k] = t[j][i] ^ t[j][k - i];
      }
    }
  }
}

int CRCImpl::FillZeroesTable(uint32_t poly, Uint32By256* t) {
  uint32_t inc = 1;
  inc <<= 31;

  // Extend by one zero bit. We know degree > 1 so (inc & 1) == 0.
  inc >>= 1;

  // Now extend by 2, 4, and 8 bits, so now `inc` is extended by one zero byte.
  for (int i = 0; i < 3; ++i) {
    PolyMultiply(&inc, inc, poly);
  }

  int j = 0;
  for (uint64_t inc_len = 1; inc_len != 0; inc_len <<= ZEROES_BASE_LG) {
    // Every entry in the table adds an additional inc_len zeroes.
    uint32_t v = inc;
    for (int a = 1; a != ZEROES_BASE; a++) {
      t[0][j] = v;
      PolyMultiply(&v, inc, poly);
      j++;
    }
    inc = v;
  }
  ABSL_RAW_CHECK(j <= 256, "");
  return j;
}

// Internal version of the "constructor".
CRCImpl* CRCImpl::NewInternal() {
  // Find an accelearated implementation first.
  CRCImpl* result = TryNewCRC32AcceleratedX86ARMCombined();

  // Fall back to generic implementions if no acceleration is available.
  if (result == nullptr) {
    result = new CRC32();
  }

  result->InitTables();

  return result;
}

//  The 32-bit implementation

void CRC32::InitTables() {
  // Compute the table for extending a CRC by one byte.
  Uint32By256* t = new Uint32By256[4];
  FillWordTable(kCrc32cPoly, kCrc32cPoly, 1, t);
  for (int i = 0; i != 256; i++) {
    this->table0_[i] = t[0][i];
  }

  // Construct a table for updating the CRC by 4 bytes data followed by
  // 12 bytes of zeroes.
  //
  // Note: the data word size could be larger than the CRC size; it might
  // be slightly faster to use a 64-bit data word, but doing so doubles the
  // table size.
  uint32_t last = kCrc32cPoly;
  const size_t size = 12;
  for (size_t i = 0; i < size; ++i) {
    last = (last >> 8) ^ this->table0_[last & 0xff];
  }
  FillWordTable(kCrc32cPoly, last, 4, t);
  for (size_t b = 0; b < 4; ++b) {
    for (int i = 0; i < 256; ++i) {
      this->table_[b][i] = t[b][i];
    }
  }

  int j = FillZeroesTable(kCrc32cPoly, t);
  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->zeroes_)), "");
  for (int i = 0; i < j; i++) {
    this->zeroes_[i] = t[0][i];
  }

  delete[] t;

  // Build up tables for _reversing_ the operation of doing CRC operations on
  // zero bytes.

  // In C++, extending `crc` by a single zero bit is done by the following:
  // (A)  bool low_bit_set = (crc & 1);
  //      crc >>= 1;
  //      if (low_bit_set) crc ^= kCrc32cPoly;
  //
  // In particular note that the high bit of `crc` after this operation will be
  // set if and only if the low bit of `crc` was set before it.  This means that
  // no information is lost, and the operation can be reversed, as follows:
  // (B)  bool high_bit_set = (crc & 0x80000000u);
  //      if (high_bit_set) crc ^= kCrc32cPoly;
  //      crc <<= 1;
  //      if (high_bit_set) crc ^= 1;
  //
  // Or, equivalently:
  // (C)  bool high_bit_set = (crc & 0x80000000u);
  //      crc <<= 1;
  //      if (high_bit_set) crc ^= ((kCrc32cPoly << 1) ^ 1);
  //
  // The last observation is, if we store our checksums in variable `rcrc`,
  // with order of the bits reversed, the inverse operation becomes:
  // (D)  bool low_bit_set = (rcrc & 1);
  //      rcrc >>= 1;
  //      if (low_bit_set) rcrc ^= ReverseBits((kCrc32cPoly << 1) ^ 1)
  //
  // This is the same algorithm (A) that we started with, only with a different
  // polynomial bit pattern.  This means that by building up our tables with
  // this alternate polynomial, we can apply the CRC algorithms to a
  // bit-reversed CRC checksum to perform inverse zero-extension.

  const uint32_t kCrc32cUnextendPoly =
      ReverseBits(static_cast<uint32_t>((kCrc32cPoly << 1) ^ 1));
  FillWordTable(kCrc32cUnextendPoly, kCrc32cUnextendPoly, 1, &reverse_table0_);

  j = FillZeroesTable(kCrc32cUnextendPoly, &reverse_zeroes_);
  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->reverse_zeroes_)),
                 "");
}

void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
  const uint8_t* p = static_cast<const uint8_t*>(bytes);
  const uint8_t* e = p + length;
  uint32_t l = *crc;

  auto step_one_byte = [this, &p, &l]() {
    int c = (l & 0xff) ^ *p++;
    l = this->table0_[c] ^ (l >> 8);
  };

  if (kNeedAlignedLoads) {
    // point x at first 4-byte aligned byte in string. this might be past the
    // end of the string.
    const uint8_t* x = RoundUp<4>(p);
    if (x <= e) {
      // Process bytes until finished or p is 4-byte aligned
      while (p != x) {
        step_one_byte();
      }
    }
  }

  const size_t kSwathSize = 16;
  if (static_cast<size_t>(e - p) >= kSwathSize) {
    // Load one swath of data into the operating buffers.
    uint32_t buf0 = absl::little_endian::Load32(p) ^ l;
    uint32_t buf1 = absl::little_endian::Load32(p + 4);
    uint32_t buf2 = absl::little_endian::Load32(p + 8);
    uint32_t buf3 = absl::little_endian::Load32(p + 12);
    p += kSwathSize;

    // Increment a CRC value by a "swath"; this combines the four bytes
    // starting at `ptr` and twelve zero bytes, so that four CRCs can be
    // built incrementally and combined at the end.
    const auto step_swath = [this](uint32_t crc_in, const std::uint8_t* ptr) {
      return absl::little_endian::Load32(ptr) ^
             this->table_[3][crc_in & 0xff] ^
             this->table_[2][(crc_in >> 8) & 0xff] ^
             this->table_[1][(crc_in >> 16) & 0xff] ^
             this->table_[0][crc_in >> 24];
    };

    // Run one CRC calculation step over all swaths in one 16-byte stride
    const auto step_stride = [&]() {
      buf0 = step_swath(buf0, p);
      buf1 = step_swath(buf1, p + 4);
      buf2 = step_swath(buf2, p + 8);
      buf3 = step_swath(buf3, p + 12);
      p += 16;
    };

    // Process kStride interleaved swaths through the data in parallel.
    while ((e - p) > kPrefetchHorizon) {
      PrefetchToLocalCacheNta(
          reinterpret_cast<const void*>(p + kPrefetchHorizon));
      // Process 64 bytes at a time
      step_stride();
      step_stride();
      step_stride();
      step_stride();
    }
    while (static_cast<size_t>(e - p) >= kSwathSize) {
      step_stride();
    }

    // Now advance one word at a time as far as possible. This isn't worth
    // doing if we have word-advance tables.
    while (static_cast<size_t>(e - p) >= 4) {
      buf0 = step_swath(buf0, p);
      uint32_t tmp = buf0;
      buf0 = buf1;
      buf1 = buf2;
      buf2 = buf3;
      buf3 = tmp;
      p += 4;
    }

    // Combine the results from the different swaths. This is just a CRC
    // on the data values in the bufX words.
    auto combine_one_word = [this](uint32_t crc_in, uint32_t w) {
      w ^= crc_in;
      for (size_t i = 0; i < 4; ++i) {
        w = (w >> 8) ^ this->table0_[w & 0xff];
      }
      return w;
    };

    l = combine_one_word(0, buf0);
    l = combine_one_word(l, buf1);
    l = combine_one_word(l, buf2);
    l = combine_one_word(l, buf3);
  }

  // Process the last few bytes
  while (p != e) {
    step_one_byte();
  }

  *crc = l;
}

void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length,
                               const uint32_t zeroes_table[256],
                               const uint32_t poly_table[256]) {
  if (length != 0) {
    uint32_t l = *crc;
    // For each ZEROES_BASE_LG bits in length
    // (after the low-order bits have been removed)
    // we lookup the appropriate polynomial in the zeroes_ array
    // and do a polynomial long multiplication (mod the CRC polynomial)
    // to extend the CRC by the appropriate number of bits.
    for (int i = 0; length != 0;
         i += ZEROES_BASE - 1, length >>= ZEROES_BASE_LG) {
      int c = length & (ZEROES_BASE - 1);  // pick next ZEROES_BASE_LG bits
      if (c != 0) {                        // if they are not zero,
                                           // multiply by entry in table
        // Build a table to aid in multiplying 2 bits at a time.
        // It takes too long to build tables for more bits.
        uint64_t m = zeroes_table[c + i - 1];
        m <<= 1;
        uint64_t m2 = m << 1;
        uint64_t mtab[4] = {0, m, m2, m2 ^ m};

        // Do the multiply one byte at a time.
        uint64_t result = 0;
        for (int x = 0; x < 32; x += 8) {
          // The carry-less multiply.
          result ^= mtab[l & 3] ^ (mtab[(l >> 2) & 3] << 2) ^
                    (mtab[(l >> 4) & 3] << 4) ^ (mtab[(l >> 6) & 3] << 6);
          l >>= 8;

          // Reduce modulo the polynomial
          result = (result >> 8) ^ poly_table[result & 0xff];
        }
        l = static_cast<uint32_t>(result);
      }
    }
    *crc = l;
  }
}

void CRC32::ExtendByZeroes(uint32_t* crc, size_t length) const {
  return CRC32::ExtendByZeroesImpl(crc, length, zeroes_, table0_);
}

void CRC32::UnextendByZeroes(uint32_t* crc, size_t length) const {
  // See the comment in CRC32::InitTables() for an explanation of the algorithm
  // below.
  *crc = ReverseBits(*crc);
  ExtendByZeroesImpl(crc, length, reverse_zeroes_, reverse_table0_);
  *crc = ReverseBits(*crc);
}

void CRC32::Scramble(uint32_t* crc) const {
  // Rotate by near half the word size plus 1.  See the scramble comment in
  // crc_internal.h for an explanation.
  constexpr int scramble_rotate = (32 / 2) + 1;
  *crc = RotateRight<uint32_t>(static_cast<unsigned int>(*crc + kScrambleLo),
                               32, scramble_rotate) &
         MaskOfLength<uint32_t>(32);
}

void CRC32::Unscramble(uint32_t* crc) const {
  constexpr int scramble_rotate = (32 / 2) + 1;
  uint64_t rotated = RotateRight<uint32_t>(static_cast<unsigned int>(*crc), 32,
                                           32 - scramble_rotate);
  *crc = (rotated - kScrambleLo) & MaskOfLength<uint32_t>(32);
}

// Constructor and destructor for base class CRC.
CRC::~CRC() {}
CRC::CRC() {}

// The "constructor" for a CRC32C with a standard polynomial.
CRC* CRC::Crc32c() {
  static CRC* singleton = CRCImpl::NewInternal();
  return singleton;
}

}  // namespace crc_internal
ABSL_NAMESPACE_END
}  // namespace absl

Coverage Report

Created: 2026-05-30 06:23

Line	Count	Source
1		// Copyright 2022 The Abseil Authors.
2		//
3		// Licensed under the Apache License, Version 2.0 (the "License");
4		// you may not use this file except in compliance with the License.
5		// You may obtain a copy of the License at
6		//
7		// https://www.apache.org/licenses/LICENSE-2.0
8		//
9		// Unless required by applicable law or agreed to in writing, software
10		// distributed under the License is distributed on an "AS IS" BASIS,
11		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12		// See the License for the specific language governing permissions and
13		// limitations under the License.
14
15		// Implementation of CRCs (aka Rabin Fingerprints).
16		// Treats the input as a polynomial with coefficients in Z(2),
17		// and finds the remainder when divided by an irreducible polynomial
18		// of the appropriate length.
19		// It handles all CRC sizes from 8 to 128 bits.
20		// It's somewhat complicated by having separate implementations optimized for
21		// CRC's <=32 bits, <= 64 bits, and <= 128 bits.
22		// The input string is prefixed with a "1" bit, and has "degree" "0" bits
23		// appended to it before the remainder is found. This ensures that
24		// short strings are scrambled somewhat and that strings consisting
25		// of all nulls have a non-zero CRC.
26		//
27		// Uses the "interleaved word-by-word" method from
28		// "Everything we know about CRC but afraid to forget" by Andrew Kadatch
29		// and Bob Jenkins,
30		// http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
31		//
32		// The idea is to compute kStride CRCs simultaneously, allowing the
33		// processor to more effectively use multiple execution units. Each of
34		// the CRCs is calculated on one word of data followed by kStride - 1
35		// words of zeroes; the CRC starting points are staggered by one word.
36		// Assuming a stride of 4 with data words "ABCDABCDABCD", the first
37		// CRC is over A000A000A, the second over 0B000B000B, and so on.
38		// The CRC of the whole data is then calculated by properly aligning the
39		// CRCs by appending zeroes until the data lengths agree then XORing
40		// the CRCs.
41
42		#include "absl/crc/internal/crc.h"
43
44		#include <cstdint>
45
46		#include "absl/base/internal/endian.h"
47		#include "absl/base/internal/raw_logging.h"
48		#include "absl/base/prefetch.h"
49		#include "absl/crc/internal/crc_internal.h"
50
51		namespace absl {
52		ABSL_NAMESPACE_BEGIN
53		namespace crc_internal {
54
55		namespace {
56
57		// Constants
58		#if defined(__i386__) \|\| defined(__x86_64__)
59		constexpr bool kNeedAlignedLoads = false;
60		#else
61		constexpr bool kNeedAlignedLoads = true;
62		#endif
63
64		// We express the number of zeroes as a number in base ZEROES_BASE. By
65		// pre-computing the zero extensions for all possible components of such an
66		// expression (numbers in a form aZEROES_BASE*b), we can calculate the
67		// resulting extension by multiplying the extensions for individual components
68		// using log_{ZEROES_BASE}(num_zeroes) polynomial multiplications. The tables of
69		// zero extensions contain (ZEROES_BASE - 1) * (log_{ZEROES_BASE}(64)) entries.
70		constexpr int ZEROES_BASE_LG = 4; // log_2(ZEROES_BASE)
71		constexpr int ZEROES_BASE = (1 << ZEROES_BASE_LG); // must be a power of 2
72
73		constexpr uint32_t kCrc32cPoly = 0x82f63b78;
74
75	0	uint32_t ReverseBits(uint32_t bits) {
76	0	bits = (bits & 0xaaaaaaaau) >> 1 \| (bits & 0x55555555u) << 1;
77	0	bits = (bits & 0xccccccccu) >> 2 \| (bits & 0x33333333u) << 2;
78	0	bits = (bits & 0xf0f0f0f0u) >> 4 \| (bits & 0x0f0f0f0fu) << 4;
79	0	return absl::gbswap_32(bits);
80	0	}
81
82		// Polynomial long multiplication mod the polynomial of degree 32.
83	0	void PolyMultiply(uint32_t* val, uint32_t m, uint32_t poly) {
84	0	uint32_t l = *val;
85	0	uint32_t result = 0;
86	0	auto onebit = uint32_t{0x80000000u};
87	0	for (uint32_t one = onebit; one != 0; one >>= 1) {
88	0	if ((l & one) != 0) {
89	0	result ^= m;
90	0	}
91	0	if (m & 1) {
92	0	m = (m >> 1) ^ poly;
93	0	} else {
94	0	m >>= 1;
95	0	}
96	0	}
97	0	*val = result;
98	0	}
99		} // namespace
100
101		void CRCImpl::FillWordTable(uint32_t poly, uint32_t last, int word_size,
102	0	Uint32By256* t) {
103	0	for (int j = 0; j != word_size; j++) { // for each byte of extension....
104	0	t[j][0] = 0; // a zero has no effect
105	0	for (int i = 128; i != 0; i >>= 1) { // fill in entries for powers of 2
106	0	if (j == 0 && i == 128) {
107	0	t[j][i] = last; // top bit in last byte is given
108	0	} else {
109		// each successive power of two is derived from the previous
110		// one, either in this table, or the last table
111	0	uint32_t pred;
112	0	if (i == 128) {
113	0	pred = t[j - 1][1];
114	0	} else {
115	0	pred = t[j][i << 1];
116	0	}
117		// Advance the CRC by one bit (multiply by X, and take remainder
118		// through one step of polynomial long division)
119	0	if (pred & 1) {
120	0	t[j][i] = (pred >> 1) ^ poly;
121	0	} else {
122	0	t[j][i] = pred >> 1;
123	0	}
124	0	}
125	0	}
126		// CRCs have the property that CRC(a xor b) == CRC(a) xor CRC(b)
127		// so we can make all the tables for non-powers of two by
128		// xoring previously created entries.
129	0	for (int i = 2; i != 256; i <<= 1) {
130	0	for (int k = i + 1; k != (i << 1); k++) {
131	0	t[j][k] = t[j][i] ^ t[j][k - i];
132	0	}
133	0	}
134	0	}
135	0	}
136
137	0	int CRCImpl::FillZeroesTable(uint32_t poly, Uint32By256* t) {
138	0	uint32_t inc = 1;
139	0	inc <<= 31;
140
141		// Extend by one zero bit. We know degree > 1 so (inc & 1) == 0.
142	0	inc >>= 1;
143
144		// Now extend by 2, 4, and 8 bits, so now `inc` is extended by one zero byte.
145	0	for (int i = 0; i < 3; ++i) {
146	0	PolyMultiply(&inc, inc, poly);
147	0	}
148
149	0	int j = 0;
150	0	for (uint64_t inc_len = 1; inc_len != 0; inc_len <<= ZEROES_BASE_LG) {
151		// Every entry in the table adds an additional inc_len zeroes.
152	0	uint32_t v = inc;
153	0	for (int a = 1; a != ZEROES_BASE; a++) {
154	0	t[0][j] = v;
155	0	PolyMultiply(&v, inc, poly);
156	0	j++;
157	0	}
158	0	inc = v;
159	0	}
160	0	ABSL_RAW_CHECK(j <= 256, "");
161	0	return j;
162	0	}
163
164		// Internal version of the "constructor".
165	0	CRCImpl* CRCImpl::NewInternal() {
166		// Find an accelearated implementation first.
167	0	CRCImpl* result = TryNewCRC32AcceleratedX86ARMCombined();
168
169		// Fall back to generic implementions if no acceleration is available.
170	0	if (result == nullptr) {
171	0	result = new CRC32();
172	0	}
173
174	0	result->InitTables();
175
176	0	return result;
177	0	}
178
179		// The 32-bit implementation
180
181	0	void CRC32::InitTables() {
182		// Compute the table for extending a CRC by one byte.
183	0	Uint32By256* t = new Uint32By256[4];
184	0	FillWordTable(kCrc32cPoly, kCrc32cPoly, 1, t);
185	0	for (int i = 0; i != 256; i++) {
186	0	this->table0_[i] = t[0][i];
187	0	}
188
189		// Construct a table for updating the CRC by 4 bytes data followed by
190		// 12 bytes of zeroes.
191		//
192		// Note: the data word size could be larger than the CRC size; it might
193		// be slightly faster to use a 64-bit data word, but doing so doubles the
194		// table size.
195	0	uint32_t last = kCrc32cPoly;
196	0	const size_t size = 12;
197	0	for (size_t i = 0; i < size; ++i) {
198	0	last = (last >> 8) ^ this->table0_[last & 0xff];
199	0	}
200	0	FillWordTable(kCrc32cPoly, last, 4, t);
201	0	for (size_t b = 0; b < 4; ++b) {
202	0	for (int i = 0; i < 256; ++i) {
203	0	this->table_[b][i] = t[b][i];
204	0	}
205	0	}
206
207	0	int j = FillZeroesTable(kCrc32cPoly, t);
208	0	ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->zeroes_)), "");
209	0	for (int i = 0; i < j; i++) {
210	0	this->zeroes_[i] = t[0][i];
211	0	}
212
213	0	delete[] t;
214
215		// Build up tables for _reversing_ the operation of doing CRC operations on
216		// zero bytes.
217
218		// In C++, extending `crc` by a single zero bit is done by the following:
219		// (A) bool low_bit_set = (crc & 1);
220		// crc >>= 1;
221		// if (low_bit_set) crc ^= kCrc32cPoly;
222		//
223		// In particular note that the high bit of `crc` after this operation will be
224		// set if and only if the low bit of `crc` was set before it. This means that
225		// no information is lost, and the operation can be reversed, as follows:
226		// (B) bool high_bit_set = (crc & 0x80000000u);
227		// if (high_bit_set) crc ^= kCrc32cPoly;
228		// crc <<= 1;
229		// if (high_bit_set) crc ^= 1;
230		//
231		// Or, equivalently:
232		// (C) bool high_bit_set = (crc & 0x80000000u);
233		// crc <<= 1;
234		// if (high_bit_set) crc ^= ((kCrc32cPoly << 1) ^ 1);
235		//
236		// The last observation is, if we store our checksums in variable `rcrc`,
237		// with order of the bits reversed, the inverse operation becomes:
238		// (D) bool low_bit_set = (rcrc & 1);
239		// rcrc >>= 1;
240		// if (low_bit_set) rcrc ^= ReverseBits((kCrc32cPoly << 1) ^ 1)
241		//
242		// This is the same algorithm (A) that we started with, only with a different
243		// polynomial bit pattern. This means that by building up our tables with
244		// this alternate polynomial, we can apply the CRC algorithms to a
245		// bit-reversed CRC checksum to perform inverse zero-extension.
246
247	0	const uint32_t kCrc32cUnextendPoly =
248	0	ReverseBits(static_cast<uint32_t>((kCrc32cPoly << 1) ^ 1));
249	0	FillWordTable(kCrc32cUnextendPoly, kCrc32cUnextendPoly, 1, &reverse_table0_);
250
251	0	j = FillZeroesTable(kCrc32cUnextendPoly, &reverse_zeroes_);
252	0	ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->reverse_zeroes_)),
253	0	"");
254	0	}
255
256	0	void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
257	0	const uint8_t* p = static_cast<const uint8_t*>(bytes);
258	0	const uint8_t* e = p + length;
259	0	uint32_t l = *crc;
260
261	0	auto step_one_byte = [this, &p, &l]() {
262	0	int c = (l & 0xff) ^ *p++;
263	0	l = this->table0_[c] ^ (l >> 8);
264	0	};
265
266	0	if (kNeedAlignedLoads) {
267		// point x at first 4-byte aligned byte in string. this might be past the
268		// end of the string.
269	0	const uint8_t* x = RoundUp<4>(p);
270	0	if (x <= e) {
271		// Process bytes until finished or p is 4-byte aligned
272	0	while (p != x) {
273	0	step_one_byte();
274	0	}
275	0	}
276	0	}
277
278	0	const size_t kSwathSize = 16;
279	0	if (static_cast<size_t>(e - p) >= kSwathSize) {
280		// Load one swath of data into the operating buffers.
281	0	uint32_t buf0 = absl::little_endian::Load32(p) ^ l;
282	0	uint32_t buf1 = absl::little_endian::Load32(p + 4);
283	0	uint32_t buf2 = absl::little_endian::Load32(p + 8);
284	0	uint32_t buf3 = absl::little_endian::Load32(p + 12);
285	0	p += kSwathSize;
286
287		// Increment a CRC value by a "swath"; this combines the four bytes
288		// starting at `ptr` and twelve zero bytes, so that four CRCs can be
289		// built incrementally and combined at the end.
290	0	const auto step_swath = [this](uint32_t crc_in, const std::uint8_t* ptr) {
291	0	return absl::little_endian::Load32(ptr) ^
292	0	this->table_[3][crc_in & 0xff] ^
293	0	this->table_[2][(crc_in >> 8) & 0xff] ^
294	0	this->table_[1][(crc_in >> 16) & 0xff] ^
295	0	this->table_[0][crc_in >> 24];
296	0	};
297
298		// Run one CRC calculation step over all swaths in one 16-byte stride
299	0	const auto step_stride = [&]() {
300	0	buf0 = step_swath(buf0, p);
301	0	buf1 = step_swath(buf1, p + 4);
302	0	buf2 = step_swath(buf2, p + 8);
303	0	buf3 = step_swath(buf3, p + 12);
304	0	p += 16;
305	0	};
306
307		// Process kStride interleaved swaths through the data in parallel.
308	0	while ((e - p) > kPrefetchHorizon) {
309	0	PrefetchToLocalCacheNta(
310	0	reinterpret_cast<const void*>(p + kPrefetchHorizon));
311		// Process 64 bytes at a time
312	0	step_stride();
313	0	step_stride();
314	0	step_stride();
315	0	step_stride();
316	0	}
317	0	while (static_cast<size_t>(e - p) >= kSwathSize) {
318	0	step_stride();
319	0	}
320
321		// Now advance one word at a time as far as possible. This isn't worth
322		// doing if we have word-advance tables.
323	0	while (static_cast<size_t>(e - p) >= 4) {
324	0	buf0 = step_swath(buf0, p);
325	0	uint32_t tmp = buf0;
326	0	buf0 = buf1;
327	0	buf1 = buf2;
328	0	buf2 = buf3;
329	0	buf3 = tmp;
330	0	p += 4;
331	0	}
332
333		// Combine the results from the different swaths. This is just a CRC
334		// on the data values in the bufX words.
335	0	auto combine_one_word = [this](uint32_t crc_in, uint32_t w) {
336	0	w ^= crc_in;
337	0	for (size_t i = 0; i < 4; ++i) {
338	0	w = (w >> 8) ^ this->table0_[w & 0xff];
339	0	}
340	0	return w;
341	0	};
342
343	0	l = combine_one_word(0, buf0);
344	0	l = combine_one_word(l, buf1);
345	0	l = combine_one_word(l, buf2);
346	0	l = combine_one_word(l, buf3);
347	0	}
348
349		// Process the last few bytes
350	0	while (p != e) {
351	0	step_one_byte();
352	0	}
353
354	0	*crc = l;
355	0	}
356
357		void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length,
358		const uint32_t zeroes_table[256],
359	0	const uint32_t poly_table[256]) {
360	0	if (length != 0) {
361	0	uint32_t l = *crc;
362		// For each ZEROES_BASE_LG bits in length
363		// (after the low-order bits have been removed)
364		// we lookup the appropriate polynomial in the zeroes_ array
365		// and do a polynomial long multiplication (mod the CRC polynomial)
366		// to extend the CRC by the appropriate number of bits.
367	0	for (int i = 0; length != 0;
368	0	i += ZEROES_BASE - 1, length >>= ZEROES_BASE_LG) {
369	0	int c = length & (ZEROES_BASE - 1); // pick next ZEROES_BASE_LG bits
370	0	if (c != 0) { // if they are not zero,
371		// multiply by entry in table
372		// Build a table to aid in multiplying 2 bits at a time.
373		// It takes too long to build tables for more bits.
374	0	uint64_t m = zeroes_table[c + i - 1];
375	0	m <<= 1;
376	0	uint64_t m2 = m << 1;
377	0	uint64_t mtab[4] = {0, m, m2, m2 ^ m};
378
379		// Do the multiply one byte at a time.
380	0	uint64_t result = 0;
381	0	for (int x = 0; x < 32; x += 8) {
382		// The carry-less multiply.
383	0	result ^= mtab[l & 3] ^ (mtab[(l >> 2) & 3] << 2) ^
384	0	(mtab[(l >> 4) & 3] << 4) ^ (mtab[(l >> 6) & 3] << 6);
385	0	l >>= 8;
386
387		// Reduce modulo the polynomial
388	0	result = (result >> 8) ^ poly_table[result & 0xff];
389	0	}
390	0	l = static_cast<uint32_t>(result);
391	0	}
392	0	}
393	0	*crc = l;
394	0	}
395	0	}
396
397	0	void CRC32::ExtendByZeroes(uint32_t* crc, size_t length) const {
398	0	return CRC32::ExtendByZeroesImpl(crc, length, zeroes_, table0_);
399	0	}
400
401	0	void CRC32::UnextendByZeroes(uint32_t* crc, size_t length) const {
402		// See the comment in CRC32::InitTables() for an explanation of the algorithm
403		// below.
404	0	crc = ReverseBits(crc);
405	0	ExtendByZeroesImpl(crc, length, reverse_zeroes_, reverse_table0_);
406	0	crc = ReverseBits(crc);
407	0	}
408
409	0	void CRC32::Scramble(uint32_t* crc) const {
410		// Rotate by near half the word size plus 1. See the scramble comment in
411		// crc_internal.h for an explanation.
412	0	constexpr int scramble_rotate = (32 / 2) + 1;
413	0	crc = RotateRight<uint32_t>(static_cast<unsigned int>(crc + kScrambleLo),
414	0	32, scramble_rotate) &
415	0	MaskOfLength<uint32_t>(32);
416	0	}
417
418	0	void CRC32::Unscramble(uint32_t* crc) const {
419	0	constexpr int scramble_rotate = (32 / 2) + 1;
420	0	uint64_t rotated = RotateRight<uint32_t>(static_cast<unsigned int>(*crc), 32,
421	0	32 - scramble_rotate);
422	0	*crc = (rotated - kScrambleLo) & MaskOfLength<uint32_t>(32);
423	0	}
424
425		// Constructor and destructor for base class CRC.
426	0	CRC::~CRC() {}
427	0	CRC::CRC() {}
428
429		// The "constructor" for a CRC32C with a standard polynomial.
430	0	CRC* CRC::Crc32c() {
431	0	static CRC* singleton = CRCImpl::NewInternal();
432	0	return singleton;
433	0	}
434
435		} // namespace crc_internal
436		ABSL_NAMESPACE_END
437		} // namespace absl