Coverage Report

Created: 2026-05-30 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/abseil-cpp/absl/crc/internal/crc.cc
Line
Count
Source
1
// Copyright 2022 The Abseil Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// Implementation of CRCs (aka Rabin Fingerprints).
16
// Treats the input as a polynomial with coefficients in Z(2),
17
// and finds the remainder when divided by an irreducible polynomial
18
// of the appropriate length.
19
// It handles all CRC sizes from 8 to 128 bits.
20
// It's somewhat complicated by having separate implementations optimized for
21
// CRC's <=32 bits, <= 64 bits, and <= 128 bits.
22
// The input string is prefixed with a "1" bit, and has "degree" "0" bits
23
// appended to it before the remainder is found.   This ensures that
24
// short strings are scrambled somewhat and that strings consisting
25
// of all nulls have a non-zero CRC.
26
//
27
// Uses the "interleaved word-by-word" method from
28
// "Everything we know about CRC but afraid to forget" by Andrew Kadatch
29
// and Bob Jenkins,
30
// http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
31
//
32
// The idea is to compute kStride CRCs simultaneously, allowing the
33
// processor to more effectively use multiple execution units. Each of
34
// the CRCs is calculated on one word of data followed by kStride - 1
35
// words of zeroes; the CRC starting points are staggered by one word.
36
// Assuming a stride of 4 with data words "ABCDABCDABCD", the first
37
// CRC is over A000A000A, the second over 0B000B000B, and so on.
38
// The CRC of the whole data is then calculated by properly aligning the
39
// CRCs by appending zeroes until the data lengths agree then XORing
40
// the CRCs.
41
42
#include "absl/crc/internal/crc.h"
43
44
#include <cstdint>
45
46
#include "absl/base/internal/endian.h"
47
#include "absl/base/internal/raw_logging.h"
48
#include "absl/base/prefetch.h"
49
#include "absl/crc/internal/crc_internal.h"
50
51
namespace absl {
52
ABSL_NAMESPACE_BEGIN
53
namespace crc_internal {
54
55
namespace {
56
57
// Constants
58
#if defined(__i386__) || defined(__x86_64__)
59
constexpr bool kNeedAlignedLoads = false;
60
#else
61
constexpr bool kNeedAlignedLoads = true;
62
#endif
63
64
// We express the number of zeroes as a number in base ZEROES_BASE. By
65
// pre-computing the zero extensions for all possible components of such an
66
// expression (numbers in a form a*ZEROES_BASE**b), we can calculate the
67
// resulting extension by multiplying the extensions for individual components
68
// using log_{ZEROES_BASE}(num_zeroes) polynomial multiplications. The tables of
69
// zero extensions contain (ZEROES_BASE - 1) * (log_{ZEROES_BASE}(64)) entries.
70
constexpr int ZEROES_BASE_LG = 4;                   // log_2(ZEROES_BASE)
71
constexpr int ZEROES_BASE = (1 << ZEROES_BASE_LG);  // must be a power of 2
72
73
constexpr uint32_t kCrc32cPoly = 0x82f63b78;
74
75
0
uint32_t ReverseBits(uint32_t bits) {
76
0
  bits = (bits & 0xaaaaaaaau) >> 1 | (bits & 0x55555555u) << 1;
77
0
  bits = (bits & 0xccccccccu) >> 2 | (bits & 0x33333333u) << 2;
78
0
  bits = (bits & 0xf0f0f0f0u) >> 4 | (bits & 0x0f0f0f0fu) << 4;
79
0
  return absl::gbswap_32(bits);
80
0
}
81
82
// Polynomial long multiplication mod the polynomial of degree 32.
83
0
void PolyMultiply(uint32_t* val, uint32_t m, uint32_t poly) {
84
0
  uint32_t l = *val;
85
0
  uint32_t result = 0;
86
0
  auto onebit = uint32_t{0x80000000u};
87
0
  for (uint32_t one = onebit; one != 0; one >>= 1) {
88
0
    if ((l & one) != 0) {
89
0
      result ^= m;
90
0
    }
91
0
    if (m & 1) {
92
0
      m = (m >> 1) ^ poly;
93
0
    } else {
94
0
      m >>= 1;
95
0
    }
96
0
  }
97
0
  *val = result;
98
0
}
99
}  // namespace
100
101
void CRCImpl::FillWordTable(uint32_t poly, uint32_t last, int word_size,
102
0
                            Uint32By256* t) {
103
0
  for (int j = 0; j != word_size; j++) {  // for each byte of extension....
104
0
    t[j][0] = 0;                          // a zero has no effect
105
0
    for (int i = 128; i != 0; i >>= 1) {  // fill in entries for powers of 2
106
0
      if (j == 0 && i == 128) {
107
0
        t[j][i] = last;  // top bit in last byte is given
108
0
      } else {
109
        // each successive power of two is derived from the previous
110
        // one, either in this table, or the last table
111
0
        uint32_t pred;
112
0
        if (i == 128) {
113
0
          pred = t[j - 1][1];
114
0
        } else {
115
0
          pred = t[j][i << 1];
116
0
        }
117
        // Advance the CRC by one bit (multiply by X, and take remainder
118
        // through one step of polynomial long division)
119
0
        if (pred & 1) {
120
0
          t[j][i] = (pred >> 1) ^ poly;
121
0
        } else {
122
0
          t[j][i] = pred >> 1;
123
0
        }
124
0
      }
125
0
    }
126
    // CRCs have the property that CRC(a xor b) == CRC(a) xor CRC(b)
127
    // so we can make all the tables for non-powers of two by
128
    // xoring previously created entries.
129
0
    for (int i = 2; i != 256; i <<= 1) {
130
0
      for (int k = i + 1; k != (i << 1); k++) {
131
0
        t[j][k] = t[j][i] ^ t[j][k - i];
132
0
      }
133
0
    }
134
0
  }
135
0
}
136
137
0
int CRCImpl::FillZeroesTable(uint32_t poly, Uint32By256* t) {
138
0
  uint32_t inc = 1;
139
0
  inc <<= 31;
140
141
  // Extend by one zero bit. We know degree > 1 so (inc & 1) == 0.
142
0
  inc >>= 1;
143
144
  // Now extend by 2, 4, and 8 bits, so now `inc` is extended by one zero byte.
145
0
  for (int i = 0; i < 3; ++i) {
146
0
    PolyMultiply(&inc, inc, poly);
147
0
  }
148
149
0
  int j = 0;
150
0
  for (uint64_t inc_len = 1; inc_len != 0; inc_len <<= ZEROES_BASE_LG) {
151
    // Every entry in the table adds an additional inc_len zeroes.
152
0
    uint32_t v = inc;
153
0
    for (int a = 1; a != ZEROES_BASE; a++) {
154
0
      t[0][j] = v;
155
0
      PolyMultiply(&v, inc, poly);
156
0
      j++;
157
0
    }
158
0
    inc = v;
159
0
  }
160
0
  ABSL_RAW_CHECK(j <= 256, "");
161
0
  return j;
162
0
}
163
164
// Internal version of the "constructor".
165
0
CRCImpl* CRCImpl::NewInternal() {
166
  // Find an accelearated implementation first.
167
0
  CRCImpl* result = TryNewCRC32AcceleratedX86ARMCombined();
168
169
  // Fall back to generic implementions if no acceleration is available.
170
0
  if (result == nullptr) {
171
0
    result = new CRC32();
172
0
  }
173
174
0
  result->InitTables();
175
176
0
  return result;
177
0
}
178
179
//  The 32-bit implementation
180
181
0
void CRC32::InitTables() {
182
  // Compute the table for extending a CRC by one byte.
183
0
  Uint32By256* t = new Uint32By256[4];
184
0
  FillWordTable(kCrc32cPoly, kCrc32cPoly, 1, t);
185
0
  for (int i = 0; i != 256; i++) {
186
0
    this->table0_[i] = t[0][i];
187
0
  }
188
189
  // Construct a table for updating the CRC by 4 bytes data followed by
190
  // 12 bytes of zeroes.
191
  //
192
  // Note: the data word size could be larger than the CRC size; it might
193
  // be slightly faster to use a 64-bit data word, but doing so doubles the
194
  // table size.
195
0
  uint32_t last = kCrc32cPoly;
196
0
  const size_t size = 12;
197
0
  for (size_t i = 0; i < size; ++i) {
198
0
    last = (last >> 8) ^ this->table0_[last & 0xff];
199
0
  }
200
0
  FillWordTable(kCrc32cPoly, last, 4, t);
201
0
  for (size_t b = 0; b < 4; ++b) {
202
0
    for (int i = 0; i < 256; ++i) {
203
0
      this->table_[b][i] = t[b][i];
204
0
    }
205
0
  }
206
207
0
  int j = FillZeroesTable(kCrc32cPoly, t);
208
0
  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->zeroes_)), "");
209
0
  for (int i = 0; i < j; i++) {
210
0
    this->zeroes_[i] = t[0][i];
211
0
  }
212
213
0
  delete[] t;
214
215
  // Build up tables for _reversing_ the operation of doing CRC operations on
216
  // zero bytes.
217
218
  // In C++, extending `crc` by a single zero bit is done by the following:
219
  // (A)  bool low_bit_set = (crc & 1);
220
  //      crc >>= 1;
221
  //      if (low_bit_set) crc ^= kCrc32cPoly;
222
  //
223
  // In particular note that the high bit of `crc` after this operation will be
224
  // set if and only if the low bit of `crc` was set before it.  This means that
225
  // no information is lost, and the operation can be reversed, as follows:
226
  // (B)  bool high_bit_set = (crc & 0x80000000u);
227
  //      if (high_bit_set) crc ^= kCrc32cPoly;
228
  //      crc <<= 1;
229
  //      if (high_bit_set) crc ^= 1;
230
  //
231
  // Or, equivalently:
232
  // (C)  bool high_bit_set = (crc & 0x80000000u);
233
  //      crc <<= 1;
234
  //      if (high_bit_set) crc ^= ((kCrc32cPoly << 1) ^ 1);
235
  //
236
  // The last observation is, if we store our checksums in variable `rcrc`,
237
  // with order of the bits reversed, the inverse operation becomes:
238
  // (D)  bool low_bit_set = (rcrc & 1);
239
  //      rcrc >>= 1;
240
  //      if (low_bit_set) rcrc ^= ReverseBits((kCrc32cPoly << 1) ^ 1)
241
  //
242
  // This is the same algorithm (A) that we started with, only with a different
243
  // polynomial bit pattern.  This means that by building up our tables with
244
  // this alternate polynomial, we can apply the CRC algorithms to a
245
  // bit-reversed CRC checksum to perform inverse zero-extension.
246
247
0
  const uint32_t kCrc32cUnextendPoly =
248
0
      ReverseBits(static_cast<uint32_t>((kCrc32cPoly << 1) ^ 1));
249
0
  FillWordTable(kCrc32cUnextendPoly, kCrc32cUnextendPoly, 1, &reverse_table0_);
250
251
0
  j = FillZeroesTable(kCrc32cUnextendPoly, &reverse_zeroes_);
252
0
  ABSL_RAW_CHECK(j <= static_cast<int>(ABSL_ARRAYSIZE(this->reverse_zeroes_)),
253
0
                 "");
254
0
}
255
256
0
void CRC32::Extend(uint32_t* crc, const void* bytes, size_t length) const {
257
0
  const uint8_t* p = static_cast<const uint8_t*>(bytes);
258
0
  const uint8_t* e = p + length;
259
0
  uint32_t l = *crc;
260
261
0
  auto step_one_byte = [this, &p, &l]() {
262
0
    int c = (l & 0xff) ^ *p++;
263
0
    l = this->table0_[c] ^ (l >> 8);
264
0
  };
265
266
0
  if (kNeedAlignedLoads) {
267
    // point x at first 4-byte aligned byte in string. this might be past the
268
    // end of the string.
269
0
    const uint8_t* x = RoundUp<4>(p);
270
0
    if (x <= e) {
271
      // Process bytes until finished or p is 4-byte aligned
272
0
      while (p != x) {
273
0
        step_one_byte();
274
0
      }
275
0
    }
276
0
  }
277
278
0
  const size_t kSwathSize = 16;
279
0
  if (static_cast<size_t>(e - p) >= kSwathSize) {
280
    // Load one swath of data into the operating buffers.
281
0
    uint32_t buf0 = absl::little_endian::Load32(p) ^ l;
282
0
    uint32_t buf1 = absl::little_endian::Load32(p + 4);
283
0
    uint32_t buf2 = absl::little_endian::Load32(p + 8);
284
0
    uint32_t buf3 = absl::little_endian::Load32(p + 12);
285
0
    p += kSwathSize;
286
287
    // Increment a CRC value by a "swath"; this combines the four bytes
288
    // starting at `ptr` and twelve zero bytes, so that four CRCs can be
289
    // built incrementally and combined at the end.
290
0
    const auto step_swath = [this](uint32_t crc_in, const std::uint8_t* ptr) {
291
0
      return absl::little_endian::Load32(ptr) ^
292
0
             this->table_[3][crc_in & 0xff] ^
293
0
             this->table_[2][(crc_in >> 8) & 0xff] ^
294
0
             this->table_[1][(crc_in >> 16) & 0xff] ^
295
0
             this->table_[0][crc_in >> 24];
296
0
    };
297
298
    // Run one CRC calculation step over all swaths in one 16-byte stride
299
0
    const auto step_stride = [&]() {
300
0
      buf0 = step_swath(buf0, p);
301
0
      buf1 = step_swath(buf1, p + 4);
302
0
      buf2 = step_swath(buf2, p + 8);
303
0
      buf3 = step_swath(buf3, p + 12);
304
0
      p += 16;
305
0
    };
306
307
    // Process kStride interleaved swaths through the data in parallel.
308
0
    while ((e - p) > kPrefetchHorizon) {
309
0
      PrefetchToLocalCacheNta(
310
0
          reinterpret_cast<const void*>(p + kPrefetchHorizon));
311
      // Process 64 bytes at a time
312
0
      step_stride();
313
0
      step_stride();
314
0
      step_stride();
315
0
      step_stride();
316
0
    }
317
0
    while (static_cast<size_t>(e - p) >= kSwathSize) {
318
0
      step_stride();
319
0
    }
320
321
    // Now advance one word at a time as far as possible. This isn't worth
322
    // doing if we have word-advance tables.
323
0
    while (static_cast<size_t>(e - p) >= 4) {
324
0
      buf0 = step_swath(buf0, p);
325
0
      uint32_t tmp = buf0;
326
0
      buf0 = buf1;
327
0
      buf1 = buf2;
328
0
      buf2 = buf3;
329
0
      buf3 = tmp;
330
0
      p += 4;
331
0
    }
332
333
    // Combine the results from the different swaths. This is just a CRC
334
    // on the data values in the bufX words.
335
0
    auto combine_one_word = [this](uint32_t crc_in, uint32_t w) {
336
0
      w ^= crc_in;
337
0
      for (size_t i = 0; i < 4; ++i) {
338
0
        w = (w >> 8) ^ this->table0_[w & 0xff];
339
0
      }
340
0
      return w;
341
0
    };
342
343
0
    l = combine_one_word(0, buf0);
344
0
    l = combine_one_word(l, buf1);
345
0
    l = combine_one_word(l, buf2);
346
0
    l = combine_one_word(l, buf3);
347
0
  }
348
349
  // Process the last few bytes
350
0
  while (p != e) {
351
0
    step_one_byte();
352
0
  }
353
354
0
  *crc = l;
355
0
}
356
357
void CRC32::ExtendByZeroesImpl(uint32_t* crc, size_t length,
358
                               const uint32_t zeroes_table[256],
359
0
                               const uint32_t poly_table[256]) {
360
0
  if (length != 0) {
361
0
    uint32_t l = *crc;
362
    // For each ZEROES_BASE_LG bits in length
363
    // (after the low-order bits have been removed)
364
    // we lookup the appropriate polynomial in the zeroes_ array
365
    // and do a polynomial long multiplication (mod the CRC polynomial)
366
    // to extend the CRC by the appropriate number of bits.
367
0
    for (int i = 0; length != 0;
368
0
         i += ZEROES_BASE - 1, length >>= ZEROES_BASE_LG) {
369
0
      int c = length & (ZEROES_BASE - 1);  // pick next ZEROES_BASE_LG bits
370
0
      if (c != 0) {                        // if they are not zero,
371
                                           // multiply by entry in table
372
        // Build a table to aid in multiplying 2 bits at a time.
373
        // It takes too long to build tables for more bits.
374
0
        uint64_t m = zeroes_table[c + i - 1];
375
0
        m <<= 1;
376
0
        uint64_t m2 = m << 1;
377
0
        uint64_t mtab[4] = {0, m, m2, m2 ^ m};
378
379
        // Do the multiply one byte at a time.
380
0
        uint64_t result = 0;
381
0
        for (int x = 0; x < 32; x += 8) {
382
          // The carry-less multiply.
383
0
          result ^= mtab[l & 3] ^ (mtab[(l >> 2) & 3] << 2) ^
384
0
                    (mtab[(l >> 4) & 3] << 4) ^ (mtab[(l >> 6) & 3] << 6);
385
0
          l >>= 8;
386
387
          // Reduce modulo the polynomial
388
0
          result = (result >> 8) ^ poly_table[result & 0xff];
389
0
        }
390
0
        l = static_cast<uint32_t>(result);
391
0
      }
392
0
    }
393
0
    *crc = l;
394
0
  }
395
0
}
396
397
0
void CRC32::ExtendByZeroes(uint32_t* crc, size_t length) const {
398
0
  return CRC32::ExtendByZeroesImpl(crc, length, zeroes_, table0_);
399
0
}
400
401
0
void CRC32::UnextendByZeroes(uint32_t* crc, size_t length) const {
402
  // See the comment in CRC32::InitTables() for an explanation of the algorithm
403
  // below.
404
0
  *crc = ReverseBits(*crc);
405
0
  ExtendByZeroesImpl(crc, length, reverse_zeroes_, reverse_table0_);
406
0
  *crc = ReverseBits(*crc);
407
0
}
408
409
0
void CRC32::Scramble(uint32_t* crc) const {
410
  // Rotate by near half the word size plus 1.  See the scramble comment in
411
  // crc_internal.h for an explanation.
412
0
  constexpr int scramble_rotate = (32 / 2) + 1;
413
0
  *crc = RotateRight<uint32_t>(static_cast<unsigned int>(*crc + kScrambleLo),
414
0
                               32, scramble_rotate) &
415
0
         MaskOfLength<uint32_t>(32);
416
0
}
417
418
0
void CRC32::Unscramble(uint32_t* crc) const {
419
0
  constexpr int scramble_rotate = (32 / 2) + 1;
420
0
  uint64_t rotated = RotateRight<uint32_t>(static_cast<unsigned int>(*crc), 32,
421
0
                                           32 - scramble_rotate);
422
0
  *crc = (rotated - kScrambleLo) & MaskOfLength<uint32_t>(32);
423
0
}
424
425
// Constructor and destructor for base class CRC.
426
0
CRC::~CRC() {}
427
0
CRC::CRC() {}
428
429
// The "constructor" for a CRC32C with a standard polynomial.
430
0
CRC* CRC::Crc32c() {
431
0
  static CRC* singleton = CRCImpl::NewInternal();
432
0
  return singleton;
433
0
}
434
435
}  // namespace crc_internal
436
ABSL_NAMESPACE_END
437
}  // namespace absl