/src/abseil-cpp/absl/crc/internal/crc_internal.h
Line | Count | Source |
1 | | // Copyright 2022 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #ifndef ABSL_CRC_INTERNAL_CRC_INTERNAL_H_ |
16 | | #define ABSL_CRC_INTERNAL_CRC_INTERNAL_H_ |
17 | | |
18 | | #include <cstdint> |
19 | | #include <memory> |
20 | | #include <vector> |
21 | | |
22 | | #include "absl/base/internal/raw_logging.h" |
23 | | #include "absl/crc/internal/crc.h" |
24 | | |
25 | | namespace absl { |
26 | | ABSL_NAMESPACE_BEGIN |
27 | | |
28 | | namespace crc_internal { |
29 | | |
30 | | // Prefetch constants used in some Extend() implementations |
31 | | constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far |
32 | | // Shorter prefetch distance for smaller buffers |
33 | | constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1; |
34 | | static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len"); |
35 | | |
36 | | // We require the Scramble() function: |
37 | | // - to be reversible (Unscramble() must exist) |
38 | | // - to be non-linear in the polynomial's Galois field (so the CRC of a |
39 | | // scrambled CRC is not linearly affected by the scrambled CRC, even if |
40 | | // using the same polynomial) |
41 | | // - not to be its own inverse. Preferably, if X=Scramble^N(X) and N!=0, then |
42 | | // N is large. |
43 | | // - to be fast. |
44 | | // - not to change once defined. |
45 | | // We introduce non-linearity in two ways: |
46 | | // Addition of a constant. |
47 | | // - The carries introduce non-linearity; we use bits of an irrational |
48 | | // (phi) to make it unlikely that we introduce no carries. |
49 | | // Rotate by a constant number of bits. |
50 | | // - We use floor(degree/2)+1, which does not divide the degree, and |
51 | | // splits the bits nearly evenly, which makes it less likely the |
52 | | // halves will be the same or one will be all zeroes. |
53 | | // We do both things to improve the chances of non-linearity in the face of |
54 | | // bit patterns with low numbers of bits set, while still being fast. |
55 | | // Below is the constant that we add. The bits are the first 128 bits of the |
56 | | // fractional part of phi, with a 1 ored into the bottom bit to maximize the |
57 | | // cycle length of repeated adds. |
58 | | constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) | |
59 | | static_cast<uint64_t>(0xbfa53e0aU); |
60 | | constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) | |
61 | | static_cast<uint64_t>(0x2e76e41bU); |
62 | | |
63 | | class CRCImpl : public CRC { // Implementation of the abstract class CRC |
64 | | public: |
65 | | using Uint32By256 = uint32_t[256]; |
66 | | |
67 | 0 | CRCImpl() = default; |
68 | | ~CRCImpl() override = default; |
69 | | |
70 | | // The internal version of CRC::New(). |
71 | | static CRCImpl* NewInternal(); |
72 | | |
73 | | // Fill in a table for updating a CRC by one word of 'word_size' bytes |
74 | | // [last_lo, last_hi] contains the answer if the last bit in the word |
75 | | // is set. |
76 | | static void FillWordTable(uint32_t poly, uint32_t last, int word_size, |
77 | | Uint32By256* t); |
78 | | |
79 | | // Build the table for extending by zeroes, returning the number of entries. |
80 | | // For a in {1, 2, ..., ZEROES_BASE-1}, b in {0, 1, 2, 3, ...}, |
81 | | // entry j=a-1+(ZEROES_BASE-1)*b |
82 | | // contains a polynomial Pi such that multiplying |
83 | | // a CRC by Pi mod P, where P is the CRC polynomial, is equivalent to |
84 | | // appending a*2**(ZEROES_BASE_LG*b) zero bytes to the original string. |
85 | | static int FillZeroesTable(uint32_t poly, Uint32By256* t); |
86 | | |
87 | | virtual void InitTables() = 0; |
88 | | |
89 | | private: |
90 | | CRCImpl(const CRCImpl&) = delete; |
91 | | CRCImpl& operator=(const CRCImpl&) = delete; |
92 | | }; |
93 | | |
94 | | // This is the 32-bit implementation. It handles all sizes from 8 to 32. |
95 | | class CRC32 : public CRCImpl { |
96 | | public: |
97 | 0 | CRC32() = default; |
98 | | ~CRC32() override = default; |
99 | | |
100 | | void Extend(uint32_t* crc, const void* bytes, size_t length) const override; |
101 | | void ExtendByZeroes(uint32_t* crc, size_t length) const override; |
102 | | void Scramble(uint32_t* crc) const override; |
103 | | void Unscramble(uint32_t* crc) const override; |
104 | | void UnextendByZeroes(uint32_t* crc, size_t length) const override; |
105 | | |
106 | | void InitTables() override; |
107 | | |
108 | | private: |
109 | | // Common implementation guts for ExtendByZeroes and UnextendByZeroes(). |
110 | | // |
111 | | // zeroes_table is a table as returned by FillZeroesTable(), containing |
112 | | // polynomials representing CRCs of strings-of-zeros of various lengths, |
113 | | // and which can be combined by polynomial multiplication. poly_table is |
114 | | // a table of CRC byte extension values. These tables are determined by |
115 | | // the generator polynomial. |
116 | | // |
117 | | // These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and |
118 | | // CRC32::zeroes_ and CRC32::table0_ for Extend. |
119 | | static void ExtendByZeroesImpl(uint32_t* crc, size_t length, |
120 | | const uint32_t zeroes_table[256], |
121 | | const uint32_t poly_table[256]); |
122 | | |
123 | | uint32_t table0_[256]; // table of byte extensions |
124 | | uint32_t zeroes_[256]; // table of zero extensions |
125 | | |
126 | | // table of 4-byte extensions shifted by 12 bytes of zeroes |
127 | | uint32_t table_[4][256]; |
128 | | |
129 | | // Reverse lookup tables, using the alternate polynomial used by |
130 | | // UnextendByZeroes(). |
131 | | uint32_t reverse_table0_[256]; // table of reverse byte extensions |
132 | | uint32_t reverse_zeroes_[256]; // table of reverse zero extensions |
133 | | |
134 | | CRC32(const CRC32&) = delete; |
135 | | CRC32& operator=(const CRC32&) = delete; |
136 | | }; |
137 | | |
138 | | // Helpers |
139 | | |
140 | | // Return a bit mask containing len 1-bits. |
141 | | // Requires 0 < len <= sizeof(T) |
142 | | template <typename T> |
143 | 0 | T MaskOfLength(int len) { |
144 | | // shift 2 by len-1 rather than 1 by len because shifts of wordsize |
145 | | // are undefined. |
146 | 0 | return (T(2) << (len - 1)) - 1; |
147 | 0 | } |
148 | | |
149 | | // Rotate low-order "width" bits of "in" right by "r" bits, |
150 | | // setting other bits in word to arbitrary values. |
151 | | template <typename T> |
152 | 0 | T RotateRight(T in, int width, int r) { |
153 | 0 | return (in << (width - r)) | ((in >> r) & MaskOfLength<T>(width - r)); |
154 | 0 | } |
155 | | |
156 | | // RoundUp<N>(p) returns the lowest address >= p aligned to an N-byte |
157 | | // boundary. Requires that N is a power of 2. |
158 | | template <int alignment> |
159 | 0 | const uint8_t* RoundUp(const uint8_t* p) { |
160 | 0 | static_assert((alignment & (alignment - 1)) == 0, "alignment is not 2^n"); |
161 | 0 | constexpr uintptr_t mask = alignment - 1; |
162 | 0 | const uintptr_t as_uintptr = reinterpret_cast<uintptr_t>(p); |
163 | 0 | return reinterpret_cast<const uint8_t*>((as_uintptr + mask) & ~mask); |
164 | 0 | } |
165 | | |
166 | | // Return a newly created CRC32AcceleratedX86ARMCombined if we can use Intel's |
167 | | // or ARM's CRC acceleration for a given polynomial. Return nullptr otherwise. |
168 | | CRCImpl* TryNewCRC32AcceleratedX86ARMCombined(); |
169 | | |
170 | | // Return all possible hardware accelerated implementations. For testing only. |
171 | | std::vector<std::unique_ptr<CRCImpl>> NewCRC32AcceleratedX86ARMCombinedAll(); |
172 | | |
173 | | } // namespace crc_internal |
174 | | ABSL_NAMESPACE_END |
175 | | } // namespace absl |
176 | | |
177 | | #endif // ABSL_CRC_INTERNAL_CRC_INTERNAL_H_ |