/src/xz/src/liblzma/check/crc32_fast.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file crc32_fast.c |
6 | | /// \brief CRC32 calculation |
7 | | // |
8 | | // Authors: Lasse Collin |
9 | | // Ilya Kurdyukov |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "check.h" |
14 | | #include "crc_common.h" |
15 | | |
16 | | #if defined(CRC_X86_CLMUL) |
17 | | # define BUILDING_CRC_CLMUL 32 |
18 | | # include "crc_x86_clmul.h" |
19 | | #elif defined(CRC32_ARM64) |
20 | | # include "crc32_arm64.h" |
21 | | #elif defined(CRC32_LOONGARCH) |
22 | | # include "crc32_loongarch.h" |
23 | | #endif |
24 | | |
25 | | |
26 | | #ifdef CRC32_GENERIC |
27 | | |
28 | | /////////////////// |
29 | | // Generic CRC32 // |
30 | | /////////////////// |
31 | | |
32 | | #ifdef WORDS_BIGENDIAN |
33 | | # include "crc32_table_be.h" |
34 | | #else |
35 | | # include "crc32_table_le.h" |
36 | | #endif |
37 | | |
38 | | |
39 | | #ifdef HAVE_CRC_X86_ASM |
// When HAVE_CRC_X86_ASM is defined, the C definition of the generic
// implementation in the #else branch is not compiled; only this prototype
// is visible and the definition has to come from another object file
// (presumably the x86 assembly implementation -- confirm against the build).
40 | | extern uint32_t lzma_crc32_generic( |
41 | | const uint8_t *buf, size_t size, uint32_t crc); |
42 | | #else |
/// \brief      Portable table-driven CRC32 (slice-by-eight)
///
/// Inputs longer than eight bytes are first advanced byte by byte until
/// buf is 8-byte aligned, then consumed eight bytes per iteration using
/// two aligned 32-bit reads and eight table lookups. Whatever is left
/// (and every input of eight bytes or less) is handled one byte at a time.
///
/// \param      buf     Pointer to the input bytes
/// \param      size    Number of bytes to process from buf
/// \param      crc     CRC value to continue from. It is bit-inverted on
///                     entry and the result is inverted again on return.
///
/// \return     CRC value after processing size bytes
///
/// NOTE(review): A(), B(), C(), D(), S8(), aligned_read32ne() and
/// byteswap32() are not defined in this file; presumably A()..D() select
/// the four bytes of a 32-bit word in input order and S8() drops the byte
/// that was just consumed -- confirm in crc_common.h.
43 | | static uint32_t |
44 | | lzma_crc32_generic(const uint8_t *buf, size_t size, uint32_t crc) |
45 | 0 | { |
// Undo the inversion that was applied to the previously returned value.
46 | 0 | crc = ~crc; |
47 | |
|
// On big endian targets the working value stays byte-swapped until the
// matching byteswap32() just before the return statement.
48 | | #ifdef WORDS_BIGENDIAN |
49 | | crc = byteswap32(crc); |
50 | | #endif |
51 | |
|
52 | 0 | if (size > 8) { |
53 | | // Fix the alignment, if needed. The if statement above |
54 | | // ensures that this won't read past the end of buf[]. |
// At most seven iterations: the loop ends once the three lowest address
// bits are zero.
55 | 0 | while ((uintptr_t)(buf) & 7) { |
56 | 0 | crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); |
57 | 0 | --size; |
58 | 0 | } |
59 | | |
60 | | // Calculate the position where to stop. |
61 | 0 | const uint8_t *const limit = buf + (size & ~(size_t)(7)); |
62 | | |
63 | | // Calculate how many bytes must be calculated separately |
64 | | // before returning the result. |
65 | 0 | size &= (size_t)(7); |
66 | | |
67 | | // Calculate the CRC32 using the slice-by-eight algorithm. |
68 | 0 | while (buf < limit) { |
// First four bytes: fold them into crc, then look up tables 7..4.
69 | 0 | crc ^= aligned_read32ne(buf); |
70 | 0 | buf += 4; |
71 | |
|
72 | 0 | crc = lzma_crc32_table[7][A(crc)] |
73 | 0 | ^ lzma_crc32_table[6][B(crc)] |
74 | 0 | ^ lzma_crc32_table[5][C(crc)] |
75 | 0 | ^ lzma_crc32_table[4][D(crc)]; |
76 | |
|
// Second four bytes: looked up in tables 3..0 and combined with crc.
77 | 0 | const uint32_t tmp = aligned_read32ne(buf); |
78 | 0 | buf += 4; |
79 | | |
80 | | // At least with some compilers, it is critical for |
81 | | // performance, that the crc variable is XORed |
82 | | // between the two table-lookup pairs. |
83 | 0 | crc = lzma_crc32_table[3][A(tmp)] |
84 | 0 | ^ lzma_crc32_table[2][B(tmp)] |
85 | 0 | ^ crc |
86 | 0 | ^ lzma_crc32_table[1][C(tmp)] |
87 | 0 | ^ lzma_crc32_table[0][D(tmp)]; |
88 | 0 | } |
89 | 0 | } |
90 | |
|
// Tail: the remaining (size & 7) bytes of a long input, or the whole
// input when size was eight or less.
91 | 0 | while (size-- != 0) |
92 | 0 | crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc); |
93 | |
|
94 | | #ifdef WORDS_BIGENDIAN |
95 | | crc = byteswap32(crc); |
96 | | #endif |
97 | |
|
98 | 0 | return ~crc; |
99 | 0 | } |
100 | | #endif // HAVE_CRC_X86_ASM |
101 | | #endif // CRC32_GENERIC |
102 | | |
103 | | |
104 | | #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) |
105 | | |
106 | | ////////////////////////// |
107 | | // Function dispatching // |
108 | | ////////////////////////// |
109 | | |
110 | | // If both the generic and arch-optimized implementations are built, then |
111 | | // the function to use is selected at runtime because the system running |
112 | | // the binary might not have the arch-specific instruction set extension(s) |
113 | | // available. The dispatch methods in order of priority: |
114 | | // |
115 | | // 1. Constructor. This method uses __attribute__((__constructor__)) to |
116 | | // set crc32_func at load time. This avoids extra computation (and any |
117 | | // unlikely threading bugs) on the first call to lzma_crc32() to decide |
118 | | // which implementation should be used. |
119 | | // |
120 | | // 2. First Call Resolution. On the very first call to lzma_crc32(), the |
121 | | // call will be directed to crc32_dispatch() instead. This will set the |
122 | | // appropriate implementation function and will not be called again. |
123 | | // This method does not use any kind of locking but is safe because if |
124 | | // multiple threads run the dispatcher simultaneously then they will all |
125 | | // set crc32_func to the same value. |
126 | | |
// Common signature of the selectable CRC32 implementations. Pointers to
// lzma_crc32_generic(), crc32_arch_optimized() and, without constructor
// support, crc32_dispatch() are all stored with this type.
127 | | typedef uint32_t (*crc32_func_type)( |
128 | | const uint8_t *buf, size_t size, uint32_t crc); |
129 | | |
130 | | // This resolver is shared between all dispatch methods. |
/// \brief      Pick the CRC32 implementation for the running system
///
/// \return     Pointer to crc32_arch_optimized() if
///             is_arch_extension_supported() returns true, otherwise
///             pointer to lzma_crc32_generic().
131 | | static crc32_func_type |
132 | | crc32_resolve(void) |
133 | 294 | { |
134 | 294 | return is_arch_extension_supported() |
135 | 294 | ? &crc32_arch_optimized : &lzma_crc32_generic; |
136 | 294 | } |
137 | | |
138 | | |
// crc32_func is the pointer that lzma_crc32() calls through.
//   - With constructor support it has no initializer here; it is assigned
//     by crc32_set_func(), which is given the constructor attribute via
//     CRC32_SET_FUNC_ATTR.
//   - Without constructor support CRC32_SET_FUNC_ATTR expands to nothing
//     and the pointer starts out at crc32_dispatch(), which replaces it on
//     the first call.
139 | | #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR |
140 | | // Constructor method. |
141 | | # define CRC32_SET_FUNC_ATTR __attribute__((__constructor__)) |
142 | | static crc32_func_type crc32_func; |
143 | | #else |
144 | | // First Call Resolution method. |
145 | | # define CRC32_SET_FUNC_ATTR |
146 | | static uint32_t crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc); |
147 | | static crc32_func_type crc32_func = &crc32_dispatch; |
148 | | #endif |
149 | | |
/// \brief      Store the resolved CRC32 implementation in crc32_func
///
/// CRC32_SET_FUNC_ATTR is __attribute__((__constructor__)) when
/// HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR is defined and empty otherwise. In the
/// latter case this function is called explicitly from crc32_dispatch().
150 | | CRC32_SET_FUNC_ATTR |
151 | | static void |
152 | | crc32_set_func(void) |
153 | 294 | { |
154 | 294 | crc32_func = crc32_resolve(); |
155 | 294 | return; |
156 | 294 | } |
157 | | |
158 | | #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR |
/// \brief      Initial target of crc32_func without constructor support
///
/// Calls crc32_set_func() to overwrite crc32_func with the resolved
/// implementation and then forwards the same arguments to it.
///
/// \param      buf     Pointer to the input bytes
/// \param      size    Number of bytes to process from buf
/// \param      crc     CRC value to continue from
///
/// \return     The value returned by the selected implementation
159 | | static uint32_t |
160 | | crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) |
161 | | { |
162 | | // When __attribute__((__constructor__)) isn't supported, set the |
163 | | // function pointer without any locking. If multiple threads run |
164 | | // the detection code in parallel, they will all end up setting |
165 | | // the pointer to the same value. This avoids the use of |
166 | | // mythread_once() on every call to lzma_crc32() but this likely |
167 | | // isn't strictly standards compliant. Let's change it if it breaks. |
168 | | crc32_set_func(); |
169 | | return crc32_func(buf, size, crc); |
170 | | } |
171 | | |
172 | | #endif |
173 | | #endif |
174 | | |
175 | | |
/// \brief      Exported CRC32 entry point
///
/// Forwards the arguments unchanged to a target chosen at compile time:
///   - CRC32_GENERIC and CRC32_ARCH_OPTIMIZED both defined: the function
///     currently stored in crc32_func (selected at runtime);
///   - only CRC32_ARCH_OPTIMIZED defined: crc32_arch_optimized();
///   - otherwise: lzma_crc32_generic().
///
/// \param      buf     Pointer to the input bytes
/// \param      size    Number of bytes to process from buf
/// \param      crc     CRC value to continue from
///
/// \return     The value returned by the selected implementation
176 | | extern LZMA_API(uint32_t) |
177 | | lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) |
178 | 67.3k | { |
179 | 67.3k | #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) |
180 | | /* |
181 | | #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR |
182 | | // See crc32_dispatch(). This would be the alternative which uses |
183 | | // locking and doesn't use crc32_dispatch(). Note that on Windows |
184 | | // this method needs Vista threads. |
185 | | mythread_once(crc32_set_func); |
186 | | #endif |
187 | | */ |
188 | 67.3k | return crc32_func(buf, size, crc); |
189 | | |
190 | | #elif defined(CRC32_ARCH_OPTIMIZED) |
191 | | return crc32_arch_optimized(buf, size, crc); |
192 | | |
193 | | #else |
194 | | return lzma_crc32_generic(buf, size, crc); |
195 | | #endif |
196 | 67.3k | } |