/* /src/libdeflate/lib/matchfinder_common.h */
1 | | /* |
2 | | * matchfinder_common.h - common code for Lempel-Ziv matchfinding |
3 | | */ |
4 | | |
5 | | #ifndef LIB_MATCHFINDER_COMMON_H |
6 | | #define LIB_MATCHFINDER_COMMON_H |
7 | | |
8 | | #include "lib_common.h" |
9 | | |
10 | | #ifndef MATCHFINDER_WINDOW_ORDER |
11 | | # error "MATCHFINDER_WINDOW_ORDER must be defined!" |
12 | | #endif |
13 | | |
14 | | /* |
15 | | * Given a 32-bit value that was loaded with the platform's native endianness, |
16 | | * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 |
17 | | * bits contain the first 3 bytes, arranged in octets in a platform-dependent |
18 | | * order, at the memory location from which the input 32-bit value was loaded. |
19 | | */ |
20 | | static forceinline u32 |
21 | | loaded_u32_to_u24(u32 v) |
22 | 0 | { |
23 | 0 | if (CPU_IS_LITTLE_ENDIAN()) |
24 | 0 | return v & 0xFFFFFF; |
25 | 0 | else |
26 | 0 | return v >> 8; |
27 | 0 | } |
28 | | |
29 | | /* |
30 | | * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value. |
31 | | * The order in which the 3 bytes will be arranged as octets in the 24 bits is |
32 | | * platform-dependent. At least 4 bytes (not 3) must be available at @p. |
33 | | */ |
34 | | static forceinline u32 |
35 | | load_u24_unaligned(const u8 *p) |
36 | 0 | { |
37 | 0 | #if UNALIGNED_ACCESS_IS_FAST |
38 | 0 | return loaded_u32_to_u24(load_u32_unaligned(p)); |
39 | | #else |
40 | | if (CPU_IS_LITTLE_ENDIAN()) |
41 | | return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); |
42 | | else |
43 | | return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); |
44 | | #endif |
45 | 0 | } |
46 | | |
/* The matchfinder window size in bytes, derived from the (required)
 * MATCHFINDER_WINDOW_ORDER, which is its base-2 logarithm. */
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

/* The type of a matchfinder position entry.  As a signed 16-bit integer it
 * can represent positions in [-32768, 32767], which limits
 * MATCHFINDER_WINDOW_ORDER to at most 15 with this definition. */
typedef s16 mf_pos_t;

/* The value matchfinder entries are initialized to: a full window before
 * position 0, i.e. permanently out of bounds, so an uninitialized slot can
 * never be mistaken for a real match position. */
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)

/*
 * This is the memory address alignment, in bytes, required for the matchfinder
 * buffers by the architecture-specific implementations of matchfinder_init()
 * and matchfinder_rebase().  "Matchfinder buffer" means an entire struct
 * hc_matchfinder, bt_matchfinder, or ht_matchfinder; the next_tab field of
 * struct hc_matchfinder; or the child_tab field of struct bt_matchfinder.
 *
 * This affects how the entire 'struct deflate_compressor' is allocated, since
 * the matchfinder structures are embedded inside it.
 *
 * Currently the maximum memory address alignment required is 32 bytes, needed
 * by the AVX-2 matchfinder functions.
 */
#define MATCHFINDER_MEM_ALIGNMENT 32

/*
 * This declares a size, in bytes, that is guaranteed to divide the sizes of the
 * matchfinder buffers (where "matchfinder buffers" is as defined for
 * MATCHFINDER_MEM_ALIGNMENT).  The architecture-specific implementations of
 * matchfinder_init() and matchfinder_rebase() take advantage of this value.
 *
 * Currently the maximum size alignment required is 128 bytes, needed by
 * the AVX-2 matchfinder functions.  However, the RISC-V Vector Extension
 * matchfinder functions can, in principle, take advantage of a larger size
 * alignment.  Therefore, we set this to 1024, which still easily divides the
 * actual sizes that result from the current matchfinder struct definitions.
 * This value can safely be changed to any power of two that is >= 128.
 */
#define MATCHFINDER_SIZE_ALIGNMENT 1024

/*
 * Select an architecture-specific implementation of matchfinder_init() and
 * matchfinder_rebase() when one is available; otherwise the portable C
 * versions later in this header are used.  NOTE(review): the arch headers
 * presumably #define matchfinder_init/matchfinder_rebase when they provide a
 * replacement, which is what the #ifndef guards below test -- confirm against
 * the included files.
 */
#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute
# define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
# if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#  include "arm/matchfinder_impl.h"
# elif defined(ARCH_RISCV)
#  include "riscv/matchfinder_impl.h"
# elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#  include "x86/matchfinder_impl.h"
# endif
#else
# define MATCHFINDER_ALIGNED
#endif
97 | | |
98 | | /* |
99 | | * Initialize the hash table portion of the matchfinder. |
100 | | * |
101 | | * Essentially, this is an optimized memset(). |
102 | | * |
103 | | * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and |
104 | | * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. |
105 | | */ |
106 | | #ifndef matchfinder_init |
107 | | static forceinline void |
108 | | matchfinder_init(mf_pos_t *data, size_t size) |
109 | | { |
110 | | size_t num_entries = size / sizeof(*data); |
111 | | size_t i; |
112 | | |
113 | | for (i = 0; i < num_entries; i++) |
114 | | data[i] = MATCHFINDER_INITVAL; |
115 | | } |
116 | | #endif |
117 | | |
118 | | /* |
119 | | * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes. |
120 | | * |
121 | | * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been |
122 | | * run through the matchfinder. |
123 | | * |
124 | | * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given |
125 | | * array, making the entries be relative to the current position rather than the |
126 | | * position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows, |
127 | | * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at |
128 | | * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds. |
129 | | * |
130 | | * The given array must contain all matchfinder data that is position-relative: |
131 | | * the hash table(s) as well as any hash chain or binary tree links. Its |
132 | | * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size |
133 | | * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. |
134 | | */ |
135 | | #ifndef matchfinder_rebase |
136 | | static forceinline void |
137 | | matchfinder_rebase(mf_pos_t *data, size_t size) |
138 | | { |
139 | | size_t num_entries = size / sizeof(*data); |
140 | | size_t i; |
141 | | |
142 | | if (MATCHFINDER_WINDOW_SIZE == 32768) { |
143 | | /* |
144 | | * Branchless version for 32768-byte windows. Clear all bits if |
145 | | * the value was already negative, then set the sign bit. This |
146 | | * is equivalent to subtracting 32768 with signed saturation. |
147 | | */ |
148 | | for (i = 0; i < num_entries; i++) |
149 | | data[i] = 0x8000 | (data[i] & ~(data[i] >> 15)); |
150 | | } else { |
151 | | for (i = 0; i < num_entries; i++) { |
152 | | if (data[i] >= 0) |
153 | | data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; |
154 | | else |
155 | | data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; |
156 | | } |
157 | | } |
158 | | } |
159 | | #endif |
160 | | |
161 | | /* |
162 | | * The hash function: given a sequence prefix held in the low-order bits of a |
163 | | * 32-bit value, multiply by a carefully-chosen large constant. Discard any |
164 | | * bits of the product that don't fit in a 32-bit value, but take the |
165 | | * next-highest @num_bits bits of the product as the hash value, as those have |
166 | | * the most randomness. |
167 | | */ |
168 | | static forceinline u32 |
169 | | lz_hash(u32 seq, unsigned num_bits) |
170 | 0 | { |
171 | 0 | return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); |
172 | 0 | } |
173 | | |
174 | | /* |
175 | | * Return the number of bytes at @matchptr that match the bytes at @strptr, up |
176 | | * to a maximum of @max_len. Initially, @start_len bytes are matched. |
177 | | */ |
static forceinline u32
lz_extend(const u8 * const strptr, const u8 * const matchptr,
	  const u32 start_len, const u32 max_len)
{
	u32 len = start_len;
	machine_word_t v_word;

	/* Word-at-a-time comparison when unaligned loads are cheap. */
	if (UNALIGNED_ACCESS_IS_FAST) {

		/* Unrolled by 4 when there is plenty of room left. */
		if (likely(max_len - len >= 4 * WORDBYTES)) {

/*
 * XOR a word of the match against a word of the string: a nonzero result
 * means a mismatch, and the position of its first differing byte is
 * recovered from v_word at 'word_differs' below.
 */
#define COMPARE_WORD_STEP				\
	v_word = load_word_unaligned(&matchptr[len]) ^	\
		 load_word_unaligned(&strptr[len]);	\
	if (v_word != 0)				\
		goto word_differs;			\
	len += WORDBYTES;				\

			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
			COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
		}

		/* Remaining whole words, one at a time. */
		while (len + WORDBYTES <= max_len) {
			v_word = load_word_unaligned(&matchptr[len]) ^
				 load_word_unaligned(&strptr[len]);
			if (v_word != 0)
				goto word_differs;
			len += WORDBYTES;
		}
	}

	/* Tail (or the whole comparison when unaligned access is slow):
	 * compare byte by byte up to @max_len. */
	while (len < max_len && matchptr[len] == strptr[len])
		len++;
	return len;

word_differs:
	/*
	 * v_word holds the XOR of the mismatching words; the number of equal
	 * bytes before the mismatch is the byte index of its first set bit:
	 * lowest set bit on little endian, highest on big endian.
	 */
	if (CPU_IS_LITTLE_ENDIAN())
		len += (bsfw(v_word) >> 3);
	else
		len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
	return len;
}
223 | | |
224 | | #endif /* LIB_MATCHFINDER_COMMON_H */ |