/src/libdeflate/lib/bt_matchfinder.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees |
3 | | * |
4 | | * Copyright 2016 Eric Biggers |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person |
7 | | * obtaining a copy of this software and associated documentation |
8 | | * files (the "Software"), to deal in the Software without |
9 | | * restriction, including without limitation the rights to use, |
10 | | * copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | * copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following |
13 | | * conditions: |
14 | | * |
15 | | * The above copyright notice and this permission notice shall be |
16 | | * included in all copies or substantial portions of the Software. |
17 | | * |
18 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
22 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
23 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
24 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
25 | | * OTHER DEALINGS IN THE SOFTWARE. |
26 | | * |
27 | | * ---------------------------------------------------------------------------- |
28 | | * |
29 | | * This is a Binary Trees (bt) based matchfinder. |
30 | | * |
31 | | * The main data structure is a hash table where each hash bucket contains a |
32 | | * binary tree of sequences whose first 4 bytes share the same hash code. Each |
33 | | * sequence is identified by its starting position in the input buffer. Each |
34 | | * binary tree is always sorted such that each left child represents a sequence |
35 | | * lexicographically lesser than its parent and each right child represents a |
36 | | * sequence lexicographically greater than its parent. |
37 | | * |
38 | | * The algorithm processes the input buffer sequentially. At each byte |
39 | | * position, the hash code of the first 4 bytes of the sequence beginning at |
40 | | * that position (the sequence being matched against) is computed. This |
41 | | * identifies the hash bucket to use for that position. Then, a new binary tree |
42 | | * node is created to represent the current sequence. Then, in a single tree |
43 | | * traversal, the hash bucket's binary tree is searched for matches and is |
44 | | * re-rooted at the new node. |
45 | | * |
46 | | * Compared to the simpler algorithm that uses linked lists instead of binary |
47 | | * trees (see hc_matchfinder.h), the binary tree version gains more information |
48 | | * at each node visitation. Ideally, the binary tree version will examine only |
49 | | * 'log(n)' nodes to find the same matches that the linked list version will |
50 | | * find by examining 'n' nodes. In addition, the binary tree version can |
51 | | * examine fewer bytes at each node by taking advantage of the common prefixes |
52 | | * that result from the sort order, whereas the linked list version may have to |
53 | | * examine up to the full length of the match at each node. |
54 | | * |
55 | | * However, it is not always best to use the binary tree version. It requires |
56 | | * nearly twice as much memory as the linked list version, and it takes time to |
57 | | * keep the binary trees sorted, even at positions where the compressor does not |
58 | | * need matches. Generally, when doing fast compression on small buffers, |
59 | | * binary trees are the wrong approach. They are best suited for thorough |
60 | | * compression and/or large buffers. |
61 | | * |
62 | | * ---------------------------------------------------------------------------- |
63 | | */ |
64 | | |
65 | | #ifndef LIB_BT_MATCHFINDER_H |
66 | | #define LIB_BT_MATCHFINDER_H |
67 | | |
68 | | #include "matchfinder_common.h" |
69 | | |
70 | 0 | #define BT_MATCHFINDER_HASH3_ORDER 16 |
71 | 0 | #define BT_MATCHFINDER_HASH3_WAYS 2 |
72 | 0 | #define BT_MATCHFINDER_HASH4_ORDER 16 |
73 | | |
74 | | #define BT_MATCHFINDER_TOTAL_HASH_SIZE \ |
75 | 0 | (((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \ |
76 | 0 | (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t)) |
77 | | |
78 | | /* Representation of a match found by the bt_matchfinder */ |
79 | | struct lz_match { |
80 | | |
81 | | /* The number of bytes matched. */ |
82 | | u16 length; |
83 | | |
84 | | /* The offset back from the current position that was matched. */ |
85 | | u16 offset; |
86 | | }; |
87 | | |
88 | | struct MATCHFINDER_ALIGNED bt_matchfinder { |
89 | | |
90 | | /* The hash table for finding length 3 matches */ |
91 | | mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS]; |
92 | | |
93 | | /* The hash table which contains the roots of the binary trees for |
94 | | * finding length 4+ matches */ |
95 | | mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER]; |
96 | | |
97 | | /* The child node references for the binary trees. The left and right |
98 | | * children of the node for the sequence with position 'pos' are |
99 | | * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively. */ |
100 | | mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE]; |
101 | | }; |
102 | | |
103 | | /* Prepare the matchfinder for a new input buffer. */ |
104 | | static forceinline void |
105 | | bt_matchfinder_init(struct bt_matchfinder *mf) |
106 | 0 | { |
107 | 0 | STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE % |
108 | 0 | MATCHFINDER_SIZE_ALIGNMENT == 0); |
109 | |
|
110 | 0 | matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE); |
111 | 0 | } |
112 | | |
113 | | static forceinline void |
114 | | bt_matchfinder_slide_window(struct bt_matchfinder *mf) |
115 | 0 | { |
116 | 0 | STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0); |
117 | |
|
118 | 0 | matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf)); |
119 | 0 | } |
120 | | |
121 | | static forceinline mf_pos_t * |
122 | | bt_left_child(struct bt_matchfinder *mf, s32 node) |
123 | 0 | { |
124 | 0 | return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0]; |
125 | 0 | } |
126 | | |
127 | | static forceinline mf_pos_t * |
128 | | bt_right_child(struct bt_matchfinder *mf, s32 node) |
129 | 0 | { |
130 | 0 | return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1]; |
131 | 0 | } |
132 | | |
133 | | /* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches() |
134 | | * and bt_matchfinder_skip_byte(). There must be sufficiently many bytes |
135 | | * remaining to load a 32-bit integer from the *next* position. */ |
136 | 0 | #define BT_MATCHFINDER_REQUIRED_NBYTES 5 |
137 | | |
138 | | /* Advance the binary tree matchfinder by one byte, optionally recording |
139 | | * matches. @record_matches should be a compile-time constant. */ |
140 | | static forceinline struct lz_match * |
141 | | bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf, |
142 | | const u8 * const in_base, |
143 | | const ptrdiff_t cur_pos, |
144 | | const u32 max_len, |
145 | | const u32 nice_len, |
146 | | const u32 max_search_depth, |
147 | | u32 * const next_hashes, |
148 | | struct lz_match *lz_matchptr, |
149 | | const bool record_matches) |
150 | 0 | { |
151 | 0 | const u8 *in_next = in_base + cur_pos; |
152 | 0 | u32 depth_remaining = max_search_depth; |
153 | 0 | const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; |
154 | 0 | u32 next_hashseq; |
155 | 0 | u32 hash3; |
156 | 0 | u32 hash4; |
157 | 0 | s32 cur_node; |
158 | 0 | #if BT_MATCHFINDER_HASH3_WAYS >= 2 |
159 | 0 | s32 cur_node_2; |
160 | 0 | #endif |
161 | 0 | const u8 *matchptr; |
162 | 0 | mf_pos_t *pending_lt_ptr, *pending_gt_ptr; |
163 | 0 | u32 best_lt_len, best_gt_len; |
164 | 0 | u32 len; |
165 | 0 | u32 best_len = 3; |
166 | |
|
167 | 0 | STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 && |
168 | 0 | BT_MATCHFINDER_HASH3_WAYS <= 2); |
169 | |
|
170 | 0 | next_hashseq = get_unaligned_le32(in_next + 1); |
171 | |
|
172 | 0 | hash3 = next_hashes[0]; |
173 | 0 | hash4 = next_hashes[1]; |
174 | |
|
175 | 0 | next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER); |
176 | 0 | next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER); |
177 | 0 | prefetchw(&mf->hash3_tab[next_hashes[0]]); |
178 | 0 | prefetchw(&mf->hash4_tab[next_hashes[1]]); |
179 | |
|
180 | 0 | cur_node = mf->hash3_tab[hash3][0]; |
181 | 0 | mf->hash3_tab[hash3][0] = cur_pos; |
182 | 0 | #if BT_MATCHFINDER_HASH3_WAYS >= 2 |
183 | 0 | cur_node_2 = mf->hash3_tab[hash3][1]; |
184 | 0 | mf->hash3_tab[hash3][1] = cur_node; |
185 | 0 | #endif |
186 | 0 | if (record_matches && cur_node > cutoff) { |
187 | 0 | u32 seq3 = load_u24_unaligned(in_next); |
188 | 0 | if (seq3 == load_u24_unaligned(&in_base[cur_node])) { |
189 | 0 | lz_matchptr->length = 3; |
190 | 0 | lz_matchptr->offset = in_next - &in_base[cur_node]; |
191 | 0 | lz_matchptr++; |
192 | 0 | } |
193 | 0 | #if BT_MATCHFINDER_HASH3_WAYS >= 2 |
194 | 0 | else if (cur_node_2 > cutoff && |
195 | 0 | seq3 == load_u24_unaligned(&in_base[cur_node_2])) |
196 | 0 | { |
197 | 0 | lz_matchptr->length = 3; |
198 | 0 | lz_matchptr->offset = in_next - &in_base[cur_node_2]; |
199 | 0 | lz_matchptr++; |
200 | 0 | } |
201 | 0 | #endif |
202 | 0 | } |
203 | |
|
204 | 0 | cur_node = mf->hash4_tab[hash4]; |
205 | 0 | mf->hash4_tab[hash4] = cur_pos; |
206 | |
|
207 | 0 | pending_lt_ptr = bt_left_child(mf, cur_pos); |
208 | 0 | pending_gt_ptr = bt_right_child(mf, cur_pos); |
209 | |
|
210 | 0 | if (cur_node <= cutoff) { |
211 | 0 | *pending_lt_ptr = MATCHFINDER_INITVAL; |
212 | 0 | *pending_gt_ptr = MATCHFINDER_INITVAL; |
213 | 0 | return lz_matchptr; |
214 | 0 | } |
215 | | |
216 | 0 | best_lt_len = 0; |
217 | 0 | best_gt_len = 0; |
218 | 0 | len = 0; |
219 | |
|
220 | 0 | for (;;) { |
221 | 0 | matchptr = &in_base[cur_node]; |
222 | |
|
223 | 0 | if (matchptr[len] == in_next[len]) { |
224 | 0 | len = lz_extend(in_next, matchptr, len + 1, max_len); |
225 | 0 | if (!record_matches || len > best_len) { |
226 | 0 | if (record_matches) { |
227 | 0 | best_len = len; |
228 | 0 | lz_matchptr->length = len; |
229 | 0 | lz_matchptr->offset = in_next - matchptr; |
230 | 0 | lz_matchptr++; |
231 | 0 | } |
232 | 0 | if (len >= nice_len) { |
233 | 0 | *pending_lt_ptr = *bt_left_child(mf, cur_node); |
234 | 0 | *pending_gt_ptr = *bt_right_child(mf, cur_node); |
235 | 0 | return lz_matchptr; |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | | |
240 | 0 | if (matchptr[len] < in_next[len]) { |
241 | 0 | *pending_lt_ptr = cur_node; |
242 | 0 | pending_lt_ptr = bt_right_child(mf, cur_node); |
243 | 0 | cur_node = *pending_lt_ptr; |
244 | 0 | best_lt_len = len; |
245 | 0 | if (best_gt_len < len) |
246 | 0 | len = best_gt_len; |
247 | 0 | } else { |
248 | 0 | *pending_gt_ptr = cur_node; |
249 | 0 | pending_gt_ptr = bt_left_child(mf, cur_node); |
250 | 0 | cur_node = *pending_gt_ptr; |
251 | 0 | best_gt_len = len; |
252 | 0 | if (best_lt_len < len) |
253 | 0 | len = best_lt_len; |
254 | 0 | } |
255 | |
|
256 | 0 | if (cur_node <= cutoff || !--depth_remaining) { |
257 | 0 | *pending_lt_ptr = MATCHFINDER_INITVAL; |
258 | 0 | *pending_gt_ptr = MATCHFINDER_INITVAL; |
259 | 0 | return lz_matchptr; |
260 | 0 | } |
261 | 0 | } |
262 | 0 | } |
263 | | |
264 | | /* |
265 | | * Retrieve a list of matches with the current position. |
266 | | * |
267 | | * @mf |
268 | | * The matchfinder structure. |
269 | | * @in_base |
270 | | * Pointer to the next byte in the input buffer to process _at the last |
271 | | * time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_. |
272 | | * @cur_pos |
273 | | * The current position in the input buffer relative to @in_base (the |
274 | | * position of the sequence being matched against). |
275 | | * @max_len |
276 | | * The maximum permissible match length at this position. Must be >= |
277 | | * BT_MATCHFINDER_REQUIRED_NBYTES. |
278 | | * @nice_len |
279 | | * Stop searching if a match of at least this length is found. |
280 | | * Must be <= @max_len. |
281 | | * @max_search_depth |
282 | | * Limit on the number of potential matches to consider. Must be >= 1. |
283 | | * @next_hashes |
284 | | * The precomputed hash codes for the sequence beginning at @in_next. |
285 | | * These will be used and then updated with the precomputed hashcodes for |
286 | | * the sequence beginning at @in_next + 1. |
287 | | * @lz_matchptr |
288 | | * An array in which this function will record the matches. The recorded |
289 | | * matches will be sorted by strictly increasing length and (non-strictly) |
290 | | * increasing offset. The maximum number of matches that may be found is |
291 | | * 'nice_len - 2'. |
292 | | * |
293 | | * The return value is a pointer to the next available slot in the @lz_matchptr |
294 | | * array. (If no matches were found, this will be the same as @lz_matchptr.) |
295 | | */ |
296 | | static forceinline struct lz_match * |
297 | | bt_matchfinder_get_matches(struct bt_matchfinder *mf, |
298 | | const u8 *in_base, |
299 | | ptrdiff_t cur_pos, |
300 | | u32 max_len, |
301 | | u32 nice_len, |
302 | | u32 max_search_depth, |
303 | | u32 next_hashes[2], |
304 | | struct lz_match *lz_matchptr) |
305 | 0 | { |
306 | 0 | return bt_matchfinder_advance_one_byte(mf, |
307 | 0 | in_base, |
308 | 0 | cur_pos, |
309 | 0 | max_len, |
310 | 0 | nice_len, |
311 | 0 | max_search_depth, |
312 | 0 | next_hashes, |
313 | 0 | lz_matchptr, |
314 | 0 | true); |
315 | 0 | } |
316 | | |
317 | | /* |
318 | | * Advance the matchfinder, but don't record any matches. |
319 | | * |
320 | | * This is very similar to bt_matchfinder_get_matches() because both functions |
321 | | * must do hashing and tree re-rooting. |
322 | | */ |
323 | | static forceinline void |
324 | | bt_matchfinder_skip_byte(struct bt_matchfinder *mf, |
325 | | const u8 *in_base, |
326 | | ptrdiff_t cur_pos, |
327 | | u32 nice_len, |
328 | | u32 max_search_depth, |
329 | | u32 next_hashes[2]) |
330 | 0 | { |
331 | 0 | bt_matchfinder_advance_one_byte(mf, |
332 | 0 | in_base, |
333 | 0 | cur_pos, |
334 | 0 | nice_len, |
335 | 0 | nice_len, |
336 | 0 | max_search_depth, |
337 | 0 | next_hashes, |
338 | 0 | NULL, |
339 | 0 | false); |
340 | 0 | } |
341 | | |
342 | | #endif /* LIB_BT_MATCHFINDER_H */ |