/src/libdeflate/lib/ht_matchfinder.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table |
3 | | * |
4 | | * Copyright 2022 Eric Biggers |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person |
7 | | * obtaining a copy of this software and associated documentation |
8 | | * files (the "Software"), to deal in the Software without |
9 | | * restriction, including without limitation the rights to use, |
10 | | * copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | * copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following |
13 | | * conditions: |
14 | | * |
15 | | * The above copyright notice and this permission notice shall be |
16 | | * included in all copies or substantial portions of the Software. |
17 | | * |
18 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
22 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
23 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
24 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
25 | | * OTHER DEALINGS IN THE SOFTWARE. |
26 | | * |
27 | | * --------------------------------------------------------------------------- |
28 | | * |
29 | | * This is a Hash Table (ht) matchfinder. |
30 | | * |
31 | | * This is a variant of the Hash Chains (hc) matchfinder that is optimized for |
32 | | * very fast compression. The ht_matchfinder stores the hash chains inline in |
33 | | * the hash table, whereas the hc_matchfinder stores them in a separate array. |
34 | | * Storing the hash chains inline is the faster method when max_search_depth |
35 | | * (the maximum chain length) is very small. It is not appropriate when |
36 | | * max_search_depth is larger, as then it uses too much memory. |
37 | | * |
38 | | * Due to its focus on speed, the ht_matchfinder doesn't support length 3 |
39 | | * matches. It also doesn't allow max_search_depth to vary at runtime; it is |
40 | | * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE. |
41 | | * |
42 | | * See hc_matchfinder.h for more information. |
43 | | */ |
44 | | |
45 | | #ifndef LIB_HT_MATCHFINDER_H |
46 | | #define LIB_HT_MATCHFINDER_H |
47 | | |
48 | | #include "matchfinder_common.h" |
49 | | |
50 | 0 | #define HT_MATCHFINDER_HASH_ORDER 15 |
51 | 0 | #define HT_MATCHFINDER_BUCKET_SIZE 2 |
52 | | |
53 | | #define HT_MATCHFINDER_MIN_MATCH_LEN 4 |
54 | | /* Minimum value of max_len for ht_matchfinder_longest_match() */ |
55 | 0 | #define HT_MATCHFINDER_REQUIRED_NBYTES 5 |
56 | | |
57 | | struct MATCHFINDER_ALIGNED ht_matchfinder { |
58 | | mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER] |
59 | | [HT_MATCHFINDER_BUCKET_SIZE]; |
60 | | }; |
61 | | |
62 | | static forceinline void |
63 | | ht_matchfinder_init(struct ht_matchfinder *mf) |
64 | 0 | { |
65 | 0 | STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0); |
66 | |
|
67 | 0 | matchfinder_init((mf_pos_t *)mf, sizeof(*mf)); |
68 | 0 | } |
69 | | |
70 | | static forceinline void |
71 | | ht_matchfinder_slide_window(struct ht_matchfinder *mf) |
72 | 0 | { |
73 | 0 | matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf)); |
74 | 0 | } |
75 | | |
76 | | /* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */ |
77 | | static forceinline u32 |
78 | | ht_matchfinder_longest_match(struct ht_matchfinder * const mf, |
79 | | const u8 ** const in_base_p, |
80 | | const u8 * const in_next, |
81 | | const u32 max_len, |
82 | | const u32 nice_len, |
83 | | u32 * const next_hash, |
84 | | u32 * const offset_ret) |
85 | 0 | { |
86 | 0 | u32 best_len = 0; |
87 | 0 | const u8 *best_matchptr = in_next; |
88 | 0 | u32 cur_pos = in_next - *in_base_p; |
89 | 0 | const u8 *in_base; |
90 | 0 | mf_pos_t cutoff; |
91 | 0 | u32 hash; |
92 | 0 | u32 seq; |
93 | 0 | mf_pos_t cur_node; |
94 | 0 | const u8 *matchptr; |
95 | 0 | #if HT_MATCHFINDER_BUCKET_SIZE > 1 |
96 | 0 | mf_pos_t to_insert; |
97 | 0 | u32 len; |
98 | 0 | #endif |
99 | | #if HT_MATCHFINDER_BUCKET_SIZE > 2 |
100 | | int i; |
101 | | #endif |
102 | | |
103 | | /* This is assumed throughout this function. */ |
104 | 0 | STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4); |
105 | |
|
106 | 0 | if (cur_pos == MATCHFINDER_WINDOW_SIZE) { |
107 | 0 | ht_matchfinder_slide_window(mf); |
108 | 0 | *in_base_p += MATCHFINDER_WINDOW_SIZE; |
109 | 0 | cur_pos = 0; |
110 | 0 | } |
111 | 0 | in_base = *in_base_p; |
112 | 0 | cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE; |
113 | |
|
114 | 0 | hash = *next_hash; |
115 | 0 | STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5); |
116 | 0 | *next_hash = lz_hash(get_unaligned_le32(in_next + 1), |
117 | 0 | HT_MATCHFINDER_HASH_ORDER); |
118 | 0 | seq = load_u32_unaligned(in_next); |
119 | 0 | prefetchw(&mf->hash_tab[*next_hash]); |
120 | | #if HT_MATCHFINDER_BUCKET_SIZE == 1 |
121 | | /* Hand-unrolled version for BUCKET_SIZE == 1 */ |
122 | | cur_node = mf->hash_tab[hash][0]; |
123 | | mf->hash_tab[hash][0] = cur_pos; |
124 | | if (cur_node <= cutoff) |
125 | | goto out; |
126 | | matchptr = &in_base[cur_node]; |
127 | | if (load_u32_unaligned(matchptr) == seq) { |
128 | | best_len = lz_extend(in_next, matchptr, 4, max_len); |
129 | | best_matchptr = matchptr; |
130 | | } |
131 | | #elif HT_MATCHFINDER_BUCKET_SIZE == 2 |
132 | | /* |
133 | | * Hand-unrolled version for BUCKET_SIZE == 2. The logic here also |
134 | | * differs slightly in that it copies the first entry to the second even |
135 | | * if nice_len is reached on the first, as this can be slightly faster. |
136 | | */ |
137 | 0 | cur_node = mf->hash_tab[hash][0]; |
138 | 0 | mf->hash_tab[hash][0] = cur_pos; |
139 | 0 | if (cur_node <= cutoff) |
140 | 0 | goto out; |
141 | 0 | matchptr = &in_base[cur_node]; |
142 | |
|
143 | 0 | to_insert = cur_node; |
144 | 0 | cur_node = mf->hash_tab[hash][1]; |
145 | 0 | mf->hash_tab[hash][1] = to_insert; |
146 | |
|
147 | 0 | if (load_u32_unaligned(matchptr) == seq) { |
148 | 0 | best_len = lz_extend(in_next, matchptr, 4, max_len); |
149 | 0 | best_matchptr = matchptr; |
150 | 0 | if (cur_node <= cutoff || best_len >= nice_len) |
151 | 0 | goto out; |
152 | 0 | matchptr = &in_base[cur_node]; |
153 | 0 | if (load_u32_unaligned(matchptr) == seq && |
154 | 0 | load_u32_unaligned(matchptr + best_len - 3) == |
155 | 0 | load_u32_unaligned(in_next + best_len - 3)) { |
156 | 0 | len = lz_extend(in_next, matchptr, 4, max_len); |
157 | 0 | if (len > best_len) { |
158 | 0 | best_len = len; |
159 | 0 | best_matchptr = matchptr; |
160 | 0 | } |
161 | 0 | } |
162 | 0 | } else { |
163 | 0 | if (cur_node <= cutoff) |
164 | 0 | goto out; |
165 | 0 | matchptr = &in_base[cur_node]; |
166 | 0 | if (load_u32_unaligned(matchptr) == seq) { |
167 | 0 | best_len = lz_extend(in_next, matchptr, 4, max_len); |
168 | 0 | best_matchptr = matchptr; |
169 | 0 | } |
170 | 0 | } |
171 | | #else |
172 | | /* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */ |
173 | | to_insert = cur_pos; |
174 | | for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) { |
175 | | cur_node = mf->hash_tab[hash][i]; |
176 | | mf->hash_tab[hash][i] = to_insert; |
177 | | if (cur_node <= cutoff) |
178 | | goto out; |
179 | | matchptr = &in_base[cur_node]; |
180 | | if (load_u32_unaligned(matchptr) == seq) { |
181 | | len = lz_extend(in_next, matchptr, 4, max_len); |
182 | | if (len > best_len) { |
183 | | best_len = len; |
184 | | best_matchptr = matchptr; |
185 | | if (best_len >= nice_len) |
186 | | goto out; |
187 | | } |
188 | | } |
189 | | to_insert = cur_node; |
190 | | } |
191 | | #endif |
192 | 0 | out: |
193 | 0 | *offset_ret = in_next - best_matchptr; |
194 | 0 | return best_len; |
195 | 0 | } |
196 | | |
197 | | static forceinline void |
198 | | ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf, |
199 | | const u8 ** const in_base_p, |
200 | | const u8 *in_next, |
201 | | const u8 * const in_end, |
202 | | const u32 count, |
203 | | u32 * const next_hash) |
204 | 0 | { |
205 | 0 | s32 cur_pos = in_next - *in_base_p; |
206 | 0 | u32 hash; |
207 | 0 | u32 remaining = count; |
208 | 0 | int i; |
209 | |
|
210 | 0 | if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next)) |
211 | 0 | return; |
212 | | |
213 | 0 | if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) { |
214 | 0 | ht_matchfinder_slide_window(mf); |
215 | 0 | *in_base_p += MATCHFINDER_WINDOW_SIZE; |
216 | 0 | cur_pos -= MATCHFINDER_WINDOW_SIZE; |
217 | 0 | } |
218 | |
|
219 | 0 | hash = *next_hash; |
220 | 0 | do { |
221 | 0 | for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--) |
222 | 0 | mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1]; |
223 | 0 | mf->hash_tab[hash][0] = cur_pos; |
224 | |
|
225 | 0 | hash = lz_hash(get_unaligned_le32(++in_next), |
226 | 0 | HT_MATCHFINDER_HASH_ORDER); |
227 | 0 | cur_pos++; |
228 | 0 | } while (--remaining); |
229 | |
|
230 | 0 | prefetchw(&mf->hash_tab[hash]); |
231 | 0 | *next_hash = hash; |
232 | 0 | } |
233 | | |
234 | | #endif /* LIB_HT_MATCHFINDER_H */ |