/src/xz/src/liblzma/lz/lz_encoder_mf.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lz_encoder_mf.c |
6 | | /// \brief Match finders |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "lz_encoder.h" |
14 | | #include "lz_encoder_hash.h" |
15 | | #include "memcmplen.h" |
16 | | |
17 | | |
18 | | /// \brief Find matches starting from the current byte |
19 | | /// |
20 | | /// \return The length of the longest match found |
21 | | extern uint32_t |
22 | | lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) |
23 | 6.67M | { |
24 | | // Call the match finder. It returns the number of length-distance |
25 | | // pairs found. |
26 | | // FIXME: Minimum count is zero, what _exactly_ is the maximum? |
27 | 6.67M | const uint32_t count = mf->find(mf, matches); |
28 | | |
29 | | // Length of the longest match; assume that no matches were found |
30 | | // and thus the maximum length is zero. |
31 | 6.67M | uint32_t len_best = 0; |
32 | | |
33 | 6.67M | if (count > 0) { |
34 | | #ifndef NDEBUG |
35 | | // Validate the matches. |
36 | | for (uint32_t i = 0; i < count; ++i) { |
37 | | assert(matches[i].len <= mf->nice_len); |
38 | | assert(matches[i].dist < mf->read_pos); |
39 | | assert(memcmp(mf_ptr(mf) - 1, |
40 | | mf_ptr(mf) - matches[i].dist - 2, |
41 | | matches[i].len) == 0); |
42 | | } |
43 | | #endif |
44 | | |
45 | | // The last used element in the array contains |
46 | | // the longest match. |
47 | 2.59M | len_best = matches[count - 1].len; |
48 | | |
49 | | // If a match of maximum search length was found, try to |
50 | | // extend the match to maximum possible length. |
51 | 2.59M | if (len_best == mf->nice_len) { |
52 | | // The limit for the match length is either the |
53 | | // maximum match length supported by the LZ-based |
54 | | // encoder or the number of bytes left in the |
55 | | // dictionary, whichever is smaller. |
56 | 51.5k | uint32_t limit = mf_avail(mf) + 1; |
57 | 51.5k | if (limit > mf->match_len_max) |
58 | 51.3k | limit = mf->match_len_max; |
59 | | |
60 | | // Pointer to the byte we just ran through |
61 | | // the match finder. |
62 | 51.5k | const uint8_t *p1 = mf_ptr(mf) - 1; |
63 | | |
64 | | // Pointer to the beginning of the match. We need -1 |
65 | | // here because the match distances are zero based. |
66 | 51.5k | const uint8_t *p2 = p1 - matches[count - 1].dist - 1; |
67 | | |
68 | 51.5k | len_best = lzma_memcmplen(p1, p2, len_best, limit); |
69 | 51.5k | } |
70 | 2.59M | } |
71 | | |
72 | 6.67M | *count_ptr = count; |
73 | | |
74 | | // Finally update the read position to indicate that match finder was |
75 | | // run for this dictionary offset. |
76 | 6.67M | ++mf->read_ahead; |
77 | | |
78 | 6.67M | return len_best; |
79 | 6.67M | } |
80 | | |
81 | | |
82 | | /// Hash value to indicate unused element in the hash. Since we start the |
83 | | /// positions from dict_size + 1, zero is always too far to qualify |
84 | | /// as usable match position. |
85 | 0 | #define EMPTY_HASH_VALUE 0 |
86 | | |
87 | | |
88 | | /// Normalization must be done when lzma_mf.offset + lzma_mf.read_pos |
89 | | /// reaches MUST_NORMALIZE_POS. |
90 | 0 | #define MUST_NORMALIZE_POS UINT32_MAX |
91 | | |
92 | | |
93 | | /// \brief Normalizes hash values |
94 | | /// |
95 | | /// The hash arrays store positions of match candidates. The positions are |
96 | | /// relative to an arbitrary offset that is not the same as the absolute |
97 | | /// offset in the input stream. The relative position of the current byte |
98 | | /// is lzma_mf.offset + lzma_mf.read_pos. The distances of the matches are |
99 | | /// the differences of the current read position and the position found from |
100 | | /// the hash. |
101 | | /// |
102 | | /// To prevent integer overflows of the offsets stored in the hash arrays, |
103 | | /// we need to "normalize" the stored values now and then. During the |
104 | | /// normalization, we drop values that indicate distance greater than the |
105 | | /// dictionary size, thus making space for new values. |
106 | | static void |
107 | | normalize(lzma_mf *mf) |
108 | 0 | { |
109 | 0 | assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); |
110 | | |
111 | | // In future we may not want to touch the lowest bits, because there |
112 | | // may be match finders that use larger resolution than one byte. |
113 | 0 | const uint32_t subvalue |
114 | 0 | = (MUST_NORMALIZE_POS - mf->cyclic_size); |
115 | | // & ~((UINT32_C(1) << 10) - 1); |
116 | |
|
117 | 0 | for (uint32_t i = 0; i < mf->hash_count; ++i) { |
118 | | // If the distance is greater than the dictionary size, |
119 | | // we can simply mark the hash element as empty. |
120 | 0 | if (mf->hash[i] <= subvalue) |
121 | 0 | mf->hash[i] = EMPTY_HASH_VALUE; |
122 | 0 | else |
123 | 0 | mf->hash[i] -= subvalue; |
124 | 0 | } |
125 | |
|
126 | 0 | for (uint32_t i = 0; i < mf->sons_count; ++i) { |
127 | | // Do the same for mf->son. |
128 | | // |
129 | | // NOTE: There may be uninitialized elements in mf->son. |
130 | | // Valgrind may complain that the "if" below depends on |
131 | | // an uninitialized value. In this case it is safe to ignore |
132 | | // the warning. See also the comments in lz_encoder_init() |
133 | | // in lz_encoder.c. |
134 | 0 | if (mf->son[i] <= subvalue) |
135 | 0 | mf->son[i] = EMPTY_HASH_VALUE; |
136 | 0 | else |
137 | 0 | mf->son[i] -= subvalue; |
138 | 0 | } |
139 | | |
140 | | // Update offset to match the new locations. |
141 | 0 | mf->offset -= subvalue; |
142 | |
|
143 | 0 | return; |
144 | 0 | } |
145 | | |
146 | | |
147 | | /// Mark the current byte as processed from point of view of the match finder. |
148 | | static void |
149 | | move_pos(lzma_mf *mf) |
150 | 35.5M | { |
151 | 35.5M | if (++mf->cyclic_pos == mf->cyclic_size) |
152 | 0 | mf->cyclic_pos = 0; |
153 | | |
154 | 35.5M | ++mf->read_pos; |
155 | 35.5M | assert(mf->read_pos <= mf->write_pos); |
156 | | |
157 | 35.5M | if (unlikely(mf->read_pos + mf->offset == UINT32_MAX)) |
158 | 0 | normalize(mf); |
159 | 35.5M | } |
160 | | |
161 | | |
162 | | /// When flushing, we cannot run the match finder unless there is nice_len |
163 | | /// bytes available in the dictionary. Instead, we skip running the match |
164 | | /// finder (indicating that no match was found), and count how many bytes we |
165 | | /// have ignored this way. |
166 | | /// |
167 | | /// When new data is given after the flushing was completed, the match finder |
168 | | /// is restarted by rewinding mf->read_pos backwards by mf->pending. Then |
169 | | /// the missed bytes are added to the hash using the match finder's skip |
170 | | /// function (with small amount of input, it may start using mf->pending |
171 | | /// again if flushing). |
172 | | /// |
173 | | /// Due to this rewinding, we don't touch cyclic_pos or test for |
174 | | /// normalization. It will be done when the match finder's skip function |
175 | | /// catches up after a flush. |
176 | | static void |
177 | | move_pending(lzma_mf *mf) |
178 | 4.57k | { |
179 | 4.57k | ++mf->read_pos; |
180 | 4.57k | assert(mf->read_pos <= mf->write_pos); |
181 | 4.57k | ++mf->pending; |
182 | 4.57k | } |
183 | | |
184 | | |
185 | | /// Calculate len_limit and determine if there is enough input to run |
186 | | /// the actual match finder code. Sets up "cur" and "pos". This macro |
187 | | /// is used by all find functions and binary tree skip functions. Hash |
188 | | /// chain skip function doesn't need len_limit so a simpler code is used |
189 | | /// in them. |
190 | | #define header(is_bt, len_min, ret_op) \ |
191 | 6.67M | uint32_t len_limit = mf_avail(mf); \ |
192 | 6.67M | if (mf->nice_len <= len_limit) { \ |
193 | 6.62M | len_limit = mf->nice_len; \ |
194 | 6.62M | } else if (len_limit < (len_min) \ |
195 | 56.2k | || (is_bt && mf->action == LZMA_SYNC_FLUSH)) { \ |
196 | 1.41k | assert(mf->action != LZMA_RUN); \ |
197 | 1.41k | move_pending(mf); \ |
198 | 1.41k | ret_op; \ |
199 | 1.41k | } \ |
200 | 6.67M | const uint8_t *cur = mf_ptr(mf); \ |
201 | 6.67M | const uint32_t pos = mf->read_pos + mf->offset |
202 | | |
203 | | |
204 | | /// Header for find functions. "return 0" indicates that zero matches |
205 | | /// were found. |
206 | | #define header_find(is_bt, len_min) \ |
207 | 6.67M | header(is_bt, len_min, return 0); \ |
208 | 6.67M | uint32_t matches_count = 0 |
209 | | |
210 | | |
211 | | /// Header for a loop in a skip function. "continue" tells to skip the rest |
212 | | /// of the code in the loop. |
213 | | #define header_skip(is_bt, len_min) \ |
214 | 0 | header(is_bt, len_min, continue) |
215 | | |
216 | | |
217 | | /// Calls hc_find_func() or bt_find_func() and calculates the total number |
218 | | /// of matches found. Updates the dictionary position and returns the number |
219 | | /// of matches found. |
220 | 6.64M | #define call_find(func, len_best) \ |
221 | 6.64M | do { \ |
222 | 6.64M | matches_count = (uint32_t)(func(len_limit, pos, cur, cur_match, \ |
223 | 6.64M | mf->depth, mf->son, \ |
224 | 6.64M | mf->cyclic_pos, mf->cyclic_size, \ |
225 | 6.64M | matches + matches_count, len_best) \ |
226 | 6.64M | - matches); \ |
227 | 6.64M | move_pos(mf); \ |
228 | 6.64M | return matches_count; \ |
229 | 6.64M | } while (0) |
230 | | |
231 | | |
232 | | //////////////// |
233 | | // Hash Chain // |
234 | | //////////////// |
235 | | |
236 | | #if defined(HAVE_MF_HC3) || defined(HAVE_MF_HC4) |
237 | | /// |
238 | | /// |
239 | | /// \param len_limit Don't look for matches longer than len_limit. |
240 | | /// \param pos lzma_mf.read_pos + lzma_mf.offset |
241 | | /// \param cur Pointer to current byte (mf_ptr(mf)) |
242 | | /// \param cur_match Start position of the current match candidate |
243 | | /// \param depth Maximum length of the hash chain |
244 | | /// \param son lzma_mf.son (contains the hash chain) |
245 | | /// \param cyclic_pos lzma_mf.cyclic_pos |
246 | | /// \param cyclic_size lzma_mf_cyclic_size |
247 | | /// \param matches Array to hold the matches. |
248 | | /// \param len_best The length of the longest match found so far. |
249 | | static lzma_match * |
250 | | hc_find_func( |
251 | | const uint32_t len_limit, |
252 | | const uint32_t pos, |
253 | | const uint8_t *const cur, |
254 | | uint32_t cur_match, |
255 | | uint32_t depth, |
256 | | uint32_t *const son, |
257 | | const uint32_t cyclic_pos, |
258 | | const uint32_t cyclic_size, |
259 | | lzma_match *matches, |
260 | | uint32_t len_best) |
261 | 6.64M | { |
262 | 6.64M | son[cyclic_pos] = cur_match; |
263 | | |
264 | 20.1M | while (true) { |
265 | 20.1M | const uint32_t delta = pos - cur_match; |
266 | 20.1M | if (depth-- == 0 || delta >= cyclic_size) |
267 | 6.62M | return matches; |
268 | | |
269 | 13.4M | const uint8_t *const pb = cur - delta; |
270 | 13.4M | cur_match = son[cyclic_pos - delta |
271 | 13.4M | + (delta > cyclic_pos ? cyclic_size : 0)]; |
272 | | |
273 | 13.4M | if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { |
274 | 3.74M | uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit); |
275 | | |
276 | 3.74M | if (len_best < len) { |
277 | 2.41M | len_best = len; |
278 | 2.41M | matches->len = len; |
279 | 2.41M | matches->dist = delta - 1; |
280 | 2.41M | ++matches; |
281 | | |
282 | 2.41M | if (len == len_limit) |
283 | 19.7k | return matches; |
284 | 2.41M | } |
285 | 3.74M | } |
286 | 13.4M | } |
287 | 6.64M | } |
288 | | |
289 | | |
290 | | #define hc_find(len_best) \ |
291 | 6.64M | call_find(hc_find_func, len_best) |
292 | | |
293 | | |
294 | 28.9M | #define hc_skip() \ |
295 | 28.9M | do { \ |
296 | 28.9M | mf->son[mf->cyclic_pos] = cur_match; \ |
297 | 28.9M | move_pos(mf); \ |
298 | 28.9M | } while (0) |
299 | | |
300 | | #endif |
301 | | |
302 | | |
303 | | #ifdef HAVE_MF_HC3 |
304 | | extern uint32_t |
305 | | lzma_mf_hc3_find(lzma_mf *mf, lzma_match *matches) |
306 | 0 | { |
307 | 0 | header_find(false, 3); |
308 | |
|
309 | 0 | hash_3_calc(); |
310 | |
|
311 | 0 | const uint32_t delta2 = pos - mf->hash[hash_2_value]; |
312 | 0 | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; |
313 | |
|
314 | 0 | mf->hash[hash_2_value] = pos; |
315 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; |
316 | |
|
317 | 0 | uint32_t len_best = 2; |
318 | |
|
319 | 0 | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { |
320 | 0 | len_best = lzma_memcmplen(cur - delta2, cur, |
321 | 0 | len_best, len_limit); |
322 | |
|
323 | 0 | matches[0].len = len_best; |
324 | 0 | matches[0].dist = delta2 - 1; |
325 | 0 | matches_count = 1; |
326 | |
|
327 | 0 | if (len_best == len_limit) { |
328 | 0 | hc_skip(); |
329 | 0 | return 1; // matches_count |
330 | 0 | } |
331 | 0 | } |
332 | | |
333 | 0 | hc_find(len_best); |
334 | 0 | } |
335 | | |
336 | | |
337 | | extern void |
338 | | lzma_mf_hc3_skip(lzma_mf *mf, uint32_t amount) |
339 | 0 | { |
340 | 0 | do { |
341 | 0 | if (mf_avail(mf) < 3) { |
342 | 0 | move_pending(mf); |
343 | 0 | continue; |
344 | 0 | } |
345 | | |
346 | 0 | const uint8_t *cur = mf_ptr(mf); |
347 | 0 | const uint32_t pos = mf->read_pos + mf->offset; |
348 | |
|
349 | 0 | hash_3_calc(); |
350 | |
|
351 | 0 | const uint32_t cur_match |
352 | 0 | = mf->hash[FIX_3_HASH_SIZE + hash_value]; |
353 | |
|
354 | 0 | mf->hash[hash_2_value] = pos; |
355 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; |
356 | |
|
357 | 0 | hc_skip(); |
358 | |
|
359 | 0 | } while (--amount != 0); |
360 | 0 | } |
361 | | #endif |
362 | | |
363 | | |
364 | | #ifdef HAVE_MF_HC4 |
365 | | extern uint32_t |
366 | | lzma_mf_hc4_find(lzma_mf *mf, lzma_match *matches) |
367 | 6.67M | { |
368 | 6.67M | header_find(false, 4); |
369 | | |
370 | 6.67M | hash_4_calc(); |
371 | | |
372 | 6.67M | uint32_t delta2 = pos - mf->hash[hash_2_value]; |
373 | 6.67M | const uint32_t delta3 |
374 | 6.67M | = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; |
375 | 6.67M | const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; |
376 | | |
377 | 6.67M | mf->hash[hash_2_value ] = pos; |
378 | 6.67M | mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; |
379 | 6.67M | mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; |
380 | | |
381 | 6.67M | uint32_t len_best = 1; |
382 | | |
383 | 6.67M | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { |
384 | 2.41M | len_best = 2; |
385 | 2.41M | matches[0].len = 2; |
386 | 2.41M | matches[0].dist = delta2 - 1; |
387 | 2.41M | matches_count = 1; |
388 | 2.41M | } |
389 | | |
390 | 6.67M | if (delta2 != delta3 && delta3 < mf->cyclic_size |
391 | 2.23M | && *(cur - delta3) == *cur) { |
392 | 986k | len_best = 3; |
393 | 986k | matches[matches_count++].dist = delta3 - 1; |
394 | 986k | delta2 = delta3; |
395 | 986k | } |
396 | | |
397 | 6.67M | if (matches_count != 0) { |
398 | 2.59M | len_best = lzma_memcmplen(cur - delta2, cur, |
399 | 2.59M | len_best, len_limit); |
400 | | |
401 | 2.59M | matches[matches_count - 1].len = len_best; |
402 | | |
403 | 2.59M | if (len_best == len_limit) { |
404 | 33.0k | hc_skip(); |
405 | 33.0k | return matches_count; |
406 | 33.0k | } |
407 | 2.59M | } |
408 | | |
409 | 6.64M | if (len_best < 3) |
410 | 4.46M | len_best = 3; |
411 | | |
412 | 6.64M | hc_find(len_best); |
413 | 6.64M | } |
414 | | |
415 | | |
416 | | extern void |
417 | | lzma_mf_hc4_skip(lzma_mf *mf, uint32_t amount) |
418 | 1.18M | { |
419 | 28.9M | do { |
420 | 28.9M | if (mf_avail(mf) < 4) { |
421 | 3.15k | move_pending(mf); |
422 | 3.15k | continue; |
423 | 3.15k | } |
424 | | |
425 | 28.9M | const uint8_t *cur = mf_ptr(mf); |
426 | 28.9M | const uint32_t pos = mf->read_pos + mf->offset; |
427 | | |
428 | 28.9M | hash_4_calc(); |
429 | | |
430 | 28.9M | const uint32_t cur_match |
431 | 28.9M | = mf->hash[FIX_4_HASH_SIZE + hash_value]; |
432 | | |
433 | 28.9M | mf->hash[hash_2_value] = pos; |
434 | 28.9M | mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; |
435 | 28.9M | mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; |
436 | | |
437 | 28.9M | hc_skip(); |
438 | | |
439 | 28.9M | } while (--amount != 0); |
440 | 1.18M | } |
441 | | #endif |
442 | | |
443 | | |
444 | | ///////////////// |
445 | | // Binary Tree // |
446 | | ///////////////// |
447 | | |
448 | | #if defined(HAVE_MF_BT2) || defined(HAVE_MF_BT3) || defined(HAVE_MF_BT4) |
449 | | static lzma_match * |
450 | | bt_find_func( |
451 | | const uint32_t len_limit, |
452 | | const uint32_t pos, |
453 | | const uint8_t *const cur, |
454 | | uint32_t cur_match, |
455 | | uint32_t depth, |
456 | | uint32_t *const son, |
457 | | const uint32_t cyclic_pos, |
458 | | const uint32_t cyclic_size, |
459 | | lzma_match *matches, |
460 | | uint32_t len_best) |
461 | 0 | { |
462 | 0 | uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; |
463 | 0 | uint32_t *ptr1 = son + (cyclic_pos << 1); |
464 | |
|
465 | 0 | uint32_t len0 = 0; |
466 | 0 | uint32_t len1 = 0; |
467 | |
|
468 | 0 | while (true) { |
469 | 0 | const uint32_t delta = pos - cur_match; |
470 | 0 | if (depth-- == 0 || delta >= cyclic_size) { |
471 | 0 | *ptr0 = EMPTY_HASH_VALUE; |
472 | 0 | *ptr1 = EMPTY_HASH_VALUE; |
473 | 0 | return matches; |
474 | 0 | } |
475 | | |
476 | 0 | uint32_t *const pair = son + ((cyclic_pos - delta |
477 | 0 | + (delta > cyclic_pos ? cyclic_size : 0)) |
478 | 0 | << 1); |
479 | |
|
480 | 0 | const uint8_t *const pb = cur - delta; |
481 | 0 | uint32_t len = my_min(len0, len1); |
482 | |
|
483 | 0 | if (pb[len] == cur[len]) { |
484 | 0 | len = lzma_memcmplen(pb, cur, len + 1, len_limit); |
485 | |
|
486 | 0 | if (len_best < len) { |
487 | 0 | len_best = len; |
488 | 0 | matches->len = len; |
489 | 0 | matches->dist = delta - 1; |
490 | 0 | ++matches; |
491 | |
|
492 | 0 | if (len == len_limit) { |
493 | 0 | *ptr1 = pair[0]; |
494 | 0 | *ptr0 = pair[1]; |
495 | 0 | return matches; |
496 | 0 | } |
497 | 0 | } |
498 | 0 | } |
499 | | |
500 | 0 | if (pb[len] < cur[len]) { |
501 | 0 | *ptr1 = cur_match; |
502 | 0 | ptr1 = pair + 1; |
503 | 0 | cur_match = *ptr1; |
504 | 0 | len1 = len; |
505 | 0 | } else { |
506 | 0 | *ptr0 = cur_match; |
507 | 0 | ptr0 = pair; |
508 | 0 | cur_match = *ptr0; |
509 | 0 | len0 = len; |
510 | 0 | } |
511 | 0 | } |
512 | 0 | } |
513 | | |
514 | | |
515 | | static void |
516 | | bt_skip_func( |
517 | | const uint32_t len_limit, |
518 | | const uint32_t pos, |
519 | | const uint8_t *const cur, |
520 | | uint32_t cur_match, |
521 | | uint32_t depth, |
522 | | uint32_t *const son, |
523 | | const uint32_t cyclic_pos, |
524 | | const uint32_t cyclic_size) |
525 | 0 | { |
526 | 0 | uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; |
527 | 0 | uint32_t *ptr1 = son + (cyclic_pos << 1); |
528 | |
|
529 | 0 | uint32_t len0 = 0; |
530 | 0 | uint32_t len1 = 0; |
531 | |
|
532 | 0 | while (true) { |
533 | 0 | const uint32_t delta = pos - cur_match; |
534 | 0 | if (depth-- == 0 || delta >= cyclic_size) { |
535 | 0 | *ptr0 = EMPTY_HASH_VALUE; |
536 | 0 | *ptr1 = EMPTY_HASH_VALUE; |
537 | 0 | return; |
538 | 0 | } |
539 | | |
540 | 0 | uint32_t *pair = son + ((cyclic_pos - delta |
541 | 0 | + (delta > cyclic_pos ? cyclic_size : 0)) |
542 | 0 | << 1); |
543 | 0 | const uint8_t *pb = cur - delta; |
544 | 0 | uint32_t len = my_min(len0, len1); |
545 | |
|
546 | 0 | if (pb[len] == cur[len]) { |
547 | 0 | len = lzma_memcmplen(pb, cur, len + 1, len_limit); |
548 | |
|
549 | 0 | if (len == len_limit) { |
550 | 0 | *ptr1 = pair[0]; |
551 | 0 | *ptr0 = pair[1]; |
552 | 0 | return; |
553 | 0 | } |
554 | 0 | } |
555 | | |
556 | 0 | if (pb[len] < cur[len]) { |
557 | 0 | *ptr1 = cur_match; |
558 | 0 | ptr1 = pair + 1; |
559 | 0 | cur_match = *ptr1; |
560 | 0 | len1 = len; |
561 | 0 | } else { |
562 | 0 | *ptr0 = cur_match; |
563 | 0 | ptr0 = pair; |
564 | 0 | cur_match = *ptr0; |
565 | 0 | len0 = len; |
566 | 0 | } |
567 | 0 | } |
568 | 0 | } |
569 | | |
570 | | |
571 | | #define bt_find(len_best) \ |
572 | 0 | call_find(bt_find_func, len_best) |
573 | | |
574 | 0 | #define bt_skip() \ |
575 | 0 | do { \ |
576 | 0 | bt_skip_func(len_limit, pos, cur, cur_match, mf->depth, \ |
577 | 0 | mf->son, mf->cyclic_pos, \ |
578 | 0 | mf->cyclic_size); \ |
579 | 0 | move_pos(mf); \ |
580 | 0 | } while (0) |
581 | | |
582 | | #endif |
583 | | |
584 | | |
585 | | #ifdef HAVE_MF_BT2 |
586 | | extern uint32_t |
587 | | lzma_mf_bt2_find(lzma_mf *mf, lzma_match *matches) |
588 | 0 | { |
589 | 0 | header_find(true, 2); |
590 | |
|
591 | 0 | hash_2_calc(); |
592 | |
|
593 | 0 | const uint32_t cur_match = mf->hash[hash_value]; |
594 | 0 | mf->hash[hash_value] = pos; |
595 | |
|
596 | 0 | bt_find(1); |
597 | 0 | } |
598 | | |
599 | | |
600 | | extern void |
601 | | lzma_mf_bt2_skip(lzma_mf *mf, uint32_t amount) |
602 | 0 | { |
603 | 0 | do { |
604 | 0 | header_skip(true, 2); |
605 | |
|
606 | 0 | hash_2_calc(); |
607 | |
|
608 | 0 | const uint32_t cur_match = mf->hash[hash_value]; |
609 | 0 | mf->hash[hash_value] = pos; |
610 | |
|
611 | 0 | bt_skip(); |
612 | |
|
613 | 0 | } while (--amount != 0); |
614 | 0 | } |
615 | | #endif |
616 | | |
617 | | |
618 | | #ifdef HAVE_MF_BT3 |
619 | | extern uint32_t |
620 | | lzma_mf_bt3_find(lzma_mf *mf, lzma_match *matches) |
621 | 0 | { |
622 | 0 | header_find(true, 3); |
623 | |
|
624 | 0 | hash_3_calc(); |
625 | |
|
626 | 0 | const uint32_t delta2 = pos - mf->hash[hash_2_value]; |
627 | 0 | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; |
628 | |
|
629 | 0 | mf->hash[hash_2_value] = pos; |
630 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; |
631 | |
|
632 | 0 | uint32_t len_best = 2; |
633 | |
|
634 | 0 | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { |
635 | 0 | len_best = lzma_memcmplen( |
636 | 0 | cur, cur - delta2, len_best, len_limit); |
637 | |
|
638 | 0 | matches[0].len = len_best; |
639 | 0 | matches[0].dist = delta2 - 1; |
640 | 0 | matches_count = 1; |
641 | |
|
642 | 0 | if (len_best == len_limit) { |
643 | 0 | bt_skip(); |
644 | 0 | return 1; // matches_count |
645 | 0 | } |
646 | 0 | } |
647 | | |
648 | 0 | bt_find(len_best); |
649 | 0 | } |
650 | | |
651 | | |
652 | | extern void |
653 | | lzma_mf_bt3_skip(lzma_mf *mf, uint32_t amount) |
654 | 0 | { |
655 | 0 | do { |
656 | 0 | header_skip(true, 3); |
657 | |
|
658 | 0 | hash_3_calc(); |
659 | |
|
660 | 0 | const uint32_t cur_match |
661 | 0 | = mf->hash[FIX_3_HASH_SIZE + hash_value]; |
662 | |
|
663 | 0 | mf->hash[hash_2_value] = pos; |
664 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; |
665 | |
|
666 | 0 | bt_skip(); |
667 | |
|
668 | 0 | } while (--amount != 0); |
669 | 0 | } |
670 | | #endif |
671 | | |
672 | | |
673 | | #ifdef HAVE_MF_BT4 |
674 | | extern uint32_t |
675 | | lzma_mf_bt4_find(lzma_mf *mf, lzma_match *matches) |
676 | 0 | { |
677 | 0 | header_find(true, 4); |
678 | |
|
679 | 0 | hash_4_calc(); |
680 | |
|
681 | 0 | uint32_t delta2 = pos - mf->hash[hash_2_value]; |
682 | 0 | const uint32_t delta3 |
683 | 0 | = pos - mf->hash[FIX_3_HASH_SIZE + hash_3_value]; |
684 | 0 | const uint32_t cur_match = mf->hash[FIX_4_HASH_SIZE + hash_value]; |
685 | |
|
686 | 0 | mf->hash[hash_2_value] = pos; |
687 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; |
688 | 0 | mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; |
689 | |
|
690 | 0 | uint32_t len_best = 1; |
691 | |
|
692 | 0 | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { |
693 | 0 | len_best = 2; |
694 | 0 | matches[0].len = 2; |
695 | 0 | matches[0].dist = delta2 - 1; |
696 | 0 | matches_count = 1; |
697 | 0 | } |
698 | |
|
699 | 0 | if (delta2 != delta3 && delta3 < mf->cyclic_size |
700 | 0 | && *(cur - delta3) == *cur) { |
701 | 0 | len_best = 3; |
702 | 0 | matches[matches_count++].dist = delta3 - 1; |
703 | 0 | delta2 = delta3; |
704 | 0 | } |
705 | |
|
706 | 0 | if (matches_count != 0) { |
707 | 0 | len_best = lzma_memcmplen( |
708 | 0 | cur, cur - delta2, len_best, len_limit); |
709 | |
|
710 | 0 | matches[matches_count - 1].len = len_best; |
711 | |
|
712 | 0 | if (len_best == len_limit) { |
713 | 0 | bt_skip(); |
714 | 0 | return matches_count; |
715 | 0 | } |
716 | 0 | } |
717 | | |
718 | 0 | if (len_best < 3) |
719 | 0 | len_best = 3; |
720 | |
|
721 | 0 | bt_find(len_best); |
722 | 0 | } |
723 | | |
724 | | |
725 | | extern void |
726 | | lzma_mf_bt4_skip(lzma_mf *mf, uint32_t amount) |
727 | 0 | { |
728 | 0 | do { |
729 | 0 | header_skip(true, 4); |
730 | |
|
731 | 0 | hash_4_calc(); |
732 | |
|
733 | 0 | const uint32_t cur_match |
734 | 0 | = mf->hash[FIX_4_HASH_SIZE + hash_value]; |
735 | |
|
736 | 0 | mf->hash[hash_2_value] = pos; |
737 | 0 | mf->hash[FIX_3_HASH_SIZE + hash_3_value] = pos; |
738 | 0 | mf->hash[FIX_4_HASH_SIZE + hash_value] = pos; |
739 | |
|
740 | 0 | bt_skip(); |
741 | |
|
742 | 0 | } while (--amount != 0); |
743 | 0 | } |
744 | | #endif |