Coverage Report

Created: 2024-07-27 06:04

/work/_deps/deflate-src/lib/hc_matchfinder.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists
3
 *
4
 * Copyright 2016 Eric Biggers
5
 *
6
 * Permission is hereby granted, free of charge, to any person
7
 * obtaining a copy of this software and associated documentation
8
 * files (the "Software"), to deal in the Software without
9
 * restriction, including without limitation the rights to use,
10
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following
13
 * conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be
16
 * included in all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
 * OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 * ---------------------------------------------------------------------------
28
 *
29
 *           Algorithm
30
 *
31
 * This is a Hash Chains (hc) based matchfinder.
32
 *
33
 * The main data structure is a hash table where each hash bucket contains a
34
 * linked list (or "chain") of sequences whose first 4 bytes share the same hash
35
 * code.  Each sequence is identified by its starting position in the input
36
 * buffer.
37
 *
38
 * The algorithm processes the input buffer sequentially.  At each byte
39
 * position, the hash code of the first 4 bytes of the sequence beginning at
40
 * that position (the sequence being matched against) is computed.  This
41
 * identifies the hash bucket to use for that position.  Then, this hash
42
 * bucket's linked list is searched for matches.  Then, a new linked list node
43
 * is created to represent the current sequence and is prepended to the list.
44
 *
45
 * This algorithm has several useful properties:
46
 *
47
 * - It only finds true Lempel-Ziv matches; i.e., those where the matching
48
 *   sequence occurs prior to the sequence being matched against.
49
 *
50
 * - The sequences in each linked list are always sorted by decreasing starting
51
 *   position.  Therefore, the closest (smallest offset) matches are found
52
 *   first, which in many compression formats tend to be the cheapest to encode.
53
 *
54
 * - Although fast running time is not guaranteed due to the possibility of the
55
 *   lists getting very long, the worst degenerate behavior can be easily
56
 *   prevented by capping the number of nodes searched at each position.
57
 *
58
 * - If the compressor decides not to search for matches at a certain position,
59
 *   then that position can be quickly inserted without searching the list.
60
 *
61
 * - The algorithm is adaptable to sliding windows: just store the positions
62
 *   relative to a "base" value that is updated from time to time, and stop
63
 *   searching each list when the sequences get too far away.
64
 *
65
 * ----------------------------------------------------------------------------
66
 *
67
 *         Optimizations
68
 *
69
 * The main hash table and chains handle length 4+ matches.  Length 3 matches
70
 * are handled by a separate hash table with no chains.  This works well for
71
 * typical "greedy" or "lazy"-style compressors, where length 3 matches are
72
 * often only helpful if they have small offsets.  Instead of searching a full
73
 * chain for length 3+ matches, the algorithm just checks for one close length 3
74
 * match, then focuses on finding length 4+ matches.
75
 *
76
 * The longest_match() and skip_bytes() functions are inlined into the
77
 * compressors that use them.  This isn't just about saving the overhead of a
78
 * function call.  These functions are intended to be called from the inner
79
 * loops of compressors, where giving the compiler more control over register
80
 * allocation is very helpful.  There is also significant benefit to be gained
81
 * from allowing the CPU to predict branches independently at each call site.
82
 * For example, "lazy"-style compressors can be written with two calls to
83
 * longest_match(), each of which starts with a different 'best_len' and
84
 * therefore has significantly different performance characteristics.
85
 *
86
 * Although any hash function can be used, a multiplicative hash is fast and
87
 * works well.
88
 *
89
 * On some processors, it is significantly faster to extend matches by whole
90
 * words (32 or 64 bits) instead of by individual bytes.  For this to be the
91
 * case, the processor must implement unaligned memory accesses efficiently and
92
 * must have either a fast "find first set bit" instruction or a fast "find last
93
 * set bit" instruction, depending on the processor's endianness.
94
 *
95
 * The code uses one loop for finding the first match and one loop for finding a
96
 * longer match.  Each of these loops is tuned for its respective task and in
97
 * combination are faster than a single generalized loop that handles both
98
 * tasks.
99
 *
100
 * The code also uses a tight inner loop that only compares the last and first
101
 * bytes of a potential match.  It is only when these bytes match that a full
102
 * match extension is attempted.
103
 *
104
 * ----------------------------------------------------------------------------
105
 */
106
107
#ifndef LIB_HC_MATCHFINDER_H
108
#define LIB_HC_MATCHFINDER_H
109
110
#include "matchfinder_common.h"
111
112
0
#define HC_MATCHFINDER_HASH3_ORDER  15
113
0
#define HC_MATCHFINDER_HASH4_ORDER  16
114
115
#define HC_MATCHFINDER_TOTAL_HASH_SIZE      \
116
0
  (((1UL << HC_MATCHFINDER_HASH3_ORDER) +   \
117
0
    (1UL << HC_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
118
119
struct MATCHFINDER_ALIGNED hc_matchfinder  {
120
121
  /* The hash table for finding length 3 matches  */
122
  mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];
123
124
  /* The hash table which contains the first nodes of the linked lists for
125
   * finding length 4+ matches  */
126
  mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];
127
128
  /* The "next node" references for the linked lists.  The "next node" of
129
   * the node for the sequence with position 'pos' is 'next_tab[pos]'.  */
130
  mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];
131
};
132
133
/* Prepare the matchfinder for a new input buffer.  */
134
static forceinline void
135
hc_matchfinder_init(struct hc_matchfinder *mf)
136
0
{
137
0
  STATIC_ASSERT(HC_MATCHFINDER_TOTAL_HASH_SIZE %
138
0
          MATCHFINDER_SIZE_ALIGNMENT == 0);
139
140
0
  matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_SIZE);
141
0
}
142
143
static forceinline void
144
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
145
0
{
146
0
  STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
147
148
0
  matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
149
0
}
150
151
/*
152
 * Find the longest match longer than 'best_len' bytes.
153
 *
154
 * @mf
155
 *  The matchfinder structure.
156
 * @in_base_p
157
 *  Location of a pointer which points to the place in the input data the
158
 *  matchfinder currently stores positions relative to.  This may be updated
159
 *  by this function.
160
 * @in_next
161
 *  Pointer to the next position in the input buffer, i.e. the sequence
162
 *  being matched against.
163
 * @best_len
164
 *  Require a match longer than this length.
165
 * @max_len
166
 *  The maximum permissible match length at this position.
167
 * @nice_len
168
 *  Stop searching if a match of at least this length is found.
169
 *  Must be <= @max_len.
170
 * @max_search_depth
171
 *  Limit on the number of potential matches to consider.  Must be >= 1.
172
 * @next_hashes
173
 *  The precomputed hash codes for the sequence beginning at @in_next.
174
 *  These will be used and then updated with the precomputed hashcodes for
175
 *  the sequence beginning at @in_next + 1.
176
 * @offset_ret
177
 *  If a match is found, its offset is returned in this location.
178
 *
179
 * Return the length of the match found, or 'best_len' if no match longer than
180
 * 'best_len' was found.
181
 */
182
static forceinline u32
183
hc_matchfinder_longest_match(struct hc_matchfinder * const mf,
184
           const u8 ** const in_base_p,
185
           const u8 * const in_next,
186
           u32 best_len,
187
           const u32 max_len,
188
           const u32 nice_len,
189
           const u32 max_search_depth,
190
           u32 * const next_hashes,
191
           u32 * const offset_ret)
192
0
{
193
0
  u32 depth_remaining = max_search_depth;
194
0
  const u8 *best_matchptr = in_next;
195
0
  mf_pos_t cur_node3, cur_node4;
196
0
  u32 hash3, hash4;
197
0
  u32 next_hashseq;
198
0
  u32 seq4;
199
0
  const u8 *matchptr;
200
0
  u32 len;
201
0
  u32 cur_pos = in_next - *in_base_p;
202
0
  const u8 *in_base;
203
0
  mf_pos_t cutoff;
204
205
0
  if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
206
0
    hc_matchfinder_slide_window(mf);
207
0
    *in_base_p += MATCHFINDER_WINDOW_SIZE;
208
0
    cur_pos = 0;
209
0
  }
210
211
0
  in_base = *in_base_p;
212
0
  cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
213
214
0
  if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
215
0
    goto out;
216
217
  /* Get the precomputed hash codes.  */
218
0
  hash3 = next_hashes[0];
219
0
  hash4 = next_hashes[1];
220
221
  /* From the hash buckets, get the first node of each linked list.  */
222
0
  cur_node3 = mf->hash3_tab[hash3];
223
0
  cur_node4 = mf->hash4_tab[hash4];
224
225
  /* Update for length 3 matches.  This replaces the singleton node in the
226
   * 'hash3' bucket with the node for the current sequence.  */
227
0
  mf->hash3_tab[hash3] = cur_pos;
228
229
  /* Update for length 4 matches.  This prepends the node for the current
230
   * sequence to the linked list in the 'hash4' bucket.  */
231
0
  mf->hash4_tab[hash4] = cur_pos;
232
0
  mf->next_tab[cur_pos] = cur_node4;
233
234
  /* Compute the next hash codes.  */
235
0
  next_hashseq = get_unaligned_le32(in_next + 1);
236
0
  next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
237
0
  next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
238
0
  prefetchw(&mf->hash3_tab[next_hashes[0]]);
239
0
  prefetchw(&mf->hash4_tab[next_hashes[1]]);
240
241
0
  if (best_len < 4) {  /* No match of length >= 4 found yet?  */
242
243
    /* Check for a length 3 match if needed.  */
244
245
0
    if (cur_node3 <= cutoff)
246
0
      goto out;
247
248
0
    seq4 = load_u32_unaligned(in_next);
249
250
0
    if (best_len < 3) {
251
0
      matchptr = &in_base[cur_node3];
252
0
      if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
253
0
        best_len = 3;
254
0
        best_matchptr = matchptr;
255
0
      }
256
0
    }
257
258
    /* Check for a length 4 match.  */
259
260
0
    if (cur_node4 <= cutoff)
261
0
      goto out;
262
263
0
    for (;;) {
264
      /* No length 4 match found yet.  Check the first 4 bytes.  */
265
0
      matchptr = &in_base[cur_node4];
266
267
0
      if (load_u32_unaligned(matchptr) == seq4)
268
0
        break;
269
270
      /* The first 4 bytes did not match.  Keep trying.  */
271
0
      cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
272
0
      if (cur_node4 <= cutoff || !--depth_remaining)
273
0
        goto out;
274
0
    }
275
276
    /* Found a match of length >= 4.  Extend it to its full length.  */
277
0
    best_matchptr = matchptr;
278
0
    best_len = lz_extend(in_next, best_matchptr, 4, max_len);
279
0
    if (best_len >= nice_len)
280
0
      goto out;
281
0
    cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
282
0
    if (cur_node4 <= cutoff || !--depth_remaining)
283
0
      goto out;
284
0
  } else {
285
0
    if (cur_node4 <= cutoff || best_len >= nice_len)
286
0
      goto out;
287
0
  }
288
289
  /* Check for matches of length >= 5.  */
290
291
0
  for (;;) {
292
0
    for (;;) {
293
0
      matchptr = &in_base[cur_node4];
294
295
      /* Already found a length 4 match.  Try for a longer
296
       * match; start by checking either the last 4 bytes and
297
       * the first 4 bytes, or the last byte.  (The last byte,
298
       * the one which would extend the match length by 1, is
299
       * the most important.)  */
300
0
    #if UNALIGNED_ACCESS_IS_FAST
301
0
      if ((load_u32_unaligned(matchptr + best_len - 3) ==
302
0
           load_u32_unaligned(in_next + best_len - 3)) &&
303
0
          (load_u32_unaligned(matchptr) ==
304
0
           load_u32_unaligned(in_next)))
305
    #else
306
      if (matchptr[best_len] == in_next[best_len])
307
    #endif
308
0
        break;
309
310
      /* Continue to the next node in the list.  */
311
0
      cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
312
0
      if (cur_node4 <= cutoff || !--depth_remaining)
313
0
        goto out;
314
0
    }
315
316
0
  #if UNALIGNED_ACCESS_IS_FAST
317
0
    len = 4;
318
  #else
319
    len = 0;
320
  #endif
321
0
    len = lz_extend(in_next, matchptr, len, max_len);
322
0
    if (len > best_len) {
323
      /* This is the new longest match.  */
324
0
      best_len = len;
325
0
      best_matchptr = matchptr;
326
0
      if (best_len >= nice_len)
327
0
        goto out;
328
0
    }
329
330
    /* Continue to the next node in the list.  */
331
0
    cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
332
0
    if (cur_node4 <= cutoff || !--depth_remaining)
333
0
      goto out;
334
0
  }
335
0
out:
336
0
  *offset_ret = in_next - best_matchptr;
337
0
  return best_len;
338
0
}
339
340
/*
341
 * Advance the matchfinder, but don't search for matches.
342
 *
343
 * @mf
344
 *  The matchfinder structure.
345
 * @in_base_p
346
 *  Location of a pointer which points to the place in the input data the
347
 *  matchfinder currently stores positions relative to.  This may be updated
348
 *  by this function.
349
 * @in_next
350
 *  Pointer to the next position in the input buffer.
351
 * @in_end
352
 *  Pointer to the end of the input buffer.
353
 * @count
354
 *  The number of bytes to advance.  Must be > 0.
355
 * @next_hashes
356
 *  The precomputed hash codes for the sequence beginning at @in_next.
357
 *  These will be used and then updated with the precomputed hashcodes for
358
 *  the sequence beginning at @in_next + @count.
359
 */
360
static forceinline void
361
hc_matchfinder_skip_bytes(struct hc_matchfinder * const mf,
362
        const u8 ** const in_base_p,
363
        const u8 *in_next,
364
        const u8 * const in_end,
365
        const u32 count,
366
        u32 * const next_hashes)
367
0
{
368
0
  u32 cur_pos;
369
0
  u32 hash3, hash4;
370
0
  u32 next_hashseq;
371
0
  u32 remaining = count;
372
373
0
  if (unlikely(count + 5 > in_end - in_next))
374
0
    return;
375
376
0
  cur_pos = in_next - *in_base_p;
377
0
  hash3 = next_hashes[0];
378
0
  hash4 = next_hashes[1];
379
0
  do {
380
0
    if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
381
0
      hc_matchfinder_slide_window(mf);
382
0
      *in_base_p += MATCHFINDER_WINDOW_SIZE;
383
0
      cur_pos = 0;
384
0
    }
385
0
    mf->hash3_tab[hash3] = cur_pos;
386
0
    mf->next_tab[cur_pos] = mf->hash4_tab[hash4];
387
0
    mf->hash4_tab[hash4] = cur_pos;
388
389
0
    next_hashseq = get_unaligned_le32(++in_next);
390
0
    hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
391
0
    hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
392
0
    cur_pos++;
393
0
  } while (--remaining);
394
395
0
  prefetchw(&mf->hash3_tab[hash3]);
396
0
  prefetchw(&mf->hash4_tab[hash4]);
397
0
  next_hashes[0] = hash3;
398
0
  next_hashes[1] = hash4;
399
0
}
400
401
#endif /* LIB_HC_MATCHFINDER_H */