Coverage Report

Created: 2025-06-16 07:00

/src/libdeflate/lib/bt_matchfinder.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees
3
 *
4
 * Copyright 2016 Eric Biggers
5
 *
6
 * Permission is hereby granted, free of charge, to any person
7
 * obtaining a copy of this software and associated documentation
8
 * files (the "Software"), to deal in the Software without
9
 * restriction, including without limitation the rights to use,
10
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following
13
 * conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be
16
 * included in all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
 * OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 * ----------------------------------------------------------------------------
28
 *
29
 * This is a Binary Trees (bt) based matchfinder.
30
 *
31
 * The main data structure is a hash table where each hash bucket contains a
32
 * binary tree of sequences whose first 4 bytes share the same hash code.  Each
33
 * sequence is identified by its starting position in the input buffer.  Each
34
 * binary tree is always sorted such that each left child represents a sequence
35
 * lexicographically lesser than its parent and each right child represents a
36
 * sequence lexicographically greater than its parent.
37
 *
38
 * The algorithm processes the input buffer sequentially.  At each byte
39
 * position, the hash code of the first 4 bytes of the sequence beginning at
40
 * that position (the sequence being matched against) is computed.  This
41
 * identifies the hash bucket to use for that position.  Then, a new binary tree
42
 * node is created to represent the current sequence.  Then, in a single tree
43
 * traversal, the hash bucket's binary tree is searched for matches and is
44
 * re-rooted at the new node.
45
 *
46
 * Compared to the simpler algorithm that uses linked lists instead of binary
47
 * trees (see hc_matchfinder.h), the binary tree version gains more information
48
 * at each node visitation.  Ideally, the binary tree version will examine only
49
 * 'log(n)' nodes to find the same matches that the linked list version will
50
 * find by examining 'n' nodes.  In addition, the binary tree version can
51
 * examine fewer bytes at each node by taking advantage of the common prefixes
52
 * that result from the sort order, whereas the linked list version may have to
53
 * examine up to the full length of the match at each node.
54
 *
55
 * However, it is not always best to use the binary tree version.  It requires
56
 * nearly twice as much memory as the linked list version, and it takes time to
57
 * keep the binary trees sorted, even at positions where the compressor does not
58
 * need matches.  Generally, when doing fast compression on small buffers,
59
 * binary trees are the wrong approach.  They are best suited for thorough
60
 * compression and/or large buffers.
61
 *
62
 * ----------------------------------------------------------------------------
63
 */
64
65
#ifndef LIB_BT_MATCHFINDER_H
66
#define LIB_BT_MATCHFINDER_H
67
68
#include "matchfinder_common.h"
69
70
0
#define BT_MATCHFINDER_HASH3_ORDER 16
71
0
#define BT_MATCHFINDER_HASH3_WAYS  2
72
0
#define BT_MATCHFINDER_HASH4_ORDER 16
73
74
#define BT_MATCHFINDER_TOTAL_HASH_SIZE    \
75
0
  (((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
76
0
    (1UL << BT_MATCHFINDER_HASH4_ORDER)) * sizeof(mf_pos_t))
77
78
/* Representation of a match found by the bt_matchfinder  */
79
struct lz_match {
80
81
  /* The number of bytes matched.  */
82
  u16 length;
83
84
  /* The offset back from the current position that was matched.  */
85
  u16 offset;
86
};
87
88
struct MATCHFINDER_ALIGNED bt_matchfinder {
89
90
  /* The hash table for finding length 3 matches  */
91
  mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];
92
93
  /* The hash table which contains the roots of the binary trees for
94
   * finding length 4+ matches  */
95
  mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];
96
97
  /* The child node references for the binary trees.  The left and right
98
   * children of the node for the sequence with position 'pos' are
99
   * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively.  */
100
  mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];
101
};
102
103
/* Prepare the matchfinder for a new input buffer.  */
104
static forceinline void
105
bt_matchfinder_init(struct bt_matchfinder *mf)
106
0
{
107
0
  STATIC_ASSERT(BT_MATCHFINDER_TOTAL_HASH_SIZE %
108
0
          MATCHFINDER_SIZE_ALIGNMENT == 0);
109
110
0
  matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_SIZE);
111
0
}
112
113
static forceinline void
114
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
115
0
{
116
0
  STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
117
118
0
  matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
119
0
}
120
121
static forceinline mf_pos_t *
122
bt_left_child(struct bt_matchfinder *mf, s32 node)
123
0
{
124
0
  return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
125
0
}
126
127
static forceinline mf_pos_t *
128
bt_right_child(struct bt_matchfinder *mf, s32 node)
129
0
{
130
0
  return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
131
0
}
132
133
/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
134
 * and bt_matchfinder_skip_byte().  There must be sufficiently many bytes
135
 * remaining to load a 32-bit integer from the *next* position.  */
136
0
#define BT_MATCHFINDER_REQUIRED_NBYTES  5
137
138
/* Advance the binary tree matchfinder by one byte, optionally recording
139
 * matches.  @record_matches should be a compile-time constant.  */
140
static forceinline struct lz_match *
141
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const mf,
142
        const u8 * const in_base,
143
        const ptrdiff_t cur_pos,
144
        const u32 max_len,
145
        const u32 nice_len,
146
        const u32 max_search_depth,
147
        u32 * const next_hashes,
148
        struct lz_match *lz_matchptr,
149
        const bool record_matches)
150
0
{
151
0
  const u8 *in_next = in_base + cur_pos;
152
0
  u32 depth_remaining = max_search_depth;
153
0
  const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
154
0
  u32 next_hashseq;
155
0
  u32 hash3;
156
0
  u32 hash4;
157
0
  s32 cur_node;
158
0
#if BT_MATCHFINDER_HASH3_WAYS >= 2
159
0
  s32 cur_node_2;
160
0
#endif
161
0
  const u8 *matchptr;
162
0
  mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
163
0
  u32 best_lt_len, best_gt_len;
164
0
  u32 len;
165
0
  u32 best_len = 3;
166
167
0
  STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
168
0
          BT_MATCHFINDER_HASH3_WAYS <= 2);
169
170
0
  next_hashseq = get_unaligned_le32(in_next + 1);
171
172
0
  hash3 = next_hashes[0];
173
0
  hash4 = next_hashes[1];
174
175
0
  next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
176
0
  next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
177
0
  prefetchw(&mf->hash3_tab[next_hashes[0]]);
178
0
  prefetchw(&mf->hash4_tab[next_hashes[1]]);
179
180
0
  cur_node = mf->hash3_tab[hash3][0];
181
0
  mf->hash3_tab[hash3][0] = cur_pos;
182
0
#if BT_MATCHFINDER_HASH3_WAYS >= 2
183
0
  cur_node_2 = mf->hash3_tab[hash3][1];
184
0
  mf->hash3_tab[hash3][1] = cur_node;
185
0
#endif
186
0
  if (record_matches && cur_node > cutoff) {
187
0
    u32 seq3 = load_u24_unaligned(in_next);
188
0
    if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
189
0
      lz_matchptr->length = 3;
190
0
      lz_matchptr->offset = in_next - &in_base[cur_node];
191
0
      lz_matchptr++;
192
0
    }
193
0
  #if BT_MATCHFINDER_HASH3_WAYS >= 2
194
0
    else if (cur_node_2 > cutoff &&
195
0
      seq3 == load_u24_unaligned(&in_base[cur_node_2]))
196
0
    {
197
0
      lz_matchptr->length = 3;
198
0
      lz_matchptr->offset = in_next - &in_base[cur_node_2];
199
0
      lz_matchptr++;
200
0
    }
201
0
  #endif
202
0
  }
203
204
0
  cur_node = mf->hash4_tab[hash4];
205
0
  mf->hash4_tab[hash4] = cur_pos;
206
207
0
  pending_lt_ptr = bt_left_child(mf, cur_pos);
208
0
  pending_gt_ptr = bt_right_child(mf, cur_pos);
209
210
0
  if (cur_node <= cutoff) {
211
0
    *pending_lt_ptr = MATCHFINDER_INITVAL;
212
0
    *pending_gt_ptr = MATCHFINDER_INITVAL;
213
0
    return lz_matchptr;
214
0
  }
215
216
0
  best_lt_len = 0;
217
0
  best_gt_len = 0;
218
0
  len = 0;
219
220
0
  for (;;) {
221
0
    matchptr = &in_base[cur_node];
222
223
0
    if (matchptr[len] == in_next[len]) {
224
0
      len = lz_extend(in_next, matchptr, len + 1, max_len);
225
0
      if (!record_matches || len > best_len) {
226
0
        if (record_matches) {
227
0
          best_len = len;
228
0
          lz_matchptr->length = len;
229
0
          lz_matchptr->offset = in_next - matchptr;
230
0
          lz_matchptr++;
231
0
        }
232
0
        if (len >= nice_len) {
233
0
          *pending_lt_ptr = *bt_left_child(mf, cur_node);
234
0
          *pending_gt_ptr = *bt_right_child(mf, cur_node);
235
0
          return lz_matchptr;
236
0
        }
237
0
      }
238
0
    }
239
240
0
    if (matchptr[len] < in_next[len]) {
241
0
      *pending_lt_ptr = cur_node;
242
0
      pending_lt_ptr = bt_right_child(mf, cur_node);
243
0
      cur_node = *pending_lt_ptr;
244
0
      best_lt_len = len;
245
0
      if (best_gt_len < len)
246
0
        len = best_gt_len;
247
0
    } else {
248
0
      *pending_gt_ptr = cur_node;
249
0
      pending_gt_ptr = bt_left_child(mf, cur_node);
250
0
      cur_node = *pending_gt_ptr;
251
0
      best_gt_len = len;
252
0
      if (best_lt_len < len)
253
0
        len = best_lt_len;
254
0
    }
255
256
0
    if (cur_node <= cutoff || !--depth_remaining) {
257
0
      *pending_lt_ptr = MATCHFINDER_INITVAL;
258
0
      *pending_gt_ptr = MATCHFINDER_INITVAL;
259
0
      return lz_matchptr;
260
0
    }
261
0
  }
262
0
}
263
264
/*
265
 * Retrieve a list of matches with the current position.
266
 *
267
 * @mf
268
 *  The matchfinder structure.
269
 * @in_base
270
 *  Pointer to the next byte in the input buffer to process _at the last
271
 *  time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
272
 * @cur_pos
273
 *  The current position in the input buffer relative to @in_base (the
274
 *  position of the sequence being matched against).
275
 * @max_len
276
 *  The maximum permissible match length at this position.  Must be >=
277
 *  BT_MATCHFINDER_REQUIRED_NBYTES.
278
 * @nice_len
279
 *  Stop searching if a match of at least this length is found.
280
 *  Must be <= @max_len.
281
 * @max_search_depth
282
 *  Limit on the number of potential matches to consider.  Must be >= 1.
283
 * @next_hashes
284
 *  The precomputed hash codes for the sequence beginning at @in_next.
285
 *  These will be used and then updated with the precomputed hashcodes for
286
 *  the sequence beginning at @in_next + 1.
287
 * @lz_matchptr
288
 *  An array in which this function will record the matches.  The recorded
289
 *  matches will be sorted by strictly increasing length and (non-strictly)
290
 *  increasing offset.  The maximum number of matches that may be found is
291
 *  'nice_len - 2'.
292
 *
293
 * The return value is a pointer to the next available slot in the @lz_matchptr
294
 * array.  (If no matches were found, this will be the same as @lz_matchptr.)
295
 */
296
static forceinline struct lz_match *
297
bt_matchfinder_get_matches(struct bt_matchfinder *mf,
298
         const u8 *in_base,
299
         ptrdiff_t cur_pos,
300
         u32 max_len,
301
         u32 nice_len,
302
         u32 max_search_depth,
303
         u32 next_hashes[2],
304
         struct lz_match *lz_matchptr)
305
0
{
306
0
  return bt_matchfinder_advance_one_byte(mf,
307
0
                 in_base,
308
0
                 cur_pos,
309
0
                 max_len,
310
0
                 nice_len,
311
0
                 max_search_depth,
312
0
                 next_hashes,
313
0
                 lz_matchptr,
314
0
                 true);
315
0
}
316
317
/*
318
 * Advance the matchfinder, but don't record any matches.
319
 *
320
 * This is very similar to bt_matchfinder_get_matches() because both functions
321
 * must do hashing and tree re-rooting.
322
 */
323
static forceinline void
324
bt_matchfinder_skip_byte(struct bt_matchfinder *mf,
325
       const u8 *in_base,
326
       ptrdiff_t cur_pos,
327
       u32 nice_len,
328
       u32 max_search_depth,
329
       u32 next_hashes[2])
330
0
{
331
0
  bt_matchfinder_advance_one_byte(mf,
332
0
          in_base,
333
0
          cur_pos,
334
0
          nice_len,
335
0
          nice_len,
336
0
          max_search_depth,
337
0
          next_hashes,
338
0
          NULL,
339
0
          false);
340
0
}
341
342
#endif /* LIB_BT_MATCHFINDER_H */