Coverage Report

Created: 2025-06-16 07:00

/src/libdeflate/lib/ht_matchfinder.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * ht_matchfinder.h - Lempel-Ziv matchfinding with a hash table
3
 *
4
 * Copyright 2022 Eric Biggers
5
 *
6
 * Permission is hereby granted, free of charge, to any person
7
 * obtaining a copy of this software and associated documentation
8
 * files (the "Software"), to deal in the Software without
9
 * restriction, including without limitation the rights to use,
10
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following
13
 * conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be
16
 * included in all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
 * OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 * ---------------------------------------------------------------------------
28
 *
29
 * This is a Hash Table (ht) matchfinder.
30
 *
31
 * This is a variant of the Hash Chains (hc) matchfinder that is optimized for
32
 * very fast compression.  The ht_matchfinder stores the hash chains inline in
33
 * the hash table, whereas the hc_matchfinder stores them in a separate array.
34
 * Storing the hash chains inline is the faster method when max_search_depth
35
 * (the maximum chain length) is very small.  It is not appropriate when
36
 * max_search_depth is larger, as then it uses too much memory.
37
 *
38
 * Due to its focus on speed, the ht_matchfinder doesn't support length 3
39
 * matches.  It also doesn't allow max_search_depth to vary at runtime; it is
40
 * fixed at build time as HT_MATCHFINDER_BUCKET_SIZE.
41
 *
42
 * See hc_matchfinder.h for more information.
43
 */
44
45
#ifndef LIB_HT_MATCHFINDER_H
46
#define LIB_HT_MATCHFINDER_H
47
48
#include "matchfinder_common.h"
49
50
0
#define HT_MATCHFINDER_HASH_ORDER 15
51
0
#define HT_MATCHFINDER_BUCKET_SIZE  2
52
53
#define HT_MATCHFINDER_MIN_MATCH_LEN  4
54
/* Minimum value of max_len for ht_matchfinder_longest_match() */
55
0
#define HT_MATCHFINDER_REQUIRED_NBYTES  5
56
57
struct MATCHFINDER_ALIGNED ht_matchfinder {
58
  mf_pos_t hash_tab[1UL << HT_MATCHFINDER_HASH_ORDER]
59
       [HT_MATCHFINDER_BUCKET_SIZE];
60
};
61
62
static forceinline void
63
ht_matchfinder_init(struct ht_matchfinder *mf)
64
0
{
65
0
  STATIC_ASSERT(sizeof(*mf) % MATCHFINDER_SIZE_ALIGNMENT == 0);
66
67
0
  matchfinder_init((mf_pos_t *)mf, sizeof(*mf));
68
0
}
69
70
static forceinline void
71
ht_matchfinder_slide_window(struct ht_matchfinder *mf)
72
0
{
73
0
  matchfinder_rebase((mf_pos_t *)mf, sizeof(*mf));
74
0
}
75
76
/* Note: max_len must be >= HT_MATCHFINDER_REQUIRED_NBYTES */
77
static forceinline u32
78
ht_matchfinder_longest_match(struct ht_matchfinder * const mf,
79
           const u8 ** const in_base_p,
80
           const u8 * const in_next,
81
           const u32 max_len,
82
           const u32 nice_len,
83
           u32 * const next_hash,
84
           u32 * const offset_ret)
85
0
{
86
0
  u32 best_len = 0;
87
0
  const u8 *best_matchptr = in_next;
88
0
  u32 cur_pos = in_next - *in_base_p;
89
0
  const u8 *in_base;
90
0
  mf_pos_t cutoff;
91
0
  u32 hash;
92
0
  u32 seq;
93
0
  mf_pos_t cur_node;
94
0
  const u8 *matchptr;
95
0
#if HT_MATCHFINDER_BUCKET_SIZE > 1
96
0
  mf_pos_t to_insert;
97
0
  u32 len;
98
0
#endif
99
#if HT_MATCHFINDER_BUCKET_SIZE > 2
100
  int i;
101
#endif
102
103
  /* This is assumed throughout this function. */
104
0
  STATIC_ASSERT(HT_MATCHFINDER_MIN_MATCH_LEN == 4);
105
106
0
  if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
107
0
    ht_matchfinder_slide_window(mf);
108
0
    *in_base_p += MATCHFINDER_WINDOW_SIZE;
109
0
    cur_pos = 0;
110
0
  }
111
0
  in_base = *in_base_p;
112
0
  cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
113
114
0
  hash = *next_hash;
115
0
  STATIC_ASSERT(HT_MATCHFINDER_REQUIRED_NBYTES == 5);
116
0
  *next_hash = lz_hash(get_unaligned_le32(in_next + 1),
117
0
           HT_MATCHFINDER_HASH_ORDER);
118
0
  seq = load_u32_unaligned(in_next);
119
0
  prefetchw(&mf->hash_tab[*next_hash]);
120
#if HT_MATCHFINDER_BUCKET_SIZE == 1
121
  /* Hand-unrolled version for BUCKET_SIZE == 1 */
122
  cur_node = mf->hash_tab[hash][0];
123
  mf->hash_tab[hash][0] = cur_pos;
124
  if (cur_node <= cutoff)
125
    goto out;
126
  matchptr = &in_base[cur_node];
127
  if (load_u32_unaligned(matchptr) == seq) {
128
    best_len = lz_extend(in_next, matchptr, 4, max_len);
129
    best_matchptr = matchptr;
130
  }
131
#elif HT_MATCHFINDER_BUCKET_SIZE == 2
132
  /*
133
   * Hand-unrolled version for BUCKET_SIZE == 2.  The logic here also
134
   * differs slightly in that it copies the first entry to the second even
135
   * if nice_len is reached on the first, as this can be slightly faster.
136
   */
137
0
  cur_node = mf->hash_tab[hash][0];
138
0
  mf->hash_tab[hash][0] = cur_pos;
139
0
  if (cur_node <= cutoff)
140
0
    goto out;
141
0
  matchptr = &in_base[cur_node];
142
143
0
  to_insert = cur_node;
144
0
  cur_node = mf->hash_tab[hash][1];
145
0
  mf->hash_tab[hash][1] = to_insert;
146
147
0
  if (load_u32_unaligned(matchptr) == seq) {
148
0
    best_len = lz_extend(in_next, matchptr, 4, max_len);
149
0
    best_matchptr = matchptr;
150
0
    if (cur_node <= cutoff || best_len >= nice_len)
151
0
      goto out;
152
0
    matchptr = &in_base[cur_node];
153
0
    if (load_u32_unaligned(matchptr) == seq &&
154
0
        load_u32_unaligned(matchptr + best_len - 3) ==
155
0
        load_u32_unaligned(in_next + best_len - 3)) {
156
0
      len = lz_extend(in_next, matchptr, 4, max_len);
157
0
      if (len > best_len) {
158
0
        best_len = len;
159
0
        best_matchptr = matchptr;
160
0
      }
161
0
    }
162
0
  } else {
163
0
    if (cur_node <= cutoff)
164
0
      goto out;
165
0
    matchptr = &in_base[cur_node];
166
0
    if (load_u32_unaligned(matchptr) == seq) {
167
0
      best_len = lz_extend(in_next, matchptr, 4, max_len);
168
0
      best_matchptr = matchptr;
169
0
    }
170
0
  }
171
#else
172
  /* Generic version for HT_MATCHFINDER_BUCKET_SIZE > 2 */
173
  to_insert = cur_pos;
174
  for (i = 0; i < HT_MATCHFINDER_BUCKET_SIZE; i++) {
175
    cur_node = mf->hash_tab[hash][i];
176
    mf->hash_tab[hash][i] = to_insert;
177
    if (cur_node <= cutoff)
178
      goto out;
179
    matchptr = &in_base[cur_node];
180
    if (load_u32_unaligned(matchptr) == seq) {
181
      len = lz_extend(in_next, matchptr, 4, max_len);
182
      if (len > best_len) {
183
        best_len = len;
184
        best_matchptr = matchptr;
185
        if (best_len >= nice_len)
186
          goto out;
187
      }
188
    }
189
    to_insert = cur_node;
190
  }
191
#endif
192
0
out:
193
0
  *offset_ret = in_next - best_matchptr;
194
0
  return best_len;
195
0
}
196
197
static forceinline void
198
ht_matchfinder_skip_bytes(struct ht_matchfinder * const mf,
199
        const u8 ** const in_base_p,
200
        const u8 *in_next,
201
        const u8 * const in_end,
202
        const u32 count,
203
        u32 * const next_hash)
204
0
{
205
0
  s32 cur_pos = in_next - *in_base_p;
206
0
  u32 hash;
207
0
  u32 remaining = count;
208
0
  int i;
209
210
0
  if (unlikely(count + HT_MATCHFINDER_REQUIRED_NBYTES > in_end - in_next))
211
0
    return;
212
213
0
  if (cur_pos + count - 1 >= MATCHFINDER_WINDOW_SIZE) {
214
0
    ht_matchfinder_slide_window(mf);
215
0
    *in_base_p += MATCHFINDER_WINDOW_SIZE;
216
0
    cur_pos -= MATCHFINDER_WINDOW_SIZE;
217
0
  }
218
219
0
  hash = *next_hash;
220
0
  do {
221
0
    for (i = HT_MATCHFINDER_BUCKET_SIZE - 1; i > 0; i--)
222
0
      mf->hash_tab[hash][i] = mf->hash_tab[hash][i - 1];
223
0
    mf->hash_tab[hash][0] = cur_pos;
224
225
0
    hash = lz_hash(get_unaligned_le32(++in_next),
226
0
             HT_MATCHFINDER_HASH_ORDER);
227
0
    cur_pos++;
228
0
  } while (--remaining);
229
230
0
  prefetchw(&mf->hash_tab[hash]);
231
0
  *next_hash = hash;
232
0
}
233
234
#endif /* LIB_HT_MATCHFINDER_H */