Coverage Report

Created: 2025-06-16 07:00

/src/libdeflate/lib/matchfinder_common.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
3
 */
4
5
#ifndef LIB_MATCHFINDER_COMMON_H
6
#define LIB_MATCHFINDER_COMMON_H
7
8
#include "lib_common.h"
9
10
#ifndef MATCHFINDER_WINDOW_ORDER
11
#  error "MATCHFINDER_WINDOW_ORDER must be defined!"
12
#endif
13
14
/*
15
 * Given a 32-bit value that was loaded with the platform's native endianness,
16
 * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
17
 * bits contain, in a platform-dependent byte order, the first 3 bytes of the
18
 * data at the memory location from which the input 32-bit value was loaded.
19
 */
20
static forceinline u32
21
loaded_u32_to_u24(u32 v)
22
0
{
23
0
  if (CPU_IS_LITTLE_ENDIAN())
24
0
    return v & 0xFFFFFF;
25
0
  else
26
0
    return v >> 8;
27
0
}
28
29
/*
30
 * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
31
 * The order in which the 3 bytes will be arranged as octets in the 24 bits is
32
 * platform-dependent.  At least 4 bytes (not 3) must be available at @p.
33
 */
34
static forceinline u32
35
load_u24_unaligned(const u8 *p)
36
0
{
37
0
#if UNALIGNED_ACCESS_IS_FAST
38
0
  return loaded_u32_to_u24(load_u32_unaligned(p));
39
#else
40
  if (CPU_IS_LITTLE_ENDIAN())
41
    return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
42
  else
43
    return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
44
#endif
45
0
}
46
47
0
#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)
48
49
typedef s16 mf_pos_t;
50
51
0
#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
52
53
/*
54
 * This is the memory address alignment, in bytes, required for the matchfinder
55
 * buffers by the architecture-specific implementations of matchfinder_init()
56
 * and matchfinder_rebase().  "Matchfinder buffer" means an entire struct
57
 * hc_matchfinder, bt_matchfinder, or ht_matchfinder; the next_tab field of
58
 * struct hc_matchfinder; or the child_tab field of struct bt_matchfinder.
59
 *
60
 * This affects how the entire 'struct deflate_compressor' is allocated, since
61
 * the matchfinder structures are embedded inside it.
62
 *
63
 * Currently the maximum memory address alignment required is 32 bytes, needed
64
 * by the AVX-2 matchfinder functions.
65
 */
66
0
#define MATCHFINDER_MEM_ALIGNMENT 32
67
68
/*
69
 * This declares a size, in bytes, that is guaranteed to divide the sizes of the
70
 * matchfinder buffers (where "matchfinder buffers" is as defined for
71
 * MATCHFINDER_MEM_ALIGNMENT).  The architecture-specific implementations of
72
 * matchfinder_init() and matchfinder_rebase() take advantage of this value.
73
 *
74
 * Currently the maximum size alignment required is 128 bytes, needed by
75
 * the AVX-2 matchfinder functions.  However, the RISC-V Vector Extension
76
 * matchfinder functions can, in principle, take advantage of a larger size
77
 * alignment.  Therefore, we set this to 1024, which still easily divides the
78
 * actual sizes that result from the current matchfinder struct definitions.
79
 * This value can safely be changed to any power of two that is >= 128.
80
 */
81
#define MATCHFINDER_SIZE_ALIGNMENT  1024
82
83
#undef matchfinder_init
84
#undef matchfinder_rebase
85
#ifdef _aligned_attribute
86
#  define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
87
#  if defined(ARCH_ARM32) || defined(ARCH_ARM64)
88
#    include "arm/matchfinder_impl.h"
89
#  elif defined(ARCH_RISCV)
90
#    include "riscv/matchfinder_impl.h"
91
#  elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
92
#    include "x86/matchfinder_impl.h"
93
#  endif
94
#else
95
#  define MATCHFINDER_ALIGNED
96
#endif
97
98
/*
99
 * Initialize the hash table portion of the matchfinder.
100
 *
101
 * Essentially, this is an optimized memset().
102
 *
103
 * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
104
 * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
105
 */
106
#ifndef matchfinder_init
107
static forceinline void
108
matchfinder_init(mf_pos_t *data, size_t size)
109
{
110
  size_t num_entries = size / sizeof(*data);
111
  size_t i;
112
113
  for (i = 0; i < num_entries; i++)
114
    data[i] = MATCHFINDER_INITVAL;
115
}
116
#endif
117
118
/*
119
 * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
120
 *
121
 * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
122
 * run through the matchfinder.
123
 *
124
 * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
125
 * array, making the entries be relative to the current position rather than the
126
 * position MATCHFINDER_WINDOW_SIZE bytes prior.  To avoid integer underflows,
127
 * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at
128
 * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
129
 *
130
 * The given array must contain all matchfinder data that is position-relative:
131
 * the hash table(s) as well as any hash chain or binary tree links.  Its
132
 * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size
133
 * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
134
 */
135
#ifndef matchfinder_rebase
136
static forceinline void
137
matchfinder_rebase(mf_pos_t *data, size_t size)
138
{
139
  size_t num_entries = size / sizeof(*data);
140
  size_t i;
141
142
  if (MATCHFINDER_WINDOW_SIZE == 32768) {
143
    /*
144
     * Branchless version for 32768-byte windows.  Clear all bits if
145
     * the value was already negative, then set the sign bit.  This
146
     * is equivalent to subtracting 32768 with signed saturation.
147
     */
148
    for (i = 0; i < num_entries; i++)
149
      data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
150
  } else {
151
    for (i = 0; i < num_entries; i++) {
152
      if (data[i] >= 0)
153
        data[i] -= (mf_pos_t)MATCHFINDER_WINDOW_SIZE;
154
      else
155
        data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
156
    }
157
  }
158
}
159
#endif
160
161
/*
162
 * The hash function: given a sequence prefix held in the low-order bits of a
163
 * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
164
 * bits of the product that don't fit in a 32-bit value, but take the
165
 * next-highest @num_bits bits of the product as the hash value, as those have
166
 * the most randomness.
167
 */
168
static forceinline u32
169
lz_hash(u32 seq, unsigned num_bits)
170
0
{
171
0
  return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
172
0
}
173
174
/*
175
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
176
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
177
 */
178
static forceinline u32
179
lz_extend(const u8 * const strptr, const u8 * const matchptr,
180
    const u32 start_len, const u32 max_len)
181
0
{
182
0
  u32 len = start_len;
183
0
  machine_word_t v_word;
184
185
0
  if (UNALIGNED_ACCESS_IS_FAST) {
186
187
0
    if (likely(max_len - len >= 4 * WORDBYTES)) {
188
189
0
    #define COMPARE_WORD_STEP       \
190
0
      v_word = load_word_unaligned(&matchptr[len]) ^  \
191
0
         load_word_unaligned(&strptr[len]); \
192
0
      if (v_word != 0)       \
193
0
        goto word_differs;     \
194
0
      len += WORDBYTES;       \
195
0
196
0
      COMPARE_WORD_STEP
197
0
      COMPARE_WORD_STEP
198
0
      COMPARE_WORD_STEP
199
0
      COMPARE_WORD_STEP
200
0
    #undef COMPARE_WORD_STEP
201
0
    }
202
203
0
    while (len + WORDBYTES <= max_len) {
204
0
      v_word = load_word_unaligned(&matchptr[len]) ^
205
0
         load_word_unaligned(&strptr[len]);
206
0
      if (v_word != 0)
207
0
        goto word_differs;
208
0
      len += WORDBYTES;
209
0
    }
210
0
  }
211
212
0
  while (len < max_len && matchptr[len] == strptr[len])
213
0
    len++;
214
0
  return len;
215
216
0
word_differs:
217
0
  if (CPU_IS_LITTLE_ENDIAN())
218
0
    len += (bsfw(v_word) >> 3);
219
0
  else
220
0
    len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
221
0
  return len;
222
0
}
223
224
#endif /* LIB_MATCHFINDER_COMMON_H */