Coverage Report

Created: 2024-07-27 06:04

/work/_deps/deflate-src/lib/matchfinder_common.h
Every instrumented line in this file has an execution count of 0; the file is entirely uncovered.
/*
 * matchfinder_common.h - common code for Lempel-Ziv matchfinding
 */

#ifndef LIB_MATCHFINDER_COMMON_H
#define LIB_MATCHFINDER_COMMON_H

#include "lib_common.h"

#ifndef MATCHFINDER_WINDOW_ORDER
#  error "MATCHFINDER_WINDOW_ORDER must be defined!"
#endif

/*
 * Given a 32-bit value that was loaded with the platform's native endianness,
 * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24
 * bits contain the first 3 bytes, in a platform-dependent octet order, from
 * the memory location the input 32-bit value was loaded from.
 */
static forceinline u32
loaded_u32_to_u24(u32 v)
{
  if (CPU_IS_LITTLE_ENDIAN())
    return v & 0xFFFFFF;
  else
    return v >> 8;
}
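
/*
 * Worked example: given the bytes {0x11, 0x22, 0x33, 0x44} in memory, a
 * native 32-bit load yields v == 0x44332211 on a little-endian CPU, and
 * v & 0xFFFFFF == 0x00332211 keeps bytes 0x11, 0x22, 0x33.  On a big-endian
 * CPU the same load yields v == 0x11223344, and v >> 8 == 0x00112233 keeps
 * the same three bytes.  Either way, the low 24 bits hold exactly the first
 * 3 bytes, just in different octet orders.
 */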

/*
 * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value.
 * The order in which the 3 bytes will be arranged as octets in the 24 bits is
 * platform-dependent.  At least 4 bytes (not 3) must be available at @p.
 */
static forceinline u32
load_u24_unaligned(const u8 *p)
{
#if UNALIGNED_ACCESS_IS_FAST
  return loaded_u32_to_u24(load_u32_unaligned(p));
#else
  if (CPU_IS_LITTLE_ENDIAN())
    return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16);
  else
    return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16);
#endif
}

#define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER)

typedef s16 mf_pos_t;

#define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE)
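
/*
 * Note: positions are stored relative to the current position, so an entry
 * equal to -MATCHFINDER_WINDOW_SIZE refers to a position one full window
 * behind, just outside the sliding window.  Initializing every entry to
 * MATCHFINDER_INITVAL therefore makes empty slots look like matches that are
 * too old to use.  For example, with MATCHFINDER_WINDOW_ORDER == 15 this is
 * (mf_pos_t)-32768, the most negative value representable in an s16.
 */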

/*
 * Required alignment of the matchfinder buffer pointer and size.  The values
 * here come from the AVX-2 implementation, which is the worst case.
 */
#define MATCHFINDER_MEM_ALIGNMENT 32
#define MATCHFINDER_SIZE_ALIGNMENT 128

#undef matchfinder_init
#undef matchfinder_rebase
#ifdef _aligned_attribute
#  define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT)
#  if defined(ARCH_ARM32) || defined(ARCH_ARM64)
#    include "arm/matchfinder_impl.h"
#  elif defined(ARCH_X86_32) || defined(ARCH_X86_64)
#    include "x86/matchfinder_impl.h"
#  endif
#else
#  define MATCHFINDER_ALIGNED
#endif
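
/*
 * Note: the architecture-specific headers included above may define
 * matchfinder_init and/or matchfinder_rebase as optimized replacements; the
 * portable versions below are compiled only when those names are left
 * undefined, which is what the #undef / #ifndef pattern arranges.
 */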

/*
 * Initialize the hash table portion of the matchfinder.
 *
 * Essentially, this is an optimized memset().
 *
 * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and
 * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
 */
#ifndef matchfinder_init
static forceinline void
matchfinder_init(mf_pos_t *data, size_t size)
{
  size_t num_entries = size / sizeof(*data);
  size_t i;

  for (i = 0; i < num_entries; i++)
    data[i] = MATCHFINDER_INITVAL;
}
#endif
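
/*
 * Usage sketch (the allocator choice is an assumption for illustration; this
 * header does not allocate).  The buffer must satisfy both alignment
 * requirements, e.g. with C11 aligned_alloc(), whose size argument here is
 * already a multiple of the requested alignment:
 *
 *   size_t size = ...;  // a multiple of MATCHFINDER_SIZE_ALIGNMENT
 *   mf_pos_t *mf = aligned_alloc(MATCHFINDER_MEM_ALIGNMENT, size);
 *
 *   if (mf != NULL)
 *     matchfinder_init(mf, size);
 */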

/*
 * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes.
 *
 * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been
 * run through the matchfinder.
 *
 * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given
 * array, making the entries relative to the current position rather than to
 * the position MATCHFINDER_WINDOW_SIZE bytes prior.  To avoid integer
 * underflows, entries that would become less than -MATCHFINDER_WINDOW_SIZE
 * stay at -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds.
 *
 * The given array must contain all matchfinder data that is position-relative:
 * the hash table(s) as well as any hash chain or binary tree links.  Its
 * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its
 * size must be a multiple of MATCHFINDER_SIZE_ALIGNMENT.
 */
#ifndef matchfinder_rebase
static forceinline void
matchfinder_rebase(mf_pos_t *data, size_t size)
{
  size_t num_entries = size / sizeof(*data);
  size_t i;

  if (MATCHFINDER_WINDOW_SIZE == 32768) {
    /*
     * Branchless version for 32768-byte windows.  If the value was
     * already negative, clear all of its bits; then set the sign bit.
     * This is equivalent to subtracting 32768 with signed saturation.
     */
    for (i = 0; i < num_entries; i++)
      data[i] = 0x8000 | (data[i] & ~(data[i] >> 15));
  } else {
    for (i = 0; i < num_entries; i++) {
      if (data[i] >= 0)
        data[i] -= (mf_pos_t)MATCHFINDER_WINDOW_SIZE;
      else
        data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE;
    }
  }
}
#endif
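
/*
 * Why the branchless form works: for an s16 value x, the right shift
 * x >> 15 is 0 when x >= 0 and all-ones (-1) when x < 0 (an arithmetic
 * shift, which this code relies on).  Hence:
 *
 *   x >= 0:  x & ~0 == x, so the result is x | 0x8000; as an s16, the
 *            unsigned value x + 32768 is reinterpreted as x - 32768.
 *   x <  0:  x & ~(-1) == 0, so the result is 0x8000, i.e. -32768.
 *
 * For example, 100 maps to -32668 (= 100 - 32768), and -5 saturates to
 * -32768, matching the branchy path taken for other window sizes.
 */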

/*
 * The hash function: given a sequence prefix held in the low-order bits of a
 * 32-bit value, multiply by a carefully-chosen large constant.  Discard any
 * bits of the product that don't fit in a 32-bit value, but take the
 * next-highest @num_bits bits of the product as the hash value, as those have
 * the most randomness.
 */
static forceinline u32
lz_hash(u32 seq, unsigned num_bits)
{
  return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits);
}
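
/*
 * Usage sketch (the 3-byte prefix and the 15-bit table size are illustrative
 * parameters, not fixed by this header): indexing a 2^15-entry hash table by
 * the next 3 input bytes might look like
 *
 *   u32 hash = lz_hash(load_u24_unaligned(in_next), 15);
 *
 * The multiplication mixes the low-order input bits into the high-order bits
 * of the product, and the right shift keeps the top num_bits of the product.
 */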

/*
 * Return the number of bytes at @matchptr that match the bytes at @strptr, up
 * to a maximum of @max_len.  Initially, @start_len bytes are matched.
 */
static forceinline unsigned
lz_extend(const u8 * const strptr, const u8 * const matchptr,
          const unsigned start_len, const unsigned max_len)
{
  unsigned len = start_len;
  machine_word_t v_word;

  if (UNALIGNED_ACCESS_IS_FAST) {

    if (likely(max_len - len >= 4 * WORDBYTES)) {

    #define COMPARE_WORD_STEP                          \
      v_word = load_word_unaligned(&matchptr[len]) ^   \
               load_word_unaligned(&strptr[len]);      \
      if (v_word != 0)                                 \
        goto word_differs;                             \
      len += WORDBYTES;

      COMPARE_WORD_STEP
      COMPARE_WORD_STEP
      COMPARE_WORD_STEP
      COMPARE_WORD_STEP
    #undef COMPARE_WORD_STEP
    }

    while (len + WORDBYTES <= max_len) {
      v_word = load_word_unaligned(&matchptr[len]) ^
               load_word_unaligned(&strptr[len]);
      if (v_word != 0)
        goto word_differs;
      len += WORDBYTES;
    }
  }

  while (len < max_len && matchptr[len] == strptr[len])
    len++;
  return len;

word_differs:
  if (CPU_IS_LITTLE_ENDIAN())
    len += (bsfw(v_word) >> 3);
  else
    len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
  return len;
}
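
/*
 * How word_differs computes the final length: v_word is the XOR of the two
 * mismatching words, so its first nonzero byte in memory order marks the
 * first differing byte.  On a little-endian machine the lowest-addressed
 * byte is the least significant, so bsfw(v_word), the index of the lowest
 * set bit, divided by 8 counts the matching bytes within the word; e.g.
 * v_word == 0x00FF0000 gives bsfw == 16, adding 2 more matching bytes.  On
 * big-endian, bsrw() is used symmetrically from the high end of the word.
 */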

#endif /* LIB_MATCHFINDER_COMMON_H */