/work/_deps/deflate-src/lib/matchfinder_common.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * matchfinder_common.h - common code for Lempel-Ziv matchfinding |
3 | | */ |
4 | | |
5 | | #ifndef LIB_MATCHFINDER_COMMON_H |
6 | | #define LIB_MATCHFINDER_COMMON_H |
7 | | |
8 | | #include "lib_common.h" |
9 | | |
10 | | #ifndef MATCHFINDER_WINDOW_ORDER |
11 | | # error "MATCHFINDER_WINDOW_ORDER must be defined!" |
12 | | #endif |
13 | | |
14 | | /* |
15 | | * Given a 32-bit value that was loaded with the platform's native endianness, |
16 | | * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 |
17 | | * bits contain the first 3 bytes, arranged in octets in a platform-dependent |
18 | | * order, at the memory location from which the input 32-bit value was loaded. |
19 | | */ |
20 | | static forceinline u32 |
21 | | loaded_u32_to_u24(u32 v) |
22 | 0 | { |
23 | 0 | if (CPU_IS_LITTLE_ENDIAN()) |
24 | 0 | return v & 0xFFFFFF; |
25 | 0 | else |
26 | 0 | return v >> 8; |
27 | 0 | } |
28 | | |
29 | | /* |
30 | | * Load the next 3 bytes from @p into the 24 low-order bits of a 32-bit value. |
31 | | * The order in which the 3 bytes will be arranged as octets in the 24 bits is |
32 | | * platform-dependent. At least 4 bytes (not 3) must be available at @p. |
33 | | */ |
34 | | static forceinline u32 |
35 | | load_u24_unaligned(const u8 *p) |
36 | 0 | { |
37 | 0 | #if UNALIGNED_ACCESS_IS_FAST |
38 | 0 | return loaded_u32_to_u24(load_u32_unaligned(p)); |
39 | | #else |
40 | | if (CPU_IS_LITTLE_ENDIAN()) |
41 | | return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); |
42 | | else |
43 | | return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); |
44 | | #endif |
45 | 0 | } |
46 | | |
47 | 0 | #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER) |
48 | | |
49 | | typedef s16 mf_pos_t; |
50 | | |
51 | 0 | #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE) |
52 | | |
53 | | /* |
54 | | * Required alignment of the matchfinder buffer pointer and size. The values |
55 | | * here come from the AVX-2 implementation, which is the worst case. |
56 | | */ |
57 | 0 | #define MATCHFINDER_MEM_ALIGNMENT 32 |
58 | | #define MATCHFINDER_SIZE_ALIGNMENT 128 |
59 | | |
60 | | #undef matchfinder_init |
61 | | #undef matchfinder_rebase |
62 | | #ifdef _aligned_attribute |
63 | | # define MATCHFINDER_ALIGNED _aligned_attribute(MATCHFINDER_MEM_ALIGNMENT) |
64 | | # if defined(ARCH_ARM32) || defined(ARCH_ARM64) |
65 | | # include "arm/matchfinder_impl.h" |
66 | | # elif defined(ARCH_X86_32) || defined(ARCH_X86_64) |
67 | | # include "x86/matchfinder_impl.h" |
68 | | # endif |
69 | | #else |
70 | | # define MATCHFINDER_ALIGNED |
71 | | #endif |
72 | | |
73 | | /* |
74 | | * Initialize the hash table portion of the matchfinder. |
75 | | * |
76 | | * Essentially, this is an optimized memset(). |
77 | | * |
78 | | * 'data' must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and |
79 | | * 'size' must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. |
80 | | */ |
81 | | #ifndef matchfinder_init |
82 | | static forceinline void |
83 | | matchfinder_init(mf_pos_t *data, size_t size) |
84 | | { |
85 | | size_t num_entries = size / sizeof(*data); |
86 | | size_t i; |
87 | | |
88 | | for (i = 0; i < num_entries; i++) |
89 | | data[i] = MATCHFINDER_INITVAL; |
90 | | } |
91 | | #endif |
92 | | |
93 | | /* |
94 | | * Slide the matchfinder by MATCHFINDER_WINDOW_SIZE bytes. |
95 | | * |
96 | | * This must be called just after each MATCHFINDER_WINDOW_SIZE bytes have been |
97 | | * run through the matchfinder. |
98 | | * |
99 | | * This subtracts MATCHFINDER_WINDOW_SIZE bytes from each entry in the given |
100 | | * array, making the entries be relative to the current position rather than the |
101 | | * position MATCHFINDER_WINDOW_SIZE bytes prior. To avoid integer underflows, |
102 | | * entries that would become less than -MATCHFINDER_WINDOW_SIZE stay at |
103 | | * -MATCHFINDER_WINDOW_SIZE, keeping them permanently out of bounds. |
104 | | * |
105 | | * The given array must contain all matchfinder data that is position-relative: |
106 | | * the hash table(s) as well as any hash chain or binary tree links. Its |
107 | | * address must be aligned to a MATCHFINDER_MEM_ALIGNMENT boundary, and its size |
108 | | * must be a multiple of MATCHFINDER_SIZE_ALIGNMENT. |
109 | | */ |
110 | | #ifndef matchfinder_rebase |
111 | | static forceinline void |
112 | | matchfinder_rebase(mf_pos_t *data, size_t size) |
113 | | { |
114 | | size_t num_entries = size / sizeof(*data); |
115 | | size_t i; |
116 | | |
117 | | if (MATCHFINDER_WINDOW_SIZE == 32768) { |
118 | | /* |
119 | | * Branchless version for 32768-byte windows. Clear all bits if |
120 | | * the value was already negative, then set the sign bit. This |
121 | | * is equivalent to subtracting 32768 with signed saturation. |
122 | | */ |
123 | | for (i = 0; i < num_entries; i++) |
124 | | data[i] = 0x8000 | (data[i] & ~(data[i] >> 15)); |
125 | | } else { |
126 | | for (i = 0; i < num_entries; i++) { |
127 | | if (data[i] >= 0) |
128 | | data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; |
129 | | else |
130 | | data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; |
131 | | } |
132 | | } |
133 | | } |
134 | | #endif |
135 | | |
136 | | /* |
137 | | * The hash function: given a sequence prefix held in the low-order bits of a |
138 | | * 32-bit value, multiply by a carefully-chosen large constant. Discard any |
139 | | * bits of the product that don't fit in a 32-bit value, but take the |
140 | | * next-highest @num_bits bits of the product as the hash value, as those have |
141 | | * the most randomness. |
142 | | */ |
143 | | static forceinline u32 |
144 | | lz_hash(u32 seq, unsigned num_bits) |
145 | 0 | { |
146 | 0 | return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); |
147 | 0 | } |
148 | | |
149 | | /* |
150 | | * Return the number of bytes at @matchptr that match the bytes at @strptr, up |
151 | | * to a maximum of @max_len. Initially, @start_len bytes are matched. |
152 | | */ |
153 | | static forceinline unsigned |
154 | | lz_extend(const u8 * const strptr, const u8 * const matchptr, |
155 | | const unsigned start_len, const unsigned max_len) |
156 | 0 | { |
157 | 0 | unsigned len = start_len; |
158 | 0 | machine_word_t v_word; |
159 | |
|
160 | 0 | if (UNALIGNED_ACCESS_IS_FAST) { |
161 | |
|
162 | 0 | if (likely(max_len - len >= 4 * WORDBYTES)) { |
163 | |
|
164 | 0 | #define COMPARE_WORD_STEP \ |
165 | 0 | v_word = load_word_unaligned(&matchptr[len]) ^ \ |
166 | 0 | load_word_unaligned(&strptr[len]); \ |
167 | 0 | if (v_word != 0) \ |
168 | 0 | goto word_differs; \ |
169 | 0 | len += WORDBYTES; \ |
170 | 0 |
|
171 | 0 | COMPARE_WORD_STEP |
172 | 0 | COMPARE_WORD_STEP |
173 | 0 | COMPARE_WORD_STEP |
174 | 0 | COMPARE_WORD_STEP |
175 | 0 | #undef COMPARE_WORD_STEP |
176 | 0 | } |
177 | | |
178 | 0 | while (len + WORDBYTES <= max_len) { |
179 | 0 | v_word = load_word_unaligned(&matchptr[len]) ^ |
180 | 0 | load_word_unaligned(&strptr[len]); |
181 | 0 | if (v_word != 0) |
182 | 0 | goto word_differs; |
183 | 0 | len += WORDBYTES; |
184 | 0 | } |
185 | 0 | } |
186 | | |
187 | 0 | while (len < max_len && matchptr[len] == strptr[len]) |
188 | 0 | len++; |
189 | 0 | return len; |
190 | | |
191 | 0 | word_differs: |
192 | 0 | if (CPU_IS_LITTLE_ENDIAN()) |
193 | 0 | len += (bsfw(v_word) >> 3); |
194 | 0 | else |
195 | 0 | len += (WORDBITS - 1 - bsrw(v_word)) >> 3; |
196 | 0 | return len; |
197 | 0 | } |
198 | | |
199 | | #endif /* LIB_MATCHFINDER_COMMON_H */ |