/src/xz/src/liblzma/lz/lz_decoder.h
Line | Count | Source (jump to first uncovered line) |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lz_decoder.h |
6 | | /// \brief LZ out window |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #ifndef LZMA_LZ_DECODER_H |
14 | | #define LZMA_LZ_DECODER_H |
15 | | |
16 | | #include "common.h" |
17 | | |
18 | | #ifdef HAVE_IMMINTRIN_H |
19 | | # include <immintrin.h> |
20 | | #endif |
21 | | |
22 | | |
23 | | // dict_repeat() implementation variant: |
24 | | // 0 = Byte-by-byte copying only. |
25 | | // 1 = Use memcpy() for non-overlapping copies. |
26 | | // 2 = Use x86 SSE2 for non-overlapping copies. |
27 | | #ifndef LZMA_LZ_DECODER_CONFIG |
28 | | # if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ |
29 | | && defined(HAVE_IMMINTRIN_H) \ |
30 | | && (defined(__SSE2__) || defined(_M_X64) \ |
31 | | || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)) |
32 | | # define LZMA_LZ_DECODER_CONFIG 2 |
33 | | # else |
34 | | # define LZMA_LZ_DECODER_CONFIG 1 |
35 | | # endif |
36 | | #endif |
37 | | |
38 | | /// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods |
39 | | /// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount |
40 | | /// of extra space is needed at the end of the allocated dictionary buffer. |
41 | | /// |
42 | | /// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too. |
43 | | #if LZMA_LZ_DECODER_CONFIG >= 2 |
44 | 21.7k | # define LZ_DICT_EXTRA 32 |
45 | | #else |
46 | | # define LZ_DICT_EXTRA 0 |
47 | | #endif |
48 | | |
49 | | /// Maximum number of bytes that dict_repeat() may copy. The allocated |
50 | | /// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes |
51 | | /// larger than the actual dictionary size: |
52 | | /// |
53 | | /// (1) Every time the decoder reaches the end of the dictionary buffer, |
54 | | /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. |
55 | | /// This way dict_repeat() will only need to copy from one place, |
56 | | /// never from both the end and beginning of the buffer. |
57 | | /// |
58 | | /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between |
59 | | /// the oldest byte still in the dictionary and the current write |
60 | | /// position. This way dict_repeat() with the maximum valid distance |
61 | | /// won't need memmove() as the copying cannot overlap. |
62 | | /// |
63 | | /// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer |
64 | | /// so that extra copying done by dict_repeat() won't write or read past |
65 | | /// the end of the allocated buffer. This amount is *not* counted as part |
66 | | /// of lzma_dict.size. |
67 | | /// |
68 | | /// Note that memcpy() still cannot be used if distance < len. |
69 | | /// |
70 | | /// LZMA's longest match length is 273 bytes. The LZMA decoder looks at |
71 | | /// the lowest four bits of the dictionary position, thus 273 must be |
72 | | /// rounded up to the next multiple of 16 (288). In addition, optimized |
73 | | /// dict_repeat() copies 32 bytes at a time, thus this must also be |
74 | | /// a multiple of 32. |
75 | 4.35M | #define LZ_DICT_REPEAT_MAX 288 |
76 | | |
77 | | /// Initial position in lzma_dict.buf when the dictionary is empty. |
78 | 4.20M | #define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX) |
79 | | |
80 | | |
81 | | typedef struct { |
82 | | /// Pointer to the dictionary buffer. |
83 | | uint8_t *buf; |
84 | | |
85 | | /// Write position in dictionary. The next byte will be written to |
86 | | /// buf[pos]. |
87 | | size_t pos; |
88 | | |
89 | | /// Indicates how full the dictionary is. This is used by |
90 | | /// dict_is_distance_valid() to detect corrupt files that would |
91 | | /// read beyond the beginning of the dictionary. |
92 | | size_t full; |
93 | | |
94 | | /// Write limit |
95 | | size_t limit; |
96 | | |
97 | | /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes |
98 | | /// larger than the actual dictionary size. This is enforced by |
99 | | /// how the value for "full" is set; it can be at most |
100 | | /// "size - 2 * LZ_DICT_REPEAT_MAX". |
101 | | size_t size; |
102 | | |
103 | | /// True once the dictionary has become full and the writing position |
104 | | /// has been wrapped in decode_buffer() in lz_decoder.c. |
105 | | bool has_wrapped; |
106 | | |
107 | | /// True when dictionary should be reset before decoding more data. |
108 | | bool need_reset; |
109 | | |
110 | | } lzma_dict; |
111 | | |
112 | | |
113 | | typedef struct { |
114 | | size_t dict_size; |
115 | | const uint8_t *preset_dict; |
116 | | size_t preset_dict_size; |
117 | | } lzma_lz_options; |
118 | | |
119 | | |
120 | | typedef struct { |
121 | | /// Data specific to the LZ-based decoder |
122 | | void *coder; |
123 | | |
124 | | /// Function to decode from in[] to *dict |
125 | | lzma_ret (*code)(void *coder, |
126 | | lzma_dict *restrict dict, const uint8_t *restrict in, |
127 | | size_t *restrict in_pos, size_t in_size); |
128 | | |
129 | | void (*reset)(void *coder, const void *options); |
130 | | |
131 | | /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN |
132 | | /// then allow_eopm will always be true. |
133 | | void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, |
134 | | bool allow_eopm); |
135 | | |
136 | | /// Free allocated resources |
137 | | void (*end)(void *coder, const lzma_allocator *allocator); |
138 | | |
139 | | } lzma_lz_decoder; |
140 | | |
141 | | |
142 | | #define LZMA_LZ_DECODER_INIT \ |
143 | 10.3k | (lzma_lz_decoder){ \ |
144 | 10.3k | .coder = NULL, \ |
145 | 10.3k | .code = NULL, \ |
146 | 10.3k | .reset = NULL, \ |
147 | 10.3k | .set_uncompressed = NULL, \ |
148 | 10.3k | .end = NULL, \ |
149 | 10.3k | } |
150 | | |
151 | | |
152 | | extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, |
153 | | const lzma_allocator *allocator, |
154 | | const lzma_filter_info *filters, |
155 | | lzma_ret (*lz_init)(lzma_lz_decoder *lz, |
156 | | const lzma_allocator *allocator, |
157 | | lzma_vli id, const void *options, |
158 | | lzma_lz_options *lz_options)); |
159 | | |
160 | | extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); |
161 | | |
162 | | |
163 | | ////////////////////// |
164 | | // Inline functions // |
165 | | ////////////////////// |
166 | | |
167 | | /// Get a byte from the history buffer. |
168 | | static inline uint8_t |
169 | | dict_get(const lzma_dict *const dict, const uint32_t distance) |
170 | 247k | { |
171 | 247k | return dict->buf[dict->pos - distance - 1 |
172 | 247k | + (distance < dict->pos |
173 | 247k | ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; |
174 | 247k | } Unexecuted instantiation: alone_decoder.c:dict_get Line | Count | Source | 170 | 247k | { | 171 | 247k | return dict->buf[dict->pos - distance - 1 | 172 | 247k | + (distance < dict->pos | 173 | 247k | ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; | 174 | 247k | } |
Unexecuted instantiation: lzma2_decoder.c:dict_get Unexecuted instantiation: lz_decoder.c:dict_get |
175 | | |
176 | | |
177 | | /// Optimized version of dict_get(dict, 0) |
178 | | static inline uint8_t |
179 | | dict_get0(const lzma_dict *const dict) |
180 | 5.10M | { |
181 | 5.10M | return dict->buf[dict->pos - 1]; |
182 | 5.10M | } Unexecuted instantiation: alone_decoder.c:dict_get0 Line | Count | Source | 180 | 5.10M | { | 181 | 5.10M | return dict->buf[dict->pos - 1]; | 182 | 5.10M | } |
Unexecuted instantiation: lzma2_decoder.c:dict_get0 Unexecuted instantiation: lz_decoder.c:dict_get0 |
183 | | |
184 | | |
185 | | /// Test if dictionary is empty. |
186 | | static inline bool |
187 | | dict_is_empty(const lzma_dict *const dict) |
188 | 0 | { |
189 | 0 | return dict->full == 0; |
190 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_is_empty Unexecuted instantiation: lzma_decoder.c:dict_is_empty Unexecuted instantiation: lzma2_decoder.c:dict_is_empty Unexecuted instantiation: lz_decoder.c:dict_is_empty |
191 | | |
192 | | |
193 | | /// Validate the match distance |
194 | | static inline bool |
195 | | dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) |
196 | 863k | { |
197 | 863k | return dict->full > distance; |
198 | 863k | } Unexecuted instantiation: alone_decoder.c:dict_is_distance_valid lzma_decoder.c:dict_is_distance_valid Line | Count | Source | 196 | 863k | { | 197 | 863k | return dict->full > distance; | 198 | 863k | } |
Unexecuted instantiation: lzma2_decoder.c:dict_is_distance_valid Unexecuted instantiation: lz_decoder.c:dict_is_distance_valid |
199 | | |
200 | | |
201 | | /// Repeat *len bytes at distance. |
202 | | static inline bool |
203 | | dict_repeat(lzma_dict *restrict dict, |
204 | | uint32_t distance, uint32_t *restrict len) |
205 | 829k | { |
206 | | // Don't write past the end of the dictionary. |
207 | 829k | const size_t dict_avail = dict->limit - dict->pos; |
208 | 829k | uint32_t left = my_min(dict_avail, *len); |
209 | 829k | *len -= left; |
210 | | |
211 | 829k | size_t back = dict->pos - distance - 1; |
212 | 829k | if (distance >= dict->pos) |
213 | 42.1k | back += dict->size - LZ_DICT_REPEAT_MAX; |
214 | | |
215 | | #if LZMA_LZ_DECODER_CONFIG == 0 |
216 | | // Minimal byte-by-byte method. This might be the least bad choice |
217 | | // if memcpy() isn't fast and there's no replacement for it below. |
218 | | while (left-- > 0) { |
219 | | dict->buf[dict->pos++] = dict->buf[back++]; |
220 | | } |
221 | | |
222 | | #else |
223 | | // Because memcpy() or a similar method can be faster than copying |
224 | | // byte by byte in a loop, the copying process is split into |
225 | | // two cases. |
226 | 829k | if (distance < left) { |
227 | | // Source and target areas overlap, thus we can't use |
228 | | // memcpy() nor even memmove() safely. |
229 | 114M | do { |
230 | 114M | dict->buf[dict->pos++] = dict->buf[back++]; |
231 | 114M | } while (--left > 0); |
232 | 537k | } else { |
233 | | # if LZMA_LZ_DECODER_CONFIG == 1 |
234 | | memcpy(dict->buf + dict->pos, dict->buf + back, left); |
235 | | dict->pos += left; |
236 | | |
237 | | # elif LZMA_LZ_DECODER_CONFIG == 2 |
238 | | // This can copy up to 32 bytes more than required. |
239 | | // (If left == 0, we still copy 32 bytes.) |
240 | 291k | size_t pos = dict->pos; |
241 | 291k | dict->pos += left; |
242 | 498k | do { |
243 | 498k | const __m128i x0 = _mm_loadu_si128( |
244 | 498k | (__m128i *)(dict->buf + back)); |
245 | 498k | const __m128i x1 = _mm_loadu_si128( |
246 | 498k | (__m128i *)(dict->buf + back + 16)); |
247 | 498k | back += 32; |
248 | 498k | _mm_storeu_si128( |
249 | 498k | (__m128i *)(dict->buf + pos), x0); |
250 | 498k | _mm_storeu_si128( |
251 | 498k | (__m128i *)(dict->buf + pos + 16), x1); |
252 | 498k | pos += 32; |
253 | 498k | } while (pos < dict->pos); |
254 | | |
255 | | # else |
256 | | # error "Invalid LZMA_LZ_DECODER_CONFIG value" |
257 | | # endif |
258 | 291k | } |
259 | 829k | #endif |
260 | | |
261 | | // Update how full the dictionary is. |
262 | 829k | if (!dict->has_wrapped) |
263 | 274k | dict->full = dict->pos - LZ_DICT_INIT_POS; |
264 | | |
265 | 829k | return *len != 0; |
266 | 829k | } Unexecuted instantiation: alone_decoder.c:dict_repeat lzma_decoder.c:dict_repeat Line | Count | Source | 205 | 829k | { | 206 | | // Don't write past the end of the dictionary. | 207 | 829k | const size_t dict_avail = dict->limit - dict->pos; | 208 | 829k | uint32_t left = my_min(dict_avail, *len); | 209 | 829k | *len -= left; | 210 | | | 211 | 829k | size_t back = dict->pos - distance - 1; | 212 | 829k | if (distance >= dict->pos) | 213 | 42.1k | back += dict->size - LZ_DICT_REPEAT_MAX; | 214 | | | 215 | | #if LZMA_LZ_DECODER_CONFIG == 0 | 216 | | // Minimal byte-by-byte method. This might be the least bad choice | 217 | | // if memcpy() isn't fast and there's no replacement for it below. | 218 | | while (left-- > 0) { | 219 | | dict->buf[dict->pos++] = dict->buf[back++]; | 220 | | } | 221 | | | 222 | | #else | 223 | | // Because memcpy() or a similar method can be faster than copying | 224 | | // byte by byte in a loop, the copying process is split into | 225 | | // two cases. | 226 | 829k | if (distance < left) { | 227 | | // Source and target areas overlap, thus we can't use | 228 | | // memcpy() nor even memmove() safely. | 229 | 114M | do { | 230 | 114M | dict->buf[dict->pos++] = dict->buf[back++]; | 231 | 114M | } while (--left > 0); | 232 | 537k | } else { | 233 | | # if LZMA_LZ_DECODER_CONFIG == 1 | 234 | | memcpy(dict->buf + dict->pos, dict->buf + back, left); | 235 | | dict->pos += left; | 236 | | | 237 | | # elif LZMA_LZ_DECODER_CONFIG == 2 | 238 | | // This can copy up to 32 bytes more than required. | 239 | | // (If left == 0, we still copy 32 bytes.) | 240 | 291k | size_t pos = dict->pos; | 241 | 291k | dict->pos += left; | 242 | 498k | do { | 243 | 498k | const __m128i x0 = _mm_loadu_si128( | 244 | 498k | (__m128i *)(dict->buf + back)); | 245 | 498k | const __m128i x1 = _mm_loadu_si128( | 246 | 498k | (__m128i *)(dict->buf + back + 16)); | 247 | 498k | back += 32; | 248 | 498k | _mm_storeu_si128( | 249 | 498k | (__m128i *)(dict->buf + pos), x0); | 250 | 498k | _mm_storeu_si128( | 251 | 498k | (__m128i *)(dict->buf + pos + 16), x1); | 252 | 498k | pos += 32; | 253 | 498k | } while (pos < dict->pos); | 254 | | | 255 | | # else | 256 | | # error "Invalid LZMA_LZ_DECODER_CONFIG value" | 257 | | # endif | 258 | 291k | } | 259 | 829k | #endif | 260 | | | 261 | | // Update how full the dictionary is. | 262 | 829k | if (!dict->has_wrapped) | 263 | 274k | dict->full = dict->pos - LZ_DICT_INIT_POS; | 264 | | | 265 | 829k | return *len != 0; | 266 | 829k | } |
Unexecuted instantiation: lzma2_decoder.c:dict_repeat Unexecuted instantiation: lz_decoder.c:dict_repeat |
267 | | |
268 | | |
269 | | static inline void |
270 | | dict_put(lzma_dict *restrict dict, uint8_t byte) |
271 | 5.15M | { |
272 | 5.15M | dict->buf[dict->pos++] = byte; |
273 | | |
274 | 5.15M | if (!dict->has_wrapped) |
275 | 3.87M | dict->full = dict->pos - LZ_DICT_INIT_POS; |
276 | 5.15M | } Unexecuted instantiation: alone_decoder.c:dict_put Line | Count | Source | 271 | 5.15M | { | 272 | 5.15M | dict->buf[dict->pos++] = byte; | 273 | | | 274 | 5.15M | if (!dict->has_wrapped) | 275 | 3.87M | dict->full = dict->pos - LZ_DICT_INIT_POS; | 276 | 5.15M | } |
Unexecuted instantiation: lzma2_decoder.c:dict_put Unexecuted instantiation: lz_decoder.c:dict_put |
277 | | |
278 | | |
279 | | /// Puts one byte into the dictionary. Returns true if the dictionary was |
280 | | /// already full and the byte couldn't be added. |
281 | | static inline bool |
282 | | dict_put_safe(lzma_dict *restrict dict, uint8_t byte) |
283 | 135k | { |
284 | 135k | if (unlikely(dict->pos == dict->limit)) |
285 | 1.35k | return true; |
286 | | |
287 | 134k | dict_put(dict, byte); |
288 | 134k | return false; |
289 | 135k | } Unexecuted instantiation: alone_decoder.c:dict_put_safe lzma_decoder.c:dict_put_safe Line | Count | Source | 283 | 135k | { | 284 | 135k | if (unlikely(dict->pos == dict->limit)) | 285 | 1.35k | return true; | 286 | | | 287 | 134k | dict_put(dict, byte); | 288 | 134k | return false; | 289 | 135k | } |
Unexecuted instantiation: lzma2_decoder.c:dict_put_safe Unexecuted instantiation: lz_decoder.c:dict_put_safe |
290 | | |
291 | | |
292 | | /// Copies arbitrary amount of data into the dictionary. |
293 | | static inline void |
294 | | dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, |
295 | | size_t *restrict in_pos, size_t in_size, |
296 | | size_t *restrict left) |
297 | 32.9k | { |
298 | | // NOTE: If we are being given more data than the size of the |
299 | | // dictionary, it could be possible to optimize the LZ decoder |
300 | | // so that not everything needs to go through the dictionary. |
301 | | // This shouldn't be very common thing in practice though, and |
302 | | // the slowdown of one extra memcpy() isn't bad compared to how |
303 | | // much time it would have taken if the data were compressed. |
304 | | |
305 | 32.9k | if (in_size - *in_pos > *left) |
306 | 9.15k | in_size = *in_pos + *left; |
307 | | |
308 | 32.9k | *left -= lzma_bufcpy(in, in_pos, in_size, |
309 | 32.9k | dict->buf, &dict->pos, dict->limit); |
310 | | |
311 | 32.9k | if (!dict->has_wrapped) |
312 | 14.7k | dict->full = dict->pos - LZ_DICT_INIT_POS; |
313 | | |
314 | 32.9k | return; |
315 | 32.9k | } Unexecuted instantiation: alone_decoder.c:dict_write Unexecuted instantiation: lzma_decoder.c:dict_write lzma2_decoder.c:dict_write Line | Count | Source | 297 | 32.9k | { | 298 | | // NOTE: If we are being given more data than the size of the | 299 | | // dictionary, it could be possible to optimize the LZ decoder | 300 | | // so that not everything needs to go through the dictionary. | 301 | | // This shouldn't be very common thing in practice though, and | 302 | | // the slowdown of one extra memcpy() isn't bad compared to how | 303 | | // much time it would have taken if the data were compressed. | 304 | | | 305 | 32.9k | if (in_size - *in_pos > *left) | 306 | 9.15k | in_size = *in_pos + *left; | 307 | | | 308 | 32.9k | *left -= lzma_bufcpy(in, in_pos, in_size, | 309 | 32.9k | dict->buf, &dict->pos, dict->limit); | 310 | | | 311 | 32.9k | if (!dict->has_wrapped) | 312 | 14.7k | dict->full = dict->pos - LZ_DICT_INIT_POS; | 313 | | | 314 | 32.9k | return; | 315 | 32.9k | } |
Unexecuted instantiation: lz_decoder.c:dict_write |
316 | | |
317 | | |
318 | | static inline void |
319 | | dict_reset(lzma_dict *dict) |
320 | 8.78k | { |
321 | 8.78k | dict->need_reset = true; |
322 | 8.78k | return; |
323 | 8.78k | } Unexecuted instantiation: alone_decoder.c:dict_reset Unexecuted instantiation: lzma_decoder.c:dict_reset lzma2_decoder.c:dict_reset Line | Count | Source | 320 | 8.78k | { | 321 | 8.78k | dict->need_reset = true; | 322 | 8.78k | return; | 323 | 8.78k | } |
Unexecuted instantiation: lz_decoder.c:dict_reset |
324 | | |
325 | | #endif |