/src/xz/src/liblzma/lz/lz_decoder.h
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lz_decoder.h |
6 | | /// \brief LZ out window |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #ifndef LZMA_LZ_DECODER_H |
14 | | #define LZMA_LZ_DECODER_H |
15 | | |
16 | | #include "common.h" |
17 | | |
18 | | #ifdef HAVE_IMMINTRIN_H |
19 | | # include <immintrin.h> |
20 | | #endif |
21 | | |
22 | | |
23 | | // dict_repeat() implementation variant: |
24 | | // 0 = Byte-by-byte copying only. |
25 | | // 1 = Use memcpy() for non-overlapping copies. |
26 | | // 2 = Use x86 SSE2 for non-overlapping copies. |
27 | | #ifndef LZMA_LZ_DECODER_CONFIG |
28 | | # if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \ |
29 | | && defined(HAVE_IMMINTRIN_H) \ |
30 | | && (defined(__SSE2__) || defined(_M_X64) \ |
31 | | || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)) |
32 | | # define LZMA_LZ_DECODER_CONFIG 2 |
33 | | # else |
34 | | # define LZMA_LZ_DECODER_CONFIG 1 |
35 | | # endif |
36 | | #endif |
37 | | |
38 | | /// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods |
39 | | /// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount |
40 | | /// of extra space is needed at the end of the allocated dictionary buffer. |
41 | | /// |
42 | | /// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too. |
43 | | #if LZMA_LZ_DECODER_CONFIG >= 2 |
44 | 0 | # define LZ_DICT_EXTRA 32 |
45 | | #else |
46 | | # define LZ_DICT_EXTRA 0 |
47 | | #endif |
48 | | |
49 | | /// Maximum number of bytes that dict_repeat() may copy. The allocated |
50 | | /// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes |
51 | | /// larger than the actual dictionary size: |
52 | | /// |
53 | | /// (1) Every time the decoder reaches the end of the dictionary buffer, |
54 | | /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. |
55 | | /// This way dict_repeat() will only need to copy from one place, |
56 | | /// never from both the end and beginning of the buffer. |
57 | | /// |
58 | | /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between |
59 | | /// the oldest byte still in the dictionary and the current write |
60 | | /// position. This way dict_repeat() with the maximum valid distance |
61 | | /// won't need memmove() as the copying cannot overlap. |
62 | | /// |
63 | | /// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer |
64 | | /// so that extra copying done by dict_repeat() won't write or read past |
65 | | /// the end of the allocated buffer. This amount is *not* counted as part |
66 | | /// of lzma_dict.size. |
67 | | /// |
68 | | /// Note that memcpy() still cannot be used if distance < len. |
69 | | /// |
70 | | /// LZMA's longest match length is 273 bytes. The LZMA decoder looks at |
71 | | /// the lowest four bits of the dictionary position, thus 273 must be |
72 | | /// rounded up to the next multiple of 16 (288). In addition, optimized |
73 | | /// dict_repeat() copies 32 bytes at a time, thus this must also be |
74 | | /// a multiple of 32. |
75 | 0 | #define LZ_DICT_REPEAT_MAX 288 |
76 | | |
77 | | /// Initial position in lzma_dict.buf when the dictionary is empty. |
78 | 0 | #define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX) |
79 | | |
80 | | |
81 | | typedef struct { |
82 | | /// Pointer to the dictionary buffer. |
83 | | uint8_t *buf; |
84 | | |
85 | | /// Write position in dictionary. The next byte will be written to |
86 | | /// buf[pos]. |
87 | | size_t pos; |
88 | | |
89 | | /// Indicates how full the dictionary is. This is used by |
90 | | /// dict_is_distance_valid() to detect corrupt files that would |
91 | | /// read beyond the beginning of the dictionary. |
92 | | size_t full; |
93 | | |
94 | | /// Write limit |
95 | | size_t limit; |
96 | | |
97 | | /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes |
98 | | /// larger than the actual dictionary size. This is enforced by |
99 | | /// how the value for "full" is set; it can be at most |
100 | | /// "size - 2 * LZ_DICT_REPEAT_MAX". |
101 | | size_t size; |
102 | | |
103 | | /// True once the dictionary has become full and the writing position |
104 | | /// has been wrapped in decode_buffer() in lz_decoder.c. |
105 | | bool has_wrapped; |
106 | | |
107 | | /// True when dictionary should be reset before decoding more data. |
108 | | bool need_reset; |
109 | | |
110 | | } lzma_dict; |
111 | | |
112 | | |
113 | | typedef struct { |
114 | | size_t dict_size; |
115 | | const uint8_t *preset_dict; |
116 | | size_t preset_dict_size; |
117 | | } lzma_lz_options; |
118 | | |
119 | | |
120 | | typedef struct { |
121 | | /// Data specific to the LZ-based decoder |
122 | | void *coder; |
123 | | |
124 | | /// Function to decode from in[] to *dict |
125 | | lzma_ret (*code)(void *coder, |
126 | | lzma_dict *restrict dict, const uint8_t *restrict in, |
127 | | size_t *restrict in_pos, size_t in_size); |
128 | | |
129 | | void (*reset)(void *coder, const void *options); |
130 | | |
131 | | /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN |
132 | | /// then allow_eopm will always be true. |
133 | | void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size, |
134 | | bool allow_eopm); |
135 | | |
136 | | /// Free allocated resources |
137 | | void (*end)(void *coder, const lzma_allocator *allocator); |
138 | | |
139 | | } lzma_lz_decoder; |
140 | | |
141 | | |
142 | | #define LZMA_LZ_DECODER_INIT \ |
143 | 0 | (lzma_lz_decoder){ \ |
144 | 0 | .coder = NULL, \ |
145 | 0 | .code = NULL, \ |
146 | 0 | .reset = NULL, \ |
147 | 0 | .set_uncompressed = NULL, \ |
148 | 0 | .end = NULL, \ |
149 | 0 | } |
150 | | |
151 | | |
152 | | extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, |
153 | | const lzma_allocator *allocator, |
154 | | const lzma_filter_info *filters, |
155 | | lzma_ret (*lz_init)(lzma_lz_decoder *lz, |
156 | | const lzma_allocator *allocator, |
157 | | lzma_vli id, const void *options, |
158 | | lzma_lz_options *lz_options)); |
159 | | |
160 | | extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); |
161 | | |
162 | | |
163 | | ////////////////////// |
164 | | // Inline functions // |
165 | | ////////////////////// |
166 | | |
167 | | /// Get a byte from the history buffer. |
168 | | static inline uint8_t |
169 | | dict_get(const lzma_dict *const dict, const uint32_t distance) |
170 | 0 | { |
171 | 0 | return dict->buf[dict->pos - distance - 1 |
172 | 0 | + (distance < dict->pos |
173 | 0 | ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; |
174 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_get Unexecuted instantiation: lzma_decoder.c:dict_get Unexecuted instantiation: lz_decoder.c:dict_get Unexecuted instantiation: lzma2_decoder.c:dict_get |
175 | | |
176 | | |
177 | | /// Optimized version of dict_get(dict, 0) |
178 | | static inline uint8_t |
179 | | dict_get0(const lzma_dict *const dict) |
180 | 0 | { |
181 | 0 | return dict->buf[dict->pos - 1]; |
182 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_get0 Unexecuted instantiation: lzma_decoder.c:dict_get0 Unexecuted instantiation: lz_decoder.c:dict_get0 Unexecuted instantiation: lzma2_decoder.c:dict_get0 |
183 | | |
184 | | |
185 | | /// Test if dictionary is empty. |
186 | | static inline bool |
187 | | dict_is_empty(const lzma_dict *const dict) |
188 | 0 | { |
189 | 0 | return dict->full == 0; |
190 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_is_empty Unexecuted instantiation: lzma_decoder.c:dict_is_empty Unexecuted instantiation: lz_decoder.c:dict_is_empty Unexecuted instantiation: lzma2_decoder.c:dict_is_empty |
191 | | |
192 | | |
193 | | /// Validate the match distance |
194 | | static inline bool |
195 | | dict_is_distance_valid(const lzma_dict *const dict, const size_t distance) |
196 | 0 | { |
197 | 0 | return dict->full > distance; |
198 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_is_distance_valid Unexecuted instantiation: lzma_decoder.c:dict_is_distance_valid Unexecuted instantiation: lz_decoder.c:dict_is_distance_valid Unexecuted instantiation: lzma2_decoder.c:dict_is_distance_valid |
199 | | |
200 | | |
201 | | /// Repeat *len bytes at distance. |
202 | | static inline bool |
203 | | dict_repeat(lzma_dict *restrict dict, |
204 | | uint32_t distance, uint32_t *restrict len) |
205 | 0 | { |
206 | | // Don't write past the end of the dictionary. |
207 | 0 | const size_t dict_avail = dict->limit - dict->pos; |
208 | 0 | uint32_t left = my_min(dict_avail, *len); |
209 | 0 | *len -= left; |
210 | |
|
211 | 0 | size_t back = dict->pos - distance - 1; |
212 | 0 | if (distance >= dict->pos) |
213 | 0 | back += dict->size - LZ_DICT_REPEAT_MAX; |
214 | |
|
215 | | #if LZMA_LZ_DECODER_CONFIG == 0 |
216 | | // Minimal byte-by-byte method. This might be the least bad choice |
217 | | // if memcpy() isn't fast and there's no replacement for it below. |
218 | | while (left-- > 0) { |
219 | | dict->buf[dict->pos++] = dict->buf[back++]; |
220 | | } |
221 | | |
222 | | #else |
223 | | // Because memcpy() or a similar method can be faster than copying |
224 | | // byte by byte in a loop, the copying process is split into |
225 | | // two cases. |
226 | 0 | if (distance < left) { |
227 | | // Source and target areas overlap, thus we can't use |
228 | | // memcpy() nor even memmove() safely. |
229 | 0 | do { |
230 | 0 | dict->buf[dict->pos++] = dict->buf[back++]; |
231 | 0 | } while (--left > 0); |
232 | 0 | } else { |
233 | | # if LZMA_LZ_DECODER_CONFIG == 1 |
234 | | memcpy(dict->buf + dict->pos, dict->buf + back, left); |
235 | | dict->pos += left; |
236 | | |
237 | | # elif LZMA_LZ_DECODER_CONFIG == 2 |
238 | | // This can copy up to 32 bytes more than required. |
239 | | // (If left == 0, we still copy 32 bytes.) |
240 | 0 | size_t pos = dict->pos; |
241 | 0 | dict->pos += left; |
242 | 0 | do { |
243 | 0 | const __m128i x0 = _mm_loadu_si128( |
244 | 0 | (__m128i *)(dict->buf + back)); |
245 | 0 | const __m128i x1 = _mm_loadu_si128( |
246 | 0 | (__m128i *)(dict->buf + back + 16)); |
247 | 0 | back += 32; |
248 | 0 | _mm_storeu_si128( |
249 | 0 | (__m128i *)(dict->buf + pos), x0); |
250 | 0 | _mm_storeu_si128( |
251 | 0 | (__m128i *)(dict->buf + pos + 16), x1); |
252 | 0 | pos += 32; |
253 | 0 | } while (pos < dict->pos); |
254 | |
|
255 | | # else |
256 | | # error "Invalid LZMA_LZ_DECODER_CONFIG value" |
257 | | # endif |
258 | 0 | } |
259 | 0 | #endif |
260 | | |
261 | | // Update how full the dictionary is. |
262 | 0 | if (!dict->has_wrapped) |
263 | 0 | dict->full = dict->pos - LZ_DICT_INIT_POS; |
264 | |
|
265 | 0 | return *len != 0; |
266 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_repeat Unexecuted instantiation: lzma_decoder.c:dict_repeat Unexecuted instantiation: lz_decoder.c:dict_repeat Unexecuted instantiation: lzma2_decoder.c:dict_repeat |
267 | | |
268 | | |
269 | | static inline void |
270 | | dict_put(lzma_dict *restrict dict, uint8_t byte) |
271 | 0 | { |
272 | 0 | dict->buf[dict->pos++] = byte; |
273 | |
|
274 | 0 | if (!dict->has_wrapped) |
275 | 0 | dict->full = dict->pos - LZ_DICT_INIT_POS; |
276 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_put Unexecuted instantiation: lzma_decoder.c:dict_put Unexecuted instantiation: lz_decoder.c:dict_put Unexecuted instantiation: lzma2_decoder.c:dict_put |
277 | | |
278 | | |
279 | | /// Puts one byte into the dictionary. Returns true if the dictionary was |
280 | | /// already full and the byte couldn't be added. |
281 | | static inline bool |
282 | | dict_put_safe(lzma_dict *restrict dict, uint8_t byte) |
283 | 0 | { |
284 | 0 | if (unlikely(dict->pos == dict->limit)) |
285 | 0 | return true; |
286 | | |
287 | 0 | dict_put(dict, byte); |
288 | 0 | return false; |
289 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_put_safe Unexecuted instantiation: lzma_decoder.c:dict_put_safe Unexecuted instantiation: lz_decoder.c:dict_put_safe Unexecuted instantiation: lzma2_decoder.c:dict_put_safe |
290 | | |
291 | | |
292 | | /// Copies arbitrary amount of data into the dictionary. |
293 | | static inline void |
294 | | dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, |
295 | | size_t *restrict in_pos, size_t in_size, |
296 | | size_t *restrict left) |
297 | 0 | { |
298 | | // NOTE: If we are being given more data than the size of the |
299 | | // dictionary, it could be possible to optimize the LZ decoder |
300 | | // so that not everything needs to go through the dictionary. |
301 | | // This shouldn't be very common thing in practice though, and |
302 | | // the slowdown of one extra memcpy() isn't bad compared to how |
303 | | // much time it would have taken if the data were compressed. |
304 | |
|
305 | 0 | if (in_size - *in_pos > *left) |
306 | 0 | in_size = *in_pos + *left; |
307 | |
|
308 | 0 | *left -= lzma_bufcpy(in, in_pos, in_size, |
309 | 0 | dict->buf, &dict->pos, dict->limit); |
310 | |
|
311 | 0 | if (!dict->has_wrapped) |
312 | 0 | dict->full = dict->pos - LZ_DICT_INIT_POS; |
313 | |
|
314 | 0 | return; |
315 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_write Unexecuted instantiation: lzma_decoder.c:dict_write Unexecuted instantiation: lz_decoder.c:dict_write Unexecuted instantiation: lzma2_decoder.c:dict_write |
316 | | |
317 | | |
318 | | static inline void |
319 | | dict_reset(lzma_dict *dict) |
320 | 0 | { |
321 | | dict->need_reset = true; |
322 | 0 | return; |
323 | 0 | } Unexecuted instantiation: alone_decoder.c:dict_reset Unexecuted instantiation: lzma_decoder.c:dict_reset Unexecuted instantiation: lz_decoder.c:dict_reset Unexecuted instantiation: lzma2_decoder.c:dict_reset |
324 | | |
325 | | #endif |