/src/xz/src/liblzma/lz/lz_decoder.h

Source
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_decoder.h
/// \brief      LZ out window
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#ifndef LZMA_LZ_DECODER_H
#define LZMA_LZ_DECODER_H

#include "common.h"

#ifdef HAVE_IMMINTRIN_H
# include <immintrin.h>
#endif


// dict_repeat() implementation variant:
// 0 = Byte-by-byte copying only.
// 1 = Use memcpy() for non-overlapping copies.
// 2 = Use x86 SSE2 for non-overlapping copies.
#ifndef LZMA_LZ_DECODER_CONFIG
# if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
    && defined(HAVE_IMMINTRIN_H) \
    && (defined(__SSE2__) || defined(_M_X64) \
      || (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
#   define LZMA_LZ_DECODER_CONFIG 2
# else
#   define LZMA_LZ_DECODER_CONFIG 1
# endif
#endif

/// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
/// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
/// of extra space is needed at the end of the allocated dictionary buffer.
///
/// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too.
#if LZMA_LZ_DECODER_CONFIG >= 2
# define LZ_DICT_EXTRA 32
#else
# define LZ_DICT_EXTRA 0
#endif

/// Maximum number of bytes that dict_repeat() may copy. The allocated
/// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes
/// larger than the actual dictionary size:
///
/// (1) Every time the decoder reaches the end of the dictionary buffer,
///     the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
///     This way dict_repeat() will only need to copy from one place,
///     never from both the end and beginning of the buffer.
///
/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
///     the oldest byte still in the dictionary and the current write
///     position. This way dict_repeat() with the maximum valid distance
///     won't need memmove() as the copying cannot overlap.
///
/// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
///     so that extra copying done by dict_repeat() won't write or read past
///     the end of the allocated buffer. This amount is *not* counted as part
///     of lzma_dict.size.
///
/// Note that memcpy() still cannot be used if distance < len.
///
/// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
/// the lowest four bits of the dictionary position, thus 273 must be
/// rounded up to the next multiple of 16 (288). In addition, optimized
/// dict_repeat() copies 32 bytes at a time, thus this must also be
/// a multiple of 32.
#define LZ_DICT_REPEAT_MAX 288

/// Initial position in lzma_dict.buf when the dictionary is empty.
#define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)


typedef struct {
  /// Pointer to the dictionary buffer.
  uint8_t *buf;

  /// Write position in dictionary. The next byte will be written to
  /// buf[pos].
  size_t pos;

  /// Indicates how full the dictionary is. This is used by
  /// dict_is_distance_valid() to detect corrupt files that would
  /// read beyond the beginning of the dictionary.
  size_t full;

  /// Write limit
  size_t limit;

  /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
  /// larger than the actual dictionary size. This is enforced by
  /// how the value for "full" is set; it can be at most
  /// "size - 2 * LZ_DICT_REPEAT_MAX".
  size_t size;

  /// True once the dictionary has become full and the writing position
  /// has been wrapped in decode_buffer() in lz_decoder.c.
  bool has_wrapped;

  /// True when dictionary should be reset before decoding more data.
  bool need_reset;

} lzma_dict;


typedef struct {
  size_t dict_size;
  const uint8_t *preset_dict;
  size_t preset_dict_size;
} lzma_lz_options;


typedef struct {
  /// Data specific to the LZ-based decoder
  void *coder;

  /// Function to decode from in[] to *dict
  lzma_ret (*code)(void *coder,
      lzma_dict *restrict dict, const uint8_t *restrict in,
      size_t *restrict in_pos, size_t in_size);

  void (*reset)(void *coder, const void *options);

  /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
  /// then allow_eopm will always be true.
  void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
      bool allow_eopm);

  /// Free allocated resources
  void (*end)(void *coder, const lzma_allocator *allocator);

} lzma_lz_decoder;


#define LZMA_LZ_DECODER_INIT \
  (lzma_lz_decoder){ \
    .coder = NULL, \
    .code = NULL, \
    .reset = NULL, \
    .set_uncompressed = NULL, \
    .end = NULL, \
  }


extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
    const lzma_allocator *allocator,
    const lzma_filter_info *filters,
    lzma_ret (*lz_init)(lzma_lz_decoder *lz,
      const lzma_allocator *allocator,
      lzma_vli id, const void *options,
      lzma_lz_options *lz_options));

extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);


//////////////////////
// Inline functions //
//////////////////////

/// Get a byte from the history buffer.
static inline uint8_t
dict_get(const lzma_dict *const dict, const uint32_t distance)
{
  return dict->buf[dict->pos - distance - 1
      + (distance < dict->pos
        ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
}


/// Optimized version of dict_get(dict, 0)
static inline uint8_t
dict_get0(const lzma_dict *const dict)
{
  return dict->buf[dict->pos - 1];
}


/// Test if dictionary is empty.
static inline bool
dict_is_empty(const lzma_dict *const dict)
{
  return dict->full == 0;
}


/// Validate the match distance
static inline bool
dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
{
  return dict->full > distance;
}


/// Repeat *len bytes at distance.
static inline bool
dict_repeat(lzma_dict *restrict dict,
    uint32_t distance, uint32_t *restrict len)
{
  // Don't write past the end of the dictionary.
  const size_t dict_avail = dict->limit - dict->pos;
  uint32_t left = my_min(dict_avail, *len);
  *len -= left;

  size_t back = dict->pos - distance - 1;
  if (distance >= dict->pos)
    back += dict->size - LZ_DICT_REPEAT_MAX;

#if LZMA_LZ_DECODER_CONFIG == 0
  // Minimal byte-by-byte method. This might be the least bad choice
  // if memcpy() isn't fast and there's no replacement for it below.
  while (left-- > 0) {
    dict->buf[dict->pos++] = dict->buf[back++];
  }

#else
  // Because memcpy() or a similar method can be faster than copying
  // byte by byte in a loop, the copying process is split into
  // two cases.
  if (distance < left) {
    // Source and target areas overlap, thus we can't use
    // memcpy() nor even memmove() safely.
    do {
      dict->buf[dict->pos++] = dict->buf[back++];
    } while (--left > 0);
  } else {
# if LZMA_LZ_DECODER_CONFIG == 1
    memcpy(dict->buf + dict->pos, dict->buf + back, left);
    dict->pos += left;

# elif LZMA_LZ_DECODER_CONFIG == 2
    // This can copy up to 32 bytes more than required.
    // (If left == 0, we still copy 32 bytes.)
    size_t pos = dict->pos;
    dict->pos += left;
    do {
      const __m128i x0 = _mm_loadu_si128(
          (__m128i *)(dict->buf + back));
      const __m128i x1 = _mm_loadu_si128(
          (__m128i *)(dict->buf + back + 16));
      back += 32;
      _mm_storeu_si128(
          (__m128i *)(dict->buf + pos), x0);
      _mm_storeu_si128(
          (__m128i *)(dict->buf + pos + 16), x1);
      pos += 32;
    } while (pos < dict->pos);

# else
#   error "Invalid LZMA_LZ_DECODER_CONFIG value"
# endif
  }
#endif

  // Update how full the dictionary is.
  if (!dict->has_wrapped)
    dict->full = dict->pos - LZ_DICT_INIT_POS;

  return *len != 0;
}


static inline void
dict_put(lzma_dict *restrict dict, uint8_t byte)
{
  dict->buf[dict->pos++] = byte;

  if (!dict->has_wrapped)
    dict->full = dict->pos - LZ_DICT_INIT_POS;
}


/// Puts one byte into the dictionary. Returns true if the dictionary was
/// already full and the byte couldn't be added.
static inline bool
dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
{
  if (unlikely(dict->pos == dict->limit))
    return true;

  dict_put(dict, byte);
  return false;
}


/// Copies arbitrary amount of data into the dictionary.
static inline void
dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
    size_t *restrict in_pos, size_t in_size,
    size_t *restrict left)
{
  // NOTE: If we are being given more data than the size of the
  // dictionary, it could be possible to optimize the LZ decoder
  // so that not everything needs to go through the dictionary.
  // This shouldn't be very common thing in practice though, and
  // the slowdown of one extra memcpy() isn't bad compared to how
  // much time it would have taken if the data were compressed.

  if (in_size - *in_pos > *left)
    in_size = *in_pos + *left;

  *left -= lzma_bufcpy(in, in_pos, in_size,
      dict->buf, &dict->pos, dict->limit);

  if (!dict->has_wrapped)
    dict->full = dict->pos - LZ_DICT_INIT_POS;

  return;
}


static inline void
dict_reset(lzma_dict *dict)
{
  dict->need_reset = true;
  return;
}

#endif

Coverage Report

Created: 2025-12-31 07:53

Line	Count	Source
1		// SPDX-License-Identifier: 0BSD
2
3		///////////////////////////////////////////////////////////////////////////////
4		//
5		/// \file lz_decoder.h
6		/// \brief LZ out window
7		///
8		// Authors: Igor Pavlov
9		// Lasse Collin
10		//
11		///////////////////////////////////////////////////////////////////////////////
12
13		#ifndef LZMA_LZ_DECODER_H
14		#define LZMA_LZ_DECODER_H
15
16		#include "common.h"
17
18		#ifdef HAVE_IMMINTRIN_H
19		# include <immintrin.h>
20		#endif
21
22
23		// dict_repeat() implementation variant:
24		// 0 = Byte-by-byte copying only.
25		// 1 = Use memcpy() for non-overlapping copies.
26		// 2 = Use x86 SSE2 for non-overlapping copies.
27		#ifndef LZMA_LZ_DECODER_CONFIG
28		# if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
29		&& defined(HAVE_IMMINTRIN_H) \
30		&& (defined(__SSE2__) \|\| defined(_M_X64) \
31		\|\| (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
32		# define LZMA_LZ_DECODER_CONFIG 2
33		# else
34		# define LZMA_LZ_DECODER_CONFIG 1
35		# endif
36		#endif
37
38		/// Byte-by-byte and memcpy() copy exactly the amount needed. Other methods
39		/// can copy up to LZ_DICT_EXTRA bytes more than requested, and this amount
40		/// of extra space is needed at the end of the allocated dictionary buffer.
41		///
42		/// NOTE: If this is increased, update LZMA_DICT_REPEAT_MAX too.
43		#if LZMA_LZ_DECODER_CONFIG >= 2
44	0	# define LZ_DICT_EXTRA 32
45		#else
46		# define LZ_DICT_EXTRA 0
47		#endif
48
49		/// Maximum number of bytes that dict_repeat() may copy. The allocated
50		/// dictionary buffer will be 2 * LZ_DICT_REPEAT_MAX + LZMA_DICT_EXTRA bytes
51		/// larger than the actual dictionary size:
52		///
53		/// (1) Every time the decoder reaches the end of the dictionary buffer,
54		/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
55		/// This way dict_repeat() will only need to copy from one place,
56		/// never from both the end and beginning of the buffer.
57		///
58		/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
59		/// the oldest byte still in the dictionary and the current write
60		/// position. This way dict_repeat() with the maximum valid distance
61		/// won't need memmove() as the copying cannot overlap.
62		///
63		/// (3) LZ_DICT_EXTRA bytes are required at the end of the dictionary buffer
64		/// so that extra copying done by dict_repeat() won't write or read past
65		/// the end of the allocated buffer. This amount is not counted as part
66		/// of lzma_dict.size.
67		///
68		/// Note that memcpy() still cannot be used if distance < len.
69		///
70		/// LZMA's longest match length is 273 bytes. The LZMA decoder looks at
71		/// the lowest four bits of the dictionary position, thus 273 must be
72		/// rounded up to the next multiple of 16 (288). In addition, optimized
73		/// dict_repeat() copies 32 bytes at a time, thus this must also be
74		/// a multiple of 32.
75	0	#define LZ_DICT_REPEAT_MAX 288
76
77		/// Initial position in lzma_dict.buf when the dictionary is empty.
78	0	#define LZ_DICT_INIT_POS (2 * LZ_DICT_REPEAT_MAX)
79
80
81		typedef struct {
82		/// Pointer to the dictionary buffer.
83		uint8_t *buf;
84
85		/// Write position in dictionary. The next byte will be written to
86		/// buf[pos].
87		size_t pos;
88
89		/// Indicates how full the dictionary is. This is used by
90		/// dict_is_distance_valid() to detect corrupt files that would
91		/// read beyond the beginning of the dictionary.
92		size_t full;
93
94		/// Write limit
95		size_t limit;
96
97		/// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
98		/// larger than the actual dictionary size. This is enforced by
99		/// how the value for "full" is set; it can be at most
100		/// "size - 2 * LZ_DICT_REPEAT_MAX".
101		size_t size;
102
103		/// True once the dictionary has become full and the writing position
104		/// has been wrapped in decode_buffer() in lz_decoder.c.
105		bool has_wrapped;
106
107		/// True when dictionary should be reset before decoding more data.
108		bool need_reset;
109
110		} lzma_dict;
111
112
113		typedef struct {
114		size_t dict_size;
115		const uint8_t *preset_dict;
116		size_t preset_dict_size;
117		} lzma_lz_options;
118
119
120		typedef struct {
121		/// Data specific to the LZ-based decoder
122		void *coder;
123
124		/// Function to decode from in[] to *dict
125		lzma_ret (code)(void coder,
126		lzma_dict restrict dict, const uint8_t restrict in,
127		size_t *restrict in_pos, size_t in_size);
128
129		void (reset)(void coder, const void *options);
130
131		/// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
132		/// then allow_eopm will always be true.
133		void (set_uncompressed)(void coder, lzma_vli uncompressed_size,
134		bool allow_eopm);
135
136		/// Free allocated resources
137		void (end)(void coder, const lzma_allocator *allocator);
138
139		} lzma_lz_decoder;
140
141
142		#define LZMA_LZ_DECODER_INIT \
143	0	(lzma_lz_decoder){ \
144	0	.coder = NULL, \
145	0	.code = NULL, \
146	0	.reset = NULL, \
147	0	.set_uncompressed = NULL, \
148	0	.end = NULL, \
149	0	}
150
151
152		extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
153		const lzma_allocator *allocator,
154		const lzma_filter_info *filters,
155		lzma_ret (lz_init)(lzma_lz_decoder lz,
156		const lzma_allocator *allocator,
157		lzma_vli id, const void *options,
158		lzma_lz_options *lz_options));
159
160		extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
161
162
163		//////////////////////
164		// Inline functions //
165		//////////////////////
166
167		/// Get a byte from the history buffer.
168		static inline uint8_t
169		dict_get(const lzma_dict *const dict, const uint32_t distance)
170	0	{
171	0	return dict->buf[dict->pos - distance - 1
172	0	+ (distance < dict->pos
173	0	? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
174	0	} Unexecuted instantiation: alone_decoder.c:dict_get Unexecuted instantiation: lzma_decoder.c:dict_get Unexecuted instantiation: lz_decoder.c:dict_get Unexecuted instantiation: lzma2_decoder.c:dict_get
175
176
177		/// Optimized version of dict_get(dict, 0)
178		static inline uint8_t
179		dict_get0(const lzma_dict *const dict)
180	0	{
181	0	return dict->buf[dict->pos - 1];
182	0	} Unexecuted instantiation: alone_decoder.c:dict_get0 Unexecuted instantiation: lzma_decoder.c:dict_get0 Unexecuted instantiation: lz_decoder.c:dict_get0 Unexecuted instantiation: lzma2_decoder.c:dict_get0
183
184
185		/// Test if dictionary is empty.
186		static inline bool
187		dict_is_empty(const lzma_dict *const dict)
188	0	{
189	0	return dict->full == 0;
190	0	} Unexecuted instantiation: alone_decoder.c:dict_is_empty Unexecuted instantiation: lzma_decoder.c:dict_is_empty Unexecuted instantiation: lz_decoder.c:dict_is_empty Unexecuted instantiation: lzma2_decoder.c:dict_is_empty
191
192
193		/// Validate the match distance
194		static inline bool
195		dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
196	0	{
197	0	return dict->full > distance;
198	0	} Unexecuted instantiation: alone_decoder.c:dict_is_distance_valid Unexecuted instantiation: lzma_decoder.c:dict_is_distance_valid Unexecuted instantiation: lz_decoder.c:dict_is_distance_valid Unexecuted instantiation: lzma2_decoder.c:dict_is_distance_valid
199
200
201		/// Repeat *len bytes at distance.
202		static inline bool
203		dict_repeat(lzma_dict *restrict dict,
204		uint32_t distance, uint32_t *restrict len)
205	0	{
206		// Don't write past the end of the dictionary.
207	0	const size_t dict_avail = dict->limit - dict->pos;
208	0	uint32_t left = my_min(dict_avail, *len);
209	0	*len -= left;
210
211	0	size_t back = dict->pos - distance - 1;
212	0	if (distance >= dict->pos)
213	0	back += dict->size - LZ_DICT_REPEAT_MAX;
214
215		#if LZMA_LZ_DECODER_CONFIG == 0
216		// Minimal byte-by-byte method. This might be the least bad choice
217		// if memcpy() isn't fast and there's no replacement for it below.
218		while (left-- > 0) {
219		dict->buf[dict->pos++] = dict->buf[back++];
220		}
221
222		#else
223		// Because memcpy() or a similar method can be faster than copying
224		// byte by byte in a loop, the copying process is split into
225		// two cases.
226	0	if (distance < left) {
227		// Source and target areas overlap, thus we can't use
228		// memcpy() nor even memmove() safely.
229	0	do {
230	0	dict->buf[dict->pos++] = dict->buf[back++];
231	0	} while (--left > 0);
232	0	} else {
233		# if LZMA_LZ_DECODER_CONFIG == 1
234		memcpy(dict->buf + dict->pos, dict->buf + back, left);
235		dict->pos += left;
236
237		# elif LZMA_LZ_DECODER_CONFIG == 2
238		// This can copy up to 32 bytes more than required.
239		// (If left == 0, we still copy 32 bytes.)
240	0	size_t pos = dict->pos;
241	0	dict->pos += left;
242	0	do {
243	0	const __m128i x0 = _mm_loadu_si128(
244	0	(__m128i *)(dict->buf + back));
245	0	const __m128i x1 = _mm_loadu_si128(
246	0	(__m128i *)(dict->buf + back + 16));
247	0	back += 32;
248	0	_mm_storeu_si128(
249	0	(__m128i *)(dict->buf + pos), x0);
250	0	_mm_storeu_si128(
251	0	(__m128i *)(dict->buf + pos + 16), x1);
252	0	pos += 32;
253	0	} while (pos < dict->pos);
254
255		# else
256		# error "Invalid LZMA_LZ_DECODER_CONFIG value"
257		# endif
258	0	}
259	0	#endif
260
261		// Update how full the dictionary is.
262	0	if (!dict->has_wrapped)
263	0	dict->full = dict->pos - LZ_DICT_INIT_POS;
264
265	0	return *len != 0;
266	0	} Unexecuted instantiation: alone_decoder.c:dict_repeat Unexecuted instantiation: lzma_decoder.c:dict_repeat Unexecuted instantiation: lz_decoder.c:dict_repeat Unexecuted instantiation: lzma2_decoder.c:dict_repeat
267
268
269		static inline void
270		dict_put(lzma_dict *restrict dict, uint8_t byte)
271	0	{
272	0	dict->buf[dict->pos++] = byte;
273
274	0	if (!dict->has_wrapped)
275	0	dict->full = dict->pos - LZ_DICT_INIT_POS;
276	0	} Unexecuted instantiation: alone_decoder.c:dict_put Unexecuted instantiation: lzma_decoder.c:dict_put Unexecuted instantiation: lz_decoder.c:dict_put Unexecuted instantiation: lzma2_decoder.c:dict_put
277
278
279		/// Puts one byte into the dictionary. Returns true if the dictionary was
280		/// already full and the byte couldn't be added.
281		static inline bool
282		dict_put_safe(lzma_dict *restrict dict, uint8_t byte)
283	0	{
284	0	if (unlikely(dict->pos == dict->limit))
285	0	return true;
286
287	0	dict_put(dict, byte);
288	0	return false;
289	0	} Unexecuted instantiation: alone_decoder.c:dict_put_safe Unexecuted instantiation: lzma_decoder.c:dict_put_safe Unexecuted instantiation: lz_decoder.c:dict_put_safe Unexecuted instantiation: lzma2_decoder.c:dict_put_safe
290
291
292		/// Copies arbitrary amount of data into the dictionary.
293		static inline void
294		dict_write(lzma_dict restrict dict, const uint8_t restrict in,
295		size_t *restrict in_pos, size_t in_size,
296		size_t *restrict left)
297	0	{
298		// NOTE: If we are being given more data than the size of the
299		// dictionary, it could be possible to optimize the LZ decoder
300		// so that not everything needs to go through the dictionary.
301		// This shouldn't be very common thing in practice though, and
302		// the slowdown of one extra memcpy() isn't bad compared to how
303		// much time it would have taken if the data were compressed.
304
305	0	if (in_size - in_pos > left)
306	0	in_size = in_pos + left;
307
308	0	*left -= lzma_bufcpy(in, in_pos, in_size,
309	0	dict->buf, &dict->pos, dict->limit);
310
311	0	if (!dict->has_wrapped)
312	0	dict->full = dict->pos - LZ_DICT_INIT_POS;
313
314	0	return;
315	0	} Unexecuted instantiation: alone_decoder.c:dict_write Unexecuted instantiation: lzma_decoder.c:dict_write Unexecuted instantiation: lz_decoder.c:dict_write Unexecuted instantiation: lzma2_decoder.c:dict_write
316
317
318		static inline void
319		dict_reset(lzma_dict *dict)
320	0	{
321		dict->need_reset = true;
322	0	return;
323	0	} Unexecuted instantiation: alone_decoder.c:dict_reset Unexecuted instantiation: lzma_decoder.c:dict_reset Unexecuted instantiation: lz_decoder.c:dict_reset Unexecuted instantiation: lzma2_decoder.c:dict_reset
324
325		#endif