/src/xz/src/liblzma/lz/lz_encoder.c

Source
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lz_encoder.c
/// \brief      LZ in window
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////

#include "lz_encoder.h"
#include "lz_encoder_hash.h"

// See lz_encoder_hash.h. This is a bit hackish but avoids making
// endianness a conditional in makefiles.
#ifdef LZMA_LZ_HASH_TABLE_IS_NEEDED
# include "lz_encoder_hash_table.h"
#endif

#include "memcmplen.h"


typedef struct {
  /// LZ-based encoder e.g. LZMA
  lzma_lz_encoder lz;

  /// History buffer and match finder
  lzma_mf mf;

  /// Next coder in the chain
  lzma_next_coder next;
} lzma_coder;


/// \brief      Moves the data in the input window to free space for new data
///
/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
/// bytes of input history available all the time. Now and then we need to
/// "slide" the buffer to make space for the new data to the end of the
/// buffer. At the same time, data older than keep_size_before is dropped.
///
static void
move_window(lzma_mf *mf)
{
  // Align the move to a multiple of 16 bytes. Some LZ-based encoders
  // like LZMA use the lowest bits of mf->read_pos to know the
  // alignment of the uncompressed data. We also get better speed
  // for memmove() with aligned buffers.
  assert(mf->read_pos > mf->keep_size_before);
  const uint32_t move_offset
    = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);

  assert(mf->write_pos > move_offset);
  const size_t move_size = mf->write_pos - move_offset;

  assert(move_offset + move_size <= mf->size);

  memmove(mf->buffer, mf->buffer + move_offset, move_size);

  mf->offset += move_offset;
  mf->read_pos -= move_offset;
  mf->read_limit -= move_offset;
  mf->write_pos -= move_offset;

  return;
}


/// \brief      Tries to fill the input window (mf->buffer)
///
/// If we are the last encoder in the chain, our input data is in in[].
/// Otherwise we call the next filter in the chain to process in[] and
/// write its output to mf->buffer.
///
/// This function must not be called once it has returned LZMA_STREAM_END.
///
static lzma_ret
fill_window(lzma_coder *coder, const lzma_allocator *allocator,
    const uint8_t *in, size_t *in_pos, size_t in_size,
    lzma_action action)
{
  assert(coder->mf.read_pos <= coder->mf.write_pos);

  // Move the sliding window if needed.
  if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
    move_window(&coder->mf);

  // Maybe this is ugly, but lzma_mf uses uint32_t for most things
  // (which I find cleanest), but we need size_t here when filling
  // the history window.
  size_t write_pos = coder->mf.write_pos;
  lzma_ret ret;
  if (coder->next.code == NULL) {
    // Not using a filter, simply memcpy() as much as possible.
    lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
        &write_pos, coder->mf.size);

    ret = action != LZMA_RUN && *in_pos == in_size
        ? LZMA_STREAM_END : LZMA_OK;

  } else {
    ret = coder->next.code(coder->next.coder, allocator,
        in, in_pos, in_size,
        coder->mf.buffer, &write_pos,
        coder->mf.size, action);
  }

  coder->mf.write_pos = write_pos;

  // Silence Valgrind. lzma_memcmplen() can read extra bytes
  // and Valgrind will give warnings if those bytes are uninitialized
  // because Valgrind cannot see that the values of the uninitialized
  // bytes are eventually ignored.
  memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);

  // If end of stream has been reached or flushing completed, we allow
  // the encoder to process all the input (that is, read_pos is allowed
  // to reach write_pos). Otherwise we keep keep_size_after bytes
  // available as prebuffer.
  if (ret == LZMA_STREAM_END) {
    assert(*in_pos == in_size);
    ret = LZMA_OK;
    coder->mf.action = action;
    coder->mf.read_limit = coder->mf.write_pos;

  } else if (coder->mf.write_pos > coder->mf.keep_size_after) {
    // This needs to be done conditionally, because if we got
    // only little new input, there may be too little input
    // to do any encoding yet.
    coder->mf.read_limit = coder->mf.write_pos
        - coder->mf.keep_size_after;
  }

  // Restart the match finder after finished LZMA_SYNC_FLUSH.
  if (coder->mf.pending > 0
      && coder->mf.read_pos < coder->mf.read_limit) {
    // Match finder may update coder->pending and expects it to
    // start from zero, so use a temporary variable.
    const uint32_t pending = coder->mf.pending;
    coder->mf.pending = 0;

    // Rewind read_pos so that the match finder can hash
    // the pending bytes.
    assert(coder->mf.read_pos >= pending);
    coder->mf.read_pos -= pending;

    // Call the skip function directly instead of using
    // mf_skip(), since we don't want to touch mf->read_ahead.
    coder->mf.skip(&coder->mf, pending);
  }

  return ret;
}


static lzma_ret
lz_encode(void *coder_ptr, const lzma_allocator *allocator,
    const uint8_t *restrict in, size_t *restrict in_pos,
    size_t in_size,
    uint8_t *restrict out, size_t *restrict out_pos,
    size_t out_size, lzma_action action)
{
  lzma_coder *coder = coder_ptr;

  while (*out_pos < out_size
      && (*in_pos < in_size || action != LZMA_RUN)) {
    // Read more data to coder->mf.buffer if needed.
    if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
        >= coder->mf.read_limit)
      return_if_error(fill_window(coder, allocator,
          in, in_pos, in_size, action));

    // Encode
    const lzma_ret ret = coder->lz.code(coder->lz.coder,
        &coder->mf, out, out_pos, out_size);
    if (ret != LZMA_OK) {
      // Setting this to LZMA_RUN for cases when we are
      // flushing. It doesn't matter when finishing or if
      // an error occurred.
      coder->mf.action = LZMA_RUN;
      return ret;
    }
  }

  return LZMA_OK;
}


static bool
lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
    const lzma_lz_options *lz_options)
{
  // For now, the dictionary size is limited to 1.5 GiB. This may grow
  // in the future if needed, but it needs a little more work than just
  // changing this check.
  if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)
      || lz_options->nice_len > lz_options->match_len_max)
    return true;

  mf->keep_size_before = lz_options->before_size + lz_options->dict_size;

  mf->keep_size_after = lz_options->after_size
      + lz_options->match_len_max;

  // To avoid constant memmove()s, allocate some extra space. Since
  // memmove()s become more expensive when the size of the buffer
  // increases, we reserve more space when a large dictionary is
  // used to make the memmove() calls rarer.
  //
  // This works with dictionaries up to about 3 GiB. If bigger
  // dictionary is wanted, some extra work is needed:
  //   - Several variables in lzma_mf have to be changed from uint32_t
  //     to size_t.
  //   - Memory usage calculation needs something too, e.g. use uint64_t
  //     for mf->size.
  uint32_t reserve = lz_options->dict_size / 2;
  if (reserve > (UINT32_C(1) << 30))
    reserve /= 2;

  reserve += (lz_options->before_size + lz_options->match_len_max
      + lz_options->after_size) / 2 + (UINT32_C(1) << 19);

  const uint32_t old_size = mf->size;
  mf->size = mf->keep_size_before + reserve + mf->keep_size_after;

  // Deallocate the old history buffer if it exists but has different
  // size than what is needed now.
  if (mf->buffer != NULL && old_size != mf->size) {
    lzma_free(mf->buffer, allocator);
    mf->buffer = NULL;
  }

  // Match finder options
  mf->match_len_max = lz_options->match_len_max;
  mf->nice_len = lz_options->nice_len;

  // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
  // mean limiting dictionary size to less than 2 GiB. With a match
  // finder that uses multibyte resolution (hashes start at e.g. every
  // fourth byte), cyclic_size would stay below 2 Gi even when
  // dictionary size is greater than 2 GiB.
  //
  // It would be possible to allow cyclic_size >= 2 Gi, but then we
  // would need to be careful to use 64-bit types in various places
  // (size_t could do since we would need bigger than 32-bit address
  // space anyway). It would also require either zeroing a multigigabyte
  // buffer at initialization (waste of time and RAM) or allow
  // normalization in lz_encoder_mf.c to access uninitialized
  // memory to keep the code simpler. The current way is simple and
  // still allows pretty big dictionaries, so I don't expect these
  // limits to change.
  mf->cyclic_size = lz_options->dict_size + 1;

  // Validate the match finder ID and setup the function pointers.
  switch (lz_options->match_finder) {
#ifdef HAVE_MF_HC3
  case LZMA_MF_HC3:
    mf->find = &lzma_mf_hc3_find;
    mf->skip = &lzma_mf_hc3_skip;
    break;
#endif
#ifdef HAVE_MF_HC4
  case LZMA_MF_HC4:
    mf->find = &lzma_mf_hc4_find;
    mf->skip = &lzma_mf_hc4_skip;
    break;
#endif
#ifdef HAVE_MF_BT2
  case LZMA_MF_BT2:
    mf->find = &lzma_mf_bt2_find;
    mf->skip = &lzma_mf_bt2_skip;
    break;
#endif
#ifdef HAVE_MF_BT3
  case LZMA_MF_BT3:
    mf->find = &lzma_mf_bt3_find;
    mf->skip = &lzma_mf_bt3_skip;
    break;
#endif
#ifdef HAVE_MF_BT4
  case LZMA_MF_BT4:
    mf->find = &lzma_mf_bt4_find;
    mf->skip = &lzma_mf_bt4_skip;
    break;
#endif

  default:
    return true;
  }

  // Calculate the sizes of mf->hash and mf->son.
  //
  // NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len
  // is big enough for the selected match finder. This makes it
  // easier for applications as nice_len = 2 will always be accepted
  // even though the effective value can be slightly bigger.
  const uint32_t hash_bytes
      = mf_get_hash_bytes(lz_options->match_finder);
  assert(hash_bytes <= mf->nice_len);

  const bool is_bt = (lz_options->match_finder & 0x10) != 0;
  uint32_t hs;

  if (hash_bytes == 2) {
    hs = 0xFFFF;
  } else {
    // Round dictionary size up to the next 2^n - 1 so it can
    // be used as a hash mask.
    hs = lz_options->dict_size - 1;
    hs |= hs >> 1;
    hs |= hs >> 2;
    hs |= hs >> 4;
    hs |= hs >> 8;
    hs >>= 1;
    hs |= 0xFFFF;

    if (hs > (UINT32_C(1) << 24)) {
      if (hash_bytes == 3)
        hs = (UINT32_C(1) << 24) - 1;
      else
        hs >>= 1;
    }
  }

  mf->hash_mask = hs;

  ++hs;
  if (hash_bytes > 2)
    hs += HASH_2_SIZE;
  if (hash_bytes > 3)
    hs += HASH_3_SIZE;
/*
  No match finder uses this at the moment.
  if (mf->hash_bytes > 4)
    hs += HASH_4_SIZE;
*/

  const uint32_t old_hash_count = mf->hash_count;
  const uint32_t old_sons_count = mf->sons_count;
  mf->hash_count = hs;
  mf->sons_count = mf->cyclic_size;
  if (is_bt)
    mf->sons_count *= 2;

  // Deallocate the old hash array if it exists and has different size
  // than what is needed now.
  if (old_hash_count != mf->hash_count
      || old_sons_count != mf->sons_count) {
    lzma_free(mf->hash, allocator);
    mf->hash = NULL;

    lzma_free(mf->son, allocator);
    mf->son = NULL;
  }

  // Maximum number of match finder cycles
  mf->depth = lz_options->depth;
  if (mf->depth == 0) {
    if (is_bt)
      mf->depth = 16 + mf->nice_len / 2;
    else
      mf->depth = 4 + mf->nice_len / 4;
  }

  return false;
}


static bool
lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
    const lzma_lz_options *lz_options)
{
  // Allocate the history buffer.
  if (mf->buffer == NULL) {
    // lzma_memcmplen() is used for the dictionary buffer
    // so we need to allocate a few extra bytes to prevent
    // it from reading past the end of the buffer.
    mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
        allocator);
    if (mf->buffer == NULL)
      return true;

    // Keep Valgrind happy with lzma_memcmplen() and initialize
    // the extra bytes whose value may get read but which will
    // effectively get ignored.
    memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
  }

  // Use cyclic_size as initial mf->offset. This allows
  // avoiding a few branches in the match finders. The downside is
  // that match finder needs to be normalized more often, which may
  // hurt performance with huge dictionaries.
  mf->offset = mf->cyclic_size;
  mf->read_pos = 0;
  mf->read_ahead = 0;
  mf->read_limit = 0;
  mf->write_pos = 0;
  mf->pending = 0;

#if UINT32_MAX >= SIZE_MAX / 4
  // Check for integer overflow. (Huge dictionaries are not
  // possible on 32-bit CPU.)
  if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
      || mf->sons_count > SIZE_MAX / sizeof(uint32_t))
    return true;
#endif

  // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
  // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
  //
  // We don't need to initialize mf->son, but not doing that may
  // make Valgrind complain in normalization (see normalize() in
  // lz_encoder_mf.c). Skipping the initialization is *very* good
  // when big dictionary is used but only small amount of data gets
  // actually compressed: most of the mf->son won't get actually
  // allocated by the kernel, so we avoid wasting RAM and improve
  // initialization speed a lot.
  if (mf->hash == NULL) {
    mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
        allocator);
    mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
        allocator);

    if (mf->hash == NULL || mf->son == NULL) {
      lzma_free(mf->hash, allocator);
      mf->hash = NULL;

      lzma_free(mf->son, allocator);
      mf->son = NULL;

      return true;
    }
  } else {
/*
    for (uint32_t i = 0; i < mf->hash_count; ++i)
      mf->hash[i] = EMPTY_HASH_VALUE;
*/
    memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
  }

  mf->cyclic_pos = 0;

  // Handle preset dictionary.
  if (lz_options->preset_dict != NULL
      && lz_options->preset_dict_size > 0) {
    // If the preset dictionary is bigger than the actual
    // dictionary, use only the tail.
    mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
    memcpy(mf->buffer, lz_options->preset_dict
        + lz_options->preset_dict_size - mf->write_pos,
        mf->write_pos);
    mf->action = LZMA_SYNC_FLUSH;
    mf->skip(mf, mf->write_pos);
  }

  mf->action = LZMA_RUN;

  return false;
}


extern uint64_t
lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
{
  // Old buffers must not exist when calling lz_encoder_prepare().
  lzma_mf mf = {
    .buffer = NULL,
    .hash = NULL,
    .son = NULL,
    .hash_count = 0,
    .sons_count = 0,
  };

  // Setup the size information into mf.
  if (lz_encoder_prepare(&mf, NULL, lz_options))
    return UINT64_MAX;

  // Calculate the memory usage.
  return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
      + mf.size + sizeof(lzma_coder);
}


static void
lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
  lzma_coder *coder = coder_ptr;

  lzma_next_end(&coder->next, allocator);

  lzma_free(coder->mf.son, allocator);
  lzma_free(coder->mf.hash, allocator);
  lzma_free(coder->mf.buffer, allocator);

  if (coder->lz.end != NULL)
    coder->lz.end(coder->lz.coder, allocator);
  else
    lzma_free(coder->lz.coder, allocator);

  lzma_free(coder, allocator);
  return;
}


static lzma_ret
lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
    const lzma_filter *filters_null lzma_attribute((__unused__)),
    const lzma_filter *reversed_filters)
{
  lzma_coder *coder = coder_ptr;

  if (coder->lz.options_update == NULL)
    return LZMA_PROG_ERROR;

  return_if_error(coder->lz.options_update(
      coder->lz.coder, reversed_filters));

  return lzma_next_filter_update(
      &coder->next, allocator, reversed_filters + 1);
}


static lzma_ret
lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size,
    uint64_t out_limit)
{
  lzma_coder *coder = coder_ptr;

  // This is supported only if there are no other filters chained.
  if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)
    return coder->lz.set_out_limit(
        coder->lz.coder, uncomp_size, out_limit);

  return LZMA_OPTIONS_ERROR;
}


extern lzma_ret
lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
    const lzma_filter_info *filters,
    lzma_ret (*lz_init)(lzma_lz_encoder *lz,
      const lzma_allocator *allocator,
      lzma_vli id, const void *options,
      lzma_lz_options *lz_options))
{
#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
  // The CRC32 table must be initialized.
  lzma_crc32_init();
#endif

  // Allocate and initialize the base data structure.
  lzma_coder *coder = next->coder;
  if (coder == NULL) {
    coder = lzma_alloc(sizeof(lzma_coder), allocator);
    if (coder == NULL)
      return LZMA_MEM_ERROR;

    next->coder = coder;
    next->code = &lz_encode;
    next->end = &lz_encoder_end;
    next->update = &lz_encoder_update;
    next->set_out_limit = &lz_encoder_set_out_limit;

    coder->lz.coder = NULL;
    coder->lz.code = NULL;
    coder->lz.end = NULL;
    coder->lz.options_update = NULL;
    coder->lz.set_out_limit = NULL;

    // mf.size is initialized to silence Valgrind
    // when used on optimized binaries (GCC may reorder
    // code in a way that Valgrind gets unhappy).
    coder->mf.buffer = NULL;
    coder->mf.size = 0;
    coder->mf.hash = NULL;
    coder->mf.son = NULL;
    coder->mf.hash_count = 0;
    coder->mf.sons_count = 0;

    coder->next = LZMA_NEXT_CODER_INIT;
  }

  // Initialize the LZ-based encoder.
  lzma_lz_options lz_options;
  return_if_error(lz_init(&coder->lz, allocator,
      filters[0].id, filters[0].options, &lz_options));

  // Setup the size information into coder->mf and deallocate
  // old buffers if they have wrong size.
  if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
    return LZMA_OPTIONS_ERROR;

  // Allocate new buffers if needed, and do the rest of
  // the initialization.
  if (lz_encoder_init(&coder->mf, allocator, &lz_options))
    return LZMA_MEM_ERROR;

  // Initialize the next filter in the chain, if any.
  return lzma_next_filter_init(&coder->next, allocator, filters + 1);
}


extern LZMA_API(lzma_bool)
lzma_mf_is_supported(lzma_match_finder mf)
{
  switch (mf) {
#ifdef HAVE_MF_HC3
  case LZMA_MF_HC3:
    return true;
#endif
#ifdef HAVE_MF_HC4
  case LZMA_MF_HC4:
    return true;
#endif
#ifdef HAVE_MF_BT2
  case LZMA_MF_BT2:
    return true;
#endif
#ifdef HAVE_MF_BT3
  case LZMA_MF_BT3:
    return true;
#endif
#ifdef HAVE_MF_BT4
  case LZMA_MF_BT4:
    return true;
#endif
  default:
    return false;
  }
}

Coverage Report

Created: 2025-11-24 06:55

Line	Count	Source
1		// SPDX-License-Identifier: 0BSD
2
3		///////////////////////////////////////////////////////////////////////////////
4		//
5		/// \file lz_encoder.c
6		/// \brief LZ in window
7		///
8		// Authors: Igor Pavlov
9		// Lasse Collin
10		//
11		///////////////////////////////////////////////////////////////////////////////
12
13		#include "lz_encoder.h"
14		#include "lz_encoder_hash.h"
15
16		// See lz_encoder_hash.h. This is a bit hackish but avoids making
17		// endianness a conditional in makefiles.
18		#ifdef LZMA_LZ_HASH_TABLE_IS_NEEDED
19		# include "lz_encoder_hash_table.h"
20		#endif
21
22		#include "memcmplen.h"
23
24
25		typedef struct {
26		/// LZ-based encoder e.g. LZMA
27		lzma_lz_encoder lz;
28
29		/// History buffer and match finder
30		lzma_mf mf;
31
32		/// Next coder in the chain
33		lzma_next_coder next;
34		} lzma_coder;
35
36
37		/// \brief Moves the data in the input window to free space for new data
38		///
39		/// mf->buffer is a sliding input window, which keeps mf->keep_size_before
40		/// bytes of input history available all the time. Now and then we need to
41		/// "slide" the buffer to make space for the new data to the end of the
42		/// buffer. At the same time, data older than keep_size_before is dropped.
43		///
44		static void
45		move_window(lzma_mf *mf)
46	0	{
47		// Align the move to a multiple of 16 bytes. Some LZ-based encoders
48		// like LZMA use the lowest bits of mf->read_pos to know the
49		// alignment of the uncompressed data. We also get better speed
50		// for memmove() with aligned buffers.
51	0	assert(mf->read_pos > mf->keep_size_before);
52	0	const uint32_t move_offset
53	0	= (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
54
55	0	assert(mf->write_pos > move_offset);
56	0	const size_t move_size = mf->write_pos - move_offset;
57
58	0	assert(move_offset + move_size <= mf->size);
59
60	0	memmove(mf->buffer, mf->buffer + move_offset, move_size);
61
62	0	mf->offset += move_offset;
63	0	mf->read_pos -= move_offset;
64	0	mf->read_limit -= move_offset;
65	0	mf->write_pos -= move_offset;
66
67	0	return;
68	0	}
69
70
71		/// \brief Tries to fill the input window (mf->buffer)
72		///
73		/// If we are the last encoder in the chain, our input data is in in[].
74		/// Otherwise we call the next filter in the chain to process in[] and
75		/// write its output to mf->buffer.
76		///
77		/// This function must not be called once it has returned LZMA_STREAM_END.
78		///
79		static lzma_ret
80		fill_window(lzma_coder coder, const lzma_allocator allocator,
81		const uint8_t in, size_t in_pos, size_t in_size,
82		lzma_action action)
83	8.12k	{
84	8.12k	assert(coder->mf.read_pos <= coder->mf.write_pos);
85
86		// Move the sliding window if needed.
87	8.12k	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
88	0	move_window(&coder->mf);
89
90		// Maybe this is ugly, but lzma_mf uses uint32_t for most things
91		// (which I find cleanest), but we need size_t here when filling
92		// the history window.
93	8.12k	size_t write_pos = coder->mf.write_pos;
94	8.12k	lzma_ret ret;
95	8.12k	if (coder->next.code == NULL) {
96		// Not using a filter, simply memcpy() as much as possible.
97	8.12k	lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
98	8.12k	&write_pos, coder->mf.size);
99
100	8.12k	ret = action != LZMA_RUN && *in_pos == in_size
101	8.12k	? LZMA_STREAM_END : LZMA_OK;
102
103	8.12k	} else {
104	0	ret = coder->next.code(coder->next.coder, allocator,
105	0	in, in_pos, in_size,
106	0	coder->mf.buffer, &write_pos,
107	0	coder->mf.size, action);
108	0	}
109
110	8.12k	coder->mf.write_pos = write_pos;
111
112		// Silence Valgrind. lzma_memcmplen() can read extra bytes
113		// and Valgrind will give warnings if those bytes are uninitialized
114		// because Valgrind cannot see that the values of the uninitialized
115		// bytes are eventually ignored.
116	8.12k	memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);
117
118		// If end of stream has been reached or flushing completed, we allow
119		// the encoder to process all the input (that is, read_pos is allowed
120		// to reach write_pos). Otherwise we keep keep_size_after bytes
121		// available as prebuffer.
122	8.12k	if (ret == LZMA_STREAM_END) {
123	3.95k	assert(*in_pos == in_size);
124	3.95k	ret = LZMA_OK;
125	3.95k	coder->mf.action = action;
126	3.95k	coder->mf.read_limit = coder->mf.write_pos;
127
128	4.16k	} else if (coder->mf.write_pos > coder->mf.keep_size_after) {
129		// This needs to be done conditionally, because if we got
130		// only little new input, there may be too little input
131		// to do any encoding yet.
132	25	coder->mf.read_limit = coder->mf.write_pos
133	25	- coder->mf.keep_size_after;
134	25	}
135
136		// Restart the match finder after finished LZMA_SYNC_FLUSH.
137	8.12k	if (coder->mf.pending > 0
138	0	&& coder->mf.read_pos < coder->mf.read_limit) {
139		// Match finder may update coder->pending and expects it to
140		// start from zero, so use a temporary variable.
141	0	const uint32_t pending = coder->mf.pending;
142	0	coder->mf.pending = 0;
143
144		// Rewind read_pos so that the match finder can hash
145		// the pending bytes.
146	0	assert(coder->mf.read_pos >= pending);
147	0	coder->mf.read_pos -= pending;
148
149		// Call the skip function directly instead of using
150		// mf_skip(), since we don't want to touch mf->read_ahead.
151	0	coder->mf.skip(&coder->mf, pending);
152	0	}
153
154	8.12k	return ret;
155	8.12k	}
156
157
158		static lzma_ret
159		lz_encode(void coder_ptr, const lzma_allocator allocator,
160		const uint8_t restrict in, size_t restrict in_pos,
161		size_t in_size,
162		uint8_t restrict out, size_t restrict out_pos,
163		size_t out_size, lzma_action action)
164	8.13k	{
165	8.13k	lzma_coder *coder = coder_ptr;
166
167	12.3k	while (*out_pos < out_size
168	12.3k	&& (*in_pos < in_size \|\| action != LZMA_RUN)) {
169		// Read more data to coder->mf.buffer if needed.
170	8.13k	if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
171	8.12k	>= coder->mf.read_limit)
172	8.12k	return_if_error(fill_window(coder, allocator,
173	8.13k	in, in_pos, in_size, action));
174
175		// Encode
176	8.13k	const lzma_ret ret = coder->lz.code(coder->lz.coder,
177	8.13k	&coder->mf, out, out_pos, out_size);
178	8.13k	if (ret != LZMA_OK) {
179		// Setting this to LZMA_RUN for cases when we are
180		// flushing. It doesn't matter when finishing or if
181		// an error occurred.
182	3.95k	coder->mf.action = LZMA_RUN;
183	3.95k	return ret;
184	3.95k	}
185	8.13k	}
186
187	4.18k	return LZMA_OK;
188	8.13k	}
189
190
191		static bool
192		lz_encoder_prepare(lzma_mf mf, const lzma_allocator allocator,
193		const lzma_lz_options *lz_options)
194	3.96k	{
195		// For now, the dictionary size is limited to 1.5 GiB. This may grow
196		// in the future if needed, but it needs a little more work than just
197		// changing this check.
198	3.96k	if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)
199	3.96k	\|\| lz_options->nice_len > lz_options->match_len_max)
200	0	return true;
201
202	3.96k	mf->keep_size_before = lz_options->before_size + lz_options->dict_size;
203
204	3.96k	mf->keep_size_after = lz_options->after_size
205	3.96k	+ lz_options->match_len_max;
206
207		// To avoid constant memmove()s, allocate some extra space. Since
208		// memmove()s become more expensive when the size of the buffer
209		// increases, we reserve more space when a large dictionary is
210		// used to make the memmove() calls rarer.
211		//
212		// This works with dictionaries up to about 3 GiB. If bigger
213		// dictionary is wanted, some extra work is needed:
214		// - Several variables in lzma_mf have to be changed from uint32_t
215		// to size_t.
216		// - Memory usage calculation needs something too, e.g. use uint64_t
217		// for mf->size.
218	3.96k	uint32_t reserve = lz_options->dict_size / 2;
219	3.96k	if (reserve > (UINT32_C(1) << 30))
220	0	reserve /= 2;
221
222	3.96k	reserve += (lz_options->before_size + lz_options->match_len_max
223	3.96k	+ lz_options->after_size) / 2 + (UINT32_C(1) << 19);
224
225	3.96k	const uint32_t old_size = mf->size;
226	3.96k	mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
227
228		// Deallocate the old history buffer if it exists but has different
229		// size than what is needed now.
230	3.96k	if (mf->buffer != NULL && old_size != mf->size) {
231	0	lzma_free(mf->buffer, allocator);
232	0	mf->buffer = NULL;
233	0	}
234
235		// Match finder options
236	3.96k	mf->match_len_max = lz_options->match_len_max;
237	3.96k	mf->nice_len = lz_options->nice_len;
238
239		// cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
240		// mean limiting dictionary size to less than 2 GiB. With a match
241		// finder that uses multibyte resolution (hashes start at e.g. every
242		// fourth byte), cyclic_size would stay below 2 Gi even when
243		// dictionary size is greater than 2 GiB.
244		//
245		// It would be possible to allow cyclic_size >= 2 Gi, but then we
246		// would need to be careful to use 64-bit types in various places
247		// (size_t could do since we would need bigger than 32-bit address
248		// space anyway). It would also require either zeroing a multigigabyte
249		// buffer at initialization (waste of time and RAM) or allow
250		// normalization in lz_encoder_mf.c to access uninitialized
251		// memory to keep the code simpler. The current way is simple and
252		// still allows pretty big dictionaries, so I don't expect these
253		// limits to change.
254	3.96k	mf->cyclic_size = lz_options->dict_size + 1;
255
256		// Validate the match finder ID and setup the function pointers.
257	3.96k	switch (lz_options->match_finder) {
258	0	#ifdef HAVE_MF_HC3
259	944	case LZMA_MF_HC3:
260	944	mf->find = &lzma_mf_hc3_find;
261	944	mf->skip = &lzma_mf_hc3_skip;
262	944	break;
263	0	#endif
264	0	#ifdef HAVE_MF_HC4
265	691	case LZMA_MF_HC4:
266	691	mf->find = &lzma_mf_hc4_find;
267	691	mf->skip = &lzma_mf_hc4_skip;
268	691	break;
269	0	#endif
270	0	#ifdef HAVE_MF_BT2
271	0	case LZMA_MF_BT2:
272	0	mf->find = &lzma_mf_bt2_find;
273	0	mf->skip = &lzma_mf_bt2_skip;
274	0	break;
275	0	#endif
276	0	#ifdef HAVE_MF_BT3
277	0	case LZMA_MF_BT3:
278	0	mf->find = &lzma_mf_bt3_find;
279	0	mf->skip = &lzma_mf_bt3_skip;
280	0	break;
281	0	#endif
282	0	#ifdef HAVE_MF_BT4
283	2.32k	case LZMA_MF_BT4:
284	2.32k	mf->find = &lzma_mf_bt4_find;
285	2.32k	mf->skip = &lzma_mf_bt4_skip;
286	2.32k	break;
287	0	#endif
288
289	0	default:
290	0	return true;
291	3.96k	}
292
293		// Calculate the sizes of mf->hash and mf->son.
294		//
295		// NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len
296		// is big enough for the selected match finder. This makes it
297		// easier for applications as nice_len = 2 will always be accepted
298		// even though the effective value can be slightly bigger.
299	3.96k	const uint32_t hash_bytes
300	3.96k	= mf_get_hash_bytes(lz_options->match_finder);
301	3.96k	assert(hash_bytes <= mf->nice_len);
302
303	3.96k	const bool is_bt = (lz_options->match_finder & 0x10) != 0;
304	3.96k	uint32_t hs;
305
306	3.96k	if (hash_bytes == 2) {
307	0	hs = 0xFFFF;
308	3.96k	} else {
309		// Round dictionary size up to the next 2^n - 1 so it can
310		// be used as a hash mask.
311	3.96k	hs = lz_options->dict_size - 1;
312	3.96k	hs \|= hs >> 1;
313	3.96k	hs \|= hs >> 2;
314	3.96k	hs \|= hs >> 4;
315	3.96k	hs \|= hs >> 8;
316	3.96k	hs >>= 1;
317	3.96k	hs \|= 0xFFFF;
318
319	3.96k	if (hs > (UINT32_C(1) << 24)) {
320	0	if (hash_bytes == 3)
321	0	hs = (UINT32_C(1) << 24) - 1;
322	0	else
323	0	hs >>= 1;
324	0	}
325	3.96k	}
326
327	3.96k	mf->hash_mask = hs;
328
329	3.96k	++hs;
330	3.96k	if (hash_bytes > 2)
331	3.96k	hs += HASH_2_SIZE;
332	3.96k	if (hash_bytes > 3)
333	3.01k	hs += HASH_3_SIZE;
334		/*
335		No match finder uses this at the moment.
336		if (mf->hash_bytes > 4)
337		hs += HASH_4_SIZE;
338		*/
339
340	3.96k	const uint32_t old_hash_count = mf->hash_count;
341	3.96k	const uint32_t old_sons_count = mf->sons_count;
342	3.96k	mf->hash_count = hs;
343	3.96k	mf->sons_count = mf->cyclic_size;
344	3.96k	if (is_bt)
345	2.32k	mf->sons_count *= 2;
346
347		// Deallocate the old hash array if it exists and has different size
348		// than what is needed now.
349	3.96k	if (old_hash_count != mf->hash_count
350	3.96k	\|\| old_sons_count != mf->sons_count) {
351	3.96k	lzma_free(mf->hash, allocator);
352	3.96k	mf->hash = NULL;
353
354	3.96k	lzma_free(mf->son, allocator);
355	3.96k	mf->son = NULL;
356	3.96k	}
357
358		// Maximum number of match finder cycles
359	3.96k	mf->depth = lz_options->depth;
360	3.96k	if (mf->depth == 0) {
361	1.42k	if (is_bt)
362	1.42k	mf->depth = 16 + mf->nice_len / 2;
363	0	else
364	0	mf->depth = 4 + mf->nice_len / 4;
365	1.42k	}
366
367	3.96k	return false;
368	3.96k	}
369
370
371		static bool
372		lz_encoder_init(lzma_mf mf, const lzma_allocator allocator,
373		const lzma_lz_options *lz_options)
374	3.96k	{
375		// Allocate the history buffer.
376	3.96k	if (mf->buffer == NULL) {
377		// lzma_memcmplen() is used for the dictionary buffer
378		// so we need to allocate a few extra bytes to prevent
379		// it from reading past the end of the buffer.
380	3.96k	mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
381	3.96k	allocator);
382	3.96k	if (mf->buffer == NULL)
383	0	return true;
384
385		// Keep Valgrind happy with lzma_memcmplen() and initialize
386		// the extra bytes whose value may get read but which will
387		// effectively get ignored.
388	3.96k	memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
389	3.96k	}
390
391		// Use cyclic_size as initial mf->offset. This allows
392		// avoiding a few branches in the match finders. The downside is
393		// that match finder needs to be normalized more often, which may
394		// hurt performance with huge dictionaries.
395	3.96k	mf->offset = mf->cyclic_size;
396	3.96k	mf->read_pos = 0;
397	3.96k	mf->read_ahead = 0;
398	3.96k	mf->read_limit = 0;
399	3.96k	mf->write_pos = 0;
400	3.96k	mf->pending = 0;
401
402		#if UINT32_MAX >= SIZE_MAX / 4
403		// Check for integer overflow. (Huge dictionaries are not
404		// possible on 32-bit CPU.)
405		if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
406		\|\| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
407		return true;
408		#endif
409
410		// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
411		// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
412		//
413		// We don't need to initialize mf->son, but not doing that may
414		// make Valgrind complain in normalization (see normalize() in
415		// lz_encoder_mf.c). Skipping the initialization is very good
416		// when big dictionary is used but only small amount of data gets
417		// actually compressed: most of the mf->son won't get actually
418		// allocated by the kernel, so we avoid wasting RAM and improve
419		// initialization speed a lot.
420	3.96k	if (mf->hash == NULL) {
421	3.96k	mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
422	3.96k	allocator);
423	3.96k	mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
424	3.96k	allocator);
425
426	3.96k	if (mf->hash == NULL \|\| mf->son == NULL) {
427	0	lzma_free(mf->hash, allocator);
428	0	mf->hash = NULL;
429
430	0	lzma_free(mf->son, allocator);
431	0	mf->son = NULL;
432
433	0	return true;
434	0	}
435	3.96k	} else {
436		/*
437		for (uint32_t i = 0; i < mf->hash_count; ++i)
438		mf->hash[i] = EMPTY_HASH_VALUE;
439		*/
440	0	memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
441	0	}
442
443	3.96k	mf->cyclic_pos = 0;
444
445		// Handle preset dictionary.
446	3.96k	if (lz_options->preset_dict != NULL
447	0	&& lz_options->preset_dict_size > 0) {
448		// If the preset dictionary is bigger than the actual
449		// dictionary, use only the tail.
450	0	mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
451	0	memcpy(mf->buffer, lz_options->preset_dict
452	0	+ lz_options->preset_dict_size - mf->write_pos,
453	0	mf->write_pos);
454	0	mf->action = LZMA_SYNC_FLUSH;
455	0	mf->skip(mf, mf->write_pos);
456	0	}
457
458	3.96k	mf->action = LZMA_RUN;
459
460	3.96k	return false;
461	3.96k	}
462
463
464		extern uint64_t
465		lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
466	0	{
467		// Old buffers must not exist when calling lz_encoder_prepare().
468	0	lzma_mf mf = {
469	0	.buffer = NULL,
470	0	.hash = NULL,
471	0	.son = NULL,
472	0	.hash_count = 0,
473	0	.sons_count = 0,
474	0	};
475
476		// Setup the size information into mf.
477	0	if (lz_encoder_prepare(&mf, NULL, lz_options))
478	0	return UINT64_MAX;
479
480		// Calculate the memory usage.
481	0	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
482	0	+ mf.size + sizeof(lzma_coder);
483	0	}
484
485
486		static void
487		lz_encoder_end(void coder_ptr, const lzma_allocator allocator)
488	3.96k	{
489	3.96k	lzma_coder *coder = coder_ptr;
490
491	3.96k	lzma_next_end(&coder->next, allocator);
492
493	3.96k	lzma_free(coder->mf.son, allocator);
494	3.96k	lzma_free(coder->mf.hash, allocator);
495	3.96k	lzma_free(coder->mf.buffer, allocator);
496
497	3.96k	if (coder->lz.end != NULL)
498	3.96k	coder->lz.end(coder->lz.coder, allocator);
499	0	else
500	0	lzma_free(coder->lz.coder, allocator);
501
502	3.96k	lzma_free(coder, allocator);
503	3.96k	return;
504	3.96k	}
505
506
507		static lzma_ret
508		lz_encoder_update(void coder_ptr, const lzma_allocator allocator,
509		const lzma_filter *filters_null lzma_attribute((__unused__)),
510		const lzma_filter *reversed_filters)
511	0	{
512	0	lzma_coder *coder = coder_ptr;
513
514	0	if (coder->lz.options_update == NULL)
515	0	return LZMA_PROG_ERROR;
516
517	0	return_if_error(coder->lz.options_update(
518	0	coder->lz.coder, reversed_filters));
519
520	0	return lzma_next_filter_update(
521	0	&coder->next, allocator, reversed_filters + 1);
522	0	}
523
524
525		static lzma_ret
526		lz_encoder_set_out_limit(void coder_ptr, uint64_t uncomp_size,
527		uint64_t out_limit)
528	0	{
529	0	lzma_coder *coder = coder_ptr;
530
531		// This is supported only if there are no other filters chained.
532	0	if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)
533	0	return coder->lz.set_out_limit(
534	0	coder->lz.coder, uncomp_size, out_limit);
535
536	0	return LZMA_OPTIONS_ERROR;
537	0	}
538
539
540		extern lzma_ret
541		lzma_lz_encoder_init(lzma_next_coder next, const lzma_allocator allocator,
542		const lzma_filter_info *filters,
543		lzma_ret (lz_init)(lzma_lz_encoder lz,
544		const lzma_allocator *allocator,
545		lzma_vli id, const void *options,
546		lzma_lz_options *lz_options))
547	3.96k	{
548		#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
549		// The CRC32 table must be initialized.
550		lzma_crc32_init();
551		#endif
552
553		// Allocate and initialize the base data structure.
554	3.96k	lzma_coder *coder = next->coder;
555	3.96k	if (coder == NULL) {
556	3.96k	coder = lzma_alloc(sizeof(lzma_coder), allocator);
557	3.96k	if (coder == NULL)
558	0	return LZMA_MEM_ERROR;
559
560	3.96k	next->coder = coder;
561	3.96k	next->code = &lz_encode;
562	3.96k	next->end = &lz_encoder_end;
563	3.96k	next->update = &lz_encoder_update;
564	3.96k	next->set_out_limit = &lz_encoder_set_out_limit;
565
566	3.96k	coder->lz.coder = NULL;
567	3.96k	coder->lz.code = NULL;
568	3.96k	coder->lz.end = NULL;
569	3.96k	coder->lz.options_update = NULL;
570	3.96k	coder->lz.set_out_limit = NULL;
571
572		// mf.size is initialized to silence Valgrind
573		// when used on optimized binaries (GCC may reorder
574		// code in a way that Valgrind gets unhappy).
575	3.96k	coder->mf.buffer = NULL;
576	3.96k	coder->mf.size = 0;
577	3.96k	coder->mf.hash = NULL;
578	3.96k	coder->mf.son = NULL;
579	3.96k	coder->mf.hash_count = 0;
580	3.96k	coder->mf.sons_count = 0;
581
582	3.96k	coder->next = LZMA_NEXT_CODER_INIT;
583	3.96k	}
584
585		// Initialize the LZ-based encoder.
586	3.96k	lzma_lz_options lz_options;
587	3.96k	return_if_error(lz_init(&coder->lz, allocator,
588	3.96k	filters[0].id, filters[0].options, &lz_options));
589
590		// Setup the size information into coder->mf and deallocate
591		// old buffers if they have wrong size.
592	3.96k	if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
593	0	return LZMA_OPTIONS_ERROR;
594
595		// Allocate new buffers if needed, and do the rest of
596		// the initialization.
597	3.96k	if (lz_encoder_init(&coder->mf, allocator, &lz_options))
598	0	return LZMA_MEM_ERROR;
599
600		// Initialize the next filter in the chain, if any.
601	3.96k	return lzma_next_filter_init(&coder->next, allocator, filters + 1);
602	3.96k	}
603
604
605		extern LZMA_API(lzma_bool)
606		lzma_mf_is_supported(lzma_match_finder mf)
607	0	{
608	0	switch (mf) {
609	0	#ifdef HAVE_MF_HC3
610	0	case LZMA_MF_HC3:
611	0	return true;
612	0	#endif
613	0	#ifdef HAVE_MF_HC4
614	0	case LZMA_MF_HC4:
615	0	return true;
616	0	#endif
617	0	#ifdef HAVE_MF_BT2
618	0	case LZMA_MF_BT2:
619	0	return true;
620	0	#endif
621	0	#ifdef HAVE_MF_BT3
622	0	case LZMA_MF_BT3:
623	0	return true;
624	0	#endif
625	0	#ifdef HAVE_MF_BT4
626	0	case LZMA_MF_BT4:
627	0	return true;
628	0	#endif
629	0	default:
630		return false;
631	0	}
632	0	}