/src/CMake/Utilities/cmliblzma/liblzma/lzma/lzma2_encoder.c
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lzma2_encoder.c |
6 | | /// \brief LZMA2 encoder |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "lz_encoder.h" |
14 | | #include "lzma_encoder.h" |
15 | | #include "fastpos.h" |
16 | | #include "lzma2_encoder.h" |
17 | | |
18 | | |
19 | | typedef struct { |
20 | | enum { |
21 | | SEQ_INIT, |
22 | | SEQ_LZMA_ENCODE, |
23 | | SEQ_LZMA_COPY, |
24 | | SEQ_UNCOMPRESSED_HEADER, |
25 | | SEQ_UNCOMPRESSED_COPY, |
26 | | } sequence; |
27 | | |
28 | | /// LZMA encoder |
29 | | void *lzma; |
30 | | |
31 | | /// LZMA options currently in use. |
32 | | lzma_options_lzma opt_cur; |
33 | | |
34 | | bool need_properties; |
35 | | bool need_state_reset; |
36 | | bool need_dictionary_reset; |
37 | | |
38 | | /// Uncompressed size of a chunk |
39 | | size_t uncompressed_size; |
40 | | |
41 | | /// Compressed size of a chunk (excluding headers); this is also used |
42 | | /// to indicate the end of buf[] in SEQ_LZMA_COPY. |
43 | | size_t compressed_size; |
44 | | |
45 | | /// Read position in buf[] |
46 | | size_t buf_pos; |
47 | | |
48 | | /// Buffer to hold the chunk header and LZMA compressed data |
49 | | uint8_t buf[LZMA2_HEADER_MAX + LZMA2_CHUNK_MAX]; |
50 | | } lzma_lzma2_coder; |
51 | | |
52 | | |
53 | | static void |
54 | | lzma2_header_lzma(lzma_lzma2_coder *coder) |
55 | 0 | { |
56 | 0 | assert(coder->uncompressed_size > 0); |
57 | 0 | assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
58 | 0 | assert(coder->compressed_size > 0); |
59 | 0 | assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
60 | |
|
61 | 0 | size_t pos; |
62 | |
|
63 | 0 | if (coder->need_properties) { |
64 | 0 | pos = 0; |
65 | |
|
66 | 0 | if (coder->need_dictionary_reset) |
67 | 0 | coder->buf[pos] = 0x80 + (3 << 5); |
68 | 0 | else |
69 | 0 | coder->buf[pos] = 0x80 + (2 << 5); |
70 | 0 | } else { |
71 | 0 | pos = 1; |
72 | |
|
73 | 0 | if (coder->need_state_reset) |
74 | 0 | coder->buf[pos] = 0x80 + (1 << 5); |
75 | 0 | else |
76 | 0 | coder->buf[pos] = 0x80; |
77 | 0 | } |
78 | | |
79 | | // Set the start position for copying. |
80 | 0 | coder->buf_pos = pos; |
81 | | |
82 | | // Uncompressed size |
83 | 0 | size_t size = coder->uncompressed_size - 1; |
84 | 0 | coder->buf[pos++] += size >> 16; |
85 | 0 | coder->buf[pos++] = (size >> 8) & 0xFF; |
86 | 0 | coder->buf[pos++] = size & 0xFF; |
87 | | |
88 | | // Compressed size |
89 | 0 | size = coder->compressed_size - 1; |
90 | 0 | coder->buf[pos++] = size >> 8; |
91 | 0 | coder->buf[pos++] = size & 0xFF; |
92 | | |
93 | | // Properties, if needed |
94 | 0 | if (coder->need_properties) |
95 | 0 | lzma_lzma_lclppb_encode(&coder->opt_cur, coder->buf + pos); |
96 | |
|
97 | 0 | coder->need_properties = false; |
98 | 0 | coder->need_state_reset = false; |
99 | 0 | coder->need_dictionary_reset = false; |
100 | | |
101 | | // The copying code uses coder->compressed_size to indicate the end |
102 | | // of coder->buf[], so we need add the maximum size of the header here. |
103 | 0 | coder->compressed_size += LZMA2_HEADER_MAX; |
104 | |
|
105 | 0 | return; |
106 | 0 | } |
107 | | |
108 | | |
109 | | static void |
110 | | lzma2_header_uncompressed(lzma_lzma2_coder *coder) |
111 | 0 | { |
112 | 0 | assert(coder->uncompressed_size > 0); |
113 | 0 | assert(coder->uncompressed_size <= LZMA2_CHUNK_MAX); |
114 | | |
115 | | // If this is the first chunk, we need to include dictionary |
116 | | // reset indicator. |
117 | 0 | if (coder->need_dictionary_reset) |
118 | 0 | coder->buf[0] = 1; |
119 | 0 | else |
120 | 0 | coder->buf[0] = 2; |
121 | |
|
122 | 0 | coder->need_dictionary_reset = false; |
123 | | |
124 | | // "Compressed" size |
125 | 0 | coder->buf[1] = (coder->uncompressed_size - 1) >> 8; |
126 | 0 | coder->buf[2] = (coder->uncompressed_size - 1) & 0xFF; |
127 | | |
128 | | // Set the start position for copying. |
129 | 0 | coder->buf_pos = 0; |
130 | 0 | return; |
131 | 0 | } |
132 | | |
133 | | |
134 | | static lzma_ret |
135 | | lzma2_encode(void *coder_ptr, lzma_mf *restrict mf, |
136 | | uint8_t *restrict out, size_t *restrict out_pos, |
137 | | size_t out_size) |
138 | 0 | { |
139 | 0 | lzma_lzma2_coder *restrict coder = coder_ptr; |
140 | |
|
141 | 0 | while (*out_pos < out_size) |
142 | 0 | switch (coder->sequence) { |
143 | 0 | case SEQ_INIT: |
144 | | // If there's no input left and we are flushing or finishing, |
145 | | // don't start a new chunk. |
146 | 0 | if (mf_unencoded(mf) == 0) { |
147 | | // Write end of payload marker if finishing. |
148 | 0 | if (mf->action == LZMA_FINISH) |
149 | 0 | out[(*out_pos)++] = 0; |
150 | |
|
151 | 0 | return mf->action == LZMA_RUN |
152 | 0 | ? LZMA_OK : LZMA_STREAM_END; |
153 | 0 | } |
154 | | |
155 | 0 | if (coder->need_state_reset) |
156 | 0 | return_if_error(lzma_lzma_encoder_reset( |
157 | 0 | coder->lzma, &coder->opt_cur)); |
158 | | |
159 | 0 | coder->uncompressed_size = 0; |
160 | 0 | coder->compressed_size = 0; |
161 | 0 | coder->sequence = SEQ_LZMA_ENCODE; |
162 | | |
163 | | // Fall through |
164 | |
|
165 | 0 | case SEQ_LZMA_ENCODE: { |
166 | | // Calculate how much more uncompressed data this chunk |
167 | | // could accept. |
168 | 0 | const uint32_t left = LZMA2_UNCOMPRESSED_MAX |
169 | 0 | - coder->uncompressed_size; |
170 | 0 | uint32_t limit; |
171 | |
|
172 | 0 | if (left < mf->match_len_max) { |
173 | | // Must flush immediately since the next LZMA symbol |
174 | | // could make the uncompressed size of the chunk too |
175 | | // big. |
176 | 0 | limit = 0; |
177 | 0 | } else { |
178 | | // Calculate maximum read_limit that is OK from point |
179 | | // of view of LZMA2 chunk size. |
180 | 0 | limit = mf->read_pos - mf->read_ahead |
181 | 0 | + left - mf->match_len_max; |
182 | 0 | } |
183 | | |
184 | | // Save the start position so that we can update |
185 | | // coder->uncompressed_size. |
186 | 0 | const uint32_t read_start = mf->read_pos - mf->read_ahead; |
187 | | |
188 | | // Call the LZMA encoder until the chunk is finished. |
189 | 0 | const lzma_ret ret = lzma_lzma_encode(coder->lzma, mf, |
190 | 0 | coder->buf + LZMA2_HEADER_MAX, |
191 | 0 | &coder->compressed_size, |
192 | 0 | LZMA2_CHUNK_MAX, limit); |
193 | |
|
194 | 0 | coder->uncompressed_size += mf->read_pos - mf->read_ahead |
195 | 0 | - read_start; |
196 | |
|
197 | 0 | assert(coder->compressed_size <= LZMA2_CHUNK_MAX); |
198 | 0 | assert(coder->uncompressed_size <= LZMA2_UNCOMPRESSED_MAX); |
199 | |
|
200 | 0 | if (ret != LZMA_STREAM_END) |
201 | 0 | return LZMA_OK; |
202 | | |
203 | | // See if the chunk compressed. If it didn't, we encode it |
204 | | // as uncompressed chunk. This saves a few bytes of space |
205 | | // and makes decoding faster. |
206 | 0 | if (coder->compressed_size >= coder->uncompressed_size) { |
207 | 0 | coder->uncompressed_size += mf->read_ahead; |
208 | 0 | assert(coder->uncompressed_size |
209 | 0 | <= LZMA2_UNCOMPRESSED_MAX); |
210 | 0 | mf->read_ahead = 0; |
211 | 0 | lzma2_header_uncompressed(coder); |
212 | 0 | coder->need_state_reset = true; |
213 | 0 | coder->sequence = SEQ_UNCOMPRESSED_HEADER; |
214 | 0 | break; |
215 | 0 | } |
216 | | |
217 | | // The chunk did compress at least by one byte, so we store |
218 | | // the chunk as LZMA. |
219 | 0 | lzma2_header_lzma(coder); |
220 | |
|
221 | 0 | coder->sequence = SEQ_LZMA_COPY; |
222 | 0 | } |
223 | | |
224 | | // Fall through |
225 | | |
226 | 0 | case SEQ_LZMA_COPY: |
227 | | // Copy the compressed chunk along its headers to the |
228 | | // output buffer. |
229 | 0 | lzma_bufcpy(coder->buf, &coder->buf_pos, |
230 | 0 | coder->compressed_size, |
231 | 0 | out, out_pos, out_size); |
232 | 0 | if (coder->buf_pos != coder->compressed_size) |
233 | 0 | return LZMA_OK; |
234 | | |
235 | 0 | coder->sequence = SEQ_INIT; |
236 | 0 | break; |
237 | | |
238 | 0 | case SEQ_UNCOMPRESSED_HEADER: |
239 | | // Copy the three-byte header to indicate uncompressed chunk. |
240 | 0 | lzma_bufcpy(coder->buf, &coder->buf_pos, |
241 | 0 | LZMA2_HEADER_UNCOMPRESSED, |
242 | 0 | out, out_pos, out_size); |
243 | 0 | if (coder->buf_pos != LZMA2_HEADER_UNCOMPRESSED) |
244 | 0 | return LZMA_OK; |
245 | | |
246 | 0 | coder->sequence = SEQ_UNCOMPRESSED_COPY; |
247 | | |
248 | | // Fall through |
249 | |
|
250 | 0 | case SEQ_UNCOMPRESSED_COPY: |
251 | | // Copy the uncompressed data as is from the dictionary |
252 | | // to the output buffer. |
253 | 0 | mf_read(mf, out, out_pos, out_size, &coder->uncompressed_size); |
254 | 0 | if (coder->uncompressed_size != 0) |
255 | 0 | return LZMA_OK; |
256 | | |
257 | 0 | coder->sequence = SEQ_INIT; |
258 | 0 | break; |
259 | 0 | } |
260 | | |
261 | 0 | return LZMA_OK; |
262 | 0 | } |
263 | | |
264 | | |
265 | | static void |
266 | | lzma2_encoder_end(void *coder_ptr, const lzma_allocator *allocator) |
267 | 0 | { |
268 | 0 | lzma_lzma2_coder *coder = coder_ptr; |
269 | 0 | lzma_free(coder->lzma, allocator); |
270 | 0 | lzma_free(coder, allocator); |
271 | 0 | return; |
272 | 0 | } |
273 | | |
274 | | |
275 | | static lzma_ret |
276 | | lzma2_encoder_options_update(void *coder_ptr, const lzma_filter *filter) |
277 | 0 | { |
278 | 0 | lzma_lzma2_coder *coder = coder_ptr; |
279 | | |
280 | | // New options can be set only when there is no incomplete chunk. |
281 | | // This is the case at the beginning of the raw stream and right |
282 | | // after LZMA_SYNC_FLUSH. |
283 | 0 | if (filter->options == NULL || coder->sequence != SEQ_INIT) |
284 | 0 | return LZMA_PROG_ERROR; |
285 | | |
286 | | // Look if there are new options. At least for now, |
287 | | // only lc/lp/pb can be changed. |
288 | 0 | const lzma_options_lzma *opt = filter->options; |
289 | 0 | if (coder->opt_cur.lc != opt->lc || coder->opt_cur.lp != opt->lp |
290 | 0 | || coder->opt_cur.pb != opt->pb) { |
291 | | // Validate the options. |
292 | 0 | if (opt->lc > LZMA_LCLP_MAX || opt->lp > LZMA_LCLP_MAX |
293 | 0 | || opt->lc + opt->lp > LZMA_LCLP_MAX |
294 | 0 | || opt->pb > LZMA_PB_MAX) |
295 | 0 | return LZMA_OPTIONS_ERROR; |
296 | | |
297 | | // The new options will be used when the encoder starts |
298 | | // a new LZMA2 chunk. |
299 | 0 | coder->opt_cur.lc = opt->lc; |
300 | 0 | coder->opt_cur.lp = opt->lp; |
301 | 0 | coder->opt_cur.pb = opt->pb; |
302 | 0 | coder->need_properties = true; |
303 | 0 | coder->need_state_reset = true; |
304 | 0 | } |
305 | | |
306 | 0 | return LZMA_OK; |
307 | 0 | } |
308 | | |
309 | | |
310 | | static lzma_ret |
311 | | lzma2_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator, |
312 | | lzma_vli id lzma_attribute((__unused__)), const void *options, |
313 | | lzma_lz_options *lz_options) |
314 | 0 | { |
315 | 0 | if (options == NULL) |
316 | 0 | return LZMA_PROG_ERROR; |
317 | | |
318 | 0 | lzma_lzma2_coder *coder = lz->coder; |
319 | 0 | if (coder == NULL) { |
320 | 0 | coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator); |
321 | 0 | if (coder == NULL) |
322 | 0 | return LZMA_MEM_ERROR; |
323 | | |
324 | 0 | lz->coder = coder; |
325 | 0 | lz->code = &lzma2_encode; |
326 | 0 | lz->end = &lzma2_encoder_end; |
327 | 0 | lz->options_update = &lzma2_encoder_options_update; |
328 | |
|
329 | 0 | coder->lzma = NULL; |
330 | 0 | } |
331 | | |
332 | 0 | coder->opt_cur = *(const lzma_options_lzma *)(options); |
333 | |
|
334 | 0 | coder->sequence = SEQ_INIT; |
335 | 0 | coder->need_properties = true; |
336 | 0 | coder->need_state_reset = false; |
337 | 0 | coder->need_dictionary_reset |
338 | 0 | = coder->opt_cur.preset_dict == NULL |
339 | 0 | || coder->opt_cur.preset_dict_size == 0; |
340 | | |
341 | | // Initialize LZMA encoder |
342 | 0 | return_if_error(lzma_lzma_encoder_create(&coder->lzma, allocator, |
343 | 0 | LZMA_FILTER_LZMA2, &coder->opt_cur, lz_options)); |
344 | | |
345 | | // Make sure that we will always have enough history available in |
346 | | // case we need to use uncompressed chunks. They are used when the |
347 | | // compressed size of a chunk is not smaller than the uncompressed |
348 | | // size, so we need to have at least LZMA2_COMPRESSED_MAX bytes |
349 | | // history available. |
350 | 0 | if (lz_options->before_size + lz_options->dict_size < LZMA2_CHUNK_MAX) |
351 | 0 | lz_options->before_size |
352 | 0 | = LZMA2_CHUNK_MAX - lz_options->dict_size; |
353 | |
|
354 | 0 | return LZMA_OK; |
355 | 0 | } |
356 | | |
357 | | |
358 | | extern lzma_ret |
359 | | lzma_lzma2_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, |
360 | | const lzma_filter_info *filters) |
361 | 0 | { |
362 | 0 | return lzma_lz_encoder_init( |
363 | 0 | next, allocator, filters, &lzma2_encoder_init); |
364 | 0 | } |
365 | | |
366 | | |
367 | | extern uint64_t |
368 | | lzma_lzma2_encoder_memusage(const void *options) |
369 | 0 | { |
370 | 0 | const uint64_t lzma_mem = lzma_lzma_encoder_memusage(options); |
371 | 0 | if (lzma_mem == UINT64_MAX) |
372 | 0 | return UINT64_MAX; |
373 | | |
374 | 0 | return sizeof(lzma_lzma2_coder) + lzma_mem; |
375 | 0 | } |
376 | | |
377 | | |
378 | | extern lzma_ret |
379 | | lzma_lzma2_props_encode(const void *options, uint8_t *out) |
380 | 0 | { |
381 | 0 | if (options == NULL) |
382 | 0 | return LZMA_PROG_ERROR; |
383 | | |
384 | 0 | const lzma_options_lzma *const opt = options; |
385 | 0 | uint32_t d = my_max(opt->dict_size, LZMA_DICT_SIZE_MIN); |
386 | | |
387 | | // Round up to the next 2^n - 1 or 2^n + 2^(n - 1) - 1 depending |
388 | | // on which one is the next: |
389 | 0 | --d; |
390 | 0 | d |= d >> 2; |
391 | 0 | d |= d >> 3; |
392 | 0 | d |= d >> 4; |
393 | 0 | d |= d >> 8; |
394 | 0 | d |= d >> 16; |
395 | | |
396 | | // Get the highest two bits using the proper encoding: |
397 | 0 | if (d == UINT32_MAX) |
398 | 0 | out[0] = 40; |
399 | 0 | else |
400 | 0 | out[0] = get_dist_slot(d + 1) - 24; |
401 | |
|
402 | 0 | return LZMA_OK; |
403 | 0 | } |
404 | | |
405 | | |
406 | | extern uint64_t |
407 | | lzma_lzma2_block_size(const void *options) |
408 | 0 | { |
409 | 0 | const lzma_options_lzma *const opt = options; |
410 | |
|
411 | 0 | if (!IS_ENC_DICT_SIZE_VALID(opt->dict_size)) |
412 | 0 | return UINT64_MAX; |
413 | | |
414 | | // Use at least 1 MiB to keep compression ratio better. |
415 | 0 | return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20); |
416 | 0 | } |