/src/xz/src/liblzma/lzma/lzma_common.h
Line | Count | Source |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file lzma_common.h |
6 | | /// \brief Private definitions common to LZMA encoder and decoder |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #ifndef LZMA_LZMA_COMMON_H |
14 | | #define LZMA_LZMA_COMMON_H |
15 | | |
16 | | #include "common.h" |
17 | | #include "range_common.h" |
18 | | |
19 | | |
20 | | /////////////////// |
21 | | // Miscellaneous // |
22 | | /////////////////// |
23 | | |
24 | | /// Maximum number of position states. A position state is the lowest pos bits |
25 | | /// number of bits of the current uncompressed offset. In some places there |
26 | | /// are different sets of probabilities for different pos states. |
27 | | #define POS_STATES_MAX (1 << LZMA_PB_MAX) |
28 | | |
29 | | |
30 | | /// Validates lc, lp, and pb. |
31 | | static inline bool |
32 | | is_lclppb_valid(const lzma_options_lzma *options) |
33 | 9.89k | { |
34 | 9.89k | return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX |
35 | 9.89k | && options->lc + options->lp <= LZMA_LCLP_MAX |
36 | 9.89k | && options->pb <= LZMA_PB_MAX; |
37 | 9.89k | } lzma_decoder.c:is_lclppb_valid Line | Count | Source | 33 | 3.01k | { | 34 | 3.01k | return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX | 35 | 3.01k | && options->lc + options->lp <= LZMA_LCLP_MAX | 36 | 3.01k | && options->pb <= LZMA_PB_MAX; | 37 | 3.01k | } |
lzma_encoder.c:is_lclppb_valid Line | Count | Source | 33 | 6.88k | { | 34 | 6.88k | return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX | 35 | 6.88k | && options->lc + options->lp <= LZMA_LCLP_MAX | 36 | 6.88k | && options->pb <= LZMA_PB_MAX; | 37 | 6.88k | } |
Unexecuted instantiation: lzma_encoder_optimum_fast.c:is_lclppb_valid Unexecuted instantiation: lzma_encoder_optimum_normal.c:is_lclppb_valid |
38 | | |
39 | | |
40 | | /////////// |
41 | | // State // |
42 | | /////////// |
43 | | |
44 | | /// This enum is used to track which events have occurred most recently and |
45 | | /// in which order. This information is used to predict the next event. |
46 | | /// |
47 | | /// Events: |
48 | | /// - Literal: One 8-bit byte |
49 | | /// - Match: Repeat a chunk of data at some distance |
50 | | /// - Long repeat: Multi-byte match at a recently seen distance |
51 | | /// - Short repeat: One-byte repeat at a recently seen distance |
52 | | /// |
53 | | /// The event names are in from STATE_oldest_older_previous. REP means |
54 | | /// either short or long repeated match, and NONLIT means any non-literal. |
55 | | typedef enum { |
56 | | STATE_LIT_LIT, |
57 | | STATE_MATCH_LIT_LIT, |
58 | | STATE_REP_LIT_LIT, |
59 | | STATE_SHORTREP_LIT_LIT, |
60 | | STATE_MATCH_LIT, |
61 | | STATE_REP_LIT, |
62 | | STATE_SHORTREP_LIT, |
63 | | STATE_LIT_MATCH, |
64 | | STATE_LIT_LONGREP, |
65 | | STATE_LIT_SHORTREP, |
66 | | STATE_NONLIT_MATCH, |
67 | | STATE_NONLIT_REP, |
68 | | } lzma_lzma_state; |
69 | | |
70 | | |
71 | | /// Total number of states |
72 | 428k | #define STATES 12 |
73 | | |
74 | | /// The lowest 7 states indicate that the previous state was a literal. |
75 | 11.6M | #define LIT_STATES 7 |
76 | | |
77 | | |
78 | | /// Indicate that the latest state was a literal. |
79 | | #define update_literal(state) \ |
80 | 472k | state = ((state) <= STATE_SHORTREP_LIT_LIT \ |
81 | 472k | ? STATE_LIT_LIT \ |
82 | 472k | : ((state) <= STATE_LIT_SHORTREP \ |
83 | 393k | ? (state) - 3 \ |
84 | 393k | : (state) - 6)) |
85 | | |
86 | | /// Like update_literal(state) but when it is already known that |
87 | | /// is_literal_state(state) is true. |
88 | | #define update_literal_normal(state) \ |
89 | 3.80M | state = ((state) <= STATE_SHORTREP_LIT_LIT \ |
90 | 3.80M | ? STATE_LIT_LIT \ |
91 | 3.80M | : (state) - 3) |
92 | | |
93 | | /// Like update_literal(state) but when it is already known that |
94 | | /// is_literal_state(state) is false. |
95 | | #define update_literal_matched(state) \ |
96 | 613k | state = ((state) <= STATE_LIT_SHORTREP \ |
97 | 613k | ? (state) - 3 \ |
98 | 613k | : (state) - 6) |
99 | | |
100 | | /// Indicate that the latest state was a match. |
101 | | #define update_match(state) \ |
102 | 1.28M | state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH) |
103 | | |
104 | | /// Indicate that the latest state was a long repeated match. |
105 | | #define update_long_rep(state) \ |
106 | 6.00M | state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP) |
107 | | |
108 | | /// Indicate that the latest state was a short match. |
109 | | #define update_short_rep(state) \ |
110 | 220k | state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP) |
111 | | |
112 | | /// Test if the previous state was a literal. |
113 | | #define is_literal_state(state) \ |
114 | 4.84M | ((state) < LIT_STATES) |
115 | | |
116 | | |
117 | | ///////////// |
118 | | // Literal // |
119 | | ///////////// |
120 | | |
121 | | /// Each literal coder is divided in three sections: |
122 | | /// - 0x001-0x0FF: Without match byte |
123 | | /// - 0x101-0x1FF: With match byte; match bit is 0 |
124 | | /// - 0x201-0x2FF: With match byte; match bit is 1 |
125 | | /// |
126 | | /// Match byte is used when the previous LZMA symbol was something else than |
127 | | /// a literal (that is, it was some kind of match). |
128 | 32.9k | #define LITERAL_CODER_SIZE UINT32_C(0x300) |
129 | | |
130 | | /// Maximum number of literal coders |
131 | | #define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX) |
132 | | |
133 | | /// Calculates the literal_mask that literal_subcoder() needs. |
134 | | #define literal_mask_calc(lc, lp) \ |
135 | 32.9k | ((UINT32_C(0x100) << (lp)) - (UINT32_C(0x100) >> (lc))) |
136 | | |
137 | | /// Locate the literal coder for the next literal byte. The choice depends on |
138 | | /// - the lowest literal_pos_bits bits of the position of the current |
139 | | /// byte; and |
140 | | /// - the highest literal_context_bits bits of the previous byte. |
141 | | #define literal_subcoder(probs, lc, literal_mask, pos, prev_byte) \ |
142 | 5.17M | ((probs) + UINT32_C(3) * \ |
143 | 5.17M | (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc))) |
144 | | |
145 | | |
146 | | static inline void |
147 | | literal_init(probability *probs, uint32_t lc, uint32_t lp) |
148 | 32.9k | { |
149 | 32.9k | assert(lc + lp <= LZMA_LCLP_MAX); |
150 | | |
151 | 32.9k | const size_t coders = LITERAL_CODER_SIZE << (lc + lp); |
152 | | |
153 | 146M | for (size_t i = 0; i < coders; ++i) |
154 | 146M | bit_reset(probs[i]); |
155 | | |
156 | 32.9k | return; |
157 | 32.9k | } lzma_decoder.c:literal_init Line | Count | Source | 148 | 28.9k | { | 149 | 28.9k | assert(lc + lp <= LZMA_LCLP_MAX); | 150 | | | 151 | 28.9k | const size_t coders = LITERAL_CODER_SIZE << (lc + lp); | 152 | | | 153 | 121M | for (size_t i = 0; i < coders; ++i) | 154 | 121M | bit_reset(probs[i]); | 155 | | | 156 | 28.9k | return; | 157 | 28.9k | } |
lzma_encoder.c:literal_init Line | Count | Source | 148 | 4.00k | { | 149 | 4.00k | assert(lc + lp <= LZMA_LCLP_MAX); | 150 | | | 151 | 4.00k | const size_t coders = LITERAL_CODER_SIZE << (lc + lp); | 152 | | | 153 | 24.5M | for (size_t i = 0; i < coders; ++i) | 154 | 24.5M | bit_reset(probs[i]); | 155 | | | 156 | 4.00k | return; | 157 | 4.00k | } |
Unexecuted instantiation: lzma_encoder_optimum_fast.c:literal_init Unexecuted instantiation: lzma_encoder_optimum_normal.c:literal_init |
158 | | |
159 | | |
160 | | ////////////////// |
161 | | // Match length // |
162 | | ////////////////// |
163 | | |
164 | | // Minimum length of a match is two bytes. |
165 | 33.3M | #define MATCH_LEN_MIN 2 |
166 | | |
167 | | // Match length is encoded with 4, 5, or 10 bits. |
168 | | // |
169 | | // Length Bits |
170 | | // 2-9 4 = Choice=0 + 3 bits |
171 | | // 10-17 5 = Choice=1 + Choice2=0 + 3 bits |
172 | | // 18-273 10 = Choice=1 + Choice2=1 + 8 bits |
173 | 3.72M | #define LEN_LOW_BITS 3 |
174 | 3.49M | #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) |
175 | 3.22M | #define LEN_MID_BITS 3 |
176 | 3.05M | #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) |
177 | 2.85M | #define LEN_HIGH_BITS 8 |
178 | 253k | #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) |
179 | 8.00k | #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) |
180 | | |
181 | | // Maximum length of a match is 273 which is a result of the encoding |
182 | | // described above. |
183 | 12.0k | #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) |
184 | | |
185 | | |
186 | | //////////////////// |
187 | | // Match distance // |
188 | | //////////////////// |
189 | | |
190 | | // Different sets of probabilities are used for match distances that have very |
191 | | // short match length: Lengths of 2, 3, and 4 bytes have a separate set of |
192 | | // probabilities for each length. The matches with longer length use a shared |
193 | | // set of probabilities. |
194 | 9.07M | #define DIST_STATES 4 |
195 | | |
196 | | // Macro to get the index of the appropriate probability array. |
197 | | #define get_dist_state(len) \ |
198 | 4.10M | ((len) < DIST_STATES + MATCH_LEN_MIN \ |
199 | 4.10M | ? (len) - MATCH_LEN_MIN \ |
200 | 4.10M | : DIST_STATES - 1) |
201 | | |
202 | | // The highest two bits of a match distance (distance slot) are encoded |
203 | | // using six bits. See fastpos.h for more explanation. |
204 | 649k | #define DIST_SLOT_BITS 6 |
205 | 167k | #define DIST_SLOTS (1 << DIST_SLOT_BITS) |
206 | | |
207 | | // Match distances up to 127 are fully encoded using probabilities. Since |
208 | | // the highest two bits (distance slot) are always encoded using six bits, |
209 | | // the distances 0-3 don't need any additional bits to encode, since the |
210 | | // distance slot itself is the same as the actual distance. DIST_MODEL_START |
211 | | // indicates the first distance slot where at least one additional bit is |
212 | | // needed. |
213 | 735k | #define DIST_MODEL_START 4 |
214 | | |
215 | | // Match distances greater than 127 are encoded in three pieces: |
216 | | // - distance slot: the highest two bits |
217 | | // - direct bits: 2-26 bits below the highest two bits |
218 | | // - alignment bits: four lowest bits |
219 | | // |
220 | | // Direct bits don't use any probabilities. |
221 | | // |
222 | | // The distance slot value of 14 is for distances 128-191 (see the table in |
223 | | // fastpos.h to understand why). |
224 | 11.9M | #define DIST_MODEL_END 14 |
225 | | |
226 | | // Distance slots that indicate a distance <= 127. |
227 | 7.52M | #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) |
228 | 7.52M | #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) |
229 | | |
230 | | // For match distances greater than 127, only the highest two bits and the |
231 | | // lowest four bits (alignment) is encoded using probabilities. |
232 | 2.21M | #define ALIGN_BITS 4 |
233 | 1.38M | #define ALIGN_SIZE (1 << ALIGN_BITS) |
234 | 1.18M | #define ALIGN_MASK (ALIGN_SIZE - 1) |
235 | | |
236 | | // LZMA remembers the four most recent match distances. Reusing these distances |
237 | | // tends to take less space than re-encoding the actual distance value. |
238 | 8.63M | #define REPS 4 |
239 | | |
240 | | #endif |