/src/xz/src/liblzma/common/index_hash.c
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | // |
3 | | /// \file index_hash.c |
4 | | /// \brief Validates Index by using a hash function |
5 | | // |
6 | | // Author: Lasse Collin |
7 | | // |
8 | | // This file has been put into the public domain. |
9 | | // You can do whatever you want with this file. |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #include "common.h" |
14 | | #include "index.h" |
15 | | #include "check.h" |
16 | | |
17 | | |
18 | | typedef struct { |
19 | | /// Sum of the Block sizes (including Block Padding) |
20 | | lzma_vli blocks_size; |
21 | | |
22 | | /// Sum of the Uncompressed Size fields |
23 | | lzma_vli uncompressed_size; |
24 | | |
25 | | /// Number of Records |
26 | | lzma_vli count; |
27 | | |
28 | | /// Size of the List of Index Records as bytes |
29 | | lzma_vli index_list_size; |
30 | | |
31 | | /// Check calculated from Unpadded Sizes and Uncompressed Sizes. |
32 | | lzma_check_state check; |
33 | | |
34 | | } lzma_index_hash_info; |
35 | | |
36 | | |
37 | | struct lzma_index_hash_s { |
38 | | enum { |
39 | | SEQ_BLOCK, |
40 | | SEQ_COUNT, |
41 | | SEQ_UNPADDED, |
42 | | SEQ_UNCOMPRESSED, |
43 | | SEQ_PADDING_INIT, |
44 | | SEQ_PADDING, |
45 | | SEQ_CRC32, |
46 | | } sequence; |
47 | | |
48 | | /// Information collected while decoding the actual Blocks. |
49 | | lzma_index_hash_info blocks; |
50 | | |
51 | | /// Information collected from the Index field. |
52 | | lzma_index_hash_info records; |
53 | | |
54 | | /// Number of Records not fully decoded |
55 | | lzma_vli remaining; |
56 | | |
57 | | /// Unpadded Size currently being read from an Index Record. |
58 | | lzma_vli unpadded_size; |
59 | | |
60 | | /// Uncompressed Size currently being read from an Index Record. |
61 | | lzma_vli uncompressed_size; |
62 | | |
63 | | /// Position in variable-length integers when decoding them from |
64 | | /// the List of Records. |
65 | | size_t pos; |
66 | | |
67 | | /// CRC32 of the Index |
68 | | uint32_t crc32; |
69 | | }; |
70 | | |
71 | | |
72 | | extern LZMA_API(lzma_index_hash *) |
73 | | lzma_index_hash_init(lzma_index_hash *index_hash, |
74 | | const lzma_allocator *allocator) |
75 | 59.1k | { |
76 | 59.1k | if (index_hash == NULL) { |
77 | 10.3k | index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator); |
78 | 10.3k | if (index_hash == NULL) |
79 | 0 | return NULL; |
80 | 10.3k | } |
81 | | |
82 | 59.1k | index_hash->sequence = SEQ_BLOCK; |
83 | 59.1k | index_hash->blocks.blocks_size = 0; |
84 | 59.1k | index_hash->blocks.uncompressed_size = 0; |
85 | 59.1k | index_hash->blocks.count = 0; |
86 | 59.1k | index_hash->blocks.index_list_size = 0; |
87 | 59.1k | index_hash->records.blocks_size = 0; |
88 | 59.1k | index_hash->records.uncompressed_size = 0; |
89 | 59.1k | index_hash->records.count = 0; |
90 | 59.1k | index_hash->records.index_list_size = 0; |
91 | 59.1k | index_hash->unpadded_size = 0; |
92 | 59.1k | index_hash->uncompressed_size = 0; |
93 | 59.1k | index_hash->pos = 0; |
94 | 59.1k | index_hash->crc32 = 0; |
95 | | |
96 | | // These cannot fail because LZMA_CHECK_BEST is known to be supported. |
97 | 59.1k | (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST); |
98 | 59.1k | (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST); |
99 | | |
100 | 59.1k | return index_hash; |
101 | 59.1k | } |
102 | | |
103 | | |
104 | | extern LZMA_API(void) |
105 | | lzma_index_hash_end(lzma_index_hash *index_hash, |
106 | | const lzma_allocator *allocator) |
107 | 10.3k | { |
108 | 10.3k | lzma_free(index_hash, allocator); |
109 | 10.3k | return; |
110 | 10.3k | } |
111 | | |
112 | | |
113 | | extern LZMA_API(lzma_vli) |
114 | | lzma_index_hash_size(const lzma_index_hash *index_hash) |
115 | 48.9k | { |
116 | | // Get the size of the Index from ->blocks instead of ->records for |
117 | | // cases where application wants to know the Index Size before |
118 | | // decoding the Index. |
119 | 48.9k | return index_size(index_hash->blocks.count, |
120 | 48.9k | index_hash->blocks.index_list_size); |
121 | 48.9k | } |
122 | | |
123 | | |
124 | | /// Updates the sizes and the hash without any validation. |
125 | | static void |
126 | | hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size, |
127 | | lzma_vli uncompressed_size) |
128 | 273k | { |
129 | 273k | info->blocks_size += vli_ceil4(unpadded_size); |
130 | 273k | info->uncompressed_size += uncompressed_size; |
131 | 273k | info->index_list_size += lzma_vli_size(unpadded_size) |
132 | 273k | + lzma_vli_size(uncompressed_size); |
133 | 273k | ++info->count; |
134 | | |
135 | 273k | const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; |
136 | 273k | lzma_check_update(&info->check, LZMA_CHECK_BEST, |
137 | 273k | (const uint8_t *)(sizes), sizeof(sizes)); |
138 | | |
139 | 273k | return; |
140 | 273k | } |
141 | | |
142 | | |
143 | | extern LZMA_API(lzma_ret) |
144 | | lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size, |
145 | | lzma_vli uncompressed_size) |
146 | 272k | { |
147 | | // Validate the arguments. |
148 | 272k | if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK |
149 | 272k | || unpadded_size < UNPADDED_SIZE_MIN |
150 | 272k | || unpadded_size > UNPADDED_SIZE_MAX |
151 | 272k | || uncompressed_size > LZMA_VLI_MAX) |
152 | 0 | return LZMA_PROG_ERROR; |
153 | | |
154 | | // Update the hash. |
155 | 272k | hash_append(&index_hash->blocks, unpadded_size, uncompressed_size); |
156 | | |
157 | | // Validate the properties of *info are still in allowed limits. |
158 | 272k | if (index_hash->blocks.blocks_size > LZMA_VLI_MAX |
159 | 272k | || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX |
160 | 272k | || index_size(index_hash->blocks.count, |
161 | 272k | index_hash->blocks.index_list_size) |
162 | 272k | > LZMA_BACKWARD_SIZE_MAX |
163 | 272k | || index_stream_size(index_hash->blocks.blocks_size, |
164 | 272k | index_hash->blocks.count, |
165 | 272k | index_hash->blocks.index_list_size) |
166 | 272k | > LZMA_VLI_MAX) |
167 | 0 | return LZMA_DATA_ERROR; |
168 | | |
169 | 272k | return LZMA_OK; |
170 | 272k | } |
171 | | |
172 | | |
173 | | extern LZMA_API(lzma_ret) |
174 | | lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in, |
175 | | size_t *in_pos, size_t in_size) |
176 | 49.5k | { |
177 | | // Catch zero input buffer here, because in contrast to Index encoder |
178 | | // and decoder functions, applications call this function directly |
179 | | // instead of via lzma_code(), which does the buffer checking. |
180 | 49.5k | if (*in_pos >= in_size) |
181 | 0 | return LZMA_BUF_ERROR; |
182 | | |
183 | | // NOTE: This function has many similarities to index_encode() and |
184 | | // index_decode() functions found from index_encoder.c and |
185 | | // index_decoder.c. See the comments especially in index_encoder.c. |
186 | 49.5k | const size_t in_start = *in_pos; |
187 | 49.5k | lzma_ret ret = LZMA_OK; |
188 | | |
189 | 247k | while (*in_pos < in_size) |
190 | 247k | switch (index_hash->sequence) { |
191 | 49.5k | case SEQ_BLOCK: |
192 | | // Check the Index Indicator is present. |
193 | 49.5k | if (in[(*in_pos)++] != INDEX_INDICATOR) |
194 | 0 | return LZMA_DATA_ERROR; |
195 | | |
196 | 49.5k | index_hash->sequence = SEQ_COUNT; |
197 | 49.5k | break; |
198 | | |
199 | 49.5k | case SEQ_COUNT: { |
200 | 49.5k | ret = lzma_vli_decode(&index_hash->remaining, |
201 | 49.5k | &index_hash->pos, in, in_pos, in_size); |
202 | 49.5k | if (ret != LZMA_STREAM_END) |
203 | 16 | goto out; |
204 | | |
205 | | // The count must match the count of the Blocks decoded. |
206 | 49.5k | if (index_hash->remaining != index_hash->blocks.count) |
207 | 120 | return LZMA_DATA_ERROR; |
208 | | |
209 | 49.3k | ret = LZMA_OK; |
210 | 49.3k | index_hash->pos = 0; |
211 | | |
212 | | // Handle the special case when there are no Blocks. |
213 | 49.3k | index_hash->sequence = index_hash->remaining == 0 |
214 | 49.3k | ? SEQ_PADDING_INIT : SEQ_UNPADDED; |
215 | 49.3k | break; |
216 | 49.5k | } |
217 | | |
218 | 1.28k | case SEQ_UNPADDED: |
219 | 2.42k | case SEQ_UNCOMPRESSED: { |
220 | 2.42k | lzma_vli *size = index_hash->sequence == SEQ_UNPADDED |
221 | 2.42k | ? &index_hash->unpadded_size |
222 | 2.42k | : &index_hash->uncompressed_size; |
223 | | |
224 | 2.42k | ret = lzma_vli_decode(size, &index_hash->pos, |
225 | 2.42k | in, in_pos, in_size); |
226 | 2.42k | if (ret != LZMA_STREAM_END) |
227 | 13 | goto out; |
228 | | |
229 | 2.41k | ret = LZMA_OK; |
230 | 2.41k | index_hash->pos = 0; |
231 | | |
232 | 2.41k | if (index_hash->sequence == SEQ_UNPADDED) { |
233 | 1.28k | if (index_hash->unpadded_size < UNPADDED_SIZE_MIN |
234 | 1.28k | || index_hash->unpadded_size |
235 | 1.27k | > UNPADDED_SIZE_MAX) |
236 | 7 | return LZMA_DATA_ERROR; |
237 | | |
238 | 1.27k | index_hash->sequence = SEQ_UNCOMPRESSED; |
239 | 1.27k | } else { |
240 | | // Update the hash. |
241 | 1.12k | hash_append(&index_hash->records, |
242 | 1.12k | index_hash->unpadded_size, |
243 | 1.12k | index_hash->uncompressed_size); |
244 | | |
245 | | // Verify that we don't go over the known sizes. Note |
246 | | // that this validation is simpler than the one used |
247 | | // in lzma_index_hash_append(), because here we know |
248 | | // that values in index_hash->blocks are already |
249 | | // validated and we are fine as long as we don't |
250 | | // exceed them in index_hash->records. |
251 | 1.12k | if (index_hash->blocks.blocks_size |
252 | 1.12k | < index_hash->records.blocks_size |
253 | 1.12k | || index_hash->blocks.uncompressed_size |
254 | 1.09k | < index_hash->records.uncompressed_size |
255 | 1.12k | || index_hash->blocks.index_list_size |
256 | 1.03k | < index_hash->records.index_list_size) |
257 | 99 | return LZMA_DATA_ERROR; |
258 | | |
259 | | // Check if this was the last Record. |
260 | 1.02k | index_hash->sequence = --index_hash->remaining == 0 |
261 | 1.02k | ? SEQ_PADDING_INIT : SEQ_UNPADDED; |
262 | 1.02k | } |
263 | | |
264 | 2.30k | break; |
265 | 2.41k | } |
266 | | |
267 | 49.1k | case SEQ_PADDING_INIT: |
268 | 49.1k | index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded( |
269 | 49.1k | index_hash->records.count, |
270 | 49.1k | index_hash->records.index_list_size)) & 3; |
271 | 49.1k | index_hash->sequence = SEQ_PADDING; |
272 | | |
273 | | // Fall through |
274 | | |
275 | 146k | case SEQ_PADDING: |
276 | 146k | if (index_hash->pos > 0) { |
277 | 97.1k | --index_hash->pos; |
278 | 97.1k | if (in[(*in_pos)++] != 0x00) |
279 | 11 | return LZMA_DATA_ERROR; |
280 | | |
281 | 97.1k | break; |
282 | 97.1k | } |
283 | | |
284 | | // Compare the sizes. |
285 | 49.0k | if (index_hash->blocks.blocks_size |
286 | 49.0k | != index_hash->records.blocks_size |
287 | 49.0k | || index_hash->blocks.uncompressed_size |
288 | 49.0k | != index_hash->records.uncompressed_size |
289 | 49.0k | || index_hash->blocks.index_list_size |
290 | 49.0k | != index_hash->records.index_list_size) |
291 | 29 | return LZMA_DATA_ERROR; |
292 | | |
293 | | // Finish the hashes and compare them. |
294 | 49.0k | lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST); |
295 | 49.0k | lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST); |
296 | 49.0k | if (memcmp(index_hash->blocks.check.buffer.u8, |
297 | 49.0k | index_hash->records.check.buffer.u8, |
298 | 49.0k | lzma_check_size(LZMA_CHECK_BEST)) != 0) |
299 | 19 | return LZMA_DATA_ERROR; |
300 | | |
301 | | // Finish the CRC32 calculation. |
302 | 49.0k | index_hash->crc32 = lzma_crc32(in + in_start, |
303 | 49.0k | *in_pos - in_start, index_hash->crc32); |
304 | | |
305 | 49.0k | index_hash->sequence = SEQ_CRC32; |
306 | | |
307 | | // Fall through |
308 | | |
309 | 49.0k | case SEQ_CRC32: |
310 | 196k | do { |
311 | 196k | if (*in_pos == in_size) |
312 | 23 | return LZMA_OK; |
313 | | |
314 | 196k | if (((index_hash->crc32 >> (index_hash->pos * 8)) |
315 | 196k | & 0xFF) != in[(*in_pos)++]) { |
316 | | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
317 | | return LZMA_DATA_ERROR; |
318 | | #endif |
319 | 7.28k | } |
320 | | |
321 | 196k | } while (++index_hash->pos < 4); |
322 | | |
323 | 49.0k | return LZMA_STREAM_END; |
324 | | |
325 | 0 | default: |
326 | 0 | assert(0); |
327 | 0 | return LZMA_PROG_ERROR; |
328 | 247k | } |
329 | | |
330 | 208 | out: |
331 | | // Update the CRC32. |
332 | | // |
333 | | // Avoid null pointer + 0 (undefined behavior) in "in + in_start". |
334 | | // In such a case we had no input and thus in_used == 0. |
335 | 208 | { |
336 | 208 | const size_t in_used = *in_pos - in_start; |
337 | 208 | if (in_used > 0) |
338 | 208 | index_hash->crc32 = lzma_crc32(in + in_start, |
339 | 208 | in_used, index_hash->crc32); |
340 | 208 | } |
341 | | |
342 | 208 | return ret; |
343 | 49.5k | } |