Coverage Report

Created: 2023-09-25 06:56

/src/xz/src/liblzma/common/index_hash.c
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////////////
2
//
3
/// \file       index_hash.c
4
/// \brief      Validates Index by using a hash function
5
//
6
//  Author:     Lasse Collin
7
//
8
//  This file has been put into the public domain.
9
//  You can do whatever you want with this file.
10
//
11
///////////////////////////////////////////////////////////////////////////////
12
13
#include "common.h"
14
#include "index.h"
15
#include "check.h"
16
17
18
typedef struct {
19
  /// Sum of the Block sizes (including Block Padding)
20
  lzma_vli blocks_size;
21
22
  /// Sum of the Uncompressed Size fields
23
  lzma_vli uncompressed_size;
24
25
  /// Number of Records
26
  lzma_vli count;
27
28
  /// Size of the List of Index Records as bytes
29
  lzma_vli index_list_size;
30
31
  /// Check calculated from Unpadded Sizes and Uncompressed Sizes.
32
  lzma_check_state check;
33
34
} lzma_index_hash_info;
35
36
37
struct lzma_index_hash_s {
38
  enum {
39
    SEQ_BLOCK,
40
    SEQ_COUNT,
41
    SEQ_UNPADDED,
42
    SEQ_UNCOMPRESSED,
43
    SEQ_PADDING_INIT,
44
    SEQ_PADDING,
45
    SEQ_CRC32,
46
  } sequence;
47
48
  /// Information collected while decoding the actual Blocks.
49
  lzma_index_hash_info blocks;
50
51
  /// Information collected from the Index field.
52
  lzma_index_hash_info records;
53
54
  /// Number of Records not fully decoded
55
  lzma_vli remaining;
56
57
  /// Unpadded Size currently being read from an Index Record.
58
  lzma_vli unpadded_size;
59
60
  /// Uncompressed Size currently being read from an Index Record.
61
  lzma_vli uncompressed_size;
62
63
  /// Position in variable-length integers when decoding them from
64
  /// the List of Records.
65
  size_t pos;
66
67
  /// CRC32 of the Index
68
  uint32_t crc32;
69
};
70
71
72
extern LZMA_API(lzma_index_hash *)
73
lzma_index_hash_init(lzma_index_hash *index_hash,
74
    const lzma_allocator *allocator)
75
59.1k
{
76
59.1k
  if (index_hash == NULL) {
77
10.3k
    index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
78
10.3k
    if (index_hash == NULL)
79
0
      return NULL;
80
10.3k
  }
81
82
59.1k
  index_hash->sequence = SEQ_BLOCK;
83
59.1k
  index_hash->blocks.blocks_size = 0;
84
59.1k
  index_hash->blocks.uncompressed_size = 0;
85
59.1k
  index_hash->blocks.count = 0;
86
59.1k
  index_hash->blocks.index_list_size = 0;
87
59.1k
  index_hash->records.blocks_size = 0;
88
59.1k
  index_hash->records.uncompressed_size = 0;
89
59.1k
  index_hash->records.count = 0;
90
59.1k
  index_hash->records.index_list_size = 0;
91
59.1k
  index_hash->unpadded_size = 0;
92
59.1k
  index_hash->uncompressed_size = 0;
93
59.1k
  index_hash->pos = 0;
94
59.1k
  index_hash->crc32 = 0;
95
96
  // These cannot fail because LZMA_CHECK_BEST is known to be supported.
97
59.1k
  (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
98
59.1k
  (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
99
100
59.1k
  return index_hash;
101
59.1k
}
102
103
104
extern LZMA_API(void)
105
lzma_index_hash_end(lzma_index_hash *index_hash,
106
    const lzma_allocator *allocator)
107
10.3k
{
108
10.3k
  lzma_free(index_hash, allocator);
109
10.3k
  return;
110
10.3k
}
111
112
113
extern LZMA_API(lzma_vli)
114
lzma_index_hash_size(const lzma_index_hash *index_hash)
115
48.9k
{
116
  // Get the size of the Index from ->blocks instead of ->records for
117
  // cases where application wants to know the Index Size before
118
  // decoding the Index.
119
48.9k
  return index_size(index_hash->blocks.count,
120
48.9k
      index_hash->blocks.index_list_size);
121
48.9k
}
122
123
124
/// Updates the sizes and the hash without any validation.
125
static void
126
hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
127
    lzma_vli uncompressed_size)
128
273k
{
129
273k
  info->blocks_size += vli_ceil4(unpadded_size);
130
273k
  info->uncompressed_size += uncompressed_size;
131
273k
  info->index_list_size += lzma_vli_size(unpadded_size)
132
273k
      + lzma_vli_size(uncompressed_size);
133
273k
  ++info->count;
134
135
273k
  const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
136
273k
  lzma_check_update(&info->check, LZMA_CHECK_BEST,
137
273k
      (const uint8_t *)(sizes), sizeof(sizes));
138
139
273k
  return;
140
273k
}
141
142
143
extern LZMA_API(lzma_ret)
144
lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
145
    lzma_vli uncompressed_size)
146
272k
{
147
  // Validate the arguments.
148
272k
  if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
149
272k
      || unpadded_size < UNPADDED_SIZE_MIN
150
272k
      || unpadded_size > UNPADDED_SIZE_MAX
151
272k
      || uncompressed_size > LZMA_VLI_MAX)
152
0
    return LZMA_PROG_ERROR;
153
154
  // Update the hash.
155
272k
  hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
156
157
  // Validate the properties of *info are still in allowed limits.
158
272k
  if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
159
272k
      || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
160
272k
      || index_size(index_hash->blocks.count,
161
272k
          index_hash->blocks.index_list_size)
162
272k
        > LZMA_BACKWARD_SIZE_MAX
163
272k
      || index_stream_size(index_hash->blocks.blocks_size,
164
272k
          index_hash->blocks.count,
165
272k
          index_hash->blocks.index_list_size)
166
272k
        > LZMA_VLI_MAX)
167
0
    return LZMA_DATA_ERROR;
168
169
272k
  return LZMA_OK;
170
272k
}
171
172
173
extern LZMA_API(lzma_ret)
174
lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
175
    size_t *in_pos, size_t in_size)
176
49.5k
{
177
  // Catch zero input buffer here, because in contrast to Index encoder
178
  // and decoder functions, applications call this function directly
179
  // instead of via lzma_code(), which does the buffer checking.
180
49.5k
  if (*in_pos >= in_size)
181
0
    return LZMA_BUF_ERROR;
182
183
  // NOTE: This function has many similarities to index_encode() and
184
  // index_decode() functions found from index_encoder.c and
185
  // index_decoder.c. See the comments especially in index_encoder.c.
186
49.5k
  const size_t in_start = *in_pos;
187
49.5k
  lzma_ret ret = LZMA_OK;
188
189
247k
  while (*in_pos < in_size)
190
247k
  switch (index_hash->sequence) {
191
49.5k
  case SEQ_BLOCK:
192
    // Check the Index Indicator is present.
193
49.5k
    if (in[(*in_pos)++] != INDEX_INDICATOR)
194
0
      return LZMA_DATA_ERROR;
195
196
49.5k
    index_hash->sequence = SEQ_COUNT;
197
49.5k
    break;
198
199
49.5k
  case SEQ_COUNT: {
200
49.5k
    ret = lzma_vli_decode(&index_hash->remaining,
201
49.5k
        &index_hash->pos, in, in_pos, in_size);
202
49.5k
    if (ret != LZMA_STREAM_END)
203
16
      goto out;
204
205
    // The count must match the count of the Blocks decoded.
206
49.5k
    if (index_hash->remaining != index_hash->blocks.count)
207
120
      return LZMA_DATA_ERROR;
208
209
49.3k
    ret = LZMA_OK;
210
49.3k
    index_hash->pos = 0;
211
212
    // Handle the special case when there are no Blocks.
213
49.3k
    index_hash->sequence = index_hash->remaining == 0
214
49.3k
        ? SEQ_PADDING_INIT : SEQ_UNPADDED;
215
49.3k
    break;
216
49.5k
  }
217
218
1.28k
  case SEQ_UNPADDED:
219
2.42k
  case SEQ_UNCOMPRESSED: {
220
2.42k
    lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
221
2.42k
        ? &index_hash->unpadded_size
222
2.42k
        : &index_hash->uncompressed_size;
223
224
2.42k
    ret = lzma_vli_decode(size, &index_hash->pos,
225
2.42k
        in, in_pos, in_size);
226
2.42k
    if (ret != LZMA_STREAM_END)
227
13
      goto out;
228
229
2.41k
    ret = LZMA_OK;
230
2.41k
    index_hash->pos = 0;
231
232
2.41k
    if (index_hash->sequence == SEQ_UNPADDED) {
233
1.28k
      if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
234
1.28k
          || index_hash->unpadded_size
235
1.27k
            > UNPADDED_SIZE_MAX)
236
7
        return LZMA_DATA_ERROR;
237
238
1.27k
      index_hash->sequence = SEQ_UNCOMPRESSED;
239
1.27k
    } else {
240
      // Update the hash.
241
1.12k
      hash_append(&index_hash->records,
242
1.12k
          index_hash->unpadded_size,
243
1.12k
          index_hash->uncompressed_size);
244
245
      // Verify that we don't go over the known sizes. Note
246
      // that this validation is simpler than the one used
247
      // in lzma_index_hash_append(), because here we know
248
      // that values in index_hash->blocks are already
249
      // validated and we are fine as long as we don't
250
      // exceed them in index_hash->records.
251
1.12k
      if (index_hash->blocks.blocks_size
252
1.12k
          < index_hash->records.blocks_size
253
1.12k
          || index_hash->blocks.uncompressed_size
254
1.09k
          < index_hash->records.uncompressed_size
255
1.12k
          || index_hash->blocks.index_list_size
256
1.03k
          < index_hash->records.index_list_size)
257
99
        return LZMA_DATA_ERROR;
258
259
      // Check if this was the last Record.
260
1.02k
      index_hash->sequence = --index_hash->remaining == 0
261
1.02k
          ? SEQ_PADDING_INIT : SEQ_UNPADDED;
262
1.02k
    }
263
264
2.30k
    break;
265
2.41k
  }
266
267
49.1k
  case SEQ_PADDING_INIT:
268
49.1k
    index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
269
49.1k
        index_hash->records.count,
270
49.1k
        index_hash->records.index_list_size)) & 3;
271
49.1k
    index_hash->sequence = SEQ_PADDING;
272
273
  // Fall through
274
275
146k
  case SEQ_PADDING:
276
146k
    if (index_hash->pos > 0) {
277
97.1k
      --index_hash->pos;
278
97.1k
      if (in[(*in_pos)++] != 0x00)
279
11
        return LZMA_DATA_ERROR;
280
281
97.1k
      break;
282
97.1k
    }
283
284
    // Compare the sizes.
285
49.0k
    if (index_hash->blocks.blocks_size
286
49.0k
        != index_hash->records.blocks_size
287
49.0k
        || index_hash->blocks.uncompressed_size
288
49.0k
        != index_hash->records.uncompressed_size
289
49.0k
        || index_hash->blocks.index_list_size
290
49.0k
        != index_hash->records.index_list_size)
291
29
      return LZMA_DATA_ERROR;
292
293
    // Finish the hashes and compare them.
294
49.0k
    lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
295
49.0k
    lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
296
49.0k
    if (memcmp(index_hash->blocks.check.buffer.u8,
297
49.0k
        index_hash->records.check.buffer.u8,
298
49.0k
        lzma_check_size(LZMA_CHECK_BEST)) != 0)
299
19
      return LZMA_DATA_ERROR;
300
301
    // Finish the CRC32 calculation.
302
49.0k
    index_hash->crc32 = lzma_crc32(in + in_start,
303
49.0k
        *in_pos - in_start, index_hash->crc32);
304
305
49.0k
    index_hash->sequence = SEQ_CRC32;
306
307
  // Fall through
308
309
49.0k
  case SEQ_CRC32:
310
196k
    do {
311
196k
      if (*in_pos == in_size)
312
23
        return LZMA_OK;
313
314
196k
      if (((index_hash->crc32 >> (index_hash->pos * 8))
315
196k
          & 0xFF) != in[(*in_pos)++]) {
316
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
317
        return LZMA_DATA_ERROR;
318
#endif
319
7.28k
      }
320
321
196k
    } while (++index_hash->pos < 4);
322
323
49.0k
    return LZMA_STREAM_END;
324
325
0
  default:
326
0
    assert(0);
327
0
    return LZMA_PROG_ERROR;
328
247k
  }
329
330
208
out:
331
  // Update the CRC32.
332
  //
333
  // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
334
  // In such a case we had no input and thus in_used == 0.
335
208
  {
336
208
    const size_t in_used = *in_pos - in_start;
337
208
    if (in_used > 0)
338
208
      index_hash->crc32 = lzma_crc32(in + in_start,
339
208
          in_used, index_hash->crc32);
340
208
  }
341
342
208
  return ret;
343
49.5k
}