Coverage Report

Created: 2025-12-31 07:01

/src/git/reftable/block.c
(every executable line below has an execution count of 0; the file is entirely uncovered)

/*
 * Copyright 2020 Google LLC
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file or at
 * https://developers.google.com/open-source/licenses/bsd
 */

#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "iter.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"
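
/*
 * Byte sizes of the reftable header and footer for a given format
 * version. Only versions 1 and 2 exist; any other value indicates a
 * programming error, hence the abort().
 */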
size_t header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

size_t footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}
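
/*
 * Account for an entry of `n` bytes written at `w->next`, optionally
 * recording that offset as a restart point, and remember the entry's key
 * in `last_key` for prefix compression of the next entry. Fails with
 * REFTABLE_ENTRY_TOO_BIG_ERROR if the entry plus the eventual restart
 * table (3 bytes per restart point plus a 2-byte count) would no longer
 * fit into the block.
 */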
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct reftable_buf *key)
{
	uint32_t rlen;
	int err;

	rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS)
		is_restart = 0;

	if (is_restart)
		rlen++;
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return REFTABLE_ENTRY_TOO_BIG_ERROR;
	if (is_restart) {
		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
					    w->restart_cap);
		if (!w->restarts)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	reftable_buf_reset(&w->last_key);
	err = reftable_buf_add(&w->last_key, key->buf, key->len);
	if (err < 0)
		return err;

	w->entries++;
	return 0;
}
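
/*
 * Initialize the writer for a new block of the given type over the
 * caller-provided `block` buffer. The zlib stream used for compressing
 * log blocks is allocated on first use and reused for subsequent blocks.
 */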
int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
{
	bw->block = block;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->block[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		if (!bw->zstream)
			return REFTABLE_OUT_OF_MEMORY_ERROR;
		deflateInit(bw->zstream, 9);
	}

	return 0;
}

uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->block[bw->header_off];
}

/*
 * Adds the reftable_record to the block. Returns 0 on success and
 * appropriate error codes on failure.
 */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct reftable_buf empty = REFTABLE_BUF_INIT;
	struct reftable_buf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->block + w->next,
		.len = w->block_size - w->next,
	};
	struct string_view start = out;
	int is_restart = 0;
	int n = 0;
	int err;

	err = reftable_record_key(rec, &w->scratch);
	if (err < 0)
		goto done;

	if (!w->scratch.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, w->scratch,
				reftable_record_val_type(rec));
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0) {
		err = n;
		goto done;
	}
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &w->scratch);
done:
	return err;
}

int block_writer_finish(struct block_writer *w)
{
	for (uint32_t i = 0; i < w->restart_len; i++) {
		reftable_put_be24(w->block + w->next, w->restarts[i]);
		w->next += 3;
	}

	reftable_put_be16(w->block + w->next, w->restart_len);
	w->next += 2;
	reftable_put_be24(w->block + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
					    w->compressed_cap);
		if (!w->compressed) {
			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
			return ret;
		}

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->block + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform the compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->block + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}
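
/*
 * Read `sz` bytes at `off` from the block source into `dest`, clamping
 * the request to the source's total size so that a short read at the end
 * of the table succeeds instead of failing.
 */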
static int read_block(struct reftable_block_source *source,
		      struct reftable_block_data *dest, uint64_t off,
		      uint32_t sz)
{
	size_t size = block_source_size(source);
	block_source_release_data(dest);
	if (off >= size)
		return 0;
	if (off + sz > size)
		sz = size - off;
	return block_source_read_data(source, dest, off, sz);
}
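
/*
 * Load the block at `offset` from the block source and parse its header:
 * verify the block type, re-read the block in case it is larger than the
 * initial guess, inflate log blocks, and locate the restart point table
 * at the block's tail.
 */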
int reftable_block_init(struct reftable_block *block,
			struct reftable_block_source *source,
			uint32_t offset, uint32_t header_size,
			uint32_t table_block_size, uint32_t hash_size,
			uint8_t want_type)
{
	uint32_t guess_block_size = table_block_size ?
		table_block_size : DEFAULT_BLOCK_SIZE;
	uint32_t full_block_size = table_block_size;
	uint16_t restart_count;
	uint32_t restart_off;
	uint32_t block_size;
	uint8_t block_type;
	int err;

	err = read_block(source, &block->block_data, offset, guess_block_size);
	if (err < 0)
		goto done;

	block_type = block->block_data.data[header_size];
	if (!reftable_is_block_type(block_type)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}
	if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
		err = 1;
		goto done;
	}

	block_size = reftable_get_be24(block->block_data.data + header_size + 1);
	if (block_size > guess_block_size) {
		err = read_block(source, &block->block_data, offset, block_size);
		if (err < 0)
			goto done;
	}

	if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_size;
		uLong dst_len = block_size - block_header_skip;
		uLong src_len = block->block_data.len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
					    block->uncompressed_cap);
		if (!block->uncompressed_data) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto done;
		}

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);

		if (!block->zstream) {
			REFTABLE_CALLOC_ARRAY(block->zstream, 1);
			if (!block->zstream) {
				err = REFTABLE_OUT_OF_MEMORY_ERROR;
				goto done;
			}

			err = inflateInit(block->zstream);
		} else {
			err = inflateReset(block->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		block->zstream->next_in = block->block_data.data + block_header_skip;
		block->zstream->avail_in = src_len;
		block->zstream->next_out = block->uncompressed_data + block_header_skip;
		block->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(block->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (block->zstream->total_out + block_header_skip != block_size) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		block_source_release_data(&block->block_data);
		block->block_data.data = block->uncompressed_data;
		block->block_data.len = block_size;
		full_block_size = src_len + block_header_skip - block->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = block_size;
	} else if (block_size < full_block_size && block_size < block->block_data.len &&
		   block->block_data.data[block_size] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = block_size;
	}

	restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
	restart_off = block_size - 2 - 3 * restart_count;

	block->block_type = block_type;
	block->hash_size = hash_size;
	block->restart_off = restart_off;
	block->full_block_size = full_block_size;
	block->header_off = header_size;
	block->restart_count = restart_count;

	err = 0;

done:
	if (err < 0)
		reftable_block_release(block);
	return err;
}

void reftable_block_release(struct reftable_block *block)
{
	inflateEnd(block->zstream);
	reftable_free(block->zstream);
	reftable_free(block->uncompressed_data);
	block_source_release_data(&block->block_data);
	memset(block, 0, sizeof(*block));
}

uint8_t reftable_block_type(const struct reftable_block *b)
{
	return b->block_data.data[b->header_off];
}
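
/*
 * Decode the first key of the block into `key`. The first record sits
 * directly after the 4-byte block header and, being a restart point, is
 * stored without prefix compression.
 */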
int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
{
	int off = block->header_off + 4, n;
	struct string_view in = {
		.buf = block->block_data.data + off,
		.len = block->restart_off - off,
	};
	uint8_t extra = 0;

	reftable_buf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}

static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
{
	return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
}

void block_iter_init(struct block_iter *it, const struct reftable_block *block)
{
	it->block = block;
	block_iter_seek_start(it);
}

void block_iter_seek_start(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = it->block->header_off + 4;
}
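
/*
 * Arguments for the binary-search callback below, which reports whether
 * the needle key sorts strictly before the record stored at a given
 * restart point. Decoding errors are reported out-of-band via `error`.
 */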
struct restart_needle_less_args {
	int error;
	struct reftable_buf needle;
	const struct reftable_block *block;
};

static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_restart_offset(args->block, idx);
	struct string_view in = {
		.buf = args->block->block_data.data + off,
		.len = args->block->restart_off - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}
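
/*
 * Decode the record at the iterator's current offset into `rec` and
 * advance the iterator. Returns 0 on success, 1 once the restart point
 * table is reached (end of block), and a negative value on error.
 */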
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block->block_data.data + it->next_off,
		.len = it->block->restart_off - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block->restart_off)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}

void block_iter_reset(struct block_iter *it)
{
	reftable_buf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
}

void block_iter_close(struct block_iter *it)
{
	reftable_buf_release(&it->last_key);
	reftable_buf_release(&it->scratch);
}
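
/*
 * Position the iterator such that the next call to `block_iter_next()`
 * yields the first record whose key is greater or equal to `want`.
 */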
int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.block = it->block,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(it->block->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 *   - `i == 0`: The wanted record is smaller than the record found at
	 *     the first restart point. As the first restart point is the first
	 *     record in the block, our wanted record cannot be located in this
	 *     block at all. We still need to position the iterator so that the
	 *     next call to `block_iter_next()` will yield an end-of-iterator
	 *     signal.
	 *
	 *   - `i == restart_count`: The wanted record was not found at any of
	 *     the restart points. As there is no restart point at the end of
	 *     the block the record may thus be contained in the last section.
	 *
	 *   - `i > 0`: The wanted record must be contained in the section
	 *     before the found restart point. We thus do a linear search
	 *     starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_restart_offset(it->block, i - 1);
	else
		it->next_off = it->block->header_off + 4;

	err = reftable_record_init(&rec, reftable_block_type(it->block));
	if (err < 0)
		goto done;

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted
	 * record. We thus don't want to position our iterator at the
	 * sought-after record, but one before. To do so, we have to go one
	 * entry too far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		err = reftable_record_key(&rec, &it->last_key);
		if (err < 0)
			goto done;

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}

static int block_iter_seek_void(void *it, struct reftable_record *want)
{
	struct reftable_buf buf = REFTABLE_BUF_INIT;
	struct block_iter *bi = it;
	int err;

	if (bi->block->block_type != want->type)
		return REFTABLE_API_ERROR;

	err = reftable_record_key(want, &buf);
	if (err < 0)
		goto out;

	err = block_iter_seek_key(it, &buf);
	if (err < 0)
		goto out;

	err = 0;

out:
	reftable_buf_release(&buf);
	return err;
}

static int block_iter_next_void(void *it, struct reftable_record *rec)
{
	return block_iter_next(it, rec);
}

static void block_iter_close_void(void *it)
{
	block_iter_close(it);
}

static struct reftable_iterator_vtable block_iter_vtable = {
	.seek = &block_iter_seek_void,
	.next = &block_iter_next_void,
	.close = &block_iter_close_void,
};

int reftable_block_init_iterator(const struct reftable_block *b,
				 struct reftable_iterator *it)
{
	struct block_iter *bi;

	REFTABLE_CALLOC_ARRAY(bi, 1);
	/* Guard against allocation failure before the iterator is used. */
	if (!bi)
		return REFTABLE_OUT_OF_MEMORY_ERROR;
	block_iter_init(bi, b);

	assert(!it->ops);
	it->iter_arg = bi;
	it->ops = &block_iter_vtable;

	return 0;
}

void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->zstream);
	REFTABLE_FREE_AND_NULL(bw->restarts);
	REFTABLE_FREE_AND_NULL(bw->compressed);
	reftable_buf_release(&bw->scratch);
	reftable_buf_release(&bw->last_key);
	/* the block is not owned. */
}