/src/git/reftable/block.c
Line | Count | Source |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "block.h" |
10 | | |
11 | | #include "blocksource.h" |
12 | | #include "constants.h" |
13 | | #include "record.h" |
14 | | #include "reftable-error.h" |
15 | | #include "system.h" |
16 | | #include <zlib.h> |
17 | | |
18 | | int header_size(int version) |
19 | 0 | { |
20 | 0 | switch (version) { |
21 | 0 | case 1: |
22 | 0 | return 24; |
23 | 0 | case 2: |
24 | 0 | return 28; |
25 | 0 | } |
26 | 0 | abort(); |
27 | 0 | } |
28 | | |
29 | | int footer_size(int version) |
30 | 0 | { |
31 | 0 | switch (version) { |
32 | 0 | case 1: |
33 | 0 | return 68; |
34 | 0 | case 2: |
35 | 0 | return 72; |
36 | 0 | } |
37 | 0 | abort(); |
38 | 0 | } |
39 | | |
40 | | static int block_writer_register_restart(struct block_writer *w, int n, |
41 | | int is_restart, struct strbuf *key) |
42 | 0 | { |
43 | 0 | int rlen = w->restart_len; |
44 | 0 | if (rlen >= MAX_RESTARTS) { |
45 | 0 | is_restart = 0; |
46 | 0 | } |
47 | |
48 | 0 | if (is_restart) { |
49 | 0 | rlen++; |
50 | 0 | } |
51 | 0 | if (2 + 3 * rlen + n > w->block_size - w->next) |
52 | 0 | return -1; |
53 | 0 | if (is_restart) { |
54 | 0 | REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap); |
55 | 0 | w->restarts[w->restart_len++] = w->next; |
56 | 0 | } |
57 | |
58 | 0 | w->next += n; |
59 | |
60 | 0 | strbuf_reset(&w->last_key); |
61 | 0 | strbuf_addbuf(&w->last_key, key); |
62 | 0 | w->entries++; |
63 | 0 | return 0; |
64 | 0 | } |
65 | | |
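A worked example of the space check in block_writer_register_restart() above (numbers invented for illustration, not taken from the source): with block_size = 4096, next = 1000, restart_len = 10 and an incoming record of n = 40 bytes that starts a new restart, the reserved trailer grows to 2 + 3 * 11 = 35 bytes, and the record is accepted because 35 + 40 = 75 does not exceed the remaining 4096 - 1000 = 3096 bytes.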
66 | | void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, |
67 | | uint32_t block_size, uint32_t header_off, int hash_size) |
68 | 0 | { |
69 | 0 | bw->buf = buf; |
70 | 0 | bw->hash_size = hash_size; |
71 | 0 | bw->block_size = block_size; |
72 | 0 | bw->header_off = header_off; |
73 | 0 | bw->buf[header_off] = typ; |
74 | 0 | bw->next = header_off + 4; |
75 | 0 | bw->restart_interval = 16; |
76 | 0 | bw->entries = 0; |
77 | 0 | bw->restart_len = 0; |
78 | 0 | bw->last_key.len = 0; |
79 | 0 | if (!bw->zstream) { |
80 | 0 | REFTABLE_CALLOC_ARRAY(bw->zstream, 1); |
81 | 0 | deflateInit(bw->zstream, 9); |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | uint8_t block_writer_type(struct block_writer *bw) |
86 | 0 | { |
87 | 0 | return bw->buf[bw->header_off]; |
88 | 0 | } |
89 | | |
90 | | /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on |
91 | | success. Returns REFTABLE_API_ERROR if attempting to write a record with |
92 | | an empty key. */ |
93 | | int block_writer_add(struct block_writer *w, struct reftable_record *rec) |
94 | 0 | { |
95 | 0 | struct strbuf empty = STRBUF_INIT; |
96 | 0 | struct strbuf last = |
97 | 0 | w->entries % w->restart_interval == 0 ? empty : w->last_key; |
98 | 0 | struct string_view out = { |
99 | 0 | .buf = w->buf + w->next, |
100 | 0 | .len = w->block_size - w->next, |
101 | 0 | }; |
102 | |
103 | 0 | struct string_view start = out; |
104 | |
105 | 0 | int is_restart = 0; |
106 | 0 | struct strbuf key = STRBUF_INIT; |
107 | 0 | int n = 0; |
108 | 0 | int err = -1; |
109 | |
110 | 0 | reftable_record_key(rec, &key); |
111 | 0 | if (!key.len) { |
112 | 0 | err = REFTABLE_API_ERROR; |
113 | 0 | goto done; |
114 | 0 | } |
115 | | |
116 | 0 | n = reftable_encode_key(&is_restart, out, last, key, |
117 | 0 | reftable_record_val_type(rec)); |
118 | 0 | if (n < 0) |
119 | 0 | goto done; |
120 | 0 | string_view_consume(&out, n); |
121 | |
122 | 0 | n = reftable_record_encode(rec, out, w->hash_size); |
123 | 0 | if (n < 0) |
124 | 0 | goto done; |
125 | 0 | string_view_consume(&out, n); |
126 | |
127 | 0 | err = block_writer_register_restart(w, start.len - out.len, is_restart, |
128 | 0 | &key); |
129 | 0 | done: |
130 | 0 | strbuf_release(&key); |
131 | 0 | return err; |
132 | 0 | } |
133 | | |
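A minimal sketch of how a caller might drive the writer API above (hypothetical values; the real call sites live elsewhere in the reftable code, and rec is assumed to be an already populated struct reftable_record):

	uint8_t buf[4096];
	struct block_writer bw = { 0 }; /* zstream must start out NULL */
	int n;

	/* 24 is the version-1 file header size; only the first block carries it. */
	block_writer_init(&bw, BLOCK_TYPE_REF, buf, sizeof(buf), 24, 20 /* SHA-1 */);

	if (block_writer_add(&bw, &rec) < 0) {
		/* record does not fit: finish this block and retry in a fresh one */
	}

	n = block_writer_finish(&bw); /* bytes used, or a negative error code */
	block_writer_release(&bw);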
134 | | int block_writer_finish(struct block_writer *w) |
135 | 0 | { |
136 | 0 | int i; |
137 | 0 | for (i = 0; i < w->restart_len; i++) { |
138 | 0 | put_be24(w->buf + w->next, w->restarts[i]); |
139 | 0 | w->next += 3; |
140 | 0 | } |
141 | |
142 | 0 | put_be16(w->buf + w->next, w->restart_len); |
143 | 0 | w->next += 2; |
144 | 0 | put_be24(w->buf + 1 + w->header_off, w->next); |
145 | | |
146 | | /* |
147 | | * Log records are stored zlib-compressed. Note that the compression |
148 | | * also spans over the restart points we have just written. |
149 | | */ |
150 | 0 | if (block_writer_type(w) == BLOCK_TYPE_LOG) { |
151 | 0 | int block_header_skip = 4 + w->header_off; |
152 | 0 | uLongf src_len = w->next - block_header_skip, compressed_len; |
153 | 0 | int ret; |
154 | |
155 | 0 | ret = deflateReset(w->zstream); |
156 | 0 | if (ret != Z_OK) |
157 | 0 | return REFTABLE_ZLIB_ERROR; |
158 | | |
159 | | /* |
160 | | * Precompute the upper bound of how many bytes the compressed |
161 | | * data may end up with. Combined with `Z_FINISH`, `deflate()` |
162 | | * is guaranteed to return `Z_STREAM_END`. |
163 | | */ |
164 | 0 | compressed_len = deflateBound(w->zstream, src_len); |
165 | 0 | REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap); |
166 | |
167 | 0 | w->zstream->next_out = w->compressed; |
168 | 0 | w->zstream->avail_out = compressed_len; |
169 | 0 | w->zstream->next_in = w->buf + block_header_skip; |
170 | 0 | w->zstream->avail_in = src_len; |
171 | | |
172 | | /* |
173 | | * We want to perform all compression in a single step, which |
174 | | * is why we can pass Z_FINISH here. As we have precomputed the |
175 | | * deflated buffer's size via `deflateBound()` this function is |
176 | | * guaranteed to succeed according to the zlib documentation. |
177 | | */ |
178 | 0 | ret = deflate(w->zstream, Z_FINISH); |
179 | 0 | if (ret != Z_STREAM_END) |
180 | 0 | return REFTABLE_ZLIB_ERROR; |
181 | | |
182 | | /* |
183 | | * Overwrite the uncompressed data we have already written and |
184 | | * adjust the `next` pointer to point right after the |
185 | | * compressed data. |
186 | | */ |
187 | 0 | memcpy(w->buf + block_header_skip, w->compressed, |
188 | 0 | w->zstream->total_out); |
189 | 0 | w->next = w->zstream->total_out + block_header_skip; |
190 | 0 | } |
191 | | |
192 | 0 | return w->next; |
193 | 0 | } |
194 | | |
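For orientation, the byte layout that block_writer_finish() leaves behind follows directly from the put_be24()/put_be16() calls above (a summary sketch, not authoritative format documentation):

	/*
	 * [header_off bytes of file header, first block of the table only]
	 * [1 byte   block type: 'r', 'o', 'i' or 'g']
	 * [3 bytes  big-endian block length; for log blocks the uncompressed length]
	 * [prefix-compressed records]
	 * [3 bytes  big-endian restart offset]  x restart_len
	 * [2 bytes  big-endian restart count]
	 *
	 * For log ('g') blocks, everything after the 4-byte block header is
	 * additionally zlib-deflated in place.
	 */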
195 | | int block_reader_init(struct block_reader *br, struct reftable_block *block, |
196 | | uint32_t header_off, uint32_t table_block_size, |
197 | | int hash_size) |
198 | 0 | { |
199 | 0 | uint32_t full_block_size = table_block_size; |
200 | 0 | uint8_t typ = block->data[header_off]; |
201 | 0 | uint32_t sz = get_be24(block->data + header_off + 1); |
202 | 0 | int err = 0; |
203 | 0 | uint16_t restart_count = 0; |
204 | 0 | uint32_t restart_start = 0; |
205 | 0 | uint8_t *restart_bytes = NULL; |
206 | |
207 | 0 | reftable_block_done(&br->block); |
208 | |
209 | 0 | if (!reftable_is_block_type(typ)) { |
210 | 0 | err = REFTABLE_FORMAT_ERROR; |
211 | 0 | goto done; |
212 | 0 | } |
213 | | |
214 | 0 | if (typ == BLOCK_TYPE_LOG) { |
215 | 0 | uint32_t block_header_skip = 4 + header_off; |
216 | 0 | uLong dst_len = sz - block_header_skip; |
217 | 0 | uLong src_len = block->len - block_header_skip; |
218 | | |
219 | | /* Log blocks specify the *uncompressed* size in their header. */ |
220 | 0 | REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, |
221 | 0 | br->uncompressed_cap); |
222 | | |
223 | | /* Copy over the block header verbatim. It's not compressed. */ |
224 | 0 | memcpy(br->uncompressed_data, block->data, block_header_skip); |
225 | |
226 | 0 | if (!br->zstream) { |
227 | 0 | REFTABLE_CALLOC_ARRAY(br->zstream, 1); |
228 | 0 | err = inflateInit(br->zstream); |
229 | 0 | } else { |
230 | 0 | err = inflateReset(br->zstream); |
231 | 0 | } |
232 | 0 | if (err != Z_OK) { |
233 | 0 | err = REFTABLE_ZLIB_ERROR; |
234 | 0 | goto done; |
235 | 0 | } |
236 | | |
237 | 0 | br->zstream->next_in = block->data + block_header_skip; |
238 | 0 | br->zstream->avail_in = src_len; |
239 | 0 | br->zstream->next_out = br->uncompressed_data + block_header_skip; |
240 | 0 | br->zstream->avail_out = dst_len; |
241 | | |
242 | | /* |
243 | | * We know both input as well as output size, and we know that |
244 | | * the sizes should never be bigger than `uInt_MAX` because |
245 | | * blocks can at most be 16MB large. We can thus use `Z_FINISH` |
246 | | * here to instruct zlib to inflate the data in one go, which |
247 | | * is more efficient than using `Z_NO_FLUSH`. |
248 | | */ |
249 | 0 | err = inflate(br->zstream, Z_FINISH); |
250 | 0 | if (err != Z_STREAM_END) { |
251 | 0 | err = REFTABLE_ZLIB_ERROR; |
252 | 0 | goto done; |
253 | 0 | } |
254 | 0 | err = 0; |
255 | |
256 | 0 | if (br->zstream->total_out + block_header_skip != sz) { |
257 | 0 | err = REFTABLE_FORMAT_ERROR; |
258 | 0 | goto done; |
259 | 0 | } |
260 | | |
261 | | /* We're done with the input data. */ |
262 | 0 | reftable_block_done(block); |
263 | 0 | block->data = br->uncompressed_data; |
264 | 0 | block->len = sz; |
265 | 0 | full_block_size = src_len + block_header_skip - br->zstream->avail_in; |
266 | 0 | } else if (full_block_size == 0) { |
267 | 0 | full_block_size = sz; |
268 | 0 | } else if (sz < full_block_size && sz < block->len && |
269 | 0 | block->data[sz] != 0) { |
270 | | /* If the block is smaller than the full block size, it is |
271 | | padded (data followed by '\0') or the next block is |
272 | | unaligned. */ |
273 | 0 | full_block_size = sz; |
274 | 0 | } |
275 | | |
276 | 0 | restart_count = get_be16(block->data + sz - 2); |
277 | 0 | restart_start = sz - 2 - 3 * restart_count; |
278 | 0 | restart_bytes = block->data + restart_start; |
279 | | |
280 | | /* transfer ownership. */ |
281 | 0 | br->block = *block; |
282 | 0 | block->data = NULL; |
283 | 0 | block->len = 0; |
284 | |
285 | 0 | br->hash_size = hash_size; |
286 | 0 | br->block_len = restart_start; |
287 | 0 | br->full_block_size = full_block_size; |
288 | 0 | br->header_off = header_off; |
289 | 0 | br->restart_count = restart_count; |
290 | 0 | br->restart_bytes = restart_bytes; |
291 | |
292 | 0 | done: |
293 | 0 | return err; |
294 | 0 | } |
295 | | |
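The read path then looks roughly as follows (a sketch assuming block has already been filled by a block source; error handling trimmed):

	struct block_reader br = { 0 };
	struct block_iter it = { .last_key = STRBUF_INIT, .scratch = STRBUF_INIT };
	struct reftable_record rec;
	int err;

	err = block_reader_init(&br, &block, 24 /* header_off */, 4096, 20 /* SHA-1 */);
	reftable_record_init(&rec, block_reader_type(&br));

	block_iter_seek_start(&it, &br);
	while ((err = block_iter_next(&it, &rec)) == 0)
		; /* use rec */

	block_iter_close(&it);
	reftable_record_release(&rec);
	block_reader_release(&br);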
296 | | void block_reader_release(struct block_reader *br) |
297 | 0 | { |
298 | 0 | inflateEnd(br->zstream); |
299 | 0 | reftable_free(br->zstream); |
300 | 0 | reftable_free(br->uncompressed_data); |
301 | 0 | reftable_block_done(&br->block); |
302 | 0 | } |
303 | | |
304 | | uint8_t block_reader_type(const struct block_reader *r) |
305 | 0 | { |
306 | 0 | return r->block.data[r->header_off]; |
307 | 0 | } |
308 | | |
309 | | int block_reader_first_key(const struct block_reader *br, struct strbuf *key) |
310 | 0 | { |
311 | 0 | int off = br->header_off + 4, n; |
312 | 0 | struct string_view in = { |
313 | 0 | .buf = br->block.data + off, |
314 | 0 | .len = br->block_len - off, |
315 | 0 | }; |
316 | 0 | uint8_t extra = 0; |
317 | |
318 | 0 | strbuf_reset(key); |
319 | |
320 | 0 | n = reftable_decode_key(key, &extra, in); |
321 | 0 | if (n < 0) |
322 | 0 | return n; |
323 | 0 | if (!key->len) |
324 | 0 | return REFTABLE_FORMAT_ERROR; |
325 | | |
326 | 0 | return 0; |
327 | 0 | } |
328 | | |
329 | | static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx) |
330 | 0 | { |
331 | 0 | return get_be24(br->restart_bytes + 3 * idx); |
332 | 0 | } |
333 | | |
334 | | void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) |
335 | 0 | { |
336 | 0 | it->block = br->block.data; |
337 | 0 | it->block_len = br->block_len; |
338 | 0 | it->hash_size = br->hash_size; |
339 | 0 | strbuf_reset(&it->last_key); |
340 | 0 | it->next_off = br->header_off + 4; |
341 | 0 | } |
342 | | |
343 | | struct restart_needle_less_args { |
344 | | int error; |
345 | | struct strbuf needle; |
346 | | const struct block_reader *reader; |
347 | | }; |
348 | | |
349 | | static int restart_needle_less(size_t idx, void *_args) |
350 | 0 | { |
351 | 0 | struct restart_needle_less_args *args = _args; |
352 | 0 | uint32_t off = block_reader_restart_offset(args->reader, idx); |
353 | 0 | struct string_view in = { |
354 | 0 | .buf = args->reader->block.data + off, |
355 | 0 | .len = args->reader->block_len - off, |
356 | 0 | }; |
357 | 0 | uint64_t prefix_len, suffix_len; |
358 | 0 | uint8_t extra; |
359 | 0 | int n; |
360 | | |
361 | | /* |
362 | | * Records at restart points are stored without prefix compression, so |
363 | | * there is no need to fully decode the record key here. This removes |
364 | | * the need for allocating memory. |
365 | | */ |
366 | 0 | n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); |
367 | 0 | if (n < 0 || prefix_len) { |
368 | 0 | args->error = 1; |
369 | 0 | return -1; |
370 | 0 | } |
371 | | |
372 | 0 | string_view_consume(&in, n); |
373 | 0 | if (suffix_len > in.len) { |
374 | 0 | args->error = 1; |
375 | 0 | return -1; |
376 | 0 | } |
377 | | |
378 | 0 | n = memcmp(args->needle.buf, in.buf, |
379 | 0 | args->needle.len < suffix_len ? args->needle.len : suffix_len); |
380 | 0 | if (n) |
381 | 0 | return n < 0; |
382 | 0 | return args->needle.len < suffix_len; |
383 | 0 | } |
384 | | |
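As a concrete illustration of the property relied on above (example keys invented for the purpose): if the previous key is "refs/heads/main" and the current key is "refs/heads/master", an ordinary entry stores prefix_len = 13 (the shared "refs/heads/ma") followed by the suffix "ster", whereas an entry sitting at a restart point always stores prefix_len = 0 and its full key. That is why restart_needle_less() can compare the needle against the decoded suffix alone, without allocating or reassembling a key.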
385 | | int block_iter_next(struct block_iter *it, struct reftable_record *rec) |
386 | 0 | { |
387 | 0 | struct string_view in = { |
388 | 0 | .buf = (unsigned char *) it->block + it->next_off, |
389 | 0 | .len = it->block_len - it->next_off, |
390 | 0 | }; |
391 | 0 | struct string_view start = in; |
392 | 0 | uint8_t extra = 0; |
393 | 0 | int n = 0; |
394 | |
395 | 0 | if (it->next_off >= it->block_len) |
396 | 0 | return 1; |
397 | | |
398 | 0 | n = reftable_decode_key(&it->last_key, &extra, in); |
399 | 0 | if (n < 0) |
400 | 0 | return -1; |
401 | 0 | if (!it->last_key.len) |
402 | 0 | return REFTABLE_FORMAT_ERROR; |
403 | | |
404 | 0 | string_view_consume(&in, n); |
405 | 0 | n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, |
406 | 0 | &it->scratch); |
407 | 0 | if (n < 0) |
408 | 0 | return -1; |
409 | 0 | string_view_consume(&in, n); |
410 | |
411 | 0 | it->next_off += start.len - in.len; |
412 | 0 | return 0; |
413 | 0 | } |
414 | | |
415 | | void block_iter_reset(struct block_iter *it) |
416 | 0 | { |
417 | 0 | strbuf_reset(&it->last_key); |
418 | 0 | it->next_off = 0; |
419 | 0 | it->block = NULL; |
420 | 0 | it->block_len = 0; |
421 | 0 | it->hash_size = 0; |
422 | 0 | } |
423 | | |
424 | | void block_iter_close(struct block_iter *it) |
425 | 0 | { |
426 | 0 | strbuf_release(&it->last_key); |
427 | 0 | strbuf_release(&it->scratch); |
428 | 0 | } |
429 | | |
430 | | int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, |
431 | | struct strbuf *want) |
432 | 0 | { |
433 | 0 | struct restart_needle_less_args args = { |
434 | 0 | .needle = *want, |
435 | 0 | .reader = br, |
436 | 0 | }; |
437 | 0 | struct reftable_record rec; |
438 | 0 | int err = 0; |
439 | 0 | size_t i; |
440 | | |
441 | | /* |
442 | | * Perform a binary search over the block's restart points, which |
443 | | * avoids doing a linear scan over the whole block. Like this, we |
444 | | * identify the section of the block that should contain our key. |
445 | | * |
446 | | * Note that we explicitly search for the first restart point _greater_ |
447 | | * than the sought-after record, not _greater or equal_ to it. In case |
448 | | * the sought-after record is located directly at the restart point we |
449 | | * would otherwise start doing the linear search at the preceding |
450 | | * restart point. While that works alright, we would end up scanning |
451 | | * too many records. |
452 | | */ |
453 | 0 | i = binsearch(br->restart_count, &restart_needle_less, &args); |
454 | 0 | if (args.error) { |
455 | 0 | err = REFTABLE_FORMAT_ERROR; |
456 | 0 | goto done; |
457 | 0 | } |
458 | | |
459 | | /* |
460 | | * Now there are multiple cases: |
461 | | * |
462 | | * - `i == 0`: The wanted record is smaller than the record found at |
463 | | * the first restart point. As the first restart point is the first |
464 | | * record in the block, our wanted record cannot be located in this |
465 | | * block at all. We still need to position the iterator so that the |
466 | | * next call to `block_iter_next()` will yield an end-of-iterator |
467 | | * signal. |
468 | | * |
469 | | * - `i == restart_count`: The wanted record was not found at any of |
470 | | * the restart points. As there is no restart point at the end of |
471 | | * the block, the record may thus be contained in its last section. |
472 | | * |
473 | | * - `i > 0`: The wanted record must be contained in the section |
474 | | * before the found restart point. We thus do a linear search |
475 | | * starting from the preceding restart point. |
476 | | */ |
477 | 0 | if (i > 0) |
478 | 0 | it->next_off = block_reader_restart_offset(br, i - 1); |
479 | 0 | else |
480 | 0 | it->next_off = br->header_off + 4; |
481 | 0 | it->block = br->block.data; |
482 | 0 | it->block_len = br->block_len; |
483 | 0 | it->hash_size = br->hash_size; |
484 | |
485 | 0 | reftable_record_init(&rec, block_reader_type(br)); |
486 | | |
487 | | /* |
488 | | * We're looking for the last entry less than the wanted key so that |
489 | | * the next call to `block_iter_next()` would yield the wanted |
490 | | * record. We thus don't want to position our reader at the sought |
491 | | * after record, but one before. To do so, we have to go one entry too |
492 | | * far and then back up. |
493 | | */ |
494 | 0 | while (1) { |
495 | 0 | size_t prev_off = it->next_off; |
496 | |
497 | 0 | err = block_iter_next(it, &rec); |
498 | 0 | if (err < 0) |
499 | 0 | goto done; |
500 | 0 | if (err > 0) { |
501 | 0 | it->next_off = prev_off; |
502 | 0 | err = 0; |
503 | 0 | goto done; |
504 | 0 | } |
505 | | |
506 | | /* |
507 | | * Check whether the current key is greater or equal to the |
508 | | * sought-after key. In case it is greater we know that the |
509 | | * record does not exist in the block and can thus abort early. |
510 | | * In case it is equal to the sought-after key we have found |
511 | | * the desired record. |
512 | | * |
513 | | * Note that we store the next record's key directly in |
514 | | * `last_key` without restoring the key of the preceding record |
515 | | * in case we need to go one record back. This is safe to do as |
516 | | * `block_iter_next()` would return the ref whose key is equal |
517 | | * to `last_key` now, and naturally all keys share a prefix |
518 | | * with themselves. |
519 | | */ |
520 | 0 | reftable_record_key(&rec, &it->last_key); |
521 | 0 | if (strbuf_cmp(&it->last_key, want) >= 0) { |
522 | 0 | it->next_off = prev_off; |
523 | 0 | goto done; |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | 0 | done: |
528 | 0 | reftable_record_release(&rec); |
529 | 0 | return err; |
530 | 0 | } |
531 | | |
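A point lookup built on the seek routine above might look like this (a sketch; br is assumed to be an initialized block_reader and rec a record of the matching type):

	struct block_iter it = { .last_key = STRBUF_INIT, .scratch = STRBUF_INIT };
	struct strbuf want = STRBUF_INIT;
	int err;

	strbuf_addstr(&want, "refs/heads/main"); /* hypothetical key */
	err = block_iter_seek_key(&it, &br, &want);
	if (!err)
		err = block_iter_next(&it, &rec); /* first record with key >= want, or > 0 at end */

	strbuf_release(&want);
	block_iter_close(&it);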
532 | | void block_writer_release(struct block_writer *bw) |
533 | 0 | { |
534 | 0 | deflateEnd(bw->zstream); |
535 | 0 | FREE_AND_NULL(bw->zstream); |
536 | 0 | FREE_AND_NULL(bw->restarts); |
537 | 0 | FREE_AND_NULL(bw->compressed); |
538 | 0 | strbuf_release(&bw->last_key); |
539 | | /* the block is not owned. */ |
540 | 0 | } |
541 | | |
542 | | void reftable_block_done(struct reftable_block *blockp) |
543 | 0 | { |
544 | 0 | struct reftable_block_source source = blockp->source; |
545 | 0 | if (blockp && source.ops) |
546 | 0 | source.ops->return_block(source.arg, blockp); |
547 | 0 | blockp->data = NULL; |
548 | 0 | blockp->len = 0; |
549 | 0 | blockp->source.ops = NULL; |
550 | 0 | blockp->source.arg = NULL; |
551 | 0 | } |