/src/git/reftable/writer.c
Line | Count | Source |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "writer.h" |
10 | | |
11 | | #include "system.h" |
12 | | |
13 | | #include "block.h" |
14 | | #include "constants.h" |
15 | | #include "record.h" |
16 | | #include "tree.h" |
17 | | #include "reftable-error.h" |
18 | | |
19 | | /* finishes a block, and writes it to storage */ |
20 | | static int writer_flush_block(struct reftable_writer *w); |
21 | | |
22 | | /* deallocates memory related to the index */ |
23 | | static void writer_clear_index(struct reftable_writer *w); |
24 | | |
25 | | /* finishes writing an 'r' (refs) or 'g' (reflogs) section */ |
26 | | static int writer_finish_public_section(struct reftable_writer *w); |
27 | | |
28 | | static struct reftable_block_stats * |
29 | | writer_reftable_block_stats(struct reftable_writer *w, uint8_t typ) |
30 | 0 | { |
31 | 0 | switch (typ) { |
32 | 0 | case 'r': |
33 | 0 | return &w->stats.ref_stats; |
34 | 0 | case 'o': |
35 | 0 | return &w->stats.obj_stats; |
36 | 0 | case 'i': |
37 | 0 | return &w->stats.idx_stats; |
38 | 0 | case 'g': |
39 | 0 | return &w->stats.log_stats; |
40 | 0 | } |
41 | 0 | abort(); |
42 | 0 | return NULL; |
43 | 0 | } |
44 | | |
45 | | /* write data, then queue `padding` zero bytes to be emitted before the |
46 | | * next write. Returns 0 on success, negative on error. */ |
47 | | static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, |
48 | | int padding) |
49 | 0 | { |
50 | 0 | int n = 0; |
51 | 0 | if (w->pending_padding > 0) { |
52 | 0 | uint8_t *zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed)); |
53 | 0 | n = w->write(w->write_arg, zeroed, w->pending_padding); |
54 | 0 | if (n < 0) { |
55 | 0 | reftable_free(zeroed); /* don't leak the scratch buffer on error */ |
56 | 0 | return n; |
57 | 0 | } |
58 | 0 | w->pending_padding = 0; |
59 | 0 | reftable_free(zeroed); |
60 | 0 | } |
61 | 0 | w->pending_padding = padding; |
62 | 0 | n = w->write(w->write_arg, data, len); |
63 | 0 | if (n < 0) |
64 | 0 | return n; |
65 | | /* the padding queued above is emitted at the start of the next call */ |
66 | 0 | return 0; |
67 | 0 | } |
68 | | |
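Note that padding is deferred rather than written eagerly: each call first flushes the padding queued by the previous call, then queues its own. This is what lets `reftable_writer_close()` drop the padding after the final block by simply resetting `pending_padding`. The write callback itself is supplied by the caller of `reftable_new_writer()` below; here is a minimal sketch of a compatible callback that appends to an in-memory buffer (the `membuf` type and the `membuf_write()` name are hypothetical, not part of the reftable API):

#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

struct membuf {
	char *data;
	size_t len, cap;
};

/* Matches the ssize_t (*)(void *, const void *, size_t) signature that
 * reftable_new_writer() expects; returns the byte count or -1 on error. */
static ssize_t membuf_write(void *arg, const void *data, size_t len)
{
	struct membuf *b = arg;

	if (b->len + len > b->cap) {
		size_t cap = (b->len + len) * 2;
		char *p = realloc(b->data, cap);
		if (!p)
			return -1;
		b->data = p;
		b->cap = cap;
	}
	memcpy(b->data + b->len, data, len);
	b->len += len;
	return (ssize_t)len;
}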
69 | | static void options_set_defaults(struct reftable_write_options *opts) |
70 | 0 | { |
71 | 0 | if (opts->restart_interval == 0) { |
72 | 0 | opts->restart_interval = 16; |
73 | 0 | } |
74 | |
75 | 0 | if (opts->hash_id == 0) { |
76 | 0 | opts->hash_id = GIT_SHA1_FORMAT_ID; |
77 | 0 | } |
78 | 0 | if (opts->block_size == 0) { |
79 | 0 | opts->block_size = DEFAULT_BLOCK_SIZE; |
80 | 0 | } |
81 | 0 | } |
82 | | |
83 | | static int writer_version(struct reftable_writer *w) |
84 | 0 | { |
85 | 0 | return (w->opts.hash_id == 0 || w->opts.hash_id == GIT_SHA1_FORMAT_ID) ? |
86 | 0 | 1 : |
87 | 0 | 2; |
88 | 0 | } |
89 | | |
90 | | static int writer_write_header(struct reftable_writer *w, uint8_t *dest) |
91 | 0 | { |
92 | 0 | memcpy(dest, "REFT", 4); |
93 | |
94 | 0 | dest[4] = writer_version(w); |
95 | |
96 | 0 | put_be24(dest + 5, w->opts.block_size); |
97 | 0 | put_be64(dest + 8, w->min_update_index); |
98 | 0 | put_be64(dest + 16, w->max_update_index); |
99 | 0 | if (writer_version(w) == 2) { |
100 | 0 | put_be32(dest + 24, w->opts.hash_id); |
101 | 0 | } |
102 | 0 | return header_size(writer_version(w)); |
103 | 0 | } |
104 | | |
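Concretely, a version-1 header is 24 bytes and a version-2 header is 28: the magic "REFT", one version byte, the block size as a 24-bit big-endian integer, the minimum and maximum update index as 64-bit big-endian integers, and, for version 2 only, the 4-byte hash format ID. A decoding sketch of the same layout (`get_be()` and `parse_header()` are hypothetical helpers for illustration, not functions from this file):

#include <stdint.h>
#include <string.h>

/* Hypothetical big-endian reader used only in these examples. */
static uint64_t get_be(const uint8_t *p, int n)
{
	uint64_t v = 0;
	while (n-- > 0)
		v = (v << 8) | *p++;
	return v;
}

struct header_fields {
	uint8_t version;
	uint32_t block_size;
	uint64_t min_update_index;
	uint64_t max_update_index;
	uint32_t hash_id; /* meaningful for version 2 only */
};

static int parse_header(const uint8_t *p, struct header_fields *out)
{
	if (memcmp(p, "REFT", 4))
		return -1;
	out->version = p[4];
	out->block_size = (uint32_t)get_be(p + 5, 3);
	out->min_update_index = get_be(p + 8, 8);
	out->max_update_index = get_be(p + 16, 8);
	out->hash_id = out->version == 2 ? (uint32_t)get_be(p + 24, 4) : 0;
	return 0;
}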
105 | | static void writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) |
106 | 0 | { |
107 | 0 | int block_start = 0; |
108 | 0 | if (w->next == 0) { |
109 | 0 | block_start = header_size(writer_version(w)); |
110 | 0 | } |
111 | |
112 | 0 | strbuf_reset(&w->last_key); |
113 | 0 | block_writer_init(&w->block_writer_data, typ, w->block, |
114 | 0 | w->opts.block_size, block_start, |
115 | 0 | hash_size(w->opts.hash_id)); |
116 | 0 | w->block_writer = &w->block_writer_data; |
117 | 0 | w->block_writer->restart_interval = w->opts.restart_interval; |
118 | 0 | } |
119 | | |
120 | | struct reftable_writer * |
121 | | reftable_new_writer(ssize_t (*writer_func)(void *, const void *, size_t), |
122 | | int (*flush_func)(void *), |
123 | | void *writer_arg, const struct reftable_write_options *_opts) |
124 | 0 | { |
125 | 0 | struct reftable_writer *wp = reftable_calloc(1, sizeof(*wp)); |
126 | 0 | struct reftable_write_options opts = {0}; |
127 | |
128 | 0 | if (_opts) |
129 | 0 | opts = *_opts; |
130 | 0 | options_set_defaults(&opts); |
131 | 0 | if (opts.block_size >= (1 << 24)) |
132 | 0 | BUG("configured block size exceeds 16MB"); |
133 | | |
134 | 0 | strbuf_init(&wp->block_writer_data.last_key, 0); |
135 | 0 | strbuf_init(&wp->last_key, 0); |
136 | 0 | REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); |
137 | 0 | wp->write = writer_func; |
138 | 0 | wp->write_arg = writer_arg; |
139 | 0 | wp->opts = opts; |
140 | 0 | wp->flush = flush_func; |
141 | 0 | writer_reinit_block_writer(wp, BLOCK_TYPE_REF); |
142 | |
143 | 0 | return wp; |
144 | 0 | } |
145 | | |
146 | | void reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, |
147 | | uint64_t max) |
148 | 0 | { |
149 | 0 | w->min_update_index = min; |
150 | 0 | w->max_update_index = max; |
151 | 0 | } |
152 | | |
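Putting the constructor and the limits together, a usage sketch that reuses the hypothetical `membuf_write()` callback from above (a real caller would also check for errors and eventually call `reftable_writer_close()` and `reftable_writer_free()`):

static int noop_flush(void *arg)
{
	(void)arg;
	return 0; /* nothing to sync for an in-memory target */
}

static void example_new_writer(void)
{
	struct membuf buf = { 0 };
	struct reftable_write_options opts = { 0 }; /* zeroed fields get defaults */
	struct reftable_writer *w =
		reftable_new_writer(membuf_write, noop_flush, &buf, &opts);

	/* Every record added later must carry an update_index in [1, 1]. */
	reftable_writer_set_limits(w, 1, 1);

	/* ... add refs and/or logs here, then close and free ... */
}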
153 | | static void writer_release(struct reftable_writer *w) |
154 | 0 | { |
155 | 0 | if (w) { |
156 | 0 | reftable_free(w->block); |
157 | 0 | w->block = NULL; |
158 | 0 | block_writer_release(&w->block_writer_data); |
159 | 0 | w->block_writer = NULL; |
160 | 0 | writer_clear_index(w); |
161 | 0 | strbuf_release(&w->last_key); |
162 | 0 | } |
163 | 0 | } |
164 | | |
165 | | void reftable_writer_free(struct reftable_writer *w) |
166 | 0 | { |
167 | 0 | writer_release(w); |
168 | 0 | reftable_free(w); |
169 | 0 | } |
170 | | |
171 | | struct obj_index_tree_node { |
172 | | struct strbuf hash; |
173 | | uint64_t *offsets; |
174 | | size_t offset_len; |
175 | | size_t offset_cap; |
176 | | }; |
177 | | |
178 | | #define OBJ_INDEX_TREE_NODE_INIT \ |
179 | 0 | { \ |
180 | 0 | .hash = STRBUF_INIT \ |
181 | 0 | } |
182 | | |
183 | | static int obj_index_tree_node_compare(const void *a, const void *b) |
184 | 0 | { |
185 | 0 | return strbuf_cmp(&((const struct obj_index_tree_node *)a)->hash, |
186 | 0 | &((const struct obj_index_tree_node *)b)->hash); |
187 | 0 | } |
188 | | |
189 | | static void writer_index_hash(struct reftable_writer *w, struct strbuf *hash) |
190 | 0 | { |
191 | 0 | uint64_t off = w->next; |
192 | |
193 | 0 | struct obj_index_tree_node want = { .hash = *hash }; |
194 | |
195 | 0 | struct tree_node *node = tree_search(&want, &w->obj_index_tree, |
196 | 0 | &obj_index_tree_node_compare, 0); |
197 | 0 | struct obj_index_tree_node *key = NULL; |
198 | 0 | if (!node) { |
199 | 0 | struct obj_index_tree_node empty = OBJ_INDEX_TREE_NODE_INIT; |
200 | 0 | key = reftable_malloc(sizeof(struct obj_index_tree_node)); |
201 | 0 | *key = empty; |
202 | |
203 | 0 | strbuf_reset(&key->hash); |
204 | 0 | strbuf_addbuf(&key->hash, hash); |
205 | 0 | tree_search((void *)key, &w->obj_index_tree, |
206 | 0 | &obj_index_tree_node_compare, 1); |
207 | 0 | } else { |
208 | 0 | key = node->key; |
209 | 0 | } |
210 | |
211 | 0 | if (key->offset_len > 0 && key->offsets[key->offset_len - 1] == off) { |
212 | 0 | return; |
213 | 0 | } |
214 | | |
215 | 0 | REFTABLE_ALLOC_GROW(key->offsets, key->offset_len + 1, key->offset_cap); |
216 | 0 | key->offsets[key->offset_len++] = off; |
217 | 0 | } |
218 | | |
219 | | static int writer_add_record(struct reftable_writer *w, |
220 | | struct reftable_record *rec) |
221 | 0 | { |
222 | 0 | struct strbuf key = STRBUF_INIT; |
223 | 0 | int err; |
224 | |
225 | 0 | reftable_record_key(rec, &key); |
226 | 0 | if (strbuf_cmp(&w->last_key, &key) >= 0) { |
227 | 0 | err = REFTABLE_API_ERROR; |
228 | 0 | goto done; |
229 | 0 | } |
230 | | |
231 | 0 | strbuf_reset(&w->last_key); |
232 | 0 | strbuf_addbuf(&w->last_key, &key); |
233 | 0 | if (!w->block_writer) |
234 | 0 | writer_reinit_block_writer(w, reftable_record_type(rec)); |
235 | |
236 | 0 | if (block_writer_type(w->block_writer) != reftable_record_type(rec)) |
237 | 0 | BUG("record of type %d added to writer of type %d", |
238 | 0 | reftable_record_type(rec), block_writer_type(w->block_writer)); |
239 | | |
240 | | /* |
241 | | * Try to add the record to the writer. If this succeeds then we're |
242 | | * done. Otherwise the block writer may have hit the block size limit |
243 | | * and needs to be flushed. |
244 | | */ |
245 | 0 | if (!block_writer_add(w->block_writer, rec)) { |
246 | 0 | err = 0; |
247 | 0 | goto done; |
248 | 0 | } |
249 | | |
250 | | /* |
251 | | * The current block is full, so we need to flush and reinitialize the |
252 | | * writer to start writing the next block. |
253 | | */ |
254 | 0 | err = writer_flush_block(w); |
255 | 0 | if (err < 0) |
256 | 0 | goto done; |
257 | 0 | writer_reinit_block_writer(w, reftable_record_type(rec)); |
258 | | |
259 | | /* |
260 | | * Try to add the record to the writer again. If this still fails then |
261 | | * the record does not fit into the block size. |
262 | | * |
263 | | * TODO: it would be great to have `block_writer_add()` return proper |
264 | | * error codes so that we don't have to second-guess the failure |
265 | | * mode here. |
266 | | */ |
267 | 0 | err = block_writer_add(w->block_writer, rec); |
268 | 0 | if (err) { |
269 | 0 | err = REFTABLE_ENTRY_TOO_BIG_ERROR; |
270 | 0 | goto done; |
271 | 0 | } |
272 | | |
273 | 0 | done: |
274 | 0 | strbuf_release(&key); |
275 | 0 | return err; |
276 | 0 | } |
277 | | |
278 | | int reftable_writer_add_ref(struct reftable_writer *w, |
279 | | struct reftable_ref_record *ref) |
280 | 0 | { |
281 | 0 | struct reftable_record rec = { |
282 | 0 | .type = BLOCK_TYPE_REF, |
283 | 0 | .u = { |
284 | 0 | .ref = *ref |
285 | 0 | }, |
286 | 0 | }; |
287 | 0 | int err = 0; |
288 | |
289 | 0 | if (!ref->refname) |
290 | 0 | return REFTABLE_API_ERROR; |
291 | 0 | if (ref->update_index < w->min_update_index || |
292 | 0 | ref->update_index > w->max_update_index) |
293 | 0 | return REFTABLE_API_ERROR; |
294 | | |
295 | 0 | rec.u.ref.update_index -= w->min_update_index; |
296 | |
297 | 0 | err = writer_add_record(w, &rec); |
298 | 0 | if (err < 0) |
299 | 0 | return err; |
300 | | |
301 | 0 | if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) { |
302 | 0 | struct strbuf h = STRBUF_INIT; |
303 | 0 | strbuf_add(&h, (char *)reftable_ref_record_val1(ref), |
304 | 0 | hash_size(w->opts.hash_id)); |
305 | 0 | writer_index_hash(w, &h); |
306 | 0 | strbuf_release(&h); |
307 | 0 | } |
308 | |
309 | 0 | if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) { |
310 | 0 | struct strbuf h = STRBUF_INIT; |
311 | 0 | strbuf_add(&h, reftable_ref_record_val2(ref), |
312 | 0 | hash_size(w->opts.hash_id)); |
313 | 0 | writer_index_hash(w, &h); |
314 | 0 | strbuf_release(&h); |
315 | 0 | } |
316 | 0 | return 0; |
317 | 0 | } |
318 | | |
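Two contracts are worth calling out here: the `update_index` must fall within the limits set via `reftable_writer_set_limits()`, and `writer_add_record()` requires refnames to arrive in strictly increasing order. A sketch of adding a deletion record, which needs no object ID (field and enum names follow the public reftable API; treat the snippet as illustrative):

static int delete_topic_branch(struct reftable_writer *w)
{
	struct reftable_ref_record ref = {
		.refname = (char *)"refs/heads/topic",
		.update_index = 1, /* must lie within the writer's limits */
		.value_type = REFTABLE_REF_DELETION, /* tombstone: no value payload */
	};

	/* Returns REFTABLE_API_ERROR if "refs/heads/topic" does not sort
	 * strictly after the previously added refname. */
	return reftable_writer_add_ref(w, &ref);
}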
319 | | int reftable_writer_add_refs(struct reftable_writer *w, |
320 | | struct reftable_ref_record *refs, int n) |
321 | 0 | { |
322 | 0 | int err = 0; |
323 | 0 | int i = 0; |
324 | 0 | QSORT(refs, n, reftable_ref_record_compare_name); |
325 | 0 | for (i = 0; err == 0 && i < n; i++) { |
326 | 0 | err = reftable_writer_add_ref(w, &refs[i]); |
327 | 0 | } |
328 | 0 | return err; |
329 | 0 | } |
330 | | |
331 | | static int reftable_writer_add_log_verbatim(struct reftable_writer *w, |
332 | | struct reftable_log_record *log) |
333 | 0 | { |
334 | 0 | struct reftable_record rec = { |
335 | 0 | .type = BLOCK_TYPE_LOG, |
336 | 0 | .u = { |
337 | 0 | .log = *log, |
338 | 0 | }, |
339 | 0 | }; |
340 | 0 | if (w->block_writer && |
341 | 0 | block_writer_type(w->block_writer) == BLOCK_TYPE_REF) { |
342 | 0 | int err = writer_finish_public_section(w); |
343 | 0 | if (err < 0) |
344 | 0 | return err; |
345 | 0 | } |
346 | | |
347 | 0 | w->next -= w->pending_padding; |
348 | 0 | w->pending_padding = 0; |
349 | 0 | return writer_add_record(w, &rec); |
350 | 0 | } |
351 | | |
352 | | int reftable_writer_add_log(struct reftable_writer *w, |
353 | | struct reftable_log_record *log) |
354 | 0 | { |
355 | 0 | char *input_log_message = NULL; |
356 | 0 | struct strbuf cleaned_message = STRBUF_INIT; |
357 | 0 | int err = 0; |
358 | |
359 | 0 | if (log->value_type == REFTABLE_LOG_DELETION) |
360 | 0 | return reftable_writer_add_log_verbatim(w, log); |
361 | | |
362 | 0 | if (!log->refname) |
363 | 0 | return REFTABLE_API_ERROR; |
364 | | |
365 | 0 | input_log_message = log->value.update.message; |
366 | 0 | if (!w->opts.exact_log_message && log->value.update.message) { |
367 | 0 | strbuf_addstr(&cleaned_message, log->value.update.message); |
368 | 0 | while (cleaned_message.len && |
369 | 0 | cleaned_message.buf[cleaned_message.len - 1] == '\n') |
370 | 0 | strbuf_setlen(&cleaned_message, |
371 | 0 | cleaned_message.len - 1); |
372 | 0 | if (strchr(cleaned_message.buf, '\n')) { |
373 | | /* multiple lines not allowed. */ |
374 | 0 | err = REFTABLE_API_ERROR; |
375 | 0 | goto done; |
376 | 0 | } |
377 | 0 | strbuf_addstr(&cleaned_message, "\n"); |
378 | 0 | log->value.update.message = cleaned_message.buf; |
379 | 0 | } |
380 | | |
381 | 0 | err = reftable_writer_add_log_verbatim(w, log); |
382 | 0 | log->value.update.message = input_log_message; |
383 | 0 | done: |
384 | 0 | strbuf_release(&cleaned_message); |
385 | 0 | return err; |
386 | 0 | } |
387 | | |
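Unless `opts.exact_log_message` is set, the function above normalizes the message: trailing newlines are stripped, any embedded newline is rejected with `REFTABLE_API_ERROR`, and exactly one trailing newline is put back. The same rule in isolation, as a sketch built on the strbuf API already used in this file (`normalize_log_message()` is a hypothetical name):

/* "fix bug\n\n" becomes "fix bug\n"; "line1\nline2" is rejected. */
static int normalize_log_message(struct strbuf *out, const char *msg)
{
	strbuf_addstr(out, msg);
	while (out->len && out->buf[out->len - 1] == '\n')
		strbuf_setlen(out, out->len - 1);
	if (strchr(out->buf, '\n'))
		return -1; /* multi-line messages are not representable */
	strbuf_addstr(out, "\n");
	return 0;
}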
388 | | int reftable_writer_add_logs(struct reftable_writer *w, |
389 | | struct reftable_log_record *logs, int n) |
390 | 0 | { |
391 | 0 | int err = 0; |
392 | 0 | int i = 0; |
393 | 0 | QSORT(logs, n, reftable_log_record_compare_key); |
394 | |
395 | 0 | for (i = 0; err == 0 && i < n; i++) { |
396 | 0 | err = reftable_writer_add_log(w, &logs[i]); |
397 | 0 | } |
398 | 0 | return err; |
399 | 0 | } |
400 | | |
401 | | static int writer_finish_section(struct reftable_writer *w) |
402 | 0 | { |
403 | 0 | struct reftable_block_stats *bstats = NULL; |
404 | 0 | uint8_t typ = block_writer_type(w->block_writer); |
405 | 0 | uint64_t index_start = 0; |
406 | 0 | int max_level = 0; |
407 | 0 | size_t threshold = w->opts.unpadded ? 1 : 3; |
408 | 0 | int before_blocks = w->stats.idx_stats.blocks; |
409 | 0 | int err; |
410 | |
411 | 0 | err = writer_flush_block(w); |
412 | 0 | if (err < 0) |
413 | 0 | return err; |
414 | | |
415 | | /* |
416 | | * When the section we are about to index has a lot of blocks then the |
417 | | * index itself may span across multiple blocks, as well. This would |
418 | | * require a linear scan over index blocks only to find the desired |
419 | | * indexed block, which is inefficient. Instead, we write a multi-level |
420 | | * index where index records of level N+1 will refer to index blocks of |
421 | | * level N. This isn't constant time, either, but at least logarithmic. |
422 | | * |
423 | | * This loop handles writing this multi-level index. Note that we write |
424 | | * the lowest-level index pointing to the indexed blocks first. We then |
425 | | * continue writing additional index levels until the current level has |
426 | | * no more records than the threshold so that the highest level will be at |
427 | | * the end of the index section. |
428 | | * |
429 | | * Readers are thus required to start reading the index section from |
430 | | * its end, which is why we set `index_start` to the beginning of the |
431 | | * last index section. |
432 | | */ |
433 | 0 | while (w->index_len > threshold) { |
434 | 0 | struct reftable_index_record *idx = NULL; |
435 | 0 | size_t i, idx_len; |
436 | |
437 | 0 | max_level++; |
438 | 0 | index_start = w->next; |
439 | 0 | writer_reinit_block_writer(w, BLOCK_TYPE_INDEX); |
440 | |
441 | 0 | idx = w->index; |
442 | 0 | idx_len = w->index_len; |
443 | |
444 | 0 | w->index = NULL; |
445 | 0 | w->index_len = 0; |
446 | 0 | w->index_cap = 0; |
447 | 0 | for (i = 0; i < idx_len; i++) { |
448 | 0 | struct reftable_record rec = { |
449 | 0 | .type = BLOCK_TYPE_INDEX, |
450 | 0 | .u = { |
451 | 0 | .idx = idx[i], |
452 | 0 | }, |
453 | 0 | }; |
454 | |
455 | 0 | err = writer_add_record(w, &rec); |
456 | 0 | if (err < 0) |
457 | 0 | return err; |
458 | 0 | } |
459 | | |
460 | 0 | err = writer_flush_block(w); |
461 | 0 | if (err < 0) |
462 | 0 | return err; |
463 | | |
464 | 0 | for (i = 0; i < idx_len; i++) |
465 | 0 | strbuf_release(&idx[i].last_key); |
466 | 0 | reftable_free(idx); |
467 | 0 | } |
468 | | |
469 | | /* |
470 | | * The index may still contain up to `threshold` index records. Clear it |
471 | | * so that these entries don't leak into the next index section. |
472 | | * index section. |
473 | | */ |
474 | 0 | writer_clear_index(w); |
475 | |
476 | 0 | bstats = writer_reftable_block_stats(w, typ); |
477 | 0 | bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks; |
478 | 0 | bstats->index_offset = index_start; |
479 | 0 | bstats->max_index_level = max_level; |
480 | | |
481 | | /* Reinit last_key, as the next section can start with any key. */ |
482 | 0 | strbuf_reset(&w->last_key); |
483 | |
484 | 0 | return 0; |
485 | 0 | } |
486 | | |
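The number of index levels therefore grows logarithmically with the number of blocks in the section. A back-of-the-envelope sketch, assuming each index block holds roughly `fanout` index records (in reality the fanout depends on key lengths, prefix compression, and restart points):

static int index_levels(uint64_t records, uint64_t fanout, uint64_t threshold)
{
	int levels = 0;

	/* Mirrors the `while (w->index_len > threshold)` loop above: each
	 * level collapses `records` entries into ceil(records / fanout)
	 * index blocks, which become the records of the next level. */
	while (records > threshold) {
		records = (records + fanout - 1) / fanout;
		levels++;
	}
	return levels;
}
/* e.g. index_levels(100000, 500, 3) == 2: 100000 -> 200 -> 1 */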
487 | | struct common_prefix_arg { |
488 | | struct strbuf *last; |
489 | | int max; |
490 | | }; |
491 | | |
492 | | static void update_common(void *void_arg, void *key) |
493 | 0 | { |
494 | 0 | struct common_prefix_arg *arg = void_arg; |
495 | 0 | struct obj_index_tree_node *entry = key; |
496 | 0 | if (arg->last) { |
497 | 0 | int n = common_prefix_size(&entry->hash, arg->last); |
498 | 0 | if (n > arg->max) { |
499 | 0 | arg->max = n; |
500 | 0 | } |
501 | 0 | } |
502 | 0 | arg->last = &entry->hash; |
503 | 0 | } |
504 | | |
505 | | struct write_record_arg { |
506 | | struct reftable_writer *w; |
507 | | int err; |
508 | | }; |
509 | | |
510 | | static void write_object_record(void *void_arg, void *key) |
511 | 0 | { |
512 | 0 | struct write_record_arg *arg = void_arg; |
513 | 0 | struct obj_index_tree_node *entry = key; |
514 | 0 | struct reftable_record |
515 | 0 | rec = { .type = BLOCK_TYPE_OBJ, |
516 | 0 | .u.obj = { |
517 | 0 | .hash_prefix = (uint8_t *)entry->hash.buf, |
518 | 0 | .hash_prefix_len = arg->w->stats.object_id_len, |
519 | 0 | .offsets = entry->offsets, |
520 | 0 | .offset_len = entry->offset_len, |
521 | 0 | } }; |
522 | 0 | if (arg->err < 0) |
523 | 0 | goto done; |
524 | | |
525 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
526 | 0 | if (arg->err == 0) |
527 | 0 | goto done; |
528 | | |
529 | 0 | arg->err = writer_flush_block(arg->w); |
530 | 0 | if (arg->err < 0) |
531 | 0 | goto done; |
532 | | |
533 | 0 | writer_reinit_block_writer(arg->w, BLOCK_TYPE_OBJ); |
534 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
535 | 0 | if (arg->err == 0) |
536 | 0 | goto done; |
537 | | |
538 | 0 | rec.u.obj.offset_len = 0; |
539 | 0 | arg->err = block_writer_add(arg->w->block_writer, &rec); |
540 | | |
541 | | /* Should be able to write into a fresh block. */ |
542 | 0 | assert(arg->err == 0); |
543 | | |
544 | 0 | done:; |
545 | 0 | } |
546 | | |
547 | | static void object_record_free(void *void_arg UNUSED, void *key) |
548 | 0 | { |
549 | 0 | struct obj_index_tree_node *entry = key; |
550 | |
551 | 0 | FREE_AND_NULL(entry->offsets); |
552 | 0 | strbuf_release(&entry->hash); |
553 | 0 | reftable_free(entry); |
554 | 0 | } |
555 | | |
556 | | static int writer_dump_object_index(struct reftable_writer *w) |
557 | 0 | { |
558 | 0 | struct write_record_arg closure = { .w = w }; |
559 | 0 | struct common_prefix_arg common = { |
560 | 0 | .max = 1, /* obj_id_len should be >= 2. */ |
561 | 0 | }; |
562 | 0 | if (w->obj_index_tree) { |
563 | 0 | infix_walk(w->obj_index_tree, &update_common, &common); |
564 | 0 | } |
565 | 0 | w->stats.object_id_len = common.max + 1; |
566 | |
567 | 0 | writer_reinit_block_writer(w, BLOCK_TYPE_OBJ); |
568 | |
569 | 0 | if (w->obj_index_tree) { |
570 | 0 | infix_walk(w->obj_index_tree, &write_object_record, &closure); |
571 | 0 | } |
572 | |
573 | 0 | if (closure.err < 0) |
574 | 0 | return closure.err; |
575 | 0 | return writer_finish_section(w); |
576 | 0 | } |
577 | | |
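`object_id_len` ends up one byte longer than the longest prefix shared by any two neighboring hashes in sorted order, which is the shortest abbreviation that keeps every stored prefix unique (with a floor of two bytes, since `common.max` starts at 1). A byte-level sketch of the pairwise computation that `update_common()` performs during the in-order walk (the file's own `common_prefix_size()` does the same on strbufs):

static size_t common_prefix_len(const uint8_t *a, const uint8_t *b, size_t n)
{
	size_t i = 0;

	while (i < n && a[i] == b[i])
		i++;
	return i;
}
/* object_id_len = (max common prefix over adjacent sorted hashes) + 1 */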
578 | | static int writer_finish_public_section(struct reftable_writer *w) |
579 | 0 | { |
580 | 0 | uint8_t typ = 0; |
581 | 0 | int err = 0; |
582 | |
583 | 0 | if (!w->block_writer) |
584 | 0 | return 0; |
585 | | |
586 | 0 | typ = block_writer_type(w->block_writer); |
587 | 0 | err = writer_finish_section(w); |
588 | 0 | if (err < 0) |
589 | 0 | return err; |
590 | 0 | if (typ == BLOCK_TYPE_REF && !w->opts.skip_index_objects && |
591 | 0 | w->stats.ref_stats.index_blocks > 0) { |
592 | 0 | err = writer_dump_object_index(w); |
593 | 0 | if (err < 0) |
594 | 0 | return err; |
595 | 0 | } |
596 | | |
597 | 0 | if (w->obj_index_tree) { |
598 | 0 | infix_walk(w->obj_index_tree, &object_record_free, NULL); |
599 | 0 | tree_free(w->obj_index_tree); |
600 | 0 | w->obj_index_tree = NULL; |
601 | 0 | } |
602 | |
603 | 0 | w->block_writer = NULL; |
604 | 0 | return 0; |
605 | 0 | } |
606 | | |
607 | | int reftable_writer_close(struct reftable_writer *w) |
608 | 0 | { |
609 | 0 | uint8_t footer[72]; |
610 | 0 | uint8_t *p = footer; |
611 | 0 | int err = writer_finish_public_section(w); |
612 | 0 | int empty_table = w->next == 0; |
613 | 0 | if (err != 0) |
614 | 0 | goto done; |
615 | 0 | w->pending_padding = 0; |
616 | 0 | if (empty_table) { |
617 | | /* Empty tables need a header anyway. */ |
618 | 0 | uint8_t header[28]; |
619 | 0 | int n = writer_write_header(w, header); |
620 | 0 | err = padded_write(w, header, n, 0); |
621 | 0 | if (err < 0) |
622 | 0 | goto done; |
623 | 0 | } |
624 | | |
625 | 0 | p += writer_write_header(w, footer); |
626 | 0 | put_be64(p, w->stats.ref_stats.index_offset); |
627 | 0 | p += 8; |
628 | 0 | put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); |
629 | 0 | p += 8; |
630 | 0 | put_be64(p, w->stats.obj_stats.index_offset); |
631 | 0 | p += 8; |
632 | |
633 | 0 | put_be64(p, w->stats.log_stats.offset); |
634 | 0 | p += 8; |
635 | 0 | put_be64(p, w->stats.log_stats.index_offset); |
636 | 0 | p += 8; |
637 | |
638 | 0 | put_be32(p, crc32(0, footer, p - footer)); |
639 | 0 | p += 4; |
640 | |
641 | 0 | err = w->flush(w->write_arg); |
642 | 0 | if (err < 0) { |
643 | 0 | err = REFTABLE_IO_ERROR; |
644 | 0 | goto done; |
645 | 0 | } |
646 | | |
647 | 0 | err = padded_write(w, footer, footer_size(writer_version(w)), 0); |
648 | 0 | if (err < 0) |
649 | 0 | goto done; |
650 | | |
651 | 0 | if (empty_table) { |
652 | 0 | err = REFTABLE_EMPTY_TABLE_ERROR; |
653 | 0 | goto done; |
654 | 0 | } |
655 | | |
656 | 0 | done: |
657 | 0 | writer_release(w); |
658 | 0 | return err; |
659 | 0 | } |
660 | | |
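The footer thus repeats the header and appends five big-endian 64-bit fields: the ref index offset, the object section offset shifted left by five bits and OR-ed with `object_id_len`, the object index offset, the log section offset, and the log index offset, followed by a CRC-32 over everything before the checksum. That makes 68 bytes for version 1 and 72 for version 2. A validation sketch reusing the hypothetical `get_be()` helper from earlier:

#include <zlib.h> /* crc32(), as used by the writer above */

static int footer_crc_ok(const uint8_t *footer, size_t footer_size)
{
	/* footer_size is 68 for version 1, 72 for version 2 */
	uint32_t want = (uint32_t)get_be(footer + footer_size - 4, 4);

	return crc32(0, footer, footer_size - 4) == want;
}

/* The packed object field decodes as:
 *   obj_offset    = field >> 5;
 *   object_id_len = field & 0x1f;
 */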
661 | | static void writer_clear_index(struct reftable_writer *w) |
662 | 0 | { |
663 | 0 | for (size_t i = 0; w->index && i < w->index_len; i++) |
664 | 0 | strbuf_release(&w->index[i].last_key); |
665 | 0 | FREE_AND_NULL(w->index); |
666 | 0 | w->index_len = 0; |
667 | 0 | w->index_cap = 0; |
668 | 0 | } |
669 | | |
670 | | static int writer_flush_nonempty_block(struct reftable_writer *w) |
671 | 0 | { |
672 | 0 | struct reftable_index_record index_record = { |
673 | 0 | .last_key = STRBUF_INIT, |
674 | 0 | }; |
675 | 0 | uint8_t typ = block_writer_type(w->block_writer); |
676 | 0 | struct reftable_block_stats *bstats; |
677 | 0 | int raw_bytes, padding = 0, err; |
678 | 0 | uint64_t block_typ_off; |
679 | | |
680 | | /* |
681 | | * Finish the current block. This will cause the block writer to emit |
682 | | * restart points and potentially compress records in case we are |
683 | | * writing a log block. |
684 | | * |
685 | | * Note that this is still happening in memory. |
686 | | */ |
687 | 0 | raw_bytes = block_writer_finish(w->block_writer); |
688 | 0 | if (raw_bytes < 0) |
689 | 0 | return raw_bytes; |
690 | | |
691 | | /* |
692 | | * By default, all records except for log records are padded to the |
693 | | * block size. |
694 | | */ |
695 | 0 | if (!w->opts.unpadded && typ != BLOCK_TYPE_LOG) |
696 | 0 | padding = w->opts.block_size - raw_bytes; |
697 | |
698 | 0 | bstats = writer_reftable_block_stats(w, typ); |
699 | 0 | block_typ_off = (bstats->blocks == 0) ? w->next : 0; |
700 | 0 | if (block_typ_off > 0) |
701 | 0 | bstats->offset = block_typ_off; |
702 | 0 | bstats->entries += w->block_writer->entries; |
703 | 0 | bstats->restarts += w->block_writer->restart_len; |
704 | 0 | bstats->blocks++; |
705 | 0 | w->stats.blocks++; |
706 | | |
707 | | /* |
708 | | * If this is the first block we're writing to the table then we need |
709 | | * to also write the reftable header. |
710 | | */ |
711 | 0 | if (!w->next) |
712 | 0 | writer_write_header(w, w->block); |
713 | |
714 | 0 | err = padded_write(w, w->block, raw_bytes, padding); |
715 | 0 | if (err < 0) |
716 | 0 | return err; |
717 | | |
718 | | /* |
719 | | * Add an index record for every block that we're writing. If we end up |
720 | | * having more than a threshold of index records we will end up writing |
721 | | * an index section in `writer_finish_section()`. Each index record |
722 | | * contains the last record key of the block it is indexing as well as |
723 | | * the offset of that block. |
724 | | * |
725 | | * Note that this also applies when flushing index blocks, in which |
726 | | * case we will end up with a multi-level index. |
727 | | */ |
728 | 0 | REFTABLE_ALLOC_GROW(w->index, w->index_len + 1, w->index_cap); |
729 | 0 | index_record.offset = w->next; |
730 | 0 | strbuf_reset(&index_record.last_key); |
731 | 0 | strbuf_addbuf(&index_record.last_key, &w->block_writer->last_key); |
732 | 0 | w->index[w->index_len] = index_record; |
733 | 0 | w->index_len++; |
734 | |
735 | 0 | w->next += padding + raw_bytes; |
736 | 0 | w->block_writer = NULL; |
737 | |
738 | 0 | return 0; |
739 | 0 | } |
740 | | |
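One consequence of this padding rule: in a padded table, every block other than a log block starts on a `block_size` boundary, so a reader can map any byte offset to the start of its containing block with plain arithmetic. A trivial sketch (not applicable to log blocks or to tables written with `opts.unpadded`):

static uint64_t containing_block_start(uint64_t offset, uint32_t block_size)
{
	return offset - (offset % block_size);
}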
741 | | static int writer_flush_block(struct reftable_writer *w) |
742 | 0 | { |
743 | 0 | if (!w->block_writer) |
744 | 0 | return 0; |
745 | 0 | if (w->block_writer->entries == 0) |
746 | 0 | return 0; |
747 | 0 | return writer_flush_nonempty_block(w); |
748 | 0 | } |
749 | | |
750 | | const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w) |
751 | 0 | { |
752 | 0 | return &w->stats; |
753 | 0 | } |