Coverage Report

Created: 2024-09-08 06:24

/src/git/reftable/merged.c
(Per-line hit counts are omitted from the listing below: every executable line in this file has a count of 0, i.e. the file is entirely uncovered.)
/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "merged.h"

#include "constants.h"
#include "iter.h"
#include "pq.h"
#include "reader.h"
#include "record.h"
#include "reftable-merged.h"
#include "reftable-error.h"
#include "system.h"

struct merged_subiter {
  struct reftable_iterator iter;
  struct reftable_record rec;
};

struct merged_iter {
  struct merged_subiter *subiters;
  struct merged_iter_pqueue pq;
  size_t subiters_len;
  int suppress_deletions;
  /*
   * Index of the subiter whose record was handed out most recently, or
   * -1. That subiter is advanced lazily, on the next call to
   * merged_iter_next_entry().
   */
  ssize_t advance_index;
};

static void merged_iter_init(struct merged_iter *mi,
                             struct reftable_merged_table *mt,
                             uint8_t typ)
{
  memset(mi, 0, sizeof(*mi));
  mi->advance_index = -1;
  mi->suppress_deletions = mt->suppress_deletions;

  REFTABLE_CALLOC_ARRAY(mi->subiters, mt->readers_len);
  for (size_t i = 0; i < mt->readers_len; i++) {
    reftable_record_init(&mi->subiters[i].rec, typ);
    reader_init_iter(mt->readers[i], &mi->subiters[i].iter, typ);
  }
  mi->subiters_len = mt->readers_len;
}

static void merged_iter_close(void *p)
{
  struct merged_iter *mi = p;

  merged_iter_pqueue_release(&mi->pq);
  for (size_t i = 0; i < mi->subiters_len; i++) {
    reftable_iterator_destroy(&mi->subiters[i].iter);
    reftable_record_release(&mi->subiters[i].rec);
  }
  reftable_free(mi->subiters);
}

static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
{
  struct pq_entry e = {
    .index = idx,
    .rec = &mi->subiters[idx].rec,
  };
  int err;

  err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
  if (err)
    return err;

  merged_iter_pqueue_add(&mi->pq, &e);
  return 0;
}

static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want)
{
  int err;

  mi->advance_index = -1;

  for (size_t i = 0; i < mi->subiters_len; i++) {
    err = iterator_seek(&mi->subiters[i].iter, want);
    if (err < 0)
      return err;
    if (err > 0)
      continue;

    err = merged_iter_advance_subiter(mi, i);
    if (err < 0)
      return err;
  }

  return 0;
}

static int merged_iter_next_entry(struct merged_iter *mi,
                                  struct reftable_record *rec)
{
  struct pq_entry entry = { 0 };
  int err = 0, empty;

  empty = merged_iter_pqueue_is_empty(mi->pq);

  if (mi->advance_index >= 0) {
    /*
     * When there are no pqueue entries, we only have a single
     * subiter left. There is no need to use the pqueue in that
     * case anymore, as we know that the subiter will return entries
     * in the correct order already.
     *
     * While this may sound like a very specific edge case, it may
     * happen more frequently than you think. Most repositories
     * will end up having a single large base table that contains
     * most of the refs. It's thus likely that we exhaust all
     * subiters but the one for that base table.
     */
    if (empty)
      return iterator_next(&mi->subiters[mi->advance_index].iter,
                           rec);

    err = merged_iter_advance_subiter(mi, mi->advance_index);
    if (err < 0)
      return err;
    if (!err)
      empty = 0;
    mi->advance_index = -1;
  }

  if (empty)
    return 1;

  entry = merged_iter_pqueue_remove(&mi->pq);

  /*
    One can also use reftable as datacenter-local storage, where the ref
    database is maintained in a globally consistent database (e.g.
    CockroachDB or Spanner). In this scenario, replication delays together
    with compaction may cause newer tables to contain older entries. In
    such a deployment, the loop below must be changed to collect all
    entries for the same key, and return only the newest one.
  */
  while (!merged_iter_pqueue_is_empty(mi->pq)) {
    struct pq_entry top = merged_iter_pqueue_top(mi->pq);
    int cmp;

    cmp = reftable_record_cmp(top.rec, entry.rec);
    if (cmp > 0)
      break;

    /* An equal key can only come from an older table here: drop the
     * shadowed record and advance that subiter. */
    merged_iter_pqueue_remove(&mi->pq);
    err = merged_iter_advance_subiter(mi, top.index);
    if (err < 0)
      return err;
  }

  mi->advance_index = entry.index;
  SWAP(*rec, *entry.rec);
  return 0;
}
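
Editor's note: the pqueue loop above is what gives a merged table its "newer tables shadow older ones" semantics. The standalone sketch below is an editor's addition, not part of merged.c; it mirrors that behaviour on plain sorted string arrays, with lower array indices standing in for newer tables, and entirely made-up table contents:

/* Minimal shadowing demo: merge two sorted "tables" of refnames,
 * preferring the newer table (index 0) on equal keys. */
#include <stdio.h>
#include <string.h>

int main(void)
{
  /* Each table is sorted; a NULL entry marks its end. */
  const char *tables[2][3] = {
    { "refs/heads/feature", "refs/heads/main", NULL }, /* newest */
    { "refs/heads/main", "refs/tags/v1.0", NULL },     /* oldest */
  };
  size_t pos[2] = { 0, 0 };

  for (;;) {
    int best = -1;
    /* Pick the smallest current key; ties go to the newer table
     * because it is inspected first and older keys are not
     * strictly smaller. */
    for (int i = 0; i < 2; i++) {
      if (!tables[i][pos[i]])
        continue;
      if (best < 0 ||
          strcmp(tables[i][pos[i]], tables[best][pos[best]]) < 0)
        best = i;
    }
    if (best < 0)
      break;
    printf("%s (from table %d)\n", tables[best][pos[best]], best);
    /* Skip entries with the same key in all tables, mirroring the
     * dedup loop in merged_iter_next_entry(). */
    for (int i = 0; i < 2; i++)
      if (tables[i][pos[i]] &&
          !strcmp(tables[i][pos[i]], tables[best][pos[best]]))
        pos[i]++;
  }
  return 0;
}

As in merged_iter_next_entry(), the record is taken from the newest table holding the key, and "refs/heads/main" from the older table is silently skipped.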

static int merged_iter_seek_void(void *it, struct reftable_record *want)
{
  return merged_iter_seek(it, want);
}

static int merged_iter_next_void(void *p, struct reftable_record *rec)
{
  struct merged_iter *mi = p;
  while (1) {
    int err = merged_iter_next_entry(mi, rec);
    if (err)
      return err;
    /* With suppress_deletions set, deletion records are filtered
     * out here instead of being returned to the caller. */
    if (mi->suppress_deletions && reftable_record_is_deletion(rec))
      continue;
    return 0;
  }
}

static struct reftable_iterator_vtable merged_iter_vtable = {
  .seek = &merged_iter_seek_void,
  .next = &merged_iter_next_void,
  .close = &merged_iter_close,
};

static void iterator_from_merged_iter(struct reftable_iterator *it,
                                      struct merged_iter *mi)
{
  assert(!it->ops);
  it->iter_arg = mi;
  it->ops = &merged_iter_vtable;
}

int reftable_merged_table_new(struct reftable_merged_table **dest,
                              struct reftable_reader **readers, size_t n,
                              uint32_t hash_id)
{
  struct reftable_merged_table *m = NULL;
  uint64_t last_max = 0;
  uint64_t first_min = 0;

  for (size_t i = 0; i < n; i++) {
    uint64_t min = reftable_reader_min_update_index(readers[i]);
    uint64_t max = reftable_reader_max_update_index(readers[i]);

    if (reftable_reader_hash_id(readers[i]) != hash_id) {
      return REFTABLE_FORMAT_ERROR;
    }
    if (i == 0 || min < first_min) {
      first_min = min;
    }
    if (i == 0 || max > last_max) {
      last_max = max;
    }
  }

  REFTABLE_CALLOC_ARRAY(m, 1);
  m->readers = readers;
  m->readers_len = n;
  m->min = first_min;
  m->max = last_max;
  m->hash_id = hash_id;
  *dest = m;
  return 0;
}

void reftable_merged_table_free(struct reftable_merged_table *mt)
{
  if (!mt)
    return;
  /* The readers array is borrowed, not owned, so it is not freed here. */
  reftable_free(mt);
}

uint64_t
reftable_merged_table_max_update_index(struct reftable_merged_table *mt)
{
  return mt->max;
}

uint64_t
reftable_merged_table_min_update_index(struct reftable_merged_table *mt)
{
  return mt->min;
}

void merged_table_init_iter(struct reftable_merged_table *mt,
                            struct reftable_iterator *it,
                            uint8_t typ)
{
  struct merged_iter *mi = reftable_malloc(sizeof(*mi));
  merged_iter_init(mi, mt, typ);
  iterator_from_merged_iter(it, mi);
}

void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt,
                                             struct reftable_iterator *it)
{
  merged_table_init_iter(mt, it, BLOCK_TYPE_REF);
}

void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt,
                                             struct reftable_iterator *it)
{
  merged_table_init_iter(mt, it, BLOCK_TYPE_LOG);
}

uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt)
{
  return mt->hash_id;
}
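
Editor's note: as a rough caller-side sketch (not part of merged.c), the public API above composes as follows. This assumes the readers array is set up elsewhere, that the iterator helpers reftable_iterator_seek_ref() and reftable_iterator_next_ref() from reftable-iterator.h behave as in upstream git (0 on success, 1 on exhaustion, negative on error), and that hash_id matches the value the tables were written with:

#include <stdio.h>

#include "reftable-merged.h"
#include "reftable-iterator.h"
#include "reftable-record.h"

/* Sketch: print every ref visible in a stack of n readers. */
static int list_refs(struct reftable_reader **readers, size_t n,
                     uint32_t hash_id)
{
  struct reftable_merged_table *mt = NULL;
  struct reftable_iterator it = { 0 }; /* must be zeroed: see assert(!it->ops) */
  struct reftable_ref_record ref = { 0 };
  int err;

  err = reftable_merged_table_new(&mt, readers, n, hash_id);
  if (err < 0)
    return err;

  /* Readers are merged on the fly; newer tables shadow older ones. */
  reftable_merged_table_init_ref_iterator(mt, &it);
  err = reftable_iterator_seek_ref(&it, ""); /* seek to the first ref */
  while (!err && !(err = reftable_iterator_next_ref(&it, &ref)))
    printf("%s\n", ref.refname);

  reftable_ref_record_release(&ref);
  reftable_iterator_destroy(&it);
  reftable_merged_table_free(mt);
  return err < 0 ? err : 0;
}

Because reftable_merged_table_free() does not take ownership of the readers, the caller remains responsible for releasing them afterwards.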