/src/git/reftable/merged.c
/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "merged.h"

#include "constants.h"
#include "iter.h"
#include "pq.h"
#include "reader.h"
#include "record.h"
#include "reftable-merged.h"
#include "reftable-error.h"
#include "system.h"

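/*
 * A merged table presents a stack of reftables as a single, virtually
 * merged table. Iteration is a k-way merge: one sub-iterator per reader
 * feeds a priority queue that always yields the record with the smallest
 * key. When several tables contain a record for the same key, the
 * duplicates are skipped below so that each key is returned only once;
 * which table wins on such ties is decided by the pqueue's ordering (see
 * pq.c).
 */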
struct merged_subiter {
	struct reftable_iterator iter;
	struct reftable_record rec;
};

struct merged_iter {
	struct merged_subiter *subiters;
	struct merged_iter_pqueue pq;
	size_t subiters_len;
	int suppress_deletions;
	ssize_t advance_index;
};

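/*
 * Initialize a merged iterator over all readers of `mt` for records of
 * type `typ`. advance_index == -1 means that no sub-iterator is currently
 * pending re-insertion into the priority queue.
 */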
static void merged_iter_init(struct merged_iter *mi,
			     struct reftable_merged_table *mt,
			     uint8_t typ)
{
	memset(mi, 0, sizeof(*mi));
	mi->advance_index = -1;
	mi->suppress_deletions = mt->suppress_deletions;

	REFTABLE_CALLOC_ARRAY(mi->subiters, mt->readers_len);
	for (size_t i = 0; i < mt->readers_len; i++) {
		reftable_record_init(&mi->subiters[i].rec, typ);
		reader_init_iter(mt->readers[i], &mi->subiters[i].iter, typ);
	}
	mi->subiters_len = mt->readers_len;
}

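/* Release the priority queue and all sub-iterators owned by `mi`. */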
static void merged_iter_close(void *p)
{
	struct merged_iter *mi = p;

	merged_iter_pqueue_release(&mi->pq);
	for (size_t i = 0; i < mi->subiters_len; i++) {
		reftable_iterator_destroy(&mi->subiters[i].iter);
		reftable_record_release(&mi->subiters[i].rec);
	}
	reftable_free(mi->subiters);
}

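/*
 * Advance sub-iterator `idx` and, if it yielded a record, re-add that
 * record to the priority queue. A positive return value indicates that
 * the sub-iterator is exhausted; negative values are errors.
 */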
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
{
	struct pq_entry e = {
		.index = idx,
		.rec = &mi->subiters[idx].rec,
	};
	int err;

	err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
	if (err)
		return err;

	merged_iter_pqueue_add(&mi->pq, &e);
	return 0;
}

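/*
 * Seek all sub-iterators to `want` and prime the priority queue with the
 * first record of each sub-iterator that has not been exhausted yet.
 */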
static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want)
{
	int err;

	mi->advance_index = -1;

	for (size_t i = 0; i < mi->subiters_len; i++) {
		err = iterator_seek(&mi->subiters[i].iter, want);
		if (err < 0)
			return err;
		if (err > 0)
			continue;

		err = merged_iter_advance_subiter(mi, i);
		if (err < 0)
			return err;
	}

	return 0;
}

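/*
 * Yield the next record of the merged iteration: pop the smallest entry
 * off the priority queue, then discard queued records carrying the same
 * key so each key is surfaced exactly once. Returns 1 when all
 * sub-iterators are drained.
 */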
static int merged_iter_next_entry(struct merged_iter *mi,
				  struct reftable_record *rec)
{
	struct pq_entry entry = { 0 };
	int err = 0, empty;

	empty = merged_iter_pqueue_is_empty(mi->pq);

	if (mi->advance_index >= 0) {
		/*
		 * When the pqueue is empty, we are down to a single subiter.
		 * There is no need to use the pqueue in that case anymore, as
		 * we know that the remaining subiter already returns entries
		 * in the correct order.
		 *
		 * While this may sound like a very specific edge case, it
		 * happens more frequently than you might think. Most
		 * repositories end up having a single large base table that
		 * contains most of the refs, so it is likely that we exhaust
		 * all subiters except for the one reading that base table.
		 */
		if (empty)
			return iterator_next(&mi->subiters[mi->advance_index].iter,
					     rec);

		err = merged_iter_advance_subiter(mi, mi->advance_index);
		if (err < 0)
			return err;
		if (!err)
			empty = 0;
		mi->advance_index = -1;
	}

	if (empty)
		return 1;

	entry = merged_iter_pqueue_remove(&mi->pq);

	/*
	 * One can also use reftable as datacenter-local storage, where the
	 * ref database is maintained in a globally consistent database (e.g.
	 * CockroachDB or Spanner). In this scenario, replication delays
	 * together with compaction may cause newer tables to contain older
	 * entries. In such a deployment, the loop below must be changed to
	 * collect all entries for the same key and return the newest one.
	 */
	while (!merged_iter_pqueue_is_empty(mi->pq)) {
		struct pq_entry top = merged_iter_pqueue_top(mi->pq);
		int cmp;

		cmp = reftable_record_cmp(top.rec, entry.rec);
		if (cmp > 0)
			break;

		merged_iter_pqueue_remove(&mi->pq);
		err = merged_iter_advance_subiter(mi, top.index);
		if (err < 0)
			return err;
	}

	mi->advance_index = entry.index;
	SWAP(*rec, *entry.rec);
	return 0;
}

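/*
 * Thin adapters with void-pointer signatures so that the functions above
 * can be wired into the generic iterator vtable.
 */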
static int merged_iter_seek_void(void *it, struct reftable_record *want)
{
	return merged_iter_seek(it, want);
}

static int merged_iter_next_void(void *p, struct reftable_record *rec)
{
	struct merged_iter *mi = p;

	while (1) {
		int err = merged_iter_next_entry(mi, rec);
		if (err)
			return err;
		if (mi->suppress_deletions && reftable_record_is_deletion(rec))
			continue;
		return 0;
	}
}

static struct reftable_iterator_vtable merged_iter_vtable = {
	.seek = &merged_iter_seek_void,
	.next = &merged_iter_next_void,
	.close = &merged_iter_close,
};

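/* Bind the merged iterator `mi` to the generic iterator `it`. */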
static void iterator_from_merged_iter(struct reftable_iterator *it,
				      struct merged_iter *mi)
{
	assert(!it->ops);
	it->iter_arg = mi;
	it->ops = &merged_iter_vtable;
}

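/*
 * Create a merged table over `readers`. All readers must match `hash_id`,
 * and the merged table's update-index range is computed as the union of
 * the readers' ranges. The readers array is stored as-is; it is neither
 * copied nor freed by reftable_merged_table_free().
 */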
int reftable_merged_table_new(struct reftable_merged_table **dest,
			      struct reftable_reader **readers, size_t n,
			      uint32_t hash_id)
{
	struct reftable_merged_table *m = NULL;
	uint64_t last_max = 0;
	uint64_t first_min = 0;

	for (size_t i = 0; i < n; i++) {
		uint64_t min = reftable_reader_min_update_index(readers[i]);
		uint64_t max = reftable_reader_max_update_index(readers[i]);

		if (reftable_reader_hash_id(readers[i]) != hash_id) {
			return REFTABLE_FORMAT_ERROR;
		}
		if (i == 0 || min < first_min) {
			first_min = min;
		}
		if (i == 0 || max > last_max) {
			last_max = max;
		}
	}

	REFTABLE_CALLOC_ARRAY(m, 1);
	m->readers = readers;
	m->readers_len = n;
	m->min = first_min;
	m->max = last_max;
	m->hash_id = hash_id;
	*dest = m;
	return 0;
}

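/* Free the merged table itself; the underlying readers are left alone. */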
void reftable_merged_table_free(struct reftable_merged_table *mt)
{
	if (!mt)
		return;
	reftable_free(mt);
}

uint64_t
reftable_merged_table_max_update_index(struct reftable_merged_table *mt)
{
	return mt->max;
}

uint64_t
reftable_merged_table_min_update_index(struct reftable_merged_table *mt)
{
	return mt->min;
}

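/*
 * Allocate a merged iterator for records of type `typ` and hand it to the
 * generic iterator `it`.
 */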
void merged_table_init_iter(struct reftable_merged_table *mt,
			    struct reftable_iterator *it,
			    uint8_t typ)
{
	struct merged_iter *mi = reftable_malloc(sizeof(*mi));
	merged_iter_init(mi, mt, typ);
	iterator_from_merged_iter(it, mi);
}

void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt,
					     struct reftable_iterator *it)
{
	merged_table_init_iter(mt, it, BLOCK_TYPE_REF);
}

void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt,
					     struct reftable_iterator *it)
{
	merged_table_init_iter(mt, it, BLOCK_TYPE_LOG);
}

uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt)
{
	return mt->hash_id;
}
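
/*
 * Illustrative usage sketch (comment only, not compiled): iterating all
 * refs of a merged table built from an existing array of readers. The
 * `readers`/`readers_len` variables are assumed to come from the caller,
 * and error handling is abbreviated.
 *
 *	struct reftable_merged_table *mt = NULL;
 *	struct reftable_iterator it = { 0 };
 *	struct reftable_ref_record ref = { 0 };
 *	int err;
 *
 *	err = reftable_merged_table_new(&mt, readers, readers_len,
 *					reftable_reader_hash_id(readers[0]));
 *	if (err < 0)
 *		return err;
 *
 *	reftable_merged_table_init_ref_iterator(mt, &it);
 *	err = reftable_iterator_seek_ref(&it, "");
 *	while (!err) {
 *		err = reftable_iterator_next_ref(&it, &ref);
 *		if (err)
 *			break;
 *		printf("%s\n", ref.refname);
 *	}
 *
 *	reftable_ref_record_release(&ref);
 *	reftable_iterator_destroy(&it);
 *	reftable_merged_table_free(mt);
 */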