/src/samba/lib/tdb/common/tdb.c
Line | Count | Source |
1 | | /* |
2 | | Unix SMB/CIFS implementation. |
3 | | |
4 | | trivial database library |
5 | | |
6 | | Copyright (C) Andrew Tridgell 1999-2005 |
7 | | Copyright (C) Paul `Rusty' Russell 2000 |
8 | | Copyright (C) Jeremy Allison 2000-2003 |
9 | | |
10 | | ** NOTE! The following LGPL license applies to the tdb |
11 | | ** library. This does NOT imply that all of Samba is released |
12 | | ** under the LGPL |
13 | | |
14 | | This library is free software; you can redistribute it and/or |
15 | | modify it under the terms of the GNU Lesser General Public |
16 | | License as published by the Free Software Foundation; either |
17 | | version 3 of the License, or (at your option) any later version. |
18 | | |
19 | | This library is distributed in the hope that it will be useful, |
20 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
22 | | Lesser General Public License for more details. |
23 | | |
24 | | You should have received a copy of the GNU Lesser General Public |
25 | | License along with this library; if not, see <http://www.gnu.org/licenses/>. |
26 | | */ |
27 | | |
28 | | #include "tdb_private.h" |
29 | | |
/* Defined without an initializer, so it is zero-initialized; _tdb_fetch()
   returns it when the key lookup fails. */
_PUBLIC_ TDB_DATA tdb_null;
31 | | |
32 | | /* |
33 | | non-blocking increment of the tdb sequence number if the tdb has been opened using |
34 | | the TDB_SEQNUM flag |
35 | | */ |
36 | | _PUBLIC_ void tdb_increment_seqnum_nonblock(struct tdb_context *tdb) |
37 | 0 | { |
38 | 0 | tdb_off_t seqnum=0; |
39 | |
|
40 | 0 | if (!(tdb->flags & TDB_SEQNUM)) { |
41 | 0 | return; |
42 | 0 | } |
43 | | |
44 | | /* we ignore errors from this, as we have no sane way of |
45 | | dealing with them. |
46 | | */ |
47 | 0 | tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); |
48 | 0 | seqnum++; |
49 | 0 | tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum); |
50 | 0 | } |
51 | | |
52 | | /* |
53 | | increment the tdb sequence number if the tdb has been opened using |
54 | | the TDB_SEQNUM flag |
55 | | */ |
56 | | static void tdb_increment_seqnum(struct tdb_context *tdb) |
57 | 0 | { |
58 | 0 | if (!(tdb->flags & TDB_SEQNUM)) { |
59 | 0 | return; |
60 | 0 | } |
61 | | |
62 | 0 | if (tdb->transaction != NULL) { |
63 | 0 | tdb_increment_seqnum_nonblock(tdb); |
64 | 0 | return; |
65 | 0 | } |
66 | | |
67 | 0 | #if defined(HAVE___ATOMIC_ADD_FETCH) && defined(HAVE___ATOMIC_ADD_LOAD) |
68 | 0 | if (tdb->map_ptr != NULL) { |
69 | 0 | uint32_t *pseqnum = (uint32_t *)( |
70 | 0 | TDB_SEQNUM_OFS + (char *)tdb->map_ptr); |
71 | 0 | __atomic_add_fetch(pseqnum, 1, __ATOMIC_SEQ_CST); |
72 | 0 | return; |
73 | 0 | } |
74 | 0 | #endif |
75 | | |
76 | 0 | if (tdb_nest_lock(tdb, TDB_SEQNUM_OFS, F_WRLCK, |
77 | 0 | TDB_LOCK_WAIT|TDB_LOCK_PROBE) != 0) { |
78 | 0 | return; |
79 | 0 | } |
80 | | |
81 | 0 | tdb_increment_seqnum_nonblock(tdb); |
82 | |
|
83 | 0 | tdb_nest_unlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, false); |
84 | 0 | } |
85 | | |
86 | | static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data) |
87 | 0 | { |
88 | 0 | return memcmp(data.dptr, key.dptr, data.dsize); |
89 | 0 | } |
90 | | |
91 | | void tdb_chainwalk_init(struct tdb_chainwalk_ctx *ctx, tdb_off_t ptr) |
92 | 0 | { |
93 | 0 | *ctx = (struct tdb_chainwalk_ctx) { .slow_ptr = ptr }; |
94 | 0 | } |
95 | | |
96 | | bool tdb_chainwalk_check(struct tdb_context *tdb, |
97 | | struct tdb_chainwalk_ctx *ctx, |
98 | | tdb_off_t next_ptr) |
99 | 0 | { |
100 | 0 | int ret; |
101 | |
|
102 | 0 | if (ctx->slow_chase) { |
103 | 0 | ret = tdb_ofs_read(tdb, ctx->slow_ptr, &ctx->slow_ptr); |
104 | 0 | if (ret == -1) { |
105 | 0 | return false; |
106 | 0 | } |
107 | 0 | } |
108 | 0 | ctx->slow_chase = !ctx->slow_chase; |
109 | |
|
110 | 0 | if (next_ptr == ctx->slow_ptr) { |
111 | 0 | tdb->ecode = TDB_ERR_CORRUPT; |
112 | 0 | TDB_LOG((tdb, TDB_DEBUG_ERROR, |
113 | 0 | "tdb_chainwalk_check: circular chain\n")); |
114 | 0 | return false; |
115 | 0 | } |
116 | | |
117 | 0 | return true; |
118 | 0 | } |
119 | | |
120 | | /* Returns 0 on fail. On success, return offset of record, and fills |
121 | | in rec */ |
122 | | static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, |
123 | | struct tdb_record *r) |
124 | 0 | { |
125 | 0 | tdb_off_t rec_ptr; |
126 | 0 | struct tdb_chainwalk_ctx chainwalk; |
127 | | |
128 | | /* read in the hash top */ |
129 | 0 | if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) |
130 | 0 | return 0; |
131 | | |
132 | 0 | tdb_chainwalk_init(&chainwalk, rec_ptr); |
133 | | |
134 | | /* keep looking until we find the right record */ |
135 | 0 | while (rec_ptr) { |
136 | 0 | bool ok; |
137 | |
|
138 | 0 | if (tdb_rec_read(tdb, rec_ptr, r) == -1) |
139 | 0 | return 0; |
140 | | |
141 | 0 | if (!TDB_DEAD(r) && hash==r->full_hash |
142 | 0 | && key.dsize==r->key_len |
143 | 0 | && tdb_parse_data(tdb, key, rec_ptr + sizeof(*r), |
144 | 0 | r->key_len, tdb_key_compare, |
145 | 0 | NULL) == 0) { |
146 | 0 | return rec_ptr; |
147 | 0 | } |
148 | 0 | rec_ptr = r->next; |
149 | |
|
150 | 0 | ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr); |
151 | 0 | if (!ok) { |
152 | 0 | return 0; |
153 | 0 | } |
154 | 0 | } |
155 | 0 | tdb->ecode = TDB_ERR_NOEXIST; |
156 | 0 | return 0; |
157 | 0 | } |
158 | | |
159 | | /* As tdb_find, but if you succeed, keep the lock */ |
160 | | tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, |
161 | | struct tdb_record *rec) |
162 | 0 | { |
163 | 0 | uint32_t rec_ptr; |
164 | |
|
165 | 0 | if (tdb_lock(tdb, BUCKET(hash), locktype) == -1) |
166 | 0 | return 0; |
167 | 0 | if (!(rec_ptr = tdb_find(tdb, key, hash, rec))) |
168 | 0 | tdb_unlock(tdb, BUCKET(hash), locktype); |
169 | 0 | return rec_ptr; |
170 | 0 | } |
171 | | |
172 | | static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key); |
173 | | |
/* State handed to tdb_update_hash_cmp() through its private_data pointer */
struct tdb_update_hash_state {
	const TDB_DATA *dbufs;	/* buffers to compare against the stored data */
	int num_dbufs;		/* number of entries in dbufs */
	tdb_len_t dbufs_len;	/* length the stored data must have to match */
};
179 | | |
180 | | static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data) |
181 | 0 | { |
182 | 0 | struct tdb_update_hash_state *state = private_data; |
183 | 0 | unsigned char *dptr = data.dptr; |
184 | 0 | int i; |
185 | |
|
186 | 0 | if (state->dbufs_len != data.dsize) { |
187 | 0 | return -1; |
188 | 0 | } |
189 | | |
190 | 0 | for (i=0; i<state->num_dbufs; i++) { |
191 | 0 | TDB_DATA dbuf = state->dbufs[i]; |
192 | 0 | if( dbuf.dsize > 0) { |
193 | 0 | int ret; |
194 | 0 | ret = memcmp(dptr, dbuf.dptr, dbuf.dsize); |
195 | 0 | if (ret != 0) { |
196 | 0 | return -1; |
197 | 0 | } |
198 | 0 | dptr += dbuf.dsize; |
199 | 0 | } |
200 | 0 | } |
201 | | |
202 | 0 | return 0; |
203 | 0 | } |
204 | | |
205 | | /* update an entry in place - this only works if the new data size |
206 | | is <= the old data size and the key exists. |
207 | | on failure return -1. |
208 | | */ |
209 | | static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, |
210 | | uint32_t hash, |
211 | | const TDB_DATA *dbufs, int num_dbufs, |
212 | | tdb_len_t dbufs_len) |
213 | 0 | { |
214 | 0 | struct tdb_record rec; |
215 | 0 | tdb_off_t rec_ptr, ofs; |
216 | 0 | int i; |
217 | | |
218 | | /* find entry */ |
219 | 0 | if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) |
220 | 0 | return -1; |
221 | | |
222 | | /* it could be an exact duplicate of what is there - this is |
223 | | * surprisingly common (eg. with a ldb re-index). */ |
224 | 0 | if (rec.data_len == dbufs_len) { |
225 | 0 | struct tdb_update_hash_state state = { |
226 | 0 | .dbufs = dbufs, .num_dbufs = num_dbufs, |
227 | 0 | .dbufs_len = dbufs_len |
228 | 0 | }; |
229 | 0 | int ret; |
230 | |
|
231 | 0 | ret = tdb_parse_record(tdb, key, tdb_update_hash_cmp, &state); |
232 | 0 | if (ret == 0) { |
233 | 0 | return 0; |
234 | 0 | } |
235 | 0 | } |
236 | | |
237 | | /* must be long enough key, data and tailer */ |
238 | 0 | if (rec.rec_len < key.dsize + dbufs_len + sizeof(tdb_off_t)) { |
239 | 0 | tdb->ecode = TDB_SUCCESS; /* Not really an error */ |
240 | 0 | return -1; |
241 | 0 | } |
242 | | |
243 | 0 | ofs = rec_ptr + sizeof(rec) + rec.key_len; |
244 | |
|
245 | 0 | for (i=0; i<num_dbufs; i++) { |
246 | 0 | TDB_DATA dbuf = dbufs[i]; |
247 | 0 | int ret; |
248 | |
|
249 | 0 | ret = tdb->methods->tdb_write(tdb, ofs, dbuf.dptr, dbuf.dsize); |
250 | 0 | if (ret == -1) { |
251 | 0 | return -1; |
252 | 0 | } |
253 | 0 | ofs += dbuf.dsize; |
254 | 0 | } |
255 | | |
256 | 0 | if (dbufs_len != rec.data_len) { |
257 | | /* update size */ |
258 | 0 | rec.data_len = dbufs_len; |
259 | 0 | return tdb_rec_write(tdb, rec_ptr, &rec); |
260 | 0 | } |
261 | | |
262 | 0 | return 0; |
263 | 0 | } |
264 | | |
265 | | /* find an entry in the database given a key */ |
266 | | /* If an entry doesn't exist tdb_err will be set to |
267 | | * TDB_ERR_NOEXIST. If a key has no data attached |
268 | | * then the TDB_DATA will have zero length but |
269 | | * a non-zero pointer |
270 | | */ |
271 | | static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key) |
272 | 0 | { |
273 | 0 | tdb_off_t rec_ptr; |
274 | 0 | struct tdb_record rec; |
275 | 0 | TDB_DATA ret; |
276 | 0 | uint32_t hash; |
277 | | |
278 | | /* find which hash bucket it is in */ |
279 | 0 | hash = tdb->hash_fn(&key); |
280 | 0 | if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) |
281 | 0 | return tdb_null; |
282 | | |
283 | 0 | ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len, |
284 | 0 | rec.data_len); |
285 | 0 | ret.dsize = rec.data_len; |
286 | 0 | tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); |
287 | 0 | return ret; |
288 | 0 | } |
289 | | |
290 | | _PUBLIC_ TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key) |
291 | 0 | { |
292 | 0 | TDB_DATA ret = _tdb_fetch(tdb, key); |
293 | |
|
294 | 0 | tdb_trace_1rec_retrec(tdb, "tdb_fetch", key, ret); |
295 | 0 | return ret; |
296 | 0 | } |
297 | | |
298 | | /* |
299 | | * Find an entry in the database and hand the record's data to a parsing |
300 | | * function. The parsing function is executed under the chain read lock, so it |
301 | | * should be fast and should not block on other syscalls. |
302 | | * |
303 | | * DON'T CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS. |
304 | | * |
305 | | * For mmapped tdb's that do not have a transaction open it points the parsing |
306 | | * function directly at the mmap area, it avoids the malloc/memcpy in this |
307 | | * case. If a transaction is open or no mmap is available, it has to do |
308 | | * malloc/read/parse/free. |
309 | | * |
310 | | * This is interesting for all readers of potentially large data structures in |
311 | | * the tdb records, ldb indexes being one example. |
312 | | * |
313 | | * Return -1 if the record was not found. |
314 | | */ |
315 | | |
316 | | _PUBLIC_ int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key, |
317 | | int (*parser)(TDB_DATA key, TDB_DATA data, |
318 | | void *private_data), |
319 | | void *private_data) |
320 | 0 | { |
321 | 0 | tdb_off_t rec_ptr; |
322 | 0 | struct tdb_record rec; |
323 | 0 | int ret; |
324 | 0 | uint32_t hash; |
325 | | |
326 | | /* find which hash bucket it is in */ |
327 | 0 | hash = tdb->hash_fn(&key); |
328 | |
|
329 | 0 | if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) { |
330 | | /* record not found */ |
331 | 0 | tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, -1); |
332 | 0 | tdb->ecode = TDB_ERR_NOEXIST; |
333 | 0 | return -1; |
334 | 0 | } |
335 | 0 | tdb_trace_1rec_ret(tdb, "tdb_parse_record", key, 0); |
336 | |
|
337 | 0 | ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len, |
338 | 0 | rec.data_len, parser, private_data); |
339 | |
|
340 | 0 | tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); |
341 | |
|
342 | 0 | return ret; |
343 | 0 | } |
344 | | |
345 | | /* check if an entry in the database exists |
346 | | |
347 | | note that 1 is returned if the key is found and 0 is returned if not found |
348 | | this doesn't match the conventions in the rest of this module, but is |
349 | | compatible with gdbm |
350 | | */ |
351 | | static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash) |
352 | 0 | { |
353 | 0 | struct tdb_record rec; |
354 | |
|
355 | 0 | if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0) |
356 | 0 | return 0; |
357 | 0 | tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK); |
358 | 0 | return 1; |
359 | 0 | } |
360 | | |
361 | | _PUBLIC_ int tdb_exists(struct tdb_context *tdb, TDB_DATA key) |
362 | 0 | { |
363 | 0 | uint32_t hash = tdb->hash_fn(&key); |
364 | 0 | int ret; |
365 | |
|
366 | 0 | ret = tdb_exists_hash(tdb, key, hash); |
367 | 0 | tdb_trace_1rec_ret(tdb, "tdb_exists", key, ret); |
368 | 0 | return ret; |
369 | 0 | } |
370 | | |
371 | | /* |
372 | | * Move a dead record to the freelist. The hash chain and freelist |
373 | | * must be locked. |
374 | | */ |
375 | | static int tdb_del_dead(struct tdb_context *tdb, |
376 | | uint32_t last_ptr, |
377 | | uint32_t rec_ptr, |
378 | | struct tdb_record *rec, |
379 | | bool *deleted) |
380 | 0 | { |
381 | 0 | int ret; |
382 | |
|
383 | 0 | ret = tdb_write_lock_record(tdb, rec_ptr); |
384 | 0 | if (ret == -1) { |
385 | | /* Someone traversing here: Just leave it dead */ |
386 | 0 | return 0; |
387 | 0 | } |
388 | 0 | ret = tdb_write_unlock_record(tdb, rec_ptr); |
389 | 0 | if (ret == -1) { |
390 | 0 | return -1; |
391 | 0 | } |
392 | 0 | ret = tdb_ofs_write(tdb, last_ptr, &rec->next); |
393 | 0 | if (ret == -1) { |
394 | 0 | return -1; |
395 | 0 | } |
396 | | |
397 | 0 | *deleted = true; |
398 | |
|
399 | 0 | ret = tdb_free(tdb, rec_ptr, rec); |
400 | 0 | return ret; |
401 | 0 | } |
402 | | |
403 | | /* |
404 | | * Walk the hash chain and leave tdb->max_dead_records around. Move |
405 | | * the rest of dead records to the freelist. |
406 | | */ |
407 | | int tdb_trim_dead(struct tdb_context *tdb, uint32_t hash) |
408 | 0 | { |
409 | 0 | struct tdb_chainwalk_ctx chainwalk; |
410 | 0 | struct tdb_record rec; |
411 | 0 | tdb_off_t last_ptr, rec_ptr; |
412 | 0 | bool locked_freelist = false; |
413 | 0 | int num_dead = 0; |
414 | 0 | int ret; |
415 | |
|
416 | 0 | last_ptr = TDB_HASH_TOP(hash); |
417 | | |
418 | | /* |
419 | | * Init chainwalk with the pointer to the hash top. It might |
420 | | * be that the very first record in the chain is a dead one |
421 | | * that we have to delete. |
422 | | */ |
423 | 0 | tdb_chainwalk_init(&chainwalk, last_ptr); |
424 | |
|
425 | 0 | ret = tdb_ofs_read(tdb, last_ptr, &rec_ptr); |
426 | 0 | if (ret == -1) { |
427 | 0 | return -1; |
428 | 0 | } |
429 | | |
430 | 0 | while (rec_ptr != 0) { |
431 | 0 | bool deleted = false; |
432 | 0 | uint32_t next; |
433 | |
|
434 | 0 | ret = tdb_rec_read(tdb, rec_ptr, &rec); |
435 | 0 | if (ret == -1) { |
436 | 0 | goto fail; |
437 | 0 | } |
438 | | |
439 | | /* |
440 | | * Make a copy of rec.next: Further down we might |
441 | | * delete and put the record on the freelist. Make |
442 | | * sure that modifications in that code path can't |
443 | | * break the chainwalk here. |
444 | | */ |
445 | 0 | next = rec.next; |
446 | |
|
447 | 0 | if (rec.magic == TDB_DEAD_MAGIC) { |
448 | 0 | num_dead += 1; |
449 | |
|
450 | 0 | if (num_dead > tdb->max_dead_records) { |
451 | |
|
452 | 0 | if (!locked_freelist) { |
453 | | /* |
454 | | * Lock the freelist only if |
455 | | * it's really required. |
456 | | */ |
457 | 0 | ret = tdb_lock(tdb, -1, F_WRLCK); |
458 | 0 | if (ret == -1) { |
459 | 0 | goto fail; |
460 | 0 | }; |
461 | 0 | locked_freelist = true; |
462 | 0 | } |
463 | | |
464 | 0 | ret = tdb_del_dead( |
465 | 0 | tdb, |
466 | 0 | last_ptr, |
467 | 0 | rec_ptr, |
468 | 0 | &rec, |
469 | 0 | &deleted); |
470 | |
|
471 | 0 | if (ret == -1) { |
472 | 0 | goto fail; |
473 | 0 | } |
474 | 0 | } |
475 | 0 | } |
476 | | |
477 | | /* |
478 | | * Don't do the chainwalk check if "rec_ptr" was |
479 | | * deleted. We reduced the chain, and the chainwalk |
480 | | * check might catch up early. Imagine a valid chain |
481 | | * with just dead records: We never can bump the |
482 | | * "slow" pointer in chainwalk_check, as there isn't |
483 | | * anything left to jump to and compare. |
484 | | */ |
485 | 0 | if (!deleted) { |
486 | 0 | bool ok; |
487 | |
|
488 | 0 | last_ptr = rec_ptr; |
489 | |
|
490 | 0 | ok = tdb_chainwalk_check(tdb, &chainwalk, next); |
491 | 0 | if (!ok) { |
492 | 0 | ret = -1; |
493 | 0 | goto fail; |
494 | 0 | } |
495 | 0 | } |
496 | 0 | rec_ptr = next; |
497 | 0 | } |
498 | 0 | ret = 0; |
499 | 0 | fail: |
500 | 0 | if (locked_freelist) { |
501 | 0 | tdb_unlock(tdb, -1, F_WRLCK); |
502 | 0 | } |
503 | 0 | return ret; |
504 | 0 | } |
505 | | |
506 | | /* delete an entry in the database given a key */ |
507 | | static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash) |
508 | 0 | { |
509 | 0 | tdb_off_t rec_ptr; |
510 | 0 | struct tdb_record rec; |
511 | 0 | int ret; |
512 | |
|
513 | 0 | if (tdb->read_only || tdb->traverse_read) { |
514 | 0 | tdb->ecode = TDB_ERR_RDONLY; |
515 | 0 | return -1; |
516 | 0 | } |
517 | | |
518 | 0 | rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec); |
519 | 0 | if (rec_ptr == 0) { |
520 | 0 | return -1; |
521 | 0 | } |
522 | | |
523 | | /* |
524 | | * Mark the record dead |
525 | | */ |
526 | 0 | rec.magic = TDB_DEAD_MAGIC; |
527 | 0 | ret = tdb_rec_write(tdb, rec_ptr, &rec); |
528 | 0 | if (ret == -1) { |
529 | 0 | goto done; |
530 | 0 | } |
531 | | |
532 | 0 | tdb_increment_seqnum(tdb); |
533 | |
|
534 | 0 | ret = tdb_trim_dead(tdb, hash); |
535 | 0 | done: |
536 | 0 | if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0) |
537 | 0 | TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n")); |
538 | 0 | return ret; |
539 | 0 | } |
540 | | |
541 | | _PUBLIC_ int tdb_delete(struct tdb_context *tdb, TDB_DATA key) |
542 | 0 | { |
543 | 0 | uint32_t hash = tdb->hash_fn(&key); |
544 | 0 | int ret; |
545 | |
|
546 | 0 | ret = tdb_delete_hash(tdb, key, hash); |
547 | 0 | tdb_trace_1rec_ret(tdb, "tdb_delete", key, ret); |
548 | 0 | return ret; |
549 | 0 | } |
550 | | |
551 | | /* |
552 | | * See if we have a dead record around with enough space |
553 | | */ |
554 | | tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, |
555 | | struct tdb_record *r, tdb_len_t length, |
556 | | tdb_off_t *p_last_ptr) |
557 | 0 | { |
558 | 0 | tdb_off_t rec_ptr, last_ptr; |
559 | 0 | struct tdb_chainwalk_ctx chainwalk; |
560 | 0 | tdb_off_t best_rec_ptr = 0; |
561 | 0 | tdb_off_t best_last_ptr = 0; |
562 | 0 | struct tdb_record best = { .rec_len = UINT32_MAX }; |
563 | |
|
564 | 0 | length += sizeof(tdb_off_t); /* tailer */ |
565 | |
|
566 | 0 | last_ptr = TDB_HASH_TOP(hash); |
567 | | |
568 | | /* read in the hash top */ |
569 | 0 | if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1) |
570 | 0 | return 0; |
571 | | |
572 | 0 | tdb_chainwalk_init(&chainwalk, rec_ptr); |
573 | | |
574 | | /* keep looking until we find the right record */ |
575 | 0 | while (rec_ptr) { |
576 | 0 | bool ok; |
577 | |
|
578 | 0 | if (tdb_rec_read(tdb, rec_ptr, r) == -1) |
579 | 0 | return 0; |
580 | | |
581 | 0 | if (TDB_DEAD(r) && (r->rec_len >= length) && |
582 | 0 | (r->rec_len < best.rec_len)) { |
583 | 0 | best_rec_ptr = rec_ptr; |
584 | 0 | best_last_ptr = last_ptr; |
585 | 0 | best = *r; |
586 | 0 | } |
587 | 0 | last_ptr = rec_ptr; |
588 | 0 | rec_ptr = r->next; |
589 | |
|
590 | 0 | ok = tdb_chainwalk_check(tdb, &chainwalk, rec_ptr); |
591 | 0 | if (!ok) { |
592 | 0 | return 0; |
593 | 0 | } |
594 | 0 | } |
595 | | |
596 | 0 | if (best.rec_len == UINT32_MAX) { |
597 | 0 | return 0; |
598 | 0 | } |
599 | | |
600 | 0 | *r = best; |
601 | 0 | *p_last_ptr = best_last_ptr; |
602 | 0 | return best_rec_ptr; |
603 | 0 | } |
604 | | |
605 | | static int _tdb_storev(struct tdb_context *tdb, TDB_DATA key, |
606 | | const TDB_DATA *dbufs, int num_dbufs, |
607 | | int flag, uint32_t hash) |
608 | 0 | { |
609 | 0 | struct tdb_record rec; |
610 | 0 | tdb_off_t rec_ptr, ofs; |
611 | 0 | tdb_len_t rec_len, dbufs_len; |
612 | 0 | int i; |
613 | 0 | int ret = -1; |
614 | |
|
615 | 0 | dbufs_len = 0; |
616 | |
|
617 | 0 | for (i=0; i<num_dbufs; i++) { |
618 | 0 | size_t dsize = dbufs[i].dsize; |
619 | |
|
620 | 0 | if ((dsize != 0) && (dbufs[i].dptr == NULL)) { |
621 | 0 | tdb->ecode = TDB_ERR_EINVAL; |
622 | 0 | goto fail; |
623 | 0 | } |
624 | | |
625 | 0 | dbufs_len += dsize; |
626 | 0 | if (dbufs_len < dsize) { |
627 | 0 | tdb->ecode = TDB_ERR_OOM; |
628 | 0 | goto fail; |
629 | 0 | } |
630 | 0 | } |
631 | | |
632 | 0 | rec_len = key.dsize + dbufs_len; |
633 | 0 | if ((rec_len < key.dsize) || (rec_len < dbufs_len)) { |
634 | 0 | tdb->ecode = TDB_ERR_OOM; |
635 | 0 | goto fail; |
636 | 0 | } |
637 | | |
638 | | /* check for it existing, on insert. */ |
639 | 0 | if (flag == TDB_INSERT) { |
640 | 0 | if (tdb_exists_hash(tdb, key, hash)) { |
641 | 0 | tdb->ecode = TDB_ERR_EXISTS; |
642 | 0 | goto fail; |
643 | 0 | } |
644 | 0 | } else { |
645 | | /* first try in-place update, on modify or replace. */ |
646 | 0 | if (tdb_update_hash(tdb, key, hash, dbufs, num_dbufs, |
647 | 0 | dbufs_len) == 0) { |
648 | 0 | goto done; |
649 | 0 | } |
650 | 0 | if (tdb->ecode == TDB_ERR_NOEXIST && |
651 | 0 | flag == TDB_MODIFY) { |
652 | | /* if the record doesn't exist and we are in TDB_MODIFY mode then |
653 | | we should fail the store */ |
654 | 0 | goto fail; |
655 | 0 | } |
656 | 0 | } |
657 | | /* reset the error code potentially set by the tdb_update_hash() */ |
658 | 0 | tdb->ecode = TDB_SUCCESS; |
659 | | |
660 | | /* delete any existing record - if it doesn't exist we don't |
661 | | care. Doing this first reduces fragmentation, and avoids |
662 | | coalescing with `allocated' block before it's updated. */ |
663 | 0 | if (flag != TDB_INSERT) |
664 | 0 | tdb_delete_hash(tdb, key, hash); |
665 | | |
666 | | /* we have to allocate some space */ |
667 | 0 | rec_ptr = tdb_allocate(tdb, hash, rec_len, &rec); |
668 | |
|
669 | 0 | if (rec_ptr == 0) { |
670 | 0 | goto fail; |
671 | 0 | } |
672 | | |
673 | | /* Read hash top into next ptr */ |
674 | 0 | if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1) |
675 | 0 | goto fail; |
676 | | |
677 | 0 | rec.key_len = key.dsize; |
678 | 0 | rec.data_len = dbufs_len; |
679 | 0 | rec.full_hash = hash; |
680 | 0 | rec.magic = TDB_MAGIC; |
681 | |
|
682 | 0 | ofs = rec_ptr; |
683 | | |
684 | | /* write out and point the top of the hash chain at it */ |
685 | 0 | ret = tdb_rec_write(tdb, ofs, &rec); |
686 | 0 | if (ret == -1) { |
687 | 0 | goto fail; |
688 | 0 | } |
689 | 0 | ofs += sizeof(rec); |
690 | |
|
691 | 0 | ret = tdb->methods->tdb_write(tdb, ofs, key.dptr, key.dsize); |
692 | 0 | if (ret == -1) { |
693 | 0 | goto fail; |
694 | 0 | } |
695 | 0 | ofs += key.dsize; |
696 | |
|
697 | 0 | for (i=0; i<num_dbufs; i++) { |
698 | 0 | if (dbufs[i].dsize == 0) { |
699 | 0 | continue; |
700 | 0 | } |
701 | | |
702 | 0 | ret = tdb->methods->tdb_write(tdb, ofs, dbufs[i].dptr, |
703 | 0 | dbufs[i].dsize); |
704 | 0 | if (ret == -1) { |
705 | 0 | goto fail; |
706 | 0 | } |
707 | 0 | ofs += dbufs[i].dsize; |
708 | 0 | } |
709 | | |
710 | 0 | ret = tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr); |
711 | 0 | if (ret == -1) { |
712 | | /* Need to tdb_unallocate() here */ |
713 | 0 | goto fail; |
714 | 0 | } |
715 | | |
716 | 0 | done: |
717 | 0 | ret = 0; |
718 | 0 | fail: |
719 | 0 | if (ret == 0) { |
720 | 0 | tdb_increment_seqnum(tdb); |
721 | 0 | } |
722 | 0 | return ret; |
723 | 0 | } |
724 | | |
725 | | static int _tdb_store(struct tdb_context *tdb, TDB_DATA key, |
726 | | TDB_DATA dbuf, int flag, uint32_t hash) |
727 | 0 | { |
728 | 0 | return _tdb_storev(tdb, key, &dbuf, 1, flag, hash); |
729 | 0 | } |
730 | | |
731 | | /* store an element in the database, replacing any existing element |
732 | | with the same key |
733 | | |
734 | | return 0 on success, -1 on failure |
735 | | */ |
736 | | _PUBLIC_ int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) |
737 | 0 | { |
738 | 0 | uint32_t hash; |
739 | 0 | int ret; |
740 | |
|
741 | 0 | if (tdb->read_only || tdb->traverse_read) { |
742 | 0 | tdb->ecode = TDB_ERR_RDONLY; |
743 | 0 | tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, -1); |
744 | 0 | return -1; |
745 | 0 | } |
746 | | |
747 | | /* find which hash bucket it is in */ |
748 | 0 | hash = tdb->hash_fn(&key); |
749 | 0 | if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) |
750 | 0 | return -1; |
751 | | |
752 | 0 | ret = _tdb_store(tdb, key, dbuf, flag, hash); |
753 | 0 | tdb_trace_2rec_flag_ret(tdb, "tdb_store", key, dbuf, flag, ret); |
754 | 0 | tdb_unlock(tdb, BUCKET(hash), F_WRLCK); |
755 | 0 | return ret; |
756 | 0 | } |
757 | | |
758 | | _PUBLIC_ int tdb_storev(struct tdb_context *tdb, TDB_DATA key, |
759 | | const TDB_DATA *dbufs, int num_dbufs, int flag) |
760 | 0 | { |
761 | 0 | uint32_t hash; |
762 | 0 | int ret; |
763 | |
|
764 | 0 | if (tdb->read_only || tdb->traverse_read) { |
765 | 0 | tdb->ecode = TDB_ERR_RDONLY; |
766 | 0 | tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key, |
767 | 0 | dbufs, num_dbufs, flag, -1); |
768 | 0 | return -1; |
769 | 0 | } |
770 | | |
771 | | /* find which hash bucket it is in */ |
772 | 0 | hash = tdb->hash_fn(&key); |
773 | 0 | if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) |
774 | 0 | return -1; |
775 | | |
776 | 0 | ret = _tdb_storev(tdb, key, dbufs, num_dbufs, flag, hash); |
777 | 0 | tdb_trace_1plusn_rec_flag_ret(tdb, "tdb_storev", key, |
778 | 0 | dbufs, num_dbufs, flag, -1); |
779 | 0 | tdb_unlock(tdb, BUCKET(hash), F_WRLCK); |
780 | 0 | return ret; |
781 | 0 | } |
782 | | |
783 | | /* Append to an entry. Create if not exist. */ |
784 | | _PUBLIC_ int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf) |
785 | 0 | { |
786 | 0 | uint32_t hash; |
787 | 0 | TDB_DATA dbufs[2]; |
788 | 0 | int ret = -1; |
789 | | |
790 | | /* find which hash bucket it is in */ |
791 | 0 | hash = tdb->hash_fn(&key); |
792 | 0 | if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1) |
793 | 0 | return -1; |
794 | | |
795 | 0 | dbufs[0] = _tdb_fetch(tdb, key); |
796 | 0 | dbufs[1] = new_dbuf; |
797 | |
|
798 | 0 | ret = _tdb_storev(tdb, key, dbufs, 2, 0, hash); |
799 | 0 | tdb_trace_2rec_retrec(tdb, "tdb_append", key, dbufs[0], dbufs[1]); |
800 | |
|
801 | 0 | tdb_unlock(tdb, BUCKET(hash), F_WRLCK); |
802 | 0 | SAFE_FREE(dbufs[0].dptr); |
803 | 0 | return ret; |
804 | 0 | } |
805 | | |
806 | | |
807 | | /* |
808 | | return the name of the current tdb file |
809 | | useful for external logging functions |
810 | | */ |
811 | | _PUBLIC_ const char *tdb_name(struct tdb_context *tdb) |
812 | 0 | { |
813 | 0 | return tdb->name; |
814 | 0 | } |
815 | | |
816 | | /* |
817 | | return the underlying file descriptor being used by tdb, or -1 |
818 | | useful for external routines that want to check the device/inode |
819 | | of the fd |
820 | | */ |
821 | | _PUBLIC_ int tdb_fd(struct tdb_context *tdb) |
822 | 0 | { |
823 | 0 | return tdb->fd; |
824 | 0 | } |
825 | | |
826 | | /* |
827 | | return the current logging function |
828 | | useful for external tdb routines that wish to log tdb errors |
829 | | */ |
830 | | _PUBLIC_ tdb_log_func tdb_log_fn(struct tdb_context *tdb) |
831 | 0 | { |
832 | 0 | return tdb->log.log_fn; |
833 | 0 | } |
834 | | |
835 | | |
836 | | /* |
837 | | get the tdb sequence number. Only makes sense if the writers opened |
838 | | with TDB_SEQNUM set. Note that this sequence number will wrap quite |
839 | | quickly, so it should only be used for a 'has something changed' |
840 | | test, not for code that relies on the count of the number of changes |
841 | | made. If you want a counter then use a tdb record. |
842 | | |
843 | | The aim of this sequence number is to allow for a very lightweight |
844 | | test of a possible tdb change. |
845 | | */ |
846 | | _PUBLIC_ int tdb_get_seqnum(struct tdb_context *tdb) |
847 | 0 | { |
848 | 0 | tdb_off_t seqnum=0; |
849 | |
|
850 | 0 | if (tdb->transaction != NULL) { |
851 | 0 | tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); |
852 | 0 | return seqnum; |
853 | 0 | } |
854 | | |
855 | 0 | #if defined(HAVE___ATOMIC_ADD_FETCH) && defined(HAVE___ATOMIC_ADD_LOAD) |
856 | 0 | if (tdb->map_ptr != NULL) { |
857 | 0 | uint32_t *pseqnum = (uint32_t *)( |
858 | 0 | TDB_SEQNUM_OFS + (char *)tdb->map_ptr); |
859 | 0 | uint32_t ret; |
860 | 0 | __atomic_load(pseqnum, &ret,__ATOMIC_SEQ_CST); |
861 | 0 | return ret; |
862 | 0 | } |
863 | 0 | #endif |
864 | | |
865 | 0 | tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); |
866 | 0 | return seqnum; |
867 | 0 | } |
868 | | |
869 | | _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb) |
870 | 0 | { |
871 | 0 | return tdb->hash_size; |
872 | 0 | } |
873 | | |
874 | | _PUBLIC_ size_t tdb_map_size(struct tdb_context *tdb) |
875 | 0 | { |
876 | 0 | return tdb->map_size; |
877 | 0 | } |
878 | | |
879 | | _PUBLIC_ int tdb_get_flags(struct tdb_context *tdb) |
880 | 0 | { |
881 | 0 | return tdb->flags; |
882 | 0 | } |
883 | | |
884 | | _PUBLIC_ void tdb_add_flags(struct tdb_context *tdb, unsigned flags) |
885 | 0 | { |
886 | 0 | if ((flags & TDB_ALLOW_NESTING) && |
887 | 0 | (flags & TDB_DISALLOW_NESTING)) { |
888 | 0 | tdb->ecode = TDB_ERR_NESTING; |
889 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_add_flags: " |
890 | 0 | "allow_nesting and disallow_nesting are not allowed together!")); |
891 | 0 | return; |
892 | 0 | } |
893 | | |
894 | 0 | if (flags & TDB_ALLOW_NESTING) { |
895 | 0 | tdb->flags &= ~TDB_DISALLOW_NESTING; |
896 | 0 | } |
897 | 0 | if (flags & TDB_DISALLOW_NESTING) { |
898 | 0 | tdb->flags &= ~TDB_ALLOW_NESTING; |
899 | 0 | } |
900 | |
|
901 | 0 | tdb->flags |= flags; |
902 | 0 | } |
903 | | |
904 | | _PUBLIC_ void tdb_remove_flags(struct tdb_context *tdb, unsigned flags) |
905 | 0 | { |
906 | 0 | if ((flags & TDB_ALLOW_NESTING) && |
907 | 0 | (flags & TDB_DISALLOW_NESTING)) { |
908 | 0 | tdb->ecode = TDB_ERR_NESTING; |
909 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " |
910 | 0 | "allow_nesting and disallow_nesting are not allowed together!")); |
911 | 0 | return; |
912 | 0 | } |
913 | | |
914 | 0 | if ((flags & TDB_NOLOCK) && |
915 | 0 | (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && |
916 | 0 | (tdb->mutexes == NULL)) { |
917 | 0 | tdb->ecode = TDB_ERR_LOCK; |
918 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " |
919 | 0 | "Can not remove NOLOCK flag on mutexed databases")); |
920 | 0 | return; |
921 | 0 | } |
922 | | |
923 | 0 | if (flags & TDB_ALLOW_NESTING) { |
924 | 0 | tdb->flags |= TDB_DISALLOW_NESTING; |
925 | 0 | } |
926 | 0 | if (flags & TDB_DISALLOW_NESTING) { |
927 | 0 | tdb->flags |= TDB_ALLOW_NESTING; |
928 | 0 | } |
929 | |
|
930 | 0 | tdb->flags &= ~flags; |
931 | 0 | } |
932 | | |
933 | | |
934 | | /* |
935 | | enable sequence number handling on an open tdb |
936 | | */ |
937 | | _PUBLIC_ void tdb_enable_seqnum(struct tdb_context *tdb) |
938 | 0 | { |
939 | 0 | tdb->flags |= TDB_SEQNUM; |
940 | 0 | } |
941 | | |
942 | | |
943 | | /* |
944 | | add a region of the file to the freelist. Length is the size of the region in bytes, |
945 | | which includes the free list header that needs to be added |
946 | | */ |
947 | | static int tdb_free_region(struct tdb_context *tdb, tdb_off_t offset, ssize_t length) |
948 | 0 | { |
949 | 0 | struct tdb_record rec; |
950 | 0 | if (length <= sizeof(rec)) { |
951 | | /* the region is not worth adding */ |
952 | 0 | return 0; |
953 | 0 | } |
954 | 0 | if (length + offset > tdb->map_size) { |
955 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n")); |
956 | 0 | return -1; |
957 | 0 | } |
958 | 0 | memset(&rec,'\0',sizeof(rec)); |
959 | 0 | rec.rec_len = length - sizeof(rec); |
960 | 0 | if (tdb_free(tdb, offset, &rec) == -1) { |
961 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: failed to add free record\n")); |
962 | 0 | return -1; |
963 | 0 | } |
964 | 0 | return 0; |
965 | 0 | } |
966 | | |
967 | | /* |
968 | | wipe the entire database, deleting all records. This can be done |
969 | | very fast by using a allrecord lock. The entire data portion of the |
970 | | file becomes a single entry in the freelist. |
971 | | |
972 | | This code carefully steps around the recovery area, leaving it alone |
973 | | */ |
974 | | _PUBLIC_ int tdb_wipe_all(struct tdb_context *tdb) |
975 | 0 | { |
976 | 0 | uint32_t i; |
977 | 0 | tdb_off_t offset = 0; |
978 | 0 | ssize_t data_len; |
979 | 0 | tdb_off_t recovery_head; |
980 | 0 | tdb_len_t recovery_size = 0; |
981 | |
|
982 | 0 | if (tdb_lockall(tdb) != 0) { |
983 | 0 | return -1; |
984 | 0 | } |
985 | | |
986 | 0 | tdb_trace(tdb, "tdb_wipe_all"); |
987 | | |
988 | | /* see if the tdb has a recovery area, and remember its size |
989 | | if so. We don't want to lose this as otherwise each |
990 | | tdb_wipe_all() in a transaction will increase the size of |
991 | | the tdb by the size of the recovery area */ |
992 | 0 | if (tdb_ofs_read(tdb, TDB_RECOVERY_HEAD, &recovery_head) == -1) { |
993 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery head\n")); |
994 | 0 | goto failed; |
995 | 0 | } |
996 | | |
997 | 0 | if (recovery_head != 0) { |
998 | 0 | struct tdb_record rec; |
999 | 0 | if (tdb->methods->tdb_read(tdb, recovery_head, &rec, sizeof(rec), DOCONV()) == -1) { |
1000 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n")); |
1001 | 0 | return -1; |
1002 | 0 | } |
1003 | 0 | recovery_size = rec.rec_len + sizeof(rec); |
1004 | 0 | } |
1005 | | |
1006 | | /* wipe the hashes */ |
1007 | 0 | for (i=0;i<tdb->hash_size;i++) { |
1008 | 0 | if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { |
1009 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i)); |
1010 | 0 | goto failed; |
1011 | 0 | } |
1012 | 0 | } |
1013 | | |
1014 | | /* wipe the freelist */ |
1015 | 0 | if (tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { |
1016 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write freelist\n")); |
1017 | 0 | goto failed; |
1018 | 0 | } |
1019 | | |
1020 | | /* add all the rest of the file to the freelist, possibly leaving a gap |
1021 | | for the recovery area */ |
1022 | 0 | if (recovery_size == 0) { |
1023 | | /* the simple case - the whole file can be used as a freelist */ |
1024 | 0 | data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size)); |
1025 | 0 | if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { |
1026 | 0 | goto failed; |
1027 | 0 | } |
1028 | 0 | } else { |
1029 | | /* we need to add two freelist entries - one on either |
1030 | | side of the recovery area |
1031 | | |
1032 | | Note that we cannot shift the recovery area during |
1033 | | this operation. Only the transaction.c code may |
1034 | | move the recovery area or we risk subtle data |
1035 | | corruption |
1036 | | */ |
1037 | 0 | data_len = (recovery_head - TDB_DATA_START(tdb->hash_size)); |
1038 | 0 | if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { |
1039 | 0 | goto failed; |
1040 | 0 | } |
1041 | | /* and the 2nd free list entry after the recovery area - if any */ |
1042 | 0 | data_len = tdb->map_size - (recovery_head+recovery_size); |
1043 | 0 | if (tdb_free_region(tdb, recovery_head+recovery_size, data_len) != 0) { |
1044 | 0 | goto failed; |
1045 | 0 | } |
1046 | 0 | } |
1047 | | |
1048 | 0 | tdb_increment_seqnum_nonblock(tdb); |
1049 | |
|
1050 | 0 | if (tdb_unlockall(tdb) != 0) { |
1051 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n")); |
1052 | 0 | goto failed; |
1053 | 0 | } |
1054 | | |
1055 | 0 | return 0; |
1056 | | |
1057 | 0 | failed: |
1058 | 0 | tdb_unlockall(tdb); |
1059 | 0 | return -1; |
1060 | 0 | } |
1061 | | |
/*
  state handed to repack_traverse() through the traverse private pointer
*/
struct traverse_state {
	/* set to true by repack_traverse() when storing a record fails */
	bool error;
	/* database that the traversed records are stored into */
	struct tdb_context *dest_db;
};
1066 | | |
1067 | | /* |
1068 | | traverse function for repacking |
1069 | | */ |
1070 | | static int repack_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private_data) |
1071 | 0 | { |
1072 | 0 | struct traverse_state *state = (struct traverse_state *)private_data; |
1073 | 0 | if (tdb_store(state->dest_db, key, data, TDB_INSERT) != 0) { |
1074 | 0 | state->error = true; |
1075 | 0 | return -1; |
1076 | 0 | } |
1077 | 0 | return 0; |
1078 | 0 | } |
1079 | | |
1080 | | /* |
1081 | | repack a tdb |
1082 | | */ |
1083 | | _PUBLIC_ int tdb_repack(struct tdb_context *tdb) |
1084 | 0 | { |
1085 | 0 | struct tdb_context *tmp_db; |
1086 | 0 | struct traverse_state state; |
1087 | |
|
1088 | 0 | tdb_trace(tdb, "tdb_repack"); |
1089 | |
|
1090 | 0 | if (tdb_transaction_start(tdb) != 0) { |
1091 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to start transaction\n")); |
1092 | 0 | return -1; |
1093 | 0 | } |
1094 | | |
1095 | 0 | tmp_db = tdb_open("tmpdb", tdb_hash_size(tdb), TDB_INTERNAL, O_RDWR|O_CREAT, 0); |
1096 | 0 | if (tmp_db == NULL) { |
1097 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to create tmp_db\n")); |
1098 | 0 | tdb_transaction_cancel(tdb); |
1099 | 0 | return -1; |
1100 | 0 | } |
1101 | | |
1102 | 0 | state.error = false; |
1103 | 0 | state.dest_db = tmp_db; |
1104 | |
|
1105 | 0 | if (tdb_traverse_read(tdb, repack_traverse, &state) == -1) { |
1106 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying out\n")); |
1107 | 0 | tdb_transaction_cancel(tdb); |
1108 | 0 | tdb_close(tmp_db); |
1109 | 0 | return -1; |
1110 | 0 | } |
1111 | | |
1112 | 0 | if (state.error) { |
1113 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during traversal\n")); |
1114 | 0 | tdb_transaction_cancel(tdb); |
1115 | 0 | tdb_close(tmp_db); |
1116 | 0 | return -1; |
1117 | 0 | } |
1118 | | |
1119 | 0 | if (tdb_wipe_all(tdb) != 0) { |
1120 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to wipe database\n")); |
1121 | 0 | tdb_transaction_cancel(tdb); |
1122 | 0 | tdb_close(tmp_db); |
1123 | 0 | return -1; |
1124 | 0 | } |
1125 | | |
1126 | 0 | state.error = false; |
1127 | 0 | state.dest_db = tdb; |
1128 | |
|
1129 | 0 | if (tdb_traverse_read(tmp_db, repack_traverse, &state) == -1) { |
1130 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to traverse copying back\n")); |
1131 | 0 | tdb_transaction_cancel(tdb); |
1132 | 0 | tdb_close(tmp_db); |
1133 | 0 | return -1; |
1134 | 0 | } |
1135 | | |
1136 | 0 | if (state.error) { |
1137 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Error during second traversal\n")); |
1138 | 0 | tdb_transaction_cancel(tdb); |
1139 | 0 | tdb_close(tmp_db); |
1140 | 0 | return -1; |
1141 | 0 | } |
1142 | | |
1143 | 0 | tdb_close(tmp_db); |
1144 | |
|
1145 | 0 | if (tdb_transaction_commit(tdb) != 0) { |
1146 | 0 | TDB_LOG((tdb, TDB_DEBUG_FATAL, __location__ " Failed to commit\n")); |
1147 | 0 | return -1; |
1148 | 0 | } |
1149 | | |
1150 | 0 | return 0; |
1151 | 0 | } |
1152 | | |
/*
  Even on files, we can get partial writes due to signals, so keep
  calling write() until all count bytes of buf have gone to fd.
  Returns true once everything is written (trivially for count == 0)
  and false as soon as write() reports an error.
*/
bool tdb_write_all(int fd, const void *buf, size_t count)
{
	const char *cursor = (const char *)buf;
	size_t remaining = count;

	while (remaining != 0) {
		ssize_t written = write(fd, cursor, remaining);

		if (written < 0) {
			return false;
		}
		/* advance past whatever part of the buffer was accepted */
		cursor += written;
		remaining -= written;
	}
	return true;
}
1166 | | |
1167 | | bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret) |
1168 | 0 | { |
1169 | 0 | tdb_off_t ret = a + b; |
1170 | |
|
1171 | 0 | if ((ret < a) || (ret < b)) { |
1172 | 0 | return false; |
1173 | 0 | } |
1174 | 0 | *pret = ret; |
1175 | | return true; |
1176 | 0 | } |
1177 | | |
1178 | | #ifdef TDB_TRACE |
1179 | | static void tdb_trace_write(struct tdb_context *tdb, const char *str) |
1180 | | { |
1181 | | if (!tdb_write_all(tdb->tracefd, str, strlen(str))) { |
1182 | | close(tdb->tracefd); |
1183 | | tdb->tracefd = -1; |
1184 | | } |
1185 | | } |
1186 | | |
1187 | | static void tdb_trace_start(struct tdb_context *tdb) |
1188 | | { |
1189 | | tdb_off_t seqnum=0; |
1190 | | char msg[sizeof(tdb_off_t) * 4 + 1]; |
1191 | | |
1192 | | tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum); |
1193 | | snprintf(msg, sizeof(msg), "%u ", seqnum); |
1194 | | tdb_trace_write(tdb, msg); |
1195 | | } |
1196 | | |
/* Finish a trace line by writing the terminating newline. */
static void tdb_trace_end(struct tdb_context *tdb)
{
	static const char newline[] = "\n";

	tdb_trace_write(tdb, newline);
}
1201 | | |
/* Finish a trace line with " = <ret>" and the terminating newline. */
static void tdb_trace_end_ret(struct tdb_context *tdb, int ret)
{
	char tail[sizeof(ret) * 4 + 4];

	snprintf(tail, sizeof(tail), " = %i\n", ret);
	tdb_trace_write(tdb, tail);
}
1208 | | |
1209 | | static void tdb_trace_record(struct tdb_context *tdb, TDB_DATA rec) |
1210 | | { |
1211 | | char msg[21 + rec.dsize*2], *p; |
1212 | | unsigned int i; |
1213 | | |
1214 | | /* We differentiate zero-length records from non-existent ones. */ |
1215 | | if (rec.dptr == NULL) { |
1216 | | tdb_trace_write(tdb, " NULL"); |
1217 | | return; |
1218 | | } |
1219 | | |
1220 | | /* snprintf here is purely cargo-cult programming. */ |
1221 | | p = msg; |
1222 | | p += snprintf(p, sizeof(msg), " %zu:", rec.dsize); |
1223 | | |
1224 | | for (i = 0; i < rec.dsize; i++) { |
1225 | | snprintf(p, 3, "%02x", rec.dptr[i]); |
1226 | | p += 2; |
1227 | | } |
1228 | | |
1229 | | tdb_trace_write(tdb, msg); |
1230 | | } |
1231 | | |
/* Emit a complete trace line consisting of the sequence number prefix and op. */
void tdb_trace(struct tdb_context *tdb, const char *op)
{
	/* prefix */
	tdb_trace_start(tdb);

	/* operation name */
	tdb_trace_write(tdb, op);

	/* line terminator */
	tdb_trace_end(tdb);
}
1238 | | |
1239 | | void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op) |
1240 | | { |
1241 | | char msg[sizeof(tdb_off_t) * 4 + 1]; |
1242 | | |
1243 | | snprintf(msg, sizeof(msg), "%u ", seqnum); |
1244 | | tdb_trace_write(tdb, msg); |
1245 | | tdb_trace_write(tdb, op); |
1246 | | tdb_trace_end(tdb); |
1247 | | } |
1248 | | |
/*
  Emit a trace line for an open-style operation: op, the hash size in
  decimal, then the tdb flags and open flags in hex.
*/
void tdb_trace_open(struct tdb_context *tdb, const char *op,
		    unsigned hash_size, unsigned tdb_flags, unsigned open_flags)
{
	char details[128];

	tdb_trace_start(tdb);

	snprintf(details, sizeof(details),
		 "%s %u 0x%x 0x%x", op, hash_size, tdb_flags, open_flags);
	tdb_trace_write(tdb, details);

	tdb_trace_end(tdb);
}
1260 | | |
/* Emit a trace line for op together with its integer result. */
void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret)
{
	/* prefix and operation name */
	tdb_trace_start(tdb);
	tdb_trace_write(tdb, op);

	/* " = <ret>" and newline */
	tdb_trace_end_ret(tdb, ret);
}
1267 | | |
1268 | | void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret) |
1269 | | { |
1270 | | tdb_trace_start(tdb); |
1271 | | tdb_trace_write(tdb, op); |
1272 | | tdb_trace_write(tdb, " ="); |
1273 | | tdb_trace_record(tdb, ret); |
1274 | | tdb_trace_end(tdb); |
1275 | | } |
1276 | | |
1277 | | void tdb_trace_1rec(struct tdb_context *tdb, const char *op, |
1278 | | TDB_DATA rec) |
1279 | | { |
1280 | | tdb_trace_start(tdb); |
1281 | | tdb_trace_write(tdb, op); |
1282 | | tdb_trace_record(tdb, rec); |
1283 | | tdb_trace_end(tdb); |
1284 | | } |
1285 | | |
1286 | | void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op, |
1287 | | TDB_DATA rec, int ret) |
1288 | | { |
1289 | | tdb_trace_start(tdb); |
1290 | | tdb_trace_write(tdb, op); |
1291 | | tdb_trace_record(tdb, rec); |
1292 | | tdb_trace_end_ret(tdb, ret); |
1293 | | } |
1294 | | |
1295 | | void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op, |
1296 | | TDB_DATA rec, TDB_DATA ret) |
1297 | | { |
1298 | | tdb_trace_start(tdb); |
1299 | | tdb_trace_write(tdb, op); |
1300 | | tdb_trace_record(tdb, rec); |
1301 | | tdb_trace_write(tdb, " ="); |
1302 | | tdb_trace_record(tdb, ret); |
1303 | | tdb_trace_end(tdb); |
1304 | | } |
1305 | | |
1306 | | void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op, |
1307 | | TDB_DATA rec1, TDB_DATA rec2, unsigned flag, |
1308 | | int ret) |
1309 | | { |
1310 | | char msg[1 + sizeof(ret) * 4]; |
1311 | | |
1312 | | snprintf(msg, sizeof(msg), " %#x", flag); |
1313 | | tdb_trace_start(tdb); |
1314 | | tdb_trace_write(tdb, op); |
1315 | | tdb_trace_record(tdb, rec1); |
1316 | | tdb_trace_record(tdb, rec2); |
1317 | | tdb_trace_write(tdb, msg); |
1318 | | tdb_trace_end_ret(tdb, ret); |
1319 | | } |
1320 | | |
1321 | | void tdb_trace_1plusn_rec_flag_ret(struct tdb_context *tdb, const char *op, |
1322 | | TDB_DATA rec, |
1323 | | const TDB_DATA *recs, int num_recs, |
1324 | | unsigned flag, int ret) |
1325 | | { |
1326 | | char msg[1 + sizeof(ret) * 4]; |
1327 | | int i; |
1328 | | |
1329 | | snprintf(msg, sizeof(msg), " %#x", flag); |
1330 | | tdb_trace_start(tdb); |
1331 | | tdb_trace_write(tdb, op); |
1332 | | tdb_trace_record(tdb, rec); |
1333 | | for (i=0; i<num_recs; i++) { |
1334 | | tdb_trace_record(tdb, recs[i]); |
1335 | | } |
1336 | | tdb_trace_write(tdb, msg); |
1337 | | tdb_trace_end_ret(tdb, ret); |
1338 | | } |
1339 | | |
1340 | | void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op, |
1341 | | TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret) |
1342 | | { |
1343 | | tdb_trace_start(tdb); |
1344 | | tdb_trace_write(tdb, op); |
1345 | | tdb_trace_record(tdb, rec1); |
1346 | | tdb_trace_record(tdb, rec2); |
1347 | | tdb_trace_write(tdb, " ="); |
1348 | | tdb_trace_record(tdb, ret); |
1349 | | tdb_trace_end(tdb); |
1350 | | } |
1351 | | #endif |