Coverage Report

Created: 2025-11-11 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/bind9/lib/dns/qp.c
Line
Count
Source
1
/*
2
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3
 *
4
 * SPDX-License-Identifier: MPL-2.0
5
 *
6
 * This Source Code Form is subject to the terms of the Mozilla Public
7
 * License, v. 2.0. If a copy of the MPL was not distributed with this
8
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9
 *
10
 * See the COPYRIGHT file distributed with this work for additional
11
 * information regarding copyright ownership.
12
 */
13
14
/*
15
 * For an overview, see doc/design/qp-trie.md
16
 */
17
18
#include <inttypes.h>
19
#include <stdbool.h>
20
#include <stddef.h>
21
#include <stdint.h>
22
#include <string.h>
23
24
#if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
25
#include <sys/mman.h>
26
#include <unistd.h>
27
#endif
28
29
#include <isc/atomic.h>
30
#include <isc/bit.h>
31
#include <isc/buffer.h>
32
#include <isc/log.h>
33
#include <isc/magic.h>
34
#include <isc/mem.h>
35
#include <isc/mutex.h>
36
#include <isc/refcount.h>
37
#include <isc/result.h>
38
#include <isc/rwlock.h>
39
#include <isc/tid.h>
40
#include <isc/time.h>
41
#include <isc/types.h>
42
#include <isc/urcu.h>
43
#include <isc/util.h>
44
45
#include <dns/fixedname.h>
46
#include <dns/name.h>
47
#include <dns/qp.h>
48
#include <dns/types.h>
49
50
#include "qp_p.h"
51
52
#ifndef DNS_QP_LOG_STATS_LEVEL
53
#define DNS_QP_LOG_STATS_LEVEL 3
54
#endif
55
#ifndef DNS_QP_TRACE
56
#define DNS_QP_TRACE 0
57
#endif
58
59
/*
60
 * very basic garbage collector statistics
61
 *
62
 * XXXFANF for now we're logging GC times, but ideally we should
63
 * accumulate stats more quietly and report via the statschannel
64
 */
65
static atomic_uint_fast64_t compact_time;
66
static atomic_uint_fast64_t recycle_time;
67
static atomic_uint_fast64_t rollback_time;
68
69
/* for LOG_STATS() format strings */
70
#define PRItime " %" PRIu64 " ns "
71
72
#if DNS_QP_LOG_STATS_LEVEL
73
#define LOG_STATS(...)                                            \
74
13.4k
  isc_log_write(DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_QP, \
75
13.4k
          ISC_LOG_DEBUG(DNS_QP_LOG_STATS_LEVEL), __VA_ARGS__)
76
#else
77
#define LOG_STATS(...)
78
#endif
79
80
#if DNS_QP_TRACE
81
/*
82
 * TRACE is generally used in allocation-related functions so it doesn't
83
 * trace very high-frequency ops
84
 */
85
#define TRACE(fmt, ...)                                                        \
86
  do {                                                                   \
87
    if (isc_log_wouldlog(ISC_LOG_DEBUG(7))) {                      \
88
      isc_log_write(DNS_LOGCATEGORY_DATABASE,                \
89
              DNS_LOGMODULE_QP, ISC_LOG_DEBUG(7),      \
90
              "%s:%d:%s(qp %p uctx \"%s\"):t%" PRItid  \
91
              ": " fmt,                                \
92
              __FILE__, __LINE__, __func__, qp,        \
93
              qp ? TRIENAME(qp) : "(null)", isc_tid(), \
94
              ##__VA_ARGS__);                          \
95
    }                                                              \
96
  } while (0)
97
#else
98
#define TRACE(...)
99
#endif
100
101
#if DNS_QPMULTI_TRACE
102
ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti);
103
#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__)
104
#define dns_qpmulti_unref(ptr) \
105
  dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__)
106
#define dns_qpmulti_attach(ptr, ptrp) \
107
  dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
108
#define dns_qpmulti_detach(ptrp) \
109
  dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__)
110
#else
111
ISC_REFCOUNT_STATIC_DECL(dns_qpmulti);
112
#endif
113
114
/***********************************************************************
115
 *
116
 *  converting DNS names to trie keys
117
 */
118
119
/*
120
 * Convert the namespace value. We map namespace values to numerical
121
 * digits so they can be represented in a single byte in the QP key;
122
 * thus namespace 0 becomes '0', etc.
123
 */
124
26.7M
#define ENCODE_NAMESPACE(c) dns_qp_bits_for_byte[(c) + (uint8_t)'0']
125
#define DECODE_NAMESPACE(c) dns_qp_byte_for_bit[(c)] - (uint8_t)'0'
126
127
28.0k
#define NAME_OFFSET 1
128
129
/*
130
 * Number of distinct byte values, i.e. 256
131
 */
132
5.65k
#define BYTE_VALUES (UINT8_MAX + 1)
133
134
/*
135
 * Lookup table mapping bytes in DNS names to bit positions, used
136
 * by dns_qpkey_fromname() to convert DNS names to qp-trie keys.
137
 *
138
 * Each element holds one or two bit positions, bit_one in the
139
 * lower half and bit_two in the upper half.
140
 *
141
 * For common hostname characters, bit_two is zero (which cannot
142
 * be a valid bit position).
143
 *
144
 * For others, bit_one is the escape bit, and bit_two is the
145
 * position of the character within the escaped range.
146
 */
147
uint16_t dns_qp_bits_for_byte[BYTE_VALUES] = { 0 };
148
149
/*
150
 * And the reverse, mapping bit positions to characters, so the tests
151
 * can print diagnostics involving qp-trie keys.
152
 *
153
 * This table only handles the first bit in an escape sequence; we
154
 * arrange that we can calculate the byte value for both bits by
155
 * adding the second bit to the first bit's byte value.
156
 */
157
uint8_t dns_qp_byte_for_bit[SHIFT_OFFSET] = { 0 };
158
159
/*
160
 * Fill in the lookup tables at program startup. (It doesn't matter
161
 * when this is initialized relative to other startup code.)
162
 */
163
164
/*
165
 * The bit positions for bytes inside labels have to be between
166
 * SHIFT_BITMAP and SHIFT_OFFSET. (SHIFT_NOBYTE separates labels.)
167
 *
168
 * Each byte range in between common hostname characters has a different
169
 * escape character, to preserve the correct lexical order.
170
 *
171
 * Escaped byte ranges mostly fit into the space available in the
172
 * bitmap, except for those above 'z' (which is mostly bytes with the
173
 * top bit set). So, when we reach the end of the bitmap we roll over
174
 * to the next escape character.
175
 *
176
 * After filling the table we ensure that the bit positions for
177
 * hostname characters and escape characters all fit.
178
 */
179
void
180
22
dns__qp_initialize(void) {
181
  /* zero common character marker not a valid shift position */
182
22
  INSIST(0 < SHIFT_BITMAP);
183
  /* first bit is common byte or escape byte */
184
22
  dns_qpshift_t bit_one = SHIFT_BITMAP;
185
  /* second bit is position in escaped range */
186
22
  dns_qpshift_t bit_two = SHIFT_BITMAP;
187
22
  bool escaping = true;
188
189
5.65k
  for (unsigned int byte = 0; byte < BYTE_VALUES; byte++) {
190
5.63k
    if (qp_common_character(byte)) {
191
902
      escaping = false;
192
902
      bit_one++;
193
902
      dns_qp_byte_for_bit[bit_one] = byte;
194
902
      dns_qp_bits_for_byte[byte] = bit_one;
195
4.73k
    } else if ('A' <= byte && byte <= 'Z') {
196
      /* map upper case to lower case */
197
572
      dns_qpshift_t after_esc = bit_one + 1;
198
572
      dns_qpshift_t skip_punct = 'a' - '_';
199
572
      dns_qpshift_t letter = byte - 'A';
200
572
      dns_qpshift_t bit = after_esc + skip_punct + letter;
201
572
      dns_qp_bits_for_byte[byte] = bit;
202
      /* to simplify reverse conversion */
203
572
      bit_two++;
204
4.15k
    } else {
205
      /* non-hostname characters need to be escaped */
206
4.15k
      if (!escaping || bit_two >= SHIFT_OFFSET) {
207
88
        escaping = true;
208
88
        bit_one++;
209
88
        dns_qp_byte_for_bit[bit_one] = byte;
210
88
        bit_two = SHIFT_BITMAP;
211
88
      }
212
4.15k
      dns_qp_bits_for_byte[byte] = bit_two << 8 | bit_one;
213
4.15k
      bit_two++;
214
4.15k
    }
215
5.63k
  }
216
22
  ENSURE(bit_one < SHIFT_OFFSET);
217
22
}
218
219
void
220
0
dns__qp_shutdown(void) {
221
  /* Nothing */
222
0
}
223
224
/*
225
 * Convert a DNS name into a trie lookup key.
226
 *
227
 * Returns the length of the key.
228
 *
229
 * For performance we get our hands dirty in the guts of the name.
230
 *
231
 * We don't worry about the distinction between absolute and relative
232
 * names. When the trie is only used with absolute names, the first byte
233
 * of the key will always be SHIFT_NOBYTE and it will always be skipped
234
 * when traversing the trie. So keeping the root label costs little, and
235
 * it allows us to support tries of relative names too. In fact absolute
236
 * and relative names can be mixed in the same trie without causing
237
 * confusion, because the presence or absence of the initial
238
 * SHIFT_NOBYTE in the key disambiguates them (exactly like a trailing
239
 * dot in a zone file).
240
 */
241
size_t
242
dns_qpkey_fromname(dns_qpkey_t key, const dns_name_t *name,
243
26.7M
       dns_namespace_t space) {
244
26.7M
  REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
245
246
26.7M
  dns_offsets_t offsets;
247
26.7M
  size_t labels = dns_name_offsets(name, offsets);
248
26.7M
  size_t len = 0;
249
250
  /* namespace */
251
26.7M
  key[len++] = ENCODE_NAMESPACE(space);
252
  /* name */
253
26.7M
  if (labels == 0) {
254
0
    key[len] = SHIFT_NOBYTE;
255
0
    return len;
256
0
  }
257
258
26.7M
  size_t label = labels;
259
214M
  while (label-- > 0) {
260
188M
    const uint8_t *ldata = name->ndata + offsets[label];
261
188M
    size_t label_len = *ldata++;
262
531M
    while (label_len-- > 0) {
263
343M
      uint16_t bits = dns_qp_bits_for_byte[*ldata++];
264
343M
      key[len++] = bits & 0xFF; /* bit_one */
265
343M
      if ((bits >> 8) != 0) {   /* escape? */
266
174M
        key[len++] = bits >> 8; /* bit_two */
267
174M
      }
268
343M
    }
269
    /* label terminator */
270
188M
    key[len++] = SHIFT_NOBYTE;
271
188M
  }
272
  /* mark end with a double NOBYTE */
273
26.7M
  key[len] = SHIFT_NOBYTE;
274
26.7M
  ENSURE(len < sizeof(dns_qpkey_t));
275
26.7M
  return len;
276
26.7M
}
277
278
void
279
dns_qpkey_toname(const dns_qpkey_t key, size_t keylen, dns_name_t *name,
280
609
     dns_namespace_t *space) {
281
609
  size_t locs[DNS_NAME_MAXLABELS];
282
609
  size_t loc = 0;
283
609
  size_t offset = 0;
284
285
609
  REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
286
609
  REQUIRE(name->buffer != NULL);
287
609
  REQUIRE(keylen > 0);
288
289
609
  dns_name_reset(name);
290
291
609
  SET_IF_NOT_NULL(space, DECODE_NAMESPACE(key[offset++]));
292
293
609
  if (keylen == NAME_OFFSET) {
294
0
    return;
295
0
  }
296
297
  /* Scan the key looking for label boundaries */
298
32.0k
  for (; offset <= keylen; offset++) {
299
32.0k
    INSIST(key[offset] >= SHIFT_NOBYTE &&
300
32.0k
           key[offset] < SHIFT_OFFSET);
301
32.0k
    INSIST(loc < DNS_NAME_MAXLABELS);
302
32.0k
    if (qpkey_bit(key, keylen, offset) == SHIFT_NOBYTE) {
303
5.25k
      if (qpkey_bit(key, keylen, offset + 1) == SHIFT_NOBYTE)
304
609
      {
305
609
        locs[loc] = offset + 1;
306
609
        goto scanned;
307
609
      }
308
4.64k
      locs[loc++] = offset + 1;
309
26.8k
    } else if (offset == NAME_OFFSET) {
310
      /* This happens for a relative name */
311
0
      locs[loc++] = offset;
312
0
    }
313
32.0k
  }
314
609
  UNREACHABLE();
315
609
scanned:
316
317
  /*
318
   * In the key the labels are encoded in reverse order, so
319
   * we step backward through the label boundaries, then forward
320
   * through the labels, to create the DNS wire format data.
321
   */
322
5.25k
  while (loc-- > 0) {
323
4.64k
    uint8_t len = 0, *lenp = NULL;
324
325
    /* Store the location of the length byte */
326
4.64k
    lenp = isc_buffer_used(name->buffer);
327
328
    /* Add a length byte to the name data */
329
4.64k
    isc_buffer_putuint8(name->buffer, 0);
330
4.64k
    name->length++;
331
332
    /* Convert from escaped byte ranges to ASCII */
333
18.9k
    for (offset = locs[loc]; offset < locs[loc + 1] - 1; offset++) {
334
14.2k
      uint8_t bit = qpkey_bit(key, keylen, offset);
335
14.2k
      uint8_t byte = dns_qp_byte_for_bit[bit];
336
14.2k
      if (qp_common_character(byte)) {
337
1.82k
        isc_buffer_putuint8(name->buffer, byte);
338
12.4k
      } else {
339
12.4k
        byte += key[++offset] - SHIFT_BITMAP;
340
12.4k
        isc_buffer_putuint8(name->buffer, byte);
341
12.4k
      }
342
14.2k
      len++;
343
14.2k
    }
344
345
4.64k
    name->length += len;
346
347
    /* Write the final label length to the length byte */
348
4.64k
    *lenp = len;
349
4.64k
  }
350
351
  /* Add a root label for absolute names */
352
609
  if (key[NAME_OFFSET] == SHIFT_NOBYTE) {
353
609
    name->attributes.absolute = true;
354
609
    isc_buffer_putuint8(name->buffer, 0);
355
609
    name->length++;
356
609
  }
357
358
609
  name->ndata = isc_buffer_base(name->buffer);
359
609
}
360
361
/*
362
 * Sentinel value for equal keys
363
 */
364
14.2M
#define QPKEY_EQUAL (~(size_t)0)
365
366
/*
367
 * Compare two keys and return the offset where they differ.
368
 *
369
 * This offset is used to work out where a trie search diverged: when one
370
 * of the keys is in the trie and one is not, the common prefix (up to the
371
 * offset) is the part of the unknown key that exists in the trie. This
372
 * matters for adding new keys or finding neighbours of missing keys.
373
 *
374
 * When the keys are different lengths it is possible (but unwise) for
375
 * the longer key to be the same as the shorter key but with superfluous
376
 * trailing SHIFT_NOBYTE elements. This makes the keys equal for the
377
 * purpose of traversing the trie.
378
 */
379
static size_t
380
qpkey_compare(const dns_qpkey_t key_a, const size_t keylen_a,
381
11.2M
        const dns_qpkey_t key_b, const size_t keylen_b) {
382
11.2M
  size_t keylen = ISC_MAX(keylen_a, keylen_b);
383
318M
  for (size_t offset = 0; offset < keylen; offset++) {
384
315M
    if (qpkey_bit(key_a, keylen_a, offset) !=
385
315M
        qpkey_bit(key_b, keylen_b, offset))
386
8.32M
    {
387
8.32M
      return offset;
388
8.32M
    }
389
315M
  }
390
2.97M
  return QPKEY_EQUAL;
391
11.2M
}
392
393
/***********************************************************************
394
 *
395
 *  allocator wrappers
396
 */
397
398
#if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
399
400
/*
401
 * Optionally (for debugging) during a copy-on-write transaction, use
402
 * memory protection to ensure that the shared chunks are not modified.
403
 * Once a chunk becomes shared, it remains read-only until it is freed.
404
 * POSIX says we have to use mmap() to get an allocation that we can
405
 * definitely pass to mprotect().
406
 */
407
408
static size_t
409
0
chunk_size_raw(void) {
410
0
  size_t size = (size_t)sysconf(_SC_PAGE_SIZE);
411
0
  return ISC_MAX(size, QP_CHUNK_BYTES);
412
0
}
413
414
static void *
415
408k
chunk_get_raw(dns_qp_t *qp, size_t len) {
416
408k
  if (qp->write_protect) {
417
0
    size_t size = chunk_size_raw();
418
0
    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
419
0
         MAP_ANON | MAP_PRIVATE, -1, 0);
420
0
    RUNTIME_CHECK(ptr != MAP_FAILED);
421
0
    return ptr;
422
408k
  } else {
423
408k
    return isc_mem_allocate(qp->mctx, len);
424
408k
  }
425
408k
}
426
427
static void
428
408k
chunk_free_raw(dns_qp_t *qp, void *ptr) {
429
408k
  if (qp->write_protect) {
430
0
    RUNTIME_CHECK(munmap(ptr, chunk_size_raw()) == 0);
431
408k
  } else {
432
408k
    isc_mem_free(qp->mctx, ptr);
433
408k
  }
434
408k
}
435
436
static void *
437
0
chunk_shrink_raw(dns_qp_t *qp, void *ptr, size_t bytes) {
438
0
  if (qp->write_protect) {
439
0
    return ptr;
440
0
  } else {
441
0
    return isc_mem_reallocate(qp->mctx, ptr, bytes);
442
0
  }
443
0
}
444
445
static void
446
18.4k
write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) {
447
18.4k
  if (qp->write_protect) {
448
    /* see transaction_open() wrt this special case */
449
0
    if (qp->transaction_mode == QP_WRITE && chunk == qp->bump) {
450
0
      return;
451
0
    }
452
0
    TRACE("chunk %u", chunk);
453
0
    void *ptr = qp->base->ptr[chunk];
454
0
    size_t size = chunk_size_raw();
455
0
    RUNTIME_CHECK(mprotect(ptr, size, PROT_READ) >= 0);
456
0
  }
457
18.4k
}
458
459
#else
460
461
#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size)
462
#define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr)
463
464
#define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size)
465
466
#define write_protect(qp, chunk)
467
468
#endif
469
470
/***********************************************************************
471
 *
472
 *  allocator
473
 */
474
475
/*
476
 * When we reuse the bump chunk across multiple write transactions,
477
 * it can have an immutable prefix and a mutable suffix.
478
 */
479
static inline bool
480
57.1M
cells_immutable(dns_qp_t *qp, dns_qpref_t ref) {
481
57.1M
  dns_qpchunk_t chunk = ref_chunk(ref);
482
57.1M
  dns_qpcell_t cell = ref_cell(ref);
483
57.1M
  if (chunk == qp->bump) {
484
9.41M
    return cell < qp->fender;
485
47.7M
  } else {
486
47.7M
    return qp->usage[chunk].immutable;
487
47.7M
  }
488
57.1M
}
489
490
/*
491
 * Find the next power that is both bigger than size and prev_capacity,
492
 * but still within the chunk min and max sizes.
493
 */
494
static dns_qpcell_t
495
408k
next_capacity(uint32_t prev_capacity, uint32_t size) {
496
  /*
497
   * Request size was floored at 2 because builtin_clz used to be 0.
498
   * We keep this behavior because stdc_leading_zeros(0) = 32.
499
   */
500
408k
  size = ISC_MAX3(size, prev_capacity, 2U);
501
408k
  uint32_t log2 = 32U - stdc_leading_zeros(size - 1U);
502
503
408k
  return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX);
504
408k
}
505
506
/*
507
 * Create a fresh bump chunk and allocate some twigs from it.
508
 */
509
static dns_qpref_t
510
408k
chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) {
511
408k
  INSIST(qp->base->ptr[chunk] == NULL);
512
408k
  INSIST(qp->usage[chunk].used == 0);
513
408k
  INSIST(qp->usage[chunk].free == 0);
514
408k
  INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE);
515
516
408k
  qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size);
517
408k
  qp->base->ptr[chunk] =
518
408k
    chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t));
519
520
408k
  qp->usage[chunk] = (qp_usage_t){ .exists = true,
521
408k
           .used = size,
522
408k
           .capacity = qp->chunk_capacity };
523
408k
  qp->used_count += size;
524
408k
  qp->bump = chunk;
525
408k
  qp->fender = 0;
526
527
408k
  if (qp->write_protect) {
528
0
    TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]);
529
0
  }
530
408k
  return make_ref(chunk, 0);
531
408k
}
532
533
/*
534
 * This is used to grow the chunk arrays when they fill up. If the old
535
 * base array is in use by readers, we must make a clone, otherwise we
536
 * can reallocate in place.
537
 *
538
 * The isc_refcount_init() and qpbase_unref() in this function are a pair.
539
 */
540
static void
541
20.4k
realloc_chunk_arrays(dns_qp_t *qp, dns_qpchunk_t newmax) {
542
20.4k
  size_t oldptrs = sizeof(qp->base->ptr[0]) * qp->chunk_max;
543
20.4k
  size_t newptrs = sizeof(qp->base->ptr[0]) * newmax;
544
20.4k
  size_t size = STRUCT_FLEX_SIZE(qp->base, ptr, newmax);
545
546
20.4k
  if (qp->base == NULL || qpbase_unref(qp)) {
547
20.1k
    qp->base = isc_mem_reallocate(qp->mctx, qp->base, size);
548
20.1k
  } else {
549
342
    dns_qpbase_t *oldbase = qp->base;
550
342
    qp->base = isc_mem_allocate(qp->mctx, size);
551
342
    memmove(&qp->base->ptr[0], &oldbase->ptr[0], oldptrs);
552
342
  }
553
20.4k
  memset(&qp->base->ptr[qp->chunk_max], 0, newptrs - oldptrs);
554
20.4k
  isc_refcount_init(&qp->base->refcount, 1);
555
20.4k
  qp->base->magic = QPBASE_MAGIC;
556
557
  /* usage array is exclusive to the writer */
558
20.4k
  size_t oldusage = sizeof(qp->usage[0]) * qp->chunk_max;
559
20.4k
  size_t newusage = sizeof(qp->usage[0]) * newmax;
560
20.4k
  qp->usage = isc_mem_reallocate(qp->mctx, qp->usage, newusage);
561
20.4k
  memset(&qp->usage[qp->chunk_max], 0, newusage - oldusage);
562
563
20.4k
  qp->chunk_max = newmax;
564
565
20.4k
  TRACE("qpbase %p usage %p max %u", qp->base, qp->usage, qp->chunk_max);
566
20.4k
}
567
568
/*
569
 * There was no space in the bump chunk, so find a place to put a fresh
570
 * chunk in the chunk arrays, then allocate some twigs from it.
571
 */
572
static dns_qpref_t
573
408k
alloc_slow(dns_qp_t *qp, dns_qpweight_t size) {
574
408k
  dns_qpchunk_t chunk;
575
576
432M
  for (chunk = 0; chunk < qp->chunk_max; chunk++) {
577
432M
    if (!qp->usage[chunk].exists) {
578
387k
      return chunk_alloc(qp, chunk, size);
579
387k
    }
580
432M
  }
581
20.4k
  ENSURE(chunk == qp->chunk_max);
582
20.4k
  realloc_chunk_arrays(qp, GROWTH_FACTOR(chunk));
583
20.4k
  return chunk_alloc(qp, chunk, size);
584
408k
}
585
586
/*
587
 * Ensure we are using a fresh bump chunk.
588
 */
589
static void
590
21.0k
alloc_reset(dns_qp_t *qp) {
591
21.0k
  (void)alloc_slow(qp, 0);
592
21.0k
}
593
594
/*
595
 * Allocate some fresh twigs. This is the bump allocator fast path.
596
 */
597
static inline dns_qpref_t
598
8.05M
alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) {
599
8.05M
  dns_qpchunk_t chunk = qp->bump;
600
8.05M
  dns_qpcell_t cell = qp->usage[chunk].used;
601
602
8.05M
  if (cell + size <= qp->usage[chunk].capacity) {
603
7.66M
    qp->usage[chunk].used += size;
604
7.66M
    qp->used_count += size;
605
7.66M
    return make_ref(chunk, cell);
606
7.66M
  } else {
607
387k
    return alloc_slow(qp, size);
608
387k
  }
609
8.05M
}
610
611
/*
612
 * Record that some twigs are no longer being used, and if possible
613
 * zero them to ensure that there isn't a spurious double detach when
614
 * the chunk is later recycled.
615
 *
616
 * Returns true if the twigs were immediately destroyed.
617
 *
618
 * NOTE: the caller is responsible for attaching or detaching any
619
 * leaves as required.
620
 */
621
static inline bool
622
6.37M
free_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) {
623
6.37M
  dns_qpchunk_t chunk = ref_chunk(twigs);
624
625
6.37M
  qp->free_count += size;
626
6.37M
  qp->usage[chunk].free += size;
627
6.37M
  ENSURE(qp->free_count <= qp->used_count);
628
6.37M
  ENSURE(qp->usage[chunk].free <= qp->usage[chunk].used);
629
630
6.37M
  if (cells_immutable(qp, twigs)) {
631
33.6k
    qp->hold_count += size;
632
33.6k
    ENSURE(qp->free_count >= qp->hold_count);
633
33.6k
    return false;
634
6.33M
  } else {
635
6.33M
    zero_twigs(ref_ptr(qp, twigs), size);
636
6.33M
    return true;
637
6.33M
  }
638
6.37M
}
639
640
/*
641
 * When some twigs have been copied, and free_twigs() could not
642
 * immediately destroy the old copy, we need to update the refcount
643
 * on any leaves that were duplicated.
644
 */
645
static void
646
15.2k
attach_twigs(dns_qp_t *qp, dns_qpnode_t *twigs, dns_qpweight_t size) {
647
51.8k
  for (dns_qpweight_t pos = 0; pos < size; pos++) {
648
36.6k
    if (node_tag(&twigs[pos]) == LEAF_TAG) {
649
27.3k
      attach_leaf(qp, &twigs[pos]);
650
27.3k
    }
651
36.6k
  }
652
15.2k
}
653
654
/***********************************************************************
655
 *
656
 *  chunk reclamation
657
 */
658
659
/*
660
 * Is any of this chunk still in use?
661
 */
662
static inline dns_qpcell_t
663
3.86M
chunk_usage(dns_qp_t *qp, dns_qpchunk_t chunk) {
664
3.86M
  return qp->usage[chunk].used - qp->usage[chunk].free;
665
3.86M
}
666
667
/*
668
 * We remove each empty chunk from the total counts when the chunk is
669
 * freed, or when it is scheduled for safe memory reclamation. We check
670
 * the chunk's phase to avoid discounting it twice in the latter case.
671
 */
672
static void
673
408k
chunk_discount(dns_qp_t *qp, dns_qpchunk_t chunk) {
674
408k
  if (qp->usage[chunk].discounted) {
675
320
    return;
676
320
  }
677
408k
  INSIST(qp->used_count >= qp->usage[chunk].used);
678
408k
  INSIST(qp->free_count >= qp->usage[chunk].free);
679
408k
  qp->used_count -= qp->usage[chunk].used;
680
408k
  qp->free_count -= qp->usage[chunk].free;
681
408k
  qp->usage[chunk].discounted = true;
682
408k
}
683
684
/*
685
 * When a chunk is being recycled, we need to detach any leaves that
686
 * remain, and free any `base` arrays that have been marked as unused.
687
 */
688
static void
689
408k
chunk_free(dns_qp_t *qp, dns_qpchunk_t chunk) {
690
408k
  if (qp->write_protect) {
691
0
    TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]);
692
0
  }
693
694
408k
  dns_qpnode_t *n = qp->base->ptr[chunk];
695
48.5M
  for (dns_qpcell_t count = qp->usage[chunk].used; count > 0;
696
48.1M
       count--, n++)
697
48.1M
  {
698
48.1M
    if (node_tag(n) == LEAF_TAG && node_pointer(n) != NULL) {
699
6.74M
      detach_leaf(qp, n);
700
41.3M
    } else if (count > 1 && reader_valid(n)) {
701
36.8k
      dns_qpreader_t qpr;
702
36.8k
      unpack_reader(&qpr, n);
703
      /* pairs with dns_qpmulti_commit() */
704
36.8k
      if (qpbase_unref(&qpr)) {
705
342
        isc_mem_free(qp->mctx, qpr.base);
706
342
      }
707
36.8k
    }
708
48.1M
  }
709
408k
  chunk_discount(qp, chunk);
710
408k
  chunk_free_raw(qp, qp->base->ptr[chunk]);
711
408k
  qp->base->ptr[chunk] = NULL;
712
408k
  qp->usage[chunk] = (qp_usage_t){};
713
408k
}
714
715
/*
716
 * Free any chunks that we can while a trie is in use.
717
 */
718
static void
719
3.11k
recycle(dns_qp_t *qp) {
720
3.11k
  unsigned int nfree = 0;
721
722
3.11k
  isc_nanosecs_t start = isc_time_monotonic();
723
724
644k
  for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
725
641k
    if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
726
405k
        qp->usage[chunk].exists && !qp->usage[chunk].immutable)
727
278k
    {
728
278k
      chunk_free(qp, chunk);
729
278k
      nfree++;
730
278k
    }
731
641k
  }
732
733
3.11k
  isc_nanosecs_t time = isc_time_monotonic() - start;
734
3.11k
  atomic_fetch_add_relaxed(&recycle_time, time);
735
736
3.11k
  if (nfree > 0) {
737
3.11k
    LOG_STATS("qp recycle" PRItime "free %u chunks", time, nfree);
738
3.11k
    LOG_STATS("qp recycle leaf %u live %u used %u free %u hold %u",
739
3.11k
        qp->leaf_count, qp->used_count - qp->free_count,
740
3.11k
        qp->used_count, qp->free_count, qp->hold_count);
741
3.11k
  }
742
3.11k
}
743
744
/*
745
 * asynchronous cleanup
746
 */
747
static void
748
320
reclaim_chunks_cb(struct rcu_head *arg) {
749
320
  qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
750
320
  REQUIRE(QPRCU_VALID(rcuctx));
751
320
  dns_qpmulti_t *multi = rcuctx->multi;
752
320
  REQUIRE(QPMULTI_VALID(multi));
753
754
320
  LOCK(&multi->mutex);
755
320
  dns_qp_t *qp = &multi->writer;
756
757
  /*
758
   * If chunk_max is zero, chunks have already been freed.
759
   */
760
320
  if (qp->chunk_max != 0) {
761
320
    unsigned int nfree = 0;
762
320
    isc_nanosecs_t start = isc_time_monotonic();
763
764
320
    INSIST(QP_VALID(qp));
765
766
640
    for (unsigned int i = 0; i < rcuctx->count; i++) {
767
320
      dns_qpchunk_t chunk = rcuctx->chunk[i];
768
320
      if (qp->usage[chunk].snapshot) {
769
        /* clean up when snapshot is destroyed */
770
0
        qp->usage[chunk].snapfree = true;
771
320
      } else {
772
320
        chunk_free(qp, chunk);
773
320
        nfree++;
774
320
      }
775
320
    }
776
777
320
    isc_nanosecs_t time = isc_time_monotonic() - start;
778
320
    recycle_time += time;
779
780
320
    if (nfree > 0) {
781
320
      LOG_STATS("qp reclaim" PRItime "free %u chunks", time,
782
320
          nfree);
783
320
      LOG_STATS(
784
320
        "qp reclaim leaf %u live %u used %u free %u "
785
320
        "hold %u",
786
320
        qp->leaf_count, qp->used_count - qp->free_count,
787
320
        qp->used_count, qp->free_count, qp->hold_count);
788
320
    }
789
320
  }
790
791
320
  UNLOCK(&multi->mutex);
792
793
320
  dns_qpmulti_detach(&multi);
794
320
  isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
795
320
           STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
796
320
}
797
798
/*
799
 * At the end of a transaction, schedule empty but immutable chunks
800
 * for reclamation later.
801
 */
802
static void
803
36.8k
reclaim_chunks(dns_qpmulti_t *multi) {
804
36.8k
  dns_qp_t *qp = &multi->writer;
805
806
36.8k
  unsigned int count = 0;
807
275k
  for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
808
238k
    if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
809
94.6k
        qp->usage[chunk].exists && qp->usage[chunk].immutable &&
810
320
        !qp->usage[chunk].discounted)
811
320
    {
812
320
      count++;
813
320
    }
814
238k
  }
815
816
36.8k
  if (count == 0) {
817
36.5k
    return;
818
36.5k
  }
819
820
320
  qp_rcuctx_t *rcuctx =
821
320
    isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, count));
822
320
  *rcuctx = (qp_rcuctx_t){
823
320
    .magic = QPRCU_MAGIC,
824
320
    .multi = multi,
825
320
    .count = count,
826
320
  };
827
320
  isc_mem_attach(qp->mctx, &rcuctx->mctx);
828
829
320
  unsigned int i = 0;
830
163k
  for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
831
162k
    if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
832
57.7k
        qp->usage[chunk].exists && qp->usage[chunk].immutable &&
833
320
        !qp->usage[chunk].discounted)
834
320
    {
835
320
      rcuctx->chunk[i++] = chunk;
836
320
      chunk_discount(qp, chunk);
837
320
    }
838
162k
  }
839
840
  /*
841
   * Reference the qpmulti object to keep it from being
842
   * freed until reclaim_chunks_cb() runs.
843
   */
844
320
  dns_qpmulti_ref(multi);
845
320
  call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
846
847
320
  LOG_STATS("qp will reclaim %u chunks", count);
848
320
}
849
850
/*
851
 * When a snapshot is destroyed, clean up chunks that need free()ing
852
 * and are not used by any remaining snapshots.
853
 */
854
static void
855
0
marksweep_chunks(dns_qpmulti_t *multi) {
856
0
  unsigned int nfree = 0;
857
858
0
  isc_nanosecs_t start = isc_time_monotonic();
859
860
0
  dns_qp_t *qpw = &multi->writer;
861
862
0
  ISC_LIST_FOREACH(multi->snapshots, qps, link) {
863
0
    for (dns_qpchunk_t chunk = 0; chunk < qps->chunk_max; chunk++) {
864
0
      if (qps->base->ptr[chunk] != NULL) {
865
0
        INSIST(qps->base->ptr[chunk] ==
866
0
               qpw->base->ptr[chunk]);
867
0
        qpw->usage[chunk].snapmark = true;
868
0
      }
869
0
    }
870
0
  }
871
872
0
  for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) {
873
0
    qpw->usage[chunk].snapshot = qpw->usage[chunk].snapmark;
874
0
    qpw->usage[chunk].snapmark = false;
875
0
    if (qpw->usage[chunk].snapfree && !qpw->usage[chunk].snapshot) {
876
0
      chunk_free(qpw, chunk);
877
0
      nfree++;
878
0
    }
879
0
  }
880
881
0
  isc_nanosecs_t time = isc_time_monotonic() - start;
882
0
  recycle_time += time;
883
884
0
  if (nfree > 0) {
885
0
    LOG_STATS("qp marksweep" PRItime "free %u chunks", time, nfree);
886
0
    LOG_STATS(
887
0
      "qp marksweep leaf %u live %u used %u free %u hold %u",
888
0
      qpw->leaf_count, qpw->used_count - qpw->free_count,
889
0
      qpw->used_count, qpw->free_count, qpw->hold_count);
890
0
  }
891
0
}
892
893
/***********************************************************************
894
 *
895
 *  garbage collector
896
 */
897
898
/*
899
 * Move a branch node's twigs to the `bump` chunk, for copy-on-write
900
 * or for garbage collection. We don't update the node in place
901
 * because `compact_recursive()` does not ensure the node itself is
902
 * mutable until after it discovers evacuation was necessary.
903
 *
904
 * If free_twigs() could not immediately destroy the old twigs, we have
905
 * to re-attach to any leaves.
906
 */
907
static dns_qpref_t
evacuate(dns_qp_t *qp, dns_qpnode_t *n) {
	/* copy this branch node's twigs vector into the bump chunk */
	dns_qpweight_t size = branch_twigs_size(n);
	dns_qpref_t old_ref = branch_twigs_ref(n);
	dns_qpref_t new_ref = alloc_twigs(qp, size);
	dns_qpnode_t *old_twigs = ref_ptr(qp, old_ref);
	dns_qpnode_t *new_twigs = ref_ptr(qp, new_ref);

	move_twigs(new_twigs, old_twigs, size);
	if (!free_twigs(qp, old_ref, size)) {
		/*
		 * the old twigs could not be destroyed immediately, so
		 * they still hold their leaf references; the new copy
		 * needs references of its own
		 */
		attach_twigs(qp, new_twigs, size);
	}

	return new_ref;
}
922
923
/*
924
 * Immutable nodes need copy-on-write. As we walk down the trie finding the
925
 * right place to modify, make_root_mutable() and make_twigs_mutable()
926
 * are called to ensure that immutable nodes on the path from the root are
927
 * copied to a mutable chunk.
928
 */
929
930
static inline dns_qpnode_t *
931
7.09M
make_root_mutable(dns_qp_t *qp) {
932
7.09M
  if (cells_immutable(qp, qp->root_ref)) {
933
6.23k
    qp->root_ref = evacuate(qp, MOVABLE_ROOT(qp));
934
6.23k
  }
935
7.09M
  return ref_ptr(qp, qp->root_ref);
936
7.09M
}
937
938
static inline void
939
40.8M
make_twigs_mutable(dns_qp_t *qp, dns_qpnode_t *n) {
940
40.8M
  if (cells_immutable(qp, branch_twigs_ref(n))) {
941
7.54k
    *n = make_node(branch_index(n), evacuate(qp, n));
942
7.54k
  }
943
40.8M
}
944
945
/*
946
 * Compact the trie by traversing the whole thing recursively, copying
947
 * bottom-up as required. The aim is to avoid evacuation as much as
948
 * possible, but when parts of the trie are immutable, we need to evacuate
949
 * the paths from the root to the parts of the trie that occupy
950
 * fragmented chunks.
951
 *
952
 * Without the QP_MIN_USED check, the algorithm will leave the trie
953
 * unchanged. If the children are all leaves, the loop changes nothing,
954
 * so we will return this node's original ref. If all of the children
955
 * that are branches did not need moving, again, the loop changes
956
 * nothing. So the evacuation check is the only place that the
957
 * algorithm introduces ref changes, that then bubble up towards the
958
 * root through the logic inside the loop.
959
 */
960
static dns_qpref_t
compact_recursive(dns_qp_t *qp, dns_qpnode_t *parent) {
	dns_qpweight_t size = branch_twigs_size(parent);
	dns_qpref_t twigs_ref = branch_twigs_ref(parent);
	dns_qpchunk_t chunk = ref_chunk(twigs_ref);

	/*
	 * Evacuate this node's twigs if we are compacting everything,
	 * or if they occupy a fragmented chunk; never evacuate out of
	 * the bump chunk, which is where evacuated twigs are placed.
	 */
	if (qp->compact_all ||
	    (chunk != qp->bump && chunk_usage(qp, chunk) < QP_MIN_USED))
	{
		twigs_ref = evacuate(qp, parent);
	}
	bool immutable = cells_immutable(qp, twigs_ref);
	for (dns_qpweight_t pos = 0; pos < size; pos++) {
		dns_qpnode_t *child = ref_ptr(qp, twigs_ref) + pos;
		if (!is_branch(child)) {
			continue;
		}
		dns_qpref_t old_grandtwigs = branch_twigs_ref(child);
		dns_qpref_t new_grandtwigs = compact_recursive(qp, child);
		if (old_grandtwigs == new_grandtwigs) {
			continue;
		}
		/*
		 * The grandchild twigs moved, so this child must be
		 * rewritten; if our own twigs are immutable, they must
		 * be evacuated first so the write is legal.
		 */
		if (immutable) {
			twigs_ref = evacuate(qp, parent);
			/* the twigs have moved */
			child = ref_ptr(qp, twigs_ref) + pos;
			immutable = false;
		}
		*child = make_node(branch_index(child), new_grandtwigs);
	}
	return twigs_ref;
}
992
993
static void
compact(dns_qp_t *qp) {
	LOG_STATS("qp compact before leaf %u live %u used %u free %u hold %u",
		  qp->leaf_count, qp->used_count - qp->free_count,
		  qp->used_count, qp->free_count, qp->hold_count);

	isc_nanosecs_t start = isc_time_monotonic();

	/* avoid evacuating into a bump chunk that is itself mostly garbage */
	if (qp->usage[qp->bump].free > QP_MAX_FREE) {
		alloc_reset(qp);
	}

	if (qp->leaf_count > 0) {
		qp->root_ref = compact_recursive(qp, MOVABLE_ROOT(qp));
	}
	/* compact_recursive() has honoured any pending full compaction */
	qp->compact_all = false;

	isc_nanosecs_t time = isc_time_monotonic() - start;
	atomic_fetch_add_relaxed(&compact_time, time);

	LOG_STATS("qp compact" PRItime
		  "leaf %u live %u used %u free %u hold %u",
		  time, qp->leaf_count, qp->used_count - qp->free_count,
		  qp->used_count, qp->free_count, qp->hold_count);
}
1018
1019
void
1020
18.4k
dns_qp_compact(dns_qp_t *qp, dns_qpgc_t mode) {
1021
18.4k
  REQUIRE(QP_VALID(qp));
1022
18.4k
  if (mode == DNS_QPGC_MAYBE && !QP_NEEDGC(qp)) {
1023
18.3k
    return;
1024
18.3k
  }
1025
33
  if (mode == DNS_QPGC_ALL) {
1026
0
    alloc_reset(qp);
1027
0
    qp->compact_all = true;
1028
0
  }
1029
33
  compact(qp);
1030
33
  recycle(qp);
1031
33
}
1032
1033
/*
1034
 * Free some twigs and (if they were destroyed immediately so that the
1035
 * result from QP_MAX_GARBAGE can change) compact the trie if necessary.
1036
 *
1037
 * This is called by the trie modification API entry points. The
1038
 * free_twigs() function requires the caller to attach or detach any
1039
 * leaves as necessary. Callers of squash_twigs() satisfy this
1040
 * requirement by calling make_twigs_mutable().
1041
 *
1042
 * Aside: In typical garbage collectors, compaction is triggered when
1043
 * the allocator runs out of space. But that is because typical garbage
1044
 * collectors do not know how much memory can be recovered, so they must
1045
 * find out by scanning the heap. The qp-trie code was originally
1046
 * designed to use malloc() and free(), so it has more information about
1047
 * when garbage collection might be worthwhile. Hence we can trigger
1048
 * collection when garbage passes a threshold.
1049
 *
1050
 * XXXFANF: If we need to avoid latency outliers caused by compaction in
1051
 * write transactions, we can check qp->transaction_mode here.
1052
 */
1053
static inline bool
squash_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) {
	bool destroyed = free_twigs(qp, twigs, size);
	/* only an immediate destroy can change QP_AUTOGC()'s answer */
	if (destroyed && QP_AUTOGC(qp)) {
		compact(qp);
		recycle(qp);
		/*
		 * This shouldn't happen if the garbage collector is
		 * working correctly. We can recover at the cost of some
		 * time and space, but recovery should be cheaper than
		 * letting compact+recycle fail repeatedly.
		 */
		if (QP_AUTOGC(qp)) {
			isc_log_write(DNS_LOGCATEGORY_DATABASE,
				      DNS_LOGMODULE_QP, ISC_LOG_NOTICE,
				      "qp %p uctx \"%s\" compact/recycle "
				      "failed to recover any space, "
				      "scheduling a full compaction",
				      qp, TRIENAME(qp));
			qp->compact_all = true;
		}
	}
	return destroyed;
}
1077
1078
/***********************************************************************
1079
 *
1080
 *  public accessors for memory management internals
1081
 */
1082
1083
dns_qp_memusage_t
1084
2
dns_qp_memusage(dns_qp_t *qp) {
1085
2
  REQUIRE(QP_VALID(qp));
1086
1087
2
  dns_qp_memusage_t memusage = {
1088
2
    .uctx = qp->uctx,
1089
2
    .leaves = qp->leaf_count,
1090
2
    .live = qp->used_count - qp->free_count,
1091
2
    .used = qp->used_count,
1092
2
    .hold = qp->hold_count,
1093
2
    .free = qp->free_count,
1094
2
    .node_size = sizeof(dns_qpnode_t),
1095
2
    .fragmented = QP_NEEDGC(qp),
1096
2
  };
1097
1098
2
  size_t chunk_usage_bytes = 0;
1099
6
  for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
1100
4
    if (qp->base->ptr[chunk] != NULL) {
1101
2
      chunk_usage_bytes += qp->usage[chunk].capacity;
1102
2
      memusage.chunk_count += 1;
1103
2
    }
1104
4
  }
1105
1106
  /*
1107
   * XXXFANF does not subtract chunks that have been shrunk,
1108
   * and does not count unreclaimed dns_qpbase_t objects
1109
   */
1110
2
  memusage.bytes = chunk_usage_bytes +
1111
2
       qp->chunk_max * sizeof(qp->base->ptr[0]) +
1112
2
       qp->chunk_max * sizeof(qp->usage[0]);
1113
1114
2
  return memusage;
1115
2
}
1116
1117
dns_qp_memusage_t
1118
2
dns_qpmulti_memusage(dns_qpmulti_t *multi) {
1119
2
  REQUIRE(QPMULTI_VALID(multi));
1120
2
  LOCK(&multi->mutex);
1121
1122
2
  dns_qp_t *qp = &multi->writer;
1123
2
  INSIST(QP_VALID(qp));
1124
1125
2
  dns_qp_memusage_t memusage = dns_qp_memusage(qp);
1126
1127
2
  if (qp->transaction_mode == QP_UPDATE && qp->usage != NULL) {
1128
0
    memusage.bytes -= qp->usage[qp->bump].capacity;
1129
0
    memusage.bytes += qp->usage[qp->bump].used *
1130
0
          sizeof(dns_qpnode_t);
1131
0
  }
1132
1133
2
  UNLOCK(&multi->mutex);
1134
2
  return memusage;
1135
2
}
1136
1137
void
1138
dns_qp_gctime(isc_nanosecs_t *compact_p, isc_nanosecs_t *recycle_p,
1139
0
        isc_nanosecs_t *rollback_p) {
1140
0
  *compact_p = atomic_load_relaxed(&compact_time);
1141
0
  *recycle_p = atomic_load_relaxed(&recycle_time);
1142
0
  *rollback_p = atomic_load_relaxed(&rollback_time);
1143
0
}
1144
1145
/***********************************************************************
1146
 *
1147
 *  read-write transactions
1148
 */
1149
1150
static dns_qp_t *
transaction_open(dns_qpmulti_t *multi, dns_qp_t **qptp) {
	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(qptp != NULL && *qptp == NULL);

	/* held until dns_qpmulti_commit() or _rollback() unlocks it */
	LOCK(&multi->mutex);

	dns_qp_t *qp = &multi->writer;
	INSIST(QP_VALID(qp));

	/*
	 * Mark existing chunks as immutable.
	 *
	 * Aside: The bump chunk is special: in a series of write
	 * transactions the bump chunk is reused; the first part (up
	 * to fender) is immutable, the rest mutable. But we set its
	 * immutable flag so that when the bump chunk fills up, the
	 * first part continues to be treated as immutable. (And the
	 * rest of the chunk too, but that's OK.)
	 */
	for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
		if (qp->usage[chunk].exists) {
			qp->usage[chunk].immutable = true;
			write_protect(qp, chunk);
		}
	}

	/*
	 * Ensure QP_AUTOGC() ignores free space in immutable chunks.
	 */
	qp->hold_count = qp->free_count;

	*qptp = qp;
	return qp;
}
1185
1186
/*
1187
 * a write is light
1188
 *
1189
 * We need to ensure we allocate from a fresh chunk if the last transaction
1190
 * shrunk the bump chunk; but usually in a sequence of write transactions
1191
 * we just put `fender` at the point where we started this generation.
1192
 *
1193
 * (Aside: Instead of keeping the previous transaction's mode, I
1194
 * considered forcing allocation into the slow path by fiddling with
1195
 * the bump chunk's usage counters. But that is troublesome because
1196
 * `chunk_free()` needs to know how much of the chunk to scan.)
1197
 */
1198
void
1199
36.8k
dns_qpmulti_write(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1200
36.8k
  dns_qp_t *qp = transaction_open(multi, qptp);
1201
36.8k
  TRACE("");
1202
1203
36.8k
  if (qp->transaction_mode == QP_WRITE) {
1204
18.4k
    qp->fender = qp->usage[qp->bump].used;
1205
18.4k
  } else {
1206
18.4k
    alloc_reset(qp);
1207
18.4k
  }
1208
36.8k
  qp->transaction_mode = QP_WRITE;
1209
36.8k
}
1210
1211
/*
1212
 * an update is heavier
1213
 *
1214
 * We always reset the allocator to the start of a fresh chunk,
1215
 * because the previous transaction was probably an update that shrunk
1216
 * the bump chunk. It simplifies rollback because `fender` is always zero.
1217
 *
1218
 * To rollback a transaction, we need to reset all the allocation
1219
 * counters to their previous state, in particular we need to un-free
1220
 * any nodes that were copied to make them mutable. This means we need
1221
 * to make a copy of basically the whole `dns_qp_t writer`: everything
1222
 * but the chunks holding the trie nodes.
1223
 *
1224
 * We do most of the transaction setup before creating the rollback
1225
 * state so that after rollback we have a correct idea of which chunks
1226
 * are immutable, and so we have the correct transaction mode to make
1227
 * the next transaction allocate a new bump chunk. The exception is
1228
 * resetting the allocator, which we do after creating the rollback
1229
 * state; if this transaction is rolled back then the next transaction
1230
 * will start from the rollback state and also reset the allocator as
1231
 * one of its first actions.
1232
 */
1233
void
dns_qpmulti_update(dns_qpmulti_t *multi, dns_qp_t **qptp) {
	dns_qp_t *qp = transaction_open(multi, qptp);
	TRACE("");

	qp->transaction_mode = QP_UPDATE;

	/* snapshot the writer's state so a rollback can restore it */
	dns_qp_t *rollback = isc_mem_allocate(qp->mctx, sizeof(*rollback));
	memmove(rollback, qp, sizeof(*rollback));
	/* can be uninitialized on the first transaction */
	if (rollback->base != NULL) {
		INSIST(QPBASE_VALID(rollback->base));
		INSIST(qp->usage != NULL && qp->chunk_max > 0);
		/* paired with either _commit() or _rollback() */
		isc_refcount_increment(&rollback->base->refcount);
		size_t usage_bytes = sizeof(qp->usage[0]) * qp->chunk_max;
		rollback->usage = isc_mem_allocate(qp->mctx, usage_bytes);
		memmove(rollback->usage, qp->usage, usage_bytes);
	}
	INSIST(multi->rollback == NULL);
	multi->rollback = rollback;

	/* reset the allocator only after saving the rollback state */
	alloc_reset(qp);
}
1257
1258
void
dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) {
	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(qptp != NULL && *qptp == &multi->writer);
	REQUIRE(multi->writer.transaction_mode == QP_WRITE ||
		multi->writer.transaction_mode == QP_UPDATE);

	dns_qp_t *qp = *qptp;
	TRACE("");

	/* this transaction can no longer roll back; discard that state */
	if (qp->transaction_mode == QP_UPDATE) {
		INSIST(multi->rollback != NULL);
		/* paired with dns_qpmulti_update() */
		if (qpbase_unref(multi->rollback)) {
			isc_mem_free(qp->mctx, multi->rollback->base);
		}
		if (multi->rollback->usage != NULL) {
			isc_mem_free(qp->mctx, multi->rollback->usage);
		}
		isc_mem_free(qp->mctx, multi->rollback);
	}
	INSIST(multi->rollback == NULL);

	/* not the first commit? */
	if (multi->reader_ref != INVALID_REF) {
		INSIST(cells_immutable(qp, multi->reader_ref));
		free_twigs(qp, multi->reader_ref, READER_SIZE);
	}

	if (qp->transaction_mode == QP_UPDATE) {
		/* minimize memory overhead */
		compact(qp);
		multi->reader_ref = alloc_twigs(qp, READER_SIZE);
		qp->base->ptr[qp->bump] = chunk_shrink_raw(
			qp, qp->base->ptr[qp->bump],
			qp->usage[qp->bump].used * sizeof(dns_qpnode_t));
	} else {
		multi->reader_ref = alloc_twigs(qp, READER_SIZE);
	}

	/* anchor a new version of the trie */
	dns_qpnode_t *reader = ref_ptr(qp, multi->reader_ref);
	make_reader(reader, multi);
	/* paired with chunk_free() */
	isc_refcount_increment(&qp->base->refcount);

	/*
	 * publish the new version to readers; everything above must be
	 * complete before this assignment becomes visible
	 */
	rcu_assign_pointer(multi->reader, reader); /* COMMIT */

	/* clean up what we can right now */
	if (qp->transaction_mode == QP_UPDATE || QP_NEEDGC(qp)) {
		recycle(qp);
	}

	/* schedule the rest for later */
	reclaim_chunks(multi);

	*qptp = NULL;
	UNLOCK(&multi->mutex);
}
1317
1318
/*
1319
 * Throw away everything that was allocated during this transaction.
1320
 */
1321
void
dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) {
	unsigned int nfree = 0;

	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(multi->writer.transaction_mode == QP_UPDATE);
	REQUIRE(qptp != NULL && *qptp == &multi->writer);

	dns_qp_t *qp = *qptp;
	TRACE("");

	isc_nanosecs_t start = isc_time_monotonic();

	/* free every chunk allocated during this transaction */
	for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
		if (qp->base->ptr[chunk] != NULL && !qp->usage[chunk].immutable)
		{
			chunk_free(qp, chunk);
			/*
			 * we need to clear its base pointer in the rollback
			 * trie, in case the arrays were resized
			 */
			if (chunk < multi->rollback->chunk_max) {
				INSIST(!multi->rollback->usage[chunk].exists);
				multi->rollback->base->ptr[chunk] = NULL;
			}
			nfree++;
		}
	}

	/*
	 * multi->rollback->base and multi->writer->base are the same,
	 * unless there was a realloc_chunk_arrays() during the transaction
	 */
	if (qpbase_unref(qp)) {
		/* paired with dns_qpmulti_update() */
		isc_mem_free(qp->mctx, qp->base);
	}
	isc_mem_free(qp->mctx, qp->usage);

	/* reset allocator state */
	INSIST(multi->rollback != NULL);
	memmove(qp, multi->rollback, sizeof(*qp));
	/* NOTE: the INSIST below relies on isc_mem_free() clearing its
	 * pointer argument */
	isc_mem_free(qp->mctx, multi->rollback);
	INSIST(multi->rollback == NULL);

	isc_nanosecs_t time = isc_time_monotonic() - start;
	atomic_fetch_add_relaxed(&rollback_time, time);

	LOG_STATS("qp rollback" PRItime "free %u chunks", time, nfree);

	*qptp = NULL;
	UNLOCK(&multi->mutex);
}
1374
1375
/***********************************************************************
1376
 *
1377
 *  read-only transactions
1378
 */
1379
1380
static dns_qpmulti_t *
1381
503
reader_open(dns_qpmulti_t *multi, dns_qpreadable_t qpr) {
1382
503
  dns_qpreader_t *qp = dns_qpreader(qpr);
1383
503
  dns_qpnode_t *reader = rcu_dereference(multi->reader);
1384
503
  if (reader == NULL) {
1385
0
    QP_INIT(qp, multi->writer.methods, multi->writer.uctx);
1386
503
  } else {
1387
503
    multi = unpack_reader(qp, reader);
1388
503
  }
1389
503
  return multi;
1390
503
}
1391
1392
/*
1393
 * a query is light
1394
 */
1395
1396
void
dns_qpmulti_query(dns_qpmulti_t *multi, dns_qpread_t *qp) {
	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(qp != NULL);

	qp->tid = isc_tid();
	/* held until dns_qpread_destroy() releases it */
	rcu_read_lock();

	dns_qpmulti_t *whence = reader_open(multi, qp);
	INSIST(whence == multi);
}
1407
1408
void
dns_qpread_destroy(dns_qpmulti_t *multi, dns_qp_t *qp);
1416
1417
/*
1418
 * a snapshot is heavy
1419
 */
1420
1421
void
dns_qpmulti_snapshot(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) {
	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(qpsp != NULL && *qpsp == NULL);

	rcu_read_lock();

	LOCK(&multi->mutex);

	dns_qp_t *qpw = &multi->writer;
	/* the snapshot, its base object, and its pointer array are one
	 * allocation */
	size_t bytes = sizeof(dns_qpsnap_t) + sizeof(dns_qpbase_t) +
		       sizeof(qpw->base->ptr[0]) * qpw->chunk_max;
	dns_qpsnap_t *qps = isc_mem_allocate(qpw->mctx, bytes);

	qps->whence = reader_open(multi, qps);
	INSIST(qps->whence == multi);

	/* not a separate allocation */
	qps->base = (dns_qpbase_t *)(qps + 1);
	isc_refcount_init(&qps->base->refcount, 0);

	/*
	 * only copy base pointers of chunks we need, so we can
	 * reclaim unused memory in dns_qpsnap_destroy()
	 */
	qps->chunk_max = qpw->chunk_max;
	for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) {
		if (qpw->usage[chunk].exists && chunk_usage(qpw, chunk) > 0) {
			qpw->usage[chunk].snapshot = true;
			qps->base->ptr[chunk] = qpw->base->ptr[chunk];
		} else {
			qps->base->ptr[chunk] = NULL;
		}
	}
	ISC_LIST_INITANDAPPEND(multi->snapshots, qps, link);

	*qpsp = qps;
	UNLOCK(&multi->mutex);

	rcu_read_unlock();
}
1462
1463
void
dns_qpsnap_destroy(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) {
	REQUIRE(QPMULTI_VALID(multi));
	REQUIRE(qpsp != NULL && *qpsp != NULL);

	LOCK(&multi->mutex);

	dns_qpsnap_t *qp = *qpsp;

	/* make sure the API is being used correctly */
	REQUIRE(qp->whence == multi);

	/* unlink first so marksweep_chunks() ignores this snapshot */
	ISC_LIST_UNLINK(multi->snapshots, qp, link);

	/*
	 * eagerly reclaim chunks that are now unused, so that memory does
	 * not accumulate when a trie has a lot of updates and snapshots
	 */
	marksweep_chunks(multi);

	isc_mem_free(multi->writer.mctx, qp);

	*qpsp = NULL;
	UNLOCK(&multi->mutex);
}
1488
1489
/***********************************************************************
1490
 *
1491
 *  constructors, destructors
1492
 */
1493
1494
void
dns_qp_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
	      dns_qp_t **qptp) {
	REQUIRE(mctx != NULL);
	REQUIRE(methods != NULL);
	REQUIRE(qptp != NULL && *qptp == NULL);

	/* create a plain (non-multi) trie and prime its allocator */
	dns_qp_t *qp = isc_mem_get(mctx, sizeof(*qp));
	QP_INIT(qp, methods, uctx);
	isc_mem_attach(mctx, &qp->mctx);
	alloc_reset(qp);
	TRACE("");
	*qptp = qp;
}
1508
1509
void
dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
		   dns_qpmulti_t **qpmp) {
	REQUIRE(qpmp != NULL && *qpmp == NULL);

	dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi));
	*multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC,
				  .reader_ref = INVALID_REF,
				  .references = ISC_REFCOUNT_INITIALIZER(1) };
	isc_mutex_init(&multi->mutex);
	ISC_LIST_INIT(multi->snapshots);

	/*
	 * Do not waste effort allocating a bump chunk that will be thrown
	 * away when a transaction is opened. dns_qpmulti_update() always
	 * allocates; to ensure dns_qpmulti_write() does too, pretend the
	 * previous transaction was an update
	 */
	dns_qp_t *qp = &multi->writer;
	QP_INIT(qp, methods, uctx);
	isc_mem_attach(mctx, &qp->mctx);
	qp->transaction_mode = QP_UPDATE;
	TRACE("");
	*qpmp = multi;
}
1534
1535
static void
destroy_guts(dns_qp_t *qp) {
	/* nothing was ever allocated? */
	if (qp->chunk_max == 0) {
		return;
	}

	for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
		if (qp->base->ptr[chunk] != NULL) {
			chunk_free(qp, chunk);
		}
	}
	qp->chunk_max = 0;
	/* the chunk_free() calls must have released every node */
	ENSURE(qp->used_count == 0);
	ENSURE(qp->free_count == 0);
	ENSURE(isc_refcount_current(&qp->base->refcount) == 1);
	isc_mem_free(qp->mctx, qp->base);
	isc_mem_free(qp->mctx, qp->usage);
	qp->magic = 0;
}
1554
1555
void
dns_qp_destroy(dns_qp_t **qptp) {
	REQUIRE(qptp != NULL);
	REQUIRE(QP_VALID(*qptp));

	dns_qp_t *qp = *qptp;
	*qptp = NULL;

	/* do not try to destroy part of a dns_qpmulti_t */
	REQUIRE(qp->transaction_mode == QP_NONE);

	TRACE("");
	destroy_guts(qp);
	isc_mem_putanddetach(&qp->mctx, qp, sizeof(*qp));
}
1570
1571
/* final teardown once the dns_qpmulti_t refcount drops to zero */
static void
qpmulti_free_mem(dns_qpmulti_t *multi) {
	REQUIRE(QPMULTI_VALID(multi));

	/* reassure thread sanitizer */
	LOCK(&multi->mutex);
	dns_qp_t *qp = &multi->writer;
	UNLOCK(&multi->mutex);

	isc_mutex_destroy(&multi->mutex);
	isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
1583
1584
#if QPMULTI_TRACE
1585
ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem)
1586
#else
1587
37.7k
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
qp.c:dns_qpmulti_ref
Line
Count
Source
1587
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
qp.c:dns_qpmulti_detach
Line
Count
Source
1587
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
qp.c:dns_qpmulti_unref
Line
Count
Source
1587
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
1588
37.7k
#endif
1589
37.7k
1590
37.7k
/*
 * RCU callback: free the trie's chunks after a grace period, when no
 * reader can still reference them.
 */
static void
qpmulti_destroy_guts_cb(struct rcu_head *arg) {
	qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
	REQUIRE(QPRCU_VALID(rcuctx));
	/* only nonzero for reclaim_chunks_cb() */
	REQUIRE(rcuctx->count == 0);

	dns_qpmulti_t *multi = rcuctx->multi;
	REQUIRE(QPMULTI_VALID(multi));

	/* reassure thread sanitizer */
	LOCK(&multi->mutex);

	dns_qp_t *qp = &multi->writer;
	REQUIRE(QP_VALID(qp));

	destroy_guts(qp);

	UNLOCK(&multi->mutex);

	dns_qpmulti_detach(&multi);
	isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
			     STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
}
1614
1615
void
dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
	dns_qp_t *qp = NULL;
	dns_qpmulti_t *multi = NULL;
	qp_rcuctx_t *rcuctx = NULL;

	REQUIRE(qpmp != NULL);
	REQUIRE(QPMULTI_VALID(*qpmp));

	multi = *qpmp;
	qp = &multi->writer;
	*qpmp = NULL;

	REQUIRE(QP_VALID(qp));
	REQUIRE(multi->rollback == NULL);
	REQUIRE(ISC_LIST_EMPTY(multi->snapshots));

	/* defer the actual teardown until after an RCU grace period */
	rcuctx = isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, 0));
	*rcuctx = (qp_rcuctx_t){
		.magic = QPRCU_MAGIC,
		.multi = multi,
	};
	isc_mem_attach(qp->mctx, &rcuctx->mctx);
	call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb);
}
1640
1641
/***********************************************************************
1642
 *
1643
 *  modification
1644
 */
1645
1646
/*
 * Insert a leaf (pval, ival) into the trie; returns ISC_R_EXISTS if a
 * leaf with the same key is already present.
 */
isc_result_t
dns_qp_insert(dns_qp_t *qp, void *pval, uint32_t ival) {
	dns_qpref_t new_ref, old_ref;
	dns_qpnode_t new_leaf, old_node;
	dns_qpnode_t *new_twigs = NULL, *old_twigs = NULL;
	dns_qpshift_t new_bit, old_bit;
	dns_qpweight_t old_size, new_size;
	dns_qpkey_t new_key, old_key;
	size_t new_keylen, old_keylen;
	size_t offset;
	uint64_t index;
	dns_qpshift_t bit;
	dns_qpweight_t pos;
	dns_qpnode_t *n = NULL;

	REQUIRE(QP_VALID(qp));

	new_leaf = make_leaf(pval, ival);
	new_keylen = leaf_qpkey(qp, &new_leaf, new_key);

	/* first leaf in an empty trie? */
	if (qp->leaf_count == 0) {
		new_ref = alloc_twigs(qp, 1);
		new_twigs = ref_ptr(qp, new_ref);
		*new_twigs = new_leaf;
		attach_leaf(qp, new_twigs);
		qp->leaf_count++;
		qp->root_ref = new_ref;
		return ISC_R_SUCCESS;
	}

	/*
	 * We need to keep searching down to a leaf even if our key is
	 * missing from this branch. It doesn't matter which twig we
	 * choose since the keys are all the same up to this node's
	 * offset. Note that if we simply use branch_twig_pos(n, bit)
	 * we may get an out-of-bounds access if our bit is greater
	 * than all the set bits in the node.
	 */
	n = ref_ptr(qp, qp->root_ref);
	while (is_branch(n)) {
		prefetch_twigs(qp, n);
		dns_qpref_t ref = branch_twigs_ref(n);
		bit = branch_keybit(n, new_key, new_keylen);
		pos = branch_has_twig(n, bit) ? branch_twig_pos(n, bit) : 0;
		n = ref_ptr(qp, ref + pos);
	}

	/* do the keys differ, and if so, where? */
	old_keylen = leaf_qpkey(qp, n, old_key);
	offset = qpkey_compare(new_key, new_keylen, old_key, old_keylen);
	if (offset == QPKEY_EQUAL) {
		return ISC_R_EXISTS;
	}
	new_bit = qpkey_bit(new_key, new_keylen, offset);
	old_bit = qpkey_bit(old_key, old_keylen, offset);

	/* find where to insert a branch or grow an existing branch. */
	n = make_root_mutable(qp);
	while (is_branch(n)) {
		prefetch_twigs(qp, n);
		if (offset < branch_key_offset(n)) {
			goto newbranch;
		}
		if (offset == branch_key_offset(n)) {
			goto growbranch;
		}
		make_twigs_mutable(qp, n);
		bit = branch_keybit(n, new_key, new_keylen);
		INSIST(branch_has_twig(n, bit));
		n = branch_twig_ptr(qp, n, bit);
	}
	/* fall through */

newbranch:
	new_ref = alloc_twigs(qp, 2);
	new_twigs = ref_ptr(qp, new_ref);

	/* save before overwriting. */
	old_node = *n;

	/* new branch node takes old node's place */
	index = BRANCH_TAG | (1ULL << new_bit) | (1ULL << old_bit) |
		((uint64_t)offset << SHIFT_OFFSET);
	*n = make_node(index, new_ref);

	/* populate twigs; boolean comparisons order the pair by bit */
	new_twigs[old_bit > new_bit] = old_node;
	new_twigs[new_bit > old_bit] = new_leaf;

	attach_leaf(qp, &new_leaf);
	qp->leaf_count++;

	return ISC_R_SUCCESS;

growbranch:
	INSIST(!branch_has_twig(n, new_bit));

	/* locate twigs vectors */
	old_size = branch_twigs_size(n);
	new_size = old_size + 1;
	old_ref = branch_twigs_ref(n);
	new_ref = alloc_twigs(qp, new_size);
	old_twigs = ref_ptr(qp, old_ref);
	new_twigs = ref_ptr(qp, new_ref);

	/* embiggen branch node */
	index = branch_index(n) | (1ULL << new_bit);
	*n = make_node(index, new_ref);

	/* embiggen twigs vector */
	pos = branch_twig_pos(n, new_bit);
	move_twigs(new_twigs, old_twigs, pos);
	new_twigs[pos] = new_leaf;
	move_twigs(new_twigs + pos + 1, old_twigs + pos, old_size - pos);

	if (squash_twigs(qp, old_ref, old_size)) {
		/* old twigs destroyed, only attach to new leaf */
		attach_leaf(qp, &new_leaf);
	} else {
		/* old twigs duplicated, attach to all leaves */
		attach_twigs(qp, new_twigs, new_size);
	}
	qp->leaf_count++;

	return ISC_R_SUCCESS;
}
1773
1774
/*
 * Delete the leaf matching search_key; on success, optionally return its
 * value pair via pval_r/ival_r.
 */
isc_result_t
dns_qp_deletekey(dns_qp_t *qp, const dns_qpkey_t search_key,
		 size_t search_keylen, void **pval_r, uint32_t *ival_r) {
	REQUIRE(QP_VALID(qp));
	REQUIRE(search_keylen < sizeof(dns_qpkey_t));

	if (get_root(qp) == NULL) {
		return ISC_R_NOTFOUND;
	}

	dns_qpshift_t bit = 0; /* suppress warning */
	dns_qpnode_t *parent = NULL;
	dns_qpnode_t *n = make_root_mutable(qp);
	while (is_branch(n)) {
		prefetch_twigs(qp, n);
		bit = branch_keybit(n, search_key, search_keylen);
		if (!branch_has_twig(n, bit)) {
			return ISC_R_NOTFOUND;
		}
		make_twigs_mutable(qp, n);
		parent = n;
		n = branch_twig_ptr(qp, n, bit);
	}

	/* the search can land on the wrong leaf; verify the key matches */
	dns_qpkey_t found_key;
	size_t found_keylen = leaf_qpkey(qp, n, found_key);
	if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) !=
	    QPKEY_EQUAL)
	{
		return ISC_R_NOTFOUND;
	}

	SET_IF_NOT_NULL(pval_r, leaf_pval(n));
	SET_IF_NOT_NULL(ival_r, leaf_ival(n));
	detach_leaf(qp, n);
	qp->leaf_count--;

	/* trie becomes empty */
	if (qp->leaf_count == 0) {
		INSIST(parent == NULL);
		INSIST(n == get_root(qp));
		free_twigs(qp, qp->root_ref, 1);
		qp->root_ref = INVALID_REF;
		return ISC_R_SUCCESS;
	}

	/* step back to parent node */
	n = parent;
	parent = NULL;

	INSIST(bit != 0);
	dns_qpweight_t size = branch_twigs_size(n);
	dns_qpweight_t pos = branch_twig_pos(n, bit);
	dns_qpref_t ref = branch_twigs_ref(n);
	dns_qpnode_t *twigs = ref_ptr(qp, ref);

	if (size == 2) {
		/*
		 * move the other twig to the parent branch.
		 */
		*n = twigs[!pos];
		squash_twigs(qp, ref, 2);
	} else {
		/*
		 * shrink the twigs in place, to avoid using the bump
		 * chunk too fast - the gc will clean up after us
		 */
		*n = make_node(branch_index(n) & ~(1ULL << bit), ref);
		move_twigs(twigs + pos, twigs + pos + 1, size - pos - 1);
		squash_twigs(qp, ref + size - 1, 1);
	}

	return ISC_R_SUCCESS;
}
1848
1849
isc_result_t
1850
dns_qp_deletename(dns_qp_t *qp, const dns_name_t *name, dns_namespace_t space,
1851
2
      void **pval_r, uint32_t *ival_r) {
1852
2
  dns_qpkey_t key;
1853
2
  size_t keylen = dns_qpkey_fromname(key, name, space);
1854
2
  return dns_qp_deletekey(qp, key, keylen, pval_r, ival_r);
1855
2
}
1856
1857
/***********************************************************************
1858
 *  chains
1859
 */
1860
static void
1861
612
maybe_set_name(dns_qpreader_t *qp, dns_qpnode_t *node, dns_name_t *name) {
1862
612
  dns_qpkey_t key;
1863
612
  size_t len;
1864
1865
612
  if (name == NULL) {
1866
488
    return;
1867
488
  }
1868
1869
124
  dns_name_reset(name);
1870
124
  len = leaf_qpkey(qp, node, key);
1871
124
  dns_qpkey_toname(key, len, name, NULL);
1872
124
}
1873
1874
void
1875
497
dns_qpchain_init(dns_qpreadable_t qpr, dns_qpchain_t *chain) {
1876
497
  dns_qpreader_t *qp = dns_qpreader(qpr);
1877
497
  REQUIRE(QP_VALID(qp));
1878
497
  REQUIRE(chain != NULL);
1879
1880
  /*
1881
   * dns_qpchain_t contains a 2kb buffer, which is slow to
1882
   * zero-initialize. Therefore we avoid designated initializers, and
1883
   * initialize each field manually.
1884
   */
1885
497
  chain->magic = QPCHAIN_MAGIC;
1886
497
  chain->qp = qp;
1887
497
  chain->len = 0;
1888
497
}
1889
1890
unsigned int
1891
182
dns_qpchain_length(dns_qpchain_t *chain) {
1892
182
  REQUIRE(QPCHAIN_VALID(chain));
1893
1894
182
  return chain->len;
1895
182
}
1896
1897
void
1898
dns_qpchain_node(dns_qpchain_t *chain, unsigned int level, dns_name_t *name,
1899
62
     void **pval_r, uint32_t *ival_r) {
1900
62
  dns_qpnode_t *node = NULL;
1901
1902
62
  REQUIRE(QPCHAIN_VALID(chain));
1903
62
  REQUIRE(level < chain->len);
1904
1905
62
  node = chain->chain[level].node;
1906
62
  maybe_set_name(chain->qp, node, name);
1907
62
  SET_IF_NOT_NULL(pval_r, leaf_pval(node));
1908
62
  SET_IF_NOT_NULL(ival_r, leaf_ival(node));
1909
62
}
1910
1911
/***********************************************************************
1912
 *  iterators
1913
 */
1914
1915
void
1916
559
dns_qpiter_init(dns_qpreadable_t qpr, dns_qpiter_t *qpi) {
1917
559
  dns_qpreader_t *qp = dns_qpreader(qpr);
1918
559
  REQUIRE(QP_VALID(qp));
1919
559
  REQUIRE(qpi != NULL);
1920
1921
  /*
1922
   * dns_qpiter_t contains a 4kb buffer, which is slow to zero-initialize.
1923
   * Therefore we avoid designated initializers, and initialize each
1924
   * field manually.
1925
   */
1926
559
  qpi->qp = qp;
1927
559
  qpi->sp = 0;
1928
559
  qpi->magic = QPITER_MAGIC;
1929
  /*
1930
   * The top of the stack must be initialized.
1931
   */
1932
559
  qpi->stack[qpi->sp] = NULL;
1933
559
}
1934
1935
/*
1936
 * are we at the last twig in this branch (in whichever direction
1937
 * we're iterating)?
1938
 */
1939
static bool
1940
248
last_twig(dns_qpiter_t *qpi, bool forward) {
1941
248
  dns_qpweight_t pos = 0, max = 0;
1942
248
  if (qpi->sp > 0) {
1943
186
    dns_qpnode_t *child = qpi->stack[qpi->sp];
1944
186
    dns_qpnode_t *parent = qpi->stack[qpi->sp - 1];
1945
186
    pos = child - ref_ptr(qpi->qp, branch_twigs_ref(parent));
1946
186
    if (forward) {
1947
186
      max = branch_twigs_size(parent) - 1;
1948
186
    }
1949
186
  }
1950
248
  return pos == max;
1951
248
}
1952
1953
/*
1954
 * move a QP iterator forward or back to the next or previous leaf.
1955
 * note: this function can go wrong when the iterator refers to
1956
 * a mutable view of the trie which is altered while iterating
1957
 */
1958
static isc_result_t
iterate(bool forward, dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
	uint32_t *ival_r) {
	dns_qpnode_t *node = NULL;
	/*
	 * true until we descend into a fresh subtree below; once false,
	 * each loop iteration just keeps descending to the first (or
	 * last) twig instead of stepping sideways.
	 */
	bool initial_branch = true;

	REQUIRE(QPITER_VALID(qpi));

	dns_qpreader_t *qp = qpi->qp;

	REQUIRE(QP_VALID(qp));

	node = get_root(qp);
	if (node == NULL) {
		/* an empty trie has nothing to iterate over */
		return ISC_R_NOMORE;
	}

	do {
		if (qpi->stack[qpi->sp] == NULL) {
			/* newly initialized iterator: use the root node */
			INSIST(qpi->sp == 0);
			qpi->stack[0] = node;
		} else if (!initial_branch) {
			/*
			 * in a prior loop, we reached a branch; from
			 * here we just need to get the highest or lowest
			 * leaf in the subtree; we don't need to bother
			 * stepping forward or backward through twigs
			 * anymore.
			 */
			INSIST(qpi->sp > 0);
		} else if (last_twig(qpi, forward)) {
			/*
			 * we've stepped to the end (or the beginning,
			 * if we're iterating backwards) of a set of twigs.
			 */
			if (qpi->sp == 0) {
				/*
				 * we've finished iterating. reinitialize
				 * the iterator, then return ISC_R_NOMORE.
				 */
				dns_qpiter_init(qpi->qp, qpi);
				return ISC_R_NOMORE;
			}

			/*
			 * pop the stack, and resume at the parent branch.
			 */
			qpi->stack[qpi->sp] = NULL;
			qpi->sp--;
			continue;
		} else {
			/*
			 * there are more twigs in the current branch,
			 * so step the node pointer forward (or back).
			 */
			qpi->stack[qpi->sp] += (forward ? 1 : -1);
			node = qpi->stack[qpi->sp];
		}

		/*
		 * if we're at a branch now, we loop down to the
		 * left- or rightmost leaf.
		 */
		if (is_branch(node)) {
			qpi->sp++;
			INSIST(qpi->sp < DNS_QP_MAXKEY);
			/* first twig when forward, last twig when backward */
			node = ref_ptr(qp, branch_twigs_ref(node)) +
			       (forward ? 0 : branch_twigs_size(node) - 1);
			qpi->stack[qpi->sp] = node;
			initial_branch = false;
		}
	} while (is_branch(node));

	/* we're at a leaf: return its data to the caller */
	SET_IF_NOT_NULL(pval_r, leaf_pval(node));
	SET_IF_NOT_NULL(ival_r, leaf_ival(node));
	maybe_set_name(qpi->qp, node, name);
	return ISC_R_SUCCESS;
}
2038
2039
isc_result_t
2040
dns_qpiter_next(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
2041
186
    uint32_t *ival_r) {
2042
186
  return iterate(true, qpi, name, pval_r, ival_r);
2043
186
}
2044
2045
isc_result_t
2046
dns_qpiter_prev(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
2047
0
    uint32_t *ival_r) {
2048
0
  return iterate(false, qpi, name, pval_r, ival_r);
2049
0
}
2050
2051
isc_result_t
2052
dns_qpiter_current(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
2053
62
       uint32_t *ival_r) {
2054
62
  dns_qpnode_t *node = NULL;
2055
2056
62
  REQUIRE(QPITER_VALID(qpi));
2057
2058
62
  node = qpi->stack[qpi->sp];
2059
62
  if (node == NULL || is_branch(node)) {
2060
0
    return ISC_R_FAILURE;
2061
0
  }
2062
2063
62
  SET_IF_NOT_NULL(pval_r, leaf_pval(node));
2064
62
  SET_IF_NOT_NULL(ival_r, leaf_ival(node));
2065
62
  maybe_set_name(qpi->qp, node, name);
2066
62
  return ISC_R_SUCCESS;
2067
62
}
2068
2069
/***********************************************************************
2070
 *
2071
 *  search
2072
 */
2073
2074
isc_result_t
dns_qp_getkey(dns_qpreadable_t qpr, const dns_qpkey_t search_key,
	      size_t search_keylen, void **pval_r, uint32_t *ival_r) {
	dns_qpreader_t *qp = dns_qpreader(qpr);
	dns_qpkey_t found_key;
	size_t found_keylen;
	dns_qpshift_t bit;
	dns_qpnode_t *n = NULL;

	REQUIRE(QP_VALID(qp));
	REQUIRE(search_keylen < sizeof(dns_qpkey_t));

	n = get_root(qp);
	if (n == NULL) {
		/* the trie is empty */
		return ISC_R_NOTFOUND;
	}

	/*
	 * Walk down the trie: at each branch, pick the twig selected by
	 * the search key's bit at the branch's offset, and bail out as
	 * soon as a required twig is missing. Stop at the first leaf.
	 */
	while (is_branch(n)) {
		prefetch_twigs(qp, n);
		bit = branch_keybit(n, search_key, search_keylen);
		if (!branch_has_twig(n, bit)) {
			return ISC_R_NOTFOUND;
		}
		n = branch_twig_ptr(qp, n, bit);
	}

	/*
	 * Branches only examine the key at their offsets, so the leaf we
	 * arrived at is not necessarily an exact match: compare the full
	 * keys to confirm.
	 */
	found_keylen = leaf_qpkey(qp, n, found_key);
	if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) !=
	    QPKEY_EQUAL)
	{
		return ISC_R_NOTFOUND;
	}

	/* exact match: hand the leaf's values back to the caller */
	SET_IF_NOT_NULL(pval_r, leaf_pval(n));
	SET_IF_NOT_NULL(ival_r, leaf_ival(n));
	return ISC_R_SUCCESS;
}
2111
2112
isc_result_t
2113
dns_qp_getname(dns_qpreadable_t qpr, const dns_name_t *name,
2114
9.19M
         dns_namespace_t space, void **pval_r, uint32_t *ival_r) {
2115
9.19M
  dns_qpkey_t key;
2116
9.19M
  size_t keylen = dns_qpkey_fromname(key, name, space);
2117
9.19M
  return dns_qp_getkey(qpr, key, keylen, pval_r, ival_r);
2118
9.19M
}
2119
2120
static inline void
2121
364
add_link(dns_qpchain_t *chain, dns_qpnode_t *node, size_t offset) {
2122
  /* prevent duplication */
2123
364
  if (chain->len != 0 && chain->chain[chain->len - 1].node == node) {
2124
0
    return;
2125
0
  }
2126
364
  chain->chain[chain->len].node = node;
2127
364
  chain->chain[chain->len].offset = offset;
2128
364
  chain->len++;
2129
364
  INSIST(chain->len <= DNS_NAME_MAXLABELS);
2130
364
}
2131
2132
static inline void
2133
0
prevleaf(dns_qpiter_t *it) {
2134
0
  isc_result_t result = dns_qpiter_prev(it, NULL, NULL, NULL);
2135
0
  if (result == ISC_R_NOMORE) {
2136
0
    result = dns_qpiter_prev(it, NULL, NULL, NULL);
2137
0
  }
2138
0
  RUNTIME_CHECK(result == ISC_R_SUCCESS);
2139
0
}
2140
2141
static inline void
2142
62
greatest_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpiter_t *iter) {
2143
62
  while (is_branch(n)) {
2144
0
    dns_qpref_t ref = branch_twigs_ref(n) + branch_twigs_size(n) -
2145
0
          1;
2146
0
    iter->stack[++iter->sp] = n;
2147
0
    n = ref_ptr(qpr, ref);
2148
0
  }
2149
62
  iter->stack[++iter->sp] = n;
2150
62
}
2151
2152
static inline dns_qpnode_t *
2153
0
anyleaf(dns_qpreader_t *qp, dns_qpnode_t *n) {
2154
0
  while (is_branch(n)) {
2155
0
    n = branch_twigs(qp, n);
2156
0
  }
2157
0
  return n;
2158
0
}
2159
2160
static inline int
2161
twig_offset(dns_qpnode_t *n, dns_qpshift_t sbit, dns_qpshift_t kbit,
2162
62
      dns_qpshift_t fbit) {
2163
62
  dns_qpweight_t pos = branch_twig_pos(n, sbit);
2164
62
  if (branch_has_twig(n, sbit)) {
2165
62
    return pos - (kbit < fbit);
2166
62
  }
2167
0
  return pos - 1;
2168
62
}
2169
2170
/*
2171
 * If dns_qp_lookup() was passed an iterator, we want it to point at the
2172
 * matching name in the case of an exact match, or at the predecessor name
2173
 * for a non-exact match.
2174
 *
2175
 * If there is an exact match, then there is nothing to be done. Otherwise,
2176
 * we pop up the iterator stack until we find a parent branch with an offset
2177
 * that is before the position where the search key differs from the found key.
2178
 * From there we can step to the leaf that is the predecessor of the searched
2179
 * name.
2180
 *
2181
 * Requires the iterator to be pointing at a leaf node.
2182
 */
2183
static void
fix_iterator(dns_qpreader_t *qp, dns_qpiter_t *it, dns_qpkey_t key,
	     size_t len) {
	dns_qpnode_t *n = it->stack[it->sp];

	REQUIRE(!is_branch(n));

	dns_qpkey_t found;
	size_t foundlen = leaf_qpkey(qp, n, found);
	/* 'to' is the offset where the two keys first differ */
	size_t to = qpkey_compare(key, len, found, foundlen);

	/* If the keys are equal, the iterator is already at the right node. */
	if (to == QPKEY_EQUAL) {
		return;
	}

	/*
	 * Special case: if the key differs even before the root
	 * key offset, it means the name desired either precedes or
	 * follows the entire range of names in the database, and
	 * popping up the stack won't help us, so just move the
	 * iterator one step back from the origin and return.
	 */
	if (to < branch_key_offset(it->stack[0])) {
		dns_qpiter_init(qp, it);
		prevleaf(it);
		return;
	}

	/*
	 * As long as the branch offset point is after the point where the
	 * key differs, we need to branch up and find a better node.
	 */
	while (it->sp > 0) {
		dns_qpnode_t *b = it->stack[it->sp - 1];
		if (branch_key_offset(b) < to) {
			break;
		}
		it->sp--;
	}
	n = it->stack[it->sp];

	/*
	 * Either we are now at the correct branch, or we are at the
	 * first unmatched node. Determine the bit position for the
	 * twig we need (sbit).
	 */
	dns_qpshift_t kbit = qpkey_bit(key, len, to);
	dns_qpshift_t fbit = qpkey_bit(found, foundlen, to);
	dns_qpshift_t sbit = 0;

	if (is_branch(n) && branch_key_offset(n) == to) {
		/* We are on the correct branch now. */
		sbit = kbit;
	} else if (it->sp == 0) {
		/*
		 * We are on the root branch, popping up the stack won't
		 * help us, so just move the iterator one step back from the
		 * origin and return.
		 */
		dns_qpiter_init(qp, it);
		prevleaf(it);
		return;
	} else {
		/* We are at the first unmatched node, pop up the stack. */
		n = it->stack[--it->sp];
		sbit = qpkey_bit(key, len, branch_key_offset(n));
	}

	/* we must be on a branch to look at twigs below */
	INSIST(is_branch(n));

	prefetch_twigs(qp, n);
	dns_qpnode_t *twigs = branch_twigs(qp, n);
	int toff = twig_offset(n, sbit, kbit, fbit);
	if (toff >= 0) {
		/*
		 * The name we want would've been after some twig in
		 * this branch. Walk down from that twig to the
		 * highest leaf in its subtree to get the predecessor.
		 */
		greatest_leaf(qp, twigs + toff, it);
	} else {
		/*
		 * Every leaf below this node is greater than the one we
		 * wanted, so the previous leaf is the predecessor.
		 */
		prevleaf(it);
	}
}
2272
2273
/*
2274
 * When searching for a requested name in dns_qp_lookup(), we might add
2275
 * a leaf node to the chain, then subsequently determine that it was a
2276
 * dead end. When this happens, the chain can be left holding a node
2277
 * that is *not* an ancestor of the requested name. We correct for that
2278
 * here.
2279
 */
2280
static void
2281
124
fix_chain(dns_qpchain_t *chain, size_t offset) {
2282
124
  while (chain->len > 0 && chain->chain[chain->len - 1].offset >= offset)
2283
0
  {
2284
0
    chain->len--;
2285
0
    chain->chain[chain->len].node = NULL;
2286
0
    chain->chain[chain->len].offset = 0;
2287
0
  }
2288
124
}
2289
2290
isc_result_t
2291
dns_qp_lookup(dns_qpreadable_t qpr, const dns_name_t *name,
2292
        dns_namespace_t space, dns_name_t *foundname, dns_qpiter_t *iter,
2293
497
        dns_qpchain_t *chain, void **pval_r, uint32_t *ival_r) {
2294
497
  dns_qpreader_t *qp = dns_qpreader(qpr);
2295
497
  dns_qpkey_t search, found;
2296
497
  size_t searchlen, foundlen;
2297
497
  size_t offset = 0;
2298
497
  dns_qpnode_t *n = NULL;
2299
497
  dns_qpshift_t bit = SHIFT_NOBYTE;
2300
497
  dns_qpchain_t oc;
2301
497
  dns_qpiter_t it;
2302
497
  bool matched = false;
2303
497
  bool setiter = true;
2304
2305
497
  REQUIRE(QP_VALID(qp));
2306
497
  REQUIRE(foundname == NULL || ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
2307
2308
497
  searchlen = dns_qpkey_fromname(search, name, space);
2309
2310
497
  if (chain == NULL) {
2311
0
    chain = &oc;
2312
0
  }
2313
497
  if (iter == NULL) {
2314
315
    iter = &it;
2315
315
    setiter = false;
2316
315
  }
2317
497
  dns_qpchain_init(qp, chain);
2318
497
  dns_qpiter_init(qp, iter);
2319
2320
497
  n = get_root(qp);
2321
497
  if (n == NULL) {
2322
0
    return ISC_R_NOTFOUND;
2323
0
  }
2324
497
  iter->stack[0] = n;
2325
2326
  /*
2327
   * Like `dns_qp_insert()`, we must find a leaf. However, we don't make a
2328
   * second pass: instead, we keep track of any leaves with shorter keys
2329
   * that we discover along the way. (In general, qp-trie searches can be
2330
   * one-pass, by recording their traversal, or two-pass, for less stack
2331
   * memory usage.)
2332
   */
2333
679
  while (is_branch(n)) {
2334
182
    prefetch_twigs(qp, n);
2335
2336
182
    offset = branch_key_offset(n);
2337
182
    bit = qpkey_bit(search, searchlen, offset);
2338
182
    dns_qpnode_t *twigs = branch_twigs(qp, n);
2339
2340
    /*
2341
     * A shorter key that can be a parent domain always has a
2342
     * leaf node at SHIFT_NOBYTE (indicating end of its key)
2343
     * where our search key has a normal character immediately
2344
     * after a label separator.
2345
     *
2346
     * Note 1: It is OK if `off - 1` underflows: it will
2347
     * become SIZE_MAX, which is greater than `searchlen`, so
2348
     * `qpkey_bit()` will return SHIFT_NOBYTE, which is what we
2349
     * want when `off == 0`.
2350
     *
2351
     * Note 2: If SHIFT_NOBYTE twig is present, it will always
2352
     * be in position 0, the first location in 'twigs'.
2353
     */
2354
182
    if (bit != SHIFT_NOBYTE && branch_has_twig(n, SHIFT_NOBYTE) &&
2355
0
        qpkey_bit(search, searchlen, offset - 1) == SHIFT_NOBYTE &&
2356
0
        !is_branch(twigs))
2357
0
    {
2358
0
      add_link(chain, twigs, offset);
2359
0
    }
2360
2361
182
    matched = branch_has_twig(n, bit);
2362
182
    if (matched) {
2363
      /*
2364
       * found a match: if it's a branch, we keep
2365
       * searching, and if it's a leaf, we drop out of
2366
       * the loop.
2367
       */
2368
182
      n = branch_twig_ptr(qp, n, bit);
2369
182
    } else {
2370
      /*
2371
       * this branch is a dead end, and the predecessor
2372
       * doesn't matter. now we just need to find a leaf
2373
       * to end on so that qpkey_leaf() will work below.
2374
       */
2375
0
      n = anyleaf(qp, twigs);
2376
0
    }
2377
2378
182
    iter->stack[++iter->sp] = n;
2379
182
  }
2380
2381
497
  if (setiter) {
2382
    /*
2383
     * we found a leaf, but it might not be the leaf we wanted.
2384
     * if it isn't, and if the caller passed us an iterator,
2385
     * then we might need to reposition it.
2386
     */
2387
182
    fix_iterator(qp, iter, search, searchlen);
2388
182
    n = iter->stack[iter->sp];
2389
182
  }
2390
2391
  /* at this point, n can only be a leaf node */
2392
497
  INSIST(!is_branch(n));
2393
2394
497
  foundlen = leaf_qpkey(qp, n, found);
2395
497
  offset = qpkey_compare(search, searchlen, found, foundlen);
2396
2397
  /* the search ended with an exact or partial match */
2398
497
  if (offset == QPKEY_EQUAL || offset == foundlen) {
2399
364
    isc_result_t result = ISC_R_SUCCESS;
2400
2401
364
    if (offset == foundlen) {
2402
124
      fix_chain(chain, offset);
2403
124
      result = DNS_R_PARTIALMATCH;
2404
124
    }
2405
364
    add_link(chain, n, offset);
2406
2407
364
    SET_IF_NOT_NULL(pval_r, leaf_pval(n));
2408
364
    SET_IF_NOT_NULL(ival_r, leaf_ival(n));
2409
364
    maybe_set_name(qp, n, foundname);
2410
364
    return result;
2411
364
  }
2412
2413
  /*
2414
   * the requested name was not found, but if an ancestor
2415
   * was, we can retrieve that from the chain.
2416
   */
2417
133
  int len = chain->len;
2418
133
  while (len-- > 0) {
2419
0
    if (offset >= chain->chain[len].offset) {
2420
0
      n = chain->chain[len].node;
2421
0
      SET_IF_NOT_NULL(pval_r, leaf_pval(n));
2422
0
      SET_IF_NOT_NULL(ival_r, leaf_ival(n));
2423
0
      maybe_set_name(qp, n, foundname);
2424
0
      return DNS_R_PARTIALMATCH;
2425
0
    } else {
2426
      /*
2427
       * oops, during the search we found and added
2428
       * a leaf that's longer than the requested
2429
       * name; remove it from the chain.
2430
       */
2431
0
      chain->len--;
2432
0
    }
2433
0
  }
2434
2435
  /* nothing was found at all */
2436
133
  return ISC_R_NOTFOUND;
2437
133
}
2438
2439
/**********************************************************************/