Line | Count | Source |
1 | | /* |
2 | | * Copyright (C) Internet Systems Consortium, Inc. ("ISC") |
3 | | * |
4 | | * SPDX-License-Identifier: MPL-2.0 |
5 | | * |
6 | | * This Source Code Form is subject to the terms of the Mozilla Public |
7 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
8 | | * file, you can obtain one at https://mozilla.org/MPL/2.0/. |
9 | | * |
10 | | * See the COPYRIGHT file distributed with this work for additional |
11 | | * information regarding copyright ownership. |
12 | | */ |
13 | | |
14 | | /* |
15 | | * For an overview, see doc/design/qp-trie.md |
16 | | */ |
17 | | |
18 | | #include <inttypes.h> |
19 | | #include <stdbool.h> |
20 | | #include <stddef.h> |
21 | | #include <stdint.h> |
22 | | #include <string.h> |
23 | | |
24 | | #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
25 | | #include <sys/mman.h> |
26 | | #include <unistd.h> |
27 | | #endif |
28 | | |
29 | | #include <isc/atomic.h> |
30 | | #include <isc/bit.h> |
31 | | #include <isc/buffer.h> |
32 | | #include <isc/log.h> |
33 | | #include <isc/magic.h> |
34 | | #include <isc/mem.h> |
35 | | #include <isc/mutex.h> |
36 | | #include <isc/refcount.h> |
37 | | #include <isc/result.h> |
38 | | #include <isc/rwlock.h> |
39 | | #include <isc/tid.h> |
40 | | #include <isc/time.h> |
41 | | #include <isc/types.h> |
42 | | #include <isc/urcu.h> |
43 | | #include <isc/util.h> |
44 | | |
45 | | #include <dns/fixedname.h> |
46 | | #include <dns/name.h> |
47 | | #include <dns/qp.h> |
48 | | #include <dns/types.h> |
49 | | |
50 | | #include "qp_p.h" |
51 | | |
52 | | #ifndef DNS_QP_LOG_STATS_LEVEL |
53 | | #define DNS_QP_LOG_STATS_LEVEL 3 |
54 | | #endif |
55 | | #ifndef DNS_QP_TRACE |
56 | | #define DNS_QP_TRACE 0 |
57 | | #endif |
58 | | |
59 | | /* |
60 | | * very basic garbage collector statistics |
61 | | * |
62 | | * XXXFANF for now we're logging GC times, but ideally we should |
63 | | * accumulate stats more quietly and report via the statschannel |
64 | | */ |
65 | | static atomic_uint_fast64_t compact_time; |
66 | | static atomic_uint_fast64_t recycle_time; |
67 | | static atomic_uint_fast64_t rollback_time; |
68 | | |
69 | | /* for LOG_STATS() format strings */ |
70 | | #define PRItime " %" PRIu64 " ns " |
71 | | |
72 | | #if DNS_QP_LOG_STATS_LEVEL |
73 | | #define LOG_STATS(...) \ |
74 | 13.4k | isc_log_write(DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_QP, \ |
75 | 13.4k | ISC_LOG_DEBUG(DNS_QP_LOG_STATS_LEVEL), __VA_ARGS__) |
76 | | #else |
77 | | #define LOG_STATS(...) |
78 | | #endif |
79 | | |
80 | | #if DNS_QP_TRACE |
81 | | /* |
82 | | * TRACE is generally used in allocation-related functions so it doesn't |
83 | | * trace very high-frequency ops |
84 | | */ |
85 | | #define TRACE(fmt, ...) \ |
86 | | do { \ |
87 | | if (isc_log_wouldlog(ISC_LOG_DEBUG(7))) { \ |
88 | | isc_log_write(DNS_LOGCATEGORY_DATABASE, \ |
89 | | DNS_LOGMODULE_QP, ISC_LOG_DEBUG(7), \ |
90 | | "%s:%d:%s(qp %p uctx \"%s\"):t%" PRItid \ |
91 | | ": " fmt, \ |
92 | | __FILE__, __LINE__, __func__, qp, \ |
93 | | qp ? TRIENAME(qp) : "(null)", isc_tid(), \ |
94 | | ##__VA_ARGS__); \ |
95 | | } \ |
96 | | } while (0) |
97 | | #else |
98 | | #define TRACE(...) |
99 | | #endif |
100 | | |
101 | | #if DNS_QPMULTI_TRACE |
102 | | ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti); |
103 | | #define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__) |
104 | | #define dns_qpmulti_unref(ptr) \ |
105 | | dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__) |
106 | | #define dns_qpmulti_attach(ptr, ptrp) \ |
107 | | dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__) |
108 | | #define dns_qpmulti_detach(ptrp) \ |
109 | | dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__) |
110 | | #else |
111 | | ISC_REFCOUNT_STATIC_DECL(dns_qpmulti); |
112 | | #endif |
113 | | |
114 | | /*********************************************************************** |
115 | | * |
116 | | * converting DNS names to trie keys |
117 | | */ |
118 | | |
119 | | /* |
120 | | * Convert the namespace value. We map namespace values to numerical |
121 | | * digits so they can be represented in a single byte in the QP key; |
122 | | * thus namespace 0 becomes '0', etc. |
123 | | */ |
124 | 26.7M | #define ENCODE_NAMESPACE(c) dns_qp_bits_for_byte[(c) + (uint8_t)'0'] |
125 | | #define DECODE_NAMESPACE(c) (dns_qp_byte_for_bit[(c)] - (uint8_t)'0') |
126 | | |
127 | 28.0k | #define NAME_OFFSET 1 |
128 | | |
129 | | /* |
130 | | * Number of distinct byte values, i.e. 256 |
131 | | */ |
132 | 5.65k | #define BYTE_VALUES (UINT8_MAX + 1) |
133 | | |
134 | | /* |
135 | | * Lookup table mapping bytes in DNS names to bit positions, used |
136 | | * by dns_qpkey_fromname() to convert DNS names to qp-trie keys. |
137 | | * |
138 | | * Each element holds one or two bit positions, bit_one in the |
139 | | * lower half and bit_two in the upper half. |
140 | | * |
141 | | * For common hostname characters, bit_two is zero (which cannot |
142 | | * be a valid bit position). |
143 | | * |
144 | | * For others, bit_one is the escape bit, and bit_two is the |
145 | | * position of the character within the escaped range. |
146 | | */ |
147 | | uint16_t dns_qp_bits_for_byte[BYTE_VALUES] = { 0 }; |
148 | | |
149 | | /* |
150 | | * And the reverse, mapping bit positions to characters, so the tests |
151 | | * can print diagnostics involving qp-trie keys. |
152 | | * |
153 | | * This table only handles the first bit in an escape sequence; we |
154 | | * arrange that we can calculate the byte value for both bits by |
155 | | * adding the second bit to the first bit's byte value. |
156 | | */ |
157 | | uint8_t dns_qp_byte_for_bit[SHIFT_OFFSET] = { 0 }; |
158 | | |
159 | | /* |
160 | | * Fill in the lookup tables at program startup. (It doesn't matter |
161 | | * when this is initialized relative to other startup code.) |
162 | | */ |
163 | | |
164 | | /* |
165 | | * The bit positions for bytes inside labels have to be between |
166 | | * SHIFT_BITMAP and SHIFT_OFFSET. (SHIFT_NOBYTE separates labels.) |
167 | | * |
168 | | * Each byte range in between common hostname characters has a different |
169 | | * escape character, to preserve the correct lexical order. |
170 | | * |
171 | | * Escaped byte ranges mostly fit into the space available in the |
172 | | * bitmap, except for those above 'z' (which is mostly bytes with the |
173 | | * top bit set). So, when we reach the end of the bitmap we roll over |
174 | | * to the next escape character. |
175 | | * |
176 | | * After filling the table we ensure that the bit positions for |
177 | | * hostname characters and escape characters all fit. |
178 | | */ |
179 | | void |
180 | 22 | dns__qp_initialize(void) { |
181 | | /* zero marks a common character, so it must not be a valid shift position */ |
182 | 22 | INSIST(0 < SHIFT_BITMAP); |
183 | | /* first bit is common byte or escape byte */ |
184 | 22 | dns_qpshift_t bit_one = SHIFT_BITMAP; |
185 | | /* second bit is position in escaped range */ |
186 | 22 | dns_qpshift_t bit_two = SHIFT_BITMAP; |
187 | 22 | bool escaping = true; |
188 | | |
189 | 5.65k | for (unsigned int byte = 0; byte < BYTE_VALUES; byte++) { |
190 | 5.63k | if (qp_common_character(byte)) { |
191 | 902 | escaping = false; |
192 | 902 | bit_one++; |
193 | 902 | dns_qp_byte_for_bit[bit_one] = byte; |
194 | 902 | dns_qp_bits_for_byte[byte] = bit_one; |
195 | 4.73k | } else if ('A' <= byte && byte <= 'Z') { |
196 | | /* map upper case to lower case */ |
197 | 572 | dns_qpshift_t after_esc = bit_one + 1; |
198 | 572 | dns_qpshift_t skip_punct = 'a' - '_'; |
199 | 572 | dns_qpshift_t letter = byte - 'A'; |
200 | 572 | dns_qpshift_t bit = after_esc + skip_punct + letter; |
201 | 572 | dns_qp_bits_for_byte[byte] = bit; |
202 | | /* to simplify reverse conversion */ |
203 | 572 | bit_two++; |
204 | 4.15k | } else { |
205 | | /* non-hostname characters need to be escaped */ |
206 | 4.15k | if (!escaping || bit_two >= SHIFT_OFFSET) { |
207 | 88 | escaping = true; |
208 | 88 | bit_one++; |
209 | 88 | dns_qp_byte_for_bit[bit_one] = byte; |
210 | 88 | bit_two = SHIFT_BITMAP; |
211 | 88 | } |
212 | 4.15k | dns_qp_bits_for_byte[byte] = bit_two << 8 | bit_one; |
213 | 4.15k | bit_two++; |
214 | 4.15k | } |
215 | 5.63k | } |
216 | 22 | ENSURE(bit_one < SHIFT_OFFSET); |
217 | 22 | } |
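| |
| | /* |
| | * Illustrative sketch: how one byte of a label becomes one or two |
| | * key elements, mirroring the inner loop of dns_qpkey_fromname() |
| | * below; the helper name is hypothetical. |
| | */ |
| | static inline size_t |
| | example_byte_to_bits(uint8_t byte, dns_qpshift_t out[2]) { |
| | 	uint16_t bits = dns_qp_bits_for_byte[byte]; |
| | 	out[0] = bits & 0xFF;	/* bit_one: common byte or escape bit */ |
| | 	if ((bits >> 8) != 0) {	/* escaped? */ |
| | 		out[1] = bits >> 8; /* bit_two: position in escaped range */ |
| | 		return 2; |
| | 	} |
| | 	return 1; |
| | } |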
218 | | |
219 | | void |
220 | 0 | dns__qp_shutdown(void) { |
221 | | /* Nothing */ |
222 | 0 | } |
223 | | |
224 | | /* |
225 | | * Convert a DNS name into a trie lookup key. |
226 | | * |
227 | | * Returns the length of the key. |
228 | | * |
229 | | * For performance we get our hands dirty in the guts of the name. |
230 | | * |
231 | | * We don't worry about the distinction between absolute and relative |
232 | | * names. When the trie is only used with absolute names, the first |
233 | | * element after the namespace byte will always be SHIFT_NOBYTE and it |
234 | | * will always be skipped when traversing the trie. So keeping the root |
235 | | * label costs little, and it allows us to support tries of relative |
236 | | * names too. In fact absolute and relative names can be mixed in the |
237 | | * same trie without causing confusion, because the presence or absence |
238 | | * of that initial SHIFT_NOBYTE disambiguates them (exactly like a |
239 | | * trailing dot in a zone file). |
240 | | */ |
241 | | size_t |
242 | | dns_qpkey_fromname(dns_qpkey_t key, const dns_name_t *name, |
243 | 26.7M | dns_namespace_t space) { |
244 | 26.7M | REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC)); |
245 | | |
246 | 26.7M | dns_offsets_t offsets; |
247 | 26.7M | size_t labels = dns_name_offsets(name, offsets); |
248 | 26.7M | size_t len = 0; |
249 | | |
250 | | /* namespace */ |
251 | 26.7M | key[len++] = ENCODE_NAMESPACE(space); |
252 | | /* name */ |
253 | 26.7M | if (labels == 0) { |
254 | 0 | key[len] = SHIFT_NOBYTE; |
255 | 0 | return len; |
256 | 0 | } |
257 | | |
258 | 26.7M | size_t label = labels; |
259 | 214M | while (label-- > 0) { |
260 | 188M | const uint8_t *ldata = name->ndata + offsets[label]; |
261 | 188M | size_t label_len = *ldata++; |
262 | 531M | while (label_len-- > 0) { |
263 | 343M | uint16_t bits = dns_qp_bits_for_byte[*ldata++]; |
264 | 343M | key[len++] = bits & 0xFF; /* bit_one */ |
265 | 343M | if ((bits >> 8) != 0) { /* escape? */ |
266 | 174M | key[len++] = bits >> 8; /* bit_two */ |
267 | 174M | } |
268 | 343M | } |
269 | | /* label terminator */ |
270 | 188M | key[len++] = SHIFT_NOBYTE; |
271 | 188M | } |
272 | | /* mark end with a double NOBYTE */ |
273 | 26.7M | key[len] = SHIFT_NOBYTE; |
274 | 26.7M | ENSURE(len < sizeof(dns_qpkey_t)); |
275 | 26.7M | return len; |
276 | 26.7M | } |
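| |
| | /* |
| | * Worked example (illustrative): for the absolute name "com." the |
| | * key built above is, in order: the namespace element, SHIFT_NOBYTE |
| | * for the root label, the bit_one values for 'c', 'o', 'm' (common |
| | * hostname characters need one element each), and a final |
| | * SHIFT_NOBYTE; key[len] holds the second NOBYTE of the end marker. |
| | */ |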
277 | | |
278 | | void |
279 | | dns_qpkey_toname(const dns_qpkey_t key, size_t keylen, dns_name_t *name, |
280 | 609 | dns_namespace_t *space) { |
281 | 609 | size_t locs[DNS_NAME_MAXLABELS]; |
282 | 609 | size_t loc = 0; |
283 | 609 | size_t offset = 0; |
284 | | |
285 | 609 | REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC)); |
286 | 609 | REQUIRE(name->buffer != NULL); |
287 | 609 | REQUIRE(keylen > 0); |
288 | | |
289 | 609 | dns_name_reset(name); |
290 | | |
291 | 609 | SET_IF_NOT_NULL(space, DECODE_NAMESPACE(key[offset])); |
| | offset++; /* skip the namespace byte even when space is NULL */ |
292 | | |
293 | 609 | if (keylen == NAME_OFFSET) { |
294 | 0 | return; |
295 | 0 | } |
296 | | |
297 | | /* Scan the key looking for label boundaries */ |
298 | 32.0k | for (; offset <= keylen; offset++) { |
299 | 32.0k | INSIST(key[offset] >= SHIFT_NOBYTE && |
300 | 32.0k | key[offset] < SHIFT_OFFSET); |
301 | 32.0k | INSIST(loc < DNS_NAME_MAXLABELS); |
302 | 32.0k | if (qpkey_bit(key, keylen, offset) == SHIFT_NOBYTE) { |
303 | 5.25k | if (qpkey_bit(key, keylen, offset + 1) == SHIFT_NOBYTE) |
304 | 609 | { |
305 | 609 | locs[loc] = offset + 1; |
306 | 609 | goto scanned; |
307 | 609 | } |
308 | 4.64k | locs[loc++] = offset + 1; |
309 | 26.8k | } else if (offset == NAME_OFFSET) { |
310 | | /* This happens for a relative name */ |
311 | 0 | locs[loc++] = offset; |
312 | 0 | } |
313 | 32.0k | } |
314 | 609 | UNREACHABLE(); |
315 | 609 | scanned: |
316 | | |
317 | | /* |
318 | | * In the key the labels are encoded in reverse order, so |
319 | | * we step backward through the label boundaries, then forward |
320 | | * through the labels, to create the DNS wire format data. |
321 | | */ |
322 | 5.25k | while (loc-- > 0) { |
323 | 4.64k | uint8_t len = 0, *lenp = NULL; |
324 | | |
325 | | /* Store the location of the length byte */ |
326 | 4.64k | lenp = isc_buffer_used(name->buffer); |
327 | | |
328 | | /* Add a length byte to the name data */ |
329 | 4.64k | isc_buffer_putuint8(name->buffer, 0); |
330 | 4.64k | name->length++; |
331 | | |
332 | | /* Convert from escaped byte ranges to ASCII */ |
333 | 18.9k | for (offset = locs[loc]; offset < locs[loc + 1] - 1; offset++) { |
334 | 14.2k | uint8_t bit = qpkey_bit(key, keylen, offset); |
335 | 14.2k | uint8_t byte = dns_qp_byte_for_bit[bit]; |
336 | 14.2k | if (qp_common_character(byte)) { |
337 | 1.82k | isc_buffer_putuint8(name->buffer, byte); |
338 | 12.4k | } else { |
339 | 12.4k | byte += key[++offset] - SHIFT_BITMAP; |
340 | 12.4k | isc_buffer_putuint8(name->buffer, byte); |
341 | 12.4k | } |
342 | 14.2k | len++; |
343 | 14.2k | } |
344 | | |
345 | 4.64k | name->length += len; |
346 | | |
347 | | /* Write the final label length to the length byte */ |
348 | 4.64k | *lenp = len; |
349 | 4.64k | } |
350 | | |
351 | | /* Add a root label for absolute names */ |
352 | 609 | if (key[NAME_OFFSET] == SHIFT_NOBYTE) { |
353 | 609 | name->attributes.absolute = true; |
354 | 609 | isc_buffer_putuint8(name->buffer, 0); |
355 | 609 | name->length++; |
356 | 609 | } |
357 | | |
358 | 609 | name->ndata = isc_buffer_base(name->buffer); |
359 | 609 | } |
360 | | |
361 | | /* |
362 | | * Sentinel value for equal keys |
363 | | */ |
364 | 14.2M | #define QPKEY_EQUAL (~(size_t)0) |
365 | | |
366 | | /* |
367 | | * Compare two keys and return the offset where they differ. |
368 | | * |
369 | | * This offset is used to work out where a trie search diverged: when one |
370 | | * of the keys is in the trie and one is not, the common prefix (up to the |
371 | | * offset) is the part of the unknown key that exists in the trie. This |
372 | | * matters for adding new keys or finding neighbours of missing keys. |
373 | | * |
374 | | * When the keys are different lengths it is possible (but unwise) for |
375 | | * the longer key to be the same as the shorter key but with superfluous |
376 | | * trailing SHIFT_NOBYTE elements. This makes the keys equal for the |
377 | | * purpose of traversing the trie. |
378 | | */ |
379 | | static size_t |
380 | | qpkey_compare(const dns_qpkey_t key_a, const size_t keylen_a, |
381 | 11.2M | const dns_qpkey_t key_b, const size_t keylen_b) { |
382 | 11.2M | size_t keylen = ISC_MAX(keylen_a, keylen_b); |
383 | 318M | for (size_t offset = 0; offset < keylen; offset++) { |
384 | 315M | if (qpkey_bit(key_a, keylen_a, offset) != |
385 | 315M | qpkey_bit(key_b, keylen_b, offset)) |
386 | 8.32M | { |
387 | 8.32M | return offset; |
388 | 8.32M | } |
389 | 315M | } |
390 | 2.97M | return QPKEY_EQUAL; |
391 | 11.2M | } |
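| |
| | /* |
| | * Worked example (illustrative): because labels are encoded in |
| | * reverse order, the key for "example." is a prefix of the key for |
| | * "www.example.". qpkey_bit() pads the shorter key with |
| | * SHIFT_NOBYTE, so qpkey_compare() returns the offset of the first |
| | * 'w' element: the point where a search for one name diverges from |
| | * the other. |
| | */ |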
392 | | |
393 | | /*********************************************************************** |
394 | | * |
395 | | * allocator wrappers |
396 | | */ |
397 | | |
398 | | #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
399 | | |
400 | | /* |
401 | | * Optionally (for debugging) during a copy-on-write transaction, use |
402 | | * memory protection to ensure that the shared chunks are not modified. |
403 | | * Once a chunk becomes shared, it remains read-only until it is freed. |
404 | | * POSIX says we have to use mmap() to get an allocation that we can |
405 | | * definitely pass to mprotect(). |
406 | | */ |
407 | | |
408 | | static size_t |
409 | 0 | chunk_size_raw(void) { |
410 | 0 | size_t size = (size_t)sysconf(_SC_PAGE_SIZE); |
411 | 0 | return ISC_MAX(size, QP_CHUNK_BYTES); |
412 | 0 | } |
413 | | |
414 | | static void * |
415 | 408k | chunk_get_raw(dns_qp_t *qp, size_t len) { |
416 | 408k | if (qp->write_protect) { |
417 | 0 | size_t size = chunk_size_raw(); |
418 | 0 | void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, |
419 | 0 | MAP_ANON | MAP_PRIVATE, -1, 0); |
420 | 0 | RUNTIME_CHECK(ptr != MAP_FAILED); |
421 | 0 | return ptr; |
422 | 408k | } else { |
423 | 408k | return isc_mem_allocate(qp->mctx, len); |
424 | 408k | } |
425 | 408k | } |
426 | | |
427 | | static void |
428 | 408k | chunk_free_raw(dns_qp_t *qp, void *ptr) { |
429 | 408k | if (qp->write_protect) { |
430 | 0 | RUNTIME_CHECK(munmap(ptr, chunk_size_raw()) == 0); |
431 | 408k | } else { |
432 | 408k | isc_mem_free(qp->mctx, ptr); |
433 | 408k | } |
434 | 408k | } |
435 | | |
436 | | static void * |
437 | 0 | chunk_shrink_raw(dns_qp_t *qp, void *ptr, size_t bytes) { |
438 | 0 | if (qp->write_protect) { |
439 | 0 | return ptr; |
440 | 0 | } else { |
441 | 0 | return isc_mem_reallocate(qp->mctx, ptr, bytes); |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | | static void |
446 | 18.4k | write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) { |
447 | 18.4k | if (qp->write_protect) { |
448 | | /* see transaction_open() wrt this special case */ |
449 | 0 | if (qp->transaction_mode == QP_WRITE && chunk == qp->bump) { |
450 | 0 | return; |
451 | 0 | } |
452 | 0 | TRACE("chunk %u", chunk); |
453 | 0 | void *ptr = qp->base->ptr[chunk]; |
454 | 0 | size_t size = chunk_size_raw(); |
455 | 0 | RUNTIME_CHECK(mprotect(ptr, size, PROT_READ) >= 0); |
456 | 0 | } |
457 | 18.4k | } |
458 | | |
459 | | #else |
460 | | |
461 | | #define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size) |
462 | | #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr) |
463 | | |
464 | | #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size) |
465 | | |
466 | | #define write_protect(qp, chunk) |
467 | | |
468 | | #endif |
469 | | |
470 | | /*********************************************************************** |
471 | | * |
472 | | * allocator |
473 | | */ |
474 | | |
475 | | /* |
476 | | * When we reuse the bump chunk across multiple write transactions, |
477 | | * it can have an immutable prefix and a mutable suffix. |
478 | | */ |
479 | | static inline bool |
480 | 57.1M | cells_immutable(dns_qp_t *qp, dns_qpref_t ref) { |
481 | 57.1M | dns_qpchunk_t chunk = ref_chunk(ref); |
482 | 57.1M | dns_qpcell_t cell = ref_cell(ref); |
483 | 57.1M | if (chunk == qp->bump) { |
484 | 9.41M | return cell < qp->fender; |
485 | 47.7M | } else { |
486 | 47.7M | return qp->usage[chunk].immutable; |
487 | 47.7M | } |
488 | 57.1M | } |
489 | | |
490 | | /* |
491 | | * Find the smallest power of two at least as large as both size and |
492 | | * prev_capacity, clamped to the chunk min and max sizes. |
493 | | */ |
494 | | static dns_qpcell_t |
495 | 408k | next_capacity(uint32_t prev_capacity, uint32_t size) { |
496 | | /* |
497 | | * The request size is floored at 2 so that the clz argument below, |
498 | | * size - 1U, is never zero: __builtin_clz(0) was undefined, and we |
| | * keep the floor because stdc_leading_zeros(0) == 32 would make |
| | * log2 come out as zero. |
499 | | */ |
500 | 408k | size = ISC_MAX3(size, prev_capacity, 2U); |
501 | 408k | uint32_t log2 = 32U - stdc_leading_zeros(size - 1U); |
502 | | |
503 | 408k | return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX); |
504 | 408k | } |
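| |
| | /* |
| | * Worked example (illustrative): next_capacity(1024, 300) takes |
| | * size = ISC_MAX3(300, 1024, 2) = 1024, computes |
| | * log2 = 32 - stdc_leading_zeros(1023) = 32 - 22 = 10, and returns |
| | * 1 << 10 = 1024, assuming 10 lies between QP_CHUNK_LOG_MIN and |
| | * QP_CHUNK_LOG_MAX. |
| | */ |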
505 | | |
506 | | /* |
507 | | * Create a fresh bump chunk and allocate some twigs from it. |
508 | | */ |
509 | | static dns_qpref_t |
510 | 408k | chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) { |
511 | 408k | INSIST(qp->base->ptr[chunk] == NULL); |
512 | 408k | INSIST(qp->usage[chunk].used == 0); |
513 | 408k | INSIST(qp->usage[chunk].free == 0); |
514 | 408k | INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE); |
515 | | |
516 | 408k | qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size); |
517 | 408k | qp->base->ptr[chunk] = |
518 | 408k | chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t)); |
519 | | |
520 | 408k | qp->usage[chunk] = (qp_usage_t){ .exists = true, |
521 | 408k | .used = size, |
522 | 408k | .capacity = qp->chunk_capacity }; |
523 | 408k | qp->used_count += size; |
524 | 408k | qp->bump = chunk; |
525 | 408k | qp->fender = 0; |
526 | | |
527 | 408k | if (qp->write_protect) { |
528 | 0 | TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]); |
529 | 0 | } |
530 | 408k | return make_ref(chunk, 0); |
531 | 408k | } |
532 | | |
533 | | /* |
534 | | * This is used to grow the chunk arrays when they fill up. If the old |
535 | | * base array is in use by readers, we must make a clone, otherwise we |
536 | | * can reallocate in place. |
537 | | * |
538 | | * The isc_refcount_init() and qpbase_unref() in this function are a pair. |
539 | | */ |
540 | | static void |
541 | 20.4k | realloc_chunk_arrays(dns_qp_t *qp, dns_qpchunk_t newmax) { |
542 | 20.4k | size_t oldptrs = sizeof(qp->base->ptr[0]) * qp->chunk_max; |
543 | 20.4k | size_t newptrs = sizeof(qp->base->ptr[0]) * newmax; |
544 | 20.4k | size_t size = STRUCT_FLEX_SIZE(qp->base, ptr, newmax); |
545 | | |
546 | 20.4k | if (qp->base == NULL || qpbase_unref(qp)) { |
547 | 20.1k | qp->base = isc_mem_reallocate(qp->mctx, qp->base, size); |
548 | 20.1k | } else { |
549 | 342 | dns_qpbase_t *oldbase = qp->base; |
550 | 342 | qp->base = isc_mem_allocate(qp->mctx, size); |
551 | 342 | memmove(&qp->base->ptr[0], &oldbase->ptr[0], oldptrs); |
552 | 342 | } |
553 | 20.4k | memset(&qp->base->ptr[qp->chunk_max], 0, newptrs - oldptrs); |
554 | 20.4k | isc_refcount_init(&qp->base->refcount, 1); |
555 | 20.4k | qp->base->magic = QPBASE_MAGIC; |
556 | | |
557 | | /* usage array is exclusive to the writer */ |
558 | 20.4k | size_t oldusage = sizeof(qp->usage[0]) * qp->chunk_max; |
559 | 20.4k | size_t newusage = sizeof(qp->usage[0]) * newmax; |
560 | 20.4k | qp->usage = isc_mem_reallocate(qp->mctx, qp->usage, newusage); |
561 | 20.4k | memset(&qp->usage[qp->chunk_max], 0, newusage - oldusage); |
562 | | |
563 | 20.4k | qp->chunk_max = newmax; |
564 | | |
565 | 20.4k | TRACE("qpbase %p usage %p max %u", qp->base, qp->usage, qp->chunk_max); |
566 | 20.4k | } |
567 | | |
568 | | /* |
569 | | * There was no space in the bump chunk, so find a place to put a fresh |
570 | | * chunk in the chunk arrays, then allocate some twigs from it. |
571 | | */ |
572 | | static dns_qpref_t |
573 | 408k | alloc_slow(dns_qp_t *qp, dns_qpweight_t size) { |
574 | 408k | dns_qpchunk_t chunk; |
575 | | |
576 | 432M | for (chunk = 0; chunk < qp->chunk_max; chunk++) { |
577 | 432M | if (!qp->usage[chunk].exists) { |
578 | 387k | return chunk_alloc(qp, chunk, size); |
579 | 387k | } |
580 | 432M | } |
581 | 20.4k | ENSURE(chunk == qp->chunk_max); |
582 | 20.4k | realloc_chunk_arrays(qp, GROWTH_FACTOR(chunk)); |
583 | 20.4k | return chunk_alloc(qp, chunk, size); |
584 | 408k | } |
585 | | |
586 | | /* |
587 | | * Ensure we are using a fresh bump chunk. |
588 | | */ |
589 | | static void |
590 | 21.0k | alloc_reset(dns_qp_t *qp) { |
591 | 21.0k | (void)alloc_slow(qp, 0); |
592 | 21.0k | } |
593 | | |
594 | | /* |
595 | | * Allocate some fresh twigs. This is the bump allocator fast path. |
596 | | */ |
597 | | static inline dns_qpref_t |
598 | 8.05M | alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) { |
599 | 8.05M | dns_qpchunk_t chunk = qp->bump; |
600 | 8.05M | dns_qpcell_t cell = qp->usage[chunk].used; |
601 | | |
602 | 8.05M | if (cell + size <= qp->usage[chunk].capacity) { |
603 | 7.66M | qp->usage[chunk].used += size; |
604 | 7.66M | qp->used_count += size; |
605 | 7.66M | return make_ref(chunk, cell); |
606 | 7.66M | } else { |
607 | 387k | return alloc_slow(qp, size); |
608 | 387k | } |
609 | 8.05M | } |
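| |
| | /* |
| | * Worked example (illustrative): with a bump chunk of capacity 1024 |
| | * cells of which 1000 are used, alloc_twigs(qp, 17) fits |
| | * (1017 <= 1024), bumps the usage counters, and returns |
| | * make_ref(bump, 1000); a following alloc_twigs(qp, 10) does not |
| | * fit and falls through to alloc_slow(), which starts a new chunk. |
| | */ |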
610 | | |
611 | | /* |
612 | | * Record that some twigs are no longer being used, and if possible |
613 | | * zero them to ensure that there isn't a spurious double detach when |
614 | | * the chunk is later recycled. |
615 | | * |
616 | | * Returns true if the twigs were immediately destroyed. |
617 | | * |
618 | | * NOTE: the caller is responsible for attaching or detaching any |
619 | | * leaves as required. |
620 | | */ |
621 | | static inline bool |
622 | 6.37M | free_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) { |
623 | 6.37M | dns_qpchunk_t chunk = ref_chunk(twigs); |
624 | | |
625 | 6.37M | qp->free_count += size; |
626 | 6.37M | qp->usage[chunk].free += size; |
627 | 6.37M | ENSURE(qp->free_count <= qp->used_count); |
628 | 6.37M | ENSURE(qp->usage[chunk].free <= qp->usage[chunk].used); |
629 | | |
630 | 6.37M | if (cells_immutable(qp, twigs)) { |
631 | 33.6k | qp->hold_count += size; |
632 | 33.6k | ENSURE(qp->free_count >= qp->hold_count); |
633 | 33.6k | return false; |
634 | 6.33M | } else { |
635 | 6.33M | zero_twigs(ref_ptr(qp, twigs), size); |
636 | 6.33M | return true; |
637 | 6.33M | } |
638 | 6.37M | } |
639 | | |
640 | | /* |
641 | | * When some twigs have been copied, and free_twigs() could not |
642 | | * immediately destroy the old copy, we need to update the refcount |
643 | | * on any leaves that were duplicated. |
644 | | */ |
645 | | static void |
646 | 15.2k | attach_twigs(dns_qp_t *qp, dns_qpnode_t *twigs, dns_qpweight_t size) { |
647 | 51.8k | for (dns_qpweight_t pos = 0; pos < size; pos++) { |
648 | 36.6k | if (node_tag(&twigs[pos]) == LEAF_TAG) { |
649 | 27.3k | attach_leaf(qp, &twigs[pos]); |
650 | 27.3k | } |
651 | 36.6k | } |
652 | 15.2k | } |
653 | | |
654 | | /*********************************************************************** |
655 | | * |
656 | | * chunk reclamation |
657 | | */ |
658 | | |
659 | | /* |
660 | | * Is any of this chunk still in use? |
661 | | */ |
662 | | static inline dns_qpcell_t |
663 | 3.86M | chunk_usage(dns_qp_t *qp, dns_qpchunk_t chunk) { |
664 | 3.86M | return qp->usage[chunk].used - qp->usage[chunk].free; |
665 | 3.86M | } |
666 | | |
667 | | /* |
668 | | * We remove each empty chunk from the total counts when the chunk is |
669 | | * freed, or when it is scheduled for safe memory reclamation. We check |
670 | | * the chunk's discounted flag to avoid discounting it twice in the latter case. |
671 | | */ |
672 | | static void |
673 | 408k | chunk_discount(dns_qp_t *qp, dns_qpchunk_t chunk) { |
674 | 408k | if (qp->usage[chunk].discounted) { |
675 | 320 | return; |
676 | 320 | } |
677 | 408k | INSIST(qp->used_count >= qp->usage[chunk].used); |
678 | 408k | INSIST(qp->free_count >= qp->usage[chunk].free); |
679 | 408k | qp->used_count -= qp->usage[chunk].used; |
680 | 408k | qp->free_count -= qp->usage[chunk].free; |
681 | 408k | qp->usage[chunk].discounted = true; |
682 | 408k | } |
683 | | |
684 | | /* |
685 | | * When a chunk is being recycled, we need to detach any leaves that |
686 | | * remain, and free any `base` arrays that have been marked as unused. |
687 | | */ |
688 | | static void |
689 | 408k | chunk_free(dns_qp_t *qp, dns_qpchunk_t chunk) { |
690 | 408k | if (qp->write_protect) { |
691 | 0 | TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]); |
692 | 0 | } |
693 | | |
694 | 408k | dns_qpnode_t *n = qp->base->ptr[chunk]; |
695 | 48.5M | for (dns_qpcell_t count = qp->usage[chunk].used; count > 0; |
696 | 48.1M | count--, n++) |
697 | 48.1M | { |
698 | 48.1M | if (node_tag(n) == LEAF_TAG && node_pointer(n) != NULL) { |
699 | 6.74M | detach_leaf(qp, n); |
700 | 41.3M | } else if (count > 1 && reader_valid(n)) { |
701 | 36.8k | dns_qpreader_t qpr; |
702 | 36.8k | unpack_reader(&qpr, n); |
703 | | /* pairs with dns_qpmulti_commit() */ |
704 | 36.8k | if (qpbase_unref(&qpr)) { |
705 | 342 | isc_mem_free(qp->mctx, qpr.base); |
706 | 342 | } |
707 | 36.8k | } |
708 | 48.1M | } |
709 | 408k | chunk_discount(qp, chunk); |
710 | 408k | chunk_free_raw(qp, qp->base->ptr[chunk]); |
711 | 408k | qp->base->ptr[chunk] = NULL; |
712 | 408k | qp->usage[chunk] = (qp_usage_t){}; |
713 | 408k | } |
714 | | |
715 | | /* |
716 | | * Free any chunks that we can while a trie is in use. |
717 | | */ |
718 | | static void |
719 | 3.11k | recycle(dns_qp_t *qp) { |
720 | 3.11k | unsigned int nfree = 0; |
721 | | |
722 | 3.11k | isc_nanosecs_t start = isc_time_monotonic(); |
723 | | |
724 | 644k | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
725 | 641k | if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && |
726 | 405k | qp->usage[chunk].exists && !qp->usage[chunk].immutable) |
727 | 278k | { |
728 | 278k | chunk_free(qp, chunk); |
729 | 278k | nfree++; |
730 | 278k | } |
731 | 641k | } |
732 | | |
733 | 3.11k | isc_nanosecs_t time = isc_time_monotonic() - start; |
734 | 3.11k | atomic_fetch_add_relaxed(&recycle_time, time); |
735 | | |
736 | 3.11k | if (nfree > 0) { |
737 | 3.11k | LOG_STATS("qp recycle" PRItime "free %u chunks", time, nfree); |
738 | 3.11k | LOG_STATS("qp recycle leaf %u live %u used %u free %u hold %u", |
739 | 3.11k | qp->leaf_count, qp->used_count - qp->free_count, |
740 | 3.11k | qp->used_count, qp->free_count, qp->hold_count); |
741 | 3.11k | } |
742 | 3.11k | } |
743 | | |
744 | | /* |
745 | | * asynchronous cleanup |
746 | | */ |
747 | | static void |
748 | 320 | reclaim_chunks_cb(struct rcu_head *arg) { |
749 | 320 | qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); |
750 | 320 | REQUIRE(QPRCU_VALID(rcuctx)); |
751 | 320 | dns_qpmulti_t *multi = rcuctx->multi; |
752 | 320 | REQUIRE(QPMULTI_VALID(multi)); |
753 | | |
754 | 320 | LOCK(&multi->mutex); |
755 | 320 | dns_qp_t *qp = &multi->writer; |
756 | | |
757 | | /* |
758 | | * If chunk_max is zero, chunks have already been freed. |
759 | | */ |
760 | 320 | if (qp->chunk_max != 0) { |
761 | 320 | unsigned int nfree = 0; |
762 | 320 | isc_nanosecs_t start = isc_time_monotonic(); |
763 | | |
764 | 320 | INSIST(QP_VALID(qp)); |
765 | | |
766 | 640 | for (unsigned int i = 0; i < rcuctx->count; i++) { |
767 | 320 | dns_qpchunk_t chunk = rcuctx->chunk[i]; |
768 | 320 | if (qp->usage[chunk].snapshot) { |
769 | | /* clean up when snapshot is destroyed */ |
770 | 0 | qp->usage[chunk].snapfree = true; |
771 | 320 | } else { |
772 | 320 | chunk_free(qp, chunk); |
773 | 320 | nfree++; |
774 | 320 | } |
775 | 320 | } |
776 | | |
777 | 320 | isc_nanosecs_t time = isc_time_monotonic() - start; |
778 | 320 | atomic_fetch_add_relaxed(&recycle_time, time); |
779 | | |
780 | 320 | if (nfree > 0) { |
781 | 320 | LOG_STATS("qp reclaim" PRItime "free %u chunks", time, |
782 | 320 | nfree); |
783 | 320 | LOG_STATS( |
784 | 320 | "qp reclaim leaf %u live %u used %u free %u " |
785 | 320 | "hold %u", |
786 | 320 | qp->leaf_count, qp->used_count - qp->free_count, |
787 | 320 | qp->used_count, qp->free_count, qp->hold_count); |
788 | 320 | } |
789 | 320 | } |
790 | | |
791 | 320 | UNLOCK(&multi->mutex); |
792 | | |
793 | 320 | dns_qpmulti_detach(&multi); |
794 | 320 | isc_mem_putanddetach(&rcuctx->mctx, rcuctx, |
795 | 320 | STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); |
796 | 320 | } |
797 | | |
798 | | /* |
799 | | * At the end of a transaction, schedule empty but immutable chunks |
800 | | * for reclamation later. |
801 | | */ |
802 | | static void |
803 | 36.8k | reclaim_chunks(dns_qpmulti_t *multi) { |
804 | 36.8k | dns_qp_t *qp = &multi->writer; |
805 | | |
806 | 36.8k | unsigned int count = 0; |
807 | 275k | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
808 | 238k | if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && |
809 | 94.6k | qp->usage[chunk].exists && qp->usage[chunk].immutable && |
810 | 320 | !qp->usage[chunk].discounted) |
811 | 320 | { |
812 | 320 | count++; |
813 | 320 | } |
814 | 238k | } |
815 | | |
816 | 36.8k | if (count == 0) { |
817 | 36.5k | return; |
818 | 36.5k | } |
819 | | |
820 | 320 | qp_rcuctx_t *rcuctx = |
821 | 320 | isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, count)); |
822 | 320 | *rcuctx = (qp_rcuctx_t){ |
823 | 320 | .magic = QPRCU_MAGIC, |
824 | 320 | .multi = multi, |
825 | 320 | .count = count, |
826 | 320 | }; |
827 | 320 | isc_mem_attach(qp->mctx, &rcuctx->mctx); |
828 | | |
829 | 320 | unsigned int i = 0; |
830 | 163k | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
831 | 162k | if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && |
832 | 57.7k | qp->usage[chunk].exists && qp->usage[chunk].immutable && |
833 | 320 | !qp->usage[chunk].discounted) |
834 | 320 | { |
835 | 320 | rcuctx->chunk[i++] = chunk; |
836 | 320 | chunk_discount(qp, chunk); |
837 | 320 | } |
838 | 162k | } |
839 | | |
840 | | /* |
841 | | * Reference the qpmulti object to keep it from being |
842 | | * freed until reclaim_chunks_cb() runs. |
843 | | */ |
844 | 320 | dns_qpmulti_ref(multi); |
845 | 320 | call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); |
846 | | |
847 | 320 | LOG_STATS("qp will reclaim %u chunks", count); |
848 | 320 | } |
849 | | |
850 | | /* |
851 | | * When a snapshot is destroyed, clean up chunks that need free()ing |
852 | | * and are not used by any remaining snapshots. |
853 | | */ |
854 | | static void |
855 | 0 | marksweep_chunks(dns_qpmulti_t *multi) { |
856 | 0 | unsigned int nfree = 0; |
857 | |
858 | 0 | isc_nanosecs_t start = isc_time_monotonic(); |
859 | |
860 | 0 | dns_qp_t *qpw = &multi->writer; |
861 | |
862 | 0 | ISC_LIST_FOREACH(multi->snapshots, qps, link) { |
863 | 0 | for (dns_qpchunk_t chunk = 0; chunk < qps->chunk_max; chunk++) { |
864 | 0 | if (qps->base->ptr[chunk] != NULL) { |
865 | 0 | INSIST(qps->base->ptr[chunk] == |
866 | 0 | qpw->base->ptr[chunk]); |
867 | 0 | qpw->usage[chunk].snapmark = true; |
868 | 0 | } |
869 | 0 | } |
870 | 0 | } |
871 | |
872 | 0 | for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) { |
873 | 0 | qpw->usage[chunk].snapshot = qpw->usage[chunk].snapmark; |
874 | 0 | qpw->usage[chunk].snapmark = false; |
875 | 0 | if (qpw->usage[chunk].snapfree && !qpw->usage[chunk].snapshot) { |
876 | 0 | chunk_free(qpw, chunk); |
877 | 0 | nfree++; |
878 | 0 | } |
879 | 0 | } |
880 | |
881 | 0 | isc_nanosecs_t time = isc_time_monotonic() - start; |
882 | 0 | atomic_fetch_add_relaxed(&recycle_time, time); |
883 | |
884 | 0 | if (nfree > 0) { |
885 | 0 | LOG_STATS("qp marksweep" PRItime "free %u chunks", time, nfree); |
886 | 0 | LOG_STATS( |
887 | 0 | "qp marksweep leaf %u live %u used %u free %u hold %u", |
888 | 0 | qpw->leaf_count, qpw->used_count - qpw->free_count, |
889 | 0 | qpw->used_count, qpw->free_count, qpw->hold_count); |
890 | 0 | } |
891 | 0 | } |
892 | | |
893 | | /*********************************************************************** |
894 | | * |
895 | | * garbage collector |
896 | | */ |
897 | | |
898 | | /* |
899 | | * Move a branch node's twigs to the `bump` chunk, for copy-on-write |
900 | | * or for garbage collection. We don't update the node in place |
901 | | * because `compact_recursive()` does not ensure the node itself is |
902 | | * mutable until after it discovers evacuation was necessary. |
903 | | * |
904 | | * If free_twigs() could not immediately destroy the old twigs, we have |
905 | | * to re-attach to any leaves. |
906 | | */ |
907 | | static dns_qpref_t |
908 | 906k | evacuate(dns_qp_t *qp, dns_qpnode_t *n) { |
909 | 906k | dns_qpweight_t size = branch_twigs_size(n); |
910 | 906k | dns_qpref_t old_ref = branch_twigs_ref(n); |
911 | 906k | dns_qpref_t new_ref = alloc_twigs(qp, size); |
912 | 906k | dns_qpnode_t *old_twigs = ref_ptr(qp, old_ref); |
913 | 906k | dns_qpnode_t *new_twigs = ref_ptr(qp, new_ref); |
914 | | |
915 | 906k | move_twigs(new_twigs, old_twigs, size); |
916 | 906k | if (!free_twigs(qp, old_ref, size)) { |
917 | 14.7k | attach_twigs(qp, new_twigs, size); |
918 | 14.7k | } |
919 | | |
920 | 906k | return new_ref; |
921 | 906k | } |
922 | | |
923 | | /* |
924 | | * Immutable nodes need copy-on-write. As we walk down the trie finding the |
925 | | * right place to modify, make_root_mutable() and make_twigs_mutable() |
926 | | * are called to ensure that immutable nodes on the path from the root are |
927 | | * copied to a mutable chunk. |
928 | | */ |
929 | | |
930 | | static inline dns_qpnode_t * |
931 | 7.09M | make_root_mutable(dns_qp_t *qp) { |
932 | 7.09M | if (cells_immutable(qp, qp->root_ref)) { |
933 | 6.23k | qp->root_ref = evacuate(qp, MOVABLE_ROOT(qp)); |
934 | 6.23k | } |
935 | 7.09M | return ref_ptr(qp, qp->root_ref); |
936 | 7.09M | } |
937 | | |
938 | | static inline void |
939 | 40.8M | make_twigs_mutable(dns_qp_t *qp, dns_qpnode_t *n) { |
940 | 40.8M | if (cells_immutable(qp, branch_twigs_ref(n))) { |
941 | 7.54k | *n = make_node(branch_index(n), evacuate(qp, n)); |
942 | 7.54k | } |
943 | 40.8M | } |
944 | | |
945 | | /* |
946 | | * Compact the trie by traversing the whole thing recursively, copying |
947 | | * bottom-up as required. The aim is to avoid evacuation as much as |
948 | | * possible, but when parts of the trie are immutable, we need to evacuate |
949 | | * the paths from the root to the parts of the trie that occupy |
950 | | * fragmented chunks. |
951 | | * |
952 | | * Without the QP_MIN_USED check, the algorithm will leave the trie |
953 | | * unchanged. If the children are all leaves, the loop changes nothing, |
954 | | * so we will return this node's original ref. If all of the children |
955 | | * that are branches did not need moving, again, the loop changes |
956 | | * nothing. So the evacuation check is the only place that the |
957 | | * algorithm introduces ref changes, that then bubble up towards the |
958 | | * root through the logic inside the loop. |
959 | | */ |
960 | | static dns_qpref_t |
961 | 2.86M | compact_recursive(dns_qp_t *qp, dns_qpnode_t *parent) { |
962 | 2.86M | dns_qpweight_t size = branch_twigs_size(parent); |
963 | 2.86M | dns_qpref_t twigs_ref = branch_twigs_ref(parent); |
964 | 2.86M | dns_qpchunk_t chunk = ref_chunk(twigs_ref); |
965 | | |
966 | 2.86M | if (qp->compact_all || |
967 | 2.86M | (chunk != qp->bump && chunk_usage(qp, chunk) < QP_MIN_USED)) |
968 | 892k | { |
969 | 892k | twigs_ref = evacuate(qp, parent); |
970 | 892k | } |
971 | 2.86M | bool immutable = cells_immutable(qp, twigs_ref); |
972 | 30.9M | for (dns_qpweight_t pos = 0; pos < size; pos++) { |
973 | 28.0M | dns_qpnode_t *child = ref_ptr(qp, twigs_ref) + pos; |
974 | 28.0M | if (!is_branch(child)) { |
975 | 25.1M | continue; |
976 | 25.1M | } |
977 | 2.86M | dns_qpref_t old_grandtwigs = branch_twigs_ref(child); |
978 | 2.86M | dns_qpref_t new_grandtwigs = compact_recursive(qp, child); |
979 | 2.86M | if (old_grandtwigs == new_grandtwigs) { |
980 | 1.97M | continue; |
981 | 1.97M | } |
982 | 892k | if (immutable) { |
983 | 109 | twigs_ref = evacuate(qp, parent); |
984 | | /* the twigs have moved */ |
985 | 109 | child = ref_ptr(qp, twigs_ref) + pos; |
986 | 109 | immutable = false; |
987 | 109 | } |
988 | 892k | *child = make_node(branch_index(child), new_grandtwigs); |
989 | 892k | } |
990 | 2.86M | return twigs_ref; |
991 | 2.86M | } |
992 | | |
993 | | static void |
994 | 3.11k | compact(dns_qp_t *qp) { |
995 | 3.11k | LOG_STATS("qp compact before leaf %u live %u used %u free %u hold %u", |
996 | 3.11k | qp->leaf_count, qp->used_count - qp->free_count, |
997 | 3.11k | qp->used_count, qp->free_count, qp->hold_count); |
998 | | |
999 | 3.11k | isc_nanosecs_t start = isc_time_monotonic(); |
1000 | | |
1001 | 3.11k | if (qp->usage[qp->bump].free > QP_MAX_FREE) { |
1002 | 2.47k | alloc_reset(qp); |
1003 | 2.47k | } |
1004 | | |
1005 | 3.11k | if (qp->leaf_count > 0) { |
1006 | 3.11k | qp->root_ref = compact_recursive(qp, MOVABLE_ROOT(qp)); |
1007 | 3.11k | } |
1008 | 3.11k | qp->compact_all = false; |
1009 | | |
1010 | 3.11k | isc_nanosecs_t time = isc_time_monotonic() - start; |
1011 | 3.11k | atomic_fetch_add_relaxed(&compact_time, time); |
1012 | | |
1013 | 3.11k | LOG_STATS("qp compact" PRItime |
1014 | 3.11k | "leaf %u live %u used %u free %u hold %u", |
1015 | 3.11k | time, qp->leaf_count, qp->used_count - qp->free_count, |
1016 | 3.11k | qp->used_count, qp->free_count, qp->hold_count); |
1017 | 3.11k | } |
1018 | | |
1019 | | void |
1020 | 18.4k | dns_qp_compact(dns_qp_t *qp, dns_qpgc_t mode) { |
1021 | 18.4k | REQUIRE(QP_VALID(qp)); |
1022 | 18.4k | if (mode == DNS_QPGC_MAYBE && !QP_NEEDGC(qp)) { |
1023 | 18.3k | return; |
1024 | 18.3k | } |
1025 | 33 | if (mode == DNS_QPGC_ALL) { |
1026 | 0 | alloc_reset(qp); |
1027 | 0 | qp->compact_all = true; |
1028 | 0 | } |
1029 | 33 | compact(qp); |
1030 | 33 | recycle(qp); |
1031 | 33 | } |
1032 | | |
1033 | | /* |
1034 | | * Free some twigs and (if they were destroyed immediately, so that the |
1035 | | * result of QP_AUTOGC() can change) compact the trie if necessary. |
1036 | | * |
1037 | | * This is called by the trie modification API entry points. The |
1038 | | * free_twigs() function requires the caller to attach or detach any |
1039 | | * leaves as necessary. Callers of squash_twigs() satisfy this |
1040 | | * requirement by calling make_twigs_mutable(). |
1041 | | * |
1042 | | * Aside: In typical garbage collectors, compaction is triggered when |
1043 | | * the allocator runs out of space. But that is because typical garbage |
1044 | | * collectors do not know how much memory can be recovered, so they must |
1045 | | * find out by scanning the heap. The qp-trie code was originally |
1046 | | * designed to use malloc() and free(), so it has more information about |
1047 | | * when garbage collection might be worthwhile. Hence we can trigger |
1048 | | * collection when garbage passes a threshold. |
1049 | | * |
1050 | | * XXXFANF: If we need to avoid latency outliers caused by compaction in |
1051 | | * write transactions, we can check qp->transaction_mode here. |
1052 | | */ |
1053 | | static inline bool |
1054 | 5.05M | squash_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) { |
1055 | 5.05M | bool destroyed = free_twigs(qp, twigs, size); |
1056 | 5.05M | if (destroyed && QP_AUTOGC(qp)) { |
1057 | 3.07k | compact(qp); |
1058 | 3.07k | recycle(qp); |
1059 | | /* |
1060 | | * This shouldn't happen if the garbage collector is |
1061 | | * working correctly. We can recover at the cost of some |
1062 | | * time and space, but recovery should be cheaper than |
1063 | | * letting compact+recycle fail repeatedly. |
1064 | | */ |
1065 | 3.07k | if (QP_AUTOGC(qp)) { |
1066 | 0 | isc_log_write(DNS_LOGCATEGORY_DATABASE, |
1067 | 0 | DNS_LOGMODULE_QP, ISC_LOG_NOTICE, |
1068 | 0 | "qp %p uctx \"%s\" compact/recycle " |
1069 | 0 | "failed to recover any space, " |
1070 | 0 | "scheduling a full compaction", |
1071 | 0 | qp, TRIENAME(qp)); |
1072 | 0 | qp->compact_all = true; |
1073 | 0 | } |
1074 | 3.07k | } |
1075 | 5.05M | return destroyed; |
1076 | 5.05M | } |
1077 | | |
1078 | | /*********************************************************************** |
1079 | | * |
1080 | | * public accessors for memory management internals |
1081 | | */ |
1082 | | |
1083 | | dns_qp_memusage_t |
1084 | 2 | dns_qp_memusage(dns_qp_t *qp) { |
1085 | 2 | REQUIRE(QP_VALID(qp)); |
1086 | | |
1087 | 2 | dns_qp_memusage_t memusage = { |
1088 | 2 | .uctx = qp->uctx, |
1089 | 2 | .leaves = qp->leaf_count, |
1090 | 2 | .live = qp->used_count - qp->free_count, |
1091 | 2 | .used = qp->used_count, |
1092 | 2 | .hold = qp->hold_count, |
1093 | 2 | .free = qp->free_count, |
1094 | 2 | .node_size = sizeof(dns_qpnode_t), |
1095 | 2 | .fragmented = QP_NEEDGC(qp), |
1096 | 2 | }; |
1097 | | |
1098 | 2 | size_t chunk_usage_bytes = 0; |
1099 | 6 | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
1100 | 4 | if (qp->base->ptr[chunk] != NULL) { |
1101 | 2 | chunk_usage_bytes += qp->usage[chunk].capacity; |
1102 | 2 | memusage.chunk_count += 1; |
1103 | 2 | } |
1104 | 4 | } |
1105 | | |
1106 | | /* |
1107 | | * XXXFANF does not subtract chunks that have been shrunk, |
1108 | | * and does not count unreclaimed dns_qpbase_t objects |
1109 | | */ |
1110 | 2 | memusage.bytes = chunk_usage_bytes + |
1111 | 2 | qp->chunk_max * sizeof(qp->base->ptr[0]) + |
1112 | 2 | qp->chunk_max * sizeof(qp->usage[0]); |
1113 | | |
1114 | 2 | return memusage; |
1115 | 2 | } |
1116 | | |
1117 | | dns_qp_memusage_t |
1118 | 2 | dns_qpmulti_memusage(dns_qpmulti_t *multi) { |
1119 | 2 | REQUIRE(QPMULTI_VALID(multi)); |
1120 | 2 | LOCK(&multi->mutex); |
1121 | | |
1122 | 2 | dns_qp_t *qp = &multi->writer; |
1123 | 2 | INSIST(QP_VALID(qp)); |
1124 | | |
1125 | 2 | dns_qp_memusage_t memusage = dns_qp_memusage(qp); |
1126 | | |
1127 | 2 | if (qp->transaction_mode == QP_UPDATE && qp->usage != NULL) { |
1128 | 0 | memusage.bytes -= qp->usage[qp->bump].capacity; |
1129 | 0 | memusage.bytes += qp->usage[qp->bump].used * |
1130 | 0 | sizeof(dns_qpnode_t); |
1131 | 0 | } |
1132 | | |
1133 | 2 | UNLOCK(&multi->mutex); |
1134 | 2 | return memusage; |
1135 | 2 | } |
1136 | | |
1137 | | void |
1138 | | dns_qp_gctime(isc_nanosecs_t *compact_p, isc_nanosecs_t *recycle_p, |
1139 | 0 | isc_nanosecs_t *rollback_p) { |
1140 | 0 | *compact_p = atomic_load_relaxed(&compact_time); |
1141 | 0 | *recycle_p = atomic_load_relaxed(&recycle_time); |
1142 | 0 | *rollback_p = atomic_load_relaxed(&rollback_time); |
1143 | 0 | } |
1144 | | |
1145 | | /*********************************************************************** |
1146 | | * |
1147 | | * read-write transactions |
1148 | | */ |
1149 | | |
1150 | | static dns_qp_t * |
1151 | 36.8k | transaction_open(dns_qpmulti_t *multi, dns_qp_t **qptp) { |
1152 | 36.8k | REQUIRE(QPMULTI_VALID(multi)); |
1153 | 36.8k | REQUIRE(qptp != NULL && *qptp == NULL); |
1154 | | |
1155 | 36.8k | LOCK(&multi->mutex); |
1156 | | |
1157 | 36.8k | dns_qp_t *qp = &multi->writer; |
1158 | 36.8k | INSIST(QP_VALID(qp)); |
1159 | | |
1160 | | /* |
1161 | | * Mark existing chunks as immutable. |
1162 | | * |
1163 | | * Aside: The bump chunk is special: in a series of write |
1164 | | * transactions the bump chunk is reused; the first part (up |
1165 | | * to fender) is immutable, the rest mutable. But we set its |
1166 | | * immutable flag so that when the bump chunk fills up, the |
1167 | | * first part continues to be treated as immutable. (And the |
1168 | | * rest of the chunk too, but that's OK.) |
1169 | | */ |
1170 | 73.6k | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
1171 | 36.8k | if (qp->usage[chunk].exists) { |
1172 | 18.4k | qp->usage[chunk].immutable = true; |
1173 | 18.4k | write_protect(qp, chunk); |
1174 | 18.4k | } |
1175 | 36.8k | } |
1176 | | |
1177 | | /* |
1178 | | * Ensure QP_AUTOGC() ignores free space in immutable chunks. |
1179 | | */ |
1180 | 36.8k | qp->hold_count = qp->free_count; |
1181 | | |
1182 | 36.8k | *qptp = qp; |
1183 | 36.8k | return qp; |
1184 | 36.8k | } |
1185 | | |
1186 | | /* |
1187 | | * a write is light |
1188 | | * |
1189 | | * We need to ensure we allocate from a fresh chunk if the last transaction |
1190 | | * shrank the bump chunk; but usually in a sequence of write transactions |
1191 | | * we just put `fender` at the point where we started this generation. |
1192 | | * |
1193 | | * (Aside: Instead of keeping the previous transaction's mode, I |
1194 | | * considered forcing allocation into the slow path by fiddling with |
1195 | | * the bump chunk's usage counters. But that is troublesome because |
1196 | | * `chunk_free()` needs to know how much of the chunk to scan.) |
1197 | | */ |
1198 | | void |
1199 | 36.8k | dns_qpmulti_write(dns_qpmulti_t *multi, dns_qp_t **qptp) { |
1200 | 36.8k | dns_qp_t *qp = transaction_open(multi, qptp); |
1201 | 36.8k | TRACE(""); |
1202 | | |
1203 | 36.8k | if (qp->transaction_mode == QP_WRITE) { |
1204 | 18.4k | qp->fender = qp->usage[qp->bump].used; |
1205 | 18.4k | } else { |
1206 | 18.4k | alloc_reset(qp); |
1207 | 18.4k | } |
1208 | 36.8k | qp->transaction_mode = QP_WRITE; |
1209 | 36.8k | } |
1210 | | |
1211 | | /* |
1212 | | * an update is heavier |
1213 | | * |
1214 | | * We always reset the allocator to the start of a fresh chunk, |
1215 | | * because the previous transaction was probably an update that shrank |
1216 | | * the bump chunk. It simplifies rollback because `fender` is always zero. |
1217 | | * |
1218 | | * To rollback a transaction, we need to reset all the allocation |
1219 | | * counters to their previous state, in particular we need to un-free |
1220 | | * any nodes that were copied to make them mutable. This means we need |
1221 | | * to make a copy of basically the whole `dns_qp_t writer`: everything |
1222 | | * but the chunks holding the trie nodes. |
1223 | | * |
1224 | | * We do most of the transaction setup before creating the rollback |
1225 | | * state so that after rollback we have a correct idea of which chunks |
1226 | | * are immutable, and so we have the correct transaction mode to make |
1227 | | * the next transaction allocate a new bump chunk. The exception is |
1228 | | * resetting the allocator, which we do after creating the rollback |
1229 | | * state; if this transaction is rolled back then the next transaction |
1230 | | * will start from the rollback state and also reset the allocator as |
1231 | | * one of its first actions. |
1232 | | */ |
1233 | | void |
1234 | 0 | dns_qpmulti_update(dns_qpmulti_t *multi, dns_qp_t **qptp) { |
1235 | 0 | dns_qp_t *qp = transaction_open(multi, qptp); |
1236 | 0 | TRACE(""); |
1237 | |
1238 | 0 | qp->transaction_mode = QP_UPDATE; |
1239 | |
1240 | 0 | dns_qp_t *rollback = isc_mem_allocate(qp->mctx, sizeof(*rollback)); |
1241 | 0 | memmove(rollback, qp, sizeof(*rollback)); |
1242 | | /* can be uninitialized on the first transaction */ |
1243 | 0 | if (rollback->base != NULL) { |
1244 | 0 | INSIST(QPBASE_VALID(rollback->base)); |
1245 | 0 | INSIST(qp->usage != NULL && qp->chunk_max > 0); |
1246 | | /* paired with either _commit() or _rollback() */ |
1247 | 0 | isc_refcount_increment(&rollback->base->refcount); |
1248 | 0 | size_t usage_bytes = sizeof(qp->usage[0]) * qp->chunk_max; |
1249 | 0 | rollback->usage = isc_mem_allocate(qp->mctx, usage_bytes); |
1250 | 0 | memmove(rollback->usage, qp->usage, usage_bytes); |
1251 | 0 | } |
1252 | 0 | INSIST(multi->rollback == NULL); |
1253 | 0 | multi->rollback = rollback; |
1254 | |
1255 | 0 | alloc_reset(qp); |
1256 | 0 | } |
1257 | | |
1258 | | void |
1259 | 36.8k | dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) { |
1260 | 36.8k | REQUIRE(QPMULTI_VALID(multi)); |
1261 | 36.8k | REQUIRE(qptp != NULL && *qptp == &multi->writer); |
1262 | 36.8k | REQUIRE(multi->writer.transaction_mode == QP_WRITE || |
1263 | 36.8k | multi->writer.transaction_mode == QP_UPDATE); |
1264 | | |
1265 | 36.8k | dns_qp_t *qp = *qptp; |
1266 | 36.8k | TRACE(""); |
1267 | | |
1268 | 36.8k | if (qp->transaction_mode == QP_UPDATE) { |
1269 | 0 | INSIST(multi->rollback != NULL); |
1270 | | /* paired with dns_qpmulti_update() */ |
1271 | 0 | if (qpbase_unref(multi->rollback)) { |
1272 | 0 | isc_mem_free(qp->mctx, multi->rollback->base); |
1273 | 0 | } |
1274 | 0 | if (multi->rollback->usage != NULL) { |
1275 | 0 | isc_mem_free(qp->mctx, multi->rollback->usage); |
1276 | 0 | } |
1277 | 0 | isc_mem_free(qp->mctx, multi->rollback); |
1278 | 0 | } |
1279 | 36.8k | INSIST(multi->rollback == NULL); |
1280 | | |
1281 | | /* not the first commit? */ |
1282 | 36.8k | if (multi->reader_ref != INVALID_REF) { |
1283 | 18.4k | INSIST(cells_immutable(qp, multi->reader_ref)); |
1284 | 18.4k | free_twigs(qp, multi->reader_ref, READER_SIZE); |
1285 | 18.4k | } |
1286 | | |
1287 | 36.8k | if (qp->transaction_mode == QP_UPDATE) { |
1288 | | /* minimize memory overhead */ |
1289 | 0 | compact(qp); |
1290 | 0 | multi->reader_ref = alloc_twigs(qp, READER_SIZE); |
1291 | 0 | qp->base->ptr[qp->bump] = chunk_shrink_raw( |
1292 | 0 | qp, qp->base->ptr[qp->bump], |
1293 | 0 | qp->usage[qp->bump].used * sizeof(dns_qpnode_t)); |
1294 | 36.8k | } else { |
1295 | 36.8k | multi->reader_ref = alloc_twigs(qp, READER_SIZE); |
1296 | 36.8k | } |
1297 | | |
1298 | | /* anchor a new version of the trie */ |
1299 | 36.8k | dns_qpnode_t *reader = ref_ptr(qp, multi->reader_ref); |
1300 | 36.8k | make_reader(reader, multi); |
1301 | | /* paired with chunk_free() */ |
1302 | 36.8k | isc_refcount_increment(&qp->base->refcount); |
1303 | | |
1304 | 36.8k | rcu_assign_pointer(multi->reader, reader); /* COMMIT */ |
1305 | | |
1306 | | /* clean up what we can right now */ |
1307 | 36.8k | if (qp->transaction_mode == QP_UPDATE || QP_NEEDGC(qp)) { |
1308 | 5 | recycle(qp); |
1309 | 5 | } |
1310 | | |
1311 | | /* schedule the rest for later */ |
1312 | 36.8k | reclaim_chunks(multi); |
1313 | | |
1314 | 36.8k | *qptp = NULL; |
1315 | 36.8k | UNLOCK(&multi->mutex); |
1316 | 36.8k | } |
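| |
| | /* |
| | * Usage sketch (illustrative) of a light read-write transaction; |
| | * modify_trie() is a hypothetical stand-in for insertions and |
| | * deletions made through the dns_qp API: |
| | * |
| | *	dns_qp_t *qp = NULL; |
| | *	dns_qpmulti_write(multi, &qp); |
| | *	modify_trie(qp); |
| | *	dns_qpmulti_commit(multi, &qp); |
| | * |
| | * An update transaction has the same shape, opened with |
| | * dns_qpmulti_update(), with dns_qpmulti_rollback() available as an |
| | * alternative to commit. |
| | */ |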
1317 | | |
1318 | | /* |
1319 | | * Throw away everything that was allocated during this transaction. |
1320 | | */ |
1321 | | void |
1322 | 0 | dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) { |
1323 | 0 | unsigned int nfree = 0; |
1324 | |
1325 | 0 | REQUIRE(QPMULTI_VALID(multi)); |
1326 | 0 | REQUIRE(multi->writer.transaction_mode == QP_UPDATE); |
1327 | 0 | REQUIRE(qptp != NULL && *qptp == &multi->writer); |
1328 | |
1329 | 0 | dns_qp_t *qp = *qptp; |
1330 | 0 | TRACE(""); |
1331 | |
1332 | 0 | isc_nanosecs_t start = isc_time_monotonic(); |
1333 | |
1334 | 0 | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
1335 | 0 | if (qp->base->ptr[chunk] != NULL && !qp->usage[chunk].immutable) |
1336 | 0 | { |
1337 | 0 | chunk_free(qp, chunk); |
1338 | | /* |
1339 | | * we need to clear its base pointer in the rollback |
1340 | | * trie, in case the arrays were resized |
1341 | | */ |
1342 | 0 | if (chunk < multi->rollback->chunk_max) { |
1343 | 0 | INSIST(!multi->rollback->usage[chunk].exists); |
1344 | 0 | multi->rollback->base->ptr[chunk] = NULL; |
1345 | 0 | } |
1346 | 0 | nfree++; |
1347 | 0 | } |
1348 | 0 | } |
1349 | | |
1350 | | /* |
1351 | | * multi->rollback->base and multi->writer->base are the same, |
1352 | | * unless there was a realloc_chunk_arrays() during the transaction |
1353 | | */ |
1354 | 0 | if (qpbase_unref(qp)) { |
1355 | | /* paired with dns_qpmulti_update() */ |
1356 | 0 | isc_mem_free(qp->mctx, qp->base); |
1357 | 0 | } |
1358 | 0 | isc_mem_free(qp->mctx, qp->usage); |
1359 | | |
1360 | | /* reset allocator state */ |
1361 | 0 | INSIST(multi->rollback != NULL); |
1362 | 0 | memmove(qp, multi->rollback, sizeof(*qp)); |
1363 | 0 | isc_mem_free(qp->mctx, multi->rollback); |
1364 | 0 | INSIST(multi->rollback == NULL); |
1365 | |
1366 | 0 | isc_nanosecs_t time = isc_time_monotonic() - start; |
1367 | 0 | atomic_fetch_add_relaxed(&rollback_time, time); |
1368 | |
1369 | 0 | LOG_STATS("qp rollback" PRItime "free %u chunks", time, nfree); |
1370 | |
1371 | 0 | *qptp = NULL; |
1372 | 0 | UNLOCK(&multi->mutex); |
1373 | 0 | } |
1374 | | |
1375 | | /*********************************************************************** |
1376 | | * |
1377 | | * read-only transactions |
1378 | | */ |
1379 | | |
1380 | | static dns_qpmulti_t * |
1381 | 503 | reader_open(dns_qpmulti_t *multi, dns_qpreadable_t qpr) { |
1382 | 503 | dns_qpreader_t *qp = dns_qpreader(qpr); |
1383 | 503 | dns_qpnode_t *reader = rcu_dereference(multi->reader); |
1384 | 503 | if (reader == NULL) { |
1385 | 0 | QP_INIT(qp, multi->writer.methods, multi->writer.uctx); |
1386 | 503 | } else { |
1387 | 503 | multi = unpack_reader(qp, reader); |
1388 | 503 | } |
1389 | 503 | return multi; |
1390 | 503 | } |
1391 | | |
1392 | | /* |
1393 | | * a query is light |
1394 | | */ |
1395 | | |
1396 | | void |
1397 | 503 | dns_qpmulti_query(dns_qpmulti_t *multi, dns_qpread_t *qp) { |
1398 | 503 | REQUIRE(QPMULTI_VALID(multi)); |
1399 | 503 | REQUIRE(qp != NULL); |
1400 | | |
1401 | 503 | qp->tid = isc_tid(); |
1402 | 503 | rcu_read_lock(); |
1403 | | |
1404 | 503 | dns_qpmulti_t *whence = reader_open(multi, qp); |
1405 | 503 | INSIST(whence == multi); |
1406 | 503 | } |
1407 | | |
1408 | | void |
1409 | 503 | dns_qpread_destroy(dns_qpmulti_t *multi, dns_qpread_t *qp) { |
1410 | 503 | REQUIRE(QPMULTI_VALID(multi)); |
1411 | 503 | REQUIRE(QP_VALID(qp)); |
1412 | 503 | REQUIRE(qp->tid == isc_tid()); |
1413 | 503 | *qp = (dns_qpread_t){}; |
1414 | 503 | rcu_read_unlock(); |
1415 | 503 | } |
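| |
| | /* |
| | * Usage sketch (illustrative): a query holds an RCU read lock for |
| | * its whole lifetime, so it must be short-lived and must stay on |
| | * one thread: |
| | * |
| | *	dns_qpread_t qpr; |
| | *	dns_qpmulti_query(multi, &qpr); |
| | *	... lookups via &qpr ... |
| | *	dns_qpread_destroy(multi, &qpr); |
| | */ |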
1416 | | |
1417 | | /* |
1418 | | * a snapshot is heavy |
1419 | | */ |
1420 | | |
1421 | | void |
1422 | 0 | dns_qpmulti_snapshot(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) { |
1423 | 0 | REQUIRE(QPMULTI_VALID(multi)); |
1424 | 0 | REQUIRE(qpsp != NULL && *qpsp == NULL); |
1425 | |
1426 | 0 | rcu_read_lock(); |
1427 | |
1428 | 0 | LOCK(&multi->mutex); |
1429 | |
1430 | 0 | dns_qp_t *qpw = &multi->writer; |
1431 | 0 | size_t bytes = sizeof(dns_qpsnap_t) + sizeof(dns_qpbase_t) + |
1432 | 0 | sizeof(qpw->base->ptr[0]) * qpw->chunk_max; |
1433 | 0 | dns_qpsnap_t *qps = isc_mem_allocate(qpw->mctx, bytes); |
1434 | |
1435 | 0 | qps->whence = reader_open(multi, qps); |
1436 | 0 | INSIST(qps->whence == multi); |
1437 | | |
1438 | | /* not a separate allocation */ |
1439 | 0 | qps->base = (dns_qpbase_t *)(qps + 1); |
1440 | 0 | isc_refcount_init(&qps->base->refcount, 0); |
1441 | | |
1442 | | /* |
1443 | | * only copy base pointers of chunks we need, so we can |
1444 | | * reclaim unused memory in dns_qpsnap_destroy() |
1445 | | */ |
1446 | 0 | qps->chunk_max = qpw->chunk_max; |
1447 | 0 | for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) { |
1448 | 0 | if (qpw->usage[chunk].exists && chunk_usage(qpw, chunk) > 0) { |
1449 | 0 | qpw->usage[chunk].snapshot = true; |
1450 | 0 | qps->base->ptr[chunk] = qpw->base->ptr[chunk]; |
1451 | 0 | } else { |
1452 | 0 | qps->base->ptr[chunk] = NULL; |
1453 | 0 | } |
1454 | 0 | } |
1455 | 0 | ISC_LIST_INITANDAPPEND(multi->snapshots, qps, link); |
1456 | |
1457 | 0 | *qpsp = qps; |
1458 | 0 | UNLOCK(&multi->mutex); |
1459 | |
1460 | 0 | rcu_read_unlock(); |
1461 | 0 | } |
1462 | | |
1463 | | void |
1464 | 0 | dns_qpsnap_destroy(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) { |
1465 | 0 | REQUIRE(QPMULTI_VALID(multi)); |
1466 | 0 | REQUIRE(qpsp != NULL && *qpsp != NULL); |
1467 | |
1468 | 0 | LOCK(&multi->mutex); |
1469 | |
1470 | 0 | dns_qpsnap_t *qp = *qpsp; |
1471 | | |
1472 | | /* make sure the API is being used correctly */ |
1473 | 0 | REQUIRE(qp->whence == multi); |
1474 | |
1475 | 0 | ISC_LIST_UNLINK(multi->snapshots, qp, link); |
1476 | | |
1477 | | /* |
1478 | | * eagerly reclaim chunks that are now unused, so that memory does |
1479 | | * not accumulate when a trie has a lot of updates and snapshots |
1480 | | */ |
1481 | 0 | marksweep_chunks(multi); |
1482 | |
1483 | 0 | isc_mem_free(multi->writer.mctx, qp); |
1484 | |
1485 | 0 | *qpsp = NULL; |
1486 | 0 | UNLOCK(&multi->mutex); |
1487 | 0 | } |
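| | /*
| | * Usage sketch (illustrative): a snapshot pins the chunks it
| | * references until it is destroyed, so every
| | * dns_qpmulti_snapshot() must be paired with a later
| | * dns_qpsnap_destroy():
| | *
| | *	dns_qpsnap_t *qps = NULL;
| | *	dns_qpmulti_snapshot(multi, &qps);
| | *	... arbitrarily long read-only work using qps ...
| | *	dns_qpsnap_destroy(multi, &qps);
| | */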
1488 | | |
1489 | | /*********************************************************************** |
1490 | | * |
1491 | | * constructors, destructors |
1492 | | */ |
1493 | | |
1494 | | void |
1495 | | dns_qp_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, |
1496 | 123 | dns_qp_t **qptp) { |
1497 | 123 | REQUIRE(mctx != NULL); |
1498 | 123 | REQUIRE(methods != NULL); |
1499 | 123 | REQUIRE(qptp != NULL && *qptp == NULL); |
1500 | | |
1501 | 123 | dns_qp_t *qp = isc_mem_get(mctx, sizeof(*qp)); |
1502 | 123 | QP_INIT(qp, methods, uctx); |
1503 | 123 | isc_mem_attach(mctx, &qp->mctx); |
1504 | 123 | alloc_reset(qp); |
1505 | 123 | TRACE(""); |
1506 | 123 | *qptp = qp; |
1507 | 123 | } |
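| | /*
| | * Usage sketch (illustrative; `mctx` and a filled-in `methods`
| | * table are assumed from the caller): a single-threaded trie is
| | * created, used, and destroyed directly:
| | *
| | *	dns_qp_t *qp = NULL;
| | *	dns_qp_create(mctx, &methods, NULL, &qp);
| | *	... dns_qp_insert(), dns_qp_getname(), ...
| | *	dns_qp_destroy(&qp);
| | */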
1508 | | |
1509 | | void |
1510 | | dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, |
1511 | 18.4k | dns_qpmulti_t **qpmp) { |
1512 | 18.4k | REQUIRE(qpmp != NULL && *qpmp == NULL); |
1513 | | |
1514 | 18.4k | dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi)); |
1515 | 18.4k | *multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC, |
1516 | 18.4k | .reader_ref = INVALID_REF, |
1517 | 18.4k | .references = ISC_REFCOUNT_INITIALIZER(1) }; |
1518 | 18.4k | isc_mutex_init(&multi->mutex); |
1519 | 18.4k | ISC_LIST_INIT(multi->snapshots); |
1520 | | |
1521 | | /* |
1522 | | * Do not waste effort allocating a bump chunk that will be thrown |
1523 | | * away when a transaction is opened. dns_qpmulti_update() always |
1524 | | * allocates; to ensure dns_qpmulti_write() does too, pretend the |
1525 | | * previous transaction was an update |
1526 | | */ |
1527 | 18.4k | dns_qp_t *qp = &multi->writer; |
1528 | 18.4k | QP_INIT(qp, methods, uctx); |
1529 | 18.4k | isc_mem_attach(mctx, &qp->mctx); |
1530 | 18.4k | qp->transaction_mode = QP_UPDATE; |
1531 | 18.4k | TRACE(""); |
1532 | 18.4k | *qpmp = multi; |
1533 | 18.4k | } |
1534 | | |
1535 | | static void |
1536 | 18.5k | destroy_guts(dns_qp_t *qp) { |
1537 | 18.5k | if (qp->chunk_max == 0) { |
1538 | 0 | return; |
1539 | 0 | } |
1540 | | |
1541 | 223k | for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { |
1542 | 205k | if (qp->base->ptr[chunk] != NULL) { |
1543 | 129k | chunk_free(qp, chunk); |
1544 | 129k | } |
1545 | 205k | } |
1546 | 18.5k | qp->chunk_max = 0; |
1547 | 18.5k | ENSURE(qp->used_count == 0); |
1548 | 18.5k | ENSURE(qp->free_count == 0); |
1549 | 18.5k | ENSURE(isc_refcount_current(&qp->base->refcount) == 1); |
1550 | 18.5k | isc_mem_free(qp->mctx, qp->base); |
1551 | 18.5k | isc_mem_free(qp->mctx, qp->usage); |
1552 | 18.5k | qp->magic = 0; |
1553 | 18.5k | } |
1554 | | |
1555 | | void |
1556 | 123 | dns_qp_destroy(dns_qp_t **qptp) { |
1557 | 123 | REQUIRE(qptp != NULL); |
1558 | 123 | REQUIRE(QP_VALID(*qptp)); |
1559 | | |
1560 | 123 | dns_qp_t *qp = *qptp; |
1561 | 123 | *qptp = NULL; |
1562 | | |
1563 | | /* do not try to destroy part of a dns_qpmulti_t */ |
1564 | 123 | REQUIRE(qp->transaction_mode == QP_NONE); |
1565 | | |
1566 | 123 | TRACE(""); |
1567 | 123 | destroy_guts(qp); |
1568 | 123 | isc_mem_putanddetach(&qp->mctx, qp, sizeof(*qp)); |
1569 | 123 | } |
1570 | | |
1571 | | static void |
1572 | 18.4k | qpmulti_free_mem(dns_qpmulti_t *multi) { |
1573 | 18.4k | REQUIRE(QPMULTI_VALID(multi)); |
1574 | | |
1575 | | /* reassure thread sanitizer */ |
1576 | 18.4k | LOCK(&multi->mutex); |
1577 | 18.4k | dns_qp_t *qp = &multi->writer; |
1578 | 18.4k | UNLOCK(&multi->mutex); |
1579 | | |
1580 | 18.4k | isc_mutex_destroy(&multi->mutex); |
1581 | 18.4k | isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); |
1582 | 18.4k | } |
1583 | | |
1584 | | #if DNS_QPMULTI_TRACE
1585 | | ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem) |
1586 | | #else |
1587 | 37.7k | ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
1588 | 37.7k | #endif |
1589 | 37.7k | |
1590 | 37.7k | static void |
1591 | 37.7k | qpmulti_destroy_guts_cb(struct rcu_head *arg) { |
1592 | 18.4k | qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); |
1593 | 18.4k | REQUIRE(QPRCU_VALID(rcuctx)); |
1594 | | /* only nonzero for reclaim_chunks_cb() */ |
1595 | 18.4k | REQUIRE(rcuctx->count == 0); |
1596 | | |
1597 | 18.4k | dns_qpmulti_t *multi = rcuctx->multi; |
1598 | 18.4k | REQUIRE(QPMULTI_VALID(multi)); |
1599 | | |
1600 | | /* reassure thread sanitizer */ |
1601 | 18.4k | LOCK(&multi->mutex); |
1602 | | |
1603 | 18.4k | dns_qp_t *qp = &multi->writer; |
1604 | 18.4k | REQUIRE(QP_VALID(qp)); |
1605 | | |
1606 | 18.4k | destroy_guts(qp); |
1607 | | |
1608 | 18.4k | UNLOCK(&multi->mutex); |
1609 | | |
1610 | 18.4k | dns_qpmulti_detach(&multi); |
1611 | 18.4k | isc_mem_putanddetach(&rcuctx->mctx, rcuctx, |
1612 | 18.4k | STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); |
1613 | 18.4k | } |
1614 | | |
1615 | | void |
1616 | 18.4k | dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { |
1617 | 18.4k | dns_qp_t *qp = NULL; |
1618 | 18.4k | dns_qpmulti_t *multi = NULL; |
1619 | 18.4k | qp_rcuctx_t *rcuctx = NULL; |
1620 | | |
1621 | 18.4k | REQUIRE(qpmp != NULL); |
1622 | 18.4k | REQUIRE(QPMULTI_VALID(*qpmp)); |
1623 | | |
1624 | 18.4k | multi = *qpmp; |
1625 | 18.4k | qp = &multi->writer; |
1626 | 18.4k | *qpmp = NULL; |
1627 | | |
1628 | 18.4k | REQUIRE(QP_VALID(qp)); |
1629 | 18.4k | REQUIRE(multi->rollback == NULL); |
1630 | 18.4k | REQUIRE(ISC_LIST_EMPTY(multi->snapshots)); |
1631 | | |
1632 | 18.4k | rcuctx = isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, 0)); |
1633 | 18.4k | *rcuctx = (qp_rcuctx_t){ |
1634 | 18.4k | .magic = QPRCU_MAGIC, |
1635 | 18.4k | .multi = multi, |
1636 | 18.4k | }; |
1637 | 18.4k | isc_mem_attach(qp->mctx, &rcuctx->mctx); |
1638 | 18.4k | call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb); |
1639 | 18.4k | } |
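| | /*
| | * Usage sketch (illustrative): because destroy_guts() runs from a
| | * call_rcu() callback, memory may be reclaimed some time after
| | * dns_qpmulti_destroy() returns; test code can use liburcu's
| | * rcu_barrier() to wait for pending callbacks:
| | *
| | *	dns_qpmulti_t *multi = NULL;
| | *	dns_qpmulti_create(mctx, &methods, NULL, &multi);
| | *	...
| | *	dns_qpmulti_destroy(&multi);
| | *	rcu_barrier();	... e.g. before checking for leaks ...
| | */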
1640 | | |
1641 | | /*********************************************************************** |
1642 | | * |
1643 | | * modification |
1644 | | */ |
1645 | | |
1646 | | isc_result_t |
1647 | 7.15M | dns_qp_insert(dns_qp_t *qp, void *pval, uint32_t ival) { |
1648 | 7.15M | dns_qpref_t new_ref, old_ref; |
1649 | 7.15M | dns_qpnode_t new_leaf, old_node; |
1650 | 7.15M | dns_qpnode_t *new_twigs = NULL, *old_twigs = NULL; |
1651 | 7.15M | dns_qpshift_t new_bit, old_bit; |
1652 | 7.15M | dns_qpweight_t old_size, new_size; |
1653 | 7.15M | dns_qpkey_t new_key, old_key; |
1654 | 7.15M | size_t new_keylen, old_keylen; |
1655 | 7.15M | size_t offset; |
1656 | 7.15M | uint64_t index; |
1657 | 7.15M | dns_qpshift_t bit; |
1658 | 7.15M | dns_qpweight_t pos; |
1659 | 7.15M | dns_qpnode_t *n = NULL; |
1660 | | |
1661 | 7.15M | REQUIRE(QP_VALID(qp)); |
1662 | | |
1663 | 7.15M | new_leaf = make_leaf(pval, ival); |
1664 | 7.15M | new_keylen = leaf_qpkey(qp, &new_leaf, new_key); |
1665 | | |
1666 | | /* first leaf in an empty trie? */ |
1667 | 7.15M | if (qp->leaf_count == 0) { |
1668 | 410k | new_ref = alloc_twigs(qp, 1); |
1669 | 410k | new_twigs = ref_ptr(qp, new_ref); |
1670 | 410k | *new_twigs = new_leaf; |
1671 | 410k | attach_leaf(qp, new_twigs); |
1672 | 410k | qp->leaf_count++; |
1673 | 410k | qp->root_ref = new_ref; |
1674 | 410k | return ISC_R_SUCCESS; |
1675 | 410k | } |
1676 | | |
1677 | | /* |
1678 | | * We need to keep searching down to a leaf even if our key is |
1679 | | * missing from this branch. It doesn't matter which twig we |
1680 | | * choose since the keys are all the same up to this node's |
1681 | | * offset. Note that if we simply use branch_twig_pos(n, bit) |
1682 | | * we may get an out-of-bounds access if our bit is greater |
1683 | | * than all the set bits in the node. |
1684 | | */ |
1685 | 6.74M | n = ref_ptr(qp, qp->root_ref); |
1686 | 53.4M | while (is_branch(n)) { |
1687 | 46.7M | prefetch_twigs(qp, n); |
1688 | 46.7M | dns_qpref_t ref = branch_twigs_ref(n); |
1689 | 46.7M | bit = branch_keybit(n, new_key, new_keylen); |
1690 | 46.7M | pos = branch_has_twig(n, bit) ? branch_twig_pos(n, bit) : 0; |
1691 | 46.7M | n = ref_ptr(qp, ref + pos); |
1692 | 46.7M | } |
1693 | | |
1694 | | /* do the keys differ, and if so, where? */ |
1695 | 6.74M | old_keylen = leaf_qpkey(qp, n, old_key); |
1696 | 6.74M | offset = qpkey_compare(new_key, new_keylen, old_key, old_keylen); |
1697 | 6.74M | if (offset == QPKEY_EQUAL) { |
1698 | 46.0k | return ISC_R_EXISTS; |
1699 | 46.0k | } |
1700 | 6.70M | new_bit = qpkey_bit(new_key, new_keylen, offset); |
1701 | 6.70M | old_bit = qpkey_bit(old_key, old_keylen, offset); |
1702 | | |
1703 | | /* find where to insert a branch or grow an existing branch. */ |
1704 | 6.70M | n = make_root_mutable(qp); |
1705 | 47.5M | while (is_branch(n)) { |
1706 | 45.9M | prefetch_twigs(qp, n); |
1707 | 45.9M | if (offset < branch_key_offset(n)) { |
1708 | 87.8k | goto newbranch; |
1709 | 87.8k | } |
1710 | 45.8M | if (offset == branch_key_offset(n)) { |
1711 | 5.05M | goto growbranch; |
1712 | 5.05M | } |
1713 | 40.8M | make_twigs_mutable(qp, n); |
1714 | 40.8M | bit = branch_keybit(n, new_key, new_keylen); |
1715 | 40.8M | INSIST(branch_has_twig(n, bit)); |
1716 | 40.8M | n = branch_twig_ptr(qp, n, bit); |
1717 | 40.8M | } |
1718 | | /* fall through */ |
1719 | | |
1720 | 1.64M | newbranch: |
1721 | 1.64M | new_ref = alloc_twigs(qp, 2); |
1722 | 1.64M | new_twigs = ref_ptr(qp, new_ref); |
1723 | | |
1724 | | /* save before overwriting. */ |
1725 | 1.64M | old_node = *n; |
1726 | | |
1727 | | /* new branch node takes old node's place */ |
1728 | 1.64M | index = BRANCH_TAG | (1ULL << new_bit) | (1ULL << old_bit) | |
1729 | 1.64M | ((uint64_t)offset << SHIFT_OFFSET); |
1730 | 1.64M | *n = make_node(index, new_ref); |
1731 | | |
1732 | | /* populate twigs */ |
1733 | 1.64M | new_twigs[old_bit > new_bit] = old_node; |
1734 | 1.64M | new_twigs[new_bit > old_bit] = new_leaf; |
1735 | | |
1736 | 1.64M | attach_leaf(qp, &new_leaf); |
1737 | 1.64M | qp->leaf_count++; |
1738 | | |
1739 | 1.64M | return ISC_R_SUCCESS; |
1740 | | |
1741 | 5.05M | growbranch: |
1742 | 5.05M | INSIST(!branch_has_twig(n, new_bit)); |
1743 | | |
1744 | | /* locate twigs vectors */ |
1745 | 5.05M | old_size = branch_twigs_size(n); |
1746 | 5.05M | new_size = old_size + 1; |
1747 | 5.05M | old_ref = branch_twigs_ref(n); |
1748 | 5.05M | new_ref = alloc_twigs(qp, new_size); |
1749 | 5.05M | old_twigs = ref_ptr(qp, old_ref); |
1750 | 5.05M | new_twigs = ref_ptr(qp, new_ref); |
1751 | | |
1752 | | /* embiggen branch node */ |
1753 | 5.05M | index = branch_index(n) | (1ULL << new_bit); |
1754 | 5.05M | *n = make_node(index, new_ref); |
1755 | | |
1756 | | /* embiggen twigs vector */ |
1757 | 5.05M | pos = branch_twig_pos(n, new_bit); |
1758 | 5.05M | move_twigs(new_twigs, old_twigs, pos); |
1759 | 5.05M | new_twigs[pos] = new_leaf; |
1760 | 5.05M | move_twigs(new_twigs + pos + 1, old_twigs + pos, old_size - pos); |
1761 | | |
1762 | 5.05M | if (squash_twigs(qp, old_ref, old_size)) { |
1763 | | /* old twigs destroyed, only attach to new leaf */ |
1764 | 5.05M | attach_leaf(qp, &new_leaf); |
1765 | 5.05M | } else { |
1766 | | /* old twigs duplicated, attach to all leaves */ |
1767 | 526 | attach_twigs(qp, new_twigs, new_size); |
1768 | 526 | } |
1769 | 5.05M | qp->leaf_count++; |
1770 | | |
1771 | 5.05M | return ISC_R_SUCCESS; |
1772 | 6.70M | } |
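| | /*
| | * Worked example (illustrative) of the branchless twig ordering
| | * in the newbranch case above: the two bits always differ (they
| | * come from the offset where the keys first diverge), so exactly
| | * one of the comparisons is 1. With old_bit = 5 and new_bit = 3:
| | *
| | *	new_twigs[5 > 3] = old_node;	// slot 1
| | *	new_twigs[3 > 5] = new_leaf;	// slot 0
| | *
| | * so the twig with the smaller bit number lands in slot 0, which
| | * is the ascending order that branch_twig_pos() expects.
| | */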
1773 | | |
1774 | | isc_result_t |
1775 | | dns_qp_deletekey(dns_qp_t *qp, const dns_qpkey_t search_key, |
1776 | 2.45M | size_t search_keylen, void **pval_r, uint32_t *ival_r) { |
1777 | 2.45M | REQUIRE(QP_VALID(qp)); |
1778 | 2.45M | REQUIRE(search_keylen < sizeof(dns_qpkey_t)); |
1779 | | |
1780 | 2.45M | if (get_root(qp) == NULL) { |
1781 | 2.06M | return ISC_R_NOTFOUND; |
1782 | 2.06M | } |
1783 | | |
1784 | 392k | dns_qpshift_t bit = 0; /* suppress warning */ |
1785 | 392k | dns_qpnode_t *parent = NULL; |
1786 | 392k | dns_qpnode_t *n = make_root_mutable(qp); |
1787 | 392k | while (is_branch(n)) { |
1788 | 0 | prefetch_twigs(qp, n); |
1789 | 0 | bit = branch_keybit(n, search_key, search_keylen); |
1790 | 0 | if (!branch_has_twig(n, bit)) { |
1791 | 0 | return ISC_R_NOTFOUND; |
1792 | 0 | } |
1793 | 0 | make_twigs_mutable(qp, n); |
1794 | 0 | parent = n; |
1795 | 0 | n = branch_twig_ptr(qp, n, bit); |
1796 | 0 | } |
1797 | | |
1798 | 392k | dns_qpkey_t found_key; |
1799 | 392k | size_t found_keylen = leaf_qpkey(qp, n, found_key); |
1800 | 392k | if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) != |
1801 | 392k | QPKEY_EQUAL) |
1802 | 0 | { |
1803 | 0 | return ISC_R_NOTFOUND; |
1804 | 0 | } |
1805 | | |
1806 | 392k | SET_IF_NOT_NULL(pval_r, leaf_pval(n)); |
1807 | 392k | SET_IF_NOT_NULL(ival_r, leaf_ival(n)); |
1808 | 392k | detach_leaf(qp, n); |
1809 | 392k | qp->leaf_count--; |
1810 | | |
1811 | | /* trie becomes empty */ |
1812 | 392k | if (qp->leaf_count == 0) { |
1813 | 392k | INSIST(parent == NULL); |
1814 | 392k | INSIST(n == get_root(qp)); |
1815 | 392k | free_twigs(qp, qp->root_ref, 1); |
1816 | 392k | qp->root_ref = INVALID_REF; |
1817 | 392k | return ISC_R_SUCCESS; |
1818 | 392k | } |
1819 | | |
1820 | | /* step back to parent node */ |
1821 | 0 | n = parent; |
1822 | 0 | parent = NULL; |
1823 | |
1824 | 0 | INSIST(bit != 0); |
1825 | 0 | dns_qpweight_t size = branch_twigs_size(n); |
1826 | 0 | dns_qpweight_t pos = branch_twig_pos(n, bit); |
1827 | 0 | dns_qpref_t ref = branch_twigs_ref(n); |
1828 | 0 | dns_qpnode_t *twigs = ref_ptr(qp, ref); |
1829 | |
1830 | 0 | if (size == 2) { |
1831 | | /* |
1832 | | * move the other twig to the parent branch. |
1833 | | */ |
1834 | 0 | *n = twigs[!pos]; |
1835 | 0 | squash_twigs(qp, ref, 2); |
1836 | 0 | } else { |
1837 | | /* |
1838 | | * shrink the twigs in place, to avoid using the bump |
1839 | | * chunk too fast - the gc will clean up after us |
1840 | | */ |
1841 | 0 | *n = make_node(branch_index(n) & ~(1ULL << bit), ref); |
1842 | 0 | move_twigs(twigs + pos, twigs + pos + 1, size - pos - 1); |
1843 | 0 | squash_twigs(qp, ref + size - 1, 1); |
1844 | 0 | } |
1845 | |
1846 | 0 | return ISC_R_SUCCESS; |
1847 | 392k | } |
1848 | | |
1849 | | isc_result_t |
1850 | | dns_qp_deletename(dns_qp_t *qp, const dns_name_t *name, dns_namespace_t space, |
1851 | 2 | void **pval_r, uint32_t *ival_r) { |
1852 | 2 | dns_qpkey_t key; |
1853 | 2 | size_t keylen = dns_qpkey_fromname(key, name, space); |
1854 | 2 | return dns_qp_deletekey(qp, key, keylen, pval_r, ival_r); |
1855 | 2 | } |
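| | /*
| | * Usage sketch (illustrative; a write-mode `qp` is assumed from
| | * the caller): deletion hands back the stored value so the caller
| | * can release whatever it refers to:
| | *
| | *	void *pval = NULL;
| | *	uint32_t ival = 0;
| | *	result = dns_qp_deletename(qp, name, space, &pval, &ival);
| | *	if (result == ISC_R_SUCCESS) {
| | *		... detach/free the object behind pval ...
| | *	}
| | */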
1856 | | |
1857 | | /*********************************************************************** |
1858 | | * chains |
1859 | | */ |
1860 | | static void |
1861 | 612 | maybe_set_name(dns_qpreader_t *qp, dns_qpnode_t *node, dns_name_t *name) { |
1862 | 612 | dns_qpkey_t key; |
1863 | 612 | size_t len; |
1864 | | |
1865 | 612 | if (name == NULL) { |
1866 | 488 | return; |
1867 | 488 | } |
1868 | | |
1869 | 124 | dns_name_reset(name); |
1870 | 124 | len = leaf_qpkey(qp, node, key); |
1871 | 124 | dns_qpkey_toname(key, len, name, NULL); |
1872 | 124 | } |
1873 | | |
1874 | | void |
1875 | 497 | dns_qpchain_init(dns_qpreadable_t qpr, dns_qpchain_t *chain) { |
1876 | 497 | dns_qpreader_t *qp = dns_qpreader(qpr); |
1877 | 497 | REQUIRE(QP_VALID(qp)); |
1878 | 497 | REQUIRE(chain != NULL); |
1879 | | |
1880 | | /* |
1881 | | * dns_qpchain_t contains a 2kb buffer, which is slow to |
1882 | | * zero-initialize. Therefore we avoid designated initializers, and |
1883 | | * initialize each field manually. |
1884 | | */ |
1885 | 497 | chain->magic = QPCHAIN_MAGIC; |
1886 | 497 | chain->qp = qp; |
1887 | 497 | chain->len = 0; |
1888 | 497 | } |
1889 | | |
1890 | | unsigned int |
1891 | 182 | dns_qpchain_length(dns_qpchain_t *chain) { |
1892 | 182 | REQUIRE(QPCHAIN_VALID(chain)); |
1893 | | |
1894 | 182 | return chain->len; |
1895 | 182 | } |
1896 | | |
1897 | | void |
1898 | | dns_qpchain_node(dns_qpchain_t *chain, unsigned int level, dns_name_t *name, |
1899 | 62 | void **pval_r, uint32_t *ival_r) { |
1900 | 62 | dns_qpnode_t *node = NULL; |
1901 | | |
1902 | 62 | REQUIRE(QPCHAIN_VALID(chain)); |
1903 | 62 | REQUIRE(level < chain->len); |
1904 | | |
1905 | 62 | node = chain->chain[level].node; |
1906 | 62 | maybe_set_name(chain->qp, node, name); |
1907 | 62 | SET_IF_NOT_NULL(pval_r, leaf_pval(node)); |
1908 | 62 | SET_IF_NOT_NULL(ival_r, leaf_ival(node)); |
1909 | 62 | } |
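| | /*
| | * Usage sketch (illustrative): walking a chain that was filled in
| | * by dns_qp_lookup(), from the closest-to-root ancestor (level 0)
| | * down to the longest match:
| | *
| | *	unsigned int len = dns_qpchain_length(chain);
| | *	for (unsigned int i = 0; i < len; i++) {
| | *		void *pval = NULL;
| | *		dns_qpchain_node(chain, i, NULL, &pval, NULL);
| | *		... examine pval ...
| | *	}
| | */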
1910 | | |
1911 | | /*********************************************************************** |
1912 | | * iterators |
1913 | | */ |
1914 | | |
1915 | | void |
1916 | 559 | dns_qpiter_init(dns_qpreadable_t qpr, dns_qpiter_t *qpi) { |
1917 | 559 | dns_qpreader_t *qp = dns_qpreader(qpr); |
1918 | 559 | REQUIRE(QP_VALID(qp)); |
1919 | 559 | REQUIRE(qpi != NULL); |
1920 | | |
1921 | | /* |
1922 | | * dns_qpiter_t contains a 4kb buffer, which is slow to zero-initialize. |
1923 | | * Therefore we avoid designated initializers, and initialize each |
1924 | | * field manually. |
1925 | | */ |
1926 | 559 | qpi->qp = qp; |
1927 | 559 | qpi->sp = 0; |
1928 | 559 | qpi->magic = QPITER_MAGIC; |
1929 | | /* |
1930 | | * The top of the stack must be initialized. |
1931 | | */ |
1932 | 559 | qpi->stack[qpi->sp] = NULL; |
1933 | 559 | } |
1934 | | |
1935 | | /* |
1936 | | * are we at the last twig in this branch (in whichever direction |
1937 | | * we're iterating)? |
1938 | | */ |
1939 | | static bool |
1940 | 248 | last_twig(dns_qpiter_t *qpi, bool forward) { |
1941 | 248 | dns_qpweight_t pos = 0, max = 0; |
1942 | 248 | if (qpi->sp > 0) { |
1943 | 186 | dns_qpnode_t *child = qpi->stack[qpi->sp]; |
1944 | 186 | dns_qpnode_t *parent = qpi->stack[qpi->sp - 1]; |
1945 | 186 | pos = child - ref_ptr(qpi->qp, branch_twigs_ref(parent)); |
1946 | 186 | if (forward) { |
1947 | 186 | max = branch_twigs_size(parent) - 1; |
1948 | 186 | } |
1949 | 186 | } |
1950 | 248 | return pos == max; |
1951 | 248 | } |
1952 | | |
1953 | | /* |
1954 | | * move a QP iterator forward or back to the next or previous leaf. |
1955 | | * note: this function may return incorrect results if the iterator
1956 | | * refers to a mutable view of the trie that is altered while iterating
1957 | | */ |
1958 | | static isc_result_t |
1959 | | iterate(bool forward, dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, |
1960 | 186 | uint32_t *ival_r) { |
1961 | 186 | dns_qpnode_t *node = NULL; |
1962 | 186 | bool initial_branch = true; |
1963 | | |
1964 | 186 | REQUIRE(QPITER_VALID(qpi)); |
1965 | | |
1966 | 186 | dns_qpreader_t *qp = qpi->qp; |
1967 | | |
1968 | 186 | REQUIRE(QP_VALID(qp)); |
1969 | | |
1970 | 186 | node = get_root(qp); |
1971 | 186 | if (node == NULL) { |
1972 | 0 | return ISC_R_NOMORE; |
1973 | 0 | } |
1974 | | |
1975 | 248 | do { |
1976 | 248 | if (qpi->stack[qpi->sp] == NULL) { |
1977 | | /* newly initialized iterator: use the root node */ |
1978 | 0 | INSIST(qpi->sp == 0); |
1979 | 0 | qpi->stack[0] = node; |
1980 | 248 | } else if (!initial_branch) { |
1981 | | /* |
1982 | | * in a prior loop, we reached a branch; from |
1983 | | * here we just need to get the highest or lowest |
1984 | | * leaf in the subtree; we don't need to bother |
1985 | | * stepping forward or backward through twigs |
1986 | | * anymore. |
1987 | | */ |
1988 | 0 | INSIST(qpi->sp > 0); |
1989 | 248 | } else if (last_twig(qpi, forward)) { |
1990 | | /* |
1991 | | * we've stepped to the end (or the beginning, |
1992 | | * if we're iterating backwards) of a set of twigs. |
1993 | | */ |
1994 | 124 | if (qpi->sp == 0) { |
1995 | | /* |
1996 | | * we've finished iterating. reinitialize |
1997 | | * the iterator, then return ISC_R_NOMORE. |
1998 | | */ |
1999 | 62 | dns_qpiter_init(qpi->qp, qpi); |
2000 | 62 | return ISC_R_NOMORE; |
2001 | 62 | } |
2002 | | |
2003 | | /* |
2004 | | * pop the stack, and resume at the parent branch. |
2005 | | */ |
2006 | 62 | qpi->stack[qpi->sp] = NULL; |
2007 | 62 | qpi->sp--; |
2008 | 62 | continue; |
2009 | 124 | } else { |
2010 | | /* |
2011 | | * there are more twigs in the current branch, |
2012 | | * so step the node pointer forward (or back). |
2013 | | */ |
2014 | 124 | qpi->stack[qpi->sp] += (forward ? 1 : -1); |
2015 | 124 | node = qpi->stack[qpi->sp]; |
2016 | 124 | } |
2017 | | |
2018 | | /* |
2019 | | * if we're at a branch now, we loop down to the |
2020 | | * left- or rightmost leaf. |
2021 | | */ |
2022 | 124 | if (is_branch(node)) { |
2023 | 0 | qpi->sp++; |
2024 | 0 | INSIST(qpi->sp < DNS_QP_MAXKEY); |
2025 | 0 | node = ref_ptr(qp, branch_twigs_ref(node)) + |
2026 | 0 | (forward ? 0 : branch_twigs_size(node) - 1); |
2027 | 0 | qpi->stack[qpi->sp] = node; |
2028 | 0 | initial_branch = false; |
2029 | 0 | } |
2030 | 186 | } while (is_branch(node)); |
2031 | | |
2032 | | /* we're at a leaf: return its data to the caller */ |
2033 | 124 | SET_IF_NOT_NULL(pval_r, leaf_pval(node)); |
2034 | 124 | SET_IF_NOT_NULL(ival_r, leaf_ival(node)); |
2035 | 124 | maybe_set_name(qpi->qp, node, name); |
2036 | 124 | return ISC_R_SUCCESS; |
2037 | 186 | } |
2038 | | |
2039 | | isc_result_t |
2040 | | dns_qpiter_next(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, |
2041 | 186 | uint32_t *ival_r) { |
2042 | 186 | return iterate(true, qpi, name, pval_r, ival_r); |
2043 | 186 | } |
2044 | | |
2045 | | isc_result_t |
2046 | | dns_qpiter_prev(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, |
2047 | 0 | uint32_t *ival_r) { |
2048 | 0 | return iterate(false, qpi, name, pval_r, ival_r); |
2049 | 0 | } |
2050 | | |
2051 | | isc_result_t |
2052 | | dns_qpiter_current(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, |
2053 | 62 | uint32_t *ival_r) { |
2054 | 62 | dns_qpnode_t *node = NULL; |
2055 | | |
2056 | 62 | REQUIRE(QPITER_VALID(qpi)); |
2057 | | |
2058 | 62 | node = qpi->stack[qpi->sp]; |
2059 | 62 | if (node == NULL || is_branch(node)) { |
2060 | 0 | return ISC_R_FAILURE; |
2061 | 0 | } |
2062 | | |
2063 | 62 | SET_IF_NOT_NULL(pval_r, leaf_pval(node)); |
2064 | 62 | SET_IF_NOT_NULL(ival_r, leaf_ival(node)); |
2065 | 62 | maybe_set_name(qpi->qp, node, name); |
2066 | 62 | return ISC_R_SUCCESS; |
2067 | 62 | } |
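| | /*
| | * Usage sketch (illustrative; a readable `qp` is assumed from the
| | * caller): a complete traversal in key order; when the iterator
| | * runs off the end it reinitializes itself and returns
| | * ISC_R_NOMORE:
| | *
| | *	dns_qpiter_t it;
| | *	void *pval = NULL;
| | *	dns_qpiter_init(qp, &it);
| | *	while (dns_qpiter_next(&it, NULL, &pval, NULL) ==
| | *	       ISC_R_SUCCESS)
| | *	{
| | *		... visit pval ...
| | *	}
| | */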
2068 | | |
2069 | | /*********************************************************************** |
2070 | | * |
2071 | | * search |
2072 | | */ |
2073 | | |
2074 | | isc_result_t |
2075 | | dns_qp_getkey(dns_qpreadable_t qpr, const dns_qpkey_t search_key, |
2076 | 10.9M | size_t search_keylen, void **pval_r, uint32_t *ival_r) { |
2077 | 10.9M | dns_qpreader_t *qp = dns_qpreader(qpr); |
2078 | 10.9M | dns_qpkey_t found_key; |
2079 | 10.9M | size_t found_keylen; |
2080 | 10.9M | dns_qpshift_t bit; |
2081 | 10.9M | dns_qpnode_t *n = NULL; |
2082 | | |
2083 | 10.9M | REQUIRE(QP_VALID(qp)); |
2084 | 10.9M | REQUIRE(search_keylen < sizeof(dns_qpkey_t)); |
2085 | | |
2086 | 10.9M | n = get_root(qp); |
2087 | 10.9M | if (n == NULL) { |
2088 | 1.72M | return ISC_R_NOTFOUND; |
2089 | 1.72M | } |
2090 | | |
2091 | 91.9M | while (is_branch(n)) { |
2092 | 87.7M | prefetch_twigs(qp, n); |
2093 | 87.7M | bit = branch_keybit(n, search_key, search_keylen); |
2094 | 87.7M | if (!branch_has_twig(n, bit)) { |
2095 | 5.03M | return ISC_R_NOTFOUND; |
2096 | 5.03M | } |
2097 | 82.7M | n = branch_twig_ptr(qp, n, bit); |
2098 | 82.7M | } |
2099 | | |
2100 | 4.15M | found_keylen = leaf_qpkey(qp, n, found_key); |
2101 | 4.15M | if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) != |
2102 | 4.15M | QPKEY_EQUAL) |
2103 | 1.62M | { |
2104 | 1.62M | return ISC_R_NOTFOUND; |
2105 | 1.62M | } |
2106 | | |
2107 | 2.53M | SET_IF_NOT_NULL(pval_r, leaf_pval(n)); |
2108 | 2.53M | SET_IF_NOT_NULL(ival_r, leaf_ival(n)); |
2109 | 2.53M | return ISC_R_SUCCESS; |
2110 | 4.15M | } |
2111 | | |
2112 | | isc_result_t |
2113 | | dns_qp_getname(dns_qpreadable_t qpr, const dns_name_t *name, |
2114 | 9.19M | dns_namespace_t space, void **pval_r, uint32_t *ival_r) { |
2115 | 9.19M | dns_qpkey_t key; |
2116 | 9.19M | size_t keylen = dns_qpkey_fromname(key, name, space); |
2117 | 9.19M | return dns_qp_getkey(qpr, key, keylen, pval_r, ival_r); |
2118 | 9.19M | } |
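| | /*
| | * Usage sketch (illustrative): dns_qp_getname() is the exact-match
| | * lookup; unlike dns_qp_lookup() it never reports an ancestor:
| | *
| | *	void *pval = NULL;
| | *	uint32_t ival = 0;
| | *	result = dns_qp_getname(qp, name, space, &pval, &ival);
| | *	if (result == ISC_R_SUCCESS) {
| | *		... pval/ival are the stored leaf data ...
| | *	}
| | */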
2119 | | |
2120 | | static inline void |
2121 | 364 | add_link(dns_qpchain_t *chain, dns_qpnode_t *node, size_t offset) { |
2122 | | /* prevent duplication */ |
2123 | 364 | if (chain->len != 0 && chain->chain[chain->len - 1].node == node) { |
2124 | 0 | return; |
2125 | 0 | } |
2126 | 364 | chain->chain[chain->len].node = node; |
2127 | 364 | chain->chain[chain->len].offset = offset; |
2128 | 364 | chain->len++; |
2129 | 364 | INSIST(chain->len <= DNS_NAME_MAXLABELS); |
2130 | 364 | } |
2131 | | |
2132 | | static inline void |
2133 | 0 | prevleaf(dns_qpiter_t *it) { |
2134 | 0 | isc_result_t result = dns_qpiter_prev(it, NULL, NULL, NULL); |
2135 | 0 | if (result == ISC_R_NOMORE) { |
2136 | 0 | result = dns_qpiter_prev(it, NULL, NULL, NULL); |
2137 | 0 | } |
2138 | 0 | RUNTIME_CHECK(result == ISC_R_SUCCESS); |
2139 | 0 | } |
2140 | | |
2141 | | static inline void |
2142 | 62 | greatest_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpiter_t *iter) { |
2143 | 62 | while (is_branch(n)) { |
2144 | 0 | dns_qpref_t ref = branch_twigs_ref(n) + branch_twigs_size(n) - |
2145 | 0 | 1; |
2146 | 0 | iter->stack[++iter->sp] = n; |
2147 | 0 | n = ref_ptr(qpr, ref); |
2148 | 0 | } |
2149 | 62 | iter->stack[++iter->sp] = n; |
2150 | 62 | } |
2151 | | |
2152 | | static inline dns_qpnode_t * |
2153 | 0 | anyleaf(dns_qpreader_t *qp, dns_qpnode_t *n) { |
2154 | 0 | while (is_branch(n)) { |
2155 | 0 | n = branch_twigs(qp, n); |
2156 | 0 | } |
2157 | 0 | return n; |
2158 | 0 | } |
2159 | | |
2160 | | static inline int |
2161 | | twig_offset(dns_qpnode_t *n, dns_qpshift_t sbit, dns_qpshift_t kbit, |
2162 | 62 | dns_qpshift_t fbit) { |
2163 | 62 | dns_qpweight_t pos = branch_twig_pos(n, sbit); |
2164 | 62 | if (branch_has_twig(n, sbit)) { |
2165 | 62 | return pos - (kbit < fbit); |
2166 | 62 | } |
2167 | 0 | return pos - 1; |
2168 | 62 | } |
2169 | | |
2170 | | /* |
2171 | | * If dns_qp_lookup() was passed an iterator, we want it to point at the |
2172 | | * matching name in the case of an exact match, or at the predecessor name |
2173 | | * for a non-exact match. |
2174 | | * |
2175 | | * If there is an exact match, then there is nothing to be done. Otherwise, |
2176 | | * we pop up the iterator stack until we find a parent branch with an offset |
2177 | | * that is before the position where the search key differs from the found key. |
2178 | | * From there we can step to the leaf that is the predecessor of the searched |
2179 | | * name. |
2180 | | * |
2181 | | * Requires the iterator to be pointing at a leaf node. |
2182 | | */ |
2183 | | static void |
2184 | | fix_iterator(dns_qpreader_t *qp, dns_qpiter_t *it, dns_qpkey_t key, |
2185 | 182 | size_t len) { |
2186 | 182 | dns_qpnode_t *n = it->stack[it->sp]; |
2187 | | |
2188 | 182 | REQUIRE(!is_branch(n)); |
2189 | | |
2190 | 182 | dns_qpkey_t found; |
2191 | 182 | size_t foundlen = leaf_qpkey(qp, n, found); |
2192 | 182 | size_t to = qpkey_compare(key, len, found, foundlen); |
2193 | | |
2194 | | /* If the keys are equal, the iterator is already at the right node. */ |
2195 | 182 | if (to == QPKEY_EQUAL) { |
2196 | 120 | return; |
2197 | 120 | } |
2198 | | |
2199 | | /* |
2200 | | * Special case: if the key differs even before the root |
2201 | | * key offset, it means the name desired either precedes or |
2202 | | * follows the entire range of names in the database, and |
2203 | | * popping up the stack won't help us, so just move the |
2204 | | * iterator one step back from the origin and return. |
2205 | | */ |
2206 | 62 | if (to < branch_key_offset(it->stack[0])) { |
2207 | 0 | dns_qpiter_init(qp, it); |
2208 | 0 | prevleaf(it); |
2209 | 0 | return; |
2210 | 0 | } |
2211 | | |
2212 | | /* |
2213 | | * As long as the branch offset point is after the point where the |
2214 | | * key differs, we need to branch up and find a better node. |
2215 | | */ |
2216 | 62 | while (it->sp > 0) { |
2217 | 62 | dns_qpnode_t *b = it->stack[it->sp - 1]; |
2218 | 62 | if (branch_key_offset(b) < to) { |
2219 | 62 | break; |
2220 | 62 | } |
2221 | 0 | it->sp--; |
2222 | 0 | } |
2223 | 62 | n = it->stack[it->sp]; |
2224 | | |
2225 | | /* |
2226 | | * Either we are now at the correct branch, or we are at the |
2227 | | * first unmatched node. Determine the bit position for the |
2228 | | * twig we need (sbit). |
2229 | | */ |
2230 | 62 | dns_qpshift_t kbit = qpkey_bit(key, len, to); |
2231 | 62 | dns_qpshift_t fbit = qpkey_bit(found, foundlen, to); |
2232 | 62 | dns_qpshift_t sbit = 0; |
2233 | | |
2234 | 62 | if (is_branch(n) && branch_key_offset(n) == to) { |
2235 | | /* We are on the correct branch now. */ |
2236 | 0 | sbit = kbit; |
2237 | 62 | } else if (it->sp == 0) { |
2238 | | /* |
2239 | | * We are on the root branch, popping up the stack won't |
2240 | | * help us, so just move the iterator one step back from the |
2241 | | * origin and return. |
2242 | | */ |
2243 | 0 | dns_qpiter_init(qp, it); |
2244 | 0 | prevleaf(it); |
2245 | 0 | return; |
2246 | 62 | } else { |
2247 | | /* We are at the first unmatched node, pop up the stack. */ |
2248 | 62 | n = it->stack[--it->sp]; |
2249 | 62 | sbit = qpkey_bit(key, len, branch_key_offset(n)); |
2250 | 62 | } |
2251 | | |
2252 | 62 | INSIST(is_branch(n)); |
2253 | | |
2254 | 62 | prefetch_twigs(qp, n); |
2255 | 62 | dns_qpnode_t *twigs = branch_twigs(qp, n); |
2256 | 62 | int toff = twig_offset(n, sbit, kbit, fbit); |
2257 | 62 | if (toff >= 0) { |
2258 | | /* |
2259 | | * The name we want would've been after some twig in |
2260 | | * this branch. Walk down from that twig to the |
2261 | | * highest leaf in its subtree to get the predecessor. |
2262 | | */ |
2263 | 62 | greatest_leaf(qp, twigs + toff, it); |
2264 | 62 | } else { |
2265 | | /* |
2266 | | * Every leaf below this node is greater than the one we |
2267 | | * wanted, so the previous leaf is the predecessor. |
2268 | | */ |
2269 | 0 | prevleaf(it); |
2270 | 0 | } |
2271 | 62 | } |
2272 | | |
2273 | | /* |
2274 | | * When searching for a requested name in dns_qp_lookup(), we might add |
2275 | | * a leaf node to the chain, then subsequently determine that it was a |
2276 | | * dead end. When this happens, the chain can be left holding a node |
2277 | | * that is *not* an ancestor of the requested name. We correct for that |
2278 | | * here. |
2279 | | */ |
2280 | | static void |
2281 | 124 | fix_chain(dns_qpchain_t *chain, size_t offset) { |
2282 | 124 | while (chain->len > 0 && chain->chain[chain->len - 1].offset >= offset) |
2283 | 0 | { |
2284 | 0 | chain->len--; |
2285 | 0 | chain->chain[chain->len].node = NULL; |
2286 | 0 | chain->chain[chain->len].offset = 0; |
2287 | 0 | } |
2288 | 124 | } |
2289 | | |
2290 | | isc_result_t |
2291 | | dns_qp_lookup(dns_qpreadable_t qpr, const dns_name_t *name, |
2292 | | dns_namespace_t space, dns_name_t *foundname, dns_qpiter_t *iter, |
2293 | 497 | dns_qpchain_t *chain, void **pval_r, uint32_t *ival_r) { |
2294 | 497 | dns_qpreader_t *qp = dns_qpreader(qpr); |
2295 | 497 | dns_qpkey_t search, found; |
2296 | 497 | size_t searchlen, foundlen; |
2297 | 497 | size_t offset = 0; |
2298 | 497 | dns_qpnode_t *n = NULL; |
2299 | 497 | dns_qpshift_t bit = SHIFT_NOBYTE; |
2300 | 497 | dns_qpchain_t oc; |
2301 | 497 | dns_qpiter_t it; |
2302 | 497 | bool matched = false; |
2303 | 497 | bool setiter = true; |
2304 | | |
2305 | 497 | REQUIRE(QP_VALID(qp)); |
2306 | 497 | REQUIRE(foundname == NULL || ISC_MAGIC_VALID(foundname, DNS_NAME_MAGIC));
2307 | | |
2308 | 497 | searchlen = dns_qpkey_fromname(search, name, space); |
2309 | | |
2310 | 497 | if (chain == NULL) { |
2311 | 0 | chain = &oc; |
2312 | 0 | } |
2313 | 497 | if (iter == NULL) { |
2314 | 315 | iter = ⁢ |
2315 | 315 | setiter = false; |
2316 | 315 | } |
2317 | 497 | dns_qpchain_init(qp, chain); |
2318 | 497 | dns_qpiter_init(qp, iter); |
2319 | | |
2320 | 497 | n = get_root(qp); |
2321 | 497 | if (n == NULL) { |
2322 | 0 | return ISC_R_NOTFOUND; |
2323 | 0 | } |
2324 | 497 | iter->stack[0] = n; |
2325 | | |
2326 | | /* |
2327 | | * Like `dns_qp_insert()`, we must find a leaf. However, we don't make a |
2328 | | * second pass: instead, we keep track of any leaves with shorter keys |
2329 | | * that we discover along the way. (In general, qp-trie searches can be |
2330 | | * one-pass, by recording their traversal, or two-pass, for less stack |
2331 | | * memory usage.) |
2332 | | */ |
2333 | 679 | while (is_branch(n)) { |
2334 | 182 | prefetch_twigs(qp, n); |
2335 | | |
2336 | 182 | offset = branch_key_offset(n); |
2337 | 182 | bit = qpkey_bit(search, searchlen, offset); |
2338 | 182 | dns_qpnode_t *twigs = branch_twigs(qp, n); |
2339 | | |
2340 | | /* |
2341 | | * A shorter key that can be a parent domain always has a |
2342 | | * leaf node at SHIFT_NOBYTE (indicating end of its key) |
2343 | | * where our search key has a normal character immediately |
2344 | | * after a label separator. |
2345 | | * |
2346 | | * Note 1: It is OK if `off - 1` underflows: it will |
2347 | | * become SIZE_MAX, which is greater than `searchlen`, so |
2348 | | * `qpkey_bit()` will return SHIFT_NOBYTE, which is what we |
2349 | | * want when `off == 0`. |
2350 | | * |
2351 | | * Note 2: If SHIFT_NOBYTE twig is present, it will always |
2352 | | * be in position 0, the first location in 'twigs'. |
2353 | | */ |
2354 | 182 | if (bit != SHIFT_NOBYTE && branch_has_twig(n, SHIFT_NOBYTE) && |
2355 | 0 | qpkey_bit(search, searchlen, offset - 1) == SHIFT_NOBYTE && |
2356 | 0 | !is_branch(twigs)) |
2357 | 0 | { |
2358 | 0 | add_link(chain, twigs, offset); |
2359 | 0 | } |
2360 | | |
2361 | 182 | matched = branch_has_twig(n, bit); |
2362 | 182 | if (matched) { |
2363 | | /* |
2364 | | * found a match: if it's a branch, we keep |
2365 | | * searching, and if it's a leaf, we drop out of |
2366 | | * the loop. |
2367 | | */ |
2368 | 182 | n = branch_twig_ptr(qp, n, bit); |
2369 | 182 | } else { |
2370 | | /* |
2371 | | * this branch is a dead end, and the predecessor |
2372 | | * doesn't matter. now we just need to find a leaf |
2373 | | * to end on so that qpkey_leaf() will work below. |
2374 | | */ |
2375 | 0 | n = anyleaf(qp, twigs); |
2376 | 0 | } |
2377 | | |
2378 | 182 | iter->stack[++iter->sp] = n; |
2379 | 182 | } |
2380 | | |
2381 | 497 | if (setiter) { |
2382 | | /* |
2383 | | * we found a leaf, but it might not be the leaf we wanted. |
2384 | | * if it isn't, and if the caller passed us an iterator, |
2385 | | * then we might need to reposition it. |
2386 | | */ |
2387 | 182 | fix_iterator(qp, iter, search, searchlen); |
2388 | 182 | n = iter->stack[iter->sp]; |
2389 | 182 | } |
2390 | | |
2391 | | /* at this point, n can only be a leaf node */ |
2392 | 497 | INSIST(!is_branch(n)); |
2393 | | |
2394 | 497 | foundlen = leaf_qpkey(qp, n, found); |
2395 | 497 | offset = qpkey_compare(search, searchlen, found, foundlen); |
2396 | | |
2397 | | /* the search ended with an exact or partial match */ |
2398 | 497 | if (offset == QPKEY_EQUAL || offset == foundlen) { |
2399 | 364 | isc_result_t result = ISC_R_SUCCESS; |
2400 | | |
2401 | 364 | if (offset == foundlen) { |
2402 | 124 | fix_chain(chain, offset); |
2403 | 124 | result = DNS_R_PARTIALMATCH; |
2404 | 124 | } |
2405 | 364 | add_link(chain, n, offset); |
2406 | | |
2407 | 364 | SET_IF_NOT_NULL(pval_r, leaf_pval(n)); |
2408 | 364 | SET_IF_NOT_NULL(ival_r, leaf_ival(n)); |
2409 | 364 | maybe_set_name(qp, n, foundname); |
2410 | 364 | return result; |
2411 | 364 | } |
2412 | | |
2413 | | /* |
2414 | | * the requested name was not found, but if an ancestor |
2415 | | * was, we can retrieve that from the chain. |
2416 | | */ |
2417 | 133 | int len = chain->len; |
2418 | 133 | while (len-- > 0) { |
2419 | 0 | if (offset >= chain->chain[len].offset) { |
2420 | 0 | n = chain->chain[len].node; |
2421 | 0 | SET_IF_NOT_NULL(pval_r, leaf_pval(n)); |
2422 | 0 | SET_IF_NOT_NULL(ival_r, leaf_ival(n)); |
2423 | 0 | maybe_set_name(qp, n, foundname); |
2424 | 0 | return DNS_R_PARTIALMATCH; |
2425 | 0 | } else { |
2426 | | /* |
2427 | | * oops, during the search we found and added |
2428 | | * a leaf that's longer than the requested |
2429 | | * name; remove it from the chain. |
2430 | | */ |
2431 | 0 | chain->len--; |
2432 | 0 | } |
2433 | 0 | } |
2434 | | |
2435 | | /* nothing was found at all */ |
2436 | 133 | return ISC_R_NOTFOUND; |
2437 | 133 | } |
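| | /*
| | * Usage sketch (illustrative): distinguishing the three outcomes
| | * of dns_qp_lookup(); on a partial match the chain holds the
| | * ancestors of the requested name and the iterator points at its
| | * predecessor:
| | *
| | *	dns_qpiter_t it;
| | *	dns_qpchain_t chain;
| | *	void *pval = NULL;
| | *	result = dns_qp_lookup(qp, name, space, NULL, &it, &chain,
| | *			       &pval, NULL);
| | *	switch (result) {
| | *	case ISC_R_SUCCESS:	  ... exact match; pval set ...
| | *	case DNS_R_PARTIALMATCH:  ... pval is the closest ancestor ...
| | *	case ISC_R_NOTFOUND:	  ... no match and no ancestor ...
| | *	}
| | */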
2438 | | |
2439 | | /**********************************************************************/ |