qp.c revision 1.3 1 1.3 christos /* $NetBSD: qp.c,v 1.3 2025/01/27 02:16:05 christos Exp $ */
2 1.1 christos
3 1.1 christos /*
4 1.1 christos * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 1.1 christos *
6 1.1 christos * SPDX-License-Identifier: MPL-2.0
7 1.1 christos *
8 1.1 christos * This Source Code Form is subject to the terms of the Mozilla Public
9 1.1 christos * License, v. 2.0. If a copy of the MPL was not distributed with this
10 1.1 christos * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 1.1 christos *
12 1.1 christos * See the COPYRIGHT file distributed with this work for additional
13 1.1 christos * information regarding copyright ownership.
14 1.1 christos */
15 1.1 christos
16 1.1 christos /*
17 1.1 christos * For an overview, see doc/design/qp-trie.md
18 1.1 christos */
19 1.1 christos
20 1.1 christos #include <inttypes.h>
21 1.1 christos #include <stdbool.h>
22 1.1 christos #include <stddef.h>
23 1.1 christos #include <stdint.h>
24 1.1 christos #include <string.h>
25 1.1 christos
26 1.1 christos #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
27 1.1 christos #include <sys/mman.h>
28 1.1 christos #include <unistd.h>
29 1.1 christos #endif
30 1.1 christos
31 1.1 christos #include <isc/atomic.h>
32 1.1 christos #include <isc/buffer.h>
33 1.1 christos #include <isc/magic.h>
34 1.1 christos #include <isc/mem.h>
35 1.1 christos #include <isc/mutex.h>
36 1.1 christos #include <isc/refcount.h>
37 1.1 christos #include <isc/result.h>
38 1.1 christos #include <isc/rwlock.h>
39 1.1 christos #include <isc/tid.h>
40 1.1 christos #include <isc/time.h>
41 1.1 christos #include <isc/types.h>
42 1.1 christos #include <isc/urcu.h>
43 1.1 christos #include <isc/util.h>
44 1.1 christos
45 1.1 christos #include <dns/fixedname.h>
46 1.1 christos #include <dns/log.h>
47 1.1 christos #include <dns/name.h>
48 1.1 christos #include <dns/qp.h>
49 1.1 christos #include <dns/types.h>
50 1.1 christos
51 1.1 christos #include "qp_p.h"
52 1.1 christos
53 1.1 christos #ifndef DNS_QP_LOG_STATS
54 1.1 christos #define DNS_QP_LOG_STATS 1
55 1.1 christos #endif
56 1.1 christos #ifndef DNS_QP_TRACE
57 1.1 christos #define DNS_QP_TRACE 0
58 1.1 christos #endif
59 1.1 christos
60 1.1 christos /*
61 1.1 christos * very basic garbage collector statistics
62 1.1 christos *
63 1.1 christos * XXXFANF for now we're logging GC times, but ideally we should
64 1.1 christos * accumulate stats more quietly and report via the statschannel
65 1.1 christos */
66 1.3 christos #ifdef _LP64
67 1.1 christos static atomic_uint_fast64_t compact_time;
68 1.1 christos static atomic_uint_fast64_t recycle_time;
69 1.1 christos static atomic_uint_fast64_t rollback_time;
70 1.3 christos #define ISC_QP_ADD(v, a) atomic_fetch_add_relaxed(&(v), (a))
71 1.3 christos #define ISC_QP_GET(v) atomic_load_relaxed(v)
72 1.3 christos #else
73 1.3 christos static uint64_t compact_time;
74 1.3 christos static uint64_t recycle_time;
75 1.3 christos static uint64_t rollback_time;
76 1.3 christos static isc_mutex_t qp_mutex = PTHREAD_MUTEX_INITIALIZER;
77 1.3 christos #define ISC_QP_ADD(v, a) \
78 1.3 christos ({ \
79 1.3 christos isc_mutex_lock(&qp_mutex); \
80 1.3 christos uint64_t x = (v) + (a); \
81 1.3 christos isc_mutex_unlock(&qp_mutex); \
82 1.3 christos x; \
83 1.3 christos })
84 1.3 christos #define ISC_QP_GET(v) \
85 1.3 christos ({ \
86 1.3 christos isc_mutex_lock(&qp_mutex); \
87 1.3 christos uint64_t x = (v); \
88 1.3 christos isc_mutex_unlock(&qp_mutex); \
89 1.3 christos x; \
90 1.3 christos })
91 1.3 christos #endif
92 1.3 christos
93 1.1 christos
94 1.1 christos /* for LOG_STATS() format strings */
95 1.1 christos #define PRItime " %" PRIu64 " ns "
96 1.1 christos
97 1.1 christos #if DNS_QP_LOG_STATS
98 1.1 christos #define LOG_STATS(...) \
99 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_QP, \
100 1.1 christos ISC_LOG_DEBUG(1), __VA_ARGS__)
101 1.1 christos #else
102 1.1 christos #define LOG_STATS(...)
103 1.1 christos #endif
104 1.1 christos
105 1.1 christos #if DNS_QP_TRACE
106 1.1 christos /*
107 1.1 christos * TRACE is generally used in allocation-related functions so it doesn't
108 1.1 christos * trace very high-frequency ops
109 1.1 christos */
110 1.1 christos #define TRACE(fmt, ...) \
111 1.1 christos do { \
112 1.1 christos if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(7))) { \
113 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, \
114 1.1 christos DNS_LOGMODULE_QP, ISC_LOG_DEBUG(7), \
115 1.1 christos "%s:%d:%s(qp %p uctx \"%s\"):t%u: " fmt, \
116 1.1 christos __FILE__, __LINE__, __func__, qp, \
117 1.1 christos qp ? TRIENAME(qp) : "(null)", isc_tid(), \
118 1.1 christos ##__VA_ARGS__); \
119 1.1 christos } \
120 1.1 christos } while (0)
121 1.1 christos #else
122 1.1 christos #define TRACE(...)
123 1.1 christos #endif
124 1.1 christos
125 1.1 christos /***********************************************************************
126 1.1 christos *
127 1.1 christos * converting DNS names to trie keys
128 1.1 christos */
129 1.1 christos
130 1.1 christos /*
131 1.1 christos * Number of distinct byte values, i.e. 256
132 1.1 christos */
133 1.1 christos #define BYTE_VALUES (UINT8_MAX + 1)
134 1.1 christos
135 1.1 christos /*
136 1.1 christos * Lookup table mapping bytes in DNS names to bit positions, used
137 1.1 christos * by dns_qpkey_fromname() to convert DNS names to qp-trie keys.
138 1.1 christos *
139 1.1 christos * Each element holds one or two bit positions, bit_one in the
140 1.1 christos * lower half and bit_two in the upper half.
141 1.1 christos *
142 1.1 christos * For common hostname characters, bit_two is zero (which cannot
143 1.1 christos * be a valid bit position).
144 1.1 christos *
145 1.1 christos * For others, bit_one is the escape bit, and bit_two is the
146 1.1 christos * position of the character within the escaped range.
147 1.1 christos */
148 1.1 christos uint16_t dns_qp_bits_for_byte[BYTE_VALUES] = { 0 };
149 1.1 christos
150 1.1 christos /*
151 1.1 christos * And the reverse, mapping bit positions to characters, so the tests
152 1.1 christos * can print diagnostics involving qp-trie keys.
153 1.1 christos *
154 1.1 christos * This table only handles the first bit in an escape sequence; we
155 1.1 christos * arrange that we can calculate the byte value for both bits by
156 1.1 christos * adding the the second bit to the first bit's byte value.
157 1.1 christos */
158 1.1 christos uint8_t dns_qp_byte_for_bit[SHIFT_OFFSET] = { 0 };
159 1.1 christos
160 1.1 christos /*
161 1.1 christos * Fill in the lookup tables at program startup. (It doesn't matter
162 1.1 christos * when this is initialized relative to other startup code.)
163 1.1 christos */
164 1.1 christos static void
165 1.1 christos initialize_bits_for_byte(void) ISC_CONSTRUCTOR;
166 1.1 christos
167 1.1 christos /*
168 1.1 christos * The bit positions for bytes inside labels have to be between
169 1.1 christos * SHIFT_BITMAP and SHIFT_OFFSET. (SHIFT_NOBYTE separates labels.)
170 1.1 christos *
171 1.1 christos * Each byte range in between common hostname characters has a different
172 1.1 christos * escape character, to preserve the correct lexical order.
173 1.1 christos *
174 1.1 christos * Escaped byte ranges mostly fit into the space available in the
175 1.1 christos * bitmap, except for those above 'z' (which is mostly bytes with the
176 1.1 christos * top bit set). So, when we reach the end of the bitmap we roll over
177 1.1 christos * to the next escape character.
178 1.1 christos *
179 1.1 christos * After filling the table we ensure that the bit positions for
180 1.1 christos * hostname characters and escape characters all fit.
181 1.1 christos */
182 1.1 christos static void
183 1.1 christos initialize_bits_for_byte(void) {
184 1.1 christos /* zero common character marker not a valid shift position */
185 1.1 christos INSIST(0 < SHIFT_BITMAP);
186 1.1 christos /* first bit is common byte or escape byte */
187 1.1 christos dns_qpshift_t bit_one = SHIFT_BITMAP;
188 1.1 christos /* second bit is position in escaped range */
189 1.1 christos dns_qpshift_t bit_two = SHIFT_BITMAP;
190 1.1 christos bool escaping = true;
191 1.1 christos
192 1.1 christos for (unsigned int byte = 0; byte < BYTE_VALUES; byte++) {
193 1.1 christos if (qp_common_character(byte)) {
194 1.1 christos escaping = false;
195 1.1 christos bit_one++;
196 1.1 christos dns_qp_byte_for_bit[bit_one] = byte;
197 1.1 christos dns_qp_bits_for_byte[byte] = bit_one;
198 1.1 christos } else if ('A' <= byte && byte <= 'Z') {
199 1.1 christos /* map upper case to lower case */
200 1.1 christos dns_qpshift_t after_esc = bit_one + 1;
201 1.1 christos dns_qpshift_t skip_punct = 'a' - '_';
202 1.1 christos dns_qpshift_t letter = byte - 'A';
203 1.1 christos dns_qpshift_t bit = after_esc + skip_punct + letter;
204 1.1 christos dns_qp_bits_for_byte[byte] = bit;
205 1.1 christos /* to simplify reverse conversion */
206 1.1 christos bit_two++;
207 1.1 christos } else {
208 1.1 christos /* non-hostname characters need to be escaped */
209 1.1 christos if (!escaping || bit_two >= SHIFT_OFFSET) {
210 1.1 christos escaping = true;
211 1.1 christos bit_one++;
212 1.1 christos dns_qp_byte_for_bit[bit_one] = byte;
213 1.1 christos bit_two = SHIFT_BITMAP;
214 1.1 christos }
215 1.1 christos dns_qp_bits_for_byte[byte] = bit_two << 8 | bit_one;
216 1.1 christos bit_two++;
217 1.1 christos }
218 1.1 christos }
219 1.1 christos ENSURE(bit_one < SHIFT_OFFSET);
220 1.1 christos }
221 1.1 christos
222 1.1 christos /*
223 1.1 christos * Convert a DNS name into a trie lookup key.
224 1.1 christos *
225 1.1 christos * Returns the length of the key.
226 1.1 christos *
227 1.1 christos * For performance we get our hands dirty in the guts of the name.
228 1.1 christos *
229 1.1 christos * We don't worry about the distinction between absolute and relative
230 1.1 christos * names. When the trie is only used with absolute names, the first byte
231 1.1 christos * of the key will always be SHIFT_NOBYTE and it will always be skipped
232 1.1 christos * when traversing the trie. So keeping the root label costs little, and
233 1.1 christos * it allows us to support tries of relative names too. In fact absolute
234 1.1 christos * and relative names can be mixed in the same trie without causing
235 1.1 christos * confusion, because the presence or absence of the initial
236 1.1 christos * SHIFT_NOBYTE in the key disambiguates them (exactly like a trailing
237 1.1 christos * dot in a zone file).
238 1.1 christos */
239 1.1 christos size_t
240 1.1 christos dns_qpkey_fromname(dns_qpkey_t key, const dns_name_t *name) {
241 1.1 christos size_t len, label;
242 1.1 christos dns_fixedname_t fixed;
243 1.1 christos
244 1.1 christos REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
245 1.1 christos
246 1.1 christos if (name->labels == 0) {
247 1.1 christos key[0] = SHIFT_NOBYTE;
248 1.1 christos return 0;
249 1.1 christos }
250 1.1 christos
251 1.1 christos if (name->offsets == NULL) {
252 1.1 christos dns_name_t *clone = dns_fixedname_initname(&fixed);
253 1.1 christos dns_name_clone(name, clone);
254 1.1 christos name = clone;
255 1.1 christos }
256 1.1 christos
257 1.1 christos len = 0;
258 1.1 christos label = name->labels;
259 1.1 christos while (label-- > 0) {
260 1.1 christos const uint8_t *ldata = name->ndata + name->offsets[label];
261 1.1 christos size_t label_len = *ldata++;
262 1.1 christos while (label_len-- > 0) {
263 1.1 christos uint16_t bits = dns_qp_bits_for_byte[*ldata++];
264 1.1 christos key[len++] = bits & 0xFF; /* bit_one */
265 1.1 christos if ((bits >> 8) != 0) { /* escape? */
266 1.1 christos key[len++] = bits >> 8; /* bit_two */
267 1.1 christos }
268 1.1 christos }
269 1.1 christos /* label terminator */
270 1.1 christos key[len++] = SHIFT_NOBYTE;
271 1.1 christos }
272 1.1 christos /* mark end with a double NOBYTE */
273 1.1 christos key[len] = SHIFT_NOBYTE;
274 1.1 christos ENSURE(len < sizeof(dns_qpkey_t));
275 1.1 christos return len;
276 1.1 christos }
277 1.1 christos
278 1.1 christos void
279 1.1 christos dns_qpkey_toname(const dns_qpkey_t key, size_t keylen, dns_name_t *name) {
280 1.1 christos size_t locs[DNS_NAME_MAXLABELS];
281 1.1 christos size_t loc = 0, opos = 0;
282 1.1 christos size_t offset;
283 1.1 christos
284 1.1 christos REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
285 1.1 christos REQUIRE(name->buffer != NULL);
286 1.1 christos REQUIRE(name->offsets != NULL);
287 1.1 christos
288 1.1 christos dns_name_reset(name);
289 1.1 christos
290 1.1 christos if (keylen == 0) {
291 1.1 christos return;
292 1.1 christos }
293 1.1 christos
294 1.1 christos /* Scan the key looking for label boundaries */
295 1.1 christos for (offset = 0; offset <= keylen; offset++) {
296 1.1 christos INSIST(key[offset] >= SHIFT_NOBYTE &&
297 1.1 christos key[offset] < SHIFT_OFFSET);
298 1.1 christos INSIST(loc < DNS_NAME_MAXLABELS);
299 1.1 christos if (qpkey_bit(key, keylen, offset) == SHIFT_NOBYTE) {
300 1.1 christos if (qpkey_bit(key, keylen, offset + 1) == SHIFT_NOBYTE)
301 1.1 christos {
302 1.1 christos locs[loc] = offset + 1;
303 1.1 christos goto scanned;
304 1.1 christos }
305 1.1 christos locs[loc++] = offset + 1;
306 1.1 christos } else if (offset == 0) {
307 1.1 christos /* This happens for a relative name */
308 1.1 christos locs[loc++] = offset;
309 1.1 christos }
310 1.1 christos }
311 1.1 christos UNREACHABLE();
312 1.1 christos scanned:
313 1.1 christos
314 1.1 christos /*
315 1.1 christos * In the key the labels are encoded in reverse order, so
316 1.1 christos * we step backward through the label boundaries, then forward
317 1.1 christos * through the labels, to create the DNS wire format data.
318 1.1 christos */
319 1.1 christos name->labels = loc;
320 1.1 christos while (loc-- > 0) {
321 1.1 christos uint8_t len = 0, *lenp = NULL;
322 1.1 christos
323 1.1 christos /* Add a length byte to the name data and set an offset */
324 1.1 christos lenp = isc_buffer_used(name->buffer);
325 1.1 christos isc_buffer_putuint8(name->buffer, 0);
326 1.1 christos name->offsets[opos++] = name->length++;
327 1.1 christos
328 1.1 christos /* Convert from escaped byte ranges to ASCII */
329 1.1 christos for (offset = locs[loc]; offset < locs[loc + 1] - 1; offset++) {
330 1.1 christos uint8_t bit = qpkey_bit(key, keylen, offset);
331 1.1 christos uint8_t byte = dns_qp_byte_for_bit[bit];
332 1.1 christos if (qp_common_character(byte)) {
333 1.1 christos isc_buffer_putuint8(name->buffer, byte);
334 1.1 christos } else {
335 1.1 christos byte += key[++offset] - SHIFT_BITMAP;
336 1.1 christos isc_buffer_putuint8(name->buffer, byte);
337 1.1 christos }
338 1.1 christos len++;
339 1.1 christos }
340 1.1 christos
341 1.1 christos name->length += len;
342 1.1 christos *lenp = len;
343 1.1 christos }
344 1.1 christos
345 1.1 christos /* Add a root label for absolute names */
346 1.1 christos if (key[0] == SHIFT_NOBYTE) {
347 1.1 christos name->attributes.absolute = true;
348 1.1 christos isc_buffer_putuint8(name->buffer, 0);
349 1.1 christos name->offsets[opos++] = name->length++;
350 1.1 christos name->labels++;
351 1.1 christos }
352 1.1 christos
353 1.1 christos name->ndata = isc_buffer_base(name->buffer);
354 1.1 christos }
355 1.1 christos
356 1.1 christos /*
357 1.1 christos * Sentinel value for equal keys
358 1.1 christos */
359 1.1 christos #define QPKEY_EQUAL (~(size_t)0)
360 1.1 christos
361 1.1 christos /*
362 1.1 christos * Compare two keys and return the offset where they differ.
363 1.1 christos *
364 1.1 christos * This offset is used to work out where a trie search diverged: when one
365 1.1 christos * of the keys is in the trie and one is not, the common prefix (up to the
366 1.1 christos * offset) is the part of the unknown key that exists in the trie. This
367 1.1 christos * matters for adding new keys or finding neighbours of missing keys.
368 1.1 christos *
369 1.1 christos * When the keys are different lengths it is possible (but unwise) for
370 1.1 christos * the longer key to be the same as the shorter key but with superfluous
371 1.1 christos * trailing SHIFT_NOBYTE elements. This makes the keys equal for the
372 1.1 christos * purpose of traversing the trie.
373 1.1 christos */
374 1.1 christos static size_t
375 1.1 christos qpkey_compare(const dns_qpkey_t key_a, const size_t keylen_a,
376 1.1 christos const dns_qpkey_t key_b, const size_t keylen_b) {
377 1.1 christos size_t keylen = ISC_MAX(keylen_a, keylen_b);
378 1.1 christos for (size_t offset = 0; offset < keylen; offset++) {
379 1.1 christos if (qpkey_bit(key_a, keylen_a, offset) !=
380 1.1 christos qpkey_bit(key_b, keylen_b, offset))
381 1.1 christos {
382 1.1 christos return offset;
383 1.1 christos }
384 1.1 christos }
385 1.1 christos return QPKEY_EQUAL;
386 1.1 christos }
387 1.1 christos
388 1.1 christos /***********************************************************************
389 1.1 christos *
390 1.1 christos * allocator wrappers
391 1.1 christos */
392 1.1 christos
393 1.1 christos #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
394 1.1 christos
395 1.1 christos /*
396 1.1 christos * Optionally (for debugging) during a copy-on-write transaction, use
397 1.1 christos * memory protection to ensure that the shared chunks are not modified.
398 1.1 christos * Once a chunk becomes shared, it remains read-only until it is freed.
399 1.1 christos * POSIX says we have to use mmap() to get an allocation that we can
400 1.1 christos * definitely pass to mprotect().
401 1.1 christos */
402 1.1 christos
403 1.1 christos static size_t
404 1.1 christos chunk_size_raw(void) {
405 1.1 christos size_t size = (size_t)sysconf(_SC_PAGE_SIZE);
406 1.1 christos return ISC_MAX(size, QP_CHUNK_BYTES);
407 1.1 christos }
408 1.1 christos
409 1.1 christos static void *
410 1.1 christos chunk_get_raw(dns_qp_t *qp) {
411 1.1 christos if (qp->write_protect) {
412 1.1 christos size_t size = chunk_size_raw();
413 1.1 christos void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
414 1.1 christos MAP_ANON | MAP_PRIVATE, -1, 0);
415 1.1 christos RUNTIME_CHECK(ptr != MAP_FAILED);
416 1.1 christos return ptr;
417 1.1 christos } else {
418 1.1 christos return isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES);
419 1.1 christos }
420 1.1 christos }
421 1.1 christos
422 1.1 christos static void
423 1.1 christos chunk_free_raw(dns_qp_t *qp, void *ptr) {
424 1.1 christos if (qp->write_protect) {
425 1.1 christos RUNTIME_CHECK(munmap(ptr, chunk_size_raw()) == 0);
426 1.1 christos } else {
427 1.1 christos isc_mem_free(qp->mctx, ptr);
428 1.1 christos }
429 1.1 christos }
430 1.1 christos
431 1.1 christos static void *
432 1.1 christos chunk_shrink_raw(dns_qp_t *qp, void *ptr, size_t bytes) {
433 1.1 christos if (qp->write_protect) {
434 1.1 christos return ptr;
435 1.1 christos } else {
436 1.1 christos return isc_mem_reallocate(qp->mctx, ptr, bytes);
437 1.1 christos }
438 1.1 christos }
439 1.1 christos
440 1.1 christos static void
441 1.1 christos write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) {
442 1.1 christos if (qp->write_protect) {
443 1.1 christos /* see transaction_open() wrt this special case */
444 1.1 christos if (qp->transaction_mode == QP_WRITE && chunk == qp->bump) {
445 1.1 christos return;
446 1.1 christos }
447 1.1 christos TRACE("chunk %u", chunk);
448 1.1 christos void *ptr = qp->base->ptr[chunk];
449 1.1 christos size_t size = chunk_size_raw();
450 1.1 christos RUNTIME_CHECK(mprotect(ptr, size, PROT_READ) >= 0);
451 1.1 christos }
452 1.1 christos }
453 1.1 christos
454 1.1 christos #else
455 1.1 christos
456 1.1 christos #define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES)
457 1.1 christos #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr)
458 1.1 christos
459 1.1 christos #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size)
460 1.1 christos
461 1.1 christos #define write_protect(qp, chunk)
462 1.1 christos
463 1.1 christos #endif
464 1.1 christos
465 1.1 christos /***********************************************************************
466 1.1 christos *
467 1.1 christos * allocator
468 1.1 christos */
469 1.1 christos
470 1.1 christos /*
471 1.1 christos * When we reuse the bump chunk across multiple write transactions,
472 1.1 christos * it can have an immutable prefix and a mutable suffix.
473 1.1 christos */
474 1.1 christos static inline bool
475 1.1 christos cells_immutable(dns_qp_t *qp, dns_qpref_t ref) {
476 1.1 christos dns_qpchunk_t chunk = ref_chunk(ref);
477 1.1 christos dns_qpcell_t cell = ref_cell(ref);
478 1.1 christos if (chunk == qp->bump) {
479 1.1 christos return cell < qp->fender;
480 1.1 christos } else {
481 1.1 christos return qp->usage[chunk].immutable;
482 1.1 christos }
483 1.1 christos }
484 1.1 christos
485 1.1 christos /*
486 1.1 christos * Create a fresh bump chunk and allocate some twigs from it.
487 1.1 christos */
488 1.1 christos static dns_qpref_t
489 1.1 christos chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) {
490 1.1 christos INSIST(qp->base->ptr[chunk] == NULL);
491 1.1 christos INSIST(qp->usage[chunk].used == 0);
492 1.1 christos INSIST(qp->usage[chunk].free == 0);
493 1.1 christos
494 1.1 christos qp->base->ptr[chunk] = chunk_get_raw(qp);
495 1.1 christos qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size };
496 1.1 christos qp->used_count += size;
497 1.1 christos qp->bump = chunk;
498 1.1 christos qp->fender = 0;
499 1.1 christos
500 1.1 christos if (qp->write_protect) {
501 1.1 christos TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]);
502 1.1 christos }
503 1.1 christos return make_ref(chunk, 0);
504 1.1 christos }
505 1.1 christos
506 1.1 christos /*
507 1.1 christos * This is used to grow the chunk arrays when they fill up. If the old
508 1.1 christos * base array is in use by readers, we must make a clone, otherwise we
509 1.1 christos * can reallocate in place.
510 1.1 christos *
511 1.1 christos * The isc_refcount_init() and qpbase_unref() in this function are a pair.
512 1.1 christos */
513 1.1 christos static void
514 1.1 christos realloc_chunk_arrays(dns_qp_t *qp, dns_qpchunk_t newmax) {
515 1.1 christos size_t oldptrs = sizeof(qp->base->ptr[0]) * qp->chunk_max;
516 1.1 christos size_t newptrs = sizeof(qp->base->ptr[0]) * newmax;
517 1.1 christos size_t size = STRUCT_FLEX_SIZE(qp->base, ptr, newmax);
518 1.1 christos
519 1.1 christos if (qp->base == NULL || qpbase_unref(qp)) {
520 1.1 christos qp->base = isc_mem_reallocate(qp->mctx, qp->base, size);
521 1.1 christos } else {
522 1.1 christos dns_qpbase_t *oldbase = qp->base;
523 1.1 christos qp->base = isc_mem_allocate(qp->mctx, size);
524 1.1 christos memmove(&qp->base->ptr[0], &oldbase->ptr[0], oldptrs);
525 1.1 christos }
526 1.1 christos memset(&qp->base->ptr[qp->chunk_max], 0, newptrs - oldptrs);
527 1.1 christos isc_refcount_init(&qp->base->refcount, 1);
528 1.1 christos qp->base->magic = QPBASE_MAGIC;
529 1.1 christos
530 1.1 christos /* usage array is exclusive to the writer */
531 1.1 christos size_t oldusage = sizeof(qp->usage[0]) * qp->chunk_max;
532 1.1 christos size_t newusage = sizeof(qp->usage[0]) * newmax;
533 1.1 christos qp->usage = isc_mem_reallocate(qp->mctx, qp->usage, newusage);
534 1.1 christos memset(&qp->usage[qp->chunk_max], 0, newusage - oldusage);
535 1.1 christos
536 1.1 christos qp->chunk_max = newmax;
537 1.1 christos
538 1.1 christos TRACE("qpbase %p usage %p max %u", qp->base, qp->usage, qp->chunk_max);
539 1.1 christos }
540 1.1 christos
541 1.1 christos /*
542 1.1 christos * There was no space in the bump chunk, so find a place to put a fresh
543 1.1 christos * chunk in the chunk arrays, then allocate some twigs from it.
544 1.1 christos */
545 1.1 christos static dns_qpref_t
546 1.1 christos alloc_slow(dns_qp_t *qp, dns_qpweight_t size) {
547 1.1 christos dns_qpchunk_t chunk;
548 1.1 christos
549 1.1 christos for (chunk = 0; chunk < qp->chunk_max; chunk++) {
550 1.1 christos if (!qp->usage[chunk].exists) {
551 1.1 christos return chunk_alloc(qp, chunk, size);
552 1.1 christos }
553 1.1 christos }
554 1.1 christos ENSURE(chunk == qp->chunk_max);
555 1.1 christos realloc_chunk_arrays(qp, GROWTH_FACTOR(chunk));
556 1.1 christos return chunk_alloc(qp, chunk, size);
557 1.1 christos }
558 1.1 christos
559 1.1 christos /*
560 1.1 christos * Ensure we are using a fresh bump chunk.
561 1.1 christos */
562 1.1 christos static void
563 1.1 christos alloc_reset(dns_qp_t *qp) {
564 1.1 christos (void)alloc_slow(qp, 0);
565 1.1 christos }
566 1.1 christos
567 1.1 christos /*
568 1.1 christos * Allocate some fresh twigs. This is the bump allocator fast path.
569 1.1 christos */
570 1.1 christos static inline dns_qpref_t
571 1.1 christos alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) {
572 1.1 christos dns_qpchunk_t chunk = qp->bump;
573 1.1 christos dns_qpcell_t cell = qp->usage[chunk].used;
574 1.1 christos
575 1.1 christos if (cell + size <= QP_CHUNK_SIZE) {
576 1.1 christos qp->usage[chunk].used += size;
577 1.1 christos qp->used_count += size;
578 1.1 christos return make_ref(chunk, cell);
579 1.1 christos } else {
580 1.1 christos return alloc_slow(qp, size);
581 1.1 christos }
582 1.1 christos }
583 1.1 christos
584 1.1 christos /*
585 1.1 christos * Record that some twigs are no longer being used, and if possible
586 1.1 christos * zero them to ensure that there isn't a spurious double detach when
587 1.1 christos * the chunk is later recycled.
588 1.1 christos *
589 1.1 christos * Returns true if the twigs were immediately destroyed.
590 1.1 christos *
591 1.1 christos * NOTE: the caller is responsible for attaching or detaching any
592 1.1 christos * leaves as required.
593 1.1 christos */
594 1.1 christos static inline bool
595 1.1 christos free_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) {
596 1.1 christos dns_qpchunk_t chunk = ref_chunk(twigs);
597 1.1 christos
598 1.1 christos qp->free_count += size;
599 1.1 christos qp->usage[chunk].free += size;
600 1.1 christos ENSURE(qp->free_count <= qp->used_count);
601 1.1 christos ENSURE(qp->usage[chunk].free <= qp->usage[chunk].used);
602 1.1 christos
603 1.1 christos if (cells_immutable(qp, twigs)) {
604 1.1 christos qp->hold_count += size;
605 1.1 christos ENSURE(qp->free_count >= qp->hold_count);
606 1.1 christos return false;
607 1.1 christos } else {
608 1.1 christos zero_twigs(ref_ptr(qp, twigs), size);
609 1.1 christos return true;
610 1.1 christos }
611 1.1 christos }
612 1.1 christos
613 1.1 christos /*
614 1.1 christos * When some twigs have been copied, and free_twigs() could not
615 1.1 christos * immediately destroy the old copy, we need to update the refcount
616 1.1 christos * on any leaves that were duplicated.
617 1.1 christos */
618 1.1 christos static void
619 1.1 christos attach_twigs(dns_qp_t *qp, dns_qpnode_t *twigs, dns_qpweight_t size) {
620 1.1 christos for (dns_qpweight_t pos = 0; pos < size; pos++) {
621 1.1 christos if (node_tag(&twigs[pos]) == LEAF_TAG) {
622 1.1 christos attach_leaf(qp, &twigs[pos]);
623 1.1 christos }
624 1.1 christos }
625 1.1 christos }
626 1.1 christos
627 1.1 christos /***********************************************************************
628 1.1 christos *
629 1.1 christos * chunk reclamation
630 1.1 christos */
631 1.1 christos
632 1.1 christos /*
633 1.1 christos * Is any of this chunk still in use?
634 1.1 christos */
635 1.1 christos static inline dns_qpcell_t
636 1.1 christos chunk_usage(dns_qp_t *qp, dns_qpchunk_t chunk) {
637 1.1 christos return qp->usage[chunk].used - qp->usage[chunk].free;
638 1.1 christos }
639 1.1 christos
640 1.1 christos /*
641 1.1 christos * We remove each empty chunk from the total counts when the chunk is
642 1.1 christos * freed, or when it is scheduled for safe memory reclamation. We check
643 1.1 christos * the chunk's phase to avoid discounting it twice in the latter case.
644 1.1 christos */
645 1.1 christos static void
646 1.1 christos chunk_discount(dns_qp_t *qp, dns_qpchunk_t chunk) {
647 1.1 christos if (qp->usage[chunk].discounted) {
648 1.1 christos return;
649 1.1 christos }
650 1.1 christos INSIST(qp->used_count >= qp->usage[chunk].used);
651 1.1 christos INSIST(qp->free_count >= qp->usage[chunk].free);
652 1.1 christos qp->used_count -= qp->usage[chunk].used;
653 1.1 christos qp->free_count -= qp->usage[chunk].free;
654 1.1 christos qp->usage[chunk].discounted = true;
655 1.1 christos }
656 1.1 christos
657 1.1 christos /*
658 1.1 christos * When a chunk is being recycled, we need to detach any leaves that
659 1.1 christos * remain, and free any `base` arrays that have been marked as unused.
660 1.1 christos */
661 1.1 christos static void
662 1.1 christos chunk_free(dns_qp_t *qp, dns_qpchunk_t chunk) {
663 1.1 christos if (qp->write_protect) {
664 1.1 christos TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]);
665 1.1 christos }
666 1.1 christos
667 1.1 christos dns_qpnode_t *n = qp->base->ptr[chunk];
668 1.1 christos for (dns_qpcell_t count = qp->usage[chunk].used; count > 0;
669 1.1 christos count--, n++)
670 1.1 christos {
671 1.1 christos if (node_tag(n) == LEAF_TAG && node_pointer(n) != NULL) {
672 1.1 christos detach_leaf(qp, n);
673 1.1 christos } else if (count > 1 && reader_valid(n)) {
674 1.1 christos dns_qpreader_t qpr;
675 1.1 christos unpack_reader(&qpr, n);
676 1.1 christos /* pairs with dns_qpmulti_commit() */
677 1.1 christos if (qpbase_unref(&qpr)) {
678 1.1 christos isc_mem_free(qp->mctx, qpr.base);
679 1.1 christos }
680 1.1 christos }
681 1.1 christos }
682 1.1 christos chunk_discount(qp, chunk);
683 1.1 christos chunk_free_raw(qp, qp->base->ptr[chunk]);
684 1.1 christos qp->base->ptr[chunk] = NULL;
685 1.1 christos qp->usage[chunk] = (qp_usage_t){};
686 1.1 christos }
687 1.1 christos
688 1.1 christos /*
689 1.1 christos * Free any chunks that we can while a trie is in use.
690 1.1 christos */
691 1.1 christos static void
692 1.1 christos recycle(dns_qp_t *qp) {
693 1.1 christos unsigned int free = 0;
694 1.1 christos
695 1.1 christos isc_nanosecs_t start = isc_time_monotonic();
696 1.1 christos
697 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
698 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
699 1.1 christos qp->usage[chunk].exists && !qp->usage[chunk].immutable)
700 1.1 christos {
701 1.1 christos chunk_free(qp, chunk);
702 1.1 christos free++;
703 1.1 christos }
704 1.1 christos }
705 1.1 christos
706 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start;
707 1.3 christos ISC_QP_ADD(recycle_time, time);
708 1.1 christos
709 1.1 christos if (free > 0) {
710 1.1 christos LOG_STATS("qp recycle" PRItime "free %u chunks", time, free);
711 1.1 christos LOG_STATS("qp recycle leaf %u live %u used %u free %u hold %u",
712 1.1 christos qp->leaf_count, qp->used_count - qp->free_count,
713 1.1 christos qp->used_count, qp->free_count, qp->hold_count);
714 1.1 christos }
715 1.1 christos }
716 1.1 christos
717 1.1 christos /*
718 1.1 christos * asynchronous cleanup
719 1.1 christos */
720 1.1 christos static void
721 1.1 christos reclaim_chunks_cb(struct rcu_head *arg) {
722 1.1 christos qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
723 1.1 christos REQUIRE(QPRCU_VALID(rcuctx));
724 1.1 christos dns_qpmulti_t *multi = rcuctx->multi;
725 1.1 christos REQUIRE(QPMULTI_VALID(multi));
726 1.1 christos
727 1.1 christos LOCK(&multi->mutex);
728 1.1 christos
729 1.1 christos dns_qp_t *qp = &multi->writer;
730 1.1 christos REQUIRE(QP_VALID(qp));
731 1.1 christos
732 1.1 christos unsigned int free = 0;
733 1.1 christos isc_nanosecs_t start = isc_time_monotonic();
734 1.1 christos
735 1.1 christos for (unsigned int i = 0; i < rcuctx->count; i++) {
736 1.1 christos dns_qpchunk_t chunk = rcuctx->chunk[i];
737 1.1 christos if (qp->usage[chunk].snapshot) {
738 1.1 christos /* cleanup when snapshot is destroyed */
739 1.1 christos qp->usage[chunk].snapfree = true;
740 1.1 christos } else {
741 1.1 christos chunk_free(qp, chunk);
742 1.1 christos free++;
743 1.1 christos }
744 1.1 christos }
745 1.1 christos
746 1.1 christos isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
747 1.1 christos STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
748 1.1 christos
749 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start;
750 1.3 christos ISC_QP_ADD(recycle_time, time);
751 1.1 christos
752 1.1 christos if (free > 0) {
753 1.1 christos LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free);
754 1.1 christos LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u",
755 1.1 christos qp->leaf_count, qp->used_count - qp->free_count,
756 1.1 christos qp->used_count, qp->free_count, qp->hold_count);
757 1.1 christos }
758 1.1 christos
759 1.1 christos UNLOCK(&multi->mutex);
760 1.1 christos }
761 1.1 christos
762 1.1 christos /*
763 1.1 christos * At the end of a transaction, schedule empty but immutable chunks
764 1.1 christos * for reclamation later.
765 1.1 christos */
766 1.1 christos static void
767 1.1 christos reclaim_chunks(dns_qpmulti_t *multi) {
768 1.1 christos dns_qp_t *qp = &multi->writer;
769 1.1 christos
770 1.1 christos unsigned int count = 0;
771 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
772 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
773 1.1 christos qp->usage[chunk].exists && qp->usage[chunk].immutable &&
774 1.1 christos !qp->usage[chunk].discounted)
775 1.1 christos {
776 1.1 christos count++;
777 1.1 christos }
778 1.1 christos }
779 1.1 christos
780 1.1 christos if (count == 0) {
781 1.1 christos return;
782 1.1 christos }
783 1.1 christos
784 1.1 christos qp_rcuctx_t *rcuctx =
785 1.1 christos isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, count));
786 1.1 christos *rcuctx = (qp_rcuctx_t){
787 1.1 christos .magic = QPRCU_MAGIC,
788 1.1 christos .multi = multi,
789 1.1 christos .count = count,
790 1.1 christos };
791 1.1 christos isc_mem_attach(qp->mctx, &rcuctx->mctx);
792 1.1 christos
793 1.1 christos unsigned int i = 0;
794 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
795 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 &&
796 1.1 christos qp->usage[chunk].exists && qp->usage[chunk].immutable &&
797 1.1 christos !qp->usage[chunk].discounted)
798 1.1 christos {
799 1.1 christos rcuctx->chunk[i++] = chunk;
800 1.1 christos chunk_discount(qp, chunk);
801 1.1 christos }
802 1.1 christos }
803 1.1 christos
804 1.1 christos call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
805 1.1 christos
806 1.1 christos LOG_STATS("qp will reclaim %u chunks", count);
807 1.1 christos }
808 1.1 christos
809 1.1 christos /*
810 1.1 christos * When a snapshot is destroyed, clean up chunks that need free()ing
811 1.1 christos * and are not used by any remaining snapshots.
812 1.1 christos */
813 1.1 christos static void
814 1.1 christos marksweep_chunks(dns_qpmulti_t *multi) {
815 1.1 christos unsigned int free = 0;
816 1.1 christos
817 1.1 christos isc_nanosecs_t start = isc_time_monotonic();
818 1.1 christos
819 1.1 christos dns_qp_t *qpw = &multi->writer;
820 1.1 christos
821 1.1 christos for (dns_qpsnap_t *qps = ISC_LIST_HEAD(multi->snapshots); qps != NULL;
822 1.1 christos qps = ISC_LIST_NEXT(qps, link))
823 1.1 christos {
824 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qps->chunk_max; chunk++) {
825 1.1 christos if (qps->base->ptr[chunk] != NULL) {
826 1.1 christos INSIST(qps->base->ptr[chunk] ==
827 1.1 christos qpw->base->ptr[chunk]);
828 1.1 christos qpw->usage[chunk].snapmark = true;
829 1.1 christos }
830 1.1 christos }
831 1.1 christos }
832 1.1 christos
833 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) {
834 1.1 christos qpw->usage[chunk].snapshot = qpw->usage[chunk].snapmark;
835 1.1 christos qpw->usage[chunk].snapmark = false;
836 1.1 christos if (qpw->usage[chunk].snapfree && !qpw->usage[chunk].snapshot) {
837 1.1 christos chunk_free(qpw, chunk);
838 1.1 christos free++;
839 1.1 christos }
840 1.1 christos }
841 1.1 christos
842 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start;
843 1.3 christos ISC_QP_ADD(recycle_time, time);
844 1.1 christos
845 1.1 christos if (free > 0) {
846 1.1 christos LOG_STATS("qp marksweep" PRItime "free %u chunks", time, free);
847 1.1 christos LOG_STATS(
848 1.1 christos "qp marksweep leaf %u live %u used %u free %u hold %u",
849 1.1 christos qpw->leaf_count, qpw->used_count - qpw->free_count,
850 1.1 christos qpw->used_count, qpw->free_count, qpw->hold_count);
851 1.1 christos }
852 1.1 christos }
853 1.1 christos
854 1.1 christos /***********************************************************************
855 1.1 christos *
856 1.1 christos * garbage collector
857 1.1 christos */
858 1.1 christos
859 1.1 christos /*
860 1.1 christos * Move a branch node's twigs to the `bump` chunk, for copy-on-write
861 1.1 christos * or for garbage collection. We don't update the node in place
862 1.1 christos * because `compact_recursive()` does not ensure the node itself is
863 1.1 christos * mutable until after it discovers evacuation was necessary.
864 1.1 christos *
865 1.1 christos * If free_twigs() could not immediately destroy the old twigs, we have
866 1.1 christos * to re-attach to any leaves.
867 1.1 christos */
868 1.1 christos static dns_qpref_t
869 1.1 christos evacuate(dns_qp_t *qp, dns_qpnode_t *n) {
870 1.1 christos dns_qpweight_t size = branch_twigs_size(n);
871 1.1 christos dns_qpref_t old_ref = branch_twigs_ref(n);
872 1.1 christos dns_qpref_t new_ref = alloc_twigs(qp, size);
873 1.1 christos dns_qpnode_t *old_twigs = ref_ptr(qp, old_ref);
874 1.1 christos dns_qpnode_t *new_twigs = ref_ptr(qp, new_ref);
875 1.1 christos
876 1.1 christos move_twigs(new_twigs, old_twigs, size);
877 1.1 christos if (!free_twigs(qp, old_ref, size)) {
878 1.1 christos attach_twigs(qp, new_twigs, size);
879 1.1 christos }
880 1.1 christos
881 1.1 christos return new_ref;
882 1.1 christos }
883 1.1 christos
884 1.1 christos /*
885 1.1 christos * Immutable nodes need copy-on-write. As we walk down the trie finding the
886 1.1 christos * right place to modify, make_root_mutable() and make_twigs_mutable()
887 1.1 christos * are called to ensure that immutable nodes on the path from the root are
888 1.1 christos * copied to a mutable chunk.
889 1.1 christos */
890 1.1 christos
891 1.1 christos static inline dns_qpnode_t *
892 1.1 christos make_root_mutable(dns_qp_t *qp) {
893 1.1 christos if (cells_immutable(qp, qp->root_ref)) {
894 1.1 christos qp->root_ref = evacuate(qp, MOVABLE_ROOT(qp));
895 1.1 christos }
896 1.1 christos return ref_ptr(qp, qp->root_ref);
897 1.1 christos }
898 1.1 christos
899 1.1 christos static inline void
900 1.1 christos make_twigs_mutable(dns_qp_t *qp, dns_qpnode_t *n) {
901 1.1 christos if (cells_immutable(qp, branch_twigs_ref(n))) {
902 1.1 christos *n = make_node(branch_index(n), evacuate(qp, n));
903 1.1 christos }
904 1.1 christos }
905 1.1 christos
906 1.1 christos /*
907 1.1 christos * Compact the trie by traversing the whole thing recursively, copying
908 1.1 christos * bottom-up as required. The aim is to avoid evacuation as much as
909 1.1 christos * possible, but when parts of the trie are immutable, we need to evacuate
910 1.1 christos * the paths from the root to the parts of the trie that occupy
911 1.1 christos * fragmented chunks.
912 1.1 christos *
913 1.1 christos * Without the QP_MIN_USED check, the algorithm will leave the trie
914 1.1 christos * unchanged. If the children are all leaves, the loop changes nothing,
915 1.1 christos * so we will return this node's original ref. If all of the children
916 1.1 christos * that are branches did not need moving, again, the loop changes
917 1.1 christos * nothing. So the evacuation check is the only place that the
918 1.1 christos * algorithm introduces ref changes, that then bubble up towards the
919 1.1 christos * root through the logic inside the loop.
920 1.1 christos */
921 1.1 christos static dns_qpref_t
922 1.1 christos compact_recursive(dns_qp_t *qp, dns_qpnode_t *parent) {
923 1.1 christos dns_qpweight_t size = branch_twigs_size(parent);
924 1.1 christos dns_qpref_t twigs_ref = branch_twigs_ref(parent);
925 1.1 christos dns_qpchunk_t chunk = ref_chunk(twigs_ref);
926 1.1 christos
927 1.1 christos if (qp->compact_all ||
928 1.1 christos (chunk != qp->bump && chunk_usage(qp, chunk) < QP_MIN_USED))
929 1.1 christos {
930 1.1 christos twigs_ref = evacuate(qp, parent);
931 1.1 christos }
932 1.1 christos bool immutable = cells_immutable(qp, twigs_ref);
933 1.1 christos for (dns_qpweight_t pos = 0; pos < size; pos++) {
934 1.1 christos dns_qpnode_t *child = ref_ptr(qp, twigs_ref) + pos;
935 1.1 christos if (!is_branch(child)) {
936 1.1 christos continue;
937 1.1 christos }
938 1.1 christos dns_qpref_t old_grandtwigs = branch_twigs_ref(child);
939 1.1 christos dns_qpref_t new_grandtwigs = compact_recursive(qp, child);
940 1.1 christos if (old_grandtwigs == new_grandtwigs) {
941 1.1 christos continue;
942 1.1 christos }
943 1.1 christos if (immutable) {
944 1.1 christos twigs_ref = evacuate(qp, parent);
945 1.1 christos /* the twigs have moved */
946 1.1 christos child = ref_ptr(qp, twigs_ref) + pos;
947 1.1 christos immutable = false;
948 1.1 christos }
949 1.1 christos *child = make_node(branch_index(child), new_grandtwigs);
950 1.1 christos }
951 1.1 christos return twigs_ref;
952 1.1 christos }
953 1.1 christos
954 1.1 christos static void
955 1.1 christos compact(dns_qp_t *qp) {
956 1.1 christos LOG_STATS("qp compact before leaf %u live %u used %u free %u hold %u",
957 1.1 christos qp->leaf_count, qp->used_count - qp->free_count,
958 1.1 christos qp->used_count, qp->free_count, qp->hold_count);
959 1.1 christos
960 1.1 christos isc_nanosecs_t start = isc_time_monotonic();
961 1.1 christos
962 1.1 christos if (qp->usage[qp->bump].free > QP_MAX_FREE) {
963 1.1 christos alloc_reset(qp);
964 1.1 christos }
965 1.1 christos
966 1.1 christos if (qp->leaf_count > 0) {
967 1.1 christos qp->root_ref = compact_recursive(qp, MOVABLE_ROOT(qp));
968 1.1 christos }
969 1.1 christos qp->compact_all = false;
970 1.1 christos
971 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start;
972 1.3 christos ISC_QP_ADD(compact_time, time);
973 1.1 christos
974 1.1 christos LOG_STATS("qp compact" PRItime
975 1.1 christos "leaf %u live %u used %u free %u hold %u",
976 1.1 christos time, qp->leaf_count, qp->used_count - qp->free_count,
977 1.1 christos qp->used_count, qp->free_count, qp->hold_count);
978 1.1 christos }
979 1.1 christos
980 1.1 christos void
981 1.1 christos dns_qp_compact(dns_qp_t *qp, dns_qpgc_t mode) {
982 1.1 christos REQUIRE(QP_VALID(qp));
983 1.1 christos if (mode == DNS_QPGC_MAYBE && !QP_NEEDGC(qp)) {
984 1.1 christos return;
985 1.1 christos }
986 1.1 christos if (mode == DNS_QPGC_ALL) {
987 1.1 christos alloc_reset(qp);
988 1.1 christos qp->compact_all = true;
989 1.1 christos }
990 1.1 christos compact(qp);
991 1.1 christos recycle(qp);
992 1.1 christos }
993 1.1 christos
994 1.1 christos /*
995 1.1 christos * Free some twigs and (if they were destroyed immediately so that the
996 1.1 christos * result from QP_MAX_GARBAGE can change) compact the trie if necessary.
997 1.1 christos *
998 1.1 christos * This is called by the trie modification API entry points. The
999 1.1 christos * free_twigs() function requires the caller to attach or detach any
1000 1.1 christos * leaves as necessary. Callers of squash_twigs() satisfy this
1001 1.1 christos * requirement by calling make_twigs_mutable().
1002 1.1 christos *
1003 1.1 christos * Aside: In typical garbage collectors, compaction is triggered when
1004 1.1 christos * the allocator runs out of space. But that is because typical garbage
1005 1.1 christos * collectors do not know how much memory can be recovered, so they must
1006 1.1 christos * find out by scanning the heap. The qp-trie code was originally
1007 1.1 christos * designed to use malloc() and free(), so it has more information about
1008 1.1 christos * when garbage collection might be worthwhile. Hence we can trigger
1009 1.1 christos * collection when garbage passes a threshold.
1010 1.1 christos *
1011 1.1 christos * XXXFANF: If we need to avoid latency outliers caused by compaction in
1012 1.1 christos * write transactions, we can check qp->transaction_mode here.
1013 1.1 christos */
1014 1.1 christos static inline bool
1015 1.1 christos squash_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) {
1016 1.1 christos bool destroyed = free_twigs(qp, twigs, size);
1017 1.1 christos if (destroyed && QP_AUTOGC(qp)) {
1018 1.1 christos compact(qp);
1019 1.1 christos recycle(qp);
1020 1.1 christos /*
1021 1.1 christos * This shouldn't happen if the garbage collector is
1022 1.1 christos * working correctly. We can recover at the cost of some
1023 1.1 christos * time and space, but recovery should be cheaper than
1024 1.1 christos * letting compact+recycle fail repeatedly.
1025 1.1 christos */
1026 1.1 christos if (QP_AUTOGC(qp)) {
1027 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1028 1.1 christos DNS_LOGMODULE_QP, ISC_LOG_NOTICE,
1029 1.1 christos "qp %p uctx \"%s\" compact/recycle "
1030 1.1 christos "failed to recover any space, "
1031 1.1 christos "scheduling a full compaction",
1032 1.1 christos qp, TRIENAME(qp));
1033 1.1 christos qp->compact_all = true;
1034 1.1 christos }
1035 1.1 christos }
1036 1.1 christos return destroyed;
1037 1.1 christos }
1038 1.1 christos
1039 1.1 christos /***********************************************************************
1040 1.1 christos *
1041 1.1 christos * public accessors for memory management internals
1042 1.1 christos */
1043 1.1 christos
1044 1.1 christos dns_qp_memusage_t
1045 1.1 christos dns_qp_memusage(dns_qp_t *qp) {
1046 1.1 christos REQUIRE(QP_VALID(qp));
1047 1.1 christos
1048 1.1 christos dns_qp_memusage_t memusage = {
1049 1.1 christos .uctx = qp->uctx,
1050 1.1 christos .leaves = qp->leaf_count,
1051 1.1 christos .live = qp->used_count - qp->free_count,
1052 1.1 christos .used = qp->used_count,
1053 1.1 christos .hold = qp->hold_count,
1054 1.1 christos .free = qp->free_count,
1055 1.1 christos .node_size = sizeof(dns_qpnode_t),
1056 1.1 christos .chunk_size = QP_CHUNK_SIZE,
1057 1.1 christos .fragmented = QP_NEEDGC(qp),
1058 1.1 christos };
1059 1.1 christos
1060 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
1061 1.1 christos if (qp->base->ptr[chunk] != NULL) {
1062 1.1 christos memusage.chunk_count += 1;
1063 1.1 christos }
1064 1.1 christos }
1065 1.1 christos
1066 1.1 christos /*
1067 1.1 christos * XXXFANF does not subtract chunks that have been shrunk,
1068 1.1 christos * and does not count unreclaimed dns_qpbase_t objects
1069 1.1 christos */
1070 1.1 christos memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES +
1071 1.1 christos qp->chunk_max * sizeof(qp->base->ptr[0]) +
1072 1.1 christos qp->chunk_max * sizeof(qp->usage[0]);
1073 1.1 christos
1074 1.1 christos return memusage;
1075 1.1 christos }
1076 1.1 christos
1077 1.1 christos dns_qp_memusage_t
1078 1.1 christos dns_qpmulti_memusage(dns_qpmulti_t *multi) {
1079 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1080 1.1 christos LOCK(&multi->mutex);
1081 1.1 christos
1082 1.1 christos dns_qp_t *qp = &multi->writer;
1083 1.1 christos INSIST(QP_VALID(qp));
1084 1.1 christos
1085 1.1 christos dns_qp_memusage_t memusage = dns_qp_memusage(qp);
1086 1.1 christos
1087 1.1 christos if (qp->transaction_mode == QP_UPDATE) {
1088 1.1 christos memusage.bytes -= QP_CHUNK_BYTES;
1089 1.1 christos memusage.bytes += qp->usage[qp->bump].used *
1090 1.1 christos sizeof(dns_qpnode_t);
1091 1.1 christos }
1092 1.1 christos
1093 1.1 christos UNLOCK(&multi->mutex);
1094 1.1 christos return memusage;
1095 1.1 christos }
1096 1.1 christos
1097 1.1 christos void
1098 1.1 christos dns_qp_gctime(isc_nanosecs_t *compact_p, isc_nanosecs_t *recycle_p,
1099 1.1 christos isc_nanosecs_t *rollback_p) {
1100 1.3 christos *compact_p = ISC_QP_GET(compact_time);
1101 1.3 christos *recycle_p = ISC_QP_GET(recycle_time);
1102 1.3 christos *rollback_p = ISC_QP_GET(rollback_time);
1103 1.1 christos }
1104 1.1 christos
1105 1.1 christos /***********************************************************************
1106 1.1 christos *
1107 1.1 christos * read-write transactions
1108 1.1 christos */
1109 1.1 christos
1110 1.1 christos static dns_qp_t *
1111 1.1 christos transaction_open(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1112 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1113 1.1 christos REQUIRE(qptp != NULL && *qptp == NULL);
1114 1.1 christos
1115 1.1 christos LOCK(&multi->mutex);
1116 1.1 christos
1117 1.1 christos dns_qp_t *qp = &multi->writer;
1118 1.1 christos INSIST(QP_VALID(qp));
1119 1.1 christos
1120 1.1 christos /*
1121 1.1 christos * Mark existing chunks as immutable.
1122 1.1 christos *
1123 1.1 christos * Aside: The bump chunk is special: in a series of write
1124 1.1 christos * transactions the bump chunk is reused; the first part (up
1125 1.1 christos * to fender) is immutable, the rest mutable. But we set its
1126 1.1 christos * immutable flag so that when the bump chunk fills up, the
1127 1.1 christos * first part continues to be treated as immutable. (And the
1128 1.1 christos * rest of the chunk too, but that's OK.)
1129 1.1 christos */
1130 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
1131 1.1 christos if (qp->usage[chunk].exists) {
1132 1.1 christos qp->usage[chunk].immutable = true;
1133 1.1 christos write_protect(qp, chunk);
1134 1.1 christos }
1135 1.1 christos }
1136 1.1 christos
1137 1.1 christos /*
1138 1.1 christos * Ensure QP_AUTOGC() ignores free space in immutable chunks.
1139 1.1 christos */
1140 1.1 christos qp->hold_count = qp->free_count;
1141 1.1 christos
1142 1.1 christos *qptp = qp;
1143 1.1 christos return qp;
1144 1.1 christos }
1145 1.1 christos
1146 1.1 christos /*
1147 1.1 christos * a write is light
1148 1.1 christos *
1149 1.1 christos * We need to ensure we allocate from a fresh chunk if the last transaction
1150 1.1 christos * shrunk the bump chunk; but usually in a sequence of write transactions
1151 1.1 christos * we just put `fender` at the point where we started this generation.
1152 1.1 christos *
1153 1.1 christos * (Aside: Instead of keeping the previous transaction's mode, I
1154 1.1 christos * considered forcing allocation into the slow path by fiddling with
1155 1.1 christos * the bump chunk's usage counters. But that is troublesome because
1156 1.1 christos * `chunk_free()` needs to know how much of the chunk to scan.)
1157 1.1 christos */
1158 1.1 christos void
1159 1.1 christos dns_qpmulti_write(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1160 1.1 christos dns_qp_t *qp = transaction_open(multi, qptp);
1161 1.1 christos TRACE("");
1162 1.1 christos
1163 1.1 christos if (qp->transaction_mode == QP_WRITE) {
1164 1.1 christos qp->fender = qp->usage[qp->bump].used;
1165 1.1 christos } else {
1166 1.1 christos alloc_reset(qp);
1167 1.1 christos }
1168 1.1 christos qp->transaction_mode = QP_WRITE;
1169 1.1 christos }
1170 1.1 christos
1171 1.1 christos /*
1172 1.1 christos * an update is heavier
1173 1.1 christos *
1174 1.1 christos * We always reset the allocator to the start of a fresh chunk,
1175 1.1 christos * because the previous transaction was probably an update that shrunk
1176 1.1 christos * the bump chunk. It simplifies rollback because `fender` is always zero.
1177 1.1 christos *
1178 1.1 christos * To rollback a transaction, we need to reset all the allocation
1179 1.1 christos * counters to their previous state, in particular we need to un-free
1180 1.1 christos * any nodes that were copied to make them mutable. This means we need
1181 1.1 christos * to make a copy of basically the whole `dns_qp_t writer`: everything
1182 1.1 christos * but the chunks holding the trie nodes.
1183 1.1 christos *
1184 1.1 christos * We do most of the transaction setup before creating the rollback
1185 1.1 christos * state so that after rollback we have a correct idea of which chunks
1186 1.1 christos * are immutable, and so we have the correct transaction mode to make
1187 1.1 christos * the next transaction allocate a new bump chunk. The exception is
1188 1.1 christos * resetting the allocator, which we do after creating the rollback
1189 1.1 christos * state; if this transaction is rolled back then the next transaction
1190 1.1 christos * will start from the rollback state and also reset the allocator as
1191 1.1 christos * one of its first actions.
1192 1.1 christos */
1193 1.1 christos void
1194 1.1 christos dns_qpmulti_update(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1195 1.1 christos dns_qp_t *qp = transaction_open(multi, qptp);
1196 1.1 christos TRACE("");
1197 1.1 christos
1198 1.1 christos qp->transaction_mode = QP_UPDATE;
1199 1.1 christos
1200 1.1 christos dns_qp_t *rollback = isc_mem_allocate(qp->mctx, sizeof(*rollback));
1201 1.1 christos memmove(rollback, qp, sizeof(*rollback));
1202 1.1 christos /* can be uninitialized on the first transaction */
1203 1.1 christos if (rollback->base != NULL) {
1204 1.1 christos INSIST(QPBASE_VALID(rollback->base));
1205 1.1 christos INSIST(qp->usage != NULL && qp->chunk_max > 0);
1206 1.1 christos /* paired with either _commit() or _rollback() */
1207 1.1 christos isc_refcount_increment(&rollback->base->refcount);
1208 1.1 christos size_t usage_bytes = sizeof(qp->usage[0]) * qp->chunk_max;
1209 1.1 christos rollback->usage = isc_mem_allocate(qp->mctx, usage_bytes);
1210 1.1 christos memmove(rollback->usage, qp->usage, usage_bytes);
1211 1.1 christos }
1212 1.1 christos INSIST(multi->rollback == NULL);
1213 1.1 christos multi->rollback = rollback;
1214 1.1 christos
1215 1.1 christos alloc_reset(qp);
1216 1.1 christos }
1217 1.1 christos
1218 1.1 christos void
1219 1.1 christos dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1220 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1221 1.1 christos REQUIRE(qptp != NULL && *qptp == &multi->writer);
1222 1.1 christos REQUIRE(multi->writer.transaction_mode == QP_WRITE ||
1223 1.1 christos multi->writer.transaction_mode == QP_UPDATE);
1224 1.1 christos
1225 1.1 christos dns_qp_t *qp = *qptp;
1226 1.1 christos TRACE("");
1227 1.1 christos
1228 1.1 christos if (qp->transaction_mode == QP_UPDATE) {
1229 1.1 christos INSIST(multi->rollback != NULL);
1230 1.1 christos /* paired with dns_qpmulti_update() */
1231 1.1 christos if (qpbase_unref(multi->rollback)) {
1232 1.1 christos isc_mem_free(qp->mctx, multi->rollback->base);
1233 1.1 christos }
1234 1.1 christos if (multi->rollback->usage != NULL) {
1235 1.1 christos isc_mem_free(qp->mctx, multi->rollback->usage);
1236 1.1 christos }
1237 1.1 christos isc_mem_free(qp->mctx, multi->rollback);
1238 1.1 christos }
1239 1.1 christos INSIST(multi->rollback == NULL);
1240 1.1 christos
1241 1.1 christos /* not the first commit? */
1242 1.1 christos if (multi->reader_ref != INVALID_REF) {
1243 1.1 christos INSIST(cells_immutable(qp, multi->reader_ref));
1244 1.1 christos free_twigs(qp, multi->reader_ref, READER_SIZE);
1245 1.1 christos }
1246 1.1 christos
1247 1.1 christos if (qp->transaction_mode == QP_UPDATE) {
1248 1.1 christos /* minimize memory overhead */
1249 1.1 christos compact(qp);
1250 1.1 christos multi->reader_ref = alloc_twigs(qp, READER_SIZE);
1251 1.1 christos qp->base->ptr[qp->bump] = chunk_shrink_raw(
1252 1.1 christos qp, qp->base->ptr[qp->bump],
1253 1.1 christos qp->usage[qp->bump].used * sizeof(dns_qpnode_t));
1254 1.1 christos } else {
1255 1.1 christos multi->reader_ref = alloc_twigs(qp, READER_SIZE);
1256 1.1 christos }
1257 1.1 christos
1258 1.1 christos /* anchor a new version of the trie */
1259 1.1 christos dns_qpnode_t *reader = ref_ptr(qp, multi->reader_ref);
1260 1.1 christos make_reader(reader, multi);
1261 1.1 christos /* paired with chunk_free() */
1262 1.1 christos isc_refcount_increment(&qp->base->refcount);
1263 1.1 christos
1264 1.1 christos rcu_assign_pointer(multi->reader, reader); /* COMMIT */
1265 1.1 christos
1266 1.1 christos /* clean up what we can right now */
1267 1.1 christos if (qp->transaction_mode == QP_UPDATE || QP_NEEDGC(qp)) {
1268 1.1 christos recycle(qp);
1269 1.1 christos }
1270 1.1 christos
1271 1.1 christos /* schedule the rest for later */
1272 1.1 christos reclaim_chunks(multi);
1273 1.1 christos
1274 1.1 christos *qptp = NULL;
1275 1.1 christos UNLOCK(&multi->mutex);
1276 1.1 christos }
1277 1.1 christos
1278 1.1 christos /*
1279 1.1 christos * Throw away everything that was allocated during this transaction.
1280 1.1 christos */
1281 1.1 christos void
1282 1.1 christos dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) {
1283 1.1 christos unsigned int free = 0;
1284 1.1 christos
1285 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1286 1.1 christos REQUIRE(multi->writer.transaction_mode == QP_UPDATE);
1287 1.1 christos REQUIRE(qptp != NULL && *qptp == &multi->writer);
1288 1.1 christos
1289 1.1 christos dns_qp_t *qp = *qptp;
1290 1.1 christos TRACE("");
1291 1.1 christos
1292 1.1 christos isc_nanosecs_t start = isc_time_monotonic();
1293 1.1 christos
1294 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
1295 1.1 christos if (qp->base->ptr[chunk] != NULL && !qp->usage[chunk].immutable)
1296 1.1 christos {
1297 1.1 christos chunk_free(qp, chunk);
1298 1.1 christos /*
1299 1.1 christos * we need to clear its base pointer in the rollback
1300 1.1 christos * trie, in case the arrays were resized
1301 1.1 christos */
1302 1.1 christos if (chunk < multi->rollback->chunk_max) {
1303 1.1 christos INSIST(!multi->rollback->usage[chunk].exists);
1304 1.1 christos multi->rollback->base->ptr[chunk] = NULL;
1305 1.1 christos }
1306 1.1 christos free++;
1307 1.1 christos }
1308 1.1 christos }
1309 1.1 christos
1310 1.1 christos /*
1311 1.1 christos * multi->rollback->base and multi->writer->base are the same,
1312 1.1 christos * unless there was a realloc_chunk_arrays() during the transaction
1313 1.1 christos */
1314 1.1 christos if (qpbase_unref(qp)) {
1315 1.1 christos /* paired with dns_qpmulti_update() */
1316 1.1 christos isc_mem_free(qp->mctx, qp->base);
1317 1.1 christos }
1318 1.1 christos isc_mem_free(qp->mctx, qp->usage);
1319 1.1 christos
1320 1.1 christos /* reset allocator state */
1321 1.1 christos INSIST(multi->rollback != NULL);
1322 1.1 christos memmove(qp, multi->rollback, sizeof(*qp));
1323 1.1 christos isc_mem_free(qp->mctx, multi->rollback);
1324 1.1 christos INSIST(multi->rollback == NULL);
1325 1.1 christos
1326 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start;
1327 1.3 christos ISC_QP_ADD(rollback_time, time);
1328 1.1 christos
1329 1.1 christos LOG_STATS("qp rollback" PRItime "free %u chunks", time, free);
1330 1.1 christos
1331 1.1 christos *qptp = NULL;
1332 1.1 christos UNLOCK(&multi->mutex);
1333 1.1 christos }
1334 1.1 christos
1335 1.1 christos /***********************************************************************
1336 1.1 christos *
1337 1.1 christos * read-only transactions
1338 1.1 christos */
1339 1.1 christos
1340 1.1 christos static dns_qpmulti_t *
1341 1.1 christos reader_open(dns_qpmulti_t *multi, dns_qpreadable_t qpr) {
1342 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr);
1343 1.1 christos dns_qpnode_t *reader = rcu_dereference(multi->reader);
1344 1.1 christos if (reader == NULL) {
1345 1.1 christos QP_INIT(qp, multi->writer.methods, multi->writer.uctx);
1346 1.1 christos } else {
1347 1.1 christos multi = unpack_reader(qp, reader);
1348 1.1 christos }
1349 1.1 christos return multi;
1350 1.1 christos }
1351 1.1 christos
1352 1.1 christos /*
1353 1.1 christos * a query is light
1354 1.1 christos */
1355 1.1 christos
1356 1.1 christos void
1357 1.1 christos dns_qpmulti_query(dns_qpmulti_t *multi, dns_qpread_t *qp) {
1358 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1359 1.1 christos REQUIRE(qp != NULL);
1360 1.1 christos
1361 1.1 christos qp->tid = isc_tid();
1362 1.1 christos rcu_read_lock();
1363 1.1 christos
1364 1.1 christos dns_qpmulti_t *whence = reader_open(multi, qp);
1365 1.1 christos INSIST(whence == multi);
1366 1.1 christos }
1367 1.1 christos
1368 1.1 christos void
1369 1.1 christos dns_qpread_destroy(dns_qpmulti_t *multi, dns_qpread_t *qp) {
1370 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1371 1.1 christos REQUIRE(QP_VALID(qp));
1372 1.1 christos REQUIRE(qp->tid == isc_tid());
1373 1.1 christos *qp = (dns_qpread_t){};
1374 1.1 christos rcu_read_unlock();
1375 1.1 christos }
1376 1.1 christos
1377 1.1 christos /*
1378 1.1 christos * a snapshot is heavy
1379 1.1 christos */
1380 1.1 christos
1381 1.1 christos void
1382 1.1 christos dns_qpmulti_snapshot(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) {
1383 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1384 1.1 christos REQUIRE(qpsp != NULL && *qpsp == NULL);
1385 1.1 christos
1386 1.1 christos rcu_read_lock();
1387 1.1 christos
1388 1.1 christos LOCK(&multi->mutex);
1389 1.1 christos
1390 1.1 christos dns_qp_t *qpw = &multi->writer;
1391 1.1 christos size_t bytes = sizeof(dns_qpsnap_t) + sizeof(dns_qpbase_t) +
1392 1.1 christos sizeof(qpw->base->ptr[0]) * qpw->chunk_max;
1393 1.1 christos dns_qpsnap_t *qps = isc_mem_allocate(qpw->mctx, bytes);
1394 1.1 christos
1395 1.1 christos qps->whence = reader_open(multi, qps);
1396 1.1 christos INSIST(qps->whence == multi);
1397 1.1 christos
1398 1.1 christos /* not a separate allocation */
1399 1.1 christos qps->base = (dns_qpbase_t *)(qps + 1);
1400 1.1 christos isc_refcount_init(&qps->base->refcount, 0);
1401 1.1 christos
1402 1.1 christos /*
1403 1.1 christos * only copy base pointers of chunks we need, so we can
1404 1.1 christos * reclaim unused memory in dns_qpsnap_destroy()
1405 1.1 christos */
1406 1.1 christos qps->chunk_max = qpw->chunk_max;
1407 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) {
1408 1.1 christos if (qpw->usage[chunk].exists && chunk_usage(qpw, chunk) > 0) {
1409 1.1 christos qpw->usage[chunk].snapshot = true;
1410 1.1 christos qps->base->ptr[chunk] = qpw->base->ptr[chunk];
1411 1.1 christos } else {
1412 1.1 christos qps->base->ptr[chunk] = NULL;
1413 1.1 christos }
1414 1.1 christos }
1415 1.1 christos ISC_LIST_INITANDAPPEND(multi->snapshots, qps, link);
1416 1.1 christos
1417 1.1 christos *qpsp = qps;
1418 1.1 christos UNLOCK(&multi->mutex);
1419 1.1 christos
1420 1.1 christos rcu_read_unlock();
1421 1.1 christos }
1422 1.1 christos
1423 1.1 christos void
1424 1.1 christos dns_qpsnap_destroy(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) {
1425 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1426 1.1 christos REQUIRE(qpsp != NULL && *qpsp != NULL);
1427 1.1 christos
1428 1.1 christos LOCK(&multi->mutex);
1429 1.1 christos
1430 1.1 christos dns_qpsnap_t *qp = *qpsp;
1431 1.1 christos
1432 1.1 christos /* make sure the API is being used correctly */
1433 1.1 christos REQUIRE(qp->whence == multi);
1434 1.1 christos
1435 1.1 christos ISC_LIST_UNLINK(multi->snapshots, qp, link);
1436 1.1 christos
1437 1.1 christos /*
1438 1.1 christos * eagerly reclaim chunks that are now unused, so that memory does
1439 1.1 christos * not accumulate when a trie has a lot of updates and snapshots
1440 1.1 christos */
1441 1.1 christos marksweep_chunks(multi);
1442 1.1 christos
1443 1.1 christos isc_mem_free(multi->writer.mctx, qp);
1444 1.1 christos
1445 1.1 christos *qpsp = NULL;
1446 1.1 christos UNLOCK(&multi->mutex);
1447 1.1 christos }
1448 1.1 christos
1449 1.1 christos /***********************************************************************
1450 1.1 christos *
1451 1.1 christos * constructors, destructors
1452 1.1 christos */
1453 1.1 christos
1454 1.1 christos void
1455 1.1 christos dns_qp_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
1456 1.1 christos dns_qp_t **qptp) {
1457 1.1 christos REQUIRE(qptp != NULL && *qptp == NULL);
1458 1.1 christos
1459 1.1 christos dns_qp_t *qp = isc_mem_get(mctx, sizeof(*qp));
1460 1.1 christos QP_INIT(qp, methods, uctx);
1461 1.1 christos isc_mem_attach(mctx, &qp->mctx);
1462 1.1 christos alloc_reset(qp);
1463 1.1 christos TRACE("");
1464 1.1 christos *qptp = qp;
1465 1.1 christos }
1466 1.1 christos
1467 1.1 christos void
1468 1.1 christos dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
1469 1.1 christos dns_qpmulti_t **qpmp) {
1470 1.1 christos REQUIRE(qpmp != NULL && *qpmp == NULL);
1471 1.1 christos
1472 1.1 christos dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi));
1473 1.1 christos *multi = (dns_qpmulti_t){
1474 1.1 christos .magic = QPMULTI_MAGIC,
1475 1.1 christos .reader_ref = INVALID_REF,
1476 1.1 christos };
1477 1.1 christos isc_mutex_init(&multi->mutex);
1478 1.1 christos ISC_LIST_INIT(multi->snapshots);
1479 1.1 christos /*
1480 1.1 christos * Do not waste effort allocating a bump chunk that will be thrown
1481 1.1 christos * away when a transaction is opened. dns_qpmulti_update() always
1482 1.1 christos * allocates; to ensure dns_qpmulti_write() does too, pretend the
1483 1.1 christos * previous transaction was an update
1484 1.1 christos */
1485 1.1 christos dns_qp_t *qp = &multi->writer;
1486 1.1 christos QP_INIT(qp, methods, uctx);
1487 1.1 christos isc_mem_attach(mctx, &qp->mctx);
1488 1.1 christos qp->transaction_mode = QP_UPDATE;
1489 1.1 christos TRACE("");
1490 1.1 christos *qpmp = multi;
1491 1.1 christos }
1492 1.1 christos
1493 1.1 christos static void
1494 1.1 christos destroy_guts(dns_qp_t *qp) {
1495 1.1 christos if (qp->chunk_max == 0) {
1496 1.1 christos return;
1497 1.1 christos }
1498 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
1499 1.1 christos if (qp->base->ptr[chunk] != NULL) {
1500 1.1 christos chunk_free(qp, chunk);
1501 1.1 christos }
1502 1.1 christos }
1503 1.1 christos ENSURE(qp->used_count == 0);
1504 1.1 christos ENSURE(qp->free_count == 0);
1505 1.1 christos ENSURE(isc_refcount_current(&qp->base->refcount) == 1);
1506 1.1 christos isc_mem_free(qp->mctx, qp->base);
1507 1.1 christos isc_mem_free(qp->mctx, qp->usage);
1508 1.1 christos qp->magic = 0;
1509 1.1 christos }
1510 1.1 christos
1511 1.1 christos void
1512 1.1 christos dns_qp_destroy(dns_qp_t **qptp) {
1513 1.1 christos REQUIRE(qptp != NULL);
1514 1.1 christos REQUIRE(QP_VALID(*qptp));
1515 1.1 christos
1516 1.1 christos dns_qp_t *qp = *qptp;
1517 1.1 christos *qptp = NULL;
1518 1.1 christos
1519 1.1 christos /* do not try to destroy part of a dns_qpmulti_t */
1520 1.1 christos REQUIRE(qp->transaction_mode == QP_NONE);
1521 1.1 christos
1522 1.1 christos TRACE("");
1523 1.1 christos destroy_guts(qp);
1524 1.1 christos isc_mem_putanddetach(&qp->mctx, qp, sizeof(*qp));
1525 1.1 christos }
1526 1.1 christos
1527 1.1 christos static void
1528 1.1 christos qpmulti_destroy_cb(struct rcu_head *arg) {
1529 1.1 christos qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
1530 1.1 christos REQUIRE(QPRCU_VALID(rcuctx));
1531 1.1 christos /* only nonzero for reclaim_chunks_cb() */
1532 1.1 christos REQUIRE(rcuctx->count == 0);
1533 1.1 christos
1534 1.1 christos dns_qpmulti_t *multi = rcuctx->multi;
1535 1.1 christos REQUIRE(QPMULTI_VALID(multi));
1536 1.1 christos
1537 1.1 christos /* reassure thread sanitizer */
1538 1.1 christos LOCK(&multi->mutex);
1539 1.1 christos
1540 1.1 christos dns_qp_t *qp = &multi->writer;
1541 1.1 christos REQUIRE(QP_VALID(qp));
1542 1.1 christos
1543 1.1 christos destroy_guts(qp);
1544 1.1 christos
1545 1.1 christos UNLOCK(&multi->mutex);
1546 1.1 christos
1547 1.1 christos isc_mutex_destroy(&multi->mutex);
1548 1.1 christos isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
1549 1.1 christos STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
1550 1.1 christos isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
1551 1.1 christos }
1552 1.1 christos
1553 1.1 christos void
1554 1.1 christos dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
1555 1.1 christos dns_qp_t *qp = NULL;
1556 1.1 christos dns_qpmulti_t *multi = NULL;
1557 1.1 christos qp_rcuctx_t *rcuctx = NULL;
1558 1.1 christos
1559 1.1 christos REQUIRE(qpmp != NULL);
1560 1.1 christos REQUIRE(QPMULTI_VALID(*qpmp));
1561 1.1 christos
1562 1.1 christos multi = *qpmp;
1563 1.1 christos qp = &multi->writer;
1564 1.1 christos *qpmp = NULL;
1565 1.1 christos
1566 1.1 christos REQUIRE(QP_VALID(qp));
1567 1.1 christos REQUIRE(multi->rollback == NULL);
1568 1.1 christos REQUIRE(ISC_LIST_EMPTY(multi->snapshots));
1569 1.1 christos
1570 1.1 christos rcuctx = isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, 0));
1571 1.1 christos *rcuctx = (qp_rcuctx_t){
1572 1.1 christos .magic = QPRCU_MAGIC,
1573 1.1 christos .multi = multi,
1574 1.1 christos };
1575 1.1 christos isc_mem_attach(qp->mctx, &rcuctx->mctx);
1576 1.1 christos call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb);
1577 1.1 christos }
1578 1.1 christos
1579 1.1 christos /***********************************************************************
1580 1.1 christos *
1581 1.1 christos * modification
1582 1.1 christos */
1583 1.1 christos
1584 1.1 christos isc_result_t
1585 1.1 christos dns_qp_insert(dns_qp_t *qp, void *pval, uint32_t ival) {
1586 1.1 christos dns_qpref_t new_ref, old_ref;
1587 1.1 christos dns_qpnode_t new_leaf, old_node;
1588 1.1 christos dns_qpnode_t *new_twigs = NULL, *old_twigs = NULL;
1589 1.1 christos dns_qpshift_t new_bit, old_bit;
1590 1.1 christos dns_qpweight_t old_size, new_size;
1591 1.1 christos dns_qpkey_t new_key, old_key;
1592 1.1 christos size_t new_keylen, old_keylen;
1593 1.1 christos size_t offset;
1594 1.1 christos uint64_t index;
1595 1.1 christos dns_qpshift_t bit;
1596 1.1 christos dns_qpweight_t pos;
1597 1.1 christos dns_qpnode_t *n = NULL;
1598 1.1 christos
1599 1.1 christos REQUIRE(QP_VALID(qp));
1600 1.1 christos
1601 1.1 christos new_leaf = make_leaf(pval, ival);
1602 1.1 christos new_keylen = leaf_qpkey(qp, &new_leaf, new_key);
1603 1.1 christos
1604 1.1 christos /* first leaf in an empty trie? */
1605 1.1 christos if (qp->leaf_count == 0) {
1606 1.1 christos new_ref = alloc_twigs(qp, 1);
1607 1.1 christos new_twigs = ref_ptr(qp, new_ref);
1608 1.1 christos *new_twigs = new_leaf;
1609 1.1 christos attach_leaf(qp, new_twigs);
1610 1.1 christos qp->leaf_count++;
1611 1.1 christos qp->root_ref = new_ref;
1612 1.1 christos return ISC_R_SUCCESS;
1613 1.1 christos }
1614 1.1 christos
1615 1.1 christos /*
1616 1.1 christos * We need to keep searching down to a leaf even if our key is
1617 1.1 christos * missing from this branch. It doesn't matter which twig we
1618 1.1 christos * choose since the keys are all the same up to this node's
1619 1.1 christos * offset. Note that if we simply use branch_twig_pos(n, bit)
1620 1.1 christos * we may get an out-of-bounds access if our bit is greater
1621 1.1 christos * than all the set bits in the node.
1622 1.1 christos */
1623 1.1 christos n = ref_ptr(qp, qp->root_ref);
1624 1.1 christos while (is_branch(n)) {
1625 1.1 christos prefetch_twigs(qp, n);
1626 1.1 christos dns_qpref_t ref = branch_twigs_ref(n);
1627 1.1 christos bit = branch_keybit(n, new_key, new_keylen);
1628 1.1 christos pos = branch_has_twig(n, bit) ? branch_twig_pos(n, bit) : 0;
1629 1.1 christos n = ref_ptr(qp, ref + pos);
1630 1.1 christos }
1631 1.1 christos
1632 1.1 christos /* do the keys differ, and if so, where? */
1633 1.1 christos old_keylen = leaf_qpkey(qp, n, old_key);
1634 1.1 christos offset = qpkey_compare(new_key, new_keylen, old_key, old_keylen);
1635 1.1 christos if (offset == QPKEY_EQUAL) {
1636 1.1 christos return ISC_R_EXISTS;
1637 1.1 christos }
1638 1.1 christos new_bit = qpkey_bit(new_key, new_keylen, offset);
1639 1.1 christos old_bit = qpkey_bit(old_key, old_keylen, offset);
1640 1.1 christos
1641 1.1 christos /* find where to insert a branch or grow an existing branch. */
1642 1.1 christos n = make_root_mutable(qp);
1643 1.1 christos while (is_branch(n)) {
1644 1.1 christos prefetch_twigs(qp, n);
1645 1.1 christos if (offset < branch_key_offset(n)) {
1646 1.1 christos goto newbranch;
1647 1.1 christos }
1648 1.1 christos if (offset == branch_key_offset(n)) {
1649 1.1 christos goto growbranch;
1650 1.1 christos }
1651 1.1 christos make_twigs_mutable(qp, n);
1652 1.1 christos bit = branch_keybit(n, new_key, new_keylen);
1653 1.1 christos INSIST(branch_has_twig(n, bit));
1654 1.1 christos n = branch_twig_ptr(qp, n, bit);
1655 1.1 christos }
1656 1.1 christos /* fall through */
1657 1.1 christos
1658 1.1 christos newbranch:
1659 1.1 christos new_ref = alloc_twigs(qp, 2);
1660 1.1 christos new_twigs = ref_ptr(qp, new_ref);
1661 1.1 christos
1662 1.1 christos /* save before overwriting. */
1663 1.1 christos old_node = *n;
1664 1.1 christos
1665 1.1 christos /* new branch node takes old node's place */
1666 1.1 christos index = BRANCH_TAG | (1ULL << new_bit) | (1ULL << old_bit) |
1667 1.1 christos ((uint64_t)offset << SHIFT_OFFSET);
1668 1.1 christos *n = make_node(index, new_ref);
1669 1.1 christos
1670 1.1 christos /* populate twigs */
1671 1.1 christos new_twigs[old_bit > new_bit] = old_node;
1672 1.1 christos new_twigs[new_bit > old_bit] = new_leaf;
1673 1.1 christos
1674 1.1 christos attach_leaf(qp, &new_leaf);
1675 1.1 christos qp->leaf_count++;
1676 1.1 christos
1677 1.1 christos return ISC_R_SUCCESS;
1678 1.1 christos
1679 1.1 christos growbranch:
1680 1.1 christos INSIST(!branch_has_twig(n, new_bit));
1681 1.1 christos
1682 1.1 christos /* locate twigs vectors */
1683 1.1 christos old_size = branch_twigs_size(n);
1684 1.1 christos new_size = old_size + 1;
1685 1.1 christos old_ref = branch_twigs_ref(n);
1686 1.1 christos new_ref = alloc_twigs(qp, new_size);
1687 1.1 christos old_twigs = ref_ptr(qp, old_ref);
1688 1.1 christos new_twigs = ref_ptr(qp, new_ref);
1689 1.1 christos
1690 1.1 christos /* embiggen branch node */
1691 1.1 christos index = branch_index(n) | (1ULL << new_bit);
1692 1.1 christos *n = make_node(index, new_ref);
1693 1.1 christos
1694 1.1 christos /* embiggen twigs vector */
1695 1.1 christos pos = branch_twig_pos(n, new_bit);
1696 1.1 christos move_twigs(new_twigs, old_twigs, pos);
1697 1.1 christos new_twigs[pos] = new_leaf;
1698 1.1 christos move_twigs(new_twigs + pos + 1, old_twigs + pos, old_size - pos);
1699 1.1 christos
1700 1.1 christos if (squash_twigs(qp, old_ref, old_size)) {
1701 1.1 christos /* old twigs destroyed, only attach to new leaf */
1702 1.1 christos attach_leaf(qp, &new_leaf);
1703 1.1 christos } else {
1704 1.1 christos /* old twigs duplicated, attach to all leaves */
1705 1.1 christos attach_twigs(qp, new_twigs, new_size);
1706 1.1 christos }
1707 1.1 christos qp->leaf_count++;
1708 1.1 christos
1709 1.1 christos return ISC_R_SUCCESS;
1710 1.1 christos }
1711 1.1 christos
1712 1.1 christos isc_result_t
1713 1.1 christos dns_qp_deletekey(dns_qp_t *qp, const dns_qpkey_t search_key,
1714 1.1 christos size_t search_keylen, void **pval_r, uint32_t *ival_r) {
1715 1.1 christos REQUIRE(QP_VALID(qp));
1716 1.1 christos REQUIRE(search_keylen < sizeof(dns_qpkey_t));
1717 1.1 christos
1718 1.1 christos if (get_root(qp) == NULL) {
1719 1.1 christos return ISC_R_NOTFOUND;
1720 1.1 christos }
1721 1.1 christos
1722 1.1 christos dns_qpshift_t bit = 0; /* suppress warning */
1723 1.1 christos dns_qpnode_t *parent = NULL;
1724 1.1 christos dns_qpnode_t *n = make_root_mutable(qp);
1725 1.1 christos while (is_branch(n)) {
1726 1.1 christos prefetch_twigs(qp, n);
1727 1.1 christos bit = branch_keybit(n, search_key, search_keylen);
1728 1.1 christos if (!branch_has_twig(n, bit)) {
1729 1.1 christos return ISC_R_NOTFOUND;
1730 1.1 christos }
1731 1.1 christos make_twigs_mutable(qp, n);
1732 1.1 christos parent = n;
1733 1.1 christos n = branch_twig_ptr(qp, n, bit);
1734 1.1 christos }
1735 1.1 christos
1736 1.1 christos dns_qpkey_t found_key;
1737 1.1 christos size_t found_keylen = leaf_qpkey(qp, n, found_key);
1738 1.1 christos if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) !=
1739 1.1 christos QPKEY_EQUAL)
1740 1.1 christos {
1741 1.1 christos return ISC_R_NOTFOUND;
1742 1.1 christos }
1743 1.1 christos
1744 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n));
1745 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n));
1746 1.1 christos detach_leaf(qp, n);
1747 1.1 christos qp->leaf_count--;
1748 1.1 christos
1749 1.1 christos /* trie becomes empty */
1750 1.1 christos if (qp->leaf_count == 0) {
1751 1.1 christos INSIST(parent == NULL);
1752 1.1 christos INSIST(n == get_root(qp));
1753 1.1 christos free_twigs(qp, qp->root_ref, 1);
1754 1.1 christos qp->root_ref = INVALID_REF;
1755 1.1 christos return ISC_R_SUCCESS;
1756 1.1 christos }
1757 1.1 christos
1758 1.1 christos /* step back to parent node */
1759 1.1 christos n = parent;
1760 1.1 christos parent = NULL;
1761 1.1 christos
1762 1.1 christos INSIST(bit != 0);
1763 1.1 christos dns_qpweight_t size = branch_twigs_size(n);
1764 1.1 christos dns_qpweight_t pos = branch_twig_pos(n, bit);
1765 1.1 christos dns_qpref_t ref = branch_twigs_ref(n);
1766 1.1 christos dns_qpnode_t *twigs = ref_ptr(qp, ref);
1767 1.1 christos
1768 1.1 christos if (size == 2) {
1769 1.1 christos /*
1770 1.1 christos * move the other twig to the parent branch.
1771 1.1 christos */
1772 1.1 christos *n = twigs[!pos];
1773 1.1 christos squash_twigs(qp, ref, 2);
1774 1.1 christos } else {
1775 1.1 christos /*
1776 1.1 christos * shrink the twigs in place, to avoid using the bump
1777 1.1 christos * chunk too fast - the gc will clean up after us
1778 1.1 christos */
1779 1.1 christos *n = make_node(branch_index(n) & ~(1ULL << bit), ref);
1780 1.1 christos move_twigs(twigs + pos, twigs + pos + 1, size - pos - 1);
1781 1.1 christos squash_twigs(qp, ref + size - 1, 1);
1782 1.1 christos }
1783 1.1 christos
1784 1.1 christos return ISC_R_SUCCESS;
1785 1.1 christos }
1786 1.1 christos
1787 1.1 christos isc_result_t
1788 1.1 christos dns_qp_deletename(dns_qp_t *qp, const dns_name_t *name, void **pval_r,
1789 1.1 christos uint32_t *ival_r) {
1790 1.1 christos dns_qpkey_t key;
1791 1.1 christos size_t keylen = dns_qpkey_fromname(key, name);
1792 1.1 christos return dns_qp_deletekey(qp, key, keylen, pval_r, ival_r);
1793 1.1 christos }
1794 1.1 christos
1795 1.1 christos /***********************************************************************
1796 1.1 christos * chains
1797 1.1 christos */
1798 1.1 christos static void
1799 1.1 christos maybe_set_name(dns_qpreader_t *qp, dns_qpnode_t *node, dns_name_t *name) {
1800 1.1 christos dns_qpkey_t key;
1801 1.1 christos size_t len;
1802 1.1 christos
1803 1.1 christos if (name == NULL) {
1804 1.1 christos return;
1805 1.1 christos }
1806 1.1 christos
1807 1.1 christos dns_name_reset(name);
1808 1.1 christos len = leaf_qpkey(qp, node, key);
1809 1.1 christos dns_qpkey_toname(key, len, name);
1810 1.1 christos }
1811 1.1 christos
1812 1.1 christos void
1813 1.1 christos dns_qpchain_init(dns_qpreadable_t qpr, dns_qpchain_t *chain) {
1814 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr);
1815 1.1 christos REQUIRE(QP_VALID(qp));
1816 1.1 christos REQUIRE(chain != NULL);
1817 1.1 christos
1818 1.1 christos *chain = (dns_qpchain_t){
1819 1.1 christos .magic = QPCHAIN_MAGIC,
1820 1.1 christos .qp = qp,
1821 1.1 christos };
1822 1.1 christos }
1823 1.1 christos
1824 1.1 christos unsigned int
1825 1.1 christos dns_qpchain_length(dns_qpchain_t *chain) {
1826 1.1 christos REQUIRE(QPCHAIN_VALID(chain));
1827 1.1 christos
1828 1.1 christos return chain->len;
1829 1.1 christos }
1830 1.1 christos
1831 1.1 christos void
1832 1.1 christos dns_qpchain_node(dns_qpchain_t *chain, unsigned int level, dns_name_t *name,
1833 1.1 christos void **pval_r, uint32_t *ival_r) {
1834 1.1 christos dns_qpnode_t *node = NULL;
1835 1.1 christos
1836 1.1 christos REQUIRE(QPCHAIN_VALID(chain));
1837 1.1 christos REQUIRE(level < chain->len);
1838 1.1 christos
1839 1.1 christos node = chain->chain[level].node;
1840 1.1 christos maybe_set_name(chain->qp, node, name);
1841 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node));
1842 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node));
1843 1.1 christos }
1844 1.1 christos
1845 1.1 christos /***********************************************************************
1846 1.1 christos * iterators
1847 1.1 christos */
1848 1.1 christos
1849 1.1 christos void
1850 1.1 christos dns_qpiter_init(dns_qpreadable_t qpr, dns_qpiter_t *qpi) {
1851 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr);
1852 1.1 christos REQUIRE(QP_VALID(qp));
1853 1.1 christos REQUIRE(qpi != NULL);
1854 1.1 christos *qpi = (dns_qpiter_t){
1855 1.1 christos .qp = qp,
1856 1.1 christos .magic = QPITER_MAGIC,
1857 1.1 christos };
1858 1.1 christos }
1859 1.1 christos
1860 1.1 christos /*
1861 1.1 christos * are we at the last twig in this branch (in whichever direction
1862 1.1 christos * we're iterating)?
1863 1.1 christos */
1864 1.1 christos static bool
1865 1.1 christos last_twig(dns_qpiter_t *qpi, bool forward) {
1866 1.1 christos dns_qpweight_t pos = 0, max = 0;
1867 1.1 christos if (qpi->sp > 0) {
1868 1.1 christos dns_qpnode_t *child = qpi->stack[qpi->sp];
1869 1.1 christos dns_qpnode_t *parent = qpi->stack[qpi->sp - 1];
1870 1.1 christos pos = child - ref_ptr(qpi->qp, branch_twigs_ref(parent));
1871 1.1 christos if (forward) {
1872 1.1 christos max = branch_twigs_size(parent) - 1;
1873 1.1 christos }
1874 1.1 christos }
1875 1.1 christos return pos == max;
1876 1.1 christos }
1877 1.1 christos
1878 1.1 christos /*
1879 1.1 christos * move a QP iterator forward or back to the next or previous leaf.
1880 1.1 christos * note: this function can go wrong when the iterator refers to
1881 1.1 christos * a mutable view of the trie which is altered while iterating
1882 1.1 christos */
1883 1.1 christos static isc_result_t
1884 1.1 christos iterate(bool forward, dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
1885 1.1 christos uint32_t *ival_r) {
1886 1.1 christos dns_qpnode_t *node = NULL;
1887 1.1 christos bool initial_branch = true;
1888 1.1 christos
1889 1.1 christos REQUIRE(QPITER_VALID(qpi));
1890 1.1 christos
1891 1.1 christos dns_qpreader_t *qp = qpi->qp;
1892 1.1 christos
1893 1.1 christos REQUIRE(QP_VALID(qp));
1894 1.1 christos
1895 1.1 christos node = get_root(qp);
1896 1.1 christos if (node == NULL) {
1897 1.1 christos return ISC_R_NOMORE;
1898 1.1 christos }
1899 1.1 christos
1900 1.1 christos do {
1901 1.1 christos if (qpi->stack[qpi->sp] == NULL) {
1902 1.1 christos /* newly initialized iterator: use the root node */
1903 1.1 christos INSIST(qpi->sp == 0);
1904 1.1 christos qpi->stack[0] = node;
1905 1.1 christos } else if (!initial_branch) {
1906 1.1 christos /*
1907 1.1 christos * in a prior loop, we reached a branch; from
1908 1.1 christos * here we just need to get the highest or lowest
1909 1.1 christos * leaf in the subtree; we don't need to bother
1910 1.1 christos * stepping forward or backward through twigs
1911 1.1 christos * anymore.
1912 1.1 christos */
1913 1.1 christos INSIST(qpi->sp > 0);
1914 1.1 christos } else if (last_twig(qpi, forward)) {
1915 1.1 christos /*
1916 1.1 christos * we've stepped to the end (or the beginning,
1917 1.1 christos * if we're iterating backwards) of a set of twigs.
1918 1.1 christos */
1919 1.1 christos if (qpi->sp == 0) {
1920 1.1 christos /*
1921 1.1 christos * we've finished iterating. reinitialize
1922 1.1 christos * the iterator, then return ISC_R_NOMORE.
1923 1.1 christos */
1924 1.1 christos dns_qpiter_init(qpi->qp, qpi);
1925 1.1 christos return ISC_R_NOMORE;
1926 1.1 christos }
1927 1.1 christos
1928 1.1 christos /*
1929 1.1 christos * pop the stack, and resume at the parent branch.
1930 1.1 christos */
1931 1.1 christos qpi->stack[qpi->sp] = NULL;
1932 1.1 christos qpi->sp--;
1933 1.1 christos continue;
1934 1.1 christos } else {
1935 1.1 christos /*
1936 1.1 christos * there are more twigs in the current branch,
1937 1.1 christos * so step the node pointer forward (or back).
1938 1.1 christos */
1939 1.1 christos qpi->stack[qpi->sp] += (forward ? 1 : -1);
1940 1.1 christos node = qpi->stack[qpi->sp];
1941 1.1 christos }
1942 1.1 christos
1943 1.1 christos /*
1944 1.1 christos * if we're at a branch now, we loop down to the
1945 1.1 christos * left- or rightmost leaf.
1946 1.1 christos */
1947 1.1 christos if (is_branch(node)) {
1948 1.1 christos qpi->sp++;
1949 1.1 christos INSIST(qpi->sp < DNS_QP_MAXKEY);
1950 1.1 christos node = ref_ptr(qp, branch_twigs_ref(node)) +
1951 1.1 christos (forward ? 0 : branch_twigs_size(node) - 1);
1952 1.1 christos qpi->stack[qpi->sp] = node;
1953 1.1 christos initial_branch = false;
1954 1.1 christos }
1955 1.1 christos } while (is_branch(node));
1956 1.1 christos
1957 1.1 christos /* we're at a leaf: return its data to the caller */
1958 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node));
1959 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node));
1960 1.1 christos maybe_set_name(qpi->qp, node, name);
1961 1.1 christos return ISC_R_SUCCESS;
1962 1.1 christos }
1963 1.1 christos
1964 1.1 christos isc_result_t
1965 1.1 christos dns_qpiter_next(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
1966 1.1 christos uint32_t *ival_r) {
1967 1.1 christos return iterate(true, qpi, name, pval_r, ival_r);
1968 1.1 christos }
1969 1.1 christos
1970 1.1 christos isc_result_t
1971 1.1 christos dns_qpiter_prev(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
1972 1.1 christos uint32_t *ival_r) {
1973 1.1 christos return iterate(false, qpi, name, pval_r, ival_r);
1974 1.1 christos }
1975 1.1 christos
1976 1.1 christos isc_result_t
1977 1.1 christos dns_qpiter_current(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r,
1978 1.1 christos uint32_t *ival_r) {
1979 1.1 christos dns_qpnode_t *node = NULL;
1980 1.1 christos
1981 1.1 christos REQUIRE(QPITER_VALID(qpi));
1982 1.1 christos
1983 1.1 christos node = qpi->stack[qpi->sp];
1984 1.1 christos if (node == NULL || is_branch(node)) {
1985 1.1 christos return ISC_R_FAILURE;
1986 1.1 christos }
1987 1.1 christos
1988 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node));
1989 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node));
1990 1.1 christos maybe_set_name(qpi->qp, node, name);
1991 1.1 christos return ISC_R_SUCCESS;
1992 1.1 christos }
1993 1.1 christos
1994 1.1 christos /***********************************************************************
1995 1.1 christos *
1996 1.1 christos * search
1997 1.1 christos */
1998 1.1 christos
1999 1.1 christos isc_result_t
2000 1.1 christos dns_qp_getkey(dns_qpreadable_t qpr, const dns_qpkey_t search_key,
2001 1.1 christos size_t search_keylen, void **pval_r, uint32_t *ival_r) {
2002 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr);
2003 1.1 christos dns_qpkey_t found_key;
2004 1.1 christos size_t found_keylen;
2005 1.1 christos dns_qpshift_t bit;
2006 1.1 christos dns_qpnode_t *n = NULL;
2007 1.1 christos
2008 1.1 christos REQUIRE(QP_VALID(qp));
2009 1.1 christos REQUIRE(search_keylen < sizeof(dns_qpkey_t));
2010 1.1 christos
2011 1.1 christos n = get_root(qp);
2012 1.1 christos if (n == NULL) {
2013 1.1 christos return ISC_R_NOTFOUND;
2014 1.1 christos }
2015 1.1 christos
2016 1.1 christos while (is_branch(n)) {
2017 1.1 christos prefetch_twigs(qp, n);
2018 1.1 christos bit = branch_keybit(n, search_key, search_keylen);
2019 1.1 christos if (!branch_has_twig(n, bit)) {
2020 1.1 christos return ISC_R_NOTFOUND;
2021 1.1 christos }
2022 1.1 christos n = branch_twig_ptr(qp, n, bit);
2023 1.1 christos }
2024 1.1 christos
2025 1.1 christos found_keylen = leaf_qpkey(qp, n, found_key);
2026 1.1 christos if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) !=
2027 1.1 christos QPKEY_EQUAL)
2028 1.1 christos {
2029 1.1 christos return ISC_R_NOTFOUND;
2030 1.1 christos }
2031 1.1 christos
2032 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n));
2033 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n));
2034 1.1 christos return ISC_R_SUCCESS;
2035 1.1 christos }
2036 1.1 christos
2037 1.1 christos isc_result_t
2038 1.1 christos dns_qp_getname(dns_qpreadable_t qpr, const dns_name_t *name, void **pval_r,
2039 1.1 christos uint32_t *ival_r) {
2040 1.1 christos dns_qpkey_t key;
2041 1.1 christos size_t keylen = dns_qpkey_fromname(key, name);
2042 1.1 christos return dns_qp_getkey(qpr, key, keylen, pval_r, ival_r);
2043 1.1 christos }
2044 1.1 christos
2045 1.1 christos static inline void
2046 1.1 christos add_link(dns_qpchain_t *chain, dns_qpnode_t *node, size_t offset) {
2047 1.1 christos /* prevent duplication */
2048 1.1 christos if (chain->len != 0 && chain->chain[chain->len - 1].node == node) {
2049 1.1 christos return;
2050 1.1 christos }
2051 1.1 christos chain->chain[chain->len].node = node;
2052 1.1 christos chain->chain[chain->len].offset = offset;
2053 1.1 christos chain->len++;
2054 1.1 christos INSIST(chain->len <= DNS_NAME_MAXLABELS);
2055 1.1 christos }
2056 1.1 christos
2057 1.1 christos static inline void
2058 1.1 christos prevleaf(dns_qpiter_t *it) {
2059 1.1 christos isc_result_t result = dns_qpiter_prev(it, NULL, NULL, NULL);
2060 1.1 christos if (result == ISC_R_NOMORE) {
2061 1.1 christos result = dns_qpiter_prev(it, NULL, NULL, NULL);
2062 1.1 christos }
2063 1.1 christos RUNTIME_CHECK(result == ISC_R_SUCCESS);
2064 1.1 christos }
2065 1.1 christos
2066 1.1 christos static inline void
2067 1.1 christos greatest_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpiter_t *iter) {
2068 1.1 christos while (is_branch(n)) {
2069 1.1 christos dns_qpref_t ref = branch_twigs_ref(n) + branch_twigs_size(n) -
2070 1.1 christos 1;
2071 1.1 christos iter->stack[++iter->sp] = n;
2072 1.1 christos n = ref_ptr(qpr, ref);
2073 1.1 christos }
2074 1.1 christos iter->stack[++iter->sp] = n;
2075 1.1 christos }
2076 1.1 christos
2077 1.1 christos static inline dns_qpnode_t *
2078 1.1 christos anyleaf(dns_qpreader_t *qp, dns_qpnode_t *n) {
2079 1.1 christos while (is_branch(n)) {
2080 1.1 christos n = branch_twigs(qp, n);
2081 1.1 christos }
2082 1.1 christos return n;
2083 1.1 christos }
2084 1.1 christos
2085 1.1 christos static inline int
2086 1.1 christos twig_offset(dns_qpnode_t *n, dns_qpshift_t sbit, dns_qpshift_t kbit,
2087 1.1 christos dns_qpshift_t fbit) {
2088 1.1 christos dns_qpweight_t pos = branch_twig_pos(n, sbit);
2089 1.1 christos if (branch_has_twig(n, sbit)) {
2090 1.1 christos return pos - (kbit < fbit);
2091 1.1 christos }
2092 1.1 christos return pos - 1;
2093 1.1 christos }
2094 1.1 christos
2095 1.1 christos /*
2096 1.1 christos * If dns_qp_lookup() was passed an iterator, we want it to point at the
2097 1.1 christos * matching name in the case of an exact match, or at the predecessor name
2098 1.1 christos * for a non-exact match.
2099 1.1 christos *
2100 1.1 christos * If there is an exact match, then there is nothing to be done. Otherwise,
2101 1.1 christos * we pop up the iterator stack until we find a parent branch with an offset
2102 1.1 christos * that is before the position where the search key differs from the found key.
2103 1.1 christos * From there we can step to the leaf that is the predecessor of the searched
2104 1.1 christos * name.
2105 1.1 christos *
2106 1.1 christos * Requires the iterator to be pointing at a leaf node.
2107 1.1 christos */
2108 1.1 christos static void
2109 1.1 christos fix_iterator(dns_qpreader_t *qp, dns_qpiter_t *it, dns_qpkey_t key,
2110 1.1 christos size_t len) {
2111 1.1 christos dns_qpnode_t *n = it->stack[it->sp];
2112 1.1 christos
2113 1.1 christos REQUIRE(!is_branch(n));
2114 1.1 christos
2115 1.1 christos dns_qpkey_t found;
2116 1.1 christos size_t foundlen = leaf_qpkey(qp, n, found);
2117 1.1 christos size_t to = qpkey_compare(key, len, found, foundlen);
2118 1.1 christos
2119 1.1 christos /* If the keys are equal, the iterator is already at the right node. */
2120 1.1 christos if (to == QPKEY_EQUAL) {
2121 1.1 christos return;
2122 1.1 christos }
2123 1.1 christos
2124 1.1 christos /*
2125 1.1 christos * Special case: if the key differs even before the root
2126 1.1 christos * key offset, it means the name desired either precedes or
2127 1.1 christos * follows the entire range of names in the database, and
2128 1.1 christos * popping up the stack won't help us, so just move the
2129 1.1 christos * iterator one step back from the origin and return.
2130 1.1 christos */
2131 1.1 christos if (to < branch_key_offset(it->stack[0])) {
2132 1.1 christos dns_qpiter_init(qp, it);
2133 1.1 christos prevleaf(it);
2134 1.1 christos return;
2135 1.1 christos }
2136 1.1 christos
2137 1.1 christos /*
2138 1.1 christos * As long as the branch offset point is after the point where the
2139 1.1 christos * key differs, we need to branch up and find a better node.
2140 1.1 christos */
2141 1.1 christos while (it->sp > 0) {
2142 1.1 christos dns_qpnode_t *b = it->stack[it->sp - 1];
2143 1.1 christos if (branch_key_offset(b) < to) {
2144 1.1 christos break;
2145 1.1 christos }
2146 1.1 christos it->sp--;
2147 1.1 christos }
2148 1.1 christos n = it->stack[it->sp];
2149 1.1 christos
2150 1.1 christos /*
2151 1.1 christos * Either we are now at the correct branch, or we are at the
2152 1.1 christos * first unmatched node. Determine the bit position for the
2153 1.1 christos * twig we need (sbit).
2154 1.1 christos */
2155 1.1 christos dns_qpshift_t kbit = qpkey_bit(key, len, to);
2156 1.1 christos dns_qpshift_t fbit = qpkey_bit(found, foundlen, to);
2157 1.1 christos dns_qpshift_t sbit = 0;
2158 1.1 christos
2159 1.1 christos if (is_branch(n) && branch_key_offset(n) == to) {
2160 1.1 christos /* We are on the correct branch now. */
2161 1.1 christos sbit = kbit;
2162 1.1 christos } else if (it->sp == 0) {
2163 1.1 christos /*
2164 1.1 christos * We are on the root branch, popping up the stack won't
2165 1.1 christos * help us, so just move the iterator one step back from the
2166 1.1 christos * origin and return.
2167 1.1 christos */
2168 1.1 christos dns_qpiter_init(qp, it);
2169 1.1 christos prevleaf(it);
2170 1.1 christos return;
2171 1.1 christos } else {
2172 1.1 christos /* We are at the first unmatched node, pop up the stack. */
2173 1.1 christos n = it->stack[--it->sp];
2174 1.1 christos sbit = qpkey_bit(key, len, branch_key_offset(n));
2175 1.1 christos }
2176 1.1 christos
2177 1.1 christos INSIST(is_branch(n));
2178 1.1 christos
2179 1.1 christos prefetch_twigs(qp, n);
2180 1.1 christos dns_qpnode_t *twigs = branch_twigs(qp, n);
2181 1.1 christos int toff = twig_offset(n, sbit, kbit, fbit);
2182 1.1 christos if (toff >= 0) {
2183 1.1 christos /*
2184 1.1 christos * The name we want would've been after some twig in
2185 1.1 christos * this branch. Walk down from that twig to the
2186 1.1 christos * highest leaf in its subtree to get the predecessor.
2187 1.1 christos */
2188 1.1 christos greatest_leaf(qp, twigs + toff, it);
2189 1.1 christos } else {
2190 1.1 christos /*
2191 1.1 christos * Every leaf below this node is greater than the one we
2192 1.1 christos * wanted, so the previous leaf is the predecessor.
2193 1.1 christos */
2194 1.1 christos prevleaf(it);
2195 1.1 christos }
2196 1.1 christos }
2197 1.1 christos
2198 1.1 christos /*
2199 1.1 christos * When searching for a requested name in dns_qp_lookup(), we might add
2200 1.1 christos * a leaf node to the chain, then subsequently determine that it was a
2201 1.1 christos * dead end. When this happens, the chain can be left holding a node
2202 1.1 christos * that is *not* an ancestor of the requested name. We correct for that
2203 1.1 christos * here.
2204 1.1 christos */
2205 1.1 christos static void
2206 1.1 christos fix_chain(dns_qpchain_t *chain, size_t offset) {
2207 1.1 christos while (chain->len > 0 && chain->chain[chain->len - 1].offset >= offset)
2208 1.1 christos {
2209 1.1 christos chain->len--;
2210 1.1 christos chain->chain[chain->len].node = NULL;
2211 1.1 christos chain->chain[chain->len].offset = 0;
2212 1.1 christos }
2213 1.1 christos }
2214 1.1 christos
2215 1.1 christos isc_result_t
2216 1.1 christos dns_qp_lookup(dns_qpreadable_t qpr, const dns_name_t *name,
2217 1.1 christos dns_name_t *foundname, dns_qpiter_t *iter, dns_qpchain_t *chain,
2218 1.1 christos void **pval_r, uint32_t *ival_r) {
2219 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr);
2220 1.1 christos dns_qpkey_t search, found;
2221 1.1 christos size_t searchlen, foundlen;
2222 1.1 christos size_t offset = 0;
2223 1.1 christos dns_qpnode_t *n = NULL;
2224 1.1 christos dns_qpshift_t bit = SHIFT_NOBYTE;
2225 1.1 christos dns_qpchain_t oc;
2226 1.1 christos dns_qpiter_t it;
2227 1.1 christos bool matched = false;
2228 1.1 christos bool setiter = true;
2229 1.1 christos
2230 1.1 christos REQUIRE(QP_VALID(qp));
2231 1.1 christos REQUIRE(foundname == NULL || ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
2232 1.1 christos
2233 1.1 christos searchlen = dns_qpkey_fromname(search, name);
2234 1.1 christos
2235 1.1 christos if (chain == NULL) {
2236 1.1 christos chain = &oc;
2237 1.1 christos }
2238 1.1 christos if (iter == NULL) {
2239 1.1 christos iter = ⁢
2240 1.1 christos setiter = false;
2241 1.1 christos }
2242 1.1 christos dns_qpchain_init(qp, chain);
2243 1.1 christos dns_qpiter_init(qp, iter);
2244 1.1 christos
2245 1.1 christos n = get_root(qp);
2246 1.1 christos if (n == NULL) {
2247 1.1 christos return ISC_R_NOTFOUND;
2248 1.1 christos }
2249 1.1 christos iter->stack[0] = n;
2250 1.1 christos
2251 1.1 christos /*
2252 1.1 christos * Like `dns_qp_insert()`, we must find a leaf. However, we don't make a
2253 1.1 christos * second pass: instead, we keep track of any leaves with shorter keys
2254 1.1 christos * that we discover along the way. (In general, qp-trie searches can be
2255 1.1 christos * one-pass, by recording their traversal, or two-pass, for less stack
2256 1.1 christos * memory usage.)
2257 1.1 christos */
2258 1.1 christos while (is_branch(n)) {
2259 1.1 christos prefetch_twigs(qp, n);
2260 1.1 christos
2261 1.1 christos offset = branch_key_offset(n);
2262 1.1 christos bit = qpkey_bit(search, searchlen, offset);
2263 1.1 christos dns_qpnode_t *twigs = branch_twigs(qp, n);
2264 1.1 christos
2265 1.1 christos /*
2266 1.1 christos * A shorter key that can be a parent domain always has a
2267 1.1 christos * leaf node at SHIFT_NOBYTE (indicating end of its key)
2268 1.1 christos * where our search key has a normal character immediately
2269 1.1 christos * after a label separator.
2270 1.1 christos *
2271 1.1 christos * Note 1: It is OK if `off - 1` underflows: it will
2272 1.1 christos * become SIZE_MAX, which is greater than `searchlen`, so
2273 1.1 christos * `qpkey_bit()` will return SHIFT_NOBYTE, which is what we
2274 1.1 christos * want when `off == 0`.
2275 1.1 christos *
2276 1.1 christos * Note 2: If SHIFT_NOBYTE twig is present, it will always
2277 1.1 christos * be in position 0, the first location in 'twigs'.
2278 1.1 christos */
2279 1.1 christos if (bit != SHIFT_NOBYTE && branch_has_twig(n, SHIFT_NOBYTE) &&
2280 1.1 christos qpkey_bit(search, searchlen, offset - 1) == SHIFT_NOBYTE &&
2281 1.1 christos !is_branch(twigs))
2282 1.1 christos {
2283 1.1 christos add_link(chain, twigs, offset);
2284 1.1 christos }
2285 1.1 christos
2286 1.1 christos matched = branch_has_twig(n, bit);
2287 1.1 christos if (matched) {
2288 1.1 christos /*
2289 1.1 christos * found a match: if it's a branch, we keep
2290 1.1 christos * searching, and if it's a leaf, we drop out of
2291 1.1 christos * the loop.
2292 1.1 christos */
2293 1.1 christos n = branch_twig_ptr(qp, n, bit);
2294 1.1 christos } else {
2295 1.1 christos /*
2296 1.1 christos * this branch is a dead end, and the predecessor
2297 1.1 christos * doesn't matter. now we just need to find a leaf
2298 1.1 christos * to end on so that qpkey_leaf() will work below.
2299 1.1 christos */
2300 1.1 christos n = anyleaf(qp, twigs);
2301 1.1 christos }
2302 1.1 christos
2303 1.1 christos iter->stack[++iter->sp] = n;
2304 1.1 christos }
2305 1.1 christos
2306 1.1 christos if (setiter) {
2307 1.1 christos /*
2308 1.1 christos * we found a leaf, but it might not be the leaf we wanted.
2309 1.1 christos * if it isn't, and if the caller passed us an iterator,
2310 1.1 christos * then we might need to reposition it.
2311 1.1 christos */
2312 1.1 christos fix_iterator(qp, iter, search, searchlen);
2313 1.1 christos n = iter->stack[iter->sp];
2314 1.1 christos }
2315 1.1 christos
2316 1.1 christos /* at this point, n can only be a leaf node */
2317 1.1 christos INSIST(!is_branch(n));
2318 1.1 christos
2319 1.1 christos foundlen = leaf_qpkey(qp, n, found);
2320 1.1 christos offset = qpkey_compare(search, searchlen, found, foundlen);
2321 1.1 christos
2322 1.1 christos /* the search ended with an exact or partial match */
2323 1.1 christos if (offset == QPKEY_EQUAL || offset == foundlen) {
2324 1.1 christos isc_result_t result = ISC_R_SUCCESS;
2325 1.1 christos
2326 1.1 christos if (offset == foundlen) {
2327 1.1 christos fix_chain(chain, offset);
2328 1.1 christos result = DNS_R_PARTIALMATCH;
2329 1.1 christos }
2330 1.1 christos add_link(chain, n, offset);
2331 1.1 christos
2332 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n));
2333 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n));
2334 1.1 christos maybe_set_name(qp, n, foundname);
2335 1.1 christos return result;
2336 1.1 christos }
2337 1.1 christos
2338 1.1 christos /*
2339 1.1 christos * the requested name was not found, but if an ancestor
2340 1.1 christos * was, we can retrieve that from the chain.
2341 1.1 christos */
2342 1.1 christos int len = chain->len;
2343 1.1 christos while (len-- > 0) {
2344 1.1 christos if (offset >= chain->chain[len].offset) {
2345 1.1 christos n = chain->chain[len].node;
2346 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n));
2347 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n));
2348 1.1 christos maybe_set_name(qp, n, foundname);
2349 1.1 christos return DNS_R_PARTIALMATCH;
2350 1.1 christos } else {
2351 1.1 christos /*
2352 1.1 christos * oops, during the search we found and added
2353 1.1 christos * a leaf that's longer than the requested
2354 1.1 christos * name; remove it from the chain.
2355 1.1 christos */
2356 1.1 christos chain->len--;
2357 1.1 christos }
2358 1.1 christos }
2359 1.1 christos
2360 1.1 christos /* nothing was found at all */
2361 1.1 christos return ISC_R_NOTFOUND;
2362 1.1 christos }
2363 1.1 christos
2364 1.1 christos /**********************************************************************/
2365