1 1.4 christos /* $NetBSD: qp.c,v 1.5 2026/01/29 18:37:49 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 1.1 christos * 6 1.1 christos * SPDX-License-Identifier: MPL-2.0 7 1.1 christos * 8 1.1 christos * This Source Code Form is subject to the terms of the Mozilla Public 9 1.1 christos * License, v. 2.0. If a copy of the MPL was not distributed with this 10 1.1 christos * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 1.1 christos * 12 1.1 christos * See the COPYRIGHT file distributed with this work for additional 13 1.1 christos * information regarding copyright ownership. 14 1.1 christos */ 15 1.1 christos 16 1.1 christos /* 17 1.1 christos * For an overview, see doc/design/qp-trie.md 18 1.1 christos */ 19 1.1 christos 20 1.1 christos #include <inttypes.h> 21 1.1 christos #include <stdbool.h> 22 1.1 christos #include <stddef.h> 23 1.1 christos #include <stdint.h> 24 1.1 christos #include <string.h> 25 1.1 christos 26 1.1 christos #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 27 1.1 christos #include <sys/mman.h> 28 1.1 christos #include <unistd.h> 29 1.1 christos #endif 30 1.1 christos 31 1.1 christos #include <isc/atomic.h> 32 1.1 christos #include <isc/buffer.h> 33 1.1 christos #include <isc/magic.h> 34 1.1 christos #include <isc/mem.h> 35 1.1 christos #include <isc/mutex.h> 36 1.1 christos #include <isc/refcount.h> 37 1.1 christos #include <isc/result.h> 38 1.1 christos #include <isc/rwlock.h> 39 1.1 christos #include <isc/tid.h> 40 1.1 christos #include <isc/time.h> 41 1.1 christos #include <isc/types.h> 42 1.1 christos #include <isc/urcu.h> 43 1.1 christos #include <isc/util.h> 44 1.1 christos 45 1.1 christos #include <dns/fixedname.h> 46 1.1 christos #include <dns/log.h> 47 1.1 christos #include <dns/name.h> 48 1.1 christos #include <dns/qp.h> 49 1.1 christos #include <dns/types.h> 50 1.1 christos 51 1.1 christos #include "qp_p.h" 52 1.1 christos 53 1.1 christos #ifndef DNS_QP_LOG_STATS 54 1.5 christos #define DNS_QP_LOG_STATS 0 55 1.1 christos #endif 56 1.1 christos #ifndef DNS_QP_TRACE 57 1.1 christos #define DNS_QP_TRACE 0 58 1.1 christos #endif 59 1.1 christos 60 1.1 christos /* 61 1.1 christos * very basic garbage collector statistics 62 1.1 christos * 63 1.1 christos * XXXFANF for now we're logging GC times, but ideally we should 64 1.1 christos * accumulate stats more quietly and report via the statschannel 65 1.1 christos */ 66 1.3 christos #ifdef _LP64 67 1.1 christos static atomic_uint_fast64_t compact_time; 68 1.1 christos static atomic_uint_fast64_t recycle_time; 69 1.1 christos static atomic_uint_fast64_t rollback_time; 70 1.3 christos #define ISC_QP_ADD(v, a) atomic_fetch_add_relaxed(&(v), (a)) 71 1.4 christos #define ISC_QP_GET(v) atomic_load_relaxed(&(v)) 72 1.3 christos #else 73 1.3 christos static uint64_t compact_time; 74 1.3 christos static uint64_t recycle_time; 75 1.3 christos static uint64_t rollback_time; 76 1.3 christos static isc_mutex_t qp_mutex = PTHREAD_MUTEX_INITIALIZER; 77 1.3 christos #define ISC_QP_ADD(v, a) \ 78 1.3 christos ({ \ 79 1.3 christos isc_mutex_lock(&qp_mutex); \ 80 1.3 christos uint64_t x = (v) + (a); \ 81 1.3 christos isc_mutex_unlock(&qp_mutex); \ 82 1.3 christos x; \ 83 1.3 christos }) 84 1.3 christos #define ISC_QP_GET(v) \ 85 1.3 christos ({ \ 86 1.3 christos isc_mutex_lock(&qp_mutex); \ 87 1.3 christos uint64_t x = (v); \ 88 1.3 christos isc_mutex_unlock(&qp_mutex); \ 89 1.3 christos x; \ 90 1.3 christos }) 91 1.3 christos #endif 92 1.3 christos 93 1.1 christos 94 1.1 christos /* for LOG_STATS() format strings */ 95 1.1 christos #define PRItime " %" PRIu64 " ns " 96 1.1 christos 97 1.1 christos #if DNS_QP_LOG_STATS 98 1.1 christos #define LOG_STATS(...) \ 99 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_QP, \ 100 1.1 christos ISC_LOG_DEBUG(1), __VA_ARGS__) 101 1.1 christos #else 102 1.1 christos #define LOG_STATS(...) 103 1.1 christos #endif 104 1.1 christos 105 1.1 christos #if DNS_QP_TRACE 106 1.1 christos /* 107 1.1 christos * TRACE is generally used in allocation-related functions so it doesn't 108 1.1 christos * trace very high-frequency ops 109 1.1 christos */ 110 1.1 christos #define TRACE(fmt, ...) \ 111 1.1 christos do { \ 112 1.1 christos if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(7))) { \ 113 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, \ 114 1.1 christos DNS_LOGMODULE_QP, ISC_LOG_DEBUG(7), \ 115 1.1 christos "%s:%d:%s(qp %p uctx \"%s\"):t%u: " fmt, \ 116 1.1 christos __FILE__, __LINE__, __func__, qp, \ 117 1.1 christos qp ? TRIENAME(qp) : "(null)", isc_tid(), \ 118 1.1 christos ##__VA_ARGS__); \ 119 1.1 christos } \ 120 1.1 christos } while (0) 121 1.1 christos #else 122 1.1 christos #define TRACE(...) 123 1.1 christos #endif 124 1.1 christos 125 1.5 christos #if DNS_QPMULTI_TRACE 126 1.5 christos ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti); 127 1.5 christos #define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__) 128 1.5 christos #define dns_qpmulti_unref(ptr) \ 129 1.5 christos dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__) 130 1.5 christos #define dns_qpmulti_attach(ptr, ptrp) \ 131 1.5 christos dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__) 132 1.5 christos #define dns_qpmulti_detach(ptrp) \ 133 1.5 christos dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__) 134 1.5 christos #else 135 1.5 christos ISC_REFCOUNT_STATIC_DECL(dns_qpmulti); 136 1.5 christos #endif 137 1.5 christos 138 1.1 christos /*********************************************************************** 139 1.1 christos * 140 1.1 christos * converting DNS names to trie keys 141 1.1 christos */ 142 1.1 christos 143 1.1 christos /* 144 1.1 christos * Number of distinct byte values, i.e. 256 145 1.1 christos */ 146 1.1 christos #define BYTE_VALUES (UINT8_MAX + 1) 147 1.1 christos 148 1.1 christos /* 149 1.1 christos * Lookup table mapping bytes in DNS names to bit positions, used 150 1.1 christos * by dns_qpkey_fromname() to convert DNS names to qp-trie keys. 151 1.1 christos * 152 1.1 christos * Each element holds one or two bit positions, bit_one in the 153 1.1 christos * lower half and bit_two in the upper half. 154 1.1 christos * 155 1.1 christos * For common hostname characters, bit_two is zero (which cannot 156 1.1 christos * be a valid bit position). 157 1.1 christos * 158 1.1 christos * For others, bit_one is the escape bit, and bit_two is the 159 1.1 christos * position of the character within the escaped range. 160 1.1 christos */ 161 1.1 christos uint16_t dns_qp_bits_for_byte[BYTE_VALUES] = { 0 }; 162 1.1 christos 163 1.1 christos /* 164 1.1 christos * And the reverse, mapping bit positions to characters, so the tests 165 1.1 christos * can print diagnostics involving qp-trie keys. 166 1.1 christos * 167 1.1 christos * This table only handles the first bit in an escape sequence; we 168 1.1 christos * arrange that we can calculate the byte value for both bits by 169 1.1 christos * adding the the second bit to the first bit's byte value. 170 1.1 christos */ 171 1.1 christos uint8_t dns_qp_byte_for_bit[SHIFT_OFFSET] = { 0 }; 172 1.1 christos 173 1.1 christos /* 174 1.1 christos * Fill in the lookup tables at program startup. (It doesn't matter 175 1.1 christos * when this is initialized relative to other startup code.) 176 1.1 christos */ 177 1.1 christos static void 178 1.1 christos initialize_bits_for_byte(void) ISC_CONSTRUCTOR; 179 1.1 christos 180 1.1 christos /* 181 1.1 christos * The bit positions for bytes inside labels have to be between 182 1.1 christos * SHIFT_BITMAP and SHIFT_OFFSET. (SHIFT_NOBYTE separates labels.) 183 1.1 christos * 184 1.1 christos * Each byte range in between common hostname characters has a different 185 1.1 christos * escape character, to preserve the correct lexical order. 186 1.1 christos * 187 1.1 christos * Escaped byte ranges mostly fit into the space available in the 188 1.1 christos * bitmap, except for those above 'z' (which is mostly bytes with the 189 1.1 christos * top bit set). So, when we reach the end of the bitmap we roll over 190 1.1 christos * to the next escape character. 191 1.1 christos * 192 1.1 christos * After filling the table we ensure that the bit positions for 193 1.1 christos * hostname characters and escape characters all fit. 194 1.1 christos */ 195 1.1 christos static void 196 1.1 christos initialize_bits_for_byte(void) { 197 1.1 christos /* zero common character marker not a valid shift position */ 198 1.1 christos INSIST(0 < SHIFT_BITMAP); 199 1.1 christos /* first bit is common byte or escape byte */ 200 1.1 christos dns_qpshift_t bit_one = SHIFT_BITMAP; 201 1.1 christos /* second bit is position in escaped range */ 202 1.1 christos dns_qpshift_t bit_two = SHIFT_BITMAP; 203 1.1 christos bool escaping = true; 204 1.1 christos 205 1.1 christos for (unsigned int byte = 0; byte < BYTE_VALUES; byte++) { 206 1.1 christos if (qp_common_character(byte)) { 207 1.1 christos escaping = false; 208 1.1 christos bit_one++; 209 1.1 christos dns_qp_byte_for_bit[bit_one] = byte; 210 1.1 christos dns_qp_bits_for_byte[byte] = bit_one; 211 1.1 christos } else if ('A' <= byte && byte <= 'Z') { 212 1.1 christos /* map upper case to lower case */ 213 1.1 christos dns_qpshift_t after_esc = bit_one + 1; 214 1.1 christos dns_qpshift_t skip_punct = 'a' - '_'; 215 1.1 christos dns_qpshift_t letter = byte - 'A'; 216 1.1 christos dns_qpshift_t bit = after_esc + skip_punct + letter; 217 1.1 christos dns_qp_bits_for_byte[byte] = bit; 218 1.1 christos /* to simplify reverse conversion */ 219 1.1 christos bit_two++; 220 1.1 christos } else { 221 1.1 christos /* non-hostname characters need to be escaped */ 222 1.1 christos if (!escaping || bit_two >= SHIFT_OFFSET) { 223 1.1 christos escaping = true; 224 1.1 christos bit_one++; 225 1.1 christos dns_qp_byte_for_bit[bit_one] = byte; 226 1.1 christos bit_two = SHIFT_BITMAP; 227 1.1 christos } 228 1.1 christos dns_qp_bits_for_byte[byte] = bit_two << 8 | bit_one; 229 1.1 christos bit_two++; 230 1.1 christos } 231 1.1 christos } 232 1.1 christos ENSURE(bit_one < SHIFT_OFFSET); 233 1.1 christos } 234 1.1 christos 235 1.1 christos /* 236 1.1 christos * Convert a DNS name into a trie lookup key. 237 1.1 christos * 238 1.1 christos * Returns the length of the key. 239 1.1 christos * 240 1.1 christos * For performance we get our hands dirty in the guts of the name. 241 1.1 christos * 242 1.1 christos * We don't worry about the distinction between absolute and relative 243 1.1 christos * names. When the trie is only used with absolute names, the first byte 244 1.1 christos * of the key will always be SHIFT_NOBYTE and it will always be skipped 245 1.1 christos * when traversing the trie. So keeping the root label costs little, and 246 1.1 christos * it allows us to support tries of relative names too. In fact absolute 247 1.1 christos * and relative names can be mixed in the same trie without causing 248 1.1 christos * confusion, because the presence or absence of the initial 249 1.1 christos * SHIFT_NOBYTE in the key disambiguates them (exactly like a trailing 250 1.1 christos * dot in a zone file). 251 1.1 christos */ 252 1.1 christos size_t 253 1.1 christos dns_qpkey_fromname(dns_qpkey_t key, const dns_name_t *name) { 254 1.1 christos size_t len, label; 255 1.1 christos dns_fixedname_t fixed; 256 1.1 christos 257 1.1 christos REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC)); 258 1.1 christos 259 1.1 christos if (name->labels == 0) { 260 1.1 christos key[0] = SHIFT_NOBYTE; 261 1.1 christos return 0; 262 1.1 christos } 263 1.1 christos 264 1.1 christos if (name->offsets == NULL) { 265 1.1 christos dns_name_t *clone = dns_fixedname_initname(&fixed); 266 1.1 christos dns_name_clone(name, clone); 267 1.1 christos name = clone; 268 1.1 christos } 269 1.1 christos 270 1.1 christos len = 0; 271 1.1 christos label = name->labels; 272 1.1 christos while (label-- > 0) { 273 1.1 christos const uint8_t *ldata = name->ndata + name->offsets[label]; 274 1.1 christos size_t label_len = *ldata++; 275 1.1 christos while (label_len-- > 0) { 276 1.1 christos uint16_t bits = dns_qp_bits_for_byte[*ldata++]; 277 1.1 christos key[len++] = bits & 0xFF; /* bit_one */ 278 1.1 christos if ((bits >> 8) != 0) { /* escape? */ 279 1.1 christos key[len++] = bits >> 8; /* bit_two */ 280 1.1 christos } 281 1.1 christos } 282 1.1 christos /* label terminator */ 283 1.1 christos key[len++] = SHIFT_NOBYTE; 284 1.1 christos } 285 1.1 christos /* mark end with a double NOBYTE */ 286 1.1 christos key[len] = SHIFT_NOBYTE; 287 1.1 christos ENSURE(len < sizeof(dns_qpkey_t)); 288 1.1 christos return len; 289 1.1 christos } 290 1.1 christos 291 1.1 christos void 292 1.1 christos dns_qpkey_toname(const dns_qpkey_t key, size_t keylen, dns_name_t *name) { 293 1.1 christos size_t locs[DNS_NAME_MAXLABELS]; 294 1.1 christos size_t loc = 0, opos = 0; 295 1.1 christos size_t offset; 296 1.1 christos 297 1.1 christos REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC)); 298 1.1 christos REQUIRE(name->buffer != NULL); 299 1.1 christos REQUIRE(name->offsets != NULL); 300 1.1 christos 301 1.1 christos dns_name_reset(name); 302 1.1 christos 303 1.1 christos if (keylen == 0) { 304 1.1 christos return; 305 1.1 christos } 306 1.1 christos 307 1.1 christos /* Scan the key looking for label boundaries */ 308 1.1 christos for (offset = 0; offset <= keylen; offset++) { 309 1.1 christos INSIST(key[offset] >= SHIFT_NOBYTE && 310 1.1 christos key[offset] < SHIFT_OFFSET); 311 1.1 christos INSIST(loc < DNS_NAME_MAXLABELS); 312 1.1 christos if (qpkey_bit(key, keylen, offset) == SHIFT_NOBYTE) { 313 1.1 christos if (qpkey_bit(key, keylen, offset + 1) == SHIFT_NOBYTE) 314 1.1 christos { 315 1.1 christos locs[loc] = offset + 1; 316 1.1 christos goto scanned; 317 1.1 christos } 318 1.1 christos locs[loc++] = offset + 1; 319 1.1 christos } else if (offset == 0) { 320 1.1 christos /* This happens for a relative name */ 321 1.1 christos locs[loc++] = offset; 322 1.1 christos } 323 1.1 christos } 324 1.1 christos UNREACHABLE(); 325 1.1 christos scanned: 326 1.1 christos 327 1.1 christos /* 328 1.1 christos * In the key the labels are encoded in reverse order, so 329 1.1 christos * we step backward through the label boundaries, then forward 330 1.1 christos * through the labels, to create the DNS wire format data. 331 1.1 christos */ 332 1.1 christos name->labels = loc; 333 1.1 christos while (loc-- > 0) { 334 1.1 christos uint8_t len = 0, *lenp = NULL; 335 1.1 christos 336 1.1 christos /* Add a length byte to the name data and set an offset */ 337 1.1 christos lenp = isc_buffer_used(name->buffer); 338 1.1 christos isc_buffer_putuint8(name->buffer, 0); 339 1.1 christos name->offsets[opos++] = name->length++; 340 1.1 christos 341 1.1 christos /* Convert from escaped byte ranges to ASCII */ 342 1.1 christos for (offset = locs[loc]; offset < locs[loc + 1] - 1; offset++) { 343 1.1 christos uint8_t bit = qpkey_bit(key, keylen, offset); 344 1.1 christos uint8_t byte = dns_qp_byte_for_bit[bit]; 345 1.1 christos if (qp_common_character(byte)) { 346 1.1 christos isc_buffer_putuint8(name->buffer, byte); 347 1.1 christos } else { 348 1.1 christos byte += key[++offset] - SHIFT_BITMAP; 349 1.1 christos isc_buffer_putuint8(name->buffer, byte); 350 1.1 christos } 351 1.1 christos len++; 352 1.1 christos } 353 1.1 christos 354 1.1 christos name->length += len; 355 1.1 christos *lenp = len; 356 1.1 christos } 357 1.1 christos 358 1.1 christos /* Add a root label for absolute names */ 359 1.1 christos if (key[0] == SHIFT_NOBYTE) { 360 1.1 christos name->attributes.absolute = true; 361 1.1 christos isc_buffer_putuint8(name->buffer, 0); 362 1.1 christos name->offsets[opos++] = name->length++; 363 1.1 christos name->labels++; 364 1.1 christos } 365 1.1 christos 366 1.1 christos name->ndata = isc_buffer_base(name->buffer); 367 1.1 christos } 368 1.1 christos 369 1.1 christos /* 370 1.1 christos * Sentinel value for equal keys 371 1.1 christos */ 372 1.1 christos #define QPKEY_EQUAL (~(size_t)0) 373 1.1 christos 374 1.1 christos /* 375 1.1 christos * Compare two keys and return the offset where they differ. 376 1.1 christos * 377 1.1 christos * This offset is used to work out where a trie search diverged: when one 378 1.1 christos * of the keys is in the trie and one is not, the common prefix (up to the 379 1.1 christos * offset) is the part of the unknown key that exists in the trie. This 380 1.1 christos * matters for adding new keys or finding neighbours of missing keys. 381 1.1 christos * 382 1.1 christos * When the keys are different lengths it is possible (but unwise) for 383 1.1 christos * the longer key to be the same as the shorter key but with superfluous 384 1.1 christos * trailing SHIFT_NOBYTE elements. This makes the keys equal for the 385 1.1 christos * purpose of traversing the trie. 386 1.1 christos */ 387 1.1 christos static size_t 388 1.1 christos qpkey_compare(const dns_qpkey_t key_a, const size_t keylen_a, 389 1.1 christos const dns_qpkey_t key_b, const size_t keylen_b) { 390 1.1 christos size_t keylen = ISC_MAX(keylen_a, keylen_b); 391 1.1 christos for (size_t offset = 0; offset < keylen; offset++) { 392 1.1 christos if (qpkey_bit(key_a, keylen_a, offset) != 393 1.1 christos qpkey_bit(key_b, keylen_b, offset)) 394 1.1 christos { 395 1.1 christos return offset; 396 1.1 christos } 397 1.1 christos } 398 1.1 christos return QPKEY_EQUAL; 399 1.1 christos } 400 1.1 christos 401 1.1 christos /*********************************************************************** 402 1.1 christos * 403 1.1 christos * allocator wrappers 404 1.1 christos */ 405 1.1 christos 406 1.1 christos #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 407 1.1 christos 408 1.1 christos /* 409 1.1 christos * Optionally (for debugging) during a copy-on-write transaction, use 410 1.1 christos * memory protection to ensure that the shared chunks are not modified. 411 1.1 christos * Once a chunk becomes shared, it remains read-only until it is freed. 412 1.1 christos * POSIX says we have to use mmap() to get an allocation that we can 413 1.1 christos * definitely pass to mprotect(). 414 1.1 christos */ 415 1.1 christos 416 1.1 christos static size_t 417 1.1 christos chunk_size_raw(void) { 418 1.1 christos size_t size = (size_t)sysconf(_SC_PAGE_SIZE); 419 1.1 christos return ISC_MAX(size, QP_CHUNK_BYTES); 420 1.1 christos } 421 1.1 christos 422 1.1 christos static void * 423 1.1 christos chunk_get_raw(dns_qp_t *qp) { 424 1.1 christos if (qp->write_protect) { 425 1.1 christos size_t size = chunk_size_raw(); 426 1.1 christos void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, 427 1.1 christos MAP_ANON | MAP_PRIVATE, -1, 0); 428 1.1 christos RUNTIME_CHECK(ptr != MAP_FAILED); 429 1.1 christos return ptr; 430 1.1 christos } else { 431 1.1 christos return isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES); 432 1.1 christos } 433 1.1 christos } 434 1.1 christos 435 1.1 christos static void 436 1.1 christos chunk_free_raw(dns_qp_t *qp, void *ptr) { 437 1.1 christos if (qp->write_protect) { 438 1.1 christos RUNTIME_CHECK(munmap(ptr, chunk_size_raw()) == 0); 439 1.1 christos } else { 440 1.1 christos isc_mem_free(qp->mctx, ptr); 441 1.1 christos } 442 1.1 christos } 443 1.1 christos 444 1.1 christos static void * 445 1.1 christos chunk_shrink_raw(dns_qp_t *qp, void *ptr, size_t bytes) { 446 1.1 christos if (qp->write_protect) { 447 1.1 christos return ptr; 448 1.1 christos } else { 449 1.1 christos return isc_mem_reallocate(qp->mctx, ptr, bytes); 450 1.1 christos } 451 1.1 christos } 452 1.1 christos 453 1.1 christos static void 454 1.1 christos write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) { 455 1.1 christos if (qp->write_protect) { 456 1.1 christos /* see transaction_open() wrt this special case */ 457 1.1 christos if (qp->transaction_mode == QP_WRITE && chunk == qp->bump) { 458 1.1 christos return; 459 1.1 christos } 460 1.1 christos TRACE("chunk %u", chunk); 461 1.1 christos void *ptr = qp->base->ptr[chunk]; 462 1.1 christos size_t size = chunk_size_raw(); 463 1.1 christos RUNTIME_CHECK(mprotect(ptr, size, PROT_READ) >= 0); 464 1.1 christos } 465 1.1 christos } 466 1.1 christos 467 1.1 christos #else 468 1.1 christos 469 1.5 christos #define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size) 470 1.1 christos #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr) 471 1.1 christos 472 1.1 christos #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size) 473 1.1 christos 474 1.1 christos #define write_protect(qp, chunk) 475 1.1 christos 476 1.1 christos #endif 477 1.1 christos 478 1.1 christos /*********************************************************************** 479 1.1 christos * 480 1.1 christos * allocator 481 1.1 christos */ 482 1.1 christos 483 1.1 christos /* 484 1.1 christos * When we reuse the bump chunk across multiple write transactions, 485 1.1 christos * it can have an immutable prefix and a mutable suffix. 486 1.1 christos */ 487 1.1 christos static inline bool 488 1.1 christos cells_immutable(dns_qp_t *qp, dns_qpref_t ref) { 489 1.1 christos dns_qpchunk_t chunk = ref_chunk(ref); 490 1.1 christos dns_qpcell_t cell = ref_cell(ref); 491 1.1 christos if (chunk == qp->bump) { 492 1.1 christos return cell < qp->fender; 493 1.1 christos } else { 494 1.1 christos return qp->usage[chunk].immutable; 495 1.1 christos } 496 1.1 christos } 497 1.1 christos 498 1.1 christos /* 499 1.5 christos * Find the next power that is both bigger than size and prev_capacity, 500 1.5 christos * but still within the chunk min and max sizes. 501 1.5 christos */ 502 1.5 christos static dns_qpcell_t 503 1.5 christos next_capacity(uint32_t prev_capacity, uint32_t size) { 504 1.5 christos /* 505 1.5 christos * Unfortunately builtin_clz is undefined for 0. We work around this 506 1.5 christos * issue by flooring the request size at 2. 507 1.5 christos */ 508 1.5 christos size = ISC_MAX3(size, prev_capacity, 2u); 509 1.5 christos uint32_t log2 = 32u - __builtin_clz(size - 1u); 510 1.5 christos 511 1.5 christos return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX); 512 1.5 christos } 513 1.5 christos 514 1.5 christos /* 515 1.1 christos * Create a fresh bump chunk and allocate some twigs from it. 516 1.1 christos */ 517 1.1 christos static dns_qpref_t 518 1.1 christos chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) { 519 1.1 christos INSIST(qp->base->ptr[chunk] == NULL); 520 1.1 christos INSIST(qp->usage[chunk].used == 0); 521 1.1 christos INSIST(qp->usage[chunk].free == 0); 522 1.5 christos INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE); 523 1.1 christos 524 1.5 christos qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size); 525 1.5 christos qp->base->ptr[chunk] = 526 1.5 christos chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t)); 527 1.5 christos 528 1.5 christos qp->usage[chunk] = (qp_usage_t){ .exists = true, 529 1.5 christos .used = size, 530 1.5 christos .capacity = qp->chunk_capacity }; 531 1.1 christos qp->used_count += size; 532 1.1 christos qp->bump = chunk; 533 1.1 christos qp->fender = 0; 534 1.1 christos 535 1.1 christos if (qp->write_protect) { 536 1.1 christos TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]); 537 1.1 christos } 538 1.1 christos return make_ref(chunk, 0); 539 1.1 christos } 540 1.1 christos 541 1.1 christos /* 542 1.1 christos * This is used to grow the chunk arrays when they fill up. If the old 543 1.1 christos * base array is in use by readers, we must make a clone, otherwise we 544 1.1 christos * can reallocate in place. 545 1.1 christos * 546 1.1 christos * The isc_refcount_init() and qpbase_unref() in this function are a pair. 547 1.1 christos */ 548 1.1 christos static void 549 1.1 christos realloc_chunk_arrays(dns_qp_t *qp, dns_qpchunk_t newmax) { 550 1.1 christos size_t oldptrs = sizeof(qp->base->ptr[0]) * qp->chunk_max; 551 1.1 christos size_t newptrs = sizeof(qp->base->ptr[0]) * newmax; 552 1.1 christos size_t size = STRUCT_FLEX_SIZE(qp->base, ptr, newmax); 553 1.1 christos 554 1.1 christos if (qp->base == NULL || qpbase_unref(qp)) { 555 1.1 christos qp->base = isc_mem_reallocate(qp->mctx, qp->base, size); 556 1.1 christos } else { 557 1.1 christos dns_qpbase_t *oldbase = qp->base; 558 1.1 christos qp->base = isc_mem_allocate(qp->mctx, size); 559 1.1 christos memmove(&qp->base->ptr[0], &oldbase->ptr[0], oldptrs); 560 1.1 christos } 561 1.1 christos memset(&qp->base->ptr[qp->chunk_max], 0, newptrs - oldptrs); 562 1.1 christos isc_refcount_init(&qp->base->refcount, 1); 563 1.1 christos qp->base->magic = QPBASE_MAGIC; 564 1.1 christos 565 1.1 christos /* usage array is exclusive to the writer */ 566 1.1 christos size_t oldusage = sizeof(qp->usage[0]) * qp->chunk_max; 567 1.1 christos size_t newusage = sizeof(qp->usage[0]) * newmax; 568 1.1 christos qp->usage = isc_mem_reallocate(qp->mctx, qp->usage, newusage); 569 1.1 christos memset(&qp->usage[qp->chunk_max], 0, newusage - oldusage); 570 1.1 christos 571 1.1 christos qp->chunk_max = newmax; 572 1.1 christos 573 1.1 christos TRACE("qpbase %p usage %p max %u", qp->base, qp->usage, qp->chunk_max); 574 1.1 christos } 575 1.1 christos 576 1.1 christos /* 577 1.1 christos * There was no space in the bump chunk, so find a place to put a fresh 578 1.1 christos * chunk in the chunk arrays, then allocate some twigs from it. 579 1.1 christos */ 580 1.1 christos static dns_qpref_t 581 1.1 christos alloc_slow(dns_qp_t *qp, dns_qpweight_t size) { 582 1.1 christos dns_qpchunk_t chunk; 583 1.1 christos 584 1.1 christos for (chunk = 0; chunk < qp->chunk_max; chunk++) { 585 1.1 christos if (!qp->usage[chunk].exists) { 586 1.1 christos return chunk_alloc(qp, chunk, size); 587 1.1 christos } 588 1.1 christos } 589 1.1 christos ENSURE(chunk == qp->chunk_max); 590 1.1 christos realloc_chunk_arrays(qp, GROWTH_FACTOR(chunk)); 591 1.1 christos return chunk_alloc(qp, chunk, size); 592 1.1 christos } 593 1.1 christos 594 1.1 christos /* 595 1.1 christos * Ensure we are using a fresh bump chunk. 596 1.1 christos */ 597 1.1 christos static void 598 1.1 christos alloc_reset(dns_qp_t *qp) { 599 1.1 christos (void)alloc_slow(qp, 0); 600 1.1 christos } 601 1.1 christos 602 1.1 christos /* 603 1.1 christos * Allocate some fresh twigs. This is the bump allocator fast path. 604 1.1 christos */ 605 1.1 christos static inline dns_qpref_t 606 1.1 christos alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) { 607 1.1 christos dns_qpchunk_t chunk = qp->bump; 608 1.1 christos dns_qpcell_t cell = qp->usage[chunk].used; 609 1.1 christos 610 1.5 christos if (cell + size <= qp->usage[chunk].capacity) { 611 1.1 christos qp->usage[chunk].used += size; 612 1.1 christos qp->used_count += size; 613 1.1 christos return make_ref(chunk, cell); 614 1.1 christos } else { 615 1.1 christos return alloc_slow(qp, size); 616 1.1 christos } 617 1.1 christos } 618 1.1 christos 619 1.1 christos /* 620 1.1 christos * Record that some twigs are no longer being used, and if possible 621 1.1 christos * zero them to ensure that there isn't a spurious double detach when 622 1.1 christos * the chunk is later recycled. 623 1.1 christos * 624 1.1 christos * Returns true if the twigs were immediately destroyed. 625 1.1 christos * 626 1.1 christos * NOTE: the caller is responsible for attaching or detaching any 627 1.1 christos * leaves as required. 628 1.1 christos */ 629 1.1 christos static inline bool 630 1.1 christos free_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) { 631 1.1 christos dns_qpchunk_t chunk = ref_chunk(twigs); 632 1.1 christos 633 1.1 christos qp->free_count += size; 634 1.1 christos qp->usage[chunk].free += size; 635 1.1 christos ENSURE(qp->free_count <= qp->used_count); 636 1.1 christos ENSURE(qp->usage[chunk].free <= qp->usage[chunk].used); 637 1.1 christos 638 1.1 christos if (cells_immutable(qp, twigs)) { 639 1.1 christos qp->hold_count += size; 640 1.1 christos ENSURE(qp->free_count >= qp->hold_count); 641 1.1 christos return false; 642 1.1 christos } else { 643 1.1 christos zero_twigs(ref_ptr(qp, twigs), size); 644 1.1 christos return true; 645 1.1 christos } 646 1.1 christos } 647 1.1 christos 648 1.1 christos /* 649 1.1 christos * When some twigs have been copied, and free_twigs() could not 650 1.1 christos * immediately destroy the old copy, we need to update the refcount 651 1.1 christos * on any leaves that were duplicated. 652 1.1 christos */ 653 1.1 christos static void 654 1.1 christos attach_twigs(dns_qp_t *qp, dns_qpnode_t *twigs, dns_qpweight_t size) { 655 1.1 christos for (dns_qpweight_t pos = 0; pos < size; pos++) { 656 1.1 christos if (node_tag(&twigs[pos]) == LEAF_TAG) { 657 1.1 christos attach_leaf(qp, &twigs[pos]); 658 1.1 christos } 659 1.1 christos } 660 1.1 christos } 661 1.1 christos 662 1.1 christos /*********************************************************************** 663 1.1 christos * 664 1.1 christos * chunk reclamation 665 1.1 christos */ 666 1.1 christos 667 1.1 christos /* 668 1.1 christos * Is any of this chunk still in use? 669 1.1 christos */ 670 1.1 christos static inline dns_qpcell_t 671 1.1 christos chunk_usage(dns_qp_t *qp, dns_qpchunk_t chunk) { 672 1.1 christos return qp->usage[chunk].used - qp->usage[chunk].free; 673 1.1 christos } 674 1.1 christos 675 1.1 christos /* 676 1.1 christos * We remove each empty chunk from the total counts when the chunk is 677 1.1 christos * freed, or when it is scheduled for safe memory reclamation. We check 678 1.1 christos * the chunk's phase to avoid discounting it twice in the latter case. 679 1.1 christos */ 680 1.1 christos static void 681 1.1 christos chunk_discount(dns_qp_t *qp, dns_qpchunk_t chunk) { 682 1.1 christos if (qp->usage[chunk].discounted) { 683 1.1 christos return; 684 1.1 christos } 685 1.1 christos INSIST(qp->used_count >= qp->usage[chunk].used); 686 1.1 christos INSIST(qp->free_count >= qp->usage[chunk].free); 687 1.1 christos qp->used_count -= qp->usage[chunk].used; 688 1.1 christos qp->free_count -= qp->usage[chunk].free; 689 1.1 christos qp->usage[chunk].discounted = true; 690 1.1 christos } 691 1.1 christos 692 1.1 christos /* 693 1.1 christos * When a chunk is being recycled, we need to detach any leaves that 694 1.1 christos * remain, and free any `base` arrays that have been marked as unused. 695 1.1 christos */ 696 1.1 christos static void 697 1.1 christos chunk_free(dns_qp_t *qp, dns_qpchunk_t chunk) { 698 1.1 christos if (qp->write_protect) { 699 1.1 christos TRACE("chunk %u base %p", chunk, qp->base->ptr[chunk]); 700 1.1 christos } 701 1.1 christos 702 1.1 christos dns_qpnode_t *n = qp->base->ptr[chunk]; 703 1.1 christos for (dns_qpcell_t count = qp->usage[chunk].used; count > 0; 704 1.1 christos count--, n++) 705 1.1 christos { 706 1.1 christos if (node_tag(n) == LEAF_TAG && node_pointer(n) != NULL) { 707 1.1 christos detach_leaf(qp, n); 708 1.1 christos } else if (count > 1 && reader_valid(n)) { 709 1.1 christos dns_qpreader_t qpr; 710 1.1 christos unpack_reader(&qpr, n); 711 1.1 christos /* pairs with dns_qpmulti_commit() */ 712 1.1 christos if (qpbase_unref(&qpr)) { 713 1.1 christos isc_mem_free(qp->mctx, qpr.base); 714 1.1 christos } 715 1.1 christos } 716 1.1 christos } 717 1.1 christos chunk_discount(qp, chunk); 718 1.1 christos chunk_free_raw(qp, qp->base->ptr[chunk]); 719 1.1 christos qp->base->ptr[chunk] = NULL; 720 1.1 christos qp->usage[chunk] = (qp_usage_t){}; 721 1.1 christos } 722 1.1 christos 723 1.1 christos /* 724 1.1 christos * Free any chunks that we can while a trie is in use. 725 1.1 christos */ 726 1.1 christos static void 727 1.1 christos recycle(dns_qp_t *qp) { 728 1.5 christos unsigned int nfree = 0; 729 1.1 christos 730 1.1 christos isc_nanosecs_t start = isc_time_monotonic(); 731 1.1 christos 732 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 733 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && 734 1.1 christos qp->usage[chunk].exists && !qp->usage[chunk].immutable) 735 1.1 christos { 736 1.1 christos chunk_free(qp, chunk); 737 1.5 christos nfree++; 738 1.1 christos } 739 1.1 christos } 740 1.1 christos 741 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start; 742 1.3 christos ISC_QP_ADD(recycle_time, time); 743 1.1 christos 744 1.5 christos if (nfree > 0) { 745 1.5 christos LOG_STATS("qp recycle" PRItime "free %u chunks", time, nfree); 746 1.1 christos LOG_STATS("qp recycle leaf %u live %u used %u free %u hold %u", 747 1.1 christos qp->leaf_count, qp->used_count - qp->free_count, 748 1.1 christos qp->used_count, qp->free_count, qp->hold_count); 749 1.1 christos } 750 1.1 christos } 751 1.1 christos 752 1.1 christos /* 753 1.1 christos * asynchronous cleanup 754 1.1 christos */ 755 1.1 christos static void 756 1.1 christos reclaim_chunks_cb(struct rcu_head *arg) { 757 1.1 christos qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); 758 1.1 christos REQUIRE(QPRCU_VALID(rcuctx)); 759 1.1 christos dns_qpmulti_t *multi = rcuctx->multi; 760 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 761 1.1 christos 762 1.1 christos LOCK(&multi->mutex); 763 1.5 christos dns_qp_t *qp = &multi->writer; 764 1.1 christos 765 1.5 christos /* 766 1.5 christos * If chunk_max is zero, chunks have already been freed. 767 1.5 christos */ 768 1.5 christos if (qp->chunk_max != 0) { 769 1.5 christos unsigned int free = 0; 770 1.5 christos isc_nanosecs_t start = isc_time_monotonic(); 771 1.5 christos 772 1.5 christos INSIST(QP_VALID(qp)); 773 1.5 christos 774 1.5 christos for (unsigned int i = 0; i < rcuctx->count; i++) { 775 1.5 christos dns_qpchunk_t chunk = rcuctx->chunk[i]; 776 1.5 christos if (qp->usage[chunk].snapshot) { 777 1.5 christos /* clean up when snapshot is destroyed */ 778 1.5 christos qp->usage[chunk].snapfree = true; 779 1.5 christos } else { 780 1.5 christos chunk_free(qp, chunk); 781 1.5 christos free++; 782 1.5 christos } 783 1.5 christos } 784 1.1 christos 785 1.5 christos isc_nanosecs_t time = isc_time_monotonic() - start; 786 1.5 christos recycle_time += time; 787 1.1 christos 788 1.5 christos if (free > 0) { 789 1.5 christos LOG_STATS("qp reclaim" PRItime "free %u chunks", time, 790 1.5 christos free); 791 1.5 christos LOG_STATS( 792 1.5 christos "qp reclaim leaf %u live %u used %u free %u " 793 1.5 christos "hold %u", 794 1.5 christos qp->leaf_count, qp->used_count - qp->free_count, 795 1.5 christos qp->used_count, qp->free_count, qp->hold_count); 796 1.1 christos } 797 1.1 christos } 798 1.1 christos 799 1.5 christos UNLOCK(&multi->mutex); 800 1.5 christos 801 1.5 christos dns_qpmulti_detach(&multi); 802 1.1 christos isc_mem_putanddetach(&rcuctx->mctx, rcuctx, 803 1.1 christos STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); 804 1.1 christos } 805 1.1 christos 806 1.1 christos /* 807 1.1 christos * At the end of a transaction, schedule empty but immutable chunks 808 1.1 christos * for reclamation later. 809 1.1 christos */ 810 1.1 christos static void 811 1.1 christos reclaim_chunks(dns_qpmulti_t *multi) { 812 1.1 christos dns_qp_t *qp = &multi->writer; 813 1.1 christos 814 1.1 christos unsigned int count = 0; 815 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 816 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && 817 1.1 christos qp->usage[chunk].exists && qp->usage[chunk].immutable && 818 1.1 christos !qp->usage[chunk].discounted) 819 1.1 christos { 820 1.1 christos count++; 821 1.1 christos } 822 1.1 christos } 823 1.1 christos 824 1.1 christos if (count == 0) { 825 1.1 christos return; 826 1.1 christos } 827 1.1 christos 828 1.1 christos qp_rcuctx_t *rcuctx = 829 1.1 christos isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, count)); 830 1.1 christos *rcuctx = (qp_rcuctx_t){ 831 1.1 christos .magic = QPRCU_MAGIC, 832 1.1 christos .multi = multi, 833 1.1 christos .count = count, 834 1.1 christos }; 835 1.1 christos isc_mem_attach(qp->mctx, &rcuctx->mctx); 836 1.1 christos 837 1.1 christos unsigned int i = 0; 838 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 839 1.1 christos if (chunk != qp->bump && chunk_usage(qp, chunk) == 0 && 840 1.1 christos qp->usage[chunk].exists && qp->usage[chunk].immutable && 841 1.1 christos !qp->usage[chunk].discounted) 842 1.1 christos { 843 1.1 christos rcuctx->chunk[i++] = chunk; 844 1.1 christos chunk_discount(qp, chunk); 845 1.1 christos } 846 1.1 christos } 847 1.1 christos 848 1.5 christos /* 849 1.5 christos * Reference the qpmulti object to keep it from being 850 1.5 christos * freed until reclaim_chunks_cb() runs. 851 1.5 christos */ 852 1.5 christos dns_qpmulti_ref(multi); 853 1.1 christos call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); 854 1.1 christos 855 1.1 christos LOG_STATS("qp will reclaim %u chunks", count); 856 1.1 christos } 857 1.1 christos 858 1.1 christos /* 859 1.1 christos * When a snapshot is destroyed, clean up chunks that need free()ing 860 1.1 christos * and are not used by any remaining snapshots. 861 1.1 christos */ 862 1.1 christos static void 863 1.1 christos marksweep_chunks(dns_qpmulti_t *multi) { 864 1.5 christos unsigned int nfree = 0; 865 1.1 christos 866 1.1 christos isc_nanosecs_t start = isc_time_monotonic(); 867 1.1 christos 868 1.1 christos dns_qp_t *qpw = &multi->writer; 869 1.1 christos 870 1.1 christos for (dns_qpsnap_t *qps = ISC_LIST_HEAD(multi->snapshots); qps != NULL; 871 1.1 christos qps = ISC_LIST_NEXT(qps, link)) 872 1.1 christos { 873 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qps->chunk_max; chunk++) { 874 1.1 christos if (qps->base->ptr[chunk] != NULL) { 875 1.1 christos INSIST(qps->base->ptr[chunk] == 876 1.1 christos qpw->base->ptr[chunk]); 877 1.1 christos qpw->usage[chunk].snapmark = true; 878 1.1 christos } 879 1.1 christos } 880 1.1 christos } 881 1.1 christos 882 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) { 883 1.1 christos qpw->usage[chunk].snapshot = qpw->usage[chunk].snapmark; 884 1.1 christos qpw->usage[chunk].snapmark = false; 885 1.1 christos if (qpw->usage[chunk].snapfree && !qpw->usage[chunk].snapshot) { 886 1.1 christos chunk_free(qpw, chunk); 887 1.5 christos nfree++; 888 1.1 christos } 889 1.1 christos } 890 1.1 christos 891 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start; 892 1.3 christos ISC_QP_ADD(recycle_time, time); 893 1.1 christos 894 1.5 christos if (nfree > 0) { 895 1.5 christos LOG_STATS("qp marksweep" PRItime "free %u chunks", time, nfree); 896 1.1 christos LOG_STATS( 897 1.1 christos "qp marksweep leaf %u live %u used %u free %u hold %u", 898 1.1 christos qpw->leaf_count, qpw->used_count - qpw->free_count, 899 1.1 christos qpw->used_count, qpw->free_count, qpw->hold_count); 900 1.1 christos } 901 1.1 christos } 902 1.1 christos 903 1.1 christos /*********************************************************************** 904 1.1 christos * 905 1.1 christos * garbage collector 906 1.1 christos */ 907 1.1 christos 908 1.1 christos /* 909 1.1 christos * Move a branch node's twigs to the `bump` chunk, for copy-on-write 910 1.1 christos * or for garbage collection. We don't update the node in place 911 1.1 christos * because `compact_recursive()` does not ensure the node itself is 912 1.1 christos * mutable until after it discovers evacuation was necessary. 913 1.1 christos * 914 1.1 christos * If free_twigs() could not immediately destroy the old twigs, we have 915 1.1 christos * to re-attach to any leaves. 916 1.1 christos */ 917 1.1 christos static dns_qpref_t 918 1.1 christos evacuate(dns_qp_t *qp, dns_qpnode_t *n) { 919 1.1 christos dns_qpweight_t size = branch_twigs_size(n); 920 1.1 christos dns_qpref_t old_ref = branch_twigs_ref(n); 921 1.1 christos dns_qpref_t new_ref = alloc_twigs(qp, size); 922 1.1 christos dns_qpnode_t *old_twigs = ref_ptr(qp, old_ref); 923 1.1 christos dns_qpnode_t *new_twigs = ref_ptr(qp, new_ref); 924 1.1 christos 925 1.1 christos move_twigs(new_twigs, old_twigs, size); 926 1.1 christos if (!free_twigs(qp, old_ref, size)) { 927 1.1 christos attach_twigs(qp, new_twigs, size); 928 1.1 christos } 929 1.1 christos 930 1.1 christos return new_ref; 931 1.1 christos } 932 1.1 christos 933 1.1 christos /* 934 1.1 christos * Immutable nodes need copy-on-write. As we walk down the trie finding the 935 1.1 christos * right place to modify, make_root_mutable() and make_twigs_mutable() 936 1.1 christos * are called to ensure that immutable nodes on the path from the root are 937 1.1 christos * copied to a mutable chunk. 938 1.1 christos */ 939 1.1 christos 940 1.1 christos static inline dns_qpnode_t * 941 1.1 christos make_root_mutable(dns_qp_t *qp) { 942 1.1 christos if (cells_immutable(qp, qp->root_ref)) { 943 1.1 christos qp->root_ref = evacuate(qp, MOVABLE_ROOT(qp)); 944 1.1 christos } 945 1.1 christos return ref_ptr(qp, qp->root_ref); 946 1.1 christos } 947 1.1 christos 948 1.1 christos static inline void 949 1.1 christos make_twigs_mutable(dns_qp_t *qp, dns_qpnode_t *n) { 950 1.1 christos if (cells_immutable(qp, branch_twigs_ref(n))) { 951 1.1 christos *n = make_node(branch_index(n), evacuate(qp, n)); 952 1.1 christos } 953 1.1 christos } 954 1.1 christos 955 1.1 christos /* 956 1.1 christos * Compact the trie by traversing the whole thing recursively, copying 957 1.1 christos * bottom-up as required. The aim is to avoid evacuation as much as 958 1.1 christos * possible, but when parts of the trie are immutable, we need to evacuate 959 1.1 christos * the paths from the root to the parts of the trie that occupy 960 1.1 christos * fragmented chunks. 961 1.1 christos * 962 1.1 christos * Without the QP_MIN_USED check, the algorithm will leave the trie 963 1.1 christos * unchanged. If the children are all leaves, the loop changes nothing, 964 1.1 christos * so we will return this node's original ref. If all of the children 965 1.1 christos * that are branches did not need moving, again, the loop changes 966 1.1 christos * nothing. So the evacuation check is the only place that the 967 1.1 christos * algorithm introduces ref changes, that then bubble up towards the 968 1.1 christos * root through the logic inside the loop. 969 1.1 christos */ 970 1.1 christos static dns_qpref_t 971 1.1 christos compact_recursive(dns_qp_t *qp, dns_qpnode_t *parent) { 972 1.1 christos dns_qpweight_t size = branch_twigs_size(parent); 973 1.1 christos dns_qpref_t twigs_ref = branch_twigs_ref(parent); 974 1.1 christos dns_qpchunk_t chunk = ref_chunk(twigs_ref); 975 1.1 christos 976 1.1 christos if (qp->compact_all || 977 1.1 christos (chunk != qp->bump && chunk_usage(qp, chunk) < QP_MIN_USED)) 978 1.1 christos { 979 1.1 christos twigs_ref = evacuate(qp, parent); 980 1.1 christos } 981 1.1 christos bool immutable = cells_immutable(qp, twigs_ref); 982 1.1 christos for (dns_qpweight_t pos = 0; pos < size; pos++) { 983 1.1 christos dns_qpnode_t *child = ref_ptr(qp, twigs_ref) + pos; 984 1.1 christos if (!is_branch(child)) { 985 1.1 christos continue; 986 1.1 christos } 987 1.1 christos dns_qpref_t old_grandtwigs = branch_twigs_ref(child); 988 1.1 christos dns_qpref_t new_grandtwigs = compact_recursive(qp, child); 989 1.1 christos if (old_grandtwigs == new_grandtwigs) { 990 1.1 christos continue; 991 1.1 christos } 992 1.1 christos if (immutable) { 993 1.1 christos twigs_ref = evacuate(qp, parent); 994 1.1 christos /* the twigs have moved */ 995 1.1 christos child = ref_ptr(qp, twigs_ref) + pos; 996 1.1 christos immutable = false; 997 1.1 christos } 998 1.1 christos *child = make_node(branch_index(child), new_grandtwigs); 999 1.1 christos } 1000 1.1 christos return twigs_ref; 1001 1.1 christos } 1002 1.1 christos 1003 1.1 christos static void 1004 1.1 christos compact(dns_qp_t *qp) { 1005 1.1 christos LOG_STATS("qp compact before leaf %u live %u used %u free %u hold %u", 1006 1.1 christos qp->leaf_count, qp->used_count - qp->free_count, 1007 1.1 christos qp->used_count, qp->free_count, qp->hold_count); 1008 1.1 christos 1009 1.1 christos isc_nanosecs_t start = isc_time_monotonic(); 1010 1.1 christos 1011 1.1 christos if (qp->usage[qp->bump].free > QP_MAX_FREE) { 1012 1.1 christos alloc_reset(qp); 1013 1.1 christos } 1014 1.1 christos 1015 1.1 christos if (qp->leaf_count > 0) { 1016 1.1 christos qp->root_ref = compact_recursive(qp, MOVABLE_ROOT(qp)); 1017 1.1 christos } 1018 1.1 christos qp->compact_all = false; 1019 1.1 christos 1020 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start; 1021 1.3 christos ISC_QP_ADD(compact_time, time); 1022 1.1 christos 1023 1.1 christos LOG_STATS("qp compact" PRItime 1024 1.1 christos "leaf %u live %u used %u free %u hold %u", 1025 1.1 christos time, qp->leaf_count, qp->used_count - qp->free_count, 1026 1.1 christos qp->used_count, qp->free_count, qp->hold_count); 1027 1.1 christos } 1028 1.1 christos 1029 1.1 christos void 1030 1.1 christos dns_qp_compact(dns_qp_t *qp, dns_qpgc_t mode) { 1031 1.1 christos REQUIRE(QP_VALID(qp)); 1032 1.1 christos if (mode == DNS_QPGC_MAYBE && !QP_NEEDGC(qp)) { 1033 1.1 christos return; 1034 1.1 christos } 1035 1.1 christos if (mode == DNS_QPGC_ALL) { 1036 1.1 christos alloc_reset(qp); 1037 1.1 christos qp->compact_all = true; 1038 1.1 christos } 1039 1.1 christos compact(qp); 1040 1.1 christos recycle(qp); 1041 1.1 christos } 1042 1.1 christos 1043 1.1 christos /* 1044 1.1 christos * Free some twigs and (if they were destroyed immediately so that the 1045 1.1 christos * result from QP_MAX_GARBAGE can change) compact the trie if necessary. 1046 1.1 christos * 1047 1.1 christos * This is called by the trie modification API entry points. The 1048 1.1 christos * free_twigs() function requires the caller to attach or detach any 1049 1.1 christos * leaves as necessary. Callers of squash_twigs() satisfy this 1050 1.1 christos * requirement by calling make_twigs_mutable(). 1051 1.1 christos * 1052 1.1 christos * Aside: In typical garbage collectors, compaction is triggered when 1053 1.1 christos * the allocator runs out of space. But that is because typical garbage 1054 1.1 christos * collectors do not know how much memory can be recovered, so they must 1055 1.1 christos * find out by scanning the heap. The qp-trie code was originally 1056 1.1 christos * designed to use malloc() and free(), so it has more information about 1057 1.1 christos * when garbage collection might be worthwhile. Hence we can trigger 1058 1.1 christos * collection when garbage passes a threshold. 1059 1.1 christos * 1060 1.1 christos * XXXFANF: If we need to avoid latency outliers caused by compaction in 1061 1.1 christos * write transactions, we can check qp->transaction_mode here. 1062 1.1 christos */ 1063 1.1 christos static inline bool 1064 1.1 christos squash_twigs(dns_qp_t *qp, dns_qpref_t twigs, dns_qpweight_t size) { 1065 1.1 christos bool destroyed = free_twigs(qp, twigs, size); 1066 1.1 christos if (destroyed && QP_AUTOGC(qp)) { 1067 1.1 christos compact(qp); 1068 1.1 christos recycle(qp); 1069 1.1 christos /* 1070 1.1 christos * This shouldn't happen if the garbage collector is 1071 1.1 christos * working correctly. We can recover at the cost of some 1072 1.1 christos * time and space, but recovery should be cheaper than 1073 1.1 christos * letting compact+recycle fail repeatedly. 1074 1.1 christos */ 1075 1.1 christos if (QP_AUTOGC(qp)) { 1076 1.1 christos isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, 1077 1.1 christos DNS_LOGMODULE_QP, ISC_LOG_NOTICE, 1078 1.1 christos "qp %p uctx \"%s\" compact/recycle " 1079 1.1 christos "failed to recover any space, " 1080 1.1 christos "scheduling a full compaction", 1081 1.1 christos qp, TRIENAME(qp)); 1082 1.1 christos qp->compact_all = true; 1083 1.1 christos } 1084 1.1 christos } 1085 1.1 christos return destroyed; 1086 1.1 christos } 1087 1.1 christos 1088 1.1 christos /*********************************************************************** 1089 1.1 christos * 1090 1.1 christos * public accessors for memory management internals 1091 1.1 christos */ 1092 1.1 christos 1093 1.1 christos dns_qp_memusage_t 1094 1.1 christos dns_qp_memusage(dns_qp_t *qp) { 1095 1.1 christos REQUIRE(QP_VALID(qp)); 1096 1.1 christos 1097 1.1 christos dns_qp_memusage_t memusage = { 1098 1.1 christos .uctx = qp->uctx, 1099 1.1 christos .leaves = qp->leaf_count, 1100 1.1 christos .live = qp->used_count - qp->free_count, 1101 1.1 christos .used = qp->used_count, 1102 1.1 christos .hold = qp->hold_count, 1103 1.1 christos .free = qp->free_count, 1104 1.1 christos .node_size = sizeof(dns_qpnode_t), 1105 1.1 christos .fragmented = QP_NEEDGC(qp), 1106 1.1 christos }; 1107 1.1 christos 1108 1.5 christos size_t chunk_usage_bytes = 0; 1109 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 1110 1.1 christos if (qp->base->ptr[chunk] != NULL) { 1111 1.5 christos chunk_usage_bytes += qp->usage[chunk].capacity; 1112 1.1 christos memusage.chunk_count += 1; 1113 1.1 christos } 1114 1.1 christos } 1115 1.1 christos 1116 1.1 christos /* 1117 1.1 christos * XXXFANF does not subtract chunks that have been shrunk, 1118 1.1 christos * and does not count unreclaimed dns_qpbase_t objects 1119 1.1 christos */ 1120 1.5 christos memusage.bytes = chunk_usage_bytes + 1121 1.1 christos qp->chunk_max * sizeof(qp->base->ptr[0]) + 1122 1.1 christos qp->chunk_max * sizeof(qp->usage[0]); 1123 1.1 christos 1124 1.1 christos return memusage; 1125 1.1 christos } 1126 1.1 christos 1127 1.1 christos dns_qp_memusage_t 1128 1.1 christos dns_qpmulti_memusage(dns_qpmulti_t *multi) { 1129 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1130 1.1 christos LOCK(&multi->mutex); 1131 1.1 christos 1132 1.1 christos dns_qp_t *qp = &multi->writer; 1133 1.1 christos INSIST(QP_VALID(qp)); 1134 1.1 christos 1135 1.1 christos dns_qp_memusage_t memusage = dns_qp_memusage(qp); 1136 1.1 christos 1137 1.5 christos if (qp->transaction_mode == QP_UPDATE && qp->usage != NULL) { 1138 1.5 christos memusage.bytes -= qp->usage[qp->bump].capacity; 1139 1.1 christos memusage.bytes += qp->usage[qp->bump].used * 1140 1.1 christos sizeof(dns_qpnode_t); 1141 1.1 christos } 1142 1.1 christos 1143 1.1 christos UNLOCK(&multi->mutex); 1144 1.1 christos return memusage; 1145 1.1 christos } 1146 1.1 christos 1147 1.1 christos void 1148 1.1 christos dns_qp_gctime(isc_nanosecs_t *compact_p, isc_nanosecs_t *recycle_p, 1149 1.1 christos isc_nanosecs_t *rollback_p) { 1150 1.3 christos *compact_p = ISC_QP_GET(compact_time); 1151 1.3 christos *recycle_p = ISC_QP_GET(recycle_time); 1152 1.3 christos *rollback_p = ISC_QP_GET(rollback_time); 1153 1.1 christos } 1154 1.1 christos 1155 1.1 christos /*********************************************************************** 1156 1.1 christos * 1157 1.1 christos * read-write transactions 1158 1.1 christos */ 1159 1.1 christos 1160 1.1 christos static dns_qp_t * 1161 1.1 christos transaction_open(dns_qpmulti_t *multi, dns_qp_t **qptp) { 1162 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1163 1.1 christos REQUIRE(qptp != NULL && *qptp == NULL); 1164 1.1 christos 1165 1.1 christos LOCK(&multi->mutex); 1166 1.1 christos 1167 1.1 christos dns_qp_t *qp = &multi->writer; 1168 1.1 christos INSIST(QP_VALID(qp)); 1169 1.1 christos 1170 1.1 christos /* 1171 1.1 christos * Mark existing chunks as immutable. 1172 1.1 christos * 1173 1.1 christos * Aside: The bump chunk is special: in a series of write 1174 1.1 christos * transactions the bump chunk is reused; the first part (up 1175 1.1 christos * to fender) is immutable, the rest mutable. But we set its 1176 1.1 christos * immutable flag so that when the bump chunk fills up, the 1177 1.1 christos * first part continues to be treated as immutable. (And the 1178 1.1 christos * rest of the chunk too, but that's OK.) 1179 1.1 christos */ 1180 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 1181 1.1 christos if (qp->usage[chunk].exists) { 1182 1.1 christos qp->usage[chunk].immutable = true; 1183 1.1 christos write_protect(qp, chunk); 1184 1.1 christos } 1185 1.1 christos } 1186 1.1 christos 1187 1.1 christos /* 1188 1.1 christos * Ensure QP_AUTOGC() ignores free space in immutable chunks. 1189 1.1 christos */ 1190 1.1 christos qp->hold_count = qp->free_count; 1191 1.1 christos 1192 1.1 christos *qptp = qp; 1193 1.1 christos return qp; 1194 1.1 christos } 1195 1.1 christos 1196 1.1 christos /* 1197 1.1 christos * a write is light 1198 1.1 christos * 1199 1.1 christos * We need to ensure we allocate from a fresh chunk if the last transaction 1200 1.1 christos * shrunk the bump chunk; but usually in a sequence of write transactions 1201 1.1 christos * we just put `fender` at the point where we started this generation. 1202 1.1 christos * 1203 1.1 christos * (Aside: Instead of keeping the previous transaction's mode, I 1204 1.1 christos * considered forcing allocation into the slow path by fiddling with 1205 1.1 christos * the bump chunk's usage counters. But that is troublesome because 1206 1.1 christos * `chunk_free()` needs to know how much of the chunk to scan.) 1207 1.1 christos */ 1208 1.1 christos void 1209 1.1 christos dns_qpmulti_write(dns_qpmulti_t *multi, dns_qp_t **qptp) { 1210 1.1 christos dns_qp_t *qp = transaction_open(multi, qptp); 1211 1.1 christos TRACE(""); 1212 1.1 christos 1213 1.1 christos if (qp->transaction_mode == QP_WRITE) { 1214 1.1 christos qp->fender = qp->usage[qp->bump].used; 1215 1.1 christos } else { 1216 1.1 christos alloc_reset(qp); 1217 1.1 christos } 1218 1.1 christos qp->transaction_mode = QP_WRITE; 1219 1.1 christos } 1220 1.1 christos 1221 1.1 christos /* 1222 1.1 christos * an update is heavier 1223 1.1 christos * 1224 1.1 christos * We always reset the allocator to the start of a fresh chunk, 1225 1.1 christos * because the previous transaction was probably an update that shrunk 1226 1.1 christos * the bump chunk. It simplifies rollback because `fender` is always zero. 1227 1.1 christos * 1228 1.1 christos * To rollback a transaction, we need to reset all the allocation 1229 1.1 christos * counters to their previous state, in particular we need to un-free 1230 1.1 christos * any nodes that were copied to make them mutable. This means we need 1231 1.1 christos * to make a copy of basically the whole `dns_qp_t writer`: everything 1232 1.1 christos * but the chunks holding the trie nodes. 1233 1.1 christos * 1234 1.1 christos * We do most of the transaction setup before creating the rollback 1235 1.1 christos * state so that after rollback we have a correct idea of which chunks 1236 1.1 christos * are immutable, and so we have the correct transaction mode to make 1237 1.1 christos * the next transaction allocate a new bump chunk. The exception is 1238 1.1 christos * resetting the allocator, which we do after creating the rollback 1239 1.1 christos * state; if this transaction is rolled back then the next transaction 1240 1.1 christos * will start from the rollback state and also reset the allocator as 1241 1.1 christos * one of its first actions. 1242 1.1 christos */ 1243 1.1 christos void 1244 1.1 christos dns_qpmulti_update(dns_qpmulti_t *multi, dns_qp_t **qptp) { 1245 1.1 christos dns_qp_t *qp = transaction_open(multi, qptp); 1246 1.1 christos TRACE(""); 1247 1.1 christos 1248 1.1 christos qp->transaction_mode = QP_UPDATE; 1249 1.1 christos 1250 1.1 christos dns_qp_t *rollback = isc_mem_allocate(qp->mctx, sizeof(*rollback)); 1251 1.1 christos memmove(rollback, qp, sizeof(*rollback)); 1252 1.1 christos /* can be uninitialized on the first transaction */ 1253 1.1 christos if (rollback->base != NULL) { 1254 1.1 christos INSIST(QPBASE_VALID(rollback->base)); 1255 1.1 christos INSIST(qp->usage != NULL && qp->chunk_max > 0); 1256 1.1 christos /* paired with either _commit() or _rollback() */ 1257 1.1 christos isc_refcount_increment(&rollback->base->refcount); 1258 1.1 christos size_t usage_bytes = sizeof(qp->usage[0]) * qp->chunk_max; 1259 1.1 christos rollback->usage = isc_mem_allocate(qp->mctx, usage_bytes); 1260 1.1 christos memmove(rollback->usage, qp->usage, usage_bytes); 1261 1.1 christos } 1262 1.1 christos INSIST(multi->rollback == NULL); 1263 1.1 christos multi->rollback = rollback; 1264 1.1 christos 1265 1.1 christos alloc_reset(qp); 1266 1.1 christos } 1267 1.1 christos 1268 1.1 christos void 1269 1.1 christos dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) { 1270 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1271 1.1 christos REQUIRE(qptp != NULL && *qptp == &multi->writer); 1272 1.1 christos REQUIRE(multi->writer.transaction_mode == QP_WRITE || 1273 1.1 christos multi->writer.transaction_mode == QP_UPDATE); 1274 1.1 christos 1275 1.1 christos dns_qp_t *qp = *qptp; 1276 1.1 christos TRACE(""); 1277 1.1 christos 1278 1.1 christos if (qp->transaction_mode == QP_UPDATE) { 1279 1.1 christos INSIST(multi->rollback != NULL); 1280 1.1 christos /* paired with dns_qpmulti_update() */ 1281 1.1 christos if (qpbase_unref(multi->rollback)) { 1282 1.1 christos isc_mem_free(qp->mctx, multi->rollback->base); 1283 1.1 christos } 1284 1.1 christos if (multi->rollback->usage != NULL) { 1285 1.1 christos isc_mem_free(qp->mctx, multi->rollback->usage); 1286 1.1 christos } 1287 1.1 christos isc_mem_free(qp->mctx, multi->rollback); 1288 1.1 christos } 1289 1.1 christos INSIST(multi->rollback == NULL); 1290 1.1 christos 1291 1.1 christos /* not the first commit? */ 1292 1.1 christos if (multi->reader_ref != INVALID_REF) { 1293 1.1 christos INSIST(cells_immutable(qp, multi->reader_ref)); 1294 1.1 christos free_twigs(qp, multi->reader_ref, READER_SIZE); 1295 1.1 christos } 1296 1.1 christos 1297 1.1 christos if (qp->transaction_mode == QP_UPDATE) { 1298 1.1 christos /* minimize memory overhead */ 1299 1.1 christos compact(qp); 1300 1.1 christos multi->reader_ref = alloc_twigs(qp, READER_SIZE); 1301 1.1 christos qp->base->ptr[qp->bump] = chunk_shrink_raw( 1302 1.1 christos qp, qp->base->ptr[qp->bump], 1303 1.1 christos qp->usage[qp->bump].used * sizeof(dns_qpnode_t)); 1304 1.1 christos } else { 1305 1.1 christos multi->reader_ref = alloc_twigs(qp, READER_SIZE); 1306 1.1 christos } 1307 1.1 christos 1308 1.1 christos /* anchor a new version of the trie */ 1309 1.1 christos dns_qpnode_t *reader = ref_ptr(qp, multi->reader_ref); 1310 1.1 christos make_reader(reader, multi); 1311 1.1 christos /* paired with chunk_free() */ 1312 1.1 christos isc_refcount_increment(&qp->base->refcount); 1313 1.1 christos 1314 1.1 christos rcu_assign_pointer(multi->reader, reader); /* COMMIT */ 1315 1.1 christos 1316 1.1 christos /* clean up what we can right now */ 1317 1.1 christos if (qp->transaction_mode == QP_UPDATE || QP_NEEDGC(qp)) { 1318 1.1 christos recycle(qp); 1319 1.1 christos } 1320 1.1 christos 1321 1.1 christos /* schedule the rest for later */ 1322 1.1 christos reclaim_chunks(multi); 1323 1.1 christos 1324 1.1 christos *qptp = NULL; 1325 1.1 christos UNLOCK(&multi->mutex); 1326 1.1 christos } 1327 1.1 christos 1328 1.1 christos /* 1329 1.1 christos * Throw away everything that was allocated during this transaction. 1330 1.1 christos */ 1331 1.1 christos void 1332 1.1 christos dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) { 1333 1.5 christos /* 1334 1.5 christos * nfree is only used when logging stats, hence the attribute. 1335 1.5 christos */ 1336 1.5 christos unsigned int nfree ISC_ATTR_UNUSED = 0; 1337 1.1 christos 1338 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1339 1.1 christos REQUIRE(multi->writer.transaction_mode == QP_UPDATE); 1340 1.1 christos REQUIRE(qptp != NULL && *qptp == &multi->writer); 1341 1.1 christos 1342 1.1 christos dns_qp_t *qp = *qptp; 1343 1.1 christos TRACE(""); 1344 1.1 christos 1345 1.1 christos isc_nanosecs_t start = isc_time_monotonic(); 1346 1.1 christos 1347 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 1348 1.1 christos if (qp->base->ptr[chunk] != NULL && !qp->usage[chunk].immutable) 1349 1.1 christos { 1350 1.1 christos chunk_free(qp, chunk); 1351 1.1 christos /* 1352 1.1 christos * we need to clear its base pointer in the rollback 1353 1.1 christos * trie, in case the arrays were resized 1354 1.1 christos */ 1355 1.1 christos if (chunk < multi->rollback->chunk_max) { 1356 1.1 christos INSIST(!multi->rollback->usage[chunk].exists); 1357 1.1 christos multi->rollback->base->ptr[chunk] = NULL; 1358 1.1 christos } 1359 1.5 christos nfree++; 1360 1.1 christos } 1361 1.1 christos } 1362 1.1 christos 1363 1.1 christos /* 1364 1.1 christos * multi->rollback->base and multi->writer->base are the same, 1365 1.1 christos * unless there was a realloc_chunk_arrays() during the transaction 1366 1.1 christos */ 1367 1.1 christos if (qpbase_unref(qp)) { 1368 1.1 christos /* paired with dns_qpmulti_update() */ 1369 1.1 christos isc_mem_free(qp->mctx, qp->base); 1370 1.1 christos } 1371 1.1 christos isc_mem_free(qp->mctx, qp->usage); 1372 1.1 christos 1373 1.1 christos /* reset allocator state */ 1374 1.1 christos INSIST(multi->rollback != NULL); 1375 1.1 christos memmove(qp, multi->rollback, sizeof(*qp)); 1376 1.1 christos isc_mem_free(qp->mctx, multi->rollback); 1377 1.1 christos INSIST(multi->rollback == NULL); 1378 1.1 christos 1379 1.1 christos isc_nanosecs_t time = isc_time_monotonic() - start; 1380 1.3 christos ISC_QP_ADD(rollback_time, time); 1381 1.1 christos 1382 1.5 christos LOG_STATS("qp rollback" PRItime "free %u chunks", time, nfree); 1383 1.1 christos 1384 1.1 christos *qptp = NULL; 1385 1.1 christos UNLOCK(&multi->mutex); 1386 1.1 christos } 1387 1.1 christos 1388 1.1 christos /*********************************************************************** 1389 1.1 christos * 1390 1.1 christos * read-only transactions 1391 1.1 christos */ 1392 1.1 christos 1393 1.1 christos static dns_qpmulti_t * 1394 1.1 christos reader_open(dns_qpmulti_t *multi, dns_qpreadable_t qpr) { 1395 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr); 1396 1.1 christos dns_qpnode_t *reader = rcu_dereference(multi->reader); 1397 1.1 christos if (reader == NULL) { 1398 1.1 christos QP_INIT(qp, multi->writer.methods, multi->writer.uctx); 1399 1.1 christos } else { 1400 1.1 christos multi = unpack_reader(qp, reader); 1401 1.1 christos } 1402 1.1 christos return multi; 1403 1.1 christos } 1404 1.1 christos 1405 1.1 christos /* 1406 1.1 christos * a query is light 1407 1.1 christos */ 1408 1.1 christos 1409 1.1 christos void 1410 1.1 christos dns_qpmulti_query(dns_qpmulti_t *multi, dns_qpread_t *qp) { 1411 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1412 1.1 christos REQUIRE(qp != NULL); 1413 1.1 christos 1414 1.1 christos qp->tid = isc_tid(); 1415 1.1 christos rcu_read_lock(); 1416 1.1 christos 1417 1.1 christos dns_qpmulti_t *whence = reader_open(multi, qp); 1418 1.1 christos INSIST(whence == multi); 1419 1.1 christos } 1420 1.1 christos 1421 1.1 christos void 1422 1.1 christos dns_qpread_destroy(dns_qpmulti_t *multi, dns_qpread_t *qp) { 1423 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1424 1.1 christos REQUIRE(QP_VALID(qp)); 1425 1.1 christos REQUIRE(qp->tid == isc_tid()); 1426 1.1 christos *qp = (dns_qpread_t){}; 1427 1.1 christos rcu_read_unlock(); 1428 1.1 christos } 1429 1.1 christos 1430 1.1 christos /* 1431 1.1 christos * a snapshot is heavy 1432 1.1 christos */ 1433 1.1 christos 1434 1.1 christos void 1435 1.1 christos dns_qpmulti_snapshot(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) { 1436 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1437 1.1 christos REQUIRE(qpsp != NULL && *qpsp == NULL); 1438 1.1 christos 1439 1.1 christos rcu_read_lock(); 1440 1.1 christos 1441 1.1 christos LOCK(&multi->mutex); 1442 1.1 christos 1443 1.1 christos dns_qp_t *qpw = &multi->writer; 1444 1.1 christos size_t bytes = sizeof(dns_qpsnap_t) + sizeof(dns_qpbase_t) + 1445 1.1 christos sizeof(qpw->base->ptr[0]) * qpw->chunk_max; 1446 1.1 christos dns_qpsnap_t *qps = isc_mem_allocate(qpw->mctx, bytes); 1447 1.1 christos 1448 1.1 christos qps->whence = reader_open(multi, qps); 1449 1.1 christos INSIST(qps->whence == multi); 1450 1.1 christos 1451 1.1 christos /* not a separate allocation */ 1452 1.1 christos qps->base = (dns_qpbase_t *)(qps + 1); 1453 1.1 christos isc_refcount_init(&qps->base->refcount, 0); 1454 1.1 christos 1455 1.1 christos /* 1456 1.1 christos * only copy base pointers of chunks we need, so we can 1457 1.1 christos * reclaim unused memory in dns_qpsnap_destroy() 1458 1.1 christos */ 1459 1.1 christos qps->chunk_max = qpw->chunk_max; 1460 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qpw->chunk_max; chunk++) { 1461 1.1 christos if (qpw->usage[chunk].exists && chunk_usage(qpw, chunk) > 0) { 1462 1.1 christos qpw->usage[chunk].snapshot = true; 1463 1.1 christos qps->base->ptr[chunk] = qpw->base->ptr[chunk]; 1464 1.1 christos } else { 1465 1.1 christos qps->base->ptr[chunk] = NULL; 1466 1.1 christos } 1467 1.1 christos } 1468 1.1 christos ISC_LIST_INITANDAPPEND(multi->snapshots, qps, link); 1469 1.1 christos 1470 1.1 christos *qpsp = qps; 1471 1.1 christos UNLOCK(&multi->mutex); 1472 1.1 christos 1473 1.1 christos rcu_read_unlock(); 1474 1.1 christos } 1475 1.1 christos 1476 1.1 christos void 1477 1.1 christos dns_qpsnap_destroy(dns_qpmulti_t *multi, dns_qpsnap_t **qpsp) { 1478 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1479 1.1 christos REQUIRE(qpsp != NULL && *qpsp != NULL); 1480 1.1 christos 1481 1.1 christos LOCK(&multi->mutex); 1482 1.1 christos 1483 1.1 christos dns_qpsnap_t *qp = *qpsp; 1484 1.1 christos 1485 1.1 christos /* make sure the API is being used correctly */ 1486 1.1 christos REQUIRE(qp->whence == multi); 1487 1.1 christos 1488 1.1 christos ISC_LIST_UNLINK(multi->snapshots, qp, link); 1489 1.1 christos 1490 1.1 christos /* 1491 1.1 christos * eagerly reclaim chunks that are now unused, so that memory does 1492 1.1 christos * not accumulate when a trie has a lot of updates and snapshots 1493 1.1 christos */ 1494 1.1 christos marksweep_chunks(multi); 1495 1.1 christos 1496 1.1 christos isc_mem_free(multi->writer.mctx, qp); 1497 1.1 christos 1498 1.1 christos *qpsp = NULL; 1499 1.1 christos UNLOCK(&multi->mutex); 1500 1.1 christos } 1501 1.1 christos 1502 1.1 christos /*********************************************************************** 1503 1.1 christos * 1504 1.1 christos * constructors, destructors 1505 1.1 christos */ 1506 1.1 christos 1507 1.1 christos void 1508 1.1 christos dns_qp_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, 1509 1.1 christos dns_qp_t **qptp) { 1510 1.1 christos REQUIRE(qptp != NULL && *qptp == NULL); 1511 1.1 christos 1512 1.1 christos dns_qp_t *qp = isc_mem_get(mctx, sizeof(*qp)); 1513 1.1 christos QP_INIT(qp, methods, uctx); 1514 1.1 christos isc_mem_attach(mctx, &qp->mctx); 1515 1.1 christos alloc_reset(qp); 1516 1.1 christos TRACE(""); 1517 1.1 christos *qptp = qp; 1518 1.1 christos } 1519 1.1 christos 1520 1.1 christos void 1521 1.1 christos dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, 1522 1.1 christos dns_qpmulti_t **qpmp) { 1523 1.1 christos REQUIRE(qpmp != NULL && *qpmp == NULL); 1524 1.1 christos 1525 1.1 christos dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi)); 1526 1.5 christos *multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC, 1527 1.5 christos .reader_ref = INVALID_REF, 1528 1.5 christos .references = ISC_REFCOUNT_INITIALIZER(1) }; 1529 1.1 christos isc_mutex_init(&multi->mutex); 1530 1.1 christos ISC_LIST_INIT(multi->snapshots); 1531 1.5 christos 1532 1.1 christos /* 1533 1.1 christos * Do not waste effort allocating a bump chunk that will be thrown 1534 1.1 christos * away when a transaction is opened. dns_qpmulti_update() always 1535 1.1 christos * allocates; to ensure dns_qpmulti_write() does too, pretend the 1536 1.1 christos * previous transaction was an update 1537 1.1 christos */ 1538 1.1 christos dns_qp_t *qp = &multi->writer; 1539 1.1 christos QP_INIT(qp, methods, uctx); 1540 1.1 christos isc_mem_attach(mctx, &qp->mctx); 1541 1.1 christos qp->transaction_mode = QP_UPDATE; 1542 1.1 christos TRACE(""); 1543 1.1 christos *qpmp = multi; 1544 1.1 christos } 1545 1.1 christos 1546 1.1 christos static void 1547 1.1 christos destroy_guts(dns_qp_t *qp) { 1548 1.1 christos if (qp->chunk_max == 0) { 1549 1.1 christos return; 1550 1.1 christos } 1551 1.5 christos 1552 1.1 christos for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { 1553 1.1 christos if (qp->base->ptr[chunk] != NULL) { 1554 1.1 christos chunk_free(qp, chunk); 1555 1.1 christos } 1556 1.1 christos } 1557 1.5 christos qp->chunk_max = 0; 1558 1.1 christos ENSURE(qp->used_count == 0); 1559 1.1 christos ENSURE(qp->free_count == 0); 1560 1.1 christos ENSURE(isc_refcount_current(&qp->base->refcount) == 1); 1561 1.1 christos isc_mem_free(qp->mctx, qp->base); 1562 1.1 christos isc_mem_free(qp->mctx, qp->usage); 1563 1.1 christos qp->magic = 0; 1564 1.1 christos } 1565 1.1 christos 1566 1.1 christos void 1567 1.1 christos dns_qp_destroy(dns_qp_t **qptp) { 1568 1.1 christos REQUIRE(qptp != NULL); 1569 1.1 christos REQUIRE(QP_VALID(*qptp)); 1570 1.1 christos 1571 1.1 christos dns_qp_t *qp = *qptp; 1572 1.1 christos *qptp = NULL; 1573 1.1 christos 1574 1.1 christos /* do not try to destroy part of a dns_qpmulti_t */ 1575 1.1 christos REQUIRE(qp->transaction_mode == QP_NONE); 1576 1.1 christos 1577 1.1 christos TRACE(""); 1578 1.1 christos destroy_guts(qp); 1579 1.1 christos isc_mem_putanddetach(&qp->mctx, qp, sizeof(*qp)); 1580 1.1 christos } 1581 1.1 christos 1582 1.1 christos static void 1583 1.5 christos qpmulti_free_mem(dns_qpmulti_t *multi) { 1584 1.5 christos REQUIRE(QPMULTI_VALID(multi)); 1585 1.5 christos 1586 1.5 christos /* reassure thread sanitizer */ 1587 1.5 christos LOCK(&multi->mutex); 1588 1.5 christos dns_qp_t *qp = &multi->writer; 1589 1.5 christos UNLOCK(&multi->mutex); 1590 1.5 christos 1591 1.5 christos isc_mutex_destroy(&multi->mutex); 1592 1.5 christos isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); 1593 1.5 christos } 1594 1.5 christos 1595 1.5 christos #if QPMULTI_TRACE 1596 1.5 christos ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem) 1597 1.5 christos #else 1598 1.5 christos ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem) 1599 1.5 christos #endif 1600 1.5 christos 1601 1.5 christos static void 1602 1.5 christos qpmulti_destroy_guts_cb(struct rcu_head *arg) { 1603 1.1 christos qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); 1604 1.1 christos REQUIRE(QPRCU_VALID(rcuctx)); 1605 1.1 christos /* only nonzero for reclaim_chunks_cb() */ 1606 1.1 christos REQUIRE(rcuctx->count == 0); 1607 1.1 christos 1608 1.1 christos dns_qpmulti_t *multi = rcuctx->multi; 1609 1.1 christos REQUIRE(QPMULTI_VALID(multi)); 1610 1.1 christos 1611 1.1 christos /* reassure thread sanitizer */ 1612 1.1 christos LOCK(&multi->mutex); 1613 1.1 christos 1614 1.1 christos dns_qp_t *qp = &multi->writer; 1615 1.1 christos REQUIRE(QP_VALID(qp)); 1616 1.1 christos 1617 1.1 christos destroy_guts(qp); 1618 1.1 christos 1619 1.1 christos UNLOCK(&multi->mutex); 1620 1.1 christos 1621 1.5 christos dns_qpmulti_detach(&multi); 1622 1.1 christos isc_mem_putanddetach(&rcuctx->mctx, rcuctx, 1623 1.1 christos STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); 1624 1.1 christos } 1625 1.1 christos 1626 1.1 christos void 1627 1.1 christos dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { 1628 1.1 christos dns_qp_t *qp = NULL; 1629 1.1 christos dns_qpmulti_t *multi = NULL; 1630 1.1 christos qp_rcuctx_t *rcuctx = NULL; 1631 1.1 christos 1632 1.1 christos REQUIRE(qpmp != NULL); 1633 1.1 christos REQUIRE(QPMULTI_VALID(*qpmp)); 1634 1.1 christos 1635 1.1 christos multi = *qpmp; 1636 1.1 christos qp = &multi->writer; 1637 1.1 christos *qpmp = NULL; 1638 1.1 christos 1639 1.1 christos REQUIRE(QP_VALID(qp)); 1640 1.1 christos REQUIRE(multi->rollback == NULL); 1641 1.1 christos REQUIRE(ISC_LIST_EMPTY(multi->snapshots)); 1642 1.1 christos 1643 1.1 christos rcuctx = isc_mem_get(qp->mctx, STRUCT_FLEX_SIZE(rcuctx, chunk, 0)); 1644 1.1 christos *rcuctx = (qp_rcuctx_t){ 1645 1.1 christos .magic = QPRCU_MAGIC, 1646 1.1 christos .multi = multi, 1647 1.1 christos }; 1648 1.1 christos isc_mem_attach(qp->mctx, &rcuctx->mctx); 1649 1.5 christos call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb); 1650 1.1 christos } 1651 1.1 christos 1652 1.1 christos /*********************************************************************** 1653 1.1 christos * 1654 1.1 christos * modification 1655 1.1 christos */ 1656 1.1 christos 1657 1.1 christos isc_result_t 1658 1.1 christos dns_qp_insert(dns_qp_t *qp, void *pval, uint32_t ival) { 1659 1.1 christos dns_qpref_t new_ref, old_ref; 1660 1.1 christos dns_qpnode_t new_leaf, old_node; 1661 1.1 christos dns_qpnode_t *new_twigs = NULL, *old_twigs = NULL; 1662 1.1 christos dns_qpshift_t new_bit, old_bit; 1663 1.1 christos dns_qpweight_t old_size, new_size; 1664 1.1 christos dns_qpkey_t new_key, old_key; 1665 1.1 christos size_t new_keylen, old_keylen; 1666 1.1 christos size_t offset; 1667 1.1 christos uint64_t index; 1668 1.1 christos dns_qpshift_t bit; 1669 1.1 christos dns_qpweight_t pos; 1670 1.1 christos dns_qpnode_t *n = NULL; 1671 1.1 christos 1672 1.1 christos REQUIRE(QP_VALID(qp)); 1673 1.1 christos 1674 1.1 christos new_leaf = make_leaf(pval, ival); 1675 1.1 christos new_keylen = leaf_qpkey(qp, &new_leaf, new_key); 1676 1.1 christos 1677 1.1 christos /* first leaf in an empty trie? */ 1678 1.1 christos if (qp->leaf_count == 0) { 1679 1.1 christos new_ref = alloc_twigs(qp, 1); 1680 1.1 christos new_twigs = ref_ptr(qp, new_ref); 1681 1.1 christos *new_twigs = new_leaf; 1682 1.1 christos attach_leaf(qp, new_twigs); 1683 1.1 christos qp->leaf_count++; 1684 1.1 christos qp->root_ref = new_ref; 1685 1.1 christos return ISC_R_SUCCESS; 1686 1.1 christos } 1687 1.1 christos 1688 1.1 christos /* 1689 1.1 christos * We need to keep searching down to a leaf even if our key is 1690 1.1 christos * missing from this branch. It doesn't matter which twig we 1691 1.1 christos * choose since the keys are all the same up to this node's 1692 1.1 christos * offset. Note that if we simply use branch_twig_pos(n, bit) 1693 1.1 christos * we may get an out-of-bounds access if our bit is greater 1694 1.1 christos * than all the set bits in the node. 1695 1.1 christos */ 1696 1.1 christos n = ref_ptr(qp, qp->root_ref); 1697 1.1 christos while (is_branch(n)) { 1698 1.1 christos prefetch_twigs(qp, n); 1699 1.1 christos dns_qpref_t ref = branch_twigs_ref(n); 1700 1.1 christos bit = branch_keybit(n, new_key, new_keylen); 1701 1.1 christos pos = branch_has_twig(n, bit) ? branch_twig_pos(n, bit) : 0; 1702 1.1 christos n = ref_ptr(qp, ref + pos); 1703 1.1 christos } 1704 1.1 christos 1705 1.1 christos /* do the keys differ, and if so, where? */ 1706 1.1 christos old_keylen = leaf_qpkey(qp, n, old_key); 1707 1.1 christos offset = qpkey_compare(new_key, new_keylen, old_key, old_keylen); 1708 1.1 christos if (offset == QPKEY_EQUAL) { 1709 1.1 christos return ISC_R_EXISTS; 1710 1.1 christos } 1711 1.1 christos new_bit = qpkey_bit(new_key, new_keylen, offset); 1712 1.1 christos old_bit = qpkey_bit(old_key, old_keylen, offset); 1713 1.1 christos 1714 1.1 christos /* find where to insert a branch or grow an existing branch. */ 1715 1.1 christos n = make_root_mutable(qp); 1716 1.1 christos while (is_branch(n)) { 1717 1.1 christos prefetch_twigs(qp, n); 1718 1.1 christos if (offset < branch_key_offset(n)) { 1719 1.1 christos goto newbranch; 1720 1.1 christos } 1721 1.1 christos if (offset == branch_key_offset(n)) { 1722 1.1 christos goto growbranch; 1723 1.1 christos } 1724 1.1 christos make_twigs_mutable(qp, n); 1725 1.1 christos bit = branch_keybit(n, new_key, new_keylen); 1726 1.1 christos INSIST(branch_has_twig(n, bit)); 1727 1.1 christos n = branch_twig_ptr(qp, n, bit); 1728 1.1 christos } 1729 1.1 christos /* fall through */ 1730 1.1 christos 1731 1.1 christos newbranch: 1732 1.1 christos new_ref = alloc_twigs(qp, 2); 1733 1.1 christos new_twigs = ref_ptr(qp, new_ref); 1734 1.1 christos 1735 1.1 christos /* save before overwriting. */ 1736 1.1 christos old_node = *n; 1737 1.1 christos 1738 1.1 christos /* new branch node takes old node's place */ 1739 1.1 christos index = BRANCH_TAG | (1ULL << new_bit) | (1ULL << old_bit) | 1740 1.1 christos ((uint64_t)offset << SHIFT_OFFSET); 1741 1.1 christos *n = make_node(index, new_ref); 1742 1.1 christos 1743 1.1 christos /* populate twigs */ 1744 1.1 christos new_twigs[old_bit > new_bit] = old_node; 1745 1.1 christos new_twigs[new_bit > old_bit] = new_leaf; 1746 1.1 christos 1747 1.1 christos attach_leaf(qp, &new_leaf); 1748 1.1 christos qp->leaf_count++; 1749 1.1 christos 1750 1.1 christos return ISC_R_SUCCESS; 1751 1.1 christos 1752 1.1 christos growbranch: 1753 1.1 christos INSIST(!branch_has_twig(n, new_bit)); 1754 1.1 christos 1755 1.1 christos /* locate twigs vectors */ 1756 1.1 christos old_size = branch_twigs_size(n); 1757 1.1 christos new_size = old_size + 1; 1758 1.1 christos old_ref = branch_twigs_ref(n); 1759 1.1 christos new_ref = alloc_twigs(qp, new_size); 1760 1.1 christos old_twigs = ref_ptr(qp, old_ref); 1761 1.1 christos new_twigs = ref_ptr(qp, new_ref); 1762 1.1 christos 1763 1.1 christos /* embiggen branch node */ 1764 1.1 christos index = branch_index(n) | (1ULL << new_bit); 1765 1.1 christos *n = make_node(index, new_ref); 1766 1.1 christos 1767 1.1 christos /* embiggen twigs vector */ 1768 1.1 christos pos = branch_twig_pos(n, new_bit); 1769 1.1 christos move_twigs(new_twigs, old_twigs, pos); 1770 1.1 christos new_twigs[pos] = new_leaf; 1771 1.1 christos move_twigs(new_twigs + pos + 1, old_twigs + pos, old_size - pos); 1772 1.1 christos 1773 1.1 christos if (squash_twigs(qp, old_ref, old_size)) { 1774 1.1 christos /* old twigs destroyed, only attach to new leaf */ 1775 1.1 christos attach_leaf(qp, &new_leaf); 1776 1.1 christos } else { 1777 1.1 christos /* old twigs duplicated, attach to all leaves */ 1778 1.1 christos attach_twigs(qp, new_twigs, new_size); 1779 1.1 christos } 1780 1.1 christos qp->leaf_count++; 1781 1.1 christos 1782 1.1 christos return ISC_R_SUCCESS; 1783 1.1 christos } 1784 1.1 christos 1785 1.1 christos isc_result_t 1786 1.1 christos dns_qp_deletekey(dns_qp_t *qp, const dns_qpkey_t search_key, 1787 1.1 christos size_t search_keylen, void **pval_r, uint32_t *ival_r) { 1788 1.1 christos REQUIRE(QP_VALID(qp)); 1789 1.1 christos REQUIRE(search_keylen < sizeof(dns_qpkey_t)); 1790 1.1 christos 1791 1.1 christos if (get_root(qp) == NULL) { 1792 1.1 christos return ISC_R_NOTFOUND; 1793 1.1 christos } 1794 1.1 christos 1795 1.1 christos dns_qpshift_t bit = 0; /* suppress warning */ 1796 1.1 christos dns_qpnode_t *parent = NULL; 1797 1.1 christos dns_qpnode_t *n = make_root_mutable(qp); 1798 1.1 christos while (is_branch(n)) { 1799 1.1 christos prefetch_twigs(qp, n); 1800 1.1 christos bit = branch_keybit(n, search_key, search_keylen); 1801 1.1 christos if (!branch_has_twig(n, bit)) { 1802 1.1 christos return ISC_R_NOTFOUND; 1803 1.1 christos } 1804 1.1 christos make_twigs_mutable(qp, n); 1805 1.1 christos parent = n; 1806 1.1 christos n = branch_twig_ptr(qp, n, bit); 1807 1.1 christos } 1808 1.1 christos 1809 1.1 christos dns_qpkey_t found_key; 1810 1.1 christos size_t found_keylen = leaf_qpkey(qp, n, found_key); 1811 1.1 christos if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) != 1812 1.1 christos QPKEY_EQUAL) 1813 1.1 christos { 1814 1.1 christos return ISC_R_NOTFOUND; 1815 1.1 christos } 1816 1.1 christos 1817 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n)); 1818 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n)); 1819 1.1 christos detach_leaf(qp, n); 1820 1.1 christos qp->leaf_count--; 1821 1.1 christos 1822 1.1 christos /* trie becomes empty */ 1823 1.1 christos if (qp->leaf_count == 0) { 1824 1.1 christos INSIST(parent == NULL); 1825 1.1 christos INSIST(n == get_root(qp)); 1826 1.1 christos free_twigs(qp, qp->root_ref, 1); 1827 1.1 christos qp->root_ref = INVALID_REF; 1828 1.1 christos return ISC_R_SUCCESS; 1829 1.1 christos } 1830 1.1 christos 1831 1.1 christos /* step back to parent node */ 1832 1.1 christos n = parent; 1833 1.1 christos parent = NULL; 1834 1.1 christos 1835 1.1 christos INSIST(bit != 0); 1836 1.1 christos dns_qpweight_t size = branch_twigs_size(n); 1837 1.1 christos dns_qpweight_t pos = branch_twig_pos(n, bit); 1838 1.1 christos dns_qpref_t ref = branch_twigs_ref(n); 1839 1.1 christos dns_qpnode_t *twigs = ref_ptr(qp, ref); 1840 1.1 christos 1841 1.1 christos if (size == 2) { 1842 1.1 christos /* 1843 1.1 christos * move the other twig to the parent branch. 1844 1.1 christos */ 1845 1.1 christos *n = twigs[!pos]; 1846 1.1 christos squash_twigs(qp, ref, 2); 1847 1.1 christos } else { 1848 1.1 christos /* 1849 1.1 christos * shrink the twigs in place, to avoid using the bump 1850 1.1 christos * chunk too fast - the gc will clean up after us 1851 1.1 christos */ 1852 1.1 christos *n = make_node(branch_index(n) & ~(1ULL << bit), ref); 1853 1.1 christos move_twigs(twigs + pos, twigs + pos + 1, size - pos - 1); 1854 1.1 christos squash_twigs(qp, ref + size - 1, 1); 1855 1.1 christos } 1856 1.1 christos 1857 1.1 christos return ISC_R_SUCCESS; 1858 1.1 christos } 1859 1.1 christos 1860 1.1 christos isc_result_t 1861 1.1 christos dns_qp_deletename(dns_qp_t *qp, const dns_name_t *name, void **pval_r, 1862 1.1 christos uint32_t *ival_r) { 1863 1.1 christos dns_qpkey_t key; 1864 1.1 christos size_t keylen = dns_qpkey_fromname(key, name); 1865 1.1 christos return dns_qp_deletekey(qp, key, keylen, pval_r, ival_r); 1866 1.1 christos } 1867 1.1 christos 1868 1.1 christos /*********************************************************************** 1869 1.1 christos * chains 1870 1.1 christos */ 1871 1.1 christos static void 1872 1.1 christos maybe_set_name(dns_qpreader_t *qp, dns_qpnode_t *node, dns_name_t *name) { 1873 1.1 christos dns_qpkey_t key; 1874 1.1 christos size_t len; 1875 1.1 christos 1876 1.1 christos if (name == NULL) { 1877 1.1 christos return; 1878 1.1 christos } 1879 1.1 christos 1880 1.1 christos dns_name_reset(name); 1881 1.1 christos len = leaf_qpkey(qp, node, key); 1882 1.1 christos dns_qpkey_toname(key, len, name); 1883 1.1 christos } 1884 1.1 christos 1885 1.1 christos void 1886 1.1 christos dns_qpchain_init(dns_qpreadable_t qpr, dns_qpchain_t *chain) { 1887 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr); 1888 1.1 christos REQUIRE(QP_VALID(qp)); 1889 1.1 christos REQUIRE(chain != NULL); 1890 1.1 christos 1891 1.1 christos *chain = (dns_qpchain_t){ 1892 1.1 christos .magic = QPCHAIN_MAGIC, 1893 1.1 christos .qp = qp, 1894 1.1 christos }; 1895 1.1 christos } 1896 1.1 christos 1897 1.1 christos unsigned int 1898 1.1 christos dns_qpchain_length(dns_qpchain_t *chain) { 1899 1.1 christos REQUIRE(QPCHAIN_VALID(chain)); 1900 1.1 christos 1901 1.1 christos return chain->len; 1902 1.1 christos } 1903 1.1 christos 1904 1.1 christos void 1905 1.1 christos dns_qpchain_node(dns_qpchain_t *chain, unsigned int level, dns_name_t *name, 1906 1.1 christos void **pval_r, uint32_t *ival_r) { 1907 1.1 christos dns_qpnode_t *node = NULL; 1908 1.1 christos 1909 1.1 christos REQUIRE(QPCHAIN_VALID(chain)); 1910 1.1 christos REQUIRE(level < chain->len); 1911 1.1 christos 1912 1.1 christos node = chain->chain[level].node; 1913 1.1 christos maybe_set_name(chain->qp, node, name); 1914 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node)); 1915 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node)); 1916 1.1 christos } 1917 1.1 christos 1918 1.1 christos /*********************************************************************** 1919 1.1 christos * iterators 1920 1.1 christos */ 1921 1.1 christos 1922 1.1 christos void 1923 1.1 christos dns_qpiter_init(dns_qpreadable_t qpr, dns_qpiter_t *qpi) { 1924 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr); 1925 1.1 christos REQUIRE(QP_VALID(qp)); 1926 1.1 christos REQUIRE(qpi != NULL); 1927 1.1 christos *qpi = (dns_qpiter_t){ 1928 1.1 christos .qp = qp, 1929 1.1 christos .magic = QPITER_MAGIC, 1930 1.1 christos }; 1931 1.1 christos } 1932 1.1 christos 1933 1.1 christos /* 1934 1.1 christos * are we at the last twig in this branch (in whichever direction 1935 1.1 christos * we're iterating)? 1936 1.1 christos */ 1937 1.1 christos static bool 1938 1.1 christos last_twig(dns_qpiter_t *qpi, bool forward) { 1939 1.1 christos dns_qpweight_t pos = 0, max = 0; 1940 1.1 christos if (qpi->sp > 0) { 1941 1.1 christos dns_qpnode_t *child = qpi->stack[qpi->sp]; 1942 1.1 christos dns_qpnode_t *parent = qpi->stack[qpi->sp - 1]; 1943 1.1 christos pos = child - ref_ptr(qpi->qp, branch_twigs_ref(parent)); 1944 1.1 christos if (forward) { 1945 1.1 christos max = branch_twigs_size(parent) - 1; 1946 1.1 christos } 1947 1.1 christos } 1948 1.1 christos return pos == max; 1949 1.1 christos } 1950 1.1 christos 1951 1.1 christos /* 1952 1.1 christos * move a QP iterator forward or back to the next or previous leaf. 1953 1.1 christos * note: this function can go wrong when the iterator refers to 1954 1.1 christos * a mutable view of the trie which is altered while iterating 1955 1.1 christos */ 1956 1.1 christos static isc_result_t 1957 1.1 christos iterate(bool forward, dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, 1958 1.1 christos uint32_t *ival_r) { 1959 1.1 christos dns_qpnode_t *node = NULL; 1960 1.1 christos bool initial_branch = true; 1961 1.1 christos 1962 1.1 christos REQUIRE(QPITER_VALID(qpi)); 1963 1.1 christos 1964 1.1 christos dns_qpreader_t *qp = qpi->qp; 1965 1.1 christos 1966 1.1 christos REQUIRE(QP_VALID(qp)); 1967 1.1 christos 1968 1.1 christos node = get_root(qp); 1969 1.1 christos if (node == NULL) { 1970 1.1 christos return ISC_R_NOMORE; 1971 1.1 christos } 1972 1.1 christos 1973 1.1 christos do { 1974 1.1 christos if (qpi->stack[qpi->sp] == NULL) { 1975 1.1 christos /* newly initialized iterator: use the root node */ 1976 1.1 christos INSIST(qpi->sp == 0); 1977 1.1 christos qpi->stack[0] = node; 1978 1.1 christos } else if (!initial_branch) { 1979 1.1 christos /* 1980 1.1 christos * in a prior loop, we reached a branch; from 1981 1.1 christos * here we just need to get the highest or lowest 1982 1.1 christos * leaf in the subtree; we don't need to bother 1983 1.1 christos * stepping forward or backward through twigs 1984 1.1 christos * anymore. 1985 1.1 christos */ 1986 1.1 christos INSIST(qpi->sp > 0); 1987 1.1 christos } else if (last_twig(qpi, forward)) { 1988 1.1 christos /* 1989 1.1 christos * we've stepped to the end (or the beginning, 1990 1.1 christos * if we're iterating backwards) of a set of twigs. 1991 1.1 christos */ 1992 1.1 christos if (qpi->sp == 0) { 1993 1.1 christos /* 1994 1.1 christos * we've finished iterating. reinitialize 1995 1.1 christos * the iterator, then return ISC_R_NOMORE. 1996 1.1 christos */ 1997 1.1 christos dns_qpiter_init(qpi->qp, qpi); 1998 1.1 christos return ISC_R_NOMORE; 1999 1.1 christos } 2000 1.1 christos 2001 1.1 christos /* 2002 1.1 christos * pop the stack, and resume at the parent branch. 2003 1.1 christos */ 2004 1.1 christos qpi->stack[qpi->sp] = NULL; 2005 1.1 christos qpi->sp--; 2006 1.1 christos continue; 2007 1.1 christos } else { 2008 1.1 christos /* 2009 1.1 christos * there are more twigs in the current branch, 2010 1.1 christos * so step the node pointer forward (or back). 2011 1.1 christos */ 2012 1.1 christos qpi->stack[qpi->sp] += (forward ? 1 : -1); 2013 1.1 christos node = qpi->stack[qpi->sp]; 2014 1.1 christos } 2015 1.1 christos 2016 1.1 christos /* 2017 1.1 christos * if we're at a branch now, we loop down to the 2018 1.1 christos * left- or rightmost leaf. 2019 1.1 christos */ 2020 1.1 christos if (is_branch(node)) { 2021 1.1 christos qpi->sp++; 2022 1.1 christos INSIST(qpi->sp < DNS_QP_MAXKEY); 2023 1.1 christos node = ref_ptr(qp, branch_twigs_ref(node)) + 2024 1.1 christos (forward ? 0 : branch_twigs_size(node) - 1); 2025 1.1 christos qpi->stack[qpi->sp] = node; 2026 1.1 christos initial_branch = false; 2027 1.1 christos } 2028 1.1 christos } while (is_branch(node)); 2029 1.1 christos 2030 1.1 christos /* we're at a leaf: return its data to the caller */ 2031 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node)); 2032 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node)); 2033 1.1 christos maybe_set_name(qpi->qp, node, name); 2034 1.1 christos return ISC_R_SUCCESS; 2035 1.1 christos } 2036 1.1 christos 2037 1.1 christos isc_result_t 2038 1.1 christos dns_qpiter_next(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, 2039 1.1 christos uint32_t *ival_r) { 2040 1.1 christos return iterate(true, qpi, name, pval_r, ival_r); 2041 1.1 christos } 2042 1.1 christos 2043 1.1 christos isc_result_t 2044 1.1 christos dns_qpiter_prev(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, 2045 1.1 christos uint32_t *ival_r) { 2046 1.1 christos return iterate(false, qpi, name, pval_r, ival_r); 2047 1.1 christos } 2048 1.1 christos 2049 1.1 christos isc_result_t 2050 1.1 christos dns_qpiter_current(dns_qpiter_t *qpi, dns_name_t *name, void **pval_r, 2051 1.1 christos uint32_t *ival_r) { 2052 1.1 christos dns_qpnode_t *node = NULL; 2053 1.1 christos 2054 1.1 christos REQUIRE(QPITER_VALID(qpi)); 2055 1.1 christos 2056 1.1 christos node = qpi->stack[qpi->sp]; 2057 1.1 christos if (node == NULL || is_branch(node)) { 2058 1.1 christos return ISC_R_FAILURE; 2059 1.1 christos } 2060 1.1 christos 2061 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(node)); 2062 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(node)); 2063 1.1 christos maybe_set_name(qpi->qp, node, name); 2064 1.1 christos return ISC_R_SUCCESS; 2065 1.1 christos } 2066 1.1 christos 2067 1.1 christos /*********************************************************************** 2068 1.1 christos * 2069 1.1 christos * search 2070 1.1 christos */ 2071 1.1 christos 2072 1.1 christos isc_result_t 2073 1.1 christos dns_qp_getkey(dns_qpreadable_t qpr, const dns_qpkey_t search_key, 2074 1.1 christos size_t search_keylen, void **pval_r, uint32_t *ival_r) { 2075 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr); 2076 1.1 christos dns_qpkey_t found_key; 2077 1.1 christos size_t found_keylen; 2078 1.1 christos dns_qpshift_t bit; 2079 1.1 christos dns_qpnode_t *n = NULL; 2080 1.1 christos 2081 1.1 christos REQUIRE(QP_VALID(qp)); 2082 1.1 christos REQUIRE(search_keylen < sizeof(dns_qpkey_t)); 2083 1.1 christos 2084 1.1 christos n = get_root(qp); 2085 1.1 christos if (n == NULL) { 2086 1.1 christos return ISC_R_NOTFOUND; 2087 1.1 christos } 2088 1.1 christos 2089 1.1 christos while (is_branch(n)) { 2090 1.1 christos prefetch_twigs(qp, n); 2091 1.1 christos bit = branch_keybit(n, search_key, search_keylen); 2092 1.1 christos if (!branch_has_twig(n, bit)) { 2093 1.1 christos return ISC_R_NOTFOUND; 2094 1.1 christos } 2095 1.1 christos n = branch_twig_ptr(qp, n, bit); 2096 1.1 christos } 2097 1.1 christos 2098 1.1 christos found_keylen = leaf_qpkey(qp, n, found_key); 2099 1.1 christos if (qpkey_compare(search_key, search_keylen, found_key, found_keylen) != 2100 1.1 christos QPKEY_EQUAL) 2101 1.1 christos { 2102 1.1 christos return ISC_R_NOTFOUND; 2103 1.1 christos } 2104 1.1 christos 2105 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n)); 2106 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n)); 2107 1.1 christos return ISC_R_SUCCESS; 2108 1.1 christos } 2109 1.1 christos 2110 1.1 christos isc_result_t 2111 1.1 christos dns_qp_getname(dns_qpreadable_t qpr, const dns_name_t *name, void **pval_r, 2112 1.1 christos uint32_t *ival_r) { 2113 1.1 christos dns_qpkey_t key; 2114 1.1 christos size_t keylen = dns_qpkey_fromname(key, name); 2115 1.1 christos return dns_qp_getkey(qpr, key, keylen, pval_r, ival_r); 2116 1.1 christos } 2117 1.1 christos 2118 1.1 christos static inline void 2119 1.1 christos add_link(dns_qpchain_t *chain, dns_qpnode_t *node, size_t offset) { 2120 1.1 christos /* prevent duplication */ 2121 1.1 christos if (chain->len != 0 && chain->chain[chain->len - 1].node == node) { 2122 1.1 christos return; 2123 1.1 christos } 2124 1.1 christos chain->chain[chain->len].node = node; 2125 1.1 christos chain->chain[chain->len].offset = offset; 2126 1.1 christos chain->len++; 2127 1.1 christos INSIST(chain->len <= DNS_NAME_MAXLABELS); 2128 1.1 christos } 2129 1.1 christos 2130 1.1 christos static inline void 2131 1.1 christos prevleaf(dns_qpiter_t *it) { 2132 1.1 christos isc_result_t result = dns_qpiter_prev(it, NULL, NULL, NULL); 2133 1.1 christos if (result == ISC_R_NOMORE) { 2134 1.1 christos result = dns_qpiter_prev(it, NULL, NULL, NULL); 2135 1.1 christos } 2136 1.1 christos RUNTIME_CHECK(result == ISC_R_SUCCESS); 2137 1.1 christos } 2138 1.1 christos 2139 1.1 christos static inline void 2140 1.1 christos greatest_leaf(dns_qpreadable_t qpr, dns_qpnode_t *n, dns_qpiter_t *iter) { 2141 1.1 christos while (is_branch(n)) { 2142 1.1 christos dns_qpref_t ref = branch_twigs_ref(n) + branch_twigs_size(n) - 2143 1.1 christos 1; 2144 1.1 christos iter->stack[++iter->sp] = n; 2145 1.1 christos n = ref_ptr(qpr, ref); 2146 1.1 christos } 2147 1.1 christos iter->stack[++iter->sp] = n; 2148 1.1 christos } 2149 1.1 christos 2150 1.1 christos static inline dns_qpnode_t * 2151 1.1 christos anyleaf(dns_qpreader_t *qp, dns_qpnode_t *n) { 2152 1.1 christos while (is_branch(n)) { 2153 1.1 christos n = branch_twigs(qp, n); 2154 1.1 christos } 2155 1.1 christos return n; 2156 1.1 christos } 2157 1.1 christos 2158 1.1 christos static inline int 2159 1.1 christos twig_offset(dns_qpnode_t *n, dns_qpshift_t sbit, dns_qpshift_t kbit, 2160 1.1 christos dns_qpshift_t fbit) { 2161 1.1 christos dns_qpweight_t pos = branch_twig_pos(n, sbit); 2162 1.1 christos if (branch_has_twig(n, sbit)) { 2163 1.1 christos return pos - (kbit < fbit); 2164 1.1 christos } 2165 1.1 christos return pos - 1; 2166 1.1 christos } 2167 1.1 christos 2168 1.1 christos /* 2169 1.1 christos * If dns_qp_lookup() was passed an iterator, we want it to point at the 2170 1.1 christos * matching name in the case of an exact match, or at the predecessor name 2171 1.1 christos * for a non-exact match. 2172 1.1 christos * 2173 1.1 christos * If there is an exact match, then there is nothing to be done. Otherwise, 2174 1.1 christos * we pop up the iterator stack until we find a parent branch with an offset 2175 1.1 christos * that is before the position where the search key differs from the found key. 2176 1.1 christos * From there we can step to the leaf that is the predecessor of the searched 2177 1.1 christos * name. 2178 1.1 christos * 2179 1.1 christos * Requires the iterator to be pointing at a leaf node. 2180 1.1 christos */ 2181 1.1 christos static void 2182 1.1 christos fix_iterator(dns_qpreader_t *qp, dns_qpiter_t *it, dns_qpkey_t key, 2183 1.1 christos size_t len) { 2184 1.1 christos dns_qpnode_t *n = it->stack[it->sp]; 2185 1.1 christos 2186 1.1 christos REQUIRE(!is_branch(n)); 2187 1.1 christos 2188 1.1 christos dns_qpkey_t found; 2189 1.1 christos size_t foundlen = leaf_qpkey(qp, n, found); 2190 1.1 christos size_t to = qpkey_compare(key, len, found, foundlen); 2191 1.1 christos 2192 1.1 christos /* If the keys are equal, the iterator is already at the right node. */ 2193 1.1 christos if (to == QPKEY_EQUAL) { 2194 1.1 christos return; 2195 1.1 christos } 2196 1.1 christos 2197 1.1 christos /* 2198 1.1 christos * Special case: if the key differs even before the root 2199 1.1 christos * key offset, it means the name desired either precedes or 2200 1.1 christos * follows the entire range of names in the database, and 2201 1.1 christos * popping up the stack won't help us, so just move the 2202 1.1 christos * iterator one step back from the origin and return. 2203 1.1 christos */ 2204 1.1 christos if (to < branch_key_offset(it->stack[0])) { 2205 1.1 christos dns_qpiter_init(qp, it); 2206 1.1 christos prevleaf(it); 2207 1.1 christos return; 2208 1.1 christos } 2209 1.1 christos 2210 1.1 christos /* 2211 1.1 christos * As long as the branch offset point is after the point where the 2212 1.1 christos * key differs, we need to branch up and find a better node. 2213 1.1 christos */ 2214 1.1 christos while (it->sp > 0) { 2215 1.1 christos dns_qpnode_t *b = it->stack[it->sp - 1]; 2216 1.1 christos if (branch_key_offset(b) < to) { 2217 1.1 christos break; 2218 1.1 christos } 2219 1.1 christos it->sp--; 2220 1.1 christos } 2221 1.1 christos n = it->stack[it->sp]; 2222 1.1 christos 2223 1.1 christos /* 2224 1.1 christos * Either we are now at the correct branch, or we are at the 2225 1.1 christos * first unmatched node. Determine the bit position for the 2226 1.1 christos * twig we need (sbit). 2227 1.1 christos */ 2228 1.1 christos dns_qpshift_t kbit = qpkey_bit(key, len, to); 2229 1.1 christos dns_qpshift_t fbit = qpkey_bit(found, foundlen, to); 2230 1.1 christos dns_qpshift_t sbit = 0; 2231 1.1 christos 2232 1.1 christos if (is_branch(n) && branch_key_offset(n) == to) { 2233 1.1 christos /* We are on the correct branch now. */ 2234 1.1 christos sbit = kbit; 2235 1.1 christos } else if (it->sp == 0) { 2236 1.1 christos /* 2237 1.1 christos * We are on the root branch, popping up the stack won't 2238 1.1 christos * help us, so just move the iterator one step back from the 2239 1.1 christos * origin and return. 2240 1.1 christos */ 2241 1.1 christos dns_qpiter_init(qp, it); 2242 1.1 christos prevleaf(it); 2243 1.1 christos return; 2244 1.1 christos } else { 2245 1.1 christos /* We are at the first unmatched node, pop up the stack. */ 2246 1.1 christos n = it->stack[--it->sp]; 2247 1.1 christos sbit = qpkey_bit(key, len, branch_key_offset(n)); 2248 1.1 christos } 2249 1.1 christos 2250 1.1 christos INSIST(is_branch(n)); 2251 1.1 christos 2252 1.1 christos prefetch_twigs(qp, n); 2253 1.1 christos dns_qpnode_t *twigs = branch_twigs(qp, n); 2254 1.1 christos int toff = twig_offset(n, sbit, kbit, fbit); 2255 1.1 christos if (toff >= 0) { 2256 1.1 christos /* 2257 1.1 christos * The name we want would've been after some twig in 2258 1.1 christos * this branch. Walk down from that twig to the 2259 1.1 christos * highest leaf in its subtree to get the predecessor. 2260 1.1 christos */ 2261 1.1 christos greatest_leaf(qp, twigs + toff, it); 2262 1.1 christos } else { 2263 1.1 christos /* 2264 1.1 christos * Every leaf below this node is greater than the one we 2265 1.1 christos * wanted, so the previous leaf is the predecessor. 2266 1.1 christos */ 2267 1.1 christos prevleaf(it); 2268 1.1 christos } 2269 1.1 christos } 2270 1.1 christos 2271 1.1 christos /* 2272 1.1 christos * When searching for a requested name in dns_qp_lookup(), we might add 2273 1.1 christos * a leaf node to the chain, then subsequently determine that it was a 2274 1.1 christos * dead end. When this happens, the chain can be left holding a node 2275 1.1 christos * that is *not* an ancestor of the requested name. We correct for that 2276 1.1 christos * here. 2277 1.1 christos */ 2278 1.1 christos static void 2279 1.1 christos fix_chain(dns_qpchain_t *chain, size_t offset) { 2280 1.1 christos while (chain->len > 0 && chain->chain[chain->len - 1].offset >= offset) 2281 1.1 christos { 2282 1.1 christos chain->len--; 2283 1.1 christos chain->chain[chain->len].node = NULL; 2284 1.1 christos chain->chain[chain->len].offset = 0; 2285 1.1 christos } 2286 1.1 christos } 2287 1.1 christos 2288 1.1 christos isc_result_t 2289 1.1 christos dns_qp_lookup(dns_qpreadable_t qpr, const dns_name_t *name, 2290 1.1 christos dns_name_t *foundname, dns_qpiter_t *iter, dns_qpchain_t *chain, 2291 1.1 christos void **pval_r, uint32_t *ival_r) { 2292 1.1 christos dns_qpreader_t *qp = dns_qpreader(qpr); 2293 1.1 christos dns_qpkey_t search, found; 2294 1.1 christos size_t searchlen, foundlen; 2295 1.1 christos size_t offset = 0; 2296 1.1 christos dns_qpnode_t *n = NULL; 2297 1.1 christos dns_qpshift_t bit = SHIFT_NOBYTE; 2298 1.1 christos dns_qpchain_t oc; 2299 1.1 christos dns_qpiter_t it; 2300 1.1 christos bool matched = false; 2301 1.1 christos bool setiter = true; 2302 1.1 christos 2303 1.1 christos REQUIRE(QP_VALID(qp)); 2304 1.1 christos REQUIRE(foundname == NULL || ISC_MAGIC_VALID(name, DNS_NAME_MAGIC)); 2305 1.1 christos 2306 1.1 christos searchlen = dns_qpkey_fromname(search, name); 2307 1.1 christos 2308 1.1 christos if (chain == NULL) { 2309 1.1 christos chain = &oc; 2310 1.1 christos } 2311 1.1 christos if (iter == NULL) { 2312 1.1 christos iter = ⁢ 2313 1.1 christos setiter = false; 2314 1.1 christos } 2315 1.1 christos dns_qpchain_init(qp, chain); 2316 1.1 christos dns_qpiter_init(qp, iter); 2317 1.1 christos 2318 1.1 christos n = get_root(qp); 2319 1.1 christos if (n == NULL) { 2320 1.1 christos return ISC_R_NOTFOUND; 2321 1.1 christos } 2322 1.1 christos iter->stack[0] = n; 2323 1.1 christos 2324 1.1 christos /* 2325 1.1 christos * Like `dns_qp_insert()`, we must find a leaf. However, we don't make a 2326 1.1 christos * second pass: instead, we keep track of any leaves with shorter keys 2327 1.1 christos * that we discover along the way. (In general, qp-trie searches can be 2328 1.1 christos * one-pass, by recording their traversal, or two-pass, for less stack 2329 1.1 christos * memory usage.) 2330 1.1 christos */ 2331 1.1 christos while (is_branch(n)) { 2332 1.1 christos prefetch_twigs(qp, n); 2333 1.1 christos 2334 1.1 christos offset = branch_key_offset(n); 2335 1.1 christos bit = qpkey_bit(search, searchlen, offset); 2336 1.1 christos dns_qpnode_t *twigs = branch_twigs(qp, n); 2337 1.1 christos 2338 1.1 christos /* 2339 1.1 christos * A shorter key that can be a parent domain always has a 2340 1.1 christos * leaf node at SHIFT_NOBYTE (indicating end of its key) 2341 1.1 christos * where our search key has a normal character immediately 2342 1.1 christos * after a label separator. 2343 1.1 christos * 2344 1.1 christos * Note 1: It is OK if `off - 1` underflows: it will 2345 1.1 christos * become SIZE_MAX, which is greater than `searchlen`, so 2346 1.1 christos * `qpkey_bit()` will return SHIFT_NOBYTE, which is what we 2347 1.1 christos * want when `off == 0`. 2348 1.1 christos * 2349 1.1 christos * Note 2: If SHIFT_NOBYTE twig is present, it will always 2350 1.1 christos * be in position 0, the first location in 'twigs'. 2351 1.1 christos */ 2352 1.1 christos if (bit != SHIFT_NOBYTE && branch_has_twig(n, SHIFT_NOBYTE) && 2353 1.1 christos qpkey_bit(search, searchlen, offset - 1) == SHIFT_NOBYTE && 2354 1.1 christos !is_branch(twigs)) 2355 1.1 christos { 2356 1.1 christos add_link(chain, twigs, offset); 2357 1.1 christos } 2358 1.1 christos 2359 1.1 christos matched = branch_has_twig(n, bit); 2360 1.1 christos if (matched) { 2361 1.1 christos /* 2362 1.1 christos * found a match: if it's a branch, we keep 2363 1.1 christos * searching, and if it's a leaf, we drop out of 2364 1.1 christos * the loop. 2365 1.1 christos */ 2366 1.1 christos n = branch_twig_ptr(qp, n, bit); 2367 1.1 christos } else { 2368 1.1 christos /* 2369 1.1 christos * this branch is a dead end, and the predecessor 2370 1.1 christos * doesn't matter. now we just need to find a leaf 2371 1.1 christos * to end on so that qpkey_leaf() will work below. 2372 1.1 christos */ 2373 1.1 christos n = anyleaf(qp, twigs); 2374 1.1 christos } 2375 1.1 christos 2376 1.1 christos iter->stack[++iter->sp] = n; 2377 1.1 christos } 2378 1.1 christos 2379 1.1 christos if (setiter) { 2380 1.1 christos /* 2381 1.1 christos * we found a leaf, but it might not be the leaf we wanted. 2382 1.1 christos * if it isn't, and if the caller passed us an iterator, 2383 1.1 christos * then we might need to reposition it. 2384 1.1 christos */ 2385 1.1 christos fix_iterator(qp, iter, search, searchlen); 2386 1.1 christos n = iter->stack[iter->sp]; 2387 1.1 christos } 2388 1.1 christos 2389 1.1 christos /* at this point, n can only be a leaf node */ 2390 1.1 christos INSIST(!is_branch(n)); 2391 1.1 christos 2392 1.1 christos foundlen = leaf_qpkey(qp, n, found); 2393 1.1 christos offset = qpkey_compare(search, searchlen, found, foundlen); 2394 1.1 christos 2395 1.1 christos /* the search ended with an exact or partial match */ 2396 1.1 christos if (offset == QPKEY_EQUAL || offset == foundlen) { 2397 1.1 christos isc_result_t result = ISC_R_SUCCESS; 2398 1.1 christos 2399 1.1 christos if (offset == foundlen) { 2400 1.1 christos fix_chain(chain, offset); 2401 1.1 christos result = DNS_R_PARTIALMATCH; 2402 1.1 christos } 2403 1.1 christos add_link(chain, n, offset); 2404 1.1 christos 2405 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n)); 2406 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n)); 2407 1.1 christos maybe_set_name(qp, n, foundname); 2408 1.1 christos return result; 2409 1.1 christos } 2410 1.1 christos 2411 1.1 christos /* 2412 1.1 christos * the requested name was not found, but if an ancestor 2413 1.1 christos * was, we can retrieve that from the chain. 2414 1.1 christos */ 2415 1.1 christos int len = chain->len; 2416 1.1 christos while (len-- > 0) { 2417 1.1 christos if (offset >= chain->chain[len].offset) { 2418 1.1 christos n = chain->chain[len].node; 2419 1.1 christos SET_IF_NOT_NULL(pval_r, leaf_pval(n)); 2420 1.1 christos SET_IF_NOT_NULL(ival_r, leaf_ival(n)); 2421 1.1 christos maybe_set_name(qp, n, foundname); 2422 1.1 christos return DNS_R_PARTIALMATCH; 2423 1.1 christos } else { 2424 1.1 christos /* 2425 1.1 christos * oops, during the search we found and added 2426 1.1 christos * a leaf that's longer than the requested 2427 1.1 christos * name; remove it from the chain. 2428 1.1 christos */ 2429 1.1 christos chain->len--; 2430 1.1 christos } 2431 1.1 christos } 2432 1.1 christos 2433 1.1 christos /* nothing was found at all */ 2434 1.1 christos return ISC_R_NOTFOUND; 2435 1.1 christos } 2436 1.1 christos 2437 1.1 christos /**********************************************************************/ 2438