Home | History | Annotate | Line # | Download | only in dns
badcache.c revision 1.5
      1 /*	$NetBSD: badcache.c,v 1.5 2021/02/19 16:42:15 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * This Source Code Form is subject to the terms of the Mozilla Public
      7  * License, v. 2.0. If a copy of the MPL was not distributed with this
      8  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
      9  *
     10  * See the COPYRIGHT file distributed with this work for additional
     11  * information regarding copyright ownership.
     12  */
     13 
     14 /*! \file */
     15 
     16 #include <inttypes.h>
     17 #include <stdbool.h>
     18 
     19 #include <isc/buffer.h>
     20 #include <isc/hash.h>
     21 #include <isc/log.h>
     22 #include <isc/mem.h>
     23 #include <isc/mutex.h>
     24 #include <isc/platform.h>
     25 #include <isc/print.h>
     26 #include <isc/rwlock.h>
     27 #include <isc/string.h>
     28 #include <isc/time.h>
     29 #include <isc/util.h>
     30 
     31 #include <dns/badcache.h>
     32 #include <dns/name.h>
     33 #include <dns/rdatatype.h>
     34 #include <dns/types.h>
     35 
     36 typedef struct dns_bcentry dns_bcentry_t;
     37 
     38 struct dns_badcache {
     39 	unsigned int magic;
     40 	isc_rwlock_t lock;
     41 	isc_mem_t *mctx;
     42 
     43 	isc_mutex_t *tlocks;
     44 	dns_bcentry_t **table;
     45 
     46 	atomic_uint_fast32_t count;
     47 	atomic_uint_fast32_t sweep;
     48 
     49 	unsigned int minsize;
     50 	unsigned int size;
     51 };
     52 
     53 #define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
     54 #define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)
     55 
     56 struct dns_bcentry {
     57 	dns_bcentry_t *next;
     58 	dns_rdatatype_t type;
     59 	isc_time_t expire;
     60 	uint32_t flags;
     61 	unsigned int hashval;
     62 	dns_name_t name;
     63 };
     64 
     65 static void
     66 badcache_resize(dns_badcache_t *bc, isc_time_t *now);
     67 
     68 isc_result_t
     69 dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
     70 	dns_badcache_t *bc = NULL;
     71 	unsigned int i;
     72 
     73 	REQUIRE(bcp != NULL && *bcp == NULL);
     74 	REQUIRE(mctx != NULL);
     75 
     76 	bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
     77 	memset(bc, 0, sizeof(dns_badcache_t));
     78 
     79 	isc_mem_attach(mctx, &bc->mctx);
     80 	isc_rwlock_init(&bc->lock, 0, 0);
     81 
     82 	bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
     83 	bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
     84 	for (i = 0; i < size; i++) {
     85 		isc_mutex_init(&bc->tlocks[i]);
     86 	}
     87 	bc->size = bc->minsize = size;
     88 	memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));
     89 
     90 	atomic_init(&bc->count, 0);
     91 	atomic_init(&bc->sweep, 0);
     92 	bc->magic = BADCACHE_MAGIC;
     93 
     94 	*bcp = bc;
     95 	return (ISC_R_SUCCESS);
     96 }
     97 
     98 void
     99 dns_badcache_destroy(dns_badcache_t **bcp) {
    100 	dns_badcache_t *bc;
    101 	unsigned int i;
    102 
    103 	REQUIRE(bcp != NULL && *bcp != NULL);
    104 	bc = *bcp;
    105 	*bcp = NULL;
    106 
    107 	dns_badcache_flush(bc);
    108 
    109 	bc->magic = 0;
    110 	isc_rwlock_destroy(&bc->lock);
    111 	for (i = 0; i < bc->size; i++) {
    112 		isc_mutex_destroy(&bc->tlocks[i]);
    113 	}
    114 	isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
    115 	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
    116 	isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
    117 }
    118 
    119 static void
    120 badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
    121 	dns_bcentry_t **newtable, *bad, *next;
    122 	isc_mutex_t *newlocks;
    123 	unsigned int newsize, i;
    124 	bool grow;
    125 
    126 	RWLOCK(&bc->lock, isc_rwlocktype_write);
    127 
    128 	/*
    129 	 * XXXWPK we will have a thundering herd problem here,
    130 	 * as all threads will wait on the RWLOCK when there's
    131 	 * a need to resize badcache.
    132 	 * However, it happens so rarely it should not be a
    133 	 * performance issue. This is because we double the
    134 	 * size every time we grow it, and we don't shrink
    135 	 * unless the number of entries really shrunk. In a
    136 	 * high load situation, the number of badcache entries
    137 	 * will eventually stabilize.
    138 	 */
    139 	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
    140 		grow = true;
    141 	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
    142 		   bc->size > bc->minsize)
    143 	{
    144 		grow = false;
    145 	} else {
    146 		/* Someone resized it already, bail. */
    147 		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
    148 		return;
    149 	}
    150 
    151 	if (grow) {
    152 		newsize = bc->size * 2 + 1;
    153 	} else {
    154 		newsize = (bc->size - 1) / 2;
    155 #ifdef __clang_analyzer__
    156 		/*
    157 		 * XXXWPK there's a bug in clang static analyzer -
    158 		 * `value % newsize` is considered undefined even though
    159 		 * we check if newsize is larger than 0. This helps.
    160 		 */
    161 		newsize += 1;
    162 #endif
    163 	}
    164 	RUNTIME_CHECK(newsize > 0);
    165 
    166 	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
    167 	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);
    168 
    169 	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);
    170 
    171 	/* Copy existing mutexes */
    172 	for (i = 0; i < newsize && i < bc->size; i++) {
    173 		newlocks[i] = bc->tlocks[i];
    174 	}
    175 	/* Initialize additional mutexes if we're growing */
    176 	for (i = bc->size; i < newsize; i++) {
    177 		isc_mutex_init(&newlocks[i]);
    178 	}
    179 	/* Destroy extra mutexes if we're shrinking */
    180 	for (i = newsize; i < bc->size; i++) {
    181 		isc_mutex_destroy(&bc->tlocks[i]);
    182 	}
    183 
    184 	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
    185 		for (bad = bc->table[i]; bad != NULL; bad = next) {
    186 			next = bad->next;
    187 			if (isc_time_compare(&bad->expire, now) < 0) {
    188 				isc_mem_put(bc->mctx, bad,
    189 					    sizeof(*bad) + bad->name.length);
    190 				atomic_fetch_sub_relaxed(&bc->count, 1);
    191 			} else {
    192 				bad->next = newtable[bad->hashval % newsize];
    193 				newtable[bad->hashval % newsize] = bad;
    194 			}
    195 		}
    196 		bc->table[i] = NULL;
    197 	}
    198 
    199 	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
    200 	bc->tlocks = newlocks;
    201 
    202 	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
    203 	bc->size = newsize;
    204 	bc->table = newtable;
    205 
    206 	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
    207 }
    208 
    209 void
    210 dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
    211 		 dns_rdatatype_t type, bool update, uint32_t flags,
    212 		 isc_time_t *expire) {
    213 	isc_result_t result;
    214 	unsigned int hashval, hash;
    215 	dns_bcentry_t *bad, *prev, *next;
    216 	isc_time_t now;
    217 	bool resize = false;
    218 
    219 	REQUIRE(VALID_BADCACHE(bc));
    220 	REQUIRE(name != NULL);
    221 	REQUIRE(expire != NULL);
    222 
    223 	RWLOCK(&bc->lock, isc_rwlocktype_read);
    224 
    225 	result = isc_time_now(&now);
    226 	if (result != ISC_R_SUCCESS) {
    227 		isc_time_settoepoch(&now);
    228 	}
    229 
    230 	hashval = dns_name_hash(name, false);
    231 	hash = hashval % bc->size;
    232 	LOCK(&bc->tlocks[hash]);
    233 	prev = NULL;
    234 	for (bad = bc->table[hash]; bad != NULL; bad = next) {
    235 		next = bad->next;
    236 		if (bad->type == type && dns_name_equal(name, &bad->name)) {
    237 			if (update) {
    238 				bad->expire = *expire;
    239 				bad->flags = flags;
    240 			}
    241 			break;
    242 		}
    243 		if (isc_time_compare(&bad->expire, &now) < 0) {
    244 			if (prev == NULL) {
    245 				bc->table[hash] = bad->next;
    246 			} else {
    247 				prev->next = bad->next;
    248 			}
    249 			isc_mem_put(bc->mctx, bad,
    250 				    sizeof(*bad) + bad->name.length);
    251 			atomic_fetch_sub_relaxed(&bc->count, 1);
    252 		} else {
    253 			prev = bad;
    254 		}
    255 	}
    256 
    257 	if (bad == NULL) {
    258 		isc_buffer_t buffer;
    259 		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
    260 		bad->type = type;
    261 		bad->hashval = hashval;
    262 		bad->expire = *expire;
    263 		bad->flags = flags;
    264 		isc_buffer_init(&buffer, bad + 1, name->length);
    265 		dns_name_init(&bad->name, NULL);
    266 		dns_name_copy(name, &bad->name, &buffer);
    267 		bad->next = bc->table[hash];
    268 		bc->table[hash] = bad;
    269 		unsigned count = atomic_fetch_add_relaxed(&bc->count, 1);
    270 		if ((count > bc->size * 8) ||
    271 		    (count < bc->size * 2 && bc->size > bc->minsize)) {
    272 			resize = true;
    273 		}
    274 	} else {
    275 		bad->expire = *expire;
    276 	}
    277 
    278 	UNLOCK(&bc->tlocks[hash]);
    279 	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
    280 	if (resize) {
    281 		badcache_resize(bc, &now);
    282 	}
    283 }
    284 
    285 bool
    286 dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
    287 		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
    288 	dns_bcentry_t *bad, *prev, *next;
    289 	bool answer = false;
    290 	unsigned int i;
    291 	unsigned int hash;
    292 
    293 	REQUIRE(VALID_BADCACHE(bc));
    294 	REQUIRE(name != NULL);
    295 	REQUIRE(now != NULL);
    296 
    297 	RWLOCK(&bc->lock, isc_rwlocktype_read);
    298 
    299 	/*
    300 	 * XXXMUKS: dns_name_equal() is expensive as it does a
    301 	 * octet-by-octet comparison, and it can be made better in two
    302 	 * ways here. First, lowercase the names (use
    303 	 * dns_name_downcase() instead of dns_name_copy() in
    304 	 * dns_badcache_add()) so that dns_name_caseequal() can be used
    305 	 * which the compiler will emit as SIMD instructions. Second,
    306 	 * don't put multiple copies of the same name in the chain (or
    307 	 * multiple names will have to be matched for equality), but use
    308 	 * name->link to store the type specific part.
    309 	 */
    310 
    311 	if (atomic_load_relaxed(&bc->count) == 0) {
    312 		goto skip;
    313 	}
    314 
    315 	hash = dns_name_hash(name, false) % bc->size;
    316 	prev = NULL;
    317 	LOCK(&bc->tlocks[hash]);
    318 	for (bad = bc->table[hash]; bad != NULL; bad = next) {
    319 		next = bad->next;
    320 		/*
    321 		 * Search the hash list. Clean out expired records as we go.
    322 		 */
    323 		if (isc_time_compare(&bad->expire, now) < 0) {
    324 			if (prev != NULL) {
    325 				prev->next = bad->next;
    326 			} else {
    327 				bc->table[hash] = bad->next;
    328 			}
    329 
    330 			isc_mem_put(bc->mctx, bad,
    331 				    sizeof(*bad) + bad->name.length);
    332 			atomic_fetch_sub(&bc->count, 1);
    333 			continue;
    334 		}
    335 		if (bad->type == type && dns_name_equal(name, &bad->name)) {
    336 			if (flagp != NULL) {
    337 				*flagp = bad->flags;
    338 			}
    339 			answer = true;
    340 			break;
    341 		}
    342 		prev = bad;
    343 	}
    344 	UNLOCK(&bc->tlocks[hash]);
    345 skip:
    346 
    347 	/*
    348 	 * Slow sweep to clean out stale records.
    349 	 */
    350 	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
    351 	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
    352 		bad = bc->table[i];
    353 		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
    354 			bc->table[i] = bad->next;
    355 			isc_mem_put(bc->mctx, bad,
    356 				    sizeof(*bad) + bad->name.length);
    357 			atomic_fetch_sub_relaxed(&bc->count, 1);
    358 		}
    359 		UNLOCK(&bc->tlocks[i]);
    360 	}
    361 
    362 	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
    363 	return (answer);
    364 }
    365 
    366 void
    367 dns_badcache_flush(dns_badcache_t *bc) {
    368 	dns_bcentry_t *entry, *next;
    369 	unsigned int i;
    370 
    371 	RWLOCK(&bc->lock, isc_rwlocktype_write);
    372 	REQUIRE(VALID_BADCACHE(bc));
    373 
    374 	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
    375 		for (entry = bc->table[i]; entry != NULL; entry = next) {
    376 			next = entry->next;
    377 			isc_mem_put(bc->mctx, entry,
    378 				    sizeof(*entry) + entry->name.length);
    379 			atomic_fetch_sub_relaxed(&bc->count, 1);
    380 		}
    381 		bc->table[i] = NULL;
    382 	}
    383 	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
    384 }
    385 
    386 void
    387 dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
    388 	dns_bcentry_t *bad, *prev, *next;
    389 	isc_result_t result;
    390 	isc_time_t now;
    391 	unsigned int hash;
    392 
    393 	REQUIRE(VALID_BADCACHE(bc));
    394 	REQUIRE(name != NULL);
    395 
    396 	RWLOCK(&bc->lock, isc_rwlocktype_read);
    397 
    398 	result = isc_time_now(&now);
    399 	if (result != ISC_R_SUCCESS) {
    400 		isc_time_settoepoch(&now);
    401 	}
    402 	hash = dns_name_hash(name, false) % bc->size;
    403 	LOCK(&bc->tlocks[hash]);
    404 	prev = NULL;
    405 	for (bad = bc->table[hash]; bad != NULL; bad = next) {
    406 		int n;
    407 		next = bad->next;
    408 		n = isc_time_compare(&bad->expire, &now);
    409 		if (n < 0 || dns_name_equal(name, &bad->name)) {
    410 			if (prev == NULL) {
    411 				bc->table[hash] = bad->next;
    412 			} else {
    413 				prev->next = bad->next;
    414 			}
    415 
    416 			isc_mem_put(bc->mctx, bad,
    417 				    sizeof(*bad) + bad->name.length);
    418 			atomic_fetch_sub_relaxed(&bc->count, 1);
    419 		} else {
    420 			prev = bad;
    421 		}
    422 	}
    423 	UNLOCK(&bc->tlocks[hash]);
    424 
    425 	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
    426 }
    427 
    428 void
    429 dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
    430 	dns_bcentry_t *bad, *prev, *next;
    431 	unsigned int i;
    432 	int n;
    433 	isc_time_t now;
    434 	isc_result_t result;
    435 
    436 	REQUIRE(VALID_BADCACHE(bc));
    437 	REQUIRE(name != NULL);
    438 
    439 	/*
    440 	 * We write lock the tree to avoid relocking every node
    441 	 * individually.
    442 	 */
    443 	RWLOCK(&bc->lock, isc_rwlocktype_write);
    444 
    445 	result = isc_time_now(&now);
    446 	if (result != ISC_R_SUCCESS) {
    447 		isc_time_settoepoch(&now);
    448 	}
    449 
    450 	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
    451 		prev = NULL;
    452 		for (bad = bc->table[i]; bad != NULL; bad = next) {
    453 			next = bad->next;
    454 			n = isc_time_compare(&bad->expire, &now);
    455 			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
    456 				if (prev == NULL) {
    457 					bc->table[i] = bad->next;
    458 				} else {
    459 					prev->next = bad->next;
    460 				}
    461 
    462 				isc_mem_put(bc->mctx, bad,
    463 					    sizeof(*bad) + bad->name.length);
    464 				atomic_fetch_sub_relaxed(&bc->count, 1);
    465 			} else {
    466 				prev = bad;
    467 			}
    468 		}
    469 	}
    470 
    471 	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
    472 }
    473 
    474 void
    475 dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
    476 	char namebuf[DNS_NAME_FORMATSIZE];
    477 	char typebuf[DNS_RDATATYPE_FORMATSIZE];
    478 	dns_bcentry_t *bad, *next, *prev;
    479 	isc_time_t now;
    480 	unsigned int i;
    481 	uint64_t t;
    482 
    483 	REQUIRE(VALID_BADCACHE(bc));
    484 	REQUIRE(cachename != NULL);
    485 	REQUIRE(fp != NULL);
    486 
    487 	/*
    488 	 * We write lock the tree to avoid relocking every node
    489 	 * individually.
    490 	 */
    491 	RWLOCK(&bc->lock, isc_rwlocktype_write);
    492 	fprintf(fp, ";\n; %s\n;\n", cachename);
    493 
    494 	TIME_NOW(&now);
    495 	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
    496 		prev = NULL;
    497 		for (bad = bc->table[i]; bad != NULL; bad = next) {
    498 			next = bad->next;
    499 			if (isc_time_compare(&bad->expire, &now) < 0) {
    500 				if (prev != NULL) {
    501 					prev->next = bad->next;
    502 				} else {
    503 					bc->table[i] = bad->next;
    504 				}
    505 
    506 				isc_mem_put(bc->mctx, bad,
    507 					    sizeof(*bad) + bad->name.length);
    508 				atomic_fetch_sub_relaxed(&bc->count, 1);
    509 				continue;
    510 			}
    511 			prev = bad;
    512 			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
    513 			dns_rdatatype_format(bad->type, typebuf,
    514 					     sizeof(typebuf));
    515 			t = isc_time_microdiff(&bad->expire, &now);
    516 			t /= 1000;
    517 			fprintf(fp,
    518 				"; %s/%s [ttl "
    519 				"%" PRIu64 "]\n",
    520 				namebuf, typebuf, t);
    521 		}
    522 	}
    523 	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
    524 }
    525