badcache.c revision 1.5 1 /* $NetBSD: badcache.c,v 1.5 2021/02/19 16:42:15 christos Exp $ */
2
3 /*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9 *
10 * See the COPYRIGHT file distributed with this work for additional
11 * information regarding copyright ownership.
12 */
13
14 /*! \file */
15
16 #include <inttypes.h>
17 #include <stdbool.h>
18
19 #include <isc/buffer.h>
20 #include <isc/hash.h>
21 #include <isc/log.h>
22 #include <isc/mem.h>
23 #include <isc/mutex.h>
24 #include <isc/platform.h>
25 #include <isc/print.h>
26 #include <isc/rwlock.h>
27 #include <isc/string.h>
28 #include <isc/time.h>
29 #include <isc/util.h>
30
31 #include <dns/badcache.h>
32 #include <dns/name.h>
33 #include <dns/rdatatype.h>
34 #include <dns/types.h>
35
typedef struct dns_bcentry dns_bcentry_t;

/*
 * A bad cache: a resizable hash table of dns_bcentry_t chains.
 *
 * Locking model (as used throughout this file): 'lock' is taken in
 * read mode for per-bucket operations and in write mode only when the
 * whole table is replaced (badcache_resize) or traversed wholesale
 * (flush/flushtree/print); each bucket chain is additionally guarded
 * by the matching mutex in 'tlocks'.
 */
struct dns_badcache {
	unsigned int magic;	/* BADCACHE_MAGIC while the object is live */
	isc_rwlock_t lock;	/* table-wide lock; see note above */
	isc_mem_t *mctx;	/* memory context for all allocations */

	isc_mutex_t *tlocks;	/* one mutex per hash bucket ('size' of them) */
	dns_bcentry_t **table;	/* hash table: 'size' singly-linked buckets */

	atomic_uint_fast32_t count; /* live entries across all buckets */
	atomic_uint_fast32_t sweep; /* rotating bucket index for lazy expiry */

	unsigned int minsize;	/* never shrink below this bucket count */
	unsigned int size;	/* current bucket count */
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)
55
/*
 * One bad-cache entry.  The owner name's wire data is stored inline:
 * the entry is allocated as sizeof(dns_bcentry_t) + name length, and
 * 'name' points into that trailing space (see dns_badcache_add()).
 */
struct dns_bcentry {
	dns_bcentry_t *next;	/* next entry on this bucket's chain */
	dns_rdatatype_t type;	/* rdata type this entry applies to */
	isc_time_t expire;	/* absolute time after which entry is stale */
	uint32_t flags;		/* caller-defined flags (returned by find) */
	unsigned int hashval;	/* full name hash, pre-modulo (for rehash) */
	dns_name_t name;	/* owner name; data lives after the struct */
};

static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);
67
68 isc_result_t
69 dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
70 dns_badcache_t *bc = NULL;
71 unsigned int i;
72
73 REQUIRE(bcp != NULL && *bcp == NULL);
74 REQUIRE(mctx != NULL);
75
76 bc = isc_mem_get(mctx, sizeof(dns_badcache_t));
77 memset(bc, 0, sizeof(dns_badcache_t));
78
79 isc_mem_attach(mctx, &bc->mctx);
80 isc_rwlock_init(&bc->lock, 0, 0);
81
82 bc->table = isc_mem_get(bc->mctx, sizeof(*bc->table) * size);
83 bc->tlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * size);
84 for (i = 0; i < size; i++) {
85 isc_mutex_init(&bc->tlocks[i]);
86 }
87 bc->size = bc->minsize = size;
88 memset(bc->table, 0, bc->size * sizeof(dns_bcentry_t *));
89
90 atomic_init(&bc->count, 0);
91 atomic_init(&bc->sweep, 0);
92 bc->magic = BADCACHE_MAGIC;
93
94 *bcp = bc;
95 return (ISC_R_SUCCESS);
96 }
97
98 void
99 dns_badcache_destroy(dns_badcache_t **bcp) {
100 dns_badcache_t *bc;
101 unsigned int i;
102
103 REQUIRE(bcp != NULL && *bcp != NULL);
104 bc = *bcp;
105 *bcp = NULL;
106
107 dns_badcache_flush(bc);
108
109 bc->magic = 0;
110 isc_rwlock_destroy(&bc->lock);
111 for (i = 0; i < bc->size; i++) {
112 isc_mutex_destroy(&bc->tlocks[i]);
113 }
114 isc_mem_put(bc->mctx, bc->table, sizeof(dns_bcentry_t *) * bc->size);
115 isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
116 isc_mem_putanddetach(&bc->mctx, bc, sizeof(dns_badcache_t));
117 }
118
/*
 * Replace the hash table with one sized for the current entry count:
 * grow to 2n+1 buckets when overloaded (count > 8 * size), shrink to
 * (n-1)/2 when underloaded (count < 2 * size) and above the minimum.
 * Entries already expired at '*now' are freed instead of rehashed.
 * Called with no locks held; the write lock taken here excludes all
 * readers and bucket-lock holders for the duration.
 */
static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here,
	 * as all threads will wait on the RWLOCK when there's
	 * a need to resize badcache.
	 * However, it happens so rarely it should not be a
	 * performance issue. This is because we double the
	 * size every time we grow it, and we don't shrink
	 * unless the number of entries really shrunk. In a
	 * high load situation, the number of badcache entries
	 * will eventually stabilize.
	 */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in clang static analyzer -
		 * `value % newsize` is considered undefined even though
		 * we check if newsize is larger than 0. This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_get(bc->mctx, sizeof(dns_bcentry_t *) * newsize);
	memset(newtable, 0, sizeof(dns_bcentry_t *) * newsize);

	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/*
	 * Copy existing mutexes.
	 * NOTE(review): this moves live mutexes by struct assignment,
	 * which assumes isc_mutex_t is trivially relocatable -- confirm
	 * against the isc/mutex implementation on all platforms.
	 */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	/*
	 * Rehash every surviving entry into the new table (chains are
	 * prepended, so per-bucket order is reversed, which is fine for
	 * an unordered chain).  The count check lets the outer loop stop
	 * early once everything has been moved or freed.
	 */
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;

	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
208
/*
 * Add (or refresh) a bad-cache entry for <name, type> expiring at
 * '*expire'.  Takes the table lock in read mode (concurrent adds and
 * finds on other buckets proceed) plus this bucket's mutex.  Expired
 * entries met while walking the chain are pruned as a side effect.
 * If the insertion pushes the table past its load thresholds, the
 * table is resized after the locks are dropped.
 */
void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	isc_result_t result;
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/* Fall back to the epoch if the clock is unavailable. */
	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	hashval = dns_name_hash(name, false);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, &bad->name)) {
			/* Existing entry; 'update' gates the flags. */
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		/* Opportunistically drop expired entries as we pass them. */
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}

	if (bad == NULL) {
		/*
		 * Not found: allocate the entry and its name storage in
		 * one chunk; the name data lives directly after the
		 * struct (bad + 1).
		 */
		isc_buffer_t buffer;
		bad = isc_mem_get(bc->mctx, sizeof(*bad) + name->length);
		bad->type = type;
		bad->hashval = hashval;
		bad->expire = *expire;
		bad->flags = flags;
		isc_buffer_init(&buffer, bad + 1, name->length);
		dns_name_init(&bad->name, NULL);
		dns_name_copy(name, &bad->name, &buffer);
		bad->next = bc->table[hash];
		bc->table[hash] = bad;
		unsigned count = atomic_fetch_add_relaxed(&bc->count, 1);
		/* Same grow/shrink thresholds as badcache_resize(). */
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize)) {
			resize = true;
		}
	} else {
		/*
		 * NOTE(review): the expire time is refreshed here even
		 * when 'update' is false, so 'update' effectively gates
		 * only the flags -- confirm this is intended.
		 */
		bad->expire = *expire;
	}

	/* Resize outside the locks; badcache_resize() takes the write lock. */
	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	if (resize) {
		badcache_resize(bc, &now);
	}
}
284
285 bool
286 dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
287 dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
288 dns_bcentry_t *bad, *prev, *next;
289 bool answer = false;
290 unsigned int i;
291 unsigned int hash;
292
293 REQUIRE(VALID_BADCACHE(bc));
294 REQUIRE(name != NULL);
295 REQUIRE(now != NULL);
296
297 RWLOCK(&bc->lock, isc_rwlocktype_read);
298
299 /*
300 * XXXMUKS: dns_name_equal() is expensive as it does a
301 * octet-by-octet comparison, and it can be made better in two
302 * ways here. First, lowercase the names (use
303 * dns_name_downcase() instead of dns_name_copy() in
304 * dns_badcache_add()) so that dns_name_caseequal() can be used
305 * which the compiler will emit as SIMD instructions. Second,
306 * don't put multiple copies of the same name in the chain (or
307 * multiple names will have to be matched for equality), but use
308 * name->link to store the type specific part.
309 */
310
311 if (atomic_load_relaxed(&bc->count) == 0) {
312 goto skip;
313 }
314
315 hash = dns_name_hash(name, false) % bc->size;
316 prev = NULL;
317 LOCK(&bc->tlocks[hash]);
318 for (bad = bc->table[hash]; bad != NULL; bad = next) {
319 next = bad->next;
320 /*
321 * Search the hash list. Clean out expired records as we go.
322 */
323 if (isc_time_compare(&bad->expire, now) < 0) {
324 if (prev != NULL) {
325 prev->next = bad->next;
326 } else {
327 bc->table[hash] = bad->next;
328 }
329
330 isc_mem_put(bc->mctx, bad,
331 sizeof(*bad) + bad->name.length);
332 atomic_fetch_sub(&bc->count, 1);
333 continue;
334 }
335 if (bad->type == type && dns_name_equal(name, &bad->name)) {
336 if (flagp != NULL) {
337 *flagp = bad->flags;
338 }
339 answer = true;
340 break;
341 }
342 prev = bad;
343 }
344 UNLOCK(&bc->tlocks[hash]);
345 skip:
346
347 /*
348 * Slow sweep to clean out stale records.
349 */
350 i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
351 if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
352 bad = bc->table[i];
353 if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
354 bc->table[i] = bad->next;
355 isc_mem_put(bc->mctx, bad,
356 sizeof(*bad) + bad->name.length);
357 atomic_fetch_sub_relaxed(&bc->count, 1);
358 }
359 UNLOCK(&bc->tlocks[i]);
360 }
361
362 RWUNLOCK(&bc->lock, isc_rwlocktype_read);
363 return (answer);
364 }
365
366 void
367 dns_badcache_flush(dns_badcache_t *bc) {
368 dns_bcentry_t *entry, *next;
369 unsigned int i;
370
371 RWLOCK(&bc->lock, isc_rwlocktype_write);
372 REQUIRE(VALID_BADCACHE(bc));
373
374 for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
375 for (entry = bc->table[i]; entry != NULL; entry = next) {
376 next = entry->next;
377 isc_mem_put(bc->mctx, entry,
378 sizeof(*entry) + entry->name.length);
379 atomic_fetch_sub_relaxed(&bc->count, 1);
380 }
381 bc->table[i] = NULL;
382 }
383 RWUNLOCK(&bc->lock, isc_rwlocktype_write);
384 }
385
/*
 * Remove every entry whose owner name equals 'name' (any rdata type),
 * pruning expired entries met on the same bucket chain while we're
 * there.  Only one bucket can contain the name, so the read lock plus
 * that bucket's mutex suffice.
 */
void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	isc_result_t result;
	isc_time_t now;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/* Fall back to the epoch if the clock is unavailable. */
	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}
	hash = dns_name_hash(name, false) % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		int n;
		next = bad->next;
		n = isc_time_compare(&bad->expire, &now);
		/* Unlink if expired, or if the name matches. */
		if (n < 0 || dns_name_equal(name, &bad->name)) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}

			isc_mem_put(bc->mctx, bad,
				    sizeof(*bad) + bad->name.length);
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	UNLOCK(&bc->tlocks[hash]);

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}
427
/*
 * Remove every entry at or below 'name' in the DNS tree, pruning
 * expired entries everywhere as a side effect.  Subdomain matches can
 * live in any bucket, so the whole table is walked.
 */
void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *bad, *prev, *next;
	unsigned int i;
	int n;
	isc_time_t now;
	isc_result_t result;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/* Fall back to the epoch if the clock is unavailable. */
	result = isc_time_now(&now);
	if (result != ISC_R_SUCCESS) {
		isc_time_settoepoch(&now);
	}

	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			n = isc_time_compare(&bad->expire, &now);
			/* Unlink if expired, or if under 'name'. */
			if (n < 0 || dns_name_issubdomain(&bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = bad->next;
				} else {
					prev->next = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
473
/*
 * Dump the cache to 'fp' as master-file-style comment lines
 * ("; name/type [ttl N]"), one per live entry, under the heading
 * 'cachename'.  Expired entries are pruned while printing, which is
 * why the write lock (not just read) is taken.
 */
void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	dns_bcentry_t *bad, *next, *prev;
	isc_time_t now;
	unsigned int i;
	uint64_t t;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);

	TIME_NOW(&now);
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		prev = NULL;
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			/* Prune expired entries instead of printing them. */
			if (isc_time_compare(&bad->expire, &now) < 0) {
				if (prev != NULL) {
					prev->next = bad->next;
				} else {
					bc->table[i] = bad->next;
				}

				isc_mem_put(bc->mctx, bad,
					    sizeof(*bad) + bad->name.length);
				atomic_fetch_sub_relaxed(&bc->count, 1);
				continue;
			}
			prev = bad;
			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(bad->type, typebuf,
					     sizeof(typebuf));
			/* Remaining lifetime, microseconds -> milliseconds. */
			t = isc_time_microdiff(&bad->expire, &now);
			t /= 1000;
			fprintf(fp,
				"; %s/%s [ttl "
				"%" PRIu64 "]\n",
				namebuf, typebuf, t);
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
525