Home | History | Annotate | Line # | Download | only in dist
      1 
      2 /* rrl.c - Response Rate Limiting for NSD.
      3  * By W.C.A. Wijngaards
      4  * Copyright 2012, NLnet Labs.
      5  * BSD, see LICENSE.
      6  */
      7 #include "config.h"
      8 #include <errno.h>
      9 #include "rrl.h"
     10 #include "util.h"
     11 #include "lookup3.h"
     12 #include "options.h"
     13 
     14 #ifdef RATELIMIT
     15 
     16 #ifdef HAVE_MMAP
     17 #include <sys/mman.h>
     18 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
     19 #define MAP_ANONYMOUS   MAP_ANON
     20 #endif
     21 #endif /* HAVE_MMAP */
     22 
     23 
/**
 * The rate limiting data structure bucket, this represents one rate of
 * packets from a single source.
 * Smoothed average rates.
 * A bucket is selected with hash % rrl_array_size; the source, flags and
 * hash fields together identify the current owner of the bucket, and a
 * query that differs in any of them takes the bucket over.
 */
struct rrl_bucket {
	/* the source netmask: the client address masked to the configured
	 * prefix length, as returned by rrl_get_source() */
	uint64_t source;
	/* rate, in queries per second, which due to rate=r(t)+r(t-1)/2 is
	 * equal to double the queries per second */
	uint32_t rate;
	/* the full hash (not reduced modulo the table size) */
	uint32_t hash;
	/* counter for queries arrived in this second */
	uint32_t counter;
	/* timestamp, which time is the time of the counter, the rate is from
	 * one timestep before that. */
	int32_t stamp;
	/* flags for the source mask and type: the classification type of the
	 * query or-ed with the IPv6 marker from rrl_get_source() */
	uint16_t flags;
};
     45 
/* the (global) array of RRL buckets; set by rrl_init() and reset to NULL
 * by rrl_deinit() */
static struct rrl_bucket* rrl_array = NULL;
/* number of buckets in a table, overridden by rrl_mmap_init() */
static size_t rrl_array_size = RRL_BUCKETS;
/* limit for non-whitelisted traffic; 0 disables it */
static uint32_t rrl_ratelimit = RRL_LIMIT; /* 2x qps */
/* slip ratio used by rrl_slip(): 0 discards everything, 1 answers every
 * limited query with TC, n answers randomly about one in n */
static uint8_t rrl_slip_ratio = RRL_SLIP;
/* prefix lengths that client addresses are masked to */
static uint8_t rrl_ipv4_prefixlen = RRL_IPV4_PREFIX_LENGTH;
static uint8_t rrl_ipv6_prefixlen = RRL_IPV6_PREFIX_LENGTH;
/* mask applied to the first 8 bytes of an IPv6 client address; it is only
 * assigned in rrl_mmap_init() */
static uint64_t rrl_ipv6_mask; /* max prefixlen 64 */
/* limit for traffic whose type is whitelisted in the zone's pattern */
static uint32_t rrl_whitelist_ratelimit = RRL_WLIST_LIMIT; /* 2x qps */

/* the array of mmaps for the children (saved between reloads) */
static void** rrl_maps = NULL;
/* number of entries in rrl_maps */
static size_t rrl_maps_num = 0;
     59 
     60 void rrl_mmap_init(int numch, size_t numbuck, size_t lm, size_t wlm, size_t sm,
     61 	size_t plf, size_t pls)
     62 {
     63 #ifdef HAVE_MMAP
     64 	size_t i;
     65 #endif
     66 	if(numbuck != 0)
     67 		rrl_array_size = numbuck;
     68 	rrl_ratelimit = lm*2;
     69 	rrl_slip_ratio = sm;
     70 	rrl_ipv4_prefixlen = plf;
     71 	rrl_ipv6_prefixlen = pls;
     72 	if (pls <= 32) {
     73 		rrl_ipv6_mask = ((uint64_t) htonl(0xffffffff << (32-pls))) << 32;
     74 	} else {
     75 		rrl_ipv6_mask =  ((uint64_t) htonl(0xffffffff << (64-pls))) |
     76 			(((uint64_t)0xffffffff)<<32);
     77 	}
     78 	rrl_whitelist_ratelimit = wlm*2;
     79 #ifdef HAVE_MMAP
     80 	/* allocate the ratelimit hashtable in a memory map so it is
     81 	 * preserved across reforks (every child its own table) */
     82 	rrl_maps_num = (size_t)numch;
     83 	rrl_maps = (void**)xmallocarray(rrl_maps_num, sizeof(void*));
     84 	for(i=0; i<rrl_maps_num; i++) {
     85 		rrl_maps[i] = mmap(NULL,
     86 			sizeof(struct rrl_bucket)*rrl_array_size,
     87 			PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
     88 		if(rrl_maps[i] == MAP_FAILED) {
     89 			log_msg(LOG_ERR, "rrl: mmap failed: %s",
     90 				strerror(errno));
     91 			exit(1);
     92 		}
     93 		memset(rrl_maps[i], 0,
     94 			sizeof(struct rrl_bucket)*rrl_array_size);
     95 	}
     96 #else
     97 	(void)numch;
     98 	rrl_maps_num = 0;
     99 	rrl_maps = NULL;
    100 #endif
    101 }
    102 
/**
 * Unmap every per-child bucket table and release the array that holds
 * the map pointers. Does nothing when built without mmap support.
 */
void rrl_mmap_deinit(void)
{
#ifdef HAVE_MMAP
	size_t idx = 0;
	while(idx < rrl_maps_num) {
		munmap(rrl_maps[idx],
			sizeof(struct rrl_bucket)*rrl_array_size);
		rrl_maps[idx] = NULL;
		idx++;
	}
	free(rrl_maps);
	rrl_maps = NULL;
#endif
}
    115 
/**
 * Release only the array of map pointers; the mappings themselves are
 * not unmapped here. Does nothing when built without mmap support.
 */
void rrl_mmap_deinit_keep_mmap(void)
{
#ifdef HAVE_MMAP
	/* only the pointer array is freed, no munmap is done */
	free(rrl_maps);
	rrl_maps = NULL;
#endif
}
    123 
    124 void rrl_set_limit(size_t lm, size_t wlm, size_t sm)
    125 {
    126 	rrl_ratelimit = lm*2;
    127 	rrl_whitelist_ratelimit = wlm*2;
    128 	rrl_slip_ratio = sm;
    129 }
    130 
    131 void rrl_init(size_t ch)
    132 {
    133 	if(!rrl_maps || ch >= rrl_maps_num)
    134 	    rrl_array = xalloc_array_zero(sizeof(struct rrl_bucket),
    135 	    	rrl_array_size);
    136 #ifdef HAVE_MMAP
    137 	else rrl_array = (struct rrl_bucket*)rrl_maps[ch];
    138 #endif
    139 }
    140 
    141 void rrl_deinit(size_t ch)
    142 {
    143 	if(!rrl_maps || ch >= rrl_maps_num)
    144 		free(rrl_array);
    145 	rrl_array = NULL;
    146 }
    147 
    148 /** return the source netblock of the query, this is the genuine source
    149  * for genuine queries and the target for reflected packets */
    150 static uint64_t rrl_get_source(query_type* query, uint16_t* c2)
    151 {
    152 	/* note there is an IPv6 subnet, that maps
    153 	 * to the same buckets as IPv4 space, but there is a flag in c2
    154 	 * that makes the hash different */
    155 #ifdef INET6
    156 	if( ((struct sockaddr_in*)&query->client_addr)->sin_family == AF_INET) {
    157 		*c2 = 0;
    158 		return ((struct sockaddr_in*)&query->client_addr)->
    159 			sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen));
    160 	} else {
    161 		uint64_t s;
    162 		*c2 = rrl_ip6;
    163 		memmove(&s, &((struct sockaddr_in6*)&query->client_addr)->sin6_addr,
    164 			sizeof(s));
    165 		return s & rrl_ipv6_mask;
    166 	}
    167 #else
    168 	*c2 = 0;
    169 	return query->client_addr.sin_addr.s_addr & htonl(0xffffffff << (32-rrl_ipv4_prefixlen));
    170 #endif
    171 }
    172 
    173 /** debug source to string */
    174 static const char* rrlsource2str(uint64_t s, uint16_t c2)
    175 {
    176 	static char buf[64];
    177 	struct in_addr a4;
    178 #ifdef INET6
    179 	if(c2) {
    180 		/* IPv6 */
    181 		struct in6_addr a6;
    182 		memset(&a6, 0, sizeof(a6));
    183 		memmove(&a6, &s, sizeof(s));
    184 		if(!inet_ntop(AF_INET6, &a6, buf, sizeof(buf)))
    185 			strlcpy(buf, "[ip6 ntop failed]", sizeof(buf));
    186 		else {
    187 			static char prefix[5];
    188 			snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv6_prefixlen);
    189 			strlcat(buf, &prefix[0], sizeof(buf));
    190 		}
    191 		return buf;
    192 	}
    193 #else
    194 	(void)c2;
    195 #endif
    196 	/* ipv4 */
    197 	a4.s_addr = (uint32_t)s;
    198 	if(!inet_ntop(AF_INET, &a4, buf, sizeof(buf)))
    199 		strlcpy(buf, "[ip4 ntop failed]", sizeof(buf));
    200 	else {
    201 		static char prefix[5];
    202 		snprintf(prefix, sizeof(prefix), "/%d", rrl_ipv4_prefixlen);
    203 		strlcat(buf, &prefix[0], sizeof(buf));
    204 	}
    205 	return buf;
    206 }
    207 
    208 enum rrl_type rrlstr2type(const char* s)
    209 {
    210 	if(strcmp(s, "nxdomain")==0) return rrl_type_nxdomain;
    211 	else if(strcmp(s, "error")==0) return rrl_type_error;
    212 	else if(strcmp(s, "referral")==0) return rrl_type_referral;
    213 	else if(strcmp(s, "any")==0) return rrl_type_any;
    214 	else if(strcmp(s, "wildcard")==0) return rrl_type_wildcard;
    215 	else if(strcmp(s, "nodata")==0) return rrl_type_nodata;
    216 	else if(strcmp(s, "dnskey")==0) return rrl_type_dnskey;
    217 	else if(strcmp(s, "positive")==0) return rrl_type_positive;
    218 	else if(strcmp(s, "rrsig")==0) return rrl_type_rrsig;
    219 	else if(strcmp(s, "all")==0) return rrl_type_all;
    220 	return 0; /* unknown */
    221 }
    222 
    223 const char* rrltype2str(enum rrl_type c)
    224 {
    225 	switch(c & 0x0fff) {
    226 		case rrl_type_nxdomain: return "nxdomain";
    227 		case rrl_type_error: return "error";
    228 		case rrl_type_referral: return "referral";
    229 		case rrl_type_any: return "any";
    230 		case rrl_type_wildcard: return "wildcard";
    231 		case rrl_type_nodata: return "nodata";
    232 		case rrl_type_dnskey: return "dnskey";
    233 		case rrl_type_positive: return "positive";
    234 		case rrl_type_rrsig: return "rrsig";
    235 		case rrl_type_all: return "all";
    236 	}
    237 	return "unknown";
    238 }
    239 
    240 /** classify the query in a number of different types, each has separate
    241  * ratelimiting, so that positive queries are not impeded by others */
    242 static uint16_t rrl_classify(query_type* query, const uint8_t** d,
    243 	size_t* d_len)
    244 {
    245 	if(RCODE(query->packet) == RCODE_NXDOMAIN) {
    246 		if(query->zone && query->zone->apex) {
    247 			*d = dname_name(domain_dname(query->zone->apex));
    248 			*d_len = domain_dname(query->zone->apex)->name_size;
    249 		}
    250 		return rrl_type_nxdomain;
    251 	}
    252 	if(RCODE(query->packet) != RCODE_OK) {
    253 		if(query->zone && query->zone->apex) {
    254 			*d = dname_name(domain_dname(query->zone->apex));
    255 			*d_len = domain_dname(query->zone->apex)->name_size;
    256 		}
    257 		return rrl_type_error;
    258 	}
    259 	if(query->delegation_domain) {
    260 		*d = dname_name(domain_dname(query->delegation_domain));
    261 		*d_len = domain_dname(query->delegation_domain)->name_size;
    262 		return rrl_type_referral;
    263 	}
    264 	if(query->qtype == TYPE_ANY) {
    265 		if(query->qname) {
    266 			*d = dname_name(query->qname);
    267 			*d_len = query->qname->name_size;
    268 		}
    269 		return rrl_type_any;
    270 	}
    271 	if(query->qtype == TYPE_RRSIG) {
    272 		if(query->qname) {
    273 			*d = dname_name(query->qname);
    274 			*d_len = query->qname->name_size;
    275 		}
    276 		return rrl_type_rrsig;
    277 	}
    278 	if(query->wildcard_domain) {
    279 		*d = dname_name(domain_dname(query->wildcard_domain));
    280 		*d_len = domain_dname(query->wildcard_domain)->name_size;
    281 		return rrl_type_wildcard;
    282 	}
    283 	if(ANCOUNT(query->packet) == 0) {
    284 		if(query->zone && query->zone->apex) {
    285 			*d = dname_name(domain_dname(query->zone->apex));
    286 			*d_len = domain_dname(query->zone->apex)->name_size;
    287 		}
    288 		return rrl_type_nodata;
    289 	}
    290 	if(query->qtype == TYPE_DNSKEY) {
    291 		if(query->qname) {
    292 			*d = dname_name(query->qname);
    293 			*d_len = query->qname->name_size;
    294 		}
    295 		return rrl_type_dnskey;
    296 	}
    297 	/* positive */
    298 	if(query->qname) {
    299 		*d = dname_name(query->qname);
    300 		*d_len = query->qname->name_size;
    301 	}
    302 	return rrl_type_positive;
    303 }
    304 
    305 /** Examine the query and return hash and source of netblock. */
    306 static void examine_query(query_type* query, uint32_t* hash, uint64_t* source,
    307 	uint16_t* flags, uint32_t* lm)
    308 {
    309 	/* compile a binary string representing the query */
    310 	uint16_t c, c2;
    311 	/* size with 16 bytes to spare */
    312 	uint8_t buf[MAXDOMAINLEN + sizeof(*source) + sizeof(c) + 16];
    313 	const uint8_t* dname = NULL; size_t dname_len = 0;
    314 	uint32_t r = 0x267fcd16;
    315 
    316 	*source = rrl_get_source(query, &c2);
    317 	c = rrl_classify(query, &dname, &dname_len);
    318 	if(query->zone && query->zone->opts &&
    319 		(query->zone->opts->pattern->rrl_whitelist & c))
    320 		*lm = rrl_whitelist_ratelimit;
    321 	if(*lm == 0) return;
    322 	c |= c2;
    323 	*flags = c;
    324 	memmove(buf, source, sizeof(*source));
    325 	memmove(buf+sizeof(*source), &c, sizeof(c));
    326 
    327 	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "rrl_examine type %s name %s", rrltype2str(c), dname?wiredname2str(dname):"NULL"));
    328 
    329 	/* and hash it */
    330 	if(dname && dname_len <= MAXDOMAINLEN) {
    331 		memmove(buf+sizeof(*source)+sizeof(c), dname, dname_len);
    332 		*hash = hashlittle(buf, sizeof(*source)+sizeof(c)+dname_len, r);
    333 	} else
    334 		*hash = hashlittle(buf, sizeof(*source)+sizeof(c), r);
    335 }
    336 
    337 /* age the bucket because elapsed time steps have gone by */
    338 static void rrl_attenuate_bucket(struct rrl_bucket* b, int32_t elapsed)
    339 {
    340 	if(elapsed > 16) {
    341 		b->rate = 0;
    342 	} else {
    343 		/* divide rate /2 for every elapsed time step, because
    344 		 * the counters in the inbetween steps were 0 */
    345 		/* r(t) = 0 + 0/2 + 0/4 + .. + oldrate/2^dt */
    346 		b->rate >>= elapsed;
    347 		/* we know that elapsed >= 2 */
    348 		b->rate += (b->counter>>(elapsed-1));
    349 	}
    350 }
    351 
    352 /** log a message about ratelimits */
    353 static void
    354 rrl_msg(query_type* query, const char* str)
    355 {
    356 	uint16_t c, c2, wl = 0;
    357 	const uint8_t* d = NULL;
    358 	size_t d_len;
    359 	uint64_t s;
    360 	char address[128];
    361 	if(verbosity < 1) return;
    362 	addr2str(&query->client_addr, address, sizeof(address));
    363 	s = rrl_get_source(query, &c2);
    364 	c = rrl_classify(query, &d, &d_len) | c2;
    365 	if(query->zone && query->zone->opts &&
    366 		(query->zone->opts->pattern->rrl_whitelist & c))
    367 		wl = 1;
    368 	log_msg(LOG_INFO, "ratelimit %s %s type %s%s target %s query %s %s",
    369 		str, d?wiredname2str(d):"", rrltype2str(c),
    370 		wl?"(whitelisted)":"", rrlsource2str(s, c2),
    371 		address, rrtype_to_string(query->qtype));
    372 }
    373 
/**
 * True if the query used to be blocked by the ratelimit: either the
 * smoothed rate or the projection for the running second (counter plus
 * half the rate, in 32-bit unsigned arithmetic) has reached the limit.
 * @param rate: smoothed rate of the bucket.
 * @param counter: queries counted in the running second.
 * @param lm: the limit to compare against.
 * @return 1 if blocked, 0 if not.
 */
static int
used_to_block(uint32_t rate, uint32_t counter, uint32_t lm)
{
	uint32_t projected = counter + rate/2;

	if(rate >= lm)
		return 1;
	return projected >= lm;
}
    380 
    381 /** update the rate in a ratelimit bucket, return actual rate */
    382 uint32_t rrl_update(query_type* query, uint32_t hash, uint64_t source,
    383 	uint16_t flags, int32_t now, uint32_t lm)
    384 {
    385 	struct rrl_bucket* b = &rrl_array[hash % rrl_array_size];
    386 
    387 	DEBUG(DEBUG_QUERY, 1, (LOG_INFO, "source %llx hash %x oldrate %d oldcount %d stamp %d",
    388 		(long long unsigned)source, hash, b->rate, b->counter, b->stamp));
    389 
    390 	/* check if different source */
    391 	if(b->source != source || b->flags != flags || b->hash != hash) {
    392 		/* initialise */
    393 		/* potentially the wrong limit here, used lower nonwhitelim */
    394 		if(verbosity >= 1 &&
    395 			used_to_block(b->rate, b->counter, rrl_ratelimit)) {
    396 			char address[128];
    397 			addr2str(&query->client_addr, address, sizeof(address));
    398 			log_msg(LOG_INFO, "ratelimit unblock ~ type %s target %s query %s %s (%s collision)",
    399 				rrltype2str(b->flags),
    400 				rrlsource2str(b->source, b->flags),
    401 				address, rrtype_to_string(query->qtype),
    402 				(b->hash!=hash?"bucket":"hash"));
    403 		}
    404 		b->hash = hash;
    405 		b->source = source;
    406 		b->flags = flags;
    407 		b->counter = 1;
    408 		b->rate = 0;
    409 		b->stamp = now;
    410 		return 1;
    411 	}
    412 	/* this is the same source */
    413 
    414 	/* check if old, zero or smooth it */
    415 	/* circular arith for time */
    416 	if(now - b->stamp == 1) {
    417 		/* very busy bucket and time just stepped one step */
    418 		int oldblock = used_to_block(b->rate, b->counter, lm);
    419 		b->rate = b->rate/2 + b->counter;
    420 		if(oldblock && b->rate < lm)
    421 			rrl_msg(query, "unblock");
    422 		b->counter = 1;
    423 		b->stamp = now;
    424 	} else if(now - b->stamp > 0) {
    425 		/* older bucket */
    426 		int olderblock = used_to_block(b->rate, b->counter, lm);
    427 		rrl_attenuate_bucket(b, now - b->stamp);
    428 		if(olderblock && b->rate < lm)
    429 			rrl_msg(query, "unblock");
    430 		b->counter = 1;
    431 		b->stamp = now;
    432 	} else if(now != b->stamp) {
    433 		/* robust, timestamp from the future */
    434 		if(used_to_block(b->rate, b->counter, lm))
    435 			rrl_msg(query, "unblock");
    436 		b->rate = 0;
    437 		b->counter = 1;
    438 		b->stamp = now;
    439 	} else {
    440 		/* bucket is from the current timestep, update counter */
    441 		b->counter ++;
    442 
    443 		/* log what is blocked for operational debugging */
    444 		if(b->counter + b->rate/2 == lm && b->rate < lm)
    445 			rrl_msg(query, "block");
    446 	}
    447 
    448 	/* return max from current rate and projected next-value for rate */
    449 	/* so that if the rate increases suddenly very high, it is
    450 	 * stopped halfway into the time step */
    451 	if(b->counter > b->rate/2)
    452 		return b->counter + b->rate/2;
    453 	return b->rate;
    454 }
    455 
    456 int rrl_process_query(query_type* query)
    457 {
    458 	uint64_t source;
    459 	uint32_t hash;
    460 	/* we can use circular arithmetic here, so int32 works after 2038 */
    461 	int32_t now = (int32_t)time(NULL);
    462 	uint32_t lm = rrl_ratelimit;
    463 	uint16_t flags;
    464 	if(rrl_ratelimit == 0 && rrl_whitelist_ratelimit == 0)
    465 		return 0;
    466 
    467 	/* examine query */
    468 	examine_query(query, &hash, &source, &flags, &lm);
    469 
    470 	if(lm == 0)
    471 		return 0; /* no limit for this */
    472 
    473 	/* update rate */
    474 	return (rrl_update(query, hash, source, flags, now, lm) >= lm);
    475 }
    476 
    477 query_state_type rrl_slip(query_type* query)
    478 {
    479 	/* discard number the packets, randomly */
    480 #ifdef HAVE_ARC4RANDOM_UNIFORM
    481 	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((arc4random_uniform(rrl_slip_ratio)) == 0))) {
    482 #elif HAVE_ARC4RANDOM
    483 	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((arc4random() % rrl_slip_ratio) == 0))) {
    484 #else
    485 	if((rrl_slip_ratio > 0) && ((rrl_slip_ratio == 1) || ((random() % rrl_slip_ratio) == 0))) {
    486 #endif
    487 		/* set TC on the rest */
    488 		TC_SET(query->packet);
    489 		ANCOUNT_SET(query->packet, 0);
    490 		NSCOUNT_SET(query->packet, 0);
    491 		ARCOUNT_SET(query->packet, 0);
    492 		if(query->qname)
    493 			/* header, type, class, qname */
    494 			buffer_set_position(query->packet,
    495 				QHEADERSZ+4+query->qname->name_size);
    496 		else 	buffer_set_position(query->packet, QHEADERSZ);
    497 		return QUERY_PROCESSED;
    498 	}
    499 	return QUERY_DISCARDED;
    500 }
    501 
    502 #endif /* RATELIMIT */
    503