Home | History | Annotate | Line # | Download | only in cache
      1 /*
      2  * services/cache/infra.h - infrastructure cache, server rtt and capabilities
      3  *
      4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
      5  *
      6  * This software is open source.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  *
     12  * Redistributions of source code must retain the above copyright notice,
     13  * this list of conditions and the following disclaimer.
     14  *
     15  * Redistributions in binary form must reproduce the above copyright notice,
     16  * this list of conditions and the following disclaimer in the documentation
     17  * and/or other materials provided with the distribution.
     18  *
     19  * Neither the name of the NLNET LABS nor the names of its contributors may
     20  * be used to endorse or promote products derived from this software without
     21  * specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 /**
     37  * \file
     38  *
     39  * This file contains the infrastructure cache, as well as rate limiting.
     40  * Note that there are two sorts of rate-limiting here:
     41  *  - Pre-cache, per-query rate limiting (query ratelimits)
     42  *  - Post-cache, per-domain name rate limiting (infra-ratelimits)
     43  */
     44 
     45 #ifndef SERVICES_CACHE_INFRA_H
     46 #define SERVICES_CACHE_INFRA_H
     47 #include "util/storage/lruhash.h"
     48 #include "util/storage/dnstree.h"
     49 #include "util/rtt.h"
     50 #include "util/netevent.h"
     51 #include "util/data/msgreply.h"
     52 struct slabhash;
     53 struct config_file;
     54 
     55 /** Number of timeouts for a query type at which the domain can be blocked;
     56  * even if another type has completely rtt maxed it, a different type
     57  * can still send this number of packets (until those all time out too). */
     58 #define TIMEOUT_COUNT_MAX 3
     59 
     60 
     61 /** Timeout when only a single probe query per IP is allowed.
     62  *  Any RTO above this number is considered a probe.
     63  *  It is synchronized (capped) with USEFUL_SERVER_TOP_TIMEOUT so that probing
     64  *  keeps working even if that configurable number drops below the default
     65  *  12000 ms of probing. */
     66 extern int PROBE_MAXRTO;
     67 
     68 /**
     69  * Lookup key for the host information that is kept for every server, per zone.
     70  */
     71 struct infra_key {
     72 	/** the host address. */
     73 	struct sockaddr_storage addr;
     74 	/** length of addr. */
     75 	socklen_t addrlen;
     76 	/** zone name in wireformat */
     77 	uint8_t* zonename;
     78 	/** length of zonename */
     79 	size_t namelen;
     80 	/** hash table entry; its data is of type infra_data. */
     81 	struct lruhash_entry entry;
     82 };
     83 
     84 /**
     85  * Host information encompasses host capabilities and retransmission timeouts,
     86  * as well as lameness information (notAuthoritative, noEDNS, Recursive).
     87  */
     88 struct infra_data {
     89 	/** TTL value for this entry, as an absolute time. */
     90 	time_t ttl;
     91 
     92 	/** time in seconds (absolute) when probing re-commences, 0 disabled */
     93 	time_t probedelay;
     94 	/** round trip times for timeout calculation */
     95 	struct rtt_info rtt;
     96 
     97 	/** edns version that the host supports, -1 means no EDNS */
     98 	int edns_version;
     99 	/** if the EDNS lameness is already known or not.
    100 	 * EDNS lame is when EDNS queries or replies are dropped,
    101 	 * and cause a timeout */
    102 	uint8_t edns_lame_known;
    103 
    104 	/** is the host lame (does not serve the zone authoritatively),
    105 	 * or is the host dnssec lame (does not serve DNSSEC data) */
    106 	uint8_t isdnsseclame;
    107 	/** is the host recursion lame (not AA, but RA) */
    108 	uint8_t rec_lame;
    109 	/** the host is lame (not authoritative) for A records */
    110 	uint8_t lame_type_A;
    111 	/** the host is lame (not authoritative) for other query types */
    112 	uint8_t lame_other;
    113 
    114 	/** timeout counter for queries of type A */
    115 	uint8_t timeout_A;
    116 	/** timeout counter for queries of type AAAA */
    117 	uint8_t timeout_AAAA;
    118 	/** timeout counter for queries of other types */
    119 	uint8_t timeout_other;
    120 };
    121 
    122 /**
    123  * Infra cache: host information, plus the ratelimit and wait limit state.
    124  */
    125 struct infra_cache {
    126 	/** The hash table with hosts: infra_key, infra_data */
    127 	struct slabhash* hosts;
    128 	/** TTL value for host information, in seconds */
    129 	int host_ttl;
    130 	/** the hosts that are down are kept probed for recovery */
    131 	int infra_keep_probing;
    132 	/** hash table with query rates per name: rate_key, rate_data */
    133 	struct slabhash* domain_rates;
    134 	/** ratelimit settings for domains, struct domain_limit_data */
    135 	rbtree_type domain_limits;
    136 	/** hash table with query rates per client ip: ip_rate_key, ip_rate_data */
    137 	struct slabhash* client_ip_rates;
    138 	/** tree of addr_tree_node, with wait_limit_netblock_info information */
    139 	rbtree_type wait_limits_netblock;
    140 	/** tree of addr_tree_node, with wait_limit_netblock_info information (NOTE(review): presumably the limits for clients with a DNS Cookie - confirm) */
    141 	rbtree_type wait_limits_cookie_netblock;
    142 };
    143 
    144 /** ratelimit for domains, unless overridden by domain_limits; 0 is off */
    145 extern int infra_dp_ratelimit;
    146 
    147 /**
    148  * ratelimit settings for a domain, an element of the domain_limits tree
    149  */
    150 struct domain_limit_data {
    151 	/** key for rbtree, must be first in struct, name of domain */
    152 	struct name_tree_node node;
    153 	/** ratelimit for exact match with this name, -1 if not set */
    154 	int lim;
    155 	/** ratelimit for names below this name, -1 if not set */
    156 	int below;
    157 };
    158 
    159 /**
    160  * key for ratelimit lookups in the domain_rates hash table, a domain name
    161  */
    162 struct rate_key {
    163 	/** lruhash key entry */
    164 	struct lruhash_entry entry;
    165 	/** domain name in uncompressed wireformat */
    166 	uint8_t* name;
    167 	/** length of name */
    168 	size_t namelen;
    169 };
    170 
    171 /** ratelimit per client ip, 0 is off */
    172 extern int infra_ip_ratelimit;
    173 /** ratelimit per client ip for DNS Cookie clients, 0 is off */
    174 extern int infra_ip_ratelimit_cookie;
    175 
    176 /**
    177  * key for ip_ratelimit lookups in the client_ip_rates hash table, a source IP.
    178  */
    179 struct ip_rate_key {
    180 	/** lruhash key entry */
    181 	struct lruhash_entry entry;
    182 	/** client ip information */
    183 	struct sockaddr_storage addr;
    184 	/** length of address */
    185 	socklen_t addrlen;
    186 };
    187 
    188 /** number of seconds to track qps rate; sizes the arrays in struct rate_data */
    189 #define RATE_WINDOW 2
    190 
    191 /**
    192  * Data for ratelimits per domain name (and, as ip_rate_data, per client ip).
    193  * It is incremented when a non-cache-lookup happens for that domain name.
    194  * The name is the delegation point we have for the name.
    195  * If a new delegation point is found (a referral reply), the previous
    196  * delegation point is decremented, and the new one is charged with the query.
    197  */
    198 struct rate_data {
    199 	/** queries counted, for that second. 0 if not in use. */
    200 	int qps[RATE_WINDOW];
    201 	/** what the timestamp is of the qps array members, counter is
    202 	 * valid for that timestamp.  Usually now and now-1. */
    203 	time_t timestamp[RATE_WINDOW];
    204 	/** the number of queries waiting in the mesh */
    205 	int mesh_wait;
    206 };
    207 /** the ratelimit data per client ip uses the same struct as rate_data */
    208 #define ip_rate_data rate_data
    209 
    210 /**
    211  * Data to store the configuration per netblock for the wait limit.
    212  */
    213 struct wait_limit_netblock_info {
    214 	/** The addr tree node, this must be first. */
    215 	struct addr_tree_node node;
    216 	/** the limit on the amount (NOTE(review): presumably of waiting replies per IP - confirm) */
    217 	int limit;
    218 };
    219 
    220 /** default start size of the hash lookup table of the infra host cache */
    221 #define INFRA_HOST_STARTSIZE 32
    222 /** bytes per zonename reserved in the hostcache: dnamelen(zonename.com.), i.e. 1+8+1+3+1 in wireformat */
    223 #define INFRA_BYTES_NAME 14
    224 
    225 /**
    226  * Create infra cache. Delete the result with infra_delete().
    227  * @param cfg: config parameters or NULL for defaults.
    228  * @return: new infra cache, or NULL.
    229  */
    230 struct infra_cache* infra_create(struct config_file* cfg);
    231 
    232 /**
    233  * Delete an infra cache, such as returned by infra_create() or infra_adjust().
    234  * @param infra: infrastructure cache to delete.
    235  */
    236 void infra_delete(struct infra_cache* infra);
    237 
    238 /**
    239  * Adjust infra cache to use updated configuration settings.
    240  * This may clean the cache. Operates a bit like realloc.
    241  * Must not be called while there is threading or use by other threads.
    242  * @param infra: existing cache. If NULL a new infra cache is returned.
    243  * @param cfg: config options.
    244  * @return the new infra cache pointer or NULL on error.
    245  */
    246 struct infra_cache* infra_adjust(struct infra_cache* infra,
    247 	struct config_file* cfg);
    248 
    249 /**
    250  * Plain find infra data function (used by the other functions).
    251  * @param infra: infrastructure cache.
    252  * @param addr: host address.
    253  * @param addrlen: length of addr.
    254  * @param name: domain name of zone.
    255  * @param namelen: length of domain name.
    256  * @param wr: if true, writelock, else readlock.
    257  * @return the entry (locked as selected by wr), could be expired (this is not checked), or NULL.
    258  */
    259 struct lruhash_entry* infra_lookup_nottl(struct infra_cache* infra,
    260 	struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
    261 	size_t namelen, int wr);
    262 
    263 /**
    264  * Find host information to send a packet. Creates new entry if not found.
    265  * For a new entry, lameness is empty, EDNS is 0 (try with first), and rtt is
    266  * returned for the first message to it.
    267  * Only use this when sending a packet, because it also locks out others when
    268  * probing is restricted.
    269  * @param infra: infrastructure cache.
    270  * @param addr: host address.
    271  * @param addrlen: length of addr.
    272  * @param name: domain name of zone.
    273  * @param namelen: length of domain name.
    274  * @param timenow: what time it is now.
    275  * @param edns_vs: edns version it supports, is returned.
    276  * @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has
    277  * 	already been probed, is returned.
    278  * @param to: timeout to use, is returned.
    279  * @return: 0 on error.
    280  */
    281 int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
    282 	socklen_t addrlen, uint8_t* name, size_t namelen,
    283 	time_t timenow, int* edns_vs, uint8_t* edns_lame_known, int* to);
    284 
    285 /**
    286  * Set a host to be lame for the given zone.
    287  * @param infra: infrastructure cache.
    288  * @param addr: host address.
    289  * @param addrlen: length of addr.
    290  * @param name: domain name of zone apex.
    291  * @param namelen: length of domain name.
    292  * @param timenow: what time it is now.
    293  * @param dnsseclame: if true the host is set dnssec lame.
    294  *	if false, the host is marked lame (not serving the zone).
    295  * @param reclame: if true the host is a recursor, not an AA server.
    296  *	if false, the host is set dnssec lame or marked lame (see dnsseclame).
    297  * @param qtype: the query type for which it is lame.
    298  * @return: 0 on error.
    299  */
    300 int infra_set_lame(struct infra_cache* infra,
    301         struct sockaddr_storage* addr, socklen_t addrlen,
    302 	uint8_t* name, size_t namelen, time_t timenow, int dnsseclame,
    303 	int reclame, uint16_t qtype);
    304 
    305 /**
    306  * Update rtt information for the host.
    307  * @param infra: infrastructure cache.
    308  * @param addr: host address.
    309  * @param addrlen: length of addr.
    310  * @param name: zone name.
    311  * @param namelen: zone name length.
    312  * @param qtype: query type.
    313  * @param roundtrip: estimate of roundtrip time in milliseconds or -1 for
    314  * 	timeout.
    315  * @param orig_rtt: original rtt for the query that timed out (roundtrip==-1).
    316  * 	ignored if roundtrip != -1.
    317  * @param timenow: what time it is now.
    318  * @return: 0 on error, otherwise the new rto.
    319  */
    320 int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
    321 	socklen_t addrlen, uint8_t* name, size_t namelen, int qtype,
    322 	int roundtrip, int orig_rtt, time_t timenow);
    323 
    324 /**
    325  * Update information for the host: store that a TCP transaction works.
    326  * @param infra: infrastructure cache.
    327  * @param addr: host address.
    328  * @param addrlen: length of addr.
    329  * @param name: name of zone.
    330  * @param namelen: length of name.
    331  */
    332 void infra_update_tcp_works(struct infra_cache* infra,
    333         struct sockaddr_storage* addr, socklen_t addrlen,
    334 	uint8_t* name, size_t namelen);
    335 
    336 /**
    337  * Update edns information for the host.
    338  * @param infra: infrastructure cache.
    339  * @param addr: host address.
    340  * @param addrlen: length of addr.
    341  * @param name: name of zone.
    342  * @param namelen: length of name.
    343  * @param edns_version: the version that it publishes.
    344  * 	If it is known to support EDNS then no-EDNS is not stored over it.
    345  * @param timenow: what time it is now.
    346  * @return: 0 on error.
    347  */
    348 int infra_edns_update(struct infra_cache* infra,
    349         struct sockaddr_storage* addr, socklen_t addrlen,
    350 	uint8_t* name, size_t namelen, int edns_version, time_t timenow);
    351 
    352 /**
    353  * Get lameness information and average RTT if the host is in the cache.
    354  * This information is to be used for server selection.
    355  * @param infra: infrastructure cache.
    356  * @param addr: host address.
    357  * @param addrlen: length of addr.
    358  * @param name: zone name.
    359  * @param namelen: zone name length.
    360  * @param qtype: the query to be made.
    361  * @param lame: if function returns true, this returns lameness of the zone.
    362  * @param dnsseclame: if function returns true, this returns if the zone
    363  *	is dnssec-lame.
    364  * @param reclame: if function returns true, this is if it is recursion lame.
    365  * @param rtt: if function returns true, this returns avg rtt of the server.
    366  * 	The rtt value is unclamped and reflects recent timeouts.
    367  * @param timenow: what time it is now.
    368  * @return true if found in cache, or false if not (or TTL bad).
    369  */
    370 int infra_get_lame_rtt(struct infra_cache* infra,
    371         struct sockaddr_storage* addr, socklen_t addrlen,
    372 	uint8_t* name, size_t namelen, uint16_t qtype,
    373 	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow);
    374 
    375 /**
    376  * Get additional (debug) info on timing.
    377  * @param infra: infra cache.
    378  * @param addr: host address.
    379  * @param addrlen: length of addr.
    380  * @param name: zone name.
    381  * @param namelen: zone name length.
    382  * @param rtt: the rtt_info is copied into here (caller alloced return struct).
    383  * @param delay: probe delay (if any).
    384  * @param timenow: what time it is now.
    385  * @param tA: timeout counter on type A.
    386  * @param tAAAA: timeout counter on type AAAA.
    387  * @param tother: timeout counter on type other.
    388  * @return TTL the infra host element is valid for. If -1: not found in cache.
    389  *	If -2: found but expired.
    390  */
    391 long long infra_get_host_rto(struct infra_cache* infra,
    392         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name,
    393 	size_t namelen, struct rtt_info* rtt, int* delay, time_t timenow,
    394 	int* tA, int* tAAAA, int* tother);
    395 
    396 /**
    397  * Increment the query rate counter for a delegation point.
    398  * @param infra: infra cache.
    399  * @param name: zone name.
    400  * @param namelen: zone name length.
    401  * @param timenow: what time it is now.
    402  * @param backoff: if backoff is enabled.
    403  * @param qinfo: for logging, query name.
    404  * @param replylist: for logging, querier's address (if any).
    405  * @return 1 if it could be incremented. 0 if the increment overshot the
    406  * 	ratelimit or if in the previous second the ratelimit was exceeded.
    407  * 	Failures like alloc failures are not returned (probably as 1).
    408  */
    409 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
    410 	size_t namelen, time_t timenow, int backoff, struct query_info* qinfo,
    411 	struct comm_reply* replylist);
    412 
    413 /**
    414  * Decrement the query rate counter for a delegation point.
    415  * Because the reply received for the delegation point was pleasant,
    416  * we do not charge this delegation point with it (i.e. it was a referral).
    417  * It should be called in the same second as when inc() was called.
    418  * @param infra: infra cache.
    419  * @param name: zone name.
    420  * @param namelen: zone name length.
    421  * @param timenow: what time it is now.
    422  */
    423 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
    424 	size_t namelen, time_t timenow);
    425 
    426 /**
    427  * See if the query rate counter for a delegation point is exceeded.
    428  * If so, no queries are going to be allowed.
    429  * @param infra: infra cache.
    430  * @param name: zone name.
    431  * @param namelen: zone name length.
    432  * @param timenow: what time it is now.
    433  * @param backoff: if backoff is enabled.
    434  * @return true if exceeded.
    435  */
    436 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
    437 	size_t namelen, time_t timenow, int backoff);
    438 
    439 /** find the maximum rate stored in data (NOTE(review): presumably a struct rate_data - confirm). 0 if no information.
    440  *  When backoff is enabled look for the maximum in the whole RATE_WINDOW. */
    441 int infra_rate_max(void* data, time_t now, int backoff);
    442 
    443 /** find the ratelimit in qps for a domain name. Returns 0 if there is no limit for the domain. */
    444 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
    445 	size_t namelen);
    446 
    447 /** Update the query ratelimit hash (per client ip) and decide
    448  *  whether or not a query should be dropped.
    449  *  @param infra: infra cache.
    450  *  @param addr: client address.
    451  *  @param addrlen: client address length.
    452  *  @param timenow: what time it is now.
    453  *  @param has_cookie: if the request came with a DNS Cookie.
    454  *  @param backoff: if backoff is enabled.
    455  *  @param buffer: with query for logging.
    456  *  @return 1 if it could be incremented. 0 if the increment overshot the
    457  *  ratelimit and the query should be dropped. */
    458 int infra_ip_ratelimit_inc(struct infra_cache* infra,
    459 	struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow,
    460 	int has_cookie, int backoff, struct sldns_buffer* buffer);
    461 
    462 /**
    463  * Get the amount of memory used by the infra cache.
    464  * @param infra: infrastructure cache.
    465  * @return memory in use, in bytes.
    466  */
    467 size_t infra_get_mem(struct infra_cache* infra);
    468 
    469 /** calculate size for the hashtable (key k, data d); does not count size of
    470  * lameness, so the hashtable is a fixed number of items */
    471 size_t infra_sizefunc(void* k, void* d);
    472 
    473 /** compare two addresses (given as keys), returns -1, 0, or +1 */
    474 int infra_compfunc(void* key1, void* key2);
    475 
    476 /** delete key k, and destroy the lock */
    477 void infra_delkeyfunc(void* k, void* arg);
    478 
    479 /** delete data d and destroy the lameness hashtable */
    480 void infra_deldatafunc(void* d, void* arg);
    481 
    482 /** calculate size for the hashtable (key k, data d) */
    483 size_t rate_sizefunc(void* k, void* d);
    484 
    485 /** compare two names (given as keys), returns -1, 0, or +1 */
    486 int rate_compfunc(void* key1, void* key2);
    487 
    488 /** delete key k, and destroy the lock */
    489 void rate_delkeyfunc(void* k, void* arg);
    490 
    491 /** delete data d */
    492 void rate_deldatafunc(void* d, void* arg);
    493 
    494 /** calculate size for the client ip hashtable (key k, data d) */
    495 size_t ip_rate_sizefunc(void* k, void* d);
    496 
    497 /** compare two addresses (given as keys) */
    498 int ip_rate_compfunc(void* key1, void* key2);
    499 
    500 /** delete key, and destroy the lock (NOTE(review): the parameter is named d, but is presumably the key - confirm) */
    501 void ip_rate_delkeyfunc(void* d, void* arg);
    502 
    503 /** delete data; the same function as rate_deldatafunc */
    504 #define ip_rate_deldatafunc rate_deldatafunc
    505 
    506 /** See if the IP address (of rep) can have another reply in the wait limit */
    507 int infra_wait_limit_allowed(struct infra_cache* infra, struct comm_reply* rep,
    508 	int cookie_valid, struct config_file* cfg);
    509 
    510 /** Increment the number of waiting replies for the IP (of rep) */
    511 void infra_wait_limit_inc(struct infra_cache* infra, struct comm_reply* rep,
    512 	time_t timenow, struct config_file* cfg);
    513 
    514 /** Decrement the number of waiting replies for the IP (of rep) */
    515 void infra_wait_limit_dec(struct infra_cache* infra, struct comm_reply* rep,
    516 	struct config_file* cfg);
    517 
    518 /** setup the wait limits tree and the wait cookie limits tree (returns 0 on failure) */
    519 int setup_wait_limits(struct rbtree_type* wait_limits_netblock,
    520 	struct rbtree_type* wait_limits_cookie_netblock,
    521 	struct config_file* cfg);
    522 
    523 /** Free a wait limits tree; used for the wait limits and wait cookie limits tree. */
    524 void wait_limits_free(struct rbtree_type* wait_limits_tree);
    525 
    526 /** setup the domain limits tree (returns 0 on failure) */
    527 int setup_domain_limits(struct rbtree_type* domain_limits,
    528 	struct config_file* cfg);
    529 
    530 /** Free the domain limits tree. */
    531 void domain_limits_free(struct rbtree_type* domain_limits);
    532 
    533 /** exported for unit test */
    534 int still_useful_timeout();
    535 
    536 #endif /* SERVICES_CACHE_INFRA_H */
    537