Home | History | Annotate | Line # | Download | only in iterator
      1 /*
      2  * iterator/iterator.h - iterative resolver DNS query response module
      3  *
      4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
      5  *
      6  * This software is open source.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  *
     12  * Redistributions of source code must retain the above copyright notice,
     13  * this list of conditions and the following disclaimer.
     14  *
     15  * Redistributions in binary form must reproduce the above copyright notice,
     16  * this list of conditions and the following disclaimer in the documentation
     17  * and/or other materials provided with the distribution.
     18  *
     19  * Neither the name of the NLNET LABS nor the names of its contributors may
     20  * be used to endorse or promote products derived from this software without
     21  * specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 /**
     37  * \file
     38  *
     39  * This file contains a module that performs recursive iterative DNS query
     40  * processing.
     41  */
     42 
     43 #ifndef ITERATOR_ITERATOR_H
     44 #define ITERATOR_ITERATOR_H
     45 #include "services/outbound_list.h"
     46 #include "util/data/msgreply.h"
     47 #include "util/module.h"
     48 struct delegpt;
     49 struct iter_donotq;
     50 struct iter_prep_list;
     51 struct iter_priv;
     52 struct rbtree_type;
     53 
     54 /** max number of targets spawned for a query and its subqueries */
     55 #define MAX_TARGET_COUNT	64
     56 /** max number of upstream queries for a query and its subqueries, it is
     57  * never reset. */
     58 extern int MAX_GLOBAL_QUOTA;
     59 /** max number of target lookups per qstate, per delegation point */
     60 #define MAX_DP_TARGET_COUNT	16
     61 /** max number of nxdomains allowed for target lookups for a query and
     62  * its subqueries */
     63 #define MAX_TARGET_NX		5
     64 /** max number of nxdomains allowed for target lookups for a query and
     65  * its subqueries when fallback has kicked in; twice MAX_TARGET_NX */
     66 #define MAX_TARGET_NX_FALLBACK	(MAX_TARGET_NX*2)
     67 /** max number of referrals. Makes sure resolver does not run away */
     68 #define MAX_REFERRAL_COUNT	130
     69 /** max number of queries for which to perform dnssec lameness detection,
     70  * (rrsigs missing detection) after that, just pick up that response */
     71 #define DNSSEC_LAME_DETECT_COUNT 4
     72 /**
     73  * max number of QNAME minimisation iterations. Limits number of queries for
     74  * QNAMEs with a lot of labels.
     75  */
     76 #define MAX_MINIMISE_COUNT	10
     77 /** max number of time-outs for minimised query. Prevents resolving failures
     78  * when the QNAME minimisation QTYPE is blocked. */
     79 #define MAX_MINIMISE_TIMEOUT_COUNT 3
     80 /**
     81  * number of labels from QNAME that are always sent individually when using
     82  * QNAME minimisation, even when the QNAME has more labels than
     83  * MAX_MINIMISE_COUNT. MINIMISE_MULTIPLE_LABS is the remaining iteration count. */
     84 #define MINIMISE_ONE_LAB	4
     85 #define MINIMISE_MULTIPLE_LABS	(MAX_MINIMISE_COUNT - MINIMISE_ONE_LAB)
     86 /** at what query-sent-count to stop target fetch policy */
     87 #define TARGET_FETCH_STOP	3
     88 /** how nice is a server without further information, in msec
     89  * Equals rtt initial timeout value.
     90  */
     91 extern int UNKNOWN_SERVER_NICENESS;
     92 /** maximum timeout before a host is deemed unsuitable, in msec.
     93  * After host_ttl this will be timed out and the host will be tried again.
     94  * Equals RTT_MAX_TIMEOUT, and thus when RTT_MAX_TIMEOUT is overwritten by
     95  * config infra_cache_max_rtt, it will be overwritten as well. */
     96 extern int USEFUL_SERVER_TOP_TIMEOUT;
     97 /** penalty to validation failed blacklisted IPs
     98  * Equals USEFUL_SERVER_TOP_TIMEOUT*4, and thus when RTT_MAX_TIMEOUT is
     99  * overwritten by config infra_cache_max_rtt, it will be overwritten as well. */
    100 extern int BLACKLIST_PENALTY;
    101 /** RTT band, within this amount from the best, servers are chosen randomly.
    102  * Chosen so that the UNKNOWN_SERVER_NICENESS falls within the band of a
    103  * fast server, this causes server exploration as a side benefit. msec. */
    104 #define RTT_BAND 400
    105 /** Number of retries for empty nodata packets before it is accepted. */
    106 #define EMPTY_NODATA_RETRY_COUNT 2
    107 
    108 /**
    109  * Iterator global state for NAT64; held in struct iter_env as member nat64.
    110  */
    111 struct iter_nat64 {
    112 	/** A flag to locally apply NAT64 to make IPv4 addrs into IPv6 */
    113 	int use_nat64;
    114 
    115 	/** NAT64 prefix address, cf. dns64_env->prefix_addr */
    116 	struct sockaddr_storage nat64_prefix_addr;
    117 
    118 	/** length of nat64_prefix_addr, sizeof(sockaddr_in6) */
    119 	socklen_t nat64_prefix_addrlen;
    120 
    121 	/** CIDR mask length of NAT64 prefix */
    122 	int nat64_prefix_net;
    123 };
    124 
    125 /**
    126  * Global state for the iterator.
    127  */
    128 struct iter_env {
    129 	/** A flag to indicate whether or not we have an IPv6 route */
    130 	int supports_ipv6;
    131 
    132 	/** A flag to indicate whether or not we have an IPv4 route */
    133 	int supports_ipv4;
    134 
    135 	/** State for nat64 */
    136 	struct iter_nat64 nat64;
    137 
    138 	/** A set of inetaddrs that should never be queried. */
    139 	struct iter_donotq* donotq;
    140 
    141 	/** private address space and private domains */
    142 	struct iter_priv* priv;
    143 
    144 	/** whitelist for capsforid names */
    145 	struct rbtree_type* caps_white;
    146 
    147 	/** The maximum dependency depth that this resolver will pursue. */
    148 	int max_dependency_depth;
    149 
    150 	/**
    151 	 * The target fetch policy for each dependency level. This is
    152 	 * described as a simple number (per dependency level):
    153 	 *	negative numbers (usually just -1) mean fetch-all,
    154 	 *	0 means only fetch on demand, and
    155 	 *	positive numbers mean to fetch at most that many targets.
    156 	 * array of max_dependency_depth+1 size.
    157 	 */
    158 	int* target_fetch_policy;
    159 
    160 	/** lock on ratelimit counter */
    161 	lock_basic_type queries_ratelimit_lock;
    162 	/** number of queries that have been ratelimited */
    163 	size_t num_queries_ratelimited;
    164 
    165 	/** number of retries on outgoing queries */
    166 	int outbound_msg_retry;
    167 
    168 	/** max number of queries sent; presumably the limit for sent_count (TODO confirm) */
    169 	int max_sent_count;
    170 
    171 	/** max number of query restarts to limit length of CNAME chain */
    172 	int max_query_restarts;
    173 };
    174 
    175 /**
    176  * QNAME minimisation state; kept per query in struct iter_qstate.
    177  */
    178 enum minimisation_state {
    179 	/**
    180 	 * (Re)start minimisation. Outgoing QNAME should be set to dp->name.
    181 	 * State entered on new query or after following referral or CNAME.
    182 	 */
    183 	INIT_MINIMISE_STATE = 0,
    184 	/**
    185 	 * QNAME minimisation ongoing. Increase QNAME on every iteration.
    186 	 */
    187 	MINIMISE_STATE,
    188 	/**
    189 	 * Do not increment QNAME this iteration.
    190 	 */
    191 	SKIP_MINIMISE_STATE,
    192 	/**
    193 	 * Send out full QNAME + original QTYPE.
    194 	 */
    195 	DONOT_MINIMISE_STATE,
    196 };
    197 
    198 /**
    199  * State of the iterator for a query; type of iter_qstate state and final_state.
    200  */
    201 enum iter_state {
    202 	/**
    203 	 * Externally generated queries start at this state. Query restarts are
    204 	 * reset to this state.
    205 	 */
    206 	INIT_REQUEST_STATE = 0,
    207 
    208 	/**
    209 	 * Root priming events reactivate here, most other events pass
    210 	 * through this naturally as the 2nd part of the INIT_REQUEST_STATE.
    211 	 */
    212 	INIT_REQUEST_2_STATE,
    213 
    214 	/**
    215 	 * Stub priming events reactivate here, most other events pass
    216 	 * through this naturally as the 3rd part of the INIT_REQUEST_STATE.
    217 	 */
    218 	INIT_REQUEST_3_STATE,
    219 
    220 	/**
    221 	 * Each time a delegation point changes for a given query or a
    222 	 * query times out and/or wakes up, this state is (re)visited.
    223 	 * This state is responsible for iterating through a list of
    224 	 * nameserver targets.
    225 	 */
    226 	QUERYTARGETS_STATE,
    227 
    228 	/**
    229 	 * Responses to queries start at this state. This state handles
    230 	 * the decision tree associated with handling responses.
    231 	 */
    232 	QUERY_RESP_STATE,
    233 
    234 	/** Responses to priming queries finish at this state. */
    235 	PRIME_RESP_STATE,
    236 
    237 	/** Collecting query class information, for qclass=ANY, when
    238 	 * it spawns off queries for every class, it returns here. */
    239 	COLLECT_CLASS_STATE,
    240 
    241 	/** Find NS record to resolve DS record from, walking to the right
    242 	 * NS spot until we find it */
    243 	DSNS_FIND_STATE,
    244 
    245 	/** Responses that are to be returned upstream end at this state.
    246 	 * As well as responses to target queries. */
    247 	FINISHED_STATE
    248 };
    249 
    250 /**
    251  * Shared counters for queries; names the counters kept in iter_qstate target_count.
    252  */
    253 enum target_count_variables {
    254 	/** Reference count for the shared iter_qstate->target_count. */
    255 	TARGET_COUNT_REF = 0,
    256 	/** Number of target queries spawned for the query and subqueries. */
    257 	TARGET_COUNT_QUERIES,
    258 	/** Number of nxdomain responses encountered. */
    259 	TARGET_COUNT_NX,
    260 	/** Global quota on number of queries to upstream servers per
    261 	 * client request, that is never reset. */
    262 	TARGET_COUNT_GLOBAL_QUOTA,
    263 
    264 	/** This should stay last here, it is used for the allocation */
    265 	TARGET_COUNT_MAX,
    266 };
    267 
    268 /**
    269  * Per query state for the iterator module.
    270  */
    271 struct iter_qstate {
    272 	/**
    273 	 * State of the iterator module.
    274 	 * This is the state that event is in or should be sent to -- all
    275 	 * requests should start with the INIT_REQUEST_STATE. All
    276 	 * responses should start with QUERY_RESP_STATE. Subsequent
    277 	 * processing of the event will change this state.
    278 	 */
    279 	enum iter_state state;
    280 
    281 	/**
    282 	 * Final state for the iterator module.
    283 	 * This is the state that responses should be routed to once the
    284 	 * response is final. For externally initiated queries, this
    285 	 * will be FINISHED_STATE, locally initiated queries will have
    286 	 * different final states.
    287 	 */
    288 	enum iter_state final_state;
    289 
    290 	/**
    291 	 * The depth of this query, this means the depth of recursion.
    292 	 * This address is needed for another query, which is an address
    293 	 * needed for another query, etc. Original client query has depth 0.
    294 	 */
    295 	int depth;
    296 
    297 	/**
    298 	 * The response
    299 	 */
    300 	struct dns_msg* response;
    301 
    302 	/**
    303 	 * This is a list of RRsets that must be prepended to the
    304 	 * ANSWER section of a response before being sent upstream.
    305 	 */
    306 	struct iter_prep_list* an_prepend_list;
    307 	/** Last element of the answer prepend list */
    308 	struct iter_prep_list* an_prepend_last;
    309 
    310 	/**
    311 	 * This is the list of RRsets that must be prepended to the
    312 	 * AUTHORITY section of the response before being sent upstream.
    313 	 */
    314 	struct iter_prep_list* ns_prepend_list;
    315 	/** Last element of the authority prepend list */
    316 	struct iter_prep_list* ns_prepend_last;
    317 
    318 	/** query name used for chasing the results. Initially the same as
    319 	 * the state qinfo, but after CNAMEs this will be different.
    320 	 * The query info used to elicit the results needed. */
    321 	struct query_info qchase;
    322 	/** query flags to use when chasing the answer (i.e. RD flag) */
    323 	uint16_t chase_flags;
    324 	/** true if we set RD bit because of last resort recursion lame query */
    325 	int chase_to_rd;
    326 
    327 	/**
    328 	 * This is the current delegation point for an in-progress query. This
    329 	 * object retains state as to which delegation targets need to be
    330 	 * (sub)queried for vs which ones have already been visited.
    331 	 */
    332 	struct delegpt* dp;
    333 
    334 	/** state for 0x20 fallback when capsfail happens, 0 not a fallback */
    335 	int caps_fallback;
    336 	/** state for capsfail: current server number to try */
    337 	size_t caps_server;
    338 	/** state for capsfail: stored query for comparisons. Can be NULL if
    339 	 * no response had been seen prior to starting the fallback. */
    340 	struct reply_info* caps_reply;
    341 	struct dns_msg* caps_response; /**< NOTE(review): undocumented; presumably the message that goes with caps_reply, confirm */
    342 
    343 	/** Current delegation message - returned for non-RD queries */
    344 	struct dns_msg* deleg_msg;
    345 
    346 	/** number of outstanding target sub queries */
    347 	int num_target_queries;
    348 
    349 	/** outstanding direct queries */
    350 	int num_current_queries;
    351 
    352 	/** the number of times this query has been restarted. */
    353 	int query_restart_count;
    354 
    355 	/** the number of times this query has followed a referral. */
    356 	int referral_count;
    357 
    358 	/** number of queries fired off */
    359 	int sent_count;
    360 
    361 	/** malloced-array shared with this query and its subqueries. It keeps
    362 	 * track of the defined enum target_count_variables counters. */
    363 	int* target_count;
    364 
    365 	/** number of target lookups per delegation point. Reset to 0 after
    366 	 * receiving referral answer. Not shared with subqueries. */
    367 	int dp_target_count;
    368 
    369 	/** Delegation point that triggered the NXNS fallback; shared with
    370 	 * this query and its subqueries, count-referenced by the reference
    371 	 * counter in target_count.
    372 	 * This also marks the fallback activation. */
    373 	uint8_t** nxns_dp;
    374 
    375 	/** if true, already tested for ratelimiting and passed the test */
    376 	int ratelimit_ok;
    377 
    378 	/**
    379 	 * The query must store NS records from referrals as parentside RRs
    380 	 * Enabled once it hits resolution problems, to throttle retries.
    381 	 * If enabled it is the pointer to the old delegation point with
    382 	 * the old retry counts for bad-nameserver-addresses.
    383 	 */
    384 	struct delegpt* store_parent_NS;
    385 
    386 	/**
    387 	 * The query is for parent-side glue(A or AAAA) for a nameserver.
    388 	 * If the item is seen as glue in a referral, and pside_glue is NULL,
    389 	 * then it is stored in pside_glue for later.
    390 	 * If it was never seen, at the end, then a negative caching element
    391 	 * must be created.
    392 	 * The (data or negative) RR cache element then throttles retries.
    393 	 */
    394 	int query_for_pside_glue;
    395 	/** the parent-side-glue element (NULL if none, it is the first match) */
    396 	struct ub_packed_rrset_key* pside_glue;
    397 
    398 	/** If nonNULL we are walking upwards from DS query to find NS */
    399 	uint8_t* dsns_point;
    400 	/** length of the dname in dsns_point */
    401 	size_t dsns_point_len;
    402 
    403 	/**
    404 	 * expected dnssec information for this iteration step.
    405 	 * If dnssec rrsigs are expected and not given, the server is marked
    406 	 * lame (dnssec-lame).
    407 	 */
    408 	int dnssec_expected;
    409 
    410 	/**
    411 	 * We are expecting dnssec information, but we also know the server
    412 	 * is DNSSEC lame.  The response need not be marked dnssec-lame again.
    413 	 */
    414 	int dnssec_lame_query;
    415 
    416 	/**
    417 	 * This is a flag that, if true, means that this event is
    418 	 * waiting for a stub priming query.
    419 	 */
    420 	int wait_priming_stub;
    421 
    422 	/**
    423 	 * This is a flag that, if true, means that this query is
    424 	 * for (re)fetching glue from a zone. Since the address should
    425 	 * have been glue, query again to the servers that should have
    426 	 * been returning it as glue.
    427 	 * The delegation point must be set to the one that should *not*
    428 	 * be used when creating the state. A higher one will be attempted.
    429 	 */
    430 	int refetch_glue;
    431 
    432 	/**
    433 	 * This flag records that a completely empty nodata was already
    434 	 * received, so that it is accepted later. */
    435 	int empty_nodata_found;
    436 
    437 	/** list of pending queries to authoritative servers. */
    438 	struct outbound_list outlist;
    439 
    440 	/** QNAME minimisation state, RFC9156 */
    441 	enum minimisation_state minimisation_state;
    442 
    443 	/** State for capsfail: QNAME minimisation state for comparisons. */
    444 	enum minimisation_state caps_minimisation_state;
    445 
    446 	/**
    447 	 * The query info that is sent upstream. Will be a subset of qchase
    448 	 * when qname minimisation is enabled.
    449 	 */
    450 	struct query_info qinfo_out;
    451 
    452 	/**
    453 	 * Count number of QNAME minimisation iterations. Used to limit number of
    454 	 * outgoing queries when QNAME minimisation is enabled.
    455 	 */
    456 	int minimise_count;
    457 
    458 	/**
    459 	 * Count number of time-outs. Used to prevent resolving failures when
    460 	 * the QNAME minimisation QTYPE is blocked. Used to determine if
    461 	 * capsforid fallback should be started. */
    462 	int timeout_count;
    463 
    464 	/** True if the current response is from auth_zone */
    465 	int auth_zone_response;
    466 	/** True if the auth_zones should not be consulted for the query */
    467 	int auth_zone_avoid;
    468 	/** true if there have been scrubbing failures of reply packets */
    469 	int scrub_failures;
    470 	/** true if there have been parse failures of reply packets */
    471 	int parse_failures;
    472 	/** a failure printout address for last received answer */
    473 	union {
    474 		struct in_addr in;
    475 #ifdef AF_INET6
    476 		struct in6_addr in6;
    477 #endif
    478 	} fail_addr;
    479 	/** which fail_addr member is set: 0 is nothing, 4 or 6 */
    480 	int fail_addr_type;
    481 };
    482 
    483 /**
    484  * List of prepend items; used for the iter_qstate answer and authority lists.
    485  */
    486 struct iter_prep_list {
    487 	/** next in list */
    488 	struct iter_prep_list* next;
    489 	/** rrset of this prepend item */
    490 	struct ub_packed_rrset_key* rrset;
    491 };
    492 
    493 /**
    494  * Get the iterator function block.
    495  * @return: function block with function pointers to iterator methods.
    496  */
    497 struct module_func_block* iter_get_funcblock(void);
    498 
    499 /**
    500  * Get iterator state as a string
    501  * @param state: to convert
    502  * @return constant string that is printable.
    503  */
    504 const char* iter_state_to_string(enum iter_state state);
    505 
    506 /**
    507  * See if iterator state is a response state
    508  * @param s: to inspect
    509  * @return true if response state.
    510  */
    511 int iter_state_is_responsestate(enum iter_state s);
    512 
    513 /** iterator init; id is the module id. Return value meaning: TODO confirm */
    514 int iter_init(struct module_env* env, int id);
    515 
    516 /** iterator deinit; id is the module id */
    517 void iter_deinit(struct module_env* env, int id);
    518 
    519 /** iterator operate on a query, for the given module event; id is the module id */
    520 void iter_operate(struct module_qstate* qstate, enum module_ev event, int id,
    521 	struct outbound_entry* outbound);
    522 
    523 /**
    524  * Return priming query results to interested super querystates.
    525  *
    526  * Sets the delegation point and delegation message (not nonRD queries).
    527  * This is a callback from walk_supers.
    528  *
    529  * @param qstate: query state that finished.
    530  * @param id: module id.
    531  * @param super: the qstate to inform.
    532  */
    533 void iter_inform_super(struct module_qstate* qstate, int id,
    534 	struct module_qstate* super);
    535 
    536 /** iterator cleanup query state; id is the module id */
    537 void iter_clear(struct module_qstate* qstate, int id);
    538 
    539 /** iterator alloc size routine; presumably returns memory in use (TODO confirm) */
    540 size_t iter_get_mem(struct module_env* env, int id);
    541 
    542 #endif /* ITERATOR_ITERATOR_H */
    543