Home | History | Annotate | Line # | Download | only in libntp
      1 /*	$NetBSD: ntp_intres.c,v 1.13 2024/08/18 20:47:13 christos Exp $	*/
      2 
      3 /*
      4  * ntp_intres.c - Implements a generic blocking worker child or thread,
      5  *		  initially to provide a nonblocking solution for DNS
      6  *		  name to address lookups available with getaddrinfo().
      7  *
      8  * This is a new implementation as of 2009 sharing the filename and
      9  * very little else with the prior implementation, which used a
     10  * temporary file to receive a single set of requests from the parent,
     11  * and a NTP mode 7 authenticated request to push back responses.
     12  *
     13  * A primary goal in rewriting this code was the need to support the
     14  * pool configuration directive's requirement to retrieve multiple
     15  * addresses resolving a single name, which has previously been
     16  * satisfied with blocking resolver calls from the ntpd mainline code.
     17  *
     18  * A secondary goal is to provide a generic mechanism for other
     19  * blocking operations to be delegated to a worker using a common
     20  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
     21  * and work_thread.c implement the generic mechanism.  This file
     22  * implements the two current consumers, getaddrinfo_sometime() and the
     23  * presently unused getnameinfo_sometime().
     24  *
     25  * Both routines deliver results to a callback and manage memory
     26  * allocation, meaning there is no freeaddrinfo_sometime().
     27  *
     28  * The initial implementation for Unix uses a pair of unidirectional
     29  * pipes, one each for requests and responses, connecting the forked
     30  * blocking child worker with the ntpd mainline.  The threaded code
     31  * uses arrays of pointers to queue requests and responses.
     32  *
     33  * The parent drives the process, including scheduling sleeps between
     34  * retries.
     35  *
     36  * Memory is managed differently for a child process, which mallocs
     37  * request buffers to read from the pipe into, whereas the threaded
     38  * code mallocs a copy of the request to hand off to the worker via
     39  * the queueing array.  The resulting request buffer is free()d by
     40  * platform-independent code.  A wrinkle is the request needs to be
     41  * available to the requestor during response processing.
     42  *
     43  * Response memory allocation is also platform-dependent.  With a
     44  * separate process and pipes, the response is free()d after being
     45  * written to the pipe.  With threads, the same memory is handed
     46  * over and the requestor frees it after processing is completed.
     47  *
     48  * The code should be generalized to support threads on Unix using
     49  * much of the same code used for Windows initially.
     50  *
     51  */
     52 #ifdef HAVE_CONFIG_H
     53 # include <config.h>
     54 #endif
     55 
     56 #include "ntp_workimpl.h"
     57 
     58 #ifdef WORKER
     59 
     60 #include <stdio.h>
     61 #include <ctype.h>
     62 #include <signal.h>
     63 
     64 /**/
     65 #ifdef HAVE_SYS_TYPES_H
     66 # include <sys/types.h>
     67 #endif
     68 #ifdef HAVE_NETINET_IN_H
     69 #include <netinet/in.h>
     70 #endif
     71 #include <arpa/inet.h>
     72 /**/
     73 #ifdef HAVE_SYS_PARAM_H
     74 # include <sys/param.h>
     75 #endif
     76 
     77 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
     78 # define HAVE_RES_INIT
     79 #endif
     80 
     81 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
     82 # ifdef HAVE_ARPA_NAMESER_H
     83 #  include <arpa/nameser.h> /* DNS HEADER struct */
     84 # endif
     85 # ifdef HAVE_NETDB_H
     86 #  include <netdb.h>
     87 # endif
     88 # include <resolv.h>
     89 #endif
     90 
     91 #include "ntp.h"
     92 #include "ntp_debug.h"
     93 #include "ntp_malloc.h"
     94 #include "ntp_syslog.h"
     95 #include "ntp_unixtime.h"
     96 #include "ntp_intres.h"
     97 #include "intreswork.h"
     98 
     99 
    100 /*
    101  * Following are implementations of getaddrinfo_sometime() and
    102  * getnameinfo_sometime().  Each is implemented in three routines:
    103  *
    104  * getaddrinfo_sometime()		getnameinfo_sometime()
    105  * blocking_getaddrinfo()		blocking_getnameinfo()
    106  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
    107  *
    108  * The first runs in the parent and marshalls (or serializes) request
    109  * parameters into a request blob which is processed in the child by
    110  * the second routine, blocking_*(), which serializes the results into
    111  * a response blob unpacked by the third routine, *_complete(), which
    112  * calls the callback routine provided with the request and frees
    113  * _request_ memory allocated by the first routine.  Response memory
    114  * is managed by the code which calls the *_complete routines.
    115  */
    116 
    117 
    118 /* === typedefs === */
/*
 * Marshalled getaddrinfo_sometime_ex() request.  This fixed-size header
 * is followed in the same allocation by nodesize bytes of
 * NUL-terminated node name and then servsize bytes of NUL-terminated
 * service name.
 */
typedef struct blocking_gai_req_tag {	/* marshalled args */
	size_t			octets;		/* header + node + service bytes */
	u_int			dns_idx;	/* index into the DNS context tables */
	time_t			scheduled;	/* time() when the request was queued */
	time_t			earliest;	/* max(scheduled, parent's next DNS timeslot) */
	int			retry;		/* > 0 allows failed lookups to be requeued */
	struct addrinfo		hints;		/* caller's hints, or all zero if none given */
	u_int			qflags;		/* GAIR_F_* flags, e.g. GAIR_F_IGNDNSERR */
	gai_sometime_callback	callback;	/* invoked by getaddrinfo_sometime_complete() */
	void *			context;	/* opaque, handed back to callback */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
} blocking_gai_req;
    132 
/*
 * Fixed-size header of the getaddrinfo response blob, built by
 * blocking_getaddrinfo() and unpacked by
 * getaddrinfo_sometime_complete().
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this header plus all trailing data */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* copy of the request's retry value */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries that follow */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;
    144 
/*
 * Marshalled getnameinfo_sometime() request.  Unlike the getaddrinfo
 * request this one is fixed-size: nothing trails the structure.
 */
typedef struct blocking_gni_req_tag {
	size_t			octets;		/* sizeof(blocking_gni_req) */
	u_int			dns_idx;	/* index into the DNS context tables */
	time_t			scheduled;	/* time() when the request was queued */
	time_t			earliest;	/* max(scheduled, parent's next DNS timeslot) */
	int			retry;		/* starts at INITIAL_DNS_RETRY */
	size_t			hostoctets;	/* host buffer size passed to getnameinfo() */
	size_t			servoctets;	/* service buffer size passed to getnameinfo() */
	int			flags;		/* getnameinfo() flags */
	gni_sometime_callback	callback;	/* invoked by getnameinfo_sometime_complete() */
	void *			context;	/* opaque, handed back to callback */
	sockaddr_u		socku;		/* address to look up */
} blocking_gni_req;
    158 
/*
 * Fixed-size header of the getnameinfo response blob, built by
 * blocking_getnameinfo() and unpacked by
 * getnameinfo_sometime_complete().
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this header plus host and service strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* copy of the request's retry value */
	size_t			hostoctets;	/* strlen(host) + 1, or 0 on failure */
	size_t			servoctets;	/* strlen(service) + 1, or 0 on failure */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;
    171 
/*
 * per-DNS-worker state in parent
 *
 * Entries live in dnschild_contexts[] and are created zero-filled by
 * reserve_dnschild_ctx().
 */
typedef struct dnschild_ctx_tag {
	u_int	index;			/* not assigned in the code visible here -- TODO confirm use */
	time_t	next_dns_timeslot;	/* lower bound for the "earliest" time of the next request */
} dnschild_ctx;
    177 
/*
 * per-DNS-worker state in worker
 *
 * Entries live in dnsworker_contexts[] and are handed out by
 * get_worker_context().
 */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* child this context was last handed out for */
	/*
	 * Set to the current time when a lookup succeeds after
	 * retrying; passed to scheduled_sleep() as part of this
	 * context.
	 */
	time_t			ignore_scheduled_before;
#ifdef HAVE_RES_INIT
	time_t	next_res_init;		/* when reload_resolv_conf() is next due */
#endif
} dnsworker_ctx;
    186 
    187 
    188 /* === variables === */
/*
 * Both tables are indexed by the dns_idx value carried in every
 * request.  dnschild_contexts[] is grown by reserve_dnschild_ctx();
 * dnsworker_contexts[] is grown by get_worker_context() while holding
 * worker_global_lock().  The *_alloc counters hold the number of
 * pointer slots currently allocated.
 */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;

#ifdef HAVE_RES_INIT
/*
 * Process-wide copy of the next res_init() deadline; only consulted
 * by reload_resolv_conf() when built with WORK_THREAD.
 */
static	time_t		next_res_init;
#endif
    197 
    198 
    199 /* === forward declarations === */
/* parent-side context table management */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
/* worker-side context lookup and pre-lookup sleep */
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
/* retry policy helpers used by the *_complete() routines */
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *, time_t *,
						  int/*BOOL*/);
static	int		should_retry_dns(int, int);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
/* no res_init() available: expand to a no-op that still consumes wc */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
/* completion callbacks handed to queue_blocking_request() */
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
    223 
    224 
    225 /* === functions === */
    226 /*
    227  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
    228  *			  invokes provided callback completion function.
    229  */
    230 int
    231 getaddrinfo_sometime_ex(
    232 	const char *		node,
    233 	const char *		service,
    234 	const struct addrinfo *	hints,
    235 	int			retry,
    236 	gai_sometime_callback	callback,
    237 	void *			context,
    238 	u_int			qflags
    239 	)
    240 {
    241 	blocking_gai_req *	gai_req;
    242 	u_int			idx;
    243 	dnschild_ctx *		child_ctx;
    244 	size_t			req_size;
    245 	size_t			nodesize;
    246 	size_t			servsize;
    247 	time_t			now;
    248 
    249 	REQUIRE(NULL != node);
    250 	if (NULL != hints) {
    251 		REQUIRE(0 == hints->ai_addrlen);
    252 		REQUIRE(NULL == hints->ai_addr);
    253 		REQUIRE(NULL == hints->ai_canonname);
    254 		REQUIRE(NULL == hints->ai_next);
    255 	}
    256 
    257 	idx = get_dnschild_ctx();
    258 	child_ctx = dnschild_contexts[idx];
    259 
    260 	nodesize = strlen(node) + 1;
    261 	servsize = strlen(service) + 1;
    262 	req_size = sizeof(*gai_req) + nodesize + servsize;
    263 
    264 	gai_req = emalloc_zero(req_size);
    265 
    266 	gai_req->octets = req_size;
    267 	gai_req->dns_idx = idx;
    268 	now = time(NULL);
    269 	gai_req->scheduled = now;
    270 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
    271 	child_ctx->next_dns_timeslot = gai_req->earliest;
    272 	if (hints != NULL)
    273 		gai_req->hints = *hints;
    274 	gai_req->retry = retry;
    275 	gai_req->callback = callback;
    276 	gai_req->context = context;
    277 	gai_req->nodesize = nodesize;
    278 	gai_req->servsize = servsize;
    279 	gai_req->qflags = qflags;
    280 
    281 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
    282 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
    283 	       servsize);
    284 
    285 	if (queue_blocking_request(
    286 		BLOCKING_GETADDRINFO,
    287 		gai_req,
    288 		req_size,
    289 		&getaddrinfo_sometime_complete,
    290 		gai_req)) {
    291 
    292 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
    293 		errno = EFAULT;
    294 		return -1;
    295 	}
    296 
    297 	return 0;
    298 }
    299 
    300 int
    301 blocking_getaddrinfo(
    302 	blocking_child *	c,
    303 	blocking_pipe_header *	req
    304 	)
    305 {
    306 	blocking_gai_req *	gai_req;
    307 	dnsworker_ctx *		worker_ctx;
    308 	blocking_pipe_header *	resp;
    309 	blocking_gai_resp *	gai_resp;
    310 	char *			node;
    311 	char *			service;
    312 	struct addrinfo *	ai_res;
    313 	struct addrinfo *	ai;
    314 	struct addrinfo *	serialized_ai;
    315 	size_t			canons_octets;
    316 	size_t			this_octets;
    317 	size_t			resp_octets;
    318 	char *			cp;
    319 	time_t			time_now;
    320 
    321 	gai_req = (void *)((char *)req + sizeof(*req));
    322 	node = (char *)gai_req + sizeof(*gai_req);
    323 	service = node + gai_req->nodesize;
    324 
    325 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
    326 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
    327 			worker_ctx);
    328 	reload_resolv_conf(worker_ctx);
    329 
    330 	/*
    331 	 * Take a shot at the final size, better to overestimate
    332 	 * at first and then realloc to a smaller size.
    333 	 */
    334 
    335 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
    336 		      16 * (sizeof(struct addrinfo) +
    337 			    sizeof(sockaddr_u)) +
    338 		      256;
    339 	resp = emalloc_zero(resp_octets);
    340 	gai_resp = (void *)(resp + 1);
    341 
    342 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
    343 		  node, service, gai_req->hints.ai_family,
    344 		  gai_req->hints.ai_flags));
    345 #ifdef DEBUG
    346 	if (debug >= 2)
    347 		fflush(stdout);
    348 #endif
    349 	ai_res = NULL;
    350 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
    351 					&ai_res);
    352 	gai_resp->retry = gai_req->retry;
    353 #ifdef EAI_SYSTEM
    354 	if (EAI_SYSTEM == gai_resp->retcode)
    355 		gai_resp->gai_errno = errno;
    356 #endif
    357 	canons_octets = 0;
    358 
    359 	if (0 == gai_resp->retcode) {
    360 		ai = ai_res;
    361 		while (NULL != ai) {
    362 			gai_resp->ai_count++;
    363 			if (ai->ai_canonname)
    364 				canons_octets += strlen(ai->ai_canonname) + 1;
    365 			ai = ai->ai_next;
    366 		}
    367 		/*
    368 		 * If this query succeeded only after retrying, DNS may have
    369 		 * just become responsive.  Ignore previously-scheduled
    370 		 * retry sleeps once for each pending request, similar to
    371 		 * the way scheduled_sleep() does when its worker_sleep()
    372 		 * is interrupted.
    373 		 */
    374 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
    375 			time_now = time(NULL);
    376 			worker_ctx->ignore_scheduled_before = time_now;
    377 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
    378 				  humantime(time_now)));
    379 		}
    380 	}
    381 
    382 	/*
    383 	 * Our response consists of a header, followed by ai_count
    384 	 * addrinfo structs followed by ai_count sockaddr_storage
    385 	 * structs followed by the canonical names.
    386 	 */
    387 	gai_resp->octets = sizeof(*gai_resp)
    388 			    + gai_resp->ai_count
    389 				* (sizeof(gai_req->hints)
    390 				   + sizeof(sockaddr_u))
    391 			    + canons_octets;
    392 
    393 	resp_octets = sizeof(*resp) + gai_resp->octets;
    394 	resp = erealloc(resp, resp_octets);
    395 	gai_resp = (void *)(resp + 1);
    396 
    397 	/* cp serves as our current pointer while serializing */
    398 	cp = (void *)(gai_resp + 1);
    399 	canons_octets = 0;
    400 
    401 	if (0 == gai_resp->retcode) {
    402 		ai = ai_res;
    403 		while (NULL != ai) {
    404 			memcpy(cp, ai, sizeof(*ai));
    405 			serialized_ai = (void *)cp;
    406 			cp += sizeof(*ai);
    407 
    408 			/* transform ai_canonname into offset */
    409 			if (NULL != ai->ai_canonname) {
    410 				serialized_ai->ai_canonname = (char *)canons_octets;
    411 				canons_octets += strlen(ai->ai_canonname) + 1;
    412 			}
    413 
    414 			/* leave fixup of ai_addr pointer for receiver */
    415 
    416 			ai = ai->ai_next;
    417 		}
    418 
    419 		ai = ai_res;
    420 		while (NULL != ai) {
    421 			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
    422 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
    423 			cp += sizeof(sockaddr_u);
    424 
    425 			ai = ai->ai_next;
    426 		}
    427 
    428 		ai = ai_res;
    429 		while (NULL != ai) {
    430 			if (NULL != ai->ai_canonname) {
    431 				this_octets = strlen(ai->ai_canonname) + 1;
    432 				memcpy(cp, ai->ai_canonname, this_octets);
    433 				cp += this_octets;
    434 			}
    435 
    436 			ai = ai->ai_next;
    437 		}
    438 		freeaddrinfo(ai_res);
    439 	}
    440 
    441 	/*
    442 	 * make sure our walk and earlier calc match
    443 	 */
    444 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
    445 
    446 	if (queue_blocking_response(c, resp, resp_octets, req)) {
    447 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
    448 		return -1;
    449 	}
    450 
    451 	return 0;
    452 }
    453 
    454 int
    455 getaddrinfo_sometime(
    456 	const char *		node,
    457 	const char *		service,
    458 	const struct addrinfo *	hints,
    459 	int			retry,
    460 	gai_sometime_callback	callback,
    461 	void *			context
    462 	)
    463 {
    464 	return getaddrinfo_sometime_ex(node, service, hints, retry,
    465 				       callback, context, 0);
    466 }
    467 
    468 
    469 static void
    470 getaddrinfo_sometime_complete(
    471 	blocking_work_req	rtype,
    472 	void *			context,
    473 	size_t			respsize,
    474 	void *			resp
    475 	)
    476 {
    477 	blocking_gai_req *	gai_req;
    478 	blocking_gai_resp *	gai_resp;
    479 	dnschild_ctx *		child_ctx;
    480 	struct addrinfo *	ai;
    481 	struct addrinfo *	next_ai;
    482 	sockaddr_u *		psau;
    483 	char *			node;
    484 	char *			service;
    485 	char *			canon_start;
    486 	time_t			time_now;
    487 	int			again, noerr;
    488 	int			af;
    489 	const char *		fam_spec;
    490 	int			i;
    491 
    492 	gai_req = context;
    493 	gai_resp = resp;
    494 
    495 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
    496 	DEBUG_REQUIRE(respsize == gai_resp->octets);
    497 
    498 	node = (char *)gai_req + sizeof(*gai_req);
    499 	service = node + gai_req->nodesize;
    500 
    501 	child_ctx = dnschild_contexts[gai_req->dns_idx];
    502 
    503 	if (0 == gai_resp->retcode) {
    504 		/*
    505 		 * If this query succeeded only after retrying, DNS may have
    506 		 * just become responsive.
    507 		 */
    508 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
    509 			time_now = time(NULL);
    510 			child_ctx->next_dns_timeslot = time_now;
    511 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
    512 				  gai_req->dns_idx, humantime(time_now)));
    513 		}
    514 	} else {
    515 		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
    516 		again = noerr || should_retry_dns(
    517 					gai_resp->retcode, gai_resp->gai_errno);
    518 		/*
    519 		 * exponential backoff of DNS retries to 64s
    520 		 */
    521 		if (gai_req->retry > 0 && again) {
    522 			/* log the first retry only */
    523 			if (INITIAL_DNS_RETRY == gai_req->retry)
    524 				NLOG(NLOG_SYSINFO) {
    525 					af = gai_req->hints.ai_family;
    526 					fam_spec = (AF_INET6 == af)
    527 						       ? " (AAAA)"
    528 						       : (AF_INET == af)
    529 							     ? " (A)"
    530 							     : "";
    531 #ifdef EAI_SYSTEM
    532 					if (EAI_SYSTEM == gai_resp->retcode) {
    533 						errno = gai_resp->gai_errno;
    534 						msyslog(LOG_INFO,
    535 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
    536 							node, fam_spec,
    537 							gai_resp->gai_errno);
    538 					} else
    539 #endif
    540 						msyslog(LOG_INFO,
    541 							"retrying DNS %s%s: %s (%d)",
    542 							node, fam_spec,
    543 							gai_strerror(gai_resp->retcode),
    544 							gai_resp->retcode);
    545 				}
    546 			manage_dns_retry_interval(
    547 				&gai_req->scheduled, &gai_req->earliest,
    548 				&gai_req->retry, &child_ctx->next_dns_timeslot,
    549 				noerr);
    550 			if (!queue_blocking_request(
    551 					BLOCKING_GETADDRINFO,
    552 					gai_req,
    553 					gai_req->octets,
    554 					&getaddrinfo_sometime_complete,
    555 					gai_req))
    556 				return;
    557 			else
    558 				msyslog(LOG_ERR,
    559 					"unable to retry hostname %s",
    560 					node);
    561 		}
    562 	}
    563 
    564 	/*
    565 	 * fixup pointers in returned addrinfo array
    566 	 */
    567 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
    568 	next_ai = NULL;
    569 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
    570 		ai[i].ai_next = next_ai;
    571 		next_ai = &ai[i];
    572 	}
    573 
    574 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
    575 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
    576 
    577 	for (i = 0; i < gai_resp->ai_count; i++) {
    578 		if (NULL != ai[i].ai_addr)
    579 			ai[i].ai_addr = &psau->sa;
    580 		psau++;
    581 		if (NULL != ai[i].ai_canonname)
    582 			ai[i].ai_canonname += (size_t)canon_start;
    583 	}
    584 
    585 	ENSURE((char *)psau == canon_start);
    586 
    587 	if (!gai_resp->ai_count)
    588 		ai = NULL;
    589 
    590 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
    591 			     gai_req->context, node, service,
    592 			     &gai_req->hints, ai);
    593 
    594 	free(gai_req);
    595 	/* gai_resp is part of block freed by process_blocking_resp() */
    596 }
    597 
    598 
    599 #ifdef TEST_BLOCKING_WORKER
    600 void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
    601 {
    602 	sockaddr_u addr;
    603 
    604 	if (rescode) {
    605 		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
    606 			  context, rescode, name, service));
    607 		return;
    608 	}
    609 	while (!rescode && NULL != ai_res) {
    610 		ZERO_SOCK(&addr);
    611 		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
    612 		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
    613 			  context,
    614 			  AF(&addr),
    615 			  stoa(&addr),
    616 			  (ai_res->ai_canonname)
    617 			      ? ai_res->ai_canonname
    618 			      : "",
    619 			  (SOCK_DGRAM == ai_res->ai_socktype)
    620 			      ? "DGRAM"
    621 			      : (SOCK_STREAM == ai_res->ai_socktype)
    622 				    ? "STREAM"
    623 				    : "(other)",
    624 			  ai_res,
    625 			  ai_res->ai_addr,
    626 			  ai_res->ai_next));
    627 
    628 		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
    629 
    630 		ai_res = ai_res->ai_next;
    631 	}
    632 }
    633 #endif	/* TEST_BLOCKING_WORKER */
    634 
    635 
    636 int
    637 getnameinfo_sometime(
    638 	sockaddr_u *		psau,
    639 	size_t			hostoctets,
    640 	size_t			servoctets,
    641 	int			flags,
    642 	gni_sometime_callback	callback,
    643 	void *			context
    644 	)
    645 {
    646 	blocking_gni_req *	gni_req;
    647 	u_int			idx;
    648 	dnschild_ctx *		child_ctx;
    649 	time_t			time_now;
    650 
    651 	REQUIRE(hostoctets);
    652 	REQUIRE(hostoctets + servoctets < 1024);
    653 
    654 	idx = get_dnschild_ctx();
    655 	child_ctx = dnschild_contexts[idx];
    656 
    657 	gni_req = emalloc_zero(sizeof(*gni_req));
    658 
    659 	gni_req->octets = sizeof(*gni_req);
    660 	gni_req->dns_idx = idx;
    661 	time_now = time(NULL);
    662 	gni_req->scheduled = time_now;
    663 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
    664 	child_ctx->next_dns_timeslot = gni_req->earliest;
    665 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
    666 	gni_req->hostoctets = hostoctets;
    667 	gni_req->servoctets = servoctets;
    668 	gni_req->flags = flags;
    669 	gni_req->retry = INITIAL_DNS_RETRY;
    670 	gni_req->callback = callback;
    671 	gni_req->context = context;
    672 
    673 	if (queue_blocking_request(
    674 		BLOCKING_GETNAMEINFO,
    675 		gni_req,
    676 		sizeof(*gni_req),
    677 		&getnameinfo_sometime_complete,
    678 		gni_req)) {
    679 
    680 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
    681 		errno = EFAULT;
    682 		return -1;
    683 	}
    684 
    685 	return 0;
    686 }
    687 
    688 
    689 int
    690 blocking_getnameinfo(
    691 	blocking_child *	c,
    692 	blocking_pipe_header *	req
    693 	)
    694 {
    695 	blocking_gni_req *	gni_req;
    696 	dnsworker_ctx *		worker_ctx;
    697 	blocking_pipe_header *	resp;
    698 	blocking_gni_resp *	gni_resp;
    699 	size_t			octets;
    700 	size_t			resp_octets;
    701 	char *			service;
    702 	char *			cp;
    703 	int			rc;
    704 	time_t			time_now;
    705 	char			host[1024];
    706 
    707 	gni_req = (void *)((char *)req + sizeof(*req));
    708 
    709 	octets = gni_req->hostoctets + gni_req->servoctets;
    710 
    711 	/*
    712 	 * Some alloca() implementations are fragile regarding
    713 	 * large allocations.  We only need room for the host
    714 	 * and service names.
    715 	 */
    716 	REQUIRE(octets < sizeof(host));
    717 	service = host + gni_req->hostoctets;
    718 
    719 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
    720 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
    721 			worker_ctx);
    722 	reload_resolv_conf(worker_ctx);
    723 
    724 	/*
    725 	 * Take a shot at the final size, better to overestimate
    726 	 * then realloc to a smaller size.
    727 	 */
    728 
    729 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
    730 	resp = emalloc_zero(resp_octets);
    731 	gni_resp = (void *)((char *)resp + sizeof(*resp));
    732 
    733 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
    734 		  stoa(&gni_req->socku), gni_req->flags,
    735 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
    736 
    737 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
    738 					SOCKLEN(&gni_req->socku),
    739 					host,
    740 					gni_req->hostoctets,
    741 					service,
    742 					gni_req->servoctets,
    743 					gni_req->flags);
    744 	gni_resp->retry = gni_req->retry;
    745 #ifdef EAI_SYSTEM
    746 	if (EAI_SYSTEM == gni_resp->retcode)
    747 		gni_resp->gni_errno = errno;
    748 #endif
    749 
    750 	if (0 != gni_resp->retcode) {
    751 		gni_resp->hostoctets = 0;
    752 		gni_resp->servoctets = 0;
    753 	} else {
    754 		gni_resp->hostoctets = strlen(host) + 1;
    755 		gni_resp->servoctets = strlen(service) + 1;
    756 		/*
    757 		 * If this query succeeded only after retrying, DNS may have
    758 		 * just become responsive.  Ignore previously-scheduled
    759 		 * retry sleeps once for each pending request, similar to
    760 		 * the way scheduled_sleep() does when its worker_sleep()
    761 		 * is interrupted.
    762 		 */
    763 		if (gni_req->retry > INITIAL_DNS_RETRY) {
    764 			time_now = time(NULL);
    765 			worker_ctx->ignore_scheduled_before = time_now;
    766 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
    767 				humantime(time_now)));
    768 		}
    769 	}
    770 	octets = gni_resp->hostoctets + gni_resp->servoctets;
    771 	/*
    772 	 * Our response consists of a header, followed by the host and
    773 	 * service strings, each null-terminated.
    774 	 */
    775 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
    776 
    777 	resp = erealloc(resp, resp_octets);
    778 	gni_resp = (void *)(resp + 1);
    779 
    780 	gni_resp->octets = sizeof(*gni_resp) + octets;
    781 
    782 	/* cp serves as our current pointer while serializing */
    783 	cp = (void *)(gni_resp + 1);
    784 
    785 	if (0 == gni_resp->retcode) {
    786 		memcpy(cp, host, gni_resp->hostoctets);
    787 		cp += gni_resp->hostoctets;
    788 		memcpy(cp, service, gni_resp->servoctets);
    789 		cp += gni_resp->servoctets;
    790 	}
    791 
    792 	INSIST((size_t)(cp - (char *)resp) == resp_octets);
    793 	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
    794 
    795 	rc = queue_blocking_response(c, resp, resp_octets, req);
    796 	if (rc)
    797 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
    798 	return rc;
    799 }
    800 
    801 
    802 static void
    803 getnameinfo_sometime_complete(
    804 	blocking_work_req	rtype,
    805 	void *			context,
    806 	size_t			respsize,
    807 	void *			resp
    808 	)
    809 {
    810 	blocking_gni_req *	gni_req;
    811 	blocking_gni_resp *	gni_resp;
    812 	dnschild_ctx *		child_ctx;
    813 	char *			host;
    814 	char *			service;
    815 	time_t			time_now;
    816 	int			again;
    817 
    818 	gni_req = context;
    819 	gni_resp = resp;
    820 
    821 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
    822 	DEBUG_REQUIRE(respsize == gni_resp->octets);
    823 
    824 	child_ctx = dnschild_contexts[gni_req->dns_idx];
    825 
    826 	if (0 == gni_resp->retcode) {
    827 		/*
    828 		 * If this query succeeded only after retrying, DNS may have
    829 		 * just become responsive.
    830 		 */
    831 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
    832 			time_now = time(NULL);
    833 			child_ctx->next_dns_timeslot = time_now;
    834 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
    835 				  gni_req->dns_idx, humantime(time_now)));
    836 		}
    837 	} else {
    838 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
    839 		/*
    840 		 * exponential backoff of DNS retries to 64s
    841 		 */
    842 		if (gni_req->retry > 0)
    843 			manage_dns_retry_interval(&gni_req->scheduled,
    844 			    &gni_req->earliest, &gni_req->retry,
    845 						  &child_ctx->next_dns_timeslot, FALSE);
    846 
    847 		if (gni_req->retry > 0 && again) {
    848 			if (!queue_blocking_request(
    849 				BLOCKING_GETNAMEINFO,
    850 				gni_req,
    851 				gni_req->octets,
    852 				&getnameinfo_sometime_complete,
    853 				gni_req))
    854 				return;
    855 
    856 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
    857 		}
    858 	}
    859 
    860 	if (!gni_resp->hostoctets) {
    861 		host = NULL;
    862 		service = NULL;
    863 	} else {
    864 		host = (char *)gni_resp + sizeof(*gni_resp);
    865 		service = (gni_resp->servoctets)
    866 			      ? host + gni_resp->hostoctets
    867 			      : NULL;
    868 	}
    869 
    870 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
    871 			     &gni_req->socku, gni_req->flags, host,
    872 			     service, gni_req->context);
    873 
    874 	free(gni_req);
    875 	/* gni_resp is part of block freed by process_blocking_resp() */
    876 }
    877 
    878 
    879 #ifdef TEST_BLOCKING_WORKER
    880 void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
    881 {
    882 	if (!rescode)
    883 		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
    884 			  host, service, stoa(psau), context));
    885 	else
    886 		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
    887 			  context, rescode, gni_errno, flags, stoa(psau)));
    888 }
    889 #endif	/* TEST_BLOCKING_WORKER */
    890 
    891 
    892 #ifdef HAVE_RES_INIT
    893 static void
    894 reload_resolv_conf(
    895 	dnsworker_ctx *	worker_ctx
    896 	)
    897 {
    898 	time_t	time_now;
    899 
    900 	/*
    901 	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
    902 	 * to pick up on changes from the DHCP client.  [Bug 1226]
    903 	 * When using threads for the workers, this needs to happen
    904 	 * only once per minute process-wide.
    905 	 */
    906 	time_now = time(NULL);
    907 # ifdef WORK_THREAD
    908 	worker_ctx->next_res_init = next_res_init;
    909 # endif
    910 	if (worker_ctx->next_res_init <= time_now) {
    911 		if (worker_ctx->next_res_init != 0)
    912 			res_init();
    913 		worker_ctx->next_res_init = time_now + 60;
    914 # ifdef WORK_THREAD
    915 		next_res_init = worker_ctx->next_res_init;
    916 # endif
    917 	}
    918 }
    919 #endif	/* HAVE_RES_INIT */
    920 
    921 
    922 static u_int
    923 reserve_dnschild_ctx(void)
    924 {
    925 	const size_t	ps = sizeof(dnschild_contexts[0]);
    926 	const size_t	cs = sizeof(*dnschild_contexts[0]);
    927 	u_int		c;
    928 	u_int		new_alloc;
    929 	size_t		octets;
    930 	size_t		new_octets;
    931 
    932 	c = 0;
    933 	while (TRUE) {
    934 		for ( ; c < dnschild_contexts_alloc; c++) {
    935 			if (NULL == dnschild_contexts[c]) {
    936 				dnschild_contexts[c] = emalloc_zero(cs);
    937 
    938 				return c;
    939 			}
    940 		}
    941 		new_alloc = dnschild_contexts_alloc + 20;
    942 		new_octets = new_alloc * ps;
    943 		octets = dnschild_contexts_alloc * ps;
    944 		dnschild_contexts = erealloc_zero(dnschild_contexts,
    945 						  new_octets, octets);
    946 		dnschild_contexts_alloc = new_alloc;
    947 	}
    948 }
    949 
    950 
    951 static u_int
    952 get_dnschild_ctx(void)
    953 {
    954 	static u_int	shared_ctx = UINT_MAX;
    955 
    956 	if (worker_per_query)
    957 		return reserve_dnschild_ctx();
    958 
    959 	if (UINT_MAX == shared_ctx)
    960 		shared_ctx = reserve_dnschild_ctx();
    961 
    962 	return shared_ctx;
    963 }
    964 
    965 
    966 static dnsworker_ctx *
    967 get_worker_context(
    968 	blocking_child *	c,
    969 	u_int			idx
    970 	)
    971 {
    972 	u_int		min_new_alloc;
    973 	u_int		new_alloc;
    974 	size_t		octets;
    975 	size_t		new_octets;
    976 	dnsworker_ctx *	retv;
    977 
    978 	worker_global_lock(TRUE);
    979 
    980 	if (dnsworker_contexts_alloc <= idx) {
    981 		min_new_alloc = 1 + idx;
    982 		/* round new_alloc up to nearest multiple of 4 */
    983 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
    984 		new_octets = new_alloc * sizeof(dnsworker_ctx*);
    985 		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
    986 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
    987 						   new_octets, octets);
    988 		dnsworker_contexts_alloc = new_alloc;
    989 		retv = emalloc_zero(sizeof(dnsworker_ctx));
    990 		dnsworker_contexts[idx] = retv;
    991 	} else if (NULL == (retv = dnsworker_contexts[idx])) {
    992 		retv = emalloc_zero(sizeof(dnsworker_ctx));
    993 		dnsworker_contexts[idx] = retv;
    994 	}
    995 
    996 	worker_global_lock(FALSE);
    997 
    998 	ZERO(*retv);
    999 	retv->c = c;
   1000 	return retv;
   1001 }
   1002 
   1003 
   1004 static void
   1005 scheduled_sleep(
   1006 	time_t		scheduled,
   1007 	time_t		earliest,
   1008 	dnsworker_ctx *	worker_ctx
   1009 	)
   1010 {
   1011 	time_t now;
   1012 
   1013 	if (scheduled < worker_ctx->ignore_scheduled_before) {
   1014 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
   1015 			  humantime(earliest), humantime(scheduled),
   1016 			  humantime(worker_ctx->ignore_scheduled_before)));
   1017 		return;
   1018 	}
   1019 
   1020 	now = time(NULL);
   1021 
   1022 	if (now < earliest) {
   1023 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
   1024 			  humantime(earliest), humantime(scheduled),
   1025 			  humantime(worker_ctx->ignore_scheduled_before)));
   1026 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
   1027 			/* our sleep was interrupted */
   1028 			now = time(NULL);
   1029 			worker_ctx->ignore_scheduled_before = now;
   1030 #ifdef HAVE_RES_INIT
   1031 			worker_ctx->next_res_init = now + 60;
   1032 			next_res_init = worker_ctx->next_res_init;
   1033 			res_init();
   1034 #endif
   1035 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
   1036 				  humantime(worker_ctx->ignore_scheduled_before)));
   1037 		}
   1038 	}
   1039 }
   1040 
   1041 
/*
 * manage_dns_retry_interval - helper for getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete: schedule the next query and
 * compute the retry interval to use after it.
 *
 * pscheduled		out: receives the current time
 * pwhen		out: receives the time of the next query
 * pretry		in: current retry interval in seconds
 *			out: interval to use for the following retry
 * pnext_timeslot	in: earliest time the next query may run
 *			out: updated to the chosen query time
 * forever		nonzero selects the 1024 second cap, zero the
 *			64 second cap
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	now = time(NULL);
	const int	ceiling = forever ? 1024 : 64;
	time_t		fire_at;
	int		interval;

	interval = *pretry;

	/* next query: now + interval, but never before the reserved slot */
	fire_at = now + interval;
	if (fire_at < *pnext_timeslot)
		fire_at = *pnext_timeslot;
	*pnext_timeslot = fire_at;

	/*
	 * Back off more gently than plain doubling.  The intervals run
	 * 2-3-4-6-8-12-16-24-32...: after doubling, a value with more
	 * than one bit set drops its lowest set bit, while a power of
	 * two is reduced by a quarter.  The result is then capped.
	 */
	interval <<= 1;
	if (0 != (interval & (interval - 1)))
		interval &= interval - 1;
	else
		interval -= interval >> 2;
	if (interval > ceiling)
		interval = ceiling;

	*pscheduled = now;
	*pwhen = fire_at;
	*pretry = interval;
}
   1083 
/*
 * should_retry_dns - ntpd's DNS retry policy, shared by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete.
 *
 * rescode	EAI_* result code from the resolver
 * res_errno	errno accompanying the result; only used for the
 *		EAI_SYSTEM debug trace
 *
 * Returns nonzero if the lookup should be tried again, zero if the
 * failure is treated as final.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* set once EAI_AGAIN has been returned by any lookup */
	static int	eai_again_seen;
	int		retry = 0;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		errtext[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		retry = 1;
		break;

	case EAI_FAIL:
		retry = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/*
		 * [Bug 1178] "no such name" is retried only as long as
		 * no EAI_AGAIN has ever been seen.
		 */
		retry = !eai_again_seen;
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be
		 * more discriminating about which errno values require
		 * retrying, but this matches existing behavior.
		 */
		retry = 1;
# ifdef DEBUG
		errno_to_str(res_errno, errtext, sizeof(errtext));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, errtext));
# endif
		break;
#endif

	default:
		/* every other result code is not retried */
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry ? "" : "not "));

	return retry;
}
   1147 
   1148 #else	/* !WORKER follows */
   1149 int ntp_intres_nonempty_compilation_unit;
   1150 #endif
   1151