xref: /freebsd/contrib/ntp/libntp/ntp_intres.c (revision 7f9dff23d3092aa33ad45b2b63e52469b3c13a6e)
1 /*
2  * ntp_intres.c - Implements a generic blocking worker child or thread,
3  *		  initially to provide a nonblocking solution for DNS
4  *		  name to address lookups available with getaddrinfo().
5  *
6  * This is a new implementation as of 2009 sharing the filename and
7  * very little else with the prior implementation, which used a
8  * temporary file to receive a single set of requests from the parent,
9  * and a NTP mode 7 authenticated request to push back responses.
10  *
11  * A primary goal in rewriting this code was the need to support the
12  * pool configuration directive's requirement to retrieve multiple
13  * addresses resolving a single name, which has previously been
14  * satisfied with blocking resolver calls from the ntpd mainline code.
15  *
16  * A secondary goal is to provide a generic mechanism for other
17  * blocking operations to be delegated to a worker using a common
18  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19  * and work_thread.c implement the generic mechanism.  This file
20  * implements the two current consumers, getaddrinfo_sometime() and the
21  * presently unused getnameinfo_sometime().
22  *
23  * Both routines deliver results to a callback and manage memory
24  * allocation, meaning there is no freeaddrinfo_sometime().
25  *
26  * The initial implementation for Unix uses a pair of unidirectional
27  * pipes, one each for requests and responses, connecting the forked
28  * blocking child worker with the ntpd mainline.  The threaded code
29  * uses arrays of pointers to queue requests and responses.
30  *
31  * The parent drives the process, including scheduling sleeps between
32  * retries.
33  *
34  * Memory is managed differently for a child process, which mallocs
35  * request buffers to read from the pipe into, whereas the threaded
36  * code mallocs a copy of the request to hand off to the worker via
37  * the queueing array.  The resulting request buffer is free()d by
38  * platform-independent code.  A wrinkle is the request needs to be
39  * available to the requestor during response processing.
40  *
41  * Response memory allocation is also platform-dependent.  With a
42  * separate process and pipes, the response is free()d after being
43  * written to the pipe.  With threads, the same memory is handed
44  * over and the requestor frees it after processing is completed.
45  *
46  * The code should be generalized to support threads on Unix using
47  * much of the same code used for Windows initially.
48  *
49  */
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 #endif
53 
54 #include "ntp_workimpl.h"
55 
56 #ifdef WORKER
57 
58 #include <stdio.h>
59 #include <ctype.h>
60 #include <signal.h>
61 
62 /**/
63 #ifdef HAVE_SYS_TYPES_H
64 # include <sys/types.h>
65 #endif
66 #ifdef HAVE_NETINET_IN_H
67 #include <netinet/in.h>
68 #endif
69 #include <arpa/inet.h>
70 /**/
71 #ifdef HAVE_SYS_PARAM_H
72 # include <sys/param.h>
73 #endif
74 
75 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76 # define HAVE_RES_INIT
77 #endif
78 
79 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80 # ifdef HAVE_ARPA_NAMESER_H
81 #  include <arpa/nameser.h> /* DNS HEADER struct */
82 # endif
83 # ifdef HAVE_NETDB_H
84 #  include <netdb.h>
85 # endif
86 # include <resolv.h>
87 # ifdef HAVE_INT32_ONLY_WITH_DNS
88 #  define HAVE_INT32
89 # endif
90 # ifdef HAVE_U_INT32_ONLY_WITH_DNS
91 #  define HAVE_U_INT32
92 # endif
93 #endif
94 
95 #include "ntp.h"
96 #include "ntp_debug.h"
97 #include "ntp_malloc.h"
98 #include "ntp_syslog.h"
99 #include "ntp_unixtime.h"
100 #include "ntp_intres.h"
101 #include "intreswork.h"
102 
103 
104 /*
105  * Following are implementations of getaddrinfo_sometime() and
106  * getnameinfo_sometime().  Each is implemented in three routines:
107  *
108  * getaddrinfo_sometime()		getnameinfo_sometime()
109  * blocking_getaddrinfo()		blocking_getnameinfo()
110  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
111  *
112  * The first runs in the parent and marshalls (or serializes) request
113  * parameters into a request blob which is processed in the child by
114  * the second routine, blocking_*(), which serializes the results into
115  * a response blob unpacked by the third routine, *_complete(), which
116  * calls the callback routine provided with the request and frees
117  * _request_ memory allocated by the first routine.  Response memory
118  * is managed by the code which calls the *_complete routines.
119  */
120 
121 
122 /* === typedefs === */
123 typedef struct blocking_gai_req_tag {	/* marshalled args */
124 	size_t			octets;
125 	u_int			dns_idx;
126 	time_t			scheduled;
127 	time_t			earliest;
128 	int			retry;
129 	struct addrinfo		hints;
130 	u_int			qflags;
131 	gai_sometime_callback	callback;
132 	void *			context;
133 	size_t			nodesize;
134 	size_t			servsize;
135 } blocking_gai_req;
136 
/*
 * Fixed-size head of a serialized getaddrinfo() response.  It is
 * followed by ai_count struct addrinfo, then ai_count sockaddr_u,
 * and finally the canonical name strings.
 */
struct blocking_gai_resp_tag {
	size_t			octets;		/* this head plus the payload */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* retry value copied from request */
	int			gai_errno;	/* errno, for the EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries */
};

typedef struct blocking_gai_resp_tag blocking_gai_resp;
148 
149 typedef struct blocking_gni_req_tag {
150 	size_t			octets;
151 	u_int			dns_idx;
152 	time_t			scheduled;
153 	time_t			earliest;
154 	int			retry;
155 	size_t			hostoctets;
156 	size_t			servoctets;
157 	int			flags;
158 	gni_sometime_callback	callback;
159 	void *			context;
160 	sockaddr_u		socku;
161 } blocking_gni_req;
162 
/*
 * Fixed-size head of a serialized getnameinfo() response.  It is
 * followed by hostoctets bytes of null-terminated host, then
 * servoctets bytes of null-terminated service.
 */
struct blocking_gni_resp_tag {
	size_t			octets;		/* this head plus both strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno;	/* errno, for the EAI_SYSTEM case */
	int			retry;		/* retry value copied from request */
	size_t			hostoctets;	/* host length incl. NUL, 0 on error */
	size_t			servoctets;	/* service length incl. NUL, 0 on error */
};

typedef struct blocking_gni_resp_tag blocking_gni_resp;
175 
176 /* per-DNS-worker state in parent */
177 typedef struct dnschild_ctx_tag {
178 	u_int	index;
179 	time_t	next_dns_timeslot;
180 } dnschild_ctx;
181 
182 /* per-DNS-worker state in worker */
183 typedef struct dnsworker_ctx_tag {
184 	blocking_child *	c;
185 	time_t			ignore_scheduled_before;
186 #ifdef HAVE_RES_INIT
187 	time_t	next_res_init;
188 #endif
189 } dnsworker_ctx;
190 
191 
192 /* === variables === */
193 dnschild_ctx **		dnschild_contexts;		/* parent */
194 u_int			dnschild_contexts_alloc;
195 dnsworker_ctx **	dnsworker_contexts;		/* child */
196 u_int			dnsworker_contexts_alloc;
197 
198 #ifdef HAVE_RES_INIT
199 static	time_t		next_res_init;
200 #endif
201 
202 
203 /* === forward declarations === */
/* context bookkeeping */
static u_int		reserve_dnschild_ctx(void);
static u_int		get_dnschild_ctx(void);
static dnsworker_ctx *	get_worker_context(blocking_child *c, u_int idx);

/* scheduling and retry policy */
static void		scheduled_sleep(time_t scheduled, time_t earliest,
					dnsworker_ctx *worker_ctx);
static void		manage_dns_retry_interval(time_t *pscheduled,
						  time_t *pwhen,
						  int *pretry,
						  time_t *pnext_timeslot,
						  int forever /* BOOL */);
static int		should_retry_dns(int rescode, int res_errno);

#ifdef HAVE_RES_INIT
static void		reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
/* without res_init() there is nothing to reload: evaluate and discard */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif

/* completion routines run in the parent */
static void		getaddrinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
static void		getnameinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
227 
228 
229 /* === functions === */
230 /*
231  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
232  *			  invokes provided callback completion function.
233  */
234 int
235 getaddrinfo_sometime_ex(
236 	const char *		node,
237 	const char *		service,
238 	const struct addrinfo *	hints,
239 	int			retry,
240 	gai_sometime_callback	callback,
241 	void *			context,
242 	u_int			qflags
243 	)
244 {
245 	blocking_gai_req *	gai_req;
246 	u_int			idx;
247 	dnschild_ctx *		child_ctx;
248 	size_t			req_size;
249 	size_t			nodesize;
250 	size_t			servsize;
251 	time_t			now;
252 
253 	REQUIRE(NULL != node);
254 	if (NULL != hints) {
255 		REQUIRE(0 == hints->ai_addrlen);
256 		REQUIRE(NULL == hints->ai_addr);
257 		REQUIRE(NULL == hints->ai_canonname);
258 		REQUIRE(NULL == hints->ai_next);
259 	}
260 
261 	idx = get_dnschild_ctx();
262 	child_ctx = dnschild_contexts[idx];
263 
264 	nodesize = strlen(node) + 1;
265 	servsize = strlen(service) + 1;
266 	req_size = sizeof(*gai_req) + nodesize + servsize;
267 
268 	gai_req = emalloc_zero(req_size);
269 
270 	gai_req->octets = req_size;
271 	gai_req->dns_idx = idx;
272 	now = time(NULL);
273 	gai_req->scheduled = now;
274 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
275 	child_ctx->next_dns_timeslot = gai_req->earliest;
276 	if (hints != NULL)
277 		gai_req->hints = *hints;
278 	gai_req->retry = retry;
279 	gai_req->callback = callback;
280 	gai_req->context = context;
281 	gai_req->nodesize = nodesize;
282 	gai_req->servsize = servsize;
283 	gai_req->qflags = qflags;
284 
285 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
286 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
287 	       servsize);
288 
289 	if (queue_blocking_request(
290 		BLOCKING_GETADDRINFO,
291 		gai_req,
292 		req_size,
293 		&getaddrinfo_sometime_complete,
294 		gai_req)) {
295 
296 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
297 		errno = EFAULT;
298 		return -1;
299 	}
300 
301 	return 0;
302 }
303 
304 int
305 blocking_getaddrinfo(
306 	blocking_child *	c,
307 	blocking_pipe_header *	req
308 	)
309 {
310 	blocking_gai_req *	gai_req;
311 	dnsworker_ctx *		worker_ctx;
312 	blocking_pipe_header *	resp;
313 	blocking_gai_resp *	gai_resp;
314 	char *			node;
315 	char *			service;
316 	struct addrinfo *	ai_res;
317 	struct addrinfo *	ai;
318 	struct addrinfo *	serialized_ai;
319 	size_t			canons_octets;
320 	size_t			this_octets;
321 	size_t			resp_octets;
322 	char *			cp;
323 	time_t			time_now;
324 
325 	gai_req = (void *)((char *)req + sizeof(*req));
326 	node = (char *)gai_req + sizeof(*gai_req);
327 	service = node + gai_req->nodesize;
328 
329 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
330 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
331 			worker_ctx);
332 	reload_resolv_conf(worker_ctx);
333 
334 	/*
335 	 * Take a shot at the final size, better to overestimate
336 	 * at first and then realloc to a smaller size.
337 	 */
338 
339 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
340 		      16 * (sizeof(struct addrinfo) +
341 			    sizeof(sockaddr_u)) +
342 		      256;
343 	resp = emalloc_zero(resp_octets);
344 	gai_resp = (void *)(resp + 1);
345 
346 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
347 		  node, service, gai_req->hints.ai_family,
348 		  gai_req->hints.ai_flags));
349 #ifdef DEBUG
350 	if (debug >= 2)
351 		fflush(stdout);
352 #endif
353 	ai_res = NULL;
354 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
355 					&ai_res);
356 	gai_resp->retry = gai_req->retry;
357 #ifdef EAI_SYSTEM
358 	if (EAI_SYSTEM == gai_resp->retcode)
359 		gai_resp->gai_errno = errno;
360 #endif
361 	canons_octets = 0;
362 
363 	if (0 == gai_resp->retcode) {
364 		ai = ai_res;
365 		while (NULL != ai) {
366 			gai_resp->ai_count++;
367 			if (ai->ai_canonname)
368 				canons_octets += strlen(ai->ai_canonname) + 1;
369 			ai = ai->ai_next;
370 		}
371 		/*
372 		 * If this query succeeded only after retrying, DNS may have
373 		 * just become responsive.  Ignore previously-scheduled
374 		 * retry sleeps once for each pending request, similar to
375 		 * the way scheduled_sleep() does when its worker_sleep()
376 		 * is interrupted.
377 		 */
378 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
379 			time_now = time(NULL);
380 			worker_ctx->ignore_scheduled_before = time_now;
381 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
382 				  humantime(time_now)));
383 		}
384 	}
385 
386 	/*
387 	 * Our response consists of a header, followed by ai_count
388 	 * addrinfo structs followed by ai_count sockaddr_storage
389 	 * structs followed by the canonical names.
390 	 */
391 	gai_resp->octets = sizeof(*gai_resp)
392 			    + gai_resp->ai_count
393 				* (sizeof(gai_req->hints)
394 				   + sizeof(sockaddr_u))
395 			    + canons_octets;
396 
397 	resp_octets = sizeof(*resp) + gai_resp->octets;
398 	resp = erealloc(resp, resp_octets);
399 	gai_resp = (void *)(resp + 1);
400 
401 	/* cp serves as our current pointer while serializing */
402 	cp = (void *)(gai_resp + 1);
403 	canons_octets = 0;
404 
405 	if (0 == gai_resp->retcode) {
406 		ai = ai_res;
407 		while (NULL != ai) {
408 			memcpy(cp, ai, sizeof(*ai));
409 			serialized_ai = (void *)cp;
410 			cp += sizeof(*ai);
411 
412 			/* transform ai_canonname into offset */
413 			if (NULL != serialized_ai->ai_canonname) {
414 				serialized_ai->ai_canonname = (char *)canons_octets;
415 				canons_octets += strlen(ai->ai_canonname) + 1;
416 			}
417 
418 			/* leave fixup of ai_addr pointer for receiver */
419 
420 			ai = ai->ai_next;
421 		}
422 
423 		ai = ai_res;
424 		while (NULL != ai) {
425 			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
426 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
427 			cp += sizeof(sockaddr_u);
428 
429 			ai = ai->ai_next;
430 		}
431 
432 		ai = ai_res;
433 		while (NULL != ai) {
434 			if (NULL != ai->ai_canonname) {
435 				this_octets = strlen(ai->ai_canonname) + 1;
436 				memcpy(cp, ai->ai_canonname, this_octets);
437 				cp += this_octets;
438 			}
439 
440 			ai = ai->ai_next;
441 		}
442 		freeaddrinfo(ai_res);
443 	}
444 
445 	/*
446 	 * make sure our walk and earlier calc match
447 	 */
448 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
449 
450 	if (queue_blocking_response(c, resp, resp_octets, req)) {
451 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
452 		return -1;
453 	}
454 
455 	return 0;
456 }
457 
458 int
459 getaddrinfo_sometime(
460 	const char *		node,
461 	const char *		service,
462 	const struct addrinfo *	hints,
463 	int			retry,
464 	gai_sometime_callback	callback,
465 	void *			context
466 	)
467 {
468 	return getaddrinfo_sometime_ex(node, service, hints, retry,
469 				       callback, context, 0);
470 }
471 
472 
473 static void
474 getaddrinfo_sometime_complete(
475 	blocking_work_req	rtype,
476 	void *			context,
477 	size_t			respsize,
478 	void *			resp
479 	)
480 {
481 	blocking_gai_req *	gai_req;
482 	blocking_gai_resp *	gai_resp;
483 	dnschild_ctx *		child_ctx;
484 	struct addrinfo *	ai;
485 	struct addrinfo *	next_ai;
486 	sockaddr_u *		psau;
487 	char *			node;
488 	char *			service;
489 	char *			canon_start;
490 	time_t			time_now;
491 	int			again, noerr;
492 	int			af;
493 	const char *		fam_spec;
494 	int			i;
495 
496 	gai_req = context;
497 	gai_resp = resp;
498 
499 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
500 	DEBUG_REQUIRE(respsize == gai_resp->octets);
501 
502 	node = (char *)gai_req + sizeof(*gai_req);
503 	service = node + gai_req->nodesize;
504 
505 	child_ctx = dnschild_contexts[gai_req->dns_idx];
506 
507 	if (0 == gai_resp->retcode) {
508 		/*
509 		 * If this query succeeded only after retrying, DNS may have
510 		 * just become responsive.
511 		 */
512 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
513 			time_now = time(NULL);
514 			child_ctx->next_dns_timeslot = time_now;
515 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
516 				  gai_req->dns_idx, humantime(time_now)));
517 		}
518 	} else {
519 		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
520 		again = noerr || should_retry_dns(
521 					gai_resp->retcode, gai_resp->gai_errno);
522 		/*
523 		 * exponential backoff of DNS retries to 64s
524 		 */
525 		if (gai_req->retry > 0 && again) {
526 			/* log the first retry only */
527 			if (INITIAL_DNS_RETRY == gai_req->retry)
528 				NLOG(NLOG_SYSINFO) {
529 					af = gai_req->hints.ai_family;
530 					fam_spec = (AF_INET6 == af)
531 						       ? " (AAAA)"
532 						       : (AF_INET == af)
533 							     ? " (A)"
534 							     : "";
535 #ifdef EAI_SYSTEM
536 					if (EAI_SYSTEM == gai_resp->retcode) {
537 						errno = gai_resp->gai_errno;
538 						msyslog(LOG_INFO,
539 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
540 							node, fam_spec,
541 							gai_resp->gai_errno);
542 					} else
543 #endif
544 						msyslog(LOG_INFO,
545 							"retrying DNS %s%s: %s (%d)",
546 							node, fam_spec,
547 							gai_strerror(gai_resp->retcode),
548 							gai_resp->retcode);
549 				}
550 			manage_dns_retry_interval(
551 				&gai_req->scheduled, &gai_req->earliest,
552 				&gai_req->retry, &child_ctx->next_dns_timeslot,
553 				noerr);
554 			if (!queue_blocking_request(
555 					BLOCKING_GETADDRINFO,
556 					gai_req,
557 					gai_req->octets,
558 					&getaddrinfo_sometime_complete,
559 					gai_req))
560 				return;
561 			else
562 				msyslog(LOG_ERR,
563 					"unable to retry hostname %s",
564 					node);
565 		}
566 	}
567 
568 	/*
569 	 * fixup pointers in returned addrinfo array
570 	 */
571 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
572 	next_ai = NULL;
573 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
574 		ai[i].ai_next = next_ai;
575 		next_ai = &ai[i];
576 	}
577 
578 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
579 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
580 
581 	for (i = 0; i < gai_resp->ai_count; i++) {
582 		if (NULL != ai[i].ai_addr)
583 			ai[i].ai_addr = &psau->sa;
584 		psau++;
585 		if (NULL != ai[i].ai_canonname)
586 			ai[i].ai_canonname += (size_t)canon_start;
587 	}
588 
589 	ENSURE((char *)psau == canon_start);
590 
591 	if (!gai_resp->ai_count)
592 		ai = NULL;
593 
594 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
595 			     gai_req->context, node, service,
596 			     &gai_req->hints, ai);
597 
598 	free(gai_req);
599 	/* gai_resp is part of block freed by process_blocking_resp() */
600 }
601 
602 
603 #ifdef TEST_BLOCKING_WORKER
604 void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
605 {
606 	sockaddr_u addr;
607 
608 	if (rescode) {
609 		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
610 			  context, rescode, name, service));
611 		return;
612 	}
613 	while (!rescode && NULL != ai_res) {
614 		ZERO_SOCK(&addr);
615 		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
616 		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
617 			  context,
618 			  AF(&addr),
619 			  stoa(&addr),
620 			  (ai_res->ai_canonname)
621 			      ? ai_res->ai_canonname
622 			      : "",
623 			  (SOCK_DGRAM == ai_res->ai_socktype)
624 			      ? "DGRAM"
625 			      : (SOCK_STREAM == ai_res->ai_socktype)
626 				    ? "STREAM"
627 				    : "(other)",
628 			  ai_res,
629 			  ai_res->ai_addr,
630 			  ai_res->ai_next));
631 
632 		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
633 
634 		ai_res = ai_res->ai_next;
635 	}
636 }
637 #endif	/* TEST_BLOCKING_WORKER */
638 
639 
640 int
641 getnameinfo_sometime(
642 	sockaddr_u *		psau,
643 	size_t			hostoctets,
644 	size_t			servoctets,
645 	int			flags,
646 	gni_sometime_callback	callback,
647 	void *			context
648 	)
649 {
650 	blocking_gni_req *	gni_req;
651 	u_int			idx;
652 	dnschild_ctx *		child_ctx;
653 	time_t			time_now;
654 
655 	REQUIRE(hostoctets);
656 	REQUIRE(hostoctets + servoctets < 1024);
657 
658 	idx = get_dnschild_ctx();
659 	child_ctx = dnschild_contexts[idx];
660 
661 	gni_req = emalloc_zero(sizeof(*gni_req));
662 
663 	gni_req->octets = sizeof(*gni_req);
664 	gni_req->dns_idx = idx;
665 	time_now = time(NULL);
666 	gni_req->scheduled = time_now;
667 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
668 	child_ctx->next_dns_timeslot = gni_req->earliest;
669 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
670 	gni_req->hostoctets = hostoctets;
671 	gni_req->servoctets = servoctets;
672 	gni_req->flags = flags;
673 	gni_req->retry = INITIAL_DNS_RETRY;
674 	gni_req->callback = callback;
675 	gni_req->context = context;
676 
677 	if (queue_blocking_request(
678 		BLOCKING_GETNAMEINFO,
679 		gni_req,
680 		sizeof(*gni_req),
681 		&getnameinfo_sometime_complete,
682 		gni_req)) {
683 
684 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
685 		errno = EFAULT;
686 		return -1;
687 	}
688 
689 	return 0;
690 }
691 
692 
693 int
694 blocking_getnameinfo(
695 	blocking_child *	c,
696 	blocking_pipe_header *	req
697 	)
698 {
699 	blocking_gni_req *	gni_req;
700 	dnsworker_ctx *		worker_ctx;
701 	blocking_pipe_header *	resp;
702 	blocking_gni_resp *	gni_resp;
703 	size_t			octets;
704 	size_t			resp_octets;
705 	char *			service;
706 	char *			cp;
707 	int			rc;
708 	time_t			time_now;
709 	char			host[1024];
710 
711 	gni_req = (void *)((char *)req + sizeof(*req));
712 
713 	octets = gni_req->hostoctets + gni_req->servoctets;
714 
715 	/*
716 	 * Some alloca() implementations are fragile regarding
717 	 * large allocations.  We only need room for the host
718 	 * and service names.
719 	 */
720 	REQUIRE(octets < sizeof(host));
721 	service = host + gni_req->hostoctets;
722 
723 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
724 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
725 			worker_ctx);
726 	reload_resolv_conf(worker_ctx);
727 
728 	/*
729 	 * Take a shot at the final size, better to overestimate
730 	 * then realloc to a smaller size.
731 	 */
732 
733 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
734 	resp = emalloc_zero(resp_octets);
735 	gni_resp = (void *)((char *)resp + sizeof(*resp));
736 
737 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
738 		  stoa(&gni_req->socku), gni_req->flags,
739 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
740 
741 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
742 					SOCKLEN(&gni_req->socku),
743 					host,
744 					gni_req->hostoctets,
745 					service,
746 					gni_req->servoctets,
747 					gni_req->flags);
748 	gni_resp->retry = gni_req->retry;
749 #ifdef EAI_SYSTEM
750 	if (EAI_SYSTEM == gni_resp->retcode)
751 		gni_resp->gni_errno = errno;
752 #endif
753 
754 	if (0 != gni_resp->retcode) {
755 		gni_resp->hostoctets = 0;
756 		gni_resp->servoctets = 0;
757 	} else {
758 		gni_resp->hostoctets = strlen(host) + 1;
759 		gni_resp->servoctets = strlen(service) + 1;
760 		/*
761 		 * If this query succeeded only after retrying, DNS may have
762 		 * just become responsive.  Ignore previously-scheduled
763 		 * retry sleeps once for each pending request, similar to
764 		 * the way scheduled_sleep() does when its worker_sleep()
765 		 * is interrupted.
766 		 */
767 		if (gni_req->retry > INITIAL_DNS_RETRY) {
768 			time_now = time(NULL);
769 			worker_ctx->ignore_scheduled_before = time_now;
770 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
771 				humantime(time_now)));
772 		}
773 	}
774 	octets = gni_resp->hostoctets + gni_resp->servoctets;
775 	/*
776 	 * Our response consists of a header, followed by the host and
777 	 * service strings, each null-terminated.
778 	 */
779 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
780 
781 	resp = erealloc(resp, resp_octets);
782 	gni_resp = (void *)(resp + 1);
783 
784 	gni_resp->octets = sizeof(*gni_resp) + octets;
785 
786 	/* cp serves as our current pointer while serializing */
787 	cp = (void *)(gni_resp + 1);
788 
789 	if (0 == gni_resp->retcode) {
790 		memcpy(cp, host, gni_resp->hostoctets);
791 		cp += gni_resp->hostoctets;
792 		memcpy(cp, service, gni_resp->servoctets);
793 		cp += gni_resp->servoctets;
794 	}
795 
796 	INSIST((size_t)(cp - (char *)resp) == resp_octets);
797 	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
798 
799 	rc = queue_blocking_response(c, resp, resp_octets, req);
800 	if (rc)
801 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
802 	return rc;
803 }
804 
805 
806 static void
807 getnameinfo_sometime_complete(
808 	blocking_work_req	rtype,
809 	void *			context,
810 	size_t			respsize,
811 	void *			resp
812 	)
813 {
814 	blocking_gni_req *	gni_req;
815 	blocking_gni_resp *	gni_resp;
816 	dnschild_ctx *		child_ctx;
817 	char *			host;
818 	char *			service;
819 	time_t			time_now;
820 	int			again;
821 
822 	gni_req = context;
823 	gni_resp = resp;
824 
825 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
826 	DEBUG_REQUIRE(respsize == gni_resp->octets);
827 
828 	child_ctx = dnschild_contexts[gni_req->dns_idx];
829 
830 	if (0 == gni_resp->retcode) {
831 		/*
832 		 * If this query succeeded only after retrying, DNS may have
833 		 * just become responsive.
834 		 */
835 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
836 			time_now = time(NULL);
837 			child_ctx->next_dns_timeslot = time_now;
838 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
839 				  gni_req->dns_idx, humantime(time_now)));
840 		}
841 	} else {
842 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
843 		/*
844 		 * exponential backoff of DNS retries to 64s
845 		 */
846 		if (gni_req->retry > 0)
847 			manage_dns_retry_interval(&gni_req->scheduled,
848 			    &gni_req->earliest, &gni_req->retry,
849 						  &child_ctx->next_dns_timeslot, FALSE);
850 
851 		if (gni_req->retry > 0 && again) {
852 			if (!queue_blocking_request(
853 				BLOCKING_GETNAMEINFO,
854 				gni_req,
855 				gni_req->octets,
856 				&getnameinfo_sometime_complete,
857 				gni_req))
858 				return;
859 
860 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
861 		}
862 	}
863 
864 	if (!gni_resp->hostoctets) {
865 		host = NULL;
866 		service = NULL;
867 	} else {
868 		host = (char *)gni_resp + sizeof(*gni_resp);
869 		service = (gni_resp->servoctets)
870 			      ? host + gni_resp->hostoctets
871 			      : NULL;
872 	}
873 
874 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
875 			     &gni_req->socku, gni_req->flags, host,
876 			     service, gni_req->context);
877 
878 	free(gni_req);
879 	/* gni_resp is part of block freed by process_blocking_resp() */
880 }
881 
882 
883 #ifdef TEST_BLOCKING_WORKER
884 void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
885 {
886 	if (!rescode)
887 		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
888 			  host, service, stoa(psau), context));
889 	else
890 		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
891 			  context, rescode, gni_errno, flags, stoa(psau)));
892 }
893 #endif	/* TEST_BLOCKING_WORKER */
894 
895 
896 #ifdef HAVE_RES_INIT
897 static void
898 reload_resolv_conf(
899 	dnsworker_ctx *	worker_ctx
900 	)
901 {
902 	time_t	time_now;
903 
904 	/*
905 	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
906 	 * to pick up on changes from the DHCP client.  [Bug 1226]
907 	 * When using threads for the workers, this needs to happen
908 	 * only once per minute process-wide.
909 	 */
910 	time_now = time(NULL);
911 # ifdef WORK_THREAD
912 	worker_ctx->next_res_init = next_res_init;
913 # endif
914 	if (worker_ctx->next_res_init <= time_now) {
915 		if (worker_ctx->next_res_init != 0)
916 			res_init();
917 		worker_ctx->next_res_init = time_now + 60;
918 # ifdef WORK_THREAD
919 		next_res_init = worker_ctx->next_res_init;
920 # endif
921 	}
922 }
923 #endif	/* HAVE_RES_INIT */
924 
925 
926 static u_int
927 reserve_dnschild_ctx(void)
928 {
929 	const size_t	ps = sizeof(dnschild_contexts[0]);
930 	const size_t	cs = sizeof(*dnschild_contexts[0]);
931 	u_int		c;
932 	u_int		new_alloc;
933 	size_t		octets;
934 	size_t		new_octets;
935 
936 	c = 0;
937 	while (TRUE) {
938 		for ( ; c < dnschild_contexts_alloc; c++) {
939 			if (NULL == dnschild_contexts[c]) {
940 				dnschild_contexts[c] = emalloc_zero(cs);
941 
942 				return c;
943 			}
944 		}
945 		new_alloc = dnschild_contexts_alloc + 20;
946 		new_octets = new_alloc * ps;
947 		octets = dnschild_contexts_alloc * ps;
948 		dnschild_contexts = erealloc_zero(dnschild_contexts,
949 						  new_octets, octets);
950 		dnschild_contexts_alloc = new_alloc;
951 	}
952 }
953 
954 
955 static u_int
956 get_dnschild_ctx(void)
957 {
958 	static u_int	shared_ctx = UINT_MAX;
959 
960 	if (worker_per_query)
961 		return reserve_dnschild_ctx();
962 
963 	if (UINT_MAX == shared_ctx)
964 		shared_ctx = reserve_dnschild_ctx();
965 
966 	return shared_ctx;
967 }
968 
969 
970 static dnsworker_ctx *
971 get_worker_context(
972 	blocking_child *	c,
973 	u_int			idx
974 	)
975 {
976 	u_int		min_new_alloc;
977 	u_int		new_alloc;
978 	size_t		octets;
979 	size_t		new_octets;
980 	dnsworker_ctx *	retv;
981 
982 	worker_global_lock(TRUE);
983 
984 	if (dnsworker_contexts_alloc <= idx) {
985 		min_new_alloc = 1 + idx;
986 		/* round new_alloc up to nearest multiple of 4 */
987 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
988 		new_octets = new_alloc * sizeof(dnsworker_ctx*);
989 		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
990 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
991 						   new_octets, octets);
992 		dnsworker_contexts_alloc = new_alloc;
993 		retv = emalloc_zero(sizeof(dnsworker_ctx));
994 		dnsworker_contexts[idx] = retv;
995 	} else if (NULL == (retv = dnsworker_contexts[idx])) {
996 		retv = emalloc_zero(sizeof(dnsworker_ctx));
997 		dnsworker_contexts[idx] = retv;
998 	}
999 
1000 	worker_global_lock(FALSE);
1001 
1002 	ZERO(*retv);
1003 	retv->c = c;
1004 	return retv;
1005 }
1006 
1007 
1008 static void
1009 scheduled_sleep(
1010 	time_t		scheduled,
1011 	time_t		earliest,
1012 	dnsworker_ctx *	worker_ctx
1013 	)
1014 {
1015 	time_t now;
1016 
1017 	if (scheduled < worker_ctx->ignore_scheduled_before) {
1018 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1019 			  humantime(earliest), humantime(scheduled),
1020 			  humantime(worker_ctx->ignore_scheduled_before)));
1021 		return;
1022 	}
1023 
1024 	now = time(NULL);
1025 
1026 	if (now < earliest) {
1027 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1028 			  humantime(earliest), humantime(scheduled),
1029 			  humantime(worker_ctx->ignore_scheduled_before)));
1030 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1031 			/* our sleep was interrupted */
1032 			now = time(NULL);
1033 			worker_ctx->ignore_scheduled_before = now;
1034 #ifdef HAVE_RES_INIT
1035 			worker_ctx->next_res_init = now + 60;
1036 			next_res_init = worker_ctx->next_res_init;
1037 			res_init();
1038 #endif
1039 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1040 				  humantime(worker_ctx->ignore_scheduled_before)));
1041 		}
1042 	}
1043 }
1044 
1045 
/*
 * manage_dns_retry_interval - helper for the *_sometime_complete()
 * routines which schedules the next attempt of a failed query and
 * computes the retry interval to use after that.
 *
 * pscheduled		out: set to the current time.
 * pwhen		out: start time of the next attempt, which is
 *			the later of now + *pretry and *pnext_timeslot.
 * pretry		in: current interval, out: next interval.
 * pnext_timeslot	in/out: raised to the time stored in *pwhen.
 * forever		nonzero caps the interval at 1024 seconds
 *			instead of 64.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	time_t	now;
	time_t	due;
	int	interval;
	int	ceiling;

	now = time(NULL);
	interval = *pretry;

	due = now + interval;
	if (due < *pnext_timeslot)
		due = *pnext_timeslot;
	*pnext_timeslot = due;

	/*
	 * Back off more gently than plain doubling: the sequence runs
	 * 2-3-4-6-8-12-16-24-32...  After doubling, a power of two
	 * loses a quarter (4 -> 3, 8 -> 6), anything else loses its
	 * lowest set bit (6 -> 4, 12 -> 8).
	 */
	ceiling = forever ? 1024 : 64;
	interval <<= 1;
	if (0 == (interval & (interval - 1)))
		interval -= (interval >> 2);
	else
		interval &= (interval - 1);
	if (interval > ceiling)
		interval = ceiling;

	*pscheduled = now;
	*pwhen = due;
	*pretry = interval;
}
1087 
/*
 * should_retry_dns - ntpd's DNS retry policy, shared by
 * getaddrinfo_sometime_complete() and getnameinfo_sometime_complete().
 *
 * rescode	getaddrinfo()/getnameinfo() result code.
 * res_errno	errno captured for the EAI_SYSTEM case.
 *
 * Returns nonzero if the failure looks temporary and the query
 * should be retried, zero otherwise.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* set once EAI_AGAIN has been seen, never cleared [Bug 1178] */
	static int	eai_again_seen;
	int		retry_ok;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		retry_ok = 1;
		break;

	case EAI_FAIL:
		retry_ok = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/* retried only until the first EAI_AGAIN has been seen */
		retry_ok = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * The real error is in errno.  Every errno value is
		 * retried, which matches existing behavior; being
		 * more discriminating would be better.
		 */
		retry_ok = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* any other failure is not retried */
		retry_ok = 0;
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry_ok ? "" : "not "));

	return retry_ok;
}
1151 
1152 #else	/* !WORKER follows */
/*
 * Sole definition in this file when WORKER is not defined;
 * presumably present only so the translation unit is not empty.
 */
int ntp_intres_nonempty_compilation_unit;
1154 #endif
1155