xref: /freebsd/contrib/ntp/libntp/ntp_intres.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  * ntp_intres.c - Implements a generic blocking worker child or thread,
3  *		  initially to provide a nonblocking solution for DNS
4  *		  name to address lookups available with getaddrinfo().
5  *
6  * This is a new implementation as of 2009 sharing the filename and
7  * very little else with the prior implementation, which used a
8  * temporary file to receive a single set of requests from the parent,
9  * and a NTP mode 7 authenticated request to push back responses.
10  *
11  * A primary goal in rewriting this code was the need to support the
12  * pool configuration directive's requirement to retrieve multiple
13  * addresses resolving a single name, which has previously been
14  * satisfied with blocking resolver calls from the ntpd mainline code.
15  *
16  * A secondary goal is to provide a generic mechanism for other
17  * blocking operations to be delegated to a worker using a common
18  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19  * and work_thread.c implement the generic mechanism.  This file
20  * implements the two current consumers, getaddrinfo_sometime() and the
21  * presently unused getnameinfo_sometime().
22  *
23  * Both routines deliver results to a callback and manage memory
24  * allocation, meaning there is no freeaddrinfo_sometime().
25  *
26  * The initial implementation for Unix uses a pair of unidirectional
27  * pipes, one each for requests and responses, connecting the forked
28  * blocking child worker with the ntpd mainline.  The threaded code
29  * uses arrays of pointers to queue requests and responses.
30  *
31  * The parent drives the process, including scheduling sleeps between
32  * retries.
33  *
34  * Memory is managed differently for a child process, which mallocs
35  * request buffers to read from the pipe into, whereas the threaded
36  * code mallocs a copy of the request to hand off to the worker via
37  * the queueing array.  The resulting request buffer is free()d by
38  * platform-independent code.  A wrinkle is the request needs to be
39  * available to the requestor during response processing.
40  *
41  * Response memory allocation is also platform-dependent.  With a
42  * separate process and pipes, the response is free()d after being
43  * written to the pipe.  With threads, the same memory is handed
44  * over and the requestor frees it after processing is completed.
45  *
46  * The code should be generalized to support threads on Unix using
47  * much of the same code used for Windows initially.
48  *
49  */
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 #endif
53 
54 #include "ntp_workimpl.h"
55 
56 #ifdef WORKER
57 
58 #include <stdio.h>
59 #include <ctype.h>
60 #include <signal.h>
61 
62 /**/
63 #ifdef HAVE_SYS_TYPES_H
64 # include <sys/types.h>
65 #endif
66 #ifdef HAVE_NETINET_IN_H
67 #include <netinet/in.h>
68 #endif
69 #include <arpa/inet.h>
70 /**/
71 #ifdef HAVE_SYS_PARAM_H
72 # include <sys/param.h>
73 #endif
74 
75 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76 # define HAVE_RES_INIT
77 #endif
78 
79 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80 # ifdef HAVE_ARPA_NAMESER_H
81 #  include <arpa/nameser.h> /* DNS HEADER struct */
82 # endif
83 # ifdef HAVE_NETDB_H
84 #  include <netdb.h>
85 # endif
86 # include <resolv.h>
87 #endif
88 
89 #include "ntp.h"
90 #include "ntp_debug.h"
91 #include "ntp_malloc.h"
92 #include "ntp_syslog.h"
93 #include "ntp_unixtime.h"
94 #include "ntp_intres.h"
95 #include "intreswork.h"
96 
97 
98 /*
99  * Following are implementations of getaddrinfo_sometime() and
100  * getnameinfo_sometime().  Each is implemented in three routines:
101  *
102  * getaddrinfo_sometime()		getnameinfo_sometime()
103  * blocking_getaddrinfo()		blocking_getnameinfo()
104  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
105  *
106  * The first runs in the parent and marshalls (or serializes) request
107  * parameters into a request blob which is processed in the child by
108  * the second routine, blocking_*(), which serializes the results into
109  * a response blob unpacked by the third routine, *_complete(), which
110  * calls the callback routine provided with the request and frees
111  * _request_ memory allocated by the first routine.  Response memory
112  * is managed by the code which calls the *_complete routines.
113  */
114 
115 
116 /* === typedefs === */
/*
 * Marshalled getaddrinfo() request.  Built in the parent by
 * getaddrinfo_sometime_ex() and read in the worker by
 * blocking_getaddrinfo().  The fixed part below is followed directly
 * by nodesize bytes of NUL-terminated node name and then servsize
 * bytes of NUL-terminated service name.
 */
typedef struct blocking_gai_req_tag {	/* marshalled args */
	size_t			octets;		/* this struct plus both trailing strings */
	u_int			dns_idx;	/* index into dnschild_contexts / dnsworker_contexts */
	time_t			scheduled;	/* when this attempt was scheduled */
	time_t			earliest;	/* worker sleeps until this time before resolving */
	int			retry;		/* retry interval in seconds, no retries if <= 0 */
	struct addrinfo		hints;		/* copy of caller's hints, all zero if none given */
	u_int			qflags;		/* query flags, GAIR_F_IGNDNSERR is tested on completion */
	gai_sometime_callback	callback;	/* completion routine run in the parent */
	void *			context;	/* handed back to callback untouched */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
} blocking_gai_req;
130 
/*
 * Marshalled getaddrinfo() response.  Built in the worker by
 * blocking_getaddrinfo() and unpacked in the parent by
 * getaddrinfo_sometime_complete().
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this struct plus all trailing data */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* retry value copied from the request */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* number of serialized addrinfo entries */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;
142 
/*
 * Marshalled getnameinfo() request.  Built in the parent by
 * getnameinfo_sometime() and read in the worker by
 * blocking_getnameinfo().  Fixed size, nothing follows it.
 */
typedef struct blocking_gni_req_tag {
	size_t			octets;		/* sizeof(blocking_gni_req) */
	u_int			dns_idx;	/* index into dnschild_contexts / dnsworker_contexts */
	time_t			scheduled;	/* when this attempt was scheduled */
	time_t			earliest;	/* worker sleeps until this time before resolving */
	int			retry;		/* retry interval in seconds, starts at INITIAL_DNS_RETRY */
	size_t			hostoctets;	/* host buffer size passed to getnameinfo() */
	size_t			servoctets;	/* service buffer size passed to getnameinfo() */
	int			flags;		/* getnameinfo() flags */
	gni_sometime_callback	callback;	/* completion routine run in the parent */
	void *			context;	/* handed back to callback untouched */
	sockaddr_u		socku;		/* address to look up */
} blocking_gni_req;
156 
/*
 * Marshalled getnameinfo() response.  Built in the worker by
 * blocking_getnameinfo() and unpacked in the parent by
 * getnameinfo_sometime_complete().
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this struct plus the trailing strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* retry value copied from the request */
	size_t			hostoctets;	/* bytes of host string incl. NUL, 0 on failure */
	size_t			servoctets;	/* bytes of service string incl. NUL, 0 on failure */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;
169 
/*
 * per-DNS-worker state in parent
 *
 * Entries live in dnschild_contexts[] and are created zeroed by
 * reserve_dnschild_ctx().
 */
typedef struct dnschild_ctx_tag {
	u_int	index;			/* not assigned by the code shown here */
	time_t	next_dns_timeslot;	/* earliest time the next request may be scheduled */
} dnschild_ctx;
175 
/*
 * per-DNS-worker state in worker
 *
 * Entries live in dnsworker_contexts[] and are handed out by
 * get_worker_context().
 */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* child passed to worker_sleep() */
	time_t			ignore_scheduled_before; /* scheduled_sleep() skips the delay for requests scheduled earlier than this */
#ifdef HAVE_RES_INIT
	time_t	next_res_init;		/* next time reload_resolv_conf() may call res_init() */
#endif
} dnsworker_ctx;
184 
185 
186 /* === variables === */
/*
 * Both tables are arrays of pointers indexed by the dns_idx carried
 * in each request; the *_alloc counters hold the number of slots.
 */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;

#ifdef HAVE_RES_INIT
/* shared res_init() deadline, consulted per worker under WORK_THREAD */
static	time_t		next_res_init;
#endif
195 
196 
197 /* === forward declarations === */
/* context slot management: parent table, then worker table */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);
/* worker-side delay until a request's earliest start time */
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
/* retry scheduling and policy used by the *_complete routines */
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *, time_t *,
						  int/*BOOL*/);
static	int		should_retry_dns(int, int);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
/* without res_init() reloading is a no-op that still evaluates wc */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
/* completion callbacks passed to queue_blocking_request() */
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
221 
222 
223 /* === functions === */
224 /*
225  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
226  *			  invokes provided callback completion function.
227  */
228 int
229 getaddrinfo_sometime_ex(
230 	const char *		node,
231 	const char *		service,
232 	const struct addrinfo *	hints,
233 	int			retry,
234 	gai_sometime_callback	callback,
235 	void *			context,
236 	u_int			qflags
237 	)
238 {
239 	blocking_gai_req *	gai_req;
240 	u_int			idx;
241 	dnschild_ctx *		child_ctx;
242 	size_t			req_size;
243 	size_t			nodesize;
244 	size_t			servsize;
245 	time_t			now;
246 
247 	REQUIRE(NULL != node);
248 	if (NULL != hints) {
249 		REQUIRE(0 == hints->ai_addrlen);
250 		REQUIRE(NULL == hints->ai_addr);
251 		REQUIRE(NULL == hints->ai_canonname);
252 		REQUIRE(NULL == hints->ai_next);
253 	}
254 
255 	idx = get_dnschild_ctx();
256 	child_ctx = dnschild_contexts[idx];
257 
258 	nodesize = strlen(node) + 1;
259 	servsize = strlen(service) + 1;
260 	req_size = sizeof(*gai_req) + nodesize + servsize;
261 
262 	gai_req = emalloc_zero(req_size);
263 
264 	gai_req->octets = req_size;
265 	gai_req->dns_idx = idx;
266 	now = time(NULL);
267 	gai_req->scheduled = now;
268 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
269 	child_ctx->next_dns_timeslot = gai_req->earliest;
270 	if (hints != NULL)
271 		gai_req->hints = *hints;
272 	gai_req->retry = retry;
273 	gai_req->callback = callback;
274 	gai_req->context = context;
275 	gai_req->nodesize = nodesize;
276 	gai_req->servsize = servsize;
277 	gai_req->qflags = qflags;
278 
279 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
280 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
281 	       servsize);
282 
283 	if (queue_blocking_request(
284 		BLOCKING_GETADDRINFO,
285 		gai_req,
286 		req_size,
287 		&getaddrinfo_sometime_complete,
288 		gai_req)) {
289 
290 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
291 		errno = EFAULT;
292 		return -1;
293 	}
294 
295 	return 0;
296 }
297 
298 int
299 blocking_getaddrinfo(
300 	blocking_child *	c,
301 	blocking_pipe_header *	req
302 	)
303 {
304 	blocking_gai_req *	gai_req;
305 	dnsworker_ctx *		worker_ctx;
306 	blocking_pipe_header *	resp;
307 	blocking_gai_resp *	gai_resp;
308 	char *			node;
309 	char *			service;
310 	struct addrinfo *	ai_res;
311 	struct addrinfo *	ai;
312 	struct addrinfo *	serialized_ai;
313 	size_t			canons_octets;
314 	size_t			this_octets;
315 	size_t			resp_octets;
316 	char *			cp;
317 	time_t			time_now;
318 
319 	gai_req = (void *)((char *)req + sizeof(*req));
320 	node = (char *)gai_req + sizeof(*gai_req);
321 	service = node + gai_req->nodesize;
322 
323 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
324 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
325 			worker_ctx);
326 	reload_resolv_conf(worker_ctx);
327 
328 	/*
329 	 * Take a shot at the final size, better to overestimate
330 	 * at first and then realloc to a smaller size.
331 	 */
332 
333 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
334 		      16 * (sizeof(struct addrinfo) +
335 			    sizeof(sockaddr_u)) +
336 		      256;
337 	resp = emalloc_zero(resp_octets);
338 	gai_resp = (void *)(resp + 1);
339 
340 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
341 		  node, service, gai_req->hints.ai_family,
342 		  gai_req->hints.ai_flags));
343 #ifdef DEBUG
344 	if (debug >= 2)
345 		fflush(stdout);
346 #endif
347 	ai_res = NULL;
348 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
349 					&ai_res);
350 	gai_resp->retry = gai_req->retry;
351 #ifdef EAI_SYSTEM
352 	if (EAI_SYSTEM == gai_resp->retcode)
353 		gai_resp->gai_errno = errno;
354 #endif
355 	canons_octets = 0;
356 
357 	if (0 == gai_resp->retcode) {
358 		ai = ai_res;
359 		while (NULL != ai) {
360 			gai_resp->ai_count++;
361 			if (ai->ai_canonname)
362 				canons_octets += strlen(ai->ai_canonname) + 1;
363 			ai = ai->ai_next;
364 		}
365 		/*
366 		 * If this query succeeded only after retrying, DNS may have
367 		 * just become responsive.  Ignore previously-scheduled
368 		 * retry sleeps once for each pending request, similar to
369 		 * the way scheduled_sleep() does when its worker_sleep()
370 		 * is interrupted.
371 		 */
372 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
373 			time_now = time(NULL);
374 			worker_ctx->ignore_scheduled_before = time_now;
375 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
376 				  humantime(time_now)));
377 		}
378 	}
379 
380 	/*
381 	 * Our response consists of a header, followed by ai_count
382 	 * addrinfo structs followed by ai_count sockaddr_storage
383 	 * structs followed by the canonical names.
384 	 */
385 	gai_resp->octets = sizeof(*gai_resp)
386 			    + gai_resp->ai_count
387 				* (sizeof(gai_req->hints)
388 				   + sizeof(sockaddr_u))
389 			    + canons_octets;
390 
391 	resp_octets = sizeof(*resp) + gai_resp->octets;
392 	resp = erealloc(resp, resp_octets);
393 	gai_resp = (void *)(resp + 1);
394 
395 	/* cp serves as our current pointer while serializing */
396 	cp = (void *)(gai_resp + 1);
397 	canons_octets = 0;
398 
399 	if (0 == gai_resp->retcode) {
400 		ai = ai_res;
401 		while (NULL != ai) {
402 			memcpy(cp, ai, sizeof(*ai));
403 			serialized_ai = (void *)cp;
404 			cp += sizeof(*ai);
405 
406 			/* transform ai_canonname into offset */
407 			if (NULL != ai->ai_canonname) {
408 				serialized_ai->ai_canonname = (char *)canons_octets;
409 				canons_octets += strlen(ai->ai_canonname) + 1;
410 			}
411 
412 			/* leave fixup of ai_addr pointer for receiver */
413 
414 			ai = ai->ai_next;
415 		}
416 
417 		ai = ai_res;
418 		while (NULL != ai) {
419 			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
420 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
421 			cp += sizeof(sockaddr_u);
422 
423 			ai = ai->ai_next;
424 		}
425 
426 		ai = ai_res;
427 		while (NULL != ai) {
428 			if (NULL != ai->ai_canonname) {
429 				this_octets = strlen(ai->ai_canonname) + 1;
430 				memcpy(cp, ai->ai_canonname, this_octets);
431 				cp += this_octets;
432 			}
433 
434 			ai = ai->ai_next;
435 		}
436 		freeaddrinfo(ai_res);
437 	}
438 
439 	/*
440 	 * make sure our walk and earlier calc match
441 	 */
442 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
443 
444 	if (queue_blocking_response(c, resp, resp_octets, req)) {
445 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
446 		return -1;
447 	}
448 
449 	return 0;
450 }
451 
452 int
453 getaddrinfo_sometime(
454 	const char *		node,
455 	const char *		service,
456 	const struct addrinfo *	hints,
457 	int			retry,
458 	gai_sometime_callback	callback,
459 	void *			context
460 	)
461 {
462 	return getaddrinfo_sometime_ex(node, service, hints, retry,
463 				       callback, context, 0);
464 }
465 
466 
467 static void
468 getaddrinfo_sometime_complete(
469 	blocking_work_req	rtype,
470 	void *			context,
471 	size_t			respsize,
472 	void *			resp
473 	)
474 {
475 	blocking_gai_req *	gai_req;
476 	blocking_gai_resp *	gai_resp;
477 	dnschild_ctx *		child_ctx;
478 	struct addrinfo *	ai;
479 	struct addrinfo *	next_ai;
480 	sockaddr_u *		psau;
481 	char *			node;
482 	char *			service;
483 	char *			canon_start;
484 	time_t			time_now;
485 	int			again, noerr;
486 	int			af;
487 	const char *		fam_spec;
488 	int			i;
489 
490 	gai_req = context;
491 	gai_resp = resp;
492 
493 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
494 	DEBUG_REQUIRE(respsize == gai_resp->octets);
495 
496 	node = (char *)gai_req + sizeof(*gai_req);
497 	service = node + gai_req->nodesize;
498 
499 	child_ctx = dnschild_contexts[gai_req->dns_idx];
500 
501 	if (0 == gai_resp->retcode) {
502 		/*
503 		 * If this query succeeded only after retrying, DNS may have
504 		 * just become responsive.
505 		 */
506 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
507 			time_now = time(NULL);
508 			child_ctx->next_dns_timeslot = time_now;
509 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
510 				  gai_req->dns_idx, humantime(time_now)));
511 		}
512 	} else {
513 		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
514 		again = noerr || should_retry_dns(
515 					gai_resp->retcode, gai_resp->gai_errno);
516 		/*
517 		 * exponential backoff of DNS retries to 64s
518 		 */
519 		if (gai_req->retry > 0 && again) {
520 			/* log the first retry only */
521 			if (INITIAL_DNS_RETRY == gai_req->retry)
522 				NLOG(NLOG_SYSINFO) {
523 					af = gai_req->hints.ai_family;
524 					fam_spec = (AF_INET6 == af)
525 						       ? " (AAAA)"
526 						       : (AF_INET == af)
527 							     ? " (A)"
528 							     : "";
529 #ifdef EAI_SYSTEM
530 					if (EAI_SYSTEM == gai_resp->retcode) {
531 						errno = gai_resp->gai_errno;
532 						msyslog(LOG_INFO,
533 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
534 							node, fam_spec,
535 							gai_resp->gai_errno);
536 					} else
537 #endif
538 						msyslog(LOG_INFO,
539 							"retrying DNS %s%s: %s (%d)",
540 							node, fam_spec,
541 							gai_strerror(gai_resp->retcode),
542 							gai_resp->retcode);
543 				}
544 			manage_dns_retry_interval(
545 				&gai_req->scheduled, &gai_req->earliest,
546 				&gai_req->retry, &child_ctx->next_dns_timeslot,
547 				noerr);
548 			if (!queue_blocking_request(
549 					BLOCKING_GETADDRINFO,
550 					gai_req,
551 					gai_req->octets,
552 					&getaddrinfo_sometime_complete,
553 					gai_req))
554 				return;
555 			else
556 				msyslog(LOG_ERR,
557 					"unable to retry hostname %s",
558 					node);
559 		}
560 	}
561 
562 	/*
563 	 * fixup pointers in returned addrinfo array
564 	 */
565 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
566 	next_ai = NULL;
567 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
568 		ai[i].ai_next = next_ai;
569 		next_ai = &ai[i];
570 	}
571 
572 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
573 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
574 
575 	for (i = 0; i < gai_resp->ai_count; i++) {
576 		if (NULL != ai[i].ai_addr)
577 			ai[i].ai_addr = &psau->sa;
578 		psau++;
579 		if (NULL != ai[i].ai_canonname)
580 			ai[i].ai_canonname += (size_t)canon_start;
581 	}
582 
583 	ENSURE((char *)psau == canon_start);
584 
585 	if (!gai_resp->ai_count)
586 		ai = NULL;
587 
588 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
589 			     gai_req->context, node, service,
590 			     &gai_req->hints, ai);
591 
592 	free(gai_req);
593 	/* gai_resp is part of block freed by process_blocking_resp() */
594 }
595 
596 
597 #ifdef TEST_BLOCKING_WORKER
598 void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
599 {
600 	sockaddr_u addr;
601 
602 	if (rescode) {
603 		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
604 			  context, rescode, name, service));
605 		return;
606 	}
607 	while (!rescode && NULL != ai_res) {
608 		ZERO_SOCK(&addr);
609 		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
610 		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
611 			  context,
612 			  AF(&addr),
613 			  stoa(&addr),
614 			  (ai_res->ai_canonname)
615 			      ? ai_res->ai_canonname
616 			      : "",
617 			  (SOCK_DGRAM == ai_res->ai_socktype)
618 			      ? "DGRAM"
619 			      : (SOCK_STREAM == ai_res->ai_socktype)
620 				    ? "STREAM"
621 				    : "(other)",
622 			  ai_res,
623 			  ai_res->ai_addr,
624 			  ai_res->ai_next));
625 
626 		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
627 
628 		ai_res = ai_res->ai_next;
629 	}
630 }
631 #endif	/* TEST_BLOCKING_WORKER */
632 
633 
634 int
635 getnameinfo_sometime(
636 	sockaddr_u *		psau,
637 	size_t			hostoctets,
638 	size_t			servoctets,
639 	int			flags,
640 	gni_sometime_callback	callback,
641 	void *			context
642 	)
643 {
644 	blocking_gni_req *	gni_req;
645 	u_int			idx;
646 	dnschild_ctx *		child_ctx;
647 	time_t			time_now;
648 
649 	REQUIRE(hostoctets);
650 	REQUIRE(hostoctets + servoctets < 1024);
651 
652 	idx = get_dnschild_ctx();
653 	child_ctx = dnschild_contexts[idx];
654 
655 	gni_req = emalloc_zero(sizeof(*gni_req));
656 
657 	gni_req->octets = sizeof(*gni_req);
658 	gni_req->dns_idx = idx;
659 	time_now = time(NULL);
660 	gni_req->scheduled = time_now;
661 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
662 	child_ctx->next_dns_timeslot = gni_req->earliest;
663 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
664 	gni_req->hostoctets = hostoctets;
665 	gni_req->servoctets = servoctets;
666 	gni_req->flags = flags;
667 	gni_req->retry = INITIAL_DNS_RETRY;
668 	gni_req->callback = callback;
669 	gni_req->context = context;
670 
671 	if (queue_blocking_request(
672 		BLOCKING_GETNAMEINFO,
673 		gni_req,
674 		sizeof(*gni_req),
675 		&getnameinfo_sometime_complete,
676 		gni_req)) {
677 
678 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
679 		errno = EFAULT;
680 		return -1;
681 	}
682 
683 	return 0;
684 }
685 
686 
687 int
688 blocking_getnameinfo(
689 	blocking_child *	c,
690 	blocking_pipe_header *	req
691 	)
692 {
693 	blocking_gni_req *	gni_req;
694 	dnsworker_ctx *		worker_ctx;
695 	blocking_pipe_header *	resp;
696 	blocking_gni_resp *	gni_resp;
697 	size_t			octets;
698 	size_t			resp_octets;
699 	char *			service;
700 	char *			cp;
701 	int			rc;
702 	time_t			time_now;
703 	char			host[1024];
704 
705 	gni_req = (void *)((char *)req + sizeof(*req));
706 
707 	octets = gni_req->hostoctets + gni_req->servoctets;
708 
709 	/*
710 	 * Some alloca() implementations are fragile regarding
711 	 * large allocations.  We only need room for the host
712 	 * and service names.
713 	 */
714 	REQUIRE(octets < sizeof(host));
715 	service = host + gni_req->hostoctets;
716 
717 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
718 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
719 			worker_ctx);
720 	reload_resolv_conf(worker_ctx);
721 
722 	/*
723 	 * Take a shot at the final size, better to overestimate
724 	 * then realloc to a smaller size.
725 	 */
726 
727 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
728 	resp = emalloc_zero(resp_octets);
729 	gni_resp = (void *)((char *)resp + sizeof(*resp));
730 
731 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
732 		  stoa(&gni_req->socku), gni_req->flags,
733 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
734 
735 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
736 					SOCKLEN(&gni_req->socku),
737 					host,
738 					gni_req->hostoctets,
739 					service,
740 					gni_req->servoctets,
741 					gni_req->flags);
742 	gni_resp->retry = gni_req->retry;
743 #ifdef EAI_SYSTEM
744 	if (EAI_SYSTEM == gni_resp->retcode)
745 		gni_resp->gni_errno = errno;
746 #endif
747 
748 	if (0 != gni_resp->retcode) {
749 		gni_resp->hostoctets = 0;
750 		gni_resp->servoctets = 0;
751 	} else {
752 		gni_resp->hostoctets = strlen(host) + 1;
753 		gni_resp->servoctets = strlen(service) + 1;
754 		/*
755 		 * If this query succeeded only after retrying, DNS may have
756 		 * just become responsive.  Ignore previously-scheduled
757 		 * retry sleeps once for each pending request, similar to
758 		 * the way scheduled_sleep() does when its worker_sleep()
759 		 * is interrupted.
760 		 */
761 		if (gni_req->retry > INITIAL_DNS_RETRY) {
762 			time_now = time(NULL);
763 			worker_ctx->ignore_scheduled_before = time_now;
764 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
765 				humantime(time_now)));
766 		}
767 	}
768 	octets = gni_resp->hostoctets + gni_resp->servoctets;
769 	/*
770 	 * Our response consists of a header, followed by the host and
771 	 * service strings, each null-terminated.
772 	 */
773 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
774 
775 	resp = erealloc(resp, resp_octets);
776 	gni_resp = (void *)(resp + 1);
777 
778 	gni_resp->octets = sizeof(*gni_resp) + octets;
779 
780 	/* cp serves as our current pointer while serializing */
781 	cp = (void *)(gni_resp + 1);
782 
783 	if (0 == gni_resp->retcode) {
784 		memcpy(cp, host, gni_resp->hostoctets);
785 		cp += gni_resp->hostoctets;
786 		memcpy(cp, service, gni_resp->servoctets);
787 		cp += gni_resp->servoctets;
788 	}
789 
790 	INSIST((size_t)(cp - (char *)resp) == resp_octets);
791 	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
792 
793 	rc = queue_blocking_response(c, resp, resp_octets, req);
794 	if (rc)
795 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
796 	return rc;
797 }
798 
799 
800 static void
801 getnameinfo_sometime_complete(
802 	blocking_work_req	rtype,
803 	void *			context,
804 	size_t			respsize,
805 	void *			resp
806 	)
807 {
808 	blocking_gni_req *	gni_req;
809 	blocking_gni_resp *	gni_resp;
810 	dnschild_ctx *		child_ctx;
811 	char *			host;
812 	char *			service;
813 	time_t			time_now;
814 	int			again;
815 
816 	gni_req = context;
817 	gni_resp = resp;
818 
819 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
820 	DEBUG_REQUIRE(respsize == gni_resp->octets);
821 
822 	child_ctx = dnschild_contexts[gni_req->dns_idx];
823 
824 	if (0 == gni_resp->retcode) {
825 		/*
826 		 * If this query succeeded only after retrying, DNS may have
827 		 * just become responsive.
828 		 */
829 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
830 			time_now = time(NULL);
831 			child_ctx->next_dns_timeslot = time_now;
832 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
833 				  gni_req->dns_idx, humantime(time_now)));
834 		}
835 	} else {
836 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
837 		/*
838 		 * exponential backoff of DNS retries to 64s
839 		 */
840 		if (gni_req->retry > 0)
841 			manage_dns_retry_interval(&gni_req->scheduled,
842 			    &gni_req->earliest, &gni_req->retry,
843 						  &child_ctx->next_dns_timeslot, FALSE);
844 
845 		if (gni_req->retry > 0 && again) {
846 			if (!queue_blocking_request(
847 				BLOCKING_GETNAMEINFO,
848 				gni_req,
849 				gni_req->octets,
850 				&getnameinfo_sometime_complete,
851 				gni_req))
852 				return;
853 
854 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
855 		}
856 	}
857 
858 	if (!gni_resp->hostoctets) {
859 		host = NULL;
860 		service = NULL;
861 	} else {
862 		host = (char *)gni_resp + sizeof(*gni_resp);
863 		service = (gni_resp->servoctets)
864 			      ? host + gni_resp->hostoctets
865 			      : NULL;
866 	}
867 
868 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
869 			     &gni_req->socku, gni_req->flags, host,
870 			     service, gni_req->context);
871 
872 	free(gni_req);
873 	/* gni_resp is part of block freed by process_blocking_resp() */
874 }
875 
876 
877 #ifdef TEST_BLOCKING_WORKER
878 void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
879 {
880 	if (!rescode)
881 		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
882 			  host, service, stoa(psau), context));
883 	else
884 		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
885 			  context, rescode, gni_errno, flags, stoa(psau)));
886 }
887 #endif	/* TEST_BLOCKING_WORKER */
888 
889 
#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() when this worker's reload
 * deadline has passed and push the deadline 60 seconds out.
 *
 * This is ad-hoc: /etc/resolv.conf is reloaded about once per minute
 * to pick up changes made by the DHCP client.  [Bug 1226]
 * With threaded workers the deadline is shared through the file-scope
 * next_res_init, so the reload happens once per minute process-wide.
 * A deadline of 0 only arms the timer, res_init() is skipped.
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	const time_t	now = time(NULL);

# ifdef WORK_THREAD
	/* adopt the process-wide deadline */
	worker_ctx->next_res_init = next_res_init;
# endif
	if (now < worker_ctx->next_res_init)
		return;		/* not due yet */

	if (0 != worker_ctx->next_res_init)
		res_init();
	worker_ctx->next_res_init = now + 60;
# ifdef WORK_THREAD
	/* publish the new deadline to the other workers */
	next_res_init = worker_ctx->next_res_init;
# endif
}
#endif	/* HAVE_RES_INIT */
918 
919 
920 static u_int
921 reserve_dnschild_ctx(void)
922 {
923 	const size_t	ps = sizeof(dnschild_contexts[0]);
924 	const size_t	cs = sizeof(*dnschild_contexts[0]);
925 	u_int		c;
926 	u_int		new_alloc;
927 	size_t		octets;
928 	size_t		new_octets;
929 
930 	c = 0;
931 	while (TRUE) {
932 		for ( ; c < dnschild_contexts_alloc; c++) {
933 			if (NULL == dnschild_contexts[c]) {
934 				dnschild_contexts[c] = emalloc_zero(cs);
935 
936 				return c;
937 			}
938 		}
939 		new_alloc = dnschild_contexts_alloc + 20;
940 		new_octets = new_alloc * ps;
941 		octets = dnschild_contexts_alloc * ps;
942 		dnschild_contexts = erealloc_zero(dnschild_contexts,
943 						  new_octets, octets);
944 		dnschild_contexts_alloc = new_alloc;
945 	}
946 }
947 
948 
949 static u_int
950 get_dnschild_ctx(void)
951 {
952 	static u_int	shared_ctx = UINT_MAX;
953 
954 	if (worker_per_query)
955 		return reserve_dnschild_ctx();
956 
957 	if (UINT_MAX == shared_ctx)
958 		shared_ctx = reserve_dnschild_ctx();
959 
960 	return shared_ctx;
961 }
962 
963 
964 static dnsworker_ctx *
965 get_worker_context(
966 	blocking_child *	c,
967 	u_int			idx
968 	)
969 {
970 	u_int		min_new_alloc;
971 	u_int		new_alloc;
972 	size_t		octets;
973 	size_t		new_octets;
974 	dnsworker_ctx *	retv;
975 
976 	worker_global_lock(TRUE);
977 
978 	if (dnsworker_contexts_alloc <= idx) {
979 		min_new_alloc = 1 + idx;
980 		/* round new_alloc up to nearest multiple of 4 */
981 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
982 		new_octets = new_alloc * sizeof(dnsworker_ctx*);
983 		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
984 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
985 						   new_octets, octets);
986 		dnsworker_contexts_alloc = new_alloc;
987 		retv = emalloc_zero(sizeof(dnsworker_ctx));
988 		dnsworker_contexts[idx] = retv;
989 	} else if (NULL == (retv = dnsworker_contexts[idx])) {
990 		retv = emalloc_zero(sizeof(dnsworker_ctx));
991 		dnsworker_contexts[idx] = retv;
992 	}
993 
994 	worker_global_lock(FALSE);
995 
996 	ZERO(*retv);
997 	retv->c = c;
998 	return retv;
999 }
1000 
1001 
1002 static void
1003 scheduled_sleep(
1004 	time_t		scheduled,
1005 	time_t		earliest,
1006 	dnsworker_ctx *	worker_ctx
1007 	)
1008 {
1009 	time_t now;
1010 
1011 	if (scheduled < worker_ctx->ignore_scheduled_before) {
1012 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1013 			  humantime(earliest), humantime(scheduled),
1014 			  humantime(worker_ctx->ignore_scheduled_before)));
1015 		return;
1016 	}
1017 
1018 	now = time(NULL);
1019 
1020 	if (now < earliest) {
1021 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1022 			  humantime(earliest), humantime(scheduled),
1023 			  humantime(worker_ctx->ignore_scheduled_before)));
1024 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1025 			/* our sleep was interrupted */
1026 			now = time(NULL);
1027 			worker_ctx->ignore_scheduled_before = now;
1028 #ifdef HAVE_RES_INIT
1029 			worker_ctx->next_res_init = now + 60;
1030 			next_res_init = worker_ctx->next_res_init;
1031 			res_init();
1032 #endif
1033 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1034 				  humantime(worker_ctx->ignore_scheduled_before)));
1035 		}
1036 	}
1037 }
1038 
1039 
/*
 * manage_dns_retry_interval - schedule the next attempt of a failed
 * lookup and compute the retry interval to use after it.  Helper for
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete.
 *
 * pscheduled		out: set to the current time.
 * pwhen		out: time of the next attempt, the later of
 *			now + *pretry and *pnext_timeslot.
 * pretry		in: current interval; out: next interval.
 * pnext_timeslot	in/out: advanced to the time of the next
 *			attempt.
 * forever		nonzero raises the interval cap from 64 to
 *			1024 seconds.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	now = time(NULL);
	const int	ceiling = forever ? 1024 : 64;
	int		interval = *pretry;
	time_t		slot;

	slot = max(now + interval, *pnext_timeslot);
	*pnext_timeslot = slot;
	*pscheduled = now;
	*pwhen = slot;

	/*
	 * Backoff slower than plain doubling: 2-3-4-6-8-12-16-24-32...
	 * After doubling, a power of two loses a quarter (4 -> 3,
	 * 8 -> 6), anything else loses its lowest set bit (6 -> 4,
	 * 12 -> 8).
	 */
	interval <<= 1;
	if (0 == (interval & (interval - 1)))
		interval -= (interval >> 2);
	else
		interval &= (interval - 1);

	*pretry = min(ceiling, interval);
}
1081 
1082 /*
1083  * should_retry_dns is a helper used by getaddrinfo_sometime_complete
1084  * and getnameinfo_sometime_complete which implements ntpd's DNS retry
1085  * policy.
1086  */
1087 static int
1088 should_retry_dns(
1089 	int	rescode,
1090 	int	res_errno
1091 	)
1092 {
1093 	static int	eai_again_seen;
1094 	int		again;
1095 #if defined (EAI_SYSTEM) && defined(DEBUG)
1096 	char		msg[256];
1097 #endif
1098 
1099 	/*
1100 	 * If the resolver failed, see if the failure is
1101 	 * temporary. If so, return success.
1102 	 */
1103 	again = 0;
1104 
1105 	switch (rescode) {
1106 
1107 	case EAI_FAIL:
1108 		again = 1;
1109 		break;
1110 
1111 	case EAI_AGAIN:
1112 		again = 1;
1113 		eai_again_seen = 1;		/* [Bug 1178] */
1114 		break;
1115 
1116 	case EAI_NONAME:
1117 #if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
1118 	case EAI_NODATA:
1119 #endif
1120 		again = !eai_again_seen;	/* [Bug 1178] */
1121 		break;
1122 
1123 #ifdef EAI_SYSTEM
1124 	case EAI_SYSTEM:
1125 		/*
1126 		 * EAI_SYSTEM means the real error is in errno.  We should be more
1127 		 * discriminating about which errno values require retrying, but
1128 		 * this matches existing behavior.
1129 		 */
1130 		again = 1;
1131 # ifdef DEBUG
1132 		errno_to_str(res_errno, msg, sizeof(msg));
1133 		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
1134 			  res_errno, msg));
1135 # endif
1136 		break;
1137 #endif
1138 	}
1139 
1140 	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
1141 		  gai_strerror(rescode), rescode, again ? "" : "not "));
1142 
1143 	return again;
1144 }
1145 
1146 #else	/* !WORKER follows */
/* sole definition when WORKER is not defined, keeps this unit non-empty */
int ntp_intres_nonempty_compilation_unit;
1148 #endif
1149