xref: /freebsd/contrib/ntp/ntpd/ntp_io.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  * ntp_io.c - input/output routines for ntpd.	The socket-opening code
3  *		   was shamelessly stolen from ntpd.
4  */
5 
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9 
10 #include <stdio.h>
11 #include <signal.h>
12 #ifdef HAVE_FNMATCH_H
13 # include <fnmatch.h>
14 # if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
15 #  define FNM_CASEFOLD FNM_IGNORECASE
16 # endif
17 #endif
18 #ifdef HAVE_SYS_PARAM_H
19 # include <sys/param.h>
20 #endif
21 #ifdef HAVE_SYS_IOCTL_H
22 # include <sys/ioctl.h>
23 #endif
24 #ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
25 # include <sys/sockio.h>
26 #endif
27 #ifdef HAVE_SYS_UIO_H
28 # include <sys/uio.h>
29 #endif
30 
31 #include "ntp_machine.h"
32 #include "ntpd.h"
33 #include "ntp_io.h"
34 #include "iosignal.h"
35 #include "ntp_lists.h"
36 #include "ntp_refclock.h"
37 #include "ntp_stdlib.h"
38 #include "ntp_worker.h"
39 #include "ntp_request.h"
40 #include "ntp_assert.h"
41 #include "timevalops.h"
42 #include "timespecops.h"
43 #include "ntpd-opts.h"
44 #include "safecast.h"
45 
46 /* Don't include ISC's version of IPv6 variables and structures */
47 #define ISC_IPV6_H 1
48 #include <isc/mem.h>
49 #include <isc/interfaceiter.h>
50 #include <isc/netaddr.h>
51 #include <isc/result.h>
52 #include <isc/sockaddr.h>
53 
54 #ifdef SIM
55 #include "ntpsim.h"
56 #endif
57 
58 #ifdef HAS_ROUTING_SOCKET
59 # include <net/route.h>
60 # ifdef HAVE_RTNETLINK
61 #  include <linux/rtnetlink.h>
62 # endif
63 #endif
64 
65 /*
66  * setsockopt does not always have the same arg declaration
67  * across all platforms. If it's not defined we make it empty
68  */
69 
70 #ifndef SETSOCKOPT_ARG_CAST
71 #define SETSOCKOPT_ARG_CAST
72 #endif
73 
74 extern int listen_to_virtual_ips;
75 
76 #ifndef IPTOS_DSCP_EF
77 #define IPTOS_DSCP_EF 0xb8
78 #endif
79 int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
80 
81 #ifdef LEAP_SMEAR
82 /* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
83  * we get a linker error. Since we're running out of time before the leap
84  * second occurs, we let it here where it just works.
85  */
86 int leap_smear_intv;
87 #endif
88 
89 /*
90  * NIC rule entry
91  */
92 typedef struct nic_rule_tag nic_rule;
93 
94 struct nic_rule_tag {
95 	nic_rule *	next;
96 	nic_rule_action	action;
97 	nic_rule_match	match_type;
98 	char *		if_name;
99 	sockaddr_u	addr;
100 	int		prefixlen;
101 };
102 
103 /*
104  * NIC rule listhead.  Entries are added at the head so that the first
105  * match in the list is the last matching rule specified.
106  */
107 nic_rule *nic_rule_list;
108 
109 
110 #if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
111 #  define HAVE_PACKET_TIMESTAMP
112 #  define HAVE_BINTIME
113 #  ifdef BINTIME_CTLMSGBUF_SIZE
114 #   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
115 #  else
116 #   define CMSG_BUFSIZE  1536 /* moderate default */
117 #  endif
118 #elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
119 #  define HAVE_PACKET_TIMESTAMP
120 #  define HAVE_TIMESTAMPNS
121 #  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
122 #   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
123 #  else
124 #   define CMSG_BUFSIZE  1536 /* moderate default */
125 #  endif
126 #elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
127 #  define HAVE_PACKET_TIMESTAMP
128 #  define HAVE_TIMESTAMP
129 #  ifdef TIMESTAMP_CTLMSGBUF_SIZE
130 #   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
131 #  else
132 #   define CMSG_BUFSIZE  1536 /* moderate default */
133 #  endif
134 #else
135 /* fill in for old/other timestamp interfaces */
136 #endif
137 
138 #if defined(SYS_WINNT)
139 #include "win32_io.h"
140 #include <isc/win32os.h>
141 #endif
142 
143 /*
144  * We do asynchronous input using the SIGIO facility.  A number of
145  * recvbuf buffers are preallocated for input.	In the signal
146  * handler we poll to see which sockets are ready and read the
147  * packets from them into the recvbuf's along with a time stamp and
148  * an indication of the source host and the interface it was received
149  * through.  This allows us to get as accurate receive time stamps
150  * as possible independent of other processing going on.
151  *
152  * We watch the number of recvbufs available to the signal handler
153  * and allocate more when this number drops below the low water
154  * mark.  If the signal handler should run out of buffers in the
155  * interim it will drop incoming frames, the idea being that it is
156  * better to drop a packet than to be inaccurate.
157  */
158 
159 
160 /*
161  * Other statistics of possible interest
162  */
163 volatile u_long packets_dropped;	/* total number of packets dropped on reception */
164 volatile u_long packets_ignored;	/* packets received on wild card interface */
165 volatile u_long packets_received;	/* total number of packets received */
166 	 u_long packets_sent;		/* total number of packets sent */
167 	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
168 
169 volatile u_long handler_calls;	/* number of calls to interrupt handler */
170 volatile u_long handler_pkts;	/* number of pkts received by handler */
171 u_long io_timereset;		/* time counters were reset */
172 
173 /*
174  * Interface stuff
175  */
176 endpt *	any_interface;		/* wildcard ipv4 interface */
177 endpt *	any6_interface;		/* wildcard ipv6 interface */
178 endpt *	loopback_interface;	/* loopback ipv4 interface */
179 
180 static isc_boolean_t broadcast_client_enabled;
181 u_int sys_ifnum;			/* next .ifnum to assign */
182 int ninterfaces;			/* Total number of interfaces */
183 
184 int no_periodic_scan;		/* network endpoint scans */
185 int scan_addrs_once;		/* because dropped privs */
186 int nonlocal_v4_addr_up;	/* should we try IPv4 pool? */
187 int nonlocal_v6_addr_up;	/* should we try IPv6 pool? */
188 
189 #ifdef REFCLOCK
190 /*
191  * Refclock stuff.	We keep a chain of structures with data concerning
192  * the guys we are doing I/O for.
193  */
194 static	struct refclockio *refio;
195 #endif /* REFCLOCK */
196 
197 /*
198  * File descriptor masks etc. for call to select
199  * Not needed for I/O Completion Ports or anything outside this file
200  */
201 static fd_set activefds;
202 static int maxactivefd;
203 
204 /*
205  * bit alternating value to detect verified interfaces during an update cycle
206  */
207 static  u_short		sys_interphase = 0;
208 
209 static endpt *	new_interface(endpt *);
210 static void	add_interface(endpt *);
211 static int	update_interfaces(u_short, interface_receiver_t,
212 				  void *);
213 static void	remove_interface(endpt *);
214 static endpt *	create_interface(u_short, endpt *);
215 
216 static inline int is_wildcard_addr(const sockaddr_u *psau);
217 
218 /*
219  * Multicast functions
220  */
221 static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
222 static	isc_boolean_t	is_anycast		(sockaddr_u *,
223 						 const char *);
224 
225 /*
226  * Not all platforms support multicast
227  */
228 #ifdef MCAST
229 static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
230 static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
231 #endif
232 
233 #ifdef DEBUG
234 static void interface_dump	(const endpt *);
235 static void print_interface	(const endpt *, const char *, const char *);
236 #define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
237 #else
238 #define DPRINT_INTERFACE(level, args) do {} while (0)
239 #endif
240 
241 typedef struct vsock vsock_t;
242 enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
243 
244 struct vsock {
245 	vsock_t	*	link;
246 	SOCKET		fd;
247 	enum desc_type	type;
248 };
249 
250 vsock_t	*fd_list;
251 
252 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
253 /*
254  * async notification processing (e. g. routing sockets)
255  */
256 /*
257  * support for receiving data on fd that is not a refclock or a socket
258  * like e. g. routing sockets
259  */
260 struct asyncio_reader {
261 	struct asyncio_reader *link;		    /* the list this is being kept in */
262 	SOCKET fd;				    /* fd to be read */
263 	void  *data;				    /* possibly local data */
264 	void (*receiver)(struct asyncio_reader *);  /* input handler */
265 };
266 
267 struct asyncio_reader *asyncio_reader_list;
268 
269 static void delete_asyncio_reader (struct asyncio_reader *);
270 static struct asyncio_reader *new_asyncio_reader (void);
271 static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
272 static void remove_asyncio_reader (struct asyncio_reader *);
273 
274 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
275 
276 static void init_async_notifications (void);
277 
278 static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
279 				 int);
280 static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
281 				 const sockaddr_u *, const sockaddr_u *);
282 static	int	create_sockets	(u_short);
283 static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
284 static	void	set_reuseaddr	(int);
285 static	isc_boolean_t	socket_broadcast_enable	 (endpt *, SOCKET, sockaddr_u *);
286 
287 #if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
288 static	char *	fdbits		(int, const fd_set *);
289 #endif
290 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
291 static	isc_boolean_t	socket_broadcast_disable (endpt *, sockaddr_u *);
292 #endif
293 
294 typedef struct remaddr remaddr_t;
295 
296 struct remaddr {
297 	remaddr_t *		link;
298 	sockaddr_u		addr;
299 	endpt *			ep;
300 };
301 
302 remaddr_t *	remoteaddr_list;
303 endpt *		ep_list;	/* complete endpt list */
304 endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
305 endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
306 
307 static endpt *	wildipv4;
308 static endpt *	wildipv6;
309 
310 #define		RFC3927_ADDR	0xa9fe0000	/* 169.254. */
311 #define		RFC3927_MASK	0xffff0000
312 #define		IS_AUTOCONF(addr4)					\
313 		((SRCADR(addr4) & RFC3927_MASK) == RFC3927_ADDR)
314 
315 #ifdef SYS_WINNT
316 int accept_wildcard_if_for_winnt;
317 #else
318 const int accept_wildcard_if_for_winnt = FALSE;
319 #define		init_io_completion_port()	do {} while (FALSE)
320 #endif
321 
322 static void	add_fd_to_list		(SOCKET, enum desc_type);
323 static endpt *	find_addr_in_list	(sockaddr_u *);
324 static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
325 static void	delete_addr_from_list	(sockaddr_u *);
326 static void	delete_interface_from_list(endpt *);
327 static void	close_and_delete_fd_from_list(SOCKET, endpt *);
328 static void	add_addr_to_list	(sockaddr_u *, endpt *);
329 static void	create_wildcards	(u_short);
330 static endpt *	findlocalinterface	(sockaddr_u *, int, int);
331 static endpt *	findclosestinterface	(sockaddr_u *, int);
332 #ifdef DEBUG
333 static const char *	action_text	(nic_rule_action);
334 #endif
335 static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
336 static void		convert_isc_if	(isc_interface_t *,
337 					 endpt *, u_short);
338 static void		calc_addr_distance(sockaddr_u *,
339 					   const sockaddr_u *,
340 					   const sockaddr_u *);
341 static int		cmp_addr_distance(const sockaddr_u *,
342 					  const sockaddr_u *);
343 
344 /*
345  * Routines to read the ntp packets
346  */
347 #if !defined(HAVE_IO_COMPLETION_PORT)
348 static inline int	read_network_packet	(SOCKET, endpt *, l_fp);
349 static void		ntpd_addremove_io_fd	(int, int, int);
350 static void 		input_handler_scan	(const l_fp*, const fd_set*);
351 static int/*BOOL*/	sanitize_fdset		(int errc);
352 #ifdef REFCLOCK
353 static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
354 #endif
355 #ifdef HAVE_SIGNALED_IO
356 static void 		input_handler		(l_fp*);
357 #endif
358 #endif
359 
360 
361 #ifndef HAVE_IO_COMPLETION_PORT
362 void
363 maintain_activefds(
364 	int fd,
365 	int closing
366 	)
367 {
368 	int i;
369 
370 	if (fd < 0 || fd >= FD_SETSIZE) {
371 		msyslog(LOG_ERR,
372 			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
373 			FD_SETSIZE, fd);
374 		exit(1);
375 	}
376 
377 	if (!closing) {
378 		FD_SET(fd, &activefds);
379 		maxactivefd = max(fd, maxactivefd);
380 	} else {
381 		FD_CLR(fd, &activefds);
382 		if (maxactivefd && fd == maxactivefd) {
383 			for (i = maxactivefd - 1; i >= 0; i--)
384 				if (FD_ISSET(i, &activefds)) {
385 					maxactivefd = i;
386 					break;
387 				}
388 			INSIST(fd != maxactivefd);
389 		}
390 	}
391 }
392 #endif	/* !HAVE_IO_COMPLETION_PORT */
393 
394 
395 #ifdef DEBUG_TIMING
396 /*
397  * collect timing information for various processing
398  * paths. currently we only pass them on to the file
399  * for later processing. this could also do histogram
400  * based analysis in other to reduce the load (and skew)
401  * dur to the file output
402  */
403 void
404 collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
405 {
406 	char buf[256];
407 
408 	snprintf(buf, sizeof(buf), "%s %d %s %s",
409 		 (rb != NULL)
410 		     ? ((rb->dstadr != NULL)
411 			    ? stoa(&rb->recv_srcadr)
412 			    : "-REFCLOCK-")
413 		     : "-",
414 		 count, lfptoa(dts, 9), tag);
415 	record_timing_stats(buf);
416 }
417 #endif
418 
419 /*
420  * About dynamic interfaces, sockets, reception and more...
421  *
422  * the code solves following tasks:
423  *
424  *   - keep a current list of active interfaces in order
425  *     to bind to the interface address on NTP_PORT so that
426  *     all wild and specific bindings for NTP_PORT are taken by ntpd
427  *     to avoid other daemons messing with the time or sockets.
428  *   - all interfaces keep a list of peers that are referencing
429  *     the interface in order to quickly re-assign the peers to
430  *     new interface in case an interface is deleted (=> gone from system or
431  *     down)
432  *   - have a preconfigured socket ready with the right local address
433  *     for transmission and reception
434  *   - have an address list for all destination addresses used within ntpd
435  *     to find the "right" preconfigured socket.
436  *   - facilitate updating the internal interface list with respect to
437  *     the current kernel state
438  *
439  * special issues:
440  *
441  *   - mapping of multicast addresses to the interface affected is not always
442  *     one to one - especially on hosts with multiple interfaces
443  *     the code here currently allocates a separate interface entry for those
444  *     multicast addresses
445  *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
446  *     in case of failure the multicast address is bound to an existing interface.
447  *   - on some systems it is perfectly legal to assign the same address to
448  *     multiple interfaces. Therefore this code does not keep a list of interfaces
449  *     but a list of interfaces that represent a unique address as determined by the kernel
450  *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
451  *     one representative of a group of real interfaces if they share the same address.
452  *
453  * Frank Kardel 20050910
454  */
455 
456 /*
457  * init_io - initialize I/O module.
458  */
459 void
460 init_io(void)
461 {
462 	/* Init buffer free list and stat counters */
463 	init_recvbuff(RECV_INIT);
464 	/* update interface every 5 minutes as default */
465 	endpt_scan_period = 301;
466 
467 #ifdef WORK_PIPE
468 	addremove_io_fd = &ntpd_addremove_io_fd;
469 #endif
470 
471 	init_io_completion_port();
472 #if defined(HAVE_SIGNALED_IO)
473 	(void) set_signal(input_handler);
474 #endif
475 }
476 
477 
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor.
 *
 * fd		descriptor to add to or remove from the active set
 * is_pipe	ignored here
 * remove_it	zero to add the descriptor, nonzero to remove it
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	/* a newly added descriptor needs signal delivery set up first */
	if (0 == remove_it) {
		init_socket_sig(fd);
	}
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
494 
495 
496 /*
497  * io_open_sockets - call socket creation routine
498  */
499 void
500 io_open_sockets(void)
501 {
502 	static int already_opened;
503 
504 	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
505 		return;
506 
507 	already_opened = 1;
508 
509 	/*
510 	 * Create the sockets
511 	 */
512 	BLOCKIO();
513 	create_sockets(NTP_PORT);
514 	UNBLOCKIO();
515 
516 	init_async_notifications();
517 
518 	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
519 }
520 
521 
522 #ifdef DEBUG
523 /*
524  * function to dump the contents of the interface structure
525  * for debugging use only.
526  * We face a dilemma here -- sockets are FDs under POSIX and
527  * actually HANDLES under Windows. So we use '%lld' as format
528  * and cast the value to 'long long'; this should not hurt
529  * with UNIX-like systems and does not truncate values on Win64.
530  */
531 void
532 interface_dump(const endpt *itf)
533 {
534 	printf("Dumping interface: %p\n", itf);
535 	printf("fd = %lld\n", (long long)itf->fd);
536 	printf("bfd = %lld\n", (long long)itf->bfd);
537 	printf("sin = %s,\n", stoa(&itf->sin));
538 	printf("bcast = %s,\n", stoa(&itf->bcast));
539 	printf("mask = %s,\n", stoa(&itf->mask));
540 	printf("name = %s\n", itf->name);
541 	printf("flags = 0x%08x\n", itf->flags);
542 	printf("last_ttl = %d\n", itf->last_ttl);
543 	printf("addr_refid = %08x\n", itf->addr_refid);
544 	printf("num_mcast = %d\n", itf->num_mcast);
545 	printf("received = %ld\n", itf->received);
546 	printf("sent = %ld\n", itf->sent);
547 	printf("notsent = %ld\n", itf->notsent);
548 	printf("ifindex = %u\n", itf->ifindex);
549 	printf("peercnt = %u\n", itf->peercnt);
550 	printf("phase = %u\n", itf->phase);
551 }
552 
553 
554 /*
555  * print_interface - helper to output debug information
556  */
557 static void
558 print_interface(const endpt *iface, const char *pfx, const char *sfx)
559 {
560 	printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
561 	       pfx,
562 	       iface->ifnum,
563 	       (long long)iface->fd,
564 	       (long long)iface->bfd,
565 	       iface->name,
566 	       iface->flags,
567 	       iface->ifindex,
568 	       stoa(&iface->sin));
569 	if (AF_INET == iface->family) {
570 		if (iface->flags & INT_BROADCAST)
571 			printf(", bcast=%s", stoa(&iface->bcast));
572 		printf(", mask=%s", stoa(&iface->mask));
573 	}
574 	printf(", %s:%s",
575 	       (iface->ignore_packets)
576 		   ? "Disabled"
577 		   : "Enabled",
578 	       sfx);
579 	if (debug > 4)	/* in-depth debugging only */
580 		interface_dump(iface);
581 }
582 #endif
583 
584 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
585 /*
586  * create an asyncio_reader structure
587  */
588 static struct asyncio_reader *
589 new_asyncio_reader(void)
590 {
591 	struct asyncio_reader *reader;
592 
593 	reader = emalloc_zero(sizeof(*reader));
594 	reader->fd = INVALID_SOCKET;
595 
596 	return reader;
597 }
598 
/*
 * delete_asyncio_reader - release the storage of a reader.
 * Only the structure itself is freed; its descriptor is not touched.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
609 
610 /*
611  * add asynchio_reader
612  */
613 static void
614 add_asyncio_reader(
615 	struct asyncio_reader *	reader,
616 	enum desc_type		type)
617 {
618 	LINK_SLIST(asyncio_reader_list, reader, link);
619 	add_fd_to_list(reader->fd, type);
620 }
621 
622 /*
623  * remove asyncio_reader
624  */
625 static void
626 remove_asyncio_reader(
627 	struct asyncio_reader *reader
628 	)
629 {
630 	struct asyncio_reader *unlinked;
631 
632 	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
633 	    struct asyncio_reader);
634 
635 	if (reader->fd != INVALID_SOCKET) {
636 		close_and_delete_fd_from_list(reader->fd, NULL);
637 	}
638 	reader->fd = INVALID_SOCKET;
639 }
640 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
641 
642 
643 /* compare two sockaddr prefixes */
644 static int
645 addr_eqprefix(
646 	const sockaddr_u *	a,
647 	const sockaddr_u *	b,
648 	int			prefixlen
649 	)
650 {
651 	isc_netaddr_t		isc_a;
652 	isc_netaddr_t		isc_b;
653 	isc_sockaddr_t		isc_sa;
654 
655 	ZERO(isc_sa);
656 	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
657 	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
658 
659 	ZERO(isc_sa);
660 	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
661 	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
662 
663 	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
664 					 (u_int)prefixlen);
665 }
666 
667 
668 static int
669 addr_samesubnet(
670 	const sockaddr_u *	a,
671 	const sockaddr_u *	a_mask,
672 	const sockaddr_u *	b,
673 	const sockaddr_u *	b_mask
674 	)
675 {
676 	const u_int32 *	pa;
677 	const u_int32 *	pa_limit;
678 	const u_int32 *	pb;
679 	const u_int32 *	pm;
680 	size_t		loops;
681 
682 	REQUIRE(AF(a) == AF(a_mask));
683 	REQUIRE(AF(b) == AF(b_mask));
684 	/*
685 	 * With address and mask families verified to match, comparing
686 	 * the masks also validates the address's families match.
687 	 */
688 	if (!SOCK_EQ(a_mask, b_mask))
689 		return FALSE;
690 
691 	if (IS_IPV6(a)) {
692 		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
693 		pa = (const void *)&NSRCADR6(a);
694 		pb = (const void *)&NSRCADR6(b);
695 		pm = (const void *)&NSRCADR6(a_mask);
696 	} else {
697 		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
698 		pa = (const void *)&NSRCADR(a);
699 		pb = (const void *)&NSRCADR(b);
700 		pm = (const void *)&NSRCADR(a_mask);
701 	}
702 	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
703 		if ((*pa & *pm) != (*pb & *pm))
704 			return FALSE;
705 
706 	return TRUE;
707 }
708 
709 
710 /*
711  * interface list enumerator - visitor pattern
712  */
713 void
714 interface_enumerate(
715 	interface_receiver_t	receiver,
716 	void *			data
717 	)
718 {
719 	interface_info_t ifi;
720 
721 	ifi.action = IFS_EXISTS;
722 	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
723 		(*receiver)(data, &ifi);
724 }
725 
726 /*
727  * do standard initialization of interface structure
728  */
729 static inline void
730 init_interface(
731 	endpt *ep
732 	)
733 {
734 	ZERO(*ep);
735 	ep->fd = INVALID_SOCKET;
736 	ep->bfd = INVALID_SOCKET;
737 	ep->phase = sys_interphase;
738 }
739 
740 
741 /*
742  * create new interface structure initialize from
743  * template structure or via standard initialization
744  * function
745  */
746 static endpt *
747 new_interface(
748 	endpt *protot
749 	)
750 {
751 	endpt *	iface;
752 
753 	iface = emalloc(sizeof(*iface));
754 	if (NULL == protot) {
755 		ZERO(*iface);
756 	} else {
757 		memcpy(iface, protot, sizeof(*iface));
758 	}
759 	/* count every new instance of an interface in the system */
760 	iface->ifnum = sys_ifnum++;
761 	iface->starttime = current_time;
762 
763 #   ifdef HAVE_IO_COMPLETION_PORT
764 	if (!io_completion_port_add_interface(iface)) {
765 		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
766 		exit(1);
767 	}
768 #   endif
769 	return iface;
770 }
771 
772 
773 /*
774  * return interface storage into free memory pool
775  */
776 static void
777 delete_interface(
778 	endpt *ep
779 	)
780 {
781 #    ifdef HAVE_IO_COMPLETION_PORT
782 	io_completion_port_remove_interface(ep);
783 #    endif
784 	free(ep);
785 }
786 
787 
788 /*
789  * link interface into list of known interfaces
790  */
791 static void
792 add_interface(
793 	endpt *	ep
794 	)
795 {
796 	endpt **	pmclisthead;
797 	endpt *		scan;
798 	endpt *		scan_next;
799 	int		same_subnet;
800 	int		rc;
801 
802 	/* Calculate the refid */
803 	ep->addr_refid = addr2refid(&ep->sin);
804 #    ifdef WORDS_BIGENDIAN
805 	if (IS_IPV6(&ep->sin)) {
806 		ep->old_refid = BYTESWAP32(ep->addr_refid);
807 	}
808 #    endif
809 	/* link at tail so ntpdc -c ifstats index increases each row */
810 	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
811 	ninterfaces++;
812 #ifdef MCAST
813 	/* the rest is for enabled multicast-capable addresses only */
814 	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
815 	    INT_LOOPBACK & ep->flags)
816 		return;
817 # ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
818 	if (AF_INET6 == ep->family)
819 		return;
820 # endif
821 	pmclisthead = (AF_INET == ep->family)
822 			 ? &mc4_list
823 			 : &mc6_list;
824 
825 	/*
826 	 * If we have multiple global addresses from the same prefix
827 	 * on the same network interface, multicast from one.
828 	 */
829 	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
830 		scan_next = scan->mclink;
831 		if (   ep->family != scan->family
832 		    || ep->ifindex != scan->ifindex) {
833 			continue;
834 		}
835 		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
836 					      &scan->sin, &scan->mask);
837 		if (same_subnet) {
838 			DPRINTF(4, ("did not add %s to multicast-capable list"
839 				    "which already has %s\n",
840 				    stoa(&ep->sin), stoa(&scan->sin)));
841 			return;
842 		}
843 	}
844 	LINK_SLIST(*pmclisthead, ep, mclink);
845 	if (INVALID_SOCKET == ep->fd)
846 		return;
847 
848 	/*
849 	 * select the local address from which to send to multicast.
850 	 */
851 	switch (AF(&ep->sin)) {
852 
853 	case AF_INET :
854 		rc = setsockopt(ep->fd, IPPROTO_IP,
855 				IP_MULTICAST_IF,
856 				(void *)&NSRCADR(&ep->sin),
857 				sizeof(NSRCADR(&ep->sin)));
858 		if (rc)
859 			msyslog(LOG_ERR,
860 				"setsockopt IP_MULTICAST_IF %s fails: %m",
861 				stoa(&ep->sin));
862 		break;
863 
864 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
865 	case AF_INET6 :
866 		rc = setsockopt(ep->fd, IPPROTO_IPV6,
867 				 IPV6_MULTICAST_IF,
868 				 (void *)&ep->ifindex,
869 				 sizeof(ep->ifindex));
870 		/* do not complain if bound addr scope is ifindex */
871 		if (rc && ep->ifindex != SCOPE(&ep->sin))
872 			msyslog(LOG_ERR,
873 				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
874 				ep->ifindex, stoa(&ep->sin));
875 		break;
876 # endif
877 	}
878 #endif	/* MCAST */
879 }
880 
881 
882 /*
883  * remove interface from known interface list and clean up
884  * associated resources
885  */
886 static void
887 remove_interface(
888 	endpt *	ep
889 	)
890 {
891 	endpt *		unlinked;
892 	endpt **	pmclisthead;
893 	sockaddr_u	resmask;
894 	int/*BOOL*/	success;
895 
896 	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
897 	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
898 		pmclisthead = (AF_INET == ep->family)
899 				 ? &mc4_list
900 				 : &mc6_list;
901 		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
902 		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
903 			stoa(&ep->sin),
904 			(unlinked != NULL)
905 			    ? "removed from"
906 			    : "not found on",
907 			(AF_INET == ep->family)
908 			    ? "4"
909 			    : "6"));
910 	}
911 	delete_interface_from_list(ep);
912 
913 	if (ep->fd != INVALID_SOCKET) {
914 		msyslog(LOG_INFO,
915 			"Deleting %d %s, [%s]:%hd, stats:"
916 			" received=%ld, sent=%ld, dropped=%ld,"
917 			" active_time=%ld secs",
918 			ep->ifnum,
919 			ep->name,
920 			stoa(&ep->sin),
921 			SRCPORT(&ep->sin),
922 			ep->received,
923 			ep->sent,
924 			ep->notsent,
925 			current_time - ep->starttime);
926 		close_and_delete_fd_from_list(ep->fd, ep);
927 		ep->fd = INVALID_SOCKET;
928 	}
929 
930 	if (ep->bfd != INVALID_SOCKET) {
931 		msyslog(LOG_INFO,
932 			"stop listening for broadcasts to %s on interface #%d %s",
933 			stoa(&ep->bcast), ep->ifnum, ep->name);
934 		close_and_delete_fd_from_list(ep->bfd, ep);
935 		ep->bfd = INVALID_SOCKET;
936 	}
937 #   ifdef HAVE_IO_COMPLETION_PORT
938 	io_completion_port_remove_interface(ep);
939 #   endif
940 
941 	ninterfaces--;
942 	mon_clearinterface(ep);
943 
944 	/* remove restrict interface entry */
945 	SET_HOSTMASK(&resmask, AF(&ep->sin));
946 	success = hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask, 0,
947 				RESM_NTPONLY | RESM_INTERFACE, 0, 0);
948 	if (!success) {
949 		msyslog(LOG_ERR,
950 			"unable to remove self-restriction for %s",
951 			stoa(&ep->sin));
952 	}
953 
954 }
955 
956 
957 static void
958 log_listen_address(
959 	endpt *	ep
960 	)
961 {
962 	msyslog(LOG_INFO, "%s on %d %s %s",
963 		(ep->ignore_packets)
964 		    ? "Listen and drop"
965 		    : "Listen normally",
966 		ep->ifnum,
967 		ep->name,
968 		sptoa(&ep->sin));
969 }
970 
971 
972 static void
973 create_wildcards(
974 	u_short	port
975 	)
976 {
977 	int			v4wild;
978 #ifdef INCLUDE_IPV6_SUPPORT
979 	int			v6wild;
980 #endif
981 	sockaddr_u		wildaddr;
982 	nic_rule_action		action;
983 	endpt *			wildif;
984 
985 	/*
986 	 * silence "potentially uninitialized" warnings from VC9
987 	 * failing to follow the logic.  Ideally action could remain
988 	 * uninitialized, and the memset be the first statement under
989 	 * the first if (v4wild).
990 	 */
991 	action = ACTION_LISTEN;
992 	ZERO(wildaddr);
993 
994 #ifdef INCLUDE_IPV6_SUPPORT
995 	/*
996 	 * create pseudo-interface with wildcard IPv6 address
997 	 */
998 	v6wild = ipv6_works;
999 	if (v6wild) {
1000 		/* set wildaddr to the v6 wildcard address :: */
1001 		ZERO(wildaddr);
1002 		AF(&wildaddr) = AF_INET6;
1003 		SET_ADDR6N(&wildaddr, in6addr_any);
1004 		SET_PORT(&wildaddr, port);
1005 		SET_SCOPE(&wildaddr, 0);
1006 
1007 		/* check for interface/nic rules affecting the wildcard */
1008 		action = interface_action(NULL, &wildaddr, 0);
1009 		v6wild = (ACTION_IGNORE != action);
1010 	}
1011 	if (v6wild) {
1012 		wildif = new_interface(NULL);
1013 
1014 		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1015 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1016 		wildif->family = AF_INET6;
1017 		AF(&wildif->mask) = AF_INET6;
1018 		SET_ONESMASK(&wildif->mask);
1019 
1020 		wildif->flags = INT_UP | INT_WILDCARD;
1021 		wildif->ignore_packets = (ACTION_DROP == action);
1022 
1023 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1024 
1025 		if (wildif->fd != INVALID_SOCKET) {
1026 			wildipv6 = wildif;
1027 			any6_interface = wildif;
1028 			add_addr_to_list(&wildif->sin, wildif);
1029 			add_interface(wildif);
1030 			log_listen_address(wildif);
1031 		} else {
1032 			msyslog(LOG_ERR,
1033 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1034 				stoa(&wildif->sin));
1035 			exit(1);
1036 		}
1037 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1038 	}
1039 #endif
1040 
1041 	/*
1042 	 * create pseudo-interface with wildcard IPv4 address
1043 	 */
1044 	v4wild = ipv4_works;
1045 	if (v4wild) {
1046 		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1047 		AF(&wildaddr) = AF_INET;
1048 		SET_ADDR4N(&wildaddr, INADDR_ANY);
1049 		SET_PORT(&wildaddr, port);
1050 
1051 		/* check for interface/nic rules affecting the wildcard */
1052 		action = interface_action(NULL, &wildaddr, 0);
1053 		v4wild = (ACTION_IGNORE != action);
1054 	}
1055 	if (v4wild) {
1056 		wildif = new_interface(NULL);
1057 
1058 		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1059 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1060 		wildif->family = AF_INET;
1061 		AF(&wildif->mask) = AF_INET;
1062 		SET_ONESMASK(&wildif->mask);
1063 
1064 		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1065 		wildif->ignore_packets = (ACTION_DROP == action);
1066 #if defined(MCAST)
1067 		/*
1068 		 * enable multicast reception on the broadcast socket
1069 		 */
1070 		AF(&wildif->bcast) = AF_INET;
1071 		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1072 		SET_PORT(&wildif->bcast, port);
1073 #endif /* MCAST */
1074 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1075 
1076 		if (wildif->fd != INVALID_SOCKET) {
1077 			wildipv4 = wildif;
1078 			any_interface = wildif;
1079 
1080 			add_addr_to_list(&wildif->sin, wildif);
1081 			add_interface(wildif);
1082 			log_listen_address(wildif);
1083 		} else {
1084 			msyslog(LOG_ERR,
1085 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1086 				stoa(&wildif->sin));
1087 			exit(1);
1088 		}
1089 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1090 	}
1091 }
1092 
1093 
1094 /*
1095  * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1096  */
1097 void
1098 add_nic_rule(
1099 	nic_rule_match	match_type,
1100 	const char *	if_name,	/* interface name or numeric address */
1101 	int		prefixlen,
1102 	nic_rule_action	action
1103 	)
1104 {
1105 	nic_rule *	rule;
1106 	isc_boolean_t	is_ip;
1107 
1108 	rule = emalloc_zero(sizeof(*rule));
1109 	rule->match_type = match_type;
1110 	rule->prefixlen = prefixlen;
1111 	rule->action = action;
1112 
1113 	if (MATCH_IFNAME == match_type) {
1114 		REQUIRE(NULL != if_name);
1115 		rule->if_name = estrdup(if_name);
1116 	} else if (MATCH_IFADDR == match_type) {
1117 		REQUIRE(NULL != if_name);
1118 		/* set rule->addr */
1119 		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1120 		REQUIRE(is_ip);
1121 	} else
1122 		REQUIRE(NULL == if_name);
1123 
1124 	LINK_SLIST(nic_rule_list, rule, next);
1125 }
1126 
1127 
#ifdef DEBUG
/*
 * action_text() - debug-only helper returning the printable name of a
 * nic_rule_action for trace output.  An unrecognized value is reported
 * via DPRINTF and then trips ENSURE(0).
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";
}
#endif	/* DEBUG */
1161 
1162 
1163 static nic_rule_action
1164 interface_action(
1165 	char *		if_name,
1166 	sockaddr_u *	if_addr,
1167 	u_int32		if_flags
1168 	)
1169 {
1170 	nic_rule *	rule;
1171 	int		isloopback;
1172 	int		iswildcard;
1173 
1174 	DPRINTF(4, ("interface_action: interface %s ",
1175 		    (if_name != NULL) ? if_name : "wildcard"));
1176 
1177 	iswildcard = is_wildcard_addr(if_addr);
1178 	isloopback = !!(INT_LOOPBACK & if_flags);
1179 
1180 	/*
1181 	 * Find any matching NIC rule from --interface / -I or ntp.conf
1182 	 * interface/nic rules.
1183 	 */
1184 	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1185 
1186 		switch (rule->match_type) {
1187 
1188 		case MATCH_ALL:
1189 			/* loopback and wildcard excluded from "all" */
1190 			if (isloopback || iswildcard)
1191 				break;
1192 			DPRINTF(4, ("nic all %s\n",
1193 			    action_text(rule->action)));
1194 			return rule->action;
1195 
1196 		case MATCH_IPV4:
1197 			if (IS_IPV4(if_addr)) {
1198 				DPRINTF(4, ("nic ipv4 %s\n",
1199 				    action_text(rule->action)));
1200 				return rule->action;
1201 			}
1202 			break;
1203 
1204 		case MATCH_IPV6:
1205 			if (IS_IPV6(if_addr)) {
1206 				DPRINTF(4, ("nic ipv6 %s\n",
1207 				    action_text(rule->action)));
1208 				return rule->action;
1209 			}
1210 			break;
1211 
1212 		case MATCH_WILDCARD:
1213 			if (iswildcard) {
1214 				DPRINTF(4, ("nic wildcard %s\n",
1215 				    action_text(rule->action)));
1216 				return rule->action;
1217 			}
1218 			break;
1219 
1220 		case MATCH_IFADDR:
1221 			if (rule->prefixlen != -1) {
1222 				if (addr_eqprefix(if_addr, &rule->addr,
1223 						  rule->prefixlen)) {
1224 
1225 					DPRINTF(4, ("subnet address match - %s\n",
1226 					    action_text(rule->action)));
1227 					return rule->action;
1228 				}
1229 			} else
1230 				if (SOCK_EQ(if_addr, &rule->addr)) {
1231 
1232 					DPRINTF(4, ("address match - %s\n",
1233 					    action_text(rule->action)));
1234 					return rule->action;
1235 				}
1236 			break;
1237 
1238 		case MATCH_IFNAME:
1239 			if (if_name != NULL
1240 #if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1241 			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1242 #else
1243 			    && !strcasecmp(if_name, rule->if_name)
1244 #endif
1245 			    ) {
1246 
1247 				DPRINTF(4, ("interface name match - %s\n",
1248 				    action_text(rule->action)));
1249 				return rule->action;
1250 			}
1251 			break;
1252 		}
1253 	}
1254 
1255 	/*
1256 	 * Unless explicitly disabled such as with "nic ignore ::1"
1257 	 * listen on loopback addresses.  Since ntpq and ntpdc query
1258 	 * "localhost" by default, which typically resolves to ::1 and
1259 	 * 127.0.0.1, it's useful to default to listening on both.
1260 	 */
1261 	if (isloopback) {
1262 		DPRINTF(4, ("default loopback listen\n"));
1263 		return ACTION_LISTEN;
1264 	}
1265 
1266 	/*
1267 	 * Treat wildcard addresses specially.  If there is no explicit
1268 	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1269 	 * default to drop.
1270 	 */
1271 	if (iswildcard) {
1272 		DPRINTF(4, ("default wildcard drop\n"));
1273 		return ACTION_DROP;
1274 	}
1275 
1276 	/*
1277 	 * Check for "virtual IP" (colon in the interface name) after
1278 	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1279 	 * does indeed listen on eth0:1's addresses.
1280 	 */
1281 	if (!listen_to_virtual_ips && if_name != NULL
1282 	    && (strchr(if_name, ':') != NULL)) {
1283 
1284 		DPRINTF(4, ("virtual ip - ignore\n"));
1285 		return ACTION_IGNORE;
1286 	}
1287 
1288 	/*
1289 	 * If there are no --interface/-I command-line options and no
1290 	 * interface/nic rules in ntp.conf, the default action is to
1291 	 * listen.  In the presence of rules from either, the default
1292 	 * is to ignore.  This implements ntpd's traditional listen-
1293 	 * every default with no interface listen configuration, and
1294 	 * ensures a single -I eth0 or "nic listen eth0" means do not
1295 	 * listen on any other addresses.
1296 	 */
1297 	if (NULL == nic_rule_list) {
1298 		DPRINTF(4, ("default listen\n"));
1299 		return ACTION_LISTEN;
1300 	}
1301 
1302 	DPRINTF(4, ("implicit ignore\n"));
1303 	return ACTION_IGNORE;
1304 }
1305 
1306 
1307 static void
1308 convert_isc_if(
1309 	isc_interface_t *isc_if,
1310 	endpt *itf,
1311 	u_short port
1312 	)
1313 {
1314 	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1315 	itf->ifindex = isc_if->ifindex;
1316 	itf->family = (u_short)isc_if->af;
1317 	AF(&itf->sin) = itf->family;
1318 	AF(&itf->mask) = itf->family;
1319 	AF(&itf->bcast) = itf->family;
1320 	SET_PORT(&itf->sin, port);
1321 	SET_PORT(&itf->mask, port);
1322 	SET_PORT(&itf->bcast, port);
1323 
1324 	if (IS_IPV4(&itf->sin)) {
1325 		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1326 		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1327 
1328 		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1329 			itf->flags |= INT_BROADCAST;
1330 			NSRCADR(&itf->bcast) =
1331 			    isc_if->broadcast.type.in.s_addr;
1332 		}
1333 	}
1334 #ifdef INCLUDE_IPV6_SUPPORT
1335 	else if (IS_IPV6(&itf->sin)) {
1336 		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1337 		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1338 
1339 		SET_SCOPE(&itf->sin, isc_if->address.zone);
1340 	}
1341 #endif /* INCLUDE_IPV6_SUPPORT */
1342 
1343 
1344 	/* Process the rest of the flags */
1345 
1346 	itf->flags |=
1347 		  ((INTERFACE_F_UP & isc_if->flags)
1348 			? INT_UP : 0)
1349 		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1350 			? INT_LOOPBACK : 0)
1351 		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1352 			? INT_PPP : 0)
1353 		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1354 			? INT_MULTICAST : 0)
1355 		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1356 			? INT_PRIVACY : 0)
1357 		;
1358 
1359 	/*
1360 	 * Clear the loopback flag if the address is not localhost.
1361 	 * http://bugs.ntp.org/1683
1362 	 */
1363 	if ((INT_LOOPBACK & itf->flags) && !IS_LOOPBACK_ADDR(&itf->sin)) {
1364 		itf->flags &= ~INT_LOOPBACK;
1365 	}
1366 }
1367 
1368 
1369 /*
1370  * refresh_interface
1371  *
1372  * some OSes have been observed to keep
1373  * cached routes even when more specific routes
1374  * become available.
1375  * this can be mitigated by re-binding
1376  * the socket.
1377  */
1378 static int
1379 refresh_interface(
1380 	endpt *	iface
1381 	)
1382 {
1383 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1384 	if (iface->fd != INVALID_SOCKET) {
1385 		int bcast = (iface->flags & INT_BCASTXMIT) != 0;
1386 		/* as we forcibly close() the socket remove the
1387 		   broadcast permission indication */
1388 		if (bcast)
1389 			socket_broadcast_disable(iface, &iface->sin);
1390 
1391 		close_and_delete_fd_from_list(iface->fd);
1392 
1393 		/* create new socket picking up a new first hop binding
1394 		   at connect() time */
1395 		iface->fd = open_socket(&iface->sin,
1396 					    bcast, 0, iface);
1397 		 /*
1398 		  * reset TTL indication so TTL is is set again
1399 		  * next time around
1400 		  */
1401 		iface->last_ttl = 0;
1402 		return (iface->fd != INVALID_SOCKET);
1403 	} else
1404 		return 0;	/* invalid sockets are not refreshable */
1405 #else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1406 	return (iface->fd != INVALID_SOCKET);
1407 #endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1408 }
1409 
1410 /*
1411  * interface_update - externally callable update function
1412  */
1413 void
1414 interface_update(
1415 	interface_receiver_t	receiver,
1416 	void *			data
1417 	)
1418 {
1419 	int new_interface_found;
1420 
1421 	if (scan_addrs_once) {
1422 		return;
1423 	}
1424 	BLOCKIO();
1425 	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1426 	UNBLOCKIO();
1427 
1428 	if (!new_interface_found) {
1429 		return;
1430 	}
1431 #ifdef DEBUG
1432 	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1433 #endif
1434 	interrupt_worker_sleep();
1435 }
1436 
1437 
1438 /*
1439  * sau_from_netaddr() - convert network address on-wire formats.
1440  * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1441  */
1442 void
1443 sau_from_netaddr(
1444 	sockaddr_u *psau,
1445 	const isc_netaddr_t *pna
1446 	)
1447 {
1448 	ZERO_SOCK(psau);
1449 	AF(psau) = (u_short)pna->family;
1450 	switch (pna->family) {
1451 
1452 	case AF_INET:
1453 		psau->sa4.sin_addr = pna->type.in;
1454 		break;
1455 
1456 	case AF_INET6:
1457 		psau->sa6.sin6_addr = pna->type.in6;
1458 		break;
1459 	}
1460 }
1461 
1462 
1463 static int
1464 is_wildcard_addr(
1465 	const sockaddr_u *psau
1466 	)
1467 {
1468 	if (IS_IPV4(psau) && !NSRCADR(psau))
1469 		return 1;
1470 
1471 #ifdef INCLUDE_IPV6_SUPPORT
1472 	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1473 		return 1;
1474 #endif
1475 
1476 	return 0;
1477 }
1478 
1479 
1480 isc_boolean_t
1481 is_linklocal(
1482 	sockaddr_u *		psau
1483 )
1484 {
1485 	struct in6_addr *	p6addr;
1486 
1487 	if (IS_IPV6(psau)) {
1488 		p6addr = &psau->sa6.sin6_addr;
1489 		if (   IN6_IS_ADDR_LINKLOCAL(p6addr)
1490 		    || IN6_IS_ADDR_SITELOCAL(p6addr)) {
1491 
1492 			return TRUE;
1493 		}
1494 	} else if (IS_IPV4(psau)) {
1495 		/* autoconf are link-local 169.254.0.0/16 */
1496 		if (IS_AUTOCONF(psau)) {
1497 			return TRUE;
1498 		}
1499 	}
1500 	return FALSE;
1501 }
1502 
1503 
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse() - switch SO_REUSEADDR on or off for the
 * wildcard socket of the given address family.
 *
 * Nothing happens when ANY_INTERFACE_BYFAM(family) yields no endpoint
 * or the endpoint has no socket.  A setsockopt() failure is logged
 * and otherwise ignored.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	endpt *	wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (0 != setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
			    (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1534 
1535 static isc_boolean_t
1536 check_flags(
1537 	sockaddr_u *psau,
1538 	const char *name,
1539 	u_int32 flags
1540 	)
1541 {
1542 #if defined(SIOCGIFAFLAG_IN)
1543 	struct ifreq ifr;
1544 	int fd;
1545 
1546 	if (psau->sa.sa_family != AF_INET)
1547 		return ISC_FALSE;
1548 	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1549 		return ISC_FALSE;
1550 	ZERO(ifr);
1551 	memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1552 	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1553 	if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1554 		close(fd);
1555 		return ISC_FALSE;
1556 	}
1557 	close(fd);
1558 	if ((ifr.ifr_addrflags & flags) != 0)
1559 		return ISC_TRUE;
1560 #endif	/* SIOCGIFAFLAG_IN */
1561 	return ISC_FALSE;
1562 }
1563 
1564 static isc_boolean_t
1565 check_flags6(
1566 	sockaddr_u *psau,
1567 	const char *name,
1568 	u_int32 flags6
1569 	)
1570 {
1571 #if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1572 	struct in6_ifreq ifr6;
1573 	int fd;
1574 
1575 	if (psau->sa.sa_family != AF_INET6)
1576 		return ISC_FALSE;
1577 	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1578 		return ISC_FALSE;
1579 	ZERO(ifr6);
1580 	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1581 	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1582 	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1583 		close(fd);
1584 		return ISC_FALSE;
1585 	}
1586 	close(fd);
1587 	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1588 		return ISC_TRUE;
1589 #endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1590 	return ISC_FALSE;
1591 }
1592 
1593 static isc_boolean_t
1594 is_anycast(
1595 	sockaddr_u *psau,
1596 	const char *name
1597 	)
1598 {
1599 #ifdef IN6_IFF_ANYCAST
1600 	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1601 #else
1602 	return ISC_FALSE;
1603 #endif
1604 }
1605 
1606 static isc_boolean_t
1607 is_valid(
1608 	sockaddr_u *psau,
1609 	const char *name
1610 	)
1611 {
1612 	u_int32 flags;
1613 
1614 	flags = 0;
1615 	switch (psau->sa.sa_family) {
1616 	case AF_INET:
1617 #ifdef IN_IFF_DETACHED
1618 		flags |= IN_IFF_DETACHED;
1619 #endif
1620 #ifdef IN_IFF_TENTATIVE
1621 		flags |= IN_IFF_TENTATIVE;
1622 #endif
1623 		return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1624 	case AF_INET6:
1625 #ifdef IN6_IFF_DEPARTED
1626 		flags |= IN6_IFF_DEPARTED;
1627 #endif
1628 #ifdef IN6_IFF_DETACHED
1629 		flags |= IN6_IFF_DETACHED;
1630 #endif
1631 #ifdef IN6_IFF_TENTATIVE
1632 		flags |= IN6_IFF_TENTATIVE;
1633 #endif
1634 		return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1635 	default:
1636 		return ISC_FALSE;
1637 	}
1638 }
1639 
1640 /*
1641  * update_interface strategy
1642  *
1643  * toggle configuration phase
1644  *
1645  * Phase 1a:
1646  * forall currently existing interfaces
1647  *   if address is known:
1648  *	drop socket - rebind again
1649  *
1650  *   if address is NOT known:
1651  *	Add address to list of new addresses
1652  *
1653  * Phase 1b:
1654  *	Scan the list of new addresses marking IPv6 link-local addresses
1655  *	   which also have a global v6 address using the same OS ifindex.
1656  *	Attempt to create a new interface entry
1657  *
1658  * Phase 2:
1659  * forall currently known non MCAST and WILDCARD interfaces
1660  *   if interface does not match configuration phase (not seen in phase 1):
1661  *	remove interface from known interface list
1662  *	forall peers associated with this interface
1663  *         disconnect peer from this interface
1664  *
1665  * Phase 3:
1666  *   attempt to re-assign interfaces to peers
1667  *
1668  */
1669 
1670 static int
1671 update_interfaces(
1672 	u_short			port,
1673 	interface_receiver_t	receiver,
1674 	void *			data
1675 	)
1676 {
1677 	isc_mem_t *		mctx = (void *)-1;
1678 	interface_info_t	ifi;
1679 	isc_interfaceiter_t *	iter;
1680 	isc_result_t		result;
1681 	isc_interface_t		isc_if;
1682 	int			new_interface_found;
1683 	unsigned int		family;
1684 	endpt			enumep;
1685 	endpt *			ep;
1686 	endpt *			next_ep;
1687 	endpt *			newaddrs;
1688 	endpt *			newaddrs_tail;
1689 	endpt *			ep2;
1690 
1691 	DPRINTF(3, ("update_interfaces(%d)\n", port));
1692 
1693 	/*
1694 	 * phase 1a - scan OS local addresses
1695 	 * - update those that ntpd already knows
1696 	 * - build a list of newly-discovered addresses.
1697 	 */
1698 
1699 	new_interface_found = FALSE;
1700 	nonlocal_v4_addr_up = nonlocal_v6_addr_up = FALSE;
1701 	iter = NULL;
1702 	newaddrs = newaddrs_tail = NULL;
1703 	result = isc_interfaceiter_create(mctx, &iter);
1704 
1705 	if (result != ISC_R_SUCCESS)
1706 		return 0;
1707 
1708 	/*
1709 	 * Toggle system interface scan phase to find untouched
1710 	 * interfaces to be deleted.
1711 	 */
1712 	sys_interphase ^= 0x1;
1713 
1714 	for (result = isc_interfaceiter_first(iter);
1715 	     ISC_R_SUCCESS == result;
1716 	     result = isc_interfaceiter_next(iter)) {
1717 
1718 		result = isc_interfaceiter_current(iter, &isc_if);
1719 
1720 		if (result != ISC_R_SUCCESS) {
1721 			break;
1722 		}
1723 		/* See if we have a valid family to use */
1724 		family = isc_if.address.family;
1725 		if (AF_INET != family && AF_INET6 != family)
1726 			continue;
1727 		if (AF_INET == family && !ipv4_works)
1728 			continue;
1729 		if (AF_INET6 == family && !ipv6_works)
1730 			continue;
1731 
1732 		/* create prototype */
1733 		init_interface(&enumep);
1734 
1735 		convert_isc_if(&isc_if, &enumep, port);
1736 
1737 		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1738 
1739 		/*
1740 		 * Check if and how we are going to use the interface.
1741 		 */
1742 		switch (interface_action(enumep.name, &enumep.sin,
1743 					 enumep.flags)) {
1744 
1745 		case ACTION_IGNORE:
1746 			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1747 				    enumep.name, stoa(&enumep.sin)));
1748 			continue;
1749 
1750 		case ACTION_LISTEN:
1751 			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1752 				    enumep.name, stoa(&enumep.sin)));
1753 			enumep.ignore_packets = ISC_FALSE;
1754 			break;
1755 
1756 		case ACTION_DROP:
1757 			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1758 				    enumep.name, stoa(&enumep.sin)));
1759 			enumep.ignore_packets = ISC_TRUE;
1760 			break;
1761 		}
1762 
1763 		 /* interfaces must be UP to be usable */
1764 		if (!(enumep.flags & INT_UP)) {
1765 			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1766 				    enumep.name, stoa(&enumep.sin)));
1767 			continue;
1768 		}
1769 
1770 		/*
1771 		 * skip any interfaces UP and bound to a wildcard
1772 		 * address - some dhcp clients produce that in the
1773 		 * wild
1774 		 */
1775 		if (is_wildcard_addr(&enumep.sin))
1776 			continue;
1777 
1778 		if (is_anycast(&enumep.sin, isc_if.name))
1779 			continue;
1780 
1781 		/*
1782 		 * skip any address that is an invalid state to be used
1783 		 */
1784 		if (!is_valid(&enumep.sin, isc_if.name))
1785 			continue;
1786 
1787 		/*
1788 		 * Keep track of having non-linklocal connectivity
1789 		 * for IPv4 and IPv6 so we don't solicit pool hosts
1790 		 * when it can't work.
1791 		 */
1792 		if (   !(INT_LOOPBACK & enumep.flags)
1793 		    && !is_linklocal(&enumep.sin)) {
1794 			if (IS_IPV6(&enumep.sin)) {
1795 				nonlocal_v6_addr_up = TRUE;
1796 			} else {
1797 				nonlocal_v4_addr_up = TRUE;
1798 			}
1799 		}
1800 		/*
1801 		 * map to local *address* in order to map all duplicate
1802 		 * interfaces to an endpt structure with the appropriate
1803 		 * socket.  Our name space is (ip-address), NOT
1804 		 * (interface name, ip-address).
1805 		 */
1806 		ep = getinterface(&enumep.sin, INT_WILDCARD);
1807 
1808 		if (NULL == ep) {
1809 			ep = emalloc(sizeof(*ep));
1810 			memcpy(ep, &enumep, sizeof(*ep));
1811 			if (NULL != newaddrs_tail) {
1812 				newaddrs_tail->elink = ep;
1813 				newaddrs_tail = ep;
1814 			} else {
1815 				newaddrs_tail = newaddrs = ep;
1816 			}
1817 			continue;
1818 		}
1819 
1820 		if (!refresh_interface(ep)) {
1821 			/*
1822 			 * Refreshing failed, we will delete the endpt
1823 			 * in phase 2 because it was not marked current.
1824 			 * We can bind to the address as the refresh
1825 			 * code already closed the endpt's socket.
1826 			*/
1827 			continue;
1828 		}
1829 		/*
1830 		 * found existing and up to date interface -
1831 		 * mark present.
1832 		 */
1833 		if (ep->phase != sys_interphase) {
1834 			/*
1835 			 * On a new round we reset the name so
1836 			 * the interface name shows up again if
1837 			 * this address is no longer shared.
1838 			 * We reset ignore_packets from the
1839 			 * new prototype to respect any runtime
1840 			 * changes to the nic rules.
1841 			 */
1842 			strlcpy(ep->name, enumep.name, sizeof(ep->name));
1843 			ep->ignore_packets = enumep.ignore_packets;
1844 		} else {
1845 			/*
1846 			 * DLH: else branch might be dead code from
1847 			 * when both address and name were compared.
1848 			 */
1849 			msyslog(LOG_INFO, "%s on %u %s -> *multiple*",
1850 				stoa(&ep->sin), ep->ifnum, ep->name);
1851 			/* name collision - rename interface */
1852 			strlcpy(ep->name, "*multiple*", sizeof(ep->name));
1853 		}
1854 
1855 		DPRINT_INTERFACE(4, (ep, "updating ", " present\n"));
1856 
1857 		if (ep->ignore_packets != enumep.ignore_packets) {
1858 			/*
1859 			 * We have conflicting configurations for the
1860 			 * address. This can happen with
1861 			 * -I <interfacename> on the command line for an
1862 			 *  interface that shares its address with other
1863 			 * interfaces. We cannot disambiguate incoming
1864 			 * packets delivered to this socket without extra
1865 			 * syscalls/features.  Note this is an unusual
1866 			 * configuration where several interfaces share
1867 			 * an address but filtering via interface name is
1868 			 * attempted.  We resolve the config conflict by
1869 			 * disabling the processing of received packets.
1870 			 * This leads to no service on the address where
1871 			 * the conflict occurs.
1872 			 */
1873 			msyslog(LOG_WARNING,
1874 				"conflicting listen configuration between"
1875 				" %s and %s for %s, disabled",
1876 				enumep.name, ep->name, stoa(&enumep.sin));
1877 
1878 			ep->ignore_packets = TRUE;
1879 		}
1880 
1881 		ep->phase = sys_interphase;
1882 
1883 		ifi.action = IFS_EXISTS;
1884 		ifi.ep = ep;
1885 		if (receiver != NULL) {
1886 			(*receiver)(data, &ifi);
1887 		}
1888 	}
1889 
1890 	isc_interfaceiter_destroy(&iter);
1891 
1892 	/*
1893 	 * Phase 1b
1894 	 */
1895 	for (ep = newaddrs; ep != NULL; ep = ep->elink) {
1896 		if (IS_IPV6(&ep->sin) && is_linklocal(&ep->sin)) {
1897 			for (ep2 = newaddrs; ep2 != NULL; ep2 = ep2->elink) {
1898 				if (   IS_IPV6(&ep2->sin)
1899 				    && ep != ep2
1900 				    && !is_linklocal(&ep2->sin)) {
1901 
1902 					ep->flags |= INT_LL_OF_GLOB;
1903 					break;
1904 				}
1905 			}
1906 		}
1907 	}
1908 	for (ep2 = newaddrs; ep2 != NULL; ep2 = next_ep) {
1909 		next_ep = ep2->elink;
1910 		ep2->elink = NULL;
1911 		ep = create_interface(port, ep2);
1912 		if (ep != NULL) {
1913 			ifi.action = IFS_CREATED;
1914 			ifi.ep = ep;
1915 			if (receiver != NULL) {
1916 				(*receiver)(data, &ifi);
1917 			}
1918 			new_interface_found = TRUE;
1919 			DPRINT_INTERFACE(3,
1920 				(ep, "updating ", " new - created\n"));
1921 		}
1922 		else {
1923 			DPRINT_INTERFACE(3,
1924 				(ep, "updating ", " new - FAILED"));
1925 
1926 			msyslog(LOG_ERR,
1927 				"cannot bind address %s",
1928 				stoa(&ep->sin));
1929 		}
1930 		free(ep2);
1931 	}
1932 
1933 	/*
1934 	 * phase 2 - delete gone interfaces - reassigning peers to
1935 	 * other interfaces
1936 	 */
1937 	for (ep = ep_list; ep != NULL; ep = next_ep) {
1938 		next_ep = ep->elink;
1939 
1940 		/*
1941 		 * if phase does not match sys_phase this interface was
1942 		 * not enumerated during the last interface scan - so it
1943 		 * is gone and will be deleted here unless it did not
1944 		 * originate from interface enumeration (INT_WILDCARD,
1945 		 * INT_MCASTIF).
1946 		 */
1947 		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1948 		    ep->phase == sys_interphase)
1949 			continue;
1950 
1951 		DPRINT_INTERFACE(3, (ep, "updating ",
1952 				     "GONE - deleting\n"));
1953 		remove_interface(ep);
1954 
1955 		ifi.action = IFS_DELETED;
1956 		ifi.ep = ep;
1957 		if (receiver != NULL) {
1958 			(*receiver)(data, &ifi);
1959 		}
1960 		/* disconnect peers from deleted endpt. */
1961 		while (ep->peers != NULL) {
1962 			set_peerdstadr(ep->peers, NULL);
1963 		}
1964 		/*
1965 		 * update globals in case we lose
1966 		 * a loopback interface
1967 		 */
1968 		if (ep == loopback_interface) {
1969 			loopback_interface = NULL;
1970 		}
1971 		delete_interface(ep);
1972 	}
1973 
1974 	/*
1975 	 * phase 3 - re-configure as the world has possibly changed
1976 	 *
1977 	 * never ever make this conditional again - it is needed to track
1978 	 * routing updates. see bug #2506
1979 	 */
1980 	refresh_all_peerinterfaces();
1981 
1982 	if (sys_bclient) {
1983 		io_setbclient();
1984 	}
1985 #ifdef MCAST
1986 	/*
1987 	 * Check multicast interfaces and try to join multicast groups if
1988 	 * not joined yet.
1989 	 */
1990 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
1991 		remaddr_t *entry;
1992 
1993 		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags)) {
1994 			continue;
1995 		}
1996 		/* Find remote address that was linked to this interface */
1997 		for (entry = remoteaddr_list;
1998 		     entry != NULL;
1999 		     entry = entry->link) {
2000 			if (entry->ep == ep) {
2001 				if (socket_multicast_enable(ep, &entry->addr)) {
2002 					msyslog(LOG_INFO,
2003 						"Joined %s socket to multicast group %s",
2004 						stoa(&ep->sin),
2005 						stoa(&entry->addr));
2006 				}
2007 				break;
2008 			}
2009 		}
2010 	}
2011 #endif /* MCAST */
2012 
2013 	return new_interface_found;
2014 }
2015 
2016 
2017 /*
2018  * create_sockets - create a socket for each interface plus a default
2019  *			socket for when we don't know where to send
2020  */
2021 static int
2022 create_sockets(
2023 	u_short port
2024 	)
2025 {
2026 #ifndef HAVE_IO_COMPLETION_PORT
2027 	/*
2028 	 * I/O Completion Ports don't care about the select and FD_SET
2029 	 */
2030 	maxactivefd = 0;
2031 	FD_ZERO(&activefds);
2032 #endif
2033 
2034 	DPRINTF(2, ("create_sockets(%d)\n", port));
2035 
2036 	create_wildcards(port);
2037 
2038 	update_interfaces(port, NULL, NULL);
2039 
2040 	/*
2041 	 * Now that we have opened all the sockets, turn off the reuse
2042 	 * flag for security.
2043 	 */
2044 	set_reuseaddr(0);
2045 
2046 	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2047 
2048 	return ninterfaces;
2049 }
2050 
2051 /*
2052  * create_interface - create a new interface for a given prototype
2053  *		      binding the socket.
2054  */
2055 static endpt *
2056 create_interface(
2057 	u_short	port,
2058 	endpt *	protot
2059 	)
2060 {
2061 	sockaddr_u	resmask;
2062 	endpt *		iface;
2063 	int/*BOOL*/	success;
2064 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2065 	remaddr_t *	entry;
2066 	remaddr_t *	next_entry;
2067 #endif
2068 	DPRINTF(2, ("create_interface(%s)\n", sptoa(&protot->sin)));
2069 
2070 	/* build an interface */
2071 	iface = new_interface(protot);
2072 
2073 	/*
2074 	 * create socket
2075 	 */
2076 	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2077 
2078 	if (iface->fd != INVALID_SOCKET)
2079 		log_listen_address(iface);
2080 
2081 	if ((INT_BROADCAST & iface->flags)
2082 	    && iface->bfd != INVALID_SOCKET)
2083 		msyslog(LOG_INFO, "Listening on broadcast address %s",
2084 			sptoa(&iface->bcast));
2085 
2086 	if (INVALID_SOCKET == iface->fd
2087 	    && INVALID_SOCKET == iface->bfd) {
2088 		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s",
2089 			iface->name,
2090 			iface->ifnum,
2091 			sptoa(&iface->sin));
2092 		delete_interface(iface);
2093 		return NULL;
2094 	}
2095 
2096 	/*
2097 	 * Blacklist our own addresses, no use talking to ourself
2098 	 */
2099 	SET_HOSTMASK(&resmask, AF(&iface->sin));
2100 	success = hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2101 				-4, RESM_NTPONLY | RESM_INTERFACE,
2102 				RES_IGNORE, 0);
2103 	if (!success) {
2104 		msyslog(LOG_ERR,
2105 			"unable to self-restrict %s", stoa(&iface->sin));
2106 	}
2107 
2108 	/*
2109 	 * set globals with the first found
2110 	 * loopback interface of the appropriate class
2111 	 */
2112 	if (NULL == loopback_interface && AF_INET == iface->family
2113 	    && (INT_LOOPBACK & iface->flags))
2114 		loopback_interface = iface;
2115 
2116 	/*
2117 	 * put into our interface list
2118 	 */
2119 	add_addr_to_list(&iface->sin, iface);
2120 	add_interface(iface);
2121 
2122 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2123 	/*
2124 	 * Join any previously-configured compatible multicast groups.
2125 	 */
2126 	if (INT_MULTICAST & iface->flags &&
2127 	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2128 	    !iface->ignore_packets) {
2129 		for (entry = remoteaddr_list;
2130 		     entry != NULL;
2131 		     entry = next_entry) {
2132 			next_entry = entry->link;
2133 			if (AF(&iface->sin) != AF(&entry->addr) ||
2134 			    !IS_MCAST(&entry->addr))
2135 				continue;
2136 			if (socket_multicast_enable(iface,
2137 						    &entry->addr))
2138 				msyslog(LOG_INFO,
2139 					"Joined %s socket to multicast group %s",
2140 					stoa(&iface->sin),
2141 					stoa(&entry->addr));
2142 			else
2143 				msyslog(LOG_ERR,
2144 					"Failed to join %s socket to multicast group %s",
2145 					stoa(&iface->sin),
2146 					stoa(&entry->addr));
2147 		}
2148 	}
2149 #endif	/* MCAST && MCAST_NONEWSOCKET */
2150 
2151 	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2152 	return iface;
2153 }
2154 
2155 
#ifdef DEBUG
/*
 * iflags_str() - debug-only rendering of an INT_* flag word as text.
 *
 * The name of every set flag the table knows is appended, in table
 * order, to a buffer obtained with LIB_GETBUF().  Each reported bit
 * is cleared from the local copy so DEBUG_INVARIANT() can verify that
 * no unknown bit remained.  Returns that buffer.
 */
const char *
iflags_str(
	u_int32 iflags
)
{
	/* flag bit -> printable name, in output order */
	static const struct {
		u_int32		bit;
		const char *	label;
	} known[] = {
		{ INT_UP,		"up" },
		{ INT_PPP,		"ppp" },
		{ INT_LOOPBACK,		"loopback" },
		{ INT_BROADCAST,	"broadcast" },
		{ INT_MULTICAST,	"multicast" },
		{ INT_BCASTOPEN,	"bcastopen" },
		{ INT_MCASTOPEN,	"mcastopen" },
		{ INT_WILDCARD,		"wildcard" },
		{ INT_MCASTIF,		"mcastif" },
		{ INT_PRIVACY,		"IPv6privacy" },
		{ INT_BCASTXMIT,	"bcastxmit" },
		{ INT_LL_OF_GLOB,	"linklocal-w-global" }
	};
	const size_t	sz = LIB_BUFLENGTH;
	char *		out;
	size_t		i;

	LIB_GETBUF(out);
	out[0] = '\0';

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (iflags & known[i].bit) {
			CLEAR_BIT_IF_DEBUG(known[i].bit, iflags);
			append_flagstr(out, sz, known[i].label);
		}
	}

	DEBUG_INVARIANT(!iflags);

	return out;
}
#endif	/* DEBUG */
2233 
2234 
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse() - turn on SO_EXCLUSIVEADDRUSE for a socket.
 *
 * A failure is logged, except for the one case described in the body
 * where it is expected on old Windows versions.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int	enable = 1;
#ifdef SYS_WINNT
	DWORD	winerr;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (void *)&enable, sizeof(enable))) {
		return;
	}

#ifdef SYS_WINNT
	/*
	 * Before Windows XP, setting SO_EXCLUSIVEADDRUSE can fail with
	 * WSAEINVAL depending on service pack level and on whether the
	 * account belongs to the Administrators group.  Stay silent
	 * about that particular failure on versions prior to XP (5.1).
	 */
	winerr = GetLastError();

	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
	    && WSAEINVAL == winerr) {
		return;
	}

	/* put the error code back for the %m in the log message */
	SetLastError(winerr);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2273 
2274 
2275 /*
2276  * set_reuseaddr() - set/clear REUSEADDR on all sockets
2277  *			NB possible hole - should we be doing this on broadcast
2278  *			fd's also?
2279  */
2280 static void
2281 set_reuseaddr(
2282 	int flag
2283 	)
2284 {
2285 #ifndef SO_EXCLUSIVEADDRUSE
2286 	endpt *ep;
2287 
2288 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2289 		if (ep->flags & INT_WILDCARD)
2290 			continue;
2291 
2292 		/*
2293 		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2294 		 * configured but not present
2295 		 */
2296 		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2297 			    ep->name, stoa(&ep->sin),
2298 			    flag ? "on" : "off"));
2299 
2300 		if (ep->fd != INVALID_SOCKET) {
2301 			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2302 				       (void *)&flag, sizeof(flag))) {
2303 				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2304 					stoa(&ep->sin), flag ? "on" : "off");
2305 			}
2306 		}
2307 	}
2308 #endif /* ! SO_EXCLUSIVEADDRUSE */
2309 }
2310 
/*
 * enable_broadcast() - enable broadcast on an endpoint's socket
 *
 * This is just a wrapper around an internal function so we can
 * make other changes as necessary later on
 *
 * iface - endpoint whose socket (iface->fd) is configured
 * baddr - broadcast address, handed through unchanged
 *
 * The result of socket_broadcast_enable() is discarded.  Without
 * OPEN_BCAST_SOCKET this function does nothing.
 */
void
enable_broadcast(
	endpt *		iface,
	sockaddr_u *	baddr
	)
{
#ifdef OPEN_BCAST_SOCKET
	socket_broadcast_enable(iface, iface->fd, baddr);
#endif
}
2325 
2326 #ifdef OPEN_BCAST_SOCKET
2327 /*
2328  * Enable a broadcast address to a given socket
2329  * The socket is in the ep_list all we need to do is enable
2330  * broadcasting. It is not this function's job to select the socket
2331  */
2332 static isc_boolean_t
2333 socket_broadcast_enable(
2334 	endpt *		iface,
2335 	SOCKET		fd,
2336 	sockaddr_u *	baddr
2337 	)
2338 {
2339 #ifdef SO_BROADCAST
2340 	int on = 1;
2341 
2342 	if (IS_IPV4(baddr)) {
2343 		/* if this interface can support broadcast, set SO_BROADCAST */
2344 		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2345 			       (void *)&on, sizeof(on)))
2346 			msyslog(LOG_ERR,
2347 				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2348 				stoa(baddr));
2349 		else
2350 			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2351 				    fd, stoa(baddr)));
2352 	}
2353 	iface->flags |= INT_BCASTXMIT;
2354 	return ISC_TRUE;
2355 #else
2356 	return ISC_FALSE;
2357 #endif /* SO_BROADCAST */
2358 }
2359 
2360 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2361 /*
2362  * Remove a broadcast address from a given socket
2363  * The socket is in the ep_list all we need to do is disable
2364  * broadcasting. It is not this function's job to select the socket
2365  */
2366 static isc_boolean_t
2367 socket_broadcast_disable(
2368 	endpt *	iface,
2369 	sockaddr_u *		baddr
2370 	)
2371 {
2372 #ifdef SO_BROADCAST
2373 	int off = 0;	/* This seems to be OK as an int */
2374 
2375 	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2376 	    SO_BROADCAST, (void *)&off, sizeof(off)))
2377 		msyslog(LOG_ERR,
2378 			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2379 			stoa(baddr));
2380 
2381 	iface->flags &= ~INT_BCASTXMIT;
2382 	return ISC_TRUE;
2383 #else
2384 	return ISC_FALSE;
2385 #endif /* SO_BROADCAST */
2386 }
2387 #endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2388 
2389 #endif /* OPEN_BCAST_SOCKET */
2390 
2391 
2392 /*
2393  * Check to see if the address is a multicast address
2394  */
2395 static isc_boolean_t
2396 addr_ismulticast(
2397 	sockaddr_u *maddr
2398 	)
2399 {
2400 	isc_boolean_t result;
2401 
2402 #ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2403 	/*
2404 	 * If we don't have IPV6 support any IPV6 addr is not multicast
2405 	 */
2406 	if (IS_IPV6(maddr))
2407 		result = ISC_FALSE;
2408 	else
2409 #endif
2410 		result = IS_MCAST(maddr);
2411 
2412 	if (!result)
2413 		DPRINTF(4, ("address %s is not multicast\n",
2414 			    stoa(maddr)));
2415 
2416 	return result;
2417 }
2418 
/*
 * Multicast servers need to set the appropriate Multicast interface
 * socket option in order for it to know which interface to use to
 * send the multicast packet.
 *
 * iface - endpoint whose socket (iface->fd) is configured
 * maddr - multicast address; must have the same address family as
 *	   iface->sin and is otherwise only used in log messages
 *
 * The code below only turns multicast loopback off (IP_MULTICAST_LOOP
 * or IPV6_MULTICAST_LOOP, chosen by address family); it does not set
 * an outgoing-interface option.  A setsockopt() failure is logged and
 * otherwise ignored.  Without MCAST the function does nothing.
 */
void
enable_multicast_if(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
#ifdef MCAST
#ifdef IP_MULTICAST_LOOP
	TYPEOF_IP_MULTICAST_LOOP off = 0;
#endif
#if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
	u_int off6 = 0;
#endif

	/* caller must pair an endpoint and a group of the same family */
	REQUIRE(AF(maddr) == AF(&iface->sin));

	switch (AF(&iface->sin)) {

	case AF_INET:
#ifdef IP_MULTICAST_LOOP
		/*
		 * Don't send back to itself, but allow failure to set
		 */
		if (setsockopt(iface->fd, IPPROTO_IP,
			       IP_MULTICAST_LOOP,
			       (void *)&off,
			       sizeof(off))) {

			msyslog(LOG_ERR,
				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
				iface->fd, stoa(&iface->sin),
				stoa(maddr));
		}
#endif
		break;

	case AF_INET6:
#ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
#ifdef IPV6_MULTICAST_LOOP
		/*
		 * Don't send back to itself, but allow failure to set
		 */
		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_MULTICAST_LOOP,
			       (void *) &off6, sizeof(off6))) {

			msyslog(LOG_ERR,
				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
				iface->fd, stoa(&iface->sin),
				stoa(maddr));
		}
#endif
		break;
#else
		/* no IPv6 multicast support compiled in: nothing to do */
		return;
#endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}
	return;
#endif
}
2484 
/*
 * Add a multicast address to a given socket
 * The socket is in the ep_list all we need to do is enable
 * multicasting. It is not this function's job to select the socket
 *
 * iface - endpoint whose socket (iface->fd) joins the group
 * maddr - multicast group address to join
 *
 * Returns ISC_FALSE when the join setsockopt() fails, or for an IPv6
 * address when INCLUDE_IPV6_MULTICAST_SUPPORT is not defined.
 * Otherwise sets INT_MCASTOPEN on the endpoint, increments its
 * num_mcast count and returns ISC_TRUE; an address family other than
 * AF_INET/AF_INET6 also takes this path because the switch has no
 * default case.
 *
 * A failed join is reported through DPRINTF at debug level 2 only,
 * not through msyslog().
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		mreq;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	mreq6;
# endif
	switch (AF(maddr)) {

	case AF_INET:
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		/* INADDR_ANY: no specific local interface is named */
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd,
			       IPPROTO_IP,
			       IP_ADD_MEMBERSHIP,
			       (void *)&mreq,
			       sizeof(mreq))) {
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				mreq.imr_multiaddr.s_addr,
				mreq.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq.imr_multiaddr.s_addr,
			    mreq.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Enable reception of multicast packets.
		 * If the address is link-local we can get the
		 * interface index from the scope id. Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_JOIN_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)",
				iface->fd, stoa(&iface->sin),
				mreq6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq6.ipv6mr_interface, stoa(maddr)));
		/* last case of the switch: falls out without a break */
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}
	/* book-keeping read back by socket_multicast_disable() */
	iface->flags |= INT_MCASTOPEN;
	iface->num_mcast++;

	return ISC_TRUE;
}
#endif	/* MCAST */
2561 
2562 
/*
 * Remove a multicast address from a given socket
 * The socket is in the ep_list all we need to do is disable
 * multicasting. It is not this function's job to select the socket
 *
 * iface - endpoint whose socket (iface->fd) leaves the group
 * maddr - multicast group address to leave
 *
 * Returns ISC_TRUE without touching the socket when maddr is not
 * found by find_addr_in_list().  Returns ISC_FALSE when the leave
 * setsockopt() fails (logged via msyslog()), or for an IPv6 address
 * when INCLUDE_IPV6_MULTICAST_SUPPORT is not defined; in those cases
 * neither num_mcast nor INT_MCASTOPEN is changed.  Otherwise
 * decrements num_mcast, clears INT_MCASTOPEN once the count is no
 * longer positive, and returns ISC_TRUE.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	endpt *	iface,
	sockaddr_u *		maddr
	)
{
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq mreq6;
# endif
	struct ip_mreq mreq;

	if (find_addr_in_list(maddr) == NULL) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	switch (AF(maddr)) {

	case AF_INET:
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		/*
		 * NOTE(review): socket_multicast_enable() joins with
		 * imr_interface set to INADDR_ANY, whereas the drop
		 * below names the endpoint address -- confirm that
		 * the two are meant to differ.
		 */
		mreq.imr_interface = SOCK_ADDR4(&iface->sin);
		if (setsockopt(iface->fd, IPPROTO_IP,
			       IP_DROP_MEMBERSHIP, (void *)&mreq,
			       sizeof(mreq))) {

			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
		break;
	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Disable reception of multicast packets
		 * If the address is link-local we can get the
		 * interface index from the scope id.  Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_LEAVE_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {

			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* one group fewer on this socket; drop the flag with the last */
	iface->num_mcast--;
	if (iface->num_mcast <= 0) {
		iface->flags &= ~INT_MCASTOPEN;
	}
	return ISC_TRUE;
}
#endif	/* MCAST */
2640 
2641 
/*
 * io_setbclient - open the broadcast client sockets
 *
 * Walks ep_list and, for every endpoint that is neither wildcard nor
 * loopback, is not ignoring packets and has INT_BROADCAST set, tries
 * to open a socket bound to the endpoint's broadcast address
 * (ep->bcast) and stores it in ep->bfd.  SO_REUSEADDR is switched on
 * for the existing endpoint sockets while this happens and switched
 * off again afterwards.
 *
 * broadcast_client_enabled is set to ISC_TRUE if at least one endpoint
 * ends up counted as listening (newly opened or already flagged
 * INT_BCASTOPEN), ISC_FALSE otherwise.  The "no broadcast interfaces"
 * error is logged only if at least one IPv4 endpoint exists.
 *
 * Without OPEN_BCAST_SOCKET only an error message is logged.
 */
void
io_setbclient(void)
{
#ifdef OPEN_BCAST_SOCKET
	endpt *		ep;
	unsigned int	nif, ni4;	/* listening endpoints, IPv4 endpoints */

	nif = ni4 = 0;
	set_reuseaddr(1);

	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		/* count IPv4 interfaces. Needed later to decide
		 * if we should log an error or not.
		 */
		if (AF_INET == ep->family) {
			++ni4;
		}

		if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
			continue;

		/* use only allowed addresses */
		if (ep->ignore_packets)
			continue;

		/* Need a broadcast-capable interface */
		if (!(ep->flags & INT_BROADCAST))
			continue;

		/* Only IPv4 addresses are valid for broadcast */
		REQUIRE(IS_IPV4(&ep->bcast));

		/* Do we already have the broadcast address open? */
		if (ep->flags & INT_BCASTOPEN) {
			/*
			 * account for already open interfaces to avoid
			 * misleading warning below
			 */
			nif++;
			continue;
		}

		/*
		 * Try to open the broadcast address
		 */
		ep->family = AF_INET;
		ep->bfd = open_socket(&ep->bcast, 1, 0, ep);

		/*
		 * If we succeeded then we use it otherwise enable
		 * broadcast on the interface address
		 *
		 * NOTE(review): the switch below reads errno after
		 * open_socket() has returned, which presumes that
		 * open_socket() leaves the bind() error in errno --
		 * confirm.
		 */
		if (ep->bfd != INVALID_SOCKET) {
			nif++;
			ep->flags |= INT_BCASTOPEN;
			msyslog(LOG_INFO,
				"Listen for broadcasts to %s on interface #%d %s",
				stoa(&ep->bcast), ep->ifnum, ep->name);
		} else switch (errno) {
			/* Silently ignore EADDRINUSE as we probably
			 * opened the socket already for an address in
			 * the same network */
		case EADDRINUSE:
			/* Some systems cannot bind a socket to a broadcast
			 * address, as that is not a valid host address. */
		case EADDRNOTAVAIL:
#		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
			/* avoid recurrence here -- if we already have a
			 * regular socket, it's quite useless to try this
			 * again.
			 */
			if (ep->fd != INVALID_SOCKET) {
				ep->flags |= INT_BCASTOPEN;
				nif++;
			}
#		    endif
			break;

		default:
			msyslog(LOG_INFO,
				"failed to listen for broadcasts to %s on interface #%d %s",
				stoa(&ep->bcast), ep->ifnum, ep->name);
			break;
		}
	}
	set_reuseaddr(0);
	if (nif != 0) {
		broadcast_client_enabled = ISC_TRUE;
		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
	} else {
		broadcast_client_enabled = ISC_FALSE;
		/* This is expected when having only IPv6 interfaces
		 * and no IPv4 interfaces at all. We suppress the error
		 * log in that case... everything else should work!
		 */
		if (ni4) {
			msyslog(LOG_ERR,
				"Unable to listen for broadcasts, no broadcast interfaces available");
		}
	}
#else
	msyslog(LOG_ERR,
		"io_setbclient: Broadcast Client disabled by build");
#endif	/* OPEN_BCAST_SOCKET */
}
2750 
2751 
2752 /*
2753  * io_unsetbclient - close the broadcast client sockets
2754  */
2755 void
2756 io_unsetbclient(void)
2757 {
2758 	endpt *ep;
2759 
2760 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2761 		if (INT_WILDCARD & ep->flags)
2762 			continue;
2763 		if (!(INT_BCASTOPEN & ep->flags))
2764 			continue;
2765 
2766 		if (ep->bfd != INVALID_SOCKET) {
2767 			/* destroy broadcast listening socket */
2768 			msyslog(LOG_INFO,
2769 				"stop listening for broadcasts to %s on interface #%d %s",
2770 				stoa(&ep->bcast), ep->ifnum, ep->name);
2771 			close_and_delete_fd_from_list(ep->bfd, ep);
2772 			ep->bfd = INVALID_SOCKET;
2773 		}
2774 		ep->flags &= ~INT_BCASTOPEN;
2775 	}
2776 	broadcast_client_enabled = ISC_FALSE;
2777 }
2778 
2779 
/*
 * io_multicast_add() - add multicast group address
 *
 * addr - multicast group address to join
 *
 * Returns silently if addr is not a multicast address or if an
 * endpoint flagged INT_MCASTOPEN is already recorded for it.
 *
 * Without MULTICAST_NONEWSOCKET a new endpoint with its own socket
 * bound to the group address is created; if that bind fails the
 * wildcard endpoint of the matching family is used instead.  With
 * MULTICAST_NONEWSOCKET every eligible existing endpoint socket is
 * joined to the group.  In both builds the address is finally
 * recorded with add_addr_to_list().
 *
 * Without MCAST only an error message is logged.
 *
 * Note the unusual brace structure: the block opened just before
 * "# else" and the for loop opened in the MULTICAST_NONEWSOCKET branch
 * share the closing brace that follows the socket_multicast_enable()
 * call.
 */
void
io_multicast_add(
	sockaddr_u *addr
	)
{
#ifdef MCAST
	endpt *	ep;
	endpt *	one_ep;		/* endpoint recorded for addr at the end */

	/*
	 * Check to see if this is a multicast address
	 */
	if (!addr_ismulticast(addr))
		return;

	/* If we already have it we can just return */
	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
		return;
	}

# ifndef MULTICAST_NONEWSOCKET
	ep = new_interface(NULL);

	/*
	 * Open a new socket for the multicast address
	 */
	ep->sin = *addr;
	SET_PORT(&ep->sin, NTP_PORT);
	ep->family = AF(&ep->sin);
	AF(&ep->mask) = ep->family;
	SET_ONESMASK(&ep->mask);

	set_reuseaddr(1);
	ep->bfd = INVALID_SOCKET;
	ep->fd = open_socket(&ep->sin, 0, 0, ep);
	if (ep->fd != INVALID_SOCKET) {
		ep->ignore_packets = ISC_FALSE;
		ep->flags |= INT_MCASTIF;
		ep->ifindex = SCOPE(addr);

		strlcpy(ep->name, "multicast", sizeof(ep->name));
		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
		add_interface(ep);
		log_listen_address(ep);
	} else {
		/* bind failed, re-use wildcard interface */
		delete_interface(ep);

		if (IS_IPV4(addr))
			ep = wildipv4;
		else if (IS_IPV6(addr))
			ep = wildipv6;
		else
			ep = NULL;

		if (ep != NULL) {
			/* HACK ! -- stuff in an address */
			/* because we don't bind addr? DH */
			ep->bcast = *addr;
			msyslog(LOG_ERR,
				"multicast address %s using wildcard interface #%d %s",
				stoa(addr), ep->ifnum, ep->name);
		} else {
			msyslog(LOG_ERR,
				"No multicast socket available to use for address %s",
				stoa(addr));
			return;
		}
	}
	{	/* in place of the { following for in #else clause */
		one_ep = ep;
# else	/* MULTICAST_NONEWSOCKET follows */
	/*
	 * For the case where we can't use a separate socket (Windows)
	 * join each applicable endpoint socket to the group address.
	 */
	if (IS_IPV4(addr))
		one_ep = wildipv4;
	else
		one_ep = wildipv6;
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		/* skip ignored, wrong-family, non-multicast, loopback and wildcard endpoints */
		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
		    !(INT_MULTICAST & ep->flags) ||
		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
			continue;
		one_ep = ep;
# endif	/* MULTICAST_NONEWSOCKET */
		if (socket_multicast_enable(ep, addr))
			msyslog(LOG_INFO,
				"Joined %s socket to multicast group %s",
				stoa(&ep->sin),
				stoa(addr));
	}

	add_addr_to_list(addr, one_ep);
#else	/* !MCAST  follows*/
	msyslog(LOG_ERR,
		"Can not add multicast address %s: no multicast support",
		stoa(addr));
#endif
	return;
}
2885 
2886 
2887 /*
2888  * io_multicast_del() - delete multicast group address
2889  */
2890 void
2891 io_multicast_del(
2892 	sockaddr_u *	addr
2893 	)
2894 {
2895 #ifdef MCAST
2896 	endpt *iface;
2897 
2898 	/*
2899 	 * Check to see if this is a multicast address
2900 	 */
2901 	if (!addr_ismulticast(addr)) {
2902 		msyslog(LOG_ERR, "invalid multicast address %s",
2903 			stoa(addr));
2904 		return;
2905 	}
2906 
2907 	/*
2908 	 * Disable reception of multicast packets
2909 	 */
2910 	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2911 	       != NULL)
2912 		socket_multicast_disable(iface, addr);
2913 
2914 	delete_addr_from_list(addr);
2915 
2916 #else /* not MCAST */
2917 	msyslog(LOG_ERR,
2918 		"Can not delete multicast address %s: no multicast support",
2919 		stoa(addr));
2920 #endif /* not MCAST */
2921 }
2922 
2923 
2924 /*
2925  * open_socket - open a socket, returning the file descriptor
2926  */
2927 
2928 static SOCKET
2929 open_socket(
2930 	sockaddr_u *	addr,
2931 	int		bcast,
2932 	int		turn_off_reuse,
2933 	endpt *		interf
2934 	)
2935 {
2936 	SOCKET	fd;
2937 	int	errval;
2938 	/*
2939 	 * int is OK for REUSEADR per
2940 	 * http://www.kohala.com/start/mcast.api.txt
2941 	 */
2942 	int	on = 1;
2943 	int	off = 0;
2944 
2945 	if (IS_IPV6(addr) && !ipv6_works)
2946 		return INVALID_SOCKET;
2947 
2948 	/* create a datagram (UDP) socket */
2949 	fd = socket(AF(addr), SOCK_DGRAM, 0);
2950 	if (INVALID_SOCKET == fd) {
2951 		errval = socket_errno();
2952 		msyslog(LOG_ERR,
2953 			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2954 			IS_IPV6(addr) ? "6" : "", stoa(addr));
2955 
2956 		if (errval == EPROTONOSUPPORT ||
2957 		    errval == EAFNOSUPPORT ||
2958 		    errval == EPFNOSUPPORT)
2959 			return (INVALID_SOCKET);
2960 
2961 		errno = errval;
2962 		msyslog(LOG_ERR,
2963 			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2964 			errno);
2965 		exit(1);
2966 	}
2967 
2968 #ifdef SYS_WINNT
2969 	connection_reset_fix(fd, addr);
2970 #endif
2971 	/*
2972 	 * Fixup the file descriptor for some systems
2973 	 * See bug #530 for details of the issue.
2974 	 */
2975 	fd = move_fd(fd);
2976 
2977 	/*
2978 	 * set SO_REUSEADDR since we will be binding the same port
2979 	 * number on each interface according to turn_off_reuse.
2980 	 * This is undesirable on Windows versions starting with
2981 	 * Windows XP (numeric version 5.1).
2982 	 */
2983 #ifdef SYS_WINNT
2984 	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2985 #endif
2986 		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2987 			       (void *)((turn_off_reuse)
2988 					    ? &off
2989 					    : &on),
2990 			       sizeof(on))) {
2991 
2992 			msyslog(LOG_ERR,
2993 				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
2994 				(turn_off_reuse)
2995 				    ? "off"
2996 				    : "on",
2997 				stoa(addr));
2998 			closesocket(fd);
2999 			return INVALID_SOCKET;
3000 		}
3001 #ifdef SO_EXCLUSIVEADDRUSE
3002 	/*
3003 	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
3004 	 * first will cause more specific binds to fail.
3005 	 */
3006 	if (!(interf->flags & INT_WILDCARD))
3007 		set_excladdruse(fd);
3008 #endif
3009 
3010 	/*
3011 	 * IPv4 specific options go here
3012 	 */
3013 	if (IS_IPV4(addr)) {
3014 #if defined(IPPROTO_IP) && defined(IP_TOS)
3015 		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
3016 			       sizeof(qos)))
3017 			msyslog(LOG_ERR,
3018 				"setsockopt IP_TOS (%02x) fails on address %s: %m",
3019 				qos, stoa(addr));
3020 #endif /* IPPROTO_IP && IP_TOS */
3021 		if (bcast)
3022 			socket_broadcast_enable(interf, fd, addr);
3023 	}
3024 
3025 	/*
3026 	 * IPv6 specific options go here
3027 	 */
3028 	if (IS_IPV6(addr)) {
3029 #if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
3030 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
3031 			       sizeof(qos)))
3032 			msyslog(LOG_ERR,
3033 				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
3034 				qos, stoa(addr));
3035 #endif /* IPPROTO_IPV6 && IPV6_TCLASS */
3036 #ifdef IPV6_V6ONLY
3037 		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
3038 		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
3039 		    (void *)&on, sizeof(on)))
3040 			msyslog(LOG_ERR,
3041 				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
3042 				stoa(addr));
3043 #endif
3044 #ifdef IPV6_BINDV6ONLY
3045 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
3046 		    (void *)&on, sizeof(on)))
3047 			msyslog(LOG_ERR,
3048 				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
3049 				stoa(addr));
3050 #endif
3051 	}
3052 
3053 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3054 	/*
3055 	 * some OSes don't allow binding to more specific
3056 	 * addresses if a wildcard address already bound
3057 	 * to the port and SO_REUSEADDR is not set
3058 	 */
3059 	if (!is_wildcard_addr(addr))
3060 		set_wildcard_reuse(AF(addr), 1);
3061 #endif
3062 
3063 	/*
3064 	 * bind the local address.
3065 	 */
3066 	errval = bind(fd, &addr->sa, SOCKLEN(addr));
3067 
3068 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3069 	if (!is_wildcard_addr(addr))
3070 		set_wildcard_reuse(AF(addr), 0);
3071 #endif
3072 
3073 	if (errval < 0) {
3074 		/*
3075 		 * Don't log this under all conditions
3076 		 */
3077 		if (turn_off_reuse == 0
3078 #ifdef DEBUG
3079 		    || debug > 1
3080 #endif
3081 		    ) {
3082 			msyslog(LOG_ERR,
3083 				"bind(%d) AF_INET%s %s%s flags 0x%x failed: %m",
3084 				fd, IS_IPV6(addr) ? "6" : "",
3085 				sptoa(addr),
3086 				IS_MCAST(addr) ? " (multicast)" : "",
3087 				interf->flags);
3088 		}
3089 
3090 		closesocket(fd);
3091 
3092 		return INVALID_SOCKET;
3093 	}
3094 
3095 #ifdef HAVE_TIMESTAMP
3096 	{
3097 		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3098 			       (void *)&on, sizeof(on)))
3099 			msyslog(LOG_DEBUG,
3100 				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
3101 				stoa(addr));
3102 		else
3103 			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3104 				    fd, stoa(addr)));
3105 	}
3106 #endif
3107 #ifdef HAVE_TIMESTAMPNS
3108 	{
3109 		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3110 			       (void *)&on, sizeof(on)))
3111 			msyslog(LOG_DEBUG,
3112 				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3113 				stoa(addr));
3114 		else
3115 			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3116 				    fd, stoa(addr)));
3117 	}
3118 #endif
3119 #ifdef HAVE_BINTIME
3120 	{
3121 		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3122 			       (void *)&on, sizeof(on)))
3123 			msyslog(LOG_DEBUG,
3124 				"setsockopt SO_BINTIME on fails on address %s: %m",
3125 				stoa(addr));
3126 		else
3127 			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3128 				    fd, stoa(addr)));
3129 	}
3130 #endif
3131 
3132 	DPRINTF(4, ("bind(%d) addr %s, flags 0x%x\n",
3133 		    fd, sptoa(addr), interf->flags));
3134 
3135 	make_socket_nonblocking(fd);
3136 
3137 #ifdef HAVE_SIGNALED_IO
3138 	init_socket_sig(fd);
3139 #endif /* not HAVE_SIGNALED_IO */
3140 
3141 	add_fd_to_list(fd, FD_TYPE_SOCKET);
3142 
3143 #if !defined(SYS_WINNT) && !defined(VMS)
3144 	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3145 		    fcntl(fd, F_GETFL, 0)));
3146 #endif /* SYS_WINNT || VMS */
3147 
3148 #if defined(HAVE_IO_COMPLETION_PORT)
3149 /*
3150  * Add the socket to the completion port
3151  */
3152 	if (!io_completion_port_add_socket(fd, interf, bcast)) {
3153 		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3154 		exit(1);
3155 	}
3156 #endif
3157 	return fd;
3158 }
3159 
3160 
3161 
/* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
/*
 * sendpkt - send a packet to the specified destination from the given endpt
 *	     except for multicast, which may be sent from several addresses.
 *
 * dest - destination address
 * ep   - source endpoint for non-multicast destinations; not used when
 *	  dest is a multicast address
 * ttl  - multicast TTL / hop limit; applied only for multicast
 *	  destinations when > 0 and different from the endpoint's
 *	  last_ttl
 * pkt  - packet to transmit
 * len  - number of bytes of pkt to send
 *
 * For a multicast destination the packet is sent once from every
 * endpoint on mc4_list or mc6_list (chained through mclink).  The
 * per-endpoint and global sent/notsent counters are updated for each
 * attempt, and one raw-stats record is written afterwards.  If no
 * source endpoint is available the packet is dropped silently apart
 * from a debug trace.
 */
void
sendpkt(
	sockaddr_u *	dest,
	endpt *		ep,
	int		ttl,
	struct pkt *	pkt,
	int		len
	)
{
	endpt *	src;
	int	ismcast;
	int	cc;
	int	rc;
	u_char	cttl;
	l_fp	fp_zero = { { 0 }, 0 };
	l_fp	org, rec, xmt;

	ismcast = IS_MCAST(dest);
	if (!ismcast) {
		src = ep;
	} else {
#ifndef MCAST
		return;
#endif
		src = (IS_IPV4(dest))
			? mc4_list
			: mc6_list;
	}

	if (NULL == src) {
		/*
		 * unbound peer - drop request and wait for better
		 * network conditions
		 */
		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
			    ismcast ? "\tMCAST\t***** " : "",
			    stoa(dest), ttl, len));
		return;
	}

	do {
		/*
		 * NOTE(review): this test is not conditional on
		 * ismcast, so a unicast packet whose source endpoint
		 * has INT_LL_OF_GLOB set is skipped as well, without
		 * touching the sent/notsent counters -- confirm that
		 * this is intended.
		 */
		if (INT_LL_OF_GLOB & src->flags) {
			/* avoid duplicate multicasts on same IPv6 net */
			goto loop;
		}
		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
			    ismcast ? "\tMCAST\t***** " : "", src->fd,
			    stoa(dest), stoa(&src->sin), ttl, len));
#ifdef MCAST
		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
			/*
			 * set the multicast ttl for outgoing packets
			 */
			switch (AF(&src->sin)) {

			case AF_INET :
				/* the IPv4 option is passed as a single byte here */
				cttl = (u_char)ttl;
				rc = setsockopt(src->fd, IPPROTO_IP,
						IP_MULTICAST_TTL,
						(void *)&cttl,
						sizeof(cttl));
				break;

# ifdef INCLUDE_IPV6_SUPPORT
			case AF_INET6 :
				rc = setsockopt(src->fd, IPPROTO_IPV6,
						 IPV6_MULTICAST_HOPS,
						 (void *)&ttl,
						 sizeof(ttl));
				break;
# endif	/* INCLUDE_IPV6_SUPPORT */

			default:
				rc = 0;
			}

			/* remember the value so it is not set again next time */
			if (!rc)
				src->last_ttl = ttl;
			else
				msyslog(LOG_ERR,
					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
					stoa(&src->sin));
		}
#endif	/* MCAST */

#ifdef SIM
		cc = simulate_server(dest, src, pkt);
#elif defined(HAVE_IO_COMPLETION_PORT)
		cc = io_completion_port_sendto(src, src->fd, pkt,
			(size_t)len, dest);
#else
		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
			    &dest->sa, SOCKLEN(dest));
#endif
		if (cc == -1) {
			src->notsent++;
			packets_notsent++;
		} else	{
			src->sent++;
			packets_sent++;
		}
	    loop:
		/* only multicast walks on to further source endpoints */
		if (ismcast)
			src = src->mclink;
	} while (ismcast && src != NULL);

	/*
	 * After a multicast send the loop above has run src to NULL,
	 * so the record below is written without a source address.
	 */
	/* HMS: pkt->rootdisp is usually random here */
	NTOHL_FP(&pkt->org, &org);
	NTOHL_FP(&pkt->rec, &rec);
	NTOHL_FP(&pkt->xmt, &xmt);
	record_raw_stats(src ? &src->sin : NULL, dest,
			&org, &rec, &xmt, &fp_zero,
			PKT_LEAP(pkt->li_vn_mode),
			PKT_VERSION(pkt->li_vn_mode),
			PKT_MODE(pkt->li_vn_mode),
			pkt->stratum,
			pkt->ppoll, pkt->precision,
			FPTOD(NTOHS_FP(pkt->rootdelay)),
			FPTOD(NTOHS_FP(pkt->rootdisp)),  pkt->refid,
			len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
}
3288 
3289 
3290 #if !defined(HAVE_IO_COMPLETION_PORT)
3291 #if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count - highest descriptor number to show
 * set   - descriptor set to render
 *
 * Returns a pointer to a static buffer holding one character for each
 * descriptor from count down to 0: '#' if the descriptor is a member
 * of set, '-' otherwise.  The buffer is overwritten by the next call.
 *
 * count + 1 characters plus the terminating NUL are stored, so count
 * is limited to sizeof(buffer) - 2.  A negative count yields an empty
 * string.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	static char buffer[256];
	char * buf = buffer;
	const int max_count = (int)sizeof(buffer) - 2;

	/*
	 * Compare as int: a negative count must stay negative rather
	 * than being converted to a large unsigned value.
	 */
	if (count > max_count)
		count = max_count;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3315 #endif
3316 
3317 #ifdef REFCLOCK
3318 /*
3319  * Routine to read the refclock packets for a specific interface
3320  * Return the number of bytes read. That way we know if we should
3321  * read it again or go on to the next one if no bytes returned
3322  */
3323 static inline int
3324 read_refclock_packet(
3325 	SOCKET			fd,
3326 	struct refclockio *	rp,
3327 	l_fp			ts
3328 	)
3329 {
3330 	u_int			read_count;
3331 	int			buflen;
3332 	int			saved_errno;
3333 	int			consumed;
3334 	struct recvbuf *	rb;
3335 
3336 	rb = get_free_recv_buffer(TRUE);
3337 
3338 	if (NULL == rb) {
3339 		/*
3340 		 * No buffer space available - just drop the 'packet'.
3341 		 * Since this is a non-blocking character stream we read
3342 		 * all data that we can.
3343 		 *
3344 		 * ...hmmmm... what about "tcflush(fd,TCIFLUSH)" here?!?
3345 		 */
3346 		char buf[128];
3347 		do
3348 			buflen = read(fd, buf, sizeof(buf));
3349 		while (buflen > 0);
3350 		packets_dropped++;
3351 		return (buflen);
3352 	}
3353 
3354 	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3355 	 * to buffer overrun and memory corruption
3356 	 */
3357 	if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3358 		read_count = sizeof(rb->recv_space);
3359 	else
3360 		read_count = (u_int)rp->datalen;
3361 	do {
3362 		buflen = read(fd, (char *)&rb->recv_space, read_count);
3363 	} while (buflen < 0 && EINTR == errno);
3364 
3365 	if (buflen <= 0) {
3366 		saved_errno = errno;
3367 		freerecvbuf(rb);
3368 		errno = saved_errno;
3369 		return buflen;
3370 	}
3371 
3372 	/*
3373 	 * Got one. Mark how and when it got here,
3374 	 * put it on the full list and do bookkeeping.
3375 	 */
3376 	rb->recv_length = buflen;
3377 	rb->recv_peer = rp->srcclock;
3378 	rb->dstadr = NULL;
3379 	rb->fd = fd;
3380 	rb->recv_time = ts;
3381 	rb->receiver = rp->clock_recv;
3382 
3383 	consumed = indicate_refclock_packet(rp, rb);
3384 	if (!consumed) {
3385 		rp->recvcount++;
3386 		packets_received++;
3387 	}
3388 
3389 	return buflen;
3390 }
3391 #endif	/* REFCLOCK */
3392 
3393 
3394 #ifdef HAVE_PACKET_TIMESTAMP
3395 /*
3396  * extract timestamps from control message buffer
3397  */
3398 static l_fp
3399 fetch_timestamp(
3400 	struct recvbuf *	rb,
3401 	struct msghdr *		msghdr,
3402 	l_fp			ts
3403 	)
3404 {
3405 	struct cmsghdr *	cmsghdr;
3406 	unsigned long		ticks;
3407 	double			fuzz;
3408 	l_fp			lfpfuzz;
3409 	l_fp			nts;
3410 #ifdef DEBUG_TIMING
3411 	l_fp			dts;
3412 #endif
3413 
3414 	cmsghdr = CMSG_FIRSTHDR(msghdr);
3415 	while (cmsghdr != NULL) {
3416 		switch (cmsghdr->cmsg_type)
3417 		{
3418 #ifdef HAVE_BINTIME
3419 		case SCM_BINTIME:
3420 #endif  /* HAVE_BINTIME */
3421 #ifdef HAVE_TIMESTAMPNS
3422 		case SCM_TIMESTAMPNS:
3423 #endif	/* HAVE_TIMESTAMPNS */
3424 #ifdef HAVE_TIMESTAMP
3425 		case SCM_TIMESTAMP:
3426 #endif	/* HAVE_TIMESTAMP */
3427 #if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
3428 			switch (cmsghdr->cmsg_type)
3429 			{
3430 #ifdef HAVE_BINTIME
3431 			case SCM_BINTIME:
3432 				{
3433 					struct bintime	pbt;
3434 					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
3435 					/*
3436 					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
3437 					 */
3438 					nts.l_i = pbt.sec + JAN_1970;
3439 					nts.l_uf = (u_int32)(pbt.frac >> 32);
3440 					if (sys_tick > measured_tick &&
3441 					    sys_tick > 1e-9) {
3442 						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
3443 						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
3444 					}
3445 					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
3446 						    (long)pbt.sec, (u_long)((nts.l_uf / FRAC) * 1e9)));
3447 				}
3448 				break;
3449 #endif  /* HAVE_BINTIME */
3450 #ifdef HAVE_TIMESTAMPNS
3451 			case SCM_TIMESTAMPNS:
3452 				{
3453 					struct timespec	pts;
3454 					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
3455 					if (sys_tick > measured_tick &&
3456 					    sys_tick > 1e-9) {
3457 						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
3458 									sys_tick);
3459 						pts.tv_nsec = (long)(ticks * 1e9 *
3460 								     sys_tick);
3461 					}
3462 					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
3463 						    pts.tv_sec, pts.tv_nsec));
3464 					nts = tspec_stamp_to_lfp(pts);
3465 				}
3466 				break;
3467 #endif	/* HAVE_TIMESTAMPNS */
3468 #ifdef HAVE_TIMESTAMP
3469 			case SCM_TIMESTAMP:
3470 				{
3471 					struct timeval	ptv;
3472 					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
3473 					if (sys_tick > measured_tick &&
3474 					    sys_tick > 1e-6) {
3475 						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
3476 									sys_tick);
3477 						ptv.tv_usec = (long)(ticks * 1e6 *
3478 								    sys_tick);
3479 					}
3480 					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
3481 						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
3482 					nts = tval_stamp_to_lfp(ptv);
3483 				}
3484 				break;
3485 #endif  /* HAVE_TIMESTAMP */
3486 			}
3487 			fuzz = ntp_uurandom() * sys_fuzz;
3488 			DTOLFP(fuzz, &lfpfuzz);
3489 			L_ADD(&nts, &lfpfuzz);
3490 #ifdef DEBUG_TIMING
3491 			dts = ts;
3492 			L_SUB(&dts, &nts);
3493 			collect_timing(rb, "input processing delay", 1,
3494 				       &dts);
3495 			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
3496 				    lfptoa(&dts, 9)));
3497 #endif	/* DEBUG_TIMING */
3498 			ts = nts;  /* network time stamp */
3499 			break;
3500 #endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */
3501 
3502 		default:
3503 			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
3504 				    cmsghdr->cmsg_type));
3505 		}
3506 		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
3507 	}
3508 	return ts;
3509 }
3510 #endif	/* HAVE_PACKET_TIMESTAMP */
3511 
3512 
3513 /*
3514  * Routine to read the network NTP packets for a specific interface
3515  * Return the number of bytes read. That way we know if we should
3516  * read it again or go on to the next one if no bytes returned
3517  */
3518 static inline int
3519 read_network_packet(
3520 	SOCKET		fd,
3521 	endpt *		itf,
3522 	l_fp		ts
3523 	)
3524 {
3525 	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3526 	int buflen;
3527 	register struct recvbuf *rb;
3528 #ifdef HAVE_PACKET_TIMESTAMP
3529 	struct msghdr msghdr;
3530 	struct iovec iovec;
3531 	char control[CMSG_BUFSIZE];
3532 #endif
3533 
3534 	/*
3535 	 * Get a buffer and read the frame.  If we haven't got a buffer,
3536 	 * or this is received on a disallowed socket, just dump the
3537 	 * packet.
3538 	 */
3539 
3540 	rb = itf->ignore_packets ? NULL : get_free_recv_buffer(FALSE);
3541 	if (NULL == rb) {
3542 		/* A partial read on a UDP socket truncates the data and
3543 		 * removes the message from the queue. So there's no
3544 		 * need to have a full buffer here on the stack.
3545 		 */
3546 		char buf[16];
3547 		sockaddr_u from;
3548 
3549 		if (rb != NULL)
3550 			freerecvbuf(rb);
3551 
3552 		fromlen = sizeof(from);
3553 		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3554 				  &from.sa, &fromlen);
3555 		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3556 			(itf->ignore_packets)
3557 			    ? "ignore"
3558 			    : "drop",
3559 			free_recvbuffs(), fd, stoa(&from)));
3560 		if (itf->ignore_packets)
3561 			packets_ignored++;
3562 		else
3563 			packets_dropped++;
3564 		return (buflen);
3565 	}
3566 
3567 	fromlen = sizeof(rb->recv_srcadr);
3568 
3569 #ifndef HAVE_PACKET_TIMESTAMP
3570 	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3571 				   sizeof(rb->recv_space), 0,
3572 				   &rb->recv_srcadr.sa, &fromlen);
3573 #else
3574 	iovec.iov_base        = &rb->recv_space;
3575 	iovec.iov_len         = sizeof(rb->recv_space);
3576 	msghdr.msg_name       = &rb->recv_srcadr;
3577 	msghdr.msg_namelen    = fromlen;
3578 	msghdr.msg_iov        = &iovec;
3579 	msghdr.msg_iovlen     = 1;
3580 	msghdr.msg_control    = (void *)&control;
3581 	msghdr.msg_controllen = sizeof(control);
3582 	msghdr.msg_flags      = 0;
3583 	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3584 #endif
3585 
3586 	buflen = rb->recv_length;
3587 
3588 	if (buflen == 0 || (buflen == -1 &&
3589 	    (EWOULDBLOCK == errno
3590 #ifdef EAGAIN
3591 	     || EAGAIN == errno
3592 #endif
3593 	     ))) {
3594 		freerecvbuf(rb);
3595 		return (buflen);
3596 	} else if (buflen < 0) {
3597 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3598 			stoa(&rb->recv_srcadr), fd);
3599 		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3600 			    fd));
3601 		freerecvbuf(rb);
3602 		return (buflen);
3603 	}
3604 
3605 	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3606 		    fd, buflen, stoa(&rb->recv_srcadr)));
3607 
3608 #ifdef ENABLE_BUG3020_FIX
3609 	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3610 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3611 			stoa(&rb->recv_srcadr), fd);
3612 		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3613 			    fd));
3614 		packets_dropped++;
3615 		freerecvbuf(rb);
3616 		return (buflen);
3617 	}
3618 #endif
3619 
3620 	/*
3621 	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3622 	*/
3623 
3624 	if (   IS_IPV6(&rb->recv_srcadr)
3625 	    && IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3626 	    && !(INT_LOOPBACK & itf->flags)) {
3627 
3628 		packets_dropped++;
3629 		DPRINTF(2, ("DROPPING pkt with spoofed ::1 source on %s\n", latoa(itf)));
3630 		freerecvbuf(rb);
3631 		return -1;
3632 	}
3633 
3634 	/*
3635 	 * Got one.  Mark how and when it got here,
3636 	 * put it on the full list and do bookkeeping.
3637 	 */
3638 	rb->dstadr = itf;
3639 	rb->fd = fd;
3640 #ifdef HAVE_PACKET_TIMESTAMP
3641 	/* pick up a network time stamp if possible */
3642 	ts = fetch_timestamp(rb, &msghdr, ts);
3643 #endif
3644 	rb->recv_time = ts;
3645 	rb->receiver = receive;
3646 
3647 	add_full_recv_buffer(rb);
3648 
3649 	itf->received++;
3650 	packets_received++;
3651 	return (buflen);
3652 }
3653 
3654 /*
3655  * attempt to handle io (select()/signaled IO)
3656  */
3657 void
3658 io_handler(void)
3659 {
3660 #  ifndef HAVE_SIGNALED_IO
3661 	fd_set rdfdes;
3662 	int nfound;
3663 
3664 	/*
3665 	 * Use select() on all on all input fd's for unlimited
3666 	 * time.  select() will terminate on SIGALARM or on the
3667 	 * reception of input.	Using select() means we can't do
3668 	 * robust signal handling and we get a potential race
3669 	 * between checking for alarms and doing the select().
3670 	 * Mostly harmless, I think.
3671 	 */
3672 	/*
3673 	 * On VMS, I suspect that select() can't be interrupted
3674 	 * by a "signal" either, so I take the easy way out and
3675 	 * have select() time out after one second.
3676 	 * System clock updates really aren't time-critical,
3677 	 * and - lacking a hardware reference clock - I have
3678 	 * yet to learn about anything else that is.
3679 	 */
3680 	++handler_calls;
3681 	rdfdes = activefds;
3682 #   if !defined(VMS) && !defined(SYS_VXWORKS)
3683 	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3684 			NULL, NULL);
3685 #   else	/* VMS, VxWorks */
3686 	/* make select() wake up after one second */
3687 	{
3688 		struct timeval t1;
3689 		t1.tv_sec  = 1;
3690 		t1.tv_usec = 0;
3691 		nfound = select(maxactivefd + 1,
3692 				&rdfdes, NULL, NULL,
3693 				&t1);
3694 	}
3695 #   endif	/* VMS, VxWorks */
3696 	if (nfound < 0 && sanitize_fdset(errno)) {
3697 		struct timeval t1;
3698 		t1.tv_sec  = 0;
3699 		t1.tv_usec = 0;
3700 		rdfdes = activefds;
3701 		nfound = select(maxactivefd + 1,
3702 				&rdfdes, NULL, NULL,
3703 				&t1);
3704 	}
3705 
3706 	if (nfound > 0) {
3707 		l_fp ts;
3708 
3709 		get_systime(&ts);
3710 
3711 		input_handler_scan(&ts, &rdfdes);
3712 	} else if (nfound == -1 && errno != EINTR) {
3713 		msyslog(LOG_ERR, "select() error: %m");
3714 	}
3715 #   ifdef DEBUG
3716 	else if (debug > 4) {
3717 		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3718 	} else {
3719 		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3720 	}
3721 #   endif /* DEBUG */
3722 #  else /* HAVE_SIGNALED_IO */
3723 	wait_for_signal();
3724 #  endif /* HAVE_SIGNALED_IO */
3725 }
3726 
3727 #ifdef HAVE_SIGNALED_IO
3728 /*
3729  * input_handler - receive packets asynchronously
3730  *
3731  * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3732  */
3733 static RETSIGTYPE
3734 input_handler(
3735 	l_fp *	cts
3736 	)
3737 {
3738 	int		n;
3739 	struct timeval	tvzero;
3740 	fd_set		fds;
3741 
3742 	++handler_calls;
3743 
3744 	/*
3745 	 * Do a poll to see who has data
3746 	 */
3747 
3748 	fds = activefds;
3749 	tvzero.tv_sec = tvzero.tv_usec = 0;
3750 
3751 	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3752 	if (n < 0 && sanitize_fdset(errno)) {
3753 		fds = activefds;
3754 		tvzero.tv_sec = tvzero.tv_usec = 0;
3755 		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3756 	}
3757 	if (n > 0)
3758 		input_handler_scan(cts, &fds);
3759 }
3760 #endif /* HAVE_SIGNALED_IO */
3761 
3762 
3763 /*
3764  * Try to sanitize the global FD set
3765  *
3766  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3767  */
3768 static int/*BOOL*/
3769 sanitize_fdset(
3770 	int	errc
3771 	)
3772 {
3773 	int j, b, maxscan;
3774 
3775 #  ifndef HAVE_SIGNALED_IO
3776 	/*
3777 	 * extended FAU debugging output
3778 	 */
3779 	if (errc != EINTR) {
3780 		msyslog(LOG_ERR,
3781 			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3782 			maxactivefd + 1,
3783 			fdbits(maxactivefd, &activefds));
3784 	}
3785 #   endif
3786 
3787 	if (errc != EBADF)
3788 		return FALSE;
3789 
3790 	/* if we have oviously bad FDs, try to sanitize the FD set. */
3791 	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3792 		if (FD_ISSET(j, &activefds)) {
3793 			if (-1 != read(j, &b, 0)) {
3794 				maxscan = j;
3795 				continue;
3796 			}
3797 #		    ifndef HAVE_SIGNALED_IO
3798 			msyslog(LOG_ERR,
3799 				"Removing bad file descriptor %d from select set",
3800 				j);
3801 #		    endif
3802 			FD_CLR(j, &activefds);
3803 		}
3804 	}
3805 	if (maxactivefd != maxscan)
3806 		maxactivefd = maxscan;
3807 	return TRUE;
3808 }
3809 
3810 /*
3811  * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'.
3812  *
3813  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3814  */
3815 static void
3816 input_handler_scan(
3817 	const l_fp *	cts,
3818 	const fd_set *	pfds
3819 	)
3820 {
3821 	int		buflen;
3822 	u_int		idx;
3823 	int		doing;
3824 	SOCKET		fd;
3825 	blocking_child *c;
3826 	l_fp		ts;	/* Timestamp at BOselect() gob */
3827 
3828 #if defined(DEBUG_TIMING)
3829 	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3830 #endif
3831 	endpt *		ep;
3832 #ifdef REFCLOCK
3833 	struct refclockio *rp;
3834 	int		saved_errno;
3835 	const char *	clk;
3836 #endif
3837 #ifdef HAS_ROUTING_SOCKET
3838 	struct asyncio_reader *	asyncio_reader;
3839 	struct asyncio_reader *	next_asyncio_reader;
3840 #endif
3841 
3842 	++handler_pkts;
3843 	ts = *cts;
3844 
3845 #ifdef REFCLOCK
3846 	/*
3847 	 * Check out the reference clocks first, if any
3848 	 */
3849 
3850 	for (rp = refio; rp != NULL; rp = rp->next) {
3851 		fd = rp->fd;
3852 
3853 		if (!FD_ISSET(fd, pfds))
3854 			continue;
3855 		buflen = read_refclock_packet(fd, rp, ts);
3856 		/*
3857 		 * The first read must succeed after select() indicates
3858 		 * readability, or we've reached a permanent EOF.
3859 		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
3860 		 * after a USB GPS was unplugged because select was
3861 		 * indicating EOF but ntpd didn't remove the descriptor
3862 		 * from the activefds set.
3863 		 */
3864 		if (buflen < 0 && EAGAIN != errno) {
3865 			saved_errno = errno;
3866 			clk = refnumtoa(&rp->srcclock->srcadr);
3867 			errno = saved_errno;
3868 			msyslog(LOG_ERR, "%s read: %m", clk);
3869 			maintain_activefds(fd, TRUE);
3870 		} else if (0 == buflen) {
3871 			clk = refnumtoa(&rp->srcclock->srcadr);
3872 			msyslog(LOG_ERR, "%s read EOF", clk);
3873 			maintain_activefds(fd, TRUE);
3874 		} else {
3875 			/* drain any remaining refclock input */
3876 			do {
3877 				buflen = read_refclock_packet(fd, rp, ts);
3878 			} while (buflen > 0);
3879 		}
3880 	}
3881 #endif /* REFCLOCK */
3882 
3883 	/*
3884 	 * Loop through the interfaces looking for data to read.
3885 	 */
3886 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3887 		for (doing = 0; doing < 2; doing++) {
3888 			if (!doing) {
3889 				fd = ep->fd;
3890 			} else {
3891 				if (!(ep->flags & INT_BCASTOPEN))
3892 					break;
3893 				fd = ep->bfd;
3894 			}
3895 			if (fd < 0)
3896 				continue;
3897 			if (FD_ISSET(fd, pfds))
3898 				do {
3899 					buflen = read_network_packet(
3900 							fd, ep, ts);
3901 				} while (buflen > 0);
3902 			/* Check more interfaces */
3903 		}
3904 	}
3905 
3906 #ifdef HAS_ROUTING_SOCKET
3907 	/*
3908 	 * scan list of asyncio readers - currently only used for routing sockets
3909 	 */
3910 	asyncio_reader = asyncio_reader_list;
3911 
3912 	while (asyncio_reader != NULL) {
3913 		/* callback may unlink and free asyncio_reader */
3914 		next_asyncio_reader = asyncio_reader->link;
3915 		if (FD_ISSET(asyncio_reader->fd, pfds))
3916 			(*asyncio_reader->receiver)(asyncio_reader);
3917 		asyncio_reader = next_asyncio_reader;
3918 	}
3919 #endif /* HAS_ROUTING_SOCKET */
3920 
3921 	/*
3922 	 * Check for a response from a blocking child
3923 	 */
3924 	for (idx = 0; idx < blocking_children_alloc; idx++) {
3925 		c = blocking_children[idx];
3926 		if (NULL == c || -1 == c->resp_read_pipe)
3927 			continue;
3928 		if (FD_ISSET(c->resp_read_pipe, pfds)) {
3929 			++c->resp_ready_seen;
3930 			++blocking_child_ready_seen;
3931 		}
3932 	}
3933 
3934 	/* We've done our work */
3935 #if defined(DEBUG_TIMING)
3936 	get_systime(&ts_e);
3937 	/*
3938 	 * (ts_e - ts) is the amount of time we spent
3939 	 * processing this gob of file descriptors.  Log
3940 	 * it.
3941 	 */
3942 	L_SUB(&ts_e, &ts);
3943 	collect_timing(NULL, "input handler", 1, &ts_e);
3944 	if (debug > 3)
3945 		msyslog(LOG_DEBUG,
3946 			"input_handler: Processed a gob of fd's in %s msec",
3947 			lfptoms(&ts_e, 6));
3948 #endif /* DEBUG_TIMING */
3949 }
3950 #endif /* !HAVE_IO_COMPLETION_PORT */
3951 
3952 /*
3953  * find an interface suitable for the src address
3954  */
3955 endpt *
3956 select_peerinterface(
3957 	struct peer *	peer,
3958 	sockaddr_u *	srcadr,
3959 	endpt *		dstadr
3960 	)
3961 {
3962 	endpt *ep;
3963 #ifndef SIM
3964 	endpt *wild;
3965 
3966 	wild = ANY_INTERFACE_CHOOSE(srcadr);
3967 
3968 	/*
3969 	 * Initialize the peer structure and dance the interface jig.
3970 	 * Reference clocks step the loopback waltz, the others
3971 	 * squaredance around the interface list looking for a buddy. If
3972 	 * the dance peters out, there is always the wildcard interface.
3973 	 * This might happen in some systems and would preclude proper
3974 	 * operation with public key cryptography.
3975 	 */
3976 	if (ISREFCLOCKADR(srcadr)) {
3977 		ep = loopback_interface;
3978 	} else if (peer->cast_flags &
3979 		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3980 		ep = findbcastinter(srcadr);
3981 		if (ep != NULL)
3982 			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3983 				stoa(&ep->sin), stoa(srcadr)));
3984 		else
3985 			DPRINTF(4, ("No *-cast local address found for address %s\n",
3986 				stoa(srcadr)));
3987 	} else {
3988 		ep = dstadr;
3989 		if (NULL == ep) {
3990 			ep = wild;
3991 		}
3992 	}
3993 	/*
3994 	 * If it is a multicast address, findbcastinter() may not find
3995 	 * it.  For unicast, we get to find the interface when dstadr is
3996 	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3997 	 * way, try a little harder.
3998 	 */
3999 	if (wild == ep) {
4000 		ep = findinterface(srcadr);
4001 	}
4002 	/*
4003 	 * we do not bind to the wildcard interfaces for output
4004 	 * as our (network) source address would be undefined and
4005 	 * crypto will not work without knowing the own transmit address
4006 	 */
4007 	if (ep != NULL && (INT_WILDCARD & ep->flags)) {
4008 		if (!accept_wildcard_if_for_winnt) {
4009 			ep = NULL;
4010 		}
4011 	}
4012 #else	/* SIM follows */
4013 	ep = loopback_interface;
4014 #endif
4015 
4016 	return ep;
4017 }
4018 
4019 
4020 /*
4021  * findinterface - find local interface corresponding to address
4022  */
4023 endpt *
4024 findinterface(
4025 	sockaddr_u *addr
4026 	)
4027 {
4028 	endpt *iface;
4029 
4030 	iface = findlocalinterface(addr, INT_WILDCARD, 0);
4031 
4032 	if (NULL == iface) {
4033 		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
4034 			    stoa(addr)));
4035 
4036 		iface = ANY_INTERFACE_CHOOSE(addr);
4037 	} else
4038 		DPRINTF(4, ("Found interface #%d %s for address %s\n",
4039 			    iface->ifnum, iface->name, stoa(addr)));
4040 
4041 	return iface;
4042 }
4043 
4044 /*
4045  * findlocalinterface - find local interface corresponding to addr,
4046  * which does not have any of flags set.  If bcast is nonzero, addr is
4047  * a broadcast address.
4048  *
4049  * This code attempts to find the local sending address for an outgoing
4050  * address by connecting a new socket to destinationaddress:NTP_PORT
4051  * and reading the sockname of the resulting connect.
4052  * the complicated sequence simulates the routing table lookup
4053  * for to first hop without duplicating any of the routing logic into
4054  * ntpd. preferably we would have used an API call - but its not there -
4055  * so this is the best we can do here short of duplicating to entire routing
4056  * logic in ntpd which would be a silly and really unportable thing to do.
4057  *
4058  */
4059 static endpt *
4060 findlocalinterface(
4061 	sockaddr_u *	addr,
4062 	int		flags,
4063 	int		bcast
4064 	)
4065 {
4066 	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
4067 	endpt *				iface;
4068 	sockaddr_u			saddr;
4069 	SOCKET				s;
4070 	int				rtn;
4071 	int				on;
4072 
4073 	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4074 		    stoa(addr)));
4075 
4076 	/* [Bug 3437] The prototype POOL peer can be AF_UNSPEC.
4077 	 * This is bound to fail, but on the way to nowhere it
4078 	 * triggers a security incident on SELinux.
4079 	 *
4080 	 * Checking the condition and failing early is probably good
4081 	 * advice, and even saves us some syscalls in that case.
4082 	 * Thanks to Miroslav Lichvar for finding this.
4083 	 */
4084 	if (AF_UNSPEC == AF(addr)) {
4085 		return NULL;
4086 	}
4087 	s = socket(AF(addr), SOCK_DGRAM, 0);
4088 	if (INVALID_SOCKET == s) {
4089 		return NULL;
4090 	}
4091 	/*
4092 	 * If we are looking for broadcast interface we need to set this
4093 	 * socket to allow broadcast
4094 	 */
4095 	if (bcast) {
4096 		on = 1;
4097 		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4098 						SO_BROADCAST,
4099 						(void *)&on,
4100 						sizeof(on))) {
4101 			closesocket(s);
4102 			return NULL;
4103 		}
4104 	}
4105 
4106 	rtn = connect(s, &addr->sa, SOCKLEN(addr));
4107 	if (SOCKET_ERROR == rtn) {
4108 		closesocket(s);
4109 		return NULL;
4110 	}
4111 
4112 	sockaddrlen = sizeof(saddr);
4113 	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4114 	closesocket(s);
4115 	if (SOCKET_ERROR == rtn)
4116 		return NULL;
4117 
4118 	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4119 		    stoa(addr), stoa(&saddr)));
4120 
4121 	iface = getinterface(&saddr, flags);
4122 
4123 	/*
4124 	 * if we didn't find an exact match on saddr, find the closest
4125 	 * available local address.  This handles the case of the
4126 	 * address suggested by the kernel being excluded by nic rules
4127 	 * or the user's -I and -L options to ntpd.
4128 	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4129 	 * for more background.
4130 	 */
4131 	if (NULL == iface || iface->ignore_packets) {
4132 		iface = findclosestinterface(&saddr,
4133 					     flags | INT_LOOPBACK);
4134 	}
4135 	/*
4136 	 * Don't select an interface which will ignore replies, or one
4137 	 * dedicated to multicast receive.
4138 	 */
4139 	if (   iface != NULL
4140 	    && (iface->ignore_packets || (INT_MCASTIF & iface->flags))) {
4141 		iface = NULL;
4142 	}
4143 	return iface;
4144 }
4145 
4146 
4147 /*
4148  * findclosestinterface
4149  *
4150  * If there are -I/--interface or -L/novirtualips command-line options,
4151  * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4152  * find the kernel's preferred local address for a given peer address is
4153  * administratively unavailable to ntpd, and punt to this routine's more
4154  * expensive search.
4155  *
4156  * Find the numerically closest local address to the one connect()
4157  * suggested.  This matches an address on the same subnet first, as
4158  * needed by Bug 1184, and provides a consistent choice if there are
4159  * multiple feasible local addresses, regardless of the order ntpd
4160  * enumerated them.
4161  */
4162 endpt *
4163 findclosestinterface(
4164 	sockaddr_u *	addr,
4165 	int		flags
4166 	)
4167 {
4168 	endpt *		ep;
4169 	endpt *		winner;
4170 	sockaddr_u	addr_dist;
4171 	sockaddr_u	min_dist;
4172 
4173 	ZERO_SOCK(&min_dist);
4174 	winner = NULL;
4175 
4176 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4177 		if (ep->ignore_packets ||
4178 		    AF(addr) != ep->family ||
4179 		    flags & ep->flags)
4180 			continue;
4181 
4182 		calc_addr_distance(&addr_dist, addr, &ep->sin);
4183 		if (NULL == winner ||
4184 		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4185 			min_dist = addr_dist;
4186 			winner = ep;
4187 		}
4188 	}
4189 	if (NULL == winner)
4190 		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4191 			    stoa(addr)));
4192 	else
4193 		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4194 			    stoa(addr), stoa(&winner->sin)));
4195 
4196 	return winner;
4197 }
4198 
4199 
4200 /*
4201  * calc_addr_distance - calculate the distance between two addresses,
4202  *			the absolute value of the difference between
4203  *			the addresses numerically, stored as an address.
4204  */
4205 static void
4206 calc_addr_distance(
4207 	sockaddr_u *		dist,
4208 	const sockaddr_u *	a1,
4209 	const sockaddr_u *	a2
4210 	)
4211 {
4212 	u_char *	pdist;
4213 	const u_char *	p1;
4214 	const u_char *	p2;
4215 	size_t		cb;
4216 	int		different;
4217 	int		a1_greater;
4218 	u_int		u;
4219 
4220 	REQUIRE(AF(a1) == AF(a2));
4221 
4222 	ZERO_SOCK(dist);
4223 	AF(dist) = AF(a1);
4224 
4225 	if (IS_IPV4(a1)) {
4226 		pdist = (      u_char *)&NSRCADR(dist);
4227 		p1 =	(const u_char *)&NSRCADR(a1);
4228 		p2 =	(const u_char *)&NSRCADR(a2);
4229 	} else {
4230 		pdist = (      u_char *)&NSRCADR(dist);
4231 		p1 =	(const u_char *)&NSRCADR(a1);
4232 		p2 =	(const u_char *)&NSRCADR(a2);
4233 	}
4234 	cb = SIZEOF_INADDR(AF(dist));
4235 	different = FALSE;
4236 	a1_greater = FALSE;
4237 	for (u = 0; u < cb; u++) {
4238 		if (!different && p1[u] != p2[u]) {
4239 			a1_greater = (p1[u] > p2[u]);
4240 			different = TRUE;
4241 		}
4242 		if (a1_greater) {
4243 			pdist[u] = p1[u] - p2[u];
4244 		} else {
4245 			pdist[u] = p2[u] - p1[u];
4246 		}
4247 	}
4248 }
4249 
4250 
4251 /*
4252  * cmp_addr_distance - compare two address distances, returning -1, 0,
4253  *		       1 to indicate their relationship.
4254  */
4255 static int
4256 cmp_addr_distance(
4257 	const sockaddr_u *	d1,
4258 	const sockaddr_u *	d2
4259 	)
4260 {
4261 	int	i;
4262 
4263 	REQUIRE(AF(d1) == AF(d2));
4264 
4265 	if (IS_IPV4(d1)) {
4266 		if (SRCADR(d1) < SRCADR(d2))
4267 			return -1;
4268 		else if (SRCADR(d1) == SRCADR(d2))
4269 			return 0;
4270 		else
4271 			return 1;
4272 	}
4273 
4274 	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4275 		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4276 			return -1;
4277 		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4278 			return 1;
4279 	}
4280 
4281 	return 0;
4282 }
4283 
4284 
4285 
4286 /*
4287  * fetch an interface structure the matches the
4288  * address and has the given flags NOT set
4289  */
4290 endpt *
4291 getinterface(
4292 	sockaddr_u *	addr,
4293 	u_int32		flags
4294 	)
4295 {
4296 	endpt *iface;
4297 
4298 	iface = find_addr_in_list(addr);
4299 
4300 	if (iface != NULL && (iface->flags & flags))
4301 		iface = NULL;
4302 
4303 	return iface;
4304 }
4305 
4306 
4307 /*
4308  * findbcastinter - find broadcast interface corresponding to address
4309  */
4310 endpt *
4311 findbcastinter(
4312 	sockaddr_u *addr
4313 	)
4314 {
4315 	endpt *	iface;
4316 
4317 	iface = NULL;
4318 #if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4319 	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4320 		    stoa(addr)));
4321 
4322 	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4323 				   1);
4324 	if (iface != NULL) {
4325 		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4326 			    iface->ifnum, iface->name));
4327 		return iface;
4328 	}
4329 
4330 	/*
4331 	 * plan B - try to find something reasonable in our lists in
4332 	 * case kernel lookup doesn't help
4333 	 */
4334 	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4335 		if (iface->flags & INT_WILDCARD)
4336 			continue;
4337 
4338 		/* Don't bother with ignored interfaces */
4339 		if (iface->ignore_packets)
4340 			continue;
4341 
4342 		/*
4343 		 * First look if this is the correct family
4344 		 */
4345 		if(AF(&iface->sin) != AF(addr))
4346 			continue;
4347 
4348 		/* Skip the loopback addresses */
4349 		if (iface->flags & INT_LOOPBACK)
4350 			continue;
4351 
4352 		/*
4353 		 * If we are looking to match a multicast address and
4354 		 * this interface is one...
4355 		 */
4356 		if (addr_ismulticast(addr)
4357 		    && (iface->flags & INT_MULTICAST)) {
4358 #ifdef INCLUDE_IPV6_SUPPORT
4359 			/*
4360 			 * ...it is the winner unless we're looking for
4361 			 * an interface to use for link-local multicast
4362 			 * and its address is not link-local.
4363 			 */
4364 			if (IS_IPV6(addr)
4365 			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4366 			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4367 				continue;
4368 #endif
4369 			break;
4370 		}
4371 
4372 		/*
4373 		 * We match only those interfaces marked as
4374 		 * broadcastable and either the explicit broadcast
4375 		 * address or the network portion of the IP address.
4376 		 * Sloppy.
4377 		 */
4378 		if (IS_IPV4(addr)) {
4379 			if (SOCK_EQ(&iface->bcast, addr))
4380 				break;
4381 
4382 			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4383 			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4384 				break;
4385 		}
4386 #ifdef INCLUDE_IPV6_SUPPORT
4387 		else if (IS_IPV6(addr)) {
4388 			if (SOCK_EQ(&iface->bcast, addr))
4389 				break;
4390 
4391 			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4392 				break;
4393 		}
4394 #endif
4395 	}
4396 #endif /* SIOCGIFCONF */
4397 	if (NULL == iface) {
4398 		DPRINTF(4, ("No bcast interface found for %s\n",
4399 			    stoa(addr)));
4400 		iface = ANY_INTERFACE_CHOOSE(addr);
4401 	} else {
4402 		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4403 			    iface->ifnum, iface->name));
4404 	}
4405 
4406 	return iface;
4407 }
4408 
4409 
4410 /*
4411  * io_clr_stats - clear I/O module statistics
4412  */
4413 void
4414 io_clr_stats(void)
4415 {
4416 	packets_dropped = 0;
4417 	packets_ignored = 0;
4418 	packets_received = 0;
4419 	packets_sent = 0;
4420 	packets_notsent = 0;
4421 
4422 	handler_calls = 0;
4423 	handler_pkts = 0;
4424 	io_timereset = current_time;
4425 }
4426 
4427 
4428 #ifdef REFCLOCK
4429 /*
4430  * io_addclock - add a reference clock to the list and arrange that we
4431  *				 get SIGIO interrupts from it.
4432  */
4433 int
4434 io_addclock(
4435 	struct refclockio *rio
4436 	)
4437 {
4438 	BLOCKIO();
4439 
4440 	/*
4441 	 * Stuff the I/O structure in the list and mark the descriptor
4442 	 * in use.  There is a harmless (I hope) race condition here.
4443 	 */
4444 	rio->active = TRUE;
4445 
4446 # ifdef HAVE_SIGNALED_IO
4447 	if (init_clock_sig(rio)) {
4448 		UNBLOCKIO();
4449 		return 0;
4450 	}
4451 # elif defined(HAVE_IO_COMPLETION_PORT)
4452 	if (!io_completion_port_add_clock_io(rio)) {
4453 		UNBLOCKIO();
4454 		return 0;
4455 	}
4456 # endif
4457 
4458 	/*
4459 	 * enqueue
4460 	 */
4461 	LINK_SLIST(refio, rio, next);
4462 
4463 	/*
4464 	 * register fd
4465 	 */
4466 	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4467 
4468 	UNBLOCKIO();
4469 	return 1;
4470 }
4471 
4472 
4473 /*
4474  * io_closeclock - close the clock in the I/O structure given
4475  */
4476 void
4477 io_closeclock(
4478 	struct refclockio *rio
4479 	)
4480 {
4481 	struct refclockio *unlinked;
4482 
4483 	BLOCKIO();
4484 
4485 	/*
4486 	 * Remove structure from the list
4487 	 */
4488 	rio->active = FALSE;
4489 	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4490 	if (NULL != unlinked) {
4491 		/* Close the descriptor. The order of operations is
4492 		 * important here in case of async / overlapped IO:
4493 		 * only after we have removed the clock from the
4494 		 * IO completion port we can be sure no further
4495 		 * input is queued. So...
4496 		 *  - we first disable feeding to the queu by removing
4497 		 *    the clock from the IO engine
4498 		 *  - close the file (which brings down any IO on it)
4499 		 *  - clear the buffer from results for this fd
4500 		 */
4501 #	    ifdef HAVE_IO_COMPLETION_PORT
4502 		io_completion_port_remove_clock_io(rio);
4503 #	    endif
4504 		close_and_delete_fd_from_list(rio->fd, NULL);
4505 		purge_recv_buffers_for_fd(rio->fd);
4506 		rio->fd = -1;
4507 	}
4508 
4509 	UNBLOCKIO();
4510 }
4511 #endif	/* REFCLOCK */
4512 
4513 
4514 /*
4515  * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4516  * an array. So we use one of the ISC_LIST functions to hold the
4517  * socket value and use that when we want to enumerate it.
4518  *
4519  * This routine is called by the forked intres child process to close
4520  * all open sockets.  On Windows there's no need as intres runs in
4521  * the same process as a thread.
4522  */
4523 #ifndef SYS_WINNT
4524 void
4525 kill_asyncio(
4526 	int	startfd
4527 	)
4528 {
4529 	BLOCKIO();
4530 
4531 	/*
4532 	 * In the child process we do not maintain activefds and
4533 	 * maxactivefd.  Zeroing maxactivefd disables code which
4534 	 * maintains it in close_and_delete_fd_from_list().
4535 	 */
4536 	maxactivefd = 0;
4537 
4538 	while (fd_list != NULL)
4539 		close_and_delete_fd_from_list(fd_list->fd, NULL);
4540 
4541 	UNBLOCKIO();
4542 }
4543 #endif	/* !SYS_WINNT */
4544 
4545 
4546 /*
4547  * Add and delete functions for the list of input file descriptors
4548  */
4549 static void
4550 add_fd_to_list(
4551 	SOCKET fd,
4552 	enum desc_type type
4553 	)
4554 {
4555 	vsock_t *lsock = emalloc(sizeof(*lsock));
4556 
4557 	lsock->fd = fd;
4558 	lsock->type = type;
4559 
4560 	LINK_SLIST(fd_list, lsock, link);
4561 	maintain_activefds(fd, 0);
4562 }
4563 
4564 
4565 static void
4566 close_and_delete_fd_from_list(
4567 	SOCKET fd,
4568 	endpt *ep	/* req. if fd is in struct endpt */
4569 	)
4570 {
4571 	vsock_t *lsock;
4572 
4573 	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4574 	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4575 
4576 	if (NULL == lsock)
4577 		return;
4578 
4579 	switch (lsock->type) {
4580 
4581 	case FD_TYPE_SOCKET:
4582 	    #ifdef HAVE_IO_COMPLETION_PORT
4583 		if (ep != NULL) {
4584 			io_completion_port_remove_socket(fd, ep);
4585 		}
4586 	    #endif
4587 		closesocket(lsock->fd);
4588 		break;
4589 
4590 	case FD_TYPE_FILE:
4591 		closeserial((int)lsock->fd);
4592 		break;
4593 
4594 	default:
4595 		msyslog(LOG_ERR,
4596 			"internal error - illegal descriptor type %d - EXITING",
4597 			(int)lsock->type);
4598 		exit(1);
4599 	}
4600 
4601 	free(lsock);
4602 	/*
4603 	 * remove from activefds
4604 	 */
4605 	maintain_activefds(fd, 1);
4606 }
4607 
4608 
4609 static void
4610 add_addr_to_list(
4611 	sockaddr_u *	addr,
4612 	endpt *		ep
4613 	)
4614 {
4615 	remaddr_t *laddr;
4616 
4617 #ifdef DEBUG
4618 	if (find_addr_in_list(addr) == NULL) {
4619 #endif
4620 		/* not there yet - add to list */
4621 		laddr = emalloc(sizeof(*laddr));
4622 		laddr->addr = *addr;
4623 		laddr->ep = ep;
4624 
4625 		LINK_SLIST(remoteaddr_list, laddr, link);
4626 
4627 		DPRINTF(4, ("Added addr %s to list of addresses\n",
4628 			    stoa(addr)));
4629 #ifdef DEBUG
4630 	} else
4631 		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4632 			    stoa(addr)));
4633 #endif
4634 }
4635 
4636 
4637 static void
4638 delete_addr_from_list(
4639 	sockaddr_u *addr
4640 	)
4641 {
4642 	remaddr_t *unlinked;
4643 
4644 	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4645 		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4646 
4647 	if (unlinked != NULL) {
4648 		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4649 			stoa(addr)));
4650 		free(unlinked);
4651 	}
4652 }
4653 
4654 
4655 static void
4656 delete_interface_from_list(
4657 	endpt *iface
4658 	)
4659 {
4660 	remaddr_t *unlinked;
4661 
4662 	for (;;) {
4663 		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4664 		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4665 		    remaddr_t);
4666 
4667 		if (unlinked == NULL)
4668 			break;
4669 		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4670 			    stoa(&unlinked->addr), iface->ifnum,
4671 			    iface->name));
4672 		free(unlinked);
4673 	}
4674 }
4675 
4676 
4677 static endpt *
4678 find_addr_in_list(
4679 	sockaddr_u *addr
4680 	)
4681 {
4682 	remaddr_t *entry;
4683 
4684 	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4685 		    stoa(addr)));
4686 
4687 	for (entry = remoteaddr_list;
4688 	     entry != NULL;
4689 	     entry = entry->link)
4690 		if (SOCK_EQ(&entry->addr, addr)) {
4691 			DPRINTF(4, ("FOUND\n"));
4692 			return entry->ep;
4693 		}
4694 
4695 	DPRINTF(4, ("NOT FOUND\n"));
4696 	return NULL;
4697 }
4698 
4699 
4700 /*
4701  * Find the given address with the all given flags set in the list
4702  */
4703 static endpt *
4704 find_flagged_addr_in_list(
4705 	sockaddr_u *	addr,
4706 	u_int32		flags
4707 	)
4708 {
4709 	remaddr_t *entry;
4710 
4711 	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4712 		    stoa(addr), flags));
4713 
4714 	for (entry = remoteaddr_list;
4715 	     entry != NULL;
4716 	     entry = entry->link)
4717 
4718 		if (SOCK_EQ(&entry->addr, addr)
4719 		    && (entry->ep->flags & flags) == flags) {
4720 
4721 			DPRINTF(4, ("FOUND\n"));
4722 			return entry->ep;
4723 		}
4724 
4725 	DPRINTF(4, ("NOT FOUND\n"));
4726 	return NULL;
4727 }
4728 
4729 
4730 const char *
4731 localaddrtoa(
4732 	endpt *la
4733 	)
4734 {
4735 	return (NULL == la)
4736 		   ? "<null>"
4737 		   : stoa(&la->sin);
4738 }
4739 
4740 
4741 #ifdef HAS_ROUTING_SOCKET
4742 # ifndef UPDATE_GRACE
4743 #  define UPDATE_GRACE	3	/* min. UPDATE_GRACE - 1 seconds before scanning */
4744 # endif
4745 
4746 static void
4747 process_routing_msgs(struct asyncio_reader *reader)
4748 {
4749 	static void *	buffer;
4750 	static size_t	buffsz = 8192;
4751 	int		cnt, new, msg_type;
4752 	socklen_t	len;
4753 #ifdef HAVE_RTNETLINK
4754 	struct nlmsghdr *nh;
4755 #else
4756 	struct rt_msghdr rtm;
4757 	char *p;
4758 	char *endp;
4759 #endif
4760 
4761 	if (scan_addrs_once) {
4762 		/*
4763 		 * discard ourselves if we are not needed any more
4764 		 * usually happens when running unprivileged
4765 		 */
4766 		goto disable;
4767 	}
4768 
4769 	if (NULL == buffer) {
4770 		buffer = emalloc(buffsz);
4771 	}
4772 
4773 	cnt = read(reader->fd, buffer, buffsz);
4774 
4775 	if (cnt < 0) {
4776 		if (errno == ENOBUFS) {
4777 			/* increase socket buffer by 25% */
4778 			len = sizeof cnt;
4779 			if (0 > getsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &cnt, &len) ||
4780 			    sizeof cnt != len) {
4781 				msyslog(LOG_ERR,
4782 					"routing getsockopt SO_RCVBUF %u %u: %m - disabling",
4783 					(u_int)cnt, (u_int)sizeof cnt);
4784 				goto disable;
4785 			}
4786 			new = cnt + (cnt / 4);
4787 			if (0 > setsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &new, sizeof new)) {
4788 				msyslog(LOG_ERR,
4789 					"routing setsockopt SO_RCVBUF %d -> %d: %m - disabling",
4790 					cnt, new);
4791 				goto disable;
4792 			}
4793 		} else {
4794 			msyslog(LOG_ERR,
4795 				"routing socket reports: %m - disabling");
4796 		    disable:
4797 			remove_asyncio_reader(reader);
4798 			delete_asyncio_reader(reader);
4799 			return;
4800 		}
4801 	}
4802 
4803 	/*
4804 	 * process routing message
4805 	 */
4806 #ifdef HAVE_RTNETLINK
4807 	for (nh = buffer; NLMSG_OK(nh, cnt); nh = NLMSG_NEXT(nh, cnt))
4808 	{
4809 		msg_type = nh->nlmsg_type;
4810 #else
4811 	for (p = buffer, endp = p + cnt;
4812 	     (p + sizeof(struct rt_msghdr)) <= endp;
4813 	     p += rtm.rtm_msglen)
4814 	{
4815 		memcpy(&rtm, p, sizeof(rtm));
4816 		if (rtm.rtm_version != RTM_VERSION) {
4817 			msyslog(LOG_ERR,
4818 				"version mismatch (got %d - expected %d) on routing socket - disabling",
4819 				rtm.rtm_version, RTM_VERSION);
4820 
4821 			remove_asyncio_reader(reader);
4822 			delete_asyncio_reader(reader);
4823 			return;
4824 		}
4825 		msg_type = rtm.rtm_type;
4826 #endif	/* !HAVE_RTNETLINK */
4827 		switch (msg_type) {
4828 #ifdef RTM_NEWADDR
4829 		case RTM_NEWADDR:
4830 #endif
4831 #ifdef RTM_DELADDR
4832 		case RTM_DELADDR:
4833 #endif
4834 #ifdef RTM_ADD
4835 		case RTM_ADD:
4836 #endif
4837 #ifdef RTM_DELETE
4838 		case RTM_DELETE:
4839 #endif
4840 #ifdef RTM_REDIRECT
4841 		case RTM_REDIRECT:
4842 #endif
4843 #ifdef RTM_CHANGE
4844 		case RTM_CHANGE:
4845 #endif
4846 #ifdef RTM_LOSING
4847 		case RTM_LOSING:
4848 #endif
4849 #ifdef RTM_IFINFO
4850 		case RTM_IFINFO:
4851 #endif
4852 #ifdef RTM_IFANNOUNCE
4853 		case RTM_IFANNOUNCE:
4854 #endif
4855 #ifdef RTM_NEWLINK
4856 		case RTM_NEWLINK:
4857 #endif
4858 #ifdef RTM_DELLINK
4859 		case RTM_DELLINK:
4860 #endif
4861 #ifdef RTM_NEWROUTE
4862 		case RTM_NEWROUTE:
4863 #endif
4864 #ifdef RTM_DELROUTE
4865 		case RTM_DELROUTE:
4866 #endif
4867 			/*
4868 			 * we are keen on new and deleted addresses and
4869 			 * if an interface goes up and down or routing
4870 			 * changes
4871 			 */
4872 			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4873 				    msg_type));
4874 			endpt_scan_timer = UPDATE_GRACE + current_time;
4875 			break;
4876 #ifdef HAVE_RTNETLINK
4877 		case NLMSG_DONE:
4878 			/* end of multipart message */
4879 			return;
4880 #endif
4881 		default:
4882 			/*
4883 			 * the rest doesn't bother us.
4884 			 */
4885 			DPRINTF(4, ("routing message op = %d: ignored\n",
4886 				    msg_type));
4887 			break;
4888 		}
4889 	}
4890 }
4891 
4892 /*
4893  * set up routing notifications
4894  */
4895 static void
4896 init_async_notifications(void)
4897 {
4898 	struct asyncio_reader *reader;
4899 #ifdef HAVE_RTNETLINK
4900 	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4901 	struct sockaddr_nl sa;
4902 #else
4903 	int fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
4904 #endif
4905 	if (fd < 0) {
4906 		msyslog(LOG_ERR,
4907 			"unable to open routing socket (%m) - using polled interface update");
4908 		return;
4909 	}
4910 
4911 	fd = move_fd(fd);
4912 #ifdef HAVE_RTNETLINK
4913 	ZERO(sa);
4914 	sa.nl_family = PF_NETLINK;
4915 	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4916 		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4917 		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4918 		       | RTMGRP_IPV6_MROUTE;
4919 	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4920 		msyslog(LOG_ERR,
4921 			"bind failed on routing socket (%m) - using polled interface update");
4922 		return;
4923 	}
4924 #endif
4925 	make_socket_nonblocking(fd);
4926 #if defined(HAVE_SIGNALED_IO)
4927 	init_socket_sig(fd);
4928 #endif /* HAVE_SIGNALED_IO */
4929 
4930 	reader = new_asyncio_reader();
4931 
4932 	reader->fd = fd;
4933 	reader->receiver = process_routing_msgs;
4934 
4935 	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4936 	msyslog(LOG_INFO,
4937 		"Listening on routing socket on fd #%d for interface updates",
4938 		fd);
4939 }
4940 #else
/*
 * HAS_ROUTING_SOCKET not defined: there is no routing socket to
 * listen on, so setting up notifications is a no-op.
 */
static void
init_async_notifications(void)
{
	return;
}
4946 #endif
4947