xref: /illumos-gate/usr/src/cmd/fm/modules/common/ip-transport/ip.c (revision 3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/sysmacros.h>
29 #include <sys/fm/protocol.h>
30 
31 #include <netinet/in.h>
32 #include <arpa/inet.h>
33 
34 #include <strings.h>
35 #include <unistd.h>
36 #include <pthread.h>
37 #include <alloca.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <netdb.h>
41 #include <poll.h>
42 #include <stdarg.h>
43 
44 #include <fm/fmd_api.h>
45 
46 #define	IP_MAGIC	"\177FMA" /* magic string identifying a packet header */
47 #define	IP_MAGLEN	4	/* length of magic string */
48 #define	IP_DEBUG_OFF	0	/* No informational debugging printed */
49 #define	IP_DEBUG_FINE	1	/* Basic debug information printed (default) */
50 #define	IP_DEBUG_FINER	2	/* More debug information printed. */
51 #define	IP_DEBUG_FINEST	3	/* All debug information printed */
52 
/*
 * Header that precedes every packed event on the wire.  The sender fills in
 * iph_magic with IP_MAGIC and stores the packed nvlist size in network byte
 * order (htonl); the receiver validates the magic and converts the size back
 * with ntohl before reading the payload.
 */
typedef struct ip_hdr {
	char iph_magic[IP_MAGLEN]; /* magic string */
	uint32_t iph_size;	/* packed size */
} ip_hdr_t;
57 
/*
 * A resizable data buffer.  The send and receive paths free and re-allocate
 * ipb_buf (rounded up to a multiple of 16 bytes) whenever the requested size
 * exceeds ipb_size.
 */
typedef struct ip_buf {
	void *ipb_buf;		/* data buffer */
	size_t ipb_size;	/* size of buffer */
} ip_buf_t;
62 
/*
 * Per-endpoint connection information.  'addr' is the list produced by
 * getaddrinfo() in ip_setup_addr() and released in ip_addr_cleanup().
 * 'retry' is decremented by ip_xprt_setup() on each failed attempt and is
 * reset to ip_retry by ip_xprt_recv() once data has been received.
 */
typedef struct ip_cinfo {	/* Connection specific information */
	struct addrinfo *addr;	/* Connection address(es) */
	char *name;		/* The name of the server or interface */
	int retry;		/* The number of connection retries */
} ip_cinfo_t;
68 
/*
 * State for one socket managed by this module: either a listening socket
 * (FMD_XPRT_ACCEPT set and ipx_xprt NULL) or a connected peer.  Each ip_xprt
 * has its own auxiliary thread (ip_xprt_thread) and is linked onto the global
 * ip_xps list under ip_lock.
 */
typedef struct ip_xprt {
	fmd_xprt_t *ipx_xprt;	/* transport handle */
	int ipx_flags;		/* transport flags */
	int ipx_fd;		/* socket file descriptor */
	int ipx_done;		/* flag indicating connection closed */
	pthread_t ipx_tid;	/* recv-side auxiliary thread */
	ip_buf_t ipx_sndbuf;	/* buffer for sending events */
	ip_buf_t ipx_rcvbuf;	/* buffer for receiving events */
	ip_cinfo_t *ipx_cinfo;	/* info for reconnect */
	char *ipx_addr;		/* address:port of remote connection */
	struct ip_xprt *ipx_next;	/* next ip_xprt in global list */
} ip_xprt_t;
81 
82 #define	IPX_ID(a) ((a)->ipx_addr)
83 
/*
 * Module statistics.  Each counter is incremented at the corresponding
 * failure site in this file (accept, header validation, pack and unpack).
 */
typedef struct ip_stat {
	fmd_stat_t ips_accfail;	/* failed accepts */
	fmd_stat_t ips_badmagic; /* invalid packet headers */
	fmd_stat_t ips_packfail; /* failed packs */
	fmd_stat_t ips_unpackfail; /* failed unpacks */
} ip_stat_t;
90 
91 static void ip_xprt_create(fmd_xprt_t *, int, int, ip_cinfo_t *, char *);
92 static void ip_xprt_destroy(ip_xprt_t *);
93 
/*
 * The single instance of the module statistics; registered with fmd in
 * _fmd_init() via fmd_stat_create() using FMD_STAT_NOALLOC.
 */
static ip_stat_t ip_stat = {
	{ "accfail", FMD_TYPE_UINT64, "failed accepts" },
	{ "badmagic", FMD_TYPE_UINT64, "invalid packet headers" },
	{ "packfail", FMD_TYPE_UINT64, "failed packs" },
	{ "unpackfail", FMD_TYPE_UINT64, "failed unpacks" },
};
100 
/*
 * Module-wide state.  Apart from ip_hdl, ip_lock, ip_xps, ip_auth and ip_quit,
 * these values are loaded from the module properties in _fmd_init().
 */
static fmd_hdl_t *ip_hdl;	/* module handle */
static pthread_mutex_t ip_lock;	/* lock for ip_xps list */
static ip_xprt_t *ip_xps;	/* list of active transports */
static nvlist_t *ip_auth;	/* authority to use for transport(s) */
static size_t ip_size;		/* default buffer size */
static volatile int ip_quit;	/* signal to quit */
static int ip_qlen;		/* queue length for listen(3SOCKET) */
static int ip_mtbf;		/* mtbf for simulating packet drop */
static int ip_external;		/* set transport to be "external" */
static int ip_no_remote_repair;	/* disallow remote repair */
static int ip_hconly;		/* only cache faults that are hc-scheme */
static int ip_rdonly;		/* force transport to be rdonly */
static int ip_hc_present_only;	/* only cache faults if hc-scheme and present */
static char *ip_domain_name;	/* set domain name for received list.suspects */
static hrtime_t ip_burp;	/* make mtbf slower by adding this much delay */
static int ip_translate;	/* call fmd_xprt_translate() before sending */
static char *ip_port;		/* port to connect to (or bind to if server) */
static int ip_retry;		/* retry count for ip_xprt_setup() -1=forever */
static hrtime_t ip_sleep;	/* sleep delay for ip_xprt_setup() */
static ip_cinfo_t ip_listen;	/* Transport service conn info for server */
static ip_cinfo_t ip_server;    /* Remote server connection info for client */
static ip_cinfo_t ip_server2;	/* Second remote server conn info for client */
static int ip_debug_level;	/* level for printing debug messages */
124 
125 /*
126  * Prints a debug message to the fmd debug framework if the debug level is set
127  * to at least the given level.
128  */
129 static void
130 ip_debug(int level, char *fmt, ...)
131 {
132 	if (ip_debug_level >= level) {
133 		va_list args;
134 		va_start(args, fmt);
135 		fmd_hdl_vdebug(ip_hdl, fmt, args);
136 		va_end(args);
137 	}
138 }
139 
140 /*
141  * Allocate space in ipx_sndbuf for a header and a packed XDR encoding of
142  * the specified nvlist, and then send the buffer to our remote peer.
143  */
144 static int
145 ip_xprt_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
146 {
147 	ip_xprt_t *ipx = fmd_xprt_getspecific(hdl, xp);
148 
149 	size_t size, nvsize;
150 	char *buf, *nvbuf;
151 	ip_hdr_t *iph;
152 	ssize_t r, n;
153 	int err;
154 
155 	/*
156 	 * For testing purposes, if ip_mtbf is non-zero, use this to pseudo-
157 	 * randomly simulate the need for retries.  If ip_burp is also set,
158 	 * then we also suspend the transport for a bit and wake it up again.
159 	 */
160 	if (ip_mtbf != 0 && gethrtime() % ip_mtbf == 0) {
161 		if (ip_burp != 0) {
162 			ip_debug(IP_DEBUG_FINE, "burping ipx %s", IPX_ID(ipx));
163 			ipx->ipx_flags |= FMD_XPRT_SUSPENDED;
164 			(void) fmd_timer_install(ip_hdl, ipx, NULL, ip_burp);
165 			fmd_xprt_suspend(ip_hdl, xp);
166 		}
167 		return (FMD_SEND_RETRY);
168 	}
169 
170 	if (ip_translate && (nvl = fmd_xprt_translate(hdl, xp, ep)) == NULL) {
171 		fmd_hdl_error(hdl, "failed to translate event %p", (void *)ep);
172 		return (FMD_SEND_FAILED);
173 	}
174 
175 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_XDR);
176 	size = r = sizeof (ip_hdr_t) + nvsize;
177 
178 	if (ipx->ipx_sndbuf.ipb_size < size) {
179 		fmd_hdl_free(hdl, ipx->ipx_sndbuf.ipb_buf,
180 		    ipx->ipx_sndbuf.ipb_size);
181 		ipx->ipx_sndbuf.ipb_size = P2ROUNDUP(size, 16);
182 		ipx->ipx_sndbuf.ipb_buf = fmd_hdl_alloc(hdl,
183 		    ipx->ipx_sndbuf.ipb_size, FMD_SLEEP);
184 	}
185 
186 	buf = ipx->ipx_sndbuf.ipb_buf;
187 	iph = (ip_hdr_t *)(uintptr_t)buf;
188 	nvbuf = buf + sizeof (ip_hdr_t);
189 
190 	bcopy(IP_MAGIC, iph->iph_magic, IP_MAGLEN);
191 	iph->iph_size = htonl(nvsize);
192 	err = nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
193 
194 	if (ip_translate)
195 		nvlist_free(nvl);
196 
197 	if (err != 0) {
198 		fmd_hdl_error(ip_hdl, "failed to pack event for "
199 		    "transport %p: %s\n", (void *)ipx->ipx_xprt, strerror(err));
200 		ip_stat.ips_packfail.fmds_value.ui64++;
201 		return (FMD_SEND_FAILED);
202 	}
203 
204 	while (!ip_quit && r != 0) {
205 		if ((n = send(ipx->ipx_fd, buf, r, 0)) < 0) {
206 			if (errno != EINTR && errno != EWOULDBLOCK) {
207 				ip_debug(IP_DEBUG_FINE,
208 				    "failed to send to %s", IPX_ID(ipx));
209 				return (FMD_SEND_FAILED);
210 			}
211 			continue;
212 		}
213 		buf += n;
214 		r -= n;
215 	}
216 
217 	ip_debug(IP_DEBUG_FINEST, "Sent event %d bytes to %s",
218 	    size, IPX_ID(ipx));
219 	return (FMD_SEND_SUCCESS);
220 }
221 
222 /*
223  * Receive a chunk of data of the specified size from our remote peer.  The
224  * data is received into ipx_rcvbuf, and then a pointer to the buffer is
225  * returned.  NOTE: The data is only valid until the next call to ip_xprt_recv.
226  * If the connection breaks or ip_quit is set during receive, NULL is returned.
227  */
228 static void *
229 ip_xprt_recv(ip_xprt_t *ipx, size_t size)
230 {
231 	char *buf = ipx->ipx_rcvbuf.ipb_buf;
232 	ssize_t n, r = size;
233 
234 	if (ipx->ipx_rcvbuf.ipb_size < size) {
235 		fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf,
236 		    ipx->ipx_rcvbuf.ipb_size);
237 		ipx->ipx_rcvbuf.ipb_size = P2ROUNDUP(size, 16);
238 		ipx->ipx_rcvbuf.ipb_buf = buf = fmd_hdl_alloc(ip_hdl,
239 		    ipx->ipx_rcvbuf.ipb_size, FMD_SLEEP);
240 	}
241 
242 	while (!ip_quit && r != 0) {
243 		if ((n = recv(ipx->ipx_fd, buf, r, MSG_WAITALL)) == 0) {
244 			ipx->ipx_done++;
245 			return (NULL);
246 		}
247 
248 		if (n < 0) {
249 			if (errno != EINTR && errno != EWOULDBLOCK) {
250 				ip_debug(IP_DEBUG_FINE,
251 				    "failed to recv on ipx %s", IPX_ID(ipx));
252 			}
253 			continue;
254 		}
255 		/* Reset retry counter after a successful connection */
256 		if (ipx->ipx_cinfo) {
257 			ipx->ipx_cinfo->retry = ip_retry;
258 		}
259 
260 		buf += n;
261 		r -= n;
262 	}
263 
264 	return (r ? NULL: ipx->ipx_rcvbuf.ipb_buf);
265 }
266 
267 /*
268  * Sets the address/port of the remote connection in the connection info struct
269  * This is called after a TCP session has been set up with a known remote
270  * address (sap)
271  */
272 static void
273 ip_xprt_set_addr(ip_xprt_t *ipx, const struct sockaddr *sap)
274 {
275 	const struct sockaddr_in6 *sin6 = (const void *)sap;
276 	const struct sockaddr_in *sin = (const void *)sap;
277 
278 	char buf[INET6_ADDRSTRLEN + 16];
279 	struct in_addr v4addr;
280 	in_port_t port;
281 	int n;
282 
283 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_set_name");
284 
285 	if (sap->sa_family == AF_INET6 &&
286 	    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
287 		IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr, &v4addr);
288 		(void) inet_ntop(AF_INET, &v4addr, buf, sizeof (buf));
289 		port = ntohs(sin6->sin6_port);
290 	} else if (sap->sa_family == AF_INET6) {
291 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof (buf));
292 		port = ntohs(sin6->sin6_port);
293 	} else {
294 		(void) inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof (buf));
295 		port = ntohs(sin->sin_port);
296 	}
297 
298 	n = strlen(buf);
299 	(void) snprintf(buf + n, sizeof (buf) - n, ":%u", port);
300 
301 	if (ipx->ipx_addr)
302 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
303 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, buf, FMD_SLEEP);
304 	ip_debug(IP_DEBUG_FINE, "connection addr is %s on %p",
305 	    ipx->ipx_addr, (void *)ipx);
306 }
307 
308 static nvlist_t *
309 ip_xprt_auth(ip_xprt_t *ipx)
310 {
311 	nvlist_t *nvl;
312 	int err;
313 
314 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_auth");
315 
316 	if (ip_auth != NULL)
317 		err = nvlist_dup(ip_auth, &nvl, 0);
318 	else
319 		err = nvlist_alloc(&nvl, 0, 0);
320 
321 	if (err != 0) {
322 		fmd_hdl_abort(ip_hdl, "failed to create nvlist for "
323 		    "authority: %s\n", strerror(err));
324 	}
325 
326 	if (ip_auth != NULL)
327 		return (nvl);
328 
329 	ip_debug(IP_DEBUG_FINE, "ip_authority %s=%s\n",
330 	    FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
331 
332 	(void) nvlist_add_uint8(nvl, FM_VERSION, FM_FMRI_AUTH_VERSION);
333 	(void) nvlist_add_string(nvl, FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
334 
335 	return (nvl);
336 }
337 
338 static void
339 ip_xprt_accept(ip_xprt_t *ipx)
340 {
341 	struct sockaddr_storage sa;
342 	socklen_t salen = sizeof (sa);
343 	fmd_xprt_t *xp;
344 	int fd;
345 
346 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_accept");
347 
348 	if ((fd = accept(ipx->ipx_fd, (struct sockaddr *)&sa, &salen)) == -1) {
349 		fmd_hdl_error(ip_hdl, "failed to accept connection");
350 		ip_stat.ips_accfail.fmds_value.ui64++;
351 		return;
352 	}
353 	ip_debug(IP_DEBUG_FINE, "Accepted socket on fd %d", fd);
354 
355 	ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
356 	xp = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
357 	    ip_xprt_auth(ipx), NULL);
358 	ip_xprt_create(xp, fd, ipx->ipx_flags, &ip_listen, ipx->ipx_addr);
359 }
360 
361 static void
362 ip_xprt_recv_event(ip_xprt_t *ipx)
363 {
364 	ip_hdr_t *iph;
365 	nvlist_t *nvl;
366 	size_t size;
367 	void *buf;
368 	int err;
369 
370 	if ((iph = ip_xprt_recv(ipx, sizeof (ip_hdr_t))) == NULL)
371 		return; /* connection broken */
372 
373 	if (bcmp(iph->iph_magic, IP_MAGIC, IP_MAGLEN) != 0) {
374 		fmd_hdl_error(ip_hdl,
375 		    "invalid hdr magic %x.%x.%x.%x from transport %s\n",
376 		    iph->iph_magic[0], iph->iph_magic[1], iph->iph_magic[2],
377 		    iph->iph_magic[3], IPX_ID(ipx));
378 		ip_stat.ips_badmagic.fmds_value.ui64++;
379 		return;
380 	}
381 
382 	size = ntohl(iph->iph_size);
383 
384 	if ((buf = ip_xprt_recv(ipx, size)) == NULL)
385 		return; /* connection broken */
386 
387 	if ((err = nvlist_unpack(buf, size, &nvl, 0)) != 0) {
388 		fmd_hdl_error(ip_hdl, "failed to unpack event from "
389 		    "transport %s: %s\n",
390 		    IPX_ID(ipx), strerror(err));
391 		ip_stat.ips_unpackfail.fmds_value.ui64++;
392 	} else {
393 		if (ip_domain_name)
394 			fmd_xprt_add_domain(ip_hdl, nvl, ip_domain_name);
395 		fmd_xprt_post(ip_hdl, ipx->ipx_xprt, nvl, 0);
396 	}
397 
398 	if (fmd_xprt_error(ip_hdl, ipx->ipx_xprt)) {
399 		fmd_hdl_error(ip_hdl, "protocol error on transport %p",
400 		    (void *)ipx->ipx_xprt);
401 		ipx->ipx_done++;
402 	}
403 	ip_debug(IP_DEBUG_FINEST, "Recv event %d bytes from %s",
404 	    size, IPX_ID(ipx));
405 }
406 
407 static void
408 ip_xprt_thread(void *arg)
409 {
410 	ip_xprt_t *ipx = arg;
411 	struct sockaddr_storage sa;
412 	socklen_t salen = sizeof (sa);
413 	struct pollfd pfd;
414 	id_t id;
415 
416 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_thread");
417 
418 	while (!ip_quit && !ipx->ipx_done) {
419 		if (ipx->ipx_xprt != NULL || (ipx->ipx_flags & FMD_XPRT_ACCEPT))
420 			pfd.events = POLLIN;
421 		else
422 			pfd.events = POLLOUT;
423 
424 		pfd.fd = ipx->ipx_fd;
425 		pfd.revents = 0;
426 
427 		if (poll(&pfd, 1, -1) <= 0)
428 			continue; /* loop around and check ip_quit */
429 
430 		if (pfd.revents & (POLLHUP | POLLERR)) {
431 			ip_debug(IP_DEBUG_FINE, "hangup fd %d\n", ipx->ipx_fd);
432 			break;
433 		}
434 
435 		if (pfd.revents & POLLOUT) {
436 			/*
437 			 * Once we're connected, there's no reason to have our
438 			 * calls to recv() and send() be non-blocking since we
439 			 * we have separate threads for each: clear O_NONBLOCK.
440 			 */
441 			(void) fcntl(ipx->ipx_fd, F_SETFL,
442 			    fcntl(ipx->ipx_fd, F_GETFL, 0) & ~O_NONBLOCK);
443 
444 			if (getpeername(ipx->ipx_fd, (struct sockaddr *)&sa,
445 			    &salen) != 0) {
446 				fmd_hdl_error(ip_hdl, "failed to get peer name "
447 				    "for fd %d", ipx->ipx_fd);
448 				bzero(&sa, sizeof (sa));
449 			}
450 			ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
451 			ipx->ipx_xprt = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
452 			    ip_xprt_auth(ipx), ipx);
453 
454 			ip_debug(IP_DEBUG_FINE, "connect fd %d ipx %p",
455 			    ipx->ipx_fd, (void *)ipx);
456 			continue;
457 		}
458 
459 		if (pfd.revents & POLLIN) {
460 			if (ipx->ipx_xprt == NULL)
461 				ip_xprt_accept(ipx);
462 			else
463 				ip_xprt_recv_event(ipx);
464 		}
465 	}
466 
467 	id = fmd_timer_install(ip_hdl, ipx, NULL, 0);
468 	ip_debug(IP_DEBUG_FINE, "close fd %d (timer %d)", ipx->ipx_fd, (int)id);
469 }
470 
471 static void
472 ip_xprt_create(fmd_xprt_t *xp, int fd, int flags, ip_cinfo_t *cinfo, char *addr)
473 {
474 	ip_xprt_t *ipx = fmd_hdl_zalloc(ip_hdl, sizeof (ip_xprt_t), FMD_SLEEP);
475 
476 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_create %p", (void *)ipx);
477 
478 	ipx->ipx_xprt = xp;
479 	ipx->ipx_flags = flags;
480 	ipx->ipx_fd = fd;
481 	ipx->ipx_tid = fmd_thr_create(ip_hdl, ip_xprt_thread, ipx);
482 	ipx->ipx_cinfo = cinfo;
483 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, addr, FMD_SLEEP);
484 
485 	if (ipx->ipx_xprt != NULL)
486 		fmd_xprt_setspecific(ip_hdl, ipx->ipx_xprt, ipx);
487 
488 	(void) pthread_mutex_lock(&ip_lock);
489 
490 	ipx->ipx_next = ip_xps;
491 	ip_xps = ipx;
492 
493 	(void) pthread_mutex_unlock(&ip_lock);
494 }
495 
496 static void
497 ip_xprt_destroy(ip_xprt_t *ipx)
498 {
499 	ip_xprt_t *ipp, **ppx = &ip_xps;
500 
501 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_destory %s %p",
502 	    IPX_ID(ipx), (void *)ipx);
503 
504 	(void) pthread_mutex_lock(&ip_lock);
505 
506 	for (ipp = *ppx; ipp != NULL; ipp = ipp->ipx_next) {
507 		if (ipp != ipx)
508 			ppx = &ipp->ipx_next;
509 		else
510 			break;
511 	}
512 
513 	if (ipp != ipx) {
514 		(void) pthread_mutex_unlock(&ip_lock);
515 		fmd_hdl_abort(ip_hdl, "ipx %p not on xps list\n", (void *)ipx);
516 	}
517 
518 	*ppx = ipx->ipx_next;
519 	ipx->ipx_next = NULL;
520 
521 	(void) pthread_mutex_unlock(&ip_lock);
522 
523 	fmd_thr_signal(ip_hdl, ipx->ipx_tid);
524 	fmd_thr_destroy(ip_hdl, ipx->ipx_tid);
525 
526 	if (ipx->ipx_xprt != NULL)
527 		fmd_xprt_close(ip_hdl, ipx->ipx_xprt);
528 
529 	fmd_hdl_free(ip_hdl, ipx->ipx_sndbuf.ipb_buf, ipx->ipx_sndbuf.ipb_size);
530 	fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf, ipx->ipx_rcvbuf.ipb_size);
531 
532 	(void) close(ipx->ipx_fd);
533 	if (ipx->ipx_addr) {
534 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
535 		ipx->ipx_addr = NULL;
536 	}
537 	fmd_hdl_free(ip_hdl, ipx, sizeof (ip_xprt_t));
538 }
539 
540 /*
541  * Loop through the addresses in the connection info structure that were
542  * created by getaddrinfo() in ip_setup_addr during initialization (_fmd_init)
543  * and for each one attempt to create a socket and initialize it.  If we are
544  * successful, return zero.  If we fail, we check ip_retry: if it is non-zero
545  * we return the last errno and let our caller retry ip_xprt_setup() later.  If
546  * ip_retry reaches zero, we call fmd_hdl_abort() with an appropriate message.
547  */
548 static int
549 ip_xprt_setup(fmd_hdl_t *hdl, ip_cinfo_t *cinfo)
550 {
551 	int err, fd, oflags, xflags, optval = 1;
552 	struct addrinfo *aip;
553 	const char *s1, *s2;
554 	struct addrinfo *ail = cinfo->addr;
555 
556 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_setup");
557 
558 	/*
559 	 * Set up flags as specified in the .conf file. Note that these are
560 	 * mostly only used for testing purposes, allowing the transport to
561 	 * be set up in various modes.
562 	 */
563 	if (ail != ip_listen.addr)
564 		xflags = (ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY :
565 		    FMD_XPRT_RDWR;
566 	else
567 		xflags = ((ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY :
568 		    FMD_XPRT_RDWR) | FMD_XPRT_ACCEPT;
569 
570 	if (ip_external == FMD_B_TRUE)
571 		xflags |= FMD_XPRT_EXTERNAL;
572 	if (ip_no_remote_repair == FMD_B_TRUE)
573 		xflags |= FMD_XPRT_NO_REMOTE_REPAIR;
574 	if (ip_hconly == FMD_B_TRUE)
575 		xflags |= FMD_XPRT_HCONLY;
576 	if (ip_hc_present_only == FMD_B_TRUE)
577 		xflags |= FMD_XPRT_HC_PRESENT_ONLY;
578 
579 	for (aip = ail; aip != NULL; aip = aip->ai_next) {
580 		if (aip->ai_family != AF_INET && aip->ai_family != AF_INET6)
581 			continue; /* ignore anything that isn't IPv4 or IPv6 */
582 
583 		if ((fd = socket(aip->ai_family,
584 		    aip->ai_socktype, aip->ai_protocol)) == -1) {
585 			err = errno;
586 			continue;
587 		}
588 
589 		oflags = fcntl(fd, F_GETFL, 0);
590 		(void) fcntl(fd, F_SETFL, oflags | O_NONBLOCK);
591 
592 		if (xflags & FMD_XPRT_ACCEPT) {
593 			err = setsockopt(fd, SOL_SOCKET,
594 			    SO_REUSEADDR, &optval, sizeof (optval)) != 0 ||
595 			    bind(fd, aip->ai_addr, aip->ai_addrlen) != 0 ||
596 			    listen(fd, ip_qlen) != 0;
597 		} else {
598 			err = connect(fd, aip->ai_addr, aip->ai_addrlen);
599 			if (err)
600 				err = errno;
601 			if (err == EINPROGRESS)
602 				err = 0;
603 		}
604 
605 		if (err == 0) {
606 			ip_xprt_create(NULL, fd, xflags, cinfo, NULL);
607 			ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
608 			return (0);
609 		}
610 
611 		ip_debug(IP_DEBUG_FINE, "Error=%d errno=%d", err, errno);
612 
613 		err = errno;
614 		(void) close(fd);
615 	}
616 
617 	if (cinfo->name != NULL) {
618 		s1 = "failed to connect to";
619 		s2 = cinfo->name;
620 	} else {
621 		s1 = "failed to listen on";
622 		s2 = ip_port;
623 	}
624 
625 	if (err == EACCES || cinfo->retry-- == 0)
626 		fmd_hdl_abort(hdl, "%s %s: %s\n", s1, s2, strerror(err));
627 
628 	ip_debug(IP_DEBUG_FINE, "%s %s: %s (will retry)\n",
629 	    s1, s2, strerror(err));
630 	ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
631 	return (err);
632 }
633 
634 /*
635  * Free address based resources
636  */
637 static void
638 ip_addr_cleanup()
639 {
640 	if (ip_listen.addr != NULL) {
641 		freeaddrinfo(ip_listen.addr);
642 		ip_listen.addr = NULL;
643 	}
644 	if (ip_server.addr != NULL) {
645 		freeaddrinfo(ip_server.addr);
646 		ip_server.addr = NULL;
647 	}
648 	if (ip_server2.addr != NULL) {
649 		freeaddrinfo(ip_server2.addr);
650 		ip_server2.addr = NULL;
651 	}
652 	fmd_prop_free_string(ip_hdl, ip_server.name);
653 	fmd_prop_free_string(ip_hdl, ip_server2.name);
654 	fmd_prop_free_string(ip_hdl, ip_port);
655 }
656 
657 /*
658  * Setup a single address for ip connection.
659  */
660 static int
661 ip_setup_addr(ip_cinfo_t *cinfo)
662 {
663 	struct addrinfo aih;
664 	char *server = cinfo->name;
665 	int err;
666 
667 	bzero(&aih, sizeof (aih));
668 	aih.ai_flags = AI_ADDRCONFIG;
669 	aih.ai_family = AF_UNSPEC;
670 	aih.ai_socktype = SOCK_STREAM;
671 	if (server != NULL) {
672 		ip_debug(IP_DEBUG_FINE, "resolving %s:%s\n", server, ip_port);
673 	} else {
674 		aih.ai_flags |= AI_PASSIVE;
675 		cinfo->name = "localhost";
676 	}
677 
678 	err = getaddrinfo(server, ip_port, &aih, &cinfo->addr);
679 	if (err != 0) {
680 		fmd_hdl_error(ip_hdl, "failed to resolve host %s port %s: %s\n",
681 		    cinfo->name, ip_port, gai_strerror(err));
682 		cinfo->addr = NULL;
683 	}
684 	return (err);
685 }
686 
687 /*
688  * Setup all IP addresses for network configuration.
689  * The listen address for for a service that will bind to clients.
690  * A client can connect up to two servers using ip_server and ip_server2
691  * properties.
692  */
693 static int
694 ip_setup_addrs()
695 {
696 	int err = 0;
697 	ip_listen.addr = NULL;
698 	ip_server.addr = NULL;
699 	ip_server.retry = ip_retry;
700 	ip_server2.addr = NULL;
701 
702 	if ((ip_server.name == NULL && ip_server2.name == NULL) ||
703 	    ip_listen.name) {
704 		err = ip_setup_addr(&ip_listen);
705 	}
706 	if (ip_server.name != NULL && err == 0) {
707 		err = ip_setup_addr(&ip_server);
708 	}
709 	if (ip_server2.name != NULL && err == 0) {
710 		err = ip_setup_addr(&ip_server2);
711 	}
712 	if (err != 0) {
713 		ip_addr_cleanup();
714 	}
715 	return (err);
716 }
717 
718 /*
719  * Timeout handler for the transport module.  We use three types of timeouts:
720  *
721  * (a) arg is NULL: attempt ip_xprt_setup(), re-install timeout to retry
722  * (b) arg is non-NULL, FMD_XPRT_SUSPENDED: call fmd_xprt_resume() on arg
723  * (c) arg is non-NULL, !FMD_XPRT_SUSPENDED: call ip_xprt_destroy() on arg
724  *
725  * Case (c) is required as we need to cause the module's main thread, which
726  * runs this timeout handler, to join with the transport's auxiliary thread.
727  * If the connection is a client then a timer will be installed to retry
728  * connecting to the server.
729  */
730 static void
731 ip_timeout(fmd_hdl_t *hdl, id_t id, void *arg) {
732 	ip_xprt_t *ipx;
733 
734 	if (arg == NULL ||
735 		arg == &ip_server || arg == &ip_server2 || arg == &ip_listen) {
736 		ip_debug(IP_DEBUG_FINER,
737 			"Enter ip_timeout (a) install new timer");
738 		if (ip_xprt_setup(hdl, arg) != 0)
739 			(void) fmd_timer_install(hdl, arg, NULL, ip_sleep);
740 	} else {
741 		ipx = arg;
742 		if (ipx->ipx_flags & FMD_XPRT_SUSPENDED) {
743 			ip_debug(IP_DEBUG_FINE, "timer %d waking ipx %p",
744 				(int)id, arg);
745 			ipx->ipx_flags &= ~FMD_XPRT_SUSPENDED;
746 			fmd_xprt_resume(hdl, ipx->ipx_xprt);
747 		} else {
748 			ip_debug(IP_DEBUG_FINE, "timer %d closing ipx %p",
749 				(int)id, arg);
750 			ip_xprt_destroy(ipx);
751 			if ((ipx->ipx_flags & FMD_XPRT_ACCEPT) !=
752 				FMD_XPRT_ACCEPT)
753 				(void) fmd_timer_install(
754 					hdl, ipx->ipx_cinfo, NULL, ip_sleep);
755 		}
756 	}
757 }
758 
/*
 * Properties understood by this module, with their types and default values.
 * They are read in _fmd_init() with the fmd_prop_get_*() functions.
 */
static const fmd_prop_t fmd_props[] = {
	{ "ip_authority", FMD_TYPE_STRING, NULL },
	{ "ip_bufsize", FMD_TYPE_SIZE, "4k" },
	{ "ip_burp", FMD_TYPE_TIME, "0" },
	{ "ip_enable", FMD_TYPE_BOOL, "false" },
	{ "ip_mtbf", FMD_TYPE_INT32, "0" },
	{ "ip_external", FMD_TYPE_BOOL, "true" },
	{ "ip_no_remote_repair", FMD_TYPE_BOOL, "true" },
	{ "ip_hconly", FMD_TYPE_BOOL, "false" },
	{ "ip_rdonly", FMD_TYPE_BOOL, "false" },
	{ "ip_hc_present_only", FMD_TYPE_BOOL, "false" },
	{ "ip_domain_name", FMD_TYPE_STRING, NULL },
	{ "ip_port", FMD_TYPE_STRING, "664" },
	{ "ip_qlen", FMD_TYPE_INT32, "32" },
	{ "ip_retry", FMD_TYPE_INT32, "-1" },	    /* -1=forever */
	{ "ip_server", FMD_TYPE_STRING, NULL },	    /* server name */
	{ "ip_server2", FMD_TYPE_STRING, NULL },    /* secondary server name */
	{ "ip_sleep", FMD_TYPE_TIME, "10s" },
	{ "ip_translate", FMD_TYPE_BOOL, "false" },
	{ "ip_bind_addr", FMD_TYPE_STRING, NULL },  /* network interface addr */
	{ "ip_debug_level", FMD_TYPE_INT32, "1" },  /* debug levels 0-3 */
	{ NULL, 0, NULL }
};
782 
/*
 * Module entry points.  Only the timeout and send entry points are provided;
 * the remaining slots are left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	NULL,			/* fmdo_recv */
	ip_timeout,		/* fmdo_timeout */
	NULL,			/* fmdo_close */
	NULL,			/* fmdo_stats */
	NULL,			/* fmdo_gc */
	ip_xprt_send,		/* fmdo_send */
};
791 
/*
 * Module description, version, entry points and property table handed to
 * fmd_hdl_register() in _fmd_init().
 */
static const fmd_hdl_info_t fmd_info = {
	"IP Transport Agent", "1.0", &fmd_ops, fmd_props
};
795 
796 /*
797  * Initialize the ip-transport module as either a server or a client.  Note
798  * that the ip-transport module is not enabled by default under Solaris:
799  * at present we require a developer or tool to "setprop ip_enable true".
800  * If ip-transport is needed in the future out-of-the-box on one or more Sun
801  * platforms, the code to check 'ip_enable' should be replaced with:
802  *
803  * (a) configuring ip-transport to operate in client mode by default,
804  * (b) a platform-specific configuration mechanism, or
805  * (c) a means to assure security and prevent denial-of-service attacks.
806  *
807  * Note that (c) is only an issue when the transport module operates
808  * in server mode (i.e. with the ip_server property set to NULL) on a
809  * generic Solaris system which may be exposed directly to the Internet.
810  * The property ip_bind_addr can be used to define a private network interface
811  * to use so that the service is not exposed to the Internet.
812  */
813 void
814 _fmd_init(fmd_hdl_t *hdl)
815 {
816 	char *auth, *p, *q, *r, *s;
817 
818 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
819 		return; /* failed to register handle */
820 
821 	if (fmd_prop_get_int32(hdl, "ip_enable") == FMD_B_FALSE) {
822 		fmd_hdl_unregister(hdl);
823 		return;
824 	}
825 
826 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
827 	    sizeof (ip_stat) / sizeof (fmd_stat_t), (fmd_stat_t *)&ip_stat);
828 
829 	ip_hdl = hdl;
830 	(void) pthread_mutex_init(&ip_lock, NULL);
831 
832 	ip_burp = fmd_prop_get_int64(hdl, "ip_burp");
833 	ip_mtbf = fmd_prop_get_int32(hdl, "ip_mtbf");
834 	ip_external = fmd_prop_get_int32(hdl, "ip_external");
835 	ip_no_remote_repair = fmd_prop_get_int32(hdl, "ip_no_remote_repair");
836 	ip_hconly = fmd_prop_get_int32(hdl, "ip_hconly");
837 	ip_rdonly = fmd_prop_get_int32(hdl, "ip_rdonly");
838 	ip_hc_present_only = fmd_prop_get_int32(hdl, "ip_hc_present_only");
839 	ip_domain_name = fmd_prop_get_string(hdl, "ip_domain_name");
840 	ip_qlen = fmd_prop_get_int32(hdl, "ip_qlen");
841 	ip_retry = fmd_prop_get_int32(hdl, "ip_retry");
842 	ip_sleep = fmd_prop_get_int64(hdl, "ip_sleep");
843 	ip_translate = fmd_prop_get_int32(hdl, "ip_translate");
844 
845 	ip_size = (size_t)fmd_prop_get_int64(hdl, "ip_bufsize");
846 	ip_size = MAX(ip_size, sizeof (ip_hdr_t));
847 
848 	ip_listen.name = fmd_prop_get_string(hdl, "ip_bind_addr");
849 	ip_server.name = fmd_prop_get_string(hdl, "ip_server");
850 	ip_server2.name = fmd_prop_get_string(hdl, "ip_server2");
851 	ip_port = fmd_prop_get_string(hdl, "ip_port");
852 	ip_debug_level = fmd_prop_get_int32(hdl, "ip_debug_level");
853 
854 	if (ip_setup_addrs()) {
855 		fmd_hdl_abort(hdl, "Unable to setup IP addresses.");
856 		return;
857 	}
858 
859 
860 	/*
861 	 * If ip_authority is set, tokenize this string and turn it into an
862 	 * FMA authority represented as a name-value pair list.  We will use
863 	 * this authority for all transports created by this module.  If
864 	 * ip_authority isn't set, we'll compute authorities on the fly.
865 	 */
866 	if ((auth = fmd_prop_get_string(hdl, "ip_authority")) != NULL) {
867 		(void) nvlist_alloc(&ip_auth, 0, 0);
868 		(void) nvlist_add_uint8(ip_auth,
869 		    FM_VERSION, FM_FMRI_AUTH_VERSION);
870 
871 		s = alloca(strlen(auth) + 1);
872 		(void) strcpy(s, auth);
873 		fmd_prop_free_string(hdl, auth);
874 
875 		for (p = strtok_r(s, ",", &q); p != NULL;
876 		    p = strtok_r(NULL, ",", &q)) {
877 
878 			if ((r = strchr(p, '=')) == NULL) {
879 				ip_addr_cleanup();
880 				fmd_hdl_abort(hdl, "ip_authority element <%s> "
881 				    "must be in <name>=<value> form\n", p);
882 			}
883 
884 			*r = '\0';
885 			(void) nvlist_add_string(ip_auth, p, r + 1);
886 			*r = '=';
887 		}
888 	}
889 
890 	/*
891 	 * Start ip transport server to listen for clients
892 	 */
893 	if (ip_listen.addr != NULL) {
894 		if (ip_xprt_setup(hdl, &ip_listen) != 0) {
895 			(void) fmd_timer_install(hdl, &ip_listen, NULL,
896 			    ip_sleep);
897 		}
898 	}
899 
900 	/*
901 	 * Call ip_xprt_setup() to connect to server(s).  If it fails and
902 	 * ip_retry is non-zero, install a timer to try again after
903 	 * 'ip_sleep' nsecs.
904 	 */
905 	if (ip_server.addr != NULL) {
906 		if (ip_xprt_setup(hdl, &ip_server) != 0)
907 			(void) fmd_timer_install(hdl, &ip_server, NULL,
908 			    ip_sleep);
909 	}
910 	if (ip_server2.addr != NULL) {
911 		if (ip_xprt_setup(hdl, &ip_server2) != 0)
912 			(void) fmd_timer_install(hdl, &ip_server2, NULL,
913 			    ip_sleep);
914 	}
915 }
916 
917 void
918 _fmd_fini(fmd_hdl_t *hdl)
919 {
920 	ip_quit++; /* set quit flag before signalling auxiliary threads */
921 
922 	while (ip_xps != NULL)
923 		ip_xprt_destroy(ip_xps);
924 
925 	if (ip_auth != NULL)
926 		nvlist_free(ip_auth);
927 	ip_addr_cleanup();
928 
929 	fmd_hdl_unregister(hdl);
930 }
931