xref: /illumos-gate/usr/src/cmd/fm/modules/common/ip-transport/ip.c (revision 64639aaf7beb84086b88f186ea1fa9ccf0be8c57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/sysmacros.h>
29 #include <sys/fm/protocol.h>
30 
31 #include <netinet/in.h>
32 #include <arpa/inet.h>
33 
34 #include <strings.h>
35 #include <unistd.h>
36 #include <pthread.h>
37 #include <alloca.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <netdb.h>
41 #include <poll.h>
42 #include <stdarg.h>
43 
44 #include <fm/fmd_api.h>
45 
/*
 * Packet header magic: IP_MAGIC is the byte string that identifies a packet
 * header and IP_MAGLEN is its length, not counting the terminating NUL.
 */
#define	IP_MAGIC	"\177FMA"
#define	IP_MAGLEN	4

/*
 * Debug verbosity levels, compared against ip_debug_level by ip_debug().
 */
#define	IP_DEBUG_OFF	0	/* no informational debugging printed */
#define	IP_DEBUG_FINE	1	/* basic debug information (default) */
#define	IP_DEBUG_FINER	2	/* more debug information */
#define	IP_DEBUG_FINEST	3	/* all debug information */
52 
53 typedef struct ip_hdr {
54 	char iph_magic[IP_MAGLEN]; /* magic string */
55 	uint32_t iph_size;	/* packed size */
56 } ip_hdr_t;
57 
/*
 * A data buffer together with its allocated size.
 */
typedef struct ip_buf {
	/* data buffer */
	void *ipb_buf;
	/* size of buffer in bytes */
	size_t ipb_size;
} ip_buf_t;
62 
63 typedef struct ip_cinfo {	    /* Connection specific information */
64 	struct addrinfo *ipc_addr;  /* Connection address(es) */
65 	char *ipc_name;		    /* The name of the server or interface */
66 	int ipc_retry;		    /* The number of connection retries */
67 	boolean_t ipc_accept;	    /* Will connection accept clients */
68 	id_t ipc_timer;		    /* FMD timer id for connection */
69 	struct ip_cinfo *ipc_next;  /* Next conneciton in list */
70 } ip_cinfo_t;
71 
72 typedef struct ip_xprt {
73 	fmd_xprt_t *ipx_xprt;	/* transport handle */
74 	int ipx_flags;		/* transport flags */
75 	int ipx_fd;		/* socket file descriptor */
76 	int ipx_done;		/* flag indicating connection closed */
77 	pthread_t ipx_tid;	/* recv-side auxiliary thread */
78 	ip_buf_t ipx_sndbuf;	/* buffer for sending events */
79 	ip_buf_t ipx_rcvbuf;	/* buffer for receiving events */
80 	ip_cinfo_t *ipx_cinfo;	/* info for reconnect */
81 	id_t ipx_spnd_timer;	/* connection suspend timer */
82 	char *ipx_addr;		/* address:port of remote connection */
83 	struct ip_xprt *ipx_next;	/* next ip_xprt in global list */
84 } ip_xprt_t;
85 
/* Printable identity of a transport: its remote address, or a placeholder. */
#define	IPX_ID(a) ((a)->ipx_addr != NULL ? (a)->ipx_addr : "(Not connected)")
87 
88 typedef struct ip_stat {
89 	fmd_stat_t ips_accfail;	/* failed accepts */
90 	fmd_stat_t ips_badmagic; /* invalid packet headers */
91 	fmd_stat_t ips_packfail; /* failed packs */
92 	fmd_stat_t ips_unpackfail; /* failed unpacks */
93 } ip_stat_t;
94 
95 static void ip_xprt_create(fmd_xprt_t *, int, int, ip_cinfo_t *, char *);
96 static void ip_xprt_destroy(ip_xprt_t *);
97 
98 static ip_stat_t ip_stat = {
99 	{ "accfail", FMD_TYPE_UINT64, "failed accepts" },
100 	{ "badmagic", FMD_TYPE_UINT64, "invalid packet headers" },
101 	{ "packfail", FMD_TYPE_UINT64, "failed packs" },
102 	{ "unpackfail", FMD_TYPE_UINT64, "failed unpacks" },
103 };
104 
105 static fmd_hdl_t *ip_hdl;	/* module handle */
106 static pthread_mutex_t ip_lock;	/* lock for ip_xps list */
107 static ip_xprt_t *ip_xps;	/* list of active transports */
108 static pthread_mutex_t ip_conns_lock;	/* lock for ip_conns list */
109 static ip_cinfo_t *ip_conns;	/* list of all configured connection info */
110 static nvlist_t *ip_auth;	/* authority to use for transport(s) */
111 static size_t ip_size;		/* default buffer size */
112 static volatile int ip_quit;	/* signal to quit */
113 static int ip_qlen;		/* queue length for listen(3SOCKET) */
114 static int ip_mtbf;		/* mtbf for simulating packet drop */
115 static int ip_external;		/* set transport to be "external" */
116 static int ip_no_remote_repair;	/* disallow remote repair */
117 static int ip_hconly;		/* only cache faults that are hc-scheme */
118 static int ip_rdonly;		/* force transport to be rdonly */
119 static int ip_hc_present_only;	/* only cache faults if hc-scheme and present */
120 static char *ip_domain_name;	/* set domain name for received list.suspects */
121 static hrtime_t ip_burp;	/* make mtbf slower by adding this much delay */
122 static int ip_translate;	/* call fmd_xprt_translate() before sending */
123 static char *ip_port;		/* port to connect to (or bind to if server) */
124 static int ip_retry;		/* retry count for ip_xprt_setup() -1=forever */
125 static hrtime_t ip_sleep;	/* sleep delay for ip_xprt_setup() */
126 static int ip_debug_level;	/* level for printing debug messages */
127 
128 /*
129  * Prints a debug message to the fmd debug framework if the debug level is set
130  * to at least the given level.
131  */
132 static void
133 ip_debug(int level, char *fmt, ...)
134 {
135 	if (ip_debug_level >= level) {
136 		va_list args;
137 		va_start(args, fmt);
138 		fmd_hdl_vdebug(ip_hdl, fmt, args);
139 		va_end(args);
140 	}
141 }
142 
143 /*
144  * Allocate space in ipx_sndbuf for a header and a packed XDR encoding of
145  * the specified nvlist, and then send the buffer to our remote peer.
146  */
147 static int
148 ip_fmdo_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
149 {
150 	ip_xprt_t *ipx;
151 	size_t size, nvsize;
152 	char *buf, *nvbuf;
153 	ip_hdr_t *iph;
154 	ssize_t r, n;
155 	int err;
156 
157 	if (xp == NULL) {
158 		ip_debug(IP_DEBUG_FINE, "ip_fmdo_send failed: xp=NULL\n");
159 		return (FMD_SEND_FAILED);
160 	}
161 	ipx = fmd_xprt_getspecific(hdl, xp);
162 
163 	/*
164 	 * For testing purposes, if ip_mtbf is non-zero, use this to pseudo-
165 	 * randomly simulate the need for retries.  If ip_burp is also set,
166 	 * then we also suspend the transport for a bit and wake it up again.
167 	 */
168 	if (ip_mtbf != 0 && gethrtime() % ip_mtbf == 0) {
169 		if (ip_burp != 0) {
170 			ip_debug(IP_DEBUG_FINE, "burping ipx %s", IPX_ID(ipx));
171 			ipx->ipx_flags |= FMD_XPRT_SUSPENDED;
172 			ipx->ipx_spnd_timer = fmd_timer_install(
173 			    ip_hdl, ipx, NULL, ip_burp);
174 			fmd_xprt_suspend(ip_hdl, xp);
175 		}
176 		return (FMD_SEND_RETRY);
177 	}
178 
179 	if (ip_translate && (nvl = fmd_xprt_translate(hdl, xp, ep)) == NULL) {
180 		fmd_hdl_error(hdl, "failed to translate event %p", (void *)ep);
181 		return (FMD_SEND_FAILED);
182 	}
183 
184 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_XDR);
185 	size = r = sizeof (ip_hdr_t) + nvsize;
186 
187 	if (ipx->ipx_sndbuf.ipb_size < size) {
188 		fmd_hdl_free(hdl, ipx->ipx_sndbuf.ipb_buf,
189 		    ipx->ipx_sndbuf.ipb_size);
190 		ipx->ipx_sndbuf.ipb_size = P2ROUNDUP(size, 16);
191 		ipx->ipx_sndbuf.ipb_buf = fmd_hdl_alloc(hdl,
192 		    ipx->ipx_sndbuf.ipb_size, FMD_SLEEP);
193 	}
194 
195 	buf = ipx->ipx_sndbuf.ipb_buf;
196 	iph = (ip_hdr_t *)(uintptr_t)buf;
197 	nvbuf = buf + sizeof (ip_hdr_t);
198 
199 	bcopy(IP_MAGIC, iph->iph_magic, IP_MAGLEN);
200 	iph->iph_size = htonl(nvsize);
201 	err = nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
202 
203 	if (ip_translate)
204 		nvlist_free(nvl);
205 
206 	if (err != 0) {
207 		fmd_hdl_error(ip_hdl, "failed to pack event for "
208 		    "transport %p: %s\n", (void *)ipx->ipx_xprt, strerror(err));
209 		ip_stat.ips_packfail.fmds_value.ui64++;
210 		return (FMD_SEND_FAILED);
211 	}
212 
213 	while (!ip_quit && r != 0) {
214 		if ((n = send(ipx->ipx_fd, buf, r, 0)) < 0) {
215 			if (errno != EINTR && errno != EWOULDBLOCK) {
216 				ip_debug(IP_DEBUG_FINE,
217 				    "failed to send to %s", IPX_ID(ipx));
218 				return (FMD_SEND_FAILED);
219 			}
220 			continue;
221 		}
222 		buf += n;
223 		r -= n;
224 	}
225 
226 	ip_debug(IP_DEBUG_FINEST, "Sent event %d bytes to %s",
227 	    size, IPX_ID(ipx));
228 	return (FMD_SEND_SUCCESS);
229 }
230 
231 /*
232  * Sends events over transports that are configured read only.  When the module
233  * is in read only mode it will receive all events and only send events that
234  * have a subscription set.
235  *
236  * The configuration file will have to set prop ip_rdonly true and also
237  * subscribe for events that are desired to be sent over the transport in order
238  * for this function to be used.
239  */
240 /* ARGSUSED */
241 static void
242 ip_fmdo_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
243 {
244 	int err;
245 	ip_xprt_t *ipx;
246 
247 	if (ip_rdonly && !ip_quit) {
248 		(void) pthread_mutex_lock(&ip_lock);
249 
250 		for (ipx = ip_xps; ipx != NULL; ipx = ipx->ipx_next) {
251 			err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
252 			while (FMD_SEND_RETRY == err) {
253 				err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
254 			}
255 		}
256 		(void) pthread_mutex_unlock(&ip_lock);
257 	}
258 }
259 
260 /*
261  * Receive a chunk of data of the specified size from our remote peer.  The
262  * data is received into ipx_rcvbuf, and then a pointer to the buffer is
263  * returned.  NOTE: The data is only valid until the next call to ip_xprt_recv.
264  * If the connection breaks or ip_quit is set during receive, NULL is returned.
265  */
266 static void *
267 ip_xprt_recv(ip_xprt_t *ipx, size_t size)
268 {
269 	char *buf = ipx->ipx_rcvbuf.ipb_buf;
270 	ssize_t n, r = size;
271 
272 	if (ipx->ipx_rcvbuf.ipb_size < size) {
273 		fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf,
274 		    ipx->ipx_rcvbuf.ipb_size);
275 		ipx->ipx_rcvbuf.ipb_size = P2ROUNDUP(size, 16);
276 		ipx->ipx_rcvbuf.ipb_buf = buf = fmd_hdl_alloc(ip_hdl,
277 		    ipx->ipx_rcvbuf.ipb_size, FMD_SLEEP);
278 	}
279 
280 	while (!ip_quit && r != 0) {
281 		if ((n = recv(ipx->ipx_fd, buf, r, MSG_WAITALL)) == 0) {
282 			ipx->ipx_done++;
283 			return (NULL);
284 		}
285 
286 		if (n < 0) {
287 			if (errno != EINTR && errno != EWOULDBLOCK) {
288 				ip_debug(IP_DEBUG_FINE,
289 				    "failed to recv on ipx %s", IPX_ID(ipx));
290 			}
291 			continue;
292 		}
293 		/* Reset retry counter after a successful connection */
294 		if (ipx->ipx_cinfo) {
295 			ipx->ipx_cinfo->ipc_retry = ip_retry;
296 		}
297 
298 		buf += n;
299 		r -= n;
300 	}
301 
302 	return (r ? NULL: ipx->ipx_rcvbuf.ipb_buf);
303 }
304 
305 /*
306  * Sets the address/port of the remote connection in the connection info struct
307  * This is called after a TCP session has been set up with a known remote
308  * address (sap)
309  */
310 static void
311 ip_xprt_set_addr(ip_xprt_t *ipx, const struct sockaddr *sap)
312 {
313 	const struct sockaddr_in6 *sin6 = (const void *)sap;
314 	const struct sockaddr_in *sin = (const void *)sap;
315 
316 	char buf[INET6_ADDRSTRLEN + 16];
317 	struct in_addr v4addr;
318 	in_port_t port;
319 	int n;
320 
321 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_set_addr");
322 
323 	if (sap->sa_family == AF_INET6 &&
324 	    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
325 		IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr, &v4addr);
326 		(void) inet_ntop(AF_INET, &v4addr, buf, sizeof (buf));
327 		port = ntohs(sin6->sin6_port);
328 	} else if (sap->sa_family == AF_INET6) {
329 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof (buf));
330 		port = ntohs(sin6->sin6_port);
331 	} else {
332 		(void) inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof (buf));
333 		port = ntohs(sin->sin_port);
334 	}
335 
336 	n = strlen(buf);
337 	(void) snprintf(buf + n, sizeof (buf) - n, ":%u", port);
338 
339 	if (ipx->ipx_addr)
340 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
341 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, buf, FMD_SLEEP);
342 	ip_debug(IP_DEBUG_FINE, "connection addr is %s on %p",
343 	    ipx->ipx_addr, (void *)ipx);
344 }
345 
346 static nvlist_t *
347 ip_xprt_auth(ip_xprt_t *ipx)
348 {
349 	nvlist_t *nvl;
350 	int err;
351 
352 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_auth");
353 
354 	if (ip_auth != NULL)
355 		err = nvlist_dup(ip_auth, &nvl, 0);
356 	else
357 		err = nvlist_alloc(&nvl, 0, 0);
358 
359 	if (err != 0) {
360 		fmd_hdl_abort(ip_hdl, "failed to create nvlist for "
361 		    "authority: %s\n", strerror(err));
362 	}
363 
364 	if (ip_auth != NULL)
365 		return (nvl);
366 
367 	ip_debug(IP_DEBUG_FINE, "ip_authority %s=%s\n",
368 	    FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
369 
370 	(void) nvlist_add_uint8(nvl, FM_VERSION, FM_FMRI_AUTH_VERSION);
371 	(void) nvlist_add_string(nvl, FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
372 
373 	return (nvl);
374 }
375 
376 static void
377 ip_xprt_accept(ip_xprt_t *ipx)
378 {
379 	struct sockaddr_storage sa;
380 	socklen_t salen = sizeof (sa);
381 	fmd_xprt_t *xp;
382 	int fd;
383 
384 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_accept");
385 
386 	if ((fd = accept(ipx->ipx_fd, (struct sockaddr *)&sa, &salen)) == -1) {
387 		fmd_hdl_error(ip_hdl, "failed to accept connection");
388 		ip_stat.ips_accfail.fmds_value.ui64++;
389 		return;
390 	}
391 	ip_debug(IP_DEBUG_FINE, "Accepted socket on fd %d", fd);
392 
393 	ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
394 	xp = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
395 	    ip_xprt_auth(ipx), NULL);
396 	ip_xprt_create(xp, fd, ipx->ipx_flags, ipx->ipx_cinfo, ipx->ipx_addr);
397 }
398 
399 static void
400 ip_xprt_recv_event(ip_xprt_t *ipx)
401 {
402 	ip_hdr_t *iph;
403 	nvlist_t *nvl;
404 	size_t size;
405 	void *buf;
406 	int err;
407 
408 	if ((iph = ip_xprt_recv(ipx, sizeof (ip_hdr_t))) == NULL)
409 		return; /* connection broken */
410 
411 	if (bcmp(iph->iph_magic, IP_MAGIC, IP_MAGLEN) != 0) {
412 		fmd_hdl_error(ip_hdl,
413 		    "invalid hdr magic %x.%x.%x.%x from transport %s\n",
414 		    iph->iph_magic[0], iph->iph_magic[1], iph->iph_magic[2],
415 		    iph->iph_magic[3], IPX_ID(ipx));
416 		ip_stat.ips_badmagic.fmds_value.ui64++;
417 		return;
418 	}
419 
420 	size = ntohl(iph->iph_size);
421 
422 	if ((buf = ip_xprt_recv(ipx, size)) == NULL)
423 		return; /* connection broken */
424 
425 	if ((err = nvlist_unpack(buf, size, &nvl, 0)) != 0) {
426 		fmd_hdl_error(ip_hdl, "failed to unpack event from "
427 		    "transport %s: %s\n",
428 		    IPX_ID(ipx), strerror(err));
429 		ip_stat.ips_unpackfail.fmds_value.ui64++;
430 	} else {
431 		if (ip_domain_name)
432 			fmd_xprt_add_domain(ip_hdl, nvl, ip_domain_name);
433 		fmd_xprt_post(ip_hdl, ipx->ipx_xprt, nvl, 0);
434 	}
435 
436 	if (fmd_xprt_error(ip_hdl, ipx->ipx_xprt)) {
437 		fmd_hdl_error(ip_hdl, "protocol error on transport %p",
438 		    (void *)ipx->ipx_xprt);
439 		ipx->ipx_done++;
440 	}
441 	ip_debug(IP_DEBUG_FINEST, "Recv event %d bytes from %s",
442 	    size, IPX_ID(ipx));
443 }
444 
445 static void
446 ip_xprt_thread(void *arg)
447 {
448 	ip_xprt_t *ipx = arg;
449 	struct sockaddr_storage sa;
450 	socklen_t salen = sizeof (sa);
451 	struct pollfd pfd;
452 
453 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_thread");
454 
455 	while (!ip_quit && !ipx->ipx_done) {
456 		if (ipx->ipx_xprt != NULL || (ipx->ipx_flags & FMD_XPRT_ACCEPT))
457 			pfd.events = POLLIN;
458 		else
459 			pfd.events = POLLOUT;
460 
461 		pfd.fd = ipx->ipx_fd;
462 		pfd.revents = 0;
463 
464 		if (poll(&pfd, 1, -1) <= 0)
465 			continue; /* loop around and check ip_quit */
466 
467 		if (pfd.revents & (POLLHUP | POLLERR)) {
468 			ip_debug(IP_DEBUG_FINE, "hangup fd %d\n", ipx->ipx_fd);
469 			break;
470 		}
471 
472 		if (pfd.revents & POLLOUT) {
473 			/*
474 			 * Once we're connected, there's no reason to have our
475 			 * calls to recv() and send() be non-blocking since we
476 			 * we have separate threads for each: clear O_NONBLOCK.
477 			 */
478 			(void) fcntl(ipx->ipx_fd, F_SETFL,
479 			    fcntl(ipx->ipx_fd, F_GETFL, 0) & ~O_NONBLOCK);
480 
481 			if (getpeername(ipx->ipx_fd, (struct sockaddr *)&sa,
482 			    &salen) != 0) {
483 				ip_debug(IP_DEBUG_FINE,
484 				    "Not connected, no remote name for fd %d. "
485 				    " Will retry.",
486 				    ipx->ipx_fd);
487 				bzero(&sa, sizeof (sa));
488 				break;
489 			}
490 			ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
491 			ipx->ipx_xprt = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
492 			    ip_xprt_auth(ipx), ipx);
493 
494 			ip_debug(IP_DEBUG_FINE, "connect fd %d ipx %p",
495 			    ipx->ipx_fd, (void *)ipx);
496 			continue;
497 		}
498 
499 		if (pfd.revents & POLLIN) {
500 			if (ipx->ipx_xprt == NULL)
501 				ip_xprt_accept(ipx);
502 			else
503 				ip_xprt_recv_event(ipx);
504 		}
505 	}
506 
507 	ipx->ipx_cinfo->ipc_timer = fmd_timer_install(ip_hdl, ipx, NULL, 0);
508 	ip_debug(IP_DEBUG_FINE, "close fd %d (timer %d)", ipx->ipx_fd,
509 	    (int)ipx->ipx_cinfo->ipc_timer);
510 }
511 
512 static void
513 ip_xprt_create(fmd_xprt_t *xp, int fd, int flags, ip_cinfo_t *cinfo, char *addr)
514 {
515 	ip_xprt_t *ipx = fmd_hdl_zalloc(ip_hdl, sizeof (ip_xprt_t), FMD_SLEEP);
516 
517 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_create %p", (void *)ipx);
518 
519 	ipx->ipx_xprt = xp;
520 	ipx->ipx_flags = flags;
521 	ipx->ipx_fd = fd;
522 	ipx->ipx_tid = fmd_thr_create(ip_hdl, ip_xprt_thread, ipx);
523 	ipx->ipx_cinfo = cinfo;
524 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, addr, FMD_SLEEP);
525 
526 	if (ipx->ipx_xprt != NULL)
527 		fmd_xprt_setspecific(ip_hdl, ipx->ipx_xprt, ipx);
528 
529 	(void) pthread_mutex_lock(&ip_lock);
530 
531 	ipx->ipx_next = ip_xps;
532 	ip_xps = ipx;
533 
534 	(void) pthread_mutex_unlock(&ip_lock);
535 }
536 
537 static void
538 ip_xprt_destroy(ip_xprt_t *ipx)
539 {
540 	ip_xprt_t *ipp, **ppx = &ip_xps;
541 
542 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_destory %s %p",
543 	    IPX_ID(ipx), (void *)ipx);
544 
545 	(void) pthread_mutex_lock(&ip_lock);
546 
547 	for (ipp = *ppx; ipp != NULL; ipp = ipp->ipx_next) {
548 		if (ipp != ipx)
549 			ppx = &ipp->ipx_next;
550 		else
551 			break;
552 	}
553 
554 	if (ipp != ipx) {
555 		(void) pthread_mutex_unlock(&ip_lock);
556 		fmd_hdl_abort(ip_hdl, "ipx %p not on xps list\n", (void *)ipx);
557 	}
558 
559 	*ppx = ipx->ipx_next;
560 	ipx->ipx_next = NULL;
561 
562 	(void) pthread_mutex_unlock(&ip_lock);
563 
564 	if (ipx->ipx_spnd_timer)
565 		fmd_timer_remove(ip_hdl, ipx->ipx_spnd_timer);
566 
567 	fmd_thr_signal(ip_hdl, ipx->ipx_tid);
568 	fmd_thr_destroy(ip_hdl, ipx->ipx_tid);
569 
570 	if (ipx->ipx_xprt != NULL)
571 		fmd_xprt_close(ip_hdl, ipx->ipx_xprt);
572 
573 	fmd_hdl_free(ip_hdl, ipx->ipx_sndbuf.ipb_buf, ipx->ipx_sndbuf.ipb_size);
574 	fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf, ipx->ipx_rcvbuf.ipb_size);
575 
576 	(void) close(ipx->ipx_fd);
577 	if (ipx->ipx_addr) {
578 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
579 		ipx->ipx_addr = NULL;
580 	}
581 	fmd_hdl_free(ip_hdl, ipx, sizeof (ip_xprt_t));
582 }
583 
584 /*
585  * Loop through the addresses in the connection info structure that were
586  * created by getaddrinfo() in ip_setup_addr during initialization (_fmd_init)
587  * and for each one attempt to create a socket and initialize it.  If we are
588  * successful, return zero.  If we fail, we check ip_retry: if it is non-zero
589  * we return the last errno and let our caller retry ip_xprt_setup() later.  If
590  * ip_retry reaches zero, we call fmd_hdl_abort() with an appropriate message.
591  */
592 static int
593 ip_xprt_setup(fmd_hdl_t *hdl, ip_cinfo_t *cinfo)
594 {
595 	int err, fd, oflags, xflags, optval = 1;
596 	struct addrinfo *aip;
597 	const char *s1, *s2;
598 	struct addrinfo *ail = cinfo->ipc_addr;
599 
600 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_setup %s\n",
601 	    cinfo->ipc_name == NULL ? "localhost" : cinfo->ipc_name);
602 
603 	/*
604 	 * Set up flags as specified in the .conf file. Note that these are
605 	 * mostly only used for testing purposes, allowing the transport to
606 	 * be set up in various modes.
607 	 */
608 	xflags = (ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY : FMD_XPRT_RDWR;
609 	if (cinfo->ipc_accept)
610 		xflags |= FMD_XPRT_ACCEPT;
611 	if (ip_external == FMD_B_TRUE)
612 		xflags |= FMD_XPRT_EXTERNAL;
613 	if (ip_no_remote_repair == FMD_B_TRUE)
614 		xflags |= FMD_XPRT_NO_REMOTE_REPAIR;
615 	if (ip_hconly == FMD_B_TRUE)
616 		xflags |= FMD_XPRT_HCONLY;
617 	if (ip_hc_present_only == FMD_B_TRUE)
618 		xflags |= FMD_XPRT_HC_PRESENT_ONLY;
619 
620 	for (aip = ail; aip != NULL; aip = aip->ai_next) {
621 		if (aip->ai_family != AF_INET && aip->ai_family != AF_INET6)
622 			continue; /* ignore anything that isn't IPv4 or IPv6 */
623 
624 		if ((fd = socket(aip->ai_family,
625 		    aip->ai_socktype, aip->ai_protocol)) == -1) {
626 			err = errno;
627 			continue;
628 		}
629 
630 		oflags = fcntl(fd, F_GETFL, 0);
631 		(void) fcntl(fd, F_SETFL, oflags | O_NONBLOCK);
632 
633 		if (xflags & FMD_XPRT_ACCEPT) {
634 			err = setsockopt(fd, SOL_SOCKET,
635 			    SO_REUSEADDR, &optval, sizeof (optval)) != 0 ||
636 			    bind(fd, aip->ai_addr, aip->ai_addrlen) != 0 ||
637 			    listen(fd, ip_qlen) != 0;
638 		} else {
639 			err = connect(fd, aip->ai_addr, aip->ai_addrlen);
640 			if (err)
641 				err = errno;
642 			if (err == EINPROGRESS)
643 				err = 0;
644 		}
645 
646 		if (err == 0) {
647 			ip_xprt_create(NULL, fd, xflags, cinfo, NULL);
648 			ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
649 			return (0);
650 		}
651 
652 		ip_debug(IP_DEBUG_FINE, "Error=%d errno=%d", err, errno);
653 
654 		err = errno;
655 		(void) close(fd);
656 	}
657 
658 	if (cinfo->ipc_name != NULL) {
659 		s1 = "failed to connect to";
660 		s2 = cinfo->ipc_name;
661 	} else {
662 		s1 = "failed to listen on";
663 		s2 = ip_port;
664 	}
665 
666 	if (err == EACCES || cinfo->ipc_retry-- == 0)
667 		fmd_hdl_abort(hdl, "%s %s: %s\n", s1, s2, strerror(err));
668 
669 	ip_debug(IP_DEBUG_FINE, "%s %s: %s (will retry)\n",
670 	    s1, s2, strerror(err));
671 	ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
672 	return (err);
673 }
674 
675 /*
676  * Free address based resources
677  */
678 static void
679 ip_addr_cleanup()
680 {
681 	ip_cinfo_t *conn;
682 
683 	(void) pthread_mutex_lock(&ip_conns_lock);
684 	conn = ip_conns;
685 	while (conn != NULL) {
686 		ip_conns = conn->ipc_next;
687 		if (conn->ipc_addr != NULL)
688 			freeaddrinfo(conn->ipc_addr);
689 		conn->ipc_addr = NULL;
690 		if (conn->ipc_timer)
691 			fmd_timer_remove(ip_hdl, conn->ipc_timer);
692 		fmd_hdl_strfree(ip_hdl, conn->ipc_name);
693 		fmd_hdl_free(ip_hdl, conn, sizeof (ip_cinfo_t));
694 		conn = ip_conns;
695 	}
696 	(void) pthread_mutex_unlock(&ip_conns_lock);
697 
698 	fmd_prop_free_string(ip_hdl, ip_port);
699 }
700 
701 static boolean_t
702 ip_argis_cinfo(void *arg)
703 {
704 	boolean_t exists = B_FALSE;
705 	ip_cinfo_t *conn;
706 
707 	(void) pthread_mutex_lock(&ip_conns_lock);
708 	for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
709 		if (conn == arg) {
710 			exists = B_TRUE;
711 			break;
712 		}
713 	}
714 	(void) pthread_mutex_unlock(&ip_conns_lock);
715 
716 	return (exists);
717 }
718 
719 
720 static ip_cinfo_t *
721 ip_create_cinfo(char *server, boolean_t accept)
722 {
723 	int err;
724 	struct addrinfo aih;
725 	ip_cinfo_t *cinfo = fmd_hdl_zalloc(
726 	    ip_hdl, sizeof (ip_cinfo_t), FMD_NOSLEEP);
727 
728 	if (cinfo == NULL)
729 		return (NULL);
730 
731 	cinfo->ipc_accept = accept;
732 	cinfo->ipc_retry = ip_retry;
733 	if (server != NULL) {
734 		cinfo->ipc_name = fmd_hdl_strdup(ip_hdl, server, FMD_NOSLEEP);
735 		if (cinfo->ipc_name == NULL) {
736 			fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
737 			return (NULL);
738 		}
739 	}
740 
741 	bzero(&aih, sizeof (aih));
742 	aih.ai_flags = AI_ADDRCONFIG;
743 	aih.ai_family = AF_UNSPEC;
744 	aih.ai_socktype = SOCK_STREAM;
745 	if (server != NULL) {
746 		ip_debug(IP_DEBUG_FINE, "resolving %s:%s\n", server, ip_port);
747 	} else {
748 		aih.ai_flags |= AI_PASSIVE;
749 		cinfo->ipc_name = fmd_hdl_strdup(
750 		    ip_hdl, "localhost", FMD_NOSLEEP);
751 		if (cinfo->ipc_name == NULL) {
752 			fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
753 			return (NULL);
754 		}
755 	}
756 
757 	err = getaddrinfo(server, ip_port, &aih, &cinfo->ipc_addr);
758 	if (err != 0) {
759 		fmd_hdl_error(ip_hdl, "failed to resolve host %s port %s: %s\n",
760 		    cinfo->ipc_name, ip_port, gai_strerror(err));
761 		cinfo->ipc_addr = NULL;
762 		fmd_hdl_strfree(ip_hdl, cinfo->ipc_name);
763 		fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
764 		cinfo = NULL;
765 	}
766 	return (cinfo);
767 }
768 
769 /*
770  * Setup a single ip address for ip connection.
771  * If unable to setup any of the addresses then all addresses will be cleaned up
772  * and non-zero will be returned.
773  */
774 static int
775 ip_setup_addr(char *server, boolean_t accept)
776 {
777 	int err = 0;
778 	ip_cinfo_t *cinfo = ip_create_cinfo(server, accept);
779 
780 	if (cinfo == NULL) {
781 		ip_addr_cleanup();
782 		err++;
783 	} else {
784 		(void) pthread_mutex_lock(&ip_conns_lock);
785 		cinfo->ipc_next = ip_conns;
786 		ip_conns = cinfo;
787 		(void) pthread_mutex_unlock(&ip_conns_lock);
788 	}
789 	return (err);
790 }
791 
792 /*
793  * Setup a ip addresses for an ip connection.  The address can be a comma
794  * separated list of addresses as well.
795  * If unable to setup any of the addresses then all addresses will be cleaned up
796  * and non-zero will be returned.
797  */
798 static int
799 ip_setup_addrs(char *server, boolean_t accept)
800 {
801 	int err = 0;
802 	char *addr = server;
803 	char *p;
804 
805 	for (p = server; *p != '\0'; p++) {
806 		if (*p == ',') {
807 			*p = '\0';
808 			err = ip_setup_addr(addr, accept);
809 			*p = ',';
810 			if (err)
811 				return (err);
812 			addr = ++p;
813 			if (*addr == '\0')
814 				break;
815 		}
816 	}
817 	if (*addr != '\0') {
818 		err = ip_setup_addr(addr, accept);
819 	}
820 	return (err);
821 }
822 
823 /*
824  * Starts all connections for each configured network address.  If there is an
825  * error starting a connection a timer will be started for a retry.
826  */
827 static void
828 ip_start_connections()
829 {
830 	ip_cinfo_t *conn;
831 
832 	(void) pthread_mutex_lock(&ip_conns_lock);
833 	for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
834 		if (ip_xprt_setup(ip_hdl, conn) != 0) {
835 			conn->ipc_timer = fmd_timer_install(ip_hdl, conn, NULL,
836 			    ip_sleep);
837 		}
838 	}
839 	(void) pthread_mutex_unlock(&ip_conns_lock);
840 }
841 
842 /*
843  * Timeout handler for the transport module.  We use these types of timeouts:
844  *
845  * (a) arg is ip_cinfo_t: attempt ip_xprt_setup(), re-install timeout to retry
846  * (b) arg is ip_xprt_t, FMD_XPRT_SUSPENDED: call fmd_xprt_resume() on arg
847  * (c) arg is ip_xprt_t, !FMD_XPRT_SUSPENDED: call ip_xprt_destroy() on arg
848  * (d) arg is NULL, ignore as this shouldn't happen
849  *
850  * Case (c) is required as we need to cause the module's main thread, which
851  * runs this timeout handler, to join with the transport's auxiliary thread.
852  * If the connection is a client then a timer will be installed to retry
853  * connecting to the server.
854  */
855 static void
856 ip_timeout(fmd_hdl_t *hdl, id_t id, void *arg) {
857 	int install_timer;
858 	ip_cinfo_t *cinfo;
859 	ip_xprt_t *ipx;
860 
861 	if (arg == NULL) {
862 		fmd_hdl_error(hdl, "ip_timeout failed because hg arg is NULL");
863 	} else if (ip_argis_cinfo(arg)) {
864 		ip_debug(IP_DEBUG_FINER,
865 			"Enter ip_timeout (a) install new timer");
866 		cinfo = arg;
867 		if ((ip_xprt_setup(hdl, arg) != 0) && !ip_quit)
868 			cinfo->ipc_timer = fmd_timer_install(
869 				hdl, cinfo, NULL, ip_sleep);
870 		else
871 			cinfo->ipc_timer = NULL;
872 	} else {
873 		ipx = arg;
874 		if (ipx->ipx_flags & FMD_XPRT_SUSPENDED) {
875 			ipx->ipx_spnd_timer = NULL;
876 			ip_debug(IP_DEBUG_FINE, "timer %d waking ipx %p",
877 				(int)id, arg);
878 			ipx->ipx_flags &= ~FMD_XPRT_SUSPENDED;
879 			fmd_xprt_resume(hdl, ipx->ipx_xprt);
880 		} else {
881 			ip_debug(IP_DEBUG_FINE, "timer %d closing ipx %p",
882 				(int)id, arg);
883 			cinfo = ipx->ipx_cinfo;
884 			install_timer = (ipx->ipx_flags & FMD_XPRT_ACCEPT) !=
885 				FMD_XPRT_ACCEPT;
886 			ip_xprt_destroy(ipx);
887 			if (install_timer && !ip_quit)
888 				cinfo->ipc_timer = fmd_timer_install(
889 					hdl, cinfo, NULL, ip_sleep);
890 			else
891 				cinfo->ipc_timer = NULL;
892 		}
893 	}
894 }
895 
896 static const fmd_prop_t fmd_props[] = {
897 	{ "ip_authority", FMD_TYPE_STRING, NULL },
898 	{ "ip_bufsize", FMD_TYPE_SIZE, "4k" },
899 	{ "ip_burp", FMD_TYPE_TIME, "0" },
900 	{ "ip_enable", FMD_TYPE_BOOL, "false" },
901 	{ "ip_mtbf", FMD_TYPE_INT32, "0" },
902 	{ "ip_external", FMD_TYPE_BOOL, "true" },
903 	{ "ip_no_remote_repair", FMD_TYPE_BOOL, "true" },
904 	{ "ip_hconly", FMD_TYPE_BOOL, "false" },
905 	{ "ip_rdonly", FMD_TYPE_BOOL, "false" },
906 	{ "ip_hc_present_only", FMD_TYPE_BOOL, "false" },
907 	{ "ip_domain_name", FMD_TYPE_STRING, NULL },
908 	{ "ip_port", FMD_TYPE_STRING, "664" },
909 	{ "ip_qlen", FMD_TYPE_INT32, "32" },
910 	{ "ip_retry", FMD_TYPE_INT32, "-1" },	    /* -1=forever */
911 	{ "ip_server", FMD_TYPE_STRING, NULL },	    /* server name */
912 	{ "ip_sleep", FMD_TYPE_TIME, "10s" },
913 	{ "ip_translate", FMD_TYPE_BOOL, "false" },
914 	{ "ip_bind_addr", FMD_TYPE_STRING, NULL },  /* network interface addr */
915 	{ "ip_debug_level", FMD_TYPE_INT32, "1" },  /* debug levels 0-3 */
916 	{ NULL, 0, NULL }
917 };
918 
919 static const fmd_hdl_ops_t fmd_ops = {
920 	ip_fmdo_recv,		/* fmdo_recv */
921 	ip_timeout,		/* fmdo_timeout */
922 	NULL,			/* fmdo_close */
923 	NULL,			/* fmdo_stats */
924 	NULL,			/* fmdo_gc */
925 	ip_fmdo_send,		/* fmdo_send */
926 };
927 
928 static const fmd_hdl_info_t fmd_info = {
929 	"IP Transport Agent", "1.0", &fmd_ops, fmd_props
930 };
931 
932 /*
933  * Initialize the ip-transport module as either a server or a client.  Note
934  * that the ip-transport module is not enabled by default under Solaris:
935  * at present we require a developer or tool to "setprop ip_enable true".
936  * If ip-transport is needed in the future out-of-the-box on one or more Sun
937  * platforms, the code to check 'ip_enable' should be replaced with:
938  *
939  * (a) configuring ip-transport to operate in client mode by default,
940  * (b) a platform-specific configuration mechanism, or
941  * (c) a means to assure security and prevent denial-of-service attacks.
942  *
943  * Note that (c) is only an issue when the transport module operates
944  * in server mode (i.e. with the ip_server property set to NULL) on a
945  * generic Solaris system which may be exposed directly to the Internet.
946  * The property ip_bind_addr can be used to define a private network interface
947  * to use so that the service is not exposed to the Internet.
948  */
949 void
950 _fmd_init(fmd_hdl_t *hdl)
951 {
952 	char *addr, *auth, *p, *q, *r, *s;
953 	int err;
954 
955 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
956 		return; /* failed to register handle */
957 
958 	if (fmd_prop_get_int32(hdl, "ip_enable") == FMD_B_FALSE) {
959 		fmd_hdl_unregister(hdl);
960 		return;
961 	}
962 
963 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
964 	    sizeof (ip_stat) / sizeof (fmd_stat_t), (fmd_stat_t *)&ip_stat);
965 
966 	ip_hdl = hdl;
967 	(void) pthread_mutex_init(&ip_lock, NULL);
968 
969 	ip_burp = fmd_prop_get_int64(hdl, "ip_burp");
970 	ip_mtbf = fmd_prop_get_int32(hdl, "ip_mtbf");
971 	ip_external = fmd_prop_get_int32(hdl, "ip_external");
972 	ip_no_remote_repair = fmd_prop_get_int32(hdl, "ip_no_remote_repair");
973 	ip_hconly = fmd_prop_get_int32(hdl, "ip_hconly");
974 	ip_rdonly = fmd_prop_get_int32(hdl, "ip_rdonly");
975 	ip_hc_present_only = fmd_prop_get_int32(hdl, "ip_hc_present_only");
976 	ip_domain_name = fmd_prop_get_string(hdl, "ip_domain_name");
977 	ip_qlen = fmd_prop_get_int32(hdl, "ip_qlen");
978 	ip_retry = fmd_prop_get_int32(hdl, "ip_retry");
979 	ip_sleep = fmd_prop_get_int64(hdl, "ip_sleep");
980 	ip_translate = fmd_prop_get_int32(hdl, "ip_translate");
981 
982 	ip_size = (size_t)fmd_prop_get_int64(hdl, "ip_bufsize");
983 	ip_size = MAX(ip_size, sizeof (ip_hdr_t));
984 	ip_port = fmd_prop_get_string(hdl, "ip_port");
985 	ip_debug_level = fmd_prop_get_int32(hdl, "ip_debug_level");
986 
987 	ip_conns = NULL;
988 	addr = fmd_prop_get_string(hdl, "ip_bind_addr");
989 	if (addr != NULL) {
990 		err = ip_setup_addrs(addr, B_TRUE);
991 		if (err) {
992 			fmd_hdl_abort(hdl, "Unable to setup ip_bind_addr %s",
993 			    addr);
994 			return;
995 		}
996 		fmd_prop_free_string(hdl, addr);
997 	}
998 	addr = fmd_prop_get_string(hdl, "ip_server");
999 	if (addr != NULL) {
1000 		err = ip_setup_addrs(addr, B_FALSE);
1001 		if (err) {
1002 			fmd_hdl_abort(hdl, "Unable to setup ip_server %s",
1003 			    addr);
1004 			return;
1005 		}
1006 		fmd_prop_free_string(hdl, addr);
1007 	}
1008 
1009 	/*
1010 	 * If no specific connecitons configured then set up general server
1011 	 * listening on all network ports.
1012 	 */
1013 	if (ip_conns == NULL) {
1014 		if (ip_setup_addr(NULL, B_TRUE) != 0) {
1015 			fmd_hdl_abort(hdl, "Unable to setup server.");
1016 			return;
1017 		}
1018 	}
1019 
1020 	/*
1021 	 * If ip_authority is set, tokenize this string and turn it into an
1022 	 * FMA authority represented as a name-value pair list.  We will use
1023 	 * this authority for all transports created by this module.  If
1024 	 * ip_authority isn't set, we'll compute authorities on the fly.
1025 	 */
1026 	if ((auth = fmd_prop_get_string(hdl, "ip_authority")) != NULL) {
1027 		(void) nvlist_alloc(&ip_auth, 0, 0);
1028 		(void) nvlist_add_uint8(ip_auth,
1029 		    FM_VERSION, FM_FMRI_AUTH_VERSION);
1030 
1031 		s = alloca(strlen(auth) + 1);
1032 		(void) strcpy(s, auth);
1033 		fmd_prop_free_string(hdl, auth);
1034 
1035 		for (p = strtok_r(s, ",", &q); p != NULL;
1036 		    p = strtok_r(NULL, ",", &q)) {
1037 
1038 			if ((r = strchr(p, '=')) == NULL) {
1039 				ip_addr_cleanup();
1040 				fmd_hdl_abort(hdl, "ip_authority element <%s> "
1041 				    "must be in <name>=<value> form\n", p);
1042 			}
1043 
1044 			*r = '\0';
1045 			(void) nvlist_add_string(ip_auth, p, r + 1);
1046 			*r = '=';
1047 		}
1048 	}
1049 
1050 	ip_start_connections();
1051 }
1052 
1053 void
1054 _fmd_fini(fmd_hdl_t *hdl)
1055 {
1056 	ip_quit++; /* set quit flag before signalling auxiliary threads */
1057 
1058 	while (ip_xps != NULL)
1059 		ip_xprt_destroy(ip_xps);
1060 
1061 	if (ip_auth != NULL)
1062 		nvlist_free(ip_auth);
1063 
1064 	ip_addr_cleanup();
1065 
1066 	if (ip_domain_name != NULL)
1067 		fmd_prop_free_string(ip_hdl, ip_domain_name);
1068 
1069 	fmd_hdl_unregister(hdl);
1070 }
1071