xref: /illumos-gate/usr/src/cmd/fm/modules/common/ip-transport/ip.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/sysmacros.h>
29 #include <sys/fm/protocol.h>
30 
31 #include <netinet/in.h>
32 #include <arpa/inet.h>
33 
34 #include <strings.h>
35 #include <unistd.h>
36 #include <pthread.h>
37 #include <fcntl.h>
38 #include <errno.h>
39 #include <netdb.h>
40 #include <poll.h>
41 #include <stdarg.h>
42 
43 #include <fm/fmd_api.h>
44 
/*
 * Wire-format constants.  Every packet header starts with the IP_MAGLEN
 * bytes of IP_MAGIC (the string's terminating NUL is not transmitted).
 */
#define	IP_MAGIC	"\177FMA"
#define	IP_MAGLEN	4

/*
 * Debug verbosity thresholds, compared against ip_debug_level.
 */
#define	IP_DEBUG_OFF	0	/* no informational debugging */
#define	IP_DEBUG_FINE	1	/* basic debug information (default) */
#define	IP_DEBUG_FINER	2	/* more debug information */
#define	IP_DEBUG_FINEST	3	/* all debug information */
51 
52 typedef struct ip_hdr {
53 	char iph_magic[IP_MAGLEN]; /* magic string */
54 	uint32_t iph_size;	/* packed size */
55 } ip_hdr_t;
56 
/*
 * A data buffer together with its allocated size.
 */
typedef struct ip_buf {
	/* start of the data buffer */
	void *ipb_buf;
	/* number of bytes allocated at ipb_buf */
	size_t ipb_size;
} ip_buf_t;
61 
62 typedef struct ip_cinfo {	    /* Connection specific information */
63 	struct addrinfo *ipc_addr;  /* Connection address(es) */
64 	char *ipc_name;		    /* The name of the server or interface */
65 	int ipc_retry;		    /* The number of connection retries */
66 	boolean_t ipc_accept;	    /* Will connection accept clients */
67 	id_t ipc_timer;		    /* FMD timer id for connection */
68 	struct ip_cinfo *ipc_next;  /* Next conneciton in list */
69 } ip_cinfo_t;
70 
71 typedef struct ip_xprt {
72 	fmd_xprt_t *ipx_xprt;	/* transport handle */
73 	int ipx_flags;		/* transport flags */
74 	int ipx_fd;		/* socket file descriptor */
75 	int ipx_done;		/* flag indicating connection closed */
76 	pthread_t ipx_tid;	/* recv-side auxiliary thread */
77 	ip_buf_t ipx_sndbuf;	/* buffer for sending events */
78 	ip_buf_t ipx_rcvbuf;	/* buffer for receiving events */
79 	ip_cinfo_t *ipx_cinfo;	/* info for reconnect */
80 	id_t ipx_spnd_timer;	/* connection suspend timer */
81 	char *ipx_addr;		/* address:port of remote connection */
82 	struct ip_xprt *ipx_next;	/* next ip_xprt in global list */
83 } ip_xprt_t;
84 
/* Printable identity of a transport: its remote address, if one is known. */
#define	IPX_ID(a) ((a)->ipx_addr != NULL ? (a)->ipx_addr : "(Not connected)")
86 
87 typedef struct ip_stat {
88 	fmd_stat_t ips_accfail;	/* failed accepts */
89 	fmd_stat_t ips_badmagic; /* invalid packet headers */
90 	fmd_stat_t ips_packfail; /* failed packs */
91 	fmd_stat_t ips_unpackfail; /* failed unpacks */
92 } ip_stat_t;
93 
94 static void ip_xprt_create(fmd_xprt_t *, int, int, ip_cinfo_t *, char *);
95 static void ip_xprt_destroy(ip_xprt_t *);
96 
97 static ip_stat_t ip_stat = {
98 	{ "accfail", FMD_TYPE_UINT64, "failed accepts" },
99 	{ "badmagic", FMD_TYPE_UINT64, "invalid packet headers" },
100 	{ "packfail", FMD_TYPE_UINT64, "failed packs" },
101 	{ "unpackfail", FMD_TYPE_UINT64, "failed unpacks" },
102 };
103 
104 static fmd_hdl_t *ip_hdl;	/* module handle */
105 static pthread_mutex_t ip_lock;	/* lock for ip_xps list */
106 static ip_xprt_t *ip_xps;	/* list of active transports */
107 static pthread_mutex_t ip_conns_lock;	/* lock for ip_conns list */
108 static ip_cinfo_t *ip_conns;	/* list of all configured connection info */
109 static nvlist_t *ip_auth;	/* authority to use for transport(s) */
110 static size_t ip_size;		/* default buffer size */
111 static volatile int ip_quit;	/* signal to quit */
112 static int ip_qlen;		/* queue length for listen(3SOCKET) */
113 static int ip_mtbf;		/* mtbf for simulating packet drop */
114 static int ip_external;		/* set transport to be "external" */
115 static int ip_no_remote_repair;	/* disallow remote repair */
116 static int ip_hconly;		/* only cache faults that are hc-scheme */
117 static int ip_rdonly;		/* force transport to be rdonly */
118 static int ip_hc_present_only;	/* only cache faults if hc-scheme and present */
119 static char *ip_domain_name;	/* set domain name for received list.suspects */
120 static hrtime_t ip_burp;	/* make mtbf slower by adding this much delay */
121 static int ip_translate;	/* call fmd_xprt_translate() before sending */
122 static char *ip_port;		/* port to connect to (or bind to if server) */
123 static int ip_retry;		/* retry count for ip_xprt_setup() -1=forever */
124 static hrtime_t ip_sleep;	/* sleep delay for ip_xprt_setup() */
125 static int ip_debug_level;	/* level for printing debug messages */
126 
127 /*
128  * Prints a debug message to the fmd debug framework if the debug level is set
129  * to at least the given level.
130  */
131 static void
132 ip_debug(int level, char *fmt, ...)
133 {
134 	if (ip_debug_level >= level) {
135 		va_list args;
136 		va_start(args, fmt);
137 		fmd_hdl_vdebug(ip_hdl, fmt, args);
138 		va_end(args);
139 	}
140 }
141 
142 /*
143  * Allocate space in ipx_sndbuf for a header and a packed XDR encoding of
144  * the specified nvlist, and then send the buffer to our remote peer.
145  */
146 static int
147 ip_fmdo_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
148 {
149 	ip_xprt_t *ipx;
150 	size_t size, nvsize;
151 	char *buf, *nvbuf;
152 	ip_hdr_t *iph;
153 	ssize_t r, n;
154 	int err;
155 
156 	if (xp == NULL) {
157 		ip_debug(IP_DEBUG_FINE, "ip_fmdo_send failed: xp=NULL\n");
158 		return (FMD_SEND_FAILED);
159 	}
160 	ipx = fmd_xprt_getspecific(hdl, xp);
161 
162 	/*
163 	 * For testing purposes, if ip_mtbf is non-zero, use this to pseudo-
164 	 * randomly simulate the need for retries.  If ip_burp is also set,
165 	 * then we also suspend the transport for a bit and wake it up again.
166 	 */
167 	if (ip_mtbf != 0 && gethrtime() % ip_mtbf == 0) {
168 		if (ip_burp != 0) {
169 			ip_debug(IP_DEBUG_FINE, "burping ipx %s", IPX_ID(ipx));
170 			ipx->ipx_flags |= FMD_XPRT_SUSPENDED;
171 			ipx->ipx_spnd_timer = fmd_timer_install(
172 			    ip_hdl, ipx, NULL, ip_burp);
173 			fmd_xprt_suspend(ip_hdl, xp);
174 		}
175 		return (FMD_SEND_RETRY);
176 	}
177 
178 	if (ip_translate && (nvl = fmd_xprt_translate(hdl, xp, ep)) == NULL) {
179 		fmd_hdl_error(hdl, "failed to translate event %p", (void *)ep);
180 		return (FMD_SEND_FAILED);
181 	}
182 
183 	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_XDR);
184 	size = r = sizeof (ip_hdr_t) + nvsize;
185 
186 	if (ipx->ipx_sndbuf.ipb_size < size) {
187 		fmd_hdl_free(hdl, ipx->ipx_sndbuf.ipb_buf,
188 		    ipx->ipx_sndbuf.ipb_size);
189 		ipx->ipx_sndbuf.ipb_size = P2ROUNDUP(size, 16);
190 		ipx->ipx_sndbuf.ipb_buf = fmd_hdl_alloc(hdl,
191 		    ipx->ipx_sndbuf.ipb_size, FMD_SLEEP);
192 	}
193 
194 	buf = ipx->ipx_sndbuf.ipb_buf;
195 	iph = (ip_hdr_t *)(uintptr_t)buf;
196 	nvbuf = buf + sizeof (ip_hdr_t);
197 
198 	bcopy(IP_MAGIC, iph->iph_magic, IP_MAGLEN);
199 	iph->iph_size = htonl(nvsize);
200 	err = nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
201 
202 	if (ip_translate)
203 		nvlist_free(nvl);
204 
205 	if (err != 0) {
206 		fmd_hdl_error(ip_hdl, "failed to pack event for "
207 		    "transport %p: %s\n", (void *)ipx->ipx_xprt, strerror(err));
208 		ip_stat.ips_packfail.fmds_value.ui64++;
209 		return (FMD_SEND_FAILED);
210 	}
211 
212 	while (!ip_quit && r != 0) {
213 		if ((n = send(ipx->ipx_fd, buf, r, 0)) < 0) {
214 			if (errno != EINTR && errno != EWOULDBLOCK) {
215 				ip_debug(IP_DEBUG_FINE,
216 				    "failed to send to %s", IPX_ID(ipx));
217 				return (FMD_SEND_FAILED);
218 			}
219 			continue;
220 		}
221 		buf += n;
222 		r -= n;
223 	}
224 
225 	ip_debug(IP_DEBUG_FINEST, "Sent event %d bytes to %s",
226 	    size, IPX_ID(ipx));
227 	return (FMD_SEND_SUCCESS);
228 }
229 
230 /*
231  * Sends events over transports that are configured read only.  When the module
232  * is in read only mode it will receive all events and only send events that
233  * have a subscription set.
234  *
235  * The configuration file will have to set prop ip_rdonly true and also
236  * subscribe for events that are desired to be sent over the transport in order
237  * for this function to be used.
238  */
239 /* ARGSUSED */
240 static void
241 ip_fmdo_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
242 {
243 	int err;
244 	ip_xprt_t *ipx;
245 
246 	if (ip_rdonly && !ip_quit) {
247 		(void) pthread_mutex_lock(&ip_lock);
248 
249 		for (ipx = ip_xps; ipx != NULL; ipx = ipx->ipx_next) {
250 			err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
251 			while (FMD_SEND_RETRY == err) {
252 				err = ip_fmdo_send(hdl, ipx->ipx_xprt, ep, nvl);
253 			}
254 		}
255 		(void) pthread_mutex_unlock(&ip_lock);
256 	}
257 }
258 
259 /*
260  * Receive a chunk of data of the specified size from our remote peer.  The
261  * data is received into ipx_rcvbuf, and then a pointer to the buffer is
262  * returned.  NOTE: The data is only valid until the next call to ip_xprt_recv.
263  * If the connection breaks or ip_quit is set during receive, NULL is returned.
264  */
265 static void *
266 ip_xprt_recv(ip_xprt_t *ipx, size_t size)
267 {
268 	char *buf = ipx->ipx_rcvbuf.ipb_buf;
269 	ssize_t n, r = size;
270 
271 	if (ipx->ipx_rcvbuf.ipb_size < size) {
272 		fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf,
273 		    ipx->ipx_rcvbuf.ipb_size);
274 		ipx->ipx_rcvbuf.ipb_size = P2ROUNDUP(size, 16);
275 		ipx->ipx_rcvbuf.ipb_buf = buf = fmd_hdl_alloc(ip_hdl,
276 		    ipx->ipx_rcvbuf.ipb_size, FMD_SLEEP);
277 	}
278 
279 	while (!ip_quit && r != 0) {
280 		if ((n = recv(ipx->ipx_fd, buf, r, MSG_WAITALL)) == 0) {
281 			ipx->ipx_done++;
282 			return (NULL);
283 		}
284 
285 		if (n < 0) {
286 			if (errno != EINTR && errno != EWOULDBLOCK) {
287 				ip_debug(IP_DEBUG_FINE,
288 				    "failed to recv on ipx %s", IPX_ID(ipx));
289 			}
290 			continue;
291 		}
292 		/* Reset retry counter after a successful connection */
293 		if (ipx->ipx_cinfo) {
294 			ipx->ipx_cinfo->ipc_retry = ip_retry;
295 		}
296 
297 		buf += n;
298 		r -= n;
299 	}
300 
301 	return (r ? NULL: ipx->ipx_rcvbuf.ipb_buf);
302 }
303 
304 /*
305  * Sets the address/port of the remote connection in the connection info struct
306  * This is called after a TCP session has been set up with a known remote
307  * address (sap)
308  */
309 static void
310 ip_xprt_set_addr(ip_xprt_t *ipx, const struct sockaddr *sap)
311 {
312 	const struct sockaddr_in6 *sin6 = (const void *)sap;
313 	const struct sockaddr_in *sin = (const void *)sap;
314 
315 	char buf[INET6_ADDRSTRLEN + 16];
316 	struct in_addr v4addr;
317 	in_port_t port;
318 	int n;
319 
320 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_set_addr");
321 
322 	if (sap->sa_family == AF_INET6 &&
323 	    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
324 		IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr, &v4addr);
325 		(void) inet_ntop(AF_INET, &v4addr, buf, sizeof (buf));
326 		port = ntohs(sin6->sin6_port);
327 	} else if (sap->sa_family == AF_INET6) {
328 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof (buf));
329 		port = ntohs(sin6->sin6_port);
330 	} else {
331 		(void) inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof (buf));
332 		port = ntohs(sin->sin_port);
333 	}
334 
335 	n = strlen(buf);
336 	(void) snprintf(buf + n, sizeof (buf) - n, ":%u", port);
337 
338 	if (ipx->ipx_addr)
339 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
340 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, buf, FMD_SLEEP);
341 	ip_debug(IP_DEBUG_FINE, "connection addr is %s on %p",
342 	    ipx->ipx_addr, (void *)ipx);
343 }
344 
345 static nvlist_t *
346 ip_xprt_auth(ip_xprt_t *ipx)
347 {
348 	nvlist_t *nvl;
349 	int err;
350 
351 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_auth");
352 
353 	if (ip_auth != NULL)
354 		err = nvlist_dup(ip_auth, &nvl, 0);
355 	else
356 		err = nvlist_alloc(&nvl, 0, 0);
357 
358 	if (err != 0) {
359 		fmd_hdl_abort(ip_hdl, "failed to create nvlist for "
360 		    "authority: %s\n", strerror(err));
361 	}
362 
363 	if (ip_auth != NULL)
364 		return (nvl);
365 
366 	ip_debug(IP_DEBUG_FINE, "ip_authority %s=%s\n",
367 	    FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
368 
369 	(void) nvlist_add_uint8(nvl, FM_VERSION, FM_FMRI_AUTH_VERSION);
370 	(void) nvlist_add_string(nvl, FM_FMRI_AUTH_SERVER, ipx->ipx_addr);
371 
372 	return (nvl);
373 }
374 
375 static void
376 ip_xprt_accept(ip_xprt_t *ipx)
377 {
378 	struct sockaddr_storage sa;
379 	socklen_t salen = sizeof (sa);
380 	fmd_xprt_t *xp;
381 	int fd;
382 
383 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_accept");
384 
385 	if ((fd = accept(ipx->ipx_fd, (struct sockaddr *)&sa, &salen)) == -1) {
386 		fmd_hdl_error(ip_hdl, "failed to accept connection");
387 		ip_stat.ips_accfail.fmds_value.ui64++;
388 		return;
389 	}
390 	ip_debug(IP_DEBUG_FINE, "Accepted socket on fd %d", fd);
391 
392 	ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
393 	xp = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
394 	    ip_xprt_auth(ipx), NULL);
395 	ip_xprt_create(xp, fd, ipx->ipx_flags, ipx->ipx_cinfo, ipx->ipx_addr);
396 }
397 
398 static void
399 ip_xprt_recv_event(ip_xprt_t *ipx)
400 {
401 	ip_hdr_t *iph;
402 	nvlist_t *nvl;
403 	size_t size;
404 	void *buf;
405 	int err;
406 
407 	if ((iph = ip_xprt_recv(ipx, sizeof (ip_hdr_t))) == NULL)
408 		return; /* connection broken */
409 
410 	if (bcmp(iph->iph_magic, IP_MAGIC, IP_MAGLEN) != 0) {
411 		fmd_hdl_error(ip_hdl,
412 		    "invalid hdr magic %x.%x.%x.%x from transport %s\n",
413 		    iph->iph_magic[0], iph->iph_magic[1], iph->iph_magic[2],
414 		    iph->iph_magic[3], IPX_ID(ipx));
415 		ip_stat.ips_badmagic.fmds_value.ui64++;
416 		return;
417 	}
418 
419 	size = ntohl(iph->iph_size);
420 
421 	if ((buf = ip_xprt_recv(ipx, size)) == NULL)
422 		return; /* connection broken */
423 
424 	if ((err = nvlist_unpack(buf, size, &nvl, 0)) != 0) {
425 		fmd_hdl_error(ip_hdl, "failed to unpack event from "
426 		    "transport %s: %s\n",
427 		    IPX_ID(ipx), strerror(err));
428 		ip_stat.ips_unpackfail.fmds_value.ui64++;
429 	} else {
430 		if (ip_domain_name)
431 			fmd_xprt_add_domain(ip_hdl, nvl, ip_domain_name);
432 		fmd_xprt_post(ip_hdl, ipx->ipx_xprt, nvl, 0);
433 	}
434 
435 	if (fmd_xprt_error(ip_hdl, ipx->ipx_xprt)) {
436 		fmd_hdl_error(ip_hdl, "protocol error on transport %p",
437 		    (void *)ipx->ipx_xprt);
438 		ipx->ipx_done++;
439 	}
440 	ip_debug(IP_DEBUG_FINEST, "Recv event %d bytes from %s",
441 	    size, IPX_ID(ipx));
442 }
443 
444 static void
445 ip_xprt_thread(void *arg)
446 {
447 	ip_xprt_t *ipx = arg;
448 	struct sockaddr_storage sa;
449 	socklen_t salen = sizeof (sa);
450 	struct pollfd pfd;
451 
452 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_thread");
453 
454 	while (!ip_quit && !ipx->ipx_done) {
455 		if (ipx->ipx_xprt != NULL || (ipx->ipx_flags & FMD_XPRT_ACCEPT))
456 			pfd.events = POLLIN;
457 		else
458 			pfd.events = POLLOUT;
459 
460 		pfd.fd = ipx->ipx_fd;
461 		pfd.revents = 0;
462 
463 		if (poll(&pfd, 1, -1) <= 0)
464 			continue; /* loop around and check ip_quit */
465 
466 		if (pfd.revents & (POLLHUP | POLLERR)) {
467 			ip_debug(IP_DEBUG_FINE, "hangup fd %d\n", ipx->ipx_fd);
468 			break;
469 		}
470 
471 		if (pfd.revents & POLLOUT) {
472 			/*
473 			 * Once we're connected, there's no reason to have our
474 			 * calls to recv() and send() be non-blocking since we
475 			 * we have separate threads for each: clear O_NONBLOCK.
476 			 */
477 			(void) fcntl(ipx->ipx_fd, F_SETFL,
478 			    fcntl(ipx->ipx_fd, F_GETFL, 0) & ~O_NONBLOCK);
479 
480 			if (getpeername(ipx->ipx_fd, (struct sockaddr *)&sa,
481 			    &salen) != 0) {
482 				ip_debug(IP_DEBUG_FINE,
483 				    "Not connected, no remote name for fd %d. "
484 				    " Will retry.",
485 				    ipx->ipx_fd);
486 				bzero(&sa, sizeof (sa));
487 				break;
488 			}
489 			ip_xprt_set_addr(ipx, (struct sockaddr *)&sa);
490 			ipx->ipx_xprt = fmd_xprt_open(ip_hdl, ipx->ipx_flags,
491 			    ip_xprt_auth(ipx), ipx);
492 
493 			ip_debug(IP_DEBUG_FINE, "connect fd %d ipx %p",
494 			    ipx->ipx_fd, (void *)ipx);
495 			continue;
496 		}
497 
498 		if (pfd.revents & POLLIN) {
499 			if (ipx->ipx_xprt == NULL)
500 				ip_xprt_accept(ipx);
501 			else
502 				ip_xprt_recv_event(ipx);
503 		}
504 	}
505 
506 	ipx->ipx_cinfo->ipc_timer = fmd_timer_install(ip_hdl, ipx, NULL, 0);
507 	ip_debug(IP_DEBUG_FINE, "close fd %d (timer %d)", ipx->ipx_fd,
508 	    (int)ipx->ipx_cinfo->ipc_timer);
509 }
510 
511 static void
512 ip_xprt_create(fmd_xprt_t *xp, int fd, int flags, ip_cinfo_t *cinfo, char *addr)
513 {
514 	ip_xprt_t *ipx = fmd_hdl_zalloc(ip_hdl, sizeof (ip_xprt_t), FMD_SLEEP);
515 
516 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_create %p", (void *)ipx);
517 
518 	ipx->ipx_xprt = xp;
519 	ipx->ipx_flags = flags;
520 	ipx->ipx_fd = fd;
521 	ipx->ipx_tid = fmd_thr_create(ip_hdl, ip_xprt_thread, ipx);
522 	ipx->ipx_cinfo = cinfo;
523 	ipx->ipx_addr = fmd_hdl_strdup(ip_hdl, addr, FMD_SLEEP);
524 
525 	if (ipx->ipx_xprt != NULL)
526 		fmd_xprt_setspecific(ip_hdl, ipx->ipx_xprt, ipx);
527 
528 	(void) pthread_mutex_lock(&ip_lock);
529 
530 	ipx->ipx_next = ip_xps;
531 	ip_xps = ipx;
532 
533 	(void) pthread_mutex_unlock(&ip_lock);
534 }
535 
536 static void
537 ip_xprt_destroy(ip_xprt_t *ipx)
538 {
539 	ip_xprt_t *ipp, **ppx = &ip_xps;
540 
541 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_destory %s %p",
542 	    IPX_ID(ipx), (void *)ipx);
543 
544 	(void) pthread_mutex_lock(&ip_lock);
545 
546 	for (ipp = *ppx; ipp != NULL; ipp = ipp->ipx_next) {
547 		if (ipp != ipx)
548 			ppx = &ipp->ipx_next;
549 		else
550 			break;
551 	}
552 
553 	if (ipp != ipx) {
554 		(void) pthread_mutex_unlock(&ip_lock);
555 		fmd_hdl_abort(ip_hdl, "ipx %p not on xps list\n", (void *)ipx);
556 	}
557 
558 	*ppx = ipx->ipx_next;
559 	ipx->ipx_next = NULL;
560 
561 	(void) pthread_mutex_unlock(&ip_lock);
562 
563 	if (ipx->ipx_spnd_timer)
564 		fmd_timer_remove(ip_hdl, ipx->ipx_spnd_timer);
565 
566 	fmd_thr_signal(ip_hdl, ipx->ipx_tid);
567 	fmd_thr_destroy(ip_hdl, ipx->ipx_tid);
568 
569 	if (ipx->ipx_xprt != NULL)
570 		fmd_xprt_close(ip_hdl, ipx->ipx_xprt);
571 
572 	fmd_hdl_free(ip_hdl, ipx->ipx_sndbuf.ipb_buf, ipx->ipx_sndbuf.ipb_size);
573 	fmd_hdl_free(ip_hdl, ipx->ipx_rcvbuf.ipb_buf, ipx->ipx_rcvbuf.ipb_size);
574 
575 	(void) close(ipx->ipx_fd);
576 	if (ipx->ipx_addr) {
577 		fmd_hdl_strfree(ip_hdl, ipx->ipx_addr);
578 		ipx->ipx_addr = NULL;
579 	}
580 	fmd_hdl_free(ip_hdl, ipx, sizeof (ip_xprt_t));
581 }
582 
583 /*
584  * Loop through the addresses in the connection info structure that were
585  * created by getaddrinfo() in ip_setup_addr during initialization (_fmd_init)
586  * and for each one attempt to create a socket and initialize it.  If we are
587  * successful, return zero.  If we fail, we check ip_retry: if it is non-zero
588  * we return the last errno and let our caller retry ip_xprt_setup() later.  If
589  * ip_retry reaches zero, we call fmd_hdl_abort() with an appropriate message.
590  */
591 static int
592 ip_xprt_setup(fmd_hdl_t *hdl, ip_cinfo_t *cinfo)
593 {
594 	int err, fd, oflags, xflags, optval = 1;
595 	struct addrinfo *aip;
596 	const char *s1, *s2;
597 	struct addrinfo *ail = cinfo->ipc_addr;
598 
599 	ip_debug(IP_DEBUG_FINER, "Enter ip_xprt_setup %s\n",
600 	    cinfo->ipc_name == NULL ? "localhost" : cinfo->ipc_name);
601 
602 	/*
603 	 * Set up flags as specified in the .conf file. Note that these are
604 	 * mostly only used for testing purposes, allowing the transport to
605 	 * be set up in various modes.
606 	 */
607 	xflags = (ip_rdonly == FMD_B_TRUE) ? FMD_XPRT_RDONLY : FMD_XPRT_RDWR;
608 	if (cinfo->ipc_accept)
609 		xflags |= FMD_XPRT_ACCEPT;
610 	if (ip_external == FMD_B_TRUE)
611 		xflags |= FMD_XPRT_EXTERNAL;
612 	if (ip_no_remote_repair == FMD_B_TRUE)
613 		xflags |= FMD_XPRT_NO_REMOTE_REPAIR;
614 	if (ip_hconly == FMD_B_TRUE)
615 		xflags |= FMD_XPRT_HCONLY;
616 	if (ip_hc_present_only == FMD_B_TRUE)
617 		xflags |= FMD_XPRT_HC_PRESENT_ONLY;
618 
619 	for (aip = ail; aip != NULL; aip = aip->ai_next) {
620 		if (aip->ai_family != AF_INET && aip->ai_family != AF_INET6)
621 			continue; /* ignore anything that isn't IPv4 or IPv6 */
622 
623 		if ((fd = socket(aip->ai_family,
624 		    aip->ai_socktype, aip->ai_protocol)) == -1) {
625 			err = errno;
626 			continue;
627 		}
628 
629 		oflags = fcntl(fd, F_GETFL, 0);
630 		(void) fcntl(fd, F_SETFL, oflags | O_NONBLOCK);
631 
632 		if (xflags & FMD_XPRT_ACCEPT) {
633 			err = setsockopt(fd, SOL_SOCKET,
634 			    SO_REUSEADDR, &optval, sizeof (optval)) != 0 ||
635 			    bind(fd, aip->ai_addr, aip->ai_addrlen) != 0 ||
636 			    listen(fd, ip_qlen) != 0;
637 		} else {
638 			err = connect(fd, aip->ai_addr, aip->ai_addrlen);
639 			if (err)
640 				err = errno;
641 			if (err == EINPROGRESS)
642 				err = 0;
643 		}
644 
645 		if (err == 0) {
646 			ip_xprt_create(NULL, fd, xflags, cinfo, NULL);
647 			ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
648 			return (0);
649 		}
650 
651 		ip_debug(IP_DEBUG_FINE, "Error=%d errno=%d", err, errno);
652 
653 		err = errno;
654 		(void) close(fd);
655 	}
656 
657 	if (cinfo->ipc_name != NULL) {
658 		s1 = "failed to connect to";
659 		s2 = cinfo->ipc_name;
660 	} else {
661 		s1 = "failed to listen on";
662 		s2 = ip_port;
663 	}
664 
665 	if (err == EACCES || cinfo->ipc_retry-- == 0)
666 		fmd_hdl_abort(hdl, "%s %s: %s\n", s1, s2, strerror(err));
667 
668 	ip_debug(IP_DEBUG_FINE, "%s %s: %s (will retry)\n",
669 	    s1, s2, strerror(err));
670 	ip_debug(IP_DEBUG_FINER, "Exit ip_xprt_setup");
671 	return (err);
672 }
673 
674 /*
675  * Free address based resources
676  */
677 static void
678 ip_addr_cleanup()
679 {
680 	ip_cinfo_t *conn;
681 
682 	(void) pthread_mutex_lock(&ip_conns_lock);
683 	conn = ip_conns;
684 	while (conn != NULL) {
685 		ip_conns = conn->ipc_next;
686 		if (conn->ipc_addr != NULL)
687 			freeaddrinfo(conn->ipc_addr);
688 		conn->ipc_addr = NULL;
689 		if (conn->ipc_timer)
690 			fmd_timer_remove(ip_hdl, conn->ipc_timer);
691 		fmd_hdl_strfree(ip_hdl, conn->ipc_name);
692 		fmd_hdl_free(ip_hdl, conn, sizeof (ip_cinfo_t));
693 		conn = ip_conns;
694 	}
695 	(void) pthread_mutex_unlock(&ip_conns_lock);
696 
697 	fmd_prop_free_string(ip_hdl, ip_port);
698 }
699 
700 static boolean_t
701 ip_argis_cinfo(void *arg)
702 {
703 	boolean_t exists = B_FALSE;
704 	ip_cinfo_t *conn;
705 
706 	(void) pthread_mutex_lock(&ip_conns_lock);
707 	for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
708 		if (conn == arg) {
709 			exists = B_TRUE;
710 			break;
711 		}
712 	}
713 	(void) pthread_mutex_unlock(&ip_conns_lock);
714 
715 	return (exists);
716 }
717 
718 
719 static ip_cinfo_t *
720 ip_create_cinfo(char *server, boolean_t accept)
721 {
722 	int err;
723 	struct addrinfo aih;
724 	ip_cinfo_t *cinfo = fmd_hdl_zalloc(
725 	    ip_hdl, sizeof (ip_cinfo_t), FMD_NOSLEEP);
726 
727 	if (cinfo == NULL)
728 		return (NULL);
729 
730 	cinfo->ipc_accept = accept;
731 	cinfo->ipc_retry = ip_retry;
732 	if (server != NULL) {
733 		cinfo->ipc_name = fmd_hdl_strdup(ip_hdl, server, FMD_NOSLEEP);
734 		if (cinfo->ipc_name == NULL) {
735 			fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
736 			return (NULL);
737 		}
738 	}
739 
740 	bzero(&aih, sizeof (aih));
741 	aih.ai_flags = AI_ADDRCONFIG;
742 	aih.ai_family = AF_UNSPEC;
743 	aih.ai_socktype = SOCK_STREAM;
744 	if (server != NULL) {
745 		ip_debug(IP_DEBUG_FINE, "resolving %s:%s\n", server, ip_port);
746 	} else {
747 		aih.ai_flags |= AI_PASSIVE;
748 		cinfo->ipc_name = fmd_hdl_strdup(
749 		    ip_hdl, "localhost", FMD_NOSLEEP);
750 		if (cinfo->ipc_name == NULL) {
751 			fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
752 			return (NULL);
753 		}
754 	}
755 
756 	err = getaddrinfo(server, ip_port, &aih, &cinfo->ipc_addr);
757 	if (err != 0) {
758 		fmd_hdl_error(ip_hdl, "failed to resolve host %s port %s: %s\n",
759 		    cinfo->ipc_name, ip_port, gai_strerror(err));
760 		cinfo->ipc_addr = NULL;
761 		fmd_hdl_strfree(ip_hdl, cinfo->ipc_name);
762 		fmd_hdl_free(ip_hdl, cinfo, sizeof (ip_cinfo_t));
763 		cinfo = NULL;
764 	}
765 	return (cinfo);
766 }
767 
768 /*
769  * Setup a single ip address for ip connection.
770  * If unable to setup any of the addresses then all addresses will be cleaned up
771  * and non-zero will be returned.
772  */
773 static int
774 ip_setup_addr(char *server, boolean_t accept)
775 {
776 	int err = 0;
777 	ip_cinfo_t *cinfo = ip_create_cinfo(server, accept);
778 
779 	if (cinfo == NULL) {
780 		ip_addr_cleanup();
781 		err++;
782 	} else {
783 		(void) pthread_mutex_lock(&ip_conns_lock);
784 		cinfo->ipc_next = ip_conns;
785 		ip_conns = cinfo;
786 		(void) pthread_mutex_unlock(&ip_conns_lock);
787 	}
788 	return (err);
789 }
790 
791 /*
792  * Setup a ip addresses for an ip connection.  The address can be a comma
793  * separated list of addresses as well.
794  * If unable to setup any of the addresses then all addresses will be cleaned up
795  * and non-zero will be returned.
796  */
797 static int
798 ip_setup_addrs(char *server, boolean_t accept)
799 {
800 	int err = 0;
801 	char *addr = server;
802 	char *p;
803 
804 	for (p = server; *p != '\0'; p++) {
805 		if (*p == ',') {
806 			*p = '\0';
807 			err = ip_setup_addr(addr, accept);
808 			*p = ',';
809 			if (err)
810 				return (err);
811 			addr = ++p;
812 			if (*addr == '\0')
813 				break;
814 		}
815 	}
816 	if (*addr != '\0') {
817 		err = ip_setup_addr(addr, accept);
818 	}
819 	return (err);
820 }
821 
822 /*
823  * Starts all connections for each configured network address.  If there is an
824  * error starting a connection a timer will be started for a retry.
825  */
826 static void
827 ip_start_connections()
828 {
829 	ip_cinfo_t *conn;
830 
831 	(void) pthread_mutex_lock(&ip_conns_lock);
832 	for (conn = ip_conns; conn != NULL; conn = conn->ipc_next) {
833 		if (ip_xprt_setup(ip_hdl, conn) != 0) {
834 			conn->ipc_timer = fmd_timer_install(ip_hdl, conn, NULL,
835 			    ip_sleep);
836 		}
837 	}
838 	(void) pthread_mutex_unlock(&ip_conns_lock);
839 }
840 
841 /*
842  * Timeout handler for the transport module.  We use these types of timeouts:
843  *
844  * (a) arg is ip_cinfo_t: attempt ip_xprt_setup(), re-install timeout to retry
845  * (b) arg is ip_xprt_t, FMD_XPRT_SUSPENDED: call fmd_xprt_resume() on arg
846  * (c) arg is ip_xprt_t, !FMD_XPRT_SUSPENDED: call ip_xprt_destroy() on arg
847  * (d) arg is NULL, ignore as this shouldn't happen
848  *
849  * Case (c) is required as we need to cause the module's main thread, which
850  * runs this timeout handler, to join with the transport's auxiliary thread.
851  * If the connection is a client then a timer will be installed to retry
852  * connecting to the server.
853  */
854 static void
855 ip_timeout(fmd_hdl_t *hdl, id_t id, void *arg)
856 {
857 	int install_timer;
858 	ip_cinfo_t *cinfo;
859 	ip_xprt_t *ipx;
860 
861 	if (arg == NULL) {
862 		fmd_hdl_error(hdl, "ip_timeout failed because hg arg is NULL");
863 	} else if (ip_argis_cinfo(arg)) {
864 		ip_debug(IP_DEBUG_FINER,
865 		    "Enter ip_timeout (a) install new timer");
866 		cinfo = arg;
867 		if ((ip_xprt_setup(hdl, arg) != 0) && !ip_quit)
868 			cinfo->ipc_timer = fmd_timer_install(
869 			    hdl, cinfo, NULL, ip_sleep);
870 		else
871 			cinfo->ipc_timer = 0;
872 	} else {
873 		ipx = arg;
874 		if (ipx->ipx_flags & FMD_XPRT_SUSPENDED) {
875 			ipx->ipx_spnd_timer = 0;
876 			ip_debug(IP_DEBUG_FINE, "timer %d waking ipx %p",
877 			    (int)id, arg);
878 			ipx->ipx_flags &= ~FMD_XPRT_SUSPENDED;
879 			fmd_xprt_resume(hdl, ipx->ipx_xprt);
880 		} else {
881 			ip_debug(IP_DEBUG_FINE, "timer %d closing ipx %p",
882 			    (int)id, arg);
883 			cinfo = ipx->ipx_cinfo;
884 			install_timer = (ipx->ipx_flags & FMD_XPRT_ACCEPT) !=
885 			    FMD_XPRT_ACCEPT;
886 			ip_xprt_destroy(ipx);
887 			if (install_timer && !ip_quit)
888 				cinfo->ipc_timer = fmd_timer_install(
889 				    hdl, cinfo, NULL, ip_sleep);
890 			else
891 				cinfo->ipc_timer = 0;
892 		}
893 	}
894 }
895 
896 static const fmd_prop_t fmd_props[] = {
897 	{ "ip_authority", FMD_TYPE_STRING, NULL },
898 	{ "ip_bufsize", FMD_TYPE_SIZE, "4k" },
899 	{ "ip_burp", FMD_TYPE_TIME, "0" },
900 	{ "ip_enable", FMD_TYPE_BOOL, "false" },
901 	{ "ip_mtbf", FMD_TYPE_INT32, "0" },
902 	{ "ip_external", FMD_TYPE_BOOL, "true" },
903 	{ "ip_no_remote_repair", FMD_TYPE_BOOL, "true" },
904 	{ "ip_hconly", FMD_TYPE_BOOL, "false" },
905 	{ "ip_rdonly", FMD_TYPE_BOOL, "false" },
906 	{ "ip_hc_present_only", FMD_TYPE_BOOL, "false" },
907 	{ "ip_domain_name", FMD_TYPE_STRING, NULL },
908 	{ "ip_port", FMD_TYPE_STRING, "664" },
909 	{ "ip_qlen", FMD_TYPE_INT32, "32" },
910 	{ "ip_retry", FMD_TYPE_INT32, "-1" },	    /* -1=forever */
911 	{ "ip_server", FMD_TYPE_STRING, NULL },	    /* server name */
912 	{ "ip_sleep", FMD_TYPE_TIME, "10s" },
913 	{ "ip_translate", FMD_TYPE_BOOL, "false" },
914 	{ "ip_bind_addr", FMD_TYPE_STRING, NULL },  /* network interface addr */
915 	{ "ip_debug_level", FMD_TYPE_INT32, "1" },  /* debug levels 0-3 */
916 	{ NULL, 0, NULL }
917 };
918 
919 static const fmd_hdl_ops_t fmd_ops = {
920 	ip_fmdo_recv,		/* fmdo_recv */
921 	ip_timeout,		/* fmdo_timeout */
922 	NULL,			/* fmdo_close */
923 	NULL,			/* fmdo_stats */
924 	NULL,			/* fmdo_gc */
925 	ip_fmdo_send,		/* fmdo_send */
926 };
927 
928 static const fmd_hdl_info_t fmd_info = {
929 	"IP Transport Agent", "1.0", &fmd_ops, fmd_props
930 };
931 
932 /*
933  * Initialize the ip-transport module as either a server or a client.  Note
934  * that the ip-transport module is not enabled by default under Solaris:
935  * at present we require a developer or tool to "setprop ip_enable true".
936  * If ip-transport is needed in the future out-of-the-box on one or more Sun
937  * platforms, the code to check 'ip_enable' should be replaced with:
938  *
939  * (a) configuring ip-transport to operate in client mode by default,
940  * (b) a platform-specific configuration mechanism, or
941  * (c) a means to assure security and prevent denial-of-service attacks.
942  *
943  * Note that (c) is only an issue when the transport module operates
944  * in server mode (i.e. with the ip_server property set to NULL) on a
945  * generic Solaris system which may be exposed directly to the Internet.
946  * The property ip_bind_addr can be used to define a private network interface
947  * to use so that the service is not exposed to the Internet.
948  */
949 void
950 _fmd_init(fmd_hdl_t *hdl)
951 {
952 	char *addr, *auth, *p, *q, *r, *s;
953 	int err;
954 
955 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
956 		return; /* failed to register handle */
957 
958 	if (fmd_prop_get_int32(hdl, "ip_enable") == FMD_B_FALSE) {
959 		fmd_hdl_unregister(hdl);
960 		return;
961 	}
962 
963 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
964 	    sizeof (ip_stat) / sizeof (fmd_stat_t), (fmd_stat_t *)&ip_stat);
965 
966 	ip_hdl = hdl;
967 	(void) pthread_mutex_init(&ip_lock, NULL);
968 
969 	ip_burp = fmd_prop_get_int64(hdl, "ip_burp");
970 	ip_mtbf = fmd_prop_get_int32(hdl, "ip_mtbf");
971 	ip_external = fmd_prop_get_int32(hdl, "ip_external");
972 	ip_no_remote_repair = fmd_prop_get_int32(hdl, "ip_no_remote_repair");
973 	ip_hconly = fmd_prop_get_int32(hdl, "ip_hconly");
974 	ip_rdonly = fmd_prop_get_int32(hdl, "ip_rdonly");
975 	ip_hc_present_only = fmd_prop_get_int32(hdl, "ip_hc_present_only");
976 	ip_domain_name = fmd_prop_get_string(hdl, "ip_domain_name");
977 	ip_qlen = fmd_prop_get_int32(hdl, "ip_qlen");
978 	ip_retry = fmd_prop_get_int32(hdl, "ip_retry");
979 	ip_sleep = fmd_prop_get_int64(hdl, "ip_sleep");
980 	ip_translate = fmd_prop_get_int32(hdl, "ip_translate");
981 
982 	ip_size = (size_t)fmd_prop_get_int64(hdl, "ip_bufsize");
983 	ip_size = MAX(ip_size, sizeof (ip_hdr_t));
984 	ip_port = fmd_prop_get_string(hdl, "ip_port");
985 	ip_debug_level = fmd_prop_get_int32(hdl, "ip_debug_level");
986 
987 	ip_conns = NULL;
988 	addr = fmd_prop_get_string(hdl, "ip_bind_addr");
989 	if (addr != NULL) {
990 		err = ip_setup_addrs(addr, B_TRUE);
991 		if (err) {
992 			fmd_hdl_abort(hdl, "Unable to setup ip_bind_addr %s",
993 			    addr);
994 			return;
995 		}
996 		fmd_prop_free_string(hdl, addr);
997 	}
998 	addr = fmd_prop_get_string(hdl, "ip_server");
999 	if (addr != NULL) {
1000 		err = ip_setup_addrs(addr, B_FALSE);
1001 		if (err) {
1002 			fmd_hdl_abort(hdl, "Unable to setup ip_server %s",
1003 			    addr);
1004 			return;
1005 		}
1006 		fmd_prop_free_string(hdl, addr);
1007 	}
1008 
1009 	/*
1010 	 * If no specific connecitons configured then set up general server
1011 	 * listening on all network ports.
1012 	 */
1013 	if (ip_conns == NULL) {
1014 		if (ip_setup_addr(NULL, B_TRUE) != 0) {
1015 			fmd_hdl_abort(hdl, "Unable to setup server.");
1016 			return;
1017 		}
1018 	}
1019 
1020 	/*
1021 	 * If ip_authority is set, tokenize this string and turn it into an
1022 	 * FMA authority represented as a name-value pair list.  We will use
1023 	 * this authority for all transports created by this module.  If
1024 	 * ip_authority isn't set, we'll compute authorities on the fly.
1025 	 */
1026 	if ((auth = fmd_prop_get_string(hdl, "ip_authority")) != NULL) {
1027 		(void) nvlist_alloc(&ip_auth, 0, 0);
1028 		(void) nvlist_add_uint8(ip_auth,
1029 		    FM_VERSION, FM_FMRI_AUTH_VERSION);
1030 
1031 		s = strdupa(auth);
1032 		fmd_prop_free_string(hdl, auth);
1033 
1034 		for (p = strtok_r(s, ",", &q); p != NULL;
1035 		    p = strtok_r(NULL, ",", &q)) {
1036 
1037 			if ((r = strchr(p, '=')) == NULL) {
1038 				ip_addr_cleanup();
1039 				fmd_hdl_abort(hdl, "ip_authority element <%s> "
1040 				    "must be in <name>=<value> form\n", p);
1041 			}
1042 
1043 			*r = '\0';
1044 			(void) nvlist_add_string(ip_auth, p, r + 1);
1045 			*r = '=';
1046 		}
1047 	}
1048 
1049 	ip_start_connections();
1050 }
1051 
1052 void
1053 _fmd_fini(fmd_hdl_t *hdl)
1054 {
1055 	ip_quit++; /* set quit flag before signalling auxiliary threads */
1056 
1057 	while (ip_xps != NULL)
1058 		ip_xprt_destroy(ip_xps);
1059 
1060 	nvlist_free(ip_auth);
1061 
1062 	ip_addr_cleanup();
1063 
1064 	if (ip_domain_name != NULL)
1065 		fmd_prop_free_string(ip_hdl, ip_domain_name);
1066 
1067 	fmd_hdl_unregister(hdl);
1068 }
1069