xref: /titanic_52/usr/src/cmd/fm/modules/sun4v/etm/etm.c (revision ec851306d86fc4bd601a05db6d187cac3fb96b26)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*
29  * etm.c	FMA Event Transport Module implementation, a plugin of FMD
30  *		for sun4v/Ontario
31  *
32  * plugin for sending/receiving FMA events to/from service processor
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 /*
38  * --------------------------------- includes --------------------------------
39  */
40 
41 #include <sys/fm/protocol.h>
42 #include <sys/sysevent/eventdefs.h>
43 #include <sys/fm/util.h>
44 #include <netinet/in.h>
45 #include <fm/fmd_api.h>
46 #include <libsysevent.h>
47 
48 #include "etm_xport_api.h"
49 #include "etm_etm_proto.h"
50 #include "etm_impl.h"
51 
52 #include <pthread.h>
53 #include <signal.h>
54 #include <stropts.h>
55 #include <locale.h>
56 #include <strings.h>
57 #include <stdlib.h>
58 #include <unistd.h>
59 #include <limits.h>
60 #include <values.h>
61 #include <alloca.h>
62 #include <errno.h>
63 #include <fcntl.h>
64 #include <time.h>
65 
66 /*
67  * ----------------------------- forward decls -------------------------------
68  */
69 
70 static void
71 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);
72 
73 /*
74  * ------------------------- data structs for FMD ----------------------------
75  */
76 
77 static const fmd_hdl_ops_t fmd_ops = {
78 	etm_recv,	/* fmdo_recv */
79 	NULL,		/* fmdo_timeout */
80 	NULL,		/* fmdo_close */
81 	NULL,		/* fmdo_stats */
82 	NULL,		/* fmdo_gc */
83 };
84 
85 static const fmd_prop_t fmd_props[] = {
86 	{ ETM_PROP_NM_XPORT_ADDRS,	FMD_TYPE_STRING, "" },
87 	{ ETM_PROP_NM_DEBUG_LVL,	FMD_TYPE_INT32, "0" },
88 	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,	FMD_TYPE_INT32, "-1" },
89 	{ NULL, 0, NULL }
90 };
91 
92 static const fmd_hdl_info_t fmd_info = {
93 	"FMA Event Transport Module", "1.0", &fmd_ops, fmd_props
94 };
95 
96 /*
97  * ----------------------- private consts and defns --------------------------
98  */
99 
/* size in bytes of the misc buffer for variable sized protocol header fields */

#define	ETM_MISC_BUF_SZ	(4 * 1024)

/*
 * Try limit for IO operations, plus the growth rate and cap (in seconds)
 * of the exponential backoff sleep done between retries.
 *
 * Design_Note:	ETM will potentially retry forever IO operations that the
 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
 *		giving up after some number of seconds. This avoids
 *		dropping FMA events while the service processor is down,
 *		but at the risk of pending fmdo_recv() forever and
 *		overflowing FMD's event queue for ETM.
 *		A future TBD enhancement would be to always recv
 *		and send each ETM msg in a single read/write() to reduce
 *		the risk of failure between ETM msg hdr and body,
 *		assuming the MTU_SZ is large enough.
 */

#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
#define	ETM_TRY_BACKOFF_RATE	4
#define	ETM_TRY_BACKOFF_CAP	60

/* protocol transaction ids: the first id used and the step between ids */

#define	ETM_XID_BEG	1
#define	ETM_XID_INC	2
127 
128 /*
129  * ---------------------------- global data ----------------------------------
130  */
131 
132 static int
133 etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */
134 
135 static int
136 etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */
137 
138 static pthread_t
139 etm_svr_tid = NULL;	/* thread id of connection acceptance server */
140 
141 static volatile int
142 etm_is_dying = 0;	/* bool for dying (killing self) */
143 
144 static uint32_t
145 etm_xid_cur = ETM_XID_BEG; /* current transaction id for sends */
146 
147 static uint32_t
148 etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */
149 
150 static uint32_t
151 etm_xid_ver_set = 0;	/* xid of last CONTROL msg sent requesting ver set */
152 
153 static struct stats {
154 
155 	/* ETM msg counters */
156 
157 	fmd_stat_t etm_rd_hdr_fmaevent;
158 	fmd_stat_t etm_rd_hdr_control;
159 	fmd_stat_t etm_rd_hdr_response;
160 	fmd_stat_t etm_rd_body_fmaevent;
161 	fmd_stat_t etm_rd_body_control;
162 	fmd_stat_t etm_rd_body_response;
163 	fmd_stat_t etm_wr_hdr_fmaevent;
164 	fmd_stat_t etm_wr_hdr_control;
165 	fmd_stat_t etm_wr_hdr_response;
166 	fmd_stat_t etm_wr_body_fmaevent;
167 	fmd_stat_t etm_wr_body_control;
168 	fmd_stat_t etm_wr_body_response;
169 
170 	/* ETM byte counters */
171 
172 	fmd_stat_t etm_wr_fmd_bytes;
173 	fmd_stat_t etm_rd_fmd_bytes;
174 	fmd_stat_t etm_wr_xport_bytes;
175 	fmd_stat_t etm_rd_xport_bytes;
176 
177 	fmd_stat_t etm_magic_drop_bytes;
178 
179 	/* ETM [dropped] FMA event counters */
180 
181 	fmd_stat_t etm_rd_fmd_fmaevent;
182 	fmd_stat_t etm_wr_fmd_fmaevent;
183 
184 	fmd_stat_t etm_rd_drop_fmaevent;
185 	fmd_stat_t etm_wr_drop_fmaevent;
186 
187 	/* ETM protocol failures */
188 
189 	fmd_stat_t etm_magic_bad;
190 	fmd_stat_t etm_ver_bad;
191 	fmd_stat_t etm_msgtype_bad;
192 	fmd_stat_t etm_subtype_bad;
193 	fmd_stat_t etm_xid_bad;
194 	fmd_stat_t etm_fmaeventlen_bad;
195 	fmd_stat_t etm_respcode_bad;
196 	fmd_stat_t etm_timeout_bad;
197 	fmd_stat_t etm_evlens_bad;
198 
199 	/* IO operation failures */
200 
201 	fmd_stat_t etm_xport_wr_fail;
202 	fmd_stat_t etm_xport_rd_fail;
203 	fmd_stat_t etm_xport_pk_fail;
204 
205 	/* IO operation retries */
206 
207 	fmd_stat_t etm_xport_wr_retry;
208 	fmd_stat_t etm_xport_rd_retry;
209 	fmd_stat_t etm_xport_pk_retry;
210 
211 	/* system and library failures */
212 
213 	fmd_stat_t etm_os_sysevent_publish_fail;
214 	fmd_stat_t etm_os_sysevent_bind_fail;
215 	fmd_stat_t etm_os_nvlist_pack_fail;
216 	fmd_stat_t etm_os_nvlist_unpack_fail;
217 	fmd_stat_t etm_os_nvlist_size_fail;
218 	fmd_stat_t etm_os_pthread_create_fail;
219 
220 	/* xport API failures */
221 
222 	fmd_stat_t etm_xport_get_ev_addrv_fail;
223 	fmd_stat_t etm_xport_open_fail;
224 	fmd_stat_t etm_xport_close_fail;
225 	fmd_stat_t etm_xport_accept_fail;
226 	fmd_stat_t etm_xport_open_retry;
227 
228 	/* FMD entry point bad arguments */
229 
230 	fmd_stat_t etm_fmd_recv_badargs;
231 	fmd_stat_t etm_fmd_init_badargs;
232 	fmd_stat_t etm_fmd_fini_badargs;
233 
234 } etm_stats = {
235 
236 	/* ETM msg counters */
237 
238 	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
239 		"ETM fmaevent msg headers rcvd from xport" },
240 	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
241 		"ETM control msg headers rcvd from xport" },
242 	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
243 		"ETM response msg headers rcvd from xport" },
244 	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
245 		"ETM fmaevent msg bodies rcvd from xport" },
246 	{ "etm_rd_body_control", FMD_TYPE_UINT64,
247 		"ETM control msg bodies rcvd from xport" },
248 	{ "etm_rd_body_response", FMD_TYPE_UINT64,
249 		"ETM response msg bodies rcvd from xport" },
250 	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
251 		"ETM fmaevent msg headers sent to xport" },
252 	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
253 		"ETM control msg headers sent to xport" },
254 	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
255 		"ETM response msg headers sent to xport" },
256 	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
257 		"ETM fmaevent msg bodies sent to xport" },
258 	{ "etm_wr_body_control", FMD_TYPE_UINT64,
259 		"ETM control msg bodies sent to xport" },
260 	{ "etm_wr_body_response", FMD_TYPE_UINT64,
261 		"ETM response msg bodies sent to xport" },
262 
263 	/* ETM byte counters */
264 
265 	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
266 		"bytes of FMA events sent to FMD" },
267 	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
268 		"bytes of FMA events rcvd from FMD" },
269 	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
270 		"bytes of FMA events sent to xport" },
271 	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
272 		"bytes of FMA events rcvd from xport" },
273 
274 	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
275 		"bytes dropped from xport pre magic num" },
276 
277 	/* ETM [dropped] FMA event counters */
278 
279 	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
280 		"FMA events rcvd from FMD" },
281 	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
282 		"FMA events sent to FMD" },
283 
284 	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
285 		"dropped FMA events from xport" },
286 	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
287 		"dropped FMA events to xport" },
288 
289 	/* ETM protocol failures */
290 
291 	{ "etm_magic_bad", FMD_TYPE_UINT64,
292 		"ETM msgs w/ invalid magic num" },
293 	{ "etm_ver_bad", FMD_TYPE_UINT64,
294 		"ETM msgs w/ invalid protocol version" },
295 	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
296 		"ETM msgs w/ invalid message type" },
297 	{ "etm_subtype_bad", FMD_TYPE_UINT64,
298 		"ETM msgs w/ invalid sub type" },
299 	{ "etm_xid_bad", FMD_TYPE_UINT64,
300 		"ETM msgs w/ unmatched xid" },
301 	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
302 		"ETM msgs w/ invalid FMA event length" },
303 	{ "etm_respcode_bad", FMD_TYPE_UINT64,
304 		"ETM msgs w/ invalid response code" },
305 	{ "etm_timeout_bad", FMD_TYPE_UINT64,
306 		"ETM msgs w/ invalid timeout value" },
307 	{ "etm_evlens_bad", FMD_TYPE_UINT64,
308 		"ETM msgs w/ too many event lengths" },
309 
310 	/* IO operation failures */
311 
312 	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
313 		"xport write failures" },
314 	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
315 		"xport read failures" },
316 	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
317 		"xport peek failures" },
318 
319 	/* IO operation retries */
320 
321 	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
322 		"xport write retries" },
323 	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
324 		"xport read retries" },
325 	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
326 		"xport peek retries" },
327 
328 	/* system and library failures */
329 
330 	{ "etm_os_sysevent_publish_fail", FMD_TYPE_UINT64,
331 		"sysevent_evc_publish failures" },
332 	{ "etm_os_sysevent_bind_fail", FMD_TYPE_UINT64,
333 		"sysevent_evc_bind failures" },
334 	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
335 		"nvlist_pack failures" },
336 	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
337 		"nvlist_unpack failures" },
338 	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
339 		"nvlist_size failures" },
340 	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
341 		"pthread_create failures" },
342 
343 	/* transport API failures */
344 
345 	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
346 		"xport get event addrv API failures" },
347 	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
348 		"xport open API failures" },
349 	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
350 		"xport close API failures" },
351 	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
352 		"xport accept API failures" },
353 	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
354 		"xport open API retries" },
355 
356 	/* FMD entry point bad arguments */
357 
358 	{ "etm_fmd_recv_badargs", FMD_TYPE_UINT64,
359 		"bad arguments from fmd_recv entry point" },
360 	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
361 		"bad arguments from fmd_init entry point" },
362 	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
363 		"bad arguments from fmd_fini entry point" }
364 };
365 
366 /*
367  * -------------------------- support functions ------------------------------
368  */
369 
370 /*
371  * Design_Note:	Each failure worth reporting to FMD should be done using
372  *		a single call to fmd_hdl_error() as it logs an FMA event
373  *		for each call. Also be aware that all the fmd_hdl_*()
374  *		format strings currently use platform specific *printf()
375  *		routines; so "%p" under Solaris does not prepend "0x" to
376  *		the outputted hex digits, while Linux and VxWorks do.
377  */
378 
379 /*
380  * etm_hexdump - hexdump the given buffer (for debugging) using
381  *		the given FMD module handle
382  */
383 
384 static void
385 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
386 {
387 	uint8_t		*bp;		/* byte ptr */
388 	int		i, j;		/* index */
389 	char		cb[80];		/* char buf */
390 	unsigned int	n;		/* a byte of data for sprintf() */
391 
392 	bp = buf;
393 	j = 0;
394 
395 	/*
396 	 * Design_Note:	fmd_hdl_debug() auto adds a newline if missing;
397 	 *		hence cb exists to accumulate a longer string.
398 	 */
399 
400 	for (i = 1; i <= byte_cnt; i++) {
401 		n = *bp++;
402 		(void) sprintf(&cb[j], "%2.2x ", n);
403 		j += 3;
404 		/* add a newline every 16 bytes or at the buffer's end */
405 		if (((i % 16) == 0) || (i >= byte_cnt)) {
406 			cb[j-1] = '\0';
407 			fmd_hdl_debug(hdl, "%s\n", cb);
408 			j = 0;
409 		}
410 	} /* for each byte in the buffer */
411 
412 } /* etm_hexdump() */
413 
/*
 * etm_sleep - put the caller to sleep for the requested whole seconds,
 *		return 0 on success or the negated errno from nanosleep()
 *
 * Design_Note:	To avoid interfering with FMD's signal mask (SIGALRM)
 *		do not use [Solaris] sleep(3C) and instead use
 *		pthread_cond_wait() or nanosleep(), both of which
 *		are POSIX spec-ed to leave signal masks alone.
 *		This is needed for Solaris and Linux (domain and SP).
 */

static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec	req;	/* requested interval for nanosleep() */

	req.tv_nsec = 0;
	req.tv_sec = sleep_sec;

	/* on failure nanosleep() is assumed to have set errno */
	return ((nanosleep(&req, NULL) < 0) ? (-errno) : 0);

} /* etm_sleep() */
440 
441 /*
442  * etm_conn_open - open a connection to the given transport address,
443  *		return 0 and the opened connection handle
444  *		or -errno value
445  *
446  * caveats:	the err_substr is used in failure cases for calling
447  *		fmd_hdl_error()
448  */
449 
450 static int
451 etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
452 		etm_xport_addr_t addr, etm_xport_conn_t *connp)
453 {
454 	etm_xport_conn_t	conn;	/* connection to return */
455 	int			nev;	/* -errno value */
456 
457 	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
458 		nev = (-errno);
459 		fmd_hdl_error(hdl, "error: %s: errno %d\n",
460 					err_substr, errno);
461 		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
462 		return (nev);
463 	} else {
464 		*connp = conn;
465 		return (0);
466 	}
467 } /* etm_conn_open() */
468 
469 /*
470  * etm_conn_close - close the given connection,
471  *		return 0 or -errno value
472  *
473  * caveats:	the err_substr is used in failure cases for calling
474  *		fmd_hdl_error()
475  */
476 
477 static int
478 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
479 {
480 	int	nev;	/* -errno value */
481 
482 	if (etm_xport_close(hdl, conn) == NULL) {
483 		nev = (-errno);
484 		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
485 					err_substr, errno);
486 		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
487 		return (nev);
488 	} else {
489 		return (0);
490 	}
491 } /* etm_conn_close() */
492 
493 /*
494  * etm_io_op - perform an IO operation on the given connection
495  *		with the given buffer,
496  *		accommodating MTU size and retrying op if needed,
497  *		return how many bytes actually done by the op
498  *		or -errno value
499  *
500  * caveats:	the err_substr is used in failure cases for calling
501  *		fmd_hdl_error()
502  */
503 
504 static ssize_t
505 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
506 				void *buf, size_t byte_cnt, int io_op)
507 {
508 	ssize_t		rv;		/* ret val / byte count */
509 	ssize_t		n;		/* gen use */
510 	uint8_t		*datap;		/* ptr to data */
511 	size_t		mtu_sz;		/* MTU size in bytes */
512 	int		(*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
513 							void *, size_t);
514 	size_t		io_sz;		/* byte count for io_func_ptr */
515 	int		try_cnt;	/* number of tries done */
516 	int		sleep_sec;	/* exp backoff sleep period in sec */
517 	int		sleep_rv;	/* ret val from sleeping */
518 	fmd_stat_t	io_retry_stat;	/* IO retry stat to update */
519 	fmd_stat_t	io_fail_stat;	/* IO failure stat to update */
520 
521 	if ((conn == NULL) || (buf == NULL)) {
522 		return (-EINVAL);
523 	}
524 	switch (io_op) {
525 		case ETM_IO_OP_RD:
526 			io_func_ptr = etm_xport_read;
527 			io_retry_stat = etm_stats.etm_xport_rd_retry;
528 			io_fail_stat = etm_stats.etm_xport_rd_fail;
529 			break;
530 		case ETM_IO_OP_WR:
531 			io_func_ptr = etm_xport_write;
532 			io_retry_stat = etm_stats.etm_xport_wr_retry;
533 			io_fail_stat = etm_stats.etm_xport_wr_fail;
534 			break;
535 		case ETM_IO_OP_PK:
536 			io_func_ptr = etm_xport_peek;
537 			io_retry_stat = etm_stats.etm_xport_pk_retry;
538 			io_fail_stat = etm_stats.etm_xport_pk_fail;
539 			break;
540 		default:
541 			return (-EINVAL);
542 	}
543 	if (byte_cnt == 0) {
544 		return (byte_cnt);	/* nop */
545 	}
546 
547 	/* obtain [current] MTU size */
548 
549 	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
550 		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
551 	} else {
552 		mtu_sz = n;
553 	}
554 
555 	/* loop until all IO done, try limit exceeded, or real failure */
556 
557 	rv = 0;
558 	datap = buf;
559 	while (rv < byte_cnt) {
560 		io_sz = MIN((byte_cnt - rv), mtu_sz);
561 		try_cnt = 0;
562 		sleep_sec = 0;
563 
564 		/* when give up, return -errno value even if partly done */
565 
566 		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
567 								(-EAGAIN)) {
568 			try_cnt++;
569 			if (try_cnt > ETM_TRY_MAX_CNT) {
570 				rv = n;
571 				goto func_ret;
572 			}
573 			if (etm_is_dying) {
574 				rv = (-EINTR);
575 				goto func_ret;
576 			}
577 			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
578 				rv = sleep_rv;
579 				goto func_ret;
580 			}
581 			sleep_sec = ((sleep_sec == 0) ? 1 :
582 					(sleep_sec * ETM_TRY_BACKOFF_RATE));
583 			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
584 			io_retry_stat.fmds_value.ui64++;
585 			if (etm_debug_lvl >= 1) {
586 				fmd_hdl_debug(hdl, "info: retrying io op %d "
587 						"due to EAGAIN\n", io_op);
588 			}
589 		} /* while trying the io operation */
590 
591 		if (etm_is_dying) {
592 			rv = (-EINTR);
593 			goto func_ret;
594 		}
595 		if (n < 0) {
596 			rv = n;
597 			goto func_ret;
598 		}
599 		/* avoid spinning CPU when given 0 bytes but no error */
600 		if (n == 0) {
601 			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
602 				rv = sleep_rv;
603 				goto func_ret;
604 			}
605 		}
606 		rv += n;
607 		datap += n;
608 	} /* while still have more data */
609 
610 func_ret:
611 
612 	if (rv < 0) {
613 		io_fail_stat.fmds_value.ui64++;
614 		fmd_hdl_error(hdl, "error: %s: errno %d\n",
615 					err_substr, (int)(-rv));
616 	}
617 	if (etm_debug_lvl >= 3) {
618 		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
619 					io_op, (int)rv, (int)byte_cnt);
620 	}
621 	return (rv);
622 
623 } /* etm_io_op() */
624 
625 /*
626  * etm_magic_read - read the magic number of an ETM message header
627  *		from the given connection into the given buffer,
628  *		return 0 or -errno value
629  *
630  * Design_Note:	This routine is intended to help protect ETM from protocol
631  *		framing errors as might be caused by an SP reset / crash in
632  *		the middle of an ETM message send; the connection will be
633  *		read from for as many bytes as needed until the magic number
634  *		is found using a sliding buffer for comparisons.
635  */
636 
637 static int
638 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
639 {
640 	int		rv;		/* ret val */
641 	uint32_t	magic_num;	/* magic number */
642 	int		byte_cnt;	/* count of bytes read */
643 	uint8_t		buf5[4+1];	/* sliding input buffer */
644 	int		i, j;		/* indices into buf5 */
645 	ssize_t		n;		/* gen use */
646 	uint8_t		drop_buf[1024];	/* dropped bytes buffer */
647 
648 	rv = 0;		/* assume success */
649 	magic_num = 0;
650 	byte_cnt = 0;
651 	j = 0;
652 
653 	/* magic number bytes are sent in network (big endian) order */
654 
655 	while (magic_num != ETM_PROTO_MAGIC_NUM) {
656 		if ((n = etm_io_op(hdl, "bad io read on magic",
657 				conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
658 			rv = n;
659 			goto func_ret;
660 		}
661 		byte_cnt++;
662 		j = MIN((j + 1), sizeof (magic_num));
663 		if (byte_cnt < sizeof (magic_num)) {
664 			continue;
665 		}
666 
667 		if (byte_cnt > sizeof (magic_num)) {
668 			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
669 			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
670 			drop_buf[i] = buf5[0];
671 			for (i = 0; i < j; i++) {
672 				buf5[i] = buf5[i+1];
673 			} /* for sliding the buffer contents */
674 		}
675 		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
676 		magic_num = ntohl(magic_num);
677 	} /* for reading bytes until find magic number */
678 
679 func_ret:
680 
681 	if (byte_cnt != sizeof (magic_num)) {
682 		fmd_hdl_error(hdl, "warning: bad proto frame "
683 				"implies corrupt/lost msg(s)\n");
684 	}
685 	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
686 		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
687 		fmd_hdl_debug(hdl, "info: magic drop hexdump "
688 				"first %d of %d bytes:\n",
689 				i, byte_cnt - sizeof (magic_num));
690 		etm_hexdump(hdl, drop_buf, i);
691 	}
692 
693 	if (rv == 0) {
694 		*magic_ptr = magic_num;
695 	}
696 	return (rv);
697 
698 } /* etm_magic_read() */
699 
700 /*
701  * etm_hdr_read - allocate, read, and validate a [variable sized]
702  *		ETM message header from the given connection,
703  *		return the allocated ETM message header
704  *		(which is guaranteed to be large enough to reuse as a
705  *		RESPONSE msg hdr) and its size
706  *		or NULL and set errno on failure
707  */
708 
709 static void *
710 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
711 {
712 	uint8_t			*hdrp;		/* ptr to header to return */
713 	size_t			hdr_sz;		/* sizeof *hdrp */
714 	etm_proto_v1_pp_t	pp; 		/* protocol preamble */
715 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
716 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
717 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
718 	uint32_t		*lenp;		/* ptr to FMA event length */
719 	ssize_t			i, n;		/* gen use */
720 	uint8_t	misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
721 	int			dummy_int;	/* dummy var to appease lint */
722 
723 	hdrp = NULL; hdr_sz = 0;
724 
725 	/* read the magic number which starts the protocol preamble */
726 
727 	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
728 		errno = (-n);
729 		etm_stats.etm_magic_bad.fmds_value.ui64++;
730 		return (NULL);
731 	}
732 
733 	/* read the rest of the protocol preamble all at once */
734 
735 	if ((n = etm_io_op(hdl, "bad io read on preamble",
736 				conn, &pp.pp_proto_ver,
737 				sizeof (pp) - sizeof (pp.pp_magic_num),
738 				ETM_IO_OP_RD)) < 0) {
739 		errno = (-n);
740 		return (NULL);
741 	}
742 
743 	/*
744 	 * Design_Note:	The magic number was already network decoded; but
745 	 *		some other preamble fields also need to be decoded,
746 	 *		specifically pp_xid and pp_timeout. The rest of the
747 	 *		preamble fields are byte sized and hence need no
748 	 *		decoding.
749 	 */
750 
751 	pp.pp_xid = ntohl(pp.pp_xid);
752 	pp.pp_timeout = ntohl(pp.pp_timeout);
753 
754 	/* sanity check the header as best we can */
755 
756 	if (pp.pp_proto_ver != ETM_PROTO_V1) {
757 		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
758 					(int)pp.pp_proto_ver);
759 		errno = EPROTO;
760 		etm_stats.etm_ver_bad.fmds_value.ui64++;
761 		return (NULL);
762 	}
763 
764 	dummy_int = pp.pp_msg_type;
765 	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
766 	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
767 		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
768 		errno = EBADMSG;
769 		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
770 		return (NULL);
771 	}
772 
773 	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */
774 
775 	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
776 
777 		ev_hdrp = (void*)&misc_buf[0];
778 		hdr_sz = sizeof (*ev_hdrp);
779 		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));
780 
781 		/* sanity check the header's timeout */
782 
783 		if (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE) {
784 			errno = ETIME;
785 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
786 			return (NULL);
787 		}
788 
789 		/* get all FMA event lengths from the header */
790 
791 		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
792 		i = -1;	/* cnt of length entries preceding 0 */
793 		do {
794 			i++; lenp++;
795 			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
796 							ETM_MISC_BUF_SZ) {
797 				errno = E2BIG;	/* ridiculous size */
798 				etm_stats.etm_evlens_bad.fmds_value.ui64++;
799 				return (NULL);
800 			}
801 			if ((n = etm_io_op(hdl, "bad io read on event len",
802 						conn, lenp, sizeof (*lenp),
803 						ETM_IO_OP_RD)) < 0) {
804 				errno = (-n);
805 				return (NULL);
806 			}
807 			*lenp = ntohl(*lenp);
808 
809 		} while (*lenp != 0);
810 		i += 0; /* first len already counted by sizeof(ev_hdr) */
811 		hdr_sz += (i * sizeof (*lenp));
812 
813 		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;
814 
815 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
816 
817 		ctl_hdrp = (void*)&misc_buf[0];
818 		hdr_sz = sizeof (*ctl_hdrp);
819 		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));
820 
821 		/* sanity check the header's sub type (control selector) */
822 
823 		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
824 		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
825 			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
826 					(int)ctl_hdrp->ctl_pp.pp_sub_type);
827 			errno = EBADMSG;
828 			etm_stats.etm_subtype_bad.fmds_value.ui64++;
829 			return (NULL);
830 		}
831 
832 		/* get the control length */
833 
834 		if ((n = etm_io_op(hdl, "bad io read on ctl len",
835 					conn, &ctl_hdrp->ctl_len,
836 					sizeof (ctl_hdrp->ctl_len),
837 					ETM_IO_OP_RD)) < 0) {
838 			errno = (-n);
839 			return (NULL);
840 		}
841 
842 		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);
843 
844 		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;
845 
846 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
847 
848 		resp_hdrp = (void*)&misc_buf[0];
849 		hdr_sz = sizeof (*resp_hdrp);
850 		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));
851 
852 		/* sanity check the header's timeout */
853 
854 		if (resp_hdrp->resp_pp.pp_timeout !=
855 						ETM_PROTO_V1_TIMEOUT_NONE) {
856 			errno = ETIME;
857 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
858 			return (NULL);
859 		}
860 
861 		/* get the response code and length */
862 
863 		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
864 					conn, &resp_hdrp->resp_code,
865 					sizeof (resp_hdrp->resp_code) +
866 					sizeof (resp_hdrp->resp_len),
867 					ETM_IO_OP_RD)) < 0) {
868 			errno = (-n);
869 			return (NULL);
870 		}
871 
872 		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
873 		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);
874 
875 		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;
876 
877 	} /* whether we have FMA_EVENT, CONTROL, RESPONSE msg */
878 
879 	/*
880 	 * choose a header size that allows hdr reuse for RESPONSE msgs,
881 	 * allocate and populate the message header, and
882 	 * return alloc size to caller for later free of hdrp
883 	 */
884 
885 	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
886 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
887 	(void) memcpy(hdrp, misc_buf, hdr_sz);
888 
889 	if (etm_debug_lvl >= 3) {
890 		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n",
891 								hdr_sz);
892 		etm_hexdump(hdl, hdrp, hdr_sz);
893 	}
894 	*szp = hdr_sz;
895 	return (hdrp);
896 
897 } /* etm_hdr_read() */
898 
899 /*
900  * etm_hdr_write - create and write a [variable sized] ETM message header
901  *		to the given connection appropriate for the given FMA event
902  *		and type of nvlist encoding,
903  *		return the allocated ETM message header and its size
904  *		or NULL and set errno on failure
905  */
906 
907 static void*
908 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
909 						int encoding, size_t *szp)
910 {
911 	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
912 	size_t			hdr_sz;		/* sizeof *hdrp */
913 	uint32_t		*lenp;		/* ptr to FMA event length */
914 	size_t			evsz;		/* packed FMA event size */
915 	ssize_t			n;		/* gen use */
916 
917 	/* allocate and populate the message header for 1 FMA event */
918 
919 	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
920 
921 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
922 
923 	/*
924 	 * Design_Note: Although the ETM protocol supports it, sun4v/Ontario
925 	 *		does not wait for responses/ACKs on FMA events. All
926 	 *		such msgs are sent with ETM_PROTO_V1_TIMEOUT_NONE.
927 	 */
928 
929 	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
930 	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
931 	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
932 	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
933 	hdrp->ev_pp.pp_sub_type = 0;
934 	hdrp->ev_pp.pp_rsvd_pad = 0;
935 	hdrp->ev_pp.pp_xid = etm_xid_cur;
936 	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
937 	etm_xid_cur += ETM_XID_INC;
938 	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
939 	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);
940 
941 	lenp = &hdrp->ev_lens[0];
942 
943 	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
944 		errno = n;
945 		fmd_hdl_free(hdl, hdrp, hdr_sz);
946 		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
947 		return (NULL);
948 	}
949 
950 	/* indicate 1 FMA event, network encode its length, and 0-terminate */
951 
952 	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
953 	*lenp = 0; *lenp = htonl(*lenp); lenp++;
954 
955 	/*
956 	 * write the network encoded header to the transport, and
957 	 * return alloc size to caller for later free
958 	 */
959 
960 	if ((n = etm_io_op(hdl, "bad io write on event hdr",
961 				conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
962 		errno = (-n);
963 		fmd_hdl_free(hdl, hdrp, hdr_sz);
964 		return (NULL);
965 	}
966 
967 	*szp = hdr_sz;
968 	return (hdrp);
969 
970 } /* etm_hdr_write() */
971 
972 /*
973  * etm_post_to_fmd - post the given FMA event to FMD
974  *			[via sysevent or via a FMD transport API call ],
975  *			return 0 or -errno value
976  *
977  * Design_Note:	This routine exists to ease future porting to both
978  *		FMA Phase 2 FMD as well as porting to Linux which lacks
979  *		a native sysevent.
980  */
981 
982 static int
983 etm_post_to_fmd(fmd_hdl_t *hdl, nvlist_t *evp)
984 {
985 	int			rv;		/* ret val */
986 	evchan_t		*scp;		/* sysevent channel ptr */
987 	ssize_t			n;		/* gen use */
988 
989 	rv = 0; /* default success */
990 
991 	scp = NULL;
992 
993 	if ((n = sysevent_evc_bind(FM_ERROR_CHAN, &scp,
994 				EVCH_CREAT | EVCH_HOLD_PEND)) != 0) {
995 		rv = (-n);
996 		fmd_hdl_error(hdl, "error: FMA event dropped: "
997 				"sysevent bind errno %d\n", n);
998 		etm_stats.etm_os_sysevent_bind_fail.fmds_value.ui64++;
999 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
1000 		goto func_ret;
1001 	}
1002 
1003 	if ((n = sysevent_evc_publish(scp, EC_FM, ESC_FM_ERROR, "com.sun",
1004 				getexecname(), evp, EVCH_SLEEP)) != 0) {
1005 		rv = (-n);
1006 		fmd_hdl_error(hdl, "error: FMA event dropped: "
1007 				"sysevent publish errno %d\n", n);
1008 		etm_stats.etm_os_sysevent_publish_fail.fmds_value.ui64++;
1009 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
1010 		goto func_ret;
1011 	}
1012 
1013 func_ret:
1014 
1015 	if (scp != NULL) {
1016 		sysevent_evc_unbind(scp);
1017 	}
1018 	if (rv == 0) {
1019 		etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
1020 		(void) nvlist_size(evp, (size_t *)&n, NV_ENCODE_XDR);
1021 		etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += n;
1022 		if (etm_debug_lvl >= 1) {
1023 			fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n",
1024 								evp);
1025 		}
1026 	}
1027 	return (rv);
1028 
1029 } /* etm_post_to_fmd() */
1030 
1031 /*
1032  * etm_req_ver_negot - send an ETM control message to the other end requesting
1033  *			that the ETM protocol version be negotiated/set
1034  */
1035 
1036 static void
1037 etm_req_ver_negot(fmd_hdl_t *hdl)
1038 {
1039 	etm_xport_addr_t	*addrv;		/* default dst addr(s) */
1040 	etm_xport_conn_t	conn;		/* connection to other end */
1041 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1042 	size_t			hdr_sz;		/* sizeof header */
1043 	uint8_t			*body_buf;	/* msg body buffer */
1044 	uint32_t		body_sz;	/* sizeof *body_buf */
1045 	ssize_t			i;		/* gen use */
1046 
1047 	/* populate an ETM control msg to send */
1048 
1049 	hdr_sz = sizeof (*ctl_hdrp);
1050 	body_sz = (1 + 1);		/* V1 byte plus null byte */
1051 
1052 	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);
1053 
1054 	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
1055 	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
1056 	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
1057 	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_SET_REQ;
1058 	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
1059 	etm_xid_ver_set = etm_xid_cur;
1060 	etm_xid_cur += ETM_XID_INC;
1061 	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_set);
1062 	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
1063 	ctl_hdrp->ctl_len = htonl(body_sz);
1064 
1065 	body_buf = (void*)&ctl_hdrp->ctl_len;
1066 	body_buf += sizeof (ctl_hdrp->ctl_len);
1067 	*body_buf++ = ETM_PROTO_V1;
1068 	*body_buf++ = '\0';
1069 
1070 	/*
1071 	 * open and close a connection to send the ETM control msg
1072 	 * to any/all of the default dst addrs
1073 	 */
1074 
1075 	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
1076 		fmd_hdl_error(hdl,
1077 			"error: bad ctl dst addrs errno %d\n", errno);
1078 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
1079 		goto func_ret;
1080 	}
1081 
1082 	for (i = 0; addrv[i] != NULL; i++) {
1083 
1084 		if (etm_conn_open(hdl, "bad conn open during ver negot",
1085 					addrv[i], &conn) < 0) {
1086 			continue;
1087 		}
1088 		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
1089 					conn, ctl_hdrp, hdr_sz + body_sz,
1090 					ETM_IO_OP_WR) >= 0) {
1091 			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
1092 			etm_stats.etm_wr_body_control.fmds_value.ui64++;
1093 		}
1094 		(void) etm_conn_close(hdl, "bad conn close during ver negot",
1095 									conn);
1096 
1097 	} /* foreach dst addr */
1098 
1099 func_ret:
1100 
1101 	if (addrv != NULL) {
1102 		etm_xport_free_addrv(hdl, addrv);
1103 	}
1104 	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);
1105 
1106 } /* etm_req_ver_negot() */
1107 
1108 /*
1109  * etm_handle_new_conn - receive an ETM message sent from the other end via
1110  *			the given open connection, pull out any FMA events
1111  *			and post them to the local FMD (or handle any ETM
1112  *			control or response msg); when done, close the
1113  *			connection
1114  */
1115 
1116 static void
1117 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn)
1118 {
1119 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
1120 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1121 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
1122 	size_t			hdr_sz;		/* sizeof header */
1123 	uint8_t			*body_buf;	/* msg body buffer */
1124 	uint32_t		body_sz;	/* sizeof body_buf */
1125 	uint8_t			*bp;		/* byte ptr within body_buf */
1126 	nvlist_t		*evp;		/* ptr to unpacked FMA event */
1127 	char			*class;		/* FMA event class */
1128 	ssize_t			i, n;		/* gen use */
1129 
1130 	fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn);
1131 
1132 	ev_hdrp = NULL;
1133 	ctl_hdrp = NULL;
1134 	resp_hdrp = NULL;
1135 	body_buf = NULL;
1136 	class = NULL;
1137 	evp = NULL;
1138 
1139 	/* read a network decoded message header from the connection */
1140 
1141 	/*
1142 	 * Design_Note:	We rely on the fact that all message types have
1143 	 *		a common protocol preamble; if this fact should
1144 	 *		ever change it may break the code below. We also
1145 	 *		rely on the fact that FMA_EVENT and CONTROL headers
1146 	 *		returned will be sized large enough to reuse them
1147 	 *		as RESPONSE headers if the remote endpt asked
1148 	 *		for a response via the pp_timeout field.
1149 	 */
1150 
1151 	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
1152 		/* errno assumed set by above call */
1153 		fmd_hdl_error(hdl, "error: FMA event dropped: "
1154 					"bad hdr read errno %d\n", errno);
1155 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
1156 		goto func_ret;
1157 	}
1158 
1159 	/*
1160 	 * handle the message based on its preamble pp_msg_type
1161 	 * which is known to be valid from etm_hdr_read() checks
1162 	 */
1163 
1164 	if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
1165 
1166 		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");
1167 
1168 		/* allocate buf large enough for whole body / all FMA events */
1169 
1170 		body_sz = 0;
1171 		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
1172 			body_sz += ev_hdrp->ev_lens[i];
1173 		} /* for summing sizes of all FMA events */
1174 
1175 		if (etm_debug_lvl >= 1) {
1176 			fmd_hdl_debug(hdl, "info: event lengths %d sum %d\n",
1177 								i, body_sz);
1178 		}
1179 
1180 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
1181 
1182 		/* read all the FMA events at once */
1183 
1184 		if ((n = etm_io_op(hdl, "FMA event dropped: "
1185 					"bad io read on event bodies",
1186 					conn, body_buf, body_sz,
1187 					ETM_IO_OP_RD)) < 0) {
1188 			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
1189 			goto func_ret;
1190 		}
1191 
1192 		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
1193 
1194 		/* immediately close the connection to improve xport thruput */
1195 
1196 		(void) etm_conn_close(hdl, "bad conn close "
1197 					"after event body read", conn);
1198 		conn = NULL;
1199 
1200 		/* unpack each FMA event and post it to FMD */
1201 
1202 		bp = body_buf;
1203 		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
1204 			if ((n = nvlist_unpack((char *)bp,
1205 					ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
1206 				fmd_hdl_error(hdl, "error: FMA event dropped: "
1207 						"bad event body unpack "
1208 						"errno %d\n", n);
1209 				if (etm_debug_lvl >= 2) {
1210 					fmd_hdl_debug(hdl, "info: FMA event "
1211 						"hexdump %d bytes:\n",
1212 						ev_hdrp->ev_lens[i]);
1213 					etm_hexdump(hdl, bp,
1214 						ev_hdrp->ev_lens[i]);
1215 				}
1216 				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
1217 					ui64++;
1218 				etm_stats.etm_rd_drop_fmaevent.fmds_value.
1219 					ui64++;
1220 				bp += ev_hdrp->ev_lens[i];
1221 				continue;
1222 			}
1223 			etm_stats.etm_rd_body_fmaevent.fmds_value.ui64++;
1224 			if (etm_debug_lvl >= 1) {
1225 				(void) nvlist_lookup_string(evp, FM_CLASS,
1226 								&class);
1227 				if (class == NULL) {
1228 					class = "NULL";
1229 				}
1230 				fmd_hdl_debug(hdl, "info: FMA event %p "
1231 						"class %s\n", evp, class);
1232 			}
1233 			(void) etm_post_to_fmd(hdl, evp);
1234 			nvlist_free(evp);
1235 			bp += ev_hdrp->ev_lens[i];
1236 		} /* foreach FMA event in the body buffer */
1237 
1238 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
1239 
1240 		ctl_hdrp = (void*)ev_hdrp;
1241 
1242 		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
1243 		if (etm_debug_lvl >= 1) {
1244 			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
1245 					(int)ctl_hdrp->ctl_pp.pp_sub_type,
1246 					ctl_hdrp->ctl_pp.pp_xid);
1247 		}
1248 
1249 		/*
1250 		 * if we have a VER_SET_REQ read the body and validate
1251 		 * the protocol version set contained therein,
1252 		 * otherwise we have a PING_REQ (which has no body)
1253 		 * and we [also] fall thru to the code which sends a
1254 		 * response msg if the pp_timeout field requested one
1255 		 */
1256 
1257 		if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_SET_REQ) {
1258 
1259 			body_sz = ctl_hdrp->ctl_len;
1260 			body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
1261 
1262 			if ((n = etm_io_op(hdl, "bad io read on ctl body",
1263 						conn, body_buf, body_sz,
1264 						ETM_IO_OP_RD)) < 0) {
1265 				goto func_ret;
1266 			}
1267 
1268 			/* complain if version set lacks our version */
1269 
1270 			n = 0;
1271 			for (i = 0; i < body_sz; i++) {
1272 				if (body_buf[i] == ETM_PROTO_V1) {
1273 					n = 1;
1274 					break;
1275 				}
1276 			}
1277 			if (n == 0) {
1278 				etm_stats.etm_ver_bad.fmds_value.ui64++;
1279 			}
1280 
1281 		} /* if got version set request */
1282 
1283 		etm_stats.etm_rd_body_control.fmds_value.ui64++;
1284 
1285 		/* if a response is requested send one (reuse received hdr) */
1286 
1287 		if (ctl_hdrp->ctl_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE) {
1288 			resp_hdrp = (void*)ctl_hdrp;
1289 			resp_hdrp->resp_len = 0;
1290 			if (ctl_hdrp->ctl_pp.pp_sub_type ==
1291 						ETM_CTL_SEL_VER_SET_REQ) {
1292 				resp_hdrp->resp_len = 1;
1293 			}
1294 			resp_hdrp->resp_code = 0;
1295 			resp_hdrp->resp_pp.pp_timeout =
1296 						ETM_PROTO_V1_TIMEOUT_NONE;
1297 			resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
1298 			if ((n = etm_io_op(hdl, "bad io write on resp hdr",
1299 						conn, resp_hdrp,
1300 						sizeof (*resp_hdrp),
1301 						ETM_IO_OP_WR)) < 0) {
1302 				goto func_ret;
1303 			}
1304 			etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
1305 			if (resp_hdrp->resp_pp.pp_sub_type ==
1306 						ETM_CTL_SEL_VER_SET_REQ) {
1307 				/* send our default proto ver in resp body */
1308 				bp = (void*)&i;
1309 				*bp = ETM_PROTO_V1;
1310 				if ((n = etm_io_op(hdl,
1311 						"bad io write on resp body",
1312 						conn, bp, 1,
1313 						ETM_IO_OP_WR)) < 0) {
1314 					goto func_ret;
1315 				}
1316 			} /* if need to send proto ver in [tmp] msg body */
1317 			etm_stats.etm_wr_body_response.fmds_value.ui64++;
1318 			fmd_hdl_debug(hdl, "info: response sent "
1319 					"xid 0x%x code %d body len %d\n",
1320 					resp_hdrp->resp_pp.pp_xid,
1321 					resp_hdrp->resp_code,
1322 					resp_hdrp->resp_len);
1323 		} /* if a response was requested */
1324 
1325 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
1326 
1327 		resp_hdrp = (void*)ev_hdrp;
1328 
1329 		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
1330 		if (etm_debug_lvl >= 1) {
1331 			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
1332 					(int)resp_hdrp->resp_pp.pp_xid);
1333 		}
1334 
1335 		body_sz = resp_hdrp->resp_len;
1336 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
1337 
1338 		if ((n = etm_io_op(hdl, "bad io read on resp len",
1339 				conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
1340 			goto func_ret;
1341 		}
1342 
1343 		etm_stats.etm_rd_body_response.fmds_value.ui64++;
1344 
1345 		/*
1346 		 * look up the xid to interpret response body
1347 		 *
1348 		 * ping is a nop; for ver set just confirm ETM_PROTO_V1
1349 		 * was negotiated
1350 		 */
1351 
1352 		if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
1353 			(resp_hdrp->resp_pp.pp_xid != etm_xid_ver_set)) {
1354 			etm_stats.etm_xid_bad.fmds_value.ui64++;
1355 			goto func_ret;
1356 		}
1357 
1358 		if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_set) {
1359 			if (body_buf[0] != ETM_PROTO_V1) {
1360 				etm_stats.etm_ver_bad.fmds_value.ui64++;
1361 				goto func_ret;
1362 			}
1363 		} /* if have resp to last req to set proto ver */
1364 
1365 	} /* whether we have a FMA_EVENT, CONTROL, or RESPONSE msg */
1366 
1367 func_ret:
1368 
1369 	if (conn != NULL) {
1370 		(void) etm_conn_close(hdl, "bad conn close after event recv",
1371 									conn);
1372 	}
1373 	if (ev_hdrp != NULL) {
1374 		fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
1375 	}
1376 	if (body_buf != NULL) {
1377 		fmd_hdl_free(hdl, body_buf, body_sz);
1378 	}
1379 } /* etm_handle_new_conn() */
1380 
1381 /*
1382  * etm_server - loop forever accepting new connections
1383  *		using the given FMD handle,
1384  *		handling any ETM msgs sent from the other side
1385  *		via each such connection
1386  */
1387 
1388 static void
1389 etm_server(void *arg)
1390 {
1391 	etm_xport_conn_t	conn;		/* connection handle */
1392 	ssize_t			n;		/* gen use */
1393 	fmd_hdl_t		*hdl;		/* FMD handle */
1394 
1395 	hdl = arg;
1396 
1397 	fmd_hdl_debug(hdl, "info: connection server starting\n");
1398 
1399 	while (!etm_is_dying) {
1400 		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
1401 			/* errno assumed set by above call */
1402 			n = errno;
1403 			if (etm_is_dying) {
1404 				break;
1405 			}
1406 			fmd_hdl_debug(hdl,
1407 				"error: bad conn accept errno %d\n", n);
1408 			etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
1409 			/* avoid spinning CPU */
1410 			(void) etm_sleep(ETM_SLEEP_SLOW);
1411 			continue;
1412 		}
1413 
1414 		/*
1415 		 * Design_Note: etm_handle_new_conn() will close the
1416 		 *		accepted connection when done. In early designs
1417 		 *		etm_handle_new_conn() was spawned as a
1418 		 *		separate thread via pthread_create();
1419 		 *		however fmd_thr_create() constrains thread
1420 		 *		creation to prevent spawned threads from
1421 		 *		spawning others (ie, no grandchildren).
1422 		 *		Hence etm_handle_new_conn() is now called
1423 		 *		as a simple function [w/ multiple args].
1424 		 */
1425 
1426 		etm_handle_new_conn(hdl, conn);
1427 
1428 	} /* while accepting new connections until ETM dies */
1429 
1430 	/* ETM is dying (probably due to "fmadm unload etm") */
1431 
1432 	if (etm_debug_lvl >= 1) {
1433 		fmd_hdl_debug(hdl, "info: connection server is dying\n");
1434 	}
1435 } /* etm_server() */
1436 
1437 /*
1438  * -------------------------- FMD entry points -------------------------------
1439  */
1440 
1441 /*
1442  * _fmd_init - initialize the transport for use by ETM and start the
1443  *		server daemon to accept new connections to us
1444  *
1445  *		FMD will read our *.conf and subscribe us to FMA events
1446  */
1447 
1448 void
1449 _fmd_init(fmd_hdl_t *hdl)
1450 {
1451 	ssize_t			n;		/* gen use */
1452 
1453 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
1454 		return; /* invalid data in configuration file */
1455 	}
1456 
1457 	fmd_hdl_debug(hdl, "info: module initializing\n");
1458 
1459 	/* setup statistics and properties from FMD */
1460 
1461 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
1462 				sizeof (etm_stats) / sizeof (fmd_stat_t),
1463 				(fmd_stat_t *)&etm_stats);
1464 
1465 	etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
1466 	etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
1467 						ETM_PROP_NM_DEBUG_MAX_EV_CNT);
1468 	fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
1469 			"etm_debug_max_ev_cnt %d\n",
1470 			etm_debug_lvl, etm_debug_max_ev_cnt);
1471 
1472 	/*
1473 	 * init the transport,
1474 	 * start the connection acceptance server, and
1475 	 * request protocol version be negotiated
1476 	 */
1477 
1478 	if ((n = etm_xport_init(hdl)) != 0) {
1479 		fmd_hdl_error(hdl, "error: bad xport init errno %d\n", (-n));
1480 		fmd_hdl_unregister(hdl);
1481 		return;
1482 	}
1483 
1484 	etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);
1485 	etm_req_ver_negot(hdl);
1486 
1487 	fmd_hdl_debug(hdl, "info: module initialized ok\n");
1488 
1489 } /* _fmd_init() */
1490 
1491 /*
1492  * etm_recv - receive an FMA event from FMD and transport it
1493  *		to the remote endpoint
1494  */
1495 
1496 /*ARGSUSED*/
1497 void
1498 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
1499 {
1500 	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
1501 	etm_xport_conn_t	conn;	/* connection handle */
1502 	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
1503 	ssize_t			i, n;	/* gen use */
1504 	size_t			sz;	/* header size */
1505 	size_t			buflen;	/* size of packed FMA event */
1506 	uint8_t			*buf;	/* tmp buffer for packed FMA event */
1507 
1508 	buflen = 0;
1509 	(void) nvlist_size(evp, &buflen, NV_ENCODE_XDR);
1510 	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
1511 	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;
1512 
1513 	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
1514 	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
1515 		etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
1516 
1517 	/*
1518 	 * if the debug limit has been set, avoid excessive traffic,
1519 	 * for example, an infinite cycle using loopback nodes
1520 	 */
1521 
1522 	if ((etm_debug_max_ev_cnt >= 0) &&
1523 		(etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
1524 						etm_debug_max_ev_cnt)) {
1525 		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
1526 			"event %p cnt %llu > debug max %d\n", evp,
1527 			etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
1528 			etm_debug_max_ev_cnt);
1529 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1530 		return;
1531 	}
1532 
1533 	/* allocate a buffer for the FMA event and nvlist pack it */
1534 
1535 	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
1536 
1537 	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
1538 					NV_ENCODE_XDR, 0)) != 0) {
1539 		fmd_hdl_error(hdl, "error: FMA event dropped: "
1540 				"event pack errno %d\n", n);
1541 		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
1542 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1543 		fmd_hdl_free(hdl, buf, buflen);
1544 		return;
1545 	}
1546 
1547 	/* get vector of dst addrs and send the FMA event to each one */
1548 
1549 	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
1550 		fmd_hdl_error(hdl, "error: FMA event dropped: "
1551 				"bad event dst addrs errno %d\n", errno);
1552 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
1553 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1554 		fmd_hdl_free(hdl, buf, buflen);
1555 		return;
1556 	}
1557 
1558 	for (i = 0; addrv[i] != NULL; i++) {
1559 
1560 		/* open a new connection to this dst addr */
1561 
1562 		if ((n = etm_conn_open(hdl, "FMA event dropped: "
1563 				"bad conn open on new ev",
1564 				addrv[i], &conn)) < 0) {
1565 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1566 			continue;
1567 		}
1568 
1569 		/* write the ETM message header */
1570 
1571 		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
1572 							&sz)) == NULL) {
1573 			fmd_hdl_error(hdl, "error: FMA event dropped: "
1574 					"bad hdr write errno %d\n", errno);
1575 			(void) etm_conn_close(hdl,
1576 				"bad conn close per bad hdr wr", conn);
1577 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1578 			continue;
1579 		}
1580 
1581 		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
1582 		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
1583 		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
1584 								evp);
1585 
1586 		/* write the ETM message body, ie, the packed nvlist */
1587 
1588 		if ((n = etm_io_op(hdl, "FMA event dropped: "
1589 					"bad io write on event", conn,
1590 					buf, buflen, ETM_IO_OP_WR)) < 0) {
1591 			(void) etm_conn_close(hdl,
1592 				"bad conn close per bad body wr", conn);
1593 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
1594 			continue;
1595 		}
1596 
1597 		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
1598 		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
1599 		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
1600 								evp);
1601 
1602 		/* close the connection */
1603 
1604 		(void) etm_conn_close(hdl, "bad conn close after event send",
1605 									conn);
1606 	} /* foreach dst addr in the vector */
1607 
1608 	etm_xport_free_addrv(hdl, addrv);
1609 	fmd_hdl_free(hdl, buf, buflen);
1610 
1611 } /* etm_recv() */
1612 
1613 /*
1614  * _fmd_fini - stop the server daemon and teardown the transport
1615  */
1616 
1617 void
1618 _fmd_fini(fmd_hdl_t *hdl)
1619 {
1620 	ssize_t	n;	/* gen use */
1621 
1622 	fmd_hdl_debug(hdl, "info: module finializing\n");
1623 
1624 	/* kill the connection server ; wait for it to die */
1625 
1626 	etm_is_dying = 1;
1627 
1628 	if (etm_svr_tid != NULL) {
1629 		fmd_thr_signal(hdl, etm_svr_tid);
1630 		fmd_thr_destroy(hdl, etm_svr_tid);
1631 		etm_svr_tid = NULL;
1632 	} /* if server thread was successfully created */
1633 
1634 	/* teardown the transport */
1635 
1636 	if ((n = etm_xport_fini(hdl)) != 0) {
1637 		fmd_hdl_error(hdl, "warning: xport fini errno %d\n", (-n));
1638 	}
1639 
1640 	fmd_hdl_debug(hdl, "info: module finalized ok\n");
1641 
1642 } /* _fmd_fini() */
1643