xref: /titanic_52/usr/src/cmd/fm/modules/sun4v/etm/etm.c (revision 4c1177a46d4d850e30806d4e27d635527bba8e90)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * etm.c	FMA Event Transport Module implementation, a plugin of FMD
29  *		for sun4v/Ontario
30  *
31  * plugin for sending/receiving FMA events to/from service processor
32  */
33 
34 /*
35  * --------------------------------- includes --------------------------------
36  */
37 
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/fm/ldom.h>
41 #include <sys/strlog.h>
42 #include <sys/syslog.h>
43 #include <sys/libds.h>
44 #include <netinet/in.h>
45 #include <fm/fmd_api.h>
46 
47 #include "etm_xport_api.h"
48 #include "etm_etm_proto.h"
49 #include "etm_impl.h"
50 #include "etm_iosvc.h"
51 #include "etm_filter.h"
52 #include "etm_ckpt.h"
53 
54 #include <pthread.h>
55 #include <signal.h>
56 #include <stropts.h>
57 #include <locale.h>
58 #include <strings.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61 #include <limits.h>
62 #include <values.h>
63 #include <alloca.h>
64 #include <errno.h>
65 #include <dlfcn.h>
66 #include <link.h>
67 #include <fcntl.h>
68 #include <time.h>
69 
70 /*
71  * ----------------------------- forward decls -------------------------------
72  */
73 
/* receive entry point; installed in the fmdo_recv slot of fmd_ops */
static void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);

/* send entry point; installed in the fmdo_send slot of fmd_ops */
static int
etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl);

/* presumably the thread routine that sends to a remote root domain */
static void
etm_send_to_remote_root(void *arg);

/* presumably the thread routine that receives from a remote root domain */
static void
etm_recv_from_remote_root(void *arg);

/* removes checkpoint state for the given io svc queue element (see defn) */
static void
etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele);
89 /*
90  * ------------------------- data structs for FMD ----------------------------
91  */
92 
/*
 * Entry point table referenced by fmd_info below. Only the receive and
 * send slots are populated; every other slot is deliberately NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	etm_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
	etm_send,	/* fmdo_send */
};

/*
 * Module properties referenced by fmd_info below. Each entry gives the
 * property name, its FMD type, and its default value written as a string;
 * the table is terminated by the all-NULL entry.
 */
static const fmd_prop_t fmd_props[] = {
	{ ETM_PROP_NM_XPORT_ADDRS,		FMD_TYPE_STRING, "" },
	{ ETM_PROP_NM_DEBUG_LVL,		FMD_TYPE_INT32, "0" },
	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,		FMD_TYPE_INT32, "-1" },
	{ ETM_PROP_NM_CONSOLE,			FMD_TYPE_BOOL, "false" },
	{ ETM_PROP_NM_SYSLOGD,			FMD_TYPE_BOOL, "true" },
	{ ETM_PROP_NM_FACILITY,			FMD_TYPE_STRING, "LOG_DAEMON" },
	{ ETM_PROP_NM_MAX_RESP_Q_LEN,		FMD_TYPE_UINT32, "32" },
	{ ETM_PROP_NM_BAD_ACC_TO_SEC,		FMD_TYPE_UINT32, "1" },
	{ ETM_PROP_NM_FMA_RESP_WAIT_TIME,	FMD_TYPE_INT32, "240" },
	{ NULL, 0, NULL }
};


/*
 * Module info tying together the ops and property tables above; the two
 * leading strings are presumably the module description and version.
 */
static const fmd_hdl_info_t fmd_info = {
	"FMA Event Transport Module", "1.2", &fmd_ops, fmd_props
};
119 
120 /*
121  * ----------------------- private consts and defns --------------------------
122  */
123 
124 /* misc buffer for variable sized protocol header fields */
125 
#define	ETM_MISC_BUF_SZ	(4 * 1024)	/* size in bytes (4 KiB) */

/* ldom type of this domain; starts out as LDOM_TYPE_LEGACY */
static uint32_t
etm_ldom_type = LDOM_TYPE_LEGACY;

/* try limit for IO operations w/ capped exp backoff sleep on retry */

/*
 * Design_Note:	ETM will potentially retry forever IO operations that the
 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
 *		giving up after some number of seconds. This avoids
 *		dropping FMA events while the service processor is down,
 *		but at the risk of pending fmdo_recv() forever and
 *		overflowing FMD's event queue for ETM.
 *		A future TBD enhancement would be to always recv
 *		and send each ETM msg in a single read/write() to reduce
 *		the risk of failure between ETM msg hdr and body,
 *		assuming the MTU_SZ is large enough.
 */

/* MAXINT - 1 tries makes the retry limit unbounded in practice */
#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
/* multiplier applied to the sleep period after each retry */
#define	ETM_TRY_BACKOFF_RATE	(4)
/* upper bound (in sec) on the sleep period between retries */
#define	ETM_TRY_BACKOFF_CAP	(60)

/* amount to increment protocol transaction id on each new send */

#define	ETM_XID_INC		(2)
153 
/*
 * One pending response on the responder queue: the connection to reply
 * on, the message header to reply with (and its size), and the response
 * code to send. Elements are singly linked through rqe_nextp.
 */
typedef struct etm_resp_q_ele {

	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
	int32_t			rqe_resp_code;	/* response code to send */

	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */

} etm_resp_q_ele_t;	/* responder queue element */
164 
165 /*
166  * ---------------------------- global data ----------------------------------
167  */
168 
static fmd_hdl_t
*init_hdl = NULL;	/* used in mem allocator and several other places */

static int
etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */

static int
etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */

static fmd_xprt_t
*etm_fmd_xprt = NULL;	/* FMD transport layer handle */

/*
 * NOTE(review): the pthread_t variables below are initialized with NULL,
 * which assumes pthread_t accepts a null pointer constant on the target
 * platform -- confirm before porting.
 */

static pthread_t
etm_svr_tid = NULL;	/* thread id of connection acceptance server */

static pthread_t
etm_resp_tid = NULL;	/* thread id of msg responder */

static etm_resp_q_ele_t
*etm_resp_q_head = NULL; /* ptr to cur head of responder queue */

static etm_resp_q_ele_t
*etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */

static uint32_t
etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */

static uint32_t
etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */

static uint32_t
etm_bad_acc_to_sec = 0;	/* sleep timeout (in sec) after bad conn accept */

static pthread_mutex_t
etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER;	/* protects responder queue */

static pthread_cond_t
etm_resp_q_cv = PTHREAD_COND_INITIALIZER;	/* nudges msg responder */

static volatile int
etm_is_dying = 0;	/* bool for dying (killing self) */

static uint32_t
etm_xid_cur = 0;	/* current transaction id for sends */

static uint32_t
etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */

static uint32_t
etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */

static uint32_t
etm_xid_posted_logged_ev = 0;
			/* xid of last FMA_EVENT msg/event posted OK to FMD */

static uint32_t
etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */

static uint8_t
etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */

/*
 * NOTE(review): the initial value 30 differs from the "240" default that
 * fmd_props declares for ETM_PROP_NM_FMA_RESP_WAIT_TIME; presumably this
 * variable is overwritten from the property at init time -- confirm.
 */
static uint32_t
etm_fma_resp_wait_time = 30;	/*  time (sec) wait for fma event resp */

static pthread_mutex_t
etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */

static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
static int syslog_facility;	/* log(7D) facility (part of priority) */
static int syslog_logfd = -1;	/* log(7D) file descriptor */
static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
static int syslog_file = 0;	/* log to syslog_logfd */
static int syslog_cons = 0;	/* log to syslog_msgfd */
242 
/*
 * Lookup table mapping syslog facility names (as strings) to their
 * LOG_* values; terminated by the { NULL, 0 } entry.
 */
static const struct facility {
	const char *fac_name;	/* facility name string */
	int fac_value;		/* matching LOG_* facility value */
} syslog_facs[] = {
	{ "LOG_DAEMON", LOG_DAEMON },
	{ "LOG_LOCAL0", LOG_LOCAL0 },
	{ "LOG_LOCAL1", LOG_LOCAL1 },
	{ "LOG_LOCAL2", LOG_LOCAL2 },
	{ "LOG_LOCAL3", LOG_LOCAL3 },
	{ "LOG_LOCAL4", LOG_LOCAL4 },
	{ "LOG_LOCAL5", LOG_LOCAL5 },
	{ "LOG_LOCAL6", LOG_LOCAL6 },
	{ "LOG_LOCAL7", LOG_LOCAL7 },
	{ NULL, 0 }
};
258 
/*
 * Module statistics. Every member is an fmd_stat_t, and the initializer
 * further below supplies, for each member, its name string, the type
 * FMD_TYPE_UINT64, and a description string.
 *
 * The initializer is positional: its entries must stay in exactly the
 * same order as the member declarations, so add/remove both together.
 */
static struct stats {

	/* ETM msg counters */

	fmd_stat_t etm_rd_hdr_fmaevent;
	fmd_stat_t etm_rd_hdr_control;
	fmd_stat_t etm_rd_hdr_alert;
	fmd_stat_t etm_rd_hdr_response;
	fmd_stat_t etm_rd_body_fmaevent;
	fmd_stat_t etm_rd_body_control;
	fmd_stat_t etm_rd_body_alert;
	fmd_stat_t etm_rd_body_response;
	fmd_stat_t etm_wr_hdr_fmaevent;
	fmd_stat_t etm_wr_hdr_control;
	fmd_stat_t etm_wr_hdr_response;
	fmd_stat_t etm_wr_body_fmaevent;
	fmd_stat_t etm_wr_body_control;
	fmd_stat_t etm_wr_body_response;

	fmd_stat_t etm_rd_max_ev_per_msg;
	fmd_stat_t etm_wr_max_ev_per_msg;

	fmd_stat_t etm_resp_q_cur_len;
	fmd_stat_t etm_resp_q_max_len;

	/* ETM byte counters */

	fmd_stat_t etm_wr_fmd_bytes;
	fmd_stat_t etm_rd_fmd_bytes;
	fmd_stat_t etm_wr_xport_bytes;
	fmd_stat_t etm_rd_xport_bytes;

	fmd_stat_t etm_magic_drop_bytes;

	/* ETM [dropped] FMA event counters */

	fmd_stat_t etm_rd_fmd_fmaevent;
	fmd_stat_t etm_wr_fmd_fmaevent;

	fmd_stat_t etm_rd_drop_fmaevent;
	fmd_stat_t etm_wr_drop_fmaevent;

	fmd_stat_t etm_rd_dup_fmaevent;
	fmd_stat_t etm_wr_dup_fmaevent;

	fmd_stat_t etm_rd_dup_alert;
	fmd_stat_t etm_wr_dup_alert;

	fmd_stat_t etm_enq_drop_resp_q;
	fmd_stat_t etm_deq_drop_resp_q;

	/* ETM protocol failures */

	fmd_stat_t etm_magic_bad;
	fmd_stat_t etm_ver_bad;
	fmd_stat_t etm_msgtype_bad;
	fmd_stat_t etm_subtype_bad;
	fmd_stat_t etm_xid_bad;
	fmd_stat_t etm_fmaeventlen_bad;
	fmd_stat_t etm_respcode_bad;
	fmd_stat_t etm_timeout_bad;
	fmd_stat_t etm_evlens_bad;

	/* IO operation failures */

	fmd_stat_t etm_xport_wr_fail;
	fmd_stat_t etm_xport_rd_fail;
	fmd_stat_t etm_xport_pk_fail;

	/* IO operation retries */

	fmd_stat_t etm_xport_wr_retry;
	fmd_stat_t etm_xport_rd_retry;
	fmd_stat_t etm_xport_pk_retry;

	/* system and library failures */

	fmd_stat_t etm_os_nvlist_pack_fail;
	fmd_stat_t etm_os_nvlist_unpack_fail;
	fmd_stat_t etm_os_nvlist_size_fail;
	fmd_stat_t etm_os_pthread_create_fail;

	/* xport API failures */

	fmd_stat_t etm_xport_get_ev_addrv_fail;
	fmd_stat_t etm_xport_open_fail;
	fmd_stat_t etm_xport_close_fail;
	fmd_stat_t etm_xport_accept_fail;
	fmd_stat_t etm_xport_open_retry;

	/* FMD entry point bad arguments */

	fmd_stat_t etm_fmd_init_badargs;
	fmd_stat_t etm_fmd_fini_badargs;

	/* Alert logging errors */

	fmd_stat_t etm_log_err;
	fmd_stat_t etm_msg_err;

	/* miscellaneous stats */

	fmd_stat_t etm_reset_xport;

} etm_stats = {

	/* ETM msg counters */

	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers rcvd from xport" },
	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers rcvd from xport" },
	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
		"ETM alert msg headers rcvd from xport" },
	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers rcvd from xport" },
	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies rcvd from xport" },
	{ "etm_rd_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies rcvd from xport" },
	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
		"ETM alert msg bodies rcvd from xport" },
	{ "etm_rd_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies rcvd from xport" },
	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers sent to xport" },
	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers sent to xport" },
	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers sent to xport" },
	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies sent to xport" },
	{ "etm_wr_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies sent to xport" },
	{ "etm_wr_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies sent to xport" },

	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg from xport" },
	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg to xport" },

	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
		"cur enqueued response msgs to xport" },
	/* NOTE(review): "enqueable" is misspelled in this runtime string */
	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
		"max enqueable response msgs to xport" },

	/* ETM byte counters */

	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to FMD" },
	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from FMD" },
	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to xport" },
	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from xport" },

	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
		"bytes dropped from xport pre magic num" },

	/* ETM [dropped] FMA event counters */

	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events rcvd from FMD" },
	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events sent to FMD" },

	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events from xport" },
	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events to xport" },

	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
	    "duplicate FMA events rcvd from xport" },
	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
	    "duplicate FMA events sent to xport" },

	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
	    "duplicate ALERTs rcvd from xport" },
	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
	    "duplicate ALERTs sent to xport" },

	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
	    "dropped response msgs on enq" },
	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
	    "dropped response msgs on deq" },

	/* ETM protocol failures */

	{ "etm_magic_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid magic num" },
	{ "etm_ver_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid protocol version" },
	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid message type" },
	{ "etm_subtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid sub type" },
	{ "etm_xid_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ unmatched xid" },
	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid FMA event length" },
	{ "etm_respcode_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid response code" },
	{ "etm_timeout_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid timeout value" },
	{ "etm_evlens_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ too many event lengths" },

	/* IO operation failures */

	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
		"xport write failures" },
	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
		"xport read failures" },
	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
		"xport peek failures" },

	/* IO operation retries */

	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
		"xport write retries" },
	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
		"xport read retries" },
	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
		"xport peek retries" },

	/* system and library failures */

	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
		"nvlist_pack failures" },
	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
		"nvlist_unpack failures" },
	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
		"nvlist_size failures" },
	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
		"pthread_create failures" },

	/* transport API failures */

	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
		"xport get event addrv API failures" },
	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
		"xport open API failures" },
	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
		"xport close API failures" },
	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
		"xport accept API failures" },
	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
		"xport open API retries" },

	/* FMD entry point bad arguments */

	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
	    "bad arguments from fmd_init entry point" },
	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
	    "bad arguments from fmd_fini entry point" },

	/* Alert logging errors */

	{ "etm_log_err", FMD_TYPE_UINT64,
		"failed to log message to log(7D)" },
	{ "etm_msg_err", FMD_TYPE_UINT64,
		"failed to log message to sysmsg(7D)" },

	/* miscellaneous stats */

	{ "etm_reset_xport", FMD_TYPE_UINT64,
		"xport resets after xport API failure" }
};
529 
530 
531 /*
532  * -------------------- global data for Root ldom-------------------------
533  */
534 
/* NOTE: not static, so this symbol has external linkage */
ldom_hdl_t
*etm_lhp = NULL;		/* ldom pointer */

/*
 * Handle, path and mode for the libds shared object named by etm_dl_path.
 * The etm_ds_* function pointers below all start out NULL; presumably they
 * are resolved from that library at run time -- the code doing so is not
 * in this part of the file.
 */
static void *etm_dl_hdl = (void *)NULL;
static const char *etm_dl_path = "libds.so.1";
static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL);

static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL;
static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen,
    size_t *msglen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL;
static int (*etm_ds_fini)(void) = (int (*)(void))NULL;
552 
/* presumably protects iosvc_list -- confirm against its users */
static pthread_mutex_t
iosvc_list_lock =  PTHREAD_MUTEX_INITIALIZER;

static pthread_t
etm_async_e_tid = NULL;	/* thread id of io svc async event handler */

/*
 * Template ETM FMA_EVENT message header for io svc traffic; the xid field
 * is left 0 here and filled in when the message is queued for sending.
 */
static etm_proto_v1_ev_hdr_t iosvc_hdr = {
	ETM_PROTO_MAGIC_NUM,	/* magic number */
	ETM_PROTO_V1,		/* default to V1, not checked */
	ETM_MSG_TYPE_FMA_EVENT,	/* Root Domain introduces only FMA events */
	0,			/* sub-type */
	0,			/* pad */
	0,			/* add the xid at the Q send time */
	ETM_PROTO_V1_TIMEOUT_NONE,
	0			/* ev_lens, 0-termed, after 1 FMA event */
};
569 
570 /*
571  * static iosvc_list
572  */
/*
 * One etm_iosvc_t slot per root domain; only the first two members of
 * each slot are initialized explicitly (empty name and 0), the rest are
 * zero-filled by the usual aggregate initialization rules.
 * NOTE(review): eight initializers are written out, which presumes
 * NUM_OF_ROOT_DOMAINS is at least 8 -- confirm.
 */
static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = {
	{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
	{"", 0}, {"", 0}
};

/*
 * A fully initialized etm_iosvc_t with an empty ldom name; the per-field
 * comments below describe each positional member.
 */
static etm_iosvc_t io_svc = {
	"\0",				/* ldom_name */
	PTHREAD_COND_INITIALIZER,	/* nudges */
	PTHREAD_MUTEX_INITIALIZER,	/* protects the iosvc msg Q */
	NULL,				/* iosvc msg Q head */
	NULL,				/* iosvc msg Q tail */
	0,				/* msg Q current length */
	100,				/* msg Q max length */
	0,				/* current transaction id */
	0,				/* xid of last event posted to FMD */
	DS_INVALID_HDL,			/* DS handle */
	NULL,				/* fmd xprt handle */
	NULL,				/* tid 4 send to remote RootDomain */
	NULL,				/* tid 4 recv from remote RootDomain */
	PTHREAD_COND_INITIALIZER,	/* nudges etm_send_to_remote_root */
	PTHREAD_MUTEX_INITIALIZER,	/* protects msg_ack_cv */
	0,				/* send/recv threads are not dying */
	0,				/* flag for start sending msg Q */
	0				/* indicate if the ACK has come  */
};
/* NOTE: not static, so this pointer to io_svc has external linkage */
etm_iosvc_t *io_svc_p = &io_svc;
599 
600 
601 static uint32_t
602 flags;					/* flags for fmd_xprt_open */
603 
604 static etm_async_event_ele_t
605 async_event_q[ASYNC_EVENT_Q_SIZE];	/* holds the async events */
606 
607 static uint32_t
608 etm_async_q_head = 0;		/* ptr to cur head of async event queue */
609 
610 static uint32_t
611 etm_async_q_tail = 0;		/* ptr to cur tail of async event queue */
612 
613 static uint32_t
614 etm_async_q_cur_len = 0;	/* cur length (ele cnt) of async event queue */
615 
616 static uint32_t
617 etm_async_q_max_len = ASYNC_EVENT_Q_SIZE;
618 				/* max length (ele cnt) of async event queue */
619 
620 static pthread_cond_t
621 etm_async_event_q_cv = PTHREAD_COND_INITIALIZER;
622 				/* nudges  async event handler */
623 
624 static pthread_mutex_t
625 etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER;
626 				/* protects async event q */
627 
628 static ds_ver_t
629 etm_iosvc_vers[] = { { 1, 0} };
630 
631 #define	ETM_NVERS	(sizeof (etm_iosvc_vers) / sizeof (ds_ver_t))
632 
633 static ds_capability_t
634 iosvc_caps = {
635 	"ETM",				/* svc_id */
636 	etm_iosvc_vers,			/* vers */
637 	ETM_NVERS			/* number of vers */
638 };
639 
640 static void
641 etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver,
642     ds_domain_hdl_t did);
643 
644 static void
645 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg);
646 
647 static ds_ops_t
648 iosvc_ops = {
649 	etm_iosvc_reg_handler,		/* ds_reg_cb */
650 	etm_iosvc_unreg_handler,	/* ds_unreg_cb */
651 	NULL,				/* ds_data_cb */
652 	NULL				/* cb_arg */
653 };
654 
655 
656 /*
657  * -------------------------- support functions ------------------------------
658  */
659 
660 /*
661  * Design_Note:	Each failure worth reporting to FMD should be done using
662  *		a single call to fmd_hdl_error() as it logs an FMA event
663  *		for each call. Also be aware that all the fmd_hdl_*()
664  *		format strings currently use platform specific *printf()
665  *		routines; so "%p" under Solaris does not prepend "0x" to
666  *		the outputted hex digits, while Linux and VxWorks do.
667  */
668 
669 
670 /*
671  * etm_show_time - display the current time of day (for debugging) using
672  *		the given FMD module handle and annotation string
673  */
674 
675 static void
676 etm_show_time(fmd_hdl_t *hdl, char *note_str)
677 {
678 	struct timeval		tmv;		/* timeval */
679 
680 	(void) gettimeofday(&tmv, NULL);
681 	fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n",
682 	    note_str, tmv.tv_sec, tmv.tv_usec);
683 
684 } /* etm_show_time() */
685 
686 /*
687  * etm_hexdump - hexdump the given buffer (for debugging) using
688  *		the given FMD module handle
689  */
690 
691 static void
692 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
693 {
694 	uint8_t		*bp;		/* byte ptr */
695 	int		i, j;		/* index */
696 	char		cb[80];		/* char buf */
697 	unsigned int	n;		/* a byte of data for sprintf() */
698 
699 	bp = buf;
700 	j = 0;
701 
702 	/*
703 	 * Design_Note:	fmd_hdl_debug() auto adds a newline if missing;
704 	 *		hence cb exists to accumulate a longer string.
705 	 */
706 
707 	for (i = 1; i <= byte_cnt; i++) {
708 		n = *bp++;
709 		(void) sprintf(&cb[j], "%2.2x ", n);
710 		j += 3;
711 		/* add a newline every 16 bytes or at the buffer's end */
712 		if (((i % 16) == 0) || (i >= byte_cnt)) {
713 			cb[j-1] = '\0';
714 			fmd_hdl_debug(hdl, "%s\n", cb);
715 			j = 0;
716 		}
717 	} /* for each byte in the buffer */
718 
719 } /* etm_hexdump() */
720 
/*
 * etm_sleep - put the calling thread to sleep for sleep_sec whole seconds
 *
 * returns:	0 when nanosleep() succeeds,
 *		otherwise the negated errno left by nanosleep()
 *
 * Design_Note:	[Solaris] sleep(3C) is avoided on purpose because it can
 *		interfere with FMD's signal mask (SIGALRM); nanosleep()
 *		(like pthread_cond_wait()) is POSIX spec-ed to leave
 *		signal masks alone. This holds for Solaris and Linux
 *		(domain and SP).
 */

static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec	delay;	/* requested sleep interval */

	delay.tv_nsec = 0;
	delay.tv_sec = sleep_sec;

	/* errno assumed set by nanosleep() on failure */
	return ((nanosleep(&delay, NULL) < 0) ? (-errno) : 0);

} /* etm_sleep() */
747 
748 /*
749  * etm_conn_open - open a connection to the given transport address,
750  *		return 0 and the opened connection handle
751  *		or -errno value
752  *
753  * caveats:	the err_substr is used in failure cases for calling
754  *		fmd_hdl_error()
755  */
756 
757 static int
758 etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
759 		etm_xport_addr_t addr, etm_xport_conn_t *connp)
760 {
761 	etm_xport_conn_t	conn;	/* connection to return */
762 	int			nev;	/* -errno value */
763 
764 	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
765 		nev = (-errno);
766 		fmd_hdl_error(hdl, "error: %s: errno %d\n",
767 		    err_substr, errno);
768 		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
769 		return (nev);
770 	} else {
771 		*connp = conn;
772 		return (0);
773 	}
774 } /* etm_conn_open() */
775 
776 /*
777  * etm_conn_close - close the given connection,
778  *		return 0 or -errno value
779  *
780  * caveats:	the err_substr is used in failure cases for calling
781  *		fmd_hdl_error()
782  */
783 
784 static int
785 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
786 {
787 	int	nev;	/* -errno value */
788 
789 	if (etm_xport_close(hdl, conn) == NULL) {
790 		nev = (-errno);
791 		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
792 		    err_substr, errno);
793 		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
794 		return (nev);
795 	} else {
796 		return (0);
797 	}
798 } /* etm_conn_close() */
799 
800 /*
801  * etm_io_op - perform an IO operation on the given connection
802  *		with the given buffer,
803  *		accommodating MTU size and retrying op if needed,
804  *		return how many bytes actually done by the op
805  *		or -errno value
806  *
807  * caveats:	the err_substr is used in failure cases for calling
808  *		fmd_hdl_error()
809  */
810 
811 static ssize_t
812 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
813 				void *buf, size_t byte_cnt, int io_op)
814 {
815 	ssize_t		rv;		/* ret val / byte count */
816 	ssize_t		n;		/* gen use */
817 	uint8_t		*datap;		/* ptr to data */
818 	size_t		mtu_sz;		/* MTU size in bytes */
819 	int		(*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
820 	    void *, size_t);
821 	size_t		io_sz;		/* byte count for io_func_ptr */
822 	int		try_cnt;	/* number of tries done */
823 	int		sleep_sec;	/* exp backoff sleep period in sec */
824 	int		sleep_rv;	/* ret val from sleeping */
825 	fmd_stat_t	io_retry_stat;	/* IO retry stat to update */
826 	fmd_stat_t	io_fail_stat;	/* IO failure stat to update */
827 
828 	if ((conn == NULL) || (buf == NULL)) {
829 		return (-EINVAL);
830 	}
831 	switch (io_op) {
832 	case ETM_IO_OP_RD:
833 		io_func_ptr = etm_xport_read;
834 		io_retry_stat = etm_stats.etm_xport_rd_retry;
835 		io_fail_stat = etm_stats.etm_xport_rd_fail;
836 		break;
837 	case ETM_IO_OP_WR:
838 		io_func_ptr = etm_xport_write;
839 		io_retry_stat = etm_stats.etm_xport_wr_retry;
840 		io_fail_stat = etm_stats.etm_xport_wr_fail;
841 		break;
842 	default:
843 		return (-EINVAL);
844 	}
845 	if (byte_cnt == 0) {
846 		return (byte_cnt);	/* nop */
847 	}
848 
849 	/* obtain [current] MTU size */
850 
851 	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
852 		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
853 	} else {
854 		mtu_sz = n;
855 	}
856 
857 	/* loop until all IO done, try limit exceeded, or real failure */
858 
859 	rv = 0;
860 	datap = buf;
861 	while (rv < byte_cnt) {
862 		io_sz = MIN((byte_cnt - rv), mtu_sz);
863 		try_cnt = 0;
864 		sleep_sec = 0;
865 
866 		/* when give up, return -errno value even if partly done */
867 
868 		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
869 		    (-EAGAIN)) {
870 			try_cnt++;
871 			if (try_cnt > ETM_TRY_MAX_CNT) {
872 				rv = n;
873 				goto func_ret;
874 			}
875 			if (etm_is_dying) {
876 				rv = (-EINTR);
877 				goto func_ret;
878 			}
879 			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
880 				rv = sleep_rv;
881 				goto func_ret;
882 			}
883 			sleep_sec = ((sleep_sec == 0) ? 1 :
884 			    (sleep_sec * ETM_TRY_BACKOFF_RATE));
885 			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
886 			io_retry_stat.fmds_value.ui64++;
887 			if (etm_debug_lvl >= 1) {
888 				fmd_hdl_debug(hdl, "info: retrying io op %d "
889 				    "due to EAGAIN\n", io_op);
890 			}
891 		} /* while trying the io operation */
892 
893 		if (etm_is_dying) {
894 			rv = (-EINTR);
895 			goto func_ret;
896 		}
897 		if (n < 0) {
898 			rv = n;
899 			goto func_ret;
900 		}
901 		/* avoid spinning CPU when given 0 bytes but no error */
902 		if (n == 0) {
903 			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
904 				rv = sleep_rv;
905 				goto func_ret;
906 			}
907 		}
908 		rv += n;
909 		datap += n;
910 	} /* while still have more data */
911 
912 func_ret:
913 
914 	if (rv < 0) {
915 		io_fail_stat.fmds_value.ui64++;
916 		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
917 		    err_substr, (int)(-rv));
918 	}
919 	if (etm_debug_lvl >= 3) {
920 		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
921 		    io_op, (int)rv, (int)byte_cnt);
922 	}
923 	return (rv);
924 
925 } /* etm_io_op() */
926 
927 /*
928  * etm_magic_read - read the magic number of an ETM message header
929  *		from the given connection into the given buffer,
930  *		return 0 or -errno value
931  *
932  * Design_Note:	This routine is intended to help protect ETM from protocol
933  *		framing errors as might be caused by an SP reset / crash in
934  *		the middle of an ETM message send; the connection will be
935  *		read from for as many bytes as needed until the magic number
936  *		is found using a sliding buffer for comparisons.
937  */
938 
939 static int
940 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
941 {
942 	int		rv;		/* ret val */
943 	uint32_t	magic_num;	/* magic number */
944 	int		byte_cnt;	/* count of bytes read */
945 	uint8_t		buf5[4+1];	/* sliding input buffer */
946 	int		i, j;		/* indices into buf5 */
947 	ssize_t		n;		/* gen use */
948 	uint8_t		drop_buf[1024];	/* dropped bytes buffer */
949 
950 	rv = 0;		/* assume success */
951 	magic_num = 0;
952 	byte_cnt = 0;
953 	j = 0;
954 
955 	/* magic number bytes are sent in network (big endian) order */
956 
957 	while (magic_num != ETM_PROTO_MAGIC_NUM) {
958 		if ((n = etm_io_op(hdl, "bad io read on magic",
959 		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
960 			rv = n;
961 			goto func_ret;
962 		}
963 		byte_cnt++;
964 		j = MIN((j + 1), sizeof (magic_num));
965 		if (byte_cnt < sizeof (magic_num)) {
966 			continue;
967 		}
968 
969 		if (byte_cnt > sizeof (magic_num)) {
970 			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
971 			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
972 			drop_buf[i] = buf5[0];
973 			for (i = 0; i < j; i++) {
974 				buf5[i] = buf5[i+1];
975 			} /* for sliding the buffer contents */
976 		}
977 		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
978 		magic_num = ntohl(magic_num);
979 	} /* for reading bytes until find magic number */
980 
981 func_ret:
982 
983 	if (byte_cnt != sizeof (magic_num)) {
984 		fmd_hdl_debug(hdl, "warning: bad proto frame "
985 		    "implies corrupt/lost msg(s)\n");
986 	}
987 	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
988 		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
989 		fmd_hdl_debug(hdl, "info: magic drop hexdump "
990 		    "first %d of %d bytes:\n", i,
991 		    byte_cnt - sizeof (magic_num));
992 		etm_hexdump(hdl, drop_buf, i);
993 	}
994 
995 	if (rv == 0) {
996 		*magic_ptr = magic_num;
997 	}
998 	return (rv);
999 
1000 } /* etm_magic_read() */
1001 
1002 /*
1003  * etm_hdr_read - allocate, read, and validate a [variable sized]
1004  *		ETM message header from the given connection,
1005  *		return the allocated ETM message header
1006  *		(which is guaranteed to be large enough to reuse as a
1007  *		RESPONSE msg hdr) and its size
1008  *		or NULL and set errno on failure
1009  */
1010 
1011 static void *
1012 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
1013 {
1014 	uint8_t			*hdrp;		/* ptr to header to return */
1015 	size_t			hdr_sz;		/* sizeof *hdrp */
1016 	etm_proto_v1_pp_t	pp; 		/* protocol preamble */
1017 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
1018 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1019 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
1020 	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
1021 	uint32_t		*lenp;		/* ptr to FMA event length */
1022 	ssize_t			i, n;		/* gen use */
1023 	uint8_t	misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
1024 	int			dummy_int;	/* dummy var to appease lint */
1025 
1026 	hdrp = NULL; hdr_sz = 0;
1027 
1028 	/* read the magic number which starts the protocol preamble */
1029 
1030 	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
1031 		errno = (-n);
1032 		etm_stats.etm_magic_bad.fmds_value.ui64++;
1033 		return (NULL);
1034 	}
1035 
1036 	/* read the rest of the protocol preamble all at once */
1037 
1038 	if ((n = etm_io_op(hdl, "bad io read on preamble",
1039 	    conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num),
1040 	    ETM_IO_OP_RD)) < 0) {
1041 		errno = (-n);
1042 		return (NULL);
1043 	}
1044 
1045 	/*
1046 	 * Design_Note:	The magic number was already network decoded; but
1047 	 *		some other preamble fields also need to be decoded,
1048 	 *		specifically pp_xid and pp_timeout. The rest of the
1049 	 *		preamble fields are byte sized and hence need no
1050 	 *		decoding.
1051 	 */
1052 
1053 	pp.pp_xid = ntohl(pp.pp_xid);
1054 	pp.pp_timeout = ntohl(pp.pp_timeout);
1055 
1056 	/* sanity check the header as best we can */
1057 
1058 	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
1059 	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
1060 		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
1061 		    (int)pp.pp_proto_ver);
1062 		errno = EPROTO;
1063 		etm_stats.etm_ver_bad.fmds_value.ui64++;
1064 		return (NULL);
1065 	}
1066 
1067 	dummy_int = pp.pp_msg_type;
1068 	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
1069 	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
1070 		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
1071 		errno = EBADMSG;
1072 		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
1073 		return (NULL);
1074 	}
1075 
1076 	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */
1077 
1078 	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
1079 
1080 		ev_hdrp = (void*)&misc_buf[0];
1081 		hdr_sz = sizeof (*ev_hdrp);
1082 		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));
1083 
1084 		/* sanity check the header's timeout */
1085 
1086 		if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) &&
1087 		    (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) {
1088 			errno = ETIME;
1089 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1090 			return (NULL);
1091 		}
1092 
1093 		/* get all FMA event lengths from the header */
1094 
1095 		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
1096 		i = -1;	/* cnt of length entries preceding 0 */
1097 		do {
1098 			i++; lenp++;
1099 			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
1100 			    ETM_MISC_BUF_SZ) {
1101 				errno = E2BIG;	/* ridiculous size */
1102 				etm_stats.etm_evlens_bad.fmds_value.ui64++;
1103 				return (NULL);
1104 			}
1105 			if ((n = etm_io_op(hdl, "bad io read on event len",
1106 			    conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) {
1107 				errno = (-n);
1108 				return (NULL);
1109 			}
1110 			*lenp = ntohl(*lenp);
1111 
1112 		} while (*lenp != 0);
1113 		i += 0; /* first len already counted by sizeof(ev_hdr) */
1114 		hdr_sz += (i * sizeof (*lenp));
1115 
1116 		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;
1117 
1118 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
1119 
1120 		ctl_hdrp = (void*)&misc_buf[0];
1121 		hdr_sz = sizeof (*ctl_hdrp);
1122 		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));
1123 
1124 		/* sanity check the header's sub type (control selector) */
1125 
1126 		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
1127 		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
1128 			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
1129 			    (int)ctl_hdrp->ctl_pp.pp_sub_type);
1130 			errno = EBADMSG;
1131 			etm_stats.etm_subtype_bad.fmds_value.ui64++;
1132 			return (NULL);
1133 		}
1134 
1135 		/* get the control length */
1136 
1137 		if ((n = etm_io_op(hdl, "bad io read on ctl len",
1138 		    conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len),
1139 		    ETM_IO_OP_RD)) < 0) {
1140 			errno = (-n);
1141 			return (NULL);
1142 		}
1143 
1144 		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);
1145 
1146 		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;
1147 
1148 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
1149 
1150 		resp_hdrp = (void*)&misc_buf[0];
1151 		hdr_sz = sizeof (*resp_hdrp);
1152 		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));
1153 
1154 		/* sanity check the header's timeout */
1155 
1156 		if (resp_hdrp->resp_pp.pp_timeout !=
1157 		    ETM_PROTO_V1_TIMEOUT_NONE) {
1158 			errno = ETIME;
1159 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1160 			return (NULL);
1161 		}
1162 
1163 		/* get the response code and length */
1164 
1165 		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
1166 		    conn, &resp_hdrp->resp_code,
1167 		    sizeof (resp_hdrp->resp_code)
1168 		    + sizeof (resp_hdrp->resp_len),
1169 		    ETM_IO_OP_RD)) < 0) {
1170 			errno = (-n);
1171 			return (NULL);
1172 		}
1173 
1174 		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
1175 		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);
1176 
1177 		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;
1178 
1179 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
1180 
1181 		sa_hdrp = (void*)&misc_buf[0];
1182 		hdr_sz = sizeof (*sa_hdrp);
1183 		(void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp));
1184 
1185 		/* sanity check the header's protocol version */
1186 
1187 		if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) {
1188 			errno = EPROTO;
1189 			etm_stats.etm_ver_bad.fmds_value.ui64++;
1190 			return (NULL);
1191 		}
1192 
1193 		/* get the priority and length */
1194 
1195 		if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
1196 		    conn, &sa_hdrp->sa_priority,
1197 		    sizeof (sa_hdrp->sa_priority)
1198 		    + sizeof (sa_hdrp->sa_len),
1199 		    ETM_IO_OP_RD)) < 0) {
1200 			errno = (-n);
1201 			return (NULL);
1202 		}
1203 
1204 		sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority);
1205 		sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len);
1206 
1207 		etm_stats.etm_rd_hdr_alert.fmds_value.ui64++;
1208 
1209 	} /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */
1210 
1211 	/*
1212 	 * choose a header size that allows hdr reuse for RESPONSE msgs,
1213 	 * allocate and populate the message header, and
1214 	 * return alloc size to caller for later free of hdrp
1215 	 */
1216 
1217 	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
1218 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1219 	(void) memcpy(hdrp, misc_buf, hdr_sz);
1220 
1221 	if (etm_debug_lvl >= 3) {
1222 		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz);
1223 		etm_hexdump(hdl, hdrp, hdr_sz);
1224 	}
1225 	*szp = hdr_sz;
1226 	return (hdrp);
1227 
1228 } /* etm_hdr_read() */
1229 
1230 /*
1231  * etm_hdr_write - create and write a [variable sized] ETM message header
1232  *		to the given connection appropriate for the given FMA event
1233  *		and type of nvlist encoding,
1234  *		return the allocated ETM message header and its size
1235  *		or NULL and set errno on failure
1236  */
1237 
1238 static void*
1239 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
1240 						int encoding, size_t *szp)
1241 {
1242 	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1243 	size_t			hdr_sz;		/* sizeof *hdrp */
1244 	uint32_t		*lenp;		/* ptr to FMA event length */
1245 	size_t			evsz;		/* packed FMA event size */
1246 	ssize_t			n;		/* gen use */
1247 
1248 	/* allocate and populate the message header for 1 FMA event */
1249 
1250 	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1251 
1252 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1253 
1254 	/*
1255 	 * Design_Note: Although the ETM protocol supports it, we do not (yet)
1256 	 *		want responses/ACKs on FMA events that we send. All
1257 	 *		such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE.
1258 	 */
1259 
1260 	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
1261 	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
1262 	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
1263 	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
1264 	hdrp->ev_pp.pp_sub_type = 0;
1265 	hdrp->ev_pp.pp_rsvd_pad = 0;
1266 	hdrp->ev_pp.pp_xid = etm_xid_cur;
1267 	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
1268 	etm_xid_cur += ETM_XID_INC;
1269 	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
1270 	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);
1271 
1272 	lenp = &hdrp->ev_lens[0];
1273 
1274 	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
1275 		errno = n;
1276 		fmd_hdl_free(hdl, hdrp, hdr_sz);
1277 		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
1278 		return (NULL);
1279 	}
1280 
1281 	/* indicate 1 FMA event, network encode its length, and 0-terminate */
1282 
1283 	etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1;
1284 
1285 	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
1286 	*lenp = 0; *lenp = htonl(*lenp); lenp++;
1287 
1288 	/*
1289 	 * write the network encoded header to the transport, and
1290 	 * return alloc size to caller for later free
1291 	 */
1292 
1293 	if ((n = etm_io_op(hdl, "bad io write on event hdr",
1294 	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
1295 		errno = (-n);
1296 		fmd_hdl_free(hdl, hdrp, hdr_sz);
1297 		return (NULL);
1298 	}
1299 
1300 	*szp = hdr_sz;
1301 	return (hdrp);
1302 
1303 } /* etm_hdr_write() */
1304 
1305 /*
1306  * etm_post_to_fmd - post the given FMA event to FMD
1307  *			via a FMD transport API call,
1308  *			return 0 or -errno value
1309  *
1310  * caveats:	the FMA event (evp) is freed by FMD,
1311  *		thus callers of this function should
1312  *		immediately discard any ptr they have to the
1313  *		nvlist without freeing or dereferencing it
1314  */
1315 
1316 static int
1317 etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp)
1318 {
1319 	ssize_t			ev_sz;		/* sizeof *evp */
1320 
1321 	(void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR);
1322 
1323 	if (etm_debug_lvl >= 2) {
1324 		etm_show_time(hdl, "ante ev post");
1325 	}
1326 	fmd_xprt_post(hdl, fmd_xprt, evp, 0);
1327 	etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
1328 	etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz;
1329 	if (etm_debug_lvl >= 1) {
1330 		fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
1331 	}
1332 	if (etm_debug_lvl >= 2) {
1333 		etm_show_time(hdl, "post ev post");
1334 	}
1335 	return (0);
1336 
1337 } /* etm_post_to_fmd() */
1338 
1339 /*
1340  * Ideally we would just use syslog(3C) for outputting our messages.
1341  * Unfortunately, as this module is running within the FMA daemon context,
1342  * that would create the situation where this module's openlog() would
1343  * have the monopoly on syslog(3C) for the daemon and all its modules.
1344  * To avoid that situation, this module uses the same logic as the
1345  * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D)
1346  * devices for syslog and console.
1347  */
1348 
1349 static int
1350 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz,
1351 							uint8_t *body_buf)
1352 {
1353 	char		*sysmessage;	/* Formatted message */
1354 	size_t		formatlen;	/* maximum length of sysmessage */
1355 	struct strbuf	ctl, dat;	/* structs pushed to the logfd */
1356 	uint32_t	msgid;		/* syslog message ID number */
1357 
1358 	if ((syslog_file == 0) && (syslog_cons == 0)) {
1359 		return (0);
1360 	}
1361 
1362 	if (etm_debug_lvl >= 2) {
1363 		etm_show_time(hdl, "ante syslog post");
1364 	}
1365 
1366 	formatlen = body_sz + 64; /* +64 for prefix strings added below */
1367 	sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP);
1368 
1369 	if (syslog_file) {
1370 		STRLOG_MAKE_MSGID(body_buf, msgid);
1371 		(void) snprintf(sysmessage, formatlen,
1372 		    "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid,
1373 		    body_buf);
1374 
1375 		syslog_ctl.pri = syslog_facility | priority;
1376 
1377 		ctl.buf = (void *)&syslog_ctl;
1378 		ctl.len = sizeof (syslog_ctl);
1379 
1380 		dat.buf = sysmessage;
1381 		dat.len = strlen(sysmessage) + 1;
1382 
1383 		if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) {
1384 			fmd_hdl_debug(hdl, "putmsg failed: %s\n",
1385 			    strerror(errno));
1386 			etm_stats.etm_log_err.fmds_value.ui64++;
1387 		}
1388 	}
1389 
1390 	if (syslog_cons) {
1391 		(void) snprintf(sysmessage, formatlen,
1392 		    "SC Alert: %s\r\n", body_buf);
1393 
1394 		dat.buf = sysmessage;
1395 		dat.len = strlen(sysmessage) + 1;
1396 
1397 		if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) {
1398 			fmd_hdl_debug(hdl, "write failed: %s\n",
1399 			    strerror(errno));
1400 			etm_stats.etm_msg_err.fmds_value.ui64++;
1401 		}
1402 	}
1403 
1404 	fmd_hdl_free(hdl, sysmessage, formatlen);
1405 
1406 	if (etm_debug_lvl >= 2) {
1407 		etm_show_time(hdl, "post syslog post");
1408 	}
1409 
1410 	return (0);
1411 }
1412 
1413 
1414 /*
1415  * etm_req_ver_negot - send an ETM control message to the other end requesting
1416  *			that the ETM protocol version be negotiated/set
1417  */
1418 
1419 static void
1420 etm_req_ver_negot(fmd_hdl_t *hdl)
1421 {
1422 	etm_xport_addr_t	*addrv;		/* default dst addr(s) */
1423 	etm_xport_conn_t	conn;		/* connection to other end */
1424 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1425 	size_t			hdr_sz;		/* sizeof header */
1426 	uint8_t			*body_buf;	/* msg body buffer */
1427 	uint32_t		body_sz;	/* sizeof *body_buf */
1428 	ssize_t			i;		/* gen use */
1429 
1430 	/* populate an ETM control msg to send */
1431 
1432 	hdr_sz = sizeof (*ctl_hdrp);
1433 	body_sz = (3 + 1);		/* version bytes plus null byte */
1434 
1435 	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);
1436 
1437 	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
1438 	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
1439 	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
1440 	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ;
1441 	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
1442 	etm_xid_ver_negot = etm_xid_cur;
1443 	etm_xid_cur += ETM_XID_INC;
1444 	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot);
1445 	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
1446 	ctl_hdrp->ctl_len = htonl(body_sz);
1447 
1448 	body_buf = (void*)&ctl_hdrp->ctl_len;
1449 	body_buf += sizeof (ctl_hdrp->ctl_len);
1450 	*body_buf++ = ETM_PROTO_V3;
1451 	*body_buf++ = ETM_PROTO_V2;
1452 	*body_buf++ = ETM_PROTO_V1;
1453 	*body_buf++ = '\0';
1454 
1455 	/*
1456 	 * open and close a connection to send the ETM control msg
1457 	 * to any/all of the default dst addrs
1458 	 */
1459 
1460 	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
1461 		fmd_hdl_error(hdl,
1462 		    "error: bad ctl dst addrs errno %d\n", errno);
1463 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
1464 		goto func_ret;
1465 	}
1466 
1467 	for (i = 0; addrv[i] != NULL; i++) {
1468 
1469 		if (etm_conn_open(hdl, "bad conn open during ver negot",
1470 		    addrv[i], &conn) < 0) {
1471 			continue;
1472 		}
1473 		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
1474 		    conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) {
1475 			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
1476 			etm_stats.etm_wr_body_control.fmds_value.ui64++;
1477 		}
1478 		(void) etm_conn_close(hdl, "bad conn close during ver negot",
1479 		    conn);
1480 
1481 	} /* foreach dst addr */
1482 
1483 func_ret:
1484 
1485 	if (addrv != NULL) {
1486 		etm_xport_free_addrv(hdl, addrv);
1487 	}
1488 	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);
1489 
1490 } /* etm_req_ver_negot() */
1491 
1492 
1493 
1494 /*
1495  * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue
1496  * etm_iosvc_msg_deq - del element from head of ETM iosvc msg  queue
1497  * need to grab the mutex lock before calling this routine
1498  * return >0 for success, or -errno value
1499  */
1500 static int
1501 etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1502 {
1503 	etm_iosvc_q_ele_t		*newp;	/* ptr to new msg q ele */
1504 
1505 	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1506 		fmd_hdl_debug(hdl, "warning: enq to full msg queue\n");
1507 		return (-E2BIG);
1508 	}
1509 
1510 	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
1511 	(void) memcpy(newp, msgp, sizeof (*newp));
1512 	newp->msg_nextp = NULL;
1513 
1514 	if (iosvc->msg_q_cur_len == 0) {
1515 		iosvc->msg_q_head = newp;
1516 	} else {
1517 		iosvc->msg_q_tail->msg_nextp = newp;
1518 	}
1519 
1520 	iosvc->msg_q_tail = newp;
1521 	iosvc->msg_q_cur_len++;
1522 	fmd_hdl_debug(hdl, "info: current msg queue length %d\n",
1523 	    iosvc->msg_q_cur_len);
1524 
1525 	return (1);
1526 
1527 } /* etm_iosvc_msg_enq() */
1528 
1529 static int
1530 etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1531 {
1532 	etm_iosvc_q_ele_t	*oldp;	/* ptr to old msg q ele */
1533 
1534 	if (iosvc->msg_q_cur_len == 0) {
1535 		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
1536 		return (-ENOENT);
1537 	}
1538 
1539 	(void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp));
1540 	msgp->msg_nextp = NULL;
1541 
1542 	oldp = iosvc->msg_q_head;
1543 	iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp;
1544 
1545 	/*
1546 	 * free the mem alloc-ed in etm_iosvc_msg_enq()
1547 	 */
1548 	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
1549 
1550 	iosvc->msg_q_cur_len--;
1551 	if (iosvc->msg_q_cur_len == 0) {
1552 		iosvc->msg_q_tail = NULL;
1553 	}
1554 
1555 	return (1);
1556 
1557 } /* etm_iosvc_msg_deq() */
1558 
1559 
1560 /*
1561  * etm_msg_enq_head():
1562  * enq the msg to the head of the Q.
1563  * If the Q is full, drop the msg at the tail then enq the msg at head.
1564  * need to grab mutex lock iosvc->msg_q_lock before calling this routine.
1565  */
1566 static void
1567 etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1568     etm_iosvc_q_ele_t *msg_ele)
1569 {
1570 
1571 	etm_iosvc_q_ele_t	*newp;	/* iosvc msg ele ptr */
1572 
1573 	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1574 		fmd_hdl_debug(fmd_hdl,
1575 		    "warning: add to head of a full msg queue."
1576 		    " Drop the msg at the tail\n");
1577 		/*
1578 		 * drop the msg at the tail
1579 		 */
1580 		newp = iosvc->msg_q_head;
1581 		while (newp->msg_nextp != iosvc->msg_q_tail) {
1582 			newp = newp->msg_nextp;
1583 		}
1584 
1585 		/*
1586 		 * free the msg in iosvc->msg_q_tail->msg
1587 		 * free the mem pointed to by iosvc->msg_q_tail
1588 		 */
1589 		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg,
1590 		    iosvc->msg_q_tail->msg_size);
1591 		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp));
1592 		iosvc->msg_q_tail = newp;
1593 		iosvc->msg_q_tail->msg_nextp = NULL;
1594 		iosvc->msg_q_cur_len--;
1595 	}
1596 
1597 	/*
1598 	 * enq the msg to the head
1599 	 */
1600 	newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP);
1601 	(void) memcpy(newp, msg_ele, sizeof (*newp));
1602 	if (iosvc->msg_q_cur_len == 0) {
1603 		newp->msg_nextp = NULL;
1604 		iosvc->msg_q_tail = newp;
1605 	} else {
1606 		newp->msg_nextp = iosvc->msg_q_head;
1607 	}
1608 	iosvc->msg_q_head = newp;
1609 	iosvc->msg_q_cur_len++;
1610 } /* etm_msg_enq_head() */
1611 
1612 /*
1613  * etm_iosvc_cleanup():
1614  * Clean up an iosvc structure
1615  * 1) close the fmd_xprt if it has not been closed
1616  * 2) Terminate the send/revc threads
1617  * 3) If the clean_msg_q flag is set, free all fma events in the queue. In
1618  *    addition, if the chpt_remove flag is set, delete the checkpoint so that
1619  *    the events are not persisted.
1620  */
1621 static void
1622 etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, boolean_t clean_msg_q,
1623     boolean_t ckpt_remove)
1624 {
1625 
1626 	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1627 
1628 	iosvc->thr_is_dying = 1;
1629 
1630 	iosvc->ds_hdl = DS_INVALID_HDL;
1631 	if (iosvc->fmd_xprt != NULL) {
1632 		fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt);
1633 		iosvc->fmd_xprt = NULL;
1634 	} /* if fmd-xprt has been opened */
1635 
1636 	if (iosvc->send_tid != NULL) {
1637 		fmd_thr_signal(fmd_hdl, iosvc->send_tid);
1638 		fmd_thr_destroy(fmd_hdl, iosvc->send_tid);
1639 		iosvc->send_tid = NULL;
1640 	} /* if io svc send thread was created ok */
1641 
1642 	if (iosvc->recv_tid != NULL) {
1643 		fmd_thr_signal(fmd_hdl, iosvc->recv_tid);
1644 		fmd_thr_destroy(fmd_hdl, iosvc->recv_tid);
1645 		iosvc->recv_tid = NULL;
1646 	} /* if root domain recv thread was created */
1647 
1648 
1649 	if (clean_msg_q) {
1650 		iosvc->ldom_name[0] = '\0';
1651 
1652 		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1653 		while (iosvc->msg_q_cur_len > 0) {
1654 			(void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele);
1655 			if (ckpt_remove == B_TRUE &&
1656 			    msg_ele.ckpt_flag != ETM_CKPT_NOOP) {
1657 				etm_ckpt_remove(fmd_hdl, &msg_ele);
1658 			}
1659 			fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size);
1660 		}
1661 		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1662 	}
1663 
1664 	return;
1665 
1666 } /* etm_iosvc_cleanup() */
1667 
1668 /*
1669  * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty)
1670  * not found, create one, add to iosvc_list
1671  */
1672 etm_iosvc_t *
1673 etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl,
1674     boolean_t iosvc_create)
1675 {
1676 	uint32_t		i;			/* for loop var */
1677 	int32_t			first_empty_slot = -1;	/* remember that */
1678 
1679 	for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
1680 		if (ldom_name[0] == '\0') {
1681 			/*
1682 			 * search by hdl passed in
1683 			 * the only time this is used is at ds_unreg_cb time.
1684 			 * there is no ldom name, only the valid ds_hdl.
1685 			 * find an iosvc with the matching ds_hdl.
1686 			 * ignore the iosvc_create flag, should never need to
1687 			 * create an iosvc for ds_unreg_cb
1688 			 */
1689 			if (ds_hdl == iosvc_list[i].ds_hdl) {
1690 				if (etm_debug_lvl >= 2) {
1691 				fmd_hdl_debug(fmd_hdl,
1692 			    "info: found an iosvc at slot %d w/ ds_hdl %d \n",
1693 				    i, iosvc_list[i].ds_hdl);
1694 				}
1695 				if (iosvc_list[i].ldom_name[0] != '\0')
1696 					if (etm_debug_lvl >= 2) {
1697 						fmd_hdl_debug(fmd_hdl,
1698 				    "info: found an iosvc w/ ldom_name %s \n",
1699 						    iosvc_list[i].ldom_name);
1700 				}
1701 				return (&iosvc_list[i]);
1702 			} else {
1703 				continue;
1704 			}
1705 		} else if (iosvc_list[i].ldom_name[0] != '\0') {
1706 			/*
1707 			 * this is  an non-empty iosvc structure slot
1708 			 */
1709 			if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) {
1710 				/*
1711 				 * found an iosvc structure that matches the
1712 				 * passed in ldom_name, return the ptr
1713 				 */
1714 				if (etm_debug_lvl >= 2) {
1715 					fmd_hdl_debug(fmd_hdl, "info: found an "
1716 					    "iosvc at slot %d w/ ds_hdl %d \n",
1717 					    i, iosvc_list[i].ds_hdl);
1718 					fmd_hdl_debug(fmd_hdl, "info: found an "
1719 					    "iosvc w/ ldom_name %s \n",
1720 					    iosvc_list[i].ldom_name);
1721 				}
1722 				return (&iosvc_list[i]);
1723 			} else {
1724 				/*
1725 				 * non-empty slot with no-matching name,
1726 				 * move on to next slot.
1727 				 */
1728 				continue;
1729 			}
1730 		} else {
1731 			/*
1732 			 * found the 1st slot with ldom name being empty
1733 			 * remember the slot #, will be used for creating one
1734 			 */
1735 			if (first_empty_slot == -1) {
1736 				first_empty_slot = i;
1737 			}
1738 		}
1739 	}
1740 	if (iosvc_create == B_TRUE && first_empty_slot >= 0) {
1741 		/*
1742 		 * this is the case we need to add an iosvc at first_empty_slot
1743 		 * for the ldom_name at iosvc_list[first_empty_slot]
1744 		 */
1745 		fmd_hdl_debug(fmd_hdl,
1746 		    "info: create an iosvc with ldom name %s\n",
1747 		    ldom_name);
1748 		i = first_empty_slot;
1749 		(void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t));
1750 		(void) strcpy(iosvc_list[i].ldom_name, ldom_name);
1751 		fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n",
1752 		    i, iosvc_list[i].ldom_name);
1753 		return (&iosvc_list[i]);
1754 	} else {
1755 		return (NULL);
1756 	}
1757 
1758 } /* etm_iosvc_lookup() */
1759 
1760 
1761 /*
1762  * etm_ckpt_remove:
1763  * remove the ckpt for the iosvc element
1764  */
1765 static void
1766 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) {
1767 	int		err;			/* temp error */
1768 	nvlist_t	*evp = NULL;		/* event pointer */
1769 	etm_proto_v1_ev_hdr_t	*hdrp;		/* hdr for FMA_EVENT */
1770 	char		*buf;			/* packed event pointer */
1771 
1772 	if ((ele->ckpt_flag == ETM_CKPT_NOOP) ||
1773 	    (etm_ldom_type != LDOM_TYPE_CONTROL)) {
1774 		return;
1775 	}
1776 
1777 	/* the pointer to the packed event in the etm message */
1778 	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg);
1779 	buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp)
1780 	    + (1 * sizeof (hdrp->ev_lens[0])));
1781 
1782 	/* unpack it, then uncheckpoited it */
1783 	if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) {
1784 		fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err);
1785 		return;
1786 	}
1787 	(void) etm_ckpt_delete(hdl, evp);
1788 	nvlist_free(evp);
1789 }
1790 
1791 /*
1792  * etm_send_ds_msg()
1793  * call ds_send_msg() to send the msg passed in.
1794  * timedcond_wait for the ACK to come back.
1795  * if the ACK doesn't come in the specified time, retrun -EAGAIN.
1796  * other wise, return 1.
1797  */
1798 int
1799 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc,
1800     etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp)
1801 {
1802 	uint32_t		rc;		/* for return code  */
1803 
1804 	struct timeval		tv;
1805 	struct timespec		timeout;
1806 
1807 
1808 	/*
1809 	 * call ds_send_msg(). Return (-EAGAIN) if not successful
1810 	 */
1811 	if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg,
1812 	    msg_ele->msg_size)) != 0) {
1813 		fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n",
1814 		    rc, evhdrp->ev_pp.pp_xid);
1815 			return (-EAGAIN);
1816 	}
1817 
1818 	/*
1819 	 * wait on the cv for resp msg for cur_send_xid
1820 	 */
1821 	(void *) pthread_mutex_lock(&iosvc->msg_ack_lock);
1822 
1823 	(void) gettimeofday(&tv, 0);
1824 	timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time;
1825 	timeout.tv_nsec = 0;
1826 
1827 	fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n",
1828 	    iosvc->ldom_name);
1829 	rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock,
1830 	    &timeout);
1831 	(void *) pthread_mutex_unlock(&iosvc->msg_ack_lock);
1832 	fmd_hdl_debug(fmd_hdl,  "info: msg_ack_cv returns with rc %d\n", rc);
1833 
1834 	/*
1835 	 * check to see if ack_ok is non-zero
1836 	 * if non-zero, resp msg has been received
1837 	 */
1838 	if (iosvc->ack_ok != 0) {
1839 		/*
1840 		 * ACK came ok,  this send is successful,
1841 		 * tell the caller ready to send next.
1842 		 * free mem alloc-ed in
1843 		 * etm_pack_ds_msg
1844 		 */
1845 		if (ckpt_remove == B_TRUE &&
1846 		    etm_ldom_type == LDOM_TYPE_CONTROL) {
1847 			etm_ckpt_remove(fmd_hdl, msg_ele);
1848 		}
1849 		fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size);
1850 		iosvc->cur_send_xid++;
1851 		return (1);
1852 	} else {
1853 		/*
1854 		 * the ACK did not come on time
1855 		 * tell the caller to resend cur_send_xid
1856 		 */
1857 		return (-EAGAIN);
1858 	} /* iosvc->ack_ok != 0 */
1859 } /* etm_send_ds_msg() */
1860 
1861 /*
1862  * both events from fmdo_send entry point and from SP are using the
1863  * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all
1864  * ds send/recv msgs.
1865  * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send
1866  * entry point can be called before FMA events from SP, we can't rely on
1867  * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send
1868  * events.
1869  * return >0 for success, or -errno value
1870  * Design assumption: there is one FMA event per ds msg
1871  */
1872 int
1873 etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1874 	etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp,
1875 	etm_pack_msg_type_t msg_type, uint_t ckpt_opt)
1876 {
1877 	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1878 	uint32_t		*lenp;		/* ptr to FMA event length */
1879 	size_t			evsz;		/* packed FMA event size */
1880 	char 			*buf;
1881 	uint32_t		rc;		/* for return code  */
1882 	char 			*msg;		/* body of msg to be Qed */
1883 
1884 	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1885 	etm_proto_v1_ev_hdr_t	*evhdrp;
1886 
1887 
1888 	if (ev_hdrp == NULL) {
1889 		hdrp = &iosvc_hdr;
1890 	} else {
1891 		hdrp = ev_hdrp;
1892 	}
1893 
1894 	/*
1895 	 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz
1896 	 */
1897 
1898 	if (hdr_sz == 0) {
1899 		hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1900 	}
1901 
1902 	/*
1903 	 * determine evp size
1904 	 */
1905 	(void) nvlist_size(evp, &evsz, NV_ENCODE_XDR);
1906 
1907 	/* indicate 1 FMA event, no network encoding, and 0-terminate */
1908 	lenp = &hdrp->ev_lens[0];
1909 	*lenp = evsz;
1910 
1911 	/*
1912 	 * now the total of mem needs to be alloc-ed/ds msg size is
1913 	 * hdr_sz + evsz
1914 	 * msg will be freed in etm_send_to_remote_root() after ds_send_msg()
1915 	 */
1916 	msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP);
1917 
1918 
1919 	/*
1920 	 * copy hdr, 0 terminate the length vector,  and then evp
1921 	 */
1922 	(void) memcpy(msg, hdrp, sizeof (*hdrp));
1923 	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg);
1924 	lenp = &hdrp->ev_lens[0];
1925 	lenp++;
1926 	*lenp = 0;
1927 
1928 	buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP);
1929 	(void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0);
1930 	(void) memcpy(msg + hdr_sz, buf, evsz);
1931 	fmd_hdl_free(fmd_hdl, buf, evsz);
1932 
1933 	fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg"
1934 	    "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name);
1935 	msg_ele.msg = msg;
1936 	msg_ele.msg_size = hdr_sz + evsz;
1937 	msg_ele.ckpt_flag = ckpt_opt;
1938 
1939 	/*
1940 	 * decide what to do with the msg:
1941 	 * if SP ereports (msg_type == SP_MSG), always enq the msg
1942 	 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after
1943 	 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1)
1944 	 */
1945 	if ((msg_type == SP_MSG) ||
1946 	    (msg_type != SP_MSG) && (iosvc->start_sending_Q == 1)) {
1947 		/*
1948 		 * this is the case when the msg needs to be enq-ed
1949 		 */
1950 		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1951 		rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele);
1952 		if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) &&
1953 		    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
1954 			(void) etm_ckpt_add(fmd_hdl, evp);
1955 		}
1956 		if (iosvc->msg_q_cur_len == 1)
1957 			(void) pthread_cond_signal(&iosvc->msg_q_cv);
1958 		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1959 	} else {
1960 		/*
1961 		 * fmd RDWR xprt procotol startup msgs, send it now!
1962 		 */
1963 		iosvc->ack_ok = 0;
1964 		evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg);
1965 		evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
1966 		while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL &&
1967 		    !etm_is_dying) {
1968 			if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele,
1969 			    evhdrp) < 0) {
1970 				continue;
1971 			}
1972 		}
1973 		if (msg_type == FMD_XPRT_RUN_MSG)
1974 			iosvc->start_sending_Q = 1;
1975 	}
1976 
1977 	return (rc);
1978 
1979 } /* etm_pack_ds_msg() */
1980 
1981 /*
1982  * Design_Note:	For all etm_resp_q_*() functions and etm_resp_q_* globals,
1983  *		the mutex etm_resp_q_lock must be held by the caller.
1984  */
1985 
1986 /*
1987  * etm_resp_q_enq - add element to tail of ETM responder queue
1988  * etm_resp_q_deq - del element from head of ETM responder queue
1989  *
1990  * return >0 for success, or -errno value
1991  */
1992 
1993 static int
1994 etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
1995 {
1996 	etm_resp_q_ele_t	*newp;	/* ptr to new resp q ele */
1997 
1998 	if (etm_resp_q_cur_len >= etm_resp_q_max_len) {
1999 		fmd_hdl_debug(hdl, "warning: enq to full responder queue\n");
2000 		etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++;
2001 		return (-E2BIG);
2002 	}
2003 
2004 	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
2005 	(void) memcpy(newp, rqep, sizeof (*newp));
2006 	newp->rqe_nextp = NULL;
2007 
2008 	if (etm_resp_q_cur_len == 0) {
2009 		etm_resp_q_head = newp;
2010 	} else {
2011 		etm_resp_q_tail->rqe_nextp = newp;
2012 	}
2013 	etm_resp_q_tail = newp;
2014 	etm_resp_q_cur_len++;
2015 	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2016 
2017 	return (1);
2018 
2019 } /* etm_resp_q_enq() */
2020 
2021 static int
2022 etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
2023 {
2024 	etm_resp_q_ele_t	*oldp;	/* ptr to old resp q ele */
2025 
2026 	if (etm_resp_q_cur_len == 0) {
2027 		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
2028 		etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++;
2029 		return (-ENOENT);
2030 	}
2031 
2032 	(void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
2033 	rqep->rqe_nextp = NULL;
2034 
2035 	oldp = etm_resp_q_head;
2036 	etm_resp_q_head = etm_resp_q_head->rqe_nextp;
2037 	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
2038 
2039 	etm_resp_q_cur_len--;
2040 	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2041 	if (etm_resp_q_cur_len == 0) {
2042 		etm_resp_q_tail = NULL;
2043 	}
2044 
2045 	return (1);
2046 
2047 } /* etm_resp_q_deq() */
2048 
2049 /*
2050  * etm_maybe_enq_response - check the given message header to see
2051  *				whether a response has been requested,
2052  *				if so then enqueue the given connection
2053  *				and header for later transport by the
2054  *				responder thread as an ETM response msg,
2055  *				return 0 for nop, >0 success, or -errno value
2056  */
2057 
2058 static ssize_t
2059 etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2060     void *hdrp, uint32_t hdr_sz, int32_t resp_code)
2061 {
2062 	ssize_t			rv;		/* ret val */
2063 	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2064 	uint8_t			orig_msg_type;	/* orig hdr's message type */
2065 	uint32_t		orig_timeout;	/* orig hdr's timeout */
2066 	etm_resp_q_ele_t	rqe;		/* responder queue ele */
2067 
2068 	ppp = hdrp;
2069 	orig_msg_type = ppp->pp_msg_type;
2070 	orig_timeout = ppp->pp_timeout;
2071 
2072 	/* bail out now if no response is to be sent */
2073 
2074 	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
2075 		return (0);
2076 	} /* if a nop */
2077 
2078 	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
2079 	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
2080 	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
2081 		fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
2082 		    orig_msg_type);
2083 		return (-EINVAL);
2084 	} /* if inappropriate hdr for a response msg */
2085 
2086 	/*
2087 	 * enqueue the msg hdr and nudge the responder thread
2088 	 * if the responder queue was previously empty
2089 	 */
2090 
2091 	rqe.rqe_conn = conn;
2092 	rqe.rqe_hdrp = hdrp;
2093 	rqe.rqe_hdr_sz = hdr_sz;
2094 	rqe.rqe_resp_code = resp_code;
2095 
2096 	(void) pthread_mutex_lock(&etm_resp_q_lock);
2097 
2098 	if (etm_resp_q_cur_len == etm_resp_q_max_len)
2099 		(void) pthread_cond_wait(&etm_resp_q_cv, &etm_resp_q_lock);
2100 
2101 	rv = etm_resp_q_enq(hdl, &rqe);
2102 	if (etm_resp_q_cur_len == 1)
2103 		(void) pthread_cond_signal(&etm_resp_q_cv);
2104 	(void) pthread_mutex_unlock(&etm_resp_q_lock);
2105 
2106 	return (rv);
2107 
2108 } /* etm_maybe_enq_response() */
2109 
2110 /*
2111  * Design_Note:	We rely on the fact that all message types have
2112  *		a common protocol preamble; if this fact should
2113  *		ever change it may break the code below. We also
2114  *		rely on the fact that FMA_EVENT and CONTROL headers
2115  *		returned by etm_hdr_read() will be sized large enough
2116  *		to reuse them as RESPONSE headers if the remote endpt
2117  *		asked for a response via the pp_timeout field.
2118  */
2119 
2120 /*
2121  * etm_send_response - use the given message header and response code
2122  *			to construct an appropriate response message,
2123  *			and send it back on the given connection,
2124  *			return >0 for success, or -errno value
2125  */
2126 
2127 static ssize_t
2128 etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2129     void *hdrp, int32_t resp_code)
2130 {
2131 	ssize_t			rv;		/* ret val */
2132 	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2133 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2134 	uint8_t			resp_body[4];	/* response body if needed */
2135 	uint8_t			*resp_msg;	/* response hdr+body */
2136 	size_t			hdr_sz;		/* sizeof response hdr */
2137 	uint8_t			orig_msg_type;	/* orig hdr's message type */
2138 
2139 	ppp = hdrp;
2140 	orig_msg_type = ppp->pp_msg_type;
2141 
2142 	if (etm_debug_lvl >= 2) {
2143 		etm_show_time(hdl, "ante resp send");
2144 	}
2145 
2146 	/* reuse the given header as a response header */
2147 
2148 	resp_hdrp = hdrp;
2149 	resp_hdrp->resp_code = resp_code;
2150 	resp_hdrp->resp_len = 0;		/* default is empty body */
2151 
2152 	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
2153 	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
2154 		resp_body[0] = ETM_PROTO_V2;
2155 		resp_body[1] = ETM_PROTO_V3;
2156 		resp_body[2] = 0;
2157 		resp_hdrp->resp_len = 3;
2158 	} /* if should send our/negotiated proto ver in resp body */
2159 
2160 	/* respond with the proto ver that was negotiated */
2161 
2162 	resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
2163 	resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
2164 	resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
2165 
2166 	/*
2167 	 * send the whole response msg in one write, header and body;
2168 	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
2169 	 * ie, if the body is empty. update the response stats.
2170 	 */
2171 
2172 	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);
2173 
2174 	resp_msg = hdrp;
2175 	if (resp_hdrp->resp_len > 0) {
2176 		resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len,
2177 		    FMD_SLEEP);
2178 		(void) memcpy(resp_msg, resp_hdrp, hdr_sz);
2179 		(void) memcpy(resp_msg + hdr_sz, resp_body,
2180 		    resp_hdrp->resp_len);
2181 	}
2182 
2183 	(void) pthread_mutex_lock(&etm_write_lock);
2184 	rv = etm_io_op(hdl, "bad io write on resp msg", conn,
2185 	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR);
2186 	(void) pthread_mutex_unlock(&etm_write_lock);
2187 	if (rv < 0) {
2188 		goto func_ret;
2189 	}
2190 
2191 	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
2192 	etm_stats.etm_wr_body_response.fmds_value.ui64++;
2193 
2194 	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
2195 	    "xid 0x%x code %d len %u\n",
2196 	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
2197 	    resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code,
2198 	    resp_hdrp->resp_len);
2199 func_ret:
2200 
2201 	if (resp_hdrp->resp_len > 0) {
2202 		fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len);
2203 	}
2204 	if (etm_debug_lvl >= 2) {
2205 		etm_show_time(hdl, "post resp send");
2206 	}
2207 	return (rv);
2208 
2209 } /* etm_send_response() */
2210 
2211 /*
2212  * etm_reset_xport - reset the transport layer (via fini;init)
2213  *			presumably for an error condition we cannot
2214  *			otherwise recover from (ex: hung LDC channel)
2215  *
2216  * caveats - no checking/locking is done to ensure an existing connection
2217  *		is idle during an xport reset; we don't want to deadlock
2218  *		and presumably the transport is stuck/unusable anyway
2219  */
2220 
2221 static void
2222 etm_reset_xport(fmd_hdl_t *hdl)
2223 {
2224 	(void) etm_xport_fini(hdl);
2225 	(void) etm_xport_init(hdl);
2226 	etm_stats.etm_reset_xport.fmds_value.ui64++;
2227 
2228 } /* etm_reset_xport() */
2229 
2230 /*
2231  * etm_handle_new_conn - receive an ETM message sent from the other end via
2232  *			the given open connection, pull out any FMA events
2233  *			and post them to the local FMD (or handle any ETM
2234  *			control or response msg); when done, close the
2235  *			connection
2236  */
2237 
2238 static void
2239 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn)
2240 {
2241 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
2242 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
2243 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2244 	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
2245 	etm_iosvc_t		*iosvc;		/* iosvc data structure */
2246 	int32_t			resp_code;	/* response code */
2247 	ssize_t			enq_rv;		/* resp_q enqueue status */
2248 	size_t			hdr_sz;		/* sizeof header */
2249 	size_t			evsz;		/* FMA event size */
2250 	uint8_t			*body_buf;	/* msg body buffer */
2251 	uint32_t		body_sz;	/* sizeof body_buf */
2252 	uint32_t		ev_cnt;		/* count of FMA events */
2253 	uint8_t			*bp;		/* byte ptr within body_buf */
2254 	nvlist_t		*evp;		/* ptr to unpacked FMA event */
2255 	char			*class;		/* FMA event class */
2256 	ssize_t			i, n;		/* gen use */
2257 	int			should_reset_xport; /* bool to reset xport */
2258 	char			ldom_name[MAX_LDOM_NAME]; /* ldom name */
2259 	int			rc;		/* return code */
2260 	uint64_t		did;		/* domain id */
2261 
2262 
2263 	if (etm_debug_lvl >= 2) {
2264 		etm_show_time(hdl, "ante conn handle");
2265 	}
2266 	fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn);
2267 
2268 	should_reset_xport = 0;
2269 	ev_hdrp = NULL;
2270 	ctl_hdrp = NULL;
2271 	resp_hdrp = NULL;
2272 	sa_hdrp = NULL;
2273 	body_buf = NULL;
2274 	class = NULL;
2275 	evp = NULL;
2276 	resp_code = 0;	/* default is success */
2277 	enq_rv = 0;	/* default is nop, ie, did not enqueue */
2278 
2279 	/* read a network decoded message header from the connection */
2280 
2281 	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
2282 		/* errno assumed set by above call */
2283 		should_reset_xport = (errno == ENOTACTIVE);
2284 		fmd_hdl_debug(hdl, "error: FMA event dropped: "
2285 		    "bad hdr read errno %d\n", errno);
2286 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2287 		goto func_ret;
2288 	}
2289 
2290 	/*
2291 	 * handle the message based on its preamble pp_msg_type
2292 	 * which is known to be valid from etm_hdr_read() checks
2293 	 */
2294 
2295 	if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
2296 
2297 		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");
2298 
2299 		/* allocate buf large enough for whole body / all FMA events */
2300 
2301 		body_sz = 0;
2302 		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
2303 			body_sz += ev_hdrp->ev_lens[i];
2304 		} /* for summing sizes of all FMA events */
2305 		if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64)
2306 			etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i;
2307 		ev_cnt = i;
2308 
2309 		if (etm_debug_lvl >= 1) {
2310 			fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n",
2311 			    ev_cnt, body_sz);
2312 		}
2313 
2314 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2315 
2316 		/* read all the FMA events at once */
2317 
2318 		if ((n = etm_io_op(hdl, "FMA event dropped: "
2319 		    "bad io read on event bodies", conn, body_buf, body_sz,
2320 		    ETM_IO_OP_RD)) < 0) {
2321 			should_reset_xport = (n == -ENOTACTIVE);
2322 			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2323 			goto func_ret;
2324 		}
2325 
2326 		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
2327 		etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;
2328 
2329 		/*
2330 		 * now that we've read the entire ETM msg from the conn,
2331 		 * which avoids later ETM protocol framing errors if we didn't,
2332 		 * check for dup msg/xid against last good FMD posting,
2333 		 * if a dup then resend response but skip repost to FMD
2334 		 */
2335 
2336 		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) {
2337 			enq_rv = etm_maybe_enq_response(hdl, conn,
2338 			    ev_hdrp, hdr_sz, 0);
2339 			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
2340 			    "xid 0x%x\n", etm_xid_posted_logged_ev);
2341 			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
2342 			goto func_ret;
2343 		}
2344 
2345 		/* unpack each FMA event and post it to FMD */
2346 
2347 		bp = body_buf;
2348 		for (i = 0; i < ev_cnt; i++) {
2349 			if ((n = nvlist_unpack((char *)bp,
2350 			    ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
2351 				resp_code = (-n);
2352 				enq_rv = etm_maybe_enq_response(hdl, conn,
2353 				    ev_hdrp, hdr_sz, resp_code);
2354 				fmd_hdl_error(hdl, "error: FMA event dropped: "
2355 				    "bad event body unpack errno %d\n", n);
2356 				if (etm_debug_lvl >= 2) {
2357 					fmd_hdl_debug(hdl, "info: FMA event "
2358 					    "hexdump %d bytes:\n",
2359 					    ev_hdrp->ev_lens[i]);
2360 					etm_hexdump(hdl, bp,
2361 					    ev_hdrp->ev_lens[i]);
2362 				}
2363 				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
2364 				    ui64++;
2365 				etm_stats.etm_rd_drop_fmaevent.fmds_value.
2366 				    ui64++;
2367 				bp += ev_hdrp->ev_lens[i];
2368 				continue;
2369 			}
2370 
2371 			if (etm_debug_lvl >= 1) {
2372 				(void) nvlist_lookup_string(evp, FM_CLASS,
2373 				    &class);
2374 				if (class == NULL) {
2375 					class = "NULL";
2376 				}
2377 				fmd_hdl_debug(hdl, "info: FMA event %p "
2378 				    "class %s\n", evp, class);
2379 			}
2380 
2381 			rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR);
2382 			fmd_hdl_debug(hdl,
2383 			    "info: evp size before pack ds msg %d\n", evsz);
2384 			ldom_name[0] = '\0';
2385 			rc = etm_filter_find_ldom_id(hdl, evp, ldom_name,
2386 			    MAX_LDOM_NAME, &did);
2387 
2388 			/*
2389 			 * if rc is zero and the ldom_name is not "primary",
2390 			 * the evp belongs to a root domain, put the evp in an
2391 			 * outgoing etm queue,
2392 			 * in all other cases, whether ldom_name is primary or
2393 			 * can't find a ldom name, call etm_post_to_fmd
2394 			 */
2395 			if ((rc == 0) && strcmp(ldom_name, "primary") &&
2396 			    strcmp(ldom_name, "")) {
2397 				/*
2398 				 * use the ldom_name, guaranteered at this point
2399 				 * to be a valid ldom name/non-NULL, to find the
2400 				 * iosvc data.
2401 				 * add an iosvc struct if can not find one
2402 				 */
2403 				(void) pthread_mutex_unlock(&iosvc_list_lock);
2404 				iosvc = etm_iosvc_lookup(hdl, ldom_name,
2405 				    DS_INVALID_HDL, B_TRUE);
2406 				(void) pthread_mutex_unlock(&iosvc_list_lock);
2407 				if (iosvc == NULL) {
2408 					fmd_hdl_debug(hdl,
2409 					    "error: can't find iosvc for ldom "
2410 					    "name %s\n", ldom_name);
2411 				} else {
2412 					resp_code = 0;
2413 					(void) etm_pack_ds_msg(hdl, iosvc,
2414 					    ev_hdrp, hdr_sz, evp,
2415 					    SP_MSG, ETM_CKPT_SAVE);
2416 					/*
2417 					 * call the new fmd_xprt_log()
2418 					 */
2419 					fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0);
2420 					etm_xid_posted_logged_ev =
2421 					    ev_hdrp->ev_pp.pp_xid;
2422 				}
2423 			} else {
2424 				/*
2425 				 * post the fma event to the control fmd
2426 				 */
2427 				resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt,
2428 				    evp);
2429 				if (resp_code >= 0) {
2430 					etm_xid_posted_logged_ev =
2431 					    ev_hdrp->ev_pp.pp_xid;
2432 				}
2433 			}
2434 
2435 			evp = NULL;
2436 			enq_rv = etm_maybe_enq_response(hdl, conn,
2437 			    ev_hdrp, hdr_sz, resp_code);
2438 			bp += ev_hdrp->ev_lens[i];
2439 		} /* foreach FMA event in the body buffer */
2440 
2441 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
2442 
2443 		ctl_hdrp = (void*)ev_hdrp;
2444 
2445 		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
2446 		if (etm_debug_lvl >= 1) {
2447 			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
2448 			    (int)ctl_hdrp->ctl_pp.pp_sub_type,
2449 			    ctl_hdrp->ctl_pp.pp_xid);
2450 		}
2451 
2452 		/*
2453 		 * if we have a VER_NEGOT_REQ read the body and validate
2454 		 * the protocol version set contained therein,
2455 		 * otherwise we have a PING_REQ (which has no body)
2456 		 * and we [also] fall thru to the code which sends a
2457 		 * response msg if the pp_timeout field requested one
2458 		 */
2459 
2460 		if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) {
2461 
2462 			body_sz = ctl_hdrp->ctl_len;
2463 			body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2464 
2465 			if ((n = etm_io_op(hdl, "bad io read on ctl body",
2466 			    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2467 				should_reset_xport = (n == -ENOTACTIVE);
2468 				goto func_ret;
2469 			}
2470 
2471 			/* complain if version set completely incompatible */
2472 
2473 			for (i = 0; i < body_sz; i++) {
2474 				if ((body_buf[i] == ETM_PROTO_V1) ||
2475 				    (body_buf[i] == ETM_PROTO_V2) ||
2476 				    (body_buf[i] == ETM_PROTO_V3)) {
2477 					break;
2478 				}
2479 			}
2480 			if (i >= body_sz) {
2481 				etm_stats.etm_ver_bad.fmds_value.ui64++;
2482 				resp_code = (-EPROTO);
2483 			}
2484 
2485 		} /* if got version set request */
2486 
2487 		etm_stats.etm_rd_body_control.fmds_value.ui64++;
2488 
2489 		enq_rv = etm_maybe_enq_response(hdl, conn,
2490 		    ctl_hdrp, hdr_sz, resp_code);
2491 
2492 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
2493 
2494 		resp_hdrp = (void*)ev_hdrp;
2495 
2496 		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
2497 		if (etm_debug_lvl >= 1) {
2498 			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
2499 			    (int)resp_hdrp->resp_pp.pp_xid);
2500 		}
2501 
2502 		body_sz = resp_hdrp->resp_len;
2503 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2504 
2505 		if ((n = etm_io_op(hdl, "bad io read on resp len",
2506 		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2507 			should_reset_xport = (n == -ENOTACTIVE);
2508 			goto func_ret;
2509 		}
2510 
2511 		etm_stats.etm_rd_body_response.fmds_value.ui64++;
2512 
2513 		/*
2514 		 * look up the xid to interpret the response body
2515 		 *
2516 		 * ping is a nop; for ver negot confirm that a supported
2517 		 * protocol version was negotiated and remember which one
2518 		 */
2519 
2520 		if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
2521 		    (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) {
2522 			etm_stats.etm_xid_bad.fmds_value.ui64++;
2523 			goto func_ret;
2524 		}
2525 
2526 		if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) {
2527 			if ((body_buf[0] < ETM_PROTO_V1) ||
2528 			    (body_buf[0] > ETM_PROTO_V3)) {
2529 				etm_stats.etm_ver_bad.fmds_value.ui64++;
2530 				goto func_ret;
2531 			}
2532 			etm_resp_ver = body_buf[0];
2533 		} /* if have resp to last req to negotiate proto ver */
2534 
2535 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
2536 
2537 		sa_hdrp = (void*)ev_hdrp;
2538 
2539 		fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
2540 		if (etm_debug_lvl >= 1) {
2541 			fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
2542 			    (int)sa_hdrp->sa_pp.pp_sub_type,
2543 			    sa_hdrp->sa_pp.pp_xid);
2544 		}
2545 
2546 		body_sz = sa_hdrp->sa_len;
2547 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2548 
2549 		if ((n = etm_io_op(hdl, "bad io read on sa body",
2550 		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2551 			should_reset_xport = (n == -ENOTACTIVE);
2552 			goto func_ret;
2553 		}
2554 
2555 		etm_stats.etm_rd_body_alert.fmds_value.ui64++;
2556 
2557 		/*
2558 		 * now that we've read the entire ETM msg from the conn,
2559 		 * which avoids later ETM protocol framing errors if we didn't,
2560 		 * check for dup msg/xid against last good syslog posting,
2561 		 * if a dup then resend response but skip repost to syslog
2562 		 */
2563 
2564 		if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) {
2565 			enq_rv = etm_maybe_enq_response(hdl, conn,
2566 			    sa_hdrp, hdr_sz, 0);
2567 			fmd_hdl_debug(hdl, "info: skipping dup ALERT post "
2568 			    "xid 0x%x\n", etm_xid_posted_sa);
2569 			etm_stats.etm_rd_dup_alert.fmds_value.ui64++;
2570 			goto func_ret;
2571 		}
2572 
2573 		resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
2574 		    body_sz, body_buf);
2575 		if (resp_code >= 0) {
2576 			etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid;
2577 		}
2578 		enq_rv = etm_maybe_enq_response(hdl, conn,
2579 		    sa_hdrp, hdr_sz, resp_code);
2580 	} /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */
2581 
2582 func_ret:
2583 
2584 	if (etm_debug_lvl >= 2) {
2585 		etm_show_time(hdl, "post conn handle");
2586 	}
2587 
2588 	/*
2589 	 * if no responder ele was enqueued, close the conn now
2590 	 * and free the ETM msg hdr; the ETM msg body is not needed
2591 	 * by the responder thread and should always be freed here
2592 	 */
2593 
2594 	if (enq_rv <= 0) {
2595 		(void) etm_conn_close(hdl, "bad conn close after msg recv",
2596 		    conn);
2597 		if (ev_hdrp != NULL) {
2598 			fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
2599 		}
2600 	}
2601 	if (body_buf != NULL) {
2602 		fmd_hdl_free(hdl, body_buf, body_sz);
2603 	}
2604 	if (should_reset_xport) {
2605 		etm_reset_xport(hdl);
2606 	}
2607 } /* etm_handle_new_conn() */
2608 
2609 /*
2610  * etm_handle_bad_accept - recover from a failed connection acceptance
2611  */
2612 
2613 static void
2614 etm_handle_bad_accept(fmd_hdl_t *hdl, int nev)
2615 {
2616 	int	should_reset_xport; /* bool to reset xport */
2617 
2618 	should_reset_xport = (nev == -ENOTACTIVE);
2619 	fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev));
2620 	etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
2621 	(void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */
2622 	if (should_reset_xport) {
2623 		etm_reset_xport(hdl);
2624 	}
2625 } /* etm_handle_bad_accept() */
2626 
2627 /*
2628  * etm_server - loop forever accepting new connections
2629  *		using the given FMD handle,
2630  *		handling any ETM msgs sent from the other side
2631  *		via each such connection
2632  */
2633 
2634 static void
2635 etm_server(void *arg)
2636 {
2637 	etm_xport_conn_t	conn;		/* connection handle */
2638 	int			nev;		/* -errno val */
2639 	fmd_hdl_t		*hdl;		/* FMD handle */
2640 
2641 	hdl = arg;
2642 
2643 	fmd_hdl_debug(hdl, "info: connection server starting\n");
2644 
2645 	/*
2646 	 * Restore the checkpointed events and dispatch them before starting to
2647 	 * receive more events from the sp.
2648 	 */
2649 	etm_ckpt_recover(hdl);
2650 
2651 	while (!etm_is_dying) {
2652 
2653 		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
2654 			/* errno assumed set by above call */
2655 			nev = (-errno);
2656 			if (etm_is_dying) {
2657 				break;
2658 			}
2659 			etm_handle_bad_accept(hdl, nev);
2660 			continue;
2661 		}
2662 
2663 		/* handle the new message/connection, closing it when done */
2664 
2665 		etm_handle_new_conn(hdl, conn);
2666 
2667 	} /* while accepting new connections until ETM dies */
2668 
2669 	/* ETM is dying (probably due to "fmadm unload etm") */
2670 
2671 	fmd_hdl_debug(hdl, "info: connection server is dying\n");
2672 
2673 } /* etm_server() */
2674 
2675 /*
2676  * etm_responder - loop forever waiting for new responder queue elements
2677  *		to be enqueued, for each one constructing and sending
2678  *		an ETM response msg to the other side, and closing its
2679  *		associated connection when appropriate
2680  *
2681  *	this thread exists to ensure that the etm_server() thread
2682  *	never pends indefinitely waiting on the xport write lock, and is
2683  *	hence always available to accept new connections and handle
2684  *	incoming messages
2685  *
2686  *	this design relies on the fact that each connection accepted and
2687  *	returned by the ETM xport layer is unique, and each can be closed
2688  *	independently of the others while multiple connections are
2689  *	outstanding
2690  */
2691 
2692 static void
2693 etm_responder(void *arg)
2694 {
2695 	ssize_t			n;		/* gen use */
2696 	fmd_hdl_t		*hdl;		/* FMD handle */
2697 	etm_resp_q_ele_t	rqe;		/* responder queue ele */
2698 
2699 	hdl = arg;
2700 
2701 	fmd_hdl_debug(hdl, "info: responder server starting\n");
2702 
2703 	while (!etm_is_dying) {
2704 
2705 		(void) pthread_mutex_lock(&etm_resp_q_lock);
2706 
2707 		while (etm_resp_q_cur_len == 0) {
2708 			(void) pthread_cond_wait(&etm_resp_q_cv,
2709 			    &etm_resp_q_lock);
2710 			if (etm_is_dying) {
2711 				(void) pthread_mutex_unlock(&etm_resp_q_lock);
2712 				goto func_ret;
2713 			}
2714 		} /* while the responder queue is empty, wait to be nudged */
2715 
2716 		/*
2717 		 * for every responder ele that has been enqueued,
2718 		 * dequeue and send it as an ETM response msg,
2719 		 * closing its associated conn and freeing its hdr
2720 		 *
2721 		 * enter the queue draining loop holding the responder
2722 		 * queue lock, but do not hold the lock indefinitely
2723 		 * (the actual send may pend us indefinitely),
2724 		 * so that other threads will never pend for long
2725 		 * trying to enqueue a new element
2726 		 */
2727 
2728 		while (etm_resp_q_cur_len > 0) {
2729 
2730 			(void) etm_resp_q_deq(hdl, &rqe);
2731 
2732 			if ((etm_resp_q_cur_len + 1) == etm_resp_q_max_len)
2733 				(void) pthread_cond_signal(&etm_resp_q_cv);
2734 
2735 			(void) pthread_mutex_unlock(&etm_resp_q_lock);
2736 
2737 			if ((n = etm_send_response(hdl, rqe.rqe_conn,
2738 			    rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
2739 				fmd_hdl_error(hdl, "error: bad resp send "
2740 				    "errno %d\n", (-n));
2741 			}
2742 
2743 			(void) etm_conn_close(hdl, "bad conn close after resp",
2744 			    rqe.rqe_conn);
2745 			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
2746 
2747 			if (etm_is_dying) {
2748 				goto func_ret;
2749 			}
2750 			(void) pthread_mutex_lock(&etm_resp_q_lock);
2751 
2752 		} /* while draining the responder queue */
2753 
2754 		(void) pthread_mutex_unlock(&etm_resp_q_lock);
2755 
2756 	} /* while awaiting and sending resp msgs until ETM dies */
2757 
2758 func_ret:
2759 
2760 	/* ETM is dying (probably due to "fmadm unload etm") */
2761 
2762 	fmd_hdl_debug(hdl, "info: responder server is dying\n");
2763 
2764 	(void) pthread_mutex_lock(&etm_resp_q_lock);
2765 	if (etm_resp_q_cur_len > 0) {
2766 		fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
2767 		    (int)etm_resp_q_cur_len);
2768 		while (etm_resp_q_cur_len > 0) {
2769 			(void) etm_resp_q_deq(hdl, &rqe);
2770 			(void) etm_conn_close(hdl, "bad conn close after deq",
2771 			    rqe.rqe_conn);
2772 			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
2773 		}
2774 	}
2775 	(void) pthread_mutex_unlock(&etm_resp_q_lock);
2776 
2777 } /* etm_responder() */
2778 
2779 static void *
2780 etm_init_alloc(size_t size)
2781 {
2782 	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
2783 }
2784 
2785 static void
2786 etm_init_free(void *addr, size_t size)
2787 {
2788 	fmd_hdl_free(init_hdl, addr, size);
2789 }
2790 
2791 /*
2792  * ---------------------root ldom support functions -----------------------
2793  */
2794 
2795 /*
2796  * use a static array async_event_q instead of dynamicaly allocated mem  queue
2797  * for etm_async_q_enq and etm_async_q_deq.
2798  * This is not running in an fmd aux thread, can't use the fmd_hdl_* funcs.
2799  * caller needs to grab the mutex lock before calling this func.
2800  * return >0 for success, or -errno value
2801  */
2802 static int
2803 etm_async_q_enq(etm_async_event_ele_t *async_e)
2804 {
2805 
2806 	if (etm_async_q_cur_len >= etm_async_q_max_len) {
2807 		/* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */
2808 		return (-E2BIG);
2809 	}
2810 
2811 	(void) memcpy(&async_event_q[etm_async_q_tail], async_e,
2812 	    sizeof (*async_e));
2813 
2814 	etm_async_q_tail++;
2815 	if (etm_async_q_tail == etm_async_q_max_len) {
2816 		etm_async_q_tail = 0;
2817 	}
2818 	etm_async_q_cur_len++;
2819 
2820 /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */
2821 
2822 	return (1);
2823 
2824 } /* etm_async_q_enq() */
2825 
2826 
2827 static int
2828 etm_async_q_deq(etm_async_event_ele_t *async_e)
2829 {
2830 
2831 	if (etm_async_q_cur_len == 0) {
2832 		/* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */
2833 		return (-ENOENT);
2834 	}
2835 
2836 	(void) memcpy(async_e, &async_event_q[etm_async_q_head],
2837 	    sizeof (*async_e));
2838 
2839 	etm_async_q_head++;
2840 	if (etm_async_q_head == etm_async_q_max_len) {
2841 		etm_async_q_head = 0;
2842 	}
2843 	etm_async_q_cur_len--;
2844 
2845 	return (1);
2846 } /* etm_async_q_deq */
2847 
2848 
2849 /*
2850  * setting up the fields in iosvc at DS_REG_CB time
2851  */
2852 void
2853 etm_iosvc_setup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
2854 	etm_async_event_ele_t *async_e)
2855 {
2856 	iosvc->ds_hdl = async_e->ds_hdl;
2857 	iosvc->cur_send_xid = 0;
2858 	iosvc->xid_posted_ev = 0;
2859 	iosvc->start_sending_Q = 0;
2860 
2861 	/*
2862 	 * open the fmd xprt if it
2863 	 * hasn't been previously opened
2864 	 */
2865 	fmd_hdl_debug(fmd_hdl,  "info: before fmd_xprt_open ldom_name is %s\n",
2866 	    async_e->ldom_name);
2867 
2868 	if (iosvc->fmd_xprt == NULL) {
2869 		iosvc->fmd_xprt = fmd_xprt_open(fmd_hdl, flags, NULL, iosvc);
2870 	}
2871 
2872 	iosvc->thr_is_dying = 0;
2873 	if (iosvc->recv_tid == NULL) {
2874 		iosvc->recv_tid = fmd_thr_create(fmd_hdl,
2875 		    etm_recv_from_remote_root, iosvc);
2876 	}
2877 	if (iosvc->send_tid == NULL) {
2878 		iosvc->send_tid = fmd_thr_create(fmd_hdl,
2879 		    etm_send_to_remote_root, iosvc);
2880 	}
2881 } /* etm_iosvc_setup() */
2882 
2883 
2884 /*
2885  * ds userland interface ds_reg_cb  callback func
2886  */
2887 
2888 /* ARGSUSED */
2889 static void
2890 etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver,
2891 	ds_domain_hdl_t dhdl)
2892 {
2893 	etm_async_event_ele_t	async_ele;
2894 
2895 
2896 	/*
2897 	 * do version check here.
2898 	 * checked the ver received here against etm_iosvc_vers here
2899 	 */
2900 	if (etm_iosvc_vers[0].major != ver->major ||
2901 	    etm_iosvc_vers[0].minor != ver->minor) {
2902 		/*
2903 		 * can't log an fmd debug msg,
2904 		 * not running in an fmd aux thread
2905 		 */
2906 		return;
2907 	}
2908 
2909 	/*
2910 	 * the callback should have a valid ldom_name
2911 	 * can't log fmd debugging msg here since this is not in an fmd aux
2912 	 * thread. log fmd debug msg in etm_async_event_handle()
2913 	 */
2914 	async_ele.ds_hdl = ds_hdl;
2915 	async_ele.dhdl = dhdl;
2916 	async_ele.ldom_name[0] = '\0';
2917 	async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB;
2918 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2919 	(void) etm_async_q_enq(&async_ele);
2920 	if (etm_async_q_cur_len == 1)
2921 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2922 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2923 
2924 } /* etm_iosvc_reg_handler */
2925 
2926 
2927 /*
2928  * ds userland interface ds_unreg_cb  callback func
2929  */
2930 
2931 /*ARGSUSED*/
2932 static void
2933 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg)
2934 {
2935 	etm_async_event_ele_t	async_ele;
2936 
2937 	/*
2938 	 * fill in async_ele and enqueue async_ele
2939 	 */
2940 	async_ele.ldom_name[0] = '\0';
2941 	async_ele.ds_hdl = hdl;
2942 	async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB;
2943 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2944 	(void) etm_async_q_enq(&async_ele);
2945 	if (etm_async_q_cur_len == 1)
2946 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2947 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2948 } /* etm_iosvc_unreg_handler */
2949 
2950 /*
2951  * ldom event registration callback func
2952  */
2953 
2954 /* ARGSUSED */
2955 static void
2956 ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data)
2957 {
2958 	etm_async_event_ele_t	async_ele;
2959 
2960 	/*
2961 	 * the callback will have a valid ldom_name
2962 	 */
2963 	async_ele.ldom_name[0] = '\0';
2964 	if (ldom_name)
2965 		(void) strcpy(async_ele.ldom_name, ldom_name);
2966 	async_ele.ds_hdl = DS_INVALID_HDL;
2967 
2968 	/*
2969 	 * fill in async_ele and enq async_ele
2970 	 */
2971 	switch (event) {
2972 	case LDOM_EVENT_BIND:
2973 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND;
2974 		break;
2975 	case LDOM_EVENT_UNBIND:
2976 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND;
2977 		break;
2978 	case LDOM_EVENT_ADD:
2979 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD;
2980 		break;
2981 	case LDOM_EVENT_REMOVE:
2982 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE;
2983 		break;
2984 	default:
2985 		/*
2986 		 * for all other ldom events, do nothing
2987 		 */
2988 		return;
2989 	} /* switch (event) */
2990 
2991 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2992 	(void) etm_async_q_enq(&async_ele);
2993 	if (etm_async_q_cur_len == 1)
2994 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2995 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2996 
2997 } /* ldom_event_handler */
2998 
2999 
3000 /*
3001  * This is running as an fmd aux thread.
3002  * This is the func that actually handle the events, which include:
3003  * 1. ldom events. ldom events are  on Control Domain only
3004  * 2. any DS userland callback funcs
3005  * these events are already Q-ed in the async_event_ele_q
3006  * deQ and process the events accordingly
3007  */
3008 static void
3009 etm_async_event_handler(void *arg)
3010 {
3011 
3012 	fmd_hdl_t		*fmd_hdl = (fmd_hdl_t *)arg;
3013 	etm_iosvc_t		*iosvc;		/* ptr 2 iosvc struct */
3014 	etm_async_event_ele_t	async_e;
3015 
3016 	fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n");
3017 	/*
3018 	 *  handle etm is not dying and Q len > 0
3019 	 */
3020 	while (!etm_is_dying) {
3021 		/*
3022 		 * grab the lock to check the Q len
3023 		 */
3024 		(void) pthread_mutex_lock(&etm_async_event_q_lock);
3025 		fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n",
3026 		    etm_async_q_cur_len);
3027 
3028 		while (etm_async_q_cur_len > 0) {
3029 			(void) etm_async_q_deq(&async_e);
3030 			(void) pthread_mutex_unlock(&etm_async_event_q_lock);
3031 			fmd_hdl_debug(fmd_hdl,
3032 			    "info: processing an async event type %d ds_hdl"
3033 			    " %d\n", async_e.event_type, async_e.ds_hdl);
3034 			if (async_e.ldom_name[0] != '\0') {
3035 				fmd_hdl_debug(fmd_hdl,
3036 				    "info: procssing async evt ldom_name %s\n",
3037 				    async_e.ldom_name);
3038 			}
3039 
3040 			/*
3041 			 * at this point, if async_e.ldom_name is not NULL,
3042 			 * we have a valid iosvc strcut ptr.
3043 			 * the only time async_e.ldom_name is NULL is  at
3044 			 * ds_unreg_cb()
3045 			 */
3046 			switch (async_e.event_type)  {
3047 			case ETM_ASYNC_EVENT_LDOM_UNBIND:
3048 			case ETM_ASYNC_EVENT_LDOM_REMOVE:
3049 				/*
3050 				 * we have a valid ldom_name,
3051 				 * etm_lookup_struct(ldom_name)
3052 				 * do nothing if can't find an iosvc
3053 				 * no iosvc clean up to do
3054 				 */
3055 				(void) pthread_mutex_lock(
3056 				    &iosvc_list_lock);
3057 				iosvc = etm_iosvc_lookup(fmd_hdl,
3058 				    async_e.ldom_name,
3059 				    async_e.ds_hdl, B_FALSE);
3060 				if (iosvc == NULL) {
3061 					fmd_hdl_debug(fmd_hdl,
3062 					    "error: can't find iosvc for ldom "
3063 					    "name %s\n",
3064 					    async_e.ldom_name);
3065 					(void) pthread_mutex_unlock(
3066 					    &iosvc_list_lock);
3067 					break;
3068 				}
3069 				/*
3070 				 * Clean up the queue, delete all messages and
3071 				 * do not persist checkpointed fma events.
3072 				 */
3073 				etm_iosvc_cleanup(fmd_hdl, iosvc, B_TRUE,
3074 				    B_TRUE);
3075 				(void) pthread_mutex_unlock(
3076 				    &iosvc_list_lock);
3077 				break;
3078 
3079 			case ETM_ASYNC_EVENT_LDOM_BIND:
3080 
3081 				/*
3082 				 * create iosvc if it has not been
3083 				 * created
3084 				 * async_e.ds_hdl is invalid
3085 				 * async_e.ldom_name is valid ldom_name
3086 				 */
3087 				(void) pthread_mutex_lock(
3088 				    &iosvc_list_lock);
3089 				iosvc = etm_iosvc_lookup(fmd_hdl,
3090 				    async_e.ldom_name,
3091 				    async_e.ds_hdl, B_TRUE);
3092 				if (iosvc == NULL) {
3093 					fmd_hdl_debug(fmd_hdl,
3094 					    "error: can't create iosvc for "
3095 					    "async evnt %d\n",
3096 					    async_e.event_type);
3097 					(void) pthread_mutex_unlock(
3098 					    &iosvc_list_lock);
3099 					break;
3100 				}
3101 				(void) strcpy(iosvc->ldom_name,
3102 				    async_e.ldom_name);
3103 				iosvc->ds_hdl = async_e.ds_hdl;
3104 				(void) pthread_mutex_unlock(
3105 				    &iosvc_list_lock);
3106 				break;
3107 
3108 			case ETM_ASYNC_EVENT_DS_REG_CB:
3109 				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3110 					/*
3111 					 * find the root ldom name from
3112 					 * ldom domain hdl/id
3113 					 */
3114 					if (etm_filter_find_ldom_name(
3115 					    fmd_hdl, async_e.dhdl,
3116 					    async_e.ldom_name,
3117 					    MAX_LDOM_NAME) != 0) {
3118 						fmd_hdl_debug(fmd_hdl,
3119 						    "error: can't find root "
3120 						    "domain name from did %d\n",
3121 						    async_e.dhdl);
3122 						break;
3123 					} else {
3124 						fmd_hdl_debug(fmd_hdl,
3125 						    "info: etm_filter_find_"
3126 						    "ldom_name returned %s\n",
3127 						    async_e.ldom_name);
3128 					}
3129 					/*
3130 					 * now we should have a valid
3131 					 * root domain name.
3132 					 * lookup the iosvc struct
3133 					 * associated with the ldom_name
3134 					 * and init the iosvc struct
3135 					 */
3136 					(void) pthread_mutex_lock(
3137 					    &iosvc_list_lock);
3138 					iosvc = etm_iosvc_lookup(
3139 					    fmd_hdl, async_e.ldom_name,
3140 					    async_e.ds_hdl, B_TRUE);
3141 					if (iosvc == NULL) {
3142 						fmd_hdl_debug(fmd_hdl,
3143 						    "error: can't create iosvc "
3144 						    "for async evnt %d\n",
3145 						    async_e.event_type);
3146 						(void) pthread_mutex_unlock(
3147 						    &iosvc_list_lock);
3148 						break;
3149 					}
3150 
3151 					etm_iosvc_setup(fmd_hdl, iosvc,
3152 					    &async_e);
3153 					(void) pthread_mutex_unlock(
3154 					    &iosvc_list_lock);
3155 				} else {
3156 					iosvc = &io_svc;
3157 					(void) strcpy(iosvc->ldom_name,
3158 					    async_e.ldom_name);
3159 
3160 					etm_iosvc_setup(fmd_hdl, iosvc,
3161 					    &async_e);
3162 				}
3163 				break;
3164 
3165 			case ETM_ASYNC_EVENT_DS_UNREG_CB:
3166 				/*
3167 				 * decide which iosvc struct to perform
3168 				 * this UNREG callback on.
3169 				 */
3170 				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3171 					(void) pthread_mutex_lock(
3172 					    &iosvc_list_lock);
3173 					/*
3174 					 * lookup the iosvc struct w/
3175 					 * ds_hdl
3176 					 */
3177 					iosvc = etm_iosvc_lookup(
3178 					    fmd_hdl, async_e.ldom_name,
3179 					    async_e.ds_hdl, B_FALSE);
3180 					if (iosvc == NULL) {
3181 						fmd_hdl_debug(fmd_hdl,
3182 						    "error: can't find iosvc "
3183 						    "for async evnt %d\n",
3184 						    async_e.event_type);
3185 					(void) pthread_mutex_unlock(
3186 					    &iosvc_list_lock);
3187 						break;
3188 					}
3189 
3190 					/*
3191 					 * ds_hdl and fmd_xprt_open
3192 					 * go hand to hand together
3193 					 * after unreg_cb,
3194 					 * ds_hdl is INVALID and
3195 					 * fmd_xprt is closed.
3196 					 * the ldom name and the msg Q
3197 					 * remains in iosvc_list
3198 					 */
3199 					if (iosvc->ldom_name != '\0')
3200 						fmd_hdl_debug(fmd_hdl,
3201 						    "info: iosvc  w/ ldom_name "
3202 						    "%s \n", iosvc->ldom_name);
3203 
3204 					/*
3205 					 * destroy send/recv threads and
3206 					 * other clean up on Control side.
3207 					 */
3208 					etm_iosvc_cleanup(fmd_hdl, iosvc,
3209 					    B_FALSE, B_FALSE);
3210 					(void) pthread_mutex_unlock(
3211 					    &iosvc_list_lock);
3212 				} else {
3213 					iosvc = &io_svc;
3214 					/*
3215 					 * destroy send/recv threads and
3216 					 * then clean up on Root side.
3217 					 */
3218 					etm_iosvc_cleanup(fmd_hdl, iosvc,
3219 					    B_FALSE, B_FALSE);
3220 				}
3221 				break;
3222 
3223 			default:
3224 				/*
3225 				 * for all other events, etm doesn't care.
3226 				 * already logged an fmd info msg w/
3227 				 * the event type. Do nothing here.
3228 				 */
3229 				break;
3230 			} /* switch (async_e.event_type) */
3231 
3232 			if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3233 				etm_filter_handle_ldom_event(fmd_hdl,
3234 				    async_e.event_type, async_e.ldom_name);
3235 			}
3236 
3237 			/*
3238 			 * grab the lock to check the q length again
3239 			 */
3240 			(void) pthread_mutex_lock(&etm_async_event_q_lock);
3241 
3242 			if (etm_is_dying) {
3243 				break;
3244 			}
3245 		}	/* etm_async_q_cur_len */
3246 
3247 		/*
3248 		 * we have the mutex lock at this point, whether
3249 		 * . etm_is_dying  and/or
3250 		 * . q_len == 0
3251 		 */
3252 		if (!etm_is_dying && etm_async_q_cur_len == 0) {
3253 			fmd_hdl_debug(fmd_hdl,
3254 			    "info: cond wait on async_event_q_cv\n");
3255 			(void) pthread_cond_wait(&etm_async_event_q_cv,
3256 			    &etm_async_event_q_lock);
3257 			fmd_hdl_debug(fmd_hdl,
3258 			    "info: cond wait on async_event_q_cv rtns\n");
3259 		}
3260 		(void) pthread_mutex_unlock(&etm_async_event_q_lock);
3261 	} /* etm_is_dying */
3262 
3263 	fmd_hdl_debug(fmd_hdl,
3264 	    "info: etm async event handler thread exiting\n");
3265 
3266 } /* etm_async_event_handler */
3267 
3268 /*
3269  * deQ what's in iosvc msg Q
3270  * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg()
3271  * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event
3272  */
3273 static void
3274 etm_send_to_remote_root(void *arg)
3275 {
3276 
3277 	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3278 	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
3279 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* hdr for FMA_EVENT */
3280 	fmd_hdl_t		*fmd_hdl = init_hdl;	/* fmd handle */
3281 
3282 
3283 	fmd_hdl_debug(fmd_hdl,
3284 	    "info: send to remote iosvc starting w/ ldom_name %s\n",
3285 	    iosvc->ldom_name);
3286 
3287 	/*
3288 	 *  loop forever until etm_is_dying or thr_is_dying
3289 	 */
3290 	while (!etm_is_dying && !iosvc->thr_is_dying) {
3291 		if (iosvc->ds_hdl != DS_INVALID_HDL &&
3292 		    iosvc->start_sending_Q > 0) {
3293 			(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3294 			while (iosvc->msg_q_cur_len > 0 &&
3295 			    iosvc->ds_hdl != DS_INVALID_HDL)  {
3296 				(void) etm_iosvc_msg_deq(fmd_hdl, iosvc,
3297 				    &msg_ele);
3298 				if (etm_debug_lvl >= 3) {
3299 					fmd_hdl_debug(fmd_hdl, "info: valid "
3300 					    "ds_hdl before ds_send_msg \n");
3301 				}
3302 				(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3303 
3304 				iosvc->ack_ok = 0;
3305 				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3306 				    ((ptrdiff_t)msg_ele.msg);
3307 				ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
3308 				while (!iosvc->ack_ok &&
3309 				    iosvc->ds_hdl != DS_INVALID_HDL &&
3310 				    !etm_is_dying) {
3311 					/*
3312 					 * call ds_send_msg() to send the msg,
3313 					 * wait for the recv end to send the
3314 					 * resp msg back.
3315 					 * If resp msg is recv-ed, ack_ok
3316 					 * will be set to 1.
3317 					 * otherwise, retry.
3318 					 */
3319 					if (etm_send_ds_msg(fmd_hdl, B_TRUE,
3320 					    iosvc, &msg_ele, ev_hdrp) < 0) {
3321 						continue;
3322 					}
3323 
3324 					if (etm_is_dying || iosvc->thr_is_dying)
3325 						break;
3326 				}
3327 
3328 				/*
3329 				 * if out of the while loop but !ack_ok, ie,
3330 				 * ds_hdl becomes invalid at some point
3331 				 * while waiting the resp msg, we need to put
3332 				 * the msg back to the head of the Q.
3333 				 */
3334 				if (!iosvc->ack_ok) {
3335 					(void) pthread_mutex_lock(
3336 					    &iosvc->msg_q_lock);
3337 					/*
3338 					 * put the msg back to the head of Q.
3339 					 * If the Q is full at this point,
3340 					 * drop the msg at the tail, enq this
3341 					 * msg to the head.
3342 					 */
3343 					etm_msg_enq_head(fmd_hdl, iosvc,
3344 					    &msg_ele);
3345 					(void) pthread_mutex_unlock(
3346 					    &iosvc->msg_q_lock);
3347 				}
3348 
3349 				/*
3350 				 *
3351 				 * grab the lock to check the Q len again
3352 				 */
3353 				(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3354 				if (etm_is_dying || iosvc->thr_is_dying) {
3355 					break;
3356 				}
3357 			} /* while dequeing iosvc msgs to send */
3358 
3359 			/*
3360 			 * we have the mutex lock for msg_q_lock at this point
3361 			 * we are here because
3362 			 * 1) q_len == 0: then wait on the cv for Q to be filled
3363 			 * 2) etm_is_dying
3364 			 */
3365 			if (!etm_is_dying && !iosvc->thr_is_dying &&
3366 			    iosvc->msg_q_cur_len == 0) {
3367 				fmd_hdl_debug(fmd_hdl,
3368 				    "info: waiting on msg_q_cv\n");
3369 				(void) pthread_cond_wait(&iosvc->msg_q_cv,
3370 				    &iosvc->msg_q_lock);
3371 			}
3372 			(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3373 			if (etm_is_dying || iosvc->thr_is_dying)  {
3374 				break;
3375 			}
3376 		} else {
3377 			(void) etm_sleep(1);
3378 		} /* wait for the start_sendingQ > 0 */
3379 	} /* etm_is_dying or thr_is_dying */
3380 	fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n");
3381 } /* etm_send_to_remote_root */
3382 
3383 
3384 /*
3385  * receive etm msgs from the remote root ldom by calling ds_recv_msg()
3386  * if FMA events/ereports, call fmd_xprt_post() to post to fmd
3387  * send ACK back by calling ds_send_msg()
3388  */
3389 static void
3390 etm_recv_from_remote_root(void *arg)
3391 {
3392 	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3393 	etm_proto_v1_pp_t	*pp;		/* protocol preamble */
3394 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
3395 	etm_proto_v1_resp_hdr_t	*resp_hdrp;	/* for RESPONSE msg */
3396 	int32_t			resp_code = 0;	/* default is success */
3397 	int32_t			rc;		/* return value */
3398 	size_t			maxlen = MAXLEN;
3399 						/* max msg len */
3400 	char 			msgbuf[MAXLEN];	/* recv msg buf */
3401 	size_t			msg_size;	/* recv msg size */
3402 	size_t			hdr_sz;		/* sizeof *hdrp */
3403 	size_t			evsz;		/* sizeof *evp */
3404 	size_t			fma_event_size;	/* sizeof FMA event  */
3405 	nvlist_t 		*evp;		/* ptr to the nvlist */
3406 	char			*buf;		/* ptr to the nvlist */
3407 	static uint32_t		mem_alloc = 0;	/* indicate if alloc mem */
3408 	char 			*msg;		/* ptr to alloc mem */
3409 	fmd_hdl_t		*fmd_hdl = init_hdl;
3410 
3411 
3412 
3413 	fmd_hdl_debug(fmd_hdl,
3414 	    "info: recv from remote iosvc starting with ldom name %s \n",
3415 	    iosvc->ldom_name);
3416 
3417 	/*
3418 	 * loop forever until etm_is_dying or the thread is dying
3419 	 */
3420 
3421 	msg = msgbuf;
3422 	while (!etm_is_dying && !iosvc->thr_is_dying) {
3423 		if (iosvc->ds_hdl == DS_INVALID_HDL) {
3424 			fmd_hdl_debug(fmd_hdl,
3425 			    "info: ds_hdl is invalid in recv thr\n");
3426 			(void) etm_sleep(1);
3427 			continue;
3428 		}
3429 
3430 		/*
3431 		 * for now, there are FMA_EVENT and ACK msg type.
3432 		 * use FMA_EVENT buf as the maxlen, hdr+1 fma event.
3433 		 * FMA_EVENT is big enough to hold an ACK msg.
3434 		 * the actual msg size received is in msg_size.
3435 		 */
3436 		rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size);
3437 		if (rc == EFBIG) {
3438 			fmd_hdl_debug(fmd_hdl,
3439 			    "info: ds_recv_msg needs mem the size of %d\n",
3440 			    msg_size);
3441 			msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP);
3442 			mem_alloc = 1;
3443 		} else if (rc == 0) {
3444 			fmd_hdl_debug(fmd_hdl,
3445 			    "info: ds_recv_msg received a msg ok\n");
3446 			/*
3447 			 * check the magic # in  msg.hdr
3448 			 */
3449 			pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg);
3450 			if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) {
3451 				fmd_hdl_debug(fmd_hdl,
3452 				    "info: bad ds recv on magic\n");
3453 				continue;
3454 			}
3455 
3456 			/*
3457 			 * check the msg type against msg_size to be sure
3458 			 * that received msg is not a truncated msg
3459 			 */
3460 			if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
3461 
3462 				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3463 				    ((ptrdiff_t)msg);
3464 				fmd_hdl_debug(fmd_hdl, "info: ds received "
3465 				    "FMA EVENT xid=%d msg_size=%d\n",
3466 				    ev_hdrp->ev_pp.pp_xid, msg_size);
3467 				hdr_sz = sizeof (*ev_hdrp) +
3468 				    1*(sizeof (ev_hdrp->ev_lens[0]));
3469 				fma_event_size = hdr_sz + ev_hdrp->ev_lens[0];
3470 				if (fma_event_size != msg_size) {
3471 					fmd_hdl_debug(fmd_hdl, "info: wrong "
3472 					    "ev msg size received\n");
3473 					continue;
3474 					/*
3475 					 * Simply  do nothing. The send side
3476 					 * will timedcond_wait waiting on the
3477 					 * resp msg will timeout and
3478 					 * re-send the same msg.
3479 					 */
3480 				}
3481 				if (etm_debug_lvl >= 3) {
3482 					fmd_hdl_debug(fmd_hdl,  "info: recv msg"
3483 					    " size %d hdrsz %d evp size %d\n",
3484 					    msg_size, hdr_sz,
3485 					    ev_hdrp->ev_lens[0]);
3486 				}
3487 
3488 				if (ev_hdrp->ev_pp.pp_xid !=
3489 				    iosvc->xid_posted_ev) {
3490 					/*
3491 					 * different from last xid posted to
3492 					 * fmd, post to fmd now.
3493 					 */
3494 					buf = msg + hdr_sz;
3495 					rc = nvlist_unpack(buf,
3496 					    ev_hdrp->ev_lens[0], &evp, 0);
3497 					rc = nvlist_size(evp, &evsz,
3498 					    NV_ENCODE_XDR);
3499 					fmd_hdl_debug(fmd_hdl,
3500 					    "info: evp size %d before fmd"
3501 					    "post\n", evsz);
3502 
3503 					if ((rc = etm_post_to_fmd(fmd_hdl,
3504 					    iosvc->fmd_xprt, evp)) >= 0) {
3505 						fmd_hdl_debug(fmd_hdl,
3506 						    "info: xid posted to fmd %d"
3507 						    "\n",
3508 						    ev_hdrp->ev_pp.pp_xid);
3509 						iosvc->xid_posted_ev =
3510 						    ev_hdrp->ev_pp.pp_xid;
3511 					}
3512 				}
3513 
3514 				/*
3515 				 * ready to  send the RESPONSE msg back
3516 				 * reuse the msg buffer as the response buffer
3517 				 */
3518 				resp_hdrp = (etm_proto_v1_resp_hdr_t *)
3519 				    ((ptrdiff_t)msg);
3520 				resp_hdrp->resp_pp.pp_msg_type =
3521 				    ETM_MSG_TYPE_RESPONSE;
3522 
3523 				resp_hdrp->resp_code = resp_code;
3524 				resp_hdrp->resp_len = sizeof (*resp_hdrp);
3525 
3526 				/*
3527 				 * send the whole response msg in one send
3528 				 */
3529 				if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg,
3530 				    sizeof (*resp_hdrp)) != 0) {
3531 					fmd_hdl_debug(fmd_hdl,
3532 					    "info: send response msg failed\n");
3533 				} else {
3534 					fmd_hdl_debug(fmd_hdl,
3535 					    "info: ds send resp msg ok"
3536 					    "size %d\n", sizeof (*resp_hdrp));
3537 				}
3538 			} else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
3539 				fmd_hdl_debug(fmd_hdl,
3540 				    "info: ds received respond msg xid=%d"
3541 				    "msg_size=%d for ldom %s\n", pp->pp_xid,
3542 				    msg_size, iosvc->ldom_name);
3543 				if (sizeof (*resp_hdrp) != msg_size) {
3544 					fmd_hdl_debug(fmd_hdl,
3545 					    "info: wrong resp msg size"
3546 					    "received\n");
3547 					fmd_hdl_debug(fmd_hdl,
3548 					    "info: resp msg size %d recv resp"
3549 					    "msg size %d\n",
3550 					    sizeof (*resp_hdrp), msg_size);
3551 					continue;
3552 				}
3553 				/*
3554 				 * is the pp.pp_xid == iosvc->cur_send_xid+1,
3555 				 * if so, nudge the send routine to send next
3556 				 */
3557 				if (pp->pp_xid != iosvc->cur_send_xid+1) {
3558 					fmd_hdl_debug(fmd_hdl,
3559 					    "info: ds received resp msg xid=%d "
3560 					    "doesn't match cur_send_id=%d\n",
3561 					    pp->pp_xid, iosvc->cur_send_xid+1);
3562 					continue;
3563 				}
3564 				(void) pthread_mutex_lock(&iosvc->msg_ack_lock);
3565 				iosvc->ack_ok = 1;
3566 				(void) pthread_cond_signal(&iosvc->msg_ack_cv);
3567 				(void) pthread_mutex_unlock(
3568 				    &iosvc->msg_ack_lock);
3569 				fmd_hdl_debug(fmd_hdl,
3570 				    "info: signaling msg_ack_cv\n");
3571 			} else {
3572 				/*
3573 				 * place holder for future msg types
3574 				 */
3575 				fmd_hdl_debug(fmd_hdl,
3576 				    "info: ds received unrecognized msg\n");
3577 			}
3578 			if (mem_alloc) {
3579 				fmd_hdl_free(fmd_hdl, msg, msg_size);
3580 				mem_alloc = 0;
3581 				msg = msgbuf;
3582 			}
3583 		} else {
3584 			if (etm_debug_lvl >= 3) {
3585 				fmd_hdl_debug(fmd_hdl,
3586 				    "info: ds_recv_msg() failed\n");
3587 			}
3588 		} /* ds_recv_msg() returns */
3589 	} /* etm_is_dying */
3590 
3591 	/*
3592 	 * need to free the mem allocated in msg upon exiting the thread
3593 	 */
3594 	if (mem_alloc) {
3595 		fmd_hdl_free(fmd_hdl, msg, msg_size);
3596 		mem_alloc = 0;
3597 		msg = msgbuf;
3598 	}
3599 	fmd_hdl_debug(fmd_hdl, "info; etm recv thread exiting \n");
3600 } /* etm_recv_from_remote_root */
3601 
3602 
3603 
3604 /*
3605  * etm_ds_init
3606  *		initialize DS services function pointers by calling
3607  *		dlopen() followed by  dlsym() for each ds func.
3608  *		if any dlopen() or dlsym() call fails, return -ENOENT
3609  *		return >0 for successs, -ENOENT for failure
3610  */
3611 static int
3612 etm_ds_init(fmd_hdl_t *hdl)
3613 {
3614 	int rc = 0;
3615 
3616 	if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) {
3617 		fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path);
3618 		return (-ENOENT);
3619 	}
3620 
3621 	etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3622 	    dlsym(etm_dl_hdl, "ds_svc_reg");
3623 	if (etm_ds_svc_reg == NULL) {
3624 		fmd_hdl_debug(hdl,
3625 		    "error: failed to dlsym ds_svc_reg() w/ error %s\n",
3626 		    dlerror());
3627 		rc = -ENOENT;
3628 	}
3629 
3630 
3631 	etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3632 	    dlsym(etm_dl_hdl, "ds_clnt_reg");
3633 	if (etm_ds_clnt_reg == NULL) {
3634 		fmd_hdl_debug(hdl,
3635 		    "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno);
3636 		rc = -ENOENT;
3637 	}
3638 
3639 	etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))
3640 	    dlsym(etm_dl_hdl, "ds_send_msg");
3641 	if (etm_ds_send_msg == NULL) {
3642 		fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n");
3643 		rc = -ENOENT;
3644 	}
3645 
3646 	etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen,
3647 	    size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg");
3648 	if (etm_ds_recv_msg == NULL) {
3649 		fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n");
3650 		rc = -ENOENT;
3651 	}
3652 
3653 	etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini");
3654 	if (etm_ds_fini == NULL) {
3655 		fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n");
3656 		rc = -ENOENT;
3657 	}
3658 
3659 	if (rc == -ENOENT) {
3660 		(void) dlclose(etm_dl_hdl);
3661 	}
3662 	return (rc);
3663 
3664 } /* etm_ds_init() */
3665 
3666 
3667 /*
3668  * -------------------------- FMD entry points -------------------------------
3669  */
3670 
3671 /*
3672  * _fmd_init - initialize the transport for use by ETM and start the
3673  *		server daemon to accept new connections to us
3674  *
3675  *		FMD will read our *.conf and subscribe us to FMA events
3676  */
3677 
3678 void
3679 _fmd_init(fmd_hdl_t *hdl)
3680 {
3681 	struct timeval		tmv;		/* timeval */
3682 	ssize_t			n;		/* gen use */
3683 	const struct facility	*fp;		/* syslog facility matching */
3684 	char			*facname;	/* syslog facility property */
3685 	uint32_t		type_mask;	/* type of the local host */
3686 	int			rc;		/* funcs return code */
3687 
3688 
3689 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
3690 		return; /* invalid data in configuration file */
3691 	}
3692 
3693 	fmd_hdl_debug(hdl, "info: module initializing\n");
3694 
3695 	init_hdl = hdl;
3696 	etm_lhp = ldom_init(etm_init_alloc, etm_init_free);
3697 
3698 	/*
3699 	 * decide the ldom type, do initialization accordingly
3700 	 */
3701 	if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) {
3702 		fmd_hdl_debug(hdl, "error: can't decide ldom type\n");
3703 		fmd_hdl_debug(hdl, "info: module unregistering\n");
3704 		ldom_fini(etm_lhp);
3705 		fmd_hdl_unregister(hdl);
3706 		return;
3707 	}
3708 
3709 	if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) {
3710 		if (type_mask & LDOM_TYPE_LEGACY) {
3711 			/*
3712 			 * running on a legacy sun4v domain,
3713 			 * act as the the old sun4v
3714 			 */
3715 			etm_ldom_type = LDOM_TYPE_LEGACY;
3716 			fmd_hdl_debug(hdl, "info: running as the old sun4v\n");
3717 			ldom_fini(etm_lhp);
3718 		} else if (type_mask & LDOM_TYPE_CONTROL) {
3719 			etm_ldom_type = LDOM_TYPE_CONTROL;
3720 			fmd_hdl_debug(hdl, "info: running as control domain\n");
3721 
3722 			/*
3723 			 * looking for libds.so.1.
3724 			 * If not found, don't do DS registration. As a result,
3725 			 * there will be no DS callbacks or other DS services.
3726 			 */
3727 			if (etm_ds_init(hdl) >= 0) {
3728 				etm_filter_init(hdl);
3729 				etm_ckpt_init(hdl);
3730 
3731 				flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;
3732 
3733 				/*
3734 				 * ds client registration
3735 				 */
3736 				if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps,
3737 				    &iosvc_ops))) {
3738 					fmd_hdl_debug(hdl,
3739 					"error: ds_clnt_reg(): errno %d\n", rc);
3740 				}
3741 			} else {
3742 				fmd_hdl_debug(hdl, "error: dlopen() libds "
3743 				    "failed, continue without the DS services");
3744 			}
3745 
3746 			/*
3747 			 * register for ldom status events
3748 			 */
3749 			if ((rc = ldom_register_event(etm_lhp,
3750 			    ldom_event_handler, hdl))) {
3751 				fmd_hdl_debug(hdl,
3752 				    "error: ldom_register_event():"
3753 				    " errno %d\n", rc);
3754 			}
3755 
3756 			/*
3757 			 * create the thread for handling both the ldom status
3758 			 * change and service events
3759 			 */
3760 			etm_async_e_tid = fmd_thr_create(hdl,
3761 			    etm_async_event_handler, hdl);
3762 		}
3763 
3764 		/* setup statistics and properties from FMD */
3765 
3766 		(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
3767 		    sizeof (etm_stats) / sizeof (fmd_stat_t),
3768 		    (fmd_stat_t *)&etm_stats);
3769 
3770 		etm_fma_resp_wait_time = fmd_prop_get_int32(hdl,
3771 		    ETM_PROP_NM_FMA_RESP_WAIT_TIME);
3772 		etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
3773 		etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
3774 		    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
3775 		fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
3776 		    "etm_debug_max_ev_cnt %d\n", etm_debug_lvl,
3777 		    etm_debug_max_ev_cnt);
3778 
3779 		etm_resp_q_max_len = fmd_prop_get_int32(hdl,
3780 		    ETM_PROP_NM_MAX_RESP_Q_LEN);
3781 		etm_stats.etm_resp_q_max_len.fmds_value.ui64 =
3782 		    etm_resp_q_max_len;
3783 		etm_bad_acc_to_sec = fmd_prop_get_int32(hdl,
3784 		    ETM_PROP_NM_BAD_ACC_TO_SEC);
3785 
3786 		/*
3787 		 * obtain an FMD transport handle so we can post
3788 		 * FMA events later
3789 		 */
3790 
3791 		etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
3792 
3793 		/*
3794 		 * encourage protocol transaction id to be unique per module
3795 		 * load
3796 		 */
3797 
3798 		(void) gettimeofday(&tmv, NULL);
3799 		etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
3800 		    ((unsigned long)tmv.tv_usec >> 10));
3801 
3802 		/* init the ETM transport */
3803 
3804 		if ((n = etm_xport_init(hdl)) != 0) {
3805 			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
3806 			    (-n));
3807 			fmd_hdl_unregister(hdl);
3808 			return;
3809 		}
3810 
3811 		/*
3812 		 * Cache any properties we use every time we receive an alert.
3813 		 */
3814 		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
3815 		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);
3816 
3817 		if (syslog_file && (syslog_logfd = open("/dev/conslog",
3818 		    O_WRONLY | O_NOCTTY)) == -1) {
3819 			fmd_hdl_error(hdl,
3820 			    "error: failed to open /dev/conslog");
3821 			syslog_file = 0;
3822 		}
3823 
3824 		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
3825 		    O_WRONLY | O_NOCTTY)) == -1) {
3826 			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
3827 			syslog_cons = 0;
3828 		}
3829 
3830 		if (syslog_file) {
3831 			/*
3832 			 * Look up the value of the "facility" property and
3833 			 * use it to determine * what syslog LOG_* facility
3834 			 * value we use to fill in our log_ctl_t.
3835 			 */
3836 			facname = fmd_prop_get_string(hdl,
3837 			    ETM_PROP_NM_FACILITY);
3838 
3839 			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
3840 				if (strcmp(fp->fac_name, facname) == 0)
3841 					break;
3842 			}
3843 
3844 			if (fp->fac_name == NULL) {
3845 				fmd_hdl_error(hdl, "error: invalid 'facility'"
3846 				    " setting: %s\n", facname);
3847 				syslog_file = 0;
3848 			} else {
3849 				syslog_facility = fp->fac_value;
3850 				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
3851 			}
3852 
3853 			fmd_prop_free_string(hdl, facname);
3854 		}
3855 
3856 		/*
3857 		 * start the message responder and the connection acceptance
3858 		 * server; request protocol version be negotiated after waiting
3859 		 * a second for the receiver to be ready to start handshaking
3860 		 */
3861 
3862 		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
3863 		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);
3864 
3865 		(void) etm_sleep(ETM_SLEEP_QUIK);
3866 		etm_req_ver_negot(hdl);
3867 
3868 	} else if (type_mask & LDOM_TYPE_ROOT) {
3869 		etm_ldom_type = LDOM_TYPE_ROOT;
3870 		fmd_hdl_debug(hdl, "info: running as root domain\n");
3871 
3872 		/*
3873 		 * looking for libds.so.1.
3874 		 * If not found, don't do DS registration. As a result,
3875 		 * there will be no DS callbacks or other DS services.
3876 		 */
3877 		if (etm_ds_init(hdl) < 0) {
3878 			fmd_hdl_debug(hdl,
3879 			    "error: dlopen() libds failed, "
3880 			    "module unregistering\n");
3881 			ldom_fini(etm_lhp);
3882 			fmd_hdl_unregister(hdl);
3883 			return;
3884 		}
3885 
3886 		/*
3887 		 * DS service registration
3888 		 */
3889 		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
3890 			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
3891 			    rc);
3892 		}
3893 
3894 		/*
3895 		 * this thread is created for ds_reg_cb/ds_unreg_cb
3896 		 */
3897 		etm_async_e_tid = fmd_thr_create(hdl,
3898 		    etm_async_event_handler, hdl);
3899 
3900 		flags = FMD_XPRT_RDWR;
3901 	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
3902 		/*
3903 		 * Do not load this module if it is
3904 		 * . runing on a non-root ldom
3905 		 * . the domain owns no io devices
3906 		 */
3907 		fmd_hdl_debug(hdl,
3908 		    "info: non-root ldom, module unregistering\n");
3909 		ldom_fini(etm_lhp);
3910 		fmd_hdl_unregister(hdl);
3911 		return;
3912 	} else {
3913 		/*
3914 		 * place holder, all other cases. unload etm for now
3915 		 */
3916 		fmd_hdl_debug(hdl,
3917 		    "info: other ldom type, module unregistering\n");
3918 		ldom_fini(etm_lhp);
3919 		fmd_hdl_unregister(hdl);
3920 		return;
3921 	}
3922 
3923 	fmd_hdl_debug(hdl, "info: module initialized ok\n");
3924 
3925 } /* _fmd_init() */
3926 
3927 /*
3928  * etm_recv - receive an FMA event from FMD and transport it
3929  *		to the remote endpoint
3930  */
3931 
3932 /*ARGSUSED*/
3933 void
3934 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
3935 {
3936 	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
3937 	etm_xport_conn_t	conn;	/* connection handle */
3938 	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
3939 	ssize_t			i, n;	/* gen use */
3940 	size_t			sz;	/* header size */
3941 	size_t			buflen;	/* size of packed FMA event */
3942 	uint8_t			*buf;	/* tmp buffer for packed FMA event */
3943 
3944 	/*
3945 	 * if this is running on a Root Domain, ignore the events,
3946 	 * return right away
3947 	 */
3948 	if (etm_ldom_type == LDOM_TYPE_ROOT)
3949 		return;
3950 
3951 	buflen = 0;
3952 	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
3953 		fmd_hdl_error(hdl, "error: FMA event dropped: "
3954 		    "event size errno %d class %s\n", n, class);
3955 		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
3956 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
3957 		return;
3958 	}
3959 
3960 	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
3961 	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
3962 	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
3963 
3964 	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
3965 	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;
3966 
3967 	/*
3968 	 * if the debug limit has been set, avoid excessive traffic,
3969 	 * for example, an infinite cycle using loopback nodes
3970 	 */
3971 
3972 	if ((etm_debug_max_ev_cnt >= 0) &&
3973 	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
3974 	    etm_debug_max_ev_cnt)) {
3975 		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
3976 		    "event %p cnt %llu > debug max %d\n", evp,
3977 		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
3978 		    etm_debug_max_ev_cnt);
3979 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
3980 		return;
3981 	}
3982 
3983 	/* allocate a buffer for the FMA event and nvlist pack it */
3984 
3985 	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
3986 
3987 	/*
3988 	 * increment the ttl value if the event is from remote (a root domain)
3989 	 * uncomment this when enabling fault forwarding from Root domains
3990 	 * to Control domain.
3991 	 *
3992 	 * uint8_t			ttl;
3993 	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
3994 	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
3995 	 *		(void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8);
3996 	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
3997 	 *	}
3998 	 * }
3999 	 */
4000 
4001 	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
4002 	    NV_ENCODE_XDR, 0)) != 0) {
4003 		fmd_hdl_error(hdl, "error: FMA event dropped: "
4004 		    "event pack errno %d class %s\n", n, class);
4005 		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
4006 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4007 		fmd_hdl_free(hdl, buf, buflen);
4008 		return;
4009 	}
4010 
4011 	/* get vector of dst addrs and send the FMA event to each one */
4012 
4013 	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
4014 		fmd_hdl_error(hdl, "error: FMA event dropped: "
4015 		    "bad event dst addrs errno %d\n", errno);
4016 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
4017 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4018 		fmd_hdl_free(hdl, buf, buflen);
4019 		return;
4020 	}
4021 
4022 	for (i = 0; addrv[i] != NULL; i++) {
4023 
4024 		/* open a new connection to this dst addr */
4025 
4026 		if ((n = etm_conn_open(hdl, "FMA event dropped: "
4027 		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
4028 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4029 			continue;
4030 		}
4031 
4032 		(void) pthread_mutex_lock(&etm_write_lock);
4033 
4034 		/* write the ETM message header */
4035 
4036 		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
4037 		    &sz)) == NULL) {
4038 			(void) pthread_mutex_unlock(&etm_write_lock);
4039 			fmd_hdl_error(hdl, "error: FMA event dropped: "
4040 			    "bad hdr write errno %d\n", errno);
4041 			(void) etm_conn_close(hdl,
4042 			    "bad conn close per bad hdr wr", conn);
4043 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4044 			continue;
4045 		}
4046 
4047 		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
4048 		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
4049 		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
4050 		    evp);
4051 
4052 		/* write the ETM message body, ie, the packed nvlist */
4053 
4054 		if ((n = etm_io_op(hdl, "FMA event dropped: "
4055 		    "bad io write on event", conn,
4056 		    buf, buflen, ETM_IO_OP_WR)) < 0) {
4057 			(void) pthread_mutex_unlock(&etm_write_lock);
4058 			(void) etm_conn_close(hdl,
4059 			    "bad conn close per bad body wr", conn);
4060 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4061 			continue;
4062 		}
4063 
4064 		(void) pthread_mutex_unlock(&etm_write_lock);
4065 
4066 		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
4067 		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
4068 		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
4069 		    evp);
4070 
4071 		/* close the connection */
4072 
4073 		(void) etm_conn_close(hdl, "bad conn close after event send",
4074 		    conn);
4075 	} /* foreach dst addr in the vector */
4076 
4077 	etm_xport_free_addrv(hdl, addrv);
4078 	fmd_hdl_free(hdl, buf, buflen);
4079 
4080 } /* etm_recv() */
4081 
4082 
4083 /*
4084  * etm_send -	receive an FMA event from FMD and enQ it in the iosvc.Q.
4085  *		etm_send_to_remote_root() deQ and xprt the FMA events to a
4086  *		remote root domain
4087  *		return FMD_SEND_SUCCESS for success,
4088  *		       FMD_SEND_FAILED for error
4089  */
4090 
4091 /*ARGSUSED*/
4092 int
4093 etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
4094 {
4095 	uint32_t	pack_it;	/* whether to pack/enq the event */
4096 	etm_pack_msg_type_t	msg_type;
4097 					/* tell etm_pack_ds_msg() what to do */
4098 	etm_iosvc_t	*iosvc;		/* ptr to cur iosvc struct */
4099 	char 		*class;		/* nvlist class name */
4100 
4101 	pack_it = 1;
4102 	msg_type = FMD_XPRT_OTHER_MSG;
4103 
4104 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
4105 	if (class == NULL) {
4106 		pack_it = 0;
4107 	} else  {
4108 		if (etm_debug_lvl >= 1) {
4109 			fmd_hdl_debug(fmd_hdl,
4110 			    "info: evp class= %s in etm_send\n", class);
4111 		}
4112 
4113 		if (etm_ldom_type ==  LDOM_TYPE_CONTROL) {
4114 			iosvc =
4115 			    (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp);
4116 
4117 			/*
4118 			 * check the flag FORWARDING_FAULTS_TO_CONTROL to
4119 			 * decide if or not to drop fault subscription
4120 			 * control msgs
4121 			 */
4122 			if (strcmp(class, "resource.fm.xprt.subscribe") == 0) {
4123 				pack_it = 0;
4124 				/*
4125 				 * if (FORWARDING_FAULTS_TO_CONTROL == 1) {
4126 				 * (void) nvlist_lookup_string(nvl,
4127 				 *    FM_RSRC_XPRT_SUBCLASS, &subclass);
4128 				 * if (strcmp(subclass, "list.suspect")
4129 				 *    == 0) {
4130 				 *	pack_it = 1;
4131 				 *	msg_action = FMD_XPRT_OTHER_MSG;
4132 				 * }
4133 				 * if (strcmp(subclass, "list.repaired")
4134 				 *    == 0) {
4135 				 *	pack_it = 1;
4136 				 *	msg_action = FMD_XPRT_OTHER_MSG;
4137 				 * }
4138 				 * }
4139 				 */
4140 			}
4141 			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4142 				pack_it = 1;
4143 				msg_type = FMD_XPRT_RUN_MSG;
4144 			}
4145 		} else { /* has to be the root domain ldom */
4146 			iosvc = &io_svc;
4147 			/*
4148 			 * drop all ereport and fault subscriptions
4149 			 * are we dropping too much here, more than just ereport
4150 			 * and fault subscriptions? need to check
4151 			 */
4152 			if (strcmp(class, "resource.fm.xprt.subscribe") == 0)
4153 				pack_it = 0;
4154 			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4155 				pack_it = 1;
4156 				msg_type = FMD_XPRT_RUN_MSG;
4157 			}
4158 		}
4159 	}
4160 
4161 	if (pack_it)  {
4162 		if (etm_debug_lvl >= 1) {
4163 			fmd_hdl_debug(fmd_hdl,
4164 			    "info: ldom name returned from xprt get specific="
4165 			    "%s xprt=%lld\n", iosvc->ldom_name, xp);
4166 		}
4167 		/*
4168 		 * pack the etm msg for the DS library and  enq in io_svc->Q
4169 		 * when the hdrp is NULL, the packing func will use the static
4170 		 * iosvc_hdr
4171 		 */
4172 		(void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type,
4173 		    ETM_CKPT_NOOP);
4174 	}
4175 
4176 	return (FMD_SEND_SUCCESS);
4177 
4178 } /* etm_send() */
4179 
4180 
4181 
4182 /*
4183  * _fmd_fini - stop the server daemon and teardown the transport
4184  */
4185 
4186 void
4187 _fmd_fini(fmd_hdl_t *hdl)
4188 {
4189 	ssize_t			n;		/* gen use */
4190 	etm_iosvc_t		*iosvc;		/* ptr to insvc struct */
4191 	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
4192 	uint32_t		i;		/* for loop var */
4193 
4194 	fmd_hdl_debug(hdl, "info: module finalizing\n");
4195 
4196 	/* kill the connection server and responder ; wait for them to die */
4197 
4198 	etm_is_dying = 1;
4199 
4200 	if (etm_svr_tid != NULL) {
4201 		fmd_thr_signal(hdl, etm_svr_tid);
4202 		fmd_thr_destroy(hdl, etm_svr_tid);
4203 		etm_svr_tid = NULL;
4204 	} /* if server thread was successfully created */
4205 
4206 	if (etm_resp_tid != NULL) {
4207 		fmd_thr_signal(hdl, etm_resp_tid);
4208 		fmd_thr_destroy(hdl, etm_resp_tid);
4209 		etm_resp_tid = NULL;
4210 	} /* if responder thread was successfully created */
4211 
4212 	if (etm_async_e_tid != NULL) {
4213 		fmd_thr_signal(hdl, etm_async_e_tid);
4214 		fmd_thr_destroy(hdl, etm_async_e_tid);
4215 		etm_async_e_tid = NULL;
4216 	} /* if async event handler thread was successfully created */
4217 
4218 
4219 	if ((etm_ldom_type == LDOM_TYPE_LEGACY) ||
4220 	    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
4221 
4222 		/* teardown the transport and cleanup syslogging */
4223 		if ((n = etm_xport_fini(hdl)) != 0) {
4224 			fmd_hdl_error(hdl, "warning: xport fini errno %d\n",
4225 			    (-n));
4226 		}
4227 		if (etm_fmd_xprt != NULL) {
4228 			fmd_xprt_close(hdl, etm_fmd_xprt);
4229 		}
4230 
4231 		if (syslog_logfd != -1) {
4232 			(void) close(syslog_logfd);
4233 		}
4234 		if (syslog_msgfd != -1) {
4235 			(void) close(syslog_msgfd);
4236 		}
4237 	}
4238 
4239 	if (etm_ldom_type == LDOM_TYPE_CONTROL)  {
4240 		if (ldom_unregister_event(etm_lhp))
4241 			fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n");
4242 
4243 		/*
4244 		 * On control domain side, there may be multiple iosvc struct
4245 		 * in use, one for each bound/active domain. Each struct
4246 		 * manages a queue of fma events destined to the root domain.
4247 		 * Need to go thru every iosvc struct to clean up its resources.
4248 		 */
4249 		for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
4250 			if (iosvc_list[i].ldom_name[0] != '\0') {
4251 				/*
4252 				 * found an iosvc struct for a root domain
4253 				 */
4254 				iosvc = &iosvc_list[i];
4255 				(void) pthread_mutex_lock(&iosvc_list_lock);
4256 				etm_iosvc_cleanup(hdl, iosvc, B_TRUE, B_FALSE);
4257 				(void) pthread_mutex_unlock(&iosvc_list_lock);
4258 
4259 			} else {
4260 				/*
4261 				 * reach the end of existing iosvc structures
4262 				 */
4263 				continue;
4264 			}
4265 		} /* for i<NUM_OF_ROOT_DOMAINS */
4266 		etm_ckpt_fini(hdl);
4267 		etm_filter_fini(hdl);
4268 
4269 		ldom_fini(etm_lhp);
4270 
4271 	} else if (etm_ldom_type == LDOM_TYPE_ROOT) {
4272 		/*
4273 		 * On root domain side, there is only one iosvc struct in use.
4274 		 */
4275 		iosvc = &io_svc;
4276 		if (iosvc->send_tid != NULL) {
4277 			fmd_thr_signal(hdl, iosvc->send_tid);
4278 			fmd_thr_destroy(hdl, iosvc->send_tid);
4279 			iosvc->send_tid = NULL;
4280 		} /* if io svc send thread was successfully created */
4281 
4282 		if (iosvc->recv_tid != NULL) {
4283 			fmd_thr_signal(hdl, iosvc->recv_tid);
4284 			fmd_thr_destroy(hdl, iosvc->recv_tid);
4285 			iosvc->recv_tid = NULL;
4286 		} /* if io svc receive thread was successfully created */
4287 
4288 		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
4289 		while (iosvc->msg_q_cur_len > 0) {
4290 			(void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele);
4291 			fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size);
4292 		}
4293 		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
4294 
4295 		if (iosvc->fmd_xprt != NULL)
4296 			fmd_xprt_close(hdl, iosvc->fmd_xprt);
4297 		ldom_fini(etm_lhp);
4298 	}
4299 	if (etm_ds_fini) {
4300 		(*etm_ds_fini)();
4301 		(void) dlclose(etm_dl_hdl);
4302 	}
4303 
4304 	fmd_hdl_debug(hdl, "info: module finalized ok\n");
4305 
4306 } /* _fmd_fini() */
4307