xref: /titanic_50/usr/src/cmd/fm/modules/sun4v/etm/etm.c (revision 3fbe3e2827948b5ff8ffec94d18c232af100ea3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * etm.c	FMA Event Transport Module implementation, a plugin of FMD
29  *		for sun4v/Ontario
30  *
31  * plugin for sending/receiving FMA events to/from service processor
32  */
33 
34 /*
35  * --------------------------------- includes --------------------------------
36  */
37 
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/fm/ldom.h>
41 #include <sys/strlog.h>
42 #include <sys/syslog.h>
43 #include <sys/libds.h>
44 #include <netinet/in.h>
45 #include <fm/fmd_api.h>
46 
47 #include "etm_xport_api.h"
48 #include "etm_etm_proto.h"
49 #include "etm_impl.h"
50 #include "etm_iosvc.h"
51 #include "etm_filter.h"
52 #include "etm_ckpt.h"
53 
54 #include <pthread.h>
55 #include <signal.h>
56 #include <stropts.h>
57 #include <locale.h>
58 #include <strings.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61 #include <limits.h>
62 #include <values.h>
63 #include <alloca.h>
64 #include <errno.h>
65 #include <dlfcn.h>
66 #include <link.h>
67 #include <fcntl.h>
68 #include <time.h>
69 
70 /*
71  * ----------------------------- forward decls -------------------------------
72  */
73 
74 static void
75 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);
76 
77 static int
78 etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl);
79 
80 static void
81 etm_send_to_remote_root(void *arg);
82 
83 static void
84 etm_recv_from_remote_root(void *arg);
85 
86 /*
87  * ------------------------- data structs for FMD ----------------------------
88  */
89 
90 static fmd_hdl_ops_t fmd_ops = {
91 	etm_recv,	/* fmdo_recv */
92 	NULL,		/* fmdo_timeout */
93 	NULL,		/* fmdo_close */
94 	NULL,		/* fmdo_stats */
95 	NULL,		/* fmdo_gc */
96 	etm_send,	/* fmdo_send */
97 };
98 
99 static const fmd_prop_t fmd_props[] = {
100 	{ ETM_PROP_NM_XPORT_ADDRS,		FMD_TYPE_STRING, "" },
101 	{ ETM_PROP_NM_DEBUG_LVL,		FMD_TYPE_INT32, "0" },
102 	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,		FMD_TYPE_INT32, "-1" },
103 	{ ETM_PROP_NM_CONSOLE,			FMD_TYPE_BOOL, "false" },
104 	{ ETM_PROP_NM_SYSLOGD,			FMD_TYPE_BOOL, "true" },
105 	{ ETM_PROP_NM_FACILITY,			FMD_TYPE_STRING, "LOG_DAEMON" },
106 	{ ETM_PROP_NM_MAX_RESP_Q_LEN,		FMD_TYPE_UINT32, "512" },
107 	{ ETM_PROP_NM_BAD_ACC_TO_SEC,		FMD_TYPE_UINT32, "1" },
108 	{ ETM_PROP_NM_FMA_RESP_WAIT_TIME,	FMD_TYPE_INT32, "240" },
109 	{ NULL, 0, NULL }
110 };
111 
112 
113 static const fmd_hdl_info_t fmd_info = {
114 	"FMA Event Transport Module", "1.2", &fmd_ops, fmd_props
115 };
116 
117 /*
118  * ----------------------- private consts and defns --------------------------
119  */
120 
121 /* misc buffer for variable sized protocol header fields */
122 
123 #define	ETM_MISC_BUF_SZ	(4 * 1024)
124 
125 static uint32_t
126 etm_ldom_type = LDOM_TYPE_LEGACY;
127 
128 /* try limit for IO operations w/ capped exp backoff sleep on retry */
129 
130 /*
131  * Design_Note:	ETM will potentially retry forever IO operations that the
132  *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
133  *		giving up after some number of seconds. This avoids
134  *		dropping FMA events while the service processor is down,
135  *		but at the risk of pending fmdo_recv() forever and
136  *		overflowing FMD's event queue for ETM.
137  *		A future TBD enhancement would be to always recv
138  *		and send each ETM msg in a single read/write() to reduce
139  *		the risk of failure between ETM msg hdr and body,
140  *		assuming the MTU_SZ is large enough.
141  */
142 
#define	ETM_TRY_MAX_CNT		(MAXINT - 1)	/* retry count limit */
#define	ETM_TRY_BACKOFF_RATE	(4)	/* sleep multiplier per retry */
#define	ETM_TRY_BACKOFF_CAP	(60)	/* upper bound on sleep, in sec */

/* step added to the protocol transaction id for every new send */

#define	ETM_XID_INC		(2)
150 
151 typedef struct etm_resp_q_ele {
152 
153 	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
154 	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
155 	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
156 	int32_t			rqe_resp_code;	/* response code to send */
157 
158 	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */
159 
160 } etm_resp_q_ele_t;	/* responder queue element */
161 
162 /*
163  * ---------------------------- global data ----------------------------------
164  */
165 
166 static fmd_hdl_t
167 *init_hdl = NULL;	/* used in mem allocator and several other places */
168 
169 static int
170 etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */
171 
172 static int
173 etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */
174 
175 static fmd_xprt_t
176 *etm_fmd_xprt = NULL;	/* FMD transport layer handle */
177 
178 static pthread_t
179 etm_svr_tid = NULL;	/* thread id of connection acceptance server */
180 
181 static pthread_t
182 etm_resp_tid = NULL;	/* thread id of msg responder */
183 
184 static etm_resp_q_ele_t
185 *etm_resp_q_head = NULL; /* ptr to cur head of responder queue */
186 
187 static etm_resp_q_ele_t
188 *etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */
189 
190 static uint32_t
191 etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */
192 
193 static uint32_t
194 etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */
195 
196 static uint32_t
197 etm_bad_acc_to_sec = 0;	/* sleep timeout (in sec) after bad conn accept */
198 
199 static pthread_mutex_t
200 etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER;	/* protects responder queue */
201 
202 static pthread_cond_t
203 etm_resp_q_cv = PTHREAD_COND_INITIALIZER;	/* nudges msg responder */
204 
205 static volatile int
206 etm_is_dying = 0;	/* bool for dying (killing self) */
207 
208 static uint32_t
209 etm_xid_cur = 0;	/* current transaction id for sends */
210 
211 static uint32_t
212 etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */
213 
214 static uint32_t
215 etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */
216 
217 static uint32_t
218 etm_xid_posted_logged_ev = 0;
219 			/* xid of last FMA_EVENT msg/event posted OK to FMD */
220 
221 static uint32_t
222 etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */
223 
224 static uint8_t
225 etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */
226 
227 static uint32_t
228 etm_fma_resp_wait_time = 30;	/*  time (sec) wait for fma event resp */
229 
230 static pthread_mutex_t
231 etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */
232 
233 static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
234 static int syslog_facility;	/* log(7D) facility (part of priority) */
235 static int syslog_logfd = -1;	/* log(7D) file descriptor */
236 static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
237 static int syslog_file = 0;	/* log to syslog_logfd */
238 static int syslog_cons = 0;	/* log to syslog_msgfd */
239 
/*
 * Lookup table pairing each supported syslog facility name with its
 * numeric value; a NULL name marks the end of the table.
 */
static const struct facility {
	const char	*fac_name;	/* facility name as a string */
	int		fac_value;	/* matching LOG_* constant */
} syslog_facs[] = {
	{ "LOG_DAEMON",	LOG_DAEMON },
	{ "LOG_LOCAL0",	LOG_LOCAL0 },
	{ "LOG_LOCAL1",	LOG_LOCAL1 },
	{ "LOG_LOCAL2",	LOG_LOCAL2 },
	{ "LOG_LOCAL3",	LOG_LOCAL3 },
	{ "LOG_LOCAL4",	LOG_LOCAL4 },
	{ "LOG_LOCAL5",	LOG_LOCAL5 },
	{ "LOG_LOCAL6",	LOG_LOCAL6 },
	{ "LOG_LOCAL7",	LOG_LOCAL7 },
	{ NULL,		0 }		/* terminator */
};
255 
256 static struct stats {
257 
258 	/* ETM msg counters */
259 
260 	fmd_stat_t etm_rd_hdr_fmaevent;
261 	fmd_stat_t etm_rd_hdr_control;
262 	fmd_stat_t etm_rd_hdr_alert;
263 	fmd_stat_t etm_rd_hdr_response;
264 	fmd_stat_t etm_rd_body_fmaevent;
265 	fmd_stat_t etm_rd_body_control;
266 	fmd_stat_t etm_rd_body_alert;
267 	fmd_stat_t etm_rd_body_response;
268 	fmd_stat_t etm_wr_hdr_fmaevent;
269 	fmd_stat_t etm_wr_hdr_control;
270 	fmd_stat_t etm_wr_hdr_response;
271 	fmd_stat_t etm_wr_body_fmaevent;
272 	fmd_stat_t etm_wr_body_control;
273 	fmd_stat_t etm_wr_body_response;
274 
275 	fmd_stat_t etm_rd_max_ev_per_msg;
276 	fmd_stat_t etm_wr_max_ev_per_msg;
277 
278 	fmd_stat_t etm_resp_q_cur_len;
279 	fmd_stat_t etm_resp_q_max_len;
280 
281 	/* ETM byte counters */
282 
283 	fmd_stat_t etm_wr_fmd_bytes;
284 	fmd_stat_t etm_rd_fmd_bytes;
285 	fmd_stat_t etm_wr_xport_bytes;
286 	fmd_stat_t etm_rd_xport_bytes;
287 
288 	fmd_stat_t etm_magic_drop_bytes;
289 
290 	/* ETM [dropped] FMA event counters */
291 
292 	fmd_stat_t etm_rd_fmd_fmaevent;
293 	fmd_stat_t etm_wr_fmd_fmaevent;
294 
295 	fmd_stat_t etm_rd_drop_fmaevent;
296 	fmd_stat_t etm_wr_drop_fmaevent;
297 
298 	fmd_stat_t etm_rd_dup_fmaevent;
299 	fmd_stat_t etm_wr_dup_fmaevent;
300 
301 	fmd_stat_t etm_rd_dup_alert;
302 	fmd_stat_t etm_wr_dup_alert;
303 
304 	fmd_stat_t etm_enq_drop_resp_q;
305 	fmd_stat_t etm_deq_drop_resp_q;
306 
307 	/* ETM protocol failures */
308 
309 	fmd_stat_t etm_magic_bad;
310 	fmd_stat_t etm_ver_bad;
311 	fmd_stat_t etm_msgtype_bad;
312 	fmd_stat_t etm_subtype_bad;
313 	fmd_stat_t etm_xid_bad;
314 	fmd_stat_t etm_fmaeventlen_bad;
315 	fmd_stat_t etm_respcode_bad;
316 	fmd_stat_t etm_timeout_bad;
317 	fmd_stat_t etm_evlens_bad;
318 
319 	/* IO operation failures */
320 
321 	fmd_stat_t etm_xport_wr_fail;
322 	fmd_stat_t etm_xport_rd_fail;
323 	fmd_stat_t etm_xport_pk_fail;
324 
325 	/* IO operation retries */
326 
327 	fmd_stat_t etm_xport_wr_retry;
328 	fmd_stat_t etm_xport_rd_retry;
329 	fmd_stat_t etm_xport_pk_retry;
330 
331 	/* system and library failures */
332 
333 	fmd_stat_t etm_os_nvlist_pack_fail;
334 	fmd_stat_t etm_os_nvlist_unpack_fail;
335 	fmd_stat_t etm_os_nvlist_size_fail;
336 	fmd_stat_t etm_os_pthread_create_fail;
337 
338 	/* xport API failures */
339 
340 	fmd_stat_t etm_xport_get_ev_addrv_fail;
341 	fmd_stat_t etm_xport_open_fail;
342 	fmd_stat_t etm_xport_close_fail;
343 	fmd_stat_t etm_xport_accept_fail;
344 	fmd_stat_t etm_xport_open_retry;
345 
346 	/* FMD entry point bad arguments */
347 
348 	fmd_stat_t etm_fmd_init_badargs;
349 	fmd_stat_t etm_fmd_fini_badargs;
350 
351 	/* Alert logging errors */
352 
353 	fmd_stat_t etm_log_err;
354 	fmd_stat_t etm_msg_err;
355 
356 	/* miscellaneous stats */
357 
358 	fmd_stat_t etm_reset_xport;
359 
360 } etm_stats = {
361 
362 	/* ETM msg counters */
363 
364 	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
365 		"ETM fmaevent msg headers rcvd from xport" },
366 	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
367 		"ETM control msg headers rcvd from xport" },
368 	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
369 		"ETM alert msg headers rcvd from xport" },
370 	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
371 		"ETM response msg headers rcvd from xport" },
372 	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
373 		"ETM fmaevent msg bodies rcvd from xport" },
374 	{ "etm_rd_body_control", FMD_TYPE_UINT64,
375 		"ETM control msg bodies rcvd from xport" },
376 	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
377 		"ETM alert msg bodies rcvd from xport" },
378 	{ "etm_rd_body_response", FMD_TYPE_UINT64,
379 		"ETM response msg bodies rcvd from xport" },
380 	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
381 		"ETM fmaevent msg headers sent to xport" },
382 	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
383 		"ETM control msg headers sent to xport" },
384 	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
385 		"ETM response msg headers sent to xport" },
386 	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
387 		"ETM fmaevent msg bodies sent to xport" },
388 	{ "etm_wr_body_control", FMD_TYPE_UINT64,
389 		"ETM control msg bodies sent to xport" },
390 	{ "etm_wr_body_response", FMD_TYPE_UINT64,
391 		"ETM response msg bodies sent to xport" },
392 
393 	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
394 		"max FMA events per ETM msg from xport" },
395 	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
396 		"max FMA events per ETM msg to xport" },
397 
398 	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
399 		"cur enqueued response msgs to xport" },
400 	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
401 		"max enqueable response msgs to xport" },
402 
403 	/* ETM byte counters */
404 
405 	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
406 		"bytes of FMA events sent to FMD" },
407 	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
408 		"bytes of FMA events rcvd from FMD" },
409 	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
410 		"bytes of FMA events sent to xport" },
411 	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
412 		"bytes of FMA events rcvd from xport" },
413 
414 	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
415 		"bytes dropped from xport pre magic num" },
416 
417 	/* ETM [dropped] FMA event counters */
418 
419 	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
420 		"FMA events rcvd from FMD" },
421 	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
422 		"FMA events sent to FMD" },
423 
424 	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
425 		"dropped FMA events from xport" },
426 	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
427 		"dropped FMA events to xport" },
428 
429 	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
430 	    "duplicate FMA events rcvd from xport" },
431 	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
432 	    "duplicate FMA events sent to xport" },
433 
434 	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
435 	    "duplicate ALERTs rcvd from xport" },
436 	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
437 	    "duplicate ALERTs sent to xport" },
438 
439 	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
440 	    "dropped response msgs on enq" },
441 	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
442 	    "dropped response msgs on deq" },
443 
444 	/* ETM protocol failures */
445 
446 	{ "etm_magic_bad", FMD_TYPE_UINT64,
447 		"ETM msgs w/ invalid magic num" },
448 	{ "etm_ver_bad", FMD_TYPE_UINT64,
449 		"ETM msgs w/ invalid protocol version" },
450 	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
451 		"ETM msgs w/ invalid message type" },
452 	{ "etm_subtype_bad", FMD_TYPE_UINT64,
453 		"ETM msgs w/ invalid sub type" },
454 	{ "etm_xid_bad", FMD_TYPE_UINT64,
455 		"ETM msgs w/ unmatched xid" },
456 	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
457 		"ETM msgs w/ invalid FMA event length" },
458 	{ "etm_respcode_bad", FMD_TYPE_UINT64,
459 		"ETM msgs w/ invalid response code" },
460 	{ "etm_timeout_bad", FMD_TYPE_UINT64,
461 		"ETM msgs w/ invalid timeout value" },
462 	{ "etm_evlens_bad", FMD_TYPE_UINT64,
463 		"ETM msgs w/ too many event lengths" },
464 
465 	/* IO operation failures */
466 
467 	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
468 		"xport write failures" },
469 	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
470 		"xport read failures" },
471 	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
472 		"xport peek failures" },
473 
474 	/* IO operation retries */
475 
476 	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
477 		"xport write retries" },
478 	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
479 		"xport read retries" },
480 	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
481 		"xport peek retries" },
482 
483 	/* system and library failures */
484 
485 	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
486 		"nvlist_pack failures" },
487 	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
488 		"nvlist_unpack failures" },
489 	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
490 		"nvlist_size failures" },
491 	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
492 		"pthread_create failures" },
493 
494 	/* transport API failures */
495 
496 	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
497 		"xport get event addrv API failures" },
498 	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
499 		"xport open API failures" },
500 	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
501 		"xport close API failures" },
502 	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
503 		"xport accept API failures" },
504 	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
505 		"xport open API retries" },
506 
507 	/* FMD entry point bad arguments */
508 
509 	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
510 	    "bad arguments from fmd_init entry point" },
511 	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
512 	    "bad arguments from fmd_fini entry point" },
513 
514 	/* Alert logging errors */
515 
516 	{ "etm_log_err", FMD_TYPE_UINT64,
517 		"failed to log message to log(7D)" },
518 	{ "etm_msg_err", FMD_TYPE_UINT64,
519 		"failed to log message to sysmsg(7D)" },
520 
521 	/* miscellaneous stats */
522 
523 	{ "etm_reset_xport", FMD_TYPE_UINT64,
524 		"xport resets after xport API failure" }
525 };
526 
527 
528 /*
529  * -------------------- global data for Root ldom-------------------------
530  */
531 
532 ldom_hdl_t
533 *etm_lhp = NULL;		/* ldom pointer */
534 
535 static void *etm_dl_hdl = (void *)NULL;
536 static const char *etm_dl_path = "libds.so.1";
537 static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL);
538 
539 static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) =
540 	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
541 static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) =
542 	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
543 static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) =
544 	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL;
545 static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen,
546     size_t *msglen) =
547 	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL;
548 static int (*etm_ds_fini)(void) = (int (*)(void))NULL;
549 
550 static pthread_mutex_t
551 iosvc_list_lock =  PTHREAD_MUTEX_INITIALIZER;
552 
553 static pthread_t
554 etm_async_e_tid = NULL;	/* thread id of io svc async event handler */
555 
556 static etm_proto_v1_ev_hdr_t iosvc_hdr = {
557 	ETM_PROTO_MAGIC_NUM,	/* magic number */
558 	ETM_PROTO_V1,		/* default to V1, not checked */
559 	ETM_MSG_TYPE_FMA_EVENT,	/* Root Domain inteoduces only FMA events */
560 	0,			/* sub-type */
561 	0,			/* pad */
562 	0,			/* add the xid at the Q send time */
563 	ETM_PROTO_V1_TIMEOUT_NONE,
564 	0			/* ev_lens, 0-termed, after 1 FMA event */
565 };
566 
567 /*
568  * static iosvc_list
569  */
570 static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = {
571 	{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
572 	{"", 0}, {"", 0}
573 };
574 
575 static etm_iosvc_t io_svc = {
576 	"\0",				/* ldom_name */
577 	PTHREAD_COND_INITIALIZER,	/* nudges */
578 	PTHREAD_MUTEX_INITIALIZER,	/* protects the iosvc msg Q */
579 	NULL,				/* iosvc msg Q head */
580 	NULL,				/* iosvc msg Q tail */
581 	0,				/* msg Q current length */
582 	100,				/* msg Q max length */
583 	0,				/* current transaction id */
584 	0,				/* xid of last event posted to FMD */
585 	DS_INVALID_HDL,			/* DS handle */
586 	NULL,				/* fmd xprt handle */
587 	NULL,				/* tid 4 send to remote RootDomain */
588 	NULL,				/* tid 4 recv from remote RootDomain */
589 	PTHREAD_COND_INITIALIZER,	/* nudges etm_send_to_remote_root */
590 	PTHREAD_MUTEX_INITIALIZER,	/* protects msg_ack_cv */
591 	0,				/* send/recv threads are not dying */
592 	0,				/* flag for start sending msg Q */
593 	0				/* indicate if the ACK has come  */
594 };
595 etm_iosvc_t *io_svc_p = &io_svc;
596 
597 
598 static uint32_t
599 flags;					/* flags for fmd_xprt_open */
600 
601 static etm_async_event_ele_t
602 async_event_q[ASYNC_EVENT_Q_SIZE];	/* holds the async events */
603 
604 static uint32_t
605 etm_async_q_head = 0;		/* ptr to cur head of async event queue */
606 
607 static uint32_t
608 etm_async_q_tail = 0;		/* ptr to cur tail of async event queue */
609 
610 static uint32_t
611 etm_async_q_cur_len = 0;	/* cur length (ele cnt) of async event queue */
612 
613 static uint32_t
614 etm_async_q_max_len = ASYNC_EVENT_Q_SIZE;
615 				/* max length (ele cnt) of async event queue */
616 
617 static pthread_cond_t
618 etm_async_event_q_cv = PTHREAD_COND_INITIALIZER;
619 				/* nudges  async event handler */
620 
621 static pthread_mutex_t
622 etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER;
623 				/* protects async event q */
624 
625 static ds_ver_t
626 etm_iosvc_vers[] = { { 1, 0} };
627 
628 #define	ETM_NVERS	(sizeof (etm_iosvc_vers) / sizeof (ds_ver_t))
629 
630 static ds_capability_t
631 iosvc_caps = {
632 	"ETM",				/* svc_id */
633 	etm_iosvc_vers,			/* vers */
634 	ETM_NVERS			/* number of vers */
635 };
636 
637 static void
638 etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver,
639     ds_domain_hdl_t did);
640 
641 static void
642 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg);
643 
644 static ds_ops_t
645 iosvc_ops = {
646 	etm_iosvc_reg_handler,		/* ds_reg_cb */
647 	etm_iosvc_unreg_handler,	/* ds_unreg_cb */
648 	NULL,				/* ds_data_cb */
649 	NULL				/* cb_arg */
650 };
651 
652 
653 /*
654  * -------------------------- support functions ------------------------------
655  */
656 
657 /*
658  * Design_Note:	Each failure worth reporting to FMD should be done using
659  *		a single call to fmd_hdl_error() as it logs an FMA event
660  *		for each call. Also be aware that all the fmd_hdl_*()
661  *		format strings currently use platform specific *printf()
662  *		routines; so "%p" under Solaris does not prepend "0x" to
663  *		the outputted hex digits, while Linux and VxWorks do.
664  */
665 
666 
667 /*
668  * etm_show_time - display the current time of day (for debugging) using
669  *		the given FMD module handle and annotation string
670  */
671 
672 static void
673 etm_show_time(fmd_hdl_t *hdl, char *note_str)
674 {
675 	struct timeval		tmv;		/* timeval */
676 
677 	(void) gettimeofday(&tmv, NULL);
678 	fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n",
679 	    note_str, tmv.tv_sec, tmv.tv_usec);
680 
681 } /* etm_show_time() */
682 
683 /*
684  * etm_hexdump - hexdump the given buffer (for debugging) using
685  *		the given FMD module handle
686  */
687 
688 static void
689 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
690 {
691 	uint8_t		*bp;		/* byte ptr */
692 	int		i, j;		/* index */
693 	char		cb[80];		/* char buf */
694 	unsigned int	n;		/* a byte of data for sprintf() */
695 
696 	bp = buf;
697 	j = 0;
698 
699 	/*
700 	 * Design_Note:	fmd_hdl_debug() auto adds a newline if missing;
701 	 *		hence cb exists to accumulate a longer string.
702 	 */
703 
704 	for (i = 1; i <= byte_cnt; i++) {
705 		n = *bp++;
706 		(void) sprintf(&cb[j], "%2.2x ", n);
707 		j += 3;
708 		/* add a newline every 16 bytes or at the buffer's end */
709 		if (((i % 16) == 0) || (i >= byte_cnt)) {
710 			cb[j-1] = '\0';
711 			fmd_hdl_debug(hdl, "%s\n", cb);
712 			j = 0;
713 		}
714 	} /* for each byte in the buffer */
715 
716 } /* etm_hexdump() */
717 
718 /*
719  * etm_sleep - sleep the caller for the given number of seconds,
720  *		return 0 or -errno value
721  *
722  * Design_Note:	To avoid interfering with FMD's signal mask (SIGALRM)
723  *		do not use [Solaris] sleep(3C) and instead use
724  *		pthread_cond_wait() or nanosleep(), both of which
725  *		are POSIX spec-ed to leave signal masks alone.
726  *		This is needed for Solaris and Linux (domain and SP).
727  */
728 
static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec	delay;	/* whole seconds only, no nanoseconds */

	delay.tv_nsec = 0;
	delay.tv_sec = sleep_sec;

	/* on failure nanosleep() sets errno, which is returned negated */
	return ((nanosleep(&delay, NULL) < 0) ? (-errno) : 0);

} /* etm_sleep() */
744 
745 /*
746  * etm_conn_open - open a connection to the given transport address,
747  *		return 0 and the opened connection handle
748  *		or -errno value
749  *
750  * caveats:	the err_substr is used in failure cases for calling
751  *		fmd_hdl_error()
752  */
753 
754 static int
755 etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
756 		etm_xport_addr_t addr, etm_xport_conn_t *connp)
757 {
758 	etm_xport_conn_t	conn;	/* connection to return */
759 	int			nev;	/* -errno value */
760 
761 	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
762 		nev = (-errno);
763 		fmd_hdl_error(hdl, "error: %s: errno %d\n",
764 		    err_substr, errno);
765 		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
766 		return (nev);
767 	} else {
768 		*connp = conn;
769 		return (0);
770 	}
771 } /* etm_conn_open() */
772 
773 /*
774  * etm_conn_close - close the given connection,
775  *		return 0 or -errno value
776  *
777  * caveats:	the err_substr is used in failure cases for calling
778  *		fmd_hdl_error()
779  */
780 
781 static int
782 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
783 {
784 	int	nev;	/* -errno value */
785 
786 	if (etm_xport_close(hdl, conn) == NULL) {
787 		nev = (-errno);
788 		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
789 		    err_substr, errno);
790 		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
791 		return (nev);
792 	} else {
793 		return (0);
794 	}
795 } /* etm_conn_close() */
796 
797 /*
798  * etm_io_op - perform an IO operation on the given connection
799  *		with the given buffer,
800  *		accommodating MTU size and retrying op if needed,
801  *		return how many bytes actually done by the op
802  *		or -errno value
803  *
804  * caveats:	the err_substr is used in failure cases for calling
805  *		fmd_hdl_error()
806  */
807 
808 static ssize_t
809 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
810 				void *buf, size_t byte_cnt, int io_op)
811 {
812 	ssize_t		rv;		/* ret val / byte count */
813 	ssize_t		n;		/* gen use */
814 	uint8_t		*datap;		/* ptr to data */
815 	size_t		mtu_sz;		/* MTU size in bytes */
816 	int		(*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
817 	    void *, size_t);
818 	size_t		io_sz;		/* byte count for io_func_ptr */
819 	int		try_cnt;	/* number of tries done */
820 	int		sleep_sec;	/* exp backoff sleep period in sec */
821 	int		sleep_rv;	/* ret val from sleeping */
822 	fmd_stat_t	io_retry_stat;	/* IO retry stat to update */
823 	fmd_stat_t	io_fail_stat;	/* IO failure stat to update */
824 
825 	if ((conn == NULL) || (buf == NULL)) {
826 		return (-EINVAL);
827 	}
828 	switch (io_op) {
829 	case ETM_IO_OP_RD:
830 		io_func_ptr = etm_xport_read;
831 		io_retry_stat = etm_stats.etm_xport_rd_retry;
832 		io_fail_stat = etm_stats.etm_xport_rd_fail;
833 		break;
834 	case ETM_IO_OP_WR:
835 		io_func_ptr = etm_xport_write;
836 		io_retry_stat = etm_stats.etm_xport_wr_retry;
837 		io_fail_stat = etm_stats.etm_xport_wr_fail;
838 		break;
839 	default:
840 		return (-EINVAL);
841 	}
842 	if (byte_cnt == 0) {
843 		return (byte_cnt);	/* nop */
844 	}
845 
846 	/* obtain [current] MTU size */
847 
848 	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
849 		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
850 	} else {
851 		mtu_sz = n;
852 	}
853 
854 	/* loop until all IO done, try limit exceeded, or real failure */
855 
856 	rv = 0;
857 	datap = buf;
858 	while (rv < byte_cnt) {
859 		io_sz = MIN((byte_cnt - rv), mtu_sz);
860 		try_cnt = 0;
861 		sleep_sec = 0;
862 
863 		/* when give up, return -errno value even if partly done */
864 
865 		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
866 		    (-EAGAIN)) {
867 			try_cnt++;
868 			if (try_cnt > ETM_TRY_MAX_CNT) {
869 				rv = n;
870 				goto func_ret;
871 			}
872 			if (etm_is_dying) {
873 				rv = (-EINTR);
874 				goto func_ret;
875 			}
876 			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
877 				rv = sleep_rv;
878 				goto func_ret;
879 			}
880 			sleep_sec = ((sleep_sec == 0) ? 1 :
881 			    (sleep_sec * ETM_TRY_BACKOFF_RATE));
882 			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
883 			io_retry_stat.fmds_value.ui64++;
884 			if (etm_debug_lvl >= 1) {
885 				fmd_hdl_debug(hdl, "info: retrying io op %d "
886 				    "due to EAGAIN\n", io_op);
887 			}
888 		} /* while trying the io operation */
889 
890 		if (etm_is_dying) {
891 			rv = (-EINTR);
892 			goto func_ret;
893 		}
894 		if (n < 0) {
895 			rv = n;
896 			goto func_ret;
897 		}
898 		/* avoid spinning CPU when given 0 bytes but no error */
899 		if (n == 0) {
900 			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
901 				rv = sleep_rv;
902 				goto func_ret;
903 			}
904 		}
905 		rv += n;
906 		datap += n;
907 	} /* while still have more data */
908 
909 func_ret:
910 
911 	if (rv < 0) {
912 		io_fail_stat.fmds_value.ui64++;
913 		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
914 		    err_substr, (int)(-rv));
915 	}
916 	if (etm_debug_lvl >= 3) {
917 		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
918 		    io_op, (int)rv, (int)byte_cnt);
919 	}
920 	return (rv);
921 
922 } /* etm_io_op() */
923 
924 /*
925  * etm_magic_read - read the magic number of an ETM message header
926  *		from the given connection into the given buffer,
927  *		return 0 or -errno value
928  *
929  * Design_Note:	This routine is intended to help protect ETM from protocol
930  *		framing errors as might be caused by an SP reset / crash in
931  *		the middle of an ETM message send; the connection will be
932  *		read from for as many bytes as needed until the magic number
933  *		is found using a sliding buffer for comparisons.
934  */
935 
936 static int
937 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
938 {
939 	int		rv;		/* ret val */
940 	uint32_t	magic_num;	/* magic number */
941 	int		byte_cnt;	/* count of bytes read */
942 	uint8_t		buf5[4+1];	/* sliding input buffer */
943 	int		i, j;		/* indices into buf5 */
944 	ssize_t		n;		/* gen use */
945 	uint8_t		drop_buf[1024];	/* dropped bytes buffer */
946 
947 	rv = 0;		/* assume success */
948 	magic_num = 0;
949 	byte_cnt = 0;
950 	j = 0;
951 
952 	/* magic number bytes are sent in network (big endian) order */
953 
954 	while (magic_num != ETM_PROTO_MAGIC_NUM) {
955 		if ((n = etm_io_op(hdl, "bad io read on magic",
956 		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
957 			rv = n;
958 			goto func_ret;
959 		}
960 		byte_cnt++;
961 		j = MIN((j + 1), sizeof (magic_num));
962 		if (byte_cnt < sizeof (magic_num)) {
963 			continue;
964 		}
965 
966 		if (byte_cnt > sizeof (magic_num)) {
967 			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
968 			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
969 			drop_buf[i] = buf5[0];
970 			for (i = 0; i < j; i++) {
971 				buf5[i] = buf5[i+1];
972 			} /* for sliding the buffer contents */
973 		}
974 		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
975 		magic_num = ntohl(magic_num);
976 	} /* for reading bytes until find magic number */
977 
978 func_ret:
979 
980 	if (byte_cnt != sizeof (magic_num)) {
981 		fmd_hdl_debug(hdl, "warning: bad proto frame "
982 		    "implies corrupt/lost msg(s)\n");
983 	}
984 	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
985 		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
986 		fmd_hdl_debug(hdl, "info: magic drop hexdump "
987 		    "first %d of %d bytes:\n", i,
988 		    byte_cnt - sizeof (magic_num));
989 		etm_hexdump(hdl, drop_buf, i);
990 	}
991 
992 	if (rv == 0) {
993 		*magic_ptr = magic_num;
994 	}
995 	return (rv);
996 
997 } /* etm_magic_read() */
998 
999 /*
1000  * etm_hdr_read - allocate, read, and validate a [variable sized]
1001  *		ETM message header from the given connection,
1002  *		return the allocated ETM message header
1003  *		(which is guaranteed to be large enough to reuse as a
1004  *		RESPONSE msg hdr) and its size
1005  *		or NULL and set errno on failure
1006  */
1007 
1008 static void *
1009 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
1010 {
1011 	uint8_t			*hdrp;		/* ptr to header to return */
1012 	size_t			hdr_sz;		/* sizeof *hdrp */
1013 	etm_proto_v1_pp_t	pp; 		/* protocol preamble */
1014 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
1015 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1016 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
1017 	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
1018 	uint32_t		*lenp;		/* ptr to FMA event length */
1019 	ssize_t			i, n;		/* gen use */
1020 	uint8_t	misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
1021 	int			dummy_int;	/* dummy var to appease lint */
1022 
1023 	hdrp = NULL; hdr_sz = 0;
1024 
1025 	/* read the magic number which starts the protocol preamble */
1026 
1027 	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
1028 		errno = (-n);
1029 		etm_stats.etm_magic_bad.fmds_value.ui64++;
1030 		return (NULL);
1031 	}
1032 
1033 	/* read the rest of the protocol preamble all at once */
1034 
1035 	if ((n = etm_io_op(hdl, "bad io read on preamble",
1036 	    conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num),
1037 	    ETM_IO_OP_RD)) < 0) {
1038 		errno = (-n);
1039 		return (NULL);
1040 	}
1041 
1042 	/*
1043 	 * Design_Note:	The magic number was already network decoded; but
1044 	 *		some other preamble fields also need to be decoded,
1045 	 *		specifically pp_xid and pp_timeout. The rest of the
1046 	 *		preamble fields are byte sized and hence need no
1047 	 *		decoding.
1048 	 */
1049 
1050 	pp.pp_xid = ntohl(pp.pp_xid);
1051 	pp.pp_timeout = ntohl(pp.pp_timeout);
1052 
1053 	/* sanity check the header as best we can */
1054 
1055 	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
1056 	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
1057 		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
1058 		    (int)pp.pp_proto_ver);
1059 		errno = EPROTO;
1060 		etm_stats.etm_ver_bad.fmds_value.ui64++;
1061 		return (NULL);
1062 	}
1063 
1064 	dummy_int = pp.pp_msg_type;
1065 	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
1066 	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
1067 		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
1068 		errno = EBADMSG;
1069 		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
1070 		return (NULL);
1071 	}
1072 
1073 	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */
1074 
1075 	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
1076 
1077 		ev_hdrp = (void*)&misc_buf[0];
1078 		hdr_sz = sizeof (*ev_hdrp);
1079 		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));
1080 
1081 		/* sanity check the header's timeout */
1082 
1083 		if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) &&
1084 		    (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) {
1085 			errno = ETIME;
1086 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1087 			return (NULL);
1088 		}
1089 
1090 		/* get all FMA event lengths from the header */
1091 
1092 		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
1093 		i = -1;	/* cnt of length entries preceding 0 */
1094 		do {
1095 			i++; lenp++;
1096 			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
1097 			    ETM_MISC_BUF_SZ) {
1098 				errno = E2BIG;	/* ridiculous size */
1099 				etm_stats.etm_evlens_bad.fmds_value.ui64++;
1100 				return (NULL);
1101 			}
1102 			if ((n = etm_io_op(hdl, "bad io read on event len",
1103 			    conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) {
1104 				errno = (-n);
1105 				return (NULL);
1106 			}
1107 			*lenp = ntohl(*lenp);
1108 
1109 		} while (*lenp != 0);
1110 		i += 0; /* first len already counted by sizeof(ev_hdr) */
1111 		hdr_sz += (i * sizeof (*lenp));
1112 
1113 		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;
1114 
1115 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
1116 
1117 		ctl_hdrp = (void*)&misc_buf[0];
1118 		hdr_sz = sizeof (*ctl_hdrp);
1119 		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));
1120 
1121 		/* sanity check the header's sub type (control selector) */
1122 
1123 		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
1124 		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
1125 			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
1126 			    (int)ctl_hdrp->ctl_pp.pp_sub_type);
1127 			errno = EBADMSG;
1128 			etm_stats.etm_subtype_bad.fmds_value.ui64++;
1129 			return (NULL);
1130 		}
1131 
1132 		/* get the control length */
1133 
1134 		if ((n = etm_io_op(hdl, "bad io read on ctl len",
1135 		    conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len),
1136 		    ETM_IO_OP_RD)) < 0) {
1137 			errno = (-n);
1138 			return (NULL);
1139 		}
1140 
1141 		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);
1142 
1143 		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;
1144 
1145 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
1146 
1147 		resp_hdrp = (void*)&misc_buf[0];
1148 		hdr_sz = sizeof (*resp_hdrp);
1149 		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));
1150 
1151 		/* sanity check the header's timeout */
1152 
1153 		if (resp_hdrp->resp_pp.pp_timeout !=
1154 		    ETM_PROTO_V1_TIMEOUT_NONE) {
1155 			errno = ETIME;
1156 			etm_stats.etm_timeout_bad.fmds_value.ui64++;
1157 			return (NULL);
1158 		}
1159 
1160 		/* get the response code and length */
1161 
1162 		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
1163 		    conn, &resp_hdrp->resp_code,
1164 		    sizeof (resp_hdrp->resp_code)
1165 		    + sizeof (resp_hdrp->resp_len),
1166 		    ETM_IO_OP_RD)) < 0) {
1167 			errno = (-n);
1168 			return (NULL);
1169 		}
1170 
1171 		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
1172 		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);
1173 
1174 		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;
1175 
1176 	} else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
1177 
1178 		sa_hdrp = (void*)&misc_buf[0];
1179 		hdr_sz = sizeof (*sa_hdrp);
1180 		(void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp));
1181 
1182 		/* sanity check the header's protocol version */
1183 
1184 		if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) {
1185 			errno = EPROTO;
1186 			etm_stats.etm_ver_bad.fmds_value.ui64++;
1187 			return (NULL);
1188 		}
1189 
1190 		/* get the priority and length */
1191 
1192 		if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
1193 		    conn, &sa_hdrp->sa_priority,
1194 		    sizeof (sa_hdrp->sa_priority)
1195 		    + sizeof (sa_hdrp->sa_len),
1196 		    ETM_IO_OP_RD)) < 0) {
1197 			errno = (-n);
1198 			return (NULL);
1199 		}
1200 
1201 		sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority);
1202 		sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len);
1203 
1204 		etm_stats.etm_rd_hdr_alert.fmds_value.ui64++;
1205 
1206 	} /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */
1207 
1208 	/*
1209 	 * choose a header size that allows hdr reuse for RESPONSE msgs,
1210 	 * allocate and populate the message header, and
1211 	 * return alloc size to caller for later free of hdrp
1212 	 */
1213 
1214 	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
1215 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1216 	(void) memcpy(hdrp, misc_buf, hdr_sz);
1217 
1218 	if (etm_debug_lvl >= 3) {
1219 		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz);
1220 		etm_hexdump(hdl, hdrp, hdr_sz);
1221 	}
1222 	*szp = hdr_sz;
1223 	return (hdrp);
1224 
1225 } /* etm_hdr_read() */
1226 
1227 /*
1228  * etm_hdr_write - create and write a [variable sized] ETM message header
1229  *		to the given connection appropriate for the given FMA event
1230  *		and type of nvlist encoding,
1231  *		return the allocated ETM message header and its size
1232  *		or NULL and set errno on failure
1233  */
1234 
1235 static void*
1236 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
1237 						int encoding, size_t *szp)
1238 {
1239 	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1240 	size_t			hdr_sz;		/* sizeof *hdrp */
1241 	uint32_t		*lenp;		/* ptr to FMA event length */
1242 	size_t			evsz;		/* packed FMA event size */
1243 	ssize_t			n;		/* gen use */
1244 
1245 	/* allocate and populate the message header for 1 FMA event */
1246 
1247 	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1248 
1249 	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
1250 
1251 	/*
1252 	 * Design_Note: Although the ETM protocol supports it, we do not (yet)
1253 	 *		want responses/ACKs on FMA events that we send. All
1254 	 *		such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE.
1255 	 */
1256 
1257 	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
1258 	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
1259 	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
1260 	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
1261 	hdrp->ev_pp.pp_sub_type = 0;
1262 	hdrp->ev_pp.pp_rsvd_pad = 0;
1263 	hdrp->ev_pp.pp_xid = etm_xid_cur;
1264 	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
1265 	etm_xid_cur += ETM_XID_INC;
1266 	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
1267 	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);
1268 
1269 	lenp = &hdrp->ev_lens[0];
1270 
1271 	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
1272 		errno = n;
1273 		fmd_hdl_free(hdl, hdrp, hdr_sz);
1274 		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
1275 		return (NULL);
1276 	}
1277 
1278 	/* indicate 1 FMA event, network encode its length, and 0-terminate */
1279 
1280 	etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1;
1281 
1282 	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
1283 	*lenp = 0; *lenp = htonl(*lenp); lenp++;
1284 
1285 	/*
1286 	 * write the network encoded header to the transport, and
1287 	 * return alloc size to caller for later free
1288 	 */
1289 
1290 	if ((n = etm_io_op(hdl, "bad io write on event hdr",
1291 	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
1292 		errno = (-n);
1293 		fmd_hdl_free(hdl, hdrp, hdr_sz);
1294 		return (NULL);
1295 	}
1296 
1297 	*szp = hdr_sz;
1298 	return (hdrp);
1299 
1300 } /* etm_hdr_write() */
1301 
1302 /*
1303  * etm_post_to_fmd - post the given FMA event to FMD
1304  *			via a FMD transport API call,
1305  *			return 0 or -errno value
1306  *
1307  * caveats:	the FMA event (evp) is freed by FMD,
1308  *		thus callers of this function should
1309  *		immediately discard any ptr they have to the
1310  *		nvlist without freeing or dereferencing it
1311  */
1312 
1313 static int
1314 etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp)
1315 {
1316 	ssize_t			ev_sz;		/* sizeof *evp */
1317 
1318 	(void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR);
1319 
1320 	if (etm_debug_lvl >= 2) {
1321 		etm_show_time(hdl, "ante ev post");
1322 	}
1323 	fmd_xprt_post(hdl, fmd_xprt, evp, 0);
1324 	etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
1325 	etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz;
1326 	if (etm_debug_lvl >= 1) {
1327 		fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
1328 	}
1329 	if (etm_debug_lvl >= 2) {
1330 		etm_show_time(hdl, "post ev post");
1331 	}
1332 	return (0);
1333 
1334 } /* etm_post_to_fmd() */
1335 
1336 /*
1337  * Ideally we would just use syslog(3C) for outputting our messages.
1338  * Unfortunately, as this module is running within the FMA daemon context,
1339  * that would create the situation where this module's openlog() would
1340  * have the monopoly on syslog(3C) for the daemon and all its modules.
1341  * To avoid that situation, this module uses the same logic as the
1342  * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D)
1343  * devices for syslog and console.
1344  */
1345 
1346 static int
1347 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz,
1348 							uint8_t *body_buf)
1349 {
1350 	char		*sysmessage;	/* Formatted message */
1351 	size_t		formatlen;	/* maximum length of sysmessage */
1352 	struct strbuf	ctl, dat;	/* structs pushed to the logfd */
1353 	uint32_t	msgid;		/* syslog message ID number */
1354 
1355 	if ((syslog_file == 0) && (syslog_cons == 0)) {
1356 		return (0);
1357 	}
1358 
1359 	if (etm_debug_lvl >= 2) {
1360 		etm_show_time(hdl, "ante syslog post");
1361 	}
1362 
1363 	formatlen = body_sz + 64; /* +64 for prefix strings added below */
1364 	sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP);
1365 
1366 	if (syslog_file) {
1367 		STRLOG_MAKE_MSGID(body_buf, msgid);
1368 		(void) snprintf(sysmessage, formatlen,
1369 		    "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid,
1370 		    body_buf);
1371 
1372 		syslog_ctl.pri = syslog_facility | priority;
1373 
1374 		ctl.buf = (void *)&syslog_ctl;
1375 		ctl.len = sizeof (syslog_ctl);
1376 
1377 		dat.buf = sysmessage;
1378 		dat.len = strlen(sysmessage) + 1;
1379 
1380 		if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) {
1381 			fmd_hdl_debug(hdl, "putmsg failed: %s\n",
1382 			    strerror(errno));
1383 			etm_stats.etm_log_err.fmds_value.ui64++;
1384 		}
1385 	}
1386 
1387 	if (syslog_cons) {
1388 		(void) snprintf(sysmessage, formatlen,
1389 		    "SC Alert: %s\r\n", body_buf);
1390 
1391 		dat.buf = sysmessage;
1392 		dat.len = strlen(sysmessage) + 1;
1393 
1394 		if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) {
1395 			fmd_hdl_debug(hdl, "write failed: %s\n",
1396 			    strerror(errno));
1397 			etm_stats.etm_msg_err.fmds_value.ui64++;
1398 		}
1399 	}
1400 
1401 	fmd_hdl_free(hdl, sysmessage, formatlen);
1402 
1403 	if (etm_debug_lvl >= 2) {
1404 		etm_show_time(hdl, "post syslog post");
1405 	}
1406 
1407 	return (0);
1408 }
1409 
1410 
1411 /*
1412  * etm_req_ver_negot - send an ETM control message to the other end requesting
1413  *			that the ETM protocol version be negotiated/set
1414  */
1415 
1416 static void
1417 etm_req_ver_negot(fmd_hdl_t *hdl)
1418 {
1419 	etm_xport_addr_t	*addrv;		/* default dst addr(s) */
1420 	etm_xport_conn_t	conn;		/* connection to other end */
1421 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
1422 	size_t			hdr_sz;		/* sizeof header */
1423 	uint8_t			*body_buf;	/* msg body buffer */
1424 	uint32_t		body_sz;	/* sizeof *body_buf */
1425 	ssize_t			i;		/* gen use */
1426 
1427 	/* populate an ETM control msg to send */
1428 
1429 	hdr_sz = sizeof (*ctl_hdrp);
1430 	body_sz = (3 + 1);		/* version bytes plus null byte */
1431 
1432 	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);
1433 
1434 	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
1435 	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
1436 	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
1437 	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ;
1438 	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
1439 	etm_xid_ver_negot = etm_xid_cur;
1440 	etm_xid_cur += ETM_XID_INC;
1441 	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot);
1442 	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
1443 	ctl_hdrp->ctl_len = htonl(body_sz);
1444 
1445 	body_buf = (void*)&ctl_hdrp->ctl_len;
1446 	body_buf += sizeof (ctl_hdrp->ctl_len);
1447 	*body_buf++ = ETM_PROTO_V3;
1448 	*body_buf++ = ETM_PROTO_V2;
1449 	*body_buf++ = ETM_PROTO_V1;
1450 	*body_buf++ = '\0';
1451 
1452 	/*
1453 	 * open and close a connection to send the ETM control msg
1454 	 * to any/all of the default dst addrs
1455 	 */
1456 
1457 	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
1458 		fmd_hdl_error(hdl,
1459 		    "error: bad ctl dst addrs errno %d\n", errno);
1460 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
1461 		goto func_ret;
1462 	}
1463 
1464 	for (i = 0; addrv[i] != NULL; i++) {
1465 
1466 		if (etm_conn_open(hdl, "bad conn open during ver negot",
1467 		    addrv[i], &conn) < 0) {
1468 			continue;
1469 		}
1470 		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
1471 		    conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) {
1472 			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
1473 			etm_stats.etm_wr_body_control.fmds_value.ui64++;
1474 		}
1475 		(void) etm_conn_close(hdl, "bad conn close during ver negot",
1476 		    conn);
1477 
1478 	} /* foreach dst addr */
1479 
1480 func_ret:
1481 
1482 	if (addrv != NULL) {
1483 		etm_xport_free_addrv(hdl, addrv);
1484 	}
1485 	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);
1486 
1487 } /* etm_req_ver_negot() */
1488 
1489 
1490 
1491 /*
1492  * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue
1493  * etm_iosvc_msg_deq - del element from head of ETM iosvc msg  queue
1494  * need to grab the mutex lock before calling this routine
1495  * return >0 for success, or -errno value
1496  */
1497 static int
1498 etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1499 {
1500 	etm_iosvc_q_ele_t		*newp;	/* ptr to new msg q ele */
1501 
1502 	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1503 		fmd_hdl_debug(hdl, "warning: enq to full msg queue\n");
1504 		return (-E2BIG);
1505 	}
1506 
1507 	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
1508 	(void) memcpy(newp, msgp, sizeof (*newp));
1509 	newp->msg_nextp = NULL;
1510 
1511 	if (iosvc->msg_q_cur_len == 0) {
1512 		iosvc->msg_q_head = newp;
1513 	} else {
1514 		iosvc->msg_q_tail->msg_nextp = newp;
1515 	}
1516 
1517 	iosvc->msg_q_tail = newp;
1518 	iosvc->msg_q_cur_len++;
1519 	fmd_hdl_debug(hdl, "info: current msg queue length %d\n",
1520 	    iosvc->msg_q_cur_len);
1521 
1522 	return (1);
1523 
1524 } /* etm_iosvc_msg_enq() */
1525 
1526 static int
1527 etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
1528 {
1529 	etm_iosvc_q_ele_t	*oldp;	/* ptr to old msg q ele */
1530 
1531 	if (iosvc->msg_q_cur_len == 0) {
1532 		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
1533 		return (-ENOENT);
1534 	}
1535 
1536 	(void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp));
1537 	msgp->msg_nextp = NULL;
1538 
1539 	oldp = iosvc->msg_q_head;
1540 	iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp;
1541 
1542 	/*
1543 	 * free the mem alloc-ed in etm_iosvc_msg_enq()
1544 	 */
1545 	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
1546 
1547 	iosvc->msg_q_cur_len--;
1548 	if (iosvc->msg_q_cur_len == 0) {
1549 		iosvc->msg_q_tail = NULL;
1550 	}
1551 
1552 	return (1);
1553 
1554 } /* etm_iosvc_msg_deq() */
1555 
1556 
1557 /*
1558  * etm_msg_enq_head():
1559  * enq the msg to the head of the Q.
1560  * If the Q is full, drop the msg at the tail then enq the msg at head.
1561  * need to grab mutex lock iosvc->msg_q_lock before calling this routine.
1562  */
1563 static void
1564 etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1565     etm_iosvc_q_ele_t *msg_ele)
1566 {
1567 
1568 	etm_iosvc_q_ele_t	*newp;	/* iosvc msg ele ptr */
1569 
1570 	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
1571 		fmd_hdl_debug(fmd_hdl,
1572 		    "warning: add to head of a full msg queue."
1573 		    " Drop the msg at the tail\n");
1574 		/*
1575 		 * drop the msg at the tail
1576 		 */
1577 		newp = iosvc->msg_q_head;
1578 		while (newp->msg_nextp != iosvc->msg_q_tail) {
1579 			newp = newp->msg_nextp;
1580 		}
1581 
1582 		/*
1583 		 * free the msg in iosvc->msg_q_tail->msg
1584 		 * free the mem pointed to by iosvc->msg_q_tail
1585 		 */
1586 		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg,
1587 		    iosvc->msg_q_tail->msg_size);
1588 		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp));
1589 		iosvc->msg_q_tail = newp;
1590 		iosvc->msg_q_tail->msg_nextp = NULL;
1591 		iosvc->msg_q_cur_len--;
1592 	}
1593 
1594 	/*
1595 	 * enq the msg to the head
1596 	 */
1597 	newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP);
1598 	(void) memcpy(newp, msg_ele, sizeof (*newp));
1599 	if (iosvc->msg_q_cur_len == 0) {
1600 		newp->msg_nextp = NULL;
1601 		iosvc->msg_q_tail = newp;
1602 	} else {
1603 		newp->msg_nextp = iosvc->msg_q_head;
1604 	}
1605 	iosvc->msg_q_head = newp;
1606 	iosvc->msg_q_cur_len++;
1607 } /* etm_msg_enq_head() */
1608 
1609 /*
1610  * etm_isovc_cleanup():
1611  * clean up what's in the passed-in iosvc struct, including the msg Q.
1612  */
1613 static void
1614 etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc)
1615 {
1616 
1617 	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1618 
1619 	iosvc->thr_is_dying = 1;
1620 
1621 	if (iosvc->send_tid != NULL) {
1622 		fmd_thr_signal(fmd_hdl, iosvc->send_tid);
1623 		fmd_thr_destroy(fmd_hdl, iosvc->send_tid);
1624 		iosvc->send_tid = NULL;
1625 	} /* if io svc send thread was created ok */
1626 
1627 	if (iosvc->recv_tid != NULL) {
1628 		fmd_thr_signal(fmd_hdl, iosvc->recv_tid);
1629 		fmd_thr_destroy(fmd_hdl, iosvc->recv_tid);
1630 		iosvc->recv_tid = NULL;
1631 	} /* if root domain recv thread was created */
1632 
1633 	iosvc->ldom_name[0] = '\0';
1634 
1635 	iosvc->ds_hdl = DS_INVALID_HDL;
1636 
1637 	if (iosvc->fmd_xprt != NULL) {
1638 		fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt);
1639 		iosvc->fmd_xprt = NULL;
1640 	} /* if fmd-xprt has been opened */
1641 
1642 	(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1643 	while (iosvc->msg_q_cur_len > 0) {
1644 		(void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele);
1645 		fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size);
1646 	}
1647 	(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1648 
1649 	return;
1650 
1651 } /* etm_iosvc_cleanup() */
1652 
1653 /*
1654  * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty)
1655  * not found, create one, add to iosvc_list
1656  */
1657 etm_iosvc_t *
1658 etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl,
1659     boolean_t iosvc_create)
1660 {
1661 	uint32_t		i;			/* for loop var */
1662 	int32_t			first_empty_slot = -1;	/* remember that */
1663 
1664 	for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
1665 		if (ldom_name[0] == '\0') {
1666 			/*
1667 			 * search by hdl passed in
1668 			 * the only time this is used is at ds_unreg_cb time.
1669 			 * there is no ldom name, only the valid ds_hdl.
1670 			 * find an iosvc with the matching ds_hdl.
1671 			 * ignore the iosvc_create flag, should never need to
1672 			 * create an iosvc for ds_unreg_cb
1673 			 */
1674 			if (ds_hdl == iosvc_list[i].ds_hdl) {
1675 				if (etm_debug_lvl >= 2) {
1676 				fmd_hdl_debug(fmd_hdl,
1677 			    "info: found an iosvc at slot %d w/ ds_hdl %d \n",
1678 				    i, iosvc_list[i].ds_hdl);
1679 				}
1680 				if (iosvc_list[i].ldom_name[0] != '\0')
1681 					if (etm_debug_lvl >= 2) {
1682 						fmd_hdl_debug(fmd_hdl,
1683 				    "info: found an iosvc w/ ldom_name %s \n",
1684 						    iosvc_list[i].ldom_name);
1685 				}
1686 				return (&iosvc_list[i]);
1687 			} else {
1688 				continue;
1689 			}
1690 		} else if (iosvc_list[i].ldom_name[0] != '\0') {
1691 			/*
1692 			 * this is  an non-empty iosvc structure slot
1693 			 */
1694 			if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) {
1695 				/*
1696 				 * found an iosvc structure that matches the
1697 				 * passed in ldom_name, return the ptr
1698 				 */
1699 				if (etm_debug_lvl >= 2) {
1700 					fmd_hdl_debug(fmd_hdl, "info: found an "
1701 					    "iosvc at slot %d w/ ds_hdl %d \n",
1702 					    i, iosvc_list[i].ds_hdl);
1703 					fmd_hdl_debug(fmd_hdl, "info: found an "
1704 					    "iosvc w/ ldom_name %s \n",
1705 					    iosvc_list[i].ldom_name);
1706 				}
1707 				return (&iosvc_list[i]);
1708 			} else {
1709 				/*
1710 				 * non-empty slot with no-matching name,
1711 				 * move on to next slot.
1712 				 */
1713 				continue;
1714 			}
1715 		} else {
1716 			/*
1717 			 * found the 1st slot with ldom name being empty
1718 			 * remember the slot #, will be used for creating one
1719 			 */
1720 			if (first_empty_slot == -1) {
1721 				first_empty_slot = i;
1722 			}
1723 		}
1724 	}
1725 	if (iosvc_create == B_TRUE && first_empty_slot >= 0) {
1726 		/*
1727 		 * this is the case we need to add an iosvc at first_empty_slot
1728 		 * for the ldom_name at iosvc_list[first_empty_slot]
1729 		 */
1730 		fmd_hdl_debug(fmd_hdl,
1731 		    "info: create an iosvc with ldom name %s\n",
1732 		    ldom_name);
1733 		i = first_empty_slot;
1734 		(void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t));
1735 		(void) strcpy(iosvc_list[i].ldom_name, ldom_name);
1736 		fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n",
1737 		    i, iosvc_list[i].ldom_name);
1738 		return (&iosvc_list[i]);
1739 	} else {
1740 		return (NULL);
1741 	}
1742 
1743 } /* etm_iosvc_lookup() */
1744 
1745 
1746 /*
1747  * etm_ckpt_remove:
1748  * remove the ckpt for the iosvc element
1749  */
1750 static void
1751 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) {
1752 	int		err;			/* temp error */
1753 	nvlist_t	*evp = NULL;		/* event pointer */
1754 	etm_proto_v1_ev_hdr_t	*hdrp;		/* hdr for FMA_EVENT */
1755 	char		*buf;			/* packed event pointer */
1756 
1757 	if ((ele->ckpt_flag == ETM_CKPT_NOOP) ||
1758 	    (etm_ldom_type != LDOM_TYPE_CONTROL)) {
1759 		return;
1760 	}
1761 
1762 	/* the pointer to the packed event in the etm message */
1763 	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg);
1764 	buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp)
1765 	    + (1 * sizeof (hdrp->ev_lens[0])));
1766 
1767 	/* unpack it, then uncheckpoited it */
1768 	if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) {
1769 		fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err);
1770 		return;
1771 	}
1772 	(void) etm_ckpt_delete(hdl, evp);
1773 	nvlist_free(evp);
1774 }
1775 
1776 /*
1777  * etm_send_ds_msg()
1778  * call ds_send_msg() to send the msg passed in.
1779  * timedcond_wait for the ACK to come back.
1780  * if the ACK doesn't come in the specified time, retrun -EAGAIN.
1781  * other wise, return 1.
1782  */
1783 int
1784 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc,
1785     etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp)
1786 {
1787 	uint32_t		rc;		/* for return code  */
1788 
1789 	struct timeval		tv;
1790 	struct timespec		timeout;
1791 
1792 
1793 	/*
1794 	 * call ds_send_msg(). Return (-EAGAIN) if not successful
1795 	 */
1796 	if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg,
1797 	    msg_ele->msg_size)) != 0) {
1798 		fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n",
1799 		    rc, evhdrp->ev_pp.pp_xid);
1800 			return (-EAGAIN);
1801 	}
1802 
1803 	/*
1804 	 * wait on the cv for resp msg for cur_send_xid
1805 	 */
1806 	(void *) pthread_mutex_lock(&iosvc->msg_ack_lock);
1807 
1808 	(void) gettimeofday(&tv, 0);
1809 	timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time;
1810 	timeout.tv_nsec = 0;
1811 
1812 	fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n",
1813 	    iosvc->ldom_name);
1814 	rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock,
1815 	    &timeout);
1816 	(void *) pthread_mutex_unlock(&iosvc->msg_ack_lock);
1817 	fmd_hdl_debug(fmd_hdl,  "info: msg_ack_cv returns with rc %d\n", rc);
1818 
1819 	/*
1820 	 * check to see if ack_ok is non-zero
1821 	 * if non-zero, resp msg has been received
1822 	 */
1823 	if (iosvc->ack_ok != 0) {
1824 		/*
1825 		 * ACK came ok,  this send is successful,
1826 		 * tell the caller ready to send next.
1827 		 * free mem alloc-ed in
1828 		 * etm_pack_ds_msg
1829 		 */
1830 		if (ckpt_remove == B_TRUE &&
1831 		    etm_ldom_type == LDOM_TYPE_CONTROL) {
1832 			etm_ckpt_remove(fmd_hdl, msg_ele);
1833 		}
1834 		fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size);
1835 		iosvc->cur_send_xid++;
1836 		return (1);
1837 	} else {
1838 		/*
1839 		 * the ACK did not come on time
1840 		 * tell the caller to resend cur_send_xid
1841 		 */
1842 		return (-EAGAIN);
1843 	} /* iosvc->ack_ok != 0 */
1844 } /* etm_send_ds_msg() */
1845 
1846 /*
1847  * both events from fmdo_send entry point and from SP are using the
1848  * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all
1849  * ds send/recv msgs.
1850  * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send
1851  * entry point can be called before FMA events from SP, we can't rely on
1852  * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send
1853  * events.
1854  * return >0 for success, or -errno value
1855  * Design assumption: there is one FMA event per ds msg
1856  */
1857 int
1858 etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
1859 	etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp,
1860 	etm_pack_msg_type_t msg_type, uint_t ckpt_opt)
1861 {
1862 	etm_proto_v1_ev_hdr_t	*hdrp;		/* for FMA_EVENT msg */
1863 	uint32_t		*lenp;		/* ptr to FMA event length */
1864 	size_t			evsz;		/* packed FMA event size */
1865 	char 			*buf;
1866 	uint32_t		rc;		/* for return code  */
1867 	char 			*msg;		/* body of msg to be Qed */
1868 
1869 	etm_iosvc_q_ele_t	msg_ele;	/* io svc msg Q ele */
1870 	etm_proto_v1_ev_hdr_t	*evhdrp;
1871 
1872 
1873 	if (ev_hdrp == NULL) {
1874 		hdrp = &iosvc_hdr;
1875 	} else {
1876 		hdrp = ev_hdrp;
1877 	}
1878 
1879 	/*
1880 	 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz
1881 	 */
1882 
1883 	if (hdr_sz == 0) {
1884 		hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
1885 	}
1886 
1887 	/*
1888 	 * determine evp size
1889 	 */
1890 	(void) nvlist_size(evp, &evsz, NV_ENCODE_XDR);
1891 
1892 	/* indicate 1 FMA event, no network encoding, and 0-terminate */
1893 	lenp = &hdrp->ev_lens[0];
1894 	*lenp = evsz;
1895 
1896 	/*
1897 	 * now the total of mem needs to be alloc-ed/ds msg size is
1898 	 * hdr_sz + evsz
1899 	 * msg will be freed in etm_send_to_remote_root() after ds_send_msg()
1900 	 */
1901 	msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP);
1902 
1903 
1904 	/*
1905 	 * copy hdr, 0 terminate the length vector,  and then evp
1906 	 */
1907 	(void) memcpy(msg, hdrp, sizeof (*hdrp));
1908 	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg);
1909 	lenp = &hdrp->ev_lens[0];
1910 	lenp++;
1911 	*lenp = 0;
1912 
1913 	buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP);
1914 	(void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0);
1915 	(void) memcpy(msg + hdr_sz, buf, evsz);
1916 	fmd_hdl_free(fmd_hdl, buf, evsz);
1917 
1918 	fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg"
1919 	    "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name);
1920 	msg_ele.msg = msg;
1921 	msg_ele.msg_size = hdr_sz + evsz;
1922 	msg_ele.ckpt_flag = ckpt_opt;
1923 
1924 	/*
1925 	 * decide what to do with the msg:
1926 	 * if SP ereports (msg_type == SP_MSG), always enq the msg
1927 	 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after
1928 	 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1)
1929 	 */
1930 	if ((msg_type == SP_MSG) ||
1931 	    (msg_type != SP_MSG) && (iosvc->start_sending_Q == 1)) {
1932 		/*
1933 		 * this is the case when the msg needs to be enq-ed
1934 		 */
1935 		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
1936 		rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele);
1937 		if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) &&
1938 		    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
1939 			(void) etm_ckpt_add(fmd_hdl, evp);
1940 		}
1941 		if (iosvc->msg_q_cur_len == 1)
1942 			(void) pthread_cond_signal(&iosvc->msg_q_cv);
1943 		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
1944 	} else {
1945 		/*
1946 		 * fmd RDWR xprt procotol startup msgs, send it now!
1947 		 */
1948 		iosvc->ack_ok = 0;
1949 		evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg);
1950 		evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
1951 		while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL &&
1952 		    !etm_is_dying) {
1953 			if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele,
1954 			    evhdrp) < 0) {
1955 				continue;
1956 			}
1957 		}
1958 		if (msg_type == FMD_XPRT_RUN_MSG)
1959 			iosvc->start_sending_Q = 1;
1960 	}
1961 
1962 	return (rc);
1963 
1964 } /* etm_pack_ds_msg() */
1965 
1966 /*
1967  * Design_Note:	For all etm_resp_q_*() functions and etm_resp_q_* globals,
1968  *		the mutex etm_resp_q_lock must be held by the caller.
1969  */
1970 
1971 /*
1972  * etm_resp_q_enq - add element to tail of ETM responder queue
1973  * etm_resp_q_deq - del element from head of ETM responder queue
1974  *
1975  * return >0 for success, or -errno value
1976  */
1977 
1978 static int
1979 etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
1980 {
1981 	etm_resp_q_ele_t	*newp;	/* ptr to new resp q ele */
1982 
1983 	if (etm_resp_q_cur_len >= etm_resp_q_max_len) {
1984 		fmd_hdl_debug(hdl, "warning: enq to full responder queue\n");
1985 		etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++;
1986 		return (-E2BIG);
1987 	}
1988 
1989 	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
1990 	(void) memcpy(newp, rqep, sizeof (*newp));
1991 	newp->rqe_nextp = NULL;
1992 
1993 	if (etm_resp_q_cur_len == 0) {
1994 		etm_resp_q_head = newp;
1995 	} else {
1996 		etm_resp_q_tail->rqe_nextp = newp;
1997 	}
1998 	etm_resp_q_tail = newp;
1999 	etm_resp_q_cur_len++;
2000 	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2001 
2002 	return (1);
2003 
2004 } /* etm_resp_q_enq() */
2005 
2006 static int
2007 etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
2008 {
2009 	etm_resp_q_ele_t	*oldp;	/* ptr to old resp q ele */
2010 
2011 	if (etm_resp_q_cur_len == 0) {
2012 		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
2013 		etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++;
2014 		return (-ENOENT);
2015 	}
2016 
2017 	(void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
2018 	rqep->rqe_nextp = NULL;
2019 
2020 	oldp = etm_resp_q_head;
2021 	etm_resp_q_head = etm_resp_q_head->rqe_nextp;
2022 	fmd_hdl_free(hdl, oldp, sizeof (*oldp));
2023 
2024 	etm_resp_q_cur_len--;
2025 	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
2026 	if (etm_resp_q_cur_len == 0) {
2027 		etm_resp_q_tail = NULL;
2028 	}
2029 
2030 	return (1);
2031 
2032 } /* etm_resp_q_deq() */
2033 
2034 /*
2035  * etm_maybe_enq_response - check the given message header to see
2036  *				whether a response has been requested,
2037  *				if so then enqueue the given connection
2038  *				and header for later transport by the
2039  *				responder thread as an ETM response msg,
2040  *				return 0 for nop, >0 success, or -errno value
2041  */
2042 
2043 static ssize_t
2044 etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2045     void *hdrp, uint32_t hdr_sz, int32_t resp_code)
2046 {
2047 	ssize_t			rv;		/* ret val */
2048 	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2049 	uint8_t			orig_msg_type;	/* orig hdr's message type */
2050 	uint32_t		orig_timeout;	/* orig hdr's timeout */
2051 	etm_resp_q_ele_t	rqe;		/* responder queue ele */
2052 
2053 	ppp = hdrp;
2054 	orig_msg_type = ppp->pp_msg_type;
2055 	orig_timeout = ppp->pp_timeout;
2056 
2057 	/* bail out now if no response is to be sent */
2058 
2059 	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
2060 		return (0);
2061 	} /* if a nop */
2062 
2063 	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
2064 	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
2065 	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
2066 		fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
2067 		    orig_msg_type);
2068 		return (-EINVAL);
2069 	} /* if inappropriate hdr for a response msg */
2070 
2071 	/*
2072 	 * enqueue the msg hdr and nudge the responder thread
2073 	 * if the responder queue was previously empty
2074 	 */
2075 
2076 	rqe.rqe_conn = conn;
2077 	rqe.rqe_hdrp = hdrp;
2078 	rqe.rqe_hdr_sz = hdr_sz;
2079 	rqe.rqe_resp_code = resp_code;
2080 
2081 	(void) pthread_mutex_lock(&etm_resp_q_lock);
2082 	rv = etm_resp_q_enq(hdl, &rqe);
2083 	if (etm_resp_q_cur_len == 1)
2084 		(void) pthread_cond_signal(&etm_resp_q_cv);
2085 	(void) pthread_mutex_unlock(&etm_resp_q_lock);
2086 
2087 	return (rv);
2088 
2089 } /* etm_maybe_enq_response() */
2090 
2091 /*
2092  * Design_Note:	We rely on the fact that all message types have
2093  *		a common protocol preamble; if this fact should
2094  *		ever change it may break the code below. We also
2095  *		rely on the fact that FMA_EVENT and CONTROL headers
2096  *		returned by etm_hdr_read() will be sized large enough
2097  *		to reuse them as RESPONSE headers if the remote endpt
2098  *		asked for a response via the pp_timeout field.
2099  */
2100 
2101 /*
2102  * etm_send_response - use the given message header and response code
2103  *			to construct an appropriate response message,
2104  *			and send it back on the given connection,
2105  *			return >0 for success, or -errno value
2106  */
2107 
2108 static ssize_t
2109 etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
2110     void *hdrp, int32_t resp_code)
2111 {
2112 	ssize_t			rv;		/* ret val */
2113 	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
2114 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2115 	uint8_t			resp_body[4];	/* response body if needed */
2116 	uint8_t			*resp_msg;	/* response hdr+body */
2117 	size_t			hdr_sz;		/* sizeof response hdr */
2118 	uint8_t			orig_msg_type;	/* orig hdr's message type */
2119 
2120 	ppp = hdrp;
2121 	orig_msg_type = ppp->pp_msg_type;
2122 
2123 	if (etm_debug_lvl >= 2) {
2124 		etm_show_time(hdl, "ante resp send");
2125 	}
2126 
2127 	/* reuse the given header as a response header */
2128 
2129 	resp_hdrp = hdrp;
2130 	resp_hdrp->resp_code = resp_code;
2131 	resp_hdrp->resp_len = 0;		/* default is empty body */
2132 
2133 	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
2134 	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
2135 		resp_body[0] = ETM_PROTO_V2;
2136 		resp_body[1] = ETM_PROTO_V3;
2137 		resp_body[2] = 0;
2138 		resp_hdrp->resp_len = 3;
2139 	} /* if should send our/negotiated proto ver in resp body */
2140 
2141 	/* respond with the proto ver that was negotiated */
2142 
2143 	resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
2144 	resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
2145 	resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
2146 
2147 	/*
2148 	 * send the whole response msg in one write, header and body;
2149 	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
2150 	 * ie, if the body is empty. update the response stats.
2151 	 */
2152 
2153 	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);
2154 
2155 	resp_msg = hdrp;
2156 	if (resp_hdrp->resp_len > 0) {
2157 		resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len,
2158 		    FMD_SLEEP);
2159 		(void) memcpy(resp_msg, resp_hdrp, hdr_sz);
2160 		(void) memcpy(resp_msg + hdr_sz, resp_body,
2161 		    resp_hdrp->resp_len);
2162 	}
2163 
2164 	(void) pthread_mutex_lock(&etm_write_lock);
2165 	rv = etm_io_op(hdl, "bad io write on resp msg", conn,
2166 	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR);
2167 	(void) pthread_mutex_unlock(&etm_write_lock);
2168 	if (rv < 0) {
2169 		goto func_ret;
2170 	}
2171 
2172 	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
2173 	etm_stats.etm_wr_body_response.fmds_value.ui64++;
2174 
2175 	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
2176 	    "xid 0x%x code %d len %u\n",
2177 	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
2178 	    resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code,
2179 	    resp_hdrp->resp_len);
2180 func_ret:
2181 
2182 	if (resp_hdrp->resp_len > 0) {
2183 		fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len);
2184 	}
2185 	if (etm_debug_lvl >= 2) {
2186 		etm_show_time(hdl, "post resp send");
2187 	}
2188 	return (rv);
2189 
2190 } /* etm_send_response() */
2191 
2192 /*
2193  * etm_reset_xport - reset the transport layer (via fini;init)
2194  *			presumably for an error condition we cannot
2195  *			otherwise recover from (ex: hung LDC channel)
2196  *
2197  * caveats - no checking/locking is done to ensure an existing connection
2198  *		is idle during an xport reset; we don't want to deadlock
2199  *		and presumably the transport is stuck/unusable anyway
2200  */
2201 
2202 static void
2203 etm_reset_xport(fmd_hdl_t *hdl)
2204 {
2205 	(void) etm_xport_fini(hdl);
2206 	(void) etm_xport_init(hdl);
2207 	etm_stats.etm_reset_xport.fmds_value.ui64++;
2208 
2209 } /* etm_reset_xport() */
2210 
2211 /*
2212  * etm_handle_new_conn - receive an ETM message sent from the other end via
2213  *			the given open connection, pull out any FMA events
2214  *			and post them to the local FMD (or handle any ETM
2215  *			control or response msg); when done, close the
2216  *			connection
2217  */
2218 
2219 static void
2220 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn)
2221 {
2222 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
2223 	etm_proto_v1_ctl_hdr_t	*ctl_hdrp;	/* for CONTROL msg */
2224 	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
2225 	etm_proto_v3_sa_hdr_t	*sa_hdrp;	/* for ALERT msg */
2226 	etm_iosvc_t		*iosvc;		/* iosvc data structure */
2227 	int32_t			resp_code;	/* response code */
2228 	ssize_t			enq_rv;		/* resp_q enqueue status */
2229 	size_t			hdr_sz;		/* sizeof header */
2230 	size_t			evsz;		/* FMA event size */
2231 	uint8_t			*body_buf;	/* msg body buffer */
2232 	uint32_t		body_sz;	/* sizeof body_buf */
2233 	uint32_t		ev_cnt;		/* count of FMA events */
2234 	uint8_t			*bp;		/* byte ptr within body_buf */
2235 	nvlist_t		*evp;		/* ptr to unpacked FMA event */
2236 	char			*class;		/* FMA event class */
2237 	ssize_t			i, n;		/* gen use */
2238 	int			should_reset_xport; /* bool to reset xport */
2239 	char			ldom_name[MAX_LDOM_NAME]; /* ldom name */
2240 	int			rc;		/* return code */
2241 	uint64_t		did;		/* domain id */
2242 
2243 
2244 	if (etm_debug_lvl >= 2) {
2245 		etm_show_time(hdl, "ante conn handle");
2246 	}
2247 	fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn);
2248 
2249 	should_reset_xport = 0;
2250 	ev_hdrp = NULL;
2251 	ctl_hdrp = NULL;
2252 	resp_hdrp = NULL;
2253 	sa_hdrp = NULL;
2254 	body_buf = NULL;
2255 	class = NULL;
2256 	evp = NULL;
2257 	resp_code = 0;	/* default is success */
2258 	enq_rv = 0;	/* default is nop, ie, did not enqueue */
2259 
2260 	/* read a network decoded message header from the connection */
2261 
2262 	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
2263 		/* errno assumed set by above call */
2264 		should_reset_xport = (errno == ENOTACTIVE);
2265 		fmd_hdl_debug(hdl, "error: FMA event dropped: "
2266 		    "bad hdr read errno %d\n", errno);
2267 		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2268 		goto func_ret;
2269 	}
2270 
2271 	/*
2272 	 * handle the message based on its preamble pp_msg_type
2273 	 * which is known to be valid from etm_hdr_read() checks
2274 	 */
2275 
2276 	if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
2277 
2278 		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");
2279 
2280 		/* allocate buf large enough for whole body / all FMA events */
2281 
2282 		body_sz = 0;
2283 		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
2284 			body_sz += ev_hdrp->ev_lens[i];
2285 		} /* for summing sizes of all FMA events */
2286 		if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64)
2287 			etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i;
2288 		ev_cnt = i;
2289 
2290 		if (etm_debug_lvl >= 1) {
2291 			fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n",
2292 			    ev_cnt, body_sz);
2293 		}
2294 
2295 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2296 
2297 		/* read all the FMA events at once */
2298 
2299 		if ((n = etm_io_op(hdl, "FMA event dropped: "
2300 		    "bad io read on event bodies", conn, body_buf, body_sz,
2301 		    ETM_IO_OP_RD)) < 0) {
2302 			should_reset_xport = (n == -ENOTACTIVE);
2303 			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
2304 			goto func_ret;
2305 		}
2306 
2307 		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
2308 		etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;
2309 
2310 		/*
2311 		 * now that we've read the entire ETM msg from the conn,
2312 		 * which avoids later ETM protocol framing errors if we didn't,
2313 		 * check for dup msg/xid against last good FMD posting,
2314 		 * if a dup then resend response but skip repost to FMD
2315 		 */
2316 
2317 		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) {
2318 			enq_rv = etm_maybe_enq_response(hdl, conn,
2319 			    ev_hdrp, hdr_sz, 0);
2320 			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
2321 			    "xid 0x%x\n", etm_xid_posted_logged_ev);
2322 			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
2323 			goto func_ret;
2324 		}
2325 
2326 		/* unpack each FMA event and post it to FMD */
2327 
2328 		bp = body_buf;
2329 		for (i = 0; i < ev_cnt; i++) {
2330 			if ((n = nvlist_unpack((char *)bp,
2331 			    ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
2332 				resp_code = (-n);
2333 				enq_rv = etm_maybe_enq_response(hdl, conn,
2334 				    ev_hdrp, hdr_sz, resp_code);
2335 				fmd_hdl_error(hdl, "error: FMA event dropped: "
2336 				    "bad event body unpack errno %d\n", n);
2337 				if (etm_debug_lvl >= 2) {
2338 					fmd_hdl_debug(hdl, "info: FMA event "
2339 					    "hexdump %d bytes:\n",
2340 					    ev_hdrp->ev_lens[i]);
2341 					etm_hexdump(hdl, bp,
2342 					    ev_hdrp->ev_lens[i]);
2343 				}
2344 				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
2345 				    ui64++;
2346 				etm_stats.etm_rd_drop_fmaevent.fmds_value.
2347 				    ui64++;
2348 				bp += ev_hdrp->ev_lens[i];
2349 				continue;
2350 			}
2351 
2352 			if (etm_debug_lvl >= 1) {
2353 				(void) nvlist_lookup_string(evp, FM_CLASS,
2354 				    &class);
2355 				if (class == NULL) {
2356 					class = "NULL";
2357 				}
2358 				fmd_hdl_debug(hdl, "info: FMA event %p "
2359 				    "class %s\n", evp, class);
2360 			}
2361 
2362 			rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR);
2363 			fmd_hdl_debug(hdl,
2364 			    "info: evp size before pack ds msg %d\n", evsz);
2365 			ldom_name[0] = '\0';
2366 			rc = etm_filter_find_ldom_id(hdl, evp, ldom_name,
2367 			    MAX_LDOM_NAME, &did);
2368 
2369 			/*
2370 			 * if rc is zero and the ldom_name is not "primary",
2371 			 * the evp belongs to a root domain, put the evp in an
2372 			 * outgoing etm queue,
2373 			 * in all other cases, whether ldom_name is primary or
2374 			 * can't find a ldom name, call etm_post_to_fmd
2375 			 */
2376 			if ((rc == 0) && strcmp(ldom_name, "primary") &&
2377 			    strcmp(ldom_name, "")) {
2378 				/*
2379 				 * use the ldom_name, guaranteered at this point
2380 				 * to be a valid ldom name/non-NULL, to find the
2381 				 * iosvc data.
2382 				 * add an iosvc struct if can not find one
2383 				 */
2384 				(void) pthread_mutex_unlock(&iosvc_list_lock);
2385 				iosvc = etm_iosvc_lookup(hdl, ldom_name,
2386 				    DS_INVALID_HDL, B_TRUE);
2387 				(void) pthread_mutex_unlock(&iosvc_list_lock);
2388 				if (iosvc == NULL) {
2389 					fmd_hdl_debug(hdl,
2390 					    "error: can't find iosvc for ldom "
2391 					    "name %s\n", ldom_name);
2392 				} else {
2393 					resp_code = 0;
2394 					(void) etm_pack_ds_msg(hdl, iosvc,
2395 					    ev_hdrp, hdr_sz, evp,
2396 					    SP_MSG, ETM_CKPT_SAVE);
2397 					/*
2398 					 * call the new fmd_xprt_log()
2399 					 */
2400 					fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0);
2401 					etm_xid_posted_logged_ev =
2402 					    ev_hdrp->ev_pp.pp_xid;
2403 				}
2404 			} else {
2405 				/*
2406 				 * post the fma event to the control fmd
2407 				 */
2408 				resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt,
2409 				    evp);
2410 				if (resp_code >= 0) {
2411 					etm_xid_posted_logged_ev =
2412 					    ev_hdrp->ev_pp.pp_xid;
2413 				}
2414 			}
2415 
2416 			evp = NULL;
2417 			enq_rv = etm_maybe_enq_response(hdl, conn,
2418 			    ev_hdrp, hdr_sz, resp_code);
2419 			bp += ev_hdrp->ev_lens[i];
2420 		} /* foreach FMA event in the body buffer */
2421 
2422 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {
2423 
2424 		ctl_hdrp = (void*)ev_hdrp;
2425 
2426 		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
2427 		if (etm_debug_lvl >= 1) {
2428 			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
2429 			    (int)ctl_hdrp->ctl_pp.pp_sub_type,
2430 			    ctl_hdrp->ctl_pp.pp_xid);
2431 		}
2432 
2433 		/*
2434 		 * if we have a VER_NEGOT_REQ read the body and validate
2435 		 * the protocol version set contained therein,
2436 		 * otherwise we have a PING_REQ (which has no body)
2437 		 * and we [also] fall thru to the code which sends a
2438 		 * response msg if the pp_timeout field requested one
2439 		 */
2440 
2441 		if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) {
2442 
2443 			body_sz = ctl_hdrp->ctl_len;
2444 			body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2445 
2446 			if ((n = etm_io_op(hdl, "bad io read on ctl body",
2447 			    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2448 				should_reset_xport = (n == -ENOTACTIVE);
2449 				goto func_ret;
2450 			}
2451 
2452 			/* complain if version set completely incompatible */
2453 
2454 			for (i = 0; i < body_sz; i++) {
2455 				if ((body_buf[i] == ETM_PROTO_V1) ||
2456 				    (body_buf[i] == ETM_PROTO_V2) ||
2457 				    (body_buf[i] == ETM_PROTO_V3)) {
2458 					break;
2459 				}
2460 			}
2461 			if (i >= body_sz) {
2462 				etm_stats.etm_ver_bad.fmds_value.ui64++;
2463 				resp_code = (-EPROTO);
2464 			}
2465 
2466 		} /* if got version set request */
2467 
2468 		etm_stats.etm_rd_body_control.fmds_value.ui64++;
2469 
2470 		enq_rv = etm_maybe_enq_response(hdl, conn,
2471 		    ctl_hdrp, hdr_sz, resp_code);
2472 
2473 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
2474 
2475 		resp_hdrp = (void*)ev_hdrp;
2476 
2477 		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
2478 		if (etm_debug_lvl >= 1) {
2479 			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
2480 			    (int)resp_hdrp->resp_pp.pp_xid);
2481 		}
2482 
2483 		body_sz = resp_hdrp->resp_len;
2484 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2485 
2486 		if ((n = etm_io_op(hdl, "bad io read on resp len",
2487 		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2488 			should_reset_xport = (n == -ENOTACTIVE);
2489 			goto func_ret;
2490 		}
2491 
2492 		etm_stats.etm_rd_body_response.fmds_value.ui64++;
2493 
2494 		/*
2495 		 * look up the xid to interpret the response body
2496 		 *
2497 		 * ping is a nop; for ver negot confirm that a supported
2498 		 * protocol version was negotiated and remember which one
2499 		 */
2500 
2501 		if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
2502 		    (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) {
2503 			etm_stats.etm_xid_bad.fmds_value.ui64++;
2504 			goto func_ret;
2505 		}
2506 
2507 		if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) {
2508 			if ((body_buf[0] < ETM_PROTO_V1) ||
2509 			    (body_buf[0] > ETM_PROTO_V3)) {
2510 				etm_stats.etm_ver_bad.fmds_value.ui64++;
2511 				goto func_ret;
2512 			}
2513 			etm_resp_ver = body_buf[0];
2514 		} /* if have resp to last req to negotiate proto ver */
2515 
2516 	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {
2517 
2518 		sa_hdrp = (void*)ev_hdrp;
2519 
2520 		fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
2521 		if (etm_debug_lvl >= 1) {
2522 			fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
2523 			    (int)sa_hdrp->sa_pp.pp_sub_type,
2524 			    sa_hdrp->sa_pp.pp_xid);
2525 		}
2526 
2527 		body_sz = sa_hdrp->sa_len;
2528 		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);
2529 
2530 		if ((n = etm_io_op(hdl, "bad io read on sa body",
2531 		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
2532 			should_reset_xport = (n == -ENOTACTIVE);
2533 			goto func_ret;
2534 		}
2535 
2536 		etm_stats.etm_rd_body_alert.fmds_value.ui64++;
2537 
2538 		/*
2539 		 * now that we've read the entire ETM msg from the conn,
2540 		 * which avoids later ETM protocol framing errors if we didn't,
2541 		 * check for dup msg/xid against last good syslog posting,
2542 		 * if a dup then resend response but skip repost to syslog
2543 		 */
2544 
2545 		if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) {
2546 			enq_rv = etm_maybe_enq_response(hdl, conn,
2547 			    sa_hdrp, hdr_sz, 0);
2548 			fmd_hdl_debug(hdl, "info: skipping dup ALERT post "
2549 			    "xid 0x%x\n", etm_xid_posted_sa);
2550 			etm_stats.etm_rd_dup_alert.fmds_value.ui64++;
2551 			goto func_ret;
2552 		}
2553 
2554 		resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
2555 		    body_sz, body_buf);
2556 		if (resp_code >= 0) {
2557 			etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid;
2558 		}
2559 		enq_rv = etm_maybe_enq_response(hdl, conn,
2560 		    sa_hdrp, hdr_sz, resp_code);
2561 	} /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */
2562 
2563 func_ret:
2564 
2565 	if (etm_debug_lvl >= 2) {
2566 		etm_show_time(hdl, "post conn handle");
2567 	}
2568 
2569 	/*
2570 	 * if no responder ele was enqueued, close the conn now
2571 	 * and free the ETM msg hdr; the ETM msg body is not needed
2572 	 * by the responder thread and should always be freed here
2573 	 */
2574 
2575 	if (enq_rv <= 0) {
2576 		(void) etm_conn_close(hdl, "bad conn close after msg recv",
2577 		    conn);
2578 		if (ev_hdrp != NULL) {
2579 			fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
2580 		}
2581 	}
2582 	if (body_buf != NULL) {
2583 		fmd_hdl_free(hdl, body_buf, body_sz);
2584 	}
2585 	if (should_reset_xport) {
2586 		etm_reset_xport(hdl);
2587 	}
2588 } /* etm_handle_new_conn() */
2589 
2590 /*
2591  * etm_handle_bad_accept - recover from a failed connection acceptance
2592  */
2593 
2594 static void
2595 etm_handle_bad_accept(fmd_hdl_t *hdl, int nev)
2596 {
2597 	int	should_reset_xport; /* bool to reset xport */
2598 
2599 	should_reset_xport = (nev == -ENOTACTIVE);
2600 	fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev));
2601 	etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
2602 	(void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */
2603 	if (should_reset_xport) {
2604 		etm_reset_xport(hdl);
2605 	}
2606 } /* etm_handle_bad_accept() */
2607 
2608 /*
2609  * etm_server - loop forever accepting new connections
2610  *		using the given FMD handle,
2611  *		handling any ETM msgs sent from the other side
2612  *		via each such connection
2613  */
2614 
2615 static void
2616 etm_server(void *arg)
2617 {
2618 	etm_xport_conn_t	conn;		/* connection handle */
2619 	int			nev;		/* -errno val */
2620 	fmd_hdl_t		*hdl;		/* FMD handle */
2621 
2622 	hdl = arg;
2623 
2624 	fmd_hdl_debug(hdl, "info: connection server starting\n");
2625 
2626 	/*
2627 	 * Restore the checkpointed events and dispatch them before starting to
2628 	 * receive more events from the sp.
2629 	 */
2630 	etm_ckpt_recover(hdl);
2631 
2632 	while (!etm_is_dying) {
2633 
2634 		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
2635 			/* errno assumed set by above call */
2636 			nev = (-errno);
2637 			if (etm_is_dying) {
2638 				break;
2639 			}
2640 			etm_handle_bad_accept(hdl, nev);
2641 			continue;
2642 		}
2643 
2644 		/* handle the new message/connection, closing it when done */
2645 
2646 		etm_handle_new_conn(hdl, conn);
2647 
2648 	} /* while accepting new connections until ETM dies */
2649 
2650 	/* ETM is dying (probably due to "fmadm unload etm") */
2651 
2652 	fmd_hdl_debug(hdl, "info: connection server is dying\n");
2653 
2654 } /* etm_server() */
2655 
2656 /*
2657  * etm_responder - loop forever waiting for new responder queue elements
2658  *		to be enqueued, for each one constructing and sending
2659  *		an ETM response msg to the other side, and closing its
2660  *		associated connection when appropriate
2661  *
2662  *	this thread exists to ensure that the etm_server() thread
2663  *	never pends indefinitely waiting on the xport write lock, and is
2664  *	hence always available to accept new connections and handle
2665  *	incoming messages
2666  *
2667  *	this design relies on the fact that each connection accepted and
2668  *	returned by the ETM xport layer is unique, and each can be closed
2669  *	independently of the others while multiple connections are
2670  *	outstanding
2671  */
2672 
2673 static void
2674 etm_responder(void *arg)
2675 {
2676 	ssize_t			n;		/* gen use */
2677 	fmd_hdl_t		*hdl;		/* FMD handle */
2678 	etm_resp_q_ele_t	rqe;		/* responder queue ele */
2679 
2680 	hdl = arg;
2681 
2682 	fmd_hdl_debug(hdl, "info: responder server starting\n");
2683 
2684 	while (!etm_is_dying) {
2685 
2686 		(void) pthread_mutex_lock(&etm_resp_q_lock);
2687 
2688 		while (etm_resp_q_cur_len == 0) {
2689 			(void) pthread_cond_wait(&etm_resp_q_cv,
2690 			    &etm_resp_q_lock);
2691 			if (etm_is_dying) {
2692 				(void) pthread_mutex_unlock(&etm_resp_q_lock);
2693 				goto func_ret;
2694 			}
2695 		} /* while the responder queue is empty, wait to be nudged */
2696 
2697 		/*
2698 		 * for every responder ele that has been enqueued,
2699 		 * dequeue and send it as an ETM response msg,
2700 		 * closing its associated conn and freeing its hdr
2701 		 *
2702 		 * enter the queue draining loop holding the responder
2703 		 * queue lock, but do not hold the lock indefinitely
2704 		 * (the actual send may pend us indefinitely),
2705 		 * so that other threads will never pend for long
2706 		 * trying to enqueue a new element
2707 		 */
2708 
2709 		while (etm_resp_q_cur_len > 0) {
2710 
2711 			(void) etm_resp_q_deq(hdl, &rqe);
2712 			(void) pthread_mutex_unlock(&etm_resp_q_lock);
2713 
2714 			if ((n = etm_send_response(hdl, rqe.rqe_conn,
2715 			    rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
2716 				fmd_hdl_error(hdl, "error: bad resp send "
2717 				    "errno %d\n", (-n));
2718 			}
2719 
2720 			(void) etm_conn_close(hdl, "bad conn close after resp",
2721 			    rqe.rqe_conn);
2722 			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
2723 
2724 			if (etm_is_dying) {
2725 				goto func_ret;
2726 			}
2727 			(void) pthread_mutex_lock(&etm_resp_q_lock);
2728 
2729 		} /* while draining the responder queue */
2730 
2731 		(void) pthread_mutex_unlock(&etm_resp_q_lock);
2732 
2733 	} /* while awaiting and sending resp msgs until ETM dies */
2734 
2735 func_ret:
2736 
2737 	/* ETM is dying (probably due to "fmadm unload etm") */
2738 
2739 	fmd_hdl_debug(hdl, "info: responder server is dying\n");
2740 
2741 	(void) pthread_mutex_lock(&etm_resp_q_lock);
2742 	if (etm_resp_q_cur_len > 0) {
2743 		fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
2744 		    (int)etm_resp_q_cur_len);
2745 		while (etm_resp_q_cur_len > 0) {
2746 			(void) etm_resp_q_deq(hdl, &rqe);
2747 			(void) etm_conn_close(hdl, "bad conn close after deq",
2748 			    rqe.rqe_conn);
2749 			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
2750 		}
2751 	}
2752 	(void) pthread_mutex_unlock(&etm_resp_q_lock);
2753 
2754 } /* etm_responder() */
2755 
2756 static void *
2757 etm_init_alloc(size_t size)
2758 {
2759 	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
2760 }
2761 
2762 static void
2763 etm_init_free(void *addr, size_t size)
2764 {
2765 	fmd_hdl_free(init_hdl, addr, size);
2766 }
2767 
2768 /*
2769  * ---------------------root ldom support functions -----------------------
2770  */
2771 
/*
 * etm_async_q_enq and etm_async_q_deq use the static array async_event_q
 * rather than a dynamically allocated memory queue.
 * They are not running in an fmd aux thread, so they can't use the
 * fmd_hdl_* funcs.
 * The caller needs to grab the etm_async_event_q_lock mutex before calling
 * either func.
 * return >0 for success, or -errno value
 */
2779 static int
2780 etm_async_q_enq(etm_async_event_ele_t *async_e)
2781 {
2782 
2783 	if (etm_async_q_cur_len >= etm_async_q_max_len) {
2784 		/* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */
2785 		return (-E2BIG);
2786 	}
2787 
2788 	(void) memcpy(&async_event_q[etm_async_q_tail], async_e,
2789 	    sizeof (*async_e));
2790 
2791 	etm_async_q_tail++;
2792 	if (etm_async_q_tail == etm_async_q_max_len) {
2793 		etm_async_q_tail = 0;
2794 	}
2795 	etm_async_q_cur_len++;
2796 
2797 /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */
2798 
2799 	return (1);
2800 
2801 } /* etm_async_q_enq() */
2802 
2803 
2804 static int
2805 etm_async_q_deq(etm_async_event_ele_t *async_e)
2806 {
2807 
2808 	if (etm_async_q_cur_len == 0) {
2809 		/* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */
2810 		return (-ENOENT);
2811 	}
2812 
2813 	(void) memcpy(async_e, &async_event_q[etm_async_q_head],
2814 	    sizeof (*async_e));
2815 
2816 	etm_async_q_head++;
2817 	if (etm_async_q_head == etm_async_q_max_len) {
2818 		etm_async_q_head = 0;
2819 	}
2820 	etm_async_q_cur_len--;
2821 /* etm_stats.etm_async__q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */
2822 
2823 	return (1);
2824 } /* etm_async_q_deq */
2825 
2826 
2827 /*
2828  * ds userland interface ds_reg_cb  callback func
2829  */
2830 
2831 /* ARGSUSED */
2832 static void
2833 etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver,
2834 	ds_domain_hdl_t dhdl)
2835 {
2836 	etm_async_event_ele_t	async_ele;
2837 
2838 
2839 	/*
2840 	 * do version check here.
2841 	 * checked the ver received here against etm_iosvc_vers here
2842 	 */
2843 	if (etm_iosvc_vers[0].major != ver->major ||
2844 	    etm_iosvc_vers[0].minor != ver->minor) {
2845 		/*
2846 		 * can't log an fmd debug msg,
2847 		 * not running in an fmd aux thread
2848 		 */
2849 		return;
2850 	}
2851 
2852 	/*
2853 	 * the callback should have a valid ldom_name
2854 	 * can't log fmd debugging msg here since this is not in an fmd aux
2855 	 * thread. log fmd debug msg in etm_async_event_handle()
2856 	 */
2857 	async_ele.ds_hdl = ds_hdl;
2858 	async_ele.dhdl = dhdl;
2859 	async_ele.ldom_name[0] = '\0';
2860 	async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB;
2861 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2862 	(void) etm_async_q_enq(&async_ele);
2863 	if (etm_async_q_cur_len == 1)
2864 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2865 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2866 
2867 } /* etm_iosvc_reg_handler */
2868 
2869 
2870 /*
2871  * ds userland interface ds_unreg_cb  callback func
2872  */
2873 
2874 /*ARGSUSED*/
2875 static void
2876 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg)
2877 {
2878 	etm_async_event_ele_t	async_ele;
2879 
2880 	/*
2881 	 * fill in async_ele and enqueue async_ele
2882 	 */
2883 	async_ele.ldom_name[0] = '\0';
2884 	async_ele.ds_hdl = hdl;
2885 	async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB;
2886 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2887 	(void) etm_async_q_enq(&async_ele);
2888 	if (etm_async_q_cur_len == 1)
2889 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2890 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2891 } /* etm_iosvc_unreg_handler */
2892 
2893 /*
2894  * ldom event registration callback func
2895  */
2896 
2897 /* ARGSUSED */
2898 static void
2899 ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data)
2900 {
2901 	etm_async_event_ele_t	async_ele;
2902 
2903 	/*
2904 	 * the callback will have a valid ldom_name
2905 	 */
2906 	async_ele.ldom_name[0] = '\0';
2907 	if (ldom_name)
2908 		(void) strcpy(async_ele.ldom_name, ldom_name);
2909 	async_ele.ds_hdl = DS_INVALID_HDL;
2910 
2911 	/*
2912 	 * fill in async_ele and enq async_ele
2913 	 */
2914 	switch (event) {
2915 	case LDOM_EVENT_BIND:
2916 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND;
2917 		break;
2918 	case LDOM_EVENT_UNBIND:
2919 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND;
2920 		break;
2921 	case LDOM_EVENT_ADD:
2922 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD;
2923 		break;
2924 	case LDOM_EVENT_REMOVE:
2925 		async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE;
2926 		break;
2927 	default:
2928 		/*
2929 		 * for all other ldom events, do nothing
2930 		 */
2931 		return;
2932 	} /* switch (event) */
2933 
2934 	(void) pthread_mutex_lock(&etm_async_event_q_lock);
2935 	(void) etm_async_q_enq(&async_ele);
2936 	if (etm_async_q_cur_len == 1)
2937 		(void) pthread_cond_signal(&etm_async_event_q_cv);
2938 	(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2939 
2940 } /* ldom_event_handler */
2941 
2942 
2943 /*
2944  * This is running as an fmd aux thread.
2945  * This is the func that actually handle the events, which include:
2946  * 1. ldom events. ldom events are  on Control Domain only
2947  * 2. any DS userland callback funcs
2948  * these events are already Q-ed in the async_event_ele_q
2949  * deQ and process the events accordingly
2950  */
2951 static void
2952 etm_async_event_handler(void *arg)
2953 {
2954 
2955 	fmd_hdl_t		*fmd_hdl = (fmd_hdl_t *)arg;
2956 	etm_iosvc_t		*iosvc;		/* ptr 2 iosvc struct */
2957 	etm_async_event_ele_t	async_e;
2958 
2959 	fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n");
2960 	/*
2961 	 *  handle etm is not dying and Q len > 0
2962 	 */
2963 	while (!etm_is_dying) {
2964 		/*
2965 		 * grab the lock to check the Q len
2966 		 */
2967 		(void) pthread_mutex_lock(&etm_async_event_q_lock);
2968 		fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n",
2969 		    etm_async_q_cur_len);
2970 
2971 		while (etm_async_q_cur_len > 0) {
2972 			(void) etm_async_q_deq(&async_e);
2973 			(void) pthread_mutex_unlock(&etm_async_event_q_lock);
2974 			fmd_hdl_debug(fmd_hdl,
2975 			    "info: processing an async event type %d ds_hdl"
2976 			    " %d\n", async_e.event_type, async_e.ds_hdl);
2977 			if (async_e.ldom_name[0] != '\0') {
2978 				fmd_hdl_debug(fmd_hdl,
2979 				    "info: procssing async evt ldom_name %s\n",
2980 				    async_e.ldom_name);
2981 			}
2982 
2983 			/*
2984 			 * at this point, if async_e.ldom_name is not NULL,
2985 			 * we have a valid iosvc strcut ptr.
2986 			 * the only time async_e.ldom_name is NULL is  at
2987 			 * ds_unreg_cb()
2988 			 */
2989 			switch (async_e.event_type)  {
2990 			case ETM_ASYNC_EVENT_LDOM_UNBIND:
2991 			case ETM_ASYNC_EVENT_LDOM_REMOVE:
2992 				/*
2993 				 * we have a valid ldom_name,
2994 				 * etm_lookup_struct(ldom_name)
2995 				 * do nothing if can't find an iosvc
2996 				 * no iosvc clean up to do
2997 				 */
2998 				(void) pthread_mutex_lock(
2999 				    &iosvc_list_lock);
3000 				iosvc = etm_iosvc_lookup(fmd_hdl,
3001 				    async_e.ldom_name,
3002 				    async_e.ds_hdl, B_FALSE);
3003 				if (iosvc == NULL) {
3004 					fmd_hdl_debug(fmd_hdl,
3005 					    "error: can't find iosvc for ldom "
3006 					    "name %s\n",
3007 					    async_e.ldom_name);
3008 					(void) pthread_mutex_unlock(
3009 					    &iosvc_list_lock);
3010 					break;
3011 				}
3012 				etm_iosvc_cleanup(fmd_hdl, iosvc);
3013 				(void) pthread_mutex_unlock(
3014 				    &iosvc_list_lock);
3015 				break;
3016 
3017 			case ETM_ASYNC_EVENT_LDOM_BIND:
3018 
3019 				/*
3020 				 * create iosvc if it has not been
3021 				 * created
3022 				 * async_e.ds_hdl is invalid
3023 				 * async_e.ldom_name is valid ldom_name
3024 				 */
3025 				(void) pthread_mutex_lock(
3026 				    &iosvc_list_lock);
3027 				iosvc = etm_iosvc_lookup(fmd_hdl,
3028 				    async_e.ldom_name,
3029 				    async_e.ds_hdl, B_TRUE);
3030 				if (iosvc == NULL) {
3031 					fmd_hdl_debug(fmd_hdl,
3032 					    "error: can't create iosvc for "
3033 					    "async evnt %d\n",
3034 					    async_e.event_type);
3035 					(void) pthread_mutex_unlock(
3036 					    &iosvc_list_lock);
3037 					break;
3038 				}
3039 				(void) strcpy(iosvc->ldom_name,
3040 				    async_e.ldom_name);
3041 				iosvc->ds_hdl = async_e.ds_hdl;
3042 				(void) pthread_mutex_unlock(
3043 				    &iosvc_list_lock);
3044 				break;
3045 
3046 			case ETM_ASYNC_EVENT_DS_REG_CB:
3047 				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3048 					/*
3049 					 * find the root ldom name from
3050 					 * ldom domain hdl/id
3051 					 */
3052 					if (etm_filter_find_ldom_name(
3053 					    fmd_hdl, async_e.dhdl,
3054 					    async_e.ldom_name,
3055 					    MAX_LDOM_NAME) != 0) {
3056 						fmd_hdl_debug(fmd_hdl,
3057 						    "error: can't find root "
3058 						    "domain name from did %d\n",
3059 						    async_e.dhdl);
3060 						break;
3061 					} else {
3062 						fmd_hdl_debug(fmd_hdl,
3063 						    "info: etm_filter_find_"
3064 						    "ldom_name returned %s\n",
3065 						    async_e.ldom_name);
3066 					}
3067 					/*
3068 					 * now we should have a valid
3069 					 * root domain name.
3070 					 * lookup the iosvc struct
3071 					 * associated with the ldom_name
3072 					 * and init the iosvc struct
3073 					 */
3074 					(void) pthread_mutex_lock(
3075 					    &iosvc_list_lock);
3076 					iosvc = etm_iosvc_lookup(
3077 					    fmd_hdl, async_e.ldom_name,
3078 					    async_e.ds_hdl, B_TRUE);
3079 					if (iosvc == NULL) {
3080 						fmd_hdl_debug(fmd_hdl,
3081 						    "error: can't create iosvc "
3082 						    "for async evnt %d\n",
3083 						    async_e.event_type);
3084 					(void) pthread_mutex_unlock(
3085 					    &iosvc_list_lock);
3086 						break;
3087 					}
3088 					iosvc->ds_hdl = async_e.ds_hdl;
3089 					iosvc->cur_send_xid = 0;
3090 
3091 					/*
3092 					 * open the fmd xprt if it
3093 					 * hasn't been previously opened
3094 					 */
3095 					iosvc->start_sending_Q = 0;
3096 					fmd_hdl_debug(fmd_hdl,
3097 					    "info: before fmd_xprt_open"
3098 					    "ldom_name is %s\n",
3099 					    async_e.ldom_name);
3100 					if (iosvc->fmd_xprt == NULL) {
3101 						iosvc->fmd_xprt =
3102 						    fmd_xprt_open(
3103 						    fmd_hdl,
3104 						    flags, NULL,
3105 						    iosvc);
3106 					}
3107 
3108 					iosvc->thr_is_dying = 0;
3109 					if (iosvc->recv_tid == NULL) {
3110 						iosvc->recv_tid =
3111 						    fmd_thr_create(
3112 						    fmd_hdl,
3113 						    etm_recv_from_remote_root,
3114 						    iosvc);
3115 					}
3116 					if (iosvc->send_tid == NULL) {
3117 						iosvc->send_tid =
3118 						    fmd_thr_create(
3119 						    fmd_hdl,
3120 						    etm_send_to_remote_root,
3121 						    iosvc);
3122 					}
3123 
3124 					(void) pthread_mutex_unlock(
3125 					    &iosvc_list_lock);
3126 				} else {
3127 					iosvc = &io_svc;
3128 					(void) strcpy(iosvc->ldom_name,
3129 					    async_e.ldom_name);
3130 					iosvc->ds_hdl = async_e.ds_hdl;
3131 					iosvc->cur_send_xid = 0;
3132 					iosvc->start_sending_Q = 0;
3133 
3134 					/*
3135 					 * open the fmd xprt if it
3136 					 * hasn't been previously opened
3137 					 */
3138 					if (iosvc->fmd_xprt == NULL) {
3139 						iosvc->fmd_xprt =
3140 						    fmd_xprt_open(
3141 						    fmd_hdl,
3142 						    flags, NULL,
3143 						    iosvc);
3144 					}
3145 
3146 					iosvc->thr_is_dying = 0;
3147 					if (iosvc->recv_tid == NULL) {
3148 						iosvc->recv_tid =
3149 						    fmd_thr_create(
3150 						    fmd_hdl,
3151 						    etm_recv_from_remote_root,
3152 						    iosvc);
3153 					}
3154 					if (iosvc->send_tid == NULL) {
3155 						iosvc->send_tid =
3156 						    fmd_thr_create(
3157 						    fmd_hdl,
3158 						    etm_send_to_remote_root,
3159 						    iosvc);
3160 					}
3161 				}
3162 				break;
3163 
3164 			case ETM_ASYNC_EVENT_DS_UNREG_CB:
3165 				/*
3166 				 * decide which iosvc struct to perform
3167 				 * this UNREG callback on.
3168 				 */
3169 				if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3170 					(void) pthread_mutex_lock(
3171 					    &iosvc_list_lock);
3172 					/*
3173 					 * lookup the iosvc struct w/
3174 					 * ds_hdl
3175 					 */
3176 					iosvc = etm_iosvc_lookup(
3177 					    fmd_hdl, async_e.ldom_name,
3178 					    async_e.ds_hdl, B_FALSE);
3179 					if (iosvc == NULL) {
3180 						fmd_hdl_debug(fmd_hdl,
3181 						    "error: can't find iosvc "
3182 						    "for async evnt %d\n",
3183 						    async_e.event_type);
3184 					(void) pthread_mutex_unlock(
3185 					    &iosvc_list_lock);
3186 						break;
3187 					}
3188 
3189 					/*
3190 					 * ds_hdl and fmd_xprt_open
3191 					 * go hand to hand together
3192 					 * after unreg_cb,
3193 					 * ds_hdl is INVALID and
3194 					 * fmd_xprt is closed.
3195 					 * the ldom name and the msg Q
3196 					 * remains in iosvc_list
3197 					 */
3198 					iosvc->ds_hdl = DS_INVALID_HDL;
3199 					if (iosvc->fmd_xprt != NULL)
3200 						fmd_xprt_close(fmd_hdl,
3201 						    iosvc->fmd_xprt);
3202 					iosvc->fmd_xprt = NULL;
3203 
3204 					if (iosvc->ldom_name != '\0')
3205 						fmd_hdl_debug(fmd_hdl,
3206 						    "info: iosvc  w/ ldom_name "
3207 						    "%s \n", iosvc->ldom_name);
3208 
3209 					/*
3210 					 * destroy send/recv threads
3211 					 * on Control side.
3212 					 */
3213 					iosvc->thr_is_dying = 1;
3214 					if (iosvc->send_tid != NULL) {
3215 						fmd_thr_signal(fmd_hdl,
3216 						    iosvc->send_tid);
3217 						fmd_thr_destroy(fmd_hdl,
3218 						    iosvc->send_tid);
3219 						iosvc->send_tid = NULL;
3220 					} /* if send tid was created */
3221 
3222 					if (iosvc->recv_tid != NULL) {
3223 						fmd_thr_signal(fmd_hdl,
3224 						    iosvc->recv_tid);
3225 						fmd_thr_destroy(fmd_hdl,
3226 						    iosvc->recv_tid);
3227 						iosvc->recv_tid = NULL;
3228 					} /* if recv tid was created */
3229 
3230 					(void) pthread_mutex_unlock(
3231 					    &iosvc_list_lock);
3232 				} else {
3233 					iosvc = &io_svc;
3234 					/*
3235 					 * destroy send/recv threads
3236 					 * on Root side.
3237 					 */
3238 					iosvc->thr_is_dying = 1;
3239 					if (iosvc->send_tid != NULL) {
3240 						fmd_thr_signal(fmd_hdl,
3241 						    iosvc->send_tid);
3242 						fmd_thr_destroy(fmd_hdl,
3243 						    iosvc->send_tid);
3244 						iosvc->send_tid = NULL;
3245 					} /* if send tid was created */
3246 
3247 					if (iosvc->recv_tid != NULL) {
3248 						fmd_thr_signal(fmd_hdl,
3249 						    iosvc->recv_tid);
3250 						fmd_thr_destroy(fmd_hdl,
3251 						    iosvc->recv_tid);
3252 						iosvc->recv_tid = NULL;
3253 					} /* if recv tid was created */
3254 
3255 					iosvc->ds_hdl = DS_INVALID_HDL;
3256 					if (iosvc->fmd_xprt != NULL)
3257 						fmd_xprt_close(fmd_hdl,
3258 						    iosvc->fmd_xprt);
3259 					iosvc->fmd_xprt = NULL;
3260 				}
3261 				break;
3262 
3263 			default:
3264 				/*
3265 				 * for all other events, etm doesn't care.
3266 				 * already logged an fmd info msg w/
3267 				 * the event type. Do nothing here.
3268 				 */
3269 				break;
3270 			} /* switch (async_e.event_type) */
3271 
3272 			if (etm_ldom_type == LDOM_TYPE_CONTROL) {
3273 				etm_filter_handle_ldom_event(fmd_hdl,
3274 				    async_e.event_type, async_e.ldom_name);
3275 			}
3276 
3277 			/*
3278 			 * grab the lock to check the q length again
3279 			 */
3280 			(void) pthread_mutex_lock(&etm_async_event_q_lock);
3281 
3282 			if (etm_is_dying) {
3283 				break;
3284 			}
3285 		}	/* etm_async_q_cur_len */
3286 
3287 		/*
3288 		 * we have the mutex lock at this point, whether
3289 		 * . etm_is_dying  and/or
3290 		 * . q_len == 0
3291 		 */
3292 		if (!etm_is_dying && etm_async_q_cur_len == 0) {
3293 			fmd_hdl_debug(fmd_hdl,
3294 			    "info: cond wait on async_event_q_cv\n");
3295 			(void) pthread_cond_wait(&etm_async_event_q_cv,
3296 			    &etm_async_event_q_lock);
3297 			fmd_hdl_debug(fmd_hdl,
3298 			    "info: cond wait on async_event_q_cv rtns\n");
3299 		}
3300 		(void) pthread_mutex_unlock(&etm_async_event_q_lock);
3301 	} /* etm_is_dying */
3302 
3303 	fmd_hdl_debug(fmd_hdl,
3304 	    "info: etm async event handler thread exiting\n");
3305 
3306 } /* etm_async_event_handler */
3307 
3308 /*
3309  * deQ what's in iosvc msg Q
3310  * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg()
3311  * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event
3312  */
3313 static void
3314 etm_send_to_remote_root(void *arg)
3315 {
3316 
3317 	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3318 	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
3319 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* hdr for FMA_EVENT */
3320 	fmd_hdl_t		*fmd_hdl = init_hdl;	/* fmd handle */
3321 
3322 
3323 	fmd_hdl_debug(fmd_hdl,
3324 	    "info: send to remote iosvc starting w/ ldom_name %s\n",
3325 	    iosvc->ldom_name);
3326 
3327 	/*
3328 	 *  loop forever until etm_is_dying or thr_is_dying
3329 	 */
3330 	while (!etm_is_dying && !iosvc->thr_is_dying) {
3331 		if (iosvc->ds_hdl != DS_INVALID_HDL &&
3332 		    iosvc->start_sending_Q > 0) {
3333 			(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3334 			while (iosvc->msg_q_cur_len > 0 &&
3335 			    iosvc->ds_hdl != DS_INVALID_HDL)  {
3336 				(void) etm_iosvc_msg_deq(fmd_hdl, iosvc,
3337 				    &msg_ele);
3338 				if (etm_debug_lvl >= 3) {
3339 					fmd_hdl_debug(fmd_hdl, "info: valid "
3340 					    "ds_hdl before ds_send_msg \n");
3341 				}
3342 				(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3343 
3344 				iosvc->ack_ok = 0;
3345 				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3346 				    ((ptrdiff_t)msg_ele.msg);
3347 				ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
3348 				while (!iosvc->ack_ok &&
3349 				    iosvc->ds_hdl != DS_INVALID_HDL &&
3350 				    !etm_is_dying) {
3351 					/*
3352 					 * call ds_send_msg() to send the msg,
3353 					 * wait for the recv end to send the
3354 					 * resp msg back.
3355 					 * If resp msg is recv-ed, ack_ok
3356 					 * will be set to 1.
3357 					 * otherwise, retry.
3358 					 */
3359 					if (etm_send_ds_msg(fmd_hdl, B_TRUE,
3360 					    iosvc, &msg_ele, ev_hdrp) < 0) {
3361 						continue;
3362 					}
3363 
3364 					if (etm_is_dying || iosvc->thr_is_dying)
3365 						break;
3366 				}
3367 
3368 				/*
3369 				 * if out of the while loop but !ack_ok, ie,
3370 				 * ds_hdl becomes invalid at some point
3371 				 * while waiting the resp msg, we need to put
3372 				 * the msg back to the head of the Q.
3373 				 */
3374 				if (!iosvc->ack_ok) {
3375 					(void) pthread_mutex_lock(
3376 					    &iosvc->msg_q_lock);
3377 					/*
3378 					 * put the msg back to the head of Q.
3379 					 * If the Q is full at this point,
3380 					 * drop the msg at the tail, enq this
3381 					 * msg to the head.
3382 					 */
3383 					etm_msg_enq_head(fmd_hdl, iosvc,
3384 					    &msg_ele);
3385 					(void) pthread_mutex_unlock(
3386 					    &iosvc->msg_q_lock);
3387 				}
3388 
3389 				/*
3390 				 *
3391 				 * grab the lock to check the Q len again
3392 				 */
3393 				(void) pthread_mutex_lock(&iosvc->msg_q_lock);
3394 				if (etm_is_dying || iosvc->thr_is_dying) {
3395 					break;
3396 				}
3397 			} /* while dequeing iosvc msgs to send */
3398 
3399 			/*
3400 			 * we have the mutex lock for msg_q_lock at this point
3401 			 * we are here because
3402 			 * 1) q_len == 0: then wait on the cv for Q to be filled
3403 			 * 2) etm_is_dying
3404 			 */
3405 			if (!etm_is_dying && !iosvc->thr_is_dying &&
3406 			    iosvc->msg_q_cur_len == 0) {
3407 				fmd_hdl_debug(fmd_hdl,
3408 				    "info: waiting on msg_q_cv\n");
3409 				(void) pthread_cond_wait(&iosvc->msg_q_cv,
3410 				    &iosvc->msg_q_lock);
3411 			}
3412 			(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
3413 			if (etm_is_dying || iosvc->thr_is_dying)  {
3414 				break;
3415 			}
3416 		} else {
3417 			(void) etm_sleep(1);
3418 		} /* wait for the start_sendingQ > 0 */
3419 	} /* etm_is_dying or thr_is_dying */
3420 	fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n");
3421 } /* etm_send_to_remote_root */
3422 
3423 
3424 /*
3425  * receive etm msgs from the remote root ldom by calling ds_recv_msg()
3426  * if FMA events/ereports, call fmd_xprt_post() to post to fmd
3427  * send ACK back by calling ds_send_msg()
3428  */
3429 static void
3430 etm_recv_from_remote_root(void *arg)
3431 {
3432 	etm_iosvc_t		*iosvc = (etm_iosvc_t *)arg;	/* iosvc ptr */
3433 	etm_proto_v1_pp_t	*pp;		/* protocol preamble */
3434 	etm_proto_v1_ev_hdr_t	*ev_hdrp;	/* for FMA_EVENT msg */
3435 	etm_proto_v1_resp_hdr_t	*resp_hdrp;	/* for RESPONSE msg */
3436 	int32_t			resp_code = 0;	/* default is success */
3437 	int32_t			rc;		/* return value */
3438 	size_t			maxlen = MAXLEN;
3439 						/* max msg len */
3440 	char 			msgbuf[MAXLEN];	/* recv msg buf */
3441 	size_t			msg_size;	/* recv msg size */
3442 	size_t			hdr_sz;		/* sizeof *hdrp */
3443 	size_t			evsz;		/* sizeof *evp */
3444 	size_t			fma_event_size;	/* sizeof FMA event  */
3445 	nvlist_t 		*evp;		/* ptr to the nvlist */
3446 	char			*buf;		/* ptr to the nvlist */
3447 	static uint32_t		mem_alloc = 0;	/* indicate if alloc mem */
3448 	char 			*msg;		/* ptr to alloc mem */
3449 	fmd_hdl_t		*fmd_hdl = init_hdl;
3450 
3451 
3452 
3453 	fmd_hdl_debug(fmd_hdl,
3454 	    "info: recv from remote iosvc starting with ldom name %s \n",
3455 	    iosvc->ldom_name);
3456 
3457 	/*
3458 	 * loop forever until etm_is_dying or the thread is dying
3459 	 */
3460 
3461 	msg = msgbuf;
3462 	while (!etm_is_dying && !iosvc->thr_is_dying) {
3463 		if (iosvc->ds_hdl == DS_INVALID_HDL) {
3464 			fmd_hdl_debug(fmd_hdl,
3465 			    "info: ds_hdl is invalid in recv thr\n");
3466 			(void) etm_sleep(1);
3467 			continue;
3468 		}
3469 
3470 		/*
3471 		 * for now, there are FMA_EVENT and ACK msg type.
3472 		 * use FMA_EVENT buf as the maxlen, hdr+1 fma event.
3473 		 * FMA_EVENT is big enough to hold an ACK msg.
3474 		 * the actual msg size received is in msg_size.
3475 		 */
3476 		rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size);
3477 		if (rc == EFBIG) {
3478 			fmd_hdl_debug(fmd_hdl,
3479 			    "info: ds_recv_msg needs mem the size of %d\n",
3480 			    msg_size);
3481 			msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP);
3482 			mem_alloc = 1;
3483 		} else if (rc == 0) {
3484 			fmd_hdl_debug(fmd_hdl,
3485 			    "info: ds_recv_msg received a msg ok\n");
3486 			/*
3487 			 * check the magic # in  msg.hdr
3488 			 */
3489 			pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg);
3490 			if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) {
3491 				fmd_hdl_debug(fmd_hdl,
3492 				    "info: bad ds recv on magic\n");
3493 				continue;
3494 			}
3495 
3496 			/*
3497 			 * check the msg type against msg_size to be sure
3498 			 * that received msg is not a truncated msg
3499 			 */
3500 			if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {
3501 
3502 				ev_hdrp = (etm_proto_v1_ev_hdr_t *)
3503 				    ((ptrdiff_t)msg);
3504 				fmd_hdl_debug(fmd_hdl, "info: ds received "
3505 				    "FMA EVENT xid=%d msg_size=%d\n",
3506 				    ev_hdrp->ev_pp.pp_xid, msg_size);
3507 				hdr_sz = sizeof (*ev_hdrp) +
3508 				    1*(sizeof (ev_hdrp->ev_lens[0]));
3509 				fma_event_size = hdr_sz + ev_hdrp->ev_lens[0];
3510 				if (fma_event_size != msg_size) {
3511 					fmd_hdl_debug(fmd_hdl, "info: wrong "
3512 					    "ev msg size received\n");
3513 					continue;
3514 					/*
3515 					 * Simply  do nothing. The send side
3516 					 * will timedcond_wait waiting on the
3517 					 * resp msg will timeout and
3518 					 * re-send the same msg.
3519 					 */
3520 				}
3521 				if (etm_debug_lvl >= 3) {
3522 					fmd_hdl_debug(fmd_hdl,  "info: recv msg"
3523 					    " size %d hdrsz %d evp size %d\n",
3524 					    msg_size, hdr_sz,
3525 					    ev_hdrp->ev_lens[0]);
3526 				}
3527 
3528 				if (ev_hdrp->ev_pp.pp_xid !=
3529 				    iosvc->xid_posted_ev) {
3530 					/*
3531 					 * different from last xid posted to
3532 					 * fmd, post to fmd now.
3533 					 */
3534 					buf = msg + hdr_sz;
3535 					rc = nvlist_unpack(buf,
3536 					    ev_hdrp->ev_lens[0], &evp, 0);
3537 					rc = nvlist_size(evp, &evsz,
3538 					    NV_ENCODE_XDR);
3539 					fmd_hdl_debug(fmd_hdl,
3540 					    "info: evp size %d before fmd"
3541 					    "post\n", evsz);
3542 
3543 					if ((rc = etm_post_to_fmd(fmd_hdl,
3544 					    iosvc->fmd_xprt, evp)) >= 0) {
3545 						fmd_hdl_debug(fmd_hdl,
3546 						    "info: xid posted to fmd %d"
3547 						    "\n",
3548 						    ev_hdrp->ev_pp.pp_xid);
3549 						iosvc->xid_posted_ev =
3550 						    ev_hdrp->ev_pp.pp_xid;
3551 					}
3552 				}
3553 
3554 				/*
3555 				 * ready to  send the RESPONSE msg back
3556 				 * reuse the msg buffer as the response buffer
3557 				 */
3558 				resp_hdrp = (etm_proto_v1_resp_hdr_t *)
3559 				    ((ptrdiff_t)msg);
3560 				resp_hdrp->resp_pp.pp_msg_type =
3561 				    ETM_MSG_TYPE_RESPONSE;
3562 
3563 				resp_hdrp->resp_code = resp_code;
3564 				resp_hdrp->resp_len = sizeof (*resp_hdrp);
3565 
3566 				/*
3567 				 * send the whole response msg in one send
3568 				 */
3569 				if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg,
3570 				    sizeof (*resp_hdrp)) != 0) {
3571 					fmd_hdl_debug(fmd_hdl,
3572 					    "info: send response msg failed\n");
3573 				} else {
3574 					fmd_hdl_debug(fmd_hdl,
3575 					    "info: ds send resp msg ok"
3576 					    "size %d\n", sizeof (*resp_hdrp));
3577 				}
3578 			} else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
3579 				fmd_hdl_debug(fmd_hdl,
3580 				    "info: ds received respond msg xid=%d"
3581 				    "msg_size=%d for ldom %s\n", pp->pp_xid,
3582 				    msg_size, iosvc->ldom_name);
3583 				if (sizeof (*resp_hdrp) != msg_size) {
3584 					fmd_hdl_debug(fmd_hdl,
3585 					    "info: wrong resp msg size"
3586 					    "received\n");
3587 					fmd_hdl_debug(fmd_hdl,
3588 					    "info: resp msg size %d recv resp"
3589 					    "msg size %d\n",
3590 					    sizeof (*resp_hdrp), msg_size);
3591 					continue;
3592 				}
3593 				/*
3594 				 * is the pp.pp_xid == iosvc->cur_send_xid+1,
3595 				 * if so, nudge the send routine to send next
3596 				 */
3597 				if (pp->pp_xid != iosvc->cur_send_xid+1) {
3598 					fmd_hdl_debug(fmd_hdl,
3599 					    "info: ds received resp msg xid=%d "
3600 					    "doesn't match cur_send_id=%d\n",
3601 					    pp->pp_xid, iosvc->cur_send_xid+1);
3602 					continue;
3603 				}
3604 				(void) pthread_mutex_lock(&iosvc->msg_ack_lock);
3605 				iosvc->ack_ok = 1;
3606 				(void) pthread_cond_signal(&iosvc->msg_ack_cv);
3607 				(void) pthread_mutex_unlock(
3608 				    &iosvc->msg_ack_lock);
3609 				fmd_hdl_debug(fmd_hdl,
3610 				    "info: signaling msg_ack_cv\n");
3611 			} else {
3612 				/*
3613 				 * place holder for future msg types
3614 				 */
3615 				fmd_hdl_debug(fmd_hdl,
3616 				    "info: ds received unrecognized msg\n");
3617 			}
3618 			if (mem_alloc) {
3619 				fmd_hdl_free(fmd_hdl, msg, msg_size);
3620 				mem_alloc = 0;
3621 				msg = msgbuf;
3622 			}
3623 		} else {
3624 			if (etm_debug_lvl >= 3) {
3625 				fmd_hdl_debug(fmd_hdl,
3626 				    "info: ds_recv_msg() failed\n");
3627 			}
3628 		} /* ds_recv_msg() returns */
3629 	} /* etm_is_dying */
3630 
3631 	/*
3632 	 * need to free the mem allocated in msg upon exiting the thread
3633 	 */
3634 	if (mem_alloc) {
3635 		fmd_hdl_free(fmd_hdl, msg, msg_size);
3636 		mem_alloc = 0;
3637 		msg = msgbuf;
3638 	}
3639 	fmd_hdl_debug(fmd_hdl, "info; etm recv thread exiting \n");
3640 } /* etm_recv_from_remote_root */
3641 
3642 
3643 
3644 /*
3645  * etm_ds_init
3646  *		initialize DS services function pointers by calling
3647  *		dlopen() followed by  dlsym() for each ds func.
3648  *		if any dlopen() or dlsym() call fails, return -ENOENT
3649  *		return >0 for successs, -ENOENT for failure
3650  */
3651 static int
3652 etm_ds_init(fmd_hdl_t *hdl)
3653 {
3654 	int rc = 0;
3655 
3656 	if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) {
3657 		fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path);
3658 		return (-ENOENT);
3659 	}
3660 
3661 	etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3662 	    dlsym(etm_dl_hdl, "ds_svc_reg");
3663 	if (etm_ds_svc_reg == NULL) {
3664 		fmd_hdl_debug(hdl,
3665 		    "error: failed to dlsym ds_svc_reg() w/ error %s\n",
3666 		    dlerror());
3667 		rc = -ENOENT;
3668 	}
3669 
3670 
3671 	etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
3672 	    dlsym(etm_dl_hdl, "ds_clnt_reg");
3673 	if (etm_ds_clnt_reg == NULL) {
3674 		fmd_hdl_debug(hdl,
3675 		    "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno);
3676 		rc = -ENOENT;
3677 	}
3678 
3679 	etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))
3680 	    dlsym(etm_dl_hdl, "ds_send_msg");
3681 	if (etm_ds_send_msg == NULL) {
3682 		fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n");
3683 		rc = -ENOENT;
3684 	}
3685 
3686 	etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen,
3687 	    size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg");
3688 	if (etm_ds_recv_msg == NULL) {
3689 		fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n");
3690 		rc = -ENOENT;
3691 	}
3692 
3693 	etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini");
3694 	if (etm_ds_fini == NULL) {
3695 		fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n");
3696 		rc = -ENOENT;
3697 	}
3698 
3699 	if (rc == -ENOENT) {
3700 		(void) dlclose(etm_dl_hdl);
3701 	}
3702 	return (rc);
3703 
3704 } /* etm_ds_init() */
3705 
3706 
3707 /*
3708  * -------------------------- FMD entry points -------------------------------
3709  */
3710 
3711 /*
3712  * _fmd_init - initialize the transport for use by ETM and start the
3713  *		server daemon to accept new connections to us
3714  *
3715  *		FMD will read our *.conf and subscribe us to FMA events
3716  */
3717 
3718 void
3719 _fmd_init(fmd_hdl_t *hdl)
3720 {
3721 	struct timeval		tmv;		/* timeval */
3722 	ssize_t			n;		/* gen use */
3723 	const struct facility	*fp;		/* syslog facility matching */
3724 	char			*facname;	/* syslog facility property */
3725 	uint32_t		type_mask;	/* type of the local host */
3726 	int			rc;		/* funcs return code */
3727 
3728 
3729 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
3730 		return; /* invalid data in configuration file */
3731 	}
3732 
3733 	fmd_hdl_debug(hdl, "info: module initializing\n");
3734 
3735 	init_hdl = hdl;
3736 	etm_lhp = ldom_init(etm_init_alloc, etm_init_free);
3737 
3738 	/*
3739 	 * decide the ldom type, do initialization accordingly
3740 	 */
3741 	if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) {
3742 		fmd_hdl_debug(hdl, "error: can't decide ldom type\n");
3743 		fmd_hdl_debug(hdl, "info: module unregistering\n");
3744 		ldom_fini(etm_lhp);
3745 		fmd_hdl_unregister(hdl);
3746 		return;
3747 	}
3748 
3749 	if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) {
3750 		if (type_mask & LDOM_TYPE_LEGACY) {
3751 			/*
3752 			 * running on a legacy sun4v domain,
3753 			 * act as the the old sun4v
3754 			 */
3755 			etm_ldom_type = LDOM_TYPE_LEGACY;
3756 			fmd_hdl_debug(hdl, "info: running as the old sun4v\n");
3757 			ldom_fini(etm_lhp);
3758 		} else if (type_mask & LDOM_TYPE_CONTROL) {
3759 			etm_ldom_type = LDOM_TYPE_CONTROL;
3760 			fmd_hdl_debug(hdl, "info: running as control domain\n");
3761 
3762 			/*
3763 			 * looking for libds.so.1.
3764 			 * If not found, don't do DS registration. As a result,
3765 			 * there will be no DS callbacks or other DS services.
3766 			 */
3767 			if (etm_ds_init(hdl) >= 0) {
3768 				etm_filter_init(hdl);
3769 				etm_ckpt_init(hdl);
3770 
3771 				flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;
3772 
3773 				/*
3774 				 * ds client registration
3775 				 */
3776 				if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps,
3777 				    &iosvc_ops))) {
3778 					fmd_hdl_debug(hdl,
3779 					"error: ds_clnt_reg(): errno %d\n", rc);
3780 				}
3781 			} else {
3782 				fmd_hdl_debug(hdl, "error: dlopen() libds "
3783 				    "failed, continue without the DS services");
3784 			}
3785 
3786 			/*
3787 			 * register for ldom status events
3788 			 */
3789 			if ((rc = ldom_register_event(etm_lhp,
3790 			    ldom_event_handler, hdl))) {
3791 				fmd_hdl_debug(hdl,
3792 				    "error: ldom_register_event():"
3793 				    " errno %d\n", rc);
3794 			}
3795 
3796 			/*
3797 			 * create the thread for handling both the ldom status
3798 			 * change and service events
3799 			 */
3800 			etm_async_e_tid = fmd_thr_create(hdl,
3801 			    etm_async_event_handler, hdl);
3802 		}
3803 
3804 		/* setup statistics and properties from FMD */
3805 
3806 		(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
3807 		    sizeof (etm_stats) / sizeof (fmd_stat_t),
3808 		    (fmd_stat_t *)&etm_stats);
3809 
3810 		etm_fma_resp_wait_time = fmd_prop_get_int32(hdl,
3811 		    ETM_PROP_NM_FMA_RESP_WAIT_TIME);
3812 		etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
3813 		etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
3814 		    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
3815 		fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
3816 		    "etm_debug_max_ev_cnt %d\n", etm_debug_lvl,
3817 		    etm_debug_max_ev_cnt);
3818 
3819 		etm_resp_q_max_len = fmd_prop_get_int32(hdl,
3820 		    ETM_PROP_NM_MAX_RESP_Q_LEN);
3821 		etm_stats.etm_resp_q_max_len.fmds_value.ui64 =
3822 		    etm_resp_q_max_len;
3823 		etm_bad_acc_to_sec = fmd_prop_get_int32(hdl,
3824 		    ETM_PROP_NM_BAD_ACC_TO_SEC);
3825 
3826 		/*
3827 		 * obtain an FMD transport handle so we can post
3828 		 * FMA events later
3829 		 */
3830 
3831 		etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
3832 
3833 		/*
3834 		 * encourage protocol transaction id to be unique per module
3835 		 * load
3836 		 */
3837 
3838 		(void) gettimeofday(&tmv, NULL);
3839 		etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
3840 		    ((unsigned long)tmv.tv_usec >> 10));
3841 
3842 		/* init the ETM transport */
3843 
3844 		if ((n = etm_xport_init(hdl)) != 0) {
3845 			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
3846 			    (-n));
3847 			fmd_hdl_unregister(hdl);
3848 			return;
3849 		}
3850 
3851 		/*
3852 		 * Cache any properties we use every time we receive an alert.
3853 		 */
3854 		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
3855 		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);
3856 
3857 		if (syslog_file && (syslog_logfd = open("/dev/conslog",
3858 		    O_WRONLY | O_NOCTTY)) == -1) {
3859 			fmd_hdl_error(hdl,
3860 			    "error: failed to open /dev/conslog");
3861 			syslog_file = 0;
3862 		}
3863 
3864 		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
3865 		    O_WRONLY | O_NOCTTY)) == -1) {
3866 			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
3867 			syslog_cons = 0;
3868 		}
3869 
3870 		if (syslog_file) {
3871 			/*
3872 			 * Look up the value of the "facility" property and
3873 			 * use it to determine * what syslog LOG_* facility
3874 			 * value we use to fill in our log_ctl_t.
3875 			 */
3876 			facname = fmd_prop_get_string(hdl,
3877 			    ETM_PROP_NM_FACILITY);
3878 
3879 			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
3880 				if (strcmp(fp->fac_name, facname) == 0)
3881 					break;
3882 			}
3883 
3884 			if (fp->fac_name == NULL) {
3885 				fmd_hdl_error(hdl, "error: invalid 'facility'"
3886 				    " setting: %s\n", facname);
3887 				syslog_file = 0;
3888 			} else {
3889 				syslog_facility = fp->fac_value;
3890 				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
3891 			}
3892 
3893 			fmd_prop_free_string(hdl, facname);
3894 		}
3895 
3896 		/*
3897 		 * start the message responder and the connection acceptance
3898 		 * server; request protocol version be negotiated after waiting
3899 		 * a second for the receiver to be ready to start handshaking
3900 		 */
3901 
3902 		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
3903 		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);
3904 
3905 		(void) etm_sleep(ETM_SLEEP_QUIK);
3906 		etm_req_ver_negot(hdl);
3907 
3908 	} else if (type_mask & LDOM_TYPE_ROOT) {
3909 		etm_ldom_type = LDOM_TYPE_ROOT;
3910 		fmd_hdl_debug(hdl, "info: running as root domain\n");
3911 
3912 		/*
3913 		 * looking for libds.so.1.
3914 		 * If not found, don't do DS registration. As a result,
3915 		 * there will be no DS callbacks or other DS services.
3916 		 */
3917 		if (etm_ds_init(hdl) < 0) {
3918 			fmd_hdl_debug(hdl,
3919 			    "error: dlopen() libds failed, "
3920 			    "module unregistering\n");
3921 			ldom_fini(etm_lhp);
3922 			fmd_hdl_unregister(hdl);
3923 			return;
3924 		}
3925 
3926 		/*
3927 		 * DS service registration
3928 		 */
3929 		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
3930 			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
3931 			    rc);
3932 		}
3933 
3934 		/*
3935 		 * this thread is created for ds_reg_cb/ds_unreg_cb
3936 		 */
3937 		etm_async_e_tid = fmd_thr_create(hdl,
3938 		    etm_async_event_handler, hdl);
3939 
3940 		flags = FMD_XPRT_RDWR;
3941 	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
3942 		/*
3943 		 * Do not load this module if it is
3944 		 * . runing on a non-root ldom
3945 		 * . the domain owns no io devices
3946 		 */
3947 		fmd_hdl_debug(hdl,
3948 		    "info: non-root ldom, module unregistering\n");
3949 		ldom_fini(etm_lhp);
3950 		fmd_hdl_unregister(hdl);
3951 		return;
3952 	} else {
3953 		/*
3954 		 * place holder, all other cases. unload etm for now
3955 		 */
3956 		fmd_hdl_debug(hdl,
3957 		    "info: other ldom type, module unregistering\n");
3958 		ldom_fini(etm_lhp);
3959 		fmd_hdl_unregister(hdl);
3960 		return;
3961 	}
3962 
3963 	fmd_hdl_debug(hdl, "info: module initialized ok\n");
3964 
3965 } /* _fmd_init() */
3966 
3967 /*
3968  * etm_recv - receive an FMA event from FMD and transport it
3969  *		to the remote endpoint
3970  */
3971 
3972 /*ARGSUSED*/
3973 void
3974 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
3975 {
3976 	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
3977 	etm_xport_conn_t	conn;	/* connection handle */
3978 	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
3979 	ssize_t			i, n;	/* gen use */
3980 	size_t			sz;	/* header size */
3981 	size_t			buflen;	/* size of packed FMA event */
3982 	uint8_t			*buf;	/* tmp buffer for packed FMA event */
3983 
3984 	/*
3985 	 * if this is running on a Root Domain, ignore the events,
3986 	 * return right away
3987 	 */
3988 	if (etm_ldom_type == LDOM_TYPE_ROOT)
3989 		return;
3990 
3991 	buflen = 0;
3992 	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
3993 		fmd_hdl_error(hdl, "error: FMA event dropped: "
3994 		    "event size errno %d class %s\n", n, class);
3995 		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
3996 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
3997 		return;
3998 	}
3999 
4000 	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
4001 	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
4002 	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);
4003 
4004 	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
4005 	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;
4006 
4007 	/*
4008 	 * if the debug limit has been set, avoid excessive traffic,
4009 	 * for example, an infinite cycle using loopback nodes
4010 	 */
4011 
4012 	if ((etm_debug_max_ev_cnt >= 0) &&
4013 	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
4014 	    etm_debug_max_ev_cnt)) {
4015 		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
4016 		    "event %p cnt %llu > debug max %d\n", evp,
4017 		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
4018 		    etm_debug_max_ev_cnt);
4019 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4020 		return;
4021 	}
4022 
4023 	/* allocate a buffer for the FMA event and nvlist pack it */
4024 
4025 	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
4026 
4027 	/*
4028 	 * increment the ttl value if the event is from remote (a root domain)
4029 	 * uncomment this when enabling fault forwarding from Root domains
4030 	 * to Control domain.
4031 	 *
4032 	 * uint8_t			ttl;
4033 	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
4034 	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
4035 	 *		(void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8);
4036 	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
4037 	 *	}
4038 	 * }
4039 	 */
4040 
4041 	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
4042 	    NV_ENCODE_XDR, 0)) != 0) {
4043 		fmd_hdl_error(hdl, "error: FMA event dropped: "
4044 		    "event pack errno %d class %s\n", n, class);
4045 		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
4046 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4047 		fmd_hdl_free(hdl, buf, buflen);
4048 		return;
4049 	}
4050 
4051 	/* get vector of dst addrs and send the FMA event to each one */
4052 
4053 	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
4054 		fmd_hdl_error(hdl, "error: FMA event dropped: "
4055 		    "bad event dst addrs errno %d\n", errno);
4056 		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
4057 		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4058 		fmd_hdl_free(hdl, buf, buflen);
4059 		return;
4060 	}
4061 
4062 	for (i = 0; addrv[i] != NULL; i++) {
4063 
4064 		/* open a new connection to this dst addr */
4065 
4066 		if ((n = etm_conn_open(hdl, "FMA event dropped: "
4067 		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
4068 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4069 			continue;
4070 		}
4071 
4072 		(void) pthread_mutex_lock(&etm_write_lock);
4073 
4074 		/* write the ETM message header */
4075 
4076 		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
4077 		    &sz)) == NULL) {
4078 			(void) pthread_mutex_unlock(&etm_write_lock);
4079 			fmd_hdl_error(hdl, "error: FMA event dropped: "
4080 			    "bad hdr write errno %d\n", errno);
4081 			(void) etm_conn_close(hdl,
4082 			    "bad conn close per bad hdr wr", conn);
4083 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4084 			continue;
4085 		}
4086 
4087 		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
4088 		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
4089 		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
4090 		    evp);
4091 
4092 		/* write the ETM message body, ie, the packed nvlist */
4093 
4094 		if ((n = etm_io_op(hdl, "FMA event dropped: "
4095 		    "bad io write on event", conn,
4096 		    buf, buflen, ETM_IO_OP_WR)) < 0) {
4097 			(void) pthread_mutex_unlock(&etm_write_lock);
4098 			(void) etm_conn_close(hdl,
4099 			    "bad conn close per bad body wr", conn);
4100 			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
4101 			continue;
4102 		}
4103 
4104 		(void) pthread_mutex_unlock(&etm_write_lock);
4105 
4106 		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
4107 		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
4108 		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
4109 		    evp);
4110 
4111 		/* close the connection */
4112 
4113 		(void) etm_conn_close(hdl, "bad conn close after event send",
4114 		    conn);
4115 	} /* foreach dst addr in the vector */
4116 
4117 	etm_xport_free_addrv(hdl, addrv);
4118 	fmd_hdl_free(hdl, buf, buflen);
4119 
4120 } /* etm_recv() */
4121 
4122 
4123 /*
4124  * etm_send -	receive an FMA event from FMD and enQ it in the iosvc.Q.
4125  *		etm_send_to_remote_root() deQ and xprt the FMA events to a
4126  *		remote root domain
4127  *		return FMD_SEND_SUCCESS for success,
4128  *		       FMD_SEND_FAILED for error
4129  */
4130 
4131 /*ARGSUSED*/
4132 int
4133 etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
4134 {
4135 	uint32_t	pack_it;	/* whether to pack/enq the event */
4136 	etm_pack_msg_type_t	msg_type;
4137 					/* tell etm_pack_ds_msg() what to do */
4138 	etm_iosvc_t	*iosvc;		/* ptr to cur iosvc struct */
4139 	char 		*class;		/* nvlist class name */
4140 
4141 	pack_it = 1;
4142 	msg_type = FMD_XPRT_OTHER_MSG;
4143 
4144 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
4145 	if (class == NULL) {
4146 		pack_it = 0;
4147 	} else  {
4148 		if (etm_debug_lvl >= 1) {
4149 			fmd_hdl_debug(fmd_hdl,
4150 			    "info: evp class= %s in etm_send\n", class);
4151 		}
4152 
4153 		if (etm_ldom_type ==  LDOM_TYPE_CONTROL) {
4154 			iosvc =
4155 			    (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp);
4156 
4157 			/*
4158 			 * check the flag FORWARDING_FAULTS_TO_CONTROL to
4159 			 * decide if or not to drop fault subscription
4160 			 * control msgs
4161 			 */
4162 			if (strcmp(class, "resource.fm.xprt.subscribe") == 0) {
4163 				pack_it = 0;
4164 				/*
4165 				 * if (FORWARDING_FAULTS_TO_CONTROL == 1) {
4166 				 * (void) nvlist_lookup_string(nvl,
4167 				 *    FM_RSRC_XPRT_SUBCLASS, &subclass);
4168 				 * if (strcmp(subclass, "list.suspect")
4169 				 *    == 0) {
4170 				 *	pack_it = 1;
4171 				 *	msg_action = FMD_XPRT_OTHER_MSG;
4172 				 * }
4173 				 * if (strcmp(subclass, "list.repaired")
4174 				 *    == 0) {
4175 				 *	pack_it = 1;
4176 				 *	msg_action = FMD_XPRT_OTHER_MSG;
4177 				 * }
4178 				 * }
4179 				 */
4180 			}
4181 			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4182 				pack_it = 1;
4183 				msg_type = FMD_XPRT_RUN_MSG;
4184 			}
4185 		} else { /* has to be the root domain ldom */
4186 			iosvc = &io_svc;
4187 			/*
4188 			 * drop all ereport and fault subscriptions
4189 			 * are we dropping too much here, more than just ereport
4190 			 * and fault subscriptions? need to check
4191 			 */
4192 			if (strcmp(class, "resource.fm.xprt.subscribe") == 0)
4193 				pack_it = 0;
4194 			if (strcmp(class, "resource.fm.xprt.run") == 0) {
4195 				pack_it = 1;
4196 				msg_type = FMD_XPRT_RUN_MSG;
4197 			}
4198 		}
4199 	}
4200 
4201 	if (pack_it)  {
4202 		if (etm_debug_lvl >= 1) {
4203 			fmd_hdl_debug(fmd_hdl,
4204 			    "info: ldom name returned from xprt get specific="
4205 			    "%s xprt=%lld\n", iosvc->ldom_name, xp);
4206 		}
4207 		/*
4208 		 * pack the etm msg for the DS library and  enq in io_svc->Q
4209 		 * when the hdrp is NULL, the packing func will use the static
4210 		 * iosvc_hdr
4211 		 */
4212 		(void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type,
4213 		    ETM_CKPT_NOOP);
4214 	}
4215 
4216 	return (FMD_SEND_SUCCESS);
4217 
4218 } /* etm_send() */
4219 
4220 
4221 
4222 /*
4223  * _fmd_fini - stop the server daemon and teardown the transport
4224  */
4225 
4226 void
4227 _fmd_fini(fmd_hdl_t *hdl)
4228 {
4229 	ssize_t			n;		/* gen use */
4230 	etm_iosvc_t		*iosvc;		/* ptr to insvc struct */
4231 	etm_iosvc_q_ele_t	msg_ele;	/* iosvc msg ele */
4232 	uint32_t		i;		/* for loop var */
4233 
4234 	fmd_hdl_debug(hdl, "info: module finalizing\n");
4235 
4236 	/* kill the connection server and responder ; wait for them to die */
4237 
4238 	etm_is_dying = 1;
4239 
4240 	if (etm_svr_tid != NULL) {
4241 		fmd_thr_signal(hdl, etm_svr_tid);
4242 		fmd_thr_destroy(hdl, etm_svr_tid);
4243 		etm_svr_tid = NULL;
4244 	} /* if server thread was successfully created */
4245 
4246 	if (etm_resp_tid != NULL) {
4247 		fmd_thr_signal(hdl, etm_resp_tid);
4248 		fmd_thr_destroy(hdl, etm_resp_tid);
4249 		etm_resp_tid = NULL;
4250 	} /* if responder thread was successfully created */
4251 
4252 	if (etm_async_e_tid != NULL) {
4253 		fmd_thr_signal(hdl, etm_async_e_tid);
4254 		fmd_thr_destroy(hdl, etm_async_e_tid);
4255 		etm_async_e_tid = NULL;
4256 	} /* if async event handler thread was successfully created */
4257 
4258 
4259 	if ((etm_ldom_type == LDOM_TYPE_LEGACY) ||
4260 	    (etm_ldom_type == LDOM_TYPE_CONTROL)) {
4261 
4262 		/* teardown the transport and cleanup syslogging */
4263 		if ((n = etm_xport_fini(hdl)) != 0) {
4264 			fmd_hdl_error(hdl, "warning: xport fini errno %d\n",
4265 			    (-n));
4266 		}
4267 		if (etm_fmd_xprt != NULL) {
4268 			fmd_xprt_close(hdl, etm_fmd_xprt);
4269 		}
4270 
4271 		if (syslog_logfd != -1) {
4272 			(void) close(syslog_logfd);
4273 		}
4274 		if (syslog_msgfd != -1) {
4275 			(void) close(syslog_msgfd);
4276 		}
4277 	}
4278 
4279 	if (etm_ldom_type == LDOM_TYPE_CONTROL)  {
4280 		if (ldom_unregister_event(etm_lhp))
4281 			fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n");
4282 
4283 		/*
4284 		 * on control side, need to go thru every iosvc struct to
4285 		 * 1) process remaining events in the iosvc Q:
4286 		 * for plan A:
4287 		 *    discard remaining events in the Q/free the memory,
4288 		 *    since fmd_xprt_log() already logged in Control D's FMD
4289 		 * 2) unregister the ds_hdl if valid
4290 		 * 3) close the fmd_xprt if it has not been closed
4291 		 */
4292 		for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
4293 			if (iosvc_list[i].ldom_name[0] != '\0') {
4294 				/*
4295 				 * found an iosvc struct for a root domain
4296 				 */
4297 				iosvc = &iosvc_list[i];
4298 				(void) pthread_mutex_lock(&iosvc_list_lock);
4299 				etm_iosvc_cleanup(hdl, iosvc);
4300 				(void) pthread_mutex_unlock(&iosvc_list_lock);
4301 
4302 			} else {
4303 				/*
4304 				 * reach the end of existing iosvc structures
4305 				 */
4306 				continue;
4307 			}
4308 		} /* for i<NUM_OF_ROOT_DOMAINS */
4309 		etm_ckpt_fini(hdl);
4310 		etm_filter_fini(hdl);
4311 
4312 		ldom_fini(etm_lhp);
4313 
4314 	} else if (etm_ldom_type == LDOM_TYPE_ROOT) {
4315 		iosvc = &io_svc;
4316 		if (iosvc->send_tid != NULL) {
4317 			fmd_thr_signal(hdl, iosvc->send_tid);
4318 			fmd_thr_destroy(hdl, iosvc->send_tid);
4319 			iosvc->send_tid = NULL;
4320 		} /* if io svc send thread was successfully created */
4321 
4322 		if (iosvc->recv_tid != NULL) {
4323 			fmd_thr_signal(hdl, iosvc->recv_tid);
4324 			fmd_thr_destroy(hdl, iosvc->recv_tid);
4325 			iosvc->recv_tid = NULL;
4326 		} /* if io svc receive thread was successfully created */
4327 
4328 		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
4329 		while (iosvc->msg_q_cur_len > 0) {
4330 			(void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele);
4331 			fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size);
4332 		}
4333 		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);
4334 
4335 		if (iosvc->fmd_xprt != NULL)
4336 			fmd_xprt_close(hdl, iosvc->fmd_xprt);
4337 		ldom_fini(etm_lhp);
4338 	}
4339 	if (etm_ds_fini) {
4340 		(*etm_ds_fini)();
4341 		(void) dlclose(etm_dl_hdl);
4342 	}
4343 
4344 	fmd_hdl_debug(hdl, "info: module finalized ok\n");
4345 
4346 } /* _fmd_fini() */
4347