1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * etm.c FMA Event Transport Module implementation, a plugin of FMD 29 * for sun4v/Ontario 30 * 31 * plugin for sending/receiving FMA events to/from service processor 32 */ 33 34 /* 35 * --------------------------------- includes -------------------------------- 36 */ 37 38 #include <sys/fm/protocol.h> 39 #include <sys/fm/util.h> 40 #include <sys/fm/ldom.h> 41 #include <sys/strlog.h> 42 #include <sys/syslog.h> 43 #include <sys/libds.h> 44 #include <netinet/in.h> 45 #include <fm/fmd_api.h> 46 47 #include "etm_xport_api.h" 48 #include "etm_etm_proto.h" 49 #include "etm_impl.h" 50 #include "etm_iosvc.h" 51 #include "etm_filter.h" 52 #include "etm_ckpt.h" 53 54 #include <pthread.h> 55 #include <signal.h> 56 #include <stropts.h> 57 #include <locale.h> 58 #include <strings.h> 59 #include <stdlib.h> 60 #include <unistd.h> 61 #include <limits.h> 62 #include <values.h> 63 #include <alloca.h> 64 #include <errno.h> 65 #include <dlfcn.h> 66 #include <link.h> 67 #include <fcntl.h> 68 #include <time.h> 69 70 /* 71 * ----------------------------- forward decls ------------------------------- 72 */ 73 74 static void 75 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class); 76 77 static int 78 etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl); 79 80 static void 81 etm_send_to_remote_root(void *arg); 82 83 static void 84 etm_recv_from_remote_root(void *arg); 85 86 static void 87 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele); 88 89 /* 90 * ------------------------- data structs for FMD ---------------------------- 91 */ 92 93 static const fmd_hdl_ops_t fmd_ops = { 94 etm_recv, /* fmdo_recv */ 95 NULL, /* fmdo_timeout */ 96 NULL, /* fmdo_close */ 97 NULL, /* fmdo_stats */ 98 NULL, /* fmdo_gc */ 99 etm_send, /* fmdo_send */ 100 }; 101 102 static const fmd_prop_t fmd_props[] = { 103 { ETM_PROP_NM_XPORT_ADDRS, FMD_TYPE_STRING, "" }, 104 { ETM_PROP_NM_DEBUG_LVL, FMD_TYPE_INT32, "0" }, 105 { ETM_PROP_NM_DEBUG_MAX_EV_CNT, FMD_TYPE_INT32, "-1" }, 106 { ETM_PROP_NM_CONSOLE, FMD_TYPE_BOOL, "false" }, 107 { ETM_PROP_NM_SYSLOGD, FMD_TYPE_BOOL, "true" }, 108 { ETM_PROP_NM_FACILITY, FMD_TYPE_STRING, "LOG_DAEMON" }, 109 { ETM_PROP_NM_MAX_RESP_Q_LEN, FMD_TYPE_UINT32, "512" }, 110 { ETM_PROP_NM_BAD_ACC_TO_SEC, FMD_TYPE_UINT32, "1" }, 111 { ETM_PROP_NM_FMA_RESP_WAIT_TIME, FMD_TYPE_INT32, "240" }, 112 { NULL, 0, NULL } 113 }; 114 115 116 static const fmd_hdl_info_t fmd_info = { 117 "FMA Event Transport Module", "1.2", &fmd_ops, fmd_props 118 }; 119 120 /* 121 * ----------------------- private consts and defns 
-------------------------- 122 */ 123 124 /* misc buffer for variable sized protocol header fields */ 125 126 #define ETM_MISC_BUF_SZ (4 * 1024) 127 128 static uint32_t 129 etm_ldom_type = LDOM_TYPE_LEGACY; 130 131 /* try limit for IO operations w/ capped exp backoff sleep on retry */ 132 133 /* 134 * Design_Note: ETM will potentially retry forever IO operations that the 135 * transport fails with EAGAIN (aka EWOULDBLOCK) rather than 136 * giving up after some number of seconds. This avoids 137 * dropping FMA events while the service processor is down, 138 * but at the risk of leaving fmdo_recv() pending forever and 139 * overflowing FMD's event queue for ETM. 140 * A future TBD enhancement would be to always recv 141 * and send each ETM msg in a single read/write() to reduce 142 * the risk of failure between ETM msg hdr and body, 143 * assuming the MTU_SZ is large enough. 144 */ 145 146 #define ETM_TRY_MAX_CNT (MAXINT - 1) 147 #define ETM_TRY_BACKOFF_RATE (4) 148 #define ETM_TRY_BACKOFF_CAP (60) 149 150 /* amount to increment protocol transaction id on each new send */ 151 152 #define ETM_XID_INC (2) 153 154 typedef struct etm_resp_q_ele { 155 156 etm_xport_conn_t rqe_conn; /* open connection to send on */ 157 etm_proto_v1_pp_t *rqe_hdrp; /* ptr to ETM msg hdr */ 158 size_t rqe_hdr_sz; /* sizeof ETM msg hdr */ 159 int32_t rqe_resp_code; /* response code to send */ 160 161 struct etm_resp_q_ele *rqe_nextp; /* PRIVATE - next ele ptr */ 162 163 } etm_resp_q_ele_t; /* responder queue element */ 164 165 /* 166 * ---------------------------- global data ---------------------------------- 167 */ 168 169 static fmd_hdl_t 170 *init_hdl = NULL; /* used in mem allocator and several other places */ 171 172 static int 173 etm_debug_lvl = 0; /* debug level: 0 is off, 1 is on, 2 is more, etc */ 174 175 static int 176 etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */ 177 178 static fmd_xprt_t 179 *etm_fmd_xprt = NULL; /* FMD transport layer handle */ 180 181 static pthread_t 182 etm_svr_tid = NULL; /* thread id of connection acceptance server */ 183 184 static pthread_t 185 etm_resp_tid = NULL; /* thread id of msg responder */ 186 187 static etm_resp_q_ele_t 188 *etm_resp_q_head = NULL; /* ptr to cur head of responder queue */ 189 190 static etm_resp_q_ele_t 191 *etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */ 192 193 static uint32_t 194 etm_resp_q_cur_len = 0; /* cur length (ele cnt) of responder queue */ 195 196 static uint32_t 197 etm_resp_q_max_len = 0; /* max length (ele cnt) of responder queue */ 198 199 static uint32_t 200 etm_bad_acc_to_sec = 0; /* sleep timeout (in sec) after bad conn accept */ 201 202 static pthread_mutex_t 203 etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER; /* protects responder queue */ 204 205 static pthread_cond_t 206 etm_resp_q_cv = PTHREAD_COND_INITIALIZER; /* nudges msg responder */ 207 208 static volatile int 209 etm_is_dying = 0; /* bool for dying (killing self) */ 210 211 static uint32_t 212 etm_xid_cur = 0; /* current transaction id for sends */ 213 214 static uint32_t 215 etm_xid_ping = 0; /* xid of last CONTROL msg sent requesting ping */ 216 217 static uint32_t 218 etm_xid_ver_negot = 0; /* xid of last CONTROL msg sent requesting ver negot */ 219 220 static uint32_t 221 etm_xid_posted_logged_ev = 0; 222 /* xid of last FMA_EVENT msg/event posted OK to FMD */ 223 224 static uint32_t 225 etm_xid_posted_sa = 0; /* xid of last ALERT msg/event posted OK to syslog */ 226 227 static uint8_t 228 etm_resp_ver = ETM_PROTO_V1; /* proto ver
[negotiated] for msg sends */ 229 230 static uint32_t 231 etm_fma_resp_wait_time = 30; /* time (sec) wait for fma event resp */ 232 233 static pthread_mutex_t 234 etm_write_lock = PTHREAD_MUTEX_INITIALIZER; /* for write operations */ 235 236 static log_ctl_t syslog_ctl; /* log(7D) meta-data for each msg */ 237 static int syslog_facility; /* log(7D) facility (part of priority) */ 238 static int syslog_logfd = -1; /* log(7D) file descriptor */ 239 static int syslog_msgfd = -1; /* sysmsg(7D) file descriptor */ 240 static int syslog_file = 0; /* log to syslog_logfd */ 241 static int syslog_cons = 0; /* log to syslog_msgfd */ 242 243 static const struct facility { 244 const char *fac_name; 245 int fac_value; 246 } syslog_facs[] = { 247 { "LOG_DAEMON", LOG_DAEMON }, 248 { "LOG_LOCAL0", LOG_LOCAL0 }, 249 { "LOG_LOCAL1", LOG_LOCAL1 }, 250 { "LOG_LOCAL2", LOG_LOCAL2 }, 251 { "LOG_LOCAL3", LOG_LOCAL3 }, 252 { "LOG_LOCAL4", LOG_LOCAL4 }, 253 { "LOG_LOCAL5", LOG_LOCAL5 }, 254 { "LOG_LOCAL6", LOG_LOCAL6 }, 255 { "LOG_LOCAL7", LOG_LOCAL7 }, 256 { NULL, 0 } 257 }; 258 259 static struct stats { 260 261 /* ETM msg counters */ 262 263 fmd_stat_t etm_rd_hdr_fmaevent; 264 fmd_stat_t etm_rd_hdr_control; 265 fmd_stat_t etm_rd_hdr_alert; 266 fmd_stat_t etm_rd_hdr_response; 267 fmd_stat_t etm_rd_body_fmaevent; 268 fmd_stat_t etm_rd_body_control; 269 fmd_stat_t etm_rd_body_alert; 270 fmd_stat_t etm_rd_body_response; 271 fmd_stat_t etm_wr_hdr_fmaevent; 272 fmd_stat_t etm_wr_hdr_control; 273 fmd_stat_t etm_wr_hdr_response; 274 fmd_stat_t etm_wr_body_fmaevent; 275 fmd_stat_t etm_wr_body_control; 276 fmd_stat_t etm_wr_body_response; 277 278 fmd_stat_t etm_rd_max_ev_per_msg; 279 fmd_stat_t etm_wr_max_ev_per_msg; 280 281 fmd_stat_t etm_resp_q_cur_len; 282 fmd_stat_t etm_resp_q_max_len; 283 284 /* ETM byte counters */ 285 286 fmd_stat_t etm_wr_fmd_bytes; 287 fmd_stat_t etm_rd_fmd_bytes; 288 fmd_stat_t etm_wr_xport_bytes; 289 fmd_stat_t etm_rd_xport_bytes; 290 291 fmd_stat_t etm_magic_drop_bytes; 292 293 /* ETM [dropped] FMA event counters */ 294 295 fmd_stat_t etm_rd_fmd_fmaevent; 296 fmd_stat_t etm_wr_fmd_fmaevent; 297 298 fmd_stat_t etm_rd_drop_fmaevent; 299 fmd_stat_t etm_wr_drop_fmaevent; 300 301 fmd_stat_t etm_rd_dup_fmaevent; 302 fmd_stat_t etm_wr_dup_fmaevent; 303 304 fmd_stat_t etm_rd_dup_alert; 305 fmd_stat_t etm_wr_dup_alert; 306 307 fmd_stat_t etm_enq_drop_resp_q; 308 fmd_stat_t etm_deq_drop_resp_q; 309 310 /* ETM protocol failures */ 311 312 fmd_stat_t etm_magic_bad; 313 fmd_stat_t etm_ver_bad; 314 fmd_stat_t etm_msgtype_bad; 315 fmd_stat_t etm_subtype_bad; 316 fmd_stat_t etm_xid_bad; 317 fmd_stat_t etm_fmaeventlen_bad; 318 fmd_stat_t etm_respcode_bad; 319 fmd_stat_t etm_timeout_bad; 320 fmd_stat_t etm_evlens_bad; 321 322 /* IO operation failures */ 323 324 fmd_stat_t etm_xport_wr_fail; 325 fmd_stat_t etm_xport_rd_fail; 326 fmd_stat_t etm_xport_pk_fail; 327 328 /* IO operation retries */ 329 330 fmd_stat_t etm_xport_wr_retry; 331 fmd_stat_t etm_xport_rd_retry; 332 fmd_stat_t etm_xport_pk_retry; 333 334 /* system and library failures */ 335 336 fmd_stat_t etm_os_nvlist_pack_fail; 337 fmd_stat_t etm_os_nvlist_unpack_fail; 338 fmd_stat_t etm_os_nvlist_size_fail; 339 fmd_stat_t etm_os_pthread_create_fail; 340 341 /* xport API failures */ 342 343 fmd_stat_t etm_xport_get_ev_addrv_fail; 344 fmd_stat_t etm_xport_open_fail; 345 fmd_stat_t etm_xport_close_fail; 346 fmd_stat_t etm_xport_accept_fail; 347 fmd_stat_t etm_xport_open_retry; 348 349 /* FMD entry point bad arguments */ 350 351 fmd_stat_t etm_fmd_init_badargs; 
352 fmd_stat_t etm_fmd_fini_badargs; 353 354 /* Alert logging errors */ 355 356 fmd_stat_t etm_log_err; 357 fmd_stat_t etm_msg_err; 358 359 /* miscellaneous stats */ 360 361 fmd_stat_t etm_reset_xport; 362 363 } etm_stats = { 364 365 /* ETM msg counters */ 366 367 { "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64, 368 "ETM fmaevent msg headers rcvd from xport" }, 369 { "etm_rd_hdr_control", FMD_TYPE_UINT64, 370 "ETM control msg headers rcvd from xport" }, 371 { "etm_rd_hdr_alert", FMD_TYPE_UINT64, 372 "ETM alert msg headers rcvd from xport" }, 373 { "etm_rd_hdr_response", FMD_TYPE_UINT64, 374 "ETM response msg headers rcvd from xport" }, 375 { "etm_rd_body_fmaevent", FMD_TYPE_UINT64, 376 "ETM fmaevent msg bodies rcvd from xport" }, 377 { "etm_rd_body_control", FMD_TYPE_UINT64, 378 "ETM control msg bodies rcvd from xport" }, 379 { "etm_rd_body_alert", FMD_TYPE_UINT64, 380 "ETM alert msg bodies rcvd from xport" }, 381 { "etm_rd_body_response", FMD_TYPE_UINT64, 382 "ETM response msg bodies rcvd from xport" }, 383 { "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64, 384 "ETM fmaevent msg headers sent to xport" }, 385 { "etm_wr_hdr_control", FMD_TYPE_UINT64, 386 "ETM control msg headers sent to xport" }, 387 { "etm_wr_hdr_response", FMD_TYPE_UINT64, 388 "ETM response msg headers sent to xport" }, 389 { "etm_wr_body_fmaevent", FMD_TYPE_UINT64, 390 "ETM fmaevent msg bodies sent to xport" }, 391 { "etm_wr_body_control", FMD_TYPE_UINT64, 392 "ETM control msg bodies sent to xport" }, 393 { "etm_wr_body_response", FMD_TYPE_UINT64, 394 "ETM response msg bodies sent to xport" }, 395 396 { "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64, 397 "max FMA events per ETM msg from xport" }, 398 { "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64, 399 "max FMA events per ETM msg to xport" }, 400 401 { "etm_resp_q_cur_len", FMD_TYPE_UINT64, 402 "cur enqueued response msgs to xport" }, 403 { "etm_resp_q_max_len", FMD_TYPE_UINT64, 404 "max enqueable response msgs to xport" }, 405 406 /* ETM byte counters */ 407 408 { "etm_wr_fmd_bytes", FMD_TYPE_UINT64, 409 "bytes of FMA events sent to FMD" }, 410 { "etm_rd_fmd_bytes", FMD_TYPE_UINT64, 411 "bytes of FMA events rcvd from FMD" }, 412 { "etm_wr_xport_bytes", FMD_TYPE_UINT64, 413 "bytes of FMA events sent to xport" }, 414 { "etm_rd_xport_bytes", FMD_TYPE_UINT64, 415 "bytes of FMA events rcvd from xport" }, 416 417 { "etm_magic_drop_bytes", FMD_TYPE_UINT64, 418 "bytes dropped from xport pre magic num" }, 419 420 /* ETM [dropped] FMA event counters */ 421 422 { "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64, 423 "FMA events rcvd from FMD" }, 424 { "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64, 425 "FMA events sent to FMD" }, 426 427 { "etm_rd_drop_fmaevent", FMD_TYPE_UINT64, 428 "dropped FMA events from xport" }, 429 { "etm_wr_drop_fmaevent", FMD_TYPE_UINT64, 430 "dropped FMA events to xport" }, 431 432 { "etm_rd_dup_fmaevent", FMD_TYPE_UINT64, 433 "duplicate FMA events rcvd from xport" }, 434 { "etm_wr_dup_fmaevent", FMD_TYPE_UINT64, 435 "duplicate FMA events sent to xport" }, 436 437 { "etm_rd_dup_alert", FMD_TYPE_UINT64, 438 "duplicate ALERTs rcvd from xport" }, 439 { "etm_wr_dup_alert", FMD_TYPE_UINT64, 440 "duplicate ALERTs sent to xport" }, 441 442 { "etm_enq_drop_resp_q", FMD_TYPE_UINT64, 443 "dropped response msgs on enq" }, 444 { "etm_deq_drop_resp_q", FMD_TYPE_UINT64, 445 "dropped response msgs on deq" }, 446 447 /* ETM protocol failures */ 448 449 { "etm_magic_bad", FMD_TYPE_UINT64, 450 "ETM msgs w/ invalid magic num" }, 451 { "etm_ver_bad", FMD_TYPE_UINT64, 452 "ETM msgs w/ invalid protocol version" }, 453 { 
"etm_msgtype_bad", FMD_TYPE_UINT64, 454 "ETM msgs w/ invalid message type" }, 455 { "etm_subtype_bad", FMD_TYPE_UINT64, 456 "ETM msgs w/ invalid sub type" }, 457 { "etm_xid_bad", FMD_TYPE_UINT64, 458 "ETM msgs w/ unmatched xid" }, 459 { "etm_fmaeventlen_bad", FMD_TYPE_UINT64, 460 "ETM msgs w/ invalid FMA event length" }, 461 { "etm_respcode_bad", FMD_TYPE_UINT64, 462 "ETM msgs w/ invalid response code" }, 463 { "etm_timeout_bad", FMD_TYPE_UINT64, 464 "ETM msgs w/ invalid timeout value" }, 465 { "etm_evlens_bad", FMD_TYPE_UINT64, 466 "ETM msgs w/ too many event lengths" }, 467 468 /* IO operation failures */ 469 470 { "etm_xport_wr_fail", FMD_TYPE_UINT64, 471 "xport write failures" }, 472 { "etm_xport_rd_fail", FMD_TYPE_UINT64, 473 "xport read failures" }, 474 { "etm_xport_pk_fail", FMD_TYPE_UINT64, 475 "xport peek failures" }, 476 477 /* IO operation retries */ 478 479 { "etm_xport_wr_retry", FMD_TYPE_UINT64, 480 "xport write retries" }, 481 { "etm_xport_rd_retry", FMD_TYPE_UINT64, 482 "xport read retries" }, 483 { "etm_xport_pk_retry", FMD_TYPE_UINT64, 484 "xport peek retries" }, 485 486 /* system and library failures */ 487 488 { "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64, 489 "nvlist_pack failures" }, 490 { "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64, 491 "nvlist_unpack failures" }, 492 { "etm_os_nvlist_size_fail", FMD_TYPE_UINT64, 493 "nvlist_size failures" }, 494 { "etm_os_pthread_create_fail", FMD_TYPE_UINT64, 495 "pthread_create failures" }, 496 497 /* transport API failures */ 498 499 { "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64, 500 "xport get event addrv API failures" }, 501 { "etm_xport_open_fail", FMD_TYPE_UINT64, 502 "xport open API failures" }, 503 { "etm_xport_close_fail", FMD_TYPE_UINT64, 504 "xport close API failures" }, 505 { "etm_xport_accept_fail", FMD_TYPE_UINT64, 506 "xport accept API failures" }, 507 { "etm_xport_open_retry", FMD_TYPE_UINT64, 508 "xport open API retries" }, 509 510 /* FMD entry point bad arguments */ 511 512 { "etm_fmd_init_badargs", FMD_TYPE_UINT64, 513 "bad arguments from fmd_init entry point" }, 514 { "etm_fmd_fini_badargs", FMD_TYPE_UINT64, 515 "bad arguments from fmd_fini entry point" }, 516 517 /* Alert logging errors */ 518 519 { "etm_log_err", FMD_TYPE_UINT64, 520 "failed to log message to log(7D)" }, 521 { "etm_msg_err", FMD_TYPE_UINT64, 522 "failed to log message to sysmsg(7D)" }, 523 524 /* miscellaneous stats */ 525 526 { "etm_reset_xport", FMD_TYPE_UINT64, 527 "xport resets after xport API failure" } 528 }; 529 530 531 /* 532 * -------------------- global data for Root ldom------------------------- 533 */ 534 535 ldom_hdl_t 536 *etm_lhp = NULL; /* ldom pointer */ 537 538 static void *etm_dl_hdl = (void *)NULL; 539 static const char *etm_dl_path = "libds.so.1"; 540 static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL); 541 542 static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) = 543 (int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL; 544 static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) = 545 (int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL; 546 static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) = 547 (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL; 548 static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen, 549 size_t *msglen) = 550 (int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL; 551 static int (*etm_ds_fini)(void) = (int (*)(void))NULL; 552 553 static pthread_mutex_t 554 iosvc_list_lock = PTHREAD_MUTEX_INITIALIZER; 555 556 static pthread_t 557 
etm_async_e_tid = NULL; /* thread id of io svc async event handler */ 558 559 static etm_proto_v1_ev_hdr_t iosvc_hdr = { 560 ETM_PROTO_MAGIC_NUM, /* magic number */ 561 ETM_PROTO_V1, /* default to V1, not checked */ 562 ETM_MSG_TYPE_FMA_EVENT, /* Root Domain introduces only FMA events */ 563 0, /* sub-type */ 564 0, /* pad */ 565 0, /* add the xid at the Q send time */ 566 ETM_PROTO_V1_TIMEOUT_NONE, 567 0 /* ev_lens, 0-termed, after 1 FMA event */ 568 }; 569 570 /* 571 * static iosvc_list 572 */ 573 static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = { 574 {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, 575 {"", 0}, {"", 0} 576 }; 577 578 static etm_iosvc_t io_svc = { 579 "\0", /* ldom_name */ 580 PTHREAD_COND_INITIALIZER, /* nudges */ 581 PTHREAD_MUTEX_INITIALIZER, /* protects the iosvc msg Q */ 582 NULL, /* iosvc msg Q head */ 583 NULL, /* iosvc msg Q tail */ 584 0, /* msg Q current length */ 585 100, /* msg Q max length */ 586 0, /* current transaction id */ 587 0, /* xid of last event posted to FMD */ 588 DS_INVALID_HDL, /* DS handle */ 589 NULL, /* fmd xprt handle */ 590 NULL, /* tid for send to remote RootDomain */ 591 NULL, /* tid for recv from remote RootDomain */ 592 PTHREAD_COND_INITIALIZER, /* nudges etm_send_to_remote_root */ 593 PTHREAD_MUTEX_INITIALIZER, /* protects msg_ack_cv */ 594 0, /* send/recv threads are not dying */ 595 0, /* flag for start sending msg Q */ 596 0 /* indicate if the ACK has come */ 597 }; 598 etm_iosvc_t *io_svc_p = &io_svc; 599 600 601 static uint32_t 602 flags; /* flags for fmd_xprt_open */ 603 604 static etm_async_event_ele_t 605 async_event_q[ASYNC_EVENT_Q_SIZE]; /* holds the async events */ 606 607 static uint32_t 608 etm_async_q_head = 0; /* ptr to cur head of async event queue */ 609 610 static uint32_t 611 etm_async_q_tail = 0; /* ptr to cur tail of async event queue */ 612 613 static uint32_t 614 etm_async_q_cur_len = 0; /* cur length (ele cnt) of async event queue */ 615 616 static uint32_t 617 etm_async_q_max_len = ASYNC_EVENT_Q_SIZE; 618 /* max length (ele cnt) of async event queue */ 619 620 static pthread_cond_t 621 etm_async_event_q_cv = PTHREAD_COND_INITIALIZER; 622 /* nudges async event handler */ 623 624 static pthread_mutex_t 625 etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER; 626 /* protects async event q */ 627 628 static ds_ver_t 629 etm_iosvc_vers[] = { { 1, 0} }; 630 631 #define ETM_NVERS (sizeof (etm_iosvc_vers) / sizeof (ds_ver_t)) 632 633 static ds_capability_t 634 iosvc_caps = { 635 "ETM", /* svc_id */ 636 etm_iosvc_vers, /* vers */ 637 ETM_NVERS /* number of vers */ 638 }; 639 640 static void 641 etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver, 642 ds_domain_hdl_t did); 643 644 static void 645 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg); 646 647 static ds_ops_t 648 iosvc_ops = { 649 etm_iosvc_reg_handler, /* ds_reg_cb */ 650 etm_iosvc_unreg_handler, /* ds_unreg_cb */ 651 NULL, /* ds_data_cb */ 652 NULL /* cb_arg */ 653 }; 654 655 656 /* 657 * -------------------------- support functions ------------------------------ 658 */ 659 660 /* 661 * Design_Note: Each failure worth reporting to FMD should be done using 662 * a single call to fmd_hdl_error() as it logs an FMA event 663 * for each call. Also be aware that all the fmd_hdl_*() 664 * format strings currently use platform specific *printf() 665 * routines; so "%p" under Solaris does not prepend "0x" to 666 * the outputted hex digits, while Linux and VxWorks do.
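 *
 * For example (an illustrative sketch, not a call made at this
 * spot in the module), the same statement
 *
 *	fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
 *
 * may log "event 12345678" under Solaris but "event 0x12345678"
 * under Linux or VxWorks, so nothing should assume a "0x" prefix
 * in the logged output.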
667 */ 668 669 670 /* 671 * etm_show_time - display the current time of day (for debugging) using 672 * the given FMD module handle and annotation string 673 */ 674 675 static void 676 etm_show_time(fmd_hdl_t *hdl, char *note_str) 677 { 678 struct timeval tmv; /* timeval */ 679 680 (void) gettimeofday(&tmv, NULL); 681 fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n", 682 note_str, tmv.tv_sec, tmv.tv_usec); 683 684 } /* etm_show_time() */ 685 686 /* 687 * etm_hexdump - hexdump the given buffer (for debugging) using 688 * the given FMD module handle 689 */ 690 691 static void 692 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt) 693 { 694 uint8_t *bp; /* byte ptr */ 695 int i, j; /* index */ 696 char cb[80]; /* char buf */ 697 unsigned int n; /* a byte of data for sprintf() */ 698 699 bp = buf; 700 j = 0; 701 702 /* 703 * Design_Note: fmd_hdl_debug() auto adds a newline if missing; 704 * hence cb exists to accumulate a longer string. 705 */ 706 707 for (i = 1; i <= byte_cnt; i++) { 708 n = *bp++; 709 (void) sprintf(&cb[j], "%2.2x ", n); 710 j += 3; 711 /* add a newline every 16 bytes or at the buffer's end */ 712 if (((i % 16) == 0) || (i >= byte_cnt)) { 713 cb[j-1] = '\0'; 714 fmd_hdl_debug(hdl, "%s\n", cb); 715 j = 0; 716 } 717 } /* for each byte in the buffer */ 718 719 } /* etm_hexdump() */ 720 721 /* 722 * etm_sleep - sleep the caller for the given number of seconds, 723 * return 0 or -errno value 724 * 725 * Design_Note: To avoid interfering with FMD's signal mask (SIGALRM) 726 * do not use [Solaris] sleep(3C) and instead use 727 * pthread_cond_wait() or nanosleep(), both of which 728 * are POSIX spec-ed to leave signal masks alone. 729 * This is needed for Solaris and Linux (domain and SP). 730 */ 731 732 static int 733 etm_sleep(unsigned sleep_sec) 734 { 735 struct timespec tms; /* for nanosleep() */ 736 737 tms.tv_sec = sleep_sec; 738 tms.tv_nsec = 0; 739 740 if (nanosleep(&tms, NULL) < 0) { 741 /* errno assumed set by above call */ 742 return (-errno); 743 } 744 return (0); 745 746 } /* etm_sleep() */ 747 748 /* 749 * etm_conn_open - open a connection to the given transport address, 750 * return 0 and the opened connection handle 751 * or -errno value 752 * 753 * caveats: the err_substr is used in failure cases for calling 754 * fmd_hdl_error() 755 */ 756 757 static int 758 etm_conn_open(fmd_hdl_t *hdl, char *err_substr, 759 etm_xport_addr_t addr, etm_xport_conn_t *connp) 760 { 761 etm_xport_conn_t conn; /* connection to return */ 762 int nev; /* -errno value */ 763 764 if ((conn = etm_xport_open(hdl, addr)) == NULL) { 765 nev = (-errno); 766 fmd_hdl_error(hdl, "error: %s: errno %d\n", 767 err_substr, errno); 768 etm_stats.etm_xport_open_fail.fmds_value.ui64++; 769 return (nev); 770 } else { 771 *connp = conn; 772 return (0); 773 } 774 } /* etm_conn_open() */ 775 776 /* 777 * etm_conn_close - close the given connection, 778 * return 0 or -errno value 779 * 780 * caveats: the err_substr is used in failure cases for calling 781 * fmd_hdl_error() 782 */ 783 784 static int 785 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn) 786 { 787 int nev; /* -errno value */ 788 789 if (etm_xport_close(hdl, conn) == NULL) { 790 nev = (-errno); 791 fmd_hdl_error(hdl, "warning: %s: errno %d\n", 792 err_substr, errno); 793 etm_stats.etm_xport_close_fail.fmds_value.ui64++; 794 return (nev); 795 } else { 796 return (0); 797 } 798 } /* etm_conn_close() */ 799 800 /* 801 * etm_io_op - perform an IO operation on the given connection 802 * with the given buffer, 803 
* accommodating MTU size and retrying op if needed, 804 * return how many bytes actually done by the op 805 * or -errno value 806 * 807 * caveats: the err_substr is used in failure cases for calling 808 * fmd_hdl_error() 809 */ 810 811 static ssize_t 812 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn, 813 void *buf, size_t byte_cnt, int io_op) 814 { 815 ssize_t rv; /* ret val / byte count */ 816 ssize_t n; /* gen use */ 817 uint8_t *datap; /* ptr to data */ 818 size_t mtu_sz; /* MTU size in bytes */ 819 int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t, 820 void *, size_t); 821 size_t io_sz; /* byte count for io_func_ptr */ 822 int try_cnt; /* number of tries done */ 823 int sleep_sec; /* exp backoff sleep period in sec */ 824 int sleep_rv; /* ret val from sleeping */ 825 fmd_stat_t io_retry_stat; /* IO retry stat to update */ 826 fmd_stat_t io_fail_stat; /* IO failure stat to update */ 827 828 if ((conn == NULL) || (buf == NULL)) { 829 return (-EINVAL); 830 } 831 switch (io_op) { 832 case ETM_IO_OP_RD: 833 io_func_ptr = etm_xport_read; 834 io_retry_stat = etm_stats.etm_xport_rd_retry; 835 io_fail_stat = etm_stats.etm_xport_rd_fail; 836 break; 837 case ETM_IO_OP_WR: 838 io_func_ptr = etm_xport_write; 839 io_retry_stat = etm_stats.etm_xport_wr_retry; 840 io_fail_stat = etm_stats.etm_xport_wr_fail; 841 break; 842 default: 843 return (-EINVAL); 844 } 845 if (byte_cnt == 0) { 846 return (byte_cnt); /* nop */ 847 } 848 849 /* obtain [current] MTU size */ 850 851 if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) { 852 mtu_sz = ETM_XPORT_MTU_SZ_DEF; 853 } else { 854 mtu_sz = n; 855 } 856 857 /* loop until all IO done, try limit exceeded, or real failure */ 858 859 rv = 0; 860 datap = buf; 861 while (rv < byte_cnt) { 862 io_sz = MIN((byte_cnt - rv), mtu_sz); 863 try_cnt = 0; 864 sleep_sec = 0; 865 866 /* when give up, return -errno value even if partly done */ 867 868 while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) == 869 (-EAGAIN)) { 870 try_cnt++; 871 if (try_cnt > ETM_TRY_MAX_CNT) { 872 rv = n; 873 goto func_ret; 874 } 875 if (etm_is_dying) { 876 rv = (-EINTR); 877 goto func_ret; 878 } 879 if ((sleep_rv = etm_sleep(sleep_sec)) < 0) { 880 rv = sleep_rv; 881 goto func_ret; 882 } 883 sleep_sec = ((sleep_sec == 0) ? 
1 : 884 (sleep_sec * ETM_TRY_BACKOFF_RATE)); 885 sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP); 886 io_retry_stat.fmds_value.ui64++; 887 if (etm_debug_lvl >= 1) { 888 fmd_hdl_debug(hdl, "info: retrying io op %d " 889 "due to EAGAIN\n", io_op); 890 } 891 } /* while trying the io operation */ 892 893 if (etm_is_dying) { 894 rv = (-EINTR); 895 goto func_ret; 896 } 897 if (n < 0) { 898 rv = n; 899 goto func_ret; 900 } 901 /* avoid spinning CPU when given 0 bytes but no error */ 902 if (n == 0) { 903 if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) { 904 rv = sleep_rv; 905 goto func_ret; 906 } 907 } 908 rv += n; 909 datap += n; 910 } /* while still have more data */ 911 912 func_ret: 913 914 if (rv < 0) { 915 io_fail_stat.fmds_value.ui64++; 916 fmd_hdl_debug(hdl, "error: %s: errno %d\n", 917 err_substr, (int)(-rv)); 918 } 919 if (etm_debug_lvl >= 3) { 920 fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n", 921 io_op, (int)rv, (int)byte_cnt); 922 } 923 return (rv); 924 925 } /* etm_io_op() */ 926 927 /* 928 * etm_magic_read - read the magic number of an ETM message header 929 * from the given connection into the given buffer, 930 * return 0 or -errno value 931 * 932 * Design_Note: This routine is intended to help protect ETM from protocol 933 * framing errors as might be caused by an SP reset / crash in 934 * the middle of an ETM message send; the connection will be 935 * read from for as many bytes as needed until the magic number 936 * is found using a sliding buffer for comparisons. 937 */ 938 939 static int 940 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr) 941 { 942 int rv; /* ret val */ 943 uint32_t magic_num; /* magic number */ 944 int byte_cnt; /* count of bytes read */ 945 uint8_t buf5[4+1]; /* sliding input buffer */ 946 int i, j; /* indices into buf5 */ 947 ssize_t n; /* gen use */ 948 uint8_t drop_buf[1024]; /* dropped bytes buffer */ 949 950 rv = 0; /* assume success */ 951 magic_num = 0; 952 byte_cnt = 0; 953 j = 0; 954 955 /* magic number bytes are sent in network (big endian) order */ 956 957 while (magic_num != ETM_PROTO_MAGIC_NUM) { 958 if ((n = etm_io_op(hdl, "bad io read on magic", 959 conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) { 960 rv = n; 961 goto func_ret; 962 } 963 byte_cnt++; 964 j = MIN((j + 1), sizeof (magic_num)); 965 if (byte_cnt < sizeof (magic_num)) { 966 continue; 967 } 968 969 if (byte_cnt > sizeof (magic_num)) { 970 etm_stats.etm_magic_drop_bytes.fmds_value.ui64++; 971 i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1); 972 drop_buf[i] = buf5[0]; 973 for (i = 0; i < j; i++) { 974 buf5[i] = buf5[i+1]; 975 } /* for sliding the buffer contents */ 976 } 977 (void) memcpy(&magic_num, &buf5[0], sizeof (magic_num)); 978 magic_num = ntohl(magic_num); 979 } /* for reading bytes until find magic number */ 980 981 func_ret: 982 983 if (byte_cnt != sizeof (magic_num)) { 984 fmd_hdl_debug(hdl, "warning: bad proto frame " 985 "implies corrupt/lost msg(s)\n"); 986 } 987 if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) { 988 i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf)); 989 fmd_hdl_debug(hdl, "info: magic drop hexdump " 990 "first %d of %d bytes:\n", i, 991 byte_cnt - sizeof (magic_num)); 992 etm_hexdump(hdl, drop_buf, i); 993 } 994 995 if (rv == 0) { 996 *magic_ptr = magic_num; 997 } 998 return (rv); 999 1000 } /* etm_magic_read() */ 1001 1002 /* 1003 * etm_hdr_read - allocate, read, and validate a [variable sized] 1004 * ETM message header from the given connection, 1005 * return the allocated ETM message header 1006 * (which 
is guaranteed to be large enough to reuse as a 1007 * RESPONSE msg hdr) and its size 1008 * or NULL and set errno on failure 1009 */ 1010 1011 static void * 1012 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp) 1013 { 1014 uint8_t *hdrp; /* ptr to header to return */ 1015 size_t hdr_sz; /* sizeof *hdrp */ 1016 etm_proto_v1_pp_t pp; /* protocol preamble */ 1017 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 1018 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1019 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 1020 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 1021 uint32_t *lenp; /* ptr to FMA event length */ 1022 ssize_t i, n; /* gen use */ 1023 uint8_t misc_buf[ETM_MISC_BUF_SZ]; /* for var sized hdrs */ 1024 int dummy_int; /* dummy var to appease lint */ 1025 1026 hdrp = NULL; hdr_sz = 0; 1027 1028 /* read the magic number which starts the protocol preamble */ 1029 1030 if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) { 1031 errno = (-n); 1032 etm_stats.etm_magic_bad.fmds_value.ui64++; 1033 return (NULL); 1034 } 1035 1036 /* read the rest of the protocol preamble all at once */ 1037 1038 if ((n = etm_io_op(hdl, "bad io read on preamble", 1039 conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num), 1040 ETM_IO_OP_RD)) < 0) { 1041 errno = (-n); 1042 return (NULL); 1043 } 1044 1045 /* 1046 * Design_Note: The magic number was already network decoded; but 1047 * some other preamble fields also need to be decoded, 1048 * specifically pp_xid and pp_timeout. The rest of the 1049 * preamble fields are byte sized and hence need no 1050 * decoding. 1051 */ 1052 1053 pp.pp_xid = ntohl(pp.pp_xid); 1054 pp.pp_timeout = ntohl(pp.pp_timeout); 1055 1056 /* sanity check the header as best we can */ 1057 1058 if ((pp.pp_proto_ver < ETM_PROTO_V1) || 1059 (pp.pp_proto_ver > ETM_PROTO_V3)) { 1060 fmd_hdl_error(hdl, "error: bad proto ver %d\n", 1061 (int)pp.pp_proto_ver); 1062 errno = EPROTO; 1063 etm_stats.etm_ver_bad.fmds_value.ui64++; 1064 return (NULL); 1065 } 1066 1067 dummy_int = pp.pp_msg_type; 1068 if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) || 1069 (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) { 1070 fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int); 1071 errno = EBADMSG; 1072 etm_stats.etm_msgtype_bad.fmds_value.ui64++; 1073 return (NULL); 1074 } 1075 1076 /* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */ 1077 1078 if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 1079 1080 ev_hdrp = (void*)&misc_buf[0]; 1081 hdr_sz = sizeof (*ev_hdrp); 1082 (void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp)); 1083 1084 /* sanity check the header's timeout */ 1085 1086 if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) && 1087 (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) { 1088 errno = ETIME; 1089 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1090 return (NULL); 1091 } 1092 1093 /* get all FMA event lengths from the header */ 1094 1095 lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--; 1096 i = -1; /* cnt of length entries preceding 0 */ 1097 do { 1098 i++; lenp++; 1099 if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >= 1100 ETM_MISC_BUF_SZ) { 1101 errno = E2BIG; /* ridiculous size */ 1102 etm_stats.etm_evlens_bad.fmds_value.ui64++; 1103 return (NULL); 1104 } 1105 if ((n = etm_io_op(hdl, "bad io read on event len", 1106 conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) { 1107 errno = (-n); 1108 return (NULL); 1109 } 1110 *lenp = ntohl(*lenp); 1111 1112 } while (*lenp != 0); 1113 i += 0; /* first len already counted by sizeof(ev_hdr) */ 
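		/*
		 * Illustrative example (assumed wire layout): for an
		 * FMA_EVENT msg carrying two events of 100 and 200
		 * bytes, the 0-terminated length vector read above is
		 * { 100, 200, 0 }, so the do-while exits with i == 2
		 * and hdr_sz below grows by the two length slots not
		 * already counted in sizeof (*ev_hdrp).
		 */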
1114 hdr_sz += (i * sizeof (*lenp)); 1115 1116 etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++; 1117 1118 } else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 1119 1120 ctl_hdrp = (void*)&misc_buf[0]; 1121 hdr_sz = sizeof (*ctl_hdrp); 1122 (void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp)); 1123 1124 /* sanity check the header's sub type (control selector) */ 1125 1126 if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) || 1127 (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) { 1128 fmd_hdl_error(hdl, "error: bad ctl sub type %d\n", 1129 (int)ctl_hdrp->ctl_pp.pp_sub_type); 1130 errno = EBADMSG; 1131 etm_stats.etm_subtype_bad.fmds_value.ui64++; 1132 return (NULL); 1133 } 1134 1135 /* get the control length */ 1136 1137 if ((n = etm_io_op(hdl, "bad io read on ctl len", 1138 conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len), 1139 ETM_IO_OP_RD)) < 0) { 1140 errno = (-n); 1141 return (NULL); 1142 } 1143 1144 ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len); 1145 1146 etm_stats.etm_rd_hdr_control.fmds_value.ui64++; 1147 1148 } else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 1149 1150 resp_hdrp = (void*)&misc_buf[0]; 1151 hdr_sz = sizeof (*resp_hdrp); 1152 (void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp)); 1153 1154 /* sanity check the header's timeout */ 1155 1156 if (resp_hdrp->resp_pp.pp_timeout != 1157 ETM_PROTO_V1_TIMEOUT_NONE) { 1158 errno = ETIME; 1159 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1160 return (NULL); 1161 } 1162 1163 /* get the response code and length */ 1164 1165 if ((n = etm_io_op(hdl, "bad io read on resp code+len", 1166 conn, &resp_hdrp->resp_code, 1167 sizeof (resp_hdrp->resp_code) 1168 + sizeof (resp_hdrp->resp_len), 1169 ETM_IO_OP_RD)) < 0) { 1170 errno = (-n); 1171 return (NULL); 1172 } 1173 1174 resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code); 1175 resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len); 1176 1177 etm_stats.etm_rd_hdr_response.fmds_value.ui64++; 1178 1179 } else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 1180 1181 sa_hdrp = (void*)&misc_buf[0]; 1182 hdr_sz = sizeof (*sa_hdrp); 1183 (void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp)); 1184 1185 /* sanity check the header's protocol version */ 1186 1187 if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) { 1188 errno = EPROTO; 1189 etm_stats.etm_ver_bad.fmds_value.ui64++; 1190 return (NULL); 1191 } 1192 1193 /* get the priority and length */ 1194 1195 if ((n = etm_io_op(hdl, "bad io read on sa priority+len", 1196 conn, &sa_hdrp->sa_priority, 1197 sizeof (sa_hdrp->sa_priority) 1198 + sizeof (sa_hdrp->sa_len), 1199 ETM_IO_OP_RD)) < 0) { 1200 errno = (-n); 1201 return (NULL); 1202 } 1203 1204 sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority); 1205 sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len); 1206 1207 etm_stats.etm_rd_hdr_alert.fmds_value.ui64++; 1208 1209 } /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */ 1210 1211 /* 1212 * choose a header size that allows hdr reuse for RESPONSE msgs, 1213 * allocate and populate the message header, and 1214 * return alloc size to caller for later free of hdrp 1215 */ 1216 1217 hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp)); 1218 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1219 (void) memcpy(hdrp, misc_buf, hdr_sz); 1220 1221 if (etm_debug_lvl >= 3) { 1222 fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz); 1223 etm_hexdump(hdl, hdrp, hdr_sz); 1224 } 1225 *szp = hdr_sz; 1226 return (hdrp); 1227 1228 } /* etm_hdr_read() */ 1229 1230 /* 1231 * etm_hdr_write - create and write a [variable sized] ETM message header 1232 * to the given 
connection appropriate for the given FMA event 1233 * and type of nvlist encoding, 1234 * return the allocated ETM message header and its size 1235 * or NULL and set errno on failure 1236 */ 1237 1238 static void* 1239 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp, 1240 int encoding, size_t *szp) 1241 { 1242 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1243 size_t hdr_sz; /* sizeof *hdrp */ 1244 uint32_t *lenp; /* ptr to FMA event length */ 1245 size_t evsz; /* packed FMA event size */ 1246 ssize_t n; /* gen use */ 1247 1248 /* allocate and populate the message header for 1 FMA event */ 1249 1250 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1251 1252 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1253 1254 /* 1255 * Design_Note: Although the ETM protocol supports it, we do not (yet) 1256 * want responses/ACKs on FMA events that we send. All 1257 * such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE. 1258 */ 1259 1260 hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM; 1261 hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num); 1262 hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1; 1263 hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT; 1264 hdrp->ev_pp.pp_sub_type = 0; 1265 hdrp->ev_pp.pp_rsvd_pad = 0; 1266 hdrp->ev_pp.pp_xid = etm_xid_cur; 1267 hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid); 1268 etm_xid_cur += ETM_XID_INC; 1269 hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 1270 hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout); 1271 1272 lenp = &hdrp->ev_lens[0]; 1273 1274 if ((n = nvlist_size(evp, &evsz, encoding)) != 0) { 1275 errno = n; 1276 fmd_hdl_free(hdl, hdrp, hdr_sz); 1277 etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++; 1278 return (NULL); 1279 } 1280 1281 /* indicate 1 FMA event, network encode its length, and 0-terminate */ 1282 1283 etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1; 1284 1285 *lenp = evsz; *lenp = htonl(*lenp); lenp++; 1286 *lenp = 0; *lenp = htonl(*lenp); lenp++; 1287 1288 /* 1289 * write the network encoded header to the transport, and 1290 * return alloc size to caller for later free 1291 */ 1292 1293 if ((n = etm_io_op(hdl, "bad io write on event hdr", 1294 conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) { 1295 errno = (-n); 1296 fmd_hdl_free(hdl, hdrp, hdr_sz); 1297 return (NULL); 1298 } 1299 1300 *szp = hdr_sz; 1301 return (hdrp); 1302 1303 } /* etm_hdr_write() */ 1304 1305 /* 1306 * etm_post_to_fmd - post the given FMA event to FMD 1307 * via a FMD transport API call, 1308 * return 0 or -errno value 1309 * 1310 * caveats: the FMA event (evp) is freed by FMD, 1311 * thus callers of this function should 1312 * immediately discard any ptr they have to the 1313 * nvlist without freeing or dereferencing it 1314 */ 1315 1316 static int 1317 etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp) 1318 { 1319 ssize_t ev_sz; /* sizeof *evp */ 1320 1321 (void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR); 1322 1323 if (etm_debug_lvl >= 2) { 1324 etm_show_time(hdl, "ante ev post"); 1325 } 1326 fmd_xprt_post(hdl, fmd_xprt, evp, 0); 1327 etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++; 1328 etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz; 1329 if (etm_debug_lvl >= 1) { 1330 fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp); 1331 } 1332 if (etm_debug_lvl >= 2) { 1333 etm_show_time(hdl, "post ev post"); 1334 } 1335 return (0); 1336 1337 } /* etm_post_to_fmd() */ 1338 1339 /* 1340 * Ideally we would just use syslog(3C) for outputting our messages. 
1341 * Unfortunately, as this module is running within the FMA daemon context, 1342 * that would create the situation where this module's openlog() would 1343 * have the monopoly on syslog(3C) for the daemon and all its modules. 1344 * To avoid that situation, this module uses the same logic as the 1345 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D) 1346 * devices for syslog and console. 1347 */ 1348 1349 static int 1350 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz, 1351 uint8_t *body_buf) 1352 { 1353 char *sysmessage; /* Formatted message */ 1354 size_t formatlen; /* maximum length of sysmessage */ 1355 struct strbuf ctl, dat; /* structs pushed to the logfd */ 1356 uint32_t msgid; /* syslog message ID number */ 1357 1358 if ((syslog_file == 0) && (syslog_cons == 0)) { 1359 return (0); 1360 } 1361 1362 if (etm_debug_lvl >= 2) { 1363 etm_show_time(hdl, "ante syslog post"); 1364 } 1365 1366 formatlen = body_sz + 64; /* +64 for prefix strings added below */ 1367 sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP); 1368 1369 if (syslog_file) { 1370 STRLOG_MAKE_MSGID(body_buf, msgid); 1371 (void) snprintf(sysmessage, formatlen, 1372 "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid, 1373 body_buf); 1374 1375 syslog_ctl.pri = syslog_facility | priority; 1376 1377 ctl.buf = (void *)&syslog_ctl; 1378 ctl.len = sizeof (syslog_ctl); 1379 1380 dat.buf = sysmessage; 1381 dat.len = strlen(sysmessage) + 1; 1382 1383 if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) { 1384 fmd_hdl_debug(hdl, "putmsg failed: %s\n", 1385 strerror(errno)); 1386 etm_stats.etm_log_err.fmds_value.ui64++; 1387 } 1388 } 1389 1390 if (syslog_cons) { 1391 (void) snprintf(sysmessage, formatlen, 1392 "SC Alert: %s\r\n", body_buf); 1393 1394 dat.buf = sysmessage; 1395 dat.len = strlen(sysmessage) + 1; 1396 1397 if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) { 1398 fmd_hdl_debug(hdl, "write failed: %s\n", 1399 strerror(errno)); 1400 etm_stats.etm_msg_err.fmds_value.ui64++; 1401 } 1402 } 1403 1404 fmd_hdl_free(hdl, sysmessage, formatlen); 1405 1406 if (etm_debug_lvl >= 2) { 1407 etm_show_time(hdl, "post syslog post"); 1408 } 1409 1410 return (0); 1411 } 1412 1413 1414 /* 1415 * etm_req_ver_negot - send an ETM control message to the other end requesting 1416 * that the ETM protocol version be negotiated/set 1417 */ 1418 1419 static void 1420 etm_req_ver_negot(fmd_hdl_t *hdl) 1421 { 1422 etm_xport_addr_t *addrv; /* default dst addr(s) */ 1423 etm_xport_conn_t conn; /* connection to other end */ 1424 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1425 size_t hdr_sz; /* sizeof header */ 1426 uint8_t *body_buf; /* msg body buffer */ 1427 uint32_t body_sz; /* sizeof *body_buf */ 1428 ssize_t i; /* gen use */ 1429 1430 /* populate an ETM control msg to send */ 1431 1432 hdr_sz = sizeof (*ctl_hdrp); 1433 body_sz = (3 + 1); /* version bytes plus null byte */ 1434 1435 ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP); 1436 1437 ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM); 1438 ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1; 1439 ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL; 1440 ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ; 1441 ctl_hdrp->ctl_pp.pp_rsvd_pad = 0; 1442 etm_xid_ver_negot = etm_xid_cur; 1443 etm_xid_cur += ETM_XID_INC; 1444 ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot); 1445 ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER); 1446 ctl_hdrp->ctl_len = htonl(body_sz); 1447 1448 body_buf = 
(void*)&ctl_hdrp->ctl_len; 1449 body_buf += sizeof (ctl_hdrp->ctl_len); 1450 *body_buf++ = ETM_PROTO_V3; 1451 *body_buf++ = ETM_PROTO_V2; 1452 *body_buf++ = ETM_PROTO_V1; 1453 *body_buf++ = '\0'; 1454 1455 /* 1456 * open and close a connection to send the ETM control msg 1457 * to any/all of the default dst addrs 1458 */ 1459 1460 if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) { 1461 fmd_hdl_error(hdl, 1462 "error: bad ctl dst addrs errno %d\n", errno); 1463 etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++; 1464 goto func_ret; 1465 } 1466 1467 for (i = 0; addrv[i] != NULL; i++) { 1468 1469 if (etm_conn_open(hdl, "bad conn open during ver negot", 1470 addrv[i], &conn) < 0) { 1471 continue; 1472 } 1473 if (etm_io_op(hdl, "bad io write on ctl hdr+body", 1474 conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) { 1475 etm_stats.etm_wr_hdr_control.fmds_value.ui64++; 1476 etm_stats.etm_wr_body_control.fmds_value.ui64++; 1477 } 1478 (void) etm_conn_close(hdl, "bad conn close during ver negot", 1479 conn); 1480 1481 } /* foreach dst addr */ 1482 1483 func_ret: 1484 1485 if (addrv != NULL) { 1486 etm_xport_free_addrv(hdl, addrv); 1487 } 1488 fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz); 1489 1490 } /* etm_req_ver_negot() */ 1491 1492 1493 1494 /* 1495 * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue 1496 * etm_iosvc_msg_deq - del element from head of ETM iosvc msg queue 1497 * need to grab the mutex lock before calling this routine 1498 * return >0 for success, or -errno value 1499 */ 1500 static int 1501 etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1502 { 1503 etm_iosvc_q_ele_t *newp; /* ptr to new msg q ele */ 1504 1505 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1506 fmd_hdl_debug(hdl, "warning: enq to full msg queue\n"); 1507 return (-E2BIG); 1508 } 1509 1510 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 1511 (void) memcpy(newp, msgp, sizeof (*newp)); 1512 newp->msg_nextp = NULL; 1513 1514 if (iosvc->msg_q_cur_len == 0) { 1515 iosvc->msg_q_head = newp; 1516 } else { 1517 iosvc->msg_q_tail->msg_nextp = newp; 1518 } 1519 1520 iosvc->msg_q_tail = newp; 1521 iosvc->msg_q_cur_len++; 1522 fmd_hdl_debug(hdl, "info: current msg queue length %d\n", 1523 iosvc->msg_q_cur_len); 1524 1525 return (1); 1526 1527 } /* etm_iosvc_msg_enq() */ 1528 1529 static int 1530 etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1531 { 1532 etm_iosvc_q_ele_t *oldp; /* ptr to old msg q ele */ 1533 1534 if (iosvc->msg_q_cur_len == 0) { 1535 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 1536 return (-ENOENT); 1537 } 1538 1539 (void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp)); 1540 msgp->msg_nextp = NULL; 1541 1542 oldp = iosvc->msg_q_head; 1543 iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp; 1544 1545 /* 1546 * free the mem alloc-ed in etm_iosvc_msg_enq() 1547 */ 1548 fmd_hdl_free(hdl, oldp, sizeof (*oldp)); 1549 1550 iosvc->msg_q_cur_len--; 1551 if (iosvc->msg_q_cur_len == 0) { 1552 iosvc->msg_q_tail = NULL; 1553 } 1554 1555 return (1); 1556 1557 } /* etm_iosvc_msg_deq() */ 1558 1559 1560 /* 1561 * etm_msg_enq_head(): 1562 * enq the msg to the head of the Q. 1563 * If the Q is full, drop the msg at the tail then enq the msg at head. 1564 * need to grab mutex lock iosvc->msg_q_lock before calling this routine. 
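 *
 * Illustrative usage sketch (assumed caller, not a fixed call site;
 * it simply honors the locking rule stated above):
 *
 *	(void) pthread_mutex_lock(&iosvc->msg_q_lock);
 *	etm_msg_enq_head(fmd_hdl, iosvc, &msg_ele);
 *	(void) pthread_mutex_unlock(&iosvc->msg_q_lock);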
1565 */ 1566 static void 1567 etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1568 etm_iosvc_q_ele_t *msg_ele) 1569 { 1570 1571 etm_iosvc_q_ele_t *newp; /* iosvc msg ele ptr */ 1572 1573 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1574 fmd_hdl_debug(fmd_hdl, 1575 "warning: add to head of a full msg queue." 1576 " Drop the msg at the tail\n"); 1577 /* 1578 * drop the msg at the tail 1579 */ 1580 newp = iosvc->msg_q_head; 1581 while (newp->msg_nextp != iosvc->msg_q_tail) { 1582 newp = newp->msg_nextp; 1583 } 1584 1585 /* 1586 * free the msg in iosvc->msg_q_tail->msg 1587 * free the mem pointed to by iosvc->msg_q_tail 1588 */ 1589 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg, 1590 iosvc->msg_q_tail->msg_size); 1591 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp)); 1592 iosvc->msg_q_tail = newp; 1593 iosvc->msg_q_tail->msg_nextp = NULL; 1594 iosvc->msg_q_cur_len--; 1595 } 1596 1597 /* 1598 * enq the msg to the head 1599 */ 1600 newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP); 1601 (void) memcpy(newp, msg_ele, sizeof (*newp)); 1602 if (iosvc->msg_q_cur_len == 0) { 1603 newp->msg_nextp = NULL; 1604 iosvc->msg_q_tail = newp; 1605 } else { 1606 newp->msg_nextp = iosvc->msg_q_head; 1607 } 1608 iosvc->msg_q_head = newp; 1609 iosvc->msg_q_cur_len++; 1610 } /* etm_msg_enq_head() */ 1611 1612 /* 1613 * etm_iosvc_cleanup(): 1614 * Clean up an iosvc structure 1615 * 1) close the fmd_xprt if it has not been closed 1616 * 2) Terminate the send/recv threads 1617 * 3) If the clean_msg_q flag is set, free all fma events in the queue. In 1618 * addition, if the ckpt_remove flag is set, delete the checkpoint so that 1619 * the events are not persisted. 1620 */ 1621 static void 1622 etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, boolean_t clean_msg_q, 1623 boolean_t ckpt_remove) 1624 { 1625 1626 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1627 1628 iosvc->thr_is_dying = 1; 1629 1630 iosvc->ds_hdl = DS_INVALID_HDL; 1631 if (iosvc->fmd_xprt != NULL) { 1632 fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt); 1633 iosvc->fmd_xprt = NULL; 1634 } /* if fmd-xprt has been opened */ 1635 1636 if (iosvc->send_tid != NULL) { 1637 fmd_thr_signal(fmd_hdl, iosvc->send_tid); 1638 fmd_thr_destroy(fmd_hdl, iosvc->send_tid); 1639 iosvc->send_tid = NULL; 1640 } /* if io svc send thread was created ok */ 1641 1642 if (iosvc->recv_tid != NULL) { 1643 fmd_thr_signal(fmd_hdl, iosvc->recv_tid); 1644 fmd_thr_destroy(fmd_hdl, iosvc->recv_tid); 1645 iosvc->recv_tid = NULL; 1646 } /* if root domain recv thread was created */ 1647 1648 1649 if (clean_msg_q) { 1650 iosvc->ldom_name[0] = '\0'; 1651 1652 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1653 while (iosvc->msg_q_cur_len > 0) { 1654 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele); 1655 if (ckpt_remove == B_TRUE && 1656 msg_ele.ckpt_flag != ETM_CKPT_NOOP) { 1657 etm_ckpt_remove(fmd_hdl, &msg_ele); 1658 } 1659 fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size); 1660 } 1661 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1662 } 1663 1664 return; 1665 1666 } /* etm_iosvc_cleanup() */ 1667 1668 /* 1669 * etm_iosvc_lookup - find the iosvc for the given ldom_name (or by ds_hdl 1670 * when ldom_name is empty); if not found, create one and add to iosvc_list 1671 */ 1672 etm_iosvc_t * 1673 etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl, 1674 boolean_t iosvc_create) 1675 { 1676 uint32_t i; /* for loop var */ 1677 int32_t first_empty_slot = -1; /* 1st empty slot seen, if any */ 1678 1679 for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) { 1680 if (ldom_name[0] == '\0') { 1681 /*
1682 * search by hdl passed in 1683 * the only time this is used is at ds_unreg_cb time. 1684 * there is no ldom name, only the valid ds_hdl. 1685 * find an iosvc with the matching ds_hdl. 1686 * ignore the iosvc_create flag, should never need to 1687 * create an iosvc for ds_unreg_cb 1688 */ 1689 if (ds_hdl == iosvc_list[i].ds_hdl) { 1690 if (etm_debug_lvl >= 2) { 1691 fmd_hdl_debug(fmd_hdl, 1692 "info: found an iosvc at slot %d w/ ds_hdl %d \n", 1693 i, iosvc_list[i].ds_hdl); 1694 } 1695 if (iosvc_list[i].ldom_name[0] != '\0') 1696 if (etm_debug_lvl >= 2) { 1697 fmd_hdl_debug(fmd_hdl, 1698 "info: found an iosvc w/ ldom_name %s \n", 1699 iosvc_list[i].ldom_name); 1700 } 1701 return (&iosvc_list[i]); 1702 } else { 1703 continue; 1704 } 1705 } else if (iosvc_list[i].ldom_name[0] != '\0') { 1706 /* 1707 * this is a non-empty iosvc structure slot 1708 */ 1709 if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) { 1710 /* 1711 * found an iosvc structure that matches the 1712 * passed in ldom_name, return the ptr 1713 */ 1714 if (etm_debug_lvl >= 2) { 1715 fmd_hdl_debug(fmd_hdl, "info: found an " 1716 "iosvc at slot %d w/ ds_hdl %d \n", 1717 i, iosvc_list[i].ds_hdl); 1718 fmd_hdl_debug(fmd_hdl, "info: found an " 1719 "iosvc w/ ldom_name %s \n", 1720 iosvc_list[i].ldom_name); 1721 } 1722 return (&iosvc_list[i]); 1723 } else { 1724 /* 1725 * non-empty slot with non-matching name, 1726 * move on to next slot. 1727 */ 1728 continue; 1729 } 1730 } else { 1731 /* 1732 * found the 1st slot with ldom name being empty 1733 * remember the slot #, will be used for creating one 1734 */ 1735 if (first_empty_slot == -1) { 1736 first_empty_slot = i; 1737 } 1738 } 1739 } 1740 if (iosvc_create == B_TRUE && first_empty_slot >= 0) { 1741 /* 1742 * this is the case we need to add an iosvc at first_empty_slot 1743 * for the ldom_name at iosvc_list[first_empty_slot] 1744 */ 1745 fmd_hdl_debug(fmd_hdl, 1746 "info: create an iosvc with ldom name %s\n", 1747 ldom_name); 1748 i = first_empty_slot; 1749 (void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t)); 1750 (void) strcpy(iosvc_list[i].ldom_name, ldom_name); 1751 fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n", 1752 i, iosvc_list[i].ldom_name); 1753 return (&iosvc_list[i]); 1754 } else { 1755 return (NULL); 1756 } 1757 1758 } /* etm_iosvc_lookup() */ 1759 1760 1761 /* 1762 * etm_ckpt_remove: 1763 * remove the ckpt for the iosvc element 1764 */ 1765 static void 1766 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) { 1767 int err; /* temp error */ 1768 nvlist_t *evp = NULL; /* event pointer */ 1769 etm_proto_v1_ev_hdr_t *hdrp; /* hdr for FMA_EVENT */ 1770 char *buf; /* packed event pointer */ 1771 1772 if ((ele->ckpt_flag == ETM_CKPT_NOOP) || 1773 (etm_ldom_type != LDOM_TYPE_CONTROL)) { 1774 return; 1775 } 1776 1777 /* the pointer to the packed event in the etm message */ 1778 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg); 1779 buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp) 1780 + (1 * sizeof (hdrp->ev_lens[0]))); 1781 1782 /* unpack it, then uncheckpoint it */ 1783 if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) { 1784 fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err); 1785 return; 1786 } 1787 (void) etm_ckpt_delete(hdl, evp); 1788 nvlist_free(evp); 1789 } 1790 1791 /* 1792 * etm_send_ds_msg() 1793 * call ds_send_msg() to send the msg passed in. 1794 * pthread_cond_timedwait() for the ACK to come back. 1795 * if the ACK doesn't come in the specified time, return -EAGAIN. 1796 * otherwise, return 1.
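 *
 * Illustrative caller sketch (assumed; modeled on the retry loop in
 * etm_pack_ds_msg() below, where -EAGAIN means resend the same
 * cur_send_xid):
 *
 *	iosvc->ack_ok = 0;
 *	while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL &&
 *	    !etm_is_dying) {
 *		(void) etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc,
 *		    &msg_ele, evhdrp);
 *	}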
1797 */ 1798 int 1799 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc, 1800 etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp) 1801 { 1802 uint32_t rc; /* for return code */ 1803 1804 struct timeval tv; 1805 struct timespec timeout; 1806 1807 1808 /* 1809 * call ds_send_msg(). Return (-EAGAIN) if not successful 1810 */ 1811 if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg, 1812 msg_ele->msg_size)) != 0) { 1813 fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n", 1814 rc, evhdrp->ev_pp.pp_xid); 1815 return (-EAGAIN); 1816 } 1817 1818 /* 1819 * wait on the cv for resp msg for cur_send_xid 1820 */ 1821 (void) pthread_mutex_lock(&iosvc->msg_ack_lock); 1822 1823 (void) gettimeofday(&tv, 0); 1824 timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time; 1825 timeout.tv_nsec = 0; 1826 1827 fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n", 1828 iosvc->ldom_name); 1829 rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock, 1830 &timeout); 1831 (void) pthread_mutex_unlock(&iosvc->msg_ack_lock); 1832 fmd_hdl_debug(fmd_hdl, "info: msg_ack_cv returns with rc %d\n", rc); 1833 1834 /* 1835 * check to see if ack_ok is non-zero 1836 * if non-zero, resp msg has been received 1837 */ 1838 if (iosvc->ack_ok != 0) { 1839 /* 1840 * ACK came ok, this send is successful, 1841 * tell the caller ready to send next. 1842 * free mem alloc-ed in 1843 * etm_pack_ds_msg 1844 */ 1845 if (ckpt_remove == B_TRUE && 1846 etm_ldom_type == LDOM_TYPE_CONTROL) { 1847 etm_ckpt_remove(fmd_hdl, msg_ele); 1848 } 1849 fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size); 1850 iosvc->cur_send_xid++; 1851 return (1); 1852 } else { 1853 /* 1854 * the ACK did not come on time 1855 * tell the caller to resend cur_send_xid 1856 */ 1857 return (-EAGAIN); 1858 } /* iosvc->ack_ok != 0 */ 1859 } /* etm_send_ds_msg() */ 1860 1861 /* 1862 * both events from the fmdo_send entry point and events from the SP use 1863 * etm_proto_v1_ev_hdr_t as their header and it will be the same header for all 1864 * ds send/recv msgs. 1865 * Ideally, we should use the hdr coming with the SP FMA event. Since fmdo_send 1866 * entry point can be called before FMA events from SP, we can't rely on 1867 * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send
 * return >0 for success, or -errno value
 * Design assumption: there is one FMA event per ds msg
 */
int
etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
    etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp,
    etm_pack_msg_type_t msg_type, uint_t ckpt_opt)
{
    etm_proto_v1_ev_hdr_t *hdrp;    /* for FMA_EVENT msg */
    uint32_t *lenp;                 /* ptr to FMA event length */
    size_t evsz;                    /* packed FMA event size */
    char *buf;
    uint32_t rc = 1;                /* return code; default success */
    char *msg;                      /* body of msg to be Qed */

    etm_iosvc_q_ele_t msg_ele;      /* io svc msg Q ele */
    etm_proto_v1_ev_hdr_t *evhdrp;


    if (ev_hdrp == NULL) {
        hdrp = &iosvc_hdr;
    } else {
        hdrp = ev_hdrp;
    }

    /*
     * determine hdr_sz if 0, otherwise use the hdr_sz passed in
     */
    if (hdr_sz == 0) {
        hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));
    }

    /*
     * determine the evp size
     */
    (void) nvlist_size(evp, &evsz, NV_ENCODE_XDR);

    /* indicate 1 FMA event, no network encoding, and 0-terminate */
    lenp = &hdrp->ev_lens[0];
    *lenp = evsz;

    /*
     * the total mem to be alloc-ed (the ds msg size) is hdr_sz + evsz;
     * msg will be freed in etm_send_to_remote_root() after ds_send_msg()
     */
    msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP);


    /*
     * copy the hdr, 0-terminate the length vector, and then copy the evp
     */
    (void) memcpy(msg, hdrp, sizeof (*hdrp));
    hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg);
    lenp = &hdrp->ev_lens[0];
    lenp++;
    *lenp = 0;

    buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP);
    (void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0);
    (void) memcpy(msg + hdr_sz, buf, evsz);
    fmd_hdl_free(fmd_hdl, buf, evsz);

    fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg "
        "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name);
    msg_ele.msg = msg;
    msg_ele.msg_size = hdr_sz + evsz;
    msg_ele.ckpt_flag = ckpt_opt;

    /*
     * decide what to do with the msg:
     * if SP ereports (msg_type == SP_MSG), always enq the msg;
     * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after
     * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1)
     */
    if ((msg_type == SP_MSG) ||
        ((msg_type != SP_MSG) && (iosvc->start_sending_Q == 1))) {
        /*
         * this is the case when the msg needs to be enq-ed
         */
        (void) pthread_mutex_lock(&iosvc->msg_q_lock);
        rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele);
        if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) &&
            (etm_ldom_type == LDOM_TYPE_CONTROL)) {
            (void) etm_ckpt_add(fmd_hdl, evp);
        }
        if (iosvc->msg_q_cur_len == 1)
            (void) pthread_cond_signal(&iosvc->msg_q_cv);
        (void) pthread_mutex_unlock(&iosvc->msg_q_lock);
    } else {
        /*
         * fmd RDWR xprt protocol startup msgs, send it now!
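         * each attempt below stamps the msg with cur_send_xid + 1 and
         * blocks in etm_send_ds_msg() until the ACK for that xid comes
         * back, the ds hdl goes invalid, or etm is dying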
         */
        iosvc->ack_ok = 0;
        evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg);
        evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
        while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL &&
            !etm_is_dying) {
            if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele,
                evhdrp) < 0) {
                continue;
            }
        }
        if (msg_type == FMD_XPRT_RUN_MSG)
            iosvc->start_sending_Q = 1;
    }

    return (rc);

} /* etm_pack_ds_msg() */

/*
 * Design_Note: For all etm_resp_q_*() functions and etm_resp_q_* globals,
 *		the mutex etm_resp_q_lock must be held by the caller.
 */

/*
 * etm_resp_q_enq - add element to tail of ETM responder queue
 * etm_resp_q_deq - del element from head of ETM responder queue
 *
 * return >0 for success, or -errno value
 */

static int
etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
{
    etm_resp_q_ele_t *newp;    /* ptr to new resp q ele */

    if (etm_resp_q_cur_len >= etm_resp_q_max_len) {
        fmd_hdl_debug(hdl, "warning: enq to full responder queue\n");
        etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++;
        return (-E2BIG);
    }

    newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
    (void) memcpy(newp, rqep, sizeof (*newp));
    newp->rqe_nextp = NULL;

    if (etm_resp_q_cur_len == 0) {
        etm_resp_q_head = newp;
    } else {
        etm_resp_q_tail->rqe_nextp = newp;
    }
    etm_resp_q_tail = newp;
    etm_resp_q_cur_len++;
    etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;

    return (1);

} /* etm_resp_q_enq() */

static int
etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep)
{
    etm_resp_q_ele_t *oldp;    /* ptr to old resp q ele */

    if (etm_resp_q_cur_len == 0) {
        fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
        etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++;
        return (-ENOENT);
    }

    (void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
    rqep->rqe_nextp = NULL;

    oldp = etm_resp_q_head;
    etm_resp_q_head = etm_resp_q_head->rqe_nextp;
    fmd_hdl_free(hdl, oldp, sizeof (*oldp));

    etm_resp_q_cur_len--;
    etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
    if (etm_resp_q_cur_len == 0) {
        etm_resp_q_tail = NULL;
    }

    return (1);

} /* etm_resp_q_deq() */

/*
 * etm_maybe_enq_response - check the given message header to see
 *				whether a response has been requested,
 *				if so then enqueue the given connection
 *				and header for later transport by the
 *				responder thread as an ETM response msg,
 *				return 0 for nop, >0 success, or -errno value
 */

static ssize_t
etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, uint32_t hdr_sz, int32_t resp_code)
{
    ssize_t rv;                /* ret val */
    etm_proto_v1_pp_t *ppp;    /* protocol preamble ptr */
    uint8_t orig_msg_type;     /* orig hdr's message type */
    uint32_t orig_timeout;     /* orig hdr's timeout */
    etm_resp_q_ele_t rqe;      /* responder queue ele */

    ppp = hdrp;
    orig_msg_type = ppp->pp_msg_type;
    orig_timeout = ppp->pp_timeout;

    /* bail out now if no response is to be sent */

    if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
        return (0);
    } /* if a nop */

    if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
        (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
        (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
        fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
            orig_msg_type);
        return (-EINVAL);
    } /* if inappropriate hdr for a response msg */

    /*
     * enqueue the msg hdr and nudge the responder thread
     * if the responder queue was previously empty
     */

    rqe.rqe_conn = conn;
    rqe.rqe_hdrp = hdrp;
    rqe.rqe_hdr_sz = hdr_sz;
    rqe.rqe_resp_code = resp_code;

    (void) pthread_mutex_lock(&etm_resp_q_lock);
    rv = etm_resp_q_enq(hdl, &rqe);
    if (etm_resp_q_cur_len == 1)
        (void) pthread_cond_signal(&etm_resp_q_cv);
    (void) pthread_mutex_unlock(&etm_resp_q_lock);

    return (rv);

} /* etm_maybe_enq_response() */

/*
 * Design_Note: We rely on the fact that all message types have
 *		a common protocol preamble; if this fact should
 *		ever change it may break the code below. We also
 *		rely on the fact that FMA_EVENT and CONTROL headers
 *		returned by etm_hdr_read() will be sized large enough
 *		to reuse them as RESPONSE headers if the remote endpt
 *		asked for a response via the pp_timeout field.
 */

/*
 * etm_send_response - use the given message header and response code
 *			to construct an appropriate response message,
 *			and send it back on the given connection,
 *			return >0 for success, or -errno value
 */

static ssize_t
etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, int32_t resp_code)
{
    ssize_t rv;                          /* ret val */
    etm_proto_v1_pp_t *ppp;              /* protocol preamble ptr */
    etm_proto_v1_resp_hdr_t *resp_hdrp;  /* for RESPONSE msg */
    uint8_t resp_body[4];                /* response body if needed */
    uint8_t *resp_msg;                   /* response hdr+body */
    size_t hdr_sz;                       /* sizeof response hdr */
    uint8_t orig_msg_type;               /* orig hdr's message type */

    ppp = hdrp;
    orig_msg_type = ppp->pp_msg_type;

    if (etm_debug_lvl >= 2) {
        etm_show_time(hdl, "ante resp send");
    }

    /* reuse the given header as a response header */

    resp_hdrp = hdrp;
    resp_hdrp->resp_code = resp_code;
    resp_hdrp->resp_len = 0;    /* default is empty body */

    if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
        (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
        resp_body[0] = ETM_PROTO_V2;
        resp_body[1] = ETM_PROTO_V3;
        resp_body[2] = 0;
        resp_hdrp->resp_len = 3;
    } /* if should send our/negotiated proto ver in resp body */

    /* respond with the proto ver that was negotiated */

    resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
    resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
    resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;

    /*
     * send the whole response msg in one write, header and body;
     * avoid the alloc-and-copy if we can reuse the hdr as the msg,
     * ie, if the body is empty. update the response stats.
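     *
     * wire layout of the response (body present only when answering a
     * version negotiation request):
     *
     *	+--------------------------+
     *	| etm_proto_v1_resp_hdr_t  |  hdr_sz bytes
     *	+--------------------------+
     *	| resp_body                |  resp_len bytes (0 or 3 here)
     *	+--------------------------+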
2166 */ 2167 2168 hdr_sz = sizeof (etm_proto_v1_resp_hdr_t); 2169 2170 resp_msg = hdrp; 2171 if (resp_hdrp->resp_len > 0) { 2172 resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len, 2173 FMD_SLEEP); 2174 (void) memcpy(resp_msg, resp_hdrp, hdr_sz); 2175 (void) memcpy(resp_msg + hdr_sz, resp_body, 2176 resp_hdrp->resp_len); 2177 } 2178 2179 (void) pthread_mutex_lock(&etm_write_lock); 2180 rv = etm_io_op(hdl, "bad io write on resp msg", conn, 2181 resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR); 2182 (void) pthread_mutex_unlock(&etm_write_lock); 2183 if (rv < 0) { 2184 goto func_ret; 2185 } 2186 2187 etm_stats.etm_wr_hdr_response.fmds_value.ui64++; 2188 etm_stats.etm_wr_body_response.fmds_value.ui64++; 2189 2190 fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport " 2191 "xid 0x%x code %d len %u\n", 2192 (unsigned int)resp_hdrp->resp_pp.pp_proto_ver, 2193 resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code, 2194 resp_hdrp->resp_len); 2195 func_ret: 2196 2197 if (resp_hdrp->resp_len > 0) { 2198 fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len); 2199 } 2200 if (etm_debug_lvl >= 2) { 2201 etm_show_time(hdl, "post resp send"); 2202 } 2203 return (rv); 2204 2205 } /* etm_send_response() */ 2206 2207 /* 2208 * etm_reset_xport - reset the transport layer (via fini;init) 2209 * presumably for an error condition we cannot 2210 * otherwise recover from (ex: hung LDC channel) 2211 * 2212 * caveats - no checking/locking is done to ensure an existing connection 2213 * is idle during an xport reset; we don't want to deadlock 2214 * and presumably the transport is stuck/unusable anyway 2215 */ 2216 2217 static void 2218 etm_reset_xport(fmd_hdl_t *hdl) 2219 { 2220 (void) etm_xport_fini(hdl); 2221 (void) etm_xport_init(hdl); 2222 etm_stats.etm_reset_xport.fmds_value.ui64++; 2223 2224 } /* etm_reset_xport() */ 2225 2226 /* 2227 * etm_handle_new_conn - receive an ETM message sent from the other end via 2228 * the given open connection, pull out any FMA events 2229 * and post them to the local FMD (or handle any ETM 2230 * control or response msg); when done, close the 2231 * connection 2232 */ 2233 2234 static void 2235 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn) 2236 { 2237 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 2238 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 2239 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2240 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 2241 etm_iosvc_t *iosvc; /* iosvc data structure */ 2242 int32_t resp_code; /* response code */ 2243 ssize_t enq_rv; /* resp_q enqueue status */ 2244 size_t hdr_sz; /* sizeof header */ 2245 size_t evsz; /* FMA event size */ 2246 uint8_t *body_buf; /* msg body buffer */ 2247 uint32_t body_sz; /* sizeof body_buf */ 2248 uint32_t ev_cnt; /* count of FMA events */ 2249 uint8_t *bp; /* byte ptr within body_buf */ 2250 nvlist_t *evp; /* ptr to unpacked FMA event */ 2251 char *class; /* FMA event class */ 2252 ssize_t i, n; /* gen use */ 2253 int should_reset_xport; /* bool to reset xport */ 2254 char ldom_name[MAX_LDOM_NAME]; /* ldom name */ 2255 int rc; /* return code */ 2256 uint64_t did; /* domain id */ 2257 2258 2259 if (etm_debug_lvl >= 2) { 2260 etm_show_time(hdl, "ante conn handle"); 2261 } 2262 fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn); 2263 2264 should_reset_xport = 0; 2265 ev_hdrp = NULL; 2266 ctl_hdrp = NULL; 2267 resp_hdrp = NULL; 2268 sa_hdrp = NULL; 2269 body_buf = NULL; 2270 class = NULL; 2271 evp = NULL; 2272 resp_code = 0; /* default is success 
    enq_rv = 0;       /* default is nop, ie, did not enqueue */

    /* read a network-decoded message header from the connection */

    if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
        /* errno assumed set by above call */
        should_reset_xport = (errno == ENOTACTIVE);
        fmd_hdl_debug(hdl, "error: FMA event dropped: "
            "bad hdr read errno %d\n", errno);
        etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
        goto func_ret;
    }

    /*
     * handle the message based on its preamble pp_msg_type,
     * which is known to be valid from etm_hdr_read() checks
     */

    if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {

        fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");

        /* allocate a buf large enough for whole body / all FMA events */

        body_sz = 0;
        for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
            body_sz += ev_hdrp->ev_lens[i];
        } /* for summing sizes of all FMA events */
        if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64)
            etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i;
        ev_cnt = i;

        if (etm_debug_lvl >= 1) {
            fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n",
                ev_cnt, body_sz);
        }

        body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

        /* read all the FMA events at once */

        if ((n = etm_io_op(hdl, "FMA event dropped: "
            "bad io read on event bodies", conn, body_buf, body_sz,
            ETM_IO_OP_RD)) < 0) {
            should_reset_xport = (n == -ENOTACTIVE);
            etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
            goto func_ret;
        }

        etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
        etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;

        /*
         * now that we've read the entire ETM msg from the conn,
         * which avoids later ETM protocol framing errors if we didn't,
         * check for a dup msg/xid against the last good FMD posting;
         * if a dup, resend the response but skip the repost to FMD
         */

        if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) {
            enq_rv = etm_maybe_enq_response(hdl, conn,
                ev_hdrp, hdr_sz, 0);
            fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
                "xid 0x%x\n", etm_xid_posted_logged_ev);
            etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
            goto func_ret;
        }

        /* unpack each FMA event and post it to FMD */

        bp = body_buf;
        for (i = 0; i < ev_cnt; i++) {
            if ((n = nvlist_unpack((char *)bp,
                ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
                resp_code = (-n);
                enq_rv = etm_maybe_enq_response(hdl, conn,
                    ev_hdrp, hdr_sz, resp_code);
                fmd_hdl_error(hdl, "error: FMA event dropped: "
                    "bad event body unpack errno %d\n", n);
                if (etm_debug_lvl >= 2) {
                    fmd_hdl_debug(hdl, "info: FMA event "
                        "hexdump %d bytes:\n",
                        ev_hdrp->ev_lens[i]);
                    etm_hexdump(hdl, bp,
                        ev_hdrp->ev_lens[i]);
                }
                etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
                    ui64++;
                etm_stats.etm_rd_drop_fmaevent.fmds_value.
                    ui64++;
                bp += ev_hdrp->ev_lens[i];
                continue;
            }

            if (etm_debug_lvl >= 1) {
                (void) nvlist_lookup_string(evp, FM_CLASS,
                    &class);
                if (class == NULL) {
                    class = "NULL";
                }
                fmd_hdl_debug(hdl, "info: FMA event %p "
                    "class %s\n", evp, class);
            }

            rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR);
            fmd_hdl_debug(hdl,
                "info: evp size before pack ds msg %d\n", evsz);
            ldom_name[0] = '\0';
            rc = etm_filter_find_ldom_id(hdl, evp, ldom_name,
                MAX_LDOM_NAME, &did);

            /*
             * if rc is zero and the ldom_name is not "primary",
             * the evp belongs to a root domain: put the evp in
             * an outgoing etm queue.
             * in all other cases, whether ldom_name is primary
             * or no ldom name can be found, call
             * etm_post_to_fmd()
             */
            if ((rc == 0) && strcmp(ldom_name, "primary") &&
                strcmp(ldom_name, "")) {
                /*
                 * use the ldom_name, guaranteed at this
                 * point to be a valid/non-empty ldom name,
                 * to find the iosvc data;
                 * add an iosvc struct if none can be found
                 */
                (void) pthread_mutex_lock(&iosvc_list_lock);
                iosvc = etm_iosvc_lookup(hdl, ldom_name,
                    DS_INVALID_HDL, B_TRUE);
                (void) pthread_mutex_unlock(&iosvc_list_lock);
                if (iosvc == NULL) {
                    fmd_hdl_debug(hdl,
                        "error: can't find iosvc for ldom "
                        "name %s\n", ldom_name);
                } else {
                    resp_code = 0;
                    (void) etm_pack_ds_msg(hdl, iosvc,
                        ev_hdrp, hdr_sz, evp,
                        SP_MSG, ETM_CKPT_SAVE);
                    /*
                     * call the new fmd_xprt_log()
                     */
                    fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0);
                    etm_xid_posted_logged_ev =
                        ev_hdrp->ev_pp.pp_xid;
                }
            } else {
                /*
                 * post the fma event to the control fmd
                 */
                resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt,
                    evp);
                if (resp_code >= 0) {
                    etm_xid_posted_logged_ev =
                        ev_hdrp->ev_pp.pp_xid;
                }
            }

            evp = NULL;
            enq_rv = etm_maybe_enq_response(hdl, conn,
                ev_hdrp, hdr_sz, resp_code);
            bp += ev_hdrp->ev_lens[i];
        } /* foreach FMA event in the body buffer */

    } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {

        ctl_hdrp = (void *)ev_hdrp;

        fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
        if (etm_debug_lvl >= 1) {
            fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
                (int)ctl_hdrp->ctl_pp.pp_sub_type,
                ctl_hdrp->ctl_pp.pp_xid);
        }

        /*
         * if we have a VER_NEGOT_REQ, read the body and validate
         * the protocol version set contained therein;
         * otherwise we have a PING_REQ (which has no body)
         * and we [also] fall thru to the code which sends a
         * response msg if the pp_timeout field requested one
         */

        if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) {

            body_sz = ctl_hdrp->ctl_len;
            body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

            if ((n = etm_io_op(hdl, "bad io read on ctl body",
                conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
                should_reset_xport = (n == -ENOTACTIVE);
                goto func_ret;
            }

            /* complain if the version set is completely incompatible */

            for (i = 0; i < body_sz; i++) {
                if ((body_buf[i] == ETM_PROTO_V1) ||
                    (body_buf[i] == ETM_PROTO_V2) ||
                    (body_buf[i] == ETM_PROTO_V3)) {
                    break;
                }
            }
            if (i >= body_sz) {
                etm_stats.etm_ver_bad.fmds_value.ui64++;
                resp_code = (-EPROTO);
            }

        } /* if got version set request */

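        /*
         * (the version set scanned above is a vector of proto version
         * bytes, presumably laid out like the response body built in
         * etm_send_response(), e.g. { ETM_PROTO_V3, ETM_PROTO_V2, 0 })
         */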
        etm_stats.etm_rd_body_control.fmds_value.ui64++;

        enq_rv = etm_maybe_enq_response(hdl, conn,
            ctl_hdrp, hdr_sz, resp_code);

    } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {

        resp_hdrp = (void *)ev_hdrp;

        fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
        if (etm_debug_lvl >= 1) {
            fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
                (int)resp_hdrp->resp_pp.pp_xid);
        }

        body_sz = resp_hdrp->resp_len;
        body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

        if ((n = etm_io_op(hdl, "bad io read on resp len",
            conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
            should_reset_xport = (n == -ENOTACTIVE);
            goto func_ret;
        }

        etm_stats.etm_rd_body_response.fmds_value.ui64++;

        /*
         * look up the xid to interpret the response body
         *
         * ping is a nop; for ver negot confirm that a supported
         * protocol version was negotiated and remember which one
         */

        if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
            (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) {
            etm_stats.etm_xid_bad.fmds_value.ui64++;
            goto func_ret;
        }

        if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) {
            if ((body_buf[0] < ETM_PROTO_V1) ||
                (body_buf[0] > ETM_PROTO_V3)) {
                etm_stats.etm_ver_bad.fmds_value.ui64++;
                goto func_ret;
            }
            etm_resp_ver = body_buf[0];
        } /* if have resp to last req to negotiate proto ver */

    } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {

        sa_hdrp = (void *)ev_hdrp;

        fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
        if (etm_debug_lvl >= 1) {
            fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
                (int)sa_hdrp->sa_pp.pp_sub_type,
                sa_hdrp->sa_pp.pp_xid);
        }

        body_sz = sa_hdrp->sa_len;
        body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

        if ((n = etm_io_op(hdl, "bad io read on sa body",
            conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
            should_reset_xport = (n == -ENOTACTIVE);
            goto func_ret;
        }

        etm_stats.etm_rd_body_alert.fmds_value.ui64++;

        /*
         * now that we've read the entire ETM msg from the conn,
         * which avoids later ETM protocol framing errors if we didn't,
         * check for a dup msg/xid against the last good syslog posting;
         * if a dup, resend the response but skip the repost to syslog
         */

        if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) {
            enq_rv = etm_maybe_enq_response(hdl, conn,
                sa_hdrp, hdr_sz, 0);
            fmd_hdl_debug(hdl, "info: skipping dup ALERT post "
                "xid 0x%x\n", etm_xid_posted_sa);
            etm_stats.etm_rd_dup_alert.fmds_value.ui64++;
            goto func_ret;
        }

        resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
            body_sz, body_buf);
        if (resp_code >= 0) {
            etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid;
        }
        enq_rv = etm_maybe_enq_response(hdl, conn,
            sa_hdrp, hdr_sz, resp_code);
    } /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */

func_ret:

    if (etm_debug_lvl >= 2) {
        etm_show_time(hdl, "post conn handle");
    }

    /*
     * if no responder ele was enqueued, close the conn now
     * and free the ETM msg hdr; the ETM msg body is not needed
     * by the responder thread and should always be freed here
     */

    if (enq_rv <= 0) {
        (void) etm_conn_close(hdl, "bad conn close after msg recv",
            conn);
        if (ev_hdrp != NULL) {
            fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
        }
    }
    if (body_buf != NULL) {
        fmd_hdl_free(hdl, body_buf, body_sz);
    }
    if (should_reset_xport) {
        etm_reset_xport(hdl);
    }
} /* etm_handle_new_conn() */

/*
 * etm_handle_bad_accept - recover from a failed connection acceptance
 */

static void
etm_handle_bad_accept(fmd_hdl_t *hdl, int nev)
{
    int should_reset_xport;    /* bool to reset xport */

    should_reset_xport = (nev == -ENOTACTIVE);
    fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev));
    etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
    (void) etm_sleep(etm_bad_acc_to_sec);    /* avoid spinning CPU */
    if (should_reset_xport) {
        etm_reset_xport(hdl);
    }
} /* etm_handle_bad_accept() */

/*
 * etm_server - loop forever accepting new connections
 *		using the given FMD handle,
 *		handling any ETM msgs sent from the other side
 *		via each such connection
 */

static void
etm_server(void *arg)
{
    etm_xport_conn_t conn;    /* connection handle */
    int nev;                  /* -errno val */
    fmd_hdl_t *hdl;           /* FMD handle */

    hdl = arg;

    fmd_hdl_debug(hdl, "info: connection server starting\n");

    /*
     * restore the checkpointed events and dispatch them before starting
     * to receive more events from the SP
     */
    etm_ckpt_recover(hdl);

    while (!etm_is_dying) {

        if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
            /* errno assumed set by above call */
            nev = (-errno);
            if (etm_is_dying) {
                break;
            }
            etm_handle_bad_accept(hdl, nev);
            continue;
        }

        /* handle the new message/connection, closing it when done */

        etm_handle_new_conn(hdl, conn);

    } /* while accepting new connections until ETM dies */

    /* ETM is dying (probably due to "fmadm unload etm") */

    fmd_hdl_debug(hdl, "info: connection server is dying\n");

} /* etm_server() */

/*
 * etm_responder - loop forever waiting for new responder queue elements
 *		to be enqueued, for each one constructing and sending
 *		an ETM response msg to the other side, and closing its
 *		associated connection when appropriate
 *
 * this thread exists to ensure that the etm_server() thread
 * never pends indefinitely waiting on the xport write lock, and is
 * hence always available to accept new connections and handle
 * incoming messages
 *
 * this design relies on the fact that each connection accepted and
 * returned by the ETM xport layer is unique, and each can be closed
 * independently of the others while multiple connections are
 * outstanding
 */

static void
etm_responder(void *arg)
{
    ssize_t n;               /* gen use */
    fmd_hdl_t *hdl;          /* FMD handle */
    etm_resp_q_ele_t rqe;    /* responder queue ele */

    hdl = arg;

    fmd_hdl_debug(hdl, "info: responder server starting\n");

    while (!etm_is_dying) {

        (void) pthread_mutex_lock(&etm_resp_q_lock);

        while (etm_resp_q_cur_len == 0) {
            (void) pthread_cond_wait(&etm_resp_q_cv,
                &etm_resp_q_lock);
            if (etm_is_dying) {
                (void) pthread_mutex_unlock(&etm_resp_q_lock);
                goto func_ret;
            }
        } /* while the responder queue is empty, wait to be nudged */

        /*
         * for every responder ele that has been enqueued,
         * dequeue and send it as an ETM response msg,
         * closing its associated conn and freeing its hdr
         *
         * enter the queue-draining loop holding the responder
         * queue lock, but do not hold the lock indefinitely
         * (the actual send may pend us indefinitely),
         * so that other threads will never pend for long
         * trying to enqueue a new element
         */

        while (etm_resp_q_cur_len > 0) {

            (void) etm_resp_q_deq(hdl, &rqe);
            (void) pthread_mutex_unlock(&etm_resp_q_lock);

            if ((n = etm_send_response(hdl, rqe.rqe_conn,
                rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
                fmd_hdl_error(hdl, "error: bad resp send "
                    "errno %d\n", (-n));
            }

            (void) etm_conn_close(hdl, "bad conn close after resp",
                rqe.rqe_conn);
            fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);

            if (etm_is_dying) {
                goto func_ret;
            }
            (void) pthread_mutex_lock(&etm_resp_q_lock);

        } /* while draining the responder queue */

        (void) pthread_mutex_unlock(&etm_resp_q_lock);

    } /* while awaiting and sending resp msgs until ETM dies */

func_ret:

    /* ETM is dying (probably due to "fmadm unload etm") */

    fmd_hdl_debug(hdl, "info: responder server is dying\n");

    (void) pthread_mutex_lock(&etm_resp_q_lock);
    if (etm_resp_q_cur_len > 0) {
        fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
            (int)etm_resp_q_cur_len);
        while (etm_resp_q_cur_len > 0) {
            (void) etm_resp_q_deq(hdl, &rqe);
            (void) etm_conn_close(hdl, "bad conn close after deq",
                rqe.rqe_conn);
            fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
        }
    }
    (void) pthread_mutex_unlock(&etm_resp_q_lock);

} /* etm_responder() */

static void *
etm_init_alloc(size_t size)
{
    return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
}

static void
etm_init_free(void *addr, size_t size)
{
    fmd_hdl_free(init_hdl, addr, size);
}

/*
 * --------------------- root ldom support functions -------------------------
 */

/*
 * use a static array async_event_q, instead of a dynamically allocated mem
 * queue, for etm_async_q_enq() and etm_async_q_deq().
 * these do not run in an fmd aux thread, so the fmd_hdl_* funcs can't be
 * used.
 * the caller needs to grab the mutex lock before calling either func.
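 *
 * a typical producer, as in the ds/ldom callbacks below:
 *
 *	(void) pthread_mutex_lock(&etm_async_event_q_lock);
 *	(void) etm_async_q_enq(&async_ele);
 *	if (etm_async_q_cur_len == 1)
 *		(void) pthread_cond_signal(&etm_async_event_q_cv);
 *	(void) pthread_mutex_unlock(&etm_async_event_q_lock);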
 * return >0 for success, or -errno value
 */
static int
etm_async_q_enq(etm_async_event_ele_t *async_e)
{

    if (etm_async_q_cur_len >= etm_async_q_max_len) {
        /* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */
        return (-E2BIG);
    }

    (void) memcpy(&async_event_q[etm_async_q_tail], async_e,
        sizeof (*async_e));

    etm_async_q_tail++;
    if (etm_async_q_tail == etm_async_q_max_len) {
        etm_async_q_tail = 0;
    }
    etm_async_q_cur_len++;

    /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */

    return (1);

} /* etm_async_q_enq() */


static int
etm_async_q_deq(etm_async_event_ele_t *async_e)
{

    if (etm_async_q_cur_len == 0) {
        /* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */
        return (-ENOENT);
    }

    (void) memcpy(async_e, &async_event_q[etm_async_q_head],
        sizeof (*async_e));

    etm_async_q_head++;
    if (etm_async_q_head == etm_async_q_max_len) {
        etm_async_q_head = 0;
    }
    etm_async_q_cur_len--;

    return (1);
} /* etm_async_q_deq */


/*
 * set up the fields in an iosvc at DS_REG_CB time
 */
void
etm_iosvc_setup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
    etm_async_event_ele_t *async_e)
{
    iosvc->ds_hdl = async_e->ds_hdl;
    iosvc->cur_send_xid = 0;
    iosvc->xid_posted_ev = 0;
    iosvc->start_sending_Q = 0;

    /*
     * open the fmd xprt if it
     * hasn't been previously opened
     */
    fmd_hdl_debug(fmd_hdl, "info: before fmd_xprt_open ldom_name is %s\n",
        async_e->ldom_name);

    if (iosvc->fmd_xprt == NULL) {
        iosvc->fmd_xprt = fmd_xprt_open(fmd_hdl, flags, NULL, iosvc);
    }

    iosvc->thr_is_dying = 0;
    if (iosvc->recv_tid == NULL) {
        iosvc->recv_tid = fmd_thr_create(fmd_hdl,
            etm_recv_from_remote_root, iosvc);
    }
    if (iosvc->send_tid == NULL) {
        iosvc->send_tid = fmd_thr_create(fmd_hdl,
            etm_send_to_remote_root, iosvc);
    }
} /* etm_iosvc_setup() */


/*
 * ds userland interface ds_reg_cb callback func
 */

/* ARGSUSED */
static void
etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver,
    ds_domain_hdl_t dhdl)
{
    etm_async_event_ele_t async_ele;


    /*
     * do the version check here:
     * check the ver received against etm_iosvc_vers
     */
    if (etm_iosvc_vers[0].major != ver->major ||
        etm_iosvc_vers[0].minor != ver->minor) {
        /*
         * can't log an fmd debug msg,
         * not running in an fmd aux thread
         */
        return;
    }

    /*
     * the callback should have a valid ldom_name;
     * can't log an fmd debug msg here since this is not in an fmd aux
     * thread;
     * the fmd debug msg is logged in etm_async_event_handler()
     */
    async_ele.ds_hdl = ds_hdl;
    async_ele.dhdl = dhdl;
    async_ele.ldom_name[0] = '\0';
    async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB;
    (void) pthread_mutex_lock(&etm_async_event_q_lock);
    (void) etm_async_q_enq(&async_ele);
    if (etm_async_q_cur_len == 1)
        (void) pthread_cond_signal(&etm_async_event_q_cv);
    (void) pthread_mutex_unlock(&etm_async_event_q_lock);

} /* etm_iosvc_reg_handler */


/*
 * ds userland interface ds_unreg_cb callback func
 */

/*ARGSUSED*/
static void
etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg)
{
    etm_async_event_ele_t async_ele;

    /*
     * fill in async_ele and enqueue async_ele
     */
    async_ele.ldom_name[0] = '\0';
    async_ele.ds_hdl = hdl;
    async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB;
    (void) pthread_mutex_lock(&etm_async_event_q_lock);
    (void) etm_async_q_enq(&async_ele);
    if (etm_async_q_cur_len == 1)
        (void) pthread_cond_signal(&etm_async_event_q_cv);
    (void) pthread_mutex_unlock(&etm_async_event_q_lock);
} /* etm_iosvc_unreg_handler */

/*
 * ldom event registration callback func
 */

/* ARGSUSED */
static void
ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data)
{
    etm_async_event_ele_t async_ele;

    /*
     * the callback will have a valid ldom_name
     */
    async_ele.ldom_name[0] = '\0';
    if (ldom_name)
        (void) strcpy(async_ele.ldom_name, ldom_name);
    async_ele.ds_hdl = DS_INVALID_HDL;

    /*
     * fill in async_ele and enq async_ele
     */
    switch (event) {
    case LDOM_EVENT_BIND:
        async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND;
        break;
    case LDOM_EVENT_UNBIND:
        async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND;
        break;
    case LDOM_EVENT_ADD:
        async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD;
        break;
    case LDOM_EVENT_REMOVE:
        async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE;
        break;
    default:
        /*
         * for all other ldom events, do nothing
         */
        return;
    } /* switch (event) */

    (void) pthread_mutex_lock(&etm_async_event_q_lock);
    (void) etm_async_q_enq(&async_ele);
    if (etm_async_q_cur_len == 1)
        (void) pthread_cond_signal(&etm_async_event_q_cv);
    (void) pthread_mutex_unlock(&etm_async_event_q_lock);

} /* ldom_event_handler */


/*
 * This runs as an fmd aux thread.
 * This is the func that actually handles the events, which include:
 * 1. ldom events; ldom events exist on the Control Domain only
 * 2. any DS userland callback funcs
 * these events are already Q-ed in the async_event_ele_q;
 * deQ and process the events accordingly
 */
static void
etm_async_event_handler(void *arg)
{

    fmd_hdl_t *fmd_hdl = (fmd_hdl_t *)arg;
    etm_iosvc_t *iosvc;    /* ptr 2 iosvc struct */
    etm_async_event_ele_t async_e;

    fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n");
    /*
     * while etm is not dying, handle Q len > 0
     */
    while (!etm_is_dying) {
        /*
         * grab the lock to check the Q len
         */
        (void) pthread_mutex_lock(&etm_async_event_q_lock);
        fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n",
            etm_async_q_cur_len);

        while (etm_async_q_cur_len > 0) {
            (void) etm_async_q_deq(&async_e);
            (void) pthread_mutex_unlock(&etm_async_event_q_lock);
            fmd_hdl_debug(fmd_hdl,
                "info: processing an async event type %d ds_hdl"
                " %d\n", async_e.event_type, async_e.ds_hdl);
            if (async_e.ldom_name[0] != '\0') {
                fmd_hdl_debug(fmd_hdl,
                    "info: processing async evt ldom_name %s\n",
                    async_e.ldom_name);
            }

            /*
             * at this point, if async_e.ldom_name is not empty,
             * we have a valid iosvc struct ptr.
             * the only time async_e.ldom_name is empty is at
             * ds_unreg_cb()
             */
            switch (async_e.event_type) {
            case ETM_ASYNC_EVENT_LDOM_UNBIND:
            case ETM_ASYNC_EVENT_LDOM_REMOVE:
                /*
                 * we have a valid ldom_name;
                 * look it up w/ etm_iosvc_lookup(ldom_name);
                 * do nothing if an iosvc can't be found,
                 * as there is no iosvc clean up to do
                 */
                (void) pthread_mutex_lock(
                    &iosvc_list_lock);
                iosvc = etm_iosvc_lookup(fmd_hdl,
                    async_e.ldom_name,
                    async_e.ds_hdl, B_FALSE);
                if (iosvc == NULL) {
                    fmd_hdl_debug(fmd_hdl,
                        "error: can't find iosvc for ldom "
                        "name %s\n",
                        async_e.ldom_name);
                    (void) pthread_mutex_unlock(
                        &iosvc_list_lock);
                    break;
                }
                /*
                 * clean up the queue: delete all messages and
                 * do not persist checkpointed fma events
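                 * (judging from the call sites in this func,
                 * the two boolean args to etm_iosvc_cleanup()
                 * select msg Q teardown and ckpt purge,
                 * respectively; this is an inference, as its
                 * definition is elsewhere in this module)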
                 */
                etm_iosvc_cleanup(fmd_hdl, iosvc, B_TRUE,
                    B_TRUE);
                (void) pthread_mutex_unlock(
                    &iosvc_list_lock);
                break;

            case ETM_ASYNC_EVENT_LDOM_BIND:

                /*
                 * create the iosvc if it has not been
                 * created;
                 * async_e.ds_hdl is invalid,
                 * async_e.ldom_name is a valid ldom_name
                 */
                (void) pthread_mutex_lock(
                    &iosvc_list_lock);
                iosvc = etm_iosvc_lookup(fmd_hdl,
                    async_e.ldom_name,
                    async_e.ds_hdl, B_TRUE);
                if (iosvc == NULL) {
                    fmd_hdl_debug(fmd_hdl,
                        "error: can't create iosvc for "
                        "async event %d\n",
                        async_e.event_type);
                    (void) pthread_mutex_unlock(
                        &iosvc_list_lock);
                    break;
                }
                (void) strcpy(iosvc->ldom_name,
                    async_e.ldom_name);
                iosvc->ds_hdl = async_e.ds_hdl;
                (void) pthread_mutex_unlock(
                    &iosvc_list_lock);
                break;

            case ETM_ASYNC_EVENT_DS_REG_CB:
                if (etm_ldom_type == LDOM_TYPE_CONTROL) {
                    /*
                     * find the root ldom name from the
                     * ldom domain hdl/id
                     */
                    if (etm_filter_find_ldom_name(
                        fmd_hdl, async_e.dhdl,
                        async_e.ldom_name,
                        MAX_LDOM_NAME) != 0) {
                        fmd_hdl_debug(fmd_hdl,
                            "error: can't find root "
                            "domain name from did %d\n",
                            async_e.dhdl);
                        break;
                    } else {
                        fmd_hdl_debug(fmd_hdl,
                            "info: etm_filter_find_"
                            "ldom_name returned %s\n",
                            async_e.ldom_name);
                    }
                    /*
                     * now we should have a valid
                     * root domain name;
                     * look up the iosvc struct
                     * associated with the ldom_name
                     * and init the iosvc struct
                     */
                    (void) pthread_mutex_lock(
                        &iosvc_list_lock);
                    iosvc = etm_iosvc_lookup(
                        fmd_hdl, async_e.ldom_name,
                        async_e.ds_hdl, B_TRUE);
                    if (iosvc == NULL) {
                        fmd_hdl_debug(fmd_hdl,
                            "error: can't create iosvc "
                            "for async event %d\n",
                            async_e.event_type);
                        (void) pthread_mutex_unlock(
                            &iosvc_list_lock);
                        break;
                    }

                    etm_iosvc_setup(fmd_hdl, iosvc,
                        &async_e);
                    (void) pthread_mutex_unlock(
                        &iosvc_list_lock);
                } else {
                    iosvc = &io_svc;
                    (void) strcpy(iosvc->ldom_name,
                        async_e.ldom_name);

                    etm_iosvc_setup(fmd_hdl, iosvc,
                        &async_e);
                }
                break;

            case ETM_ASYNC_EVENT_DS_UNREG_CB:
                /*
                 * decide which iosvc struct to perform
                 * this UNREG callback on
                 */
                if (etm_ldom_type == LDOM_TYPE_CONTROL) {
                    (void) pthread_mutex_lock(
                        &iosvc_list_lock);
                    /*
                     * look up the iosvc struct w/
                     * the ds_hdl
                     */
                    iosvc = etm_iosvc_lookup(
                        fmd_hdl, async_e.ldom_name,
                        async_e.ds_hdl, B_FALSE);
                    if (iosvc == NULL) {
                        fmd_hdl_debug(fmd_hdl,
                            "error: can't find iosvc "
                            "for async event %d\n",
                            async_e.event_type);
                        (void) pthread_mutex_unlock(
                            &iosvc_list_lock);
                        break;
                    }

                    /*
                     * the ds_hdl and fmd_xprt_open
                     * go hand in hand:
                     * after unreg_cb,
                     * ds_hdl is INVALID and
                     * fmd_xprt is closed;
                     * the ldom name and the msg Q
                     * remain in iosvc_list
                     */
                    if (iosvc->ldom_name[0] != '\0')
                        fmd_hdl_debug(fmd_hdl,
                            "info: iosvc w/ ldom_name "
                            "%s\n", iosvc->ldom_name);

                    /*
                     * destroy the send/recv threads and
                     * do the other clean up on the
                     * Control side
                     */
                    etm_iosvc_cleanup(fmd_hdl, iosvc,
                        B_FALSE, B_FALSE);
                    (void) pthread_mutex_unlock(
                        &iosvc_list_lock);
                } else {
                    iosvc = &io_svc;
                    /*
                     * destroy the send/recv threads and
                     * then clean up on the Root side
                     */
                    etm_iosvc_cleanup(fmd_hdl, iosvc,
                        B_FALSE, B_FALSE);
                }
                break;

            default:
                /*
                 * for all other events, etm doesn't care;
                 * an fmd info msg w/ the event type has
                 * already been logged. Do nothing here.
                 */
                break;
            } /* switch (async_e.event_type) */

            if (etm_ldom_type == LDOM_TYPE_CONTROL) {
                etm_filter_handle_ldom_event(fmd_hdl,
                    async_e.event_type, async_e.ldom_name);
            }

            /*
             * grab the lock to check the q length again
             */
            (void) pthread_mutex_lock(&etm_async_event_q_lock);

            if (etm_is_dying) {
                break;
            }
        } /* etm_async_q_cur_len */

        /*
         * we have the mutex lock at this point, whether
         * . etm_is_dying and/or
         * . q_len == 0
         */
        if (!etm_is_dying && etm_async_q_cur_len == 0) {
            fmd_hdl_debug(fmd_hdl,
                "info: cond wait on async_event_q_cv\n");
            (void) pthread_cond_wait(&etm_async_event_q_cv,
                &etm_async_event_q_lock);
            fmd_hdl_debug(fmd_hdl,
                "info: cond wait on async_event_q_cv rtns\n");
        }
        (void) pthread_mutex_unlock(&etm_async_event_q_lock);
    } /* etm_is_dying */

    fmd_hdl_debug(fmd_hdl,
        "info: etm async event handler thread exiting\n");

} /* etm_async_event_handler */

/*
 * deQ what's in the iosvc msg Q and
 * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg();
 * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event
 */
static void
etm_send_to_remote_root(void *arg)
{

    etm_iosvc_t *iosvc = (etm_iosvc_t *)arg;    /* iosvc ptr */
    etm_iosvc_q_ele_t msg_ele;                  /* iosvc msg ele */
    etm_proto_v1_ev_hdr_t *ev_hdrp;             /* hdr for FMA_EVENT */
    fmd_hdl_t *fmd_hdl = init_hdl;              /* fmd handle */


    fmd_hdl_debug(fmd_hdl,
        "info: send to remote iosvc starting w/ ldom_name %s\n",
        iosvc->ldom_name);

    /*
     * loop forever until etm_is_dying or thr_is_dying
     */
    while (!etm_is_dying && !iosvc->thr_is_dying) {
        if (iosvc->ds_hdl != DS_INVALID_HDL &&
            iosvc->start_sending_Q > 0) {
            (void) pthread_mutex_lock(&iosvc->msg_q_lock);
            while (iosvc->msg_q_cur_len > 0 &&
                iosvc->ds_hdl != DS_INVALID_HDL) {
                (void) etm_iosvc_msg_deq(fmd_hdl, iosvc,
                    &msg_ele);
                if (etm_debug_lvl >= 3) {
                    fmd_hdl_debug(fmd_hdl, "info: valid "
                        "ds_hdl before ds_send_msg\n");
                }
                (void) pthread_mutex_unlock(&iosvc->msg_q_lock);

                iosvc->ack_ok = 0;
                ev_hdrp = (etm_proto_v1_ev_hdr_t *)
                    ((ptrdiff_t)msg_ele.msg);
                ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1;
                while (!iosvc->ack_ok &&
                    iosvc->ds_hdl != DS_INVALID_HDL &&
                    !etm_is_dying) {
                    /*
                     * call ds_send_msg() to send the msg,
                     * then wait for the recv end to send
                     * the resp msg back.
                     * if the resp msg is recv-ed, ack_ok
                     * will be set to 1;
                     * otherwise, retry.
                     */
                    if (etm_send_ds_msg(fmd_hdl, B_TRUE,
                        iosvc, &msg_ele, ev_hdrp) < 0) {
                        continue;
                    }

                    if (etm_is_dying || iosvc->thr_is_dying)
                        break;
                }

                /*
                 * if out of the while loop but !ack_ok, ie,
                 * ds_hdl became invalid at some point
                 * while waiting for the resp msg, we need to
                 * put the msg back at the head of the Q
                 */
                if (!iosvc->ack_ok) {
                    (void) pthread_mutex_lock(
                        &iosvc->msg_q_lock);
                    /*
                     * put the msg back at the head of the Q;
                     * if the Q is full at this point,
                     * drop the msg at the tail and enq this
                     * msg at the head
                     */
                    etm_msg_enq_head(fmd_hdl, iosvc,
                        &msg_ele);
                    (void) pthread_mutex_unlock(
                        &iosvc->msg_q_lock);
                }

                /*
                 * grab the lock to check the Q len again
                 */
                (void) pthread_mutex_lock(&iosvc->msg_q_lock);
                if (etm_is_dying || iosvc->thr_is_dying) {
                    break;
                }
            } /* while dequeuing iosvc msgs to send */

            /*
             * we have the mutex lock for msg_q_lock at this
             * point; we are here because
             * 1) q_len == 0: then wait on the cv for the Q to
             *    be filled
             * 2) etm_is_dying
             */
            if (!etm_is_dying && !iosvc->thr_is_dying &&
                iosvc->msg_q_cur_len == 0) {
                fmd_hdl_debug(fmd_hdl,
                    "info: waiting on msg_q_cv\n");
                (void) pthread_cond_wait(&iosvc->msg_q_cv,
                    &iosvc->msg_q_lock);
            }
            (void) pthread_mutex_unlock(&iosvc->msg_q_lock);
            if (etm_is_dying || iosvc->thr_is_dying) {
                break;
            }
        } else {
            (void) etm_sleep(1);
        } /* wait for start_sending_Q > 0 */
    } /* etm_is_dying or thr_is_dying */
    fmd_hdl_debug(fmd_hdl, "info: etm send thread exiting\n");
} /* etm_send_to_remote_root */


/*
 * receive etm msgs from the remote root ldom by calling ds_recv_msg();
 * if they are FMA events/ereports, call fmd_xprt_post() to post them to fmd;
 * send an ACK back by calling ds_send_msg()
 */
static void
etm_recv_from_remote_root(void *arg)
{
    etm_iosvc_t *iosvc = (etm_iosvc_t *)arg;    /* iosvc ptr */
    etm_proto_v1_pp_t *pp;                      /* protocol preamble */
    etm_proto_v1_ev_hdr_t *ev_hdrp;             /* for FMA_EVENT msg */
    etm_proto_v1_resp_hdr_t *resp_hdrp;         /* for RESPONSE msg */
    int32_t resp_code = 0;                      /* default is success */
    int32_t rc;                                 /* return value */
    size_t maxlen = MAXLEN;                     /* max msg len */
    char msgbuf[MAXLEN];                        /* recv msg buf */
    size_t msg_size;                            /* recv msg size */
    size_t hdr_sz;                              /* sizeof *hdrp */
    size_t evsz;                                /* sizeof *evp */
    size_t fma_event_size;                      /* sizeof FMA event */
    nvlist_t *evp;                              /* ptr to the nvlist */
    char *buf;                                  /* ptr to the nvlist */
    static uint32_t mem_alloc = 0;              /* indicate if alloc mem */
    char *msg;                                  /* ptr to alloc mem */
    fmd_hdl_t *fmd_hdl = init_hdl;



    fmd_hdl_debug(fmd_hdl,
        "info: recv from remote iosvc starting with ldom name %s\n",
        iosvc->ldom_name);

    /*
     * loop forever until etm_is_dying or the thread is dying
     */

    msg = msgbuf;
    while (!etm_is_dying && !iosvc->thr_is_dying) {
        if (iosvc->ds_hdl == DS_INVALID_HDL) {
            fmd_hdl_debug(fmd_hdl,
                "info: ds_hdl is invalid in recv thr\n");
            (void) etm_sleep(1);
            continue;
        }

        /*
         * for now, there are FMA_EVENT and ACK msg types;
         * use the FMA_EVENT buf size as the maxlen, hdr + 1 fma
         * event, since an FMA_EVENT is big enough to hold an ACK
         * msg;
         * the actual msg size received is in msg_size
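         *
         * if the incoming msg exceeds maxlen, the ds_recv_msg()
         * call below fails with EFBIG and reports the needed size
         * in msg_size; a big-enough buf is then alloc-ed and the
         * recv retried on the next pass of the loop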
         */
        rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size);
        if (rc == EFBIG) {
            fmd_hdl_debug(fmd_hdl,
                "info: ds_recv_msg needs mem the size of %d\n",
                msg_size);
            msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP);
            mem_alloc = 1;
        } else if (rc == 0) {
            fmd_hdl_debug(fmd_hdl,
                "info: ds_recv_msg received a msg ok\n");
            /*
             * check the magic # in the msg hdr
             */
            pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg);
            if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) {
                fmd_hdl_debug(fmd_hdl,
                    "info: bad ds recv on magic\n");
                continue;
            }

            /*
             * check the msg type against msg_size to be sure
             * that the received msg is not a truncated msg
             */
            if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {

                ev_hdrp = (etm_proto_v1_ev_hdr_t *)
                    ((ptrdiff_t)msg);
                fmd_hdl_debug(fmd_hdl, "info: ds received "
                    "FMA EVENT xid=%d msg_size=%d\n",
                    ev_hdrp->ev_pp.pp_xid, msg_size);
                hdr_sz = sizeof (*ev_hdrp) +
                    1 * (sizeof (ev_hdrp->ev_lens[0]));
                fma_event_size = hdr_sz + ev_hdrp->ev_lens[0];
                if (fma_event_size != msg_size) {
                    fmd_hdl_debug(fmd_hdl, "info: wrong "
                        "ev msg size received\n");
                    /*
                     * simply do nothing; the send side,
                     * in its timedcond_wait for the resp
                     * msg, will time out and re-send the
                     * same msg
                     */
                    continue;
                }
                if (etm_debug_lvl >= 3) {
                    fmd_hdl_debug(fmd_hdl, "info: recv msg"
                        " size %d hdrsz %d evp size %d\n",
                        msg_size, hdr_sz,
                        ev_hdrp->ev_lens[0]);
                }

                if (ev_hdrp->ev_pp.pp_xid !=
                    iosvc->xid_posted_ev) {
                    /*
                     * different from the last xid posted
                     * to fmd, so post to fmd now
                     */
                    buf = msg + hdr_sz;
                    rc = nvlist_unpack(buf,
                        ev_hdrp->ev_lens[0], &evp, 0);
                    rc = nvlist_size(evp, &evsz,
                        NV_ENCODE_XDR);
                    fmd_hdl_debug(fmd_hdl,
                        "info: evp size %d before fmd "
                        "post\n", evsz);

                    if ((rc = etm_post_to_fmd(fmd_hdl,
                        iosvc->fmd_xprt, evp)) >= 0) {
                        fmd_hdl_debug(fmd_hdl,
                            "info: xid posted to fmd %d"
                            "\n",
                            ev_hdrp->ev_pp.pp_xid);
                        iosvc->xid_posted_ev =
                            ev_hdrp->ev_pp.pp_xid;
                    }
                }

                /*
                 * ready to send the RESPONSE msg back;
                 * reuse the msg buffer as the response buffer
                 */
                resp_hdrp = (etm_proto_v1_resp_hdr_t *)
                    ((ptrdiff_t)msg);
                resp_hdrp->resp_pp.pp_msg_type =
                    ETM_MSG_TYPE_RESPONSE;

                resp_hdrp->resp_code = resp_code;
                resp_hdrp->resp_len = sizeof (*resp_hdrp);

                /*
                 * send the whole response msg in one send
                 */
                if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg,
                    sizeof (*resp_hdrp)) != 0) {
                    fmd_hdl_debug(fmd_hdl,
                        "info: send response msg failed\n");
                } else {
                    fmd_hdl_debug(fmd_hdl,
                        "info: ds send resp msg ok "
                        "size %d\n", sizeof (*resp_hdrp));
                }
            } else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) {
                fmd_hdl_debug(fmd_hdl,
                    "info: ds received response msg xid=%d "
                    "msg_size=%d for ldom %s\n", pp->pp_xid,
                    msg_size, iosvc->ldom_name);
                if (sizeof (*resp_hdrp) != msg_size) {
                    fmd_hdl_debug(fmd_hdl,
                        "info: wrong resp msg size "
                        "received\n");
                    fmd_hdl_debug(fmd_hdl,
                        "info: resp msg size %d recv resp "
                        "msg size %d\n",
                        sizeof (*resp_hdrp), msg_size);
                    continue;
                }
                /*
                 * if pp.pp_xid == iosvc->cur_send_xid + 1,
                 * nudge the send routine to send the next msg
                 */
                if (pp->pp_xid != iosvc->cur_send_xid + 1) {
                    fmd_hdl_debug(fmd_hdl,
                        "info: ds received resp msg xid=%d "
                        "doesn't match cur_send_id=%d\n",
                        pp->pp_xid, iosvc->cur_send_xid + 1);
                    continue;
                }
                (void) pthread_mutex_lock(&iosvc->msg_ack_lock);
                iosvc->ack_ok = 1;
                (void) pthread_cond_signal(&iosvc->msg_ack_cv);
                (void) pthread_mutex_unlock(
                    &iosvc->msg_ack_lock);
                fmd_hdl_debug(fmd_hdl,
                    "info: signaling msg_ack_cv\n");
            } else {
                /*
                 * place holder for future msg types
                 */
                fmd_hdl_debug(fmd_hdl,
                    "info: ds received unrecognized msg\n");
            }
            if (mem_alloc) {
                fmd_hdl_free(fmd_hdl, msg, msg_size);
                mem_alloc = 0;
                msg = msgbuf;
            }
        } else {
            if (etm_debug_lvl >= 3) {
                fmd_hdl_debug(fmd_hdl,
                    "info: ds_recv_msg() failed\n");
            }
        } /* ds_recv_msg() returns */
    } /* etm_is_dying */

    /*
     * need to free the mem allocated in msg upon exiting the thread
     */
    if (mem_alloc) {
        fmd_hdl_free(fmd_hdl, msg, msg_size);
        mem_alloc = 0;
        msg = msgbuf;
    }
    fmd_hdl_debug(fmd_hdl, "info: etm recv thread exiting\n");
} /* etm_recv_from_remote_root */



/*
 * etm_ds_init
 *	initialize the DS services function pointers by calling
 *	dlopen() followed by dlsym() for each ds func;
 *	if any dlopen() or dlsym() call fails, return -ENOENT;
 *	return 0 for success, -ENOENT for failure
 */
static int
etm_ds_init(fmd_hdl_t *hdl)
{
    int rc = 0;

    if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) {
        fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path);
        return (-ENOENT);
    }

    etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
        dlsym(etm_dl_hdl, "ds_svc_reg");
    if (etm_ds_svc_reg == NULL) {
        fmd_hdl_debug(hdl,
            "error: failed to dlsym ds_svc_reg() w/ error %s\n",
            dlerror());
        rc = -ENOENT;
    }


    etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops))
        dlsym(etm_dl_hdl, "ds_clnt_reg");
    if (etm_ds_clnt_reg == NULL) {
        fmd_hdl_debug(hdl,
            "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno);
        rc = -ENOENT;
    }

    etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))
        dlsym(etm_dl_hdl, "ds_send_msg");
    if (etm_ds_send_msg == NULL) {
        fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n");
        rc = -ENOENT;
    }

    etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen,
        size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg");
    if (etm_ds_recv_msg == NULL) {
        fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n");
        rc = -ENOENT;
    }

    etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini");
    if (etm_ds_fini == NULL) {
        fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n");
        rc = -ENOENT;
    }

    if (rc == -ENOENT) {
        (void) dlclose(etm_dl_hdl);
    }
    return (rc);

} /* etm_ds_init() */


/*
 * -------------------------- FMD entry points -------------------------------
 */

/*
 * _fmd_init - initialize the transport for use by ETM and start the
 *		server daemon to accept new connections to us
 *
 *		FMD will read our *.conf and subscribe us to FMA events
 */

void
_fmd_init(fmd_hdl_t *hdl)
{
    struct timeval tmv;           /* timeval */
    ssize_t n;                    /* gen use */
    const struct facility *fp;    /* syslog facility matching */
    char *facname;                /* syslog facility property */
    uint32_t type_mask;           /* type of the local host */
    int rc;                       /* funcs return code */


    if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
        return;    /* invalid data in configuration file */
    }

    fmd_hdl_debug(hdl, "info: module initializing\n");

    init_hdl = hdl;
    etm_lhp = ldom_init(etm_init_alloc, etm_init_free);

    /*
     * decide the ldom type and do the initialization accordingly
     */
    if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) {
        fmd_hdl_debug(hdl, "error: can't decide the ldom type\n");
        fmd_hdl_debug(hdl, "info: module unregistering\n");
        ldom_fini(etm_lhp);
        fmd_hdl_unregister(hdl);
        return;
    }

    if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) {
        if (type_mask & LDOM_TYPE_LEGACY) {
            /*
             * running on a legacy sun4v domain,
             * act as the old sun4v
             */
            etm_ldom_type = LDOM_TYPE_LEGACY;
            fmd_hdl_debug(hdl, "info: running as the old sun4v\n");
            ldom_fini(etm_lhp);
        } else if (type_mask & LDOM_TYPE_CONTROL) {
            etm_ldom_type = LDOM_TYPE_CONTROL;
            fmd_hdl_debug(hdl, "info: running as control domain\n");

            /*
             * look for libds.so.1;
             * if not found, don't do the DS registration. As a
             * result, there will be no DS callbacks or other DS
             * services.
             */
            if (etm_ds_init(hdl) >= 0) {
                etm_filter_init(hdl);
                etm_ckpt_init(hdl);

                flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;

                /*
                 * ds client registration
                 */
                if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps,
                    &iosvc_ops))) {
                    fmd_hdl_debug(hdl,
                        "error: ds_clnt_reg(): errno %d\n", rc);
                }
            } else {
                fmd_hdl_debug(hdl, "error: dlopen() libds "
                    "failed, continuing without the DS services");
            }

            /*
             * register for ldom status events
             */
            if ((rc = ldom_register_event(etm_lhp,
                ldom_event_handler, hdl))) {
                fmd_hdl_debug(hdl,
                    "error: ldom_register_event():"
                    " errno %d\n", rc);
            }

            /*
             * create the thread for handling both the ldom status
             * changes and service events
             */
            etm_async_e_tid = fmd_thr_create(hdl,
                etm_async_event_handler, hdl);
        }

        /* setup statistics and properties from FMD */

        (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
            sizeof (etm_stats) / sizeof (fmd_stat_t),
            (fmd_stat_t *)&etm_stats);

        etm_fma_resp_wait_time = fmd_prop_get_int32(hdl,
            ETM_PROP_NM_FMA_RESP_WAIT_TIME);
        etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
        etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
            ETM_PROP_NM_DEBUG_MAX_EV_CNT);
        fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
            "etm_debug_max_ev_cnt %d\n", etm_debug_lvl,
            etm_debug_max_ev_cnt);

        etm_resp_q_max_len = fmd_prop_get_int32(hdl,
            ETM_PROP_NM_MAX_RESP_Q_LEN);
        etm_stats.etm_resp_q_max_len.fmds_value.ui64 =
            etm_resp_q_max_len;
        etm_bad_acc_to_sec = fmd_prop_get_int32(hdl,
            ETM_PROP_NM_BAD_ACC_TO_SEC);

        /*
         * obtain an FMD transport handle so we can post
         * FMA events later
         */

        etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);

        /*
         * encourage the protocol transaction id to be unique per
         * module load
         */

        (void) gettimeofday(&tmv, NULL);
        etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
            ((unsigned long)tmv.tv_usec >> 10));


		/* init the ETM transport */

		if ((n = etm_xport_init(hdl)) != 0) {
			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
			    (-n));
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * Cache any properties we use every time we receive an alert.
		 */
		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);

		if (syslog_file && (syslog_logfd = open("/dev/conslog",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl,
			    "error: failed to open /dev/conslog");
			syslog_file = 0;
		}

		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
			syslog_cons = 0;
		}

		if (syslog_file) {
			/*
			 * Look up the value of the "facility" property and
			 * use it to determine what syslog LOG_* facility
			 * value we use to fill in our log_ctl_t.
			 */
			facname = fmd_prop_get_string(hdl,
			    ETM_PROP_NM_FACILITY);

			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
				if (strcmp(fp->fac_name, facname) == 0)
					break;
			}

			if (fp->fac_name == NULL) {
				fmd_hdl_error(hdl, "error: invalid 'facility'"
				    " setting: %s\n", facname);
				syslog_file = 0;
			} else {
				syslog_facility = fp->fac_value;
				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
			}

			fmd_prop_free_string(hdl, facname);
		}

		/*
		 * start the message responder and the connection acceptance
		 * server; request protocol version be negotiated after waiting
		 * a second for the receiver to be ready to start handshaking
		 */

		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

		(void) etm_sleep(ETM_SLEEP_QUIK);
		etm_req_ver_negot(hdl);

	} else if (type_mask & LDOM_TYPE_ROOT) {
		etm_ldom_type = LDOM_TYPE_ROOT;
		fmd_hdl_debug(hdl, "info: running as root domain\n");

		/*
		 * look for libds.so.1.
		 * If not found, don't do DS registration. As a result,
		 * there will be no DS callbacks or other DS services.
		 */
		if (etm_ds_init(hdl) < 0) {
			fmd_hdl_debug(hdl,
			    "error: dlopen() libds failed, "
			    "module unregistering\n");
			ldom_fini(etm_lhp);
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * DS service registration
		 */
		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
			    rc);
		}

		/*
		 * this thread is created for ds_reg_cb/ds_unreg_cb
		 */
		etm_async_e_tid = fmd_thr_create(hdl,
		    etm_async_event_handler, hdl);

		flags = FMD_XPRT_RDWR;
	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
		/*
		 * Do not load this module if it is
		 * . running on a non-root ldom
		 * . the domain owns no io devices
		 */
		fmd_hdl_debug(hdl,
		    "info: non-root ldom, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	} else {
		/*
		 * place holder, all other cases; unload etm for now
		 */
		fmd_hdl_debug(hdl,
		    "info: other ldom type, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	}

	fmd_hdl_debug(hdl, "info: module initialized ok\n");

} /* _fmd_init() */
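
/*
 * Example (illustrative): fmd reads the module's .conf file when it
 * loads the module, so any of the fmd_props[] defaults queried in
 * _fmd_init() can be overridden there with setprop directives, e.g.:
 *
 *	setprop etm_debug_lvl 1
 *	setprop etm_debug_max_ev_cnt 256
 *
 * The literal property names are whatever the ETM_PROP_NM_* macros
 * expand to in etm_impl.h; the two shown above are assumptions.
 */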

/*
 * etm_recv - receive an FMA event from FMD and transport it
 *	to the remote endpoint
 */

/*ARGSUSED*/
void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
{
	etm_xport_addr_t *addrv;	/* vector of transport addresses */
	etm_xport_conn_t conn;		/* connection handle */
	etm_proto_v1_ev_hdr_t *hdrp;	/* for FMA_EVENT msg */
	ssize_t i, n;			/* gen use */
	size_t sz;			/* header size */
	size_t buflen;			/* size of packed FMA event */
	uint8_t *buf;			/* tmp buffer for packed FMA event */

	/*
	 * if this is running on a root domain, ignore the events,
	 * return right away
	 */
	if (etm_ldom_type == LDOM_TYPE_ROOT)
		return;

	buflen = 0;
	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event size errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);

	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

	/*
	 * if the debug limit has been set, avoid excessive traffic,
	 * for example, an infinite cycle using loopback nodes
	 */

	if ((etm_debug_max_ev_cnt >= 0) &&
	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
	    etm_debug_max_ev_cnt)) {
		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
		    "event %p cnt %llu > debug max %d\n", evp,
		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
		    etm_debug_max_ev_cnt);
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	/* allocate a buffer for the FMA event and nvlist pack it */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);

	/*
	 * increment the ttl value if the event is from remote (a root domain)
	 * uncomment this when enabling fault forwarding from Root domains
	 * to Control domain.
	 *
	 * uint8_t ttl;
	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
	 *		(void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8);
	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
	 *	}
	 * }
	 */
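	/*
	 * (Once enabled, the TTL bump above would record another forwarding
	 * hop in FMD_EVN_TTL for events that did not originate locally,
	 * letting the receiving side distinguish forwarded events from
	 * local ones -- the same looping concern the debug event-count
	 * limit above guards against.)
	 */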

	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
	    NV_ENCODE_XDR, 0)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event pack errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* get vector of dst addrs and send the FMA event to each one */

	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "bad event dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		/* open a new connection to this dst addr */

		if ((n = etm_conn_open(hdl, "FMA event dropped: "
		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_lock(&etm_write_lock);

		/* write the ETM message header */

		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
		    &sz)) == NULL) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			fmd_hdl_error(hdl, "error: FMA event dropped: "
			    "bad hdr write errno %d\n", errno);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad hdr wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		fmd_hdl_free(hdl, hdrp, sz); /* header not needed */
		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
		    evp);

		/* write the ETM message body, i.e., the packed nvlist */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io write on event", conn,
		    buf, buflen, ETM_IO_OP_WR)) < 0) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad body wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_unlock(&etm_write_lock);

		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
		    evp);

		/* close the connection */

		(void) etm_conn_close(hdl, "bad conn close after event send",
		    conn);
	} /* foreach dst addr in the vector */

	etm_xport_free_addrv(hdl, addrv);
	fmd_hdl_free(hdl, buf, buflen);

} /* etm_recv() */
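
/*
 * Illustrative sketch (not compiled): the size-then-pack sequence
 * etm_recv() uses, shown standalone.  nvlist_size() computes the
 * XDR-encoded length first so exactly buflen bytes can be allocated,
 * and nvlist_pack() is then handed that pre-allocated buffer rather
 * than being allowed to allocate its own.  etm_pack_example() is a
 * hypothetical name.
 */
#if 0
static void
etm_pack_example(fmd_hdl_t *hdl, nvlist_t *evp)
{
	size_t buflen = 0;
	char *buf;

	if (nvlist_size(evp, &buflen, NV_ENCODE_XDR) != 0)
		return;				/* unsizable event */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
	if (nvlist_pack(evp, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) {
		fmd_hdl_free(hdl, buf, buflen);	/* pack failed; drop it */
		return;
	}
	/* ... write buf[0 .. buflen-1] to the transport ... */
	fmd_hdl_free(hdl, buf, buflen);
}
#endif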


/*
 * etm_send - receive an FMA event from FMD and enqueue it on the iosvc
 *	queue; etm_send_to_remote_root() dequeues and transports the FMA
 *	events to a remote root domain
 *	return FMD_SEND_SUCCESS for success,
 *	FMD_SEND_FAILED for error
 */

/*ARGSUSED*/
int
etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
{
	uint32_t pack_it;	/* whether to pack/enq the event */
	etm_pack_msg_type_t msg_type;
				/* tell etm_pack_ds_msg() what to do */
	etm_iosvc_t *iosvc;	/* ptr to cur iosvc struct */
	char *class = NULL;	/* nvlist class name */

	pack_it = 1;
	msg_type = FMD_XPRT_OTHER_MSG;

	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
	if (class == NULL) {
		pack_it = 0;
	} else {
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(fmd_hdl,
			    "info: evp class= %s in etm_send\n", class);
		}

		if (etm_ldom_type == LDOM_TYPE_CONTROL) {
			iosvc =
			    (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp);

			/*
			 * check the flag FORWARDING_FAULTS_TO_CONTROL to
			 * decide whether or not to drop fault subscription
			 * control msgs
			 */
			if (strcmp(class, "resource.fm.xprt.subscribe") == 0) {
				pack_it = 0;
				/*
				 * if (FORWARDING_FAULTS_TO_CONTROL == 1) {
				 *	(void) nvlist_lookup_string(nvl,
				 *	    FM_RSRC_XPRT_SUBCLASS, &subclass);
				 *	if (strcmp(subclass, "list.suspect")
				 *	    == 0) {
				 *		pack_it = 1;
				 *		msg_action = FMD_XPRT_OTHER_MSG;
				 *	}
				 *	if (strcmp(subclass, "list.repaired")
				 *	    == 0) {
				 *		pack_it = 1;
				 *		msg_action = FMD_XPRT_OTHER_MSG;
				 *	}
				 * }
				 */
			}
			if (strcmp(class, "resource.fm.xprt.run") == 0) {
				pack_it = 1;
				msg_type = FMD_XPRT_RUN_MSG;
			}
		} else {	/* has to be the root domain ldom */
			iosvc = &io_svc;
			/*
			 * drop all ereport and fault subscriptions;
			 * are we dropping too much here, more than just
			 * ereport and fault subscriptions?  need to check
			 */
			if (strcmp(class, "resource.fm.xprt.subscribe") == 0)
				pack_it = 0;
			if (strcmp(class, "resource.fm.xprt.run") == 0) {
				pack_it = 1;
				msg_type = FMD_XPRT_RUN_MSG;
			}
		}
	}

	if (pack_it) {
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(fmd_hdl,
			    "info: ldom name returned from xprt get specific="
			    "%s xprt=%p\n", iosvc->ldom_name, xp);
		}
		/*
		 * pack the etm msg for the DS library and enq in io_svc->Q
		 * when the hdrp is NULL, the packing func will use the static
		 * iosvc_hdr
		 */
		(void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type,
		    ETM_CKPT_NOOP);
	}

	return (FMD_SEND_SUCCESS);

} /* etm_send() */
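
/*
 * Illustrative sketch (not compiled): the class checks in etm_send()
 * above amount to a small dispatch table; a table-driven rendering is
 * shown below.  The etm_class_act_t type and the table contents are
 * hypothetical.
 */
#if 0
typedef struct etm_class_act {
	const char *ca_class;		/* event class to match */
	uint32_t ca_pack_it;		/* enqueue the event? */
	etm_pack_msg_type_t ca_type;	/* how to tag it for the peer */
} etm_class_act_t;

static const etm_class_act_t etm_class_acts[] = {
	{ "resource.fm.xprt.subscribe",	0, FMD_XPRT_OTHER_MSG },
	{ "resource.fm.xprt.run",	1, FMD_XPRT_RUN_MSG },
	{ NULL,				1, FMD_XPRT_OTHER_MSG }	/* default */
};
#endif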


/*
 * _fmd_fini - stop the server daemon and tear down the transport
 */

void
_fmd_fini(fmd_hdl_t *hdl)
{
	ssize_t n;			/* gen use */
	etm_iosvc_t *iosvc;		/* ptr to iosvc struct */
	etm_iosvc_q_ele_t msg_ele;	/* iosvc msg ele */
	uint32_t i;			/* for loop var */

	fmd_hdl_debug(hdl, "info: module finalizing\n");

	/* kill the connection server and responder; wait for them to die */

	etm_is_dying = 1;

	if (etm_svr_tid != NULL) {
		fmd_thr_signal(hdl, etm_svr_tid);
		fmd_thr_destroy(hdl, etm_svr_tid);
		etm_svr_tid = NULL;
	} /* if server thread was successfully created */

	if (etm_resp_tid != NULL) {
		fmd_thr_signal(hdl, etm_resp_tid);
		fmd_thr_destroy(hdl, etm_resp_tid);
		etm_resp_tid = NULL;
	} /* if responder thread was successfully created */

	if (etm_async_e_tid != NULL) {
		fmd_thr_signal(hdl, etm_async_e_tid);
		fmd_thr_destroy(hdl, etm_async_e_tid);
		etm_async_e_tid = NULL;
	} /* if async event handler thread was successfully created */


	if ((etm_ldom_type == LDOM_TYPE_LEGACY) ||
	    (etm_ldom_type == LDOM_TYPE_CONTROL)) {

		/* teardown the transport and cleanup syslogging */
		if ((n = etm_xport_fini(hdl)) != 0) {
			fmd_hdl_error(hdl, "warning: xport fini errno %d\n",
			    (-n));
		}
		if (etm_fmd_xprt != NULL) {
			fmd_xprt_close(hdl, etm_fmd_xprt);
		}

		if (syslog_logfd != -1) {
			(void) close(syslog_logfd);
		}
		if (syslog_msgfd != -1) {
			(void) close(syslog_msgfd);
		}
	}

	if (etm_ldom_type == LDOM_TYPE_CONTROL) {
		if (ldom_unregister_event(etm_lhp))
			fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n");

		/*
		 * On the control domain side, there may be multiple iosvc
		 * structs in use, one for each bound/active domain. Each
		 * struct manages a queue of FMA events destined to the root
		 * domain. Go through every iosvc struct to clean up its
		 * resources.
		 */
		for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
			if (iosvc_list[i].ldom_name[0] != '\0') {
				/*
				 * found an iosvc struct for a root domain
				 */
				iosvc = &iosvc_list[i];
				(void) pthread_mutex_lock(&iosvc_list_lock);
				etm_iosvc_cleanup(hdl, iosvc, B_TRUE, B_FALSE);
				(void) pthread_mutex_unlock(&iosvc_list_lock);

			} else {
				/*
				 * skip entries not bound to a root domain
				 */
				continue;
			}
		} /* for i < NUM_OF_ROOT_DOMAINS */
		etm_ckpt_fini(hdl);
		etm_filter_fini(hdl);

		ldom_fini(etm_lhp);

	} else if (etm_ldom_type == LDOM_TYPE_ROOT) {
		/*
		 * On the root domain side, there is only one iosvc struct
		 * in use.
		 */
		iosvc = &io_svc;
		if (iosvc->send_tid != NULL) {
			fmd_thr_signal(hdl, iosvc->send_tid);
			fmd_thr_destroy(hdl, iosvc->send_tid);
			iosvc->send_tid = NULL;
		} /* if io svc send thread was successfully created */

		if (iosvc->recv_tid != NULL) {
			fmd_thr_signal(hdl, iosvc->recv_tid);
			fmd_thr_destroy(hdl, iosvc->recv_tid);
			iosvc->recv_tid = NULL;
		} /* if io svc receive thread was successfully created */

		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
		while (iosvc->msg_q_cur_len > 0) {
			(void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele);
			fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size);
		}
		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);

		if (iosvc->fmd_xprt != NULL)
			fmd_xprt_close(hdl, iosvc->fmd_xprt);
		ldom_fini(etm_lhp);
	}
	if (etm_ds_fini) {
		(*etm_ds_fini)();
		(void) dlclose(etm_dl_hdl);
	}

	fmd_hdl_debug(hdl, "info: module finalized ok\n");

} /* _fmd_fini() */