1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * etm.c FMA Event Transport Module implementation, a plugin of FMD 29 * for sun4v/Ontario 30 * 31 * plugin for sending/receiving FMA events to/from service processor 32 */ 33 34 /* 35 * --------------------------------- includes -------------------------------- 36 */ 37 38 #include <sys/fm/protocol.h> 39 #include <sys/fm/util.h> 40 #include <sys/fm/ldom.h> 41 #include <sys/strlog.h> 42 #include <sys/syslog.h> 43 #include <sys/libds.h> 44 #include <netinet/in.h> 45 #include <fm/fmd_api.h> 46 47 #include "etm_xport_api.h" 48 #include "etm_etm_proto.h" 49 #include "etm_impl.h" 50 #include "etm_iosvc.h" 51 #include "etm_filter.h" 52 #include "etm_ckpt.h" 53 54 #include <pthread.h> 55 #include <signal.h> 56 #include <stropts.h> 57 #include <locale.h> 58 #include <strings.h> 59 #include <stdlib.h> 60 #include <unistd.h> 61 #include <limits.h> 62 #include <values.h> 63 #include <alloca.h> 64 #include <errno.h> 65 #include <dlfcn.h> 66 #include <link.h> 67 #include <fcntl.h> 68 #include <time.h> 69 70 /* 71 * ----------------------------- forward decls ------------------------------- 72 */ 73 74 static void 75 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class); 76 77 static int 78 etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl); 79 80 static void 81 etm_send_to_remote_root(void *arg); 82 83 static void 84 etm_recv_from_remote_root(void *arg); 85 86 static void 87 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele); 88 89 /* 90 * ------------------------- data structs for FMD ---------------------------- 91 */ 92 93 static const fmd_hdl_ops_t fmd_ops = { 94 etm_recv, /* fmdo_recv */ 95 NULL, /* fmdo_timeout */ 96 NULL, /* fmdo_close */ 97 NULL, /* fmdo_stats */ 98 NULL, /* fmdo_gc */ 99 etm_send, /* fmdo_send */ 100 }; 101 102 static const fmd_prop_t fmd_props[] = { 103 { ETM_PROP_NM_XPORT_ADDRS, FMD_TYPE_STRING, "" }, 104 { ETM_PROP_NM_DEBUG_LVL, FMD_TYPE_INT32, "0" }, 105 { ETM_PROP_NM_DEBUG_MAX_EV_CNT, FMD_TYPE_INT32, "-1" }, 106 { ETM_PROP_NM_CONSOLE, FMD_TYPE_BOOL, "false" }, 107 { ETM_PROP_NM_SYSLOGD, FMD_TYPE_BOOL, "true" }, 108 { ETM_PROP_NM_FACILITY, FMD_TYPE_STRING, "LOG_DAEMON" }, 109 { ETM_PROP_NM_MAX_RESP_Q_LEN, FMD_TYPE_UINT32, "32" }, 110 { ETM_PROP_NM_BAD_ACC_TO_SEC, FMD_TYPE_UINT32, "1" }, 111 { ETM_PROP_NM_FMA_RESP_WAIT_TIME, FMD_TYPE_INT32, "240" }, 112 { NULL, 0, NULL } 113 }; 114 115 116 static const fmd_hdl_info_t fmd_info = { 117 "FMA Event Transport Module", "1.2", &fmd_ops, fmd_props 118 }; 119 120 /* 121 * ----------------------- private consts and defns -------------------------- 122 */ 123 124 /* misc buffer for variable sized protocol header fields */ 125 126 #define ETM_MISC_BUF_SZ (4 * 1024) 127 128 static uint32_t 129 etm_ldom_type = LDOM_TYPE_LEGACY; 130 131 /* try limit for IO operations w/ capped exp backoff sleep on retry */ 132 133 /* 134 * Design_Note: ETM will potentially retry forever IO operations that the 135 * transport fails with EAGAIN (aka EWOULDBLOCK) rather than 136 * giving up after some number of seconds. This avoids 137 * dropping FMA events while the service processor is down, 138 * but at the risk of pending fmdo_recv() forever and 139 * overflowing FMD's event queue for ETM. 140 * A future TBD enhancement would be to always recv 141 * and send each ETM msg in a single read/write() to reduce 142 * the risk of failure between ETM msg hdr and body, 143 * assuming the MTU_SZ is large enough. 144 */ 145 146 #define ETM_TRY_MAX_CNT (MAXINT - 1) 147 #define ETM_TRY_BACKOFF_RATE (4) 148 #define ETM_TRY_BACKOFF_CAP (60) 149 150 /* amount to increment protocol transaction id on each new send */ 151 152 #define ETM_XID_INC (2) 153 154 typedef struct etm_resp_q_ele { 155 156 etm_xport_conn_t rqe_conn; /* open connection to send on */ 157 etm_proto_v1_pp_t *rqe_hdrp; /* ptr to ETM msg hdr */ 158 size_t rqe_hdr_sz; /* sizeof ETM msg hdr */ 159 int32_t rqe_resp_code; /* response code to send */ 160 161 struct etm_resp_q_ele *rqe_nextp; /* PRIVATE - next ele ptr */ 162 163 } etm_resp_q_ele_t; /* responder queue element */ 164 165 /* 166 * ---------------------------- global data ---------------------------------- 167 */ 168 169 static fmd_hdl_t 170 *init_hdl = NULL; /* used in mem allocator and several other places */ 171 172 static int 173 etm_debug_lvl = 0; /* debug level: 0 is off, 1 is on, 2 is more, etc */ 174 175 static int 176 etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */ 177 178 static fmd_xprt_t 179 *etm_fmd_xprt = NULL; /* FMD transport layer handle */ 180 181 static pthread_t 182 etm_svr_tid = NULL; /* thread id of connection acceptance server */ 183 184 static pthread_t 185 etm_resp_tid = NULL; /* thread id of msg responder */ 186 187 static etm_resp_q_ele_t 188 *etm_resp_q_head = NULL; /* ptr to cur head of responder queue */ 189 190 static etm_resp_q_ele_t 191 *etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */ 192 193 static uint32_t 194 etm_resp_q_cur_len = 0; /* cur length (ele cnt) of responder queue */ 195 196 static uint32_t 197 etm_resp_q_max_len = 0; /* max length (ele cnt) of responder queue */ 198 199 static uint32_t 200 etm_bad_acc_to_sec = 0; /* sleep timeout (in sec) after bad conn accept */ 201 202 static pthread_mutex_t 203 etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER; /* protects responder queue */ 204 205 static pthread_cond_t 206 etm_resp_q_cv = PTHREAD_COND_INITIALIZER; /* nudges msg responder */ 207 208 static volatile int 209 etm_is_dying = 0; /* bool for dying (killing self) */ 210 211 static uint32_t 212 etm_xid_cur = 0; /* current transaction id for sends */ 213 214 static uint32_t 215 etm_xid_ping = 0; /* xid of last CONTROL msg sent requesting ping */ 216 217 static uint32_t 218 etm_xid_ver_negot = 0; /* xid of last CONTROL msg sent requesting ver negot */ 219 220 static uint32_t 221 etm_xid_posted_logged_ev = 0; 222 /* xid of last FMA_EVENT msg/event posted OK to FMD */ 223 224 static uint32_t 225 etm_xid_posted_sa = 0; /* xid of last ALERT msg/event posted OK to syslog */ 226 227 static uint8_t 228 etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */ 229 230 static uint32_t 231 etm_fma_resp_wait_time = 30; /* time (sec) wait for fma event resp */ 232 233 static pthread_mutex_t 234 etm_write_lock = PTHREAD_MUTEX_INITIALIZER; /* for write operations */ 235 236 static log_ctl_t syslog_ctl; /* log(7D) meta-data for each msg */ 237 static int syslog_facility; /* log(7D) facility (part of priority) */ 238 static int syslog_logfd = -1; /* log(7D) file descriptor */ 239 static int syslog_msgfd = -1; /* sysmsg(7D) file descriptor */ 240 static int syslog_file = 0; /* log to syslog_logfd */ 241 static int syslog_cons = 0; /* log to syslog_msgfd */ 242 243 static const struct facility { 244 const char *fac_name; 245 int fac_value; 246 } syslog_facs[] = { 247 { "LOG_DAEMON", LOG_DAEMON }, 248 { "LOG_LOCAL0", LOG_LOCAL0 }, 249 { "LOG_LOCAL1", LOG_LOCAL1 }, 250 { "LOG_LOCAL2", LOG_LOCAL2 }, 251 { "LOG_LOCAL3", LOG_LOCAL3 }, 252 { "LOG_LOCAL4", LOG_LOCAL4 }, 253 { "LOG_LOCAL5", LOG_LOCAL5 }, 254 { "LOG_LOCAL6", LOG_LOCAL6 }, 255 { "LOG_LOCAL7", LOG_LOCAL7 }, 256 { NULL, 0 } 257 }; 258 259 static struct stats { 260 261 /* ETM msg counters */ 262 263 fmd_stat_t etm_rd_hdr_fmaevent; 264 fmd_stat_t etm_rd_hdr_control; 265 fmd_stat_t etm_rd_hdr_alert; 266 fmd_stat_t etm_rd_hdr_response; 267 fmd_stat_t etm_rd_body_fmaevent; 268 fmd_stat_t etm_rd_body_control; 269 fmd_stat_t etm_rd_body_alert; 270 fmd_stat_t etm_rd_body_response; 271 fmd_stat_t etm_wr_hdr_fmaevent; 272 fmd_stat_t etm_wr_hdr_control; 273 fmd_stat_t etm_wr_hdr_response; 274 fmd_stat_t etm_wr_body_fmaevent; 275 fmd_stat_t etm_wr_body_control; 276 fmd_stat_t etm_wr_body_response; 277 278 fmd_stat_t etm_rd_max_ev_per_msg; 279 fmd_stat_t etm_wr_max_ev_per_msg; 280 281 fmd_stat_t etm_resp_q_cur_len; 282 fmd_stat_t etm_resp_q_max_len; 283 284 /* ETM byte counters */ 285 286 fmd_stat_t etm_wr_fmd_bytes; 287 fmd_stat_t etm_rd_fmd_bytes; 288 fmd_stat_t etm_wr_xport_bytes; 289 fmd_stat_t etm_rd_xport_bytes; 290 291 fmd_stat_t etm_magic_drop_bytes; 292 293 /* ETM [dropped] FMA event counters */ 294 295 fmd_stat_t etm_rd_fmd_fmaevent; 296 fmd_stat_t etm_wr_fmd_fmaevent; 297 298 fmd_stat_t etm_rd_drop_fmaevent; 299 fmd_stat_t etm_wr_drop_fmaevent; 300 301 fmd_stat_t etm_rd_dup_fmaevent; 302 fmd_stat_t etm_wr_dup_fmaevent; 303 304 fmd_stat_t etm_rd_dup_alert; 305 fmd_stat_t etm_wr_dup_alert; 306 307 fmd_stat_t etm_enq_drop_resp_q; 308 fmd_stat_t etm_deq_drop_resp_q; 309 310 /* ETM protocol failures */ 311 312 fmd_stat_t etm_magic_bad; 313 fmd_stat_t etm_ver_bad; 314 fmd_stat_t etm_msgtype_bad; 315 fmd_stat_t etm_subtype_bad; 316 fmd_stat_t etm_xid_bad; 317 fmd_stat_t etm_fmaeventlen_bad; 318 fmd_stat_t etm_respcode_bad; 319 fmd_stat_t etm_timeout_bad; 320 fmd_stat_t etm_evlens_bad; 321 322 /* IO operation failures */ 323 324 fmd_stat_t etm_xport_wr_fail; 325 fmd_stat_t etm_xport_rd_fail; 326 fmd_stat_t etm_xport_pk_fail; 327 328 /* IO operation retries */ 329 330 fmd_stat_t etm_xport_wr_retry; 331 fmd_stat_t etm_xport_rd_retry; 332 fmd_stat_t etm_xport_pk_retry; 333 334 /* system and library failures */ 335 336 fmd_stat_t etm_os_nvlist_pack_fail; 337 fmd_stat_t etm_os_nvlist_unpack_fail; 338 fmd_stat_t etm_os_nvlist_size_fail; 339 fmd_stat_t etm_os_pthread_create_fail; 340 341 /* xport API failures */ 342 343 fmd_stat_t etm_xport_get_ev_addrv_fail; 344 fmd_stat_t etm_xport_open_fail; 345 fmd_stat_t etm_xport_close_fail; 346 fmd_stat_t etm_xport_accept_fail; 347 fmd_stat_t etm_xport_open_retry; 348 349 /* FMD entry point bad arguments */ 350 351 fmd_stat_t etm_fmd_init_badargs; 352 fmd_stat_t etm_fmd_fini_badargs; 353 354 /* Alert logging errors */ 355 356 fmd_stat_t etm_log_err; 357 fmd_stat_t etm_msg_err; 358 359 /* miscellaneous stats */ 360 361 fmd_stat_t etm_reset_xport; 362 363 } etm_stats = { 364 365 /* ETM msg counters */ 366 367 { "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64, 368 "ETM fmaevent msg headers rcvd from xport" }, 369 { "etm_rd_hdr_control", FMD_TYPE_UINT64, 370 "ETM control msg headers rcvd from xport" }, 371 { "etm_rd_hdr_alert", FMD_TYPE_UINT64, 372 "ETM alert msg headers rcvd from xport" }, 373 { "etm_rd_hdr_response", FMD_TYPE_UINT64, 374 "ETM response msg headers rcvd from xport" }, 375 { "etm_rd_body_fmaevent", FMD_TYPE_UINT64, 376 "ETM fmaevent msg bodies rcvd from xport" }, 377 { "etm_rd_body_control", FMD_TYPE_UINT64, 378 "ETM control msg bodies rcvd from xport" }, 379 { "etm_rd_body_alert", FMD_TYPE_UINT64, 380 "ETM alert msg bodies rcvd from xport" }, 381 { "etm_rd_body_response", FMD_TYPE_UINT64, 382 "ETM response msg bodies rcvd from xport" }, 383 { "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64, 384 "ETM fmaevent msg headers sent to xport" }, 385 { "etm_wr_hdr_control", FMD_TYPE_UINT64, 386 "ETM control msg headers sent to xport" }, 387 { "etm_wr_hdr_response", FMD_TYPE_UINT64, 388 "ETM response msg headers sent to xport" }, 389 { "etm_wr_body_fmaevent", FMD_TYPE_UINT64, 390 "ETM fmaevent msg bodies sent to xport" }, 391 { "etm_wr_body_control", FMD_TYPE_UINT64, 392 "ETM control msg bodies sent to xport" }, 393 { "etm_wr_body_response", FMD_TYPE_UINT64, 394 "ETM response msg bodies sent to xport" }, 395 396 { "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64, 397 "max FMA events per ETM msg from xport" }, 398 { "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64, 399 "max FMA events per ETM msg to xport" }, 400 401 { "etm_resp_q_cur_len", FMD_TYPE_UINT64, 402 "cur enqueued response msgs to xport" }, 403 { "etm_resp_q_max_len", FMD_TYPE_UINT64, 404 "max enqueable response msgs to xport" }, 405 406 /* ETM byte counters */ 407 408 { "etm_wr_fmd_bytes", FMD_TYPE_UINT64, 409 "bytes of FMA events sent to FMD" }, 410 { "etm_rd_fmd_bytes", FMD_TYPE_UINT64, 411 "bytes of FMA events rcvd from FMD" }, 412 { "etm_wr_xport_bytes", FMD_TYPE_UINT64, 413 "bytes of FMA events sent to xport" }, 414 { "etm_rd_xport_bytes", FMD_TYPE_UINT64, 415 "bytes of FMA events rcvd from xport" }, 416 417 { "etm_magic_drop_bytes", FMD_TYPE_UINT64, 418 "bytes dropped from xport pre magic num" }, 419 420 /* ETM [dropped] FMA event counters */ 421 422 { "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64, 423 "FMA events rcvd from FMD" }, 424 { "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64, 425 "FMA events sent to FMD" }, 426 427 { "etm_rd_drop_fmaevent", FMD_TYPE_UINT64, 428 "dropped FMA events from xport" }, 429 { "etm_wr_drop_fmaevent", FMD_TYPE_UINT64, 430 "dropped FMA events to xport" }, 431 432 { "etm_rd_dup_fmaevent", FMD_TYPE_UINT64, 433 "duplicate FMA events rcvd from xport" }, 434 { "etm_wr_dup_fmaevent", FMD_TYPE_UINT64, 435 "duplicate FMA events sent to xport" }, 436 437 { "etm_rd_dup_alert", FMD_TYPE_UINT64, 438 "duplicate ALERTs rcvd from xport" }, 439 { "etm_wr_dup_alert", FMD_TYPE_UINT64, 440 "duplicate ALERTs sent to xport" }, 441 442 { "etm_enq_drop_resp_q", FMD_TYPE_UINT64, 443 "dropped response msgs on enq" }, 444 { "etm_deq_drop_resp_q", FMD_TYPE_UINT64, 445 "dropped response msgs on deq" }, 446 447 /* ETM protocol failures */ 448 449 { "etm_magic_bad", FMD_TYPE_UINT64, 450 "ETM msgs w/ invalid magic num" }, 451 { "etm_ver_bad", FMD_TYPE_UINT64, 452 "ETM msgs w/ invalid protocol version" }, 453 { "etm_msgtype_bad", FMD_TYPE_UINT64, 454 "ETM msgs w/ invalid message type" }, 455 { "etm_subtype_bad", FMD_TYPE_UINT64, 456 "ETM msgs w/ invalid sub type" }, 457 { "etm_xid_bad", FMD_TYPE_UINT64, 458 "ETM msgs w/ unmatched xid" }, 459 { "etm_fmaeventlen_bad", FMD_TYPE_UINT64, 460 "ETM msgs w/ invalid FMA event length" }, 461 { "etm_respcode_bad", FMD_TYPE_UINT64, 462 "ETM msgs w/ invalid response code" }, 463 { "etm_timeout_bad", FMD_TYPE_UINT64, 464 "ETM msgs w/ invalid timeout value" }, 465 { "etm_evlens_bad", FMD_TYPE_UINT64, 466 "ETM msgs w/ too many event lengths" }, 467 468 /* IO operation failures */ 469 470 { "etm_xport_wr_fail", FMD_TYPE_UINT64, 471 "xport write failures" }, 472 { "etm_xport_rd_fail", FMD_TYPE_UINT64, 473 "xport read failures" }, 474 { "etm_xport_pk_fail", FMD_TYPE_UINT64, 475 "xport peek failures" }, 476 477 /* IO operation retries */ 478 479 { "etm_xport_wr_retry", FMD_TYPE_UINT64, 480 "xport write retries" }, 481 { "etm_xport_rd_retry", FMD_TYPE_UINT64, 482 "xport read retries" }, 483 { "etm_xport_pk_retry", FMD_TYPE_UINT64, 484 "xport peek retries" }, 485 486 /* system and library failures */ 487 488 { "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64, 489 "nvlist_pack failures" }, 490 { "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64, 491 "nvlist_unpack failures" }, 492 { "etm_os_nvlist_size_fail", FMD_TYPE_UINT64, 493 "nvlist_size failures" }, 494 { "etm_os_pthread_create_fail", FMD_TYPE_UINT64, 495 "pthread_create failures" }, 496 497 /* transport API failures */ 498 499 { "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64, 500 "xport get event addrv API failures" }, 501 { "etm_xport_open_fail", FMD_TYPE_UINT64, 502 "xport open API failures" }, 503 { "etm_xport_close_fail", FMD_TYPE_UINT64, 504 "xport close API failures" }, 505 { "etm_xport_accept_fail", FMD_TYPE_UINT64, 506 "xport accept API failures" }, 507 { "etm_xport_open_retry", FMD_TYPE_UINT64, 508 "xport open API retries" }, 509 510 /* FMD entry point bad arguments */ 511 512 { "etm_fmd_init_badargs", FMD_TYPE_UINT64, 513 "bad arguments from fmd_init entry point" }, 514 { "etm_fmd_fini_badargs", FMD_TYPE_UINT64, 515 "bad arguments from fmd_fini entry point" }, 516 517 /* Alert logging errors */ 518 519 { "etm_log_err", FMD_TYPE_UINT64, 520 "failed to log message to log(7D)" }, 521 { "etm_msg_err", FMD_TYPE_UINT64, 522 "failed to log message to sysmsg(7D)" }, 523 524 /* miscellaneous stats */ 525 526 { "etm_reset_xport", FMD_TYPE_UINT64, 527 "xport resets after xport API failure" } 528 }; 529 530 531 /* 532 * -------------------- global data for Root ldom------------------------- 533 */ 534 535 ldom_hdl_t 536 *etm_lhp = NULL; /* ldom pointer */ 537 538 static void *etm_dl_hdl = (void *)NULL; 539 static const char *etm_dl_path = "libds.so.1"; 540 static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL); 541 542 static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) = 543 (int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL; 544 static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) = 545 (int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL; 546 static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) = 547 (int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL; 548 static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen, 549 size_t *msglen) = 550 (int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL; 551 static int (*etm_ds_fini)(void) = (int (*)(void))NULL; 552 553 static pthread_mutex_t 554 iosvc_list_lock = PTHREAD_MUTEX_INITIALIZER; 555 556 static pthread_t 557 etm_async_e_tid = NULL; /* thread id of io svc async event handler */ 558 559 static etm_proto_v1_ev_hdr_t iosvc_hdr = { 560 ETM_PROTO_MAGIC_NUM, /* magic number */ 561 ETM_PROTO_V1, /* default to V1, not checked */ 562 ETM_MSG_TYPE_FMA_EVENT, /* Root Domain inteoduces only FMA events */ 563 0, /* sub-type */ 564 0, /* pad */ 565 0, /* add the xid at the Q send time */ 566 ETM_PROTO_V1_TIMEOUT_NONE, 567 0 /* ev_lens, 0-termed, after 1 FMA event */ 568 }; 569 570 /* 571 * static iosvc_list 572 */ 573 static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = { 574 {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, 575 {"", 0}, {"", 0} 576 }; 577 578 static etm_iosvc_t io_svc = { 579 "\0", /* ldom_name */ 580 PTHREAD_COND_INITIALIZER, /* nudges */ 581 PTHREAD_MUTEX_INITIALIZER, /* protects the iosvc msg Q */ 582 NULL, /* iosvc msg Q head */ 583 NULL, /* iosvc msg Q tail */ 584 0, /* msg Q current length */ 585 100, /* msg Q max length */ 586 0, /* current transaction id */ 587 0, /* xid of last event posted to FMD */ 588 DS_INVALID_HDL, /* DS handle */ 589 NULL, /* fmd xprt handle */ 590 NULL, /* tid 4 send to remote RootDomain */ 591 NULL, /* tid 4 recv from remote RootDomain */ 592 PTHREAD_COND_INITIALIZER, /* nudges etm_send_to_remote_root */ 593 PTHREAD_MUTEX_INITIALIZER, /* protects msg_ack_cv */ 594 0, /* send/recv threads are not dying */ 595 0, /* flag for start sending msg Q */ 596 0 /* indicate if the ACK has come */ 597 }; 598 etm_iosvc_t *io_svc_p = &io_svc; 599 600 601 static uint32_t 602 flags; /* flags for fmd_xprt_open */ 603 604 static etm_async_event_ele_t 605 async_event_q[ASYNC_EVENT_Q_SIZE]; /* holds the async events */ 606 607 static uint32_t 608 etm_async_q_head = 0; /* ptr to cur head of async event queue */ 609 610 static uint32_t 611 etm_async_q_tail = 0; /* ptr to cur tail of async event queue */ 612 613 static uint32_t 614 etm_async_q_cur_len = 0; /* cur length (ele cnt) of async event queue */ 615 616 static uint32_t 617 etm_async_q_max_len = ASYNC_EVENT_Q_SIZE; 618 /* max length (ele cnt) of async event queue */ 619 620 static pthread_cond_t 621 etm_async_event_q_cv = PTHREAD_COND_INITIALIZER; 622 /* nudges async event handler */ 623 624 static pthread_mutex_t 625 etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER; 626 /* protects async event q */ 627 628 static ds_ver_t 629 etm_iosvc_vers[] = { { 1, 0} }; 630 631 #define ETM_NVERS (sizeof (etm_iosvc_vers) / sizeof (ds_ver_t)) 632 633 static ds_capability_t 634 iosvc_caps = { 635 "ETM", /* svc_id */ 636 etm_iosvc_vers, /* vers */ 637 ETM_NVERS /* number of vers */ 638 }; 639 640 static void 641 etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver, 642 ds_domain_hdl_t did); 643 644 static void 645 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg); 646 647 static ds_ops_t 648 iosvc_ops = { 649 etm_iosvc_reg_handler, /* ds_reg_cb */ 650 etm_iosvc_unreg_handler, /* ds_unreg_cb */ 651 NULL, /* ds_data_cb */ 652 NULL /* cb_arg */ 653 }; 654 655 656 /* 657 * -------------------------- support functions ------------------------------ 658 */ 659 660 /* 661 * Design_Note: Each failure worth reporting to FMD should be done using 662 * a single call to fmd_hdl_error() as it logs an FMA event 663 * for each call. Also be aware that all the fmd_hdl_*() 664 * format strings currently use platform specific *printf() 665 * routines; so "%p" under Solaris does not prepend "0x" to 666 * the outputted hex digits, while Linux and VxWorks do. 667 */ 668 669 670 /* 671 * etm_show_time - display the current time of day (for debugging) using 672 * the given FMD module handle and annotation string 673 */ 674 675 static void 676 etm_show_time(fmd_hdl_t *hdl, char *note_str) 677 { 678 struct timeval tmv; /* timeval */ 679 680 (void) gettimeofday(&tmv, NULL); 681 fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n", 682 note_str, tmv.tv_sec, tmv.tv_usec); 683 684 } /* etm_show_time() */ 685 686 /* 687 * etm_hexdump - hexdump the given buffer (for debugging) using 688 * the given FMD module handle 689 */ 690 691 static void 692 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt) 693 { 694 uint8_t *bp; /* byte ptr */ 695 int i, j; /* index */ 696 char cb[80]; /* char buf */ 697 unsigned int n; /* a byte of data for sprintf() */ 698 699 bp = buf; 700 j = 0; 701 702 /* 703 * Design_Note: fmd_hdl_debug() auto adds a newline if missing; 704 * hence cb exists to accumulate a longer string. 705 */ 706 707 for (i = 1; i <= byte_cnt; i++) { 708 n = *bp++; 709 (void) sprintf(&cb[j], "%2.2x ", n); 710 j += 3; 711 /* add a newline every 16 bytes or at the buffer's end */ 712 if (((i % 16) == 0) || (i >= byte_cnt)) { 713 cb[j-1] = '\0'; 714 fmd_hdl_debug(hdl, "%s\n", cb); 715 j = 0; 716 } 717 } /* for each byte in the buffer */ 718 719 } /* etm_hexdump() */ 720 721 /* 722 * etm_sleep - sleep the caller for the given number of seconds, 723 * return 0 or -errno value 724 * 725 * Design_Note: To avoid interfering with FMD's signal mask (SIGALRM) 726 * do not use [Solaris] sleep(3C) and instead use 727 * pthread_cond_wait() or nanosleep(), both of which 728 * are POSIX spec-ed to leave signal masks alone. 729 * This is needed for Solaris and Linux (domain and SP). 730 */ 731 732 static int 733 etm_sleep(unsigned sleep_sec) 734 { 735 struct timespec tms; /* for nanosleep() */ 736 737 tms.tv_sec = sleep_sec; 738 tms.tv_nsec = 0; 739 740 if (nanosleep(&tms, NULL) < 0) { 741 /* errno assumed set by above call */ 742 return (-errno); 743 } 744 return (0); 745 746 } /* etm_sleep() */ 747 748 /* 749 * etm_conn_open - open a connection to the given transport address, 750 * return 0 and the opened connection handle 751 * or -errno value 752 * 753 * caveats: the err_substr is used in failure cases for calling 754 * fmd_hdl_error() 755 */ 756 757 static int 758 etm_conn_open(fmd_hdl_t *hdl, char *err_substr, 759 etm_xport_addr_t addr, etm_xport_conn_t *connp) 760 { 761 etm_xport_conn_t conn; /* connection to return */ 762 int nev; /* -errno value */ 763 764 if ((conn = etm_xport_open(hdl, addr)) == NULL) { 765 nev = (-errno); 766 fmd_hdl_error(hdl, "error: %s: errno %d\n", 767 err_substr, errno); 768 etm_stats.etm_xport_open_fail.fmds_value.ui64++; 769 return (nev); 770 } else { 771 *connp = conn; 772 return (0); 773 } 774 } /* etm_conn_open() */ 775 776 /* 777 * etm_conn_close - close the given connection, 778 * return 0 or -errno value 779 * 780 * caveats: the err_substr is used in failure cases for calling 781 * fmd_hdl_error() 782 */ 783 784 static int 785 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn) 786 { 787 int nev; /* -errno value */ 788 789 if (etm_xport_close(hdl, conn) == NULL) { 790 nev = (-errno); 791 fmd_hdl_error(hdl, "warning: %s: errno %d\n", 792 err_substr, errno); 793 etm_stats.etm_xport_close_fail.fmds_value.ui64++; 794 return (nev); 795 } else { 796 return (0); 797 } 798 } /* etm_conn_close() */ 799 800 /* 801 * etm_io_op - perform an IO operation on the given connection 802 * with the given buffer, 803 * accommodating MTU size and retrying op if needed, 804 * return how many bytes actually done by the op 805 * or -errno value 806 * 807 * caveats: the err_substr is used in failure cases for calling 808 * fmd_hdl_error() 809 */ 810 811 static ssize_t 812 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn, 813 void *buf, size_t byte_cnt, int io_op) 814 { 815 ssize_t rv; /* ret val / byte count */ 816 ssize_t n; /* gen use */ 817 uint8_t *datap; /* ptr to data */ 818 size_t mtu_sz; /* MTU size in bytes */ 819 int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t, 820 void *, size_t); 821 size_t io_sz; /* byte count for io_func_ptr */ 822 int try_cnt; /* number of tries done */ 823 int sleep_sec; /* exp backoff sleep period in sec */ 824 int sleep_rv; /* ret val from sleeping */ 825 fmd_stat_t io_retry_stat; /* IO retry stat to update */ 826 fmd_stat_t io_fail_stat; /* IO failure stat to update */ 827 828 if ((conn == NULL) || (buf == NULL)) { 829 return (-EINVAL); 830 } 831 switch (io_op) { 832 case ETM_IO_OP_RD: 833 io_func_ptr = etm_xport_read; 834 io_retry_stat = etm_stats.etm_xport_rd_retry; 835 io_fail_stat = etm_stats.etm_xport_rd_fail; 836 break; 837 case ETM_IO_OP_WR: 838 io_func_ptr = etm_xport_write; 839 io_retry_stat = etm_stats.etm_xport_wr_retry; 840 io_fail_stat = etm_stats.etm_xport_wr_fail; 841 break; 842 default: 843 return (-EINVAL); 844 } 845 if (byte_cnt == 0) { 846 return (byte_cnt); /* nop */ 847 } 848 849 /* obtain [current] MTU size */ 850 851 if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) { 852 mtu_sz = ETM_XPORT_MTU_SZ_DEF; 853 } else { 854 mtu_sz = n; 855 } 856 857 /* loop until all IO done, try limit exceeded, or real failure */ 858 859 rv = 0; 860 datap = buf; 861 while (rv < byte_cnt) { 862 io_sz = MIN((byte_cnt - rv), mtu_sz); 863 try_cnt = 0; 864 sleep_sec = 0; 865 866 /* when give up, return -errno value even if partly done */ 867 868 while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) == 869 (-EAGAIN)) { 870 try_cnt++; 871 if (try_cnt > ETM_TRY_MAX_CNT) { 872 rv = n; 873 goto func_ret; 874 } 875 if (etm_is_dying) { 876 rv = (-EINTR); 877 goto func_ret; 878 } 879 if ((sleep_rv = etm_sleep(sleep_sec)) < 0) { 880 rv = sleep_rv; 881 goto func_ret; 882 } 883 sleep_sec = ((sleep_sec == 0) ? 1 : 884 (sleep_sec * ETM_TRY_BACKOFF_RATE)); 885 sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP); 886 io_retry_stat.fmds_value.ui64++; 887 if (etm_debug_lvl >= 1) { 888 fmd_hdl_debug(hdl, "info: retrying io op %d " 889 "due to EAGAIN\n", io_op); 890 } 891 } /* while trying the io operation */ 892 893 if (etm_is_dying) { 894 rv = (-EINTR); 895 goto func_ret; 896 } 897 if (n < 0) { 898 rv = n; 899 goto func_ret; 900 } 901 /* avoid spinning CPU when given 0 bytes but no error */ 902 if (n == 0) { 903 if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) { 904 rv = sleep_rv; 905 goto func_ret; 906 } 907 } 908 rv += n; 909 datap += n; 910 } /* while still have more data */ 911 912 func_ret: 913 914 if (rv < 0) { 915 io_fail_stat.fmds_value.ui64++; 916 fmd_hdl_debug(hdl, "error: %s: errno %d\n", 917 err_substr, (int)(-rv)); 918 } 919 if (etm_debug_lvl >= 3) { 920 fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n", 921 io_op, (int)rv, (int)byte_cnt); 922 } 923 return (rv); 924 925 } /* etm_io_op() */ 926 927 /* 928 * etm_magic_read - read the magic number of an ETM message header 929 * from the given connection into the given buffer, 930 * return 0 or -errno value 931 * 932 * Design_Note: This routine is intended to help protect ETM from protocol 933 * framing errors as might be caused by an SP reset / crash in 934 * the middle of an ETM message send; the connection will be 935 * read from for as many bytes as needed until the magic number 936 * is found using a sliding buffer for comparisons. 937 */ 938 939 static int 940 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr) 941 { 942 int rv; /* ret val */ 943 uint32_t magic_num; /* magic number */ 944 int byte_cnt; /* count of bytes read */ 945 uint8_t buf5[4+1]; /* sliding input buffer */ 946 int i, j; /* indices into buf5 */ 947 ssize_t n; /* gen use */ 948 uint8_t drop_buf[1024]; /* dropped bytes buffer */ 949 950 rv = 0; /* assume success */ 951 magic_num = 0; 952 byte_cnt = 0; 953 j = 0; 954 955 /* magic number bytes are sent in network (big endian) order */ 956 957 while (magic_num != ETM_PROTO_MAGIC_NUM) { 958 if ((n = etm_io_op(hdl, "bad io read on magic", 959 conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) { 960 rv = n; 961 goto func_ret; 962 } 963 byte_cnt++; 964 j = MIN((j + 1), sizeof (magic_num)); 965 if (byte_cnt < sizeof (magic_num)) { 966 continue; 967 } 968 969 if (byte_cnt > sizeof (magic_num)) { 970 etm_stats.etm_magic_drop_bytes.fmds_value.ui64++; 971 i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1); 972 drop_buf[i] = buf5[0]; 973 for (i = 0; i < j; i++) { 974 buf5[i] = buf5[i+1]; 975 } /* for sliding the buffer contents */ 976 } 977 (void) memcpy(&magic_num, &buf5[0], sizeof (magic_num)); 978 magic_num = ntohl(magic_num); 979 } /* for reading bytes until find magic number */ 980 981 func_ret: 982 983 if (byte_cnt != sizeof (magic_num)) { 984 fmd_hdl_debug(hdl, "warning: bad proto frame " 985 "implies corrupt/lost msg(s)\n"); 986 } 987 if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) { 988 i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf)); 989 fmd_hdl_debug(hdl, "info: magic drop hexdump " 990 "first %d of %d bytes:\n", i, 991 byte_cnt - sizeof (magic_num)); 992 etm_hexdump(hdl, drop_buf, i); 993 } 994 995 if (rv == 0) { 996 *magic_ptr = magic_num; 997 } 998 return (rv); 999 1000 } /* etm_magic_read() */ 1001 1002 /* 1003 * etm_hdr_read - allocate, read, and validate a [variable sized] 1004 * ETM message header from the given connection, 1005 * return the allocated ETM message header 1006 * (which is guaranteed to be large enough to reuse as a 1007 * RESPONSE msg hdr) and its size 1008 * or NULL and set errno on failure 1009 */ 1010 1011 static void * 1012 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp) 1013 { 1014 uint8_t *hdrp; /* ptr to header to return */ 1015 size_t hdr_sz; /* sizeof *hdrp */ 1016 etm_proto_v1_pp_t pp; /* protocol preamble */ 1017 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 1018 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1019 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 1020 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 1021 uint32_t *lenp; /* ptr to FMA event length */ 1022 ssize_t i, n; /* gen use */ 1023 uint8_t misc_buf[ETM_MISC_BUF_SZ]; /* for var sized hdrs */ 1024 int dummy_int; /* dummy var to appease lint */ 1025 1026 hdrp = NULL; hdr_sz = 0; 1027 1028 /* read the magic number which starts the protocol preamble */ 1029 1030 if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) { 1031 errno = (-n); 1032 etm_stats.etm_magic_bad.fmds_value.ui64++; 1033 return (NULL); 1034 } 1035 1036 /* read the rest of the protocol preamble all at once */ 1037 1038 if ((n = etm_io_op(hdl, "bad io read on preamble", 1039 conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num), 1040 ETM_IO_OP_RD)) < 0) { 1041 errno = (-n); 1042 return (NULL); 1043 } 1044 1045 /* 1046 * Design_Note: The magic number was already network decoded; but 1047 * some other preamble fields also need to be decoded, 1048 * specifically pp_xid and pp_timeout. The rest of the 1049 * preamble fields are byte sized and hence need no 1050 * decoding. 1051 */ 1052 1053 pp.pp_xid = ntohl(pp.pp_xid); 1054 pp.pp_timeout = ntohl(pp.pp_timeout); 1055 1056 /* sanity check the header as best we can */ 1057 1058 if ((pp.pp_proto_ver < ETM_PROTO_V1) || 1059 (pp.pp_proto_ver > ETM_PROTO_V3)) { 1060 fmd_hdl_error(hdl, "error: bad proto ver %d\n", 1061 (int)pp.pp_proto_ver); 1062 errno = EPROTO; 1063 etm_stats.etm_ver_bad.fmds_value.ui64++; 1064 return (NULL); 1065 } 1066 1067 dummy_int = pp.pp_msg_type; 1068 if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) || 1069 (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) { 1070 fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int); 1071 errno = EBADMSG; 1072 etm_stats.etm_msgtype_bad.fmds_value.ui64++; 1073 return (NULL); 1074 } 1075 1076 /* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */ 1077 1078 if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 1079 1080 ev_hdrp = (void*)&misc_buf[0]; 1081 hdr_sz = sizeof (*ev_hdrp); 1082 (void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp)); 1083 1084 /* sanity check the header's timeout */ 1085 1086 if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) && 1087 (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) { 1088 errno = ETIME; 1089 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1090 return (NULL); 1091 } 1092 1093 /* get all FMA event lengths from the header */ 1094 1095 lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--; 1096 i = -1; /* cnt of length entries preceding 0 */ 1097 do { 1098 i++; lenp++; 1099 if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >= 1100 ETM_MISC_BUF_SZ) { 1101 errno = E2BIG; /* ridiculous size */ 1102 etm_stats.etm_evlens_bad.fmds_value.ui64++; 1103 return (NULL); 1104 } 1105 if ((n = etm_io_op(hdl, "bad io read on event len", 1106 conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) { 1107 errno = (-n); 1108 return (NULL); 1109 } 1110 *lenp = ntohl(*lenp); 1111 1112 } while (*lenp != 0); 1113 i += 0; /* first len already counted by sizeof(ev_hdr) */ 1114 hdr_sz += (i * sizeof (*lenp)); 1115 1116 etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++; 1117 1118 } else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 1119 1120 ctl_hdrp = (void*)&misc_buf[0]; 1121 hdr_sz = sizeof (*ctl_hdrp); 1122 (void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp)); 1123 1124 /* sanity check the header's sub type (control selector) */ 1125 1126 if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) || 1127 (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) { 1128 fmd_hdl_error(hdl, "error: bad ctl sub type %d\n", 1129 (int)ctl_hdrp->ctl_pp.pp_sub_type); 1130 errno = EBADMSG; 1131 etm_stats.etm_subtype_bad.fmds_value.ui64++; 1132 return (NULL); 1133 } 1134 1135 /* get the control length */ 1136 1137 if ((n = etm_io_op(hdl, "bad io read on ctl len", 1138 conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len), 1139 ETM_IO_OP_RD)) < 0) { 1140 errno = (-n); 1141 return (NULL); 1142 } 1143 1144 ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len); 1145 1146 etm_stats.etm_rd_hdr_control.fmds_value.ui64++; 1147 1148 } else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 1149 1150 resp_hdrp = (void*)&misc_buf[0]; 1151 hdr_sz = sizeof (*resp_hdrp); 1152 (void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp)); 1153 1154 /* sanity check the header's timeout */ 1155 1156 if (resp_hdrp->resp_pp.pp_timeout != 1157 ETM_PROTO_V1_TIMEOUT_NONE) { 1158 errno = ETIME; 1159 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1160 return (NULL); 1161 } 1162 1163 /* get the response code and length */ 1164 1165 if ((n = etm_io_op(hdl, "bad io read on resp code+len", 1166 conn, &resp_hdrp->resp_code, 1167 sizeof (resp_hdrp->resp_code) 1168 + sizeof (resp_hdrp->resp_len), 1169 ETM_IO_OP_RD)) < 0) { 1170 errno = (-n); 1171 return (NULL); 1172 } 1173 1174 resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code); 1175 resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len); 1176 1177 etm_stats.etm_rd_hdr_response.fmds_value.ui64++; 1178 1179 } else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 1180 1181 sa_hdrp = (void*)&misc_buf[0]; 1182 hdr_sz = sizeof (*sa_hdrp); 1183 (void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp)); 1184 1185 /* sanity check the header's protocol version */ 1186 1187 if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) { 1188 errno = EPROTO; 1189 etm_stats.etm_ver_bad.fmds_value.ui64++; 1190 return (NULL); 1191 } 1192 1193 /* get the priority and length */ 1194 1195 if ((n = etm_io_op(hdl, "bad io read on sa priority+len", 1196 conn, &sa_hdrp->sa_priority, 1197 sizeof (sa_hdrp->sa_priority) 1198 + sizeof (sa_hdrp->sa_len), 1199 ETM_IO_OP_RD)) < 0) { 1200 errno = (-n); 1201 return (NULL); 1202 } 1203 1204 sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority); 1205 sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len); 1206 1207 etm_stats.etm_rd_hdr_alert.fmds_value.ui64++; 1208 1209 } /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */ 1210 1211 /* 1212 * choose a header size that allows hdr reuse for RESPONSE msgs, 1213 * allocate and populate the message header, and 1214 * return alloc size to caller for later free of hdrp 1215 */ 1216 1217 hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp)); 1218 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1219 (void) memcpy(hdrp, misc_buf, hdr_sz); 1220 1221 if (etm_debug_lvl >= 3) { 1222 fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz); 1223 etm_hexdump(hdl, hdrp, hdr_sz); 1224 } 1225 *szp = hdr_sz; 1226 return (hdrp); 1227 1228 } /* etm_hdr_read() */ 1229 1230 /* 1231 * etm_hdr_write - create and write a [variable sized] ETM message header 1232 * to the given connection appropriate for the given FMA event 1233 * and type of nvlist encoding, 1234 * return the allocated ETM message header and its size 1235 * or NULL and set errno on failure 1236 */ 1237 1238 static void* 1239 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp, 1240 int encoding, size_t *szp) 1241 { 1242 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1243 size_t hdr_sz; /* sizeof *hdrp */ 1244 uint32_t *lenp; /* ptr to FMA event length */ 1245 size_t evsz; /* packed FMA event size */ 1246 ssize_t n; /* gen use */ 1247 1248 /* allocate and populate the message header for 1 FMA event */ 1249 1250 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1251 1252 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1253 1254 /* 1255 * Design_Note: Although the ETM protocol supports it, we do not (yet) 1256 * want responses/ACKs on FMA events that we send. All 1257 * such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE. 1258 */ 1259 1260 hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM; 1261 hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num); 1262 hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1; 1263 hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT; 1264 hdrp->ev_pp.pp_sub_type = 0; 1265 hdrp->ev_pp.pp_rsvd_pad = 0; 1266 hdrp->ev_pp.pp_xid = etm_xid_cur; 1267 hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid); 1268 etm_xid_cur += ETM_XID_INC; 1269 hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 1270 hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout); 1271 1272 lenp = &hdrp->ev_lens[0]; 1273 1274 if ((n = nvlist_size(evp, &evsz, encoding)) != 0) { 1275 errno = n; 1276 fmd_hdl_free(hdl, hdrp, hdr_sz); 1277 etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++; 1278 return (NULL); 1279 } 1280 1281 /* indicate 1 FMA event, network encode its length, and 0-terminate */ 1282 1283 etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1; 1284 1285 *lenp = evsz; *lenp = htonl(*lenp); lenp++; 1286 *lenp = 0; *lenp = htonl(*lenp); lenp++; 1287 1288 /* 1289 * write the network encoded header to the transport, and 1290 * return alloc size to caller for later free 1291 */ 1292 1293 if ((n = etm_io_op(hdl, "bad io write on event hdr", 1294 conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) { 1295 errno = (-n); 1296 fmd_hdl_free(hdl, hdrp, hdr_sz); 1297 return (NULL); 1298 } 1299 1300 *szp = hdr_sz; 1301 return (hdrp); 1302 1303 } /* etm_hdr_write() */ 1304 1305 /* 1306 * etm_post_to_fmd - post the given FMA event to FMD 1307 * via a FMD transport API call, 1308 * return 0 or -errno value 1309 * 1310 * caveats: the FMA event (evp) is freed by FMD, 1311 * thus callers of this function should 1312 * immediately discard any ptr they have to the 1313 * nvlist without freeing or dereferencing it 1314 */ 1315 1316 static int 1317 etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp) 1318 { 1319 ssize_t ev_sz; /* sizeof *evp */ 1320 1321 (void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR); 1322 1323 if (etm_debug_lvl >= 2) { 1324 etm_show_time(hdl, "ante ev post"); 1325 } 1326 fmd_xprt_post(hdl, fmd_xprt, evp, 0); 1327 etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++; 1328 etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz; 1329 if (etm_debug_lvl >= 1) { 1330 fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp); 1331 } 1332 if (etm_debug_lvl >= 2) { 1333 etm_show_time(hdl, "post ev post"); 1334 } 1335 return (0); 1336 1337 } /* etm_post_to_fmd() */ 1338 1339 /* 1340 * Ideally we would just use syslog(3C) for outputting our messages. 1341 * Unfortunately, as this module is running within the FMA daemon context, 1342 * that would create the situation where this module's openlog() would 1343 * have the monopoly on syslog(3C) for the daemon and all its modules. 1344 * To avoid that situation, this module uses the same logic as the 1345 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D) 1346 * devices for syslog and console. 1347 */ 1348 1349 static int 1350 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz, 1351 uint8_t *body_buf) 1352 { 1353 char *sysmessage; /* Formatted message */ 1354 size_t formatlen; /* maximum length of sysmessage */ 1355 struct strbuf ctl, dat; /* structs pushed to the logfd */ 1356 uint32_t msgid; /* syslog message ID number */ 1357 1358 if ((syslog_file == 0) && (syslog_cons == 0)) { 1359 return (0); 1360 } 1361 1362 if (etm_debug_lvl >= 2) { 1363 etm_show_time(hdl, "ante syslog post"); 1364 } 1365 1366 formatlen = body_sz + 64; /* +64 for prefix strings added below */ 1367 sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP); 1368 1369 if (syslog_file) { 1370 STRLOG_MAKE_MSGID(body_buf, msgid); 1371 (void) snprintf(sysmessage, formatlen, 1372 "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid, 1373 body_buf); 1374 1375 syslog_ctl.pri = syslog_facility | priority; 1376 1377 ctl.buf = (void *)&syslog_ctl; 1378 ctl.len = sizeof (syslog_ctl); 1379 1380 dat.buf = sysmessage; 1381 dat.len = strlen(sysmessage) + 1; 1382 1383 if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) { 1384 fmd_hdl_debug(hdl, "putmsg failed: %s\n", 1385 strerror(errno)); 1386 etm_stats.etm_log_err.fmds_value.ui64++; 1387 } 1388 } 1389 1390 if (syslog_cons) { 1391 (void) snprintf(sysmessage, formatlen, 1392 "SC Alert: %s\r\n", body_buf); 1393 1394 dat.buf = sysmessage; 1395 dat.len = strlen(sysmessage) + 1; 1396 1397 if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) { 1398 fmd_hdl_debug(hdl, "write failed: %s\n", 1399 strerror(errno)); 1400 etm_stats.etm_msg_err.fmds_value.ui64++; 1401 } 1402 } 1403 1404 fmd_hdl_free(hdl, sysmessage, formatlen); 1405 1406 if (etm_debug_lvl >= 2) { 1407 etm_show_time(hdl, "post syslog post"); 1408 } 1409 1410 return (0); 1411 } 1412 1413 1414 /* 1415 * etm_req_ver_negot - send an ETM control message to the other end requesting 1416 * that the ETM protocol version be negotiated/set 1417 */ 1418 1419 static void 1420 etm_req_ver_negot(fmd_hdl_t *hdl) 1421 { 1422 etm_xport_addr_t *addrv; /* default dst addr(s) */ 1423 etm_xport_conn_t conn; /* connection to other end */ 1424 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1425 size_t hdr_sz; /* sizeof header */ 1426 uint8_t *body_buf; /* msg body buffer */ 1427 uint32_t body_sz; /* sizeof *body_buf */ 1428 ssize_t i; /* gen use */ 1429 1430 /* populate an ETM control msg to send */ 1431 1432 hdr_sz = sizeof (*ctl_hdrp); 1433 body_sz = (3 + 1); /* version bytes plus null byte */ 1434 1435 ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP); 1436 1437 ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM); 1438 ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1; 1439 ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL; 1440 ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ; 1441 ctl_hdrp->ctl_pp.pp_rsvd_pad = 0; 1442 etm_xid_ver_negot = etm_xid_cur; 1443 etm_xid_cur += ETM_XID_INC; 1444 ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot); 1445 ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER); 1446 ctl_hdrp->ctl_len = htonl(body_sz); 1447 1448 body_buf = (void*)&ctl_hdrp->ctl_len; 1449 body_buf += sizeof (ctl_hdrp->ctl_len); 1450 *body_buf++ = ETM_PROTO_V3; 1451 *body_buf++ = ETM_PROTO_V2; 1452 *body_buf++ = ETM_PROTO_V1; 1453 *body_buf++ = '\0'; 1454 1455 /* 1456 * open and close a connection to send the ETM control msg 1457 * to any/all of the default dst addrs 1458 */ 1459 1460 if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) { 1461 fmd_hdl_error(hdl, 1462 "error: bad ctl dst addrs errno %d\n", errno); 1463 etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++; 1464 goto func_ret; 1465 } 1466 1467 for (i = 0; addrv[i] != NULL; i++) { 1468 1469 if (etm_conn_open(hdl, "bad conn open during ver negot", 1470 addrv[i], &conn) < 0) { 1471 continue; 1472 } 1473 if (etm_io_op(hdl, "bad io write on ctl hdr+body", 1474 conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) { 1475 etm_stats.etm_wr_hdr_control.fmds_value.ui64++; 1476 etm_stats.etm_wr_body_control.fmds_value.ui64++; 1477 } 1478 (void) etm_conn_close(hdl, "bad conn close during ver negot", 1479 conn); 1480 1481 } /* foreach dst addr */ 1482 1483 func_ret: 1484 1485 if (addrv != NULL) { 1486 etm_xport_free_addrv(hdl, addrv); 1487 } 1488 fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz); 1489 1490 } /* etm_req_ver_negot() */ 1491 1492 1493 1494 /* 1495 * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue 1496 * etm_iosvc_msg_deq - del element from head of ETM iosvc msg queue 1497 * need to grab the mutex lock before calling this routine 1498 * return >0 for success, or -errno value 1499 */ 1500 static int 1501 etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1502 { 1503 etm_iosvc_q_ele_t *newp; /* ptr to new msg q ele */ 1504 1505 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1506 fmd_hdl_debug(hdl, "warning: enq to full msg queue\n"); 1507 return (-E2BIG); 1508 } 1509 1510 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 1511 (void) memcpy(newp, msgp, sizeof (*newp)); 1512 newp->msg_nextp = NULL; 1513 1514 if (iosvc->msg_q_cur_len == 0) { 1515 iosvc->msg_q_head = newp; 1516 } else { 1517 iosvc->msg_q_tail->msg_nextp = newp; 1518 } 1519 1520 iosvc->msg_q_tail = newp; 1521 iosvc->msg_q_cur_len++; 1522 fmd_hdl_debug(hdl, "info: current msg queue length %d\n", 1523 iosvc->msg_q_cur_len); 1524 1525 return (1); 1526 1527 } /* etm_iosvc_msg_enq() */ 1528 1529 static int 1530 etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1531 { 1532 etm_iosvc_q_ele_t *oldp; /* ptr to old msg q ele */ 1533 1534 if (iosvc->msg_q_cur_len == 0) { 1535 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 1536 return (-ENOENT); 1537 } 1538 1539 (void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp)); 1540 msgp->msg_nextp = NULL; 1541 1542 oldp = iosvc->msg_q_head; 1543 iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp; 1544 1545 /* 1546 * free the mem alloc-ed in etm_iosvc_msg_enq() 1547 */ 1548 fmd_hdl_free(hdl, oldp, sizeof (*oldp)); 1549 1550 iosvc->msg_q_cur_len--; 1551 if (iosvc->msg_q_cur_len == 0) { 1552 iosvc->msg_q_tail = NULL; 1553 } 1554 1555 return (1); 1556 1557 } /* etm_iosvc_msg_deq() */ 1558 1559 1560 /* 1561 * etm_msg_enq_head(): 1562 * enq the msg to the head of the Q. 1563 * If the Q is full, drop the msg at the tail then enq the msg at head. 1564 * need to grab mutex lock iosvc->msg_q_lock before calling this routine. 1565 */ 1566 static void 1567 etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1568 etm_iosvc_q_ele_t *msg_ele) 1569 { 1570 1571 etm_iosvc_q_ele_t *newp; /* iosvc msg ele ptr */ 1572 1573 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1574 fmd_hdl_debug(fmd_hdl, 1575 "warning: add to head of a full msg queue." 1576 " Drop the msg at the tail\n"); 1577 /* 1578 * drop the msg at the tail 1579 */ 1580 newp = iosvc->msg_q_head; 1581 while (newp->msg_nextp != iosvc->msg_q_tail) { 1582 newp = newp->msg_nextp; 1583 } 1584 1585 /* 1586 * free the msg in iosvc->msg_q_tail->msg 1587 * free the mem pointed to by iosvc->msg_q_tail 1588 */ 1589 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg, 1590 iosvc->msg_q_tail->msg_size); 1591 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp)); 1592 iosvc->msg_q_tail = newp; 1593 iosvc->msg_q_tail->msg_nextp = NULL; 1594 iosvc->msg_q_cur_len--; 1595 } 1596 1597 /* 1598 * enq the msg to the head 1599 */ 1600 newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP); 1601 (void) memcpy(newp, msg_ele, sizeof (*newp)); 1602 if (iosvc->msg_q_cur_len == 0) { 1603 newp->msg_nextp = NULL; 1604 iosvc->msg_q_tail = newp; 1605 } else { 1606 newp->msg_nextp = iosvc->msg_q_head; 1607 } 1608 iosvc->msg_q_head = newp; 1609 iosvc->msg_q_cur_len++; 1610 } /* etm_msg_enq_head() */ 1611 1612 /* 1613 * etm_iosvc_cleanup(): 1614 * Clean up an iosvc structure 1615 * 1) close the fmd_xprt if it has not been closed 1616 * 2) Terminate the send/revc threads 1617 * 3) If the clean_msg_q flag is set, free all fma events in the queue. In 1618 * addition, if the chpt_remove flag is set, delete the checkpoint so that 1619 * the events are not persisted. 1620 */ 1621 static void 1622 etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, boolean_t clean_msg_q, 1623 boolean_t ckpt_remove) 1624 { 1625 1626 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1627 1628 iosvc->thr_is_dying = 1; 1629 1630 iosvc->ds_hdl = DS_INVALID_HDL; 1631 if (iosvc->fmd_xprt != NULL) { 1632 fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt); 1633 iosvc->fmd_xprt = NULL; 1634 } /* if fmd-xprt has been opened */ 1635 1636 if (iosvc->send_tid != NULL) { 1637 fmd_thr_signal(fmd_hdl, iosvc->send_tid); 1638 fmd_thr_destroy(fmd_hdl, iosvc->send_tid); 1639 iosvc->send_tid = NULL; 1640 } /* if io svc send thread was created ok */ 1641 1642 if (iosvc->recv_tid != NULL) { 1643 fmd_thr_signal(fmd_hdl, iosvc->recv_tid); 1644 fmd_thr_destroy(fmd_hdl, iosvc->recv_tid); 1645 iosvc->recv_tid = NULL; 1646 } /* if root domain recv thread was created */ 1647 1648 1649 if (clean_msg_q) { 1650 iosvc->ldom_name[0] = '\0'; 1651 1652 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1653 while (iosvc->msg_q_cur_len > 0) { 1654 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele); 1655 if (ckpt_remove == B_TRUE && 1656 msg_ele.ckpt_flag != ETM_CKPT_NOOP) { 1657 etm_ckpt_remove(fmd_hdl, &msg_ele); 1658 } 1659 fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size); 1660 } 1661 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1662 } 1663 1664 return; 1665 1666 } /* etm_iosvc_cleanup() */ 1667 1668 /* 1669 * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty) 1670 * not found, create one, add to iosvc_list 1671 */ 1672 etm_iosvc_t * 1673 etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl, 1674 boolean_t iosvc_create) 1675 { 1676 uint32_t i; /* for loop var */ 1677 int32_t first_empty_slot = -1; /* remember that */ 1678 1679 for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) { 1680 if (ldom_name[0] == '\0') { 1681 /* 1682 * search by hdl passed in 1683 * the only time this is used is at ds_unreg_cb time. 1684 * there is no ldom name, only the valid ds_hdl. 1685 * find an iosvc with the matching ds_hdl. 1686 * ignore the iosvc_create flag, should never need to 1687 * create an iosvc for ds_unreg_cb 1688 */ 1689 if (ds_hdl == iosvc_list[i].ds_hdl) { 1690 if (etm_debug_lvl >= 2) { 1691 fmd_hdl_debug(fmd_hdl, 1692 "info: found an iosvc at slot %d w/ ds_hdl %d \n", 1693 i, iosvc_list[i].ds_hdl); 1694 } 1695 if (iosvc_list[i].ldom_name[0] != '\0') 1696 if (etm_debug_lvl >= 2) { 1697 fmd_hdl_debug(fmd_hdl, 1698 "info: found an iosvc w/ ldom_name %s \n", 1699 iosvc_list[i].ldom_name); 1700 } 1701 return (&iosvc_list[i]); 1702 } else { 1703 continue; 1704 } 1705 } else if (iosvc_list[i].ldom_name[0] != '\0') { 1706 /* 1707 * this is an non-empty iosvc structure slot 1708 */ 1709 if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) { 1710 /* 1711 * found an iosvc structure that matches the 1712 * passed in ldom_name, return the ptr 1713 */ 1714 if (etm_debug_lvl >= 2) { 1715 fmd_hdl_debug(fmd_hdl, "info: found an " 1716 "iosvc at slot %d w/ ds_hdl %d \n", 1717 i, iosvc_list[i].ds_hdl); 1718 fmd_hdl_debug(fmd_hdl, "info: found an " 1719 "iosvc w/ ldom_name %s \n", 1720 iosvc_list[i].ldom_name); 1721 } 1722 return (&iosvc_list[i]); 1723 } else { 1724 /* 1725 * non-empty slot with no-matching name, 1726 * move on to next slot. 1727 */ 1728 continue; 1729 } 1730 } else { 1731 /* 1732 * found the 1st slot with ldom name being empty 1733 * remember the slot #, will be used for creating one 1734 */ 1735 if (first_empty_slot == -1) { 1736 first_empty_slot = i; 1737 } 1738 } 1739 } 1740 if (iosvc_create == B_TRUE && first_empty_slot >= 0) { 1741 /* 1742 * this is the case we need to add an iosvc at first_empty_slot 1743 * for the ldom_name at iosvc_list[first_empty_slot] 1744 */ 1745 fmd_hdl_debug(fmd_hdl, 1746 "info: create an iosvc with ldom name %s\n", 1747 ldom_name); 1748 i = first_empty_slot; 1749 (void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t)); 1750 (void) strcpy(iosvc_list[i].ldom_name, ldom_name); 1751 fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n", 1752 i, iosvc_list[i].ldom_name); 1753 return (&iosvc_list[i]); 1754 } else { 1755 return (NULL); 1756 } 1757 1758 } /* etm_iosvc_lookup() */ 1759 1760 1761 /* 1762 * etm_ckpt_remove: 1763 * remove the ckpt for the iosvc element 1764 */ 1765 static void 1766 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) { 1767 int err; /* temp error */ 1768 nvlist_t *evp = NULL; /* event pointer */ 1769 etm_proto_v1_ev_hdr_t *hdrp; /* hdr for FMA_EVENT */ 1770 char *buf; /* packed event pointer */ 1771 1772 if ((ele->ckpt_flag == ETM_CKPT_NOOP) || 1773 (etm_ldom_type != LDOM_TYPE_CONTROL)) { 1774 return; 1775 } 1776 1777 /* the pointer to the packed event in the etm message */ 1778 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg); 1779 buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp) 1780 + (1 * sizeof (hdrp->ev_lens[0]))); 1781 1782 /* unpack it, then uncheckpoited it */ 1783 if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) { 1784 fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err); 1785 return; 1786 } 1787 (void) etm_ckpt_delete(hdl, evp); 1788 nvlist_free(evp); 1789 } 1790 1791 /* 1792 * etm_send_ds_msg() 1793 * call ds_send_msg() to send the msg passed in. 1794 * timedcond_wait for the ACK to come back. 1795 * if the ACK doesn't come in the specified time, retrun -EAGAIN. 1796 * other wise, return 1. 1797 */ 1798 int 1799 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc, 1800 etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp) 1801 { 1802 uint32_t rc; /* for return code */ 1803 1804 struct timeval tv; 1805 struct timespec timeout; 1806 1807 1808 /* 1809 * call ds_send_msg(). Return (-EAGAIN) if not successful 1810 */ 1811 if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg, 1812 msg_ele->msg_size)) != 0) { 1813 fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n", 1814 rc, evhdrp->ev_pp.pp_xid); 1815 return (-EAGAIN); 1816 } 1817 1818 /* 1819 * wait on the cv for resp msg for cur_send_xid 1820 */ 1821 (void *) pthread_mutex_lock(&iosvc->msg_ack_lock); 1822 1823 (void) gettimeofday(&tv, 0); 1824 timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time; 1825 timeout.tv_nsec = 0; 1826 1827 fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n", 1828 iosvc->ldom_name); 1829 rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock, 1830 &timeout); 1831 (void *) pthread_mutex_unlock(&iosvc->msg_ack_lock); 1832 fmd_hdl_debug(fmd_hdl, "info: msg_ack_cv returns with rc %d\n", rc); 1833 1834 /* 1835 * check to see if ack_ok is non-zero 1836 * if non-zero, resp msg has been received 1837 */ 1838 if (iosvc->ack_ok != 0) { 1839 /* 1840 * ACK came ok, this send is successful, 1841 * tell the caller ready to send next. 1842 * free mem alloc-ed in 1843 * etm_pack_ds_msg 1844 */ 1845 if (ckpt_remove == B_TRUE && 1846 etm_ldom_type == LDOM_TYPE_CONTROL) { 1847 etm_ckpt_remove(fmd_hdl, msg_ele); 1848 } 1849 fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size); 1850 iosvc->cur_send_xid++; 1851 return (1); 1852 } else { 1853 /* 1854 * the ACK did not come on time 1855 * tell the caller to resend cur_send_xid 1856 */ 1857 return (-EAGAIN); 1858 } /* iosvc->ack_ok != 0 */ 1859 } /* etm_send_ds_msg() */ 1860 1861 /* 1862 * both events from fmdo_send entry point and from SP are using the 1863 * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all 1864 * ds send/recv msgs. 1865 * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send 1866 * entry point can be called before FMA events from SP, we can't rely on 1867 * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send 1868 * events. 1869 * return >0 for success, or -errno value 1870 * Design assumption: there is one FMA event per ds msg 1871 */ 1872 int 1873 etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1874 etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp, 1875 etm_pack_msg_type_t msg_type, uint_t ckpt_opt) 1876 { 1877 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1878 uint32_t *lenp; /* ptr to FMA event length */ 1879 size_t evsz; /* packed FMA event size */ 1880 char *buf; 1881 uint32_t rc; /* for return code */ 1882 char *msg; /* body of msg to be Qed */ 1883 1884 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1885 etm_proto_v1_ev_hdr_t *evhdrp; 1886 1887 1888 if (ev_hdrp == NULL) { 1889 hdrp = &iosvc_hdr; 1890 } else { 1891 hdrp = ev_hdrp; 1892 } 1893 1894 /* 1895 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz 1896 */ 1897 1898 if (hdr_sz == 0) { 1899 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1900 } 1901 1902 /* 1903 * determine evp size 1904 */ 1905 (void) nvlist_size(evp, &evsz, NV_ENCODE_XDR); 1906 1907 /* indicate 1 FMA event, no network encoding, and 0-terminate */ 1908 lenp = &hdrp->ev_lens[0]; 1909 *lenp = evsz; 1910 1911 /* 1912 * now the total of mem needs to be alloc-ed/ds msg size is 1913 * hdr_sz + evsz 1914 * msg will be freed in etm_send_to_remote_root() after ds_send_msg() 1915 */ 1916 msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP); 1917 1918 1919 /* 1920 * copy hdr, 0 terminate the length vector, and then evp 1921 */ 1922 (void) memcpy(msg, hdrp, sizeof (*hdrp)); 1923 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg); 1924 lenp = &hdrp->ev_lens[0]; 1925 lenp++; 1926 *lenp = 0; 1927 1928 buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP); 1929 (void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0); 1930 (void) memcpy(msg + hdr_sz, buf, evsz); 1931 fmd_hdl_free(fmd_hdl, buf, evsz); 1932 1933 fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg" 1934 "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name); 1935 msg_ele.msg = msg; 1936 msg_ele.msg_size = hdr_sz + evsz; 1937 msg_ele.ckpt_flag = ckpt_opt; 1938 1939 /* 1940 * decide what to do with the msg: 1941 * if SP ereports (msg_type == SP_MSG), always enq the msg 1942 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after 1943 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1) 1944 */ 1945 if ((msg_type == SP_MSG) || 1946 (msg_type != SP_MSG) && (iosvc->start_sending_Q == 1)) { 1947 /* 1948 * this is the case when the msg needs to be enq-ed 1949 */ 1950 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1951 rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele); 1952 if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) && 1953 (etm_ldom_type == LDOM_TYPE_CONTROL)) { 1954 (void) etm_ckpt_add(fmd_hdl, evp); 1955 } 1956 if (iosvc->msg_q_cur_len == 1) 1957 (void) pthread_cond_signal(&iosvc->msg_q_cv); 1958 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1959 } else { 1960 /* 1961 * fmd RDWR xprt procotol startup msgs, send it now! 1962 */ 1963 iosvc->ack_ok = 0; 1964 evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg); 1965 evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 1966 while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL && 1967 !etm_is_dying) { 1968 if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele, 1969 evhdrp) < 0) { 1970 continue; 1971 } 1972 } 1973 if (msg_type == FMD_XPRT_RUN_MSG) 1974 iosvc->start_sending_Q = 1; 1975 } 1976 1977 return (rc); 1978 1979 } /* etm_pack_ds_msg() */ 1980 1981 /* 1982 * Design_Note: For all etm_resp_q_*() functions and etm_resp_q_* globals, 1983 * the mutex etm_resp_q_lock must be held by the caller. 1984 */ 1985 1986 /* 1987 * etm_resp_q_enq - add element to tail of ETM responder queue 1988 * etm_resp_q_deq - del element from head of ETM responder queue 1989 * 1990 * return >0 for success, or -errno value 1991 */ 1992 1993 static int 1994 etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 1995 { 1996 etm_resp_q_ele_t *newp; /* ptr to new resp q ele */ 1997 1998 if (etm_resp_q_cur_len >= etm_resp_q_max_len) { 1999 fmd_hdl_debug(hdl, "warning: enq to full responder queue\n"); 2000 etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++; 2001 return (-E2BIG); 2002 } 2003 2004 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 2005 (void) memcpy(newp, rqep, sizeof (*newp)); 2006 newp->rqe_nextp = NULL; 2007 2008 if (etm_resp_q_cur_len == 0) { 2009 etm_resp_q_head = newp; 2010 } else { 2011 etm_resp_q_tail->rqe_nextp = newp; 2012 } 2013 etm_resp_q_tail = newp; 2014 etm_resp_q_cur_len++; 2015 etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len; 2016 2017 return (1); 2018 2019 } /* etm_resp_q_enq() */ 2020 2021 static int 2022 etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 2023 { 2024 etm_resp_q_ele_t *oldp; /* ptr to old resp q ele */ 2025 2026 if (etm_resp_q_cur_len == 0) { 2027 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 2028 etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++; 2029 return (-ENOENT); 2030 } 2031 2032 (void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep)); 2033 rqep->rqe_nextp = NULL; 2034 2035 oldp = etm_resp_q_head; 2036 etm_resp_q_head = etm_resp_q_head->rqe_nextp; 2037 fmd_hdl_free(hdl, oldp, sizeof (*oldp)); 2038 2039 etm_resp_q_cur_len--; 2040 etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len; 2041 if (etm_resp_q_cur_len == 0) { 2042 etm_resp_q_tail = NULL; 2043 } 2044 2045 return (1); 2046 2047 } /* etm_resp_q_deq() */ 2048 2049 /* 2050 * etm_maybe_enq_response - check the given message header to see 2051 * whether a response has been requested, 2052 * if so then enqueue the given connection 2053 * and header for later transport by the 2054 * responder thread as an ETM response msg, 2055 * return 0 for nop, >0 success, or -errno value 2056 */ 2057 2058 static ssize_t 2059 etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn, 2060 void *hdrp, uint32_t hdr_sz, int32_t resp_code) 2061 { 2062 ssize_t rv; /* ret val */ 2063 etm_proto_v1_pp_t *ppp; /* protocol preamble ptr */ 2064 uint8_t orig_msg_type; /* orig hdr's message type */ 2065 uint32_t orig_timeout; /* orig hdr's timeout */ 2066 etm_resp_q_ele_t rqe; /* responder queue ele */ 2067 2068 ppp = hdrp; 2069 orig_msg_type = ppp->pp_msg_type; 2070 orig_timeout = ppp->pp_timeout; 2071 2072 /* bail out now if no response is to be sent */ 2073 2074 if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) { 2075 return (0); 2076 } /* if a nop */ 2077 2078 if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) && 2079 (orig_msg_type != ETM_MSG_TYPE_ALERT) && 2080 (orig_msg_type != ETM_MSG_TYPE_CONTROL)) { 2081 fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n", 2082 orig_msg_type); 2083 return (-EINVAL); 2084 } /* if inappropriate hdr for a response msg */ 2085 2086 /* 2087 * enqueue the msg hdr and nudge the responder thread 2088 * if the responder queue was previously empty 2089 */ 2090 2091 rqe.rqe_conn = conn; 2092 rqe.rqe_hdrp = hdrp; 2093 rqe.rqe_hdr_sz = hdr_sz; 2094 rqe.rqe_resp_code = resp_code; 2095 2096 (void) pthread_mutex_lock(&etm_resp_q_lock); 2097 2098 if (etm_resp_q_cur_len == etm_resp_q_max_len) 2099 (void) pthread_cond_wait(&etm_resp_q_cv, &etm_resp_q_lock); 2100 2101 rv = etm_resp_q_enq(hdl, &rqe); 2102 if (etm_resp_q_cur_len == 1) 2103 (void) pthread_cond_signal(&etm_resp_q_cv); 2104 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2105 2106 return (rv); 2107 2108 } /* etm_maybe_enq_response() */ 2109 2110 /* 2111 * Design_Note: We rely on the fact that all message types have 2112 * a common protocol preamble; if this fact should 2113 * ever change it may break the code below. We also 2114 * rely on the fact that FMA_EVENT and CONTROL headers 2115 * returned by etm_hdr_read() will be sized large enough 2116 * to reuse them as RESPONSE headers if the remote endpt 2117 * asked for a response via the pp_timeout field. 2118 */ 2119 2120 /* 2121 * etm_send_response - use the given message header and response code 2122 * to construct an appropriate response message, 2123 * and send it back on the given connection, 2124 * return >0 for success, or -errno value 2125 */ 2126 2127 static ssize_t 2128 etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn, 2129 void *hdrp, int32_t resp_code) 2130 { 2131 ssize_t rv; /* ret val */ 2132 etm_proto_v1_pp_t *ppp; /* protocol preamble ptr */ 2133 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2134 uint8_t resp_body[4]; /* response body if needed */ 2135 uint8_t *resp_msg; /* response hdr+body */ 2136 size_t hdr_sz; /* sizeof response hdr */ 2137 uint8_t orig_msg_type; /* orig hdr's message type */ 2138 2139 ppp = hdrp; 2140 orig_msg_type = ppp->pp_msg_type; 2141 2142 if (etm_debug_lvl >= 2) { 2143 etm_show_time(hdl, "ante resp send"); 2144 } 2145 2146 /* reuse the given header as a response header */ 2147 2148 resp_hdrp = hdrp; 2149 resp_hdrp->resp_code = resp_code; 2150 resp_hdrp->resp_len = 0; /* default is empty body */ 2151 2152 if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) && 2153 (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) { 2154 resp_body[0] = ETM_PROTO_V2; 2155 resp_body[1] = ETM_PROTO_V3; 2156 resp_body[2] = 0; 2157 resp_hdrp->resp_len = 3; 2158 } /* if should send our/negotiated proto ver in resp body */ 2159 2160 /* respond with the proto ver that was negotiated */ 2161 2162 resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver; 2163 resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE; 2164 resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 2165 2166 /* 2167 * send the whole response msg in one write, header and body; 2168 * avoid the alloc-and-copy if we can reuse the hdr as the msg, 2169 * ie, if the body is empty. update the response stats. 2170 */ 2171 2172 hdr_sz = sizeof (etm_proto_v1_resp_hdr_t); 2173 2174 resp_msg = hdrp; 2175 if (resp_hdrp->resp_len > 0) { 2176 resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len, 2177 FMD_SLEEP); 2178 (void) memcpy(resp_msg, resp_hdrp, hdr_sz); 2179 (void) memcpy(resp_msg + hdr_sz, resp_body, 2180 resp_hdrp->resp_len); 2181 } 2182 2183 (void) pthread_mutex_lock(&etm_write_lock); 2184 rv = etm_io_op(hdl, "bad io write on resp msg", conn, 2185 resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR); 2186 (void) pthread_mutex_unlock(&etm_write_lock); 2187 if (rv < 0) { 2188 goto func_ret; 2189 } 2190 2191 etm_stats.etm_wr_hdr_response.fmds_value.ui64++; 2192 etm_stats.etm_wr_body_response.fmds_value.ui64++; 2193 2194 fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport " 2195 "xid 0x%x code %d len %u\n", 2196 (unsigned int)resp_hdrp->resp_pp.pp_proto_ver, 2197 resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code, 2198 resp_hdrp->resp_len); 2199 func_ret: 2200 2201 if (resp_hdrp->resp_len > 0) { 2202 fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len); 2203 } 2204 if (etm_debug_lvl >= 2) { 2205 etm_show_time(hdl, "post resp send"); 2206 } 2207 return (rv); 2208 2209 } /* etm_send_response() */ 2210 2211 /* 2212 * etm_reset_xport - reset the transport layer (via fini;init) 2213 * presumably for an error condition we cannot 2214 * otherwise recover from (ex: hung LDC channel) 2215 * 2216 * caveats - no checking/locking is done to ensure an existing connection 2217 * is idle during an xport reset; we don't want to deadlock 2218 * and presumably the transport is stuck/unusable anyway 2219 */ 2220 2221 static void 2222 etm_reset_xport(fmd_hdl_t *hdl) 2223 { 2224 (void) etm_xport_fini(hdl); 2225 (void) etm_xport_init(hdl); 2226 etm_stats.etm_reset_xport.fmds_value.ui64++; 2227 2228 } /* etm_reset_xport() */ 2229 2230 /* 2231 * etm_handle_new_conn - receive an ETM message sent from the other end via 2232 * the given open connection, pull out any FMA events 2233 * and post them to the local FMD (or handle any ETM 2234 * control or response msg); when done, close the 2235 * connection 2236 */ 2237 2238 static void 2239 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn) 2240 { 2241 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 2242 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 2243 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2244 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 2245 etm_iosvc_t *iosvc; /* iosvc data structure */ 2246 int32_t resp_code; /* response code */ 2247 ssize_t enq_rv; /* resp_q enqueue status */ 2248 size_t hdr_sz; /* sizeof header */ 2249 size_t evsz; /* FMA event size */ 2250 uint8_t *body_buf; /* msg body buffer */ 2251 uint32_t body_sz; /* sizeof body_buf */ 2252 uint32_t ev_cnt; /* count of FMA events */ 2253 uint8_t *bp; /* byte ptr within body_buf */ 2254 nvlist_t *evp; /* ptr to unpacked FMA event */ 2255 char *class; /* FMA event class */ 2256 ssize_t i, n; /* gen use */ 2257 int should_reset_xport; /* bool to reset xport */ 2258 char ldom_name[MAX_LDOM_NAME]; /* ldom name */ 2259 int rc; /* return code */ 2260 uint64_t did; /* domain id */ 2261 2262 2263 if (etm_debug_lvl >= 2) { 2264 etm_show_time(hdl, "ante conn handle"); 2265 } 2266 fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn); 2267 2268 should_reset_xport = 0; 2269 ev_hdrp = NULL; 2270 ctl_hdrp = NULL; 2271 resp_hdrp = NULL; 2272 sa_hdrp = NULL; 2273 body_buf = NULL; 2274 class = NULL; 2275 evp = NULL; 2276 resp_code = 0; /* default is success */ 2277 enq_rv = 0; /* default is nop, ie, did not enqueue */ 2278 2279 /* read a network decoded message header from the connection */ 2280 2281 if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) { 2282 /* errno assumed set by above call */ 2283 should_reset_xport = (errno == ENOTACTIVE); 2284 fmd_hdl_debug(hdl, "error: FMA event dropped: " 2285 "bad hdr read errno %d\n", errno); 2286 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2287 goto func_ret; 2288 } 2289 2290 /* 2291 * handle the message based on its preamble pp_msg_type 2292 * which is known to be valid from etm_hdr_read() checks 2293 */ 2294 2295 if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 2296 2297 fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n"); 2298 2299 /* allocate buf large enough for whole body / all FMA events */ 2300 2301 body_sz = 0; 2302 for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) { 2303 body_sz += ev_hdrp->ev_lens[i]; 2304 } /* for summing sizes of all FMA events */ 2305 if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64) 2306 etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i; 2307 ev_cnt = i; 2308 2309 if (etm_debug_lvl >= 1) { 2310 fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n", 2311 ev_cnt, body_sz); 2312 } 2313 2314 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2315 2316 /* read all the FMA events at once */ 2317 2318 if ((n = etm_io_op(hdl, "FMA event dropped: " 2319 "bad io read on event bodies", conn, body_buf, body_sz, 2320 ETM_IO_OP_RD)) < 0) { 2321 should_reset_xport = (n == -ENOTACTIVE); 2322 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2323 goto func_ret; 2324 } 2325 2326 etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz; 2327 etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt; 2328 2329 /* 2330 * now that we've read the entire ETM msg from the conn, 2331 * which avoids later ETM protocol framing errors if we didn't, 2332 * check for dup msg/xid against last good FMD posting, 2333 * if a dup then resend response but skip repost to FMD 2334 */ 2335 2336 if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) { 2337 enq_rv = etm_maybe_enq_response(hdl, conn, 2338 ev_hdrp, hdr_sz, 0); 2339 fmd_hdl_debug(hdl, "info: skipping dup FMA event post " 2340 "xid 0x%x\n", etm_xid_posted_logged_ev); 2341 etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++; 2342 goto func_ret; 2343 } 2344 2345 /* unpack each FMA event and post it to FMD */ 2346 2347 bp = body_buf; 2348 for (i = 0; i < ev_cnt; i++) { 2349 if ((n = nvlist_unpack((char *)bp, 2350 ev_hdrp->ev_lens[i], &evp, 0)) != 0) { 2351 resp_code = (-n); 2352 enq_rv = etm_maybe_enq_response(hdl, conn, 2353 ev_hdrp, hdr_sz, resp_code); 2354 fmd_hdl_error(hdl, "error: FMA event dropped: " 2355 "bad event body unpack errno %d\n", n); 2356 if (etm_debug_lvl >= 2) { 2357 fmd_hdl_debug(hdl, "info: FMA event " 2358 "hexdump %d bytes:\n", 2359 ev_hdrp->ev_lens[i]); 2360 etm_hexdump(hdl, bp, 2361 ev_hdrp->ev_lens[i]); 2362 } 2363 etm_stats.etm_os_nvlist_unpack_fail.fmds_value. 2364 ui64++; 2365 etm_stats.etm_rd_drop_fmaevent.fmds_value. 2366 ui64++; 2367 bp += ev_hdrp->ev_lens[i]; 2368 continue; 2369 } 2370 2371 if (etm_debug_lvl >= 1) { 2372 (void) nvlist_lookup_string(evp, FM_CLASS, 2373 &class); 2374 if (class == NULL) { 2375 class = "NULL"; 2376 } 2377 fmd_hdl_debug(hdl, "info: FMA event %p " 2378 "class %s\n", evp, class); 2379 } 2380 2381 rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR); 2382 fmd_hdl_debug(hdl, 2383 "info: evp size before pack ds msg %d\n", evsz); 2384 ldom_name[0] = '\0'; 2385 rc = etm_filter_find_ldom_id(hdl, evp, ldom_name, 2386 MAX_LDOM_NAME, &did); 2387 2388 /* 2389 * if rc is zero and the ldom_name is not "primary", 2390 * the evp belongs to a root domain, put the evp in an 2391 * outgoing etm queue, 2392 * in all other cases, whether ldom_name is primary or 2393 * can't find a ldom name, call etm_post_to_fmd 2394 */ 2395 if ((rc == 0) && strcmp(ldom_name, "primary") && 2396 strcmp(ldom_name, "")) { 2397 /* 2398 * use the ldom_name, guaranteered at this point 2399 * to be a valid ldom name/non-NULL, to find the 2400 * iosvc data. 2401 * add an iosvc struct if can not find one 2402 */ 2403 (void) pthread_mutex_unlock(&iosvc_list_lock); 2404 iosvc = etm_iosvc_lookup(hdl, ldom_name, 2405 DS_INVALID_HDL, B_TRUE); 2406 (void) pthread_mutex_unlock(&iosvc_list_lock); 2407 if (iosvc == NULL) { 2408 fmd_hdl_debug(hdl, 2409 "error: can't find iosvc for ldom " 2410 "name %s\n", ldom_name); 2411 } else { 2412 resp_code = 0; 2413 (void) etm_pack_ds_msg(hdl, iosvc, 2414 ev_hdrp, hdr_sz, evp, 2415 SP_MSG, ETM_CKPT_SAVE); 2416 /* 2417 * call the new fmd_xprt_log() 2418 */ 2419 fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0); 2420 etm_xid_posted_logged_ev = 2421 ev_hdrp->ev_pp.pp_xid; 2422 } 2423 } else { 2424 /* 2425 * post the fma event to the control fmd 2426 */ 2427 resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt, 2428 evp); 2429 if (resp_code >= 0) { 2430 etm_xid_posted_logged_ev = 2431 ev_hdrp->ev_pp.pp_xid; 2432 } 2433 } 2434 2435 evp = NULL; 2436 enq_rv = etm_maybe_enq_response(hdl, conn, 2437 ev_hdrp, hdr_sz, resp_code); 2438 bp += ev_hdrp->ev_lens[i]; 2439 } /* foreach FMA event in the body buffer */ 2440 2441 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 2442 2443 ctl_hdrp = (void*)ev_hdrp; 2444 2445 fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n"); 2446 if (etm_debug_lvl >= 1) { 2447 fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n", 2448 (int)ctl_hdrp->ctl_pp.pp_sub_type, 2449 ctl_hdrp->ctl_pp.pp_xid); 2450 } 2451 2452 /* 2453 * if we have a VER_NEGOT_REQ read the body and validate 2454 * the protocol version set contained therein, 2455 * otherwise we have a PING_REQ (which has no body) 2456 * and we [also] fall thru to the code which sends a 2457 * response msg if the pp_timeout field requested one 2458 */ 2459 2460 if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) { 2461 2462 body_sz = ctl_hdrp->ctl_len; 2463 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2464 2465 if ((n = etm_io_op(hdl, "bad io read on ctl body", 2466 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2467 should_reset_xport = (n == -ENOTACTIVE); 2468 goto func_ret; 2469 } 2470 2471 /* complain if version set completely incompatible */ 2472 2473 for (i = 0; i < body_sz; i++) { 2474 if ((body_buf[i] == ETM_PROTO_V1) || 2475 (body_buf[i] == ETM_PROTO_V2) || 2476 (body_buf[i] == ETM_PROTO_V3)) { 2477 break; 2478 } 2479 } 2480 if (i >= body_sz) { 2481 etm_stats.etm_ver_bad.fmds_value.ui64++; 2482 resp_code = (-EPROTO); 2483 } 2484 2485 } /* if got version set request */ 2486 2487 etm_stats.etm_rd_body_control.fmds_value.ui64++; 2488 2489 enq_rv = etm_maybe_enq_response(hdl, conn, 2490 ctl_hdrp, hdr_sz, resp_code); 2491 2492 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 2493 2494 resp_hdrp = (void*)ev_hdrp; 2495 2496 fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n"); 2497 if (etm_debug_lvl >= 1) { 2498 fmd_hdl_debug(hdl, "info: resp xid 0x%x\n", 2499 (int)resp_hdrp->resp_pp.pp_xid); 2500 } 2501 2502 body_sz = resp_hdrp->resp_len; 2503 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2504 2505 if ((n = etm_io_op(hdl, "bad io read on resp len", 2506 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2507 should_reset_xport = (n == -ENOTACTIVE); 2508 goto func_ret; 2509 } 2510 2511 etm_stats.etm_rd_body_response.fmds_value.ui64++; 2512 2513 /* 2514 * look up the xid to interpret the response body 2515 * 2516 * ping is a nop; for ver negot confirm that a supported 2517 * protocol version was negotiated and remember which one 2518 */ 2519 2520 if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) && 2521 (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) { 2522 etm_stats.etm_xid_bad.fmds_value.ui64++; 2523 goto func_ret; 2524 } 2525 2526 if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) { 2527 if ((body_buf[0] < ETM_PROTO_V1) || 2528 (body_buf[0] > ETM_PROTO_V3)) { 2529 etm_stats.etm_ver_bad.fmds_value.ui64++; 2530 goto func_ret; 2531 } 2532 etm_resp_ver = body_buf[0]; 2533 } /* if have resp to last req to negotiate proto ver */ 2534 2535 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 2536 2537 sa_hdrp = (void*)ev_hdrp; 2538 2539 fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n"); 2540 if (etm_debug_lvl >= 1) { 2541 fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n", 2542 (int)sa_hdrp->sa_pp.pp_sub_type, 2543 sa_hdrp->sa_pp.pp_xid); 2544 } 2545 2546 body_sz = sa_hdrp->sa_len; 2547 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2548 2549 if ((n = etm_io_op(hdl, "bad io read on sa body", 2550 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2551 should_reset_xport = (n == -ENOTACTIVE); 2552 goto func_ret; 2553 } 2554 2555 etm_stats.etm_rd_body_alert.fmds_value.ui64++; 2556 2557 /* 2558 * now that we've read the entire ETM msg from the conn, 2559 * which avoids later ETM protocol framing errors if we didn't, 2560 * check for dup msg/xid against last good syslog posting, 2561 * if a dup then resend response but skip repost to syslog 2562 */ 2563 2564 if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) { 2565 enq_rv = etm_maybe_enq_response(hdl, conn, 2566 sa_hdrp, hdr_sz, 0); 2567 fmd_hdl_debug(hdl, "info: skipping dup ALERT post " 2568 "xid 0x%x\n", etm_xid_posted_sa); 2569 etm_stats.etm_rd_dup_alert.fmds_value.ui64++; 2570 goto func_ret; 2571 } 2572 2573 resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority, 2574 body_sz, body_buf); 2575 if (resp_code >= 0) { 2576 etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid; 2577 } 2578 enq_rv = etm_maybe_enq_response(hdl, conn, 2579 sa_hdrp, hdr_sz, resp_code); 2580 } /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */ 2581 2582 func_ret: 2583 2584 if (etm_debug_lvl >= 2) { 2585 etm_show_time(hdl, "post conn handle"); 2586 } 2587 2588 /* 2589 * if no responder ele was enqueued, close the conn now 2590 * and free the ETM msg hdr; the ETM msg body is not needed 2591 * by the responder thread and should always be freed here 2592 */ 2593 2594 if (enq_rv <= 0) { 2595 (void) etm_conn_close(hdl, "bad conn close after msg recv", 2596 conn); 2597 if (ev_hdrp != NULL) { 2598 fmd_hdl_free(hdl, ev_hdrp, hdr_sz); 2599 } 2600 } 2601 if (body_buf != NULL) { 2602 fmd_hdl_free(hdl, body_buf, body_sz); 2603 } 2604 if (should_reset_xport) { 2605 etm_reset_xport(hdl); 2606 } 2607 } /* etm_handle_new_conn() */ 2608 2609 /* 2610 * etm_handle_bad_accept - recover from a failed connection acceptance 2611 */ 2612 2613 static void 2614 etm_handle_bad_accept(fmd_hdl_t *hdl, int nev) 2615 { 2616 int should_reset_xport; /* bool to reset xport */ 2617 2618 should_reset_xport = (nev == -ENOTACTIVE); 2619 fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev)); 2620 etm_stats.etm_xport_accept_fail.fmds_value.ui64++; 2621 (void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */ 2622 if (should_reset_xport) { 2623 etm_reset_xport(hdl); 2624 } 2625 } /* etm_handle_bad_accept() */ 2626 2627 /* 2628 * etm_server - loop forever accepting new connections 2629 * using the given FMD handle, 2630 * handling any ETM msgs sent from the other side 2631 * via each such connection 2632 */ 2633 2634 static void 2635 etm_server(void *arg) 2636 { 2637 etm_xport_conn_t conn; /* connection handle */ 2638 int nev; /* -errno val */ 2639 fmd_hdl_t *hdl; /* FMD handle */ 2640 2641 hdl = arg; 2642 2643 fmd_hdl_debug(hdl, "info: connection server starting\n"); 2644 2645 /* 2646 * Restore the checkpointed events and dispatch them before starting to 2647 * receive more events from the sp. 2648 */ 2649 etm_ckpt_recover(hdl); 2650 2651 while (!etm_is_dying) { 2652 2653 if ((conn = etm_xport_accept(hdl, NULL)) == NULL) { 2654 /* errno assumed set by above call */ 2655 nev = (-errno); 2656 if (etm_is_dying) { 2657 break; 2658 } 2659 etm_handle_bad_accept(hdl, nev); 2660 continue; 2661 } 2662 2663 /* handle the new message/connection, closing it when done */ 2664 2665 etm_handle_new_conn(hdl, conn); 2666 2667 } /* while accepting new connections until ETM dies */ 2668 2669 /* ETM is dying (probably due to "fmadm unload etm") */ 2670 2671 fmd_hdl_debug(hdl, "info: connection server is dying\n"); 2672 2673 } /* etm_server() */ 2674 2675 /* 2676 * etm_responder - loop forever waiting for new responder queue elements 2677 * to be enqueued, for each one constructing and sending 2678 * an ETM response msg to the other side, and closing its 2679 * associated connection when appropriate 2680 * 2681 * this thread exists to ensure that the etm_server() thread 2682 * never pends indefinitely waiting on the xport write lock, and is 2683 * hence always available to accept new connections and handle 2684 * incoming messages 2685 * 2686 * this design relies on the fact that each connection accepted and 2687 * returned by the ETM xport layer is unique, and each can be closed 2688 * independently of the others while multiple connections are 2689 * outstanding 2690 */ 2691 2692 static void 2693 etm_responder(void *arg) 2694 { 2695 ssize_t n; /* gen use */ 2696 fmd_hdl_t *hdl; /* FMD handle */ 2697 etm_resp_q_ele_t rqe; /* responder queue ele */ 2698 2699 hdl = arg; 2700 2701 fmd_hdl_debug(hdl, "info: responder server starting\n"); 2702 2703 while (!etm_is_dying) { 2704 2705 (void) pthread_mutex_lock(&etm_resp_q_lock); 2706 2707 while (etm_resp_q_cur_len == 0) { 2708 (void) pthread_cond_wait(&etm_resp_q_cv, 2709 &etm_resp_q_lock); 2710 if (etm_is_dying) { 2711 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2712 goto func_ret; 2713 } 2714 } /* while the responder queue is empty, wait to be nudged */ 2715 2716 /* 2717 * for every responder ele that has been enqueued, 2718 * dequeue and send it as an ETM response msg, 2719 * closing its associated conn and freeing its hdr 2720 * 2721 * enter the queue draining loop holding the responder 2722 * queue lock, but do not hold the lock indefinitely 2723 * (the actual send may pend us indefinitely), 2724 * so that other threads will never pend for long 2725 * trying to enqueue a new element 2726 */ 2727 2728 while (etm_resp_q_cur_len > 0) { 2729 2730 (void) etm_resp_q_deq(hdl, &rqe); 2731 2732 if ((etm_resp_q_cur_len + 1) == etm_resp_q_max_len) 2733 (void) pthread_cond_signal(&etm_resp_q_cv); 2734 2735 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2736 2737 if ((n = etm_send_response(hdl, rqe.rqe_conn, 2738 rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) { 2739 fmd_hdl_error(hdl, "error: bad resp send " 2740 "errno %d\n", (-n)); 2741 } 2742 2743 (void) etm_conn_close(hdl, "bad conn close after resp", 2744 rqe.rqe_conn); 2745 fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz); 2746 2747 if (etm_is_dying) { 2748 goto func_ret; 2749 } 2750 (void) pthread_mutex_lock(&etm_resp_q_lock); 2751 2752 } /* while draining the responder queue */ 2753 2754 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2755 2756 } /* while awaiting and sending resp msgs until ETM dies */ 2757 2758 func_ret: 2759 2760 /* ETM is dying (probably due to "fmadm unload etm") */ 2761 2762 fmd_hdl_debug(hdl, "info: responder server is dying\n"); 2763 2764 (void) pthread_mutex_lock(&etm_resp_q_lock); 2765 if (etm_resp_q_cur_len > 0) { 2766 fmd_hdl_error(hdl, "warning: %d response msgs dropped\n", 2767 (int)etm_resp_q_cur_len); 2768 while (etm_resp_q_cur_len > 0) { 2769 (void) etm_resp_q_deq(hdl, &rqe); 2770 (void) etm_conn_close(hdl, "bad conn close after deq", 2771 rqe.rqe_conn); 2772 fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz); 2773 } 2774 } 2775 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2776 2777 } /* etm_responder() */ 2778 2779 static void * 2780 etm_init_alloc(size_t size) 2781 { 2782 return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP)); 2783 } 2784 2785 static void 2786 etm_init_free(void *addr, size_t size) 2787 { 2788 fmd_hdl_free(init_hdl, addr, size); 2789 } 2790 2791 /* 2792 * ---------------------root ldom support functions ----------------------- 2793 */ 2794 2795 /* 2796 * use a static array async_event_q instead of dynamicaly allocated mem queue 2797 * for etm_async_q_enq and etm_async_q_deq. 2798 * This is not running in an fmd aux thread, can't use the fmd_hdl_* funcs. 2799 * caller needs to grab the mutex lock before calling this func. 2800 * return >0 for success, or -errno value 2801 */ 2802 static int 2803 etm_async_q_enq(etm_async_event_ele_t *async_e) 2804 { 2805 2806 if (etm_async_q_cur_len >= etm_async_q_max_len) { 2807 /* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */ 2808 return (-E2BIG); 2809 } 2810 2811 (void) memcpy(&async_event_q[etm_async_q_tail], async_e, 2812 sizeof (*async_e)); 2813 2814 etm_async_q_tail++; 2815 if (etm_async_q_tail == etm_async_q_max_len) { 2816 etm_async_q_tail = 0; 2817 } 2818 etm_async_q_cur_len++; 2819 2820 /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */ 2821 2822 return (1); 2823 2824 } /* etm_async_q_enq() */ 2825 2826 2827 static int 2828 etm_async_q_deq(etm_async_event_ele_t *async_e) 2829 { 2830 2831 if (etm_async_q_cur_len == 0) { 2832 /* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */ 2833 return (-ENOENT); 2834 } 2835 2836 (void) memcpy(async_e, &async_event_q[etm_async_q_head], 2837 sizeof (*async_e)); 2838 2839 etm_async_q_head++; 2840 if (etm_async_q_head == etm_async_q_max_len) { 2841 etm_async_q_head = 0; 2842 } 2843 etm_async_q_cur_len--; 2844 2845 return (1); 2846 } /* etm_async_q_deq */ 2847 2848 2849 /* 2850 * setting up the fields in iosvc at DS_REG_CB time 2851 */ 2852 void 2853 etm_iosvc_setup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 2854 etm_async_event_ele_t *async_e) 2855 { 2856 iosvc->ds_hdl = async_e->ds_hdl; 2857 iosvc->cur_send_xid = 0; 2858 iosvc->xid_posted_ev = 0; 2859 iosvc->start_sending_Q = 0; 2860 2861 /* 2862 * open the fmd xprt if it 2863 * hasn't been previously opened 2864 */ 2865 fmd_hdl_debug(fmd_hdl, "info: before fmd_xprt_open ldom_name is %s\n", 2866 async_e->ldom_name); 2867 2868 if (iosvc->fmd_xprt == NULL) { 2869 iosvc->fmd_xprt = fmd_xprt_open(fmd_hdl, flags, NULL, iosvc); 2870 } 2871 2872 iosvc->thr_is_dying = 0; 2873 if (iosvc->recv_tid == NULL) { 2874 iosvc->recv_tid = fmd_thr_create(fmd_hdl, 2875 etm_recv_from_remote_root, iosvc); 2876 } 2877 if (iosvc->send_tid == NULL) { 2878 iosvc->send_tid = fmd_thr_create(fmd_hdl, 2879 etm_send_to_remote_root, iosvc); 2880 } 2881 } /* etm_iosvc_setup() */ 2882 2883 2884 /* 2885 * ds userland interface ds_reg_cb callback func 2886 */ 2887 2888 /* ARGSUSED */ 2889 static void 2890 etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver, 2891 ds_domain_hdl_t dhdl) 2892 { 2893 etm_async_event_ele_t async_ele; 2894 2895 2896 /* 2897 * do version check here. 2898 * checked the ver received here against etm_iosvc_vers here 2899 */ 2900 if (etm_iosvc_vers[0].major != ver->major || 2901 etm_iosvc_vers[0].minor != ver->minor) { 2902 /* 2903 * can't log an fmd debug msg, 2904 * not running in an fmd aux thread 2905 */ 2906 return; 2907 } 2908 2909 /* 2910 * the callback should have a valid ldom_name 2911 * can't log fmd debugging msg here since this is not in an fmd aux 2912 * thread. log fmd debug msg in etm_async_event_handle() 2913 */ 2914 async_ele.ds_hdl = ds_hdl; 2915 async_ele.dhdl = dhdl; 2916 async_ele.ldom_name[0] = '\0'; 2917 async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB; 2918 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2919 (void) etm_async_q_enq(&async_ele); 2920 if (etm_async_q_cur_len == 1) 2921 (void) pthread_cond_signal(&etm_async_event_q_cv); 2922 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2923 2924 } /* etm_iosvc_reg_handler */ 2925 2926 2927 /* 2928 * ds userland interface ds_unreg_cb callback func 2929 */ 2930 2931 /*ARGSUSED*/ 2932 static void 2933 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg) 2934 { 2935 etm_async_event_ele_t async_ele; 2936 2937 /* 2938 * fill in async_ele and enqueue async_ele 2939 */ 2940 async_ele.ldom_name[0] = '\0'; 2941 async_ele.ds_hdl = hdl; 2942 async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB; 2943 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2944 (void) etm_async_q_enq(&async_ele); 2945 if (etm_async_q_cur_len == 1) 2946 (void) pthread_cond_signal(&etm_async_event_q_cv); 2947 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2948 } /* etm_iosvc_unreg_handler */ 2949 2950 /* 2951 * ldom event registration callback func 2952 */ 2953 2954 /* ARGSUSED */ 2955 static void 2956 ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data) 2957 { 2958 etm_async_event_ele_t async_ele; 2959 2960 /* 2961 * the callback will have a valid ldom_name 2962 */ 2963 async_ele.ldom_name[0] = '\0'; 2964 if (ldom_name) 2965 (void) strcpy(async_ele.ldom_name, ldom_name); 2966 async_ele.ds_hdl = DS_INVALID_HDL; 2967 2968 /* 2969 * fill in async_ele and enq async_ele 2970 */ 2971 switch (event) { 2972 case LDOM_EVENT_BIND: 2973 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND; 2974 break; 2975 case LDOM_EVENT_UNBIND: 2976 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND; 2977 break; 2978 case LDOM_EVENT_ADD: 2979 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD; 2980 break; 2981 case LDOM_EVENT_REMOVE: 2982 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE; 2983 break; 2984 default: 2985 /* 2986 * for all other ldom events, do nothing 2987 */ 2988 return; 2989 } /* switch (event) */ 2990 2991 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2992 (void) etm_async_q_enq(&async_ele); 2993 if (etm_async_q_cur_len == 1) 2994 (void) pthread_cond_signal(&etm_async_event_q_cv); 2995 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2996 2997 } /* ldom_event_handler */ 2998 2999 3000 /* 3001 * This is running as an fmd aux thread. 3002 * This is the func that actually handle the events, which include: 3003 * 1. ldom events. ldom events are on Control Domain only 3004 * 2. any DS userland callback funcs 3005 * these events are already Q-ed in the async_event_ele_q 3006 * deQ and process the events accordingly 3007 */ 3008 static void 3009 etm_async_event_handler(void *arg) 3010 { 3011 3012 fmd_hdl_t *fmd_hdl = (fmd_hdl_t *)arg; 3013 etm_iosvc_t *iosvc; /* ptr 2 iosvc struct */ 3014 etm_async_event_ele_t async_e; 3015 3016 fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n"); 3017 /* 3018 * handle etm is not dying and Q len > 0 3019 */ 3020 while (!etm_is_dying) { 3021 /* 3022 * grab the lock to check the Q len 3023 */ 3024 (void) pthread_mutex_lock(&etm_async_event_q_lock); 3025 fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n", 3026 etm_async_q_cur_len); 3027 3028 while (etm_async_q_cur_len > 0) { 3029 (void) etm_async_q_deq(&async_e); 3030 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 3031 fmd_hdl_debug(fmd_hdl, 3032 "info: processing an async event type %d ds_hdl" 3033 " %d\n", async_e.event_type, async_e.ds_hdl); 3034 if (async_e.ldom_name[0] != '\0') { 3035 fmd_hdl_debug(fmd_hdl, 3036 "info: procssing async evt ldom_name %s\n", 3037 async_e.ldom_name); 3038 } 3039 3040 /* 3041 * at this point, if async_e.ldom_name is not NULL, 3042 * we have a valid iosvc strcut ptr. 3043 * the only time async_e.ldom_name is NULL is at 3044 * ds_unreg_cb() 3045 */ 3046 switch (async_e.event_type) { 3047 case ETM_ASYNC_EVENT_LDOM_UNBIND: 3048 case ETM_ASYNC_EVENT_LDOM_REMOVE: 3049 /* 3050 * we have a valid ldom_name, 3051 * etm_lookup_struct(ldom_name) 3052 * do nothing if can't find an iosvc 3053 * no iosvc clean up to do 3054 */ 3055 (void) pthread_mutex_lock( 3056 &iosvc_list_lock); 3057 iosvc = etm_iosvc_lookup(fmd_hdl, 3058 async_e.ldom_name, 3059 async_e.ds_hdl, B_FALSE); 3060 if (iosvc == NULL) { 3061 fmd_hdl_debug(fmd_hdl, 3062 "error: can't find iosvc for ldom " 3063 "name %s\n", 3064 async_e.ldom_name); 3065 (void) pthread_mutex_unlock( 3066 &iosvc_list_lock); 3067 break; 3068 } 3069 /* 3070 * Clean up the queue, delete all messages and 3071 * do not persist checkpointed fma events. 3072 */ 3073 etm_iosvc_cleanup(fmd_hdl, iosvc, B_TRUE, 3074 B_TRUE); 3075 (void) pthread_mutex_unlock( 3076 &iosvc_list_lock); 3077 break; 3078 3079 case ETM_ASYNC_EVENT_LDOM_BIND: 3080 3081 /* 3082 * create iosvc if it has not been 3083 * created 3084 * async_e.ds_hdl is invalid 3085 * async_e.ldom_name is valid ldom_name 3086 */ 3087 (void) pthread_mutex_lock( 3088 &iosvc_list_lock); 3089 iosvc = etm_iosvc_lookup(fmd_hdl, 3090 async_e.ldom_name, 3091 async_e.ds_hdl, B_TRUE); 3092 if (iosvc == NULL) { 3093 fmd_hdl_debug(fmd_hdl, 3094 "error: can't create iosvc for " 3095 "async evnt %d\n", 3096 async_e.event_type); 3097 (void) pthread_mutex_unlock( 3098 &iosvc_list_lock); 3099 break; 3100 } 3101 (void) strcpy(iosvc->ldom_name, 3102 async_e.ldom_name); 3103 iosvc->ds_hdl = async_e.ds_hdl; 3104 (void) pthread_mutex_unlock( 3105 &iosvc_list_lock); 3106 break; 3107 3108 case ETM_ASYNC_EVENT_DS_REG_CB: 3109 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3110 /* 3111 * find the root ldom name from 3112 * ldom domain hdl/id 3113 */ 3114 if (etm_filter_find_ldom_name( 3115 fmd_hdl, async_e.dhdl, 3116 async_e.ldom_name, 3117 MAX_LDOM_NAME) != 0) { 3118 fmd_hdl_debug(fmd_hdl, 3119 "error: can't find root " 3120 "domain name from did %d\n", 3121 async_e.dhdl); 3122 break; 3123 } else { 3124 fmd_hdl_debug(fmd_hdl, 3125 "info: etm_filter_find_" 3126 "ldom_name returned %s\n", 3127 async_e.ldom_name); 3128 } 3129 /* 3130 * now we should have a valid 3131 * root domain name. 3132 * lookup the iosvc struct 3133 * associated with the ldom_name 3134 * and init the iosvc struct 3135 */ 3136 (void) pthread_mutex_lock( 3137 &iosvc_list_lock); 3138 iosvc = etm_iosvc_lookup( 3139 fmd_hdl, async_e.ldom_name, 3140 async_e.ds_hdl, B_TRUE); 3141 if (iosvc == NULL) { 3142 fmd_hdl_debug(fmd_hdl, 3143 "error: can't create iosvc " 3144 "for async evnt %d\n", 3145 async_e.event_type); 3146 (void) pthread_mutex_unlock( 3147 &iosvc_list_lock); 3148 break; 3149 } 3150 3151 etm_iosvc_setup(fmd_hdl, iosvc, 3152 &async_e); 3153 (void) pthread_mutex_unlock( 3154 &iosvc_list_lock); 3155 } else { 3156 iosvc = &io_svc; 3157 (void) strcpy(iosvc->ldom_name, 3158 async_e.ldom_name); 3159 3160 etm_iosvc_setup(fmd_hdl, iosvc, 3161 &async_e); 3162 } 3163 break; 3164 3165 case ETM_ASYNC_EVENT_DS_UNREG_CB: 3166 /* 3167 * decide which iosvc struct to perform 3168 * this UNREG callback on. 3169 */ 3170 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3171 (void) pthread_mutex_lock( 3172 &iosvc_list_lock); 3173 /* 3174 * lookup the iosvc struct w/ 3175 * ds_hdl 3176 */ 3177 iosvc = etm_iosvc_lookup( 3178 fmd_hdl, async_e.ldom_name, 3179 async_e.ds_hdl, B_FALSE); 3180 if (iosvc == NULL) { 3181 fmd_hdl_debug(fmd_hdl, 3182 "error: can't find iosvc " 3183 "for async evnt %d\n", 3184 async_e.event_type); 3185 (void) pthread_mutex_unlock( 3186 &iosvc_list_lock); 3187 break; 3188 } 3189 3190 /* 3191 * ds_hdl and fmd_xprt_open 3192 * go hand to hand together 3193 * after unreg_cb, 3194 * ds_hdl is INVALID and 3195 * fmd_xprt is closed. 3196 * the ldom name and the msg Q 3197 * remains in iosvc_list 3198 */ 3199 if (iosvc->ldom_name != '\0') 3200 fmd_hdl_debug(fmd_hdl, 3201 "info: iosvc w/ ldom_name " 3202 "%s \n", iosvc->ldom_name); 3203 3204 /* 3205 * destroy send/recv threads and 3206 * other clean up on Control side. 3207 */ 3208 etm_iosvc_cleanup(fmd_hdl, iosvc, 3209 B_FALSE, B_FALSE); 3210 (void) pthread_mutex_unlock( 3211 &iosvc_list_lock); 3212 } else { 3213 iosvc = &io_svc; 3214 /* 3215 * destroy send/recv threads and 3216 * then clean up on Root side. 3217 */ 3218 etm_iosvc_cleanup(fmd_hdl, iosvc, 3219 B_FALSE, B_FALSE); 3220 } 3221 break; 3222 3223 default: 3224 /* 3225 * for all other events, etm doesn't care. 3226 * already logged an fmd info msg w/ 3227 * the event type. Do nothing here. 3228 */ 3229 break; 3230 } /* switch (async_e.event_type) */ 3231 3232 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3233 etm_filter_handle_ldom_event(fmd_hdl, 3234 async_e.event_type, async_e.ldom_name); 3235 } 3236 3237 /* 3238 * grab the lock to check the q length again 3239 */ 3240 (void) pthread_mutex_lock(&etm_async_event_q_lock); 3241 3242 if (etm_is_dying) { 3243 break; 3244 } 3245 } /* etm_async_q_cur_len */ 3246 3247 /* 3248 * we have the mutex lock at this point, whether 3249 * . etm_is_dying and/or 3250 * . q_len == 0 3251 */ 3252 if (!etm_is_dying && etm_async_q_cur_len == 0) { 3253 fmd_hdl_debug(fmd_hdl, 3254 "info: cond wait on async_event_q_cv\n"); 3255 (void) pthread_cond_wait(&etm_async_event_q_cv, 3256 &etm_async_event_q_lock); 3257 fmd_hdl_debug(fmd_hdl, 3258 "info: cond wait on async_event_q_cv rtns\n"); 3259 } 3260 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 3261 } /* etm_is_dying */ 3262 3263 fmd_hdl_debug(fmd_hdl, 3264 "info: etm async event handler thread exiting\n"); 3265 3266 } /* etm_async_event_handler */ 3267 3268 /* 3269 * deQ what's in iosvc msg Q 3270 * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg() 3271 * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event 3272 */ 3273 static void 3274 etm_send_to_remote_root(void *arg) 3275 { 3276 3277 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3278 etm_iosvc_q_ele_t msg_ele; /* iosvc msg ele */ 3279 etm_proto_v1_ev_hdr_t *ev_hdrp; /* hdr for FMA_EVENT */ 3280 fmd_hdl_t *fmd_hdl = init_hdl; /* fmd handle */ 3281 3282 3283 fmd_hdl_debug(fmd_hdl, 3284 "info: send to remote iosvc starting w/ ldom_name %s\n", 3285 iosvc->ldom_name); 3286 3287 /* 3288 * loop forever until etm_is_dying or thr_is_dying 3289 */ 3290 while (!etm_is_dying && !iosvc->thr_is_dying) { 3291 if (iosvc->ds_hdl != DS_INVALID_HDL && 3292 iosvc->start_sending_Q > 0) { 3293 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3294 while (iosvc->msg_q_cur_len > 0 && 3295 iosvc->ds_hdl != DS_INVALID_HDL) { 3296 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, 3297 &msg_ele); 3298 if (etm_debug_lvl >= 3) { 3299 fmd_hdl_debug(fmd_hdl, "info: valid " 3300 "ds_hdl before ds_send_msg \n"); 3301 } 3302 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3303 3304 iosvc->ack_ok = 0; 3305 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3306 ((ptrdiff_t)msg_ele.msg); 3307 ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 3308 while (!iosvc->ack_ok && 3309 iosvc->ds_hdl != DS_INVALID_HDL && 3310 !etm_is_dying) { 3311 /* 3312 * call ds_send_msg() to send the msg, 3313 * wait for the recv end to send the 3314 * resp msg back. 3315 * If resp msg is recv-ed, ack_ok 3316 * will be set to 1. 3317 * otherwise, retry. 3318 */ 3319 if (etm_send_ds_msg(fmd_hdl, B_TRUE, 3320 iosvc, &msg_ele, ev_hdrp) < 0) { 3321 continue; 3322 } 3323 3324 if (etm_is_dying || iosvc->thr_is_dying) 3325 break; 3326 } 3327 3328 /* 3329 * if out of the while loop but !ack_ok, ie, 3330 * ds_hdl becomes invalid at some point 3331 * while waiting the resp msg, we need to put 3332 * the msg back to the head of the Q. 3333 */ 3334 if (!iosvc->ack_ok) { 3335 (void) pthread_mutex_lock( 3336 &iosvc->msg_q_lock); 3337 /* 3338 * put the msg back to the head of Q. 3339 * If the Q is full at this point, 3340 * drop the msg at the tail, enq this 3341 * msg to the head. 3342 */ 3343 etm_msg_enq_head(fmd_hdl, iosvc, 3344 &msg_ele); 3345 (void) pthread_mutex_unlock( 3346 &iosvc->msg_q_lock); 3347 } 3348 3349 /* 3350 * 3351 * grab the lock to check the Q len again 3352 */ 3353 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3354 if (etm_is_dying || iosvc->thr_is_dying) { 3355 break; 3356 } 3357 } /* while dequeing iosvc msgs to send */ 3358 3359 /* 3360 * we have the mutex lock for msg_q_lock at this point 3361 * we are here because 3362 * 1) q_len == 0: then wait on the cv for Q to be filled 3363 * 2) etm_is_dying 3364 */ 3365 if (!etm_is_dying && !iosvc->thr_is_dying && 3366 iosvc->msg_q_cur_len == 0) { 3367 fmd_hdl_debug(fmd_hdl, 3368 "info: waiting on msg_q_cv\n"); 3369 (void) pthread_cond_wait(&iosvc->msg_q_cv, 3370 &iosvc->msg_q_lock); 3371 } 3372 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3373 if (etm_is_dying || iosvc->thr_is_dying) { 3374 break; 3375 } 3376 } else { 3377 (void) etm_sleep(1); 3378 } /* wait for the start_sendingQ > 0 */ 3379 } /* etm_is_dying or thr_is_dying */ 3380 fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n"); 3381 } /* etm_send_to_remote_root */ 3382 3383 3384 /* 3385 * receive etm msgs from the remote root ldom by calling ds_recv_msg() 3386 * if FMA events/ereports, call fmd_xprt_post() to post to fmd 3387 * send ACK back by calling ds_send_msg() 3388 */ 3389 static void 3390 etm_recv_from_remote_root(void *arg) 3391 { 3392 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3393 etm_proto_v1_pp_t *pp; /* protocol preamble */ 3394 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 3395 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 3396 int32_t resp_code = 0; /* default is success */ 3397 int32_t rc; /* return value */ 3398 size_t maxlen = MAXLEN; 3399 /* max msg len */ 3400 char msgbuf[MAXLEN]; /* recv msg buf */ 3401 size_t msg_size; /* recv msg size */ 3402 size_t hdr_sz; /* sizeof *hdrp */ 3403 size_t evsz; /* sizeof *evp */ 3404 size_t fma_event_size; /* sizeof FMA event */ 3405 nvlist_t *evp; /* ptr to the nvlist */ 3406 char *buf; /* ptr to the nvlist */ 3407 static uint32_t mem_alloc = 0; /* indicate if alloc mem */ 3408 char *msg; /* ptr to alloc mem */ 3409 fmd_hdl_t *fmd_hdl = init_hdl; 3410 3411 3412 3413 fmd_hdl_debug(fmd_hdl, 3414 "info: recv from remote iosvc starting with ldom name %s \n", 3415 iosvc->ldom_name); 3416 3417 /* 3418 * loop forever until etm_is_dying or the thread is dying 3419 */ 3420 3421 msg = msgbuf; 3422 while (!etm_is_dying && !iosvc->thr_is_dying) { 3423 if (iosvc->ds_hdl == DS_INVALID_HDL) { 3424 fmd_hdl_debug(fmd_hdl, 3425 "info: ds_hdl is invalid in recv thr\n"); 3426 (void) etm_sleep(1); 3427 continue; 3428 } 3429 3430 /* 3431 * for now, there are FMA_EVENT and ACK msg type. 3432 * use FMA_EVENT buf as the maxlen, hdr+1 fma event. 3433 * FMA_EVENT is big enough to hold an ACK msg. 3434 * the actual msg size received is in msg_size. 3435 */ 3436 rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size); 3437 if (rc == EFBIG) { 3438 fmd_hdl_debug(fmd_hdl, 3439 "info: ds_recv_msg needs mem the size of %d\n", 3440 msg_size); 3441 msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP); 3442 mem_alloc = 1; 3443 } else if (rc == 0) { 3444 fmd_hdl_debug(fmd_hdl, 3445 "info: ds_recv_msg received a msg ok\n"); 3446 /* 3447 * check the magic # in msg.hdr 3448 */ 3449 pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg); 3450 if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) { 3451 fmd_hdl_debug(fmd_hdl, 3452 "info: bad ds recv on magic\n"); 3453 continue; 3454 } 3455 3456 /* 3457 * check the msg type against msg_size to be sure 3458 * that received msg is not a truncated msg 3459 */ 3460 if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 3461 3462 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3463 ((ptrdiff_t)msg); 3464 fmd_hdl_debug(fmd_hdl, "info: ds received " 3465 "FMA EVENT xid=%d msg_size=%d\n", 3466 ev_hdrp->ev_pp.pp_xid, msg_size); 3467 hdr_sz = sizeof (*ev_hdrp) + 3468 1*(sizeof (ev_hdrp->ev_lens[0])); 3469 fma_event_size = hdr_sz + ev_hdrp->ev_lens[0]; 3470 if (fma_event_size != msg_size) { 3471 fmd_hdl_debug(fmd_hdl, "info: wrong " 3472 "ev msg size received\n"); 3473 continue; 3474 /* 3475 * Simply do nothing. The send side 3476 * will timedcond_wait waiting on the 3477 * resp msg will timeout and 3478 * re-send the same msg. 3479 */ 3480 } 3481 if (etm_debug_lvl >= 3) { 3482 fmd_hdl_debug(fmd_hdl, "info: recv msg" 3483 " size %d hdrsz %d evp size %d\n", 3484 msg_size, hdr_sz, 3485 ev_hdrp->ev_lens[0]); 3486 } 3487 3488 if (ev_hdrp->ev_pp.pp_xid != 3489 iosvc->xid_posted_ev) { 3490 /* 3491 * different from last xid posted to 3492 * fmd, post to fmd now. 3493 */ 3494 buf = msg + hdr_sz; 3495 rc = nvlist_unpack(buf, 3496 ev_hdrp->ev_lens[0], &evp, 0); 3497 rc = nvlist_size(evp, &evsz, 3498 NV_ENCODE_XDR); 3499 fmd_hdl_debug(fmd_hdl, 3500 "info: evp size %d before fmd" 3501 "post\n", evsz); 3502 3503 if ((rc = etm_post_to_fmd(fmd_hdl, 3504 iosvc->fmd_xprt, evp)) >= 0) { 3505 fmd_hdl_debug(fmd_hdl, 3506 "info: xid posted to fmd %d" 3507 "\n", 3508 ev_hdrp->ev_pp.pp_xid); 3509 iosvc->xid_posted_ev = 3510 ev_hdrp->ev_pp.pp_xid; 3511 } 3512 } 3513 3514 /* 3515 * ready to send the RESPONSE msg back 3516 * reuse the msg buffer as the response buffer 3517 */ 3518 resp_hdrp = (etm_proto_v1_resp_hdr_t *) 3519 ((ptrdiff_t)msg); 3520 resp_hdrp->resp_pp.pp_msg_type = 3521 ETM_MSG_TYPE_RESPONSE; 3522 3523 resp_hdrp->resp_code = resp_code; 3524 resp_hdrp->resp_len = sizeof (*resp_hdrp); 3525 3526 /* 3527 * send the whole response msg in one send 3528 */ 3529 if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg, 3530 sizeof (*resp_hdrp)) != 0) { 3531 fmd_hdl_debug(fmd_hdl, 3532 "info: send response msg failed\n"); 3533 } else { 3534 fmd_hdl_debug(fmd_hdl, 3535 "info: ds send resp msg ok" 3536 "size %d\n", sizeof (*resp_hdrp)); 3537 } 3538 } else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 3539 fmd_hdl_debug(fmd_hdl, 3540 "info: ds received respond msg xid=%d" 3541 "msg_size=%d for ldom %s\n", pp->pp_xid, 3542 msg_size, iosvc->ldom_name); 3543 if (sizeof (*resp_hdrp) != msg_size) { 3544 fmd_hdl_debug(fmd_hdl, 3545 "info: wrong resp msg size" 3546 "received\n"); 3547 fmd_hdl_debug(fmd_hdl, 3548 "info: resp msg size %d recv resp" 3549 "msg size %d\n", 3550 sizeof (*resp_hdrp), msg_size); 3551 continue; 3552 } 3553 /* 3554 * is the pp.pp_xid == iosvc->cur_send_xid+1, 3555 * if so, nudge the send routine to send next 3556 */ 3557 if (pp->pp_xid != iosvc->cur_send_xid+1) { 3558 fmd_hdl_debug(fmd_hdl, 3559 "info: ds received resp msg xid=%d " 3560 "doesn't match cur_send_id=%d\n", 3561 pp->pp_xid, iosvc->cur_send_xid+1); 3562 continue; 3563 } 3564 (void) pthread_mutex_lock(&iosvc->msg_ack_lock); 3565 iosvc->ack_ok = 1; 3566 (void) pthread_cond_signal(&iosvc->msg_ack_cv); 3567 (void) pthread_mutex_unlock( 3568 &iosvc->msg_ack_lock); 3569 fmd_hdl_debug(fmd_hdl, 3570 "info: signaling msg_ack_cv\n"); 3571 } else { 3572 /* 3573 * place holder for future msg types 3574 */ 3575 fmd_hdl_debug(fmd_hdl, 3576 "info: ds received unrecognized msg\n"); 3577 } 3578 if (mem_alloc) { 3579 fmd_hdl_free(fmd_hdl, msg, msg_size); 3580 mem_alloc = 0; 3581 msg = msgbuf; 3582 } 3583 } else { 3584 if (etm_debug_lvl >= 3) { 3585 fmd_hdl_debug(fmd_hdl, 3586 "info: ds_recv_msg() failed\n"); 3587 } 3588 } /* ds_recv_msg() returns */ 3589 } /* etm_is_dying */ 3590 3591 /* 3592 * need to free the mem allocated in msg upon exiting the thread 3593 */ 3594 if (mem_alloc) { 3595 fmd_hdl_free(fmd_hdl, msg, msg_size); 3596 mem_alloc = 0; 3597 msg = msgbuf; 3598 } 3599 fmd_hdl_debug(fmd_hdl, "info; etm recv thread exiting \n"); 3600 } /* etm_recv_from_remote_root */ 3601 3602 3603 3604 /* 3605 * etm_ds_init 3606 * initialize DS services function pointers by calling 3607 * dlopen() followed by dlsym() for each ds func. 3608 * if any dlopen() or dlsym() call fails, return -ENOENT 3609 * return >0 for successs, -ENOENT for failure 3610 */ 3611 static int 3612 etm_ds_init(fmd_hdl_t *hdl) 3613 { 3614 int rc = 0; 3615 3616 if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) { 3617 fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path); 3618 return (-ENOENT); 3619 } 3620 3621 etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3622 dlsym(etm_dl_hdl, "ds_svc_reg"); 3623 if (etm_ds_svc_reg == NULL) { 3624 fmd_hdl_debug(hdl, 3625 "error: failed to dlsym ds_svc_reg() w/ error %s\n", 3626 dlerror()); 3627 rc = -ENOENT; 3628 } 3629 3630 3631 etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3632 dlsym(etm_dl_hdl, "ds_clnt_reg"); 3633 if (etm_ds_clnt_reg == NULL) { 3634 fmd_hdl_debug(hdl, 3635 "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno); 3636 rc = -ENOENT; 3637 } 3638 3639 etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen)) 3640 dlsym(etm_dl_hdl, "ds_send_msg"); 3641 if (etm_ds_send_msg == NULL) { 3642 fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n"); 3643 rc = -ENOENT; 3644 } 3645 3646 etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen, 3647 size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg"); 3648 if (etm_ds_recv_msg == NULL) { 3649 fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n"); 3650 rc = -ENOENT; 3651 } 3652 3653 etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini"); 3654 if (etm_ds_fini == NULL) { 3655 fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n"); 3656 rc = -ENOENT; 3657 } 3658 3659 if (rc == -ENOENT) { 3660 (void) dlclose(etm_dl_hdl); 3661 } 3662 return (rc); 3663 3664 } /* etm_ds_init() */ 3665 3666 3667 /* 3668 * -------------------------- FMD entry points ------------------------------- 3669 */ 3670 3671 /* 3672 * _fmd_init - initialize the transport for use by ETM and start the 3673 * server daemon to accept new connections to us 3674 * 3675 * FMD will read our *.conf and subscribe us to FMA events 3676 */ 3677 3678 void 3679 _fmd_init(fmd_hdl_t *hdl) 3680 { 3681 struct timeval tmv; /* timeval */ 3682 ssize_t n; /* gen use */ 3683 const struct facility *fp; /* syslog facility matching */ 3684 char *facname; /* syslog facility property */ 3685 uint32_t type_mask; /* type of the local host */ 3686 int rc; /* funcs return code */ 3687 3688 3689 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 3690 return; /* invalid data in configuration file */ 3691 } 3692 3693 fmd_hdl_debug(hdl, "info: module initializing\n"); 3694 3695 init_hdl = hdl; 3696 etm_lhp = ldom_init(etm_init_alloc, etm_init_free); 3697 3698 /* 3699 * decide the ldom type, do initialization accordingly 3700 */ 3701 if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) { 3702 fmd_hdl_debug(hdl, "error: can't decide ldom type\n"); 3703 fmd_hdl_debug(hdl, "info: module unregistering\n"); 3704 ldom_fini(etm_lhp); 3705 fmd_hdl_unregister(hdl); 3706 return; 3707 } 3708 3709 if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) { 3710 if (type_mask & LDOM_TYPE_LEGACY) { 3711 /* 3712 * running on a legacy sun4v domain, 3713 * act as the the old sun4v 3714 */ 3715 etm_ldom_type = LDOM_TYPE_LEGACY; 3716 fmd_hdl_debug(hdl, "info: running as the old sun4v\n"); 3717 ldom_fini(etm_lhp); 3718 } else if (type_mask & LDOM_TYPE_CONTROL) { 3719 etm_ldom_type = LDOM_TYPE_CONTROL; 3720 fmd_hdl_debug(hdl, "info: running as control domain\n"); 3721 3722 /* 3723 * looking for libds.so.1. 3724 * If not found, don't do DS registration. As a result, 3725 * there will be no DS callbacks or other DS services. 3726 */ 3727 if (etm_ds_init(hdl) >= 0) { 3728 etm_filter_init(hdl); 3729 etm_ckpt_init(hdl); 3730 3731 flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT; 3732 3733 /* 3734 * ds client registration 3735 */ 3736 if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps, 3737 &iosvc_ops))) { 3738 fmd_hdl_debug(hdl, 3739 "error: ds_clnt_reg(): errno %d\n", rc); 3740 } 3741 } else { 3742 fmd_hdl_debug(hdl, "error: dlopen() libds " 3743 "failed, continue without the DS services"); 3744 } 3745 3746 /* 3747 * register for ldom status events 3748 */ 3749 if ((rc = ldom_register_event(etm_lhp, 3750 ldom_event_handler, hdl))) { 3751 fmd_hdl_debug(hdl, 3752 "error: ldom_register_event():" 3753 " errno %d\n", rc); 3754 } 3755 3756 /* 3757 * create the thread for handling both the ldom status 3758 * change and service events 3759 */ 3760 etm_async_e_tid = fmd_thr_create(hdl, 3761 etm_async_event_handler, hdl); 3762 } 3763 3764 /* setup statistics and properties from FMD */ 3765 3766 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, 3767 sizeof (etm_stats) / sizeof (fmd_stat_t), 3768 (fmd_stat_t *)&etm_stats); 3769 3770 etm_fma_resp_wait_time = fmd_prop_get_int32(hdl, 3771 ETM_PROP_NM_FMA_RESP_WAIT_TIME); 3772 etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL); 3773 etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl, 3774 ETM_PROP_NM_DEBUG_MAX_EV_CNT); 3775 fmd_hdl_debug(hdl, "info: etm_debug_lvl %d " 3776 "etm_debug_max_ev_cnt %d\n", etm_debug_lvl, 3777 etm_debug_max_ev_cnt); 3778 3779 etm_resp_q_max_len = fmd_prop_get_int32(hdl, 3780 ETM_PROP_NM_MAX_RESP_Q_LEN); 3781 etm_stats.etm_resp_q_max_len.fmds_value.ui64 = 3782 etm_resp_q_max_len; 3783 etm_bad_acc_to_sec = fmd_prop_get_int32(hdl, 3784 ETM_PROP_NM_BAD_ACC_TO_SEC); 3785 3786 /* 3787 * obtain an FMD transport handle so we can post 3788 * FMA events later 3789 */ 3790 3791 etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL); 3792 3793 /* 3794 * encourage protocol transaction id to be unique per module 3795 * load 3796 */ 3797 3798 (void) gettimeofday(&tmv, NULL); 3799 etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) | 3800 ((unsigned long)tmv.tv_usec >> 10)); 3801 3802 /* init the ETM transport */ 3803 3804 if ((n = etm_xport_init(hdl)) != 0) { 3805 fmd_hdl_error(hdl, "error: bad xport init errno %d\n", 3806 (-n)); 3807 fmd_hdl_unregister(hdl); 3808 return; 3809 } 3810 3811 /* 3812 * Cache any properties we use every time we receive an alert. 3813 */ 3814 syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD); 3815 syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE); 3816 3817 if (syslog_file && (syslog_logfd = open("/dev/conslog", 3818 O_WRONLY | O_NOCTTY)) == -1) { 3819 fmd_hdl_error(hdl, 3820 "error: failed to open /dev/conslog"); 3821 syslog_file = 0; 3822 } 3823 3824 if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg", 3825 O_WRONLY | O_NOCTTY)) == -1) { 3826 fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg"); 3827 syslog_cons = 0; 3828 } 3829 3830 if (syslog_file) { 3831 /* 3832 * Look up the value of the "facility" property and 3833 * use it to determine * what syslog LOG_* facility 3834 * value we use to fill in our log_ctl_t. 3835 */ 3836 facname = fmd_prop_get_string(hdl, 3837 ETM_PROP_NM_FACILITY); 3838 3839 for (fp = syslog_facs; fp->fac_name != NULL; fp++) { 3840 if (strcmp(fp->fac_name, facname) == 0) 3841 break; 3842 } 3843 3844 if (fp->fac_name == NULL) { 3845 fmd_hdl_error(hdl, "error: invalid 'facility'" 3846 " setting: %s\n", facname); 3847 syslog_file = 0; 3848 } else { 3849 syslog_facility = fp->fac_value; 3850 syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY; 3851 } 3852 3853 fmd_prop_free_string(hdl, facname); 3854 } 3855 3856 /* 3857 * start the message responder and the connection acceptance 3858 * server; request protocol version be negotiated after waiting 3859 * a second for the receiver to be ready to start handshaking 3860 */ 3861 3862 etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl); 3863 etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl); 3864 3865 (void) etm_sleep(ETM_SLEEP_QUIK); 3866 etm_req_ver_negot(hdl); 3867 3868 } else if (type_mask & LDOM_TYPE_ROOT) { 3869 etm_ldom_type = LDOM_TYPE_ROOT; 3870 fmd_hdl_debug(hdl, "info: running as root domain\n"); 3871 3872 /* 3873 * looking for libds.so.1. 3874 * If not found, don't do DS registration. As a result, 3875 * there will be no DS callbacks or other DS services. 3876 */ 3877 if (etm_ds_init(hdl) < 0) { 3878 fmd_hdl_debug(hdl, 3879 "error: dlopen() libds failed, " 3880 "module unregistering\n"); 3881 ldom_fini(etm_lhp); 3882 fmd_hdl_unregister(hdl); 3883 return; 3884 } 3885 3886 /* 3887 * DS service registration 3888 */ 3889 if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) { 3890 fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n", 3891 rc); 3892 } 3893 3894 /* 3895 * this thread is created for ds_reg_cb/ds_unreg_cb 3896 */ 3897 etm_async_e_tid = fmd_thr_create(hdl, 3898 etm_async_event_handler, hdl); 3899 3900 flags = FMD_XPRT_RDWR; 3901 } else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) { 3902 /* 3903 * Do not load this module if it is 3904 * . runing on a non-root ldom 3905 * . the domain owns no io devices 3906 */ 3907 fmd_hdl_debug(hdl, 3908 "info: non-root ldom, module unregistering\n"); 3909 ldom_fini(etm_lhp); 3910 fmd_hdl_unregister(hdl); 3911 return; 3912 } else { 3913 /* 3914 * place holder, all other cases. unload etm for now 3915 */ 3916 fmd_hdl_debug(hdl, 3917 "info: other ldom type, module unregistering\n"); 3918 ldom_fini(etm_lhp); 3919 fmd_hdl_unregister(hdl); 3920 return; 3921 } 3922 3923 fmd_hdl_debug(hdl, "info: module initialized ok\n"); 3924 3925 } /* _fmd_init() */ 3926 3927 /* 3928 * etm_recv - receive an FMA event from FMD and transport it 3929 * to the remote endpoint 3930 */ 3931 3932 /*ARGSUSED*/ 3933 void 3934 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class) 3935 { 3936 etm_xport_addr_t *addrv; /* vector of transport addresses */ 3937 etm_xport_conn_t conn; /* connection handle */ 3938 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 3939 ssize_t i, n; /* gen use */ 3940 size_t sz; /* header size */ 3941 size_t buflen; /* size of packed FMA event */ 3942 uint8_t *buf; /* tmp buffer for packed FMA event */ 3943 3944 /* 3945 * if this is running on a Root Domain, ignore the events, 3946 * return right away 3947 */ 3948 if (etm_ldom_type == LDOM_TYPE_ROOT) 3949 return; 3950 3951 buflen = 0; 3952 if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) { 3953 fmd_hdl_error(hdl, "error: FMA event dropped: " 3954 "event size errno %d class %s\n", n, class); 3955 etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++; 3956 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 3957 return; 3958 } 3959 3960 fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp); 3961 fmd_hdl_debug(hdl, "info: cnt %llu class %s\n", 3962 etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class); 3963 3964 etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen; 3965 etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++; 3966 3967 /* 3968 * if the debug limit has been set, avoid excessive traffic, 3969 * for example, an infinite cycle using loopback nodes 3970 */ 3971 3972 if ((etm_debug_max_ev_cnt >= 0) && 3973 (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 > 3974 etm_debug_max_ev_cnt)) { 3975 fmd_hdl_debug(hdl, "warning: FMA event dropped: " 3976 "event %p cnt %llu > debug max %d\n", evp, 3977 etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, 3978 etm_debug_max_ev_cnt); 3979 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 3980 return; 3981 } 3982 3983 /* allocate a buffer for the FMA event and nvlist pack it */ 3984 3985 buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP); 3986 3987 /* 3988 * increment the ttl value if the event is from remote (a root domain) 3989 * uncomment this when enabling fault forwarding from Root domains 3990 * to Control domain. 3991 * 3992 * uint8_t ttl; 3993 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) { 3994 * if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) { 3995 * (void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8); 3996 * (void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1); 3997 * } 3998 * } 3999 */ 4000 4001 if ((n = nvlist_pack(evp, (char **)&buf, &buflen, 4002 NV_ENCODE_XDR, 0)) != 0) { 4003 fmd_hdl_error(hdl, "error: FMA event dropped: " 4004 "event pack errno %d class %s\n", n, class); 4005 etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++; 4006 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 4007 fmd_hdl_free(hdl, buf, buflen); 4008 return; 4009 } 4010 4011 /* get vector of dst addrs and send the FMA event to each one */ 4012 4013 if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) { 4014 fmd_hdl_error(hdl, "error: FMA event dropped: " 4015 "bad event dst addrs errno %d\n", errno); 4016 etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++; 4017 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 4018 fmd_hdl_free(hdl, buf, buflen); 4019 return; 4020 } 4021 4022 for (i = 0; addrv[i] != NULL; i++) { 4023 4024 /* open a new connection to this dst addr */ 4025 4026 if ((n = etm_conn_open(hdl, "FMA event dropped: " 4027 "bad conn open on new ev", addrv[i], &conn)) < 0) { 4028 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 4029 continue; 4030 } 4031 4032 (void) pthread_mutex_lock(&etm_write_lock); 4033 4034 /* write the ETM message header */ 4035 4036 if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR, 4037 &sz)) == NULL) { 4038 (void) pthread_mutex_unlock(&etm_write_lock); 4039 fmd_hdl_error(hdl, "error: FMA event dropped: " 4040 "bad hdr write errno %d\n", errno); 4041 (void) etm_conn_close(hdl, 4042 "bad conn close per bad hdr wr", conn); 4043 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 4044 continue; 4045 } 4046 4047 fmd_hdl_free(hdl, hdrp, sz); /* header not needed */ 4048 etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++; 4049 fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n", 4050 evp); 4051 4052 /* write the ETM message body, ie, the packed nvlist */ 4053 4054 if ((n = etm_io_op(hdl, "FMA event dropped: " 4055 "bad io write on event", conn, 4056 buf, buflen, ETM_IO_OP_WR)) < 0) { 4057 (void) pthread_mutex_unlock(&etm_write_lock); 4058 (void) etm_conn_close(hdl, 4059 "bad conn close per bad body wr", conn); 4060 etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++; 4061 continue; 4062 } 4063 4064 (void) pthread_mutex_unlock(&etm_write_lock); 4065 4066 etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++; 4067 etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen; 4068 fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n", 4069 evp); 4070 4071 /* close the connection */ 4072 4073 (void) etm_conn_close(hdl, "bad conn close after event send", 4074 conn); 4075 } /* foreach dst addr in the vector */ 4076 4077 etm_xport_free_addrv(hdl, addrv); 4078 fmd_hdl_free(hdl, buf, buflen); 4079 4080 } /* etm_recv() */ 4081 4082 4083 /* 4084 * etm_send - receive an FMA event from FMD and enQ it in the iosvc.Q. 4085 * etm_send_to_remote_root() deQ and xprt the FMA events to a 4086 * remote root domain 4087 * return FMD_SEND_SUCCESS for success, 4088 * FMD_SEND_FAILED for error 4089 */ 4090 4091 /*ARGSUSED*/ 4092 int 4093 etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl) 4094 { 4095 uint32_t pack_it; /* whether to pack/enq the event */ 4096 etm_pack_msg_type_t msg_type; 4097 /* tell etm_pack_ds_msg() what to do */ 4098 etm_iosvc_t *iosvc; /* ptr to cur iosvc struct */ 4099 char *class; /* nvlist class name */ 4100 4101 pack_it = 1; 4102 msg_type = FMD_XPRT_OTHER_MSG; 4103 4104 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 4105 if (class == NULL) { 4106 pack_it = 0; 4107 } else { 4108 if (etm_debug_lvl >= 1) { 4109 fmd_hdl_debug(fmd_hdl, 4110 "info: evp class= %s in etm_send\n", class); 4111 } 4112 4113 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 4114 iosvc = 4115 (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp); 4116 4117 /* 4118 * check the flag FORWARDING_FAULTS_TO_CONTROL to 4119 * decide if or not to drop fault subscription 4120 * control msgs 4121 */ 4122 if (strcmp(class, "resource.fm.xprt.subscribe") == 0) { 4123 pack_it = 0; 4124 /* 4125 * if (FORWARDING_FAULTS_TO_CONTROL == 1) { 4126 * (void) nvlist_lookup_string(nvl, 4127 * FM_RSRC_XPRT_SUBCLASS, &subclass); 4128 * if (strcmp(subclass, "list.suspect") 4129 * == 0) { 4130 * pack_it = 1; 4131 * msg_action = FMD_XPRT_OTHER_MSG; 4132 * } 4133 * if (strcmp(subclass, "list.repaired") 4134 * == 0) { 4135 * pack_it = 1; 4136 * msg_action = FMD_XPRT_OTHER_MSG; 4137 * } 4138 * } 4139 */ 4140 } 4141 if (strcmp(class, "resource.fm.xprt.run") == 0) { 4142 pack_it = 1; 4143 msg_type = FMD_XPRT_RUN_MSG; 4144 } 4145 } else { /* has to be the root domain ldom */ 4146 iosvc = &io_svc; 4147 /* 4148 * drop all ereport and fault subscriptions 4149 * are we dropping too much here, more than just ereport 4150 * and fault subscriptions? need to check 4151 */ 4152 if (strcmp(class, "resource.fm.xprt.subscribe") == 0) 4153 pack_it = 0; 4154 if (strcmp(class, "resource.fm.xprt.run") == 0) { 4155 pack_it = 1; 4156 msg_type = FMD_XPRT_RUN_MSG; 4157 } 4158 } 4159 } 4160 4161 if (pack_it) { 4162 if (etm_debug_lvl >= 1) { 4163 fmd_hdl_debug(fmd_hdl, 4164 "info: ldom name returned from xprt get specific=" 4165 "%s xprt=%lld\n", iosvc->ldom_name, xp); 4166 } 4167 /* 4168 * pack the etm msg for the DS library and enq in io_svc->Q 4169 * when the hdrp is NULL, the packing func will use the static 4170 * iosvc_hdr 4171 */ 4172 (void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type, 4173 ETM_CKPT_NOOP); 4174 } 4175 4176 return (FMD_SEND_SUCCESS); 4177 4178 } /* etm_send() */ 4179 4180 4181 4182 /* 4183 * _fmd_fini - stop the server daemon and teardown the transport 4184 */ 4185 4186 void 4187 _fmd_fini(fmd_hdl_t *hdl) 4188 { 4189 ssize_t n; /* gen use */ 4190 etm_iosvc_t *iosvc; /* ptr to insvc struct */ 4191 etm_iosvc_q_ele_t msg_ele; /* iosvc msg ele */ 4192 uint32_t i; /* for loop var */ 4193 4194 fmd_hdl_debug(hdl, "info: module finalizing\n"); 4195 4196 /* kill the connection server and responder ; wait for them to die */ 4197 4198 etm_is_dying = 1; 4199 4200 if (etm_svr_tid != NULL) { 4201 fmd_thr_signal(hdl, etm_svr_tid); 4202 fmd_thr_destroy(hdl, etm_svr_tid); 4203 etm_svr_tid = NULL; 4204 } /* if server thread was successfully created */ 4205 4206 if (etm_resp_tid != NULL) { 4207 fmd_thr_signal(hdl, etm_resp_tid); 4208 fmd_thr_destroy(hdl, etm_resp_tid); 4209 etm_resp_tid = NULL; 4210 } /* if responder thread was successfully created */ 4211 4212 if (etm_async_e_tid != NULL) { 4213 fmd_thr_signal(hdl, etm_async_e_tid); 4214 fmd_thr_destroy(hdl, etm_async_e_tid); 4215 etm_async_e_tid = NULL; 4216 } /* if async event handler thread was successfully created */ 4217 4218 4219 if ((etm_ldom_type == LDOM_TYPE_LEGACY) || 4220 (etm_ldom_type == LDOM_TYPE_CONTROL)) { 4221 4222 /* teardown the transport and cleanup syslogging */ 4223 if ((n = etm_xport_fini(hdl)) != 0) { 4224 fmd_hdl_error(hdl, "warning: xport fini errno %d\n", 4225 (-n)); 4226 } 4227 if (etm_fmd_xprt != NULL) { 4228 fmd_xprt_close(hdl, etm_fmd_xprt); 4229 } 4230 4231 if (syslog_logfd != -1) { 4232 (void) close(syslog_logfd); 4233 } 4234 if (syslog_msgfd != -1) { 4235 (void) close(syslog_msgfd); 4236 } 4237 } 4238 4239 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 4240 if (ldom_unregister_event(etm_lhp)) 4241 fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n"); 4242 4243 /* 4244 * On control domain side, there may be multiple iosvc struct 4245 * in use, one for each bound/active domain. Each struct 4246 * manages a queue of fma events destined to the root domain. 4247 * Need to go thru every iosvc struct to clean up its resources. 4248 */ 4249 for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) { 4250 if (iosvc_list[i].ldom_name[0] != '\0') { 4251 /* 4252 * found an iosvc struct for a root domain 4253 */ 4254 iosvc = &iosvc_list[i]; 4255 (void) pthread_mutex_lock(&iosvc_list_lock); 4256 etm_iosvc_cleanup(hdl, iosvc, B_TRUE, B_FALSE); 4257 (void) pthread_mutex_unlock(&iosvc_list_lock); 4258 4259 } else { 4260 /* 4261 * reach the end of existing iosvc structures 4262 */ 4263 continue; 4264 } 4265 } /* for i<NUM_OF_ROOT_DOMAINS */ 4266 etm_ckpt_fini(hdl); 4267 etm_filter_fini(hdl); 4268 4269 ldom_fini(etm_lhp); 4270 4271 } else if (etm_ldom_type == LDOM_TYPE_ROOT) { 4272 /* 4273 * On root domain side, there is only one iosvc struct in use. 4274 */ 4275 iosvc = &io_svc; 4276 if (iosvc->send_tid != NULL) { 4277 fmd_thr_signal(hdl, iosvc->send_tid); 4278 fmd_thr_destroy(hdl, iosvc->send_tid); 4279 iosvc->send_tid = NULL; 4280 } /* if io svc send thread was successfully created */ 4281 4282 if (iosvc->recv_tid != NULL) { 4283 fmd_thr_signal(hdl, iosvc->recv_tid); 4284 fmd_thr_destroy(hdl, iosvc->recv_tid); 4285 iosvc->recv_tid = NULL; 4286 } /* if io svc receive thread was successfully created */ 4287 4288 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 4289 while (iosvc->msg_q_cur_len > 0) { 4290 (void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele); 4291 fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size); 4292 } 4293 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 4294 4295 if (iosvc->fmd_xprt != NULL) 4296 fmd_xprt_close(hdl, iosvc->fmd_xprt); 4297 ldom_fini(etm_lhp); 4298 } 4299 if (etm_ds_fini) { 4300 (*etm_ds_fini)(); 4301 (void) dlclose(etm_dl_hdl); 4302 } 4303 4304 fmd_hdl_debug(hdl, "info: module finalized ok\n"); 4305 4306 } /* _fmd_fini() */ 4307