/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * etm.c	FMA Event Transport Module implementation, a plugin of FMD
 *		for sun4v/Ontario
 *
 * plugin for sending/receiving FMA events to/from service processor
 */

/*
 * --------------------------------- includes --------------------------------
 */

#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/ldom.h>
#include <sys/strlog.h>
#include <sys/syslog.h>
#include <sys/libds.h>
#include <netinet/in.h>
#include <fm/fmd_api.h>

#include "etm_xport_api.h"
#include "etm_etm_proto.h"
#include "etm_impl.h"
#include "etm_iosvc.h"
#include "etm_filter.h"
#include "etm_ckpt.h"

#include <pthread.h>
#include <signal.h>
#include <stropts.h>
#include <locale.h>
#include <strings.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <values.h>
#include <alloca.h>
#include <errno.h>
#include <dlfcn.h>
#include <link.h>
#include <fcntl.h>
#include <time.h>

/*
 * ----------------------------- forward decls -------------------------------
 */

static void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);

static int
etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl);

static void
etm_send_to_remote_root(void *arg);

static void
etm_recv_from_remote_root(void *arg);

/*
 * ------------------------- data structs for FMD ----------------------------
 */

static fmd_hdl_ops_t fmd_ops = {
	etm_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
	etm_send,	/* fmdo_send */
};

static const fmd_prop_t fmd_props[] = {
	{ ETM_PROP_NM_XPORT_ADDRS,		FMD_TYPE_STRING, "" },
	{ ETM_PROP_NM_DEBUG_LVL,		FMD_TYPE_INT32, "0" },
	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,		FMD_TYPE_INT32, "-1" },
	{ ETM_PROP_NM_CONSOLE,			FMD_TYPE_BOOL, "false" },
	{ ETM_PROP_NM_SYSLOGD,			FMD_TYPE_BOOL, "true" },
	{ ETM_PROP_NM_FACILITY,			FMD_TYPE_STRING, "LOG_DAEMON" },
	{ ETM_PROP_NM_MAX_RESP_Q_LEN,		FMD_TYPE_UINT32, "512" },
	{ ETM_PROP_NM_BAD_ACC_TO_SEC,		FMD_TYPE_UINT32, "1" },
	{ ETM_PROP_NM_FMA_RESP_WAIT_TIME,	FMD_TYPE_INT32, "240" },
	{ NULL, 0, NULL }
};


static const fmd_hdl_info_t fmd_info = {
	"FMA Event Transport Module", "1.2", &fmd_ops, fmd_props
};

/*
 * ----------------------- private consts and defns --------------------------
 */

/* misc buffer for variable sized protocol header fields */

#define	ETM_MISC_BUF_SZ	(4 * 1024)
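/*
 * Editorial sketch (not part of the original source): fmd_props[] above
 * declares this module's tunables; fmd_init() would typically cache them
 * via the standard fmd_api property accessors, e.g.
 *
 *	etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
 *	etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
 *	    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
 *
 * assigning into the like-named globals defined below.
 */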

static uint32_t
etm_ldom_type = LDOM_TYPE_LEGACY;

/* try limit for IO operations w/ capped exp backoff sleep on retry */

/*
 * Design_Note: ETM will potentially retry forever IO operations that the
 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
 *		giving up after some number of seconds. This avoids
 *		dropping FMA events while the service processor is down,
 *		but at the risk of blocking in fmdo_recv() forever and
 *		overflowing FMD's event queue for ETM.
 *		A future TBD enhancement would be to always recv
 *		and send each ETM msg in a single read/write() to reduce
 *		the risk of failure between ETM msg hdr and body,
 *		assuming the MTU_SZ is large enough.
 */

#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
#define	ETM_TRY_BACKOFF_RATE	(4)
#define	ETM_TRY_BACKOFF_CAP	(60)

/* amount to increment protocol transaction id on each new send */

#define	ETM_XID_INC	(2)

typedef struct etm_resp_q_ele {

	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
	int32_t			rqe_resp_code;	/* response code to send */

	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */

} etm_resp_q_ele_t;	/* responder queue element */

/*
 * ---------------------------- global data ----------------------------------
 */

static fmd_hdl_t
*init_hdl = NULL;	/* used in mem allocator and several other places */

static int
etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */

static int
etm_debug_max_ev_cnt = -1;	/* max allowed event count for debugging */

static fmd_xprt_t
*etm_fmd_xprt = NULL;	/* FMD transport layer handle */

static pthread_t
etm_svr_tid = NULL;	/* thread id of connection acceptance server */

static pthread_t
etm_resp_tid = NULL;	/* thread id of msg responder */

static etm_resp_q_ele_t
*etm_resp_q_head = NULL;	/* ptr to cur head of responder queue */

static etm_resp_q_ele_t
*etm_resp_q_tail = NULL;	/* ptr to cur tail of responder queue */

static uint32_t
etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */

static uint32_t
etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */

static uint32_t
etm_bad_acc_to_sec = 0;	/* sleep timeout (in sec) after bad conn accept */

static pthread_mutex_t
etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER;	/* protects responder queue */

static pthread_cond_t
etm_resp_q_cv = PTHREAD_COND_INITIALIZER;	/* nudges msg responder */

static volatile int
etm_is_dying = 0;	/* bool for dying (killing self) */

static uint32_t
etm_xid_cur = 0;	/* current transaction id for sends */

static uint32_t
etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */

static uint32_t
etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */

static uint32_t
etm_xid_posted_logged_ev = 0;
			/* xid of last FMA_EVENT msg/event posted OK to FMD */

static uint32_t
etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */

static uint8_t
etm_resp_ver = ETM_PROTO_V1;	/* proto ver [negotiated] for msg sends */

static uint32_t
etm_fma_resp_wait_time = 30;	/* time (sec) wait for fma event resp */
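/*
 * Editorial sketch (plausible usage, not shown in this excerpt): per the
 * Design_Note before etm_resp_q_enq() below, the responder queue globals
 * above are only touched with etm_resp_q_lock held, e.g.
 *
 *	(void) pthread_mutex_lock(&etm_resp_q_lock);
 *	rv = etm_resp_q_enq(hdl, &rqe);
 *	if (etm_resp_q_cur_len == 1)
 *		(void) pthread_cond_signal(&etm_resp_q_cv);
 *	(void) pthread_mutex_unlock(&etm_resp_q_lock);
 *
 * with the cv nudging the responder thread awake.
 */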

static pthread_mutex_t
etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */

static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
static int syslog_facility;	/* log(7D) facility (part of priority) */
static int syslog_logfd = -1;	/* log(7D) file descriptor */
static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
static int syslog_file = 0;	/* log to syslog_logfd */
static int syslog_cons = 0;	/* log to syslog_msgfd */

static const struct facility {
	const char *fac_name;
	int fac_value;
} syslog_facs[] = {
	{ "LOG_DAEMON", LOG_DAEMON },
	{ "LOG_LOCAL0", LOG_LOCAL0 },
	{ "LOG_LOCAL1", LOG_LOCAL1 },
	{ "LOG_LOCAL2", LOG_LOCAL2 },
	{ "LOG_LOCAL3", LOG_LOCAL3 },
	{ "LOG_LOCAL4", LOG_LOCAL4 },
	{ "LOG_LOCAL5", LOG_LOCAL5 },
	{ "LOG_LOCAL6", LOG_LOCAL6 },
	{ "LOG_LOCAL7", LOG_LOCAL7 },
	{ NULL, 0 }
};

static struct stats {

	/* ETM msg counters */

	fmd_stat_t etm_rd_hdr_fmaevent;
	fmd_stat_t etm_rd_hdr_control;
	fmd_stat_t etm_rd_hdr_alert;
	fmd_stat_t etm_rd_hdr_response;
	fmd_stat_t etm_rd_body_fmaevent;
	fmd_stat_t etm_rd_body_control;
	fmd_stat_t etm_rd_body_alert;
	fmd_stat_t etm_rd_body_response;
	fmd_stat_t etm_wr_hdr_fmaevent;
	fmd_stat_t etm_wr_hdr_control;
	fmd_stat_t etm_wr_hdr_response;
	fmd_stat_t etm_wr_body_fmaevent;
	fmd_stat_t etm_wr_body_control;
	fmd_stat_t etm_wr_body_response;

	fmd_stat_t etm_rd_max_ev_per_msg;
	fmd_stat_t etm_wr_max_ev_per_msg;

	fmd_stat_t etm_resp_q_cur_len;
	fmd_stat_t etm_resp_q_max_len;

	/* ETM byte counters */

	fmd_stat_t etm_wr_fmd_bytes;
	fmd_stat_t etm_rd_fmd_bytes;
	fmd_stat_t etm_wr_xport_bytes;
	fmd_stat_t etm_rd_xport_bytes;

	fmd_stat_t etm_magic_drop_bytes;

	/* ETM [dropped] FMA event counters */

	fmd_stat_t etm_rd_fmd_fmaevent;
	fmd_stat_t etm_wr_fmd_fmaevent;

	fmd_stat_t etm_rd_drop_fmaevent;
	fmd_stat_t etm_wr_drop_fmaevent;

	fmd_stat_t etm_rd_dup_fmaevent;
	fmd_stat_t etm_wr_dup_fmaevent;

	fmd_stat_t etm_rd_dup_alert;
	fmd_stat_t etm_wr_dup_alert;

	fmd_stat_t etm_enq_drop_resp_q;
	fmd_stat_t etm_deq_drop_resp_q;

	/* ETM protocol failures */

	fmd_stat_t etm_magic_bad;
	fmd_stat_t etm_ver_bad;
	fmd_stat_t etm_msgtype_bad;
	fmd_stat_t etm_subtype_bad;
	fmd_stat_t etm_xid_bad;
	fmd_stat_t etm_fmaeventlen_bad;
	fmd_stat_t etm_respcode_bad;
	fmd_stat_t etm_timeout_bad;
	fmd_stat_t etm_evlens_bad;

	/* IO operation failures */

	fmd_stat_t etm_xport_wr_fail;
	fmd_stat_t etm_xport_rd_fail;
	fmd_stat_t etm_xport_pk_fail;

	/* IO operation retries */

	fmd_stat_t etm_xport_wr_retry;
	fmd_stat_t etm_xport_rd_retry;
	fmd_stat_t etm_xport_pk_retry;

	/* system and library failures */

	fmd_stat_t etm_os_nvlist_pack_fail;
	fmd_stat_t etm_os_nvlist_unpack_fail;
	fmd_stat_t etm_os_nvlist_size_fail;
	fmd_stat_t etm_os_pthread_create_fail;

	/* xport API failures */

	fmd_stat_t etm_xport_get_ev_addrv_fail;
	fmd_stat_t etm_xport_open_fail;
	fmd_stat_t etm_xport_close_fail;
	fmd_stat_t etm_xport_accept_fail;
	fmd_stat_t etm_xport_open_retry;

	/* FMD entry point bad arguments */

	fmd_stat_t etm_fmd_init_badargs;
	fmd_stat_t etm_fmd_fini_badargs;

	/* Alert logging errors */

	fmd_stat_t etm_log_err;
	fmd_stat_t etm_msg_err;

	/* miscellaneous stats */

	fmd_stat_t etm_reset_xport;

} etm_stats = {

	/* ETM msg counters */

	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers rcvd from xport" },
	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers rcvd from xport" },
	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
		"ETM alert msg headers rcvd from xport" },
	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers rcvd from xport" },
	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies rcvd from xport" },
	{ "etm_rd_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies rcvd from xport" },
	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
		"ETM alert msg bodies rcvd from xport" },
	{ "etm_rd_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies rcvd from xport" },
	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers sent to xport" },
	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers sent to xport" },
	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers sent to xport" },
	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies sent to xport" },
	{ "etm_wr_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies sent to xport" },
	{ "etm_wr_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies sent to xport" },

	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg from xport" },
	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg to xport" },

	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
		"cur enqueued response msgs to xport" },
	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
		"max enqueable response msgs to xport" },

	/* ETM byte counters */

	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to FMD" },
	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from FMD" },
	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to xport" },
	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from xport" },

	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
		"bytes dropped from xport pre magic num" },

	/* ETM [dropped] FMA event counters */

	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events rcvd from FMD" },
	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events sent to FMD" },

	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events from xport" },
	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events to xport" },

	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events rcvd from xport" },
	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events sent to xport" },

	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
		"duplicate ALERTs rcvd from xport" },
	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
		"duplicate ALERTs sent to xport" },

	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
		"dropped response msgs on enq" },
	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
		"dropped response msgs on deq" },

	/* ETM protocol failures */

	{ "etm_magic_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid magic num" },
	{ "etm_ver_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid protocol version" },
	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid message type" },
	{ "etm_subtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid sub type" },
	{ "etm_xid_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ unmatched xid" },
	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid FMA event length" },
	{ "etm_respcode_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid response code" },
	{ "etm_timeout_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid timeout value" },
	{ "etm_evlens_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ too many event lengths" },

	/* IO operation failures */

	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
		"xport write failures" },
	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
		"xport read failures" },
	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
		"xport peek failures" },

	/* IO operation retries */

	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
		"xport write retries" },
	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
		"xport read retries" },
	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
		"xport peek retries" },

	/* system and library failures */

	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
		"nvlist_pack failures" },
	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
		"nvlist_unpack failures" },
	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
		"nvlist_size failures" },
	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
		"pthread_create failures" },

	/* transport API failures */

	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
		"xport get event addrv API failures" },
	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
		"xport open API failures" },
	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
		"xport close API failures" },
	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
		"xport accept API failures" },
	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
		"xport open API retries" },

	/* FMD entry point bad arguments */

	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_init entry point" },
	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_fini entry point" },

	/* Alert logging errors */

	{ "etm_log_err", FMD_TYPE_UINT64,
		"failed to log message to log(7D)" },
	{ "etm_msg_err", FMD_TYPE_UINT64,
		"failed to log message to sysmsg(7D)" },

	/* miscellaneous stats */

	{ "etm_reset_xport", FMD_TYPE_UINT64,
		"xport resets after xport API failure" }
};


/*
 * -------------------- global data for Root ldom ----------------------------
 */

ldom_hdl_t
*etm_lhp = NULL;	/* ldom pointer */

static void *etm_dl_hdl = (void *)NULL;
static const char *etm_dl_path = "libds.so.1";
static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL);

static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL;
static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen,
    size_t *msglen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL;
static int (*etm_ds_fini)(void) = (int (*)(void))NULL;

static pthread_mutex_t
iosvc_list_lock = PTHREAD_MUTEX_INITIALIZER;

static pthread_t
etm_async_e_tid = NULL;	/* thread id of io svc async event handler */

static etm_proto_v1_ev_hdr_t
iosvc_hdr = {
	ETM_PROTO_MAGIC_NUM,	/* magic number */
	ETM_PROTO_V1,		/* default to V1, not checked */
	ETM_MSG_TYPE_FMA_EVENT,	/* Root Domain introduces only FMA events */
	0,			/* sub-type */
	0,			/* pad */
	0,			/* add the xid at the Q send time */
	ETM_PROTO_V1_TIMEOUT_NONE,
	0			/* ev_lens, 0-termed, after 1 FMA event */
};

/*
 * static iosvc_list
 */
static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = {
	{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
	{"", 0}, {"", 0}
};

static etm_iosvc_t io_svc = {
	"\0",				/* ldom_name */
	PTHREAD_COND_INITIALIZER,	/* nudges */
	PTHREAD_MUTEX_INITIALIZER,	/* protects the iosvc msg Q */
	NULL,				/* iosvc msg Q head */
	NULL,				/* iosvc msg Q tail */
	0,				/* msg Q current length */
	100,				/* msg Q max length */
	0,				/* current transaction id */
	0,				/* xid of last event posted to FMD */
	DS_INVALID_HDL,			/* DS handle */
	NULL,				/* fmd xprt handle */
	NULL,				/* tid 4 send to remote RootDomain */
	NULL,				/* tid 4 recv from remote RootDomain */
	PTHREAD_COND_INITIALIZER,	/* nudges etm_send_to_remote_root */
	PTHREAD_MUTEX_INITIALIZER,	/* protects msg_ack_cv */
	0,				/* send/recv threads are not dying */
	0,				/* flag for start sending msg Q */
	0				/* indicate if the ACK has come */
};
etm_iosvc_t *io_svc_p = &io_svc;


static uint32_t
flags;	/* flags for fmd_xprt_open */

static etm_async_event_ele_t
async_event_q[ASYNC_EVENT_Q_SIZE];	/* holds the async events */

static uint32_t
etm_async_q_head = 0;	/* ptr to cur head of async event queue */

static uint32_t
etm_async_q_tail = 0;	/* ptr to cur tail of async event queue */

static uint32_t
etm_async_q_cur_len = 0;	/* cur length (ele cnt) of async event queue */

static uint32_t
etm_async_q_max_len = ASYNC_EVENT_Q_SIZE;
			/* max length (ele cnt) of async event queue */

static pthread_cond_t
etm_async_event_q_cv = PTHREAD_COND_INITIALIZER;
			/* nudges async event handler */

static pthread_mutex_t
etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER;
			/* protects async event q */

static ds_ver_t
etm_iosvc_vers[] = { { 1, 0} };

#define	ETM_NVERS	(sizeof (etm_iosvc_vers) / sizeof (ds_ver_t))

static ds_capability_t
iosvc_caps = {
	"ETM",			/* svc_id */
	etm_iosvc_vers,		/* vers */
	ETM_NVERS		/* number of vers */
};

static void
etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver,
    ds_domain_hdl_t did);

static void
etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg);

static ds_ops_t
iosvc_ops = {
	etm_iosvc_reg_handler,		/* ds_reg_cb */
	etm_iosvc_unreg_handler,	/* ds_unreg_cb */
	NULL,				/* ds_data_cb */
	NULL				/* cb_arg */
};
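/*
 * Editorial sketch (assumed wiring; the init-time code is outside this
 * excerpt): the etm_ds_* pointers above are bound to libds at runtime,
 * and iosvc_caps/iosvc_ops are then handed to the registration entry
 * point, presumably along these lines:
 *
 *	etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode);
 *	etm_ds_clnt_reg = (int (*)(ds_capability_t *, ds_ops_t *))
 *	    dlsym(etm_dl_hdl, "ds_clnt_reg");
 *	if ((*etm_ds_clnt_reg)(&iosvc_caps, &iosvc_ops) != 0)
 *		fmd_hdl_debug(hdl, "error: ds clnt reg failed\n");
 *
 * with NULL checks on the dlopen(3C) handle and every dlsym(3C) result
 * before any pointer is called; the exact symbol names are assumptions
 * here. Once registered, etm_iosvc_reg_handler() and
 * etm_iosvc_unreg_handler() run as peer domains come and go.
 */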


/*
 * -------------------------- support functions ------------------------------
 */

/*
 * Design_Note: Each failure worth reporting to FMD should be done using
 *		a single call to fmd_hdl_error() as it logs an FMA event
 *		for each call. Also be aware that all the fmd_hdl_*()
 *		format strings currently use platform specific *printf()
 *		routines; so "%p" under Solaris does not prepend "0x" to
 *		the outputted hex digits, while Linux and VxWorks do.
 */


/*
 * etm_show_time - display the current time of day (for debugging) using
 *		the given FMD module handle and annotation string
 */

static void
etm_show_time(fmd_hdl_t *hdl, char *note_str)
{
	struct timeval tmv;		/* timeval */

	(void) gettimeofday(&tmv, NULL);
	fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n",
	    note_str, tmv.tv_sec, tmv.tv_usec);

} /* etm_show_time() */

/*
 * etm_hexdump - hexdump the given buffer (for debugging) using
 *		the given FMD module handle
 */

static void
etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
{
	uint8_t *bp;		/* byte ptr */
	int i, j;		/* index */
	char cb[80];		/* char buf */
	unsigned int n;		/* a byte of data for sprintf() */

	bp = buf;
	j = 0;

	/*
	 * Design_Note: fmd_hdl_debug() auto adds a newline if missing;
	 *		hence cb exists to accumulate a longer string.
	 */

	for (i = 1; i <= byte_cnt; i++) {
		n = *bp++;
		(void) sprintf(&cb[j], "%2.2x ", n);
		j += 3;
		/* add a newline every 16 bytes or at the buffer's end */
		if (((i % 16) == 0) || (i >= byte_cnt)) {
			cb[j-1] = '\0';
			fmd_hdl_debug(hdl, "%s\n", cb);
			j = 0;
		}
	} /* for each byte in the buffer */

} /* etm_hexdump() */

/*
 * etm_sleep - sleep the caller for the given number of seconds,
 *		return 0 or -errno value
 *
 * Design_Note: To avoid interfering with FMD's signal mask (SIGALRM)
 *		do not use [Solaris] sleep(3C) and instead use
 *		pthread_cond_wait() or nanosleep(), both of which
 *		are POSIX spec-ed to leave signal masks alone.
 *		This is needed for Solaris and Linux (domain and SP).
 */

static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec tms;	/* for nanosleep() */

	tms.tv_sec = sleep_sec;
	tms.tv_nsec = 0;

	if (nanosleep(&tms, NULL) < 0) {
		/* errno assumed set by above call */
		return (-errno);
	}
	return (0);

} /* etm_sleep() */

/*
 * etm_conn_open - open a connection to the given transport address,
 *		return 0 and the opened connection handle
 *		or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static int
etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
    etm_xport_addr_t addr, etm_xport_conn_t *connp)
{
	etm_xport_conn_t conn;	/* connection to return */
	int nev;		/* -errno value */

	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
		nev = (-errno);
		fmd_hdl_error(hdl, "error: %s: errno %d\n",
		    err_substr, errno);
		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
		return (nev);
	} else {
		*connp = conn;
		return (0);
	}
} /* etm_conn_open() */

/*
 * etm_conn_close - close the given connection,
 *		return 0 or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static int
etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
{
	int nev;	/* -errno value */

	if (etm_xport_close(hdl, conn) == NULL) {
		nev = (-errno);
		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
		    err_substr, errno);
		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
		return (nev);
	} else {
		return (0);
	}
} /* etm_conn_close() */
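/*
 * Editorial sketch (not part of the original source): the helpers above
 * and etm_io_op() below compose into the usual open/write/close shape,
 * e.g.
 *
 *	if (etm_conn_open(hdl, "bad conn open", addr, &conn) == 0) {
 *		(void) etm_io_op(hdl, "bad io write", conn,
 *		    buf, buf_sz, ETM_IO_OP_WR);
 *		(void) etm_conn_close(hdl, "bad conn close", conn);
 *	}
 *
 * which is exactly the pattern etm_req_ver_negot() uses further down.
 */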

/*
 * etm_io_op - perform an IO operation on the given connection
 *		with the given buffer,
 *		accommodating MTU size and retrying op if needed,
 *		return how many bytes actually done by the op
 *		or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static ssize_t
etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
    void *buf, size_t byte_cnt, int io_op)
{
	ssize_t rv;		/* ret val / byte count */
	ssize_t n;		/* gen use */
	uint8_t *datap;		/* ptr to data */
	size_t mtu_sz;		/* MTU size in bytes */
	int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
	    void *, size_t);
	size_t io_sz;		/* byte count for io_func_ptr */
	int try_cnt;		/* number of tries done */
	int sleep_sec;		/* exp backoff sleep period in sec */
	int sleep_rv;		/* ret val from sleeping */
	fmd_stat_t io_retry_stat;	/* IO retry stat to update */
	fmd_stat_t io_fail_stat;	/* IO failure stat to update */

	if ((conn == NULL) || (buf == NULL)) {
		return (-EINVAL);
	}
	switch (io_op) {
	case ETM_IO_OP_RD:
		io_func_ptr = etm_xport_read;
		io_retry_stat = etm_stats.etm_xport_rd_retry;
		io_fail_stat = etm_stats.etm_xport_rd_fail;
		break;
	case ETM_IO_OP_WR:
		io_func_ptr = etm_xport_write;
		io_retry_stat = etm_stats.etm_xport_wr_retry;
		io_fail_stat = etm_stats.etm_xport_wr_fail;
		break;
	default:
		return (-EINVAL);
	}
	if (byte_cnt == 0) {
		return (byte_cnt);	/* nop */
	}

	/* obtain [current] MTU size */

	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
	} else {
		mtu_sz = n;
	}

	/* loop until all IO done, try limit exceeded, or real failure */

	rv = 0;
	datap = buf;
	while (rv < byte_cnt) {
		io_sz = MIN((byte_cnt - rv), mtu_sz);
		try_cnt = 0;
		sleep_sec = 0;

		/* when give up, return -errno value even if partly done */

		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
		    (-EAGAIN)) {
			try_cnt++;
			if (try_cnt > ETM_TRY_MAX_CNT) {
				rv = n;
				goto func_ret;
			}
			if (etm_is_dying) {
				rv = (-EINTR);
				goto func_ret;
			}
			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
				rv = sleep_rv;
				goto func_ret;
			}
			sleep_sec = ((sleep_sec == 0) ? 1 :
			    (sleep_sec * ETM_TRY_BACKOFF_RATE));
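			/*
			 * Editorial note (worked example): with
			 * ETM_TRY_BACKOFF_RATE 4 and ETM_TRY_BACKOFF_CAP 60,
			 * successive retries sleep 0, 1, 4, 16, 60, 60, ...
			 * seconds once the cap below is applied.
			 */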
			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
			io_retry_stat.fmds_value.ui64++;
			if (etm_debug_lvl >= 1) {
				fmd_hdl_debug(hdl, "info: retrying io op %d "
				    "due to EAGAIN\n", io_op);
			}
		} /* while trying the io operation */

		if (etm_is_dying) {
			rv = (-EINTR);
			goto func_ret;
		}
		if (n < 0) {
			rv = n;
			goto func_ret;
		}
		/* avoid spinning CPU when given 0 bytes but no error */
		if (n == 0) {
			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
				rv = sleep_rv;
				goto func_ret;
			}
		}
		rv += n;
		datap += n;
	} /* while still have more data */

func_ret:

	if (rv < 0) {
		io_fail_stat.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
		    err_substr, (int)(-rv));
	}
	if (etm_debug_lvl >= 3) {
		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
		    io_op, (int)rv, (int)byte_cnt);
	}
	return (rv);

} /* etm_io_op() */

/*
 * etm_magic_read - read the magic number of an ETM message header
 *		from the given connection into the given buffer,
 *		return 0 or -errno value
 *
 * Design_Note: This routine is intended to help protect ETM from protocol
 *		framing errors as might be caused by an SP reset / crash in
 *		the middle of an ETM message send; the connection will be
 *		read from for as many bytes as needed until the magic number
 *		is found using a sliding buffer for comparisons.
 */

static int
etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
{
	int rv;			/* ret val */
	uint32_t magic_num;	/* magic number */
	int byte_cnt;		/* count of bytes read */
	uint8_t buf5[4+1];	/* sliding input buffer */
	int i, j;		/* indices into buf5 */
	ssize_t n;		/* gen use */
	uint8_t drop_buf[1024];	/* dropped bytes buffer */

	rv = 0;	/* assume success */
	magic_num = 0;
	byte_cnt = 0;
	j = 0;

	/* magic number bytes are sent in network (big endian) order */

	while (magic_num != ETM_PROTO_MAGIC_NUM) {
		if ((n = etm_io_op(hdl, "bad io read on magic",
		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
			rv = n;
			goto func_ret;
		}
		byte_cnt++;
		j = MIN((j + 1), sizeof (magic_num));
		if (byte_cnt < sizeof (magic_num)) {
			continue;
		}

		if (byte_cnt > sizeof (magic_num)) {
			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
			drop_buf[i] = buf5[0];
			for (i = 0; i < j; i++) {
				buf5[i] = buf5[i+1];
			} /* for sliding the buffer contents */
		}
		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
		magic_num = ntohl(magic_num);
	} /* for reading bytes until find magic number */

func_ret:

	if (byte_cnt != sizeof (magic_num)) {
		fmd_hdl_debug(hdl, "warning: bad proto frame "
		    "implies corrupt/lost msg(s)\n");
	}
	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
		fmd_hdl_debug(hdl, "info: magic drop hexdump "
		    "first %d of %d bytes:\n", i,
		    byte_cnt - sizeof (magic_num));
		etm_hexdump(hdl, drop_buf, i);
	}

	if (rv == 0) {
		*magic_ptr = magic_num;
	}
	return (rv);

} /* etm_magic_read() */
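/*
 * Editorial note (worked example, not in the original source): if the
 * stream carries two garbage bytes G1 G2 ahead of the magic bytes
 * M1 M2 M3 M4, etm_magic_read() above reads one byte at a time and
 * buf5[] holds G1G2M1M2, then G2M1M2M3, then M1M2M3M4, at which point
 * the ntohl() comparison matches; the two dropped bytes are tallied in
 * etm_magic_drop_bytes and saved in drop_buf[] for the debug hexdump.
 */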

/*
 * etm_hdr_read - allocate, read, and validate a [variable sized]
 *		ETM message header from the given connection,
 *		return the allocated ETM message header
 *		(which is guaranteed to be large enough to reuse as a
 *		RESPONSE msg hdr) and its size
 *		or NULL and set errno on failure
 */

static void *
etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
{
	uint8_t *hdrp;		/* ptr to header to return */
	size_t hdr_sz;		/* sizeof *hdrp */
	etm_proto_v1_pp_t pp;	/* protocol preamble */
	etm_proto_v1_ev_hdr_t *ev_hdrp;		/* for FMA_EVENT msg */
	etm_proto_v1_ctl_hdr_t *ctl_hdrp;	/* for CONTROL msg */
	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
	etm_proto_v3_sa_hdr_t *sa_hdrp;		/* for ALERT msg */
	uint32_t *lenp;		/* ptr to FMA event length */
	ssize_t i, n;		/* gen use */
	uint8_t misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
	int dummy_int;		/* dummy var to appease lint */

	hdrp = NULL; hdr_sz = 0;

	/* read the magic number which starts the protocol preamble */

	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
		errno = (-n);
		etm_stats.etm_magic_bad.fmds_value.ui64++;
		return (NULL);
	}

	/* read the rest of the protocol preamble all at once */

	if ((n = etm_io_op(hdl, "bad io read on preamble",
	    conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num),
	    ETM_IO_OP_RD)) < 0) {
		errno = (-n);
		return (NULL);
	}

	/*
	 * Design_Note: The magic number was already network decoded; but
	 *		some other preamble fields also need to be decoded,
	 *		specifically pp_xid and pp_timeout. The rest of the
	 *		preamble fields are byte sized and hence need no
	 *		decoding.
	 */

	pp.pp_xid = ntohl(pp.pp_xid);
	pp.pp_timeout = ntohl(pp.pp_timeout);

	/* sanity check the header as best we can */

	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
		    (int)pp.pp_proto_ver);
		errno = EPROTO;
		etm_stats.etm_ver_bad.fmds_value.ui64++;
		return (NULL);
	}

	dummy_int = pp.pp_msg_type;
	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
		errno = EBADMSG;
		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
		return (NULL);
	}

	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */

	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {

		ev_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*ev_hdrp);
		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));

		/* sanity check the header's timeout */

		if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) &&
		    (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) {
			errno = ETIME;
			etm_stats.etm_timeout_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get all FMA event lengths from the header */

		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
		i = -1;	/* cnt of length entries preceding 0 */
		do {
			i++; lenp++;
			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
			    ETM_MISC_BUF_SZ) {
				errno = E2BIG;	/* ridiculous size */
				etm_stats.etm_evlens_bad.fmds_value.ui64++;
				return (NULL);
			}
			if ((n = etm_io_op(hdl, "bad io read on event len",
			    conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) {
				errno = (-n);
				return (NULL);
			}
			*lenp = ntohl(*lenp);

		} while (*lenp != 0);
		i += 0; /* first len already counted by sizeof(ev_hdr) */
		hdr_sz += (i * sizeof (*lenp));

		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {

		ctl_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*ctl_hdrp);
		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));

		/* sanity check the header's sub type (control selector) */

		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
			    (int)ctl_hdrp->ctl_pp.pp_sub_type);
			errno = EBADMSG;
			etm_stats.etm_subtype_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the control length */

		if ((n = etm_io_op(hdl, "bad io read on ctl len",
		    conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);

		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {

		resp_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*resp_hdrp);
		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));

		/* sanity check the header's timeout */

		if (resp_hdrp->resp_pp.pp_timeout !=
		    ETM_PROTO_V1_TIMEOUT_NONE) {
			errno = ETIME;
			etm_stats.etm_timeout_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the response code and length */

		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
		    conn, &resp_hdrp->resp_code,
		    sizeof (resp_hdrp->resp_code)
		    + sizeof (resp_hdrp->resp_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);

		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {

		sa_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*sa_hdrp);
		(void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp));

		/* sanity check the header's protocol version */

		if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) {
			errno = EPROTO;
			etm_stats.etm_ver_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the priority and length */

		if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
		    conn, &sa_hdrp->sa_priority,
		    sizeof (sa_hdrp->sa_priority)
		    + sizeof (sa_hdrp->sa_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority);
		sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len);

		etm_stats.etm_rd_hdr_alert.fmds_value.ui64++;

	} /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */

	/*
	 * choose a header size that allows hdr reuse for RESPONSE msgs,
	 * allocate and populate the message header, and
	 * return alloc size to caller for later free of hdrp
	 */

	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
	(void) memcpy(hdrp, misc_buf, hdr_sz);

	if (etm_debug_lvl >= 3) {
		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n",
		    hdr_sz);
		etm_hexdump(hdl, hdrp, hdr_sz);
	}
	*szp = hdr_sz;
	return (hdrp);

} /* etm_hdr_read() */
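/*
 * Editorial note (wire-format recap, derived from the code above): every
 * ETM message leads with the network-order preamble -- pp_magic_num,
 * pp_proto_ver, pp_msg_type, pp_sub_type, pp_rsvd_pad, pp_xid,
 * pp_timeout -- and a FMA_EVENT header then appends a vector of
 * uint32_t event lengths terminated by a 0 entry, with the packed
 * events following as the message body.
 */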

/*
 * etm_hdr_write - create and write a [variable sized] ETM message header
 *		to the given connection appropriate for the given FMA event
 *		and type of nvlist encoding,
 *		return the allocated ETM message header and its size
 *		or NULL and set errno on failure
 */

static void*
etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
    int encoding, size_t *szp)
{
	etm_proto_v1_ev_hdr_t *hdrp;	/* for FMA_EVENT msg */
	size_t hdr_sz;			/* sizeof *hdrp */
	uint32_t *lenp;			/* ptr to FMA event length */
	size_t evsz;			/* packed FMA event size */
	ssize_t n;			/* gen use */

	/* allocate and populate the message header for 1 FMA event */

	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));

	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);

	/*
	 * Design_Note: Although the ETM protocol supports it, we do not (yet)
	 *		want responses/ACKs on FMA events that we send. All
	 *		such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE.
	 */

	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
	hdrp->ev_pp.pp_sub_type = 0;
	hdrp->ev_pp.pp_rsvd_pad = 0;
	hdrp->ev_pp.pp_xid = etm_xid_cur;
	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
	etm_xid_cur += ETM_XID_INC;
	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);

	lenp = &hdrp->ev_lens[0];

	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
		errno = n;
		fmd_hdl_free(hdl, hdrp, hdr_sz);
		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
		return (NULL);
	}

	/* indicate 1 FMA event, network encode its length, and 0-terminate */

	etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1;

	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
	*lenp = 0; *lenp = htonl(*lenp); lenp++;

	/*
	 * write the network encoded header to the transport, and
	 * return alloc size to caller for later free
	 */

	if ((n = etm_io_op(hdl, "bad io write on event hdr",
	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
		errno = (-n);
		fmd_hdl_free(hdl, hdrp, hdr_sz);
		return (NULL);
	}

	*szp = hdr_sz;
	return (hdrp);

} /* etm_hdr_write() */

/*
 * etm_post_to_fmd - post the given FMA event to FMD
 *		via a FMD transport API call,
 *		return 0 or -errno value
 *
 * caveats:	the FMA event (evp) is freed by FMD,
 *		thus callers of this function should
 *		immediately discard any ptr they have to the
 *		nvlist without freeing or dereferencing it
 */

static int
etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp)
{
	ssize_t ev_sz;	/* sizeof *evp */

	(void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR);

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante ev post");
	}
	fmd_xprt_post(hdl, fmd_xprt, evp, 0);
	etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
	etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz;
	if (etm_debug_lvl >= 1) {
		fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
	}
	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post ev post");
	}
	return (0);

} /* etm_post_to_fmd() */
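/*
 * Editorial sketch (not part of the original source): per the caveat
 * above, a caller of etm_post_to_fmd() must treat the nvlist as
 * consumed, e.g.
 *
 *	(void) etm_post_to_fmd(hdl, etm_fmd_xprt, evp);
 *	evp = NULL;
 *
 * and must not nvlist_free(evp) or dereference it after the call,
 * since FMD frees the event.
 */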

/*
 * Ideally we would just use syslog(3C) for outputting our messages.
 * Unfortunately, as this module is running within the FMA daemon context,
 * that would create the situation where this module's openlog() would
 * have the monopoly on syslog(3C) for the daemon and all its modules.
 * To avoid that situation, this module uses the same logic as the
 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D)
 * devices for syslog and console.
 */

static int
etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz,
    uint8_t *body_buf)
{
	char *sysmessage;	/* Formatted message */
	size_t formatlen;	/* maximum length of sysmessage */
	struct strbuf ctl, dat;	/* structs pushed to the logfd */
	uint32_t msgid;		/* syslog message ID number */

	if ((syslog_file == 0) && (syslog_cons == 0)) {
		return (0);
	}

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante syslog post");
	}

	formatlen = body_sz + 64; /* +64 for prefix strings added below */
	sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP);

	if (syslog_file) {
		STRLOG_MAKE_MSGID(body_buf, msgid);
		(void) snprintf(sysmessage, formatlen,
		    "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid,
		    body_buf);

		syslog_ctl.pri = syslog_facility | priority;

		ctl.buf = (void *)&syslog_ctl;
		ctl.len = sizeof (syslog_ctl);

		dat.buf = sysmessage;
		dat.len = strlen(sysmessage) + 1;

		if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) {
			fmd_hdl_debug(hdl, "putmsg failed: %s\n",
			    strerror(errno));
			etm_stats.etm_log_err.fmds_value.ui64++;
		}
	}

	if (syslog_cons) {
		(void) snprintf(sysmessage, formatlen,
		    "SC Alert: %s\r\n", body_buf);

		dat.buf = sysmessage;
		dat.len = strlen(sysmessage) + 1;

		if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) {
			fmd_hdl_debug(hdl, "write failed: %s\n",
			    strerror(errno));
			etm_stats.etm_msg_err.fmds_value.ui64++;
		}
	}

	fmd_hdl_free(hdl, sysmessage, formatlen);

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post syslog post");
	}

	return (0);
}


/*
 * etm_req_ver_negot - send an ETM control message to the other end requesting
 *		that the ETM protocol version be negotiated/set
 */

static void
etm_req_ver_negot(fmd_hdl_t *hdl)
{
	etm_xport_addr_t *addrv;	/* default dst addr(s) */
	etm_xport_conn_t conn;		/* connection to other end */
	etm_proto_v1_ctl_hdr_t *ctl_hdrp;	/* for CONTROL msg */
	size_t hdr_sz;			/* sizeof header */
	uint8_t *body_buf;		/* msg body buffer */
	uint32_t body_sz;		/* sizeof *body_buf */
	ssize_t i;			/* gen use */

	/* populate an ETM control msg to send */

	hdr_sz = sizeof (*ctl_hdrp);
	body_sz = (3 + 1);	/* version bytes plus null byte */

	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);

	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ;
	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
	etm_xid_ver_negot = etm_xid_cur;
	etm_xid_cur += ETM_XID_INC;
	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot);
	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
	ctl_hdrp->ctl_len = htonl(body_sz);

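	/*
	 * Editorial note: the 4-byte body assembled below lists the
	 * protocol versions this module can accept, highest (and
	 * presumably preferred) first -- V3, V2, V1 -- and is
	 * NUL-terminated; it rides on the wire immediately after
	 * ctl_len.
	 */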
	body_buf = (void*)&ctl_hdrp->ctl_len;
	body_buf += sizeof (ctl_hdrp->ctl_len);
	*body_buf++ = ETM_PROTO_V3;
	*body_buf++ = ETM_PROTO_V2;
	*body_buf++ = ETM_PROTO_V1;
	*body_buf++ = '\0';

	/*
	 * open and close a connection to send the ETM control msg
	 * to any/all of the default dst addrs
	 */

	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
		fmd_hdl_error(hdl,
		    "error: bad ctl dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		goto func_ret;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		if (etm_conn_open(hdl, "bad conn open during ver negot",
		    addrv[i], &conn) < 0) {
			continue;
		}
		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
		    conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) {
			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
			etm_stats.etm_wr_body_control.fmds_value.ui64++;
		}
		(void) etm_conn_close(hdl, "bad conn close during ver negot",
		    conn);

	} /* foreach dst addr */

func_ret:

	if (addrv != NULL) {
		etm_xport_free_addrv(hdl, addrv);
	}
	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);

} /* etm_req_ver_negot() */



/*
 * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue
 * etm_iosvc_msg_deq - del element from head of ETM iosvc msg queue
 * need to grab the mutex lock before calling this routine
 * return >0 for success, or -errno value
 */
static int
etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
{
	etm_iosvc_q_ele_t *newp;	/* ptr to new msg q ele */

	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
		fmd_hdl_debug(hdl, "warning: enq to full msg queue\n");
		return (-E2BIG);
	}

	newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP);
	(void) memcpy(newp, msgp, sizeof (*newp));
	newp->msg_nextp = NULL;

	if (iosvc->msg_q_cur_len == 0) {
		iosvc->msg_q_head = newp;
	} else {
		iosvc->msg_q_tail->msg_nextp = newp;
	}

	iosvc->msg_q_tail = newp;
	iosvc->msg_q_cur_len++;
	fmd_hdl_debug(hdl, "info: current msg queue length %d\n",
	    iosvc->msg_q_cur_len);

	return (1);

} /* etm_iosvc_msg_enq() */

static int
etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp)
{
	etm_iosvc_q_ele_t *oldp;	/* ptr to old msg q ele */

	if (iosvc->msg_q_cur_len == 0) {
		fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n");
		return (-ENOENT);
	}

	(void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp));
	msgp->msg_nextp = NULL;

	oldp = iosvc->msg_q_head;
	iosvc->msg_q_head = iosvc->msg_q_head->msg_nextp;

	/*
	 * free the mem alloc-ed in etm_iosvc_msg_enq()
	 */
	fmd_hdl_free(hdl, oldp, sizeof (*oldp));

	iosvc->msg_q_cur_len--;
	if (iosvc->msg_q_cur_len == 0) {
		iosvc->msg_q_tail = NULL;
	}

	return (1);

} /* etm_iosvc_msg_deq() */
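/*
 * Editorial note (not part of the original source): unlike the global
 * responder queue, each root domain gets its own iosvc msg queue, and
 * the enq/deq pair above runs under the per-iosvc msg_q_lock that the
 * caller must already hold, per the comment above etm_iosvc_msg_enq().
 */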

/*
 * etm_msg_enq_head():
 * enq the msg to the head of the Q.
 * If the Q is full, drop the msg at the tail then enq the msg at head.
 * need to grab mutex lock iosvc->msg_q_lock before calling this routine.
 */
static void
etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
    etm_iosvc_q_ele_t *msg_ele)
{

	etm_iosvc_q_ele_t *newp;	/* iosvc msg ele ptr */

	if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) {
		fmd_hdl_debug(fmd_hdl,
		    "warning: add to head of a full msg queue."
		    " Drop the msg at the tail\n");
		/*
		 * drop the msg at the tail
		 */
		newp = iosvc->msg_q_head;
		while (newp->msg_nextp != iosvc->msg_q_tail) {
			newp = newp->msg_nextp;
		}

		/*
		 * free the msg in iosvc->msg_q_tail->msg
		 * free the mem pointed to by iosvc->msg_q_tail
		 */
		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg,
		    iosvc->msg_q_tail->msg_size);
		fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp));
		iosvc->msg_q_tail = newp;
		iosvc->msg_q_tail->msg_nextp = NULL;
		iosvc->msg_q_cur_len--;
	}

	/*
	 * enq the msg to the head
	 */
	newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP);
	(void) memcpy(newp, msg_ele, sizeof (*newp));
	if (iosvc->msg_q_cur_len == 0) {
		newp->msg_nextp = NULL;
		iosvc->msg_q_tail = newp;
	} else {
		newp->msg_nextp = iosvc->msg_q_head;
	}
	iosvc->msg_q_head = newp;
	iosvc->msg_q_cur_len++;
} /* etm_msg_enq_head() */

/*
 * etm_iosvc_cleanup():
 * clean up what's in the passed-in iosvc struct, including the msg Q.
 */
static void
etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc)
{

	etm_iosvc_q_ele_t msg_ele;	/* io svc msg Q ele */

	iosvc->thr_is_dying = 1;

	if (iosvc->send_tid != NULL) {
		fmd_thr_signal(fmd_hdl, iosvc->send_tid);
		fmd_thr_destroy(fmd_hdl, iosvc->send_tid);
		iosvc->send_tid = NULL;
	} /* if io svc send thread was created ok */

	if (iosvc->recv_tid != NULL) {
		fmd_thr_signal(fmd_hdl, iosvc->recv_tid);
		fmd_thr_destroy(fmd_hdl, iosvc->recv_tid);
		iosvc->recv_tid = NULL;
	} /* if root domain recv thread was created */

	iosvc->ldom_name[0] = '\0';

	iosvc->ds_hdl = DS_INVALID_HDL;

	if (iosvc->fmd_xprt != NULL) {
		fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt);
		iosvc->fmd_xprt = NULL;
	} /* if fmd-xprt has been opened */

	(void) pthread_mutex_lock(&iosvc->msg_q_lock);
	while (iosvc->msg_q_cur_len > 0) {
		(void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele);
		fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size);
	}
	(void) pthread_mutex_unlock(&iosvc->msg_q_lock);

	return;

} /* etm_iosvc_cleanup() */

/*
 * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty)
 * not found, create one, add to iosvc_list
 */
etm_iosvc_t *
etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl,
    boolean_t iosvc_create)
{
	uint32_t i;			/* for loop var */
	int32_t first_empty_slot = -1;	/* remember that */

	for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
		if (ldom_name[0] == '\0') {
			/*
			 * search by hdl passed in
			 * the only time this is used is at ds_unreg_cb time.
			 * there is no ldom name, only the valid ds_hdl.
			 * find an iosvc with the matching ds_hdl.
			 * ignore the iosvc_create flag, should never need to
			 * create an iosvc for ds_unreg_cb
			 */
			if (ds_hdl == iosvc_list[i].ds_hdl) {
				if (etm_debug_lvl >= 2) {
					fmd_hdl_debug(fmd_hdl,
					    "info: found an iosvc at slot %d"
					    " w/ ds_hdl %d \n",
					    i, iosvc_list[i].ds_hdl);
				}
				if (iosvc_list[i].ldom_name[0] != '\0') {
					if (etm_debug_lvl >= 2) {
						fmd_hdl_debug(fmd_hdl,
						    "info: found an iosvc w/"
						    " ldom_name %s \n",
						    iosvc_list[i].ldom_name);
					}
				}
				return (&iosvc_list[i]);
			} else {
				continue;
			}
		} else if (iosvc_list[i].ldom_name[0] != '\0') {
			/*
			 * this is an non-empty iosvc structure slot
			 */
			if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) {
				/*
				 * found an iosvc structure that matches the
				 * passed in ldom_name, return the ptr
				 */
				if (etm_debug_lvl >= 2) {
					fmd_hdl_debug(fmd_hdl, "info: found an "
					    "iosvc at slot %d w/ ds_hdl %d \n",
					    i, iosvc_list[i].ds_hdl);
					fmd_hdl_debug(fmd_hdl, "info: found an "
					    "iosvc w/ ldom_name %s \n",
					    iosvc_list[i].ldom_name);
				}
				return (&iosvc_list[i]);
			} else {
				/*
				 * non-empty slot with no-matching name,
				 * move on to next slot.
				 */
				continue;
			}
		} else {
			/*
			 * found the 1st slot with ldom name being empty
			 * remember the slot #, will be used for creating one
			 */
			if (first_empty_slot == -1) {
				first_empty_slot = i;
			}
		}
	}
	if (iosvc_create == B_TRUE && first_empty_slot >= 0) {
		/*
		 * this is the case we need to add an iosvc at first_empty_slot
		 * for the ldom_name at iosvc_list[first_empty_slot]
		 */
		fmd_hdl_debug(fmd_hdl,
		    "info: create an iosvc with ldom name %s\n",
		    ldom_name);
		i = first_empty_slot;
		(void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t));
		(void) strcpy(iosvc_list[i].ldom_name, ldom_name);
		fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n",
		    i, iosvc_list[i].ldom_name);
		return (&iosvc_list[i]);
	} else {
		return (NULL);
	}

} /* etm_iosvc_lookup() */


/*
 * etm_ckpt_remove:
 * remove the ckpt for the iosvc element
 */
static void
etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele)
{
	int err;			/* temp error */
	nvlist_t *evp = NULL;		/* event pointer */
	etm_proto_v1_ev_hdr_t *hdrp;	/* hdr for FMA_EVENT */
	char *buf;			/* packed event pointer */

	if ((ele->ckpt_flag == ETM_CKPT_NOOP) ||
	    (etm_ldom_type != LDOM_TYPE_CONTROL)) {
		return;
	}

	/* the pointer to the packed event in the etm message */
	hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg);
	buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp)
	    + (1 * sizeof (hdrp->ev_lens[0])));

	/* unpack it, then un-checkpoint it */
	if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) {
		fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err);
		return;
	}
	(void) etm_ckpt_delete(hdl, evp);
	nvlist_free(evp);
}
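/*
 * Editorial note (inference from this excerpt, not in the original
 * source): etm_ckpt_remove() above is the undo half of a checkpoint
 * pair -- etm_pack_ds_msg() below calls etm_ckpt_add() when it queues
 * an event with ETM_CKPT_SAVE, and etm_send_ds_msg() below deletes the
 * checkpoint once the remote end has ACKed, presumably so a module
 * restart replays only unacknowledged events. The ACK wait itself is
 * an absolute-time pthread_cond_timedwait(3C) on msg_ack_cv
 * (gettimeofday(3C) now plus etm_fma_resp_wait_time seconds); on
 * timeout the send reports -EAGAIN and the caller retries the same
 * cur_send_xid.
 */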
1782 */ 1783 int 1784 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc, 1785 etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp) 1786 { 1787 uint32_t rc; /* for return code */ 1788 1789 struct timeval tv; 1790 struct timespec timeout; 1791 1792 1793 /* 1794 * call ds_send_msg(). Return (-EAGAIN) if not successful 1795 */ 1796 if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg, 1797 msg_ele->msg_size)) != 0) { 1798 fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n", 1799 rc, evhdrp->ev_pp.pp_xid); 1800 return (-EAGAIN); 1801 } 1802 1803 /* 1804 * wait on the cv for resp msg for cur_send_xid 1805 */ 1806 (void *) pthread_mutex_lock(&iosvc->msg_ack_lock); 1807 1808 (void) gettimeofday(&tv, 0); 1809 timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time; 1810 timeout.tv_nsec = 0; 1811 1812 fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n", 1813 iosvc->ldom_name); 1814 rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock, 1815 &timeout); 1816 (void *) pthread_mutex_unlock(&iosvc->msg_ack_lock); 1817 fmd_hdl_debug(fmd_hdl, "info: msg_ack_cv returns with rc %d\n", rc); 1818 1819 /* 1820 * check to see if ack_ok is non-zero 1821 * if non-zero, resp msg has been received 1822 */ 1823 if (iosvc->ack_ok != 0) { 1824 /* 1825 * ACK came ok, this send is successful, 1826 * tell the caller ready to send next. 1827 * free mem alloc-ed in 1828 * etm_pack_ds_msg 1829 */ 1830 if (ckpt_remove == B_TRUE && 1831 etm_ldom_type == LDOM_TYPE_CONTROL) { 1832 etm_ckpt_remove(fmd_hdl, msg_ele); 1833 } 1834 fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size); 1835 iosvc->cur_send_xid++; 1836 return (1); 1837 } else { 1838 /* 1839 * the ACK did not come on time 1840 * tell the caller to resend cur_send_xid 1841 */ 1842 return (-EAGAIN); 1843 } /* iosvc->ack_ok != 0 */ 1844 } /* etm_send_ds_msg() */ 1845 1846 /* 1847 * both events from fmdo_send entry point and from SP are using the 1848 * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all 1849 * ds send/recv msgs. 1850 * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send 1851 * entry point can be called before FMA events from SP, we can't rely on 1852 * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send 1853 * events. 
1854 * return >0 for success, or -errno value 1855 * Design assumption: there is one FMA event per ds msg 1856 */ 1857 int 1858 etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1859 etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp, 1860 etm_pack_msg_type_t msg_type, uint_t ckpt_opt) 1861 { 1862 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1863 uint32_t *lenp; /* ptr to FMA event length */ 1864 size_t evsz; /* packed FMA event size */ 1865 char *buf; 1866 uint32_t rc = 1; /* for return code, default is success */ 1867 char *msg; /* body of msg to be Qed */ 1868 1869 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1870 etm_proto_v1_ev_hdr_t *evhdrp; 1871 1872 1873 if (ev_hdrp == NULL) { 1874 hdrp = &iosvc_hdr; 1875 } else { 1876 hdrp = ev_hdrp; 1877 } 1878 1879 /* 1880 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz 1881 */ 1882 1883 if (hdr_sz == 0) { 1884 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1885 } 1886 1887 /* 1888 * determine evp size 1889 */ 1890 (void) nvlist_size(evp, &evsz, NV_ENCODE_XDR); 1891 1892 /* indicate 1 FMA event, no network encoding, and 0-terminate */ 1893 lenp = &hdrp->ev_lens[0]; 1894 *lenp = evsz; 1895 1896 /* 1897 * the total mem to be alloc-ed (the ds msg size) is 1898 * hdr_sz + evsz 1899 * msg will be freed in etm_send_to_remote_root() after ds_send_msg() 1900 */ 1901 msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP); 1902 1903 1904 /* 1905 * copy hdr, 0 terminate the length vector, and then evp 1906 */ 1907 (void) memcpy(msg, hdrp, sizeof (*hdrp)); 1908 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg); 1909 lenp = &hdrp->ev_lens[0]; 1910 lenp++; 1911 *lenp = 0; 1912 1913 buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP); 1914 (void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0); 1915 (void) memcpy(msg + hdr_sz, buf, evsz); 1916 fmd_hdl_free(fmd_hdl, buf, evsz); 1917 1918 fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg " 1919 "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name); 1920 msg_ele.msg = msg; 1921 msg_ele.msg_size = hdr_sz + evsz; 1922 msg_ele.ckpt_flag = ckpt_opt; 1923 1924 /* 1925 * decide what to do with the msg: 1926 * if SP ereports (msg_type == SP_MSG), always enq the msg 1927 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after 1928 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1) 1929 */ 1930 if ((msg_type == SP_MSG) || 1931 ((msg_type != SP_MSG) && (iosvc->start_sending_Q == 1))) { 1932 /* 1933 * this is the case when the msg needs to be enq-ed 1934 */ 1935 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1936 rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele); 1937 if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) && 1938 (etm_ldom_type == LDOM_TYPE_CONTROL)) { 1939 (void) etm_ckpt_add(fmd_hdl, evp); 1940 } 1941 if (iosvc->msg_q_cur_len == 1) 1942 (void) pthread_cond_signal(&iosvc->msg_q_cv); 1943 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1944 } else { 1945 /* 1946 * fmd RDWR xprt protocol startup msgs, send it now!
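 * (these are sent synchronously below, retrying until ACKed, since
 * nothing may be enqueued until the resource.fm.xprt.run msg is
 * ACKed and start_sending_Q is set to 1)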
1947 */ 1948 iosvc->ack_ok = 0; 1949 evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg); 1950 evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 1951 while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL && 1952 !etm_is_dying) { 1953 if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele, 1954 evhdrp) < 0) { 1955 continue; 1956 } 1957 } 1958 if (msg_type == FMD_XPRT_RUN_MSG) 1959 iosvc->start_sending_Q = 1; 1960 } 1961 1962 return (rc); 1963 1964 } /* etm_pack_ds_msg() */ 1965 1966 /* 1967 * Design_Note: For all etm_resp_q_*() functions and etm_resp_q_* globals, 1968 * the mutex etm_resp_q_lock must be held by the caller. 1969 */ 1970 1971 /* 1972 * etm_resp_q_enq - add element to tail of ETM responder queue 1973 * etm_resp_q_deq - del element from head of ETM responder queue 1974 * 1975 * return >0 for success, or -errno value 1976 */ 1977 1978 static int 1979 etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 1980 { 1981 etm_resp_q_ele_t *newp; /* ptr to new resp q ele */ 1982 1983 if (etm_resp_q_cur_len >= etm_resp_q_max_len) { 1984 fmd_hdl_debug(hdl, "warning: enq to full responder queue\n"); 1985 etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++; 1986 return (-E2BIG); 1987 } 1988 1989 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 1990 (void) memcpy(newp, rqep, sizeof (*newp)); 1991 newp->rqe_nextp = NULL; 1992 1993 if (etm_resp_q_cur_len == 0) { 1994 etm_resp_q_head = newp; 1995 } else { 1996 etm_resp_q_tail->rqe_nextp = newp; 1997 } 1998 etm_resp_q_tail = newp; 1999 etm_resp_q_cur_len++; 2000 etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len; 2001 2002 return (1); 2003 2004 } /* etm_resp_q_enq() */ 2005 2006 static int 2007 etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 2008 { 2009 etm_resp_q_ele_t *oldp; /* ptr to old resp q ele */ 2010 2011 if (etm_resp_q_cur_len == 0) { 2012 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 2013 etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++; 2014 return (-ENOENT); 2015 } 2016 2017 (void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep)); 2018 rqep->rqe_nextp = NULL; 2019 2020 oldp = etm_resp_q_head; 2021 etm_resp_q_head = etm_resp_q_head->rqe_nextp; 2022 fmd_hdl_free(hdl, oldp, sizeof (*oldp)); 2023 2024 etm_resp_q_cur_len--; 2025 etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len; 2026 if (etm_resp_q_cur_len == 0) { 2027 etm_resp_q_tail = NULL; 2028 } 2029 2030 return (1); 2031 2032 } /* etm_resp_q_deq() */ 2033 2034 /* 2035 * etm_maybe_enq_response - check the given message header to see 2036 * whether a response has been requested, 2037 * if so then enqueue the given connection 2038 * and header for later transport by the 2039 * responder thread as an ETM response msg, 2040 * return 0 for nop, >0 success, or -errno value 2041 */ 2042 2043 static ssize_t 2044 etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn, 2045 void *hdrp, uint32_t hdr_sz, int32_t resp_code) 2046 { 2047 ssize_t rv; /* ret val */ 2048 etm_proto_v1_pp_t *ppp; /* protocol preamble ptr */ 2049 uint8_t orig_msg_type; /* orig hdr's message type */ 2050 uint32_t orig_timeout; /* orig hdr's timeout */ 2051 etm_resp_q_ele_t rqe; /* responder queue ele */ 2052 2053 ppp = hdrp; 2054 orig_msg_type = ppp->pp_msg_type; 2055 orig_timeout = ppp->pp_timeout; 2056 2057 /* bail out now if no response is to be sent */ 2058 2059 if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) { 2060 return (0); 2061 } /* if a nop */ 2062 2063 if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) && 2064 (orig_msg_type != ETM_MSG_TYPE_ALERT) 
&& 2065 (orig_msg_type != ETM_MSG_TYPE_CONTROL)) { 2066 fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n", 2067 orig_msg_type); 2068 return (-EINVAL); 2069 } /* if inappropriate hdr for a response msg */ 2070 2071 /* 2072 * enqueue the msg hdr and nudge the responder thread 2073 * if the responder queue was previously empty 2074 */ 2075 2076 rqe.rqe_conn = conn; 2077 rqe.rqe_hdrp = hdrp; 2078 rqe.rqe_hdr_sz = hdr_sz; 2079 rqe.rqe_resp_code = resp_code; 2080 2081 (void) pthread_mutex_lock(&etm_resp_q_lock); 2082 rv = etm_resp_q_enq(hdl, &rqe); 2083 if (etm_resp_q_cur_len == 1) 2084 (void) pthread_cond_signal(&etm_resp_q_cv); 2085 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2086 2087 return (rv); 2088 2089 } /* etm_maybe_enq_response() */ 2090 2091 /* 2092 * Design_Note: We rely on the fact that all message types have 2093 * a common protocol preamble; if this fact should 2094 * ever change it may break the code below. We also 2095 * rely on the fact that FMA_EVENT and CONTROL headers 2096 * returned by etm_hdr_read() will be sized large enough 2097 * to reuse them as RESPONSE headers if the remote endpt 2098 * asked for a response via the pp_timeout field. 2099 */ 2100 2101 /* 2102 * etm_send_response - use the given message header and response code 2103 * to construct an appropriate response message, 2104 * and send it back on the given connection, 2105 * return >0 for success, or -errno value 2106 */ 2107 2108 static ssize_t 2109 etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn, 2110 void *hdrp, int32_t resp_code) 2111 { 2112 ssize_t rv; /* ret val */ 2113 etm_proto_v1_pp_t *ppp; /* protocol preamble ptr */ 2114 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2115 uint8_t resp_body[4]; /* response body if needed */ 2116 uint8_t *resp_msg; /* response hdr+body */ 2117 size_t hdr_sz; /* sizeof response hdr */ 2118 uint8_t orig_msg_type; /* orig hdr's message type */ 2119 2120 ppp = hdrp; 2121 orig_msg_type = ppp->pp_msg_type; 2122 2123 if (etm_debug_lvl >= 2) { 2124 etm_show_time(hdl, "ante resp send"); 2125 } 2126 2127 /* reuse the given header as a response header */ 2128 2129 resp_hdrp = hdrp; 2130 resp_hdrp->resp_code = resp_code; 2131 resp_hdrp->resp_len = 0; /* default is empty body */ 2132 2133 if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) && 2134 (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) { 2135 resp_body[0] = ETM_PROTO_V2; 2136 resp_body[1] = ETM_PROTO_V3; 2137 resp_body[2] = 0; 2138 resp_hdrp->resp_len = 3; 2139 } /* if should send our/negotiated proto ver in resp body */ 2140 2141 /* respond with the proto ver that was negotiated */ 2142 2143 resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver; 2144 resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE; 2145 resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 2146 2147 /* 2148 * send the whole response msg in one write, header and body; 2149 * avoid the alloc-and-copy if we can reuse the hdr as the msg, 2150 * ie, if the body is empty. update the response stats. 
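 *
 * e.g. for a VER_NEGOT_REQ the response carries the 3-byte body
 * built above, { ETM_PROTO_V2, ETM_PROTO_V3, 0 }, so hdr_sz +
 * resp_len bytes go out in the single write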
2151 */ 2152 2153 hdr_sz = sizeof (etm_proto_v1_resp_hdr_t); 2154 2155 resp_msg = hdrp; 2156 if (resp_hdrp->resp_len > 0) { 2157 resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len, 2158 FMD_SLEEP); 2159 (void) memcpy(resp_msg, resp_hdrp, hdr_sz); 2160 (void) memcpy(resp_msg + hdr_sz, resp_body, 2161 resp_hdrp->resp_len); 2162 } 2163 2164 (void) pthread_mutex_lock(&etm_write_lock); 2165 rv = etm_io_op(hdl, "bad io write on resp msg", conn, 2166 resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR); 2167 (void) pthread_mutex_unlock(&etm_write_lock); 2168 if (rv < 0) { 2169 goto func_ret; 2170 } 2171 2172 etm_stats.etm_wr_hdr_response.fmds_value.ui64++; 2173 etm_stats.etm_wr_body_response.fmds_value.ui64++; 2174 2175 fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport " 2176 "xid 0x%x code %d len %u\n", 2177 (unsigned int)resp_hdrp->resp_pp.pp_proto_ver, 2178 resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code, 2179 resp_hdrp->resp_len); 2180 func_ret: 2181 2182 if (resp_hdrp->resp_len > 0) { 2183 fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len); 2184 } 2185 if (etm_debug_lvl >= 2) { 2186 etm_show_time(hdl, "post resp send"); 2187 } 2188 return (rv); 2189 2190 } /* etm_send_response() */ 2191 2192 /* 2193 * etm_reset_xport - reset the transport layer (via fini;init) 2194 * presumably for an error condition we cannot 2195 * otherwise recover from (ex: hung LDC channel) 2196 * 2197 * caveats - no checking/locking is done to ensure an existing connection 2198 * is idle during an xport reset; we don't want to deadlock 2199 * and presumably the transport is stuck/unusable anyway 2200 */ 2201 2202 static void 2203 etm_reset_xport(fmd_hdl_t *hdl) 2204 { 2205 (void) etm_xport_fini(hdl); 2206 (void) etm_xport_init(hdl); 2207 etm_stats.etm_reset_xport.fmds_value.ui64++; 2208 2209 } /* etm_reset_xport() */ 2210 2211 /* 2212 * etm_handle_new_conn - receive an ETM message sent from the other end via 2213 * the given open connection, pull out any FMA events 2214 * and post them to the local FMD (or handle any ETM 2215 * control or response msg); when done, close the 2216 * connection 2217 */ 2218 2219 static void 2220 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn) 2221 { 2222 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 2223 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 2224 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2225 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 2226 etm_iosvc_t *iosvc; /* iosvc data structure */ 2227 int32_t resp_code; /* response code */ 2228 ssize_t enq_rv; /* resp_q enqueue status */ 2229 size_t hdr_sz; /* sizeof header */ 2230 size_t evsz; /* FMA event size */ 2231 uint8_t *body_buf; /* msg body buffer */ 2232 uint32_t body_sz; /* sizeof body_buf */ 2233 uint32_t ev_cnt; /* count of FMA events */ 2234 uint8_t *bp; /* byte ptr within body_buf */ 2235 nvlist_t *evp; /* ptr to unpacked FMA event */ 2236 char *class; /* FMA event class */ 2237 ssize_t i, n; /* gen use */ 2238 int should_reset_xport; /* bool to reset xport */ 2239 char ldom_name[MAX_LDOM_NAME]; /* ldom name */ 2240 int rc; /* return code */ 2241 uint64_t did; /* domain id */ 2242 2243 2244 if (etm_debug_lvl >= 2) { 2245 etm_show_time(hdl, "ante conn handle"); 2246 } 2247 fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn); 2248 2249 should_reset_xport = 0; 2250 ev_hdrp = NULL; 2251 ctl_hdrp = NULL; 2252 resp_hdrp = NULL; 2253 sa_hdrp = NULL; 2254 body_buf = NULL; 2255 class = NULL; 2256 evp = NULL; 2257 resp_code = 0; /* default is success 
*/ 2258 enq_rv = 0; /* default is nop, ie, did not enqueue */ 2259 2260 /* read a network decoded message header from the connection */ 2261 2262 if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) { 2263 /* errno assumed set by above call */ 2264 should_reset_xport = (errno == ENOTACTIVE); 2265 fmd_hdl_debug(hdl, "error: FMA event dropped: " 2266 "bad hdr read errno %d\n", errno); 2267 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2268 goto func_ret; 2269 } 2270 2271 /* 2272 * handle the message based on its preamble pp_msg_type 2273 * which is known to be valid from etm_hdr_read() checks 2274 */ 2275 2276 if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 2277 2278 fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n"); 2279 2280 /* allocate buf large enough for whole body / all FMA events */ 2281 2282 body_sz = 0; 2283 for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) { 2284 body_sz += ev_hdrp->ev_lens[i]; 2285 } /* for summing sizes of all FMA events */ 2286 if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64) 2287 etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i; 2288 ev_cnt = i; 2289 2290 if (etm_debug_lvl >= 1) { 2291 fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n", 2292 ev_cnt, body_sz); 2293 } 2294 2295 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2296 2297 /* read all the FMA events at once */ 2298 2299 if ((n = etm_io_op(hdl, "FMA event dropped: " 2300 "bad io read on event bodies", conn, body_buf, body_sz, 2301 ETM_IO_OP_RD)) < 0) { 2302 should_reset_xport = (n == -ENOTACTIVE); 2303 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2304 goto func_ret; 2305 } 2306 2307 etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz; 2308 etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt; 2309 2310 /* 2311 * now that we've read the entire ETM msg from the conn, 2312 * which avoids later ETM protocol framing errors if we didn't, 2313 * check for dup msg/xid against last good FMD posting, 2314 * if a dup then resend response but skip repost to FMD 2315 */ 2316 2317 if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) { 2318 enq_rv = etm_maybe_enq_response(hdl, conn, 2319 ev_hdrp, hdr_sz, 0); 2320 fmd_hdl_debug(hdl, "info: skipping dup FMA event post " 2321 "xid 0x%x\n", etm_xid_posted_logged_ev); 2322 etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++; 2323 goto func_ret; 2324 } 2325 2326 /* unpack each FMA event and post it to FMD */ 2327 2328 bp = body_buf; 2329 for (i = 0; i < ev_cnt; i++) { 2330 if ((n = nvlist_unpack((char *)bp, 2331 ev_hdrp->ev_lens[i], &evp, 0)) != 0) { 2332 resp_code = (-n); 2333 enq_rv = etm_maybe_enq_response(hdl, conn, 2334 ev_hdrp, hdr_sz, resp_code); 2335 fmd_hdl_error(hdl, "error: FMA event dropped: " 2336 "bad event body unpack errno %d\n", n); 2337 if (etm_debug_lvl >= 2) { 2338 fmd_hdl_debug(hdl, "info: FMA event " 2339 "hexdump %d bytes:\n", 2340 ev_hdrp->ev_lens[i]); 2341 etm_hexdump(hdl, bp, 2342 ev_hdrp->ev_lens[i]); 2343 } 2344 etm_stats.etm_os_nvlist_unpack_fail.fmds_value. 2345 ui64++; 2346 etm_stats.etm_rd_drop_fmaevent.fmds_value. 
ui64++; 2348 bp += ev_hdrp->ev_lens[i]; 2349 continue; 2350 } 2351 2352 if (etm_debug_lvl >= 1) { 2353 (void) nvlist_lookup_string(evp, FM_CLASS, 2354 &class); 2355 if (class == NULL) { 2356 class = "NULL"; 2357 } 2358 fmd_hdl_debug(hdl, "info: FMA event %p " 2359 "class %s\n", evp, class); 2360 } 2361 2362 rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR); 2363 fmd_hdl_debug(hdl, 2364 "info: evp size before pack ds msg %d\n", evsz); 2365 ldom_name[0] = '\0'; 2366 rc = etm_filter_find_ldom_id(hdl, evp, ldom_name, 2367 MAX_LDOM_NAME, &did); 2368 2369 /* 2370 * if rc is zero and the ldom_name is not "primary", 2371 * the evp belongs to a root domain, put the evp in an 2372 * outgoing etm queue, 2373 * in all other cases, whether ldom_name is primary or 2374 * we can't find an ldom name, call etm_post_to_fmd 2375 */ 2376 if ((rc == 0) && strcmp(ldom_name, "primary") && 2377 strcmp(ldom_name, "")) { 2378 /* 2379 * use the ldom_name, guaranteed at this point 2380 * to be a valid ldom name/non-NULL, to find the 2381 * iosvc data. 2382 * add an iosvc struct if we cannot find one 2383 */ 2384 (void) pthread_mutex_lock(&iosvc_list_lock); 2385 iosvc = etm_iosvc_lookup(hdl, ldom_name, 2386 DS_INVALID_HDL, B_TRUE); 2387 (void) pthread_mutex_unlock(&iosvc_list_lock); 2388 if (iosvc == NULL) { 2389 fmd_hdl_debug(hdl, 2390 "error: can't find iosvc for ldom " 2391 "name %s\n", ldom_name); 2392 } else { 2393 resp_code = 0; 2394 (void) etm_pack_ds_msg(hdl, iosvc, 2395 ev_hdrp, hdr_sz, evp, 2396 SP_MSG, ETM_CKPT_SAVE); 2397 /* 2398 * call the new fmd_xprt_log() 2399 */ 2400 fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0); 2401 etm_xid_posted_logged_ev = 2402 ev_hdrp->ev_pp.pp_xid; 2403 } 2404 } else { 2405 /* 2406 * post the fma event to the control fmd 2407 */ 2408 resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt, 2409 evp); 2410 if (resp_code >= 0) { 2411 etm_xid_posted_logged_ev = 2412 ev_hdrp->ev_pp.pp_xid; 2413 } 2414 } 2415 2416 evp = NULL; 2417 enq_rv = etm_maybe_enq_response(hdl, conn, 2418 ev_hdrp, hdr_sz, resp_code); 2419 bp += ev_hdrp->ev_lens[i]; 2420 } /* foreach FMA event in the body buffer */ 2421 2422 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 2423 2424 ctl_hdrp = (void*)ev_hdrp; 2425 2426 fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n"); 2427 if (etm_debug_lvl >= 1) { 2428 fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n", 2429 (int)ctl_hdrp->ctl_pp.pp_sub_type, 2430 ctl_hdrp->ctl_pp.pp_xid); 2431 } 2432 2433 /* 2434 * if we have a VER_NEGOT_REQ read the body and validate 2435 * the protocol version set contained therein, 2436 * otherwise we have a PING_REQ (which has no body) 2437 * and we [also] fall thru to the code which sends a 2438 * response msg if the pp_timeout field requested one 2439 */ 2440 2441 if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) { 2442 2443 body_sz = ctl_hdrp->ctl_len; 2444 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2445 2446 if ((n = etm_io_op(hdl, "bad io read on ctl body", 2447 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2448 should_reset_xport = (n == -ENOTACTIVE); 2449 goto func_ret; 2450 } 2451 2452 /* complain if version set completely incompatible */ 2453 2454 for (i = 0; i < body_sz; i++) { 2455 if ((body_buf[i] == ETM_PROTO_V1) || 2456 (body_buf[i] == ETM_PROTO_V2) || 2457 (body_buf[i] == ETM_PROTO_V3)) { 2458 break; 2459 } 2460 } 2461 if (i >= body_sz) { 2462 etm_stats.etm_ver_bad.fmds_value.ui64++; 2463 resp_code = (-EPROTO); 2464 } 2465 2466 } /* if got version set request */ 2467 2468
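		/* both PING_REQ and VER_NEGOT_REQ fall thru to here */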
etm_stats.etm_rd_body_control.fmds_value.ui64++; 2469 2470 enq_rv = etm_maybe_enq_response(hdl, conn, 2471 ctl_hdrp, hdr_sz, resp_code); 2472 2473 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 2474 2475 resp_hdrp = (void*)ev_hdrp; 2476 2477 fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n"); 2478 if (etm_debug_lvl >= 1) { 2479 fmd_hdl_debug(hdl, "info: resp xid 0x%x\n", 2480 (int)resp_hdrp->resp_pp.pp_xid); 2481 } 2482 2483 body_sz = resp_hdrp->resp_len; 2484 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2485 2486 if ((n = etm_io_op(hdl, "bad io read on resp len", 2487 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2488 should_reset_xport = (n == -ENOTACTIVE); 2489 goto func_ret; 2490 } 2491 2492 etm_stats.etm_rd_body_response.fmds_value.ui64++; 2493 2494 /* 2495 * look up the xid to interpret the response body 2496 * 2497 * ping is a nop; for ver negot confirm that a supported 2498 * protocol version was negotiated and remember which one 2499 */ 2500 2501 if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) && 2502 (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) { 2503 etm_stats.etm_xid_bad.fmds_value.ui64++; 2504 goto func_ret; 2505 } 2506 2507 if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) { 2508 if ((body_buf[0] < ETM_PROTO_V1) || 2509 (body_buf[0] > ETM_PROTO_V3)) { 2510 etm_stats.etm_ver_bad.fmds_value.ui64++; 2511 goto func_ret; 2512 } 2513 etm_resp_ver = body_buf[0]; 2514 } /* if have resp to last req to negotiate proto ver */ 2515 2516 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 2517 2518 sa_hdrp = (void*)ev_hdrp; 2519 2520 fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n"); 2521 if (etm_debug_lvl >= 1) { 2522 fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n", 2523 (int)sa_hdrp->sa_pp.pp_sub_type, 2524 sa_hdrp->sa_pp.pp_xid); 2525 } 2526 2527 body_sz = sa_hdrp->sa_len; 2528 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2529 2530 if ((n = etm_io_op(hdl, "bad io read on sa body", 2531 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2532 should_reset_xport = (n == -ENOTACTIVE); 2533 goto func_ret; 2534 } 2535 2536 etm_stats.etm_rd_body_alert.fmds_value.ui64++; 2537 2538 /* 2539 * now that we've read the entire ETM msg from the conn, 2540 * which avoids later ETM protocol framing errors if we didn't, 2541 * check for dup msg/xid against last good syslog posting, 2542 * if a dup then resend response but skip repost to syslog 2543 */ 2544 2545 if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) { 2546 enq_rv = etm_maybe_enq_response(hdl, conn, 2547 sa_hdrp, hdr_sz, 0); 2548 fmd_hdl_debug(hdl, "info: skipping dup ALERT post " 2549 "xid 0x%x\n", etm_xid_posted_sa); 2550 etm_stats.etm_rd_dup_alert.fmds_value.ui64++; 2551 goto func_ret; 2552 } 2553 2554 resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority, 2555 body_sz, body_buf); 2556 if (resp_code >= 0) { 2557 etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid; 2558 } 2559 enq_rv = etm_maybe_enq_response(hdl, conn, 2560 sa_hdrp, hdr_sz, resp_code); 2561 } /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */ 2562 2563 func_ret: 2564 2565 if (etm_debug_lvl >= 2) { 2566 etm_show_time(hdl, "post conn handle"); 2567 } 2568 2569 /* 2570 * if no responder ele was enqueued, close the conn now 2571 * and free the ETM msg hdr; the ETM msg body is not needed 2572 * by the responder thread and should always be freed here 2573 */ 2574 2575 if (enq_rv <= 0) { 2576 (void) etm_conn_close(hdl, "bad conn close after msg recv", 2577 conn); 2578 if (ev_hdrp != NULL) { 2579 fmd_hdl_free(hdl, 
ev_hdrp, hdr_sz); 2580 } 2581 } 2582 if (body_buf != NULL) { 2583 fmd_hdl_free(hdl, body_buf, body_sz); 2584 } 2585 if (should_reset_xport) { 2586 etm_reset_xport(hdl); 2587 } 2588 } /* etm_handle_new_conn() */ 2589 2590 /* 2591 * etm_handle_bad_accept - recover from a failed connection acceptance 2592 */ 2593 2594 static void 2595 etm_handle_bad_accept(fmd_hdl_t *hdl, int nev) 2596 { 2597 int should_reset_xport; /* bool to reset xport */ 2598 2599 should_reset_xport = (nev == -ENOTACTIVE); 2600 fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev)); 2601 etm_stats.etm_xport_accept_fail.fmds_value.ui64++; 2602 (void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */ 2603 if (should_reset_xport) { 2604 etm_reset_xport(hdl); 2605 } 2606 } /* etm_handle_bad_accept() */ 2607 2608 /* 2609 * etm_server - loop forever accepting new connections 2610 * using the given FMD handle, 2611 * handling any ETM msgs sent from the other side 2612 * via each such connection 2613 */ 2614 2615 static void 2616 etm_server(void *arg) 2617 { 2618 etm_xport_conn_t conn; /* connection handle */ 2619 int nev; /* -errno val */ 2620 fmd_hdl_t *hdl; /* FMD handle */ 2621 2622 hdl = arg; 2623 2624 fmd_hdl_debug(hdl, "info: connection server starting\n"); 2625 2626 /* 2627 * Restore the checkpointed events and dispatch them before starting to 2628 * receive more events from the sp. 2629 */ 2630 etm_ckpt_recover(hdl); 2631 2632 while (!etm_is_dying) { 2633 2634 if ((conn = etm_xport_accept(hdl, NULL)) == NULL) { 2635 /* errno assumed set by above call */ 2636 nev = (-errno); 2637 if (etm_is_dying) { 2638 break; 2639 } 2640 etm_handle_bad_accept(hdl, nev); 2641 continue; 2642 } 2643 2644 /* handle the new message/connection, closing it when done */ 2645 2646 etm_handle_new_conn(hdl, conn); 2647 2648 } /* while accepting new connections until ETM dies */ 2649 2650 /* ETM is dying (probably due to "fmadm unload etm") */ 2651 2652 fmd_hdl_debug(hdl, "info: connection server is dying\n"); 2653 2654 } /* etm_server() */ 2655 2656 /* 2657 * etm_responder - loop forever waiting for new responder queue elements 2658 * to be enqueued, for each one constructing and sending 2659 * an ETM response msg to the other side, and closing its 2660 * associated connection when appropriate 2661 * 2662 * this thread exists to ensure that the etm_server() thread 2663 * never pends indefinitely waiting on the xport write lock, and is 2664 * hence always available to accept new connections and handle 2665 * incoming messages 2666 * 2667 * this design relies on the fact that each connection accepted and 2668 * returned by the ETM xport layer is unique, and each can be closed 2669 * independently of the others while multiple connections are 2670 * outstanding 2671 */ 2672 2673 static void 2674 etm_responder(void *arg) 2675 { 2676 ssize_t n; /* gen use */ 2677 fmd_hdl_t *hdl; /* FMD handle */ 2678 etm_resp_q_ele_t rqe; /* responder queue ele */ 2679 2680 hdl = arg; 2681 2682 fmd_hdl_debug(hdl, "info: responder server starting\n"); 2683 2684 while (!etm_is_dying) { 2685 2686 (void) pthread_mutex_lock(&etm_resp_q_lock); 2687 2688 while (etm_resp_q_cur_len == 0) { 2689 (void) pthread_cond_wait(&etm_resp_q_cv, 2690 &etm_resp_q_lock); 2691 if (etm_is_dying) { 2692 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2693 goto func_ret; 2694 } 2695 } /* while the responder queue is empty, wait to be nudged */ 2696 2697 /* 2698 * for every responder ele that has been enqueued, 2699 * dequeue and send it as an ETM response msg, 2700 * closing its 
associated conn and freeing its hdr 2701 * 2702 * enter the queue draining loop holding the responder 2703 * queue lock, but do not hold the lock indefinitely 2704 * (the actual send may pend us indefinitely), 2705 * so that other threads will never pend for long 2706 * trying to enqueue a new element 2707 */ 2708 2709 while (etm_resp_q_cur_len > 0) { 2710 2711 (void) etm_resp_q_deq(hdl, &rqe); 2712 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2713 2714 if ((n = etm_send_response(hdl, rqe.rqe_conn, 2715 rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) { 2716 fmd_hdl_error(hdl, "error: bad resp send " 2717 "errno %d\n", (-n)); 2718 } 2719 2720 (void) etm_conn_close(hdl, "bad conn close after resp", 2721 rqe.rqe_conn); 2722 fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz); 2723 2724 if (etm_is_dying) { 2725 goto func_ret; 2726 } 2727 (void) pthread_mutex_lock(&etm_resp_q_lock); 2728 2729 } /* while draining the responder queue */ 2730 2731 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2732 2733 } /* while awaiting and sending resp msgs until ETM dies */ 2734 2735 func_ret: 2736 2737 /* ETM is dying (probably due to "fmadm unload etm") */ 2738 2739 fmd_hdl_debug(hdl, "info: responder server is dying\n"); 2740 2741 (void) pthread_mutex_lock(&etm_resp_q_lock); 2742 if (etm_resp_q_cur_len > 0) { 2743 fmd_hdl_error(hdl, "warning: %d response msgs dropped\n", 2744 (int)etm_resp_q_cur_len); 2745 while (etm_resp_q_cur_len > 0) { 2746 (void) etm_resp_q_deq(hdl, &rqe); 2747 (void) etm_conn_close(hdl, "bad conn close after deq", 2748 rqe.rqe_conn); 2749 fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz); 2750 } 2751 } 2752 (void) pthread_mutex_unlock(&etm_resp_q_lock); 2753 2754 } /* etm_responder() */ 2755 2756 static void * 2757 etm_init_alloc(size_t size) 2758 { 2759 return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP)); 2760 } 2761 2762 static void 2763 etm_init_free(void *addr, size_t size) 2764 { 2765 fmd_hdl_free(init_hdl, addr, size); 2766 } 2767 2768 /* 2769 * ---------------------root ldom support functions ----------------------- 2770 */ 2771 2772 /* 2773 * use a static array async_event_q instead of a dynamically allocated mem 2774 * queue for etm_async_q_enq and etm_async_q_deq. 2775 * This is not running in an fmd aux thread, can't use the fmd_hdl_* funcs. 2776 * caller needs to grab the mutex lock before calling this func.
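 * e.g. (the enqueue pattern used by the ds/ldom callbacks below):
 *
 *	(void) pthread_mutex_lock(&etm_async_event_q_lock);
 *	(void) etm_async_q_enq(&async_ele);
 *	if (etm_async_q_cur_len == 1)
 *		(void) pthread_cond_signal(&etm_async_event_q_cv);
 *	(void) pthread_mutex_unlock(&etm_async_event_q_lock);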
2777 * return >0 for success, or -errno value 2778 */ 2779 static int 2780 etm_async_q_enq(etm_async_event_ele_t *async_e) 2781 { 2782 2783 if (etm_async_q_cur_len >= etm_async_q_max_len) { 2784 /* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */ 2785 return (-E2BIG); 2786 } 2787 2788 (void) memcpy(&async_event_q[etm_async_q_tail], async_e, 2789 sizeof (*async_e)); 2790 2791 etm_async_q_tail++; 2792 if (etm_async_q_tail == etm_async_q_max_len) { 2793 etm_async_q_tail = 0; 2794 } 2795 etm_async_q_cur_len++; 2796 2797 /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */ 2798 2799 return (1); 2800 2801 } /* etm_async_q_enq() */ 2802 2803 2804 static int 2805 etm_async_q_deq(etm_async_event_ele_t *async_e) 2806 { 2807 2808 if (etm_async_q_cur_len == 0) { 2809 /* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */ 2810 return (-ENOENT); 2811 } 2812 2813 (void) memcpy(async_e, &async_event_q[etm_async_q_head], 2814 sizeof (*async_e)); 2815 2816 etm_async_q_head++; 2817 if (etm_async_q_head == etm_async_q_max_len) { 2818 etm_async_q_head = 0; 2819 } 2820 etm_async_q_cur_len--; 2821 /* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */ 2822 2823 return (1); 2824 } /* etm_async_q_deq */ 2825 2826 2827 /* 2828 * ds userland interface ds_reg_cb callback func 2829 */ 2830 2831 /* ARGSUSED */ 2832 static void 2833 etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver, 2834 ds_domain_hdl_t dhdl) 2835 { 2836 etm_async_event_ele_t async_ele; 2837 2838 2839 /* 2840 * do the version check here: 2841 * check the ver received against etm_iosvc_vers 2842 */ 2843 if (etm_iosvc_vers[0].major != ver->major || 2844 etm_iosvc_vers[0].minor != ver->minor) { 2845 /* 2846 * can't log an fmd debug msg, 2847 * not running in an fmd aux thread 2848 */ 2849 return; 2850 } 2851 2852 /* 2853 * the callback should have a valid ldom_name 2854 * can't log fmd debugging msg here since this is not in an fmd aux 2855 * thread.
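 * (the ds callbacks are invoked on a libds thread that fmd does not
 * know about, so the fmd_hdl_* routines are not safe here);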
log fmd debug msg in etm_async_event_handler() 2856 */ 2857 async_ele.ds_hdl = ds_hdl; 2858 async_ele.dhdl = dhdl; 2859 async_ele.ldom_name[0] = '\0'; 2860 async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB; 2861 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2862 (void) etm_async_q_enq(&async_ele); 2863 if (etm_async_q_cur_len == 1) 2864 (void) pthread_cond_signal(&etm_async_event_q_cv); 2865 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2866 2867 } /* etm_iosvc_reg_handler */ 2868 2869 2870 /* 2871 * ds userland interface ds_unreg_cb callback func 2872 */ 2873 2874 /*ARGSUSED*/ 2875 static void 2876 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg) 2877 { 2878 etm_async_event_ele_t async_ele; 2879 2880 /* 2881 * fill in async_ele and enqueue async_ele 2882 */ 2883 async_ele.ldom_name[0] = '\0'; 2884 async_ele.ds_hdl = hdl; 2885 async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB; 2886 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2887 (void) etm_async_q_enq(&async_ele); 2888 if (etm_async_q_cur_len == 1) 2889 (void) pthread_cond_signal(&etm_async_event_q_cv); 2890 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2891 } /* etm_iosvc_unreg_handler */ 2892 2893 /* 2894 * ldom event registration callback func 2895 */ 2896 2897 /* ARGSUSED */ 2898 static void 2899 ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data) 2900 { 2901 etm_async_event_ele_t async_ele; 2902 2903 /* 2904 * the callback will have a valid ldom_name 2905 */ 2906 async_ele.ldom_name[0] = '\0'; 2907 if (ldom_name) 2908 (void) strcpy(async_ele.ldom_name, ldom_name); 2909 async_ele.ds_hdl = DS_INVALID_HDL; 2910 2911 /* 2912 * fill in async_ele and enq async_ele 2913 */ 2914 switch (event) { 2915 case LDOM_EVENT_BIND: 2916 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND; 2917 break; 2918 case LDOM_EVENT_UNBIND: 2919 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND; 2920 break; 2921 case LDOM_EVENT_ADD: 2922 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_ADD; 2923 break; 2924 case LDOM_EVENT_REMOVE: 2925 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE; 2926 break; 2927 default: 2928 /* 2929 * for all other ldom events, do nothing 2930 */ 2931 return; 2932 } /* switch (event) */ 2933 2934 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2935 (void) etm_async_q_enq(&async_ele); 2936 if (etm_async_q_cur_len == 1) 2937 (void) pthread_cond_signal(&etm_async_event_q_cv); 2938 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2939 2940 } /* ldom_event_handler */ 2941 2942 2943 /* 2944 * This is running as an fmd aux thread. 2945 * This is the func that actually handles the events, which include: 2946 * 1. ldom events. ldom events are on Control Domain only 2947 * 2.
any DS userland callback funcs 2948 * these events are already Q-ed in the async_event_ele_q 2949 * deQ and process the events accordingly 2950 */ 2951 static void 2952 etm_async_event_handler(void *arg) 2953 { 2954 2955 fmd_hdl_t *fmd_hdl = (fmd_hdl_t *)arg; 2956 etm_iosvc_t *iosvc; /* ptr to iosvc struct */ 2957 etm_async_event_ele_t async_e; 2958 2959 fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n"); 2960 /* 2961 * handle events while etm is not dying and Q len > 0 2962 */ 2963 while (!etm_is_dying) { 2964 /* 2965 * grab the lock to check the Q len 2966 */ 2967 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2968 fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n", 2969 etm_async_q_cur_len); 2970 2971 while (etm_async_q_cur_len > 0) { 2972 (void) etm_async_q_deq(&async_e); 2973 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2974 fmd_hdl_debug(fmd_hdl, 2975 "info: processing an async event type %d ds_hdl" 2976 " %d\n", async_e.event_type, async_e.ds_hdl); 2977 if (async_e.ldom_name[0] != '\0') { 2978 fmd_hdl_debug(fmd_hdl, 2979 "info: processing async evt ldom_name %s\n", 2980 async_e.ldom_name); 2981 } 2982 2983 /* 2984 * at this point, if async_e.ldom_name is not NULL, 2985 * we have a valid iosvc struct ptr. 2986 * the only time async_e.ldom_name is NULL is at 2987 * ds_unreg_cb() 2988 */ 2989 switch (async_e.event_type) { 2990 case ETM_ASYNC_EVENT_LDOM_UNBIND: 2991 case ETM_ASYNC_EVENT_LDOM_REMOVE: 2992 /* 2993 * we have a valid ldom_name, 2994 * lookup the iosvc struct w/ ldom_name 2995 * do nothing if can't find an iosvc 2996 * no iosvc clean up to do 2997 */ 2998 (void) pthread_mutex_lock( 2999 &iosvc_list_lock); 3000 iosvc = etm_iosvc_lookup(fmd_hdl, 3001 async_e.ldom_name, 3002 async_e.ds_hdl, B_FALSE); 3003 if (iosvc == NULL) { 3004 fmd_hdl_debug(fmd_hdl, 3005 "error: can't find iosvc for ldom " 3006 "name %s\n", 3007 async_e.ldom_name); 3008 (void) pthread_mutex_unlock( 3009 &iosvc_list_lock); 3010 break; 3011 } 3012 etm_iosvc_cleanup(fmd_hdl, iosvc); 3013 (void) pthread_mutex_unlock( 3014 &iosvc_list_lock); 3015 break; 3016 3017 case ETM_ASYNC_EVENT_LDOM_BIND: 3018 3019 /* 3020 * create iosvc if it has not been 3021 * created 3022 * async_e.ds_hdl is invalid 3023 * async_e.ldom_name is valid ldom_name 3024 */ 3025 (void) pthread_mutex_lock( 3026 &iosvc_list_lock); 3027 iosvc = etm_iosvc_lookup(fmd_hdl, 3028 async_e.ldom_name, 3029 async_e.ds_hdl, B_TRUE); 3030 if (iosvc == NULL) { 3031 fmd_hdl_debug(fmd_hdl, 3032 "error: can't create iosvc for " 3033 "async event %d\n", 3034 async_e.event_type); 3035 (void) pthread_mutex_unlock( 3036 &iosvc_list_lock); 3037 break; 3038 } 3039 (void) strcpy(iosvc->ldom_name, 3040 async_e.ldom_name); 3041 iosvc->ds_hdl = async_e.ds_hdl; 3042 (void) pthread_mutex_unlock( 3043 &iosvc_list_lock); 3044 break; 3045 3046 case ETM_ASYNC_EVENT_DS_REG_CB: 3047 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3048 /* 3049 * find the root ldom name from 3050 * ldom domain hdl/id 3051 */ 3052 if (etm_filter_find_ldom_name( 3053 fmd_hdl, async_e.dhdl, 3054 async_e.ldom_name, 3055 MAX_LDOM_NAME) != 0) { 3056 fmd_hdl_debug(fmd_hdl, 3057 "error: can't find root " 3058 "domain name from did %d\n", 3059 async_e.dhdl); 3060 break; 3061 } else { 3062 fmd_hdl_debug(fmd_hdl, 3063 "info: etm_filter_find_" 3064 "ldom_name returned %s\n", 3065 async_e.ldom_name); 3066 } 3067 /* 3068 * now we should have a valid 3069 * root domain name.
3070 * lookup the iosvc struct 3071 * associated with the ldom_name 3072 * and init the iosvc struct 3073 */ 3074 (void) pthread_mutex_lock( 3075 &iosvc_list_lock); 3076 iosvc = etm_iosvc_lookup( 3077 fmd_hdl, async_e.ldom_name, 3078 async_e.ds_hdl, B_TRUE); 3079 if (iosvc == NULL) { 3080 fmd_hdl_debug(fmd_hdl, 3081 "error: can't create iosvc " 3082 "for async event %d\n", 3083 async_e.event_type); 3084 (void) pthread_mutex_unlock( 3085 &iosvc_list_lock); 3086 break; 3087 } 3088 iosvc->ds_hdl = async_e.ds_hdl; 3089 iosvc->cur_send_xid = 0; 3090 3091 /* 3092 * open the fmd xprt if it 3093 * hasn't been previously opened 3094 */ 3095 iosvc->start_sending_Q = 0; 3096 fmd_hdl_debug(fmd_hdl, 3097 "info: before fmd_xprt_open " 3098 "ldom_name is %s\n", 3099 async_e.ldom_name); 3100 if (iosvc->fmd_xprt == NULL) { 3101 iosvc->fmd_xprt = 3102 fmd_xprt_open( 3103 fmd_hdl, 3104 flags, NULL, 3105 iosvc); 3106 } 3107 3108 iosvc->thr_is_dying = 0; 3109 if (iosvc->recv_tid == NULL) { 3110 iosvc->recv_tid = 3111 fmd_thr_create( 3112 fmd_hdl, 3113 etm_recv_from_remote_root, 3114 iosvc); 3115 } 3116 if (iosvc->send_tid == NULL) { 3117 iosvc->send_tid = 3118 fmd_thr_create( 3119 fmd_hdl, 3120 etm_send_to_remote_root, 3121 iosvc); 3122 } 3123 3124 (void) pthread_mutex_unlock( 3125 &iosvc_list_lock); 3126 } else { 3127 iosvc = &io_svc; 3128 (void) strcpy(iosvc->ldom_name, 3129 async_e.ldom_name); 3130 iosvc->ds_hdl = async_e.ds_hdl; 3131 iosvc->cur_send_xid = 0; 3132 iosvc->start_sending_Q = 0; 3133 3134 /* 3135 * open the fmd xprt if it 3136 * hasn't been previously opened 3137 */ 3138 if (iosvc->fmd_xprt == NULL) { 3139 iosvc->fmd_xprt = 3140 fmd_xprt_open( 3141 fmd_hdl, 3142 flags, NULL, 3143 iosvc); 3144 } 3145 3146 iosvc->thr_is_dying = 0; 3147 if (iosvc->recv_tid == NULL) { 3148 iosvc->recv_tid = 3149 fmd_thr_create( 3150 fmd_hdl, 3151 etm_recv_from_remote_root, 3152 iosvc); 3153 } 3154 if (iosvc->send_tid == NULL) { 3155 iosvc->send_tid = 3156 fmd_thr_create( 3157 fmd_hdl, 3158 etm_send_to_remote_root, 3159 iosvc); 3160 } 3161 } 3162 break; 3163 3164 case ETM_ASYNC_EVENT_DS_UNREG_CB: 3165 /* 3166 * decide which iosvc struct to perform 3167 * this UNREG callback on. 3168 */ 3169 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3170 (void) pthread_mutex_lock( 3171 &iosvc_list_lock); 3172 /* 3173 * lookup the iosvc struct w/ 3174 * ds_hdl 3175 */ 3176 iosvc = etm_iosvc_lookup( 3177 fmd_hdl, async_e.ldom_name, 3178 async_e.ds_hdl, B_FALSE); 3179 if (iosvc == NULL) { 3180 fmd_hdl_debug(fmd_hdl, 3181 "error: can't find iosvc " 3182 "for async event %d\n", 3183 async_e.event_type); 3184 (void) pthread_mutex_unlock( 3185 &iosvc_list_lock); 3186 break; 3187 } 3188 3189 /* 3190 * ds_hdl and fmd_xprt_open 3191 * go hand in hand; 3192 * after unreg_cb, 3193 * ds_hdl is INVALID and 3194 * fmd_xprt is closed. 3195 * the ldom name and the msg Q 3196 * remain in iosvc_list 3197 */ 3198 iosvc->ds_hdl = DS_INVALID_HDL; 3199 if (iosvc->fmd_xprt != NULL) 3200 fmd_xprt_close(fmd_hdl, 3201 iosvc->fmd_xprt); 3202 iosvc->fmd_xprt = NULL; 3203 3204 if (iosvc->ldom_name[0] != '\0') 3205 fmd_hdl_debug(fmd_hdl, 3206 "info: iosvc w/ ldom_name " 3207 "%s \n", iosvc->ldom_name); 3208 3209 /* 3210 * destroy send/recv threads 3211 * on Control side.
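 * (thr_is_dying is set first so each thread exits its loop;
 * fmd_thr_signal() then interrupts any blocking call the thread
 * may be in, and fmd_thr_destroy() reclaims it)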
3212 */ 3213 iosvc->thr_is_dying = 1; 3214 if (iosvc->send_tid != NULL) { 3215 fmd_thr_signal(fmd_hdl, 3216 iosvc->send_tid); 3217 fmd_thr_destroy(fmd_hdl, 3218 iosvc->send_tid); 3219 iosvc->send_tid = NULL; 3220 } /* if send tid was created */ 3221 3222 if (iosvc->recv_tid != NULL) { 3223 fmd_thr_signal(fmd_hdl, 3224 iosvc->recv_tid); 3225 fmd_thr_destroy(fmd_hdl, 3226 iosvc->recv_tid); 3227 iosvc->recv_tid = NULL; 3228 } /* if recv tid was created */ 3229 3230 (void) pthread_mutex_unlock( 3231 &iosvc_list_lock); 3232 } else { 3233 iosvc = &io_svc; 3234 /* 3235 * destroy send/recv threads 3236 * on Root side. 3237 */ 3238 iosvc->thr_is_dying = 1; 3239 if (iosvc->send_tid != NULL) { 3240 fmd_thr_signal(fmd_hdl, 3241 iosvc->send_tid); 3242 fmd_thr_destroy(fmd_hdl, 3243 iosvc->send_tid); 3244 iosvc->send_tid = NULL; 3245 } /* if send tid was created */ 3246 3247 if (iosvc->recv_tid != NULL) { 3248 fmd_thr_signal(fmd_hdl, 3249 iosvc->recv_tid); 3250 fmd_thr_destroy(fmd_hdl, 3251 iosvc->recv_tid); 3252 iosvc->recv_tid = NULL; 3253 } /* if recv tid was created */ 3254 3255 iosvc->ds_hdl = DS_INVALID_HDL; 3256 if (iosvc->fmd_xprt != NULL) 3257 fmd_xprt_close(fmd_hdl, 3258 iosvc->fmd_xprt); 3259 iosvc->fmd_xprt = NULL; 3260 } 3261 break; 3262 3263 default: 3264 /* 3265 * for all other events, etm doesn't care. 3266 * already logged an fmd info msg w/ 3267 * the event type. Do nothing here. 3268 */ 3269 break; 3270 } /* switch (async_e.event_type) */ 3271 3272 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3273 etm_filter_handle_ldom_event(fmd_hdl, 3274 async_e.event_type, async_e.ldom_name); 3275 } 3276 3277 /* 3278 * grab the lock to check the q length again 3279 */ 3280 (void) pthread_mutex_lock(&etm_async_event_q_lock); 3281 3282 if (etm_is_dying) { 3283 break; 3284 } 3285 } /* etm_async_q_cur_len */ 3286 3287 /* 3288 * we have the mutex lock at this point, whether 3289 * . etm_is_dying and/or 3290 * . 
q_len == 0 3291 */ 3292 if (!etm_is_dying && etm_async_q_cur_len == 0) { 3293 fmd_hdl_debug(fmd_hdl, 3294 "info: cond wait on async_event_q_cv\n"); 3295 (void) pthread_cond_wait(&etm_async_event_q_cv, 3296 &etm_async_event_q_lock); 3297 fmd_hdl_debug(fmd_hdl, 3298 "info: cond wait on async_event_q_cv rtns\n"); 3299 } 3300 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 3301 } /* etm_is_dying */ 3302 3303 fmd_hdl_debug(fmd_hdl, 3304 "info: etm async event handler thread exiting\n"); 3305 3306 } /* etm_async_event_handler */ 3307 3308 /* 3309 * deQ what's in iosvc msg Q 3310 * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg() 3311 * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event 3312 */ 3313 static void 3314 etm_send_to_remote_root(void *arg) 3315 { 3316 3317 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3318 etm_iosvc_q_ele_t msg_ele; /* iosvc msg ele */ 3319 etm_proto_v1_ev_hdr_t *ev_hdrp; /* hdr for FMA_EVENT */ 3320 fmd_hdl_t *fmd_hdl = init_hdl; /* fmd handle */ 3321 3322 3323 fmd_hdl_debug(fmd_hdl, 3324 "info: send to remote iosvc starting w/ ldom_name %s\n", 3325 iosvc->ldom_name); 3326 3327 /* 3328 * loop forever until etm_is_dying or thr_is_dying 3329 */ 3330 while (!etm_is_dying && !iosvc->thr_is_dying) { 3331 if (iosvc->ds_hdl != DS_INVALID_HDL && 3332 iosvc->start_sending_Q > 0) { 3333 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3334 while (iosvc->msg_q_cur_len > 0 && 3335 iosvc->ds_hdl != DS_INVALID_HDL) { 3336 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, 3337 &msg_ele); 3338 if (etm_debug_lvl >= 3) { 3339 fmd_hdl_debug(fmd_hdl, "info: valid " 3340 "ds_hdl before ds_send_msg \n"); 3341 } 3342 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3343 3344 iosvc->ack_ok = 0; 3345 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3346 ((ptrdiff_t)msg_ele.msg); 3347 ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 3348 while (!iosvc->ack_ok && 3349 iosvc->ds_hdl != DS_INVALID_HDL && 3350 !etm_is_dying) { 3351 /* 3352 * call ds_send_msg() to send the msg, 3353 * wait for the recv end to send the 3354 * resp msg back. 3355 * If resp msg is recv-ed, ack_ok 3356 * will be set to 1. 3357 * otherwise, retry. 3358 */ 3359 if (etm_send_ds_msg(fmd_hdl, B_TRUE, 3360 iosvc, &msg_ele, ev_hdrp) < 0) { 3361 continue; 3362 } 3363 3364 if (etm_is_dying || iosvc->thr_is_dying) 3365 break; 3366 } 3367 3368 /* 3369 * if out of the while loop but !ack_ok, ie, 3370 * ds_hdl becomes invalid at some point 3371 * while waiting for the resp msg, we need to put 3372 * the msg back to the head of the Q. 3373 */ 3374 if (!iosvc->ack_ok) { 3375 (void) pthread_mutex_lock( 3376 &iosvc->msg_q_lock); 3377 /* 3378 * put the msg back to the head of Q. 3379 * If the Q is full at this point, 3380 * drop the msg at the tail, enq this 3381 * msg to the head.
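 * (dropping the newest msg at the tail keeps the oldest,
 * still-unACKed msg first so resend order is preserved)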
3382 */ 3383 etm_msg_enq_head(fmd_hdl, iosvc, 3384 &msg_ele); 3385 (void) pthread_mutex_unlock( 3386 &iosvc->msg_q_lock); 3387 } 3388 3389 /* 3390 * 3391 * grab the lock to check the Q len again 3392 */ 3393 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3394 if (etm_is_dying || iosvc->thr_is_dying) { 3395 break; 3396 } 3397 } /* while dequeing iosvc msgs to send */ 3398 3399 /* 3400 * we have the mutex lock for msg_q_lock at this point 3401 * we are here because 3402 * 1) q_len == 0: then wait on the cv for Q to be filled 3403 * 2) etm_is_dying 3404 */ 3405 if (!etm_is_dying && !iosvc->thr_is_dying && 3406 iosvc->msg_q_cur_len == 0) { 3407 fmd_hdl_debug(fmd_hdl, 3408 "info: waiting on msg_q_cv\n"); 3409 (void) pthread_cond_wait(&iosvc->msg_q_cv, 3410 &iosvc->msg_q_lock); 3411 } 3412 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3413 if (etm_is_dying || iosvc->thr_is_dying) { 3414 break; 3415 } 3416 } else { 3417 (void) etm_sleep(1); 3418 } /* wait for the start_sendingQ > 0 */ 3419 } /* etm_is_dying or thr_is_dying */ 3420 fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n"); 3421 } /* etm_send_to_remote_root */ 3422 3423 3424 /* 3425 * receive etm msgs from the remote root ldom by calling ds_recv_msg() 3426 * if FMA events/ereports, call fmd_xprt_post() to post to fmd 3427 * send ACK back by calling ds_send_msg() 3428 */ 3429 static void 3430 etm_recv_from_remote_root(void *arg) 3431 { 3432 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3433 etm_proto_v1_pp_t *pp; /* protocol preamble */ 3434 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 3435 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 3436 int32_t resp_code = 0; /* default is success */ 3437 int32_t rc; /* return value */ 3438 size_t maxlen = MAXLEN; 3439 /* max msg len */ 3440 char msgbuf[MAXLEN]; /* recv msg buf */ 3441 size_t msg_size; /* recv msg size */ 3442 size_t hdr_sz; /* sizeof *hdrp */ 3443 size_t evsz; /* sizeof *evp */ 3444 size_t fma_event_size; /* sizeof FMA event */ 3445 nvlist_t *evp; /* ptr to the nvlist */ 3446 char *buf; /* ptr to the nvlist */ 3447 static uint32_t mem_alloc = 0; /* indicate if alloc mem */ 3448 char *msg; /* ptr to alloc mem */ 3449 fmd_hdl_t *fmd_hdl = init_hdl; 3450 3451 3452 3453 fmd_hdl_debug(fmd_hdl, 3454 "info: recv from remote iosvc starting with ldom name %s \n", 3455 iosvc->ldom_name); 3456 3457 /* 3458 * loop forever until etm_is_dying or the thread is dying 3459 */ 3460 3461 msg = msgbuf; 3462 while (!etm_is_dying && !iosvc->thr_is_dying) { 3463 if (iosvc->ds_hdl == DS_INVALID_HDL) { 3464 fmd_hdl_debug(fmd_hdl, 3465 "info: ds_hdl is invalid in recv thr\n"); 3466 (void) etm_sleep(1); 3467 continue; 3468 } 3469 3470 /* 3471 * for now, there are FMA_EVENT and ACK msg type. 3472 * use FMA_EVENT buf as the maxlen, hdr+1 fma event. 3473 * FMA_EVENT is big enough to hold an ACK msg. 3474 * the actual msg size received is in msg_size. 
3475 */ 3476 rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size); 3477 if (rc == EFBIG) { 3478 fmd_hdl_debug(fmd_hdl, 3479 "info: ds_recv_msg needs mem the size of %d\n", 3480 msg_size); 3481 msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP); 3482 mem_alloc = 1; 3483 } else if (rc == 0) { 3484 fmd_hdl_debug(fmd_hdl, 3485 "info: ds_recv_msg received a msg ok\n"); 3486 /* 3487 * check the magic # in msg.hdr 3488 */ 3489 pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg); 3490 if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) { 3491 fmd_hdl_debug(fmd_hdl, 3492 "info: bad ds recv on magic\n"); 3493 continue; 3494 } 3495 3496 /* 3497 * check the msg type against msg_size to be sure 3498 * that received msg is not a truncated msg 3499 */ 3500 if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 3501 3502 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3503 ((ptrdiff_t)msg); 3504 fmd_hdl_debug(fmd_hdl, "info: ds received " 3505 "FMA EVENT xid=%d msg_size=%d\n", 3506 ev_hdrp->ev_pp.pp_xid, msg_size); 3507 hdr_sz = sizeof (*ev_hdrp) + 3508 1*(sizeof (ev_hdrp->ev_lens[0])); 3509 fma_event_size = hdr_sz + ev_hdrp->ev_lens[0]; 3510 if (fma_event_size != msg_size) { 3511 fmd_hdl_debug(fmd_hdl, "info: wrong " 3512 "ev msg size received\n"); 3513 continue; 3514 /* 3515 * Simply do nothing. The send side 3516 * will timedcond_wait waiting on the 3517 * resp msg will timeout and 3518 * re-send the same msg. 3519 */ 3520 } 3521 if (etm_debug_lvl >= 3) { 3522 fmd_hdl_debug(fmd_hdl, "info: recv msg" 3523 " size %d hdrsz %d evp size %d\n", 3524 msg_size, hdr_sz, 3525 ev_hdrp->ev_lens[0]); 3526 } 3527 3528 if (ev_hdrp->ev_pp.pp_xid != 3529 iosvc->xid_posted_ev) { 3530 /* 3531 * different from last xid posted to 3532 * fmd, post to fmd now. 3533 */ 3534 buf = msg + hdr_sz; 3535 rc = nvlist_unpack(buf, 3536 ev_hdrp->ev_lens[0], &evp, 0); 3537 rc = nvlist_size(evp, &evsz, 3538 NV_ENCODE_XDR); 3539 fmd_hdl_debug(fmd_hdl, 3540 "info: evp size %d before fmd" 3541 "post\n", evsz); 3542 3543 if ((rc = etm_post_to_fmd(fmd_hdl, 3544 iosvc->fmd_xprt, evp)) >= 0) { 3545 fmd_hdl_debug(fmd_hdl, 3546 "info: xid posted to fmd %d" 3547 "\n", 3548 ev_hdrp->ev_pp.pp_xid); 3549 iosvc->xid_posted_ev = 3550 ev_hdrp->ev_pp.pp_xid; 3551 } 3552 } 3553 3554 /* 3555 * ready to send the RESPONSE msg back 3556 * reuse the msg buffer as the response buffer 3557 */ 3558 resp_hdrp = (etm_proto_v1_resp_hdr_t *) 3559 ((ptrdiff_t)msg); 3560 resp_hdrp->resp_pp.pp_msg_type = 3561 ETM_MSG_TYPE_RESPONSE; 3562 3563 resp_hdrp->resp_code = resp_code; 3564 resp_hdrp->resp_len = sizeof (*resp_hdrp); 3565 3566 /* 3567 * send the whole response msg in one send 3568 */ 3569 if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg, 3570 sizeof (*resp_hdrp)) != 0) { 3571 fmd_hdl_debug(fmd_hdl, 3572 "info: send response msg failed\n"); 3573 } else { 3574 fmd_hdl_debug(fmd_hdl, 3575 "info: ds send resp msg ok" 3576 "size %d\n", sizeof (*resp_hdrp)); 3577 } 3578 } else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 3579 fmd_hdl_debug(fmd_hdl, 3580 "info: ds received respond msg xid=%d" 3581 "msg_size=%d for ldom %s\n", pp->pp_xid, 3582 msg_size, iosvc->ldom_name); 3583 if (sizeof (*resp_hdrp) != msg_size) { 3584 fmd_hdl_debug(fmd_hdl, 3585 "info: wrong resp msg size" 3586 "received\n"); 3587 fmd_hdl_debug(fmd_hdl, 3588 "info: resp msg size %d recv resp" 3589 "msg size %d\n", 3590 sizeof (*resp_hdrp), msg_size); 3591 continue; 3592 } 3593 /* 3594 * is the pp.pp_xid == iosvc->cur_send_xid+1, 3595 * if so, nudge the send routine to send next 3596 */ 3597 if (pp->pp_xid != iosvc->cur_send_xid+1) 
{ 3598 fmd_hdl_debug(fmd_hdl, 3599 "info: ds received resp msg xid=%d " 3600 "doesn't match cur_send_xid=%d\n", 3601 pp->pp_xid, iosvc->cur_send_xid+1); 3602 continue; 3603 } 3604 (void) pthread_mutex_lock(&iosvc->msg_ack_lock); 3605 iosvc->ack_ok = 1; 3606 (void) pthread_cond_signal(&iosvc->msg_ack_cv); 3607 (void) pthread_mutex_unlock( 3608 &iosvc->msg_ack_lock); 3609 fmd_hdl_debug(fmd_hdl, 3610 "info: signaling msg_ack_cv\n"); 3611 } else { 3612 /* 3613 * place holder for future msg types 3614 */ 3615 fmd_hdl_debug(fmd_hdl, 3616 "info: ds received unrecognized msg\n"); 3617 } 3618 if (mem_alloc) { 3619 fmd_hdl_free(fmd_hdl, msg, msg_size); 3620 mem_alloc = 0; 3621 msg = msgbuf; 3622 } 3623 } else { 3624 if (etm_debug_lvl >= 3) { 3625 fmd_hdl_debug(fmd_hdl, 3626 "info: ds_recv_msg() failed\n"); 3627 } 3628 } /* ds_recv_msg() returns */ 3629 } /* etm_is_dying */ 3630 3631 /* 3632 * need to free the mem allocated in msg upon exiting the thread 3633 */ 3634 if (mem_alloc) { 3635 fmd_hdl_free(fmd_hdl, msg, msg_size); 3636 mem_alloc = 0; 3637 msg = msgbuf; 3638 } 3639 fmd_hdl_debug(fmd_hdl, "info: etm recv thread exiting\n"); 3640 } /* etm_recv_from_remote_root */ 3641 3642 3643 3644 /* 3645 * etm_ds_init 3646 * initialize DS services function pointers by calling 3647 * dlopen() followed by dlsym() for each ds func. 3648 * if any dlopen() or dlsym() call fails, return -ENOENT 3649 * return >0 for success, -ENOENT for failure 3650 */ 3651 static int 3652 etm_ds_init(fmd_hdl_t *hdl) 3653 { 3654 int rc = 0; 3655 3656 if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) { 3657 fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path); 3658 return (-ENOENT); 3659 } 3660 3661 etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3662 dlsym(etm_dl_hdl, "ds_svc_reg"); 3663 if (etm_ds_svc_reg == NULL) { 3664 fmd_hdl_debug(hdl, 3665 "error: failed to dlsym ds_svc_reg() w/ error %s\n", 3666 dlerror()); 3667 rc = -ENOENT; 3668 } 3669 3670 3671 etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3672 dlsym(etm_dl_hdl, "ds_clnt_reg"); 3673 if (etm_ds_clnt_reg == NULL) { 3674 fmd_hdl_debug(hdl, 3675 "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno); 3676 rc = -ENOENT; 3677 } 3678 3679 etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen)) 3680 dlsym(etm_dl_hdl, "ds_send_msg"); 3681 if (etm_ds_send_msg == NULL) { 3682 fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n"); 3683 rc = -ENOENT; 3684 } 3685 3686 etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen, 3687 size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg"); 3688 if (etm_ds_recv_msg == NULL) { 3689 fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n"); 3690 rc = -ENOENT; 3691 } 3692 3693 etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini"); 3694 if (etm_ds_fini == NULL) { 3695 fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n"); 3696 rc = -ENOENT; 3697 } 3698 3699 if (rc == -ENOENT) { 3700 (void) dlclose(etm_dl_hdl); 3701 } 3702 return (rc); 3703 3704 } /* etm_ds_init() */ 3705 3706 3707 /* 3708 * -------------------------- FMD entry points ------------------------------- 3709 */ 3710 3711 /* 3712 * _fmd_init - initialize the transport for use by ETM and start the 3713 * server daemon to accept new connections to us 3714 * 3715 * FMD will read our *.conf and subscribe us to FMA events 3716 */ 3717 3718 void 3719 _fmd_init(fmd_hdl_t *hdl) 3720 { 3721 struct timeval tmv; /* timeval */ 3722 ssize_t n; /* gen use */ 3723 const struct facility *fp; /* syslog facility matching
*/ 3724 char *facname; /* syslog facility property */ 3725 uint32_t type_mask; /* type of the local host */ 3726 int rc; /* funcs return code */ 3727 3728 3729 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 3730 return; /* invalid data in configuration file */ 3731 } 3732 3733 fmd_hdl_debug(hdl, "info: module initializing\n"); 3734 3735 init_hdl = hdl; 3736 etm_lhp = ldom_init(etm_init_alloc, etm_init_free); 3737 3738 /* 3739 * decide the ldom type, do initialization accordingly 3740 */ 3741 if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) { 3742 fmd_hdl_debug(hdl, "error: can't decide ldom type\n"); 3743 fmd_hdl_debug(hdl, "info: module unregistering\n"); 3744 ldom_fini(etm_lhp); 3745 fmd_hdl_unregister(hdl); 3746 return; 3747 } 3748 3749 if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) { 3750 if (type_mask & LDOM_TYPE_LEGACY) { 3751 /* 3752 * running on a legacy sun4v domain, 3753 * act as the old sun4v 3754 */ 3755 etm_ldom_type = LDOM_TYPE_LEGACY; 3756 fmd_hdl_debug(hdl, "info: running as the old sun4v\n"); 3757 ldom_fini(etm_lhp); 3758 } else if (type_mask & LDOM_TYPE_CONTROL) { 3759 etm_ldom_type = LDOM_TYPE_CONTROL; 3760 fmd_hdl_debug(hdl, "info: running as control domain\n"); 3761 3762 /* 3763 * looking for libds.so.1. 3764 * If not found, don't do DS registration. As a result, 3765 * there will be no DS callbacks or other DS services. 3766 */ 3767 if (etm_ds_init(hdl) >= 0) { 3768 etm_filter_init(hdl); 3769 etm_ckpt_init(hdl); 3770 3771 flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT; 3772 3773 /* 3774 * ds client registration 3775 */ 3776 if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps, 3777 &iosvc_ops))) { 3778 fmd_hdl_debug(hdl, 3779 "error: ds_clnt_reg(): errno %d\n", rc); 3780 } 3781 } else { 3782 fmd_hdl_debug(hdl, "error: dlopen() libds " 3783 "failed, continue without the DS services\n"); 3784 } 3785 3786 /* 3787 * register for ldom status events 3788 */ 3789 if ((rc = ldom_register_event(etm_lhp, 3790 ldom_event_handler, hdl))) { 3791 fmd_hdl_debug(hdl, 3792 "error: ldom_register_event():" 3793 " errno %d\n", rc); 3794 } 3795 3796 /* 3797 * create the thread for handling both the ldom status 3798 * change and service events 3799 */ 3800 etm_async_e_tid = fmd_thr_create(hdl, 3801 etm_async_event_handler, hdl); 3802 } 3803 3804 /* setup statistics and properties from FMD */ 3805 3806 (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, 3807 sizeof (etm_stats) / sizeof (fmd_stat_t), 3808 (fmd_stat_t *)&etm_stats); 3809 3810 etm_fma_resp_wait_time = fmd_prop_get_int32(hdl, 3811 ETM_PROP_NM_FMA_RESP_WAIT_TIME); 3812 etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL); 3813 etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl, 3814 ETM_PROP_NM_DEBUG_MAX_EV_CNT); 3815 fmd_hdl_debug(hdl, "info: etm_debug_lvl %d " 3816 "etm_debug_max_ev_cnt %d\n", etm_debug_lvl, 3817 etm_debug_max_ev_cnt); 3818 3819 etm_resp_q_max_len = fmd_prop_get_int32(hdl, 3820 ETM_PROP_NM_MAX_RESP_Q_LEN); 3821 etm_stats.etm_resp_q_max_len.fmds_value.ui64 = 3822 etm_resp_q_max_len; 3823 etm_bad_acc_to_sec = fmd_prop_get_int32(hdl, 3824 ETM_PROP_NM_BAD_ACC_TO_SEC); 3825 3826 /* 3827 * obtain an FMD transport handle so we can post 3828 * FMA events later 3829 */ 3830 3831 etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL); 3832 3833 /* 3834 * encourage protocol transaction id to be unique per module 3835 * load 3836 */ 3837 3838 (void) gettimeofday(&tmv, NULL); 3839 etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) | 3840 ((unsigned long)tmv.tv_usec >> 10)); 3841 3842 /* init the

		/* init the ETM transport */

		if ((n = etm_xport_init(hdl)) != 0) {
			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
			    (-n));
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * Cache any properties we use every time we receive an alert.
		 */
		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);

		if (syslog_file && (syslog_logfd = open("/dev/conslog",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl,
			    "error: failed to open /dev/conslog");
			syslog_file = 0;
		}

		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
			syslog_cons = 0;
		}

		if (syslog_file) {
			/*
			 * Look up the value of the "facility" property and
			 * use it to determine what syslog LOG_* facility
			 * value we use to fill in our log_ctl_t.
			 */
			facname = fmd_prop_get_string(hdl,
			    ETM_PROP_NM_FACILITY);

			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
				if (strcmp(fp->fac_name, facname) == 0)
					break;
			}

			if (fp->fac_name == NULL) {
				fmd_hdl_error(hdl, "error: invalid 'facility'"
				    " setting: %s\n", facname);
				syslog_file = 0;
			} else {
				syslog_facility = fp->fac_value;
				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
			}

			fmd_prop_free_string(hdl, facname);
		}

		/*
		 * start the message responder and the connection acceptance
		 * server; request protocol version be negotiated after waiting
		 * a second for the receiver to be ready to start handshaking
		 */

		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

		(void) etm_sleep(ETM_SLEEP_QUIK);
		etm_req_ver_negot(hdl);

	} else if (type_mask & LDOM_TYPE_ROOT) {
		etm_ldom_type = LDOM_TYPE_ROOT;
		fmd_hdl_debug(hdl, "info: running as root domain\n");

		/*
		 * looking for libds.so.1.
		 * If not found, don't do DS registration. As a result,
		 * there will be no DS callbacks or other DS services.
		 */
		if (etm_ds_init(hdl) < 0) {
			fmd_hdl_debug(hdl,
			    "error: dlopen() libds failed, "
			    "module unregistering\n");
			ldom_fini(etm_lhp);
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * DS service registration
		 */
		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
			    rc);
		}

		/*
		 * this thread is created for ds_reg_cb/ds_unreg_cb
		 */
		etm_async_e_tid = fmd_thr_create(hdl,
		    etm_async_event_handler, hdl);

		flags = FMD_XPRT_RDWR;
	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
		/*
		 * Do not load this module if it is
		 * . running on a non-root ldom
		 * . the domain owns no io devices
		 */
		fmd_hdl_debug(hdl,
		    "info: non-root ldom, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	} else {
		/*
		 * place holder, all other cases; unload etm for now
		 */
		fmd_hdl_debug(hdl,
		    "info: other ldom type, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	}

	fmd_hdl_debug(hdl, "info: module initialized ok\n");

} /* _fmd_init() */

/*
 * etm_recv - receive an FMA event from FMD and transport it
 *	to the remote endpoint
 */

/*ARGSUSED*/
void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
{
	etm_xport_addr_t *addrv;	/* vector of transport addresses */
	etm_xport_conn_t conn;		/* connection handle */
	etm_proto_v1_ev_hdr_t *hdrp;	/* for FMA_EVENT msg */
	ssize_t i, n;			/* gen use */
	size_t sz;			/* header size */
	size_t buflen;			/* size of packed FMA event */
	uint8_t *buf;			/* tmp buffer for packed FMA event */

	/*
	 * if this is running on a Root Domain, ignore the events,
	 * return right away
	 */
	if (etm_ldom_type == LDOM_TYPE_ROOT)
		return;

	buflen = 0;
	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event size errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);

	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

	/*
	 * if the debug limit has been set, avoid excessive traffic,
	 * for example, an infinite cycle using loopback nodes
	 */

	if ((etm_debug_max_ev_cnt >= 0) &&
	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
	    etm_debug_max_ev_cnt)) {
		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
		    "event %p cnt %llu > debug max %d\n", evp,
		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
		    etm_debug_max_ev_cnt);
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	/* allocate a buffer for the FMA event and nvlist pack it */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);
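
	/*
	 * Design_Note: nvlist_pack() below is handed the preallocated buf
	 * (non-NULL), so per the libnvpair contract it encodes into the
	 * caller's buffer in place rather than allocating its own; buflen
	 * was sized by nvlist_size() with the same NV_ENCODE_XDR encoding,
	 * which also keeps the packed bytes endian-neutral between domains.
	 */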

	/*
	 * increment the ttl value if the event is from remote (a root domain)
	 * uncomment this when enabling fault forwarding from Root domains
	 * to Control domain.
	 *
	 * uint8_t ttl;
	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
	 *		(void) nvlist_remove(evp, FMD_EVN_TTL,
	 *		    DATA_TYPE_UINT8);
	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
	 *	}
	 * }
	 */

	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
	    NV_ENCODE_XDR, 0)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event pack errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* get vector of dst addrs and send the FMA event to each one */

	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "bad event dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		/* open a new connection to this dst addr */

		if ((n = etm_conn_open(hdl, "FMA event dropped: "
		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_lock(&etm_write_lock);

		/* write the ETM message header */

		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
		    &sz)) == NULL) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			fmd_hdl_error(hdl, "error: FMA event dropped: "
			    "bad hdr write errno %d\n", errno);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad hdr wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		fmd_hdl_free(hdl, hdrp, sz); /* header not needed */
		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
		    evp);

		/* write the ETM message body, ie, the packed nvlist */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io write on event", conn,
		    buf, buflen, ETM_IO_OP_WR)) < 0) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad body wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_unlock(&etm_write_lock);

		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
		    evp);

		/* close the connection */

		(void) etm_conn_close(hdl, "bad conn close after event send",
		    conn);
	} /* foreach dst addr in the vector */

	etm_xport_free_addrv(hdl, addrv);
	fmd_hdl_free(hdl, buf, buflen);

} /* etm_recv() */
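

/*
 * Design_Note: for each destination address, the send loop above emits
 * one ETM message as two transport writes, roughly:
 *
 *	etm_conn_open()			open connection
 *	etm_hdr_write()			write ETM msg header (the local
 *					copy is freed once written)
 *	etm_io_op(.., ETM_IO_OP_WR)	write ETM msg body, ie the
 *					XDR-packed nvlist
 *	etm_conn_close()		close connection
 *
 * with the header and body writes done under etm_write_lock so one
 * event's header/body pair cannot interleave with another thread's
 * writes.
 */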


/*
 * etm_send - receive an FMA event from FMD and enQ it in the iosvc Q;
 *	etm_send_to_remote_root() will deQ and xprt the FMA events to a
 *	remote root domain.
 *	return FMD_SEND_SUCCESS for success,
 *	    FMD_SEND_FAILED for error
 */

/*ARGSUSED*/
int
etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl)
{
	uint32_t pack_it;	/* whether to pack/enq the event */
	etm_pack_msg_type_t msg_type;
				/* tell etm_pack_ds_msg() what to do */
	etm_iosvc_t *iosvc;	/* ptr to cur iosvc struct */
	char *class;		/* nvlist class name */

	pack_it = 1;
	msg_type = FMD_XPRT_OTHER_MSG;

	class = NULL;
	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
	if (class == NULL) {
		pack_it = 0;
	} else {
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(fmd_hdl,
			    "info: evp class= %s in etm_send\n", class);
		}

		if (etm_ldom_type == LDOM_TYPE_CONTROL) {
			iosvc =
			    (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp);

			/*
			 * check the flag FORWARDING_FAULTS_TO_CONTROL to
			 * decide if or not to drop fault subscription
			 * control msgs
			 */
			if (strcmp(class, "resource.fm.xprt.subscribe") == 0) {
				pack_it = 0;
				/*
				 * if (FORWARDING_FAULTS_TO_CONTROL == 1) {
				 *	(void) nvlist_lookup_string(nvl,
				 *	    FM_RSRC_XPRT_SUBCLASS, &subclass);
				 *	if (strcmp(subclass, "list.suspect")
				 *	    == 0) {
				 *		pack_it = 1;
				 *		msg_action =
				 *		    FMD_XPRT_OTHER_MSG;
				 *	}
				 *	if (strcmp(subclass, "list.repaired")
				 *	    == 0) {
				 *		pack_it = 1;
				 *		msg_action =
				 *		    FMD_XPRT_OTHER_MSG;
				 *	}
				 * }
				 */
			}
			if (strcmp(class, "resource.fm.xprt.run") == 0) {
				pack_it = 1;
				msg_type = FMD_XPRT_RUN_MSG;
			}
		} else { /* has to be the root domain ldom */
			iosvc = &io_svc;
			/*
			 * drop all ereport and fault subscriptions;
			 * are we dropping too much here, more than just
			 * ereport and fault subscriptions? need to check
			 */
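			/*
			 * Design_Note: the checks below mirror the control
			 * domain case above: drop
			 * "resource.fm.xprt.subscribe", tag
			 * "resource.fm.xprt.run" as FMD_XPRT_RUN_MSG, and
			 * let everything else go out with the default
			 * FMD_XPRT_OTHER_MSG set at entry.
			 */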
			if (strcmp(class, "resource.fm.xprt.subscribe") == 0)
				pack_it = 0;
			if (strcmp(class, "resource.fm.xprt.run") == 0) {
				pack_it = 1;
				msg_type = FMD_XPRT_RUN_MSG;
			}
		}
	}

	if (pack_it) {
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(fmd_hdl,
			    "info: ldom name returned from xprt get specific="
			    "%s xprt=%lld\n", iosvc->ldom_name, xp);
		}
		/*
		 * pack the etm msg for the DS library and enq in io_svc->Q
		 * when the hdrp is NULL, the packing func will use the static
		 * iosvc_hdr
		 */
		(void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type,
		    ETM_CKPT_NOOP);
	}

	return (FMD_SEND_SUCCESS);

} /* etm_send() */


/*
 * _fmd_fini - stop the server daemon and teardown the transport
 */

void
_fmd_fini(fmd_hdl_t *hdl)
{
	ssize_t n;			/* gen use */
	etm_iosvc_t *iosvc;		/* ptr to iosvc struct */
	etm_iosvc_q_ele_t msg_ele;	/* iosvc msg ele */
	uint32_t i;			/* for loop var */

	fmd_hdl_debug(hdl, "info: module finalizing\n");

	/* kill the connection server and responder; wait for them to die */

	etm_is_dying = 1;

	if (etm_svr_tid != NULL) {
		fmd_thr_signal(hdl, etm_svr_tid);
		fmd_thr_destroy(hdl, etm_svr_tid);
		etm_svr_tid = NULL;
	} /* if server thread was successfully created */

	if (etm_resp_tid != NULL) {
		fmd_thr_signal(hdl, etm_resp_tid);
		fmd_thr_destroy(hdl, etm_resp_tid);
		etm_resp_tid = NULL;
	} /* if responder thread was successfully created */

	if (etm_async_e_tid != NULL) {
		fmd_thr_signal(hdl, etm_async_e_tid);
		fmd_thr_destroy(hdl, etm_async_e_tid);
		etm_async_e_tid = NULL;
	} /* if async event handler thread was successfully created */


	if ((etm_ldom_type == LDOM_TYPE_LEGACY) ||
	    (etm_ldom_type == LDOM_TYPE_CONTROL)) {

		/* teardown the transport and cleanup syslogging */
		if ((n = etm_xport_fini(hdl)) != 0) {
			fmd_hdl_error(hdl, "warning: xport fini errno %d\n",
			    (-n));
		}
		if (etm_fmd_xprt != NULL) {
			fmd_xprt_close(hdl, etm_fmd_xprt);
		}

		if (syslog_logfd != -1) {
			(void) close(syslog_logfd);
		}
		if (syslog_msgfd != -1) {
			(void) close(syslog_msgfd);
		}
	}

	if (etm_ldom_type == LDOM_TYPE_CONTROL) {
		if (ldom_unregister_event(etm_lhp))
			fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n");

		/*
		 * on control side, need to go thru every iosvc struct to
		 * 1) process remaining events in the iosvc Q:
		 *    for plan A:
		 *    discard remaining events in the Q/free the memory,
		 *    since fmd_xprt_log() already logged in Control D's FMD
		 * 2) unregister the ds_hdl if valid
		 * 3) close the fmd_xprt if it has not been closed
		 */
		for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) {
			if (iosvc_list[i].ldom_name[0] != '\0') {
				/*
				 * found an iosvc struct for a root domain
				 */
				iosvc = &iosvc_list[i];
				(void) pthread_mutex_lock(&iosvc_list_lock);
				etm_iosvc_cleanup(hdl, iosvc);
				(void) pthread_mutex_unlock(&iosvc_list_lock);

			} else {
				/*
				 * reached an unused iosvc structure
				 */
				continue;
			}
		} /* for i < NUM_OF_ROOT_DOMAINS */
		etm_ckpt_fini(hdl);
		etm_filter_fini(hdl);

		ldom_fini(etm_lhp);

	} else if (etm_ldom_type == LDOM_TYPE_ROOT) {
		iosvc = &io_svc;
		if (iosvc->send_tid != NULL) {
			fmd_thr_signal(hdl, iosvc->send_tid);
			fmd_thr_destroy(hdl, iosvc->send_tid);
			iosvc->send_tid = NULL;
		} /* if io svc send thread was successfully created */

		if (iosvc->recv_tid != NULL) {
			fmd_thr_signal(hdl, iosvc->recv_tid);
			fmd_thr_destroy(hdl, iosvc->recv_tid);
			iosvc->recv_tid = NULL;
		} /* if io svc receive thread was successfully created */

		(void) pthread_mutex_lock(&iosvc->msg_q_lock);
		while (iosvc->msg_q_cur_len > 0) {
			(void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele);
			fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size);
		}
		(void) pthread_mutex_unlock(&iosvc->msg_q_lock);

		if (iosvc->fmd_xprt != NULL)
			fmd_xprt_close(hdl, iosvc->fmd_xprt);
		ldom_fini(etm_lhp);
	}
	if (etm_ds_fini) {
		(*etm_ds_fini)();
		(void) dlclose(etm_dl_hdl);
	}

	fmd_hdl_debug(hdl, "info: module finalized ok\n");

} /* _fmd_fini() */
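
/*
 * Design_Note: the teardown order in _fmd_fini() matters: etm_is_dying
 * is raised and the worker threads are signaled and destroyed first, so
 * no thread can still be using a connection or dequeuing when the iosvc
 * msg Q is drained and its buffers are freed, and ds_fini()/dlclose()
 * run last, after every user of the etm_ds_* function pointers has been
 * stopped.
 */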