1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * etm.c FMA Event Transport Module implementation, a plugin of FMD 29 * for sun4v/Ontario 30 * 31 * plugin for sending/receiving FMA events to/from service processor 32 */ 33 34 /* 35 * --------------------------------- includes -------------------------------- 36 */ 37 38 #include <sys/fm/protocol.h> 39 #include <sys/fm/util.h> 40 #include <sys/fm/ldom.h> 41 #include <sys/strlog.h> 42 #include <sys/syslog.h> 43 #include <sys/libds.h> 44 #include <netinet/in.h> 45 #include <fm/fmd_api.h> 46 47 #include "etm_xport_api.h" 48 #include "etm_etm_proto.h" 49 #include "etm_impl.h" 50 #include "etm_iosvc.h" 51 #include "etm_filter.h" 52 #include "etm_ckpt.h" 53 54 #include <pthread.h> 55 #include <signal.h> 56 #include <stropts.h> 57 #include <locale.h> 58 #include <strings.h> 59 #include <stdlib.h> 60 #include <unistd.h> 61 #include <limits.h> 62 #include <values.h> 63 #include <alloca.h> 64 #include <errno.h> 65 #include <dlfcn.h> 66 #include <link.h> 67 #include <fcntl.h> 68 #include <time.h> 69 70 /* 71 * 
----------------------------- forward decls -------------------------------
 */

/* handler installed below as the fmdo_recv entry of fmd_ops */
static void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);

/* handler installed below as the fmdo_send entry of fmd_ops */
static int
etm_send(fmd_hdl_t *hdl, fmd_xprt_t *xp, fmd_event_t *event, nvlist_t *nvl);

static void
etm_send_to_remote_root(void *arg);

static void
etm_recv_from_remote_root(void *arg);

/*
 * ------------------------- data structs for FMD ----------------------------
 */

/*
 * FMD entry points provided by this module; only the receive and send
 * hooks are supplied, every other slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	etm_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
	etm_send,	/* fmdo_send */
};

/*
 * Module properties: name, FMD type, and default value (as a string).
 * The table is terminated by an all-NULL/zero entry.
 */
static const fmd_prop_t fmd_props[] = {
	{ ETM_PROP_NM_XPORT_ADDRS,		FMD_TYPE_STRING, "" },
	{ ETM_PROP_NM_DEBUG_LVL,		FMD_TYPE_INT32, "0" },
	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,		FMD_TYPE_INT32, "-1" },
	{ ETM_PROP_NM_CONSOLE,			FMD_TYPE_BOOL, "false" },
	{ ETM_PROP_NM_SYSLOGD,			FMD_TYPE_BOOL, "true" },
	{ ETM_PROP_NM_FACILITY,			FMD_TYPE_STRING, "LOG_DAEMON" },
	{ ETM_PROP_NM_MAX_RESP_Q_LEN,		FMD_TYPE_UINT32, "512" },
	{ ETM_PROP_NM_BAD_ACC_TO_SEC,		FMD_TYPE_UINT32, "1" },
	{ ETM_PROP_NM_FMA_RESP_WAIT_TIME,	FMD_TYPE_INT32, "240" },
	{ NULL, 0, NULL }
};


/* module description, version string, entry points, and property table */
static const fmd_hdl_info_t fmd_info = {
	"FMA Event Transport Module", "1.2", &fmd_ops, fmd_props
};

/*
 * ----------------------- private consts and defns --------------------------
 */

/* misc buffer for variable sized protocol header fields */

#define	ETM_MISC_BUF_SZ	(4 * 1024)

/* ldom type of this domain; starts out as LDOM_TYPE_LEGACY */
static uint32_t
etm_ldom_type = LDOM_TYPE_LEGACY;

/* try limit for IO operations w/ capped exp backoff sleep on retry */

/*
 * Design_Note:	ETM will potentially retry forever IO operations that the
 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
 *		giving up after some number of seconds.
This avoids
 *		dropping FMA events while the service processor is down,
 *		but at the risk of pending fmdo_recv() forever and
 *		overflowing FMD's event queue for ETM.
 *		A future TBD enhancement would be to always recv
 *		and send each ETM msg in a single read/write() to reduce
 *		the risk of failure between ETM msg hdr and body,
 *		assuming the MTU_SZ is large enough.
 */

#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
#define	ETM_TRY_BACKOFF_RATE	(4)
#define	ETM_TRY_BACKOFF_CAP	(60)

/* amount to increment protocol transaction id on each new send */

#define	ETM_XID_INC	(2)

/* singly linked list node describing one response msg awaiting send */
typedef struct etm_resp_q_ele {

	etm_xport_conn_t	rqe_conn;	/* open connection to send on */
	etm_proto_v1_pp_t	*rqe_hdrp;	/* ptr to ETM msg hdr */
	size_t			rqe_hdr_sz;	/* sizeof ETM msg hdr */
	int32_t			rqe_resp_code;	/* response code to send */

	struct etm_resp_q_ele	*rqe_nextp;	/* PRIVATE - next ele ptr */

} etm_resp_q_ele_t;	/* responder queue element */

/*
 * ---------------------------- global data ----------------------------------
 */

static fmd_hdl_t
*init_hdl = NULL;	/* used in mem allocator and several other places */

static int
etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */

static int
etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */

static fmd_xprt_t
*etm_fmd_xprt = NULL;	/* FMD transport layer handle */

static pthread_t
etm_svr_tid = NULL;	/* thread id of connection acceptance server */

static pthread_t
etm_resp_tid = NULL;	/* thread id of msg responder */

static etm_resp_q_ele_t
*etm_resp_q_head = NULL; /* ptr to cur head of responder queue */

static etm_resp_q_ele_t
*etm_resp_q_tail = NULL; /* ptr to cur tail of responder queue */

static uint32_t
etm_resp_q_cur_len = 0;	/* cur length (ele cnt) of responder queue */

static uint32_t
etm_resp_q_max_len = 0;	/* max length (ele cnt) of responder queue */

static uint32_t
etm_bad_acc_to_sec = 0;	/* sleep timeout (in sec) after bad conn accept */

static pthread_mutex_t
etm_resp_q_lock = PTHREAD_MUTEX_INITIALIZER;	/* protects responder queue */

static pthread_cond_t
etm_resp_q_cv = PTHREAD_COND_INITIALIZER;	/* nudges msg responder */

static volatile int
etm_is_dying = 0;	/* bool for dying (killing self) */

static uint32_t
etm_xid_cur = 0;	/* current transaction id for sends */

static uint32_t
etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */

static uint32_t
etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */

static uint32_t
etm_xid_posted_logged_ev = 0;
			/* xid of last FMA_EVENT msg/event posted OK to FMD */

static uint32_t
etm_xid_posted_sa = 0;	/* xid of last ALERT msg/event posted OK to syslog */

static uint8_t
etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */

/*
 * NOTE(review): the fmd_props table lists "240" as the default for
 * ETM_PROP_NM_FMA_RESP_WAIT_TIME; presumably module init overwrites this
 * initial value of 30 from the property -- confirm.
 */
static uint32_t
etm_fma_resp_wait_time = 30;	/* time (sec) wait for fma event resp */

static pthread_mutex_t
etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */

static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
static int syslog_facility;	/* log(7D) facility (part of priority) */
static int syslog_logfd = -1;	/* log(7D) file descriptor */
static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
static int syslog_file = 0;	/* log to syslog_logfd */
static int syslog_cons = 0;	/* log to syslog_msgfd */

/* maps a facility name string to its LOG_* value; NULL name ends the table */
static const struct facility {
	const char *fac_name;
	int fac_value;
} syslog_facs[] = {
	{ "LOG_DAEMON", LOG_DAEMON },
	{ "LOG_LOCAL0", LOG_LOCAL0 },
	{ "LOG_LOCAL1", LOG_LOCAL1 },
	{ "LOG_LOCAL2", LOG_LOCAL2 },
	{ "LOG_LOCAL3", LOG_LOCAL3 },
	{ "LOG_LOCAL4", LOG_LOCAL4 },
	{ "LOG_LOCAL5", LOG_LOCAL5 },
	{ "LOG_LOCAL6", LOG_LOCAL6 },
	{ "LOG_LOCAL7", LOG_LOCAL7 },
	{ NULL, 0 }
};

/*
 * Module statistics.  The initializer that follows the member list is
 * positional, so its entries must be kept in exactly the same order as
 * the members declared here.
 */
static struct stats {

	/* ETM msg counters */

	fmd_stat_t etm_rd_hdr_fmaevent;
	fmd_stat_t etm_rd_hdr_control;
	fmd_stat_t etm_rd_hdr_alert;
	fmd_stat_t etm_rd_hdr_response;
	fmd_stat_t etm_rd_body_fmaevent;
	fmd_stat_t etm_rd_body_control;
	fmd_stat_t etm_rd_body_alert;
	fmd_stat_t etm_rd_body_response;
	fmd_stat_t etm_wr_hdr_fmaevent;
	fmd_stat_t etm_wr_hdr_control;
	fmd_stat_t etm_wr_hdr_response;
	fmd_stat_t etm_wr_body_fmaevent;
	fmd_stat_t etm_wr_body_control;
	fmd_stat_t etm_wr_body_response;

	fmd_stat_t etm_rd_max_ev_per_msg;
	fmd_stat_t etm_wr_max_ev_per_msg;

	fmd_stat_t etm_resp_q_cur_len;
	fmd_stat_t etm_resp_q_max_len;

	/* ETM byte counters */

	fmd_stat_t etm_wr_fmd_bytes;
	fmd_stat_t etm_rd_fmd_bytes;
	fmd_stat_t etm_wr_xport_bytes;
	fmd_stat_t etm_rd_xport_bytes;

	fmd_stat_t etm_magic_drop_bytes;

	/* ETM [dropped] FMA event counters */

	fmd_stat_t etm_rd_fmd_fmaevent;
	fmd_stat_t etm_wr_fmd_fmaevent;

	fmd_stat_t etm_rd_drop_fmaevent;
	fmd_stat_t etm_wr_drop_fmaevent;

	fmd_stat_t etm_rd_dup_fmaevent;
	fmd_stat_t etm_wr_dup_fmaevent;

	fmd_stat_t etm_rd_dup_alert;
	fmd_stat_t etm_wr_dup_alert;

	fmd_stat_t etm_enq_drop_resp_q;
	fmd_stat_t etm_deq_drop_resp_q;

	/* ETM protocol failures */

	fmd_stat_t etm_magic_bad;
	fmd_stat_t etm_ver_bad;
	fmd_stat_t etm_msgtype_bad;
	fmd_stat_t etm_subtype_bad;
	fmd_stat_t etm_xid_bad;
	fmd_stat_t etm_fmaeventlen_bad;
	fmd_stat_t etm_respcode_bad;
	fmd_stat_t etm_timeout_bad;
	fmd_stat_t etm_evlens_bad;

	/* IO operation failures */

	fmd_stat_t etm_xport_wr_fail;
	fmd_stat_t etm_xport_rd_fail;
	fmd_stat_t etm_xport_pk_fail;

	/* IO operation retries */

	fmd_stat_t etm_xport_wr_retry;
	fmd_stat_t etm_xport_rd_retry;
	fmd_stat_t etm_xport_pk_retry;

	/* system and library failures */

	fmd_stat_t etm_os_nvlist_pack_fail;
	fmd_stat_t etm_os_nvlist_unpack_fail;
	fmd_stat_t etm_os_nvlist_size_fail;
	fmd_stat_t etm_os_pthread_create_fail;

	/* xport API failures */

	fmd_stat_t etm_xport_get_ev_addrv_fail;
	fmd_stat_t etm_xport_open_fail;
	fmd_stat_t etm_xport_close_fail;
	fmd_stat_t etm_xport_accept_fail;
	fmd_stat_t etm_xport_open_retry;

	/* FMD entry point bad arguments */

	fmd_stat_t etm_fmd_init_badargs;
	fmd_stat_t etm_fmd_fini_badargs;

	/* Alert logging errors */

	fmd_stat_t etm_log_err;
	fmd_stat_t etm_msg_err;

	/* miscellaneous stats */

	fmd_stat_t etm_reset_xport;

} etm_stats = {

	/* ETM msg counters */

	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers rcvd from xport" },
	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers rcvd from xport" },
	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
		"ETM alert msg headers rcvd from xport" },
	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers rcvd from xport" },
	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies rcvd from xport" },
	{ "etm_rd_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies rcvd from xport" },
	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
		"ETM alert msg bodies rcvd from xport" },
	{ "etm_rd_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies rcvd from xport" },
	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers sent to xport" },
	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers sent to xport" },
	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers sent to xport" },
	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies sent to xport" },
	{ "etm_wr_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies sent to xport" },
	{ "etm_wr_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies sent to xport" },

	{ "etm_rd_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg from xport" },
	{ "etm_wr_max_ev_per_msg", FMD_TYPE_UINT64,
		"max FMA events per ETM msg to xport" },

	{ "etm_resp_q_cur_len", FMD_TYPE_UINT64,
		"cur enqueued response msgs to xport" },
	{ "etm_resp_q_max_len", FMD_TYPE_UINT64,
		"max enqueable response msgs to xport" },

	/* ETM byte counters */

	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to FMD" },
	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from FMD" },
	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to xport" },
	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from xport" },

	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
		"bytes dropped from xport pre magic num" },

	/* ETM [dropped] FMA event counters */

	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events rcvd from FMD" },
	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events sent to FMD" },

	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events from xport" },
	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events to xport" },

	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events rcvd from xport" },
	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events sent to xport" },

	{ "etm_rd_dup_alert", FMD_TYPE_UINT64,
		"duplicate ALERTs rcvd from xport" },
	{ "etm_wr_dup_alert", FMD_TYPE_UINT64,
		"duplicate ALERTs sent to xport" },

	{ "etm_enq_drop_resp_q", FMD_TYPE_UINT64,
		"dropped response msgs on enq" },
	{ "etm_deq_drop_resp_q", FMD_TYPE_UINT64,
		"dropped response msgs on deq" },

	/* ETM protocol failures */

	{ "etm_magic_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid magic num" },
	{ "etm_ver_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid protocol version" },
	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid message type" },
	{ "etm_subtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid sub type" },
	{ "etm_xid_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ unmatched xid" },
	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid FMA event length" },
	{ "etm_respcode_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid response code" },
	{ "etm_timeout_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid timeout value" },
	{ "etm_evlens_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ too many event lengths" },

	/* IO operation failures */

	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
		"xport write failures" },
	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
		"xport read failures" },
	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
		"xport peek failures" },

	/* IO operation retries */

	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
		"xport write retries" },
	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
		"xport read retries" },
	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
		"xport peek retries" },

	/* system and library failures */

	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
		"nvlist_pack failures" },
	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
		"nvlist_unpack failures" },
	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
		"nvlist_size failures" },
	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
		"pthread_create failures" },

	/* transport API failures */

	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
		"xport get event addrv API failures" },
	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
		"xport open API failures" },
	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
		"xport close API failures" },
	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
		"xport accept API failures" },
	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
		"xport open API retries" },

	/* FMD entry point bad arguments */

	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_init entry point" },
	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_fini entry point" },

	/* Alert logging errors */

	{ "etm_log_err", FMD_TYPE_UINT64,
		"failed to log message to log(7D)" },
	{ "etm_msg_err", FMD_TYPE_UINT64,
		"failed to log message to sysmsg(7D)" },

	/* miscellaneous stats */

	{ "etm_reset_xport", FMD_TYPE_UINT64,
		"xport resets after xport API failure" }
};


/*
 * -------------------- global data for Root ldom-------------------------
 */

ldom_hdl_t
*etm_lhp = NULL;		/* ldom pointer */

/*
 * Handle, path, and mode for the domain services library, followed by
 * function pointers for the libds entry points this module uses.  All
 * of the pointers start out NULL.
 */
static void *etm_dl_hdl = (void *)NULL;
static const char *etm_dl_path = "libds.so.1";
static int etm_dl_mode = (RTLD_NOW | RTLD_LOCAL);

static int(*etm_ds_svc_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_clnt_reg)(ds_capability_t *cap, ds_ops_t *ops) =
	(int (*)(ds_capability_t *cap, ds_ops_t *ops))NULL;
static int(*etm_ds_send_msg)(ds_hdl_t hdl, void *buf, size_t buflen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen))NULL;
static int(*etm_ds_recv_msg)(ds_hdl_t hdl, void *buf, size_t buflen,
    size_t *msglen) =
	(int (*)(ds_hdl_t hdl, void *buf, size_t buflen, size_t *msglen))NULL;
static int (*etm_ds_fini)(void) = (int (*)(void))NULL;

/* NOTE(review): presumably serializes access to iosvc_list -- confirm */
static pthread_mutex_t
iosvc_list_lock =  PTHREAD_MUTEX_INITIALIZER;

static pthread_t
etm_async_e_tid = NULL;	/* thread id of io svc async event handler */

/* prebuilt V1 FMA_EVENT message header; the xid is filled in at send time */
static etm_proto_v1_ev_hdr_t iosvc_hdr = {
	ETM_PROTO_MAGIC_NUM,	/* magic number */
	ETM_PROTO_V1,		/* default to V1, not checked */
	ETM_MSG_TYPE_FMA_EVENT,	/* Root Domain introduces only FMA events */
	0,			/* sub-type */
	0,			/* pad */
	0,			/* add the xid at the Q send time */
	ETM_PROTO_V1_TIMEOUT_NONE,
	0			/* ev_lens, 0-termed, after 1 FMA event */
};

/*
 * static iosvc_list
 */
static etm_iosvc_t iosvc_list[NUM_OF_ROOT_DOMAINS] = {
	{"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
	{"", 0}, {"", 0}
};

/*
 * A fully initialized etm_iosvc_t; the initializer is positional, so the
 * entries must follow the member order of etm_iosvc_t.
 */
static etm_iosvc_t io_svc = {
	"\0",				/* ldom_name */
	PTHREAD_COND_INITIALIZER,	/* nudges */
	PTHREAD_MUTEX_INITIALIZER,	/* protects the iosvc msg Q */
	NULL,				/* iosvc msg Q head */
	NULL,				/* iosvc msg Q tail */
	0,				/* msg Q current length */
	100,				/* msg Q max length */
	0,				/* current transaction id */
	0,				/* xid of last event posted to FMD */
	DS_INVALID_HDL,			/* DS handle */
	NULL,				/* fmd xprt handle */
	NULL,				/* tid 4 send to remote RootDomain */
	NULL,				/* tid 4 recv from remote RootDomain */
	PTHREAD_COND_INITIALIZER,	/* nudges etm_send_to_remote_root */
	PTHREAD_MUTEX_INITIALIZER,	/* protects msg_ack_cv */
	0,				/* send/recv threads are not dying */
	0,				/* flag for start sending msg Q */
	0				/* indicate if the ACK has come */
};
etm_iosvc_t *io_svc_p = &io_svc;


static uint32_t
flags;					/* flags for fmd_xprt_open */

static etm_async_event_ele_t
async_event_q[ASYNC_EVENT_Q_SIZE];	/* holds the async events */

/*
 * NOTE(review): head and tail are uint32_t values, so presumably they are
 * indices into async_event_q rather than pointers -- confirm.
 */
static uint32_t
etm_async_q_head = 0;		/* ptr to cur head of async event queue */

static uint32_t
etm_async_q_tail = 0;		/* ptr to cur tail of async event queue */

static uint32_t
etm_async_q_cur_len = 0;	/* cur length (ele cnt) of async event queue */

static uint32_t
etm_async_q_max_len = ASYNC_EVENT_Q_SIZE;
				/* max length (ele cnt) of async event queue */

static pthread_cond_t
etm_async_event_q_cv = PTHREAD_COND_INITIALIZER;
				/* nudges async event handler */

static pthread_mutex_t
etm_async_event_q_lock = PTHREAD_MUTEX_INITIALIZER;
				/* protects async event q */

/* DS protocol versions offered for the "ETM" service: just 1.0 */
static ds_ver_t
etm_iosvc_vers[] = { { 1, 0} };

/* number of entries in etm_iosvc_vers */
#define	ETM_NVERS	(sizeof (etm_iosvc_vers) / sizeof (ds_ver_t))

static ds_capability_t
iosvc_caps = {
	"ETM",				/* svc_id */
	etm_iosvc_vers,			/* vers */
	ETM_NVERS			/* number of vers */
};

static void
etm_iosvc_reg_handler(ds_hdl_t hdl, ds_cb_arg_t arg, ds_ver_t *ver,
    ds_domain_hdl_t did);

static void
etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg);

/* DS callbacks: register/unregister only; no data callback or cb argument */
static ds_ops_t
iosvc_ops = {
	etm_iosvc_reg_handler,		/* ds_reg_cb */
	etm_iosvc_unreg_handler,	/* ds_unreg_cb */
	NULL,				/* ds_data_cb */
	NULL				/* cb_arg */
};


/*
 * -------------------------- support functions ------------------------------
 */

/*
 * Design_Note:	Each failure worth reporting to FMD should be done using
 *		a single call to fmd_hdl_error() as it logs an FMA event
 *		for each call. Also be aware that all the fmd_hdl_*()
 *		format strings currently use platform specific *printf()
 *		routines; so "%p" under Solaris does not prepend "0x" to
 *		the outputted hex digits, while Linux and VxWorks do.
664 */ 665 666 667 /* 668 * etm_show_time - display the current time of day (for debugging) using 669 * the given FMD module handle and annotation string 670 */ 671 672 static void 673 etm_show_time(fmd_hdl_t *hdl, char *note_str) 674 { 675 struct timeval tmv; /* timeval */ 676 677 (void) gettimeofday(&tmv, NULL); 678 fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n", 679 note_str, tmv.tv_sec, tmv.tv_usec); 680 681 } /* etm_show_time() */ 682 683 /* 684 * etm_hexdump - hexdump the given buffer (for debugging) using 685 * the given FMD module handle 686 */ 687 688 static void 689 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt) 690 { 691 uint8_t *bp; /* byte ptr */ 692 int i, j; /* index */ 693 char cb[80]; /* char buf */ 694 unsigned int n; /* a byte of data for sprintf() */ 695 696 bp = buf; 697 j = 0; 698 699 /* 700 * Design_Note: fmd_hdl_debug() auto adds a newline if missing; 701 * hence cb exists to accumulate a longer string. 702 */ 703 704 for (i = 1; i <= byte_cnt; i++) { 705 n = *bp++; 706 (void) sprintf(&cb[j], "%2.2x ", n); 707 j += 3; 708 /* add a newline every 16 bytes or at the buffer's end */ 709 if (((i % 16) == 0) || (i >= byte_cnt)) { 710 cb[j-1] = '\0'; 711 fmd_hdl_debug(hdl, "%s\n", cb); 712 j = 0; 713 } 714 } /* for each byte in the buffer */ 715 716 } /* etm_hexdump() */ 717 718 /* 719 * etm_sleep - sleep the caller for the given number of seconds, 720 * return 0 or -errno value 721 * 722 * Design_Note: To avoid interfering with FMD's signal mask (SIGALRM) 723 * do not use [Solaris] sleep(3C) and instead use 724 * pthread_cond_wait() or nanosleep(), both of which 725 * are POSIX spec-ed to leave signal masks alone. 726 * This is needed for Solaris and Linux (domain and SP). 
727 */ 728 729 static int 730 etm_sleep(unsigned sleep_sec) 731 { 732 struct timespec tms; /* for nanosleep() */ 733 734 tms.tv_sec = sleep_sec; 735 tms.tv_nsec = 0; 736 737 if (nanosleep(&tms, NULL) < 0) { 738 /* errno assumed set by above call */ 739 return (-errno); 740 } 741 return (0); 742 743 } /* etm_sleep() */ 744 745 /* 746 * etm_conn_open - open a connection to the given transport address, 747 * return 0 and the opened connection handle 748 * or -errno value 749 * 750 * caveats: the err_substr is used in failure cases for calling 751 * fmd_hdl_error() 752 */ 753 754 static int 755 etm_conn_open(fmd_hdl_t *hdl, char *err_substr, 756 etm_xport_addr_t addr, etm_xport_conn_t *connp) 757 { 758 etm_xport_conn_t conn; /* connection to return */ 759 int nev; /* -errno value */ 760 761 if ((conn = etm_xport_open(hdl, addr)) == NULL) { 762 nev = (-errno); 763 fmd_hdl_error(hdl, "error: %s: errno %d\n", 764 err_substr, errno); 765 etm_stats.etm_xport_open_fail.fmds_value.ui64++; 766 return (nev); 767 } else { 768 *connp = conn; 769 return (0); 770 } 771 } /* etm_conn_open() */ 772 773 /* 774 * etm_conn_close - close the given connection, 775 * return 0 or -errno value 776 * 777 * caveats: the err_substr is used in failure cases for calling 778 * fmd_hdl_error() 779 */ 780 781 static int 782 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn) 783 { 784 int nev; /* -errno value */ 785 786 if (etm_xport_close(hdl, conn) == NULL) { 787 nev = (-errno); 788 fmd_hdl_error(hdl, "warning: %s: errno %d\n", 789 err_substr, errno); 790 etm_stats.etm_xport_close_fail.fmds_value.ui64++; 791 return (nev); 792 } else { 793 return (0); 794 } 795 } /* etm_conn_close() */ 796 797 /* 798 * etm_io_op - perform an IO operation on the given connection 799 * with the given buffer, 800 * accommodating MTU size and retrying op if needed, 801 * return how many bytes actually done by the op 802 * or -errno value 803 * 804 * caveats: the err_substr is used in failure cases 
for calling 805 * fmd_hdl_error() 806 */ 807 808 static ssize_t 809 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn, 810 void *buf, size_t byte_cnt, int io_op) 811 { 812 ssize_t rv; /* ret val / byte count */ 813 ssize_t n; /* gen use */ 814 uint8_t *datap; /* ptr to data */ 815 size_t mtu_sz; /* MTU size in bytes */ 816 int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t, 817 void *, size_t); 818 size_t io_sz; /* byte count for io_func_ptr */ 819 int try_cnt; /* number of tries done */ 820 int sleep_sec; /* exp backoff sleep period in sec */ 821 int sleep_rv; /* ret val from sleeping */ 822 fmd_stat_t io_retry_stat; /* IO retry stat to update */ 823 fmd_stat_t io_fail_stat; /* IO failure stat to update */ 824 825 if ((conn == NULL) || (buf == NULL)) { 826 return (-EINVAL); 827 } 828 switch (io_op) { 829 case ETM_IO_OP_RD: 830 io_func_ptr = etm_xport_read; 831 io_retry_stat = etm_stats.etm_xport_rd_retry; 832 io_fail_stat = etm_stats.etm_xport_rd_fail; 833 break; 834 case ETM_IO_OP_WR: 835 io_func_ptr = etm_xport_write; 836 io_retry_stat = etm_stats.etm_xport_wr_retry; 837 io_fail_stat = etm_stats.etm_xport_wr_fail; 838 break; 839 default: 840 return (-EINVAL); 841 } 842 if (byte_cnt == 0) { 843 return (byte_cnt); /* nop */ 844 } 845 846 /* obtain [current] MTU size */ 847 848 if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) { 849 mtu_sz = ETM_XPORT_MTU_SZ_DEF; 850 } else { 851 mtu_sz = n; 852 } 853 854 /* loop until all IO done, try limit exceeded, or real failure */ 855 856 rv = 0; 857 datap = buf; 858 while (rv < byte_cnt) { 859 io_sz = MIN((byte_cnt - rv), mtu_sz); 860 try_cnt = 0; 861 sleep_sec = 0; 862 863 /* when give up, return -errno value even if partly done */ 864 865 while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) == 866 (-EAGAIN)) { 867 try_cnt++; 868 if (try_cnt > ETM_TRY_MAX_CNT) { 869 rv = n; 870 goto func_ret; 871 } 872 if (etm_is_dying) { 873 rv = (-EINTR); 874 goto func_ret; 875 } 876 if ((sleep_rv = 
etm_sleep(sleep_sec)) < 0) { 877 rv = sleep_rv; 878 goto func_ret; 879 } 880 sleep_sec = ((sleep_sec == 0) ? 1 : 881 (sleep_sec * ETM_TRY_BACKOFF_RATE)); 882 sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP); 883 io_retry_stat.fmds_value.ui64++; 884 if (etm_debug_lvl >= 1) { 885 fmd_hdl_debug(hdl, "info: retrying io op %d " 886 "due to EAGAIN\n", io_op); 887 } 888 } /* while trying the io operation */ 889 890 if (etm_is_dying) { 891 rv = (-EINTR); 892 goto func_ret; 893 } 894 if (n < 0) { 895 rv = n; 896 goto func_ret; 897 } 898 /* avoid spinning CPU when given 0 bytes but no error */ 899 if (n == 0) { 900 if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) { 901 rv = sleep_rv; 902 goto func_ret; 903 } 904 } 905 rv += n; 906 datap += n; 907 } /* while still have more data */ 908 909 func_ret: 910 911 if (rv < 0) { 912 io_fail_stat.fmds_value.ui64++; 913 fmd_hdl_debug(hdl, "error: %s: errno %d\n", 914 err_substr, (int)(-rv)); 915 } 916 if (etm_debug_lvl >= 3) { 917 fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n", 918 io_op, (int)rv, (int)byte_cnt); 919 } 920 return (rv); 921 922 } /* etm_io_op() */ 923 924 /* 925 * etm_magic_read - read the magic number of an ETM message header 926 * from the given connection into the given buffer, 927 * return 0 or -errno value 928 * 929 * Design_Note: This routine is intended to help protect ETM from protocol 930 * framing errors as might be caused by an SP reset / crash in 931 * the middle of an ETM message send; the connection will be 932 * read from for as many bytes as needed until the magic number 933 * is found using a sliding buffer for comparisons. 
934 */ 935 936 static int 937 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr) 938 { 939 int rv; /* ret val */ 940 uint32_t magic_num; /* magic number */ 941 int byte_cnt; /* count of bytes read */ 942 uint8_t buf5[4+1]; /* sliding input buffer */ 943 int i, j; /* indices into buf5 */ 944 ssize_t n; /* gen use */ 945 uint8_t drop_buf[1024]; /* dropped bytes buffer */ 946 947 rv = 0; /* assume success */ 948 magic_num = 0; 949 byte_cnt = 0; 950 j = 0; 951 952 /* magic number bytes are sent in network (big endian) order */ 953 954 while (magic_num != ETM_PROTO_MAGIC_NUM) { 955 if ((n = etm_io_op(hdl, "bad io read on magic", 956 conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) { 957 rv = n; 958 goto func_ret; 959 } 960 byte_cnt++; 961 j = MIN((j + 1), sizeof (magic_num)); 962 if (byte_cnt < sizeof (magic_num)) { 963 continue; 964 } 965 966 if (byte_cnt > sizeof (magic_num)) { 967 etm_stats.etm_magic_drop_bytes.fmds_value.ui64++; 968 i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1); 969 drop_buf[i] = buf5[0]; 970 for (i = 0; i < j; i++) { 971 buf5[i] = buf5[i+1]; 972 } /* for sliding the buffer contents */ 973 } 974 (void) memcpy(&magic_num, &buf5[0], sizeof (magic_num)); 975 magic_num = ntohl(magic_num); 976 } /* for reading bytes until find magic number */ 977 978 func_ret: 979 980 if (byte_cnt != sizeof (magic_num)) { 981 fmd_hdl_debug(hdl, "warning: bad proto frame " 982 "implies corrupt/lost msg(s)\n"); 983 } 984 if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) { 985 i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf)); 986 fmd_hdl_debug(hdl, "info: magic drop hexdump " 987 "first %d of %d bytes:\n", i, 988 byte_cnt - sizeof (magic_num)); 989 etm_hexdump(hdl, drop_buf, i); 990 } 991 992 if (rv == 0) { 993 *magic_ptr = magic_num; 994 } 995 return (rv); 996 997 } /* etm_magic_read() */ 998 999 /* 1000 * etm_hdr_read - allocate, read, and validate a [variable sized] 1001 * ETM message header from the given connection, 1002 * return 
the allocated ETM message header 1003 * (which is guaranteed to be large enough to reuse as a 1004 * RESPONSE msg hdr) and its size 1005 * or NULL and set errno on failure 1006 */ 1007 1008 static void * 1009 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp) 1010 { 1011 uint8_t *hdrp; /* ptr to header to return */ 1012 size_t hdr_sz; /* sizeof *hdrp */ 1013 etm_proto_v1_pp_t pp; /* protocol preamble */ 1014 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 1015 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1016 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 1017 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 1018 uint32_t *lenp; /* ptr to FMA event length */ 1019 ssize_t i, n; /* gen use */ 1020 uint8_t misc_buf[ETM_MISC_BUF_SZ]; /* for var sized hdrs */ 1021 int dummy_int; /* dummy var to appease lint */ 1022 1023 hdrp = NULL; hdr_sz = 0; 1024 1025 /* read the magic number which starts the protocol preamble */ 1026 1027 if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) { 1028 errno = (-n); 1029 etm_stats.etm_magic_bad.fmds_value.ui64++; 1030 return (NULL); 1031 } 1032 1033 /* read the rest of the protocol preamble all at once */ 1034 1035 if ((n = etm_io_op(hdl, "bad io read on preamble", 1036 conn, &pp.pp_proto_ver, sizeof (pp) - sizeof (pp.pp_magic_num), 1037 ETM_IO_OP_RD)) < 0) { 1038 errno = (-n); 1039 return (NULL); 1040 } 1041 1042 /* 1043 * Design_Note: The magic number was already network decoded; but 1044 * some other preamble fields also need to be decoded, 1045 * specifically pp_xid and pp_timeout. The rest of the 1046 * preamble fields are byte sized and hence need no 1047 * decoding. 
1048 */ 1049 1050 pp.pp_xid = ntohl(pp.pp_xid); 1051 pp.pp_timeout = ntohl(pp.pp_timeout); 1052 1053 /* sanity check the header as best we can */ 1054 1055 if ((pp.pp_proto_ver < ETM_PROTO_V1) || 1056 (pp.pp_proto_ver > ETM_PROTO_V3)) { 1057 fmd_hdl_error(hdl, "error: bad proto ver %d\n", 1058 (int)pp.pp_proto_ver); 1059 errno = EPROTO; 1060 etm_stats.etm_ver_bad.fmds_value.ui64++; 1061 return (NULL); 1062 } 1063 1064 dummy_int = pp.pp_msg_type; 1065 if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) || 1066 (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) { 1067 fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int); 1068 errno = EBADMSG; 1069 etm_stats.etm_msgtype_bad.fmds_value.ui64++; 1070 return (NULL); 1071 } 1072 1073 /* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */ 1074 1075 if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 1076 1077 ev_hdrp = (void*)&misc_buf[0]; 1078 hdr_sz = sizeof (*ev_hdrp); 1079 (void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp)); 1080 1081 /* sanity check the header's timeout */ 1082 1083 if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) && 1084 (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) { 1085 errno = ETIME; 1086 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1087 return (NULL); 1088 } 1089 1090 /* get all FMA event lengths from the header */ 1091 1092 lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--; 1093 i = -1; /* cnt of length entries preceding 0 */ 1094 do { 1095 i++; lenp++; 1096 if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >= 1097 ETM_MISC_BUF_SZ) { 1098 errno = E2BIG; /* ridiculous size */ 1099 etm_stats.etm_evlens_bad.fmds_value.ui64++; 1100 return (NULL); 1101 } 1102 if ((n = etm_io_op(hdl, "bad io read on event len", 1103 conn, lenp, sizeof (*lenp), ETM_IO_OP_RD)) < 0) { 1104 errno = (-n); 1105 return (NULL); 1106 } 1107 *lenp = ntohl(*lenp); 1108 1109 } while (*lenp != 0); 1110 i += 0; /* first len already counted by sizeof(ev_hdr) */ 1111 hdr_sz += (i * sizeof (*lenp)); 1112 1113 
etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++; 1114 1115 } else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 1116 1117 ctl_hdrp = (void*)&misc_buf[0]; 1118 hdr_sz = sizeof (*ctl_hdrp); 1119 (void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp)); 1120 1121 /* sanity check the header's sub type (control selector) */ 1122 1123 if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) || 1124 (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) { 1125 fmd_hdl_error(hdl, "error: bad ctl sub type %d\n", 1126 (int)ctl_hdrp->ctl_pp.pp_sub_type); 1127 errno = EBADMSG; 1128 etm_stats.etm_subtype_bad.fmds_value.ui64++; 1129 return (NULL); 1130 } 1131 1132 /* get the control length */ 1133 1134 if ((n = etm_io_op(hdl, "bad io read on ctl len", 1135 conn, &ctl_hdrp->ctl_len, sizeof (ctl_hdrp->ctl_len), 1136 ETM_IO_OP_RD)) < 0) { 1137 errno = (-n); 1138 return (NULL); 1139 } 1140 1141 ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len); 1142 1143 etm_stats.etm_rd_hdr_control.fmds_value.ui64++; 1144 1145 } else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 1146 1147 resp_hdrp = (void*)&misc_buf[0]; 1148 hdr_sz = sizeof (*resp_hdrp); 1149 (void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp)); 1150 1151 /* sanity check the header's timeout */ 1152 1153 if (resp_hdrp->resp_pp.pp_timeout != 1154 ETM_PROTO_V1_TIMEOUT_NONE) { 1155 errno = ETIME; 1156 etm_stats.etm_timeout_bad.fmds_value.ui64++; 1157 return (NULL); 1158 } 1159 1160 /* get the response code and length */ 1161 1162 if ((n = etm_io_op(hdl, "bad io read on resp code+len", 1163 conn, &resp_hdrp->resp_code, 1164 sizeof (resp_hdrp->resp_code) 1165 + sizeof (resp_hdrp->resp_len), 1166 ETM_IO_OP_RD)) < 0) { 1167 errno = (-n); 1168 return (NULL); 1169 } 1170 1171 resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code); 1172 resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len); 1173 1174 etm_stats.etm_rd_hdr_response.fmds_value.ui64++; 1175 1176 } else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 1177 1178 sa_hdrp = (void*)&misc_buf[0]; 1179 
hdr_sz = sizeof (*sa_hdrp); 1180 (void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp)); 1181 1182 /* sanity check the header's protocol version */ 1183 1184 if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) { 1185 errno = EPROTO; 1186 etm_stats.etm_ver_bad.fmds_value.ui64++; 1187 return (NULL); 1188 } 1189 1190 /* get the priority and length */ 1191 1192 if ((n = etm_io_op(hdl, "bad io read on sa priority+len", 1193 conn, &sa_hdrp->sa_priority, 1194 sizeof (sa_hdrp->sa_priority) 1195 + sizeof (sa_hdrp->sa_len), 1196 ETM_IO_OP_RD)) < 0) { 1197 errno = (-n); 1198 return (NULL); 1199 } 1200 1201 sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority); 1202 sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len); 1203 1204 etm_stats.etm_rd_hdr_alert.fmds_value.ui64++; 1205 1206 } /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */ 1207 1208 /* 1209 * choose a header size that allows hdr reuse for RESPONSE msgs, 1210 * allocate and populate the message header, and 1211 * return alloc size to caller for later free of hdrp 1212 */ 1213 1214 hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp)); 1215 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1216 (void) memcpy(hdrp, misc_buf, hdr_sz); 1217 1218 if (etm_debug_lvl >= 3) { 1219 fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", hdr_sz); 1220 etm_hexdump(hdl, hdrp, hdr_sz); 1221 } 1222 *szp = hdr_sz; 1223 return (hdrp); 1224 1225 } /* etm_hdr_read() */ 1226 1227 /* 1228 * etm_hdr_write - create and write a [variable sized] ETM message header 1229 * to the given connection appropriate for the given FMA event 1230 * and type of nvlist encoding, 1231 * return the allocated ETM message header and its size 1232 * or NULL and set errno on failure 1233 */ 1234 1235 static void* 1236 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp, 1237 int encoding, size_t *szp) 1238 { 1239 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1240 size_t hdr_sz; /* sizeof *hdrp */ 1241 uint32_t *lenp; /* ptr to FMA event length */ 1242 size_t 
evsz; /* packed FMA event size */ 1243 ssize_t n; /* gen use */ 1244 1245 /* allocate and populate the message header for 1 FMA event */ 1246 1247 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1248 1249 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1250 1251 /* 1252 * Design_Note: Although the ETM protocol supports it, we do not (yet) 1253 * want responses/ACKs on FMA events that we send. All 1254 * such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE. 1255 */ 1256 1257 hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM; 1258 hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num); 1259 hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1; 1260 hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT; 1261 hdrp->ev_pp.pp_sub_type = 0; 1262 hdrp->ev_pp.pp_rsvd_pad = 0; 1263 hdrp->ev_pp.pp_xid = etm_xid_cur; 1264 hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid); 1265 etm_xid_cur += ETM_XID_INC; 1266 hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 1267 hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout); 1268 1269 lenp = &hdrp->ev_lens[0]; 1270 1271 if ((n = nvlist_size(evp, &evsz, encoding)) != 0) { 1272 errno = n; 1273 fmd_hdl_free(hdl, hdrp, hdr_sz); 1274 etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++; 1275 return (NULL); 1276 } 1277 1278 /* indicate 1 FMA event, network encode its length, and 0-terminate */ 1279 1280 etm_stats.etm_wr_max_ev_per_msg.fmds_value.ui64 = 1; 1281 1282 *lenp = evsz; *lenp = htonl(*lenp); lenp++; 1283 *lenp = 0; *lenp = htonl(*lenp); lenp++; 1284 1285 /* 1286 * write the network encoded header to the transport, and 1287 * return alloc size to caller for later free 1288 */ 1289 1290 if ((n = etm_io_op(hdl, "bad io write on event hdr", 1291 conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) { 1292 errno = (-n); 1293 fmd_hdl_free(hdl, hdrp, hdr_sz); 1294 return (NULL); 1295 } 1296 1297 *szp = hdr_sz; 1298 return (hdrp); 1299 1300 } /* etm_hdr_write() */ 1301 1302 /* 1303 * etm_post_to_fmd - post the given FMA event to FMD 1304 * via a FMD transport 
API call, 1305 * return 0 or -errno value 1306 * 1307 * caveats: the FMA event (evp) is freed by FMD, 1308 * thus callers of this function should 1309 * immediately discard any ptr they have to the 1310 * nvlist without freeing or dereferencing it 1311 */ 1312 1313 static int 1314 etm_post_to_fmd(fmd_hdl_t *hdl, fmd_xprt_t *fmd_xprt, nvlist_t *evp) 1315 { 1316 ssize_t ev_sz; /* sizeof *evp */ 1317 1318 (void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR); 1319 1320 if (etm_debug_lvl >= 2) { 1321 etm_show_time(hdl, "ante ev post"); 1322 } 1323 fmd_xprt_post(hdl, fmd_xprt, evp, 0); 1324 etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++; 1325 etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz; 1326 if (etm_debug_lvl >= 1) { 1327 fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp); 1328 } 1329 if (etm_debug_lvl >= 2) { 1330 etm_show_time(hdl, "post ev post"); 1331 } 1332 return (0); 1333 1334 } /* etm_post_to_fmd() */ 1335 1336 /* 1337 * Ideally we would just use syslog(3C) for outputting our messages. 1338 * Unfortunately, as this module is running within the FMA daemon context, 1339 * that would create the situation where this module's openlog() would 1340 * have the monopoly on syslog(3C) for the daemon and all its modules. 1341 * To avoid that situation, this module uses the same logic as the 1342 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D) 1343 * devices for syslog and console. 
1344 */ 1345 1346 static int 1347 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz, 1348 uint8_t *body_buf) 1349 { 1350 char *sysmessage; /* Formatted message */ 1351 size_t formatlen; /* maximum length of sysmessage */ 1352 struct strbuf ctl, dat; /* structs pushed to the logfd */ 1353 uint32_t msgid; /* syslog message ID number */ 1354 1355 if ((syslog_file == 0) && (syslog_cons == 0)) { 1356 return (0); 1357 } 1358 1359 if (etm_debug_lvl >= 2) { 1360 etm_show_time(hdl, "ante syslog post"); 1361 } 1362 1363 formatlen = body_sz + 64; /* +64 for prefix strings added below */ 1364 sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP); 1365 1366 if (syslog_file) { 1367 STRLOG_MAKE_MSGID(body_buf, msgid); 1368 (void) snprintf(sysmessage, formatlen, 1369 "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid, 1370 body_buf); 1371 1372 syslog_ctl.pri = syslog_facility | priority; 1373 1374 ctl.buf = (void *)&syslog_ctl; 1375 ctl.len = sizeof (syslog_ctl); 1376 1377 dat.buf = sysmessage; 1378 dat.len = strlen(sysmessage) + 1; 1379 1380 if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) { 1381 fmd_hdl_debug(hdl, "putmsg failed: %s\n", 1382 strerror(errno)); 1383 etm_stats.etm_log_err.fmds_value.ui64++; 1384 } 1385 } 1386 1387 if (syslog_cons) { 1388 (void) snprintf(sysmessage, formatlen, 1389 "SC Alert: %s\r\n", body_buf); 1390 1391 dat.buf = sysmessage; 1392 dat.len = strlen(sysmessage) + 1; 1393 1394 if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) { 1395 fmd_hdl_debug(hdl, "write failed: %s\n", 1396 strerror(errno)); 1397 etm_stats.etm_msg_err.fmds_value.ui64++; 1398 } 1399 } 1400 1401 fmd_hdl_free(hdl, sysmessage, formatlen); 1402 1403 if (etm_debug_lvl >= 2) { 1404 etm_show_time(hdl, "post syslog post"); 1405 } 1406 1407 return (0); 1408 } 1409 1410 1411 /* 1412 * etm_req_ver_negot - send an ETM control message to the other end requesting 1413 * that the ETM protocol version be negotiated/set 1414 */ 1415 1416 static void 1417 
etm_req_ver_negot(fmd_hdl_t *hdl) 1418 { 1419 etm_xport_addr_t *addrv; /* default dst addr(s) */ 1420 etm_xport_conn_t conn; /* connection to other end */ 1421 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1422 size_t hdr_sz; /* sizeof header */ 1423 uint8_t *body_buf; /* msg body buffer */ 1424 uint32_t body_sz; /* sizeof *body_buf */ 1425 ssize_t i; /* gen use */ 1426 1427 /* populate an ETM control msg to send */ 1428 1429 hdr_sz = sizeof (*ctl_hdrp); 1430 body_sz = (3 + 1); /* version bytes plus null byte */ 1431 1432 ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP); 1433 1434 ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM); 1435 ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1; 1436 ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL; 1437 ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ; 1438 ctl_hdrp->ctl_pp.pp_rsvd_pad = 0; 1439 etm_xid_ver_negot = etm_xid_cur; 1440 etm_xid_cur += ETM_XID_INC; 1441 ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot); 1442 ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER); 1443 ctl_hdrp->ctl_len = htonl(body_sz); 1444 1445 body_buf = (void*)&ctl_hdrp->ctl_len; 1446 body_buf += sizeof (ctl_hdrp->ctl_len); 1447 *body_buf++ = ETM_PROTO_V3; 1448 *body_buf++ = ETM_PROTO_V2; 1449 *body_buf++ = ETM_PROTO_V1; 1450 *body_buf++ = '\0'; 1451 1452 /* 1453 * open and close a connection to send the ETM control msg 1454 * to any/all of the default dst addrs 1455 */ 1456 1457 if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) { 1458 fmd_hdl_error(hdl, 1459 "error: bad ctl dst addrs errno %d\n", errno); 1460 etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++; 1461 goto func_ret; 1462 } 1463 1464 for (i = 0; addrv[i] != NULL; i++) { 1465 1466 if (etm_conn_open(hdl, "bad conn open during ver negot", 1467 addrv[i], &conn) < 0) { 1468 continue; 1469 } 1470 if (etm_io_op(hdl, "bad io write on ctl hdr+body", 1471 conn, ctl_hdrp, hdr_sz + body_sz, ETM_IO_OP_WR) >= 0) { 1472 
etm_stats.etm_wr_hdr_control.fmds_value.ui64++; 1473 etm_stats.etm_wr_body_control.fmds_value.ui64++; 1474 } 1475 (void) etm_conn_close(hdl, "bad conn close during ver negot", 1476 conn); 1477 1478 } /* foreach dst addr */ 1479 1480 func_ret: 1481 1482 if (addrv != NULL) { 1483 etm_xport_free_addrv(hdl, addrv); 1484 } 1485 fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz); 1486 1487 } /* etm_req_ver_negot() */ 1488 1489 1490 1491 /* 1492 * etm_iosvc_msg_enq - add element to tail of ETM iosvc msg queue 1493 * etm_iosvc_msg_deq - del element from head of ETM iosvc msg queue 1494 * need to grab the mutex lock before calling this routine 1495 * return >0 for success, or -errno value 1496 */ 1497 static int 1498 etm_iosvc_msg_enq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1499 { 1500 etm_iosvc_q_ele_t *newp; /* ptr to new msg q ele */ 1501 1502 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1503 fmd_hdl_debug(hdl, "warning: enq to full msg queue\n"); 1504 return (-E2BIG); 1505 } 1506 1507 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 1508 (void) memcpy(newp, msgp, sizeof (*newp)); 1509 newp->msg_nextp = NULL; 1510 1511 if (iosvc->msg_q_cur_len == 0) { 1512 iosvc->msg_q_head = newp; 1513 } else { 1514 iosvc->msg_q_tail->msg_nextp = newp; 1515 } 1516 1517 iosvc->msg_q_tail = newp; 1518 iosvc->msg_q_cur_len++; 1519 fmd_hdl_debug(hdl, "info: current msg queue length %d\n", 1520 iosvc->msg_q_cur_len); 1521 1522 return (1); 1523 1524 } /* etm_iosvc_msg_enq() */ 1525 1526 static int 1527 etm_iosvc_msg_deq(fmd_hdl_t *hdl, etm_iosvc_t *iosvc, etm_iosvc_q_ele_t *msgp) 1528 { 1529 etm_iosvc_q_ele_t *oldp; /* ptr to old msg q ele */ 1530 1531 if (iosvc->msg_q_cur_len == 0) { 1532 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 1533 return (-ENOENT); 1534 } 1535 1536 (void) memcpy(msgp, iosvc->msg_q_head, sizeof (*msgp)); 1537 msgp->msg_nextp = NULL; 1538 1539 oldp = iosvc->msg_q_head; 1540 iosvc->msg_q_head = 
iosvc->msg_q_head->msg_nextp; 1541 1542 /* 1543 * free the mem alloc-ed in etm_iosvc_msg_enq() 1544 */ 1545 fmd_hdl_free(hdl, oldp, sizeof (*oldp)); 1546 1547 iosvc->msg_q_cur_len--; 1548 if (iosvc->msg_q_cur_len == 0) { 1549 iosvc->msg_q_tail = NULL; 1550 } 1551 1552 return (1); 1553 1554 } /* etm_iosvc_msg_deq() */ 1555 1556 1557 /* 1558 * etm_msg_enq_head(): 1559 * enq the msg to the head of the Q. 1560 * If the Q is full, drop the msg at the tail then enq the msg at head. 1561 * need to grab mutex lock iosvc->msg_q_lock before calling this routine. 1562 */ 1563 static void 1564 etm_msg_enq_head(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1565 etm_iosvc_q_ele_t *msg_ele) 1566 { 1567 1568 etm_iosvc_q_ele_t *newp; /* iosvc msg ele ptr */ 1569 1570 if (iosvc->msg_q_cur_len >= iosvc->msg_q_max_len) { 1571 fmd_hdl_debug(fmd_hdl, 1572 "warning: add to head of a full msg queue." 1573 " Drop the msg at the tail\n"); 1574 /* 1575 * drop the msg at the tail 1576 */ 1577 newp = iosvc->msg_q_head; 1578 while (newp->msg_nextp != iosvc->msg_q_tail) { 1579 newp = newp->msg_nextp; 1580 } 1581 1582 /* 1583 * free the msg in iosvc->msg_q_tail->msg 1584 * free the mem pointed to by iosvc->msg_q_tail 1585 */ 1586 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail->msg, 1587 iosvc->msg_q_tail->msg_size); 1588 fmd_hdl_free(fmd_hdl, iosvc->msg_q_tail, sizeof (*newp)); 1589 iosvc->msg_q_tail = newp; 1590 iosvc->msg_q_tail->msg_nextp = NULL; 1591 iosvc->msg_q_cur_len--; 1592 } 1593 1594 /* 1595 * enq the msg to the head 1596 */ 1597 newp = fmd_hdl_zalloc(fmd_hdl, sizeof (*newp), FMD_SLEEP); 1598 (void) memcpy(newp, msg_ele, sizeof (*newp)); 1599 if (iosvc->msg_q_cur_len == 0) { 1600 newp->msg_nextp = NULL; 1601 iosvc->msg_q_tail = newp; 1602 } else { 1603 newp->msg_nextp = iosvc->msg_q_head; 1604 } 1605 iosvc->msg_q_head = newp; 1606 iosvc->msg_q_cur_len++; 1607 } /* etm_msg_enq_head() */ 1608 1609 /* 1610 * etm_isovc_cleanup(): 1611 * clean up what's in the passed-in iosvc struct, optionally 
including the msg Q 1612 */ 1613 static void 1614 etm_iosvc_cleanup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, boolean_t clean_msg_q) 1615 { 1616 1617 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1618 1619 iosvc->thr_is_dying = 1; 1620 1621 iosvc->ds_hdl = DS_INVALID_HDL; 1622 if (iosvc->fmd_xprt != NULL) { 1623 fmd_xprt_close(fmd_hdl, iosvc->fmd_xprt); 1624 iosvc->fmd_xprt = NULL; 1625 } /* if fmd-xprt has been opened */ 1626 1627 if (iosvc->send_tid != NULL) { 1628 fmd_thr_signal(fmd_hdl, iosvc->send_tid); 1629 fmd_thr_destroy(fmd_hdl, iosvc->send_tid); 1630 iosvc->send_tid = NULL; 1631 } /* if io svc send thread was created ok */ 1632 1633 if (iosvc->recv_tid != NULL) { 1634 fmd_thr_signal(fmd_hdl, iosvc->recv_tid); 1635 fmd_thr_destroy(fmd_hdl, iosvc->recv_tid); 1636 iosvc->recv_tid = NULL; 1637 } /* if root domain recv thread was created */ 1638 1639 1640 if (clean_msg_q) { 1641 iosvc->ldom_name[0] = '\0'; 1642 1643 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1644 while (iosvc->msg_q_cur_len > 0) { 1645 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, &msg_ele); 1646 fmd_hdl_free(fmd_hdl, msg_ele.msg, msg_ele.msg_size); 1647 } 1648 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1649 } 1650 1651 return; 1652 1653 } /* etm_iosvc_cleanup() */ 1654 1655 /* 1656 * etm_iosvc_lookup(using ldom_name or ds_hdl when ldom_name is empty) 1657 * not found, create one, add to iosvc_list 1658 */ 1659 etm_iosvc_t * 1660 etm_iosvc_lookup(fmd_hdl_t *fmd_hdl, char *ldom_name, ds_hdl_t ds_hdl, 1661 boolean_t iosvc_create) 1662 { 1663 uint32_t i; /* for loop var */ 1664 int32_t first_empty_slot = -1; /* remember that */ 1665 1666 for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) { 1667 if (ldom_name[0] == '\0') { 1668 /* 1669 * search by hdl passed in 1670 * the only time this is used is at ds_unreg_cb time. 1671 * there is no ldom name, only the valid ds_hdl. 1672 * find an iosvc with the matching ds_hdl. 
1673 * ignore the iosvc_create flag, should never need to 1674 * create an iosvc for ds_unreg_cb 1675 */ 1676 if (ds_hdl == iosvc_list[i].ds_hdl) { 1677 if (etm_debug_lvl >= 2) { 1678 fmd_hdl_debug(fmd_hdl, 1679 "info: found an iosvc at slot %d w/ ds_hdl %d \n", 1680 i, iosvc_list[i].ds_hdl); 1681 } 1682 if (iosvc_list[i].ldom_name[0] != '\0') 1683 if (etm_debug_lvl >= 2) { 1684 fmd_hdl_debug(fmd_hdl, 1685 "info: found an iosvc w/ ldom_name %s \n", 1686 iosvc_list[i].ldom_name); 1687 } 1688 return (&iosvc_list[i]); 1689 } else { 1690 continue; 1691 } 1692 } else if (iosvc_list[i].ldom_name[0] != '\0') { 1693 /* 1694 * this is an non-empty iosvc structure slot 1695 */ 1696 if (strcmp(ldom_name, iosvc_list[i].ldom_name) == 0) { 1697 /* 1698 * found an iosvc structure that matches the 1699 * passed in ldom_name, return the ptr 1700 */ 1701 if (etm_debug_lvl >= 2) { 1702 fmd_hdl_debug(fmd_hdl, "info: found an " 1703 "iosvc at slot %d w/ ds_hdl %d \n", 1704 i, iosvc_list[i].ds_hdl); 1705 fmd_hdl_debug(fmd_hdl, "info: found an " 1706 "iosvc w/ ldom_name %s \n", 1707 iosvc_list[i].ldom_name); 1708 } 1709 return (&iosvc_list[i]); 1710 } else { 1711 /* 1712 * non-empty slot with no-matching name, 1713 * move on to next slot. 
1714 */ 1715 continue; 1716 } 1717 } else { 1718 /* 1719 * found the 1st slot with ldom name being empty 1720 * remember the slot #, will be used for creating one 1721 */ 1722 if (first_empty_slot == -1) { 1723 first_empty_slot = i; 1724 } 1725 } 1726 } 1727 if (iosvc_create == B_TRUE && first_empty_slot >= 0) { 1728 /* 1729 * this is the case we need to add an iosvc at first_empty_slot 1730 * for the ldom_name at iosvc_list[first_empty_slot] 1731 */ 1732 fmd_hdl_debug(fmd_hdl, 1733 "info: create an iosvc with ldom name %s\n", 1734 ldom_name); 1735 i = first_empty_slot; 1736 (void) memcpy(&iosvc_list[i], &io_svc, sizeof (etm_iosvc_t)); 1737 (void) strcpy(iosvc_list[i].ldom_name, ldom_name); 1738 fmd_hdl_debug(fmd_hdl, "info: iosvc #%d has ldom name %s\n", 1739 i, iosvc_list[i].ldom_name); 1740 return (&iosvc_list[i]); 1741 } else { 1742 return (NULL); 1743 } 1744 1745 } /* etm_iosvc_lookup() */ 1746 1747 1748 /* 1749 * etm_ckpt_remove: 1750 * remove the ckpt for the iosvc element 1751 */ 1752 static void 1753 etm_ckpt_remove(fmd_hdl_t *hdl, etm_iosvc_q_ele_t *ele) { 1754 int err; /* temp error */ 1755 nvlist_t *evp = NULL; /* event pointer */ 1756 etm_proto_v1_ev_hdr_t *hdrp; /* hdr for FMA_EVENT */ 1757 char *buf; /* packed event pointer */ 1758 1759 if ((ele->ckpt_flag == ETM_CKPT_NOOP) || 1760 (etm_ldom_type != LDOM_TYPE_CONTROL)) { 1761 return; 1762 } 1763 1764 /* the pointer to the packed event in the etm message */ 1765 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)ele->msg); 1766 buf = (char *)((ptrdiff_t)hdrp + sizeof (*hdrp) 1767 + (1 * sizeof (hdrp->ev_lens[0]))); 1768 1769 /* unpack it, then uncheckpoited it */ 1770 if ((err = nvlist_unpack(buf, hdrp->ev_lens[0], &evp, 0)) != 0) { 1771 fmd_hdl_debug(hdl, "failed to unpack event(rc=%d)\n", err); 1772 return; 1773 } 1774 (void) etm_ckpt_delete(hdl, evp); 1775 nvlist_free(evp); 1776 } 1777 1778 /* 1779 * etm_send_ds_msg() 1780 * call ds_send_msg() to send the msg passed in. 
1781 * timedcond_wait for the ACK to come back. 1782 * if the ACK doesn't come in the specified time, retrun -EAGAIN. 1783 * other wise, return 1. 1784 */ 1785 int 1786 etm_send_ds_msg(fmd_hdl_t *fmd_hdl, boolean_t ckpt_remove, etm_iosvc_t *iosvc, 1787 etm_iosvc_q_ele_t *msg_ele, etm_proto_v1_ev_hdr_t *evhdrp) 1788 { 1789 uint32_t rc; /* for return code */ 1790 1791 struct timeval tv; 1792 struct timespec timeout; 1793 1794 1795 /* 1796 * call ds_send_msg(). Return (-EAGAIN) if not successful 1797 */ 1798 if ((rc = (*etm_ds_send_msg)(iosvc->ds_hdl, msg_ele->msg, 1799 msg_ele->msg_size)) != 0) { 1800 fmd_hdl_debug(fmd_hdl, "info: ds_send_msg rc %d xid %d\n", 1801 rc, evhdrp->ev_pp.pp_xid); 1802 return (-EAGAIN); 1803 } 1804 1805 /* 1806 * wait on the cv for resp msg for cur_send_xid 1807 */ 1808 (void *) pthread_mutex_lock(&iosvc->msg_ack_lock); 1809 1810 (void) gettimeofday(&tv, 0); 1811 timeout.tv_sec = tv.tv_sec + etm_fma_resp_wait_time; 1812 timeout.tv_nsec = 0; 1813 1814 fmd_hdl_debug(fmd_hdl, "info: waiting on msg_ack_cv for ldom %s\n", 1815 iosvc->ldom_name); 1816 rc = pthread_cond_timedwait(&iosvc->msg_ack_cv, &iosvc->msg_ack_lock, 1817 &timeout); 1818 (void *) pthread_mutex_unlock(&iosvc->msg_ack_lock); 1819 fmd_hdl_debug(fmd_hdl, "info: msg_ack_cv returns with rc %d\n", rc); 1820 1821 /* 1822 * check to see if ack_ok is non-zero 1823 * if non-zero, resp msg has been received 1824 */ 1825 if (iosvc->ack_ok != 0) { 1826 /* 1827 * ACK came ok, this send is successful, 1828 * tell the caller ready to send next. 
1829 * free mem alloc-ed in 1830 * etm_pack_ds_msg 1831 */ 1832 if (ckpt_remove == B_TRUE && 1833 etm_ldom_type == LDOM_TYPE_CONTROL) { 1834 etm_ckpt_remove(fmd_hdl, msg_ele); 1835 } 1836 fmd_hdl_free(fmd_hdl, msg_ele->msg, msg_ele->msg_size); 1837 iosvc->cur_send_xid++; 1838 return (1); 1839 } else { 1840 /* 1841 * the ACK did not come on time 1842 * tell the caller to resend cur_send_xid 1843 */ 1844 return (-EAGAIN); 1845 } /* iosvc->ack_ok != 0 */ 1846 } /* etm_send_ds_msg() */ 1847 1848 /* 1849 * both events from fmdo_send entry point and from SP are using the 1850 * etm_proto_v1_ev_hdr_t as its header and it will be the same header for all 1851 * ds send/recv msgs. 1852 * Idealy, we should use the hdr coming with the SP FMA event. Since fmdo_send 1853 * entry point can be called before FMA events from SP, we can't rely on 1854 * the SP FMA event hdr. Use the static hdr for packing ds msgs for fmdo_send 1855 * events. 1856 * return >0 for success, or -errno value 1857 * Design assumption: there is one FMA event per ds msg 1858 */ 1859 int 1860 etm_pack_ds_msg(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc, 1861 etm_proto_v1_ev_hdr_t *ev_hdrp, size_t hdr_sz, nvlist_t *evp, 1862 etm_pack_msg_type_t msg_type, uint_t ckpt_opt) 1863 { 1864 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1865 uint32_t *lenp; /* ptr to FMA event length */ 1866 size_t evsz; /* packed FMA event size */ 1867 char *buf; 1868 uint32_t rc; /* for return code */ 1869 char *msg; /* body of msg to be Qed */ 1870 1871 etm_iosvc_q_ele_t msg_ele; /* io svc msg Q ele */ 1872 etm_proto_v1_ev_hdr_t *evhdrp; 1873 1874 1875 if (ev_hdrp == NULL) { 1876 hdrp = &iosvc_hdr; 1877 } else { 1878 hdrp = ev_hdrp; 1879 } 1880 1881 /* 1882 * determine hdr_sz if 0, otherwise use the one passed in hdr_sz 1883 */ 1884 1885 if (hdr_sz == 0) { 1886 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1887 } 1888 1889 /* 1890 * determine evp size 1891 */ 1892 (void) nvlist_size(evp, &evsz, NV_ENCODE_XDR); 1893 
1894 /* indicate 1 FMA event, no network encoding, and 0-terminate */ 1895 lenp = &hdrp->ev_lens[0]; 1896 *lenp = evsz; 1897 1898 /* 1899 * now the total of mem needs to be alloc-ed/ds msg size is 1900 * hdr_sz + evsz 1901 * msg will be freed in etm_send_to_remote_root() after ds_send_msg() 1902 */ 1903 msg = fmd_hdl_zalloc(fmd_hdl, hdr_sz + evsz, FMD_SLEEP); 1904 1905 1906 /* 1907 * copy hdr, 0 terminate the length vector, and then evp 1908 */ 1909 (void) memcpy(msg, hdrp, sizeof (*hdrp)); 1910 hdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg); 1911 lenp = &hdrp->ev_lens[0]; 1912 lenp++; 1913 *lenp = 0; 1914 1915 buf = fmd_hdl_zalloc(fmd_hdl, evsz, FMD_SLEEP); 1916 (void) nvlist_pack(evp, (char **)&buf, &evsz, NV_ENCODE_XDR, 0); 1917 (void) memcpy(msg + hdr_sz, buf, evsz); 1918 fmd_hdl_free(fmd_hdl, buf, evsz); 1919 1920 fmd_hdl_debug(fmd_hdl, "info: hdr_sz= %d evsz= %d in etm_pack_ds_msg" 1921 "for ldom %s\n", hdr_sz, evsz, iosvc->ldom_name); 1922 msg_ele.msg = msg; 1923 msg_ele.msg_size = hdr_sz + evsz; 1924 msg_ele.ckpt_flag = ckpt_opt; 1925 1926 /* 1927 * decide what to do with the msg: 1928 * if SP ereports (msg_type == SP_MSG), always enq the msg 1929 * if not SP ereports, ie, fmd xprt control msgs, enq it _only_ after 1930 * resource.fm.xprt.run has been sent (which sets start_sending_Q to 1) 1931 */ 1932 if ((msg_type == SP_MSG) || 1933 (msg_type != SP_MSG) && (iosvc->start_sending_Q == 1)) { 1934 /* 1935 * this is the case when the msg needs to be enq-ed 1936 */ 1937 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 1938 rc = etm_iosvc_msg_enq(fmd_hdl, iosvc, &msg_ele); 1939 if ((rc > 0) && (ckpt_opt & ETM_CKPT_SAVE) && 1940 (etm_ldom_type == LDOM_TYPE_CONTROL)) { 1941 (void) etm_ckpt_add(fmd_hdl, evp); 1942 } 1943 if (iosvc->msg_q_cur_len == 1) 1944 (void) pthread_cond_signal(&iosvc->msg_q_cv); 1945 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 1946 } else { 1947 /* 1948 * fmd RDWR xprt procotol startup msgs, send it now! 
1949 */ 1950 iosvc->ack_ok = 0; 1951 evhdrp = (etm_proto_v1_ev_hdr_t *)((ptrdiff_t)msg_ele.msg); 1952 evhdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 1953 while (!iosvc->ack_ok && iosvc->ds_hdl != DS_INVALID_HDL && 1954 !etm_is_dying) { 1955 if (etm_send_ds_msg(fmd_hdl, B_FALSE, iosvc, &msg_ele, 1956 evhdrp) < 0) { 1957 continue; 1958 } 1959 } 1960 if (msg_type == FMD_XPRT_RUN_MSG) 1961 iosvc->start_sending_Q = 1; 1962 } 1963 1964 return (rc); 1965 1966 } /* etm_pack_ds_msg() */ 1967 1968 /* 1969 * Design_Note: For all etm_resp_q_*() functions and etm_resp_q_* globals, 1970 * the mutex etm_resp_q_lock must be held by the caller. 1971 */ 1972 1973 /* 1974 * etm_resp_q_enq - add element to tail of ETM responder queue 1975 * etm_resp_q_deq - del element from head of ETM responder queue 1976 * 1977 * return >0 for success, or -errno value 1978 */ 1979 1980 static int 1981 etm_resp_q_enq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 1982 { 1983 etm_resp_q_ele_t *newp; /* ptr to new resp q ele */ 1984 1985 if (etm_resp_q_cur_len >= etm_resp_q_max_len) { 1986 fmd_hdl_debug(hdl, "warning: enq to full responder queue\n"); 1987 etm_stats.etm_enq_drop_resp_q.fmds_value.ui64++; 1988 return (-E2BIG); 1989 } 1990 1991 newp = fmd_hdl_zalloc(hdl, sizeof (*newp), FMD_SLEEP); 1992 (void) memcpy(newp, rqep, sizeof (*newp)); 1993 newp->rqe_nextp = NULL; 1994 1995 if (etm_resp_q_cur_len == 0) { 1996 etm_resp_q_head = newp; 1997 } else { 1998 etm_resp_q_tail->rqe_nextp = newp; 1999 } 2000 etm_resp_q_tail = newp; 2001 etm_resp_q_cur_len++; 2002 etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len; 2003 2004 return (1); 2005 2006 } /* etm_resp_q_enq() */ 2007 2008 static int 2009 etm_resp_q_deq(fmd_hdl_t *hdl, etm_resp_q_ele_t *rqep) 2010 { 2011 etm_resp_q_ele_t *oldp; /* ptr to old resp q ele */ 2012 2013 if (etm_resp_q_cur_len == 0) { 2014 fmd_hdl_debug(hdl, "warning: deq from empty responder queue\n"); 2015 etm_stats.etm_deq_drop_resp_q.fmds_value.ui64++; 2016 return (-ENOENT); 
	}

	(void) memcpy(rqep, etm_resp_q_head, sizeof (*rqep));
	rqep->rqe_nextp = NULL;

	oldp = etm_resp_q_head;
	etm_resp_q_head = etm_resp_q_head->rqe_nextp;
	fmd_hdl_free(hdl, oldp, sizeof (*oldp));

	etm_resp_q_cur_len--;
	etm_stats.etm_resp_q_cur_len.fmds_value.ui64 = etm_resp_q_cur_len;
	if (etm_resp_q_cur_len == 0) {
		etm_resp_q_tail = NULL;
	}

	return (1);

} /* etm_resp_q_deq() */

/*
 * etm_maybe_enq_response - check the given message header to see
 *				whether a response has been requested,
 *				if so then enqueue the given connection
 *				and header for later transport by the
 *				responder thread as an ETM response msg,
 *				return 0 for nop, >0 success, or -errno value
 *
 * hdl       - FMD handle, used for debug logging and passed to the enqueue
 * conn      - connection the response is to be sent back on
 * hdrp      - message header; it is read through an etm_proto_v1_pp_t
 *             pointer, so it must start with the common protocol preamble
 * hdr_sz    - size of the buffer at hdrp, recorded in the queue element
 * resp_code - response code recorded in the queue element
 *
 * returns 0 when pp_timeout is ETM_PROTO_V1_TIMEOUT_NONE (nothing enqueued),
 * -EINVAL when the message type is not FMA_EVENT, ALERT or CONTROL,
 * otherwise whatever etm_resp_q_enq() returns
 *
 * the queue element stores the hdrp pointer itself, not a copy of the
 * header; etm_responder() frees rqe_hdrp and closes rqe_conn once it has
 * dequeued the element
 */

static ssize_t
etm_maybe_enq_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, uint32_t hdr_sz, int32_t resp_code)
{
	ssize_t			rv;		/* ret val */
	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
	uint8_t			orig_msg_type;	/* orig hdr's message type */
	uint32_t		orig_timeout;	/* orig hdr's timeout */
	etm_resp_q_ele_t	rqe;		/* responder queue ele */

	ppp = hdrp;
	orig_msg_type = ppp->pp_msg_type;
	orig_timeout = ppp->pp_timeout;

	/* bail out now if no response is to be sent */

	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
		return (0);
	} /* if a nop */

	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
		fmd_hdl_debug(hdl, "warning: bad msg type 0x%x\n",
		    orig_msg_type);
		return (-EINVAL);
	} /* if inappropriate hdr for a response msg */

	/*
	 * enqueue the msg hdr and nudge the responder thread
	 * if the responder queue was previously empty
	 */

	/*
	 * rqe is a stack local handed to etm_resp_q_enq() by address;
	 * NOTE(review): presumably the enqueue copies it - confirm there
	 */
	rqe.rqe_conn = conn;
	rqe.rqe_hdrp = hdrp;
	rqe.rqe_hdr_sz = hdr_sz;
	rqe.rqe_resp_code = resp_code;

	(void) pthread_mutex_lock(&etm_resp_q_lock);
	rv = etm_resp_q_enq(hdl, &rqe);
	/* a length of exactly 1 is taken to mean the queue was empty before */
	if (etm_resp_q_cur_len == 1)
		(void) pthread_cond_signal(&etm_resp_q_cv);
	(void) pthread_mutex_unlock(&etm_resp_q_lock);

	return (rv);

} /* etm_maybe_enq_response() */

/*
 * Design_Note:	We rely on the fact that all message types have
 *		a common protocol preamble; if this fact should
 *		ever change it may break the code below. We also
 *		rely on the fact that FMA_EVENT and CONTROL headers
 *		returned by etm_hdr_read() will be sized large enough
 *		to reuse them as RESPONSE headers if the remote endpt
 *		asked for a response via the pp_timeout field.
 */

/*
 * etm_send_response - use the given message header and response code
 *			to construct an appropriate response message,
 *			and send it back on the given connection,
 *			return >0 for success, or -errno value
 *
 * hdl       - FMD handle, used for logging and temporary allocation
 * conn      - connection to write the response to
 * hdrp      - original message header; it is OVERWRITTEN in place and
 *             reused as the response header (see Design_Note above)
 * resp_code - value stored in the response header's resp_code field
 *
 * the return value is that of the etm_io_op() write; the response
 * statistics are only bumped when that write did not fail
 */

static ssize_t
etm_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, int32_t resp_code)
{
	ssize_t			rv;		/* ret val */
	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
	uint8_t			resp_body[4];	/* response body if needed */
	uint8_t			*resp_msg;	/* response hdr+body */
	size_t			hdr_sz;		/* sizeof response hdr */
	uint8_t			orig_msg_type;	/* orig hdr's message type */

	/*
	 * capture the original message type first; the preamble is
	 * rewritten below because ppp and resp_hdrp alias the same buffer
	 */
	ppp = hdrp;
	orig_msg_type = ppp->pp_msg_type;

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante resp send");
	}

	/* reuse the given header as a response header */

	resp_hdrp = hdrp;
	resp_hdrp->resp_code = resp_code;
	resp_hdrp->resp_len = 0;	/* default is empty body */

	/*
	 * a version negotiation request is answered with a 3 byte body:
	 * ETM_PROTO_V2, ETM_PROTO_V3 and a trailing 0 byte
	 */
	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
		resp_body[0] = ETM_PROTO_V2;
		resp_body[1] = ETM_PROTO_V3;
		resp_body[2] = 0;
		resp_hdrp->resp_len = 3;
	} /* if should send our/negotiated proto ver in resp body */

	/* respond with the proto ver that was negotiated */

	resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
	resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
	resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;

	/*
	 * send the whole response msg in one write, header and body;
	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
	 * ie, if the body is empty. update the response stats.
	 */

	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);

	resp_msg = hdrp;
	if (resp_hdrp->resp_len > 0) {
		resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len,
		    FMD_SLEEP);
		(void) memcpy(resp_msg, resp_hdrp, hdr_sz);
		(void) memcpy(resp_msg + hdr_sz, resp_body,
		    resp_hdrp->resp_len);
	}

	/* etm_write_lock is held for the duration of the write */
	(void) pthread_mutex_lock(&etm_write_lock);
	rv = etm_io_op(hdl, "bad io write on resp msg", conn,
	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR);
	(void) pthread_mutex_unlock(&etm_write_lock);
	if (rv < 0) {
		goto func_ret;
	}

	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
	etm_stats.etm_wr_body_response.fmds_value.ui64++;

	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
	    "xid 0x%x code %d len %u\n",
	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
	    resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code,
	    resp_hdrp->resp_len);
func_ret:

	/*
	 * resp_msg is a separate allocation only when a body was attached;
	 * otherwise it aliases hdrp, which is not freed here
	 */
	if (resp_hdrp->resp_len > 0) {
		fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len);
	}
	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post resp send");
	}
	return (rv);

} /* etm_send_response() */

/*
 * etm_reset_xport - reset the transport layer (via fini;init)
 *			presumably for an error condition we cannot
 *			otherwise recover from (ex: hung LDC channel)
 *
 * caveats -
no checking/locking is done to ensure an existing connection 2200 * is idle during an xport reset; we don't want to deadlock 2201 * and presumably the transport is stuck/unusable anyway 2202 */ 2203 2204 static void 2205 etm_reset_xport(fmd_hdl_t *hdl) 2206 { 2207 (void) etm_xport_fini(hdl); 2208 (void) etm_xport_init(hdl); 2209 etm_stats.etm_reset_xport.fmds_value.ui64++; 2210 2211 } /* etm_reset_xport() */ 2212 2213 /* 2214 * etm_handle_new_conn - receive an ETM message sent from the other end via 2215 * the given open connection, pull out any FMA events 2216 * and post them to the local FMD (or handle any ETM 2217 * control or response msg); when done, close the 2218 * connection 2219 */ 2220 2221 static void 2222 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn) 2223 { 2224 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 2225 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 2226 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 2227 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 2228 etm_iosvc_t *iosvc; /* iosvc data structure */ 2229 int32_t resp_code; /* response code */ 2230 ssize_t enq_rv; /* resp_q enqueue status */ 2231 size_t hdr_sz; /* sizeof header */ 2232 size_t evsz; /* FMA event size */ 2233 uint8_t *body_buf; /* msg body buffer */ 2234 uint32_t body_sz; /* sizeof body_buf */ 2235 uint32_t ev_cnt; /* count of FMA events */ 2236 uint8_t *bp; /* byte ptr within body_buf */ 2237 nvlist_t *evp; /* ptr to unpacked FMA event */ 2238 char *class; /* FMA event class */ 2239 ssize_t i, n; /* gen use */ 2240 int should_reset_xport; /* bool to reset xport */ 2241 char ldom_name[MAX_LDOM_NAME]; /* ldom name */ 2242 int rc; /* return code */ 2243 uint64_t did; /* domain id */ 2244 2245 2246 if (etm_debug_lvl >= 2) { 2247 etm_show_time(hdl, "ante conn handle"); 2248 } 2249 fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn); 2250 2251 should_reset_xport = 0; 2252 ev_hdrp = NULL; 2253 ctl_hdrp = NULL; 2254 resp_hdrp = 
NULL; 2255 sa_hdrp = NULL; 2256 body_buf = NULL; 2257 class = NULL; 2258 evp = NULL; 2259 resp_code = 0; /* default is success */ 2260 enq_rv = 0; /* default is nop, ie, did not enqueue */ 2261 2262 /* read a network decoded message header from the connection */ 2263 2264 if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) { 2265 /* errno assumed set by above call */ 2266 should_reset_xport = (errno == ENOTACTIVE); 2267 fmd_hdl_debug(hdl, "error: FMA event dropped: " 2268 "bad hdr read errno %d\n", errno); 2269 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2270 goto func_ret; 2271 } 2272 2273 /* 2274 * handle the message based on its preamble pp_msg_type 2275 * which is known to be valid from etm_hdr_read() checks 2276 */ 2277 2278 if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 2279 2280 fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n"); 2281 2282 /* allocate buf large enough for whole body / all FMA events */ 2283 2284 body_sz = 0; 2285 for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) { 2286 body_sz += ev_hdrp->ev_lens[i]; 2287 } /* for summing sizes of all FMA events */ 2288 if (i > etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64) 2289 etm_stats.etm_rd_max_ev_per_msg.fmds_value.ui64 = i; 2290 ev_cnt = i; 2291 2292 if (etm_debug_lvl >= 1) { 2293 fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n", 2294 ev_cnt, body_sz); 2295 } 2296 2297 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2298 2299 /* read all the FMA events at once */ 2300 2301 if ((n = etm_io_op(hdl, "FMA event dropped: " 2302 "bad io read on event bodies", conn, body_buf, body_sz, 2303 ETM_IO_OP_RD)) < 0) { 2304 should_reset_xport = (n == -ENOTACTIVE); 2305 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 2306 goto func_ret; 2307 } 2308 2309 etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz; 2310 etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt; 2311 2312 /* 2313 * now that we've read the entire ETM msg from the conn, 2314 * which avoids later ETM 
protocol framing errors if we didn't, 2315 * check for dup msg/xid against last good FMD posting, 2316 * if a dup then resend response but skip repost to FMD 2317 */ 2318 2319 if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_logged_ev) { 2320 enq_rv = etm_maybe_enq_response(hdl, conn, 2321 ev_hdrp, hdr_sz, 0); 2322 fmd_hdl_debug(hdl, "info: skipping dup FMA event post " 2323 "xid 0x%x\n", etm_xid_posted_logged_ev); 2324 etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++; 2325 goto func_ret; 2326 } 2327 2328 /* unpack each FMA event and post it to FMD */ 2329 2330 bp = body_buf; 2331 for (i = 0; i < ev_cnt; i++) { 2332 if ((n = nvlist_unpack((char *)bp, 2333 ev_hdrp->ev_lens[i], &evp, 0)) != 0) { 2334 resp_code = (-n); 2335 enq_rv = etm_maybe_enq_response(hdl, conn, 2336 ev_hdrp, hdr_sz, resp_code); 2337 fmd_hdl_error(hdl, "error: FMA event dropped: " 2338 "bad event body unpack errno %d\n", n); 2339 if (etm_debug_lvl >= 2) { 2340 fmd_hdl_debug(hdl, "info: FMA event " 2341 "hexdump %d bytes:\n", 2342 ev_hdrp->ev_lens[i]); 2343 etm_hexdump(hdl, bp, 2344 ev_hdrp->ev_lens[i]); 2345 } 2346 etm_stats.etm_os_nvlist_unpack_fail.fmds_value. 2347 ui64++; 2348 etm_stats.etm_rd_drop_fmaevent.fmds_value. 
2349 ui64++; 2350 bp += ev_hdrp->ev_lens[i]; 2351 continue; 2352 } 2353 2354 if (etm_debug_lvl >= 1) { 2355 (void) nvlist_lookup_string(evp, FM_CLASS, 2356 &class); 2357 if (class == NULL) { 2358 class = "NULL"; 2359 } 2360 fmd_hdl_debug(hdl, "info: FMA event %p " 2361 "class %s\n", evp, class); 2362 } 2363 2364 rc = nvlist_size(evp, &evsz, NV_ENCODE_XDR); 2365 fmd_hdl_debug(hdl, 2366 "info: evp size before pack ds msg %d\n", evsz); 2367 ldom_name[0] = '\0'; 2368 rc = etm_filter_find_ldom_id(hdl, evp, ldom_name, 2369 MAX_LDOM_NAME, &did); 2370 2371 /* 2372 * if rc is zero and the ldom_name is not "primary", 2373 * the evp belongs to a root domain, put the evp in an 2374 * outgoing etm queue, 2375 * in all other cases, whether ldom_name is primary or 2376 * can't find a ldom name, call etm_post_to_fmd 2377 */ 2378 if ((rc == 0) && strcmp(ldom_name, "primary") && 2379 strcmp(ldom_name, "")) { 2380 /* 2381 * use the ldom_name, guaranteered at this point 2382 * to be a valid ldom name/non-NULL, to find the 2383 * iosvc data. 
2384 * add an iosvc struct if can not find one 2385 */ 2386 (void) pthread_mutex_unlock(&iosvc_list_lock); 2387 iosvc = etm_iosvc_lookup(hdl, ldom_name, 2388 DS_INVALID_HDL, B_TRUE); 2389 (void) pthread_mutex_unlock(&iosvc_list_lock); 2390 if (iosvc == NULL) { 2391 fmd_hdl_debug(hdl, 2392 "error: can't find iosvc for ldom " 2393 "name %s\n", ldom_name); 2394 } else { 2395 resp_code = 0; 2396 (void) etm_pack_ds_msg(hdl, iosvc, 2397 ev_hdrp, hdr_sz, evp, 2398 SP_MSG, ETM_CKPT_SAVE); 2399 /* 2400 * call the new fmd_xprt_log() 2401 */ 2402 fmd_xprt_log(hdl, etm_fmd_xprt, evp, 0); 2403 etm_xid_posted_logged_ev = 2404 ev_hdrp->ev_pp.pp_xid; 2405 } 2406 } else { 2407 /* 2408 * post the fma event to the control fmd 2409 */ 2410 resp_code = etm_post_to_fmd(hdl, etm_fmd_xprt, 2411 evp); 2412 if (resp_code >= 0) { 2413 etm_xid_posted_logged_ev = 2414 ev_hdrp->ev_pp.pp_xid; 2415 } 2416 } 2417 2418 evp = NULL; 2419 enq_rv = etm_maybe_enq_response(hdl, conn, 2420 ev_hdrp, hdr_sz, resp_code); 2421 bp += ev_hdrp->ev_lens[i]; 2422 } /* foreach FMA event in the body buffer */ 2423 2424 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 2425 2426 ctl_hdrp = (void*)ev_hdrp; 2427 2428 fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n"); 2429 if (etm_debug_lvl >= 1) { 2430 fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n", 2431 (int)ctl_hdrp->ctl_pp.pp_sub_type, 2432 ctl_hdrp->ctl_pp.pp_xid); 2433 } 2434 2435 /* 2436 * if we have a VER_NEGOT_REQ read the body and validate 2437 * the protocol version set contained therein, 2438 * otherwise we have a PING_REQ (which has no body) 2439 * and we [also] fall thru to the code which sends a 2440 * response msg if the pp_timeout field requested one 2441 */ 2442 2443 if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) { 2444 2445 body_sz = ctl_hdrp->ctl_len; 2446 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2447 2448 if ((n = etm_io_op(hdl, "bad io read on ctl body", 2449 conn, body_buf, body_sz, 
ETM_IO_OP_RD)) < 0) { 2450 should_reset_xport = (n == -ENOTACTIVE); 2451 goto func_ret; 2452 } 2453 2454 /* complain if version set completely incompatible */ 2455 2456 for (i = 0; i < body_sz; i++) { 2457 if ((body_buf[i] == ETM_PROTO_V1) || 2458 (body_buf[i] == ETM_PROTO_V2) || 2459 (body_buf[i] == ETM_PROTO_V3)) { 2460 break; 2461 } 2462 } 2463 if (i >= body_sz) { 2464 etm_stats.etm_ver_bad.fmds_value.ui64++; 2465 resp_code = (-EPROTO); 2466 } 2467 2468 } /* if got version set request */ 2469 2470 etm_stats.etm_rd_body_control.fmds_value.ui64++; 2471 2472 enq_rv = etm_maybe_enq_response(hdl, conn, 2473 ctl_hdrp, hdr_sz, resp_code); 2474 2475 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 2476 2477 resp_hdrp = (void*)ev_hdrp; 2478 2479 fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n"); 2480 if (etm_debug_lvl >= 1) { 2481 fmd_hdl_debug(hdl, "info: resp xid 0x%x\n", 2482 (int)resp_hdrp->resp_pp.pp_xid); 2483 } 2484 2485 body_sz = resp_hdrp->resp_len; 2486 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2487 2488 if ((n = etm_io_op(hdl, "bad io read on resp len", 2489 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2490 should_reset_xport = (n == -ENOTACTIVE); 2491 goto func_ret; 2492 } 2493 2494 etm_stats.etm_rd_body_response.fmds_value.ui64++; 2495 2496 /* 2497 * look up the xid to interpret the response body 2498 * 2499 * ping is a nop; for ver negot confirm that a supported 2500 * protocol version was negotiated and remember which one 2501 */ 2502 2503 if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) && 2504 (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) { 2505 etm_stats.etm_xid_bad.fmds_value.ui64++; 2506 goto func_ret; 2507 } 2508 2509 if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) { 2510 if ((body_buf[0] < ETM_PROTO_V1) || 2511 (body_buf[0] > ETM_PROTO_V3)) { 2512 etm_stats.etm_ver_bad.fmds_value.ui64++; 2513 goto func_ret; 2514 } 2515 etm_resp_ver = body_buf[0]; 2516 } /* if have resp to last req to negotiate proto ver 
*/ 2517 2518 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 2519 2520 sa_hdrp = (void*)ev_hdrp; 2521 2522 fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n"); 2523 if (etm_debug_lvl >= 1) { 2524 fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n", 2525 (int)sa_hdrp->sa_pp.pp_sub_type, 2526 sa_hdrp->sa_pp.pp_xid); 2527 } 2528 2529 body_sz = sa_hdrp->sa_len; 2530 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 2531 2532 if ((n = etm_io_op(hdl, "bad io read on sa body", 2533 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 2534 should_reset_xport = (n == -ENOTACTIVE); 2535 goto func_ret; 2536 } 2537 2538 etm_stats.etm_rd_body_alert.fmds_value.ui64++; 2539 2540 /* 2541 * now that we've read the entire ETM msg from the conn, 2542 * which avoids later ETM protocol framing errors if we didn't, 2543 * check for dup msg/xid against last good syslog posting, 2544 * if a dup then resend response but skip repost to syslog 2545 */ 2546 2547 if (sa_hdrp->sa_pp.pp_xid == etm_xid_posted_sa) { 2548 enq_rv = etm_maybe_enq_response(hdl, conn, 2549 sa_hdrp, hdr_sz, 0); 2550 fmd_hdl_debug(hdl, "info: skipping dup ALERT post " 2551 "xid 0x%x\n", etm_xid_posted_sa); 2552 etm_stats.etm_rd_dup_alert.fmds_value.ui64++; 2553 goto func_ret; 2554 } 2555 2556 resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority, 2557 body_sz, body_buf); 2558 if (resp_code >= 0) { 2559 etm_xid_posted_sa = sa_hdrp->sa_pp.pp_xid; 2560 } 2561 enq_rv = etm_maybe_enq_response(hdl, conn, 2562 sa_hdrp, hdr_sz, resp_code); 2563 } /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */ 2564 2565 func_ret: 2566 2567 if (etm_debug_lvl >= 2) { 2568 etm_show_time(hdl, "post conn handle"); 2569 } 2570 2571 /* 2572 * if no responder ele was enqueued, close the conn now 2573 * and free the ETM msg hdr; the ETM msg body is not needed 2574 * by the responder thread and should always be freed here 2575 */ 2576 2577 if (enq_rv <= 0) { 2578 (void) etm_conn_close(hdl, "bad conn close after msg recv", 
2579 conn); 2580 if (ev_hdrp != NULL) { 2581 fmd_hdl_free(hdl, ev_hdrp, hdr_sz); 2582 } 2583 } 2584 if (body_buf != NULL) { 2585 fmd_hdl_free(hdl, body_buf, body_sz); 2586 } 2587 if (should_reset_xport) { 2588 etm_reset_xport(hdl); 2589 } 2590 } /* etm_handle_new_conn() */ 2591 2592 /* 2593 * etm_handle_bad_accept - recover from a failed connection acceptance 2594 */ 2595 2596 static void 2597 etm_handle_bad_accept(fmd_hdl_t *hdl, int nev) 2598 { 2599 int should_reset_xport; /* bool to reset xport */ 2600 2601 should_reset_xport = (nev == -ENOTACTIVE); 2602 fmd_hdl_debug(hdl, "error: bad conn accept errno %d\n", (-nev)); 2603 etm_stats.etm_xport_accept_fail.fmds_value.ui64++; 2604 (void) etm_sleep(etm_bad_acc_to_sec); /* avoid spinning CPU */ 2605 if (should_reset_xport) { 2606 etm_reset_xport(hdl); 2607 } 2608 } /* etm_handle_bad_accept() */ 2609 2610 /* 2611 * etm_server - loop forever accepting new connections 2612 * using the given FMD handle, 2613 * handling any ETM msgs sent from the other side 2614 * via each such connection 2615 */ 2616 2617 static void 2618 etm_server(void *arg) 2619 { 2620 etm_xport_conn_t conn; /* connection handle */ 2621 int nev; /* -errno val */ 2622 fmd_hdl_t *hdl; /* FMD handle */ 2623 2624 hdl = arg; 2625 2626 fmd_hdl_debug(hdl, "info: connection server starting\n"); 2627 2628 /* 2629 * Restore the checkpointed events and dispatch them before starting to 2630 * receive more events from the sp. 
	 */
	etm_ckpt_recover(hdl);

	while (!etm_is_dying) {

		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
			/* errno assumed set by above call */
			/* save errno before any later call can change it */
			nev = (-errno);
			if (etm_is_dying) {
				break;
			}
			etm_handle_bad_accept(hdl, nev);
			continue;
		}

		/* handle the new message/connection, closing it when done */

		etm_handle_new_conn(hdl, conn);

	} /* while accepting new connections until ETM dies */

	/* ETM is dying (probably due to "fmadm unload etm") */

	fmd_hdl_debug(hdl, "info: connection server is dying\n");

} /* etm_server() */

/*
 * etm_responder - loop forever waiting for new responder queue elements
 *		to be enqueued, for each one constructing and sending
 *		an ETM response msg to the other side, and closing its
 *		associated connection when appropriate
 *
 *	this thread exists to ensure that the etm_server() thread
 *	never pends indefinitely waiting on the xport write lock, and is
 *	hence always available to accept new connections and handle
 *	incoming messages
 *
 *	this design relies on the fact that each connection accepted and
 *	returned by the ETM xport layer is unique, and each can be closed
 *	independently of the others while multiple connections are
 *	outstanding
 *
 *	arg is the FMD handle. on the way out, any elements still queued
 *	are dequeued, their connections closed and their headers freed
 *	without a response being sent
 */

static void
etm_responder(void *arg)
{
	ssize_t			n;		/* gen use */
	fmd_hdl_t		*hdl;		/* FMD handle */
	etm_resp_q_ele_t	rqe;		/* responder queue ele */

	hdl = arg;

	fmd_hdl_debug(hdl, "info: responder server starting\n");

	while (!etm_is_dying) {

		(void) pthread_mutex_lock(&etm_resp_q_lock);

		while (etm_resp_q_cur_len == 0) {
			(void) pthread_cond_wait(&etm_resp_q_cv,
			    &etm_resp_q_lock);
			/* the wait returns with the lock held; drop it */
			if (etm_is_dying) {
				(void) pthread_mutex_unlock(&etm_resp_q_lock);
				goto func_ret;
			}
		} /* while the responder queue is empty, wait to be nudged */

		/*
		 * for every responder ele that has been enqueued,
		 * dequeue and send it as an ETM response msg,
		 * closing its associated conn and freeing its hdr
		 *
		 * enter the queue draining loop holding the responder
		 * queue lock, but do not hold the lock indefinitely
		 * (the actual send may pend us indefinitely),
		 * so that other threads will never pend for long
		 * trying to enqueue a new element
		 */

		while (etm_resp_q_cur_len > 0) {

			(void) etm_resp_q_deq(hdl, &rqe);
			(void) pthread_mutex_unlock(&etm_resp_q_lock);

			if ((n = etm_send_response(hdl, rqe.rqe_conn,
			    rqe.rqe_hdrp, rqe.rqe_resp_code)) < 0) {
				fmd_hdl_error(hdl, "error: bad resp send "
				    "errno %d\n", (-n));
			}

			/* conn and hdr are released whether or not the send worked */
			(void) etm_conn_close(hdl, "bad conn close after resp",
			    rqe.rqe_conn);
			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);

			/* the queue lock is NOT held on this exit path */
			if (etm_is_dying) {
				goto func_ret;
			}
			(void) pthread_mutex_lock(&etm_resp_q_lock);

		} /* while draining the responder queue */

		(void) pthread_mutex_unlock(&etm_resp_q_lock);

	} /* while awaiting and sending resp msgs until ETM dies */

func_ret:

	/* ETM is dying (probably due to "fmadm unload etm") */

	fmd_hdl_debug(hdl, "info: responder server is dying\n");

	/* every path reaching here has released the queue lock */
	(void) pthread_mutex_lock(&etm_resp_q_lock);
	if (etm_resp_q_cur_len > 0) {
		fmd_hdl_error(hdl, "warning: %d response msgs dropped\n",
		    (int)etm_resp_q_cur_len);
		while (etm_resp_q_cur_len > 0) {
			(void) etm_resp_q_deq(hdl, &rqe);
			(void) etm_conn_close(hdl, "bad conn close after deq",
			    rqe.rqe_conn);
			fmd_hdl_free(hdl, rqe.rqe_hdrp, rqe.rqe_hdr_sz);
		}
	}
	(void) pthread_mutex_unlock(&etm_resp_q_lock);

} /* etm_responder() */

/*
 * etm_init_alloc - allocate size bytes through fmd_hdl_alloc() using the
 *		file-scope init_hdl handle and FMD_SLEEP
 *		NOTE(review): presumably registered as an allocator
 *		callback for a library - confirm at the use site
 */
static void *
etm_init_alloc(size_t size)
{
	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
}

/*
 * etm_init_free - release a buffer of the given size through
 *		fmd_hdl_free() using the file-scope init_hdl handle
 */
static void
etm_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}

/*
 * ---------------------root ldom support functions -----------------------
 */

/*
 * use a static array async_event_q instead of dynamically allocated mem queue
 * for etm_async_q_enq and etm_async_q_deq.
 * This is not running in an fmd aux thread, can't use the fmd_hdl_* funcs.
 * caller needs to grab the mutex lock before calling this func.
 * return >0 for success, or -errno value
 *
 * the element is copied into the slot at etm_async_q_tail, which then
 * advances and wraps to 0 at etm_async_q_max_len; -E2BIG is returned
 * when the queue already holds etm_async_q_max_len elements
 */
static int
etm_async_q_enq(etm_async_event_ele_t *async_e)
{

	if (etm_async_q_cur_len >= etm_async_q_max_len) {
		/* etm_stats.etm_enq_drop_async_q.fmds_value.ui64++; */
		return (-E2BIG);
	}

	(void) memcpy(&async_event_q[etm_async_q_tail], async_e,
	    sizeof (*async_e));

	etm_async_q_tail++;
	if (etm_async_q_tail == etm_async_q_max_len) {
		etm_async_q_tail = 0;
	}
	etm_async_q_cur_len++;

/* etm_stats.etm_async_q_cur_len.fmds_value.ui64 = etm_async_q_cur_len; */

	return (1);

} /* etm_async_q_enq() */


/*
 * etm_async_q_deq - copy the element at etm_async_q_head into *async_e,
 * advance the head (wrapping to 0 at etm_async_q_max_len) and shrink
 * the queue by one.
 * return >0 for success, or -ENOENT when the queue is empty, in which
 * case *async_e is left untouched
 */
static int
etm_async_q_deq(etm_async_event_ele_t *async_e)
{

	if (etm_async_q_cur_len == 0) {
		/* etm_stats.etm_deq_drop_async_q.fmds_value.ui64++; */
		return (-ENOENT);
	}

	(void) memcpy(async_e, &async_event_q[etm_async_q_head],
	    sizeof (*async_e));

	etm_async_q_head++;
	if (etm_async_q_head == etm_async_q_max_len) {
		etm_async_q_head = 0;
	}
	etm_async_q_cur_len--;

	return (1);
} /* etm_async_q_deq */


/*
 * setting up the fields in iosvc at DS_REG_CB time
 *
 * records the ds handle from the async event, zeroes the send/posted
 * xids and the start_sending_Q flag, then opens the fmd transport and
 * creates the recv/send threads, each only if the iosvc does not
 * already have one
 */
void
etm_iosvc_setup(fmd_hdl_t *fmd_hdl, etm_iosvc_t *iosvc,
	etm_async_event_ele_t *async_e)
{
	iosvc->ds_hdl = async_e->ds_hdl;
	iosvc->cur_send_xid = 0;
	iosvc->xid_posted_ev = 0;
	iosvc->start_sending_Q = 0;

	/*
	 * open the fmd xprt if it
	 * hasn't been previously opened
	 */
	fmd_hdl_debug(fmd_hdl, "info: before fmd_xprt_open ldom_name is %s\n",
	    async_e->ldom_name);

	/* flags is not declared in this function; defined elsewhere */
	if (iosvc->fmd_xprt == NULL) {
		iosvc->fmd_xprt = fmd_xprt_open(fmd_hdl, flags, NULL, iosvc);
	}

	/* clear the flag that the iosvc send loop polls to exit */
	iosvc->thr_is_dying = 0;
	if (iosvc->recv_tid == NULL) {
		iosvc->recv_tid = fmd_thr_create(fmd_hdl,
		    etm_recv_from_remote_root, iosvc);
	}
	if (iosvc->send_tid == NULL) {
		iosvc->send_tid = fmd_thr_create(fmd_hdl,
		    etm_send_to_remote_root, iosvc);
	}
} /* etm_iosvc_setup() */


/*
 * ds userland interface ds_reg_cb callback func
 *
 * does no work itself beyond a version check: it queues an
 * ETM_ASYNC_EVENT_DS_REG_CB element for etm_async_event_handler()
 */

/* ARGSUSED */
static void
etm_iosvc_reg_handler(ds_hdl_t ds_hdl, ds_cb_arg_t arg, ds_ver_t *ver,
    ds_domain_hdl_t dhdl)
{
	etm_async_event_ele_t	async_ele;


	/*
	 * do version check here.
	 * the ver received is checked against etm_iosvc_vers[0]
	 */
	if (etm_iosvc_vers[0].major != ver->major ||
	    etm_iosvc_vers[0].minor != ver->minor) {
		/*
		 * can't log an fmd debug msg,
		 * not running in an fmd aux thread
		 */
		return;
	}

	/*
	 * the callback should have a valid ldom_name
	 * can't log fmd debugging msg here since this is not in an fmd aux
	 * thread.
log fmd debug msg in etm_async_event_handle() 2892 */ 2893 async_ele.ds_hdl = ds_hdl; 2894 async_ele.dhdl = dhdl; 2895 async_ele.ldom_name[0] = '\0'; 2896 async_ele.event_type = ETM_ASYNC_EVENT_DS_REG_CB; 2897 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2898 (void) etm_async_q_enq(&async_ele); 2899 if (etm_async_q_cur_len == 1) 2900 (void) pthread_cond_signal(&etm_async_event_q_cv); 2901 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2902 2903 } /* etm_iosvc_reg_handler */ 2904 2905 2906 /* 2907 * ds userland interface ds_unreg_cb callback func 2908 */ 2909 2910 /*ARGSUSED*/ 2911 static void 2912 etm_iosvc_unreg_handler(ds_hdl_t hdl, ds_cb_arg_t arg) 2913 { 2914 etm_async_event_ele_t async_ele; 2915 2916 /* 2917 * fill in async_ele and enqueue async_ele 2918 */ 2919 async_ele.ldom_name[0] = '\0'; 2920 async_ele.ds_hdl = hdl; 2921 async_ele.event_type = ETM_ASYNC_EVENT_DS_UNREG_CB; 2922 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2923 (void) etm_async_q_enq(&async_ele); 2924 if (etm_async_q_cur_len == 1) 2925 (void) pthread_cond_signal(&etm_async_event_q_cv); 2926 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2927 } /* etm_iosvc_unreg_handler */ 2928 2929 /* 2930 * ldom event registration callback func 2931 */ 2932 2933 /* ARGSUSED */ 2934 static void 2935 ldom_event_handler(char *ldom_name, ldom_event_t event, ldom_cb_arg_t data) 2936 { 2937 etm_async_event_ele_t async_ele; 2938 2939 /* 2940 * the callback will have a valid ldom_name 2941 */ 2942 async_ele.ldom_name[0] = '\0'; 2943 if (ldom_name) 2944 (void) strcpy(async_ele.ldom_name, ldom_name); 2945 async_ele.ds_hdl = DS_INVALID_HDL; 2946 2947 /* 2948 * fill in async_ele and enq async_ele 2949 */ 2950 switch (event) { 2951 case LDOM_EVENT_BIND: 2952 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_BIND; 2953 break; 2954 case LDOM_EVENT_UNBIND: 2955 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_UNBIND; 2956 break; 2957 case LDOM_EVENT_ADD: 2958 async_ele.event_type = 
ETM_ASYNC_EVENT_LDOM_ADD; 2959 break; 2960 case LDOM_EVENT_REMOVE: 2961 async_ele.event_type = ETM_ASYNC_EVENT_LDOM_REMOVE; 2962 break; 2963 default: 2964 /* 2965 * for all other ldom events, do nothing 2966 */ 2967 return; 2968 } /* switch (event) */ 2969 2970 (void) pthread_mutex_lock(&etm_async_event_q_lock); 2971 (void) etm_async_q_enq(&async_ele); 2972 if (etm_async_q_cur_len == 1) 2973 (void) pthread_cond_signal(&etm_async_event_q_cv); 2974 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 2975 2976 } /* ldom_event_handler */ 2977 2978 2979 /* 2980 * This is running as an fmd aux thread. 2981 * This is the func that actually handle the events, which include: 2982 * 1. ldom events. ldom events are on Control Domain only 2983 * 2. any DS userland callback funcs 2984 * these events are already Q-ed in the async_event_ele_q 2985 * deQ and process the events accordingly 2986 */ 2987 static void 2988 etm_async_event_handler(void *arg) 2989 { 2990 2991 fmd_hdl_t *fmd_hdl = (fmd_hdl_t *)arg; 2992 etm_iosvc_t *iosvc; /* ptr 2 iosvc struct */ 2993 etm_async_event_ele_t async_e; 2994 2995 fmd_hdl_debug(fmd_hdl, "info: etm_async_event_handler starting\n"); 2996 /* 2997 * handle etm is not dying and Q len > 0 2998 */ 2999 while (!etm_is_dying) { 3000 /* 3001 * grab the lock to check the Q len 3002 */ 3003 (void) pthread_mutex_lock(&etm_async_event_q_lock); 3004 fmd_hdl_debug(fmd_hdl, "info: etm_async_q_cur_len %d\n", 3005 etm_async_q_cur_len); 3006 3007 while (etm_async_q_cur_len > 0) { 3008 (void) etm_async_q_deq(&async_e); 3009 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 3010 fmd_hdl_debug(fmd_hdl, 3011 "info: processing an async event type %d ds_hdl" 3012 " %d\n", async_e.event_type, async_e.ds_hdl); 3013 if (async_e.ldom_name[0] != '\0') { 3014 fmd_hdl_debug(fmd_hdl, 3015 "info: procssing async evt ldom_name %s\n", 3016 async_e.ldom_name); 3017 } 3018 3019 /* 3020 * at this point, if async_e.ldom_name is not NULL, 3021 * we have a valid iosvc strcut ptr. 
3022 * the only time async_e.ldom_name is NULL is at 3023 * ds_unreg_cb() 3024 */ 3025 switch (async_e.event_type) { 3026 case ETM_ASYNC_EVENT_LDOM_UNBIND: 3027 case ETM_ASYNC_EVENT_LDOM_REMOVE: 3028 /* 3029 * we have a valid ldom_name, 3030 * etm_lookup_struct(ldom_name) 3031 * do nothing if can't find an iosvc 3032 * no iosvc clean up to do 3033 */ 3034 (void) pthread_mutex_lock( 3035 &iosvc_list_lock); 3036 iosvc = etm_iosvc_lookup(fmd_hdl, 3037 async_e.ldom_name, 3038 async_e.ds_hdl, B_FALSE); 3039 if (iosvc == NULL) { 3040 fmd_hdl_debug(fmd_hdl, 3041 "error: can't find iosvc for ldom " 3042 "name %s\n", 3043 async_e.ldom_name); 3044 (void) pthread_mutex_unlock( 3045 &iosvc_list_lock); 3046 break; 3047 } 3048 etm_iosvc_cleanup(fmd_hdl, iosvc, B_TRUE); 3049 (void) pthread_mutex_unlock( 3050 &iosvc_list_lock); 3051 break; 3052 3053 case ETM_ASYNC_EVENT_LDOM_BIND: 3054 3055 /* 3056 * create iosvc if it has not been 3057 * created 3058 * async_e.ds_hdl is invalid 3059 * async_e.ldom_name is valid ldom_name 3060 */ 3061 (void) pthread_mutex_lock( 3062 &iosvc_list_lock); 3063 iosvc = etm_iosvc_lookup(fmd_hdl, 3064 async_e.ldom_name, 3065 async_e.ds_hdl, B_TRUE); 3066 if (iosvc == NULL) { 3067 fmd_hdl_debug(fmd_hdl, 3068 "error: can't create iosvc for " 3069 "async evnt %d\n", 3070 async_e.event_type); 3071 (void) pthread_mutex_unlock( 3072 &iosvc_list_lock); 3073 break; 3074 } 3075 (void) strcpy(iosvc->ldom_name, 3076 async_e.ldom_name); 3077 iosvc->ds_hdl = async_e.ds_hdl; 3078 (void) pthread_mutex_unlock( 3079 &iosvc_list_lock); 3080 break; 3081 3082 case ETM_ASYNC_EVENT_DS_REG_CB: 3083 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3084 /* 3085 * find the root ldom name from 3086 * ldom domain hdl/id 3087 */ 3088 if (etm_filter_find_ldom_name( 3089 fmd_hdl, async_e.dhdl, 3090 async_e.ldom_name, 3091 MAX_LDOM_NAME) != 0) { 3092 fmd_hdl_debug(fmd_hdl, 3093 "error: can't find root " 3094 "domain name from did %d\n", 3095 async_e.dhdl); 3096 break; 3097 } else { 3098 
fmd_hdl_debug(fmd_hdl, 3099 "info: etm_filter_find_" 3100 "ldom_name returned %s\n", 3101 async_e.ldom_name); 3102 } 3103 /* 3104 * now we should have a valid 3105 * root domain name. 3106 * lookup the iosvc struct 3107 * associated with the ldom_name 3108 * and init the iosvc struct 3109 */ 3110 (void) pthread_mutex_lock( 3111 &iosvc_list_lock); 3112 iosvc = etm_iosvc_lookup( 3113 fmd_hdl, async_e.ldom_name, 3114 async_e.ds_hdl, B_TRUE); 3115 if (iosvc == NULL) { 3116 fmd_hdl_debug(fmd_hdl, 3117 "error: can't create iosvc " 3118 "for async evnt %d\n", 3119 async_e.event_type); 3120 (void) pthread_mutex_unlock( 3121 &iosvc_list_lock); 3122 break; 3123 } 3124 3125 etm_iosvc_setup(fmd_hdl, iosvc, 3126 &async_e); 3127 (void) pthread_mutex_unlock( 3128 &iosvc_list_lock); 3129 } else { 3130 iosvc = &io_svc; 3131 (void) strcpy(iosvc->ldom_name, 3132 async_e.ldom_name); 3133 3134 etm_iosvc_setup(fmd_hdl, iosvc, 3135 &async_e); 3136 } 3137 break; 3138 3139 case ETM_ASYNC_EVENT_DS_UNREG_CB: 3140 /* 3141 * decide which iosvc struct to perform 3142 * this UNREG callback on. 3143 */ 3144 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3145 (void) pthread_mutex_lock( 3146 &iosvc_list_lock); 3147 /* 3148 * lookup the iosvc struct w/ 3149 * ds_hdl 3150 */ 3151 iosvc = etm_iosvc_lookup( 3152 fmd_hdl, async_e.ldom_name, 3153 async_e.ds_hdl, B_FALSE); 3154 if (iosvc == NULL) { 3155 fmd_hdl_debug(fmd_hdl, 3156 "error: can't find iosvc " 3157 "for async evnt %d\n", 3158 async_e.event_type); 3159 (void) pthread_mutex_unlock( 3160 &iosvc_list_lock); 3161 break; 3162 } 3163 3164 /* 3165 * ds_hdl and fmd_xprt_open 3166 * go hand to hand together 3167 * after unreg_cb, 3168 * ds_hdl is INVALID and 3169 * fmd_xprt is closed. 
3170 * the ldom name and the msg Q 3171 * remains in iosvc_list 3172 */ 3173 if (iosvc->ldom_name != '\0') 3174 fmd_hdl_debug(fmd_hdl, 3175 "info: iosvc w/ ldom_name " 3176 "%s \n", iosvc->ldom_name); 3177 3178 /* 3179 * destroy send/recv threads and 3180 * other clean up on Control side. 3181 */ 3182 etm_iosvc_cleanup(fmd_hdl, iosvc, 3183 B_FALSE); 3184 (void) pthread_mutex_unlock( 3185 &iosvc_list_lock); 3186 } else { 3187 iosvc = &io_svc; 3188 /* 3189 * destroy send/recv threads and 3190 * then clean up on Root side. 3191 */ 3192 etm_iosvc_cleanup(fmd_hdl, iosvc, 3193 B_FALSE); 3194 } 3195 break; 3196 3197 default: 3198 /* 3199 * for all other events, etm doesn't care. 3200 * already logged an fmd info msg w/ 3201 * the event type. Do nothing here. 3202 */ 3203 break; 3204 } /* switch (async_e.event_type) */ 3205 3206 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 3207 etm_filter_handle_ldom_event(fmd_hdl, 3208 async_e.event_type, async_e.ldom_name); 3209 } 3210 3211 /* 3212 * grab the lock to check the q length again 3213 */ 3214 (void) pthread_mutex_lock(&etm_async_event_q_lock); 3215 3216 if (etm_is_dying) { 3217 break; 3218 } 3219 } /* etm_async_q_cur_len */ 3220 3221 /* 3222 * we have the mutex lock at this point, whether 3223 * . etm_is_dying and/or 3224 * . 
q_len == 0 3225 */ 3226 if (!etm_is_dying && etm_async_q_cur_len == 0) { 3227 fmd_hdl_debug(fmd_hdl, 3228 "info: cond wait on async_event_q_cv\n"); 3229 (void) pthread_cond_wait(&etm_async_event_q_cv, 3230 &etm_async_event_q_lock); 3231 fmd_hdl_debug(fmd_hdl, 3232 "info: cond wait on async_event_q_cv rtns\n"); 3233 } 3234 (void) pthread_mutex_unlock(&etm_async_event_q_lock); 3235 } /* etm_is_dying */ 3236 3237 fmd_hdl_debug(fmd_hdl, 3238 "info: etm async event handler thread exiting\n"); 3239 3240 } /* etm_async_event_handler */ 3241 3242 /* 3243 * deQ what's in iosvc msg Q 3244 * send iosvc_msgp to the remote io svc ldom by calling ds_send_msg() 3245 * the iosvc_msgp already has the packed msg, which is hdr + 1 fma event 3246 */ 3247 static void 3248 etm_send_to_remote_root(void *arg) 3249 { 3250 3251 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3252 etm_iosvc_q_ele_t msg_ele; /* iosvc msg ele */ 3253 etm_proto_v1_ev_hdr_t *ev_hdrp; /* hdr for FMA_EVENT */ 3254 fmd_hdl_t *fmd_hdl = init_hdl; /* fmd handle */ 3255 3256 3257 fmd_hdl_debug(fmd_hdl, 3258 "info: send to remote iosvc starting w/ ldom_name %s\n", 3259 iosvc->ldom_name); 3260 3261 /* 3262 * loop forever until etm_is_dying or thr_is_dying 3263 */ 3264 while (!etm_is_dying && !iosvc->thr_is_dying) { 3265 if (iosvc->ds_hdl != DS_INVALID_HDL && 3266 iosvc->start_sending_Q > 0) { 3267 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3268 while (iosvc->msg_q_cur_len > 0 && 3269 iosvc->ds_hdl != DS_INVALID_HDL) { 3270 (void) etm_iosvc_msg_deq(fmd_hdl, iosvc, 3271 &msg_ele); 3272 if (etm_debug_lvl >= 3) { 3273 fmd_hdl_debug(fmd_hdl, "info: valid " 3274 "ds_hdl before ds_send_msg \n"); 3275 } 3276 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3277 3278 iosvc->ack_ok = 0; 3279 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3280 ((ptrdiff_t)msg_ele.msg); 3281 ev_hdrp->ev_pp.pp_xid = iosvc->cur_send_xid + 1; 3282 while (!iosvc->ack_ok && 3283 iosvc->ds_hdl != DS_INVALID_HDL && 3284 !etm_is_dying) { 3285 /* 3286 * 
call ds_send_msg() to send the msg, 3287 * wait for the recv end to send the 3288 * resp msg back. 3289 * If resp msg is recv-ed, ack_ok 3290 * will be set to 1. 3291 * otherwise, retry. 3292 */ 3293 if (etm_send_ds_msg(fmd_hdl, B_TRUE, 3294 iosvc, &msg_ele, ev_hdrp) < 0) { 3295 continue; 3296 } 3297 3298 if (etm_is_dying || iosvc->thr_is_dying) 3299 break; 3300 } 3301 3302 /* 3303 * if out of the while loop but !ack_ok, ie, 3304 * ds_hdl becomes invalid at some point 3305 * while waiting the resp msg, we need to put 3306 * the msg back to the head of the Q. 3307 */ 3308 if (!iosvc->ack_ok) { 3309 (void) pthread_mutex_lock( 3310 &iosvc->msg_q_lock); 3311 /* 3312 * put the msg back to the head of Q. 3313 * If the Q is full at this point, 3314 * drop the msg at the tail, enq this 3315 * msg to the head. 3316 */ 3317 etm_msg_enq_head(fmd_hdl, iosvc, 3318 &msg_ele); 3319 (void) pthread_mutex_unlock( 3320 &iosvc->msg_q_lock); 3321 } 3322 3323 /* 3324 * 3325 * grab the lock to check the Q len again 3326 */ 3327 (void) pthread_mutex_lock(&iosvc->msg_q_lock); 3328 if (etm_is_dying || iosvc->thr_is_dying) { 3329 break; 3330 } 3331 } /* while dequeing iosvc msgs to send */ 3332 3333 /* 3334 * we have the mutex lock for msg_q_lock at this point 3335 * we are here because 3336 * 1) q_len == 0: then wait on the cv for Q to be filled 3337 * 2) etm_is_dying 3338 */ 3339 if (!etm_is_dying && !iosvc->thr_is_dying && 3340 iosvc->msg_q_cur_len == 0) { 3341 fmd_hdl_debug(fmd_hdl, 3342 "info: waiting on msg_q_cv\n"); 3343 (void) pthread_cond_wait(&iosvc->msg_q_cv, 3344 &iosvc->msg_q_lock); 3345 } 3346 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 3347 if (etm_is_dying || iosvc->thr_is_dying) { 3348 break; 3349 } 3350 } else { 3351 (void) etm_sleep(1); 3352 } /* wait for the start_sendingQ > 0 */ 3353 } /* etm_is_dying or thr_is_dying */ 3354 fmd_hdl_debug(fmd_hdl, "info; etm send thread exiting \n"); 3355 } /* etm_send_to_remote_root */ 3356 3357 3358 /* 3359 * receive etm msgs from 
the remote root ldom by calling ds_recv_msg() 3360 * if FMA events/ereports, call fmd_xprt_post() to post to fmd 3361 * send ACK back by calling ds_send_msg() 3362 */ 3363 static void 3364 etm_recv_from_remote_root(void *arg) 3365 { 3366 etm_iosvc_t *iosvc = (etm_iosvc_t *)arg; /* iosvc ptr */ 3367 etm_proto_v1_pp_t *pp; /* protocol preamble */ 3368 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 3369 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 3370 int32_t resp_code = 0; /* default is success */ 3371 int32_t rc; /* return value */ 3372 size_t maxlen = MAXLEN; 3373 /* max msg len */ 3374 char msgbuf[MAXLEN]; /* recv msg buf */ 3375 size_t msg_size; /* recv msg size */ 3376 size_t hdr_sz; /* sizeof *hdrp */ 3377 size_t evsz; /* sizeof *evp */ 3378 size_t fma_event_size; /* sizeof FMA event */ 3379 nvlist_t *evp; /* ptr to the nvlist */ 3380 char *buf; /* ptr to the nvlist */ 3381 static uint32_t mem_alloc = 0; /* indicate if alloc mem */ 3382 char *msg; /* ptr to alloc mem */ 3383 fmd_hdl_t *fmd_hdl = init_hdl; 3384 3385 3386 3387 fmd_hdl_debug(fmd_hdl, 3388 "info: recv from remote iosvc starting with ldom name %s \n", 3389 iosvc->ldom_name); 3390 3391 /* 3392 * loop forever until etm_is_dying or the thread is dying 3393 */ 3394 3395 msg = msgbuf; 3396 while (!etm_is_dying && !iosvc->thr_is_dying) { 3397 if (iosvc->ds_hdl == DS_INVALID_HDL) { 3398 fmd_hdl_debug(fmd_hdl, 3399 "info: ds_hdl is invalid in recv thr\n"); 3400 (void) etm_sleep(1); 3401 continue; 3402 } 3403 3404 /* 3405 * for now, there are FMA_EVENT and ACK msg type. 3406 * use FMA_EVENT buf as the maxlen, hdr+1 fma event. 3407 * FMA_EVENT is big enough to hold an ACK msg. 3408 * the actual msg size received is in msg_size. 
3409 */ 3410 rc = (*etm_ds_recv_msg)(iosvc->ds_hdl, msg, maxlen, &msg_size); 3411 if (rc == EFBIG) { 3412 fmd_hdl_debug(fmd_hdl, 3413 "info: ds_recv_msg needs mem the size of %d\n", 3414 msg_size); 3415 msg = fmd_hdl_zalloc(fmd_hdl, msg_size, FMD_SLEEP); 3416 mem_alloc = 1; 3417 } else if (rc == 0) { 3418 fmd_hdl_debug(fmd_hdl, 3419 "info: ds_recv_msg received a msg ok\n"); 3420 /* 3421 * check the magic # in msg.hdr 3422 */ 3423 pp = (etm_proto_v1_pp_t *)((ptrdiff_t)msg); 3424 if (pp->pp_magic_num != ETM_PROTO_MAGIC_NUM) { 3425 fmd_hdl_debug(fmd_hdl, 3426 "info: bad ds recv on magic\n"); 3427 continue; 3428 } 3429 3430 /* 3431 * check the msg type against msg_size to be sure 3432 * that received msg is not a truncated msg 3433 */ 3434 if (pp->pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 3435 3436 ev_hdrp = (etm_proto_v1_ev_hdr_t *) 3437 ((ptrdiff_t)msg); 3438 fmd_hdl_debug(fmd_hdl, "info: ds received " 3439 "FMA EVENT xid=%d msg_size=%d\n", 3440 ev_hdrp->ev_pp.pp_xid, msg_size); 3441 hdr_sz = sizeof (*ev_hdrp) + 3442 1*(sizeof (ev_hdrp->ev_lens[0])); 3443 fma_event_size = hdr_sz + ev_hdrp->ev_lens[0]; 3444 if (fma_event_size != msg_size) { 3445 fmd_hdl_debug(fmd_hdl, "info: wrong " 3446 "ev msg size received\n"); 3447 continue; 3448 /* 3449 * Simply do nothing. The send side 3450 * will timedcond_wait waiting on the 3451 * resp msg will timeout and 3452 * re-send the same msg. 3453 */ 3454 } 3455 if (etm_debug_lvl >= 3) { 3456 fmd_hdl_debug(fmd_hdl, "info: recv msg" 3457 " size %d hdrsz %d evp size %d\n", 3458 msg_size, hdr_sz, 3459 ev_hdrp->ev_lens[0]); 3460 } 3461 3462 if (ev_hdrp->ev_pp.pp_xid != 3463 iosvc->xid_posted_ev) { 3464 /* 3465 * different from last xid posted to 3466 * fmd, post to fmd now. 
3467 */ 3468 buf = msg + hdr_sz; 3469 rc = nvlist_unpack(buf, 3470 ev_hdrp->ev_lens[0], &evp, 0); 3471 rc = nvlist_size(evp, &evsz, 3472 NV_ENCODE_XDR); 3473 fmd_hdl_debug(fmd_hdl, 3474 "info: evp size %d before fmd" 3475 "post\n", evsz); 3476 3477 if ((rc = etm_post_to_fmd(fmd_hdl, 3478 iosvc->fmd_xprt, evp)) >= 0) { 3479 fmd_hdl_debug(fmd_hdl, 3480 "info: xid posted to fmd %d" 3481 "\n", 3482 ev_hdrp->ev_pp.pp_xid); 3483 iosvc->xid_posted_ev = 3484 ev_hdrp->ev_pp.pp_xid; 3485 } 3486 } 3487 3488 /* 3489 * ready to send the RESPONSE msg back 3490 * reuse the msg buffer as the response buffer 3491 */ 3492 resp_hdrp = (etm_proto_v1_resp_hdr_t *) 3493 ((ptrdiff_t)msg); 3494 resp_hdrp->resp_pp.pp_msg_type = 3495 ETM_MSG_TYPE_RESPONSE; 3496 3497 resp_hdrp->resp_code = resp_code; 3498 resp_hdrp->resp_len = sizeof (*resp_hdrp); 3499 3500 /* 3501 * send the whole response msg in one send 3502 */ 3503 if ((*etm_ds_send_msg)(iosvc->ds_hdl, msg, 3504 sizeof (*resp_hdrp)) != 0) { 3505 fmd_hdl_debug(fmd_hdl, 3506 "info: send response msg failed\n"); 3507 } else { 3508 fmd_hdl_debug(fmd_hdl, 3509 "info: ds send resp msg ok" 3510 "size %d\n", sizeof (*resp_hdrp)); 3511 } 3512 } else if (pp->pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 3513 fmd_hdl_debug(fmd_hdl, 3514 "info: ds received respond msg xid=%d" 3515 "msg_size=%d for ldom %s\n", pp->pp_xid, 3516 msg_size, iosvc->ldom_name); 3517 if (sizeof (*resp_hdrp) != msg_size) { 3518 fmd_hdl_debug(fmd_hdl, 3519 "info: wrong resp msg size" 3520 "received\n"); 3521 fmd_hdl_debug(fmd_hdl, 3522 "info: resp msg size %d recv resp" 3523 "msg size %d\n", 3524 sizeof (*resp_hdrp), msg_size); 3525 continue; 3526 } 3527 /* 3528 * is the pp.pp_xid == iosvc->cur_send_xid+1, 3529 * if so, nudge the send routine to send next 3530 */ 3531 if (pp->pp_xid != iosvc->cur_send_xid+1) { 3532 fmd_hdl_debug(fmd_hdl, 3533 "info: ds received resp msg xid=%d " 3534 "doesn't match cur_send_id=%d\n", 3535 pp->pp_xid, iosvc->cur_send_xid+1); 3536 continue; 3537 } 
3538 (void) pthread_mutex_lock(&iosvc->msg_ack_lock); 3539 iosvc->ack_ok = 1; 3540 (void) pthread_cond_signal(&iosvc->msg_ack_cv); 3541 (void) pthread_mutex_unlock( 3542 &iosvc->msg_ack_lock); 3543 fmd_hdl_debug(fmd_hdl, 3544 "info: signaling msg_ack_cv\n"); 3545 } else { 3546 /* 3547 * place holder for future msg types 3548 */ 3549 fmd_hdl_debug(fmd_hdl, 3550 "info: ds received unrecognized msg\n"); 3551 } 3552 if (mem_alloc) { 3553 fmd_hdl_free(fmd_hdl, msg, msg_size); 3554 mem_alloc = 0; 3555 msg = msgbuf; 3556 } 3557 } else { 3558 if (etm_debug_lvl >= 3) { 3559 fmd_hdl_debug(fmd_hdl, 3560 "info: ds_recv_msg() failed\n"); 3561 } 3562 } /* ds_recv_msg() returns */ 3563 } /* etm_is_dying */ 3564 3565 /* 3566 * need to free the mem allocated in msg upon exiting the thread 3567 */ 3568 if (mem_alloc) { 3569 fmd_hdl_free(fmd_hdl, msg, msg_size); 3570 mem_alloc = 0; 3571 msg = msgbuf; 3572 } 3573 fmd_hdl_debug(fmd_hdl, "info; etm recv thread exiting \n"); 3574 } /* etm_recv_from_remote_root */ 3575 3576 3577 3578 /* 3579 * etm_ds_init 3580 * initialize DS services function pointers by calling 3581 * dlopen() followed by dlsym() for each ds func. 
3582 * if any dlopen() or dlsym() call fails, return -ENOENT 3583 * return >0 for successs, -ENOENT for failure 3584 */ 3585 static int 3586 etm_ds_init(fmd_hdl_t *hdl) 3587 { 3588 int rc = 0; 3589 3590 if ((etm_dl_hdl = dlopen(etm_dl_path, etm_dl_mode)) == NULL) { 3591 fmd_hdl_debug(hdl, "error: failed to dlopen %s\n", etm_dl_path); 3592 return (-ENOENT); 3593 } 3594 3595 etm_ds_svc_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3596 dlsym(etm_dl_hdl, "ds_svc_reg"); 3597 if (etm_ds_svc_reg == NULL) { 3598 fmd_hdl_debug(hdl, 3599 "error: failed to dlsym ds_svc_reg() w/ error %s\n", 3600 dlerror()); 3601 rc = -ENOENT; 3602 } 3603 3604 3605 etm_ds_clnt_reg = (int (*)(ds_capability_t *cap, ds_ops_t *ops)) 3606 dlsym(etm_dl_hdl, "ds_clnt_reg"); 3607 if (etm_ds_clnt_reg == NULL) { 3608 fmd_hdl_debug(hdl, 3609 "error: dlsym(ds_clnt_reg) failed w/ errno %d\n", errno); 3610 rc = -ENOENT; 3611 } 3612 3613 etm_ds_send_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen)) 3614 dlsym(etm_dl_hdl, "ds_send_msg"); 3615 if (etm_ds_send_msg == NULL) { 3616 fmd_hdl_debug(hdl, "error: dlsym(ds_send_msg) failed\n"); 3617 rc = -ENOENT; 3618 } 3619 3620 etm_ds_recv_msg = (int (*)(ds_hdl_t hdl, void *buf, size_t buflen, 3621 size_t *msglen))dlsym(etm_dl_hdl, "ds_recv_msg"); 3622 if (etm_ds_recv_msg == NULL) { 3623 fmd_hdl_debug(hdl, "error: dlsym(ds_recv_msg) failed\n"); 3624 rc = -ENOENT; 3625 } 3626 3627 etm_ds_fini = (int (*)(void))dlsym(etm_dl_hdl, "ds_fini"); 3628 if (etm_ds_fini == NULL) { 3629 fmd_hdl_debug(hdl, "error: dlsym(ds_fini) failed\n"); 3630 rc = -ENOENT; 3631 } 3632 3633 if (rc == -ENOENT) { 3634 (void) dlclose(etm_dl_hdl); 3635 } 3636 return (rc); 3637 3638 } /* etm_ds_init() */ 3639 3640 3641 /* 3642 * -------------------------- FMD entry points ------------------------------- 3643 */ 3644 3645 /* 3646 * _fmd_init - initialize the transport for use by ETM and start the 3647 * server daemon to accept new connections to us 3648 * 3649 * FMD will read our *.conf 
and subscribe us to FMA events
 *
 * The work done depends on the domain type reported by ldom_get_type():
 *   legacy / control - set up stats, properties, the FMD transport,
 *			the ETM transport, syslog fds and the responder
 *			and server threads; a control domain additionally
 *			tries to load libds, registers as a DS client,
 *			registers for ldom events and starts the async
 *			event handler thread
 *   root	      - load libds (mandatory), register the DS service
 *			and start the async event handler thread
 *   anything else    - unregister the module
 */

void
_fmd_init(fmd_hdl_t *hdl)
{
	struct timeval		tmv;		/* timeval */
	ssize_t			n;		/* gen use */
	const struct facility	*fp;		/* syslog facility matching */
	char			*facname;	/* syslog facility property */
	uint32_t		type_mask;	/* type of the local host */
	int			rc;		/* funcs return code */


	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		return; /* invalid data in configuration file */
	}

	fmd_hdl_debug(hdl, "info: module initializing\n");

	/* init_hdl is the handle the iosvc send/recv threads log through */
	init_hdl = hdl;
	etm_lhp = ldom_init(etm_init_alloc, etm_init_free);

	/*
	 * decide the ldom type, do initialization accordingly
	 */
	if ((rc = ldom_get_type(etm_lhp, &type_mask)) != 0) {
		fmd_hdl_debug(hdl, "error: can't decide ldom type\n");
		fmd_hdl_debug(hdl, "info: module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	}

	if ((type_mask & LDOM_TYPE_LEGACY) || (type_mask & LDOM_TYPE_CONTROL)) {
		/* legacy is tested first, so it wins if both bits are set */
		if (type_mask & LDOM_TYPE_LEGACY) {
			/*
			 * running on a legacy sun4v domain,
			 * act as the the old sun4v
			 */
			etm_ldom_type = LDOM_TYPE_LEGACY;
			fmd_hdl_debug(hdl, "info: running as the old sun4v\n");
			/* the ldom handle is not used again on this path */
			ldom_fini(etm_lhp);
		} else if (type_mask & LDOM_TYPE_CONTROL) {
			etm_ldom_type = LDOM_TYPE_CONTROL;
			fmd_hdl_debug(hdl, "info: running as control domain\n");

			/*
			 * looking for libds.so.1.
			 * If not found, don't do DS registration. As a result,
			 * there will be no DS callbacks or other DS services.
			 */
			if (etm_ds_init(hdl) >= 0) {
				etm_filter_init(hdl);
				etm_ckpt_init(hdl);

				flags = FMD_XPRT_RDWR | FMD_XPRT_ACCEPT;

				/*
				 * ds client registration
				 */
				if ((rc = (*etm_ds_clnt_reg)(&iosvc_caps,
				    &iosvc_ops))) {
					fmd_hdl_debug(hdl,
					"error: ds_clnt_reg(): errno %d\n", rc);
				}
			} else {
				fmd_hdl_debug(hdl, "error: dlopen() libds "
				    "failed, continue without the DS services");
			}

			/*
			 * register for ldom status events
			 */
			if ((rc = ldom_register_event(etm_lhp,
			    ldom_event_handler, hdl))) {
				fmd_hdl_debug(hdl,
				    "error: ldom_register_event():"
				    " errno %d\n", rc);
			}

			/*
			 * create the thread for handling both the ldom status
			 * change and service events
			 */
			etm_async_e_tid = fmd_thr_create(hdl,
			    etm_async_event_handler, hdl);
		}

		/* setup statistics and properties from FMD */

		(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
		    sizeof (etm_stats) / sizeof (fmd_stat_t),
		    (fmd_stat_t *)&etm_stats);

		etm_fma_resp_wait_time = fmd_prop_get_int32(hdl,
		    ETM_PROP_NM_FMA_RESP_WAIT_TIME);
		etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
		etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
		    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
		fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
		    "etm_debug_max_ev_cnt %d\n", etm_debug_lvl,
		    etm_debug_max_ev_cnt);

		etm_resp_q_max_len = fmd_prop_get_int32(hdl,
		    ETM_PROP_NM_MAX_RESP_Q_LEN);
		etm_stats.etm_resp_q_max_len.fmds_value.ui64 =
		    etm_resp_q_max_len;
		etm_bad_acc_to_sec = fmd_prop_get_int32(hdl,
		    ETM_PROP_NM_BAD_ACC_TO_SEC);

		/*
		 * obtain an FMD transport handle so we can post
		 * FMA events later
		 */

		etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);

		/*
		 * encourage protocol transaction id to be unique per module
		 * load
		 */

		(void) gettimeofday(&tmv, NULL);
		etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
		    ((unsigned long)tmv.tv_usec >> 10));

		/* init the ETM transport */

		/*
		 * NOTE(review): this failure path unregisters without
		 * undoing what was set up above (etm_fmd_xprt and, on a
		 * control domain, the ldom handle, the ldom event
		 * registration and the async event thread) - confirm that
		 * fmd_hdl_unregister() covers them.
		 */
		if ((n = etm_xport_init(hdl)) != 0) {
			fmd_hdl_error(hdl, "error: bad xport init errno %d\n",
			    (-n));
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * Cache any properties we use every time we receive an alert.
		 */
		syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
		syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);

		/* an fd that cannot be opened just disables that destination */
		if (syslog_file && (syslog_logfd = open("/dev/conslog",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl,
			    "error: failed to open /dev/conslog");
			syslog_file = 0;
		}

		if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
		    O_WRONLY | O_NOCTTY)) == -1) {
			fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
			syslog_cons = 0;
		}

		if (syslog_file) {
			/*
			 * Look up the value of the "facility" property and
			 * use it to determine what syslog LOG_* facility
			 * value we use to fill in our log_ctl_t.
			 */
			facname = fmd_prop_get_string(hdl,
			    ETM_PROP_NM_FACILITY);

			/* syslog_facs ends with an entry whose name is NULL */
			for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
				if (strcmp(fp->fac_name, facname) == 0)
					break;
			}

			if (fp->fac_name == NULL) {
				fmd_hdl_error(hdl, "error: invalid 'facility'"
				    " setting: %s\n", facname);
				syslog_file = 0;
			} else {
				syslog_facility = fp->fac_value;
				syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
			}

			fmd_prop_free_string(hdl, facname);
		}

		/*
		 * start the message responder and the connection acceptance
		 * server; request protocol version be negotiated after waiting
		 * a second for the receiver to be ready to start handshaking
		 */

		etm_resp_tid = fmd_thr_create(hdl, etm_responder, hdl);
		etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

		(void) etm_sleep(ETM_SLEEP_QUIK);
		etm_req_ver_negot(hdl);

	} else if (type_mask & LDOM_TYPE_ROOT) {
		etm_ldom_type = LDOM_TYPE_ROOT;
		fmd_hdl_debug(hdl, "info: running as root domain\n");

		/*
		 * looking for libds.so.1.
		 * Unlike on a control domain, a root domain does not
		 * continue without it: the module unregisters.
		 */
		if (etm_ds_init(hdl) < 0) {
			fmd_hdl_debug(hdl,
			    "error: dlopen() libds failed, "
			    "module unregistering\n");
			ldom_fini(etm_lhp);
			fmd_hdl_unregister(hdl);
			return;
		}

		/*
		 * DS service registration
		 */
		if ((rc = (*etm_ds_svc_reg)(&iosvc_caps, &iosvc_ops))) {
			fmd_hdl_debug(hdl, "error: ds_svc_reg(): errno %d\n",
			    rc);
		}

		/*
		 * this thread is created for ds_reg_cb/ds_unreg_cb
		 */
		etm_async_e_tid = fmd_thr_create(hdl,
		    etm_async_event_handler, hdl);

		flags = FMD_XPRT_RDWR;
	} else if ((type_mask & LDOM_TYPE_IO) || (type_mask == 0)) {
		/*
		 * Do not load this module if it is
		 * . runing on a non-root ldom
		 * . the domain owns no io devices
		 */
		fmd_hdl_debug(hdl,
		    "info: non-root ldom, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	} else {
		/*
		 * place holder, all other cases. unload etm for now
		 */
		fmd_hdl_debug(hdl,
		    "info: other ldom type, module unregistering\n");
		ldom_fini(etm_lhp);
		fmd_hdl_unregister(hdl);
		return;
	}

	fmd_hdl_debug(hdl, "info: module initialized ok\n");

} /* _fmd_init() */

/*
 * etm_recv - receive an FMA event from FMD and transport it
 *		to the remote endpoint
 *
 * The event is packed once (XDR encoding) and then written, as an ETM
 * header followed by the packed body, over a fresh connection to each
 * address returned by etm_xport_get_ev_addrv().  A failure on one
 * address is counted in etm_wr_drop_fmaevent and the loop moves on to
 * the next address.  Nothing is sent when running on a root domain.
 */

/*ARGSUSED*/
void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
{
	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
	etm_xport_conn_t	conn;	/* connection handle */
	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
	ssize_t			i, n;	/* gen use */
	size_t			sz;	/* header size */
	size_t			buflen;	/* size of packed FMA event */
	uint8_t			*buf;	/* tmp buffer for packed FMA event */

	/*
	 * if this is running on a Root Domain, ignore the events,
	 * return right away
	 */
	if (etm_ldom_type == LDOM_TYPE_ROOT)
		return;

	buflen = 0;
	if ((n = nvlist_size(evp, &buflen, NV_ENCODE_XDR)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event size errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);

	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

	/*
	 * if the debug limit has been set, avoid excessive traffic,
	 * for example, an infinite cycle using loopback nodes
	 */

	if ((etm_debug_max_ev_cnt >= 0) &&
	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
	    etm_debug_max_ev_cnt)) {
		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
		    "event %p cnt %llu > debug max %d\n", evp,
		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
		    etm_debug_max_ev_cnt);
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	/* allocate a buffer for the FMA event and nvlist pack it */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);

	/*
	 * increment the ttl value if the event is from remote (a root domain)
	 * uncomment this when enabling fault forwarding from Root domains
	 * to Control domain.
	 *
	 * uint8_t			ttl;
	 * if (fmd_event_local(hdl, evp) != FMD_EVF_LOCAL) {
	 *	if (nvlist_lookup_uint8(evp, FMD_EVN_TTL, &ttl) == 0) {
	 *		(void) nvlist_remove(evp, FMD_EVN_TTL, DATA_TYPE_UINT8);
	 *		(void) nvlist_add_uint8(evp, FMD_EVN_TTL, ttl + 1);
	 *	}
	 * }
	 */

	/* buf is non-NULL here, so the pack goes into our own buffer */
	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
	    NV_ENCODE_XDR, 0)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event pack errno %d class %s\n", n, class);
		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* get vector of dst addrs and send the FMA event to each one */

	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "bad event dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* addrv ends with a NULL entry */
	for (i = 0; addrv[i] != NULL; i++) {

		/* open a new connection to this dst addr */

		if ((n = etm_conn_open(hdl, "FMA event dropped: "
		    "bad conn open on new ev", addrv[i], &conn)) < 0) {
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		/*
		 * etm_write_lock is held across the header and body writes
		 * so the two go out back to back on this connection
		 */
		(void) pthread_mutex_lock(&etm_write_lock);

		/* write the ETM message header */

		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
		    &sz)) == NULL) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			fmd_hdl_error(hdl, "error: FMA event dropped: "
			    "bad hdr write errno %d\n", errno);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad hdr wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
		    evp);

		/* write the ETM message body, ie, the packed nvlist */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io write on event", conn,
		    buf, buflen, ETM_IO_OP_WR)) < 0) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad body wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_unlock(&etm_write_lock);

		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
		    evp);

		/* close the connection */

		(void) etm_conn_close(hdl, "bad conn close after event send",
		    conn);
	} /* foreach dst addr in the vector */

	etm_xport_free_addrv(hdl, addrv);
	fmd_hdl_free(hdl, buf, buflen);

} /* etm_recv() */


/*
 * etm_send - receive an FMA event from FMD and enQ it in the iosvc.Q.
4059 * etm_send_to_remote_root() deQ and xprt the FMA events to a 4060 * remote root domain 4061 * return FMD_SEND_SUCCESS for success, 4062 * FMD_SEND_FAILED for error 4063 */ 4064 4065 /*ARGSUSED*/ 4066 int 4067 etm_send(fmd_hdl_t *fmd_hdl, fmd_xprt_t *xp, fmd_event_t *ep, nvlist_t *nvl) 4068 { 4069 uint32_t pack_it; /* whether to pack/enq the event */ 4070 etm_pack_msg_type_t msg_type; 4071 /* tell etm_pack_ds_msg() what to do */ 4072 etm_iosvc_t *iosvc; /* ptr to cur iosvc struct */ 4073 char *class; /* nvlist class name */ 4074 4075 pack_it = 1; 4076 msg_type = FMD_XPRT_OTHER_MSG; 4077 4078 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 4079 if (class == NULL) { 4080 pack_it = 0; 4081 } else { 4082 if (etm_debug_lvl >= 1) { 4083 fmd_hdl_debug(fmd_hdl, 4084 "info: evp class= %s in etm_send\n", class); 4085 } 4086 4087 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 4088 iosvc = 4089 (etm_iosvc_t *)fmd_xprt_getspecific(fmd_hdl, xp); 4090 4091 /* 4092 * check the flag FORWARDING_FAULTS_TO_CONTROL to 4093 * decide if or not to drop fault subscription 4094 * control msgs 4095 */ 4096 if (strcmp(class, "resource.fm.xprt.subscribe") == 0) { 4097 pack_it = 0; 4098 /* 4099 * if (FORWARDING_FAULTS_TO_CONTROL == 1) { 4100 * (void) nvlist_lookup_string(nvl, 4101 * FM_RSRC_XPRT_SUBCLASS, &subclass); 4102 * if (strcmp(subclass, "list.suspect") 4103 * == 0) { 4104 * pack_it = 1; 4105 * msg_action = FMD_XPRT_OTHER_MSG; 4106 * } 4107 * if (strcmp(subclass, "list.repaired") 4108 * == 0) { 4109 * pack_it = 1; 4110 * msg_action = FMD_XPRT_OTHER_MSG; 4111 * } 4112 * } 4113 */ 4114 } 4115 if (strcmp(class, "resource.fm.xprt.run") == 0) { 4116 pack_it = 1; 4117 msg_type = FMD_XPRT_RUN_MSG; 4118 } 4119 } else { /* has to be the root domain ldom */ 4120 iosvc = &io_svc; 4121 /* 4122 * drop all ereport and fault subscriptions 4123 * are we dropping too much here, more than just ereport 4124 * and fault subscriptions? 
need to check 4125 */ 4126 if (strcmp(class, "resource.fm.xprt.subscribe") == 0) 4127 pack_it = 0; 4128 if (strcmp(class, "resource.fm.xprt.run") == 0) { 4129 pack_it = 1; 4130 msg_type = FMD_XPRT_RUN_MSG; 4131 } 4132 } 4133 } 4134 4135 if (pack_it) { 4136 if (etm_debug_lvl >= 1) { 4137 fmd_hdl_debug(fmd_hdl, 4138 "info: ldom name returned from xprt get specific=" 4139 "%s xprt=%lld\n", iosvc->ldom_name, xp); 4140 } 4141 /* 4142 * pack the etm msg for the DS library and enq in io_svc->Q 4143 * when the hdrp is NULL, the packing func will use the static 4144 * iosvc_hdr 4145 */ 4146 (void) etm_pack_ds_msg(fmd_hdl, iosvc, NULL, 0, nvl, msg_type, 4147 ETM_CKPT_NOOP); 4148 } 4149 4150 return (FMD_SEND_SUCCESS); 4151 4152 } /* etm_send() */ 4153 4154 4155 4156 /* 4157 * _fmd_fini - stop the server daemon and teardown the transport 4158 */ 4159 4160 void 4161 _fmd_fini(fmd_hdl_t *hdl) 4162 { 4163 ssize_t n; /* gen use */ 4164 etm_iosvc_t *iosvc; /* ptr to insvc struct */ 4165 etm_iosvc_q_ele_t msg_ele; /* iosvc msg ele */ 4166 uint32_t i; /* for loop var */ 4167 4168 fmd_hdl_debug(hdl, "info: module finalizing\n"); 4169 4170 /* kill the connection server and responder ; wait for them to die */ 4171 4172 etm_is_dying = 1; 4173 4174 if (etm_svr_tid != NULL) { 4175 fmd_thr_signal(hdl, etm_svr_tid); 4176 fmd_thr_destroy(hdl, etm_svr_tid); 4177 etm_svr_tid = NULL; 4178 } /* if server thread was successfully created */ 4179 4180 if (etm_resp_tid != NULL) { 4181 fmd_thr_signal(hdl, etm_resp_tid); 4182 fmd_thr_destroy(hdl, etm_resp_tid); 4183 etm_resp_tid = NULL; 4184 } /* if responder thread was successfully created */ 4185 4186 if (etm_async_e_tid != NULL) { 4187 fmd_thr_signal(hdl, etm_async_e_tid); 4188 fmd_thr_destroy(hdl, etm_async_e_tid); 4189 etm_async_e_tid = NULL; 4190 } /* if async event handler thread was successfully created */ 4191 4192 4193 if ((etm_ldom_type == LDOM_TYPE_LEGACY) || 4194 (etm_ldom_type == LDOM_TYPE_CONTROL)) { 4195 4196 /* teardown the transport 
and cleanup syslogging */ 4197 if ((n = etm_xport_fini(hdl)) != 0) { 4198 fmd_hdl_error(hdl, "warning: xport fini errno %d\n", 4199 (-n)); 4200 } 4201 if (etm_fmd_xprt != NULL) { 4202 fmd_xprt_close(hdl, etm_fmd_xprt); 4203 } 4204 4205 if (syslog_logfd != -1) { 4206 (void) close(syslog_logfd); 4207 } 4208 if (syslog_msgfd != -1) { 4209 (void) close(syslog_msgfd); 4210 } 4211 } 4212 4213 if (etm_ldom_type == LDOM_TYPE_CONTROL) { 4214 if (ldom_unregister_event(etm_lhp)) 4215 fmd_hdl_debug(hdl, "ldom_unregister_event() failed\n"); 4216 4217 /* 4218 * on control side, need to go thru every iosvc struct to 4219 * 1) process remaining events in the iosvc Q: 4220 * for plan A: 4221 * discard remaining events in the Q/free the memory, 4222 * since fmd_xprt_log() already logged in Control D's FMD 4223 * 2) unregister the ds_hdl if valid 4224 * 3) close the fmd_xprt if it has not been closed 4225 */ 4226 for (i = 0; i < NUM_OF_ROOT_DOMAINS; i++) { 4227 if (iosvc_list[i].ldom_name[0] != '\0') { 4228 /* 4229 * found an iosvc struct for a root domain 4230 */ 4231 iosvc = &iosvc_list[i]; 4232 (void) pthread_mutex_lock(&iosvc_list_lock); 4233 etm_iosvc_cleanup(hdl, iosvc, B_TRUE); 4234 (void) pthread_mutex_unlock(&iosvc_list_lock); 4235 4236 } else { 4237 /* 4238 * reach the end of existing iosvc structures 4239 */ 4240 continue; 4241 } 4242 } /* for i<NUM_OF_ROOT_DOMAINS */ 4243 etm_ckpt_fini(hdl); 4244 etm_filter_fini(hdl); 4245 4246 ldom_fini(etm_lhp); 4247 4248 } else if (etm_ldom_type == LDOM_TYPE_ROOT) { 4249 iosvc = &io_svc; 4250 if (iosvc->send_tid != NULL) { 4251 fmd_thr_signal(hdl, iosvc->send_tid); 4252 fmd_thr_destroy(hdl, iosvc->send_tid); 4253 iosvc->send_tid = NULL; 4254 } /* if io svc send thread was successfully created */ 4255 4256 if (iosvc->recv_tid != NULL) { 4257 fmd_thr_signal(hdl, iosvc->recv_tid); 4258 fmd_thr_destroy(hdl, iosvc->recv_tid); 4259 iosvc->recv_tid = NULL; 4260 } /* if io svc receive thread was successfully created */ 4261 4262 (void) 
pthread_mutex_lock(&iosvc->msg_q_lock); 4263 while (iosvc->msg_q_cur_len > 0) { 4264 (void) etm_iosvc_msg_deq(hdl, iosvc, &msg_ele); 4265 fmd_hdl_free(hdl, msg_ele.msg, msg_ele.msg_size); 4266 } 4267 (void) pthread_mutex_unlock(&iosvc->msg_q_lock); 4268 4269 if (iosvc->fmd_xprt != NULL) 4270 fmd_xprt_close(hdl, iosvc->fmd_xprt); 4271 ldom_fini(etm_lhp); 4272 } 4273 if (etm_ds_fini) { 4274 (*etm_ds_fini)(); 4275 (void) dlclose(etm_dl_hdl); 4276 } 4277 4278 fmd_hdl_debug(hdl, "info: module finalized ok\n"); 4279 4280 } /* _fmd_fini() */ 4281