/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * etm.c	FMA Event Transport Module implementation, a plugin of FMD
 *		for sun4v/Ontario
 *
 * plugin for sending/receiving FMA events to/from service processor
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * --------------------------------- includes --------------------------------
 */

#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <netinet/in.h>
#include <fm/fmd_api.h>
#include <sys/fm/ldom.h>
#include <sys/strlog.h>
#include <sys/syslog.h>

#include "etm_xport_api.h"
#include "etm_etm_proto.h"
#include "etm_impl.h"

#include <pthread.h>
#include <signal.h>
#include <stropts.h>
#include <locale.h>
#include <strings.h>
#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <values.h>
#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>


/*
 * ----------------------------- forward decls -------------------------------
 */

static void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class);

/*
 * ------------------------- data structs for FMD ----------------------------
 */

static const fmd_hdl_ops_t fmd_ops = {
	etm_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
	NULL,		/* fmdo_send */
};

static const fmd_prop_t fmd_props[] = {
	{ ETM_PROP_NM_XPORT_ADDRS,	FMD_TYPE_STRING, "" },
	{ ETM_PROP_NM_DEBUG_LVL,	FMD_TYPE_INT32, "0" },
	{ ETM_PROP_NM_DEBUG_MAX_EV_CNT,	FMD_TYPE_INT32, "-1" },
	{ ETM_PROP_NM_CONSOLE,		FMD_TYPE_BOOL, "false" },
	{ ETM_PROP_NM_SYSLOGD,		FMD_TYPE_BOOL, "true" },
	{ ETM_PROP_NM_FACILITY,		FMD_TYPE_STRING, "LOG_DAEMON" },
	{ NULL, 0, NULL }
};


static const fmd_hdl_info_t fmd_info = {
	"FMA Event Transport Module", "1.1", &fmd_ops, fmd_props
};

/*
 * ----------------------- private consts and defns --------------------------
 */

/* misc buffer for variable sized protocol header fields */

#define	ETM_MISC_BUF_SZ		(4 * 1024)

/* try limit for IO operations w/ capped exp backoff sleep on retry */

/*
 * Design_Note:	ETM will potentially retry forever IO operations that the
 *		transport fails with EAGAIN (aka EWOULDBLOCK) rather than
 *		giving up after some number of seconds. This avoids
 *		dropping FMA events while the service processor is down,
 *		but at the risk of pending fmdo_recv() forever and
 *		overflowing FMD's event queue for ETM.
 *		A future TBD enhancement would be to always recv
 *		and send each ETM msg in a single read/write() to reduce
 *		the risk of failure between ETM msg hdr and body,
 *		assuming the MTU_SZ is large enough.
 */

#define	ETM_TRY_MAX_CNT		(MAXINT - 1)
#define	ETM_TRY_BACKOFF_RATE	(4)
#define	ETM_TRY_BACKOFF_CAP	(60)
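
/*
 * Example implied by the constants above: with ETM_TRY_BACKOFF_RATE of 4
 * and ETM_TRY_BACKOFF_CAP of 60, the per-retry sleep in etm_io_op() grows
 * 0s, 1s, 4s, 16s, and then stays at 60s per retry until ETM_TRY_MAX_CNT
 * tries have been exhausted.
 */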

/* amount to increment protocol transaction id on each new send */

#define	ETM_XID_INC	(2)

/*
 * ---------------------------- global data ----------------------------------
 */

static fmd_hdl_t
*init_hdl = NULL;	/* used in mem allocator at init time */

static int
etm_debug_lvl = 0;	/* debug level: 0 is off, 1 is on, 2 is more, etc */

static int
etm_debug_max_ev_cnt = -1;	/* max allowed event count for debugging */

static fmd_xprt_t
*etm_fmd_xprt = NULL;	/* FMD transport layer handle */

static pthread_t
etm_svr_tid = NULL;	/* thread id of connection acceptance server */

static volatile int
etm_is_dying = 0;	/* bool for dying (killing self) */

static uint32_t
etm_xid_cur = 0;	/* current transaction id for sends */

static uint32_t
etm_xid_ping = 0;	/* xid of last CONTROL msg sent requesting ping */

static uint32_t
etm_xid_ver_negot = 0;	/* xid of last CONTROL msg sent requesting ver negot */

static uint32_t
etm_xid_posted_ev = 0;	/* xid of last FMA_EVENT msg/event posted OK to FMD */

static uint8_t
etm_resp_ver = ETM_PROTO_V1;	/* proto ver [negotiated] for msg sends */

static pthread_mutex_t
etm_write_lock = PTHREAD_MUTEX_INITIALIZER;	/* for write operations */

static log_ctl_t syslog_ctl;	/* log(7D) meta-data for each msg */
static int syslog_facility;	/* log(7D) facility (part of priority) */
static int syslog_logfd = -1;	/* log(7D) file descriptor */
static int syslog_msgfd = -1;	/* sysmsg(7D) file descriptor */
static int syslog_file = 0;	/* log to syslog_logfd */
static int syslog_cons = 0;	/* log to syslog_msgfd */

static const struct facility {
	const char *fac_name;
	int fac_value;
} syslog_facs[] = {
	{ "LOG_DAEMON", LOG_DAEMON },
	{ "LOG_LOCAL0", LOG_LOCAL0 },
	{ "LOG_LOCAL1", LOG_LOCAL1 },
	{ "LOG_LOCAL2", LOG_LOCAL2 },
	{ "LOG_LOCAL3", LOG_LOCAL3 },
	{ "LOG_LOCAL4", LOG_LOCAL4 },
	{ "LOG_LOCAL5", LOG_LOCAL5 },
	{ "LOG_LOCAL6", LOG_LOCAL6 },
	{ "LOG_LOCAL7", LOG_LOCAL7 },
	{ NULL, 0 }
};

static struct stats {

	/* ETM msg counters */

	fmd_stat_t etm_rd_hdr_fmaevent;
	fmd_stat_t etm_rd_hdr_control;
	fmd_stat_t etm_rd_hdr_alert;
	fmd_stat_t etm_rd_hdr_response;
	fmd_stat_t etm_rd_body_fmaevent;
	fmd_stat_t etm_rd_body_control;
	fmd_stat_t etm_rd_body_alert;
	fmd_stat_t etm_rd_body_response;
	fmd_stat_t etm_wr_hdr_fmaevent;
	fmd_stat_t etm_wr_hdr_control;
	fmd_stat_t etm_wr_hdr_response;
	fmd_stat_t etm_wr_body_fmaevent;
	fmd_stat_t etm_wr_body_control;
	fmd_stat_t etm_wr_body_response;

	/* ETM byte counters */

	fmd_stat_t etm_wr_fmd_bytes;
	fmd_stat_t etm_rd_fmd_bytes;
	fmd_stat_t etm_wr_xport_bytes;
	fmd_stat_t etm_rd_xport_bytes;

	fmd_stat_t etm_magic_drop_bytes;

	/* ETM [dropped] FMA event counters */

	fmd_stat_t etm_rd_fmd_fmaevent;
	fmd_stat_t etm_wr_fmd_fmaevent;

	fmd_stat_t etm_rd_drop_fmaevent;
	fmd_stat_t etm_wr_drop_fmaevent;

	fmd_stat_t etm_rd_dup_fmaevent;
	fmd_stat_t etm_wr_dup_fmaevent;

	/* ETM protocol failures */

	fmd_stat_t etm_magic_bad;
	fmd_stat_t etm_ver_bad;
	fmd_stat_t etm_msgtype_bad;
	fmd_stat_t etm_subtype_bad;
	fmd_stat_t etm_xid_bad;
	fmd_stat_t etm_fmaeventlen_bad;
	fmd_stat_t etm_respcode_bad;
	fmd_stat_t etm_timeout_bad;
	fmd_stat_t etm_evlens_bad;

	/* IO operation failures */

	fmd_stat_t etm_xport_wr_fail;
	fmd_stat_t etm_xport_rd_fail;
	fmd_stat_t etm_xport_pk_fail;

	/* IO operation retries */

	fmd_stat_t etm_xport_wr_retry;
	fmd_stat_t etm_xport_rd_retry;
	fmd_stat_t etm_xport_pk_retry;

	/* system and library failures */

	fmd_stat_t etm_os_nvlist_pack_fail;
	fmd_stat_t etm_os_nvlist_unpack_fail;
	fmd_stat_t etm_os_nvlist_size_fail;
	fmd_stat_t etm_os_pthread_create_fail;

	/* xport API failures */

	fmd_stat_t etm_xport_get_ev_addrv_fail;
	fmd_stat_t etm_xport_open_fail;
	fmd_stat_t etm_xport_close_fail;
	fmd_stat_t etm_xport_accept_fail;
	fmd_stat_t etm_xport_open_retry;

	/* FMD entry point bad arguments */

	fmd_stat_t etm_fmd_recv_badargs;
	fmd_stat_t etm_fmd_init_badargs;
	fmd_stat_t etm_fmd_fini_badargs;

	/* Alert logging errors */
	fmd_stat_t etm_log_err;
	fmd_stat_t etm_msg_err;

} etm_stats = {

	/* ETM msg counters */

	{ "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers rcvd from xport" },
	{ "etm_rd_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers rcvd from xport" },
	{ "etm_rd_hdr_alert", FMD_TYPE_UINT64,
		"ETM alert msg headers rcvd from xport" },
	{ "etm_rd_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers rcvd from xport" },
	{ "etm_rd_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies rcvd from xport" },
	{ "etm_rd_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies rcvd from xport" },
	{ "etm_rd_body_alert", FMD_TYPE_UINT64,
		"ETM alert msg bodies rcvd from xport" },
	{ "etm_rd_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies rcvd from xport" },
	{ "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg headers sent to xport" },
	{ "etm_wr_hdr_control", FMD_TYPE_UINT64,
		"ETM control msg headers sent to xport" },
	{ "etm_wr_hdr_response", FMD_TYPE_UINT64,
		"ETM response msg headers sent to xport" },
	{ "etm_wr_body_fmaevent", FMD_TYPE_UINT64,
		"ETM fmaevent msg bodies sent to xport" },
	{ "etm_wr_body_control", FMD_TYPE_UINT64,
		"ETM control msg bodies sent to xport" },
	{ "etm_wr_body_response", FMD_TYPE_UINT64,
		"ETM response msg bodies sent to xport" },

	/* ETM byte counters */

	{ "etm_wr_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to FMD" },
	{ "etm_rd_fmd_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from FMD" },
	{ "etm_wr_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events sent to xport" },
	{ "etm_rd_xport_bytes", FMD_TYPE_UINT64,
		"bytes of FMA events rcvd from xport" },

	{ "etm_magic_drop_bytes", FMD_TYPE_UINT64,
		"bytes dropped from xport pre magic num" },

	/* ETM [dropped] FMA event counters */

	{ "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events rcvd from FMD" },
	{ "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64,
		"FMA events sent to FMD" },

	{ "etm_rd_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events from xport" },
	{ "etm_wr_drop_fmaevent", FMD_TYPE_UINT64,
		"dropped FMA events to xport" },

	{ "etm_rd_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events from xport" },
	{ "etm_wr_dup_fmaevent", FMD_TYPE_UINT64,
		"duplicate FMA events to xport" },

	/* ETM protocol failures */

	{ "etm_magic_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid magic num" },
	{ "etm_ver_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid protocol version" },
	{ "etm_msgtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid message type" },
	{ "etm_subtype_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid sub type" },
	{ "etm_xid_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ unmatched xid" },
	{ "etm_fmaeventlen_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid FMA event length" },
	{ "etm_respcode_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid response code" },
	{ "etm_timeout_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ invalid timeout value" },
	{ "etm_evlens_bad", FMD_TYPE_UINT64,
		"ETM msgs w/ too many event lengths" },

	/* IO operation failures */

	{ "etm_xport_wr_fail", FMD_TYPE_UINT64,
		"xport write failures" },
	{ "etm_xport_rd_fail", FMD_TYPE_UINT64,
		"xport read failures" },
	{ "etm_xport_pk_fail", FMD_TYPE_UINT64,
		"xport peek failures" },

	/* IO operation retries */

	{ "etm_xport_wr_retry", FMD_TYPE_UINT64,
		"xport write retries" },
	{ "etm_xport_rd_retry", FMD_TYPE_UINT64,
		"xport read retries" },
	{ "etm_xport_pk_retry", FMD_TYPE_UINT64,
		"xport peek retries" },

	/* system and library failures */

	{ "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64,
		"nvlist_pack failures" },
	{ "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64,
		"nvlist_unpack failures" },
	{ "etm_os_nvlist_size_fail", FMD_TYPE_UINT64,
		"nvlist_size failures" },
	{ "etm_os_pthread_create_fail", FMD_TYPE_UINT64,
		"pthread_create failures" },

	/* transport API failures */

	{ "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64,
		"xport get event addrv API failures" },
	{ "etm_xport_open_fail", FMD_TYPE_UINT64,
		"xport open API failures" },
	{ "etm_xport_close_fail", FMD_TYPE_UINT64,
		"xport close API failures" },
	{ "etm_xport_accept_fail", FMD_TYPE_UINT64,
		"xport accept API failures" },
	{ "etm_xport_open_retry", FMD_TYPE_UINT64,
		"xport open API retries" },

	/* FMD entry point bad arguments */

	{ "etm_fmd_recv_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_recv entry point" },
	{ "etm_fmd_init_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_init entry point" },
	{ "etm_fmd_fini_badargs", FMD_TYPE_UINT64,
		"bad arguments from fmd_fini entry point" },

	/* Alert logging errors */
	{ "etm_log_err", FMD_TYPE_UINT64,
		"failed to log message to log(7D)" },
	{ "etm_msg_err", FMD_TYPE_UINT64,
		"failed to log message to sysmsg(7D)" }
};

/*
 * -------------------------- support functions ------------------------------
 */

/*
 * Design_Note:	Each failure worth reporting to FMD should be done using
 *		a single call to fmd_hdl_error() as it logs an FMA event
 *		for each call. Also be aware that all the fmd_hdl_*()
 *		format strings currently use platform specific *printf()
 *		routines; so "%p" under Solaris does not prepend "0x" to
 *		the outputted hex digits, while Linux and VxWorks do.
 */

/*
 * etm_show_time - display the current time of day (for debugging) using
 *		the given FMD module handle and annotation string
 */

static void
etm_show_time(fmd_hdl_t *hdl, char *note_str)
{
	struct timeval tmv;		/* timeval */

	(void) gettimeofday(&tmv, NULL);
	fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n",
	    note_str, tmv.tv_sec, tmv.tv_usec);

} /* etm_show_time() */

/*
 * etm_hexdump - hexdump the given buffer (for debugging) using
 *		the given FMD module handle
 */

static void
etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt)
{
	uint8_t *bp;		/* byte ptr */
	int i, j;		/* index */
	char cb[80];		/* char buf */
	unsigned int n;		/* a byte of data for sprintf() */

	bp = buf;
	j = 0;

	/*
	 * Design_Note: fmd_hdl_debug() auto adds a newline if missing;
	 *		hence cb exists to accumulate a longer string.
	 */

	for (i = 1; i <= byte_cnt; i++) {
		n = *bp++;
		(void) sprintf(&cb[j], "%2.2x ", n);
		j += 3;
		/* add a newline every 16 bytes or at the buffer's end */
		if (((i % 16) == 0) || (i >= byte_cnt)) {
			cb[j-1] = '\0';
			fmd_hdl_debug(hdl, "%s\n", cb);
			j = 0;
		}
	} /* for each byte in the buffer */

} /* etm_hexdump() */

/*
 * etm_sleep - sleep the caller for the given number of seconds,
 *		return 0 or -errno value
 *
 * Design_Note:	To avoid interfering with FMD's signal mask (SIGALRM)
 *		do not use [Solaris] sleep(3C) and instead use
 *		pthread_cond_wait() or nanosleep(), both of which
 *		are POSIX spec-ed to leave signal masks alone.
 *		This is needed for Solaris and Linux (domain and SP).
 */

static int
etm_sleep(unsigned sleep_sec)
{
	struct timespec tms;	/* for nanosleep() */

	tms.tv_sec = sleep_sec;
	tms.tv_nsec = 0;

	if (nanosleep(&tms, NULL) < 0) {
		/* errno assumed set by above call */
		return (-errno);
	}
	return (0);

} /* etm_sleep() */

/*
 * etm_conn_open - open a connection to the given transport address,
 *		return 0 and the opened connection handle
 *		or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static int
etm_conn_open(fmd_hdl_t *hdl, char *err_substr,
    etm_xport_addr_t addr, etm_xport_conn_t *connp)
{
	etm_xport_conn_t conn;	/* connection to return */
	int nev;		/* -errno value */

	if ((conn = etm_xport_open(hdl, addr)) == NULL) {
		nev = (-errno);
		fmd_hdl_error(hdl, "error: %s: errno %d\n",
		    err_substr, errno);
		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
		return (nev);
	} else {
		*connp = conn;
		return (0);
	}
} /* etm_conn_open() */

/*
 * etm_conn_close - close the given connection,
 *		return 0 or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static int
etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn)
{
	int nev;	/* -errno value */

	if (etm_xport_close(hdl, conn) == NULL) {
		nev = (-errno);
		fmd_hdl_error(hdl, "warning: %s: errno %d\n",
		    err_substr, errno);
		etm_stats.etm_xport_close_fail.fmds_value.ui64++;
		return (nev);
	} else {
		return (0);
	}
} /* etm_conn_close() */

/*
 * etm_io_op - perform an IO operation on the given connection
 *		with the given buffer,
 *		accommodating MTU size and retrying op if needed,
 *		return how many bytes actually done by the op
 *		or -errno value
 *
 * caveats:	the err_substr is used in failure cases for calling
 *		fmd_hdl_error()
 */

static ssize_t
etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn,
    void *buf, size_t byte_cnt, int io_op)
{
	ssize_t rv;		/* ret val / byte count */
	ssize_t n;		/* gen use */
	uint8_t *datap;		/* ptr to data */
	size_t mtu_sz;		/* MTU size in bytes */
	int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t,
	    void *, size_t);
	size_t io_sz;		/* byte count for io_func_ptr */
	int try_cnt;		/* number of tries done */
	int sleep_sec;		/* exp backoff sleep period in sec */
	int sleep_rv;		/* ret val from sleeping */
	fmd_stat_t io_retry_stat;	/* IO retry stat to update */
	fmd_stat_t io_fail_stat;	/* IO failure stat to update */

	if ((conn == NULL) || (buf == NULL)) {
		return (-EINVAL);
	}
	switch (io_op) {
	case ETM_IO_OP_RD:
		io_func_ptr = etm_xport_read;
		io_retry_stat = etm_stats.etm_xport_rd_retry;
		io_fail_stat = etm_stats.etm_xport_rd_fail;
		break;
	case ETM_IO_OP_WR:
		io_func_ptr = etm_xport_write;
		io_retry_stat = etm_stats.etm_xport_wr_retry;
		io_fail_stat = etm_stats.etm_xport_wr_fail;
		break;
	default:
		return (-EINVAL);
	}
	if (byte_cnt == 0) {
		return (byte_cnt);	/* nop */
	}

	/* obtain [current] MTU size */

	if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) {
		mtu_sz = ETM_XPORT_MTU_SZ_DEF;
	} else {
		mtu_sz = n;
	}

	/* loop until all IO done, try limit exceeded, or real failure */

	rv = 0;
	datap = buf;
	while (rv < byte_cnt) {
		io_sz = MIN((byte_cnt - rv), mtu_sz);
		try_cnt = 0;
		sleep_sec = 0;

		/* when give up, return -errno value even if partly done */

		while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) ==
		    (-EAGAIN)) {
			try_cnt++;
			if (try_cnt > ETM_TRY_MAX_CNT) {
				rv = n;
				goto func_ret;
			}
			if (etm_is_dying) {
				rv = (-EINTR);
				goto func_ret;
			}
			if ((sleep_rv = etm_sleep(sleep_sec)) < 0) {
				rv = sleep_rv;
				goto func_ret;
			}
			sleep_sec = ((sleep_sec == 0) ? 1 :
			    (sleep_sec * ETM_TRY_BACKOFF_RATE));
			sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP);
			io_retry_stat.fmds_value.ui64++;
			if (etm_debug_lvl >= 1) {
				fmd_hdl_debug(hdl, "info: retrying io op %d "
				    "due to EAGAIN\n", io_op);
			}
		} /* while trying the io operation */

		if (etm_is_dying) {
			rv = (-EINTR);
			goto func_ret;
		}
		if (n < 0) {
			rv = n;
			goto func_ret;
		}
		/* avoid spinning CPU when given 0 bytes but no error */
		if (n == 0) {
			if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) {
				rv = sleep_rv;
				goto func_ret;
			}
		}
		rv += n;
		datap += n;
	} /* while still have more data */

func_ret:

	if (rv < 0) {
		io_fail_stat.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "error: %s: errno %d\n",
		    err_substr, (int)(-rv));
	}
	if (etm_debug_lvl >= 3) {
		fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n",
		    io_op, (int)rv, (int)byte_cnt);
	}
	return (rv);

} /* etm_io_op() */

/*
 * etm_magic_read - read the magic number of an ETM message header
 *		from the given connection into the given buffer,
 *		return 0 or -errno value
 *
 * Design_Note:	This routine is intended to help protect ETM from protocol
 *		framing errors as might be caused by an SP reset / crash in
 *		the middle of an ETM message send; the connection will be
 *		read from for as many bytes as needed until the magic number
 *		is found using a sliding buffer for comparisons.
 */

static int
etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr)
{
	int rv;				/* ret val */
	uint32_t magic_num;		/* magic number */
	int byte_cnt;			/* count of bytes read */
	uint8_t buf5[4+1];		/* sliding input buffer */
	int i, j;			/* indices into buf5 */
	ssize_t n;			/* gen use */
	uint8_t drop_buf[1024];		/* dropped bytes buffer */

	rv = 0;		/* assume success */
	magic_num = 0;
	byte_cnt = 0;
	j = 0;

	/* magic number bytes are sent in network (big endian) order */

	while (magic_num != ETM_PROTO_MAGIC_NUM) {
		if ((n = etm_io_op(hdl, "bad io read on magic",
		    conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) {
			rv = n;
			goto func_ret;
		}
		byte_cnt++;
		j = MIN((j + 1), sizeof (magic_num));
		if (byte_cnt < sizeof (magic_num)) {
			continue;
		}

		if (byte_cnt > sizeof (magic_num)) {
			etm_stats.etm_magic_drop_bytes.fmds_value.ui64++;
			i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1);
			drop_buf[i] = buf5[0];
			for (i = 0; i < j; i++) {
				buf5[i] = buf5[i+1];
			} /* for sliding the buffer contents */
		}
		(void) memcpy(&magic_num, &buf5[0], sizeof (magic_num));
		magic_num = ntohl(magic_num);
	} /* for reading bytes until find magic number */

func_ret:

	if (byte_cnt != sizeof (magic_num)) {
		fmd_hdl_debug(hdl, "warning: bad proto frame "
		    "implies corrupt/lost msg(s)\n");
	}
	if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) {
		i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf));
		fmd_hdl_debug(hdl, "info: magic drop hexdump "
		    "first %d of %d bytes:\n",
		    i, byte_cnt - sizeof (magic_num));
		etm_hexdump(hdl, drop_buf, i);
	}

	if (rv == 0) {
		*magic_ptr = magic_num;
	}
	return (rv);

} /* etm_magic_read() */
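
/*
 * On-the-wire sketch (etm_etm_proto.h is the authoritative definition):
 * every ETM message begins with the protocol preamble -- magic number,
 * protocol version, message type, sub type, reserved pad, transaction id
 * (xid), and timeout -- followed by a message type specific header.  For
 * FMA_EVENT messages that header carries a list of 32-bit FMA event
 * lengths terminated by a zero entry, and the packed event bodies follow
 * back to back.  etm_hdr_read() below parses exactly this layout.
 */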

/*
 * etm_hdr_read - allocate, read, and validate a [variable sized]
 *		ETM message header from the given connection,
 *		return the allocated ETM message header
 *		(which is guaranteed to be large enough to reuse as a
 *		RESPONSE msg hdr) and its size
 *		or NULL and set errno on failure
 */

static void *
etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp)
{
	uint8_t *hdrp;		/* ptr to header to return */
	size_t hdr_sz;		/* sizeof *hdrp */
	etm_proto_v1_pp_t pp;	/* protocol preamble */
	etm_proto_v1_ev_hdr_t *ev_hdrp;		/* for FMA_EVENT msg */
	etm_proto_v1_ctl_hdr_t *ctl_hdrp;	/* for CONTROL msg */
	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
	etm_proto_v3_sa_hdr_t *sa_hdrp;		/* for ALERT msg */
	uint32_t *lenp;		/* ptr to FMA event length */
	ssize_t i, n;		/* gen use */
	uint8_t misc_buf[ETM_MISC_BUF_SZ];	/* for var sized hdrs */
	int dummy_int;		/* dummy var to appease lint */

	hdrp = NULL; hdr_sz = 0;

	/* read the magic number which starts the protocol preamble */

	if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) {
		errno = (-n);
		etm_stats.etm_magic_bad.fmds_value.ui64++;
		return (NULL);
	}

	/* read the rest of the protocol preamble all at once */

	if ((n = etm_io_op(hdl, "bad io read on preamble",
	    conn, &pp.pp_proto_ver,
	    sizeof (pp) - sizeof (pp.pp_magic_num),
	    ETM_IO_OP_RD)) < 0) {
		errno = (-n);
		return (NULL);
	}

	/*
	 * Design_Note: The magic number was already network decoded; but
	 *		some other preamble fields also need to be decoded,
	 *		specifically pp_xid and pp_timeout. The rest of the
	 *		preamble fields are byte sized and hence need no
	 *		decoding.
	 */

	pp.pp_xid = ntohl(pp.pp_xid);
	pp.pp_timeout = ntohl(pp.pp_timeout);

	/* sanity check the header as best we can */

	if ((pp.pp_proto_ver < ETM_PROTO_V1) ||
	    (pp.pp_proto_ver > ETM_PROTO_V3)) {
		fmd_hdl_error(hdl, "error: bad proto ver %d\n",
		    (int)pp.pp_proto_ver);
		errno = EPROTO;
		etm_stats.etm_ver_bad.fmds_value.ui64++;
		return (NULL);
	}

	dummy_int = pp.pp_msg_type;
	if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) ||
	    (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) {
		fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int);
		errno = EBADMSG;
		etm_stats.etm_msgtype_bad.fmds_value.ui64++;
		return (NULL);
	}

	/* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */

	if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {

		ev_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*ev_hdrp);
		(void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp));

		/* sanity check the header's timeout */

		if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) &&
		    (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) {
			errno = ETIME;
			etm_stats.etm_timeout_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get all FMA event lengths from the header */

		lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--;
		i = -1;	/* cnt of length entries preceding 0 */
		do {
			i++; lenp++;
			if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >=
			    ETM_MISC_BUF_SZ) {
				errno = E2BIG;	/* ridiculous size */
				etm_stats.etm_evlens_bad.fmds_value.ui64++;
				return (NULL);
			}
			if ((n = etm_io_op(hdl, "bad io read on event len",
			    conn, lenp, sizeof (*lenp),
			    ETM_IO_OP_RD)) < 0) {
				errno = (-n);
				return (NULL);
			}
			*lenp = ntohl(*lenp);

		} while (*lenp != 0);
		i += 0; /* first len already counted by sizeof (ev_hdr) */
		hdr_sz += (i * sizeof (*lenp));

		etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {

		ctl_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*ctl_hdrp);
		(void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp));

		/* sanity check the header's sub type (control selector) */

		if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) ||
		    (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) {
			fmd_hdl_error(hdl, "error: bad ctl sub type %d\n",
			    (int)ctl_hdrp->ctl_pp.pp_sub_type);
			errno = EBADMSG;
			etm_stats.etm_subtype_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the control length */

		if ((n = etm_io_op(hdl, "bad io read on ctl len",
		    conn, &ctl_hdrp->ctl_len,
		    sizeof (ctl_hdrp->ctl_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len);

		etm_stats.etm_rd_hdr_control.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {

		resp_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*resp_hdrp);
		(void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp));

		/* sanity check the header's timeout */

		if (resp_hdrp->resp_pp.pp_timeout !=
		    ETM_PROTO_V1_TIMEOUT_NONE) {
			errno = ETIME;
			etm_stats.etm_timeout_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the response code and length */

		if ((n = etm_io_op(hdl, "bad io read on resp code+len",
		    conn, &resp_hdrp->resp_code,
		    sizeof (resp_hdrp->resp_code) +
		    sizeof (resp_hdrp->resp_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code);
		resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len);

		etm_stats.etm_rd_hdr_response.fmds_value.ui64++;

	} else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {

		sa_hdrp = (void*)&misc_buf[0];
		hdr_sz = sizeof (*sa_hdrp);
		(void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp));

		/* sanity check the header's protocol version */

		if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) {
			errno = EPROTO;
			etm_stats.etm_ver_bad.fmds_value.ui64++;
			return (NULL);
		}

		/* get the priority and length */

		if ((n = etm_io_op(hdl, "bad io read on sa priority+len",
		    conn, &sa_hdrp->sa_priority,
		    sizeof (sa_hdrp->sa_priority) +
		    sizeof (sa_hdrp->sa_len),
		    ETM_IO_OP_RD)) < 0) {
			errno = (-n);
			return (NULL);
		}

		sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority);
		sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len);

		etm_stats.etm_rd_hdr_alert.fmds_value.ui64++;

	} /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */

	/*
	 * choose a header size that allows hdr reuse for RESPONSE msgs,
	 * allocate and populate the message header, and
	 * return alloc size to caller for later free of hdrp
	 */

	hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp));
	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);
	(void) memcpy(hdrp, misc_buf, hdr_sz);

	if (etm_debug_lvl >= 3) {
		fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n",
		    hdr_sz);
		etm_hexdump(hdl, hdrp, hdr_sz);
	}
	*szp = hdr_sz;
	return (hdrp);

} /* etm_hdr_read() */

/*
 * etm_hdr_write - create and write a [variable sized] ETM message header
 *		to the given connection appropriate for the given FMA event
 *		and type of nvlist encoding,
 *		return the allocated ETM message header and its size
 *		or NULL and set errno on failure
 */

static void *
etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp,
    int encoding, size_t *szp)
{
	etm_proto_v1_ev_hdr_t *hdrp;	/* for FMA_EVENT msg */
	size_t hdr_sz;			/* sizeof *hdrp */
	uint32_t *lenp;			/* ptr to FMA event length */
	size_t evsz;			/* packed FMA event size */
	ssize_t n;			/* gen use */

	/* allocate and populate the message header for 1 FMA event */

	hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0]));

	hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP);

	/*
	 * Design_Note: Although the ETM protocol supports it, we do not (yet)
	 *		want responses/ACKs on FMA events that we send. All
	 *		such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE.
	 */

	hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM;
	hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num);
	hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1;
	hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT;
	hdrp->ev_pp.pp_sub_type = 0;
	hdrp->ev_pp.pp_rsvd_pad = 0;
	hdrp->ev_pp.pp_xid = etm_xid_cur;
	hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid);
	etm_xid_cur += ETM_XID_INC;
	hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;
	hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout);

	lenp = &hdrp->ev_lens[0];

	if ((n = nvlist_size(evp, &evsz, encoding)) != 0) {
		errno = n;
		fmd_hdl_free(hdl, hdrp, hdr_sz);
		etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++;
		return (NULL);
	}

	/* indicate 1 FMA event, network encode its length, and 0-terminate */

	*lenp = evsz; *lenp = htonl(*lenp); lenp++;
	*lenp = 0; *lenp = htonl(*lenp); lenp++;

	/*
	 * write the network encoded header to the transport, and
	 * return alloc size to caller for later free
	 */

	if ((n = etm_io_op(hdl, "bad io write on event hdr",
	    conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) {
		errno = (-n);
		fmd_hdl_free(hdl, hdrp, hdr_sz);
		return (NULL);
	}

	*szp = hdr_sz;
	return (hdrp);

} /* etm_hdr_write() */

/*
 * etm_post_to_fmd - post the given FMA event to FMD
 *		via a FMD transport API call,
 *		return 0 or -errno value
 *
 * caveats:	the FMA event (evp) is freed by FMD,
 *		thus callers of this function should
 *		immediately discard any ptr they have to the
 *		nvlist without freeing or dereferencing it
 */

static int
etm_post_to_fmd(fmd_hdl_t *hdl, nvlist_t *evp)
{
	ssize_t ev_sz;	/* sizeof *evp */

	(void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR);

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante ev post");
	}
	fmd_xprt_post(hdl, etm_fmd_xprt, evp, 0);
	etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++;
	etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz;
	if (etm_debug_lvl >= 1) {
		fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp);
	}
	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post ev post");
	}
	return (0);

} /* etm_post_to_fmd() */

/*
 * Ideally we would just use syslog(3C) for outputting our messages.
 * Unfortunately, as this module is running within the FMA daemon context,
 * that would create the situation where this module's openlog() would
 * have the monopoly on syslog(3C) for the daemon and all its modules.
 * To avoid that situation, this module uses the same logic as the
 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D)
 * devices for syslog and console.
 */

static int
etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz,
    uint8_t *body_buf)
{
	char *sysmessage;	/* Formatted message */
	size_t formatlen;	/* maximum length of sysmessage */
	struct strbuf ctl, dat;	/* structs pushed to the logfd */
	uint32_t msgid;		/* syslog message ID number */

	if ((syslog_file == 0) && (syslog_cons == 0)) {
		return (0);
	}

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante syslog post");
	}

	formatlen = body_sz + 64; /* +64 for prefix strings added below */
	sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP);

	if (syslog_file) {
		STRLOG_MAKE_MSGID(body_buf, msgid);
		(void) snprintf(sysmessage, formatlen,
		    "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid,
		    body_buf);

		syslog_ctl.pri = syslog_facility | priority;

		ctl.buf = (void *)&syslog_ctl;
		ctl.len = sizeof (syslog_ctl);

		dat.buf = sysmessage;
		dat.len = strlen(sysmessage) + 1;

		if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) {
			fmd_hdl_debug(hdl, "putmsg failed: %s\n",
			    strerror(errno));
			etm_stats.etm_log_err.fmds_value.ui64++;
		}
	}

	if (syslog_cons) {
		(void) snprintf(sysmessage, formatlen,
		    "SC Alert: %s\r\n", body_buf);

		dat.buf = sysmessage;
		dat.len = strlen(sysmessage) + 1;

		if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) {
			fmd_hdl_debug(hdl, "write failed: %s\n",
			    strerror(errno));
			etm_stats.etm_msg_err.fmds_value.ui64++;
		}
	}

	fmd_hdl_free(hdl, sysmessage, formatlen);

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post syslog post");
	}

	return (0);
}


/*
 * etm_req_ver_negot - send an ETM control message to the other end requesting
 *		that the ETM protocol version be negotiated/set
 */

static void
etm_req_ver_negot(fmd_hdl_t *hdl)
{
	etm_xport_addr_t *addrv;	/* default dst addr(s) */
	etm_xport_conn_t conn;		/* connection to other end */
	etm_proto_v1_ctl_hdr_t *ctl_hdrp;	/* for CONTROL msg */
	size_t hdr_sz;			/* sizeof header */
	uint8_t *body_buf;		/* msg body buffer */
	uint32_t body_sz;		/* sizeof *body_buf */
	ssize_t i;			/* gen use */

	/* populate an ETM control msg to send */

	hdr_sz = sizeof (*ctl_hdrp);
	body_sz = (3 + 1);		/* version bytes plus null byte */

	ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP);

	ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM);
	ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1;
	ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL;
	ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ;
	ctl_hdrp->ctl_pp.pp_rsvd_pad = 0;
	etm_xid_ver_negot = etm_xid_cur;
	etm_xid_cur += ETM_XID_INC;
	ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot);
	ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER);
	ctl_hdrp->ctl_len = htonl(body_sz);

	body_buf = (void*)&ctl_hdrp->ctl_len;
	body_buf += sizeof (ctl_hdrp->ctl_len);
	*body_buf++ = ETM_PROTO_V3;
	*body_buf++ = ETM_PROTO_V2;
	*body_buf++ = ETM_PROTO_V1;
	*body_buf++ = '\0';

	/*
	 * open and close a connection to send the ETM control msg
	 * to any/all of the default dst addrs
	 */

	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
		fmd_hdl_error(hdl,
		    "error: bad ctl dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		goto func_ret;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		etm_stats.etm_xport_open_fail.fmds_value.ui64++;
		if (etm_conn_open(hdl, "bad conn open during ver negot",
		    addrv[i], &conn) < 0) {
			continue;
		}
		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
		    conn, ctl_hdrp, hdr_sz + body_sz,
		    ETM_IO_OP_WR) >= 0) {
			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
			etm_stats.etm_wr_body_control.fmds_value.ui64++;
		}
		(void) etm_conn_close(hdl, "bad conn close during ver negot",
		    conn);

	} /* foreach dst addr */

func_ret:

	if (addrv != NULL) {
		etm_xport_free_addrv(hdl, addrv);
	}
	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);

} /* etm_req_ver_negot() */

/*
 * Design_Note: We rely on the fact that all message types have
 *		a common protocol preamble; if this fact should
 *		ever change it may break the code below. We also
 *		rely on the fact that FMA_EVENT and CONTROL headers
 *		returned will be sized large enough to reuse them
 *		as RESPONSE headers if the remote endpt asked
 *		for a response via the pp_timeout field.
 */

/*
 * etm_maybe_send_response - check the given message header to see
 *		whether a response has been requested,
 *		if so then send an appropriate response
 *		back on the given connection using the
 *		given response code,
 *		return 0 or -errno value
 */

static ssize_t
etm_maybe_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, int32_t resp_code)
{
	ssize_t rv;		/* ret val */
	etm_proto_v1_pp_t *ppp;	/* protocol preamble ptr */
	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
	uint8_t resp_body[4];	/* response body if needed */
	uint8_t *resp_msg;	/* response hdr+body */
	size_t hdr_sz;		/* sizeof response hdr */
	uint8_t orig_msg_type;	/* orig hdr's message type */
	uint32_t orig_timeout;	/* orig hdr's timeout */
	ssize_t n;		/* gen use */

	rv = 0;		/* default is success */
	ppp = hdrp;
	orig_msg_type = ppp->pp_msg_type;
	orig_timeout = ppp->pp_timeout;

	/* bail out now if no response is to be sent */

	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
		return (0);
	} /* if a nop */

	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
		return (-EINVAL);
	} /* if inappropriate hdr for a response msg */

	/* reuse the given header as a response header */

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante resp send");
	}

	resp_hdrp = hdrp;
	resp_hdrp->resp_code = resp_code;
	resp_hdrp->resp_len = 0;	/* default is empty body */

	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
		resp_body[0] = ETM_PROTO_V2;
		resp_body[1] = ETM_PROTO_V3;
		resp_body[2] = 0;
		resp_hdrp->resp_len = 3;
	} /* if should send our/negotiated proto ver in resp body */

	/* respond with the proto ver that was negotiated */

	resp_hdrp->resp_pp.pp_proto_ver = etm_resp_ver;
	resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE;
	resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE;

	/*
	 * send the whole response msg in one write, header and body;
	 * avoid the alloc-and-copy if we can reuse the hdr as the msg,
	 * ie, if the body is empty
	 *
	 * update stats and note the xid associated with last ACKed FMA_EVENT
	 * known to be successfully posted to FMD to aid duplicate filtering
	 */

	hdr_sz = sizeof (etm_proto_v1_resp_hdr_t);

	resp_msg = hdrp;
	if (resp_hdrp->resp_len > 0) {
		resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len,
		    FMD_SLEEP);
		(void) memcpy(resp_msg, resp_hdrp, hdr_sz);
		(void) memcpy(resp_msg + hdr_sz, resp_body,
		    resp_hdrp->resp_len);
	}

	(void) pthread_mutex_lock(&etm_write_lock);
	if ((n = etm_io_op(hdl, "bad io write on resp msg", conn,
	    resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR)) < 0) {
		(void) pthread_mutex_unlock(&etm_write_lock);
		rv = n;
		goto func_ret;
	}
	(void) pthread_mutex_unlock(&etm_write_lock);

	etm_stats.etm_wr_hdr_response.fmds_value.ui64++;
	etm_stats.etm_wr_body_response.fmds_value.ui64++;

	if ((orig_msg_type == ETM_MSG_TYPE_FMA_EVENT) &&
	    (resp_code >= 0)) {
		etm_xid_posted_ev = resp_hdrp->resp_pp.pp_xid;
	}

	fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport "
	    "xid 0x%x code %d len %u\n",
	    (unsigned int)resp_hdrp->resp_pp.pp_proto_ver,
	    resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code,
	    resp_hdrp->resp_len);
func_ret:

	if (resp_hdrp->resp_len > 0) {
		fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len);
	}
	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post resp send");
	}
	return (rv);

} /* etm_maybe_send_response() */

/*
 * etm_handle_new_conn - receive an ETM message sent from the other end via
 *		the given open connection, pull out any FMA events
 *		and post them to the local FMD (or handle any ETM
 *		control or response msg); when done, close the
 *		connection
 */

static void
etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn)
{
	etm_proto_v1_ev_hdr_t *ev_hdrp;		/* for FMA_EVENT msg */
	etm_proto_v1_ctl_hdr_t *ctl_hdrp;	/* for CONTROL msg */
	etm_proto_v1_resp_hdr_t *resp_hdrp;	/* for RESPONSE msg */
	etm_proto_v3_sa_hdr_t *sa_hdrp;		/* for ALERT msg */
	int32_t resp_code;	/* response code */
	size_t hdr_sz;		/* sizeof header */
	uint8_t *body_buf;	/* msg body buffer */
	uint32_t body_sz;	/* sizeof body_buf */
	uint32_t ev_cnt;	/* count of FMA events */
	uint8_t *bp;		/* byte ptr within body_buf */
	nvlist_t *evp;		/* ptr to unpacked FMA event */
	char *class;		/* FMA event class */
	ssize_t i, n;		/* gen use */

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante conn handle");
	}
	fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn);

	ev_hdrp = NULL;
	ctl_hdrp = NULL;
	resp_hdrp = NULL;
	sa_hdrp = NULL;
	body_buf = NULL;
	class = NULL;
	evp = NULL;
	resp_code = 0;	/* default is success */

	/* read a network decoded message header from the connection */

	if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) {
		/* errno assumed set by above call */
		fmd_hdl_debug(hdl, "error: FMA event dropped: "
		    "bad hdr read errno %d\n", errno);
		etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
		goto func_ret;
	}

	/*
	 * handle the message based on its preamble pp_msg_type
	 * which is known to be valid from etm_hdr_read() checks
	 */

	if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) {

		fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n");

		/*
		 * check for dup msg/xid against last good response sent,
		 * if a dup then resend response but skip repost to FMD
		 */

		if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_ev) {
			(void) etm_maybe_send_response(hdl, conn, ev_hdrp, 0);
			fmd_hdl_debug(hdl, "info: skipping dup FMA event post "
			    "xid 0x%x\n", etm_xid_posted_ev);
			etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++;
			goto func_ret;
		}

		/* allocate buf large enough for whole body / all FMA events */

		body_sz = 0;
		for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) {
			body_sz += ev_hdrp->ev_lens[i];
		} /* for summing sizes of all FMA events */
		ev_cnt = i;

		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n",
			    ev_cnt, body_sz);
		}

		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

		/* read all the FMA events at once */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io read on event bodies",
		    conn, body_buf, body_sz,
		    ETM_IO_OP_RD)) < 0) {
			etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++;
			goto func_ret;
		}

		etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz;
		etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt;

		/* unpack each FMA event and post it to FMD */

		bp = body_buf;
		for (i = 0; i < ev_cnt; i++) {
			if ((n = nvlist_unpack((char *)bp,
			    ev_hdrp->ev_lens[i], &evp, 0)) != 0) {
				resp_code = (-n);
				(void) etm_maybe_send_response(hdl, conn,
				    ev_hdrp, resp_code);
				fmd_hdl_error(hdl, "error: FMA event dropped: "
				    "bad event body unpack "
				    "errno %d\n", n);
				if (etm_debug_lvl >= 2) {
					fmd_hdl_debug(hdl, "info: FMA event "
					    "hexdump %d bytes:\n",
					    ev_hdrp->ev_lens[i]);
					etm_hexdump(hdl, bp,
					    ev_hdrp->ev_lens[i]);
				}
				etm_stats.etm_os_nvlist_unpack_fail.fmds_value.
				    ui64++;
				etm_stats.etm_rd_drop_fmaevent.fmds_value.
				    ui64++;
				bp += ev_hdrp->ev_lens[i];
				continue;
			}
			if (etm_debug_lvl >= 1) {
				(void) nvlist_lookup_string(evp, FM_CLASS,
				    &class);
				if (class == NULL) {
					class = "NULL";
				}
				fmd_hdl_debug(hdl, "info: FMA event %p "
				    "class %s\n", evp, class);
			}
			resp_code = etm_post_to_fmd(hdl, evp);
			evp = NULL;
			(void) etm_maybe_send_response(hdl, conn,
			    ev_hdrp, resp_code);
			bp += ev_hdrp->ev_lens[i];
		} /* foreach FMA event in the body buffer */

	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) {

		ctl_hdrp = (void*)ev_hdrp;

		fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n");
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n",
			    (int)ctl_hdrp->ctl_pp.pp_sub_type,
			    ctl_hdrp->ctl_pp.pp_xid);
		}

		/*
		 * if we have a VER_NEGOT_REQ read the body and validate
		 * the protocol version set contained therein,
		 * otherwise we have a PING_REQ (which has no body)
		 * and we [also] fall thru to the code which sends a
		 * response msg if the pp_timeout field requested one
		 */

		if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) {

			body_sz = ctl_hdrp->ctl_len;
			body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

			if ((n = etm_io_op(hdl, "bad io read on ctl body",
			    conn, body_buf, body_sz,
			    ETM_IO_OP_RD)) < 0) {
				goto func_ret;
			}

			/* complain if version set completely incompatible */

			for (i = 0; i < body_sz; i++) {
				if ((body_buf[i] == ETM_PROTO_V1) ||
				    (body_buf[i] == ETM_PROTO_V2) ||
				    (body_buf[i] == ETM_PROTO_V3)) {
					break;
				}
			}
			if (i >= body_sz) {
				etm_stats.etm_ver_bad.fmds_value.ui64++;
				resp_code = (-EPROTO);
			}

		} /* if got version set request */

		etm_stats.etm_rd_body_control.fmds_value.ui64++;

		(void) etm_maybe_send_response(hdl, conn, ctl_hdrp, resp_code);

	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) {

		resp_hdrp = (void*)ev_hdrp;

		fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n");
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(hdl, "info: resp xid 0x%x\n",
			    (int)resp_hdrp->resp_pp.pp_xid);
		}

		body_sz = resp_hdrp->resp_len;
		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

		if ((n = etm_io_op(hdl, "bad io read on resp len",
		    conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) {
			goto func_ret;
		}

		etm_stats.etm_rd_body_response.fmds_value.ui64++;

		/*
		 * look up the xid to interpret the response body
		 *
		 * ping is a nop; for ver negot confirm that a supported
		 * protocol version was negotiated and remember which one
		 */

		if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) &&
		    (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) {
			etm_stats.etm_xid_bad.fmds_value.ui64++;
			goto func_ret;
		}

		if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) {
			if ((body_buf[0] < ETM_PROTO_V1) ||
			    (body_buf[0] > ETM_PROTO_V3)) {
				etm_stats.etm_ver_bad.fmds_value.ui64++;
				goto func_ret;
			}
			etm_resp_ver = body_buf[0];
		} /* if have resp to last req to negotiate proto ver */

	} else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) {

		sa_hdrp = (void*)ev_hdrp;

		fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n");
		if (etm_debug_lvl >= 1) {
			fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n",
			    (int)sa_hdrp->sa_pp.pp_sub_type,
			    sa_hdrp->sa_pp.pp_xid);
		}

		body_sz = sa_hdrp->sa_len;
		body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP);

		if ((n = etm_io_op(hdl, "bad io read on sa body",
		    conn, body_buf, body_sz,
		    ETM_IO_OP_RD)) < 0) {
			goto func_ret;
		}

		etm_stats.etm_rd_body_alert.fmds_value.ui64++;

		resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority,
		    body_sz, body_buf);
		(void) etm_maybe_send_response(hdl, conn, sa_hdrp, resp_code);
	} /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */

func_ret:

	(void) etm_conn_close(hdl, "bad conn close after msg recv", conn);

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "post conn handle");
	}
	if (ev_hdrp != NULL) {
		fmd_hdl_free(hdl, ev_hdrp, hdr_sz);
	}
	if (body_buf != NULL) {
		fmd_hdl_free(hdl, body_buf, body_sz);
	}
} /* etm_handle_new_conn() */

/*
 * etm_server - loop forever accepting new connections
 *		using the given FMD handle,
 *		handling any ETM msgs sent from the other side
 *		via each such connection
 */

static void
etm_server(void *arg)
{
	etm_xport_conn_t conn;	/* connection handle */
	ssize_t n;		/* gen use */
	fmd_hdl_t *hdl;		/* FMD handle */

	hdl = arg;

	fmd_hdl_debug(hdl, "info: connection server starting\n");

	while (!etm_is_dying) {

		if ((conn = etm_xport_accept(hdl, NULL)) == NULL) {
			/* errno assumed set by above call */
			n = errno;
			if (etm_is_dying) {
				break;
			}
			fmd_hdl_debug(hdl,
			    "error: bad conn accept errno %d\n", n);
			etm_stats.etm_xport_accept_fail.fmds_value.ui64++;
			/* avoid spinning CPU */
			(void) etm_sleep(ETM_SLEEP_SLOW);
			continue;
		}

		/*
		 * Design_Note: etm_handle_new_conn() will close the
		 *		accepted connection when done. In early designs
		 *		etm_handle_new_conn() was spawned as a
		 *		separate thread via pthread_create();
		 *		however fmd_thr_create() constrains thread
		 *		creation to prevent spawned threads from
		 *		spawning others (ie, no grandchildren).
		 *		Hence etm_handle_new_conn() is now called
		 *		as a simple function [w/ multiple args].
		 */

		etm_handle_new_conn(hdl, conn);

	} /* while accepting new connections until ETM dies */

	/* ETM is dying (probably due to "fmadm unload etm") */

	if (etm_debug_lvl >= 1) {
		fmd_hdl_debug(hdl, "info: connection server is dying\n");
	}
} /* etm_server() */

static void *
etm_init_alloc(size_t size)
{
	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
}

static void
etm_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}

/*
 * -------------------------- FMD entry points -------------------------------
 */

/*
 * _fmd_init - initialize the transport for use by ETM and start the
 *		server daemon to accept new connections to us
 *
 * FMD will read our *.conf and subscribe us to FMA events
 */

void
_fmd_init(fmd_hdl_t *hdl)
{
	struct timeval tmv;		/* timeval */
	ssize_t n;			/* gen use */
	ldom_hdl_t *lhp;		/* ldom pointer */
	const struct facility *fp;	/* syslog facility matching */
	char *facname;			/* syslog facility property */

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		return;	/* invalid data in configuration file */
	}

	fmd_hdl_debug(hdl, "info: module initializing\n");

	init_hdl = hdl;
	lhp = ldom_init(etm_init_alloc, etm_init_free);

	/*
	 * Do not load this module if it is running on a guest ldom.
	 */
	if (ldom_major_version(lhp) == 1 && ldom_on_service(lhp) == 0) {
		fmd_hdl_debug(hdl, "info: module unregistering\n");
		ldom_fini(lhp);
		fmd_hdl_unregister(hdl);
		return;
	} else {
		ldom_fini(lhp);
	}

	/* setup statistics and properties from FMD */

	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
	    sizeof (etm_stats) / sizeof (fmd_stat_t),
	    (fmd_stat_t *)&etm_stats);

	etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
	etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
	    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
	fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
	    "etm_debug_max_ev_cnt %d\n",
	    etm_debug_lvl, etm_debug_max_ev_cnt);

	/* obtain an FMD transport handle so we can post FMA events later */

	etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);

	/* encourage protocol transaction id to be unique per module load */

	(void) gettimeofday(&tmv, NULL);
	etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
	    ((unsigned long)tmv.tv_usec >> 10));
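
	/*
	 * Note: the seed above mixes the low-order bits of the current
	 * second (shifted up 10 bits) with the high-order ~10 bits of the
	 * microsecond count, so two module loads rarely begin with the
	 * same xid; each send then advances the xid by ETM_XID_INC.
	 */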

	/*
	 * init the transport,
	 * start the connection acceptance server, and
	 * request protocol version be negotiated
	 */

	if ((n = etm_xport_init(hdl)) != 0) {
		fmd_hdl_error(hdl, "error: bad xport init errno %d\n", (-n));
		fmd_hdl_unregister(hdl);
		return;
	}

	/*
	 * Cache any properties we use every time we receive an alert.
	 */
	syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
	syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);

	if (syslog_file && (syslog_logfd = open("/dev/conslog",
	    O_WRONLY | O_NOCTTY)) == -1) {
		fmd_hdl_error(hdl, "error: failed to open /dev/conslog");
		syslog_file = 0;
	}

	if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
	    O_WRONLY | O_NOCTTY)) == -1) {
		fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
		syslog_cons = 0;
	}

	if (syslog_file) {
		/*
		 * Look up the value of the "facility" property and use it to
		 * determine what syslog LOG_* facility value we use to
		 * fill in our log_ctl_t.
		 */
		facname = fmd_prop_get_string(hdl, ETM_PROP_NM_FACILITY);

		for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
			if (strcmp(fp->fac_name, facname) == 0)
				break;
		}

		if (fp->fac_name == NULL) {
			fmd_hdl_error(hdl, "error: invalid 'facility'"
			    " setting: %s\n", facname);
			syslog_file = 0;
		} else {
			syslog_facility = fp->fac_value;
			syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
		}

		fmd_prop_free_string(hdl, facname);
	}

	etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

	/*
	 * Wait a second for the receiver to be ready before starting to
	 * handshake with the SP.
	 */
	(void) etm_sleep(ETM_SLEEP_QUIK);

	etm_req_ver_negot(hdl);

	fmd_hdl_debug(hdl, "info: module initialized ok\n");

} /* _fmd_init() */

/*
 * etm_recv - receive an FMA event from FMD and transport it
 *		to the remote endpoint
 */

/*ARGSUSED*/
void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
{
	etm_xport_addr_t *addrv;	/* vector of transport addresses */
	etm_xport_conn_t conn;		/* connection handle */
	etm_proto_v1_ev_hdr_t *hdrp;	/* for FMA_EVENT msg */
	ssize_t i, n;			/* gen use */
	size_t sz;			/* header size */
	size_t buflen;			/* size of packed FMA event */
	uint8_t *buf;			/* tmp buffer for packed FMA event */

	buflen = 0;
	(void) nvlist_size(evp, &buflen, NV_ENCODE_XDR);
	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);

	/*
	 * if the debug limit has been set, avoid excessive traffic,
	 * for example, an infinite cycle using loopback nodes
	 */

	if ((etm_debug_max_ev_cnt >= 0) &&
	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
	    etm_debug_max_ev_cnt)) {
		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
		    "event %p cnt %llu > debug max %d\n", evp,
		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
		    etm_debug_max_ev_cnt);
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	/* allocate a buffer for the FMA event and nvlist pack it */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);

	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
	    NV_ENCODE_XDR, 0)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event pack errno %d\n", n);
		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* get vector of dst addrs and send the FMA event to each one */

	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "bad event dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		/* open a new connection to this dst addr */

		if ((n = etm_conn_open(hdl, "FMA event dropped: "
		    "bad conn open on new ev",
		    addrv[i], &conn)) < 0) {
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_lock(&etm_write_lock);

		/* write the ETM message header */

		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
		    &sz)) == NULL) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			fmd_hdl_error(hdl, "error: FMA event dropped: "
			    "bad hdr write errno %d\n", errno);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad hdr wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		fmd_hdl_free(hdl, hdrp, sz);	/* header not needed */
		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
		    evp);

		/* write the ETM message body, ie, the packed nvlist */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io write on event", conn,
		    buf, buflen, ETM_IO_OP_WR)) < 0) {
			(void) pthread_mutex_unlock(&etm_write_lock);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad body wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		(void) pthread_mutex_unlock(&etm_write_lock);

		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
		    evp);

		/* close the connection */

		(void) etm_conn_close(hdl, "bad conn close after event send",
		    conn);
	} /* foreach dst addr in the vector */

	etm_xport_free_addrv(hdl, addrv);
	fmd_hdl_free(hdl, buf, buflen);

} /* etm_recv() */

/*
 * _fmd_fini - stop the server daemon and teardown the transport
 */

void
_fmd_fini(fmd_hdl_t *hdl)
{
	ssize_t n;	/* gen use */

	fmd_hdl_debug(hdl, "info: module finalizing\n");

	/* kill the connection server; wait for it to die */

	etm_is_dying = 1;

	if (etm_svr_tid != NULL) {
		fmd_thr_signal(hdl, etm_svr_tid);
		fmd_thr_destroy(hdl, etm_svr_tid);
		etm_svr_tid = NULL;
	} /* if server thread was successfully created */

	/* teardown the transport */

	if ((n = etm_xport_fini(hdl)) != 0) {
		fmd_hdl_error(hdl, "warning: xport fini errno %d\n", (-n));
	}
	if (etm_fmd_xprt != NULL) {
		fmd_xprt_close(hdl, etm_fmd_xprt);
	}

	if (syslog_logfd != -1) {
		(void) close(syslog_logfd);
	}
	if (syslog_msgfd != -1) {
		(void) close(syslog_msgfd);
	}

	fmd_hdl_debug(hdl, "info: module finalized ok\n");

} /* _fmd_fini() */