1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * etm.c FMA Event Transport Module implementation, a plugin of FMD 29 * for sun4v/Ontario 30 * 31 * plugin for sending/receiving FMA events to/from service processor 32 */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 /* 37 * --------------------------------- includes -------------------------------- 38 */ 39 40 #include <sys/fm/protocol.h> 41 #include <sys/fm/util.h> 42 #include <netinet/in.h> 43 #include <fm/fmd_api.h> 44 #include <sys/fm/ldom.h> 45 #include <sys/strlog.h> 46 #include <sys/syslog.h> 47 48 #include "etm_xport_api.h" 49 #include "etm_etm_proto.h" 50 #include "etm_impl.h" 51 52 #include <pthread.h> 53 #include <signal.h> 54 #include <stropts.h> 55 #include <locale.h> 56 #include <strings.h> 57 #include <stdlib.h> 58 #include <unistd.h> 59 #include <limits.h> 60 #include <values.h> 61 #include <alloca.h> 62 #include <errno.h> 63 #include <fcntl.h> 64 #include <time.h> 65 66 67 /* 68 * ----------------------------- forward decls ------------------------------- 69 */ 70 71 static void 72 etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class); 73 74 /* 75 * ------------------------- data structs for FMD ---------------------------- 76 */ 77 78 static const fmd_hdl_ops_t fmd_ops = { 79 etm_recv, /* fmdo_recv */ 80 NULL, /* fmdo_timeout */ 81 NULL, /* fmdo_close */ 82 NULL, /* fmdo_stats */ 83 NULL, /* fmdo_gc */ 84 NULL, /* fmdo_send */ 85 }; 86 87 static const fmd_prop_t fmd_props[] = { 88 { ETM_PROP_NM_XPORT_ADDRS, FMD_TYPE_STRING, "" }, 89 { ETM_PROP_NM_DEBUG_LVL, FMD_TYPE_INT32, "0" }, 90 { ETM_PROP_NM_DEBUG_MAX_EV_CNT, FMD_TYPE_INT32, "-1" }, 91 { ETM_PROP_NM_CONSOLE, FMD_TYPE_BOOL, "false" }, 92 { ETM_PROP_NM_SYSLOGD, FMD_TYPE_BOOL, "true" }, 93 { ETM_PROP_NM_FACILITY, FMD_TYPE_STRING, "LOG_DAEMON" }, 94 { NULL, 0, NULL } 95 }; 96 97 98 static const fmd_hdl_info_t fmd_info = { 99 "FMA Event Transport Module", "1.1", &fmd_ops, fmd_props 100 }; 101 102 /* 103 * ----------------------- private consts and defns -------------------------- 104 */ 105 106 /* misc buffer for variable sized protocol header fields */ 107 108 #define ETM_MISC_BUF_SZ (4 * 1024) 109 110 /* try limit for IO operations w/ capped exp backoff sleep on retry */ 111 112 /* 113 * Design_Note: ETM will potentially retry forever IO operations that the 114 * transport fails with EAGAIN (aka EWOULDBLOCK) rather than 115 * giving up after some number of seconds. 
This avoids 116 * dropping FMA events while the service processor is down, 117 * but at the risk of pending fmdo_recv() forever and 118 * overflowing FMD's event queue for ETM. 119 * A future TBD enhancement would be to always recv 120 * and send each ETM msg in a single read/write() to reduce 121 * the risk of failure between ETM msg hdr and body, 122 * assuming the MTU_SZ is large enough. 123 */ 124 125 #define ETM_TRY_MAX_CNT (MAXINT - 1) 126 #define ETM_TRY_BACKOFF_RATE (4) 127 #define ETM_TRY_BACKOFF_CAP (60) 128 129 /* amount to increment protocol transaction id on each new send */ 130 131 #define ETM_XID_INC (2) 132 133 /* 134 * ---------------------------- global data ---------------------------------- 135 */ 136 137 static fmd_hdl_t 138 *init_hdl = NULL; /* used in mem allocator at init time */ 139 140 static int 141 etm_debug_lvl = 0; /* debug level: 0 is off, 1 is on, 2 is more, etc */ 142 143 static int 144 etm_debug_max_ev_cnt = -1; /* max allowed event count for debugging */ 145 146 static fmd_xprt_t 147 *etm_fmd_xprt = NULL; /* FMD transport layer handle */ 148 149 static pthread_t 150 etm_svr_tid = NULL; /* thread id of connection acceptance server */ 151 152 static volatile int 153 etm_is_dying = 0; /* bool for dying (killing self) */ 154 155 static uint32_t 156 etm_xid_cur = 0; /* current transaction id for sends */ 157 158 static uint32_t 159 etm_xid_ping = 0; /* xid of last CONTROL msg sent requesting ping */ 160 161 static uint32_t 162 etm_xid_ver_negot = 0; /* xid of last CONTROL msg sent requesting ver negot */ 163 164 static uint32_t 165 etm_xid_posted_ev = 0; /* xid of last FMA_EVENT msg/event posted OK to FMD */ 166 167 static uint8_t 168 etm_resp_ver = ETM_PROTO_V1; /* proto ver [negotiated] for msg sends */ 169 170 static log_ctl_t syslog_ctl; /* log(7D) meta-data for each msg */ 171 static int syslog_facility; /* log(7D) facility (part of priority) */ 172 static int syslog_logfd = -1; /* log(7D) file descriptor */ 173 static int syslog_msgfd = -1; /* sysmsg(7D) file descriptor */ 174 static int syslog_file = 0; /* log to syslog_logfd */ 175 static int syslog_cons = 0; /* log to syslog_msgfd */ 176 177 static const struct facility { 178 const char *fac_name; 179 int fac_value; 180 } syslog_facs[] = { 181 { "LOG_DAEMON", LOG_DAEMON }, 182 { "LOG_LOCAL0", LOG_LOCAL0 }, 183 { "LOG_LOCAL1", LOG_LOCAL1 }, 184 { "LOG_LOCAL2", LOG_LOCAL2 }, 185 { "LOG_LOCAL3", LOG_LOCAL3 }, 186 { "LOG_LOCAL4", LOG_LOCAL4 }, 187 { "LOG_LOCAL5", LOG_LOCAL5 }, 188 { "LOG_LOCAL6", LOG_LOCAL6 }, 189 { "LOG_LOCAL7", LOG_LOCAL7 }, 190 { NULL, 0 } 191 }; 192 193 static struct stats { 194 195 /* ETM msg counters */ 196 197 fmd_stat_t etm_rd_hdr_fmaevent; 198 fmd_stat_t etm_rd_hdr_control; 199 fmd_stat_t etm_rd_hdr_alert; 200 fmd_stat_t etm_rd_hdr_response; 201 fmd_stat_t etm_rd_body_fmaevent; 202 fmd_stat_t etm_rd_body_control; 203 fmd_stat_t etm_rd_body_alert; 204 fmd_stat_t etm_rd_body_response; 205 fmd_stat_t etm_wr_hdr_fmaevent; 206 fmd_stat_t etm_wr_hdr_control; 207 fmd_stat_t etm_wr_hdr_response; 208 fmd_stat_t etm_wr_body_fmaevent; 209 fmd_stat_t etm_wr_body_control; 210 fmd_stat_t etm_wr_body_response; 211 212 /* ETM byte counters */ 213 214 fmd_stat_t etm_wr_fmd_bytes; 215 fmd_stat_t etm_rd_fmd_bytes; 216 fmd_stat_t etm_wr_xport_bytes; 217 fmd_stat_t etm_rd_xport_bytes; 218 219 fmd_stat_t etm_magic_drop_bytes; 220 221 /* ETM [dropped] FMA event counters */ 222 223 fmd_stat_t etm_rd_fmd_fmaevent; 224 fmd_stat_t etm_wr_fmd_fmaevent; 225 226 fmd_stat_t etm_rd_drop_fmaevent; 227 fmd_stat_t 
etm_wr_drop_fmaevent; 228 229 fmd_stat_t etm_rd_dup_fmaevent; 230 fmd_stat_t etm_wr_dup_fmaevent; 231 232 /* ETM protocol failures */ 233 234 fmd_stat_t etm_magic_bad; 235 fmd_stat_t etm_ver_bad; 236 fmd_stat_t etm_msgtype_bad; 237 fmd_stat_t etm_subtype_bad; 238 fmd_stat_t etm_xid_bad; 239 fmd_stat_t etm_fmaeventlen_bad; 240 fmd_stat_t etm_respcode_bad; 241 fmd_stat_t etm_timeout_bad; 242 fmd_stat_t etm_evlens_bad; 243 244 /* IO operation failures */ 245 246 fmd_stat_t etm_xport_wr_fail; 247 fmd_stat_t etm_xport_rd_fail; 248 fmd_stat_t etm_xport_pk_fail; 249 250 /* IO operation retries */ 251 252 fmd_stat_t etm_xport_wr_retry; 253 fmd_stat_t etm_xport_rd_retry; 254 fmd_stat_t etm_xport_pk_retry; 255 256 /* system and library failures */ 257 258 fmd_stat_t etm_os_nvlist_pack_fail; 259 fmd_stat_t etm_os_nvlist_unpack_fail; 260 fmd_stat_t etm_os_nvlist_size_fail; 261 fmd_stat_t etm_os_pthread_create_fail; 262 263 /* xport API failures */ 264 265 fmd_stat_t etm_xport_get_ev_addrv_fail; 266 fmd_stat_t etm_xport_open_fail; 267 fmd_stat_t etm_xport_close_fail; 268 fmd_stat_t etm_xport_accept_fail; 269 fmd_stat_t etm_xport_open_retry; 270 271 /* FMD entry point bad arguments */ 272 273 fmd_stat_t etm_fmd_recv_badargs; 274 fmd_stat_t etm_fmd_init_badargs; 275 fmd_stat_t etm_fmd_fini_badargs; 276 277 /* Alert logging errors */ 278 fmd_stat_t etm_log_err; 279 fmd_stat_t etm_msg_err; 280 281 } etm_stats = { 282 283 /* ETM msg counters */ 284 285 { "etm_rd_hdr_fmaevent", FMD_TYPE_UINT64, 286 "ETM fmaevent msg headers rcvd from xport" }, 287 { "etm_rd_hdr_control", FMD_TYPE_UINT64, 288 "ETM control msg headers rcvd from xport" }, 289 { "etm_rd_hdr_alert", FMD_TYPE_UINT64, 290 "ETM alert msg headers rcvd from xport" }, 291 { "etm_rd_hdr_response", FMD_TYPE_UINT64, 292 "ETM response msg headers rcvd from xport" }, 293 { "etm_rd_body_fmaevent", FMD_TYPE_UINT64, 294 "ETM fmaevent msg bodies rcvd from xport" }, 295 { "etm_rd_body_control", FMD_TYPE_UINT64, 296 "ETM control msg bodies rcvd from xport" }, 297 { "etm_rd_body_alert", FMD_TYPE_UINT64, 298 "ETM alert msg bodies rcvd from xport" }, 299 { "etm_rd_body_response", FMD_TYPE_UINT64, 300 "ETM response msg bodies rcvd from xport" }, 301 { "etm_wr_hdr_fmaevent", FMD_TYPE_UINT64, 302 "ETM fmaevent msg headers sent to xport" }, 303 { "etm_wr_hdr_control", FMD_TYPE_UINT64, 304 "ETM control msg headers sent to xport" }, 305 { "etm_wr_hdr_response", FMD_TYPE_UINT64, 306 "ETM response msg headers sent to xport" }, 307 { "etm_wr_body_fmaevent", FMD_TYPE_UINT64, 308 "ETM fmaevent msg bodies sent to xport" }, 309 { "etm_wr_body_control", FMD_TYPE_UINT64, 310 "ETM control msg bodies sent to xport" }, 311 { "etm_wr_body_response", FMD_TYPE_UINT64, 312 "ETM response msg bodies sent to xport" }, 313 314 /* ETM byte counters */ 315 316 { "etm_wr_fmd_bytes", FMD_TYPE_UINT64, 317 "bytes of FMA events sent to FMD" }, 318 { "etm_rd_fmd_bytes", FMD_TYPE_UINT64, 319 "bytes of FMA events rcvd from FMD" }, 320 { "etm_wr_xport_bytes", FMD_TYPE_UINT64, 321 "bytes of FMA events sent to xport" }, 322 { "etm_rd_xport_bytes", FMD_TYPE_UINT64, 323 "bytes of FMA events rcvd from xport" }, 324 325 { "etm_magic_drop_bytes", FMD_TYPE_UINT64, 326 "bytes dropped from xport pre magic num" }, 327 328 /* ETM [dropped] FMA event counters */ 329 330 { "etm_rd_fmd_fmaevent", FMD_TYPE_UINT64, 331 "FMA events rcvd from FMD" }, 332 { "etm_wr_fmd_fmaevent", FMD_TYPE_UINT64, 333 "FMA events sent to FMD" }, 334 335 { "etm_rd_drop_fmaevent", FMD_TYPE_UINT64, 336 "dropped FMA events from xport" }, 337 { 
"etm_wr_drop_fmaevent", FMD_TYPE_UINT64, 338 "dropped FMA events to xport" }, 339 340 { "etm_rd_dup_fmaevent", FMD_TYPE_UINT64, 341 "duplicate FMA events from xport" }, 342 { "etm_wr_dup_fmaevent", FMD_TYPE_UINT64, 343 "duplicate FMA events to xport" }, 344 345 /* ETM protocol failures */ 346 347 { "etm_magic_bad", FMD_TYPE_UINT64, 348 "ETM msgs w/ invalid magic num" }, 349 { "etm_ver_bad", FMD_TYPE_UINT64, 350 "ETM msgs w/ invalid protocol version" }, 351 { "etm_msgtype_bad", FMD_TYPE_UINT64, 352 "ETM msgs w/ invalid message type" }, 353 { "etm_subtype_bad", FMD_TYPE_UINT64, 354 "ETM msgs w/ invalid sub type" }, 355 { "etm_xid_bad", FMD_TYPE_UINT64, 356 "ETM msgs w/ unmatched xid" }, 357 { "etm_fmaeventlen_bad", FMD_TYPE_UINT64, 358 "ETM msgs w/ invalid FMA event length" }, 359 { "etm_respcode_bad", FMD_TYPE_UINT64, 360 "ETM msgs w/ invalid response code" }, 361 { "etm_timeout_bad", FMD_TYPE_UINT64, 362 "ETM msgs w/ invalid timeout value" }, 363 { "etm_evlens_bad", FMD_TYPE_UINT64, 364 "ETM msgs w/ too many event lengths" }, 365 366 /* IO operation failures */ 367 368 { "etm_xport_wr_fail", FMD_TYPE_UINT64, 369 "xport write failures" }, 370 { "etm_xport_rd_fail", FMD_TYPE_UINT64, 371 "xport read failures" }, 372 { "etm_xport_pk_fail", FMD_TYPE_UINT64, 373 "xport peek failures" }, 374 375 /* IO operation retries */ 376 377 { "etm_xport_wr_retry", FMD_TYPE_UINT64, 378 "xport write retries" }, 379 { "etm_xport_rd_retry", FMD_TYPE_UINT64, 380 "xport read retries" }, 381 { "etm_xport_pk_retry", FMD_TYPE_UINT64, 382 "xport peek retries" }, 383 384 /* system and library failures */ 385 386 { "etm_os_nvlist_pack_fail", FMD_TYPE_UINT64, 387 "nvlist_pack failures" }, 388 { "etm_os_nvlist_unpack_fail", FMD_TYPE_UINT64, 389 "nvlist_unpack failures" }, 390 { "etm_os_nvlist_size_fail", FMD_TYPE_UINT64, 391 "nvlist_size failures" }, 392 { "etm_os_pthread_create_fail", FMD_TYPE_UINT64, 393 "pthread_create failures" }, 394 395 /* transport API failures */ 396 397 { "etm_xport_get_ev_addrv_fail", FMD_TYPE_UINT64, 398 "xport get event addrv API failures" }, 399 { "etm_xport_open_fail", FMD_TYPE_UINT64, 400 "xport open API failures" }, 401 { "etm_xport_close_fail", FMD_TYPE_UINT64, 402 "xport close API failures" }, 403 { "etm_xport_accept_fail", FMD_TYPE_UINT64, 404 "xport accept API failures" }, 405 { "etm_xport_open_retry", FMD_TYPE_UINT64, 406 "xport open API retries" }, 407 408 /* FMD entry point bad arguments */ 409 410 { "etm_fmd_recv_badargs", FMD_TYPE_UINT64, 411 "bad arguments from fmd_recv entry point" }, 412 { "etm_fmd_init_badargs", FMD_TYPE_UINT64, 413 "bad arguments from fmd_init entry point" }, 414 { "etm_fmd_fini_badargs", FMD_TYPE_UINT64, 415 "bad arguments from fmd_fini entry point" }, 416 417 /* Alert logging errors */ 418 { "etm_log_err", FMD_TYPE_UINT64, 419 "failed to log message to log(7D)" }, 420 { "etm_msg_err", FMD_TYPE_UINT64, 421 "failed to log message to sysmsg(7D)" } 422 }; 423 424 /* 425 * -------------------------- support functions ------------------------------ 426 */ 427 428 /* 429 * Design_Note: Each failure worth reporting to FMD should be done using 430 * a single call to fmd_hdl_error() as it logs an FMA event 431 * for each call. Also be aware that all the fmd_hdl_*() 432 * format strings currently use platform specific *printf() 433 * routines; so "%p" under Solaris does not prepend "0x" to 434 * the outputted hex digits, while Linux and VxWorks do. 
435 */ 436 437 /* 438 * etm_show_time - display the current time of day (for debugging) using 439 * the given FMD module handle and annotation string 440 */ 441 442 static void 443 etm_show_time(fmd_hdl_t *hdl, char *note_str) 444 { 445 struct timeval tmv; /* timeval */ 446 447 (void) gettimeofday(&tmv, NULL); 448 fmd_hdl_debug(hdl, "info: %s: cur Unix Epoch time %d.%06d\n", 449 note_str, tmv.tv_sec, tmv.tv_usec); 450 451 } /* etm_show_time() */ 452 453 /* 454 * etm_hexdump - hexdump the given buffer (for debugging) using 455 * the given FMD module handle 456 */ 457 458 static void 459 etm_hexdump(fmd_hdl_t *hdl, void *buf, size_t byte_cnt) 460 { 461 uint8_t *bp; /* byte ptr */ 462 int i, j; /* index */ 463 char cb[80]; /* char buf */ 464 unsigned int n; /* a byte of data for sprintf() */ 465 466 bp = buf; 467 j = 0; 468 469 /* 470 * Design_Note: fmd_hdl_debug() auto adds a newline if missing; 471 * hence cb exists to accumulate a longer string. 472 */ 473 474 for (i = 1; i <= byte_cnt; i++) { 475 n = *bp++; 476 (void) sprintf(&cb[j], "%2.2x ", n); 477 j += 3; 478 /* add a newline every 16 bytes or at the buffer's end */ 479 if (((i % 16) == 0) || (i >= byte_cnt)) { 480 cb[j-1] = '\0'; 481 fmd_hdl_debug(hdl, "%s\n", cb); 482 j = 0; 483 } 484 } /* for each byte in the buffer */ 485 486 } /* etm_hexdump() */ 487 488 /* 489 * etm_sleep - sleep the caller for the given number of seconds, 490 * return 0 or -errno value 491 * 492 * Design_Note: To avoid interfering with FMD's signal mask (SIGALRM) 493 * do not use [Solaris] sleep(3C) and instead use 494 * pthread_cond_wait() or nanosleep(), both of which 495 * are POSIX spec-ed to leave signal masks alone. 496 * This is needed for Solaris and Linux (domain and SP). 497 */ 498 499 static int 500 etm_sleep(unsigned sleep_sec) 501 { 502 struct timespec tms; /* for nanosleep() */ 503 504 tms.tv_sec = sleep_sec; 505 tms.tv_nsec = 0; 506 507 if (nanosleep(&tms, NULL) < 0) { 508 /* errno assumed set by above call */ 509 return (-errno); 510 } 511 return (0); 512 513 } /* etm_sleep() */ 514 515 /* 516 * etm_conn_open - open a connection to the given transport address, 517 * return 0 and the opened connection handle 518 * or -errno value 519 * 520 * caveats: the err_substr is used in failure cases for calling 521 * fmd_hdl_error() 522 */ 523 524 static int 525 etm_conn_open(fmd_hdl_t *hdl, char *err_substr, 526 etm_xport_addr_t addr, etm_xport_conn_t *connp) 527 { 528 etm_xport_conn_t conn; /* connection to return */ 529 int nev; /* -errno value */ 530 531 if ((conn = etm_xport_open(hdl, addr)) == NULL) { 532 nev = (-errno); 533 fmd_hdl_error(hdl, "error: %s: errno %d\n", 534 err_substr, errno); 535 etm_stats.etm_xport_open_fail.fmds_value.ui64++; 536 return (nev); 537 } else { 538 *connp = conn; 539 return (0); 540 } 541 } /* etm_conn_open() */ 542 543 /* 544 * etm_conn_close - close the given connection, 545 * return 0 or -errno value 546 * 547 * caveats: the err_substr is used in failure cases for calling 548 * fmd_hdl_error() 549 */ 550 551 static int 552 etm_conn_close(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn) 553 { 554 int nev; /* -errno value */ 555 556 if (etm_xport_close(hdl, conn) == NULL) { 557 nev = (-errno); 558 fmd_hdl_error(hdl, "warning: %s: errno %d\n", 559 err_substr, errno); 560 etm_stats.etm_xport_close_fail.fmds_value.ui64++; 561 return (nev); 562 } else { 563 return (0); 564 } 565 } /* etm_conn_close() */ 566 567 /* 568 * etm_io_op - perform an IO operation on the given connection 569 * with the given buffer, 570 * 
accommodating MTU size and retrying op if needed, 571 * return how many bytes actually done by the op 572 * or -errno value 573 * 574 * caveats: the err_substr is used in failure cases for calling 575 * fmd_hdl_error() 576 */ 577 578 static ssize_t 579 etm_io_op(fmd_hdl_t *hdl, char *err_substr, etm_xport_conn_t conn, 580 void *buf, size_t byte_cnt, int io_op) 581 { 582 ssize_t rv; /* ret val / byte count */ 583 ssize_t n; /* gen use */ 584 uint8_t *datap; /* ptr to data */ 585 size_t mtu_sz; /* MTU size in bytes */ 586 int (*io_func_ptr)(fmd_hdl_t *, etm_xport_conn_t, 587 void *, size_t); 588 size_t io_sz; /* byte count for io_func_ptr */ 589 int try_cnt; /* number of tries done */ 590 int sleep_sec; /* exp backoff sleep period in sec */ 591 int sleep_rv; /* ret val from sleeping */ 592 fmd_stat_t io_retry_stat; /* IO retry stat to update */ 593 fmd_stat_t io_fail_stat; /* IO failure stat to update */ 594 595 if ((conn == NULL) || (buf == NULL)) { 596 return (-EINVAL); 597 } 598 switch (io_op) { 599 case ETM_IO_OP_RD: 600 io_func_ptr = etm_xport_read; 601 io_retry_stat = etm_stats.etm_xport_rd_retry; 602 io_fail_stat = etm_stats.etm_xport_rd_fail; 603 break; 604 case ETM_IO_OP_WR: 605 io_func_ptr = etm_xport_write; 606 io_retry_stat = etm_stats.etm_xport_wr_retry; 607 io_fail_stat = etm_stats.etm_xport_wr_fail; 608 break; 609 default: 610 return (-EINVAL); 611 } 612 if (byte_cnt == 0) { 613 return (byte_cnt); /* nop */ 614 } 615 616 /* obtain [current] MTU size */ 617 618 if ((n = etm_xport_get_opt(hdl, conn, ETM_XPORT_OPT_MTU_SZ)) < 0) { 619 mtu_sz = ETM_XPORT_MTU_SZ_DEF; 620 } else { 621 mtu_sz = n; 622 } 623 624 /* loop until all IO done, try limit exceeded, or real failure */ 625 626 rv = 0; 627 datap = buf; 628 while (rv < byte_cnt) { 629 io_sz = MIN((byte_cnt - rv), mtu_sz); 630 try_cnt = 0; 631 sleep_sec = 0; 632 633 /* when give up, return -errno value even if partly done */ 634 635 while ((n = (*io_func_ptr)(hdl, conn, datap, io_sz)) == 636 (-EAGAIN)) { 637 try_cnt++; 638 if (try_cnt > ETM_TRY_MAX_CNT) { 639 rv = n; 640 goto func_ret; 641 } 642 if (etm_is_dying) { 643 rv = (-EINTR); 644 goto func_ret; 645 } 646 if ((sleep_rv = etm_sleep(sleep_sec)) < 0) { 647 rv = sleep_rv; 648 goto func_ret; 649 } 650 sleep_sec = ((sleep_sec == 0) ? 
1 : 651 (sleep_sec * ETM_TRY_BACKOFF_RATE)); 652 sleep_sec = MIN(sleep_sec, ETM_TRY_BACKOFF_CAP); 653 io_retry_stat.fmds_value.ui64++; 654 if (etm_debug_lvl >= 1) { 655 fmd_hdl_debug(hdl, "info: retrying io op %d " 656 "due to EAGAIN\n", io_op); 657 } 658 } /* while trying the io operation */ 659 660 if (etm_is_dying) { 661 rv = (-EINTR); 662 goto func_ret; 663 } 664 if (n < 0) { 665 rv = n; 666 goto func_ret; 667 } 668 /* avoid spinning CPU when given 0 bytes but no error */ 669 if (n == 0) { 670 if ((sleep_rv = etm_sleep(ETM_SLEEP_QUIK)) < 0) { 671 rv = sleep_rv; 672 goto func_ret; 673 } 674 } 675 rv += n; 676 datap += n; 677 } /* while still have more data */ 678 679 func_ret: 680 681 if (rv < 0) { 682 io_fail_stat.fmds_value.ui64++; 683 fmd_hdl_error(hdl, "error: %s: errno %d\n", 684 err_substr, (int)(-rv)); 685 } 686 if (etm_debug_lvl >= 3) { 687 fmd_hdl_debug(hdl, "info: io op %d ret %d of %d\n", 688 io_op, (int)rv, (int)byte_cnt); 689 } 690 return (rv); 691 692 } /* etm_io_op() */ 693 694 /* 695 * etm_magic_read - read the magic number of an ETM message header 696 * from the given connection into the given buffer, 697 * return 0 or -errno value 698 * 699 * Design_Note: This routine is intended to help protect ETM from protocol 700 * framing errors as might be caused by an SP reset / crash in 701 * the middle of an ETM message send; the connection will be 702 * read from for as many bytes as needed until the magic number 703 * is found using a sliding buffer for comparisons. 704 */ 705 706 static int 707 etm_magic_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, uint32_t *magic_ptr) 708 { 709 int rv; /* ret val */ 710 uint32_t magic_num; /* magic number */ 711 int byte_cnt; /* count of bytes read */ 712 uint8_t buf5[4+1]; /* sliding input buffer */ 713 int i, j; /* indices into buf5 */ 714 ssize_t n; /* gen use */ 715 uint8_t drop_buf[1024]; /* dropped bytes buffer */ 716 717 rv = 0; /* assume success */ 718 magic_num = 0; 719 byte_cnt = 0; 720 j = 0; 721 722 /* magic number bytes are sent in network (big endian) order */ 723 724 while (magic_num != ETM_PROTO_MAGIC_NUM) { 725 if ((n = etm_io_op(hdl, "bad io read on magic", 726 conn, &buf5[j], 1, ETM_IO_OP_RD)) < 0) { 727 rv = n; 728 goto func_ret; 729 } 730 byte_cnt++; 731 j = MIN((j + 1), sizeof (magic_num)); 732 if (byte_cnt < sizeof (magic_num)) { 733 continue; 734 } 735 736 if (byte_cnt > sizeof (magic_num)) { 737 etm_stats.etm_magic_drop_bytes.fmds_value.ui64++; 738 i = MIN(byte_cnt - j - 1, sizeof (drop_buf) - 1); 739 drop_buf[i] = buf5[0]; 740 for (i = 0; i < j; i++) { 741 buf5[i] = buf5[i+1]; 742 } /* for sliding the buffer contents */ 743 } 744 (void) memcpy(&magic_num, &buf5[0], sizeof (magic_num)); 745 magic_num = ntohl(magic_num); 746 } /* for reading bytes until find magic number */ 747 748 func_ret: 749 750 if (byte_cnt != sizeof (magic_num)) { 751 fmd_hdl_error(hdl, "warning: bad proto frame " 752 "implies corrupt/lost msg(s)\n"); 753 } 754 if ((byte_cnt > sizeof (magic_num)) && (etm_debug_lvl >= 2)) { 755 i = MIN(byte_cnt - sizeof (magic_num), sizeof (drop_buf)); 756 fmd_hdl_debug(hdl, "info: magic drop hexdump " 757 "first %d of %d bytes:\n", 758 i, byte_cnt - sizeof (magic_num)); 759 etm_hexdump(hdl, drop_buf, i); 760 } 761 762 if (rv == 0) { 763 *magic_ptr = magic_num; 764 } 765 return (rv); 766 767 } /* etm_magic_read() */ 768 769 /* 770 * etm_hdr_read - allocate, read, and validate a [variable sized] 771 * ETM message header from the given connection, 772 * return the allocated ETM message header 773 * (which is 
guaranteed to be large enough to reuse as a 774 * RESPONSE msg hdr) and its size 775 * or NULL and set errno on failure 776 */ 777 778 static void * 779 etm_hdr_read(fmd_hdl_t *hdl, etm_xport_conn_t conn, size_t *szp) 780 { 781 uint8_t *hdrp; /* ptr to header to return */ 782 size_t hdr_sz; /* sizeof *hdrp */ 783 etm_proto_v1_pp_t pp; /* protocol preamble */ 784 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 785 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 786 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 787 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 788 uint32_t *lenp; /* ptr to FMA event length */ 789 ssize_t i, n; /* gen use */ 790 uint8_t misc_buf[ETM_MISC_BUF_SZ]; /* for var sized hdrs */ 791 int dummy_int; /* dummy var to appease lint */ 792 793 hdrp = NULL; hdr_sz = 0; 794 795 /* read the magic number which starts the protocol preamble */ 796 797 if ((n = etm_magic_read(hdl, conn, &pp.pp_magic_num)) < 0) { 798 errno = (-n); 799 etm_stats.etm_magic_bad.fmds_value.ui64++; 800 return (NULL); 801 } 802 803 /* read the rest of the protocol preamble all at once */ 804 805 if ((n = etm_io_op(hdl, "bad io read on preamble", 806 conn, &pp.pp_proto_ver, 807 sizeof (pp) - sizeof (pp.pp_magic_num), 808 ETM_IO_OP_RD)) < 0) { 809 errno = (-n); 810 return (NULL); 811 } 812 813 /* 814 * Design_Note: The magic number was already network decoded; but 815 * some other preamble fields also need to be decoded, 816 * specifically pp_xid and pp_timeout. The rest of the 817 * preamble fields are byte sized and hence need no 818 * decoding. 819 */ 820 821 pp.pp_xid = ntohl(pp.pp_xid); 822 pp.pp_timeout = ntohl(pp.pp_timeout); 823 824 /* sanity check the header as best we can */ 825 826 if ((pp.pp_proto_ver < ETM_PROTO_V1) || 827 (pp.pp_proto_ver > ETM_PROTO_V3)) { 828 fmd_hdl_error(hdl, "error: bad proto ver %d\n", 829 (int)pp.pp_proto_ver); 830 errno = EPROTO; 831 etm_stats.etm_ver_bad.fmds_value.ui64++; 832 return (NULL); 833 } 834 835 dummy_int = pp.pp_msg_type; 836 if ((dummy_int <= ETM_MSG_TYPE_TOO_LOW) || 837 (dummy_int >= ETM_MSG_TYPE_TOO_BIG)) { 838 fmd_hdl_error(hdl, "error: bad msg type %d", dummy_int); 839 errno = EBADMSG; 840 etm_stats.etm_msgtype_bad.fmds_value.ui64++; 841 return (NULL); 842 } 843 844 /* handle [var sized] hdrs for FMA_EVENT, CONTROL, RESPONSE msgs */ 845 846 if (pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 847 848 ev_hdrp = (void*)&misc_buf[0]; 849 hdr_sz = sizeof (*ev_hdrp); 850 (void) memcpy(&ev_hdrp->ev_pp, &pp, sizeof (pp)); 851 852 /* sanity check the header's timeout */ 853 854 if ((ev_hdrp->ev_pp.pp_proto_ver == ETM_PROTO_V1) && 855 (ev_hdrp->ev_pp.pp_timeout != ETM_PROTO_V1_TIMEOUT_NONE)) { 856 errno = ETIME; 857 etm_stats.etm_timeout_bad.fmds_value.ui64++; 858 return (NULL); 859 } 860 861 /* get all FMA event lengths from the header */ 862 863 lenp = (uint32_t *)&ev_hdrp->ev_lens[0]; lenp--; 864 i = -1; /* cnt of length entries preceding 0 */ 865 do { 866 i++; lenp++; 867 if ((sizeof (*ev_hdrp) + (i * sizeof (*lenp))) >= 868 ETM_MISC_BUF_SZ) { 869 errno = E2BIG; /* ridiculous size */ 870 etm_stats.etm_evlens_bad.fmds_value.ui64++; 871 return (NULL); 872 } 873 if ((n = etm_io_op(hdl, "bad io read on event len", 874 conn, lenp, sizeof (*lenp), 875 ETM_IO_OP_RD)) < 0) { 876 errno = (-n); 877 return (NULL); 878 } 879 *lenp = ntohl(*lenp); 880 881 } while (*lenp != 0); 882 i += 0; /* first len already counted by sizeof(ev_hdr) */ 883 hdr_sz += (i * sizeof (*lenp)); 884 885 etm_stats.etm_rd_hdr_fmaevent.fmds_value.ui64++; 886 887 } 
else if (pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 888 889 ctl_hdrp = (void*)&misc_buf[0]; 890 hdr_sz = sizeof (*ctl_hdrp); 891 (void) memcpy(&ctl_hdrp->ctl_pp, &pp, sizeof (pp)); 892 893 /* sanity check the header's sub type (control selector) */ 894 895 if ((ctl_hdrp->ctl_pp.pp_sub_type <= ETM_CTL_SEL_TOO_LOW) || 896 (ctl_hdrp->ctl_pp.pp_sub_type >= ETM_CTL_SEL_TOO_BIG)) { 897 fmd_hdl_error(hdl, "error: bad ctl sub type %d\n", 898 (int)ctl_hdrp->ctl_pp.pp_sub_type); 899 errno = EBADMSG; 900 etm_stats.etm_subtype_bad.fmds_value.ui64++; 901 return (NULL); 902 } 903 904 /* get the control length */ 905 906 if ((n = etm_io_op(hdl, "bad io read on ctl len", 907 conn, &ctl_hdrp->ctl_len, 908 sizeof (ctl_hdrp->ctl_len), 909 ETM_IO_OP_RD)) < 0) { 910 errno = (-n); 911 return (NULL); 912 } 913 914 ctl_hdrp->ctl_len = ntohl(ctl_hdrp->ctl_len); 915 916 etm_stats.etm_rd_hdr_control.fmds_value.ui64++; 917 918 } else if (pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 919 920 resp_hdrp = (void*)&misc_buf[0]; 921 hdr_sz = sizeof (*resp_hdrp); 922 (void) memcpy(&resp_hdrp->resp_pp, &pp, sizeof (pp)); 923 924 /* sanity check the header's timeout */ 925 926 if (resp_hdrp->resp_pp.pp_timeout != 927 ETM_PROTO_V1_TIMEOUT_NONE) { 928 errno = ETIME; 929 etm_stats.etm_timeout_bad.fmds_value.ui64++; 930 return (NULL); 931 } 932 933 /* get the response code and length */ 934 935 if ((n = etm_io_op(hdl, "bad io read on resp code+len", 936 conn, &resp_hdrp->resp_code, 937 sizeof (resp_hdrp->resp_code) + 938 sizeof (resp_hdrp->resp_len), 939 ETM_IO_OP_RD)) < 0) { 940 errno = (-n); 941 return (NULL); 942 } 943 944 resp_hdrp->resp_code = ntohl(resp_hdrp->resp_code); 945 resp_hdrp->resp_len = ntohl(resp_hdrp->resp_len); 946 947 etm_stats.etm_rd_hdr_response.fmds_value.ui64++; 948 949 } else if (pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 950 951 sa_hdrp = (void*)&misc_buf[0]; 952 hdr_sz = sizeof (*sa_hdrp); 953 (void) memcpy(&sa_hdrp->sa_pp, &pp, sizeof (pp)); 954 955 /* sanity check the header's protocol version */ 956 957 if (sa_hdrp->sa_pp.pp_proto_ver != ETM_PROTO_V3) { 958 errno = EPROTO; 959 etm_stats.etm_ver_bad.fmds_value.ui64++; 960 return (NULL); 961 } 962 963 /* get the priority and length */ 964 965 if ((n = etm_io_op(hdl, "bad io read on sa priority+len", 966 conn, &sa_hdrp->sa_priority, 967 sizeof (sa_hdrp->sa_priority) + 968 sizeof (sa_hdrp->sa_len), 969 ETM_IO_OP_RD)) < 0) { 970 errno = (-n); 971 return (NULL); 972 } 973 974 sa_hdrp->sa_priority = ntohl(sa_hdrp->sa_priority); 975 sa_hdrp->sa_len = ntohl(sa_hdrp->sa_len); 976 977 etm_stats.etm_rd_hdr_alert.fmds_value.ui64++; 978 979 } /* whether we have FMA_EVENT, ALERT, CONTROL, or RESPONSE msg */ 980 981 /* 982 * choose a header size that allows hdr reuse for RESPONSE msgs, 983 * allocate and populate the message header, and 984 * return alloc size to caller for later free of hdrp 985 */ 986 987 hdr_sz = MAX(hdr_sz, sizeof (*resp_hdrp)); 988 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 989 (void) memcpy(hdrp, misc_buf, hdr_sz); 990 991 if (etm_debug_lvl >= 3) { 992 fmd_hdl_debug(hdl, "info: msg hdr hexdump %d bytes:\n", 993 hdr_sz); 994 etm_hexdump(hdl, hdrp, hdr_sz); 995 } 996 *szp = hdr_sz; 997 return (hdrp); 998 999 } /* etm_hdr_read() */ 1000 1001 /* 1002 * etm_hdr_write - create and write a [variable sized] ETM message header 1003 * to the given connection appropriate for the given FMA event 1004 * and type of nvlist encoding, 1005 * return the allocated ETM message header and its size 1006 * or NULL and set errno on failure 1007 */ 1008 1009 static 
void* 1010 etm_hdr_write(fmd_hdl_t *hdl, etm_xport_conn_t conn, nvlist_t *evp, 1011 int encoding, size_t *szp) 1012 { 1013 etm_proto_v1_ev_hdr_t *hdrp; /* for FMA_EVENT msg */ 1014 size_t hdr_sz; /* sizeof *hdrp */ 1015 uint32_t *lenp; /* ptr to FMA event length */ 1016 size_t evsz; /* packed FMA event size */ 1017 ssize_t n; /* gen use */ 1018 1019 /* allocate and populate the message header for 1 FMA event */ 1020 1021 hdr_sz = sizeof (*hdrp) + (1 * sizeof (hdrp->ev_lens[0])); 1022 1023 hdrp = fmd_hdl_zalloc(hdl, hdr_sz, FMD_SLEEP); 1024 1025 /* 1026 * Design_Note: Although the ETM protocol supports it, we do not (yet) 1027 * want responses/ACKs on FMA events that we send. All 1028 * such messages are sent with ETM_PROTO_V1_TIMEOUT_NONE. 1029 */ 1030 1031 hdrp->ev_pp.pp_magic_num = ETM_PROTO_MAGIC_NUM; 1032 hdrp->ev_pp.pp_magic_num = htonl(hdrp->ev_pp.pp_magic_num); 1033 hdrp->ev_pp.pp_proto_ver = ETM_PROTO_V1; 1034 hdrp->ev_pp.pp_msg_type = ETM_MSG_TYPE_FMA_EVENT; 1035 hdrp->ev_pp.pp_sub_type = 0; 1036 hdrp->ev_pp.pp_rsvd_pad = 0; 1037 hdrp->ev_pp.pp_xid = etm_xid_cur; 1038 hdrp->ev_pp.pp_xid = htonl(hdrp->ev_pp.pp_xid); 1039 etm_xid_cur += ETM_XID_INC; 1040 hdrp->ev_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 1041 hdrp->ev_pp.pp_timeout = htonl(hdrp->ev_pp.pp_timeout); 1042 1043 lenp = &hdrp->ev_lens[0]; 1044 1045 if ((n = nvlist_size(evp, &evsz, encoding)) != 0) { 1046 errno = n; 1047 fmd_hdl_free(hdl, hdrp, hdr_sz); 1048 etm_stats.etm_os_nvlist_size_fail.fmds_value.ui64++; 1049 return (NULL); 1050 } 1051 1052 /* indicate 1 FMA event, network encode its length, and 0-terminate */ 1053 1054 *lenp = evsz; *lenp = htonl(*lenp); lenp++; 1055 *lenp = 0; *lenp = htonl(*lenp); lenp++; 1056 1057 /* 1058 * write the network encoded header to the transport, and 1059 * return alloc size to caller for later free 1060 */ 1061 1062 if ((n = etm_io_op(hdl, "bad io write on event hdr", 1063 conn, hdrp, hdr_sz, ETM_IO_OP_WR)) < 0) { 1064 errno = (-n); 1065 fmd_hdl_free(hdl, hdrp, hdr_sz); 1066 return (NULL); 1067 } 1068 1069 *szp = hdr_sz; 1070 return (hdrp); 1071 1072 } /* etm_hdr_write() */ 1073 1074 /* 1075 * etm_post_to_fmd - post the given FMA event to FMD 1076 * via a FMD transport API call, 1077 * return 0 or -errno value 1078 * 1079 * caveats: the FMA event (evp) is freed by FMD, 1080 * thus callers of this function should 1081 * immediately discard any ptr they have to the 1082 * nvlist without freeing or dereferencing it 1083 */ 1084 1085 static int 1086 etm_post_to_fmd(fmd_hdl_t *hdl, nvlist_t *evp) 1087 { 1088 ssize_t ev_sz; /* sizeof *evp */ 1089 1090 (void) nvlist_size(evp, (size_t *)&ev_sz, NV_ENCODE_XDR); 1091 1092 if (etm_debug_lvl >= 2) { 1093 etm_show_time(hdl, "ante ev post"); 1094 } 1095 fmd_xprt_post(hdl, etm_fmd_xprt, evp, 0); 1096 etm_stats.etm_wr_fmd_fmaevent.fmds_value.ui64++; 1097 etm_stats.etm_wr_fmd_bytes.fmds_value.ui64 += ev_sz; 1098 if (etm_debug_lvl >= 1) { 1099 fmd_hdl_debug(hdl, "info: event %p post ok to FMD\n", evp); 1100 } 1101 if (etm_debug_lvl >= 2) { 1102 etm_show_time(hdl, "post ev post"); 1103 } 1104 return (0); 1105 1106 } /* etm_post_to_fmd() */ 1107 1108 /* 1109 * Ideally we would just use syslog(3C) for outputting our messages. 1110 * Unfortunately, as this module is running within the FMA daemon context, 1111 * that would create the situation where this module's openlog() would 1112 * have the monopoly on syslog(3C) for the daemon and all its modules. 
1113 * To avoid that situation, this module uses the same logic as the 1114 * syslog-msgs FM module to directly call into the log(7D) and sysmsg(7D) 1115 * devices for syslog and console. 1116 */ 1117 1118 static int 1119 etm_post_to_syslog(fmd_hdl_t *hdl, uint32_t priority, uint32_t body_sz, 1120 uint8_t *body_buf) 1121 { 1122 char *sysmessage; /* Formatted message */ 1123 size_t formatlen; /* maximum length of sysmessage */ 1124 struct strbuf ctl, dat; /* structs pushed to the logfd */ 1125 uint32_t msgid; /* syslog message ID number */ 1126 1127 if ((syslog_file == 0) && (syslog_cons == 0)) { 1128 return (0); 1129 } 1130 1131 if (etm_debug_lvl >= 2) { 1132 etm_show_time(hdl, "ante syslog post"); 1133 } 1134 1135 formatlen = body_sz + 64; /* +64 for prefix strings added below */ 1136 sysmessage = fmd_hdl_zalloc(hdl, formatlen, FMD_SLEEP); 1137 1138 if (syslog_file) { 1139 STRLOG_MAKE_MSGID(body_buf, msgid); 1140 (void) snprintf(sysmessage, formatlen, 1141 "SC Alert: [ID %u FACILITY_AND_PRIORITY] %s", msgid, 1142 body_buf); 1143 1144 syslog_ctl.pri = syslog_facility | priority; 1145 1146 ctl.buf = (void *)&syslog_ctl; 1147 ctl.len = sizeof (syslog_ctl); 1148 1149 dat.buf = sysmessage; 1150 dat.len = strlen(sysmessage) + 1; 1151 1152 if (putmsg(syslog_logfd, &ctl, &dat, 0) != 0) { 1153 fmd_hdl_debug(hdl, "putmsg failed: %s\n", 1154 strerror(errno)); 1155 etm_stats.etm_log_err.fmds_value.ui64++; 1156 } 1157 } 1158 1159 if (syslog_cons) { 1160 (void) snprintf(sysmessage, formatlen, 1161 "SC Alert: %s\r\n", body_buf); 1162 1163 dat.buf = sysmessage; 1164 dat.len = strlen(sysmessage) + 1; 1165 1166 if (write(syslog_msgfd, dat.buf, dat.len) != dat.len) { 1167 fmd_hdl_debug(hdl, "write failed: %s\n", 1168 strerror(errno)); 1169 etm_stats.etm_msg_err.fmds_value.ui64++; 1170 } 1171 } 1172 1173 fmd_hdl_free(hdl, sysmessage, formatlen); 1174 1175 if (etm_debug_lvl >= 2) { 1176 etm_show_time(hdl, "post syslog post"); 1177 } 1178 1179 return (0); 1180 } 1181 1182 1183 /* 1184 * etm_req_ver_negot - send an ETM control message to the other end requesting 1185 * that the ETM protocol version be negotiated/set 1186 */ 1187 1188 static void 1189 etm_req_ver_negot(fmd_hdl_t *hdl) 1190 { 1191 etm_xport_addr_t *addrv; /* default dst addr(s) */ 1192 etm_xport_conn_t conn; /* connection to other end */ 1193 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1194 size_t hdr_sz; /* sizeof header */ 1195 uint8_t *body_buf; /* msg body buffer */ 1196 uint32_t body_sz; /* sizeof *body_buf */ 1197 ssize_t i; /* gen use */ 1198 1199 /* populate an ETM control msg to send */ 1200 1201 hdr_sz = sizeof (*ctl_hdrp); 1202 body_sz = (3 + 1); /* version bytes plus null byte */ 1203 1204 ctl_hdrp = fmd_hdl_zalloc(hdl, hdr_sz + body_sz, FMD_SLEEP); 1205 1206 ctl_hdrp->ctl_pp.pp_magic_num = htonl(ETM_PROTO_MAGIC_NUM); 1207 ctl_hdrp->ctl_pp.pp_proto_ver = ETM_PROTO_V1; 1208 ctl_hdrp->ctl_pp.pp_msg_type = ETM_MSG_TYPE_CONTROL; 1209 ctl_hdrp->ctl_pp.pp_sub_type = ETM_CTL_SEL_VER_NEGOT_REQ; 1210 ctl_hdrp->ctl_pp.pp_rsvd_pad = 0; 1211 etm_xid_ver_negot = etm_xid_cur; 1212 etm_xid_cur += ETM_XID_INC; 1213 ctl_hdrp->ctl_pp.pp_xid = htonl(etm_xid_ver_negot); 1214 ctl_hdrp->ctl_pp.pp_timeout = htonl(ETM_PROTO_V1_TIMEOUT_FOREVER); 1215 ctl_hdrp->ctl_len = htonl(body_sz); 1216 1217 body_buf = (void*)&ctl_hdrp->ctl_len; 1218 body_buf += sizeof (ctl_hdrp->ctl_len); 1219 *body_buf++ = ETM_PROTO_V3; 1220 *body_buf++ = ETM_PROTO_V2; 1221 *body_buf++ = ETM_PROTO_V1; 1222 *body_buf++ = '\0'; 1223 1224 /* 1225 * open and close a connection 
to send the ETM control msg
	 * to any/all of the default dst addrs
	 */

	if ((addrv = etm_xport_get_ev_addrv(hdl, NULL)) == NULL) {
		fmd_hdl_error(hdl,
		    "error: bad ctl dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		goto func_ret;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		if (etm_conn_open(hdl, "bad conn open during ver negot",
		    addrv[i], &conn) < 0) {
			continue;
		}
		if (etm_io_op(hdl, "bad io write on ctl hdr+body",
		    conn, ctl_hdrp, hdr_sz + body_sz,
		    ETM_IO_OP_WR) >= 0) {
			etm_stats.etm_wr_hdr_control.fmds_value.ui64++;
			etm_stats.etm_wr_body_control.fmds_value.ui64++;
		}
		(void) etm_conn_close(hdl, "bad conn close during ver negot",
		    conn);

	} /* foreach dst addr */

func_ret:

	if (addrv != NULL) {
		etm_xport_free_addrv(hdl, addrv);
	}
	fmd_hdl_free(hdl, ctl_hdrp, hdr_sz + body_sz);

} /* etm_req_ver_negot() */

/*
 * Design_Note: We rely on the fact that all message types have
 *		a common protocol preamble; if this fact should
 *		ever change it may break the code below. We also
 *		rely on the fact that FMA_EVENT and CONTROL headers
 *		returned will be sized large enough to reuse them
 *		as RESPONSE headers if the remote endpt asked
 *		for a response via the pp_timeout field.
 */

/*
 * etm_maybe_send_response - check the given message header to see
 *				whether a response has been requested,
 *				if so then send an appropriate response
 *				back on the given connection using the
 *				given response code,
 *				return 0 or -errno value
 */

static ssize_t
etm_maybe_send_response(fmd_hdl_t *hdl, etm_xport_conn_t conn,
    void *hdrp, int32_t resp_code)
{
	ssize_t			rv;		/* ret val */
	etm_proto_v1_pp_t	*ppp;		/* protocol preamble ptr */
	etm_proto_v1_resp_hdr_t	*resp_hdrp;	/* for RESPONSE msg */
	uint8_t			resp_body[4];	/* response body if needed */
	uint8_t			*resp_msg;	/* response hdr+body */
	size_t			hdr_sz;		/* sizeof response hdr */
	uint8_t			orig_msg_type;	/* orig hdr's message type */
	uint32_t		orig_timeout;	/* orig hdr's timeout */
	ssize_t			n;		/* gen use */

	rv = 0;		/* default is success */
	ppp = hdrp;
	orig_msg_type = ppp->pp_msg_type;
	orig_timeout = ppp->pp_timeout;

	/* bail out now if no response is to be sent */

	if (orig_timeout == ETM_PROTO_V1_TIMEOUT_NONE) {
		return (0);
	} /* if a nop */

	if ((orig_msg_type != ETM_MSG_TYPE_FMA_EVENT) &&
	    (orig_msg_type != ETM_MSG_TYPE_ALERT) &&
	    (orig_msg_type != ETM_MSG_TYPE_CONTROL)) {
		return (-EINVAL);
	} /* if inappropriate hdr for a response msg */

	/* reuse the given header as a response header */

	if (etm_debug_lvl >= 2) {
		etm_show_time(hdl, "ante resp send");
	}

	resp_hdrp = hdrp;
	resp_hdrp->resp_code = resp_code;
	resp_hdrp->resp_len = 0;	/* default is empty body */

	if ((orig_msg_type == ETM_MSG_TYPE_CONTROL) &&
	    (ppp->pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ)) {
		resp_body[0] = ETM_PROTO_V2;
		resp_body[1] = ETM_PROTO_V3;
		resp_body[2] = 0;
		resp_hdrp->resp_len = 3;
	} /* if should send our/negotiated proto ver in resp body */

	/* respond with the proto ver that was negotiated */

	resp_hdrp->resp_pp.pp_proto_ver
= etm_resp_ver; 1334 resp_hdrp->resp_pp.pp_msg_type = ETM_MSG_TYPE_RESPONSE; 1335 resp_hdrp->resp_pp.pp_timeout = ETM_PROTO_V1_TIMEOUT_NONE; 1336 1337 /* 1338 * send the whole response msg in one write, header and body; 1339 * avoid the alloc-and-copy if we can reuse the hdr as the msg, 1340 * ie, if the body is empty 1341 * 1342 * update stats and note the xid associated with last ACKed FMA_EVENT 1343 * known to be successfully posted to FMD to aid duplicate filtering 1344 */ 1345 1346 hdr_sz = sizeof (etm_proto_v1_resp_hdr_t); 1347 1348 resp_msg = hdrp; 1349 if (resp_hdrp->resp_len > 0) { 1350 resp_msg = fmd_hdl_zalloc(hdl, hdr_sz + resp_hdrp->resp_len, 1351 FMD_SLEEP); 1352 (void) memcpy(resp_msg, resp_hdrp, hdr_sz); 1353 (void) memcpy(resp_msg + hdr_sz, resp_body, 1354 resp_hdrp->resp_len); 1355 } 1356 1357 if ((n = etm_io_op(hdl, "bad io write on resp msg", conn, 1358 resp_msg, hdr_sz + resp_hdrp->resp_len, ETM_IO_OP_WR)) < 0) { 1359 rv = n; 1360 goto func_ret; 1361 } 1362 1363 etm_stats.etm_wr_hdr_response.fmds_value.ui64++; 1364 etm_stats.etm_wr_body_response.fmds_value.ui64++; 1365 1366 if ((orig_msg_type == ETM_MSG_TYPE_FMA_EVENT) && 1367 (resp_code >= 0)) { 1368 etm_xid_posted_ev = resp_hdrp->resp_pp.pp_xid; 1369 } 1370 1371 fmd_hdl_debug(hdl, "info: sent V%u RESPONSE msg to xport " 1372 "xid 0x%x code %d len %u\n", 1373 (unsigned int)resp_hdrp->resp_pp.pp_proto_ver, 1374 resp_hdrp->resp_pp.pp_xid, resp_hdrp->resp_code, 1375 resp_hdrp->resp_len); 1376 func_ret: 1377 1378 if (resp_hdrp->resp_len > 0) { 1379 fmd_hdl_free(hdl, resp_msg, hdr_sz + resp_hdrp->resp_len); 1380 } 1381 if (etm_debug_lvl >= 2) { 1382 etm_show_time(hdl, "post resp send"); 1383 } 1384 return (rv); 1385 1386 } /* etm_maybe_send_response() */ 1387 1388 /* 1389 * etm_handle_new_conn - receive an ETM message sent from the other end via 1390 * the given open connection, pull out any FMA events 1391 * and post them to the local FMD (or handle any ETM 1392 * control or response msg); when done, close the 1393 * connection 1394 */ 1395 1396 static void 1397 etm_handle_new_conn(fmd_hdl_t *hdl, etm_xport_conn_t conn) 1398 { 1399 etm_proto_v1_ev_hdr_t *ev_hdrp; /* for FMA_EVENT msg */ 1400 etm_proto_v1_ctl_hdr_t *ctl_hdrp; /* for CONTROL msg */ 1401 etm_proto_v1_resp_hdr_t *resp_hdrp; /* for RESPONSE msg */ 1402 etm_proto_v3_sa_hdr_t *sa_hdrp; /* for ALERT msg */ 1403 int32_t resp_code; /* response code */ 1404 size_t hdr_sz; /* sizeof header */ 1405 uint8_t *body_buf; /* msg body buffer */ 1406 uint32_t body_sz; /* sizeof body_buf */ 1407 uint32_t ev_cnt; /* count of FMA events */ 1408 uint8_t *bp; /* byte ptr within body_buf */ 1409 nvlist_t *evp; /* ptr to unpacked FMA event */ 1410 char *class; /* FMA event class */ 1411 ssize_t i, n; /* gen use */ 1412 1413 if (etm_debug_lvl >= 2) { 1414 etm_show_time(hdl, "ante conn handle"); 1415 } 1416 fmd_hdl_debug(hdl, "info: handling new conn %p\n", conn); 1417 1418 ev_hdrp = NULL; 1419 ctl_hdrp = NULL; 1420 resp_hdrp = NULL; 1421 sa_hdrp = NULL; 1422 body_buf = NULL; 1423 class = NULL; 1424 evp = NULL; 1425 resp_code = 0; /* default is success */ 1426 1427 /* read a network decoded message header from the connection */ 1428 1429 if ((ev_hdrp = etm_hdr_read(hdl, conn, &hdr_sz)) == NULL) { 1430 /* errno assumed set by above call */ 1431 fmd_hdl_error(hdl, "error: FMA event dropped: " 1432 "bad hdr read errno %d\n", errno); 1433 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 1434 goto func_ret; 1435 } 1436 1437 /* 1438 * handle the message based on its preamble pp_msg_type 
1439 * which is known to be valid from etm_hdr_read() checks 1440 */ 1441 1442 if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_FMA_EVENT) { 1443 1444 fmd_hdl_debug(hdl, "info: rcvd FMA_EVENT msg from xport\n"); 1445 1446 /* allocate buf large enough for whole body / all FMA events */ 1447 1448 body_sz = 0; 1449 for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) { 1450 body_sz += ev_hdrp->ev_lens[i]; 1451 } /* for summing sizes of all FMA events */ 1452 ev_cnt = i; 1453 1454 if (etm_debug_lvl >= 1) { 1455 fmd_hdl_debug(hdl, "info: event lengths %u sum %u\n", 1456 ev_cnt, body_sz); 1457 } 1458 1459 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 1460 1461 /* read all the FMA events at once */ 1462 1463 if ((n = etm_io_op(hdl, "FMA event dropped: " 1464 "bad io read on event bodies", 1465 conn, body_buf, body_sz, 1466 ETM_IO_OP_RD)) < 0) { 1467 etm_stats.etm_rd_drop_fmaevent.fmds_value.ui64++; 1468 goto func_ret; 1469 } 1470 1471 etm_stats.etm_rd_xport_bytes.fmds_value.ui64 += body_sz; 1472 etm_stats.etm_rd_body_fmaevent.fmds_value.ui64 += ev_cnt; 1473 1474 /* 1475 * check for dup msg/xid against last good response sent, 1476 * if a dup then resend response but skip repost to FMD 1477 */ 1478 1479 if (ev_hdrp->ev_pp.pp_xid == etm_xid_posted_ev) { 1480 (void) etm_maybe_send_response(hdl, conn, ev_hdrp, 0); 1481 fmd_hdl_debug(hdl, "info: skipping dup FMA event post " 1482 "xid 0x%x\n", etm_xid_posted_ev); 1483 etm_stats.etm_rd_dup_fmaevent.fmds_value.ui64++; 1484 goto func_ret; 1485 } 1486 1487 /* unpack each FMA event and post it to FMD */ 1488 1489 bp = body_buf; 1490 for (i = 0; ev_hdrp->ev_lens[i] != 0; i++) { 1491 if ((n = nvlist_unpack((char *)bp, 1492 ev_hdrp->ev_lens[i], &evp, 0)) != 0) { 1493 resp_code = (-n); 1494 (void) etm_maybe_send_response(hdl, conn, 1495 ev_hdrp, resp_code); 1496 fmd_hdl_error(hdl, "error: FMA event dropped: " 1497 "bad event body unpack " 1498 "errno %d\n", n); 1499 if (etm_debug_lvl >= 2) { 1500 fmd_hdl_debug(hdl, "info: FMA event " 1501 "hexdump %d bytes:\n", 1502 ev_hdrp->ev_lens[i]); 1503 etm_hexdump(hdl, bp, 1504 ev_hdrp->ev_lens[i]); 1505 } 1506 etm_stats.etm_os_nvlist_unpack_fail.fmds_value. 1507 ui64++; 1508 etm_stats.etm_rd_drop_fmaevent.fmds_value. 
1509 ui64++; 1510 bp += ev_hdrp->ev_lens[i]; 1511 continue; 1512 } 1513 if (etm_debug_lvl >= 1) { 1514 (void) nvlist_lookup_string(evp, FM_CLASS, 1515 &class); 1516 if (class == NULL) { 1517 class = "NULL"; 1518 } 1519 fmd_hdl_debug(hdl, "info: FMA event %p " 1520 "class %s\n", evp, class); 1521 } 1522 resp_code = etm_post_to_fmd(hdl, evp); 1523 evp = NULL; 1524 (void) etm_maybe_send_response(hdl, conn, 1525 ev_hdrp, resp_code); 1526 bp += ev_hdrp->ev_lens[i]; 1527 } /* foreach FMA event in the body buffer */ 1528 1529 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_CONTROL) { 1530 1531 ctl_hdrp = (void*)ev_hdrp; 1532 1533 fmd_hdl_debug(hdl, "info: rcvd CONTROL msg from xport\n"); 1534 if (etm_debug_lvl >= 1) { 1535 fmd_hdl_debug(hdl, "info: ctl sel %d xid 0x%x\n", 1536 (int)ctl_hdrp->ctl_pp.pp_sub_type, 1537 ctl_hdrp->ctl_pp.pp_xid); 1538 } 1539 1540 /* 1541 * if we have a VER_NEGOT_REQ read the body and validate 1542 * the protocol version set contained therein, 1543 * otherwise we have a PING_REQ (which has no body) 1544 * and we [also] fall thru to the code which sends a 1545 * response msg if the pp_timeout field requested one 1546 */ 1547 1548 if (ctl_hdrp->ctl_pp.pp_sub_type == ETM_CTL_SEL_VER_NEGOT_REQ) { 1549 1550 body_sz = ctl_hdrp->ctl_len; 1551 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 1552 1553 if ((n = etm_io_op(hdl, "bad io read on ctl body", 1554 conn, body_buf, body_sz, 1555 ETM_IO_OP_RD)) < 0) { 1556 goto func_ret; 1557 } 1558 1559 /* complain if version set completely incompatible */ 1560 1561 for (i = 0; i < body_sz; i++) { 1562 if ((body_buf[i] == ETM_PROTO_V1) || 1563 (body_buf[i] == ETM_PROTO_V2) || 1564 (body_buf[i] == ETM_PROTO_V3)) { 1565 break; 1566 } 1567 } 1568 if (i >= body_sz) { 1569 etm_stats.etm_ver_bad.fmds_value.ui64++; 1570 resp_code = (-EPROTO); 1571 } 1572 1573 } /* if got version set request */ 1574 1575 etm_stats.etm_rd_body_control.fmds_value.ui64++; 1576 1577 (void) etm_maybe_send_response(hdl, conn, ctl_hdrp, resp_code); 1578 1579 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_RESPONSE) { 1580 1581 resp_hdrp = (void*)ev_hdrp; 1582 1583 fmd_hdl_debug(hdl, "info: rcvd RESPONSE msg from xport\n"); 1584 if (etm_debug_lvl >= 1) { 1585 fmd_hdl_debug(hdl, "info: resp xid 0x%x\n", 1586 (int)resp_hdrp->resp_pp.pp_xid); 1587 } 1588 1589 body_sz = resp_hdrp->resp_len; 1590 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 1591 1592 if ((n = etm_io_op(hdl, "bad io read on resp len", 1593 conn, body_buf, body_sz, ETM_IO_OP_RD)) < 0) { 1594 goto func_ret; 1595 } 1596 1597 etm_stats.etm_rd_body_response.fmds_value.ui64++; 1598 1599 /* 1600 * look up the xid to interpret the response body 1601 * 1602 * ping is a nop; for ver negot confirm that a supported 1603 * protocol version was negotiated and remember which one 1604 */ 1605 1606 if ((resp_hdrp->resp_pp.pp_xid != etm_xid_ping) && 1607 (resp_hdrp->resp_pp.pp_xid != etm_xid_ver_negot)) { 1608 etm_stats.etm_xid_bad.fmds_value.ui64++; 1609 goto func_ret; 1610 } 1611 1612 if (resp_hdrp->resp_pp.pp_xid == etm_xid_ver_negot) { 1613 if ((body_buf[0] < ETM_PROTO_V1) || 1614 (body_buf[0] > ETM_PROTO_V3)) { 1615 etm_stats.etm_ver_bad.fmds_value.ui64++; 1616 goto func_ret; 1617 } 1618 etm_resp_ver = body_buf[0]; 1619 } /* if have resp to last req to negotiate proto ver */ 1620 1621 } else if (ev_hdrp->ev_pp.pp_msg_type == ETM_MSG_TYPE_ALERT) { 1622 1623 sa_hdrp = (void*)ev_hdrp; 1624 1625 fmd_hdl_debug(hdl, "info: rcvd ALERT msg from xport\n"); 1626 if (etm_debug_lvl >= 1) { 1627 
fmd_hdl_debug(hdl, "info: sa sel %d xid 0x%x\n", 1628 (int)sa_hdrp->sa_pp.pp_sub_type, 1629 sa_hdrp->sa_pp.pp_xid); 1630 } 1631 1632 body_sz = sa_hdrp->sa_len; 1633 body_buf = fmd_hdl_zalloc(hdl, body_sz, FMD_SLEEP); 1634 1635 if ((n = etm_io_op(hdl, "bad io read on sa body", 1636 conn, body_buf, body_sz, 1637 ETM_IO_OP_RD)) < 0) { 1638 goto func_ret; 1639 } 1640 1641 etm_stats.etm_rd_body_alert.fmds_value.ui64++; 1642 1643 resp_code = etm_post_to_syslog(hdl, sa_hdrp->sa_priority, 1644 body_sz, body_buf); 1645 (void) etm_maybe_send_response(hdl, conn, sa_hdrp, resp_code); 1646 } /* whether we have a FMA_EVENT, CONTROL, RESPONSE or ALERT msg */ 1647 1648 func_ret: 1649 1650 (void) etm_conn_close(hdl, "bad conn close after msg recv", conn); 1651 1652 if (etm_debug_lvl >= 2) { 1653 etm_show_time(hdl, "post conn handle"); 1654 } 1655 if (ev_hdrp != NULL) { 1656 fmd_hdl_free(hdl, ev_hdrp, hdr_sz); 1657 } 1658 if (body_buf != NULL) { 1659 fmd_hdl_free(hdl, body_buf, body_sz); 1660 } 1661 } /* etm_handle_new_conn() */ 1662 1663 /* 1664 * etm_server - loop forever accepting new connections 1665 * using the given FMD handle, 1666 * handling any ETM msgs sent from the other side 1667 * via each such connection 1668 */ 1669 1670 static void 1671 etm_server(void *arg) 1672 { 1673 etm_xport_conn_t conn; /* connection handle */ 1674 ssize_t n; /* gen use */ 1675 fmd_hdl_t *hdl; /* FMD handle */ 1676 1677 hdl = arg; 1678 1679 fmd_hdl_debug(hdl, "info: connection server starting\n"); 1680 1681 while (!etm_is_dying) { 1682 1683 if ((conn = etm_xport_accept(hdl, NULL)) == NULL) { 1684 /* errno assumed set by above call */ 1685 n = errno; 1686 if (etm_is_dying) { 1687 break; 1688 } 1689 fmd_hdl_debug(hdl, 1690 "error: bad conn accept errno %d\n", n); 1691 etm_stats.etm_xport_accept_fail.fmds_value.ui64++; 1692 /* avoid spinning CPU */ 1693 (void) etm_sleep(ETM_SLEEP_SLOW); 1694 continue; 1695 } 1696 1697 /* 1698 * Design_Note: etm_handle_new_conn() will close the 1699 * accepted connection when done. In early designs 1700 * etm_handle_new_conn() was spawned as a 1701 * separate thread via pthread_create(); 1702 * however fmd_thr_create() constrains thread 1703 * creation to prevent spawned threads from 1704 * spawning others (ie, no grandchildren). 1705 * Hence etm_handle_new_conn() is now called 1706 * as a simple function [w/ multiple args]. 
		 */

		etm_handle_new_conn(hdl, conn);

	} /* while accepting new connections until ETM dies */

	/* ETM is dying (probably due to "fmadm unload etm") */

	if (etm_debug_lvl >= 1) {
		fmd_hdl_debug(hdl, "info: connection server is dying\n");
	}
} /* etm_server() */

static void *
etm_init_alloc(size_t size)
{
	return (fmd_hdl_alloc(init_hdl, size, FMD_SLEEP));
}

static void
etm_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}
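
/*
 * Note: etm_init_alloc() and etm_init_free() above are the allocation
 * callbacks handed to ldom_init() in _fmd_init() below, so that libldom
 * allocations can be routed through fmd_hdl_alloc()/fmd_hdl_free() on this
 * module's handle, which is cached in init_hdl for that purpose.
 */
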
/*
 * -------------------------- FMD entry points -------------------------------
 */

/*
 * _fmd_init - initialize the transport for use by ETM and start the
 *		server daemon to accept new connections to us
 *
 *		FMD will read our *.conf and subscribe us to FMA events
 */

void
_fmd_init(fmd_hdl_t *hdl)
{
	struct timeval		tmv;		/* timeval */
	ssize_t			n;		/* gen use */
	ldom_hdl_t		*lhp;		/* ldom pointer */
	const struct facility	*fp;		/* syslog facility matching */
	char			*facname;	/* syslog facility property */

	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
		return; /* invalid data in configuration file */
	}

	fmd_hdl_debug(hdl, "info: module initializing\n");

	init_hdl = hdl;
	lhp = ldom_init(etm_init_alloc, etm_init_free);

	/*
	 * Do not load this module if it is running on a guest ldom.
	 */
	if (ldom_major_version(lhp) == 1 && ldom_on_service(lhp) == 0) {
		fmd_hdl_debug(hdl, "info: module unregistering\n");
		ldom_fini(lhp);
		fmd_hdl_unregister(hdl);
		return;
	} else {
		ldom_fini(lhp);
	}

	/* setup statistics and properties from FMD */

	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
	    sizeof (etm_stats) / sizeof (fmd_stat_t),
	    (fmd_stat_t *)&etm_stats);

	etm_debug_lvl = fmd_prop_get_int32(hdl, ETM_PROP_NM_DEBUG_LVL);
	etm_debug_max_ev_cnt = fmd_prop_get_int32(hdl,
	    ETM_PROP_NM_DEBUG_MAX_EV_CNT);
	fmd_hdl_debug(hdl, "info: etm_debug_lvl %d "
	    "etm_debug_max_ev_cnt %d\n",
	    etm_debug_lvl, etm_debug_max_ev_cnt);

	/* obtain an FMD transport handle so we can post FMA events later */

	etm_fmd_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);

	/* encourage protocol transaction id to be unique per module load */

	(void) gettimeofday(&tmv, NULL);
	etm_xid_cur = (uint32_t)((tmv.tv_sec << 10) |
	    ((unsigned long)tmv.tv_usec >> 10));

	/*
	 * init the transport,
	 * start the connection acceptance server, and
	 * request protocol version be negotiated
	 */

	if ((n = etm_xport_init(hdl)) != 0) {
		fmd_hdl_error(hdl, "error: bad xport init errno %d\n", (-n));
		fmd_hdl_unregister(hdl);
		return;
	}

	/*
	 * Cache any properties we use every time we receive an alert.
	 */
	syslog_file = fmd_prop_get_int32(hdl, ETM_PROP_NM_SYSLOGD);
	syslog_cons = fmd_prop_get_int32(hdl, ETM_PROP_NM_CONSOLE);

	if (syslog_file && (syslog_logfd = open("/dev/conslog",
	    O_WRONLY | O_NOCTTY)) == -1) {
		fmd_hdl_error(hdl, "error: failed to open /dev/conslog");
		syslog_file = 0;
	}

	if (syslog_cons && (syslog_msgfd = open("/dev/sysmsg",
	    O_WRONLY | O_NOCTTY)) == -1) {
		fmd_hdl_error(hdl, "error: failed to open /dev/sysmsg");
		syslog_cons = 0;
	}

	if (syslog_file) {
		/*
		 * Look up the value of the "facility" property and use it to
		 * determine what syslog LOG_* facility value we use to
		 * fill in our log_ctl_t.
		 */
		facname = fmd_prop_get_string(hdl, ETM_PROP_NM_FACILITY);

		for (fp = syslog_facs; fp->fac_name != NULL; fp++) {
			if (strcmp(fp->fac_name, facname) == 0)
				break;
		}

		if (fp->fac_name == NULL) {
			fmd_hdl_error(hdl, "error: invalid 'facility'"
			    " setting: %s\n", facname);
			syslog_file = 0;
		} else {
			syslog_facility = fp->fac_value;
			syslog_ctl.flags = SL_CONSOLE | SL_LOGONLY;
		}

		fmd_prop_free_string(hdl, facname);
	}

	etm_svr_tid = fmd_thr_create(hdl, etm_server, hdl);

	/*
	 * Wait a second for the receiver to be ready before starting to
	 * handshake with the SP.
	 */
	(void) etm_sleep(ETM_SLEEP_QUIK);

	etm_req_ver_negot(hdl);

	fmd_hdl_debug(hdl, "info: module initialized ok\n");

} /* _fmd_init() */
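
/*
 * For reference, a rough sketch of the FMA_EVENT message that etm_recv()
 * below puts on the wire (and that etm_hdr_read() parses on receive):
 *
 *	| etm_proto_v1_pp_t preamble | ev_lens[]: n1 ... 0 | packed nvlist(s) |
 *
 * where the preamble carries the magic number, protocol version, message
 * type, sub type, pad, xid, and timeout; the event lengths are uint32_t
 * values in network byte order terminated by 0; and each event body is an
 * nvlist packed with NV_ENCODE_XDR.
 */
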
/*
 * etm_recv - receive an FMA event from FMD and transport it
 *		to the remote endpoint
 */

/*ARGSUSED*/
void
etm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *evp, const char *class)
{
	etm_xport_addr_t	*addrv;	/* vector of transport addresses */
	etm_xport_conn_t	conn;	/* connection handle */
	etm_proto_v1_ev_hdr_t	*hdrp;	/* for FMA_EVENT msg */
	ssize_t			i, n;	/* gen use */
	size_t			sz;	/* header size */
	size_t			buflen;	/* size of packed FMA event */
	uint8_t			*buf;	/* tmp buffer for packed FMA event */

	buflen = 0;
	(void) nvlist_size(evp, &buflen, NV_ENCODE_XDR);
	etm_stats.etm_rd_fmd_bytes.fmds_value.ui64 += buflen;
	etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64++;

	fmd_hdl_debug(hdl, "info: rcvd event %p from FMD\n", evp);
	fmd_hdl_debug(hdl, "info: cnt %llu class %s\n",
	    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64, class);

	/*
	 * if the debug limit has been set, avoid excessive traffic,
	 * for example, an infinite cycle using loopback nodes
	 */

	if ((etm_debug_max_ev_cnt >= 0) &&
	    (etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64 >
	    etm_debug_max_ev_cnt)) {
		fmd_hdl_debug(hdl, "warning: FMA event dropped: "
		    "event %p cnt %llu > debug max %d\n", evp,
		    etm_stats.etm_rd_fmd_fmaevent.fmds_value.ui64,
		    etm_debug_max_ev_cnt);
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		return;
	}

	/* allocate a buffer for the FMA event and nvlist pack it */

	buf = fmd_hdl_zalloc(hdl, buflen, FMD_SLEEP);

	if ((n = nvlist_pack(evp, (char **)&buf, &buflen,
	    NV_ENCODE_XDR, 0)) != 0) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "event pack errno %d\n", n);
		etm_stats.etm_os_nvlist_pack_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	/* get vector of dst addrs and send the FMA event to each one */

	if ((addrv = etm_xport_get_ev_addrv(hdl, evp)) == NULL) {
		fmd_hdl_error(hdl, "error: FMA event dropped: "
		    "bad event dst addrs errno %d\n", errno);
		etm_stats.etm_xport_get_ev_addrv_fail.fmds_value.ui64++;
		etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
		fmd_hdl_free(hdl, buf, buflen);
		return;
	}

	for (i = 0; addrv[i] != NULL; i++) {

		/* open a new connection to this dst addr */

		if ((n = etm_conn_open(hdl, "FMA event dropped: "
		    "bad conn open on new ev",
		    addrv[i], &conn)) < 0) {
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		/* write the ETM message header */

		if ((hdrp = etm_hdr_write(hdl, conn, evp, NV_ENCODE_XDR,
		    &sz)) == NULL) {
			fmd_hdl_error(hdl, "error: FMA event dropped: "
			    "bad hdr write errno %d\n", errno);
			(void) etm_conn_close(hdl,
			    "bad conn close per bad hdr wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		fmd_hdl_free(hdl, hdrp, sz); /* header not needed */
		etm_stats.etm_wr_hdr_fmaevent.fmds_value.ui64++;
		fmd_hdl_debug(hdl, "info: hdr xport write ok for event %p\n",
		    evp);

		/* write the ETM message body, ie, the packed nvlist */

		if ((n = etm_io_op(hdl, "FMA event dropped: "
		    "bad io write on event", conn,
		    buf, buflen, ETM_IO_OP_WR)) < 0) {
			(void) etm_conn_close(hdl,
			    "bad conn close per bad body wr", conn);
			etm_stats.etm_wr_drop_fmaevent.fmds_value.ui64++;
			continue;
		}

		etm_stats.etm_wr_body_fmaevent.fmds_value.ui64++;
		etm_stats.etm_wr_xport_bytes.fmds_value.ui64 += buflen;
		fmd_hdl_debug(hdl, "info: body xport write ok for event %p\n",
		    evp);

		/* close the connection */

		(void) etm_conn_close(hdl, "bad conn close after event send",
		    conn);
	} /* foreach dst addr in the vector */

	etm_xport_free_addrv(hdl, addrv);
	fmd_hdl_free(hdl, buf, buflen);

} /* etm_recv() */

/*
 * _fmd_fini - stop the server daemon and teardown the transport
 */

void
_fmd_fini(fmd_hdl_t *hdl)
{
	ssize_t	n;	/* gen use */

	fmd_hdl_debug(hdl, "info: module finalizing\n");

	/* kill the connection server; wait for it to die */

	etm_is_dying = 1;

	if (etm_svr_tid != NULL) {
		fmd_thr_signal(hdl, etm_svr_tid);
		fmd_thr_destroy(hdl, etm_svr_tid);
		etm_svr_tid = NULL;
	} /* if server thread was successfully created */

	/* teardown the transport */

	if ((n = etm_xport_fini(hdl)) != 0) {
		fmd_hdl_error(hdl, "warning: xport fini errno %d\n", (-n));
	}
	if (etm_fmd_xprt != NULL) {
		fmd_xprt_close(hdl, etm_fmd_xprt);
	}

	if (syslog_logfd != -1) {
		(void) close(syslog_logfd);
	}
	if (syslog_msgfd != -1) {
		(void) close(syslog_msgfd);
	}

	fmd_hdl_debug(hdl, "info: module finalized ok\n");

} /* _fmd_fini() */