1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Fault Management Architecture (FMA) Resource and Protocol Support 27 * 28 * The routines contained herein provide services to support kernel subsystems 29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089). 30 * 31 * Name-Value Pair Lists 32 * 33 * The embodiment of an FMA protocol element (event, fmri or authority) is a 34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and 35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used 36 * to create an nvpair list using custom allocators. Callers may choose to 37 * allocate either from the kernel memory allocator, or from a preallocated 38 * buffer, useful in constrained contexts like high-level interrupt routines. 39 * 40 * Protocol Event and FMRI Construction 41 * 42 * Convenience routines are provided to construct nvlist events according to 43 * the FMA Event Protocol and Naming Schema specification for ereports and 44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes. 
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

/*
 * Queue length limit: once zevent_len_cur reaches this value,
 * zfs_zevent_insert() drains the tail of the list for every new event.
 */
static int zfs_zevent_len_max = 512;

/* Number of zevents accounted as queued; updated with zevent_lock held */
static int zevent_len_cur = 0;
/* Number of threads currently inside zfs_zevent_wait() */
static int zevent_waiters = 0;
/* State flags; zfs_zevent_wait() tests ZEVENT_SHUTDOWN against this */
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
85 */ 86 static uint64_t zevent_eid = 0; 87 88 static kmutex_t zevent_lock; 89 static list_t zevent_list; 90 static kcondvar_t zevent_cv; 91 #endif /* _KERNEL */ 92 93 94 /* 95 * Common fault management kstats to record event generation failures 96 */ 97 98 struct erpt_kstat { 99 kstat_named_t erpt_dropped; /* num erpts dropped on post */ 100 kstat_named_t erpt_set_failed; /* num erpt set failures */ 101 kstat_named_t fmri_set_failed; /* num fmri set failures */ 102 kstat_named_t payload_set_failed; /* num payload set failures */ 103 kstat_named_t erpt_duplicates; /* num duplicate erpts */ 104 }; 105 106 static struct erpt_kstat erpt_kstat_data = { 107 { "erpt-dropped", KSTAT_DATA_UINT64 }, 108 { "erpt-set-failed", KSTAT_DATA_UINT64 }, 109 { "fmri-set-failed", KSTAT_DATA_UINT64 }, 110 { "payload-set-failed", KSTAT_DATA_UINT64 }, 111 { "erpt-duplicates", KSTAT_DATA_UINT64 } 112 }; 113 114 kstat_t *fm_ksp; 115 116 #ifdef _KERNEL 117 118 static zevent_t * 119 zfs_zevent_alloc(void) 120 { 121 zevent_t *ev; 122 123 ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP); 124 125 list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t), 126 offsetof(zfs_zevent_t, ze_node)); 127 list_link_init(&ev->ev_node); 128 129 return (ev); 130 } 131 132 static void 133 zfs_zevent_free(zevent_t *ev) 134 { 135 /* Run provided cleanup callback */ 136 ev->ev_cb(ev->ev_nvl, ev->ev_detector); 137 138 list_destroy(&ev->ev_ze_list); 139 kmem_free(ev, sizeof (zevent_t)); 140 } 141 142 static void 143 zfs_zevent_drain(zevent_t *ev) 144 { 145 zfs_zevent_t *ze; 146 147 ASSERT(MUTEX_HELD(&zevent_lock)); 148 list_remove(&zevent_list, ev); 149 150 /* Remove references to this event in all private file data */ 151 while ((ze = list_head(&ev->ev_ze_list)) != NULL) { 152 list_remove(&ev->ev_ze_list, ze); 153 ze->ze_zevent = NULL; 154 ze->ze_dropped++; 155 } 156 157 zfs_zevent_free(ev); 158 } 159 160 void 161 zfs_zevent_drain_all(int *count) 162 { 163 zevent_t *ev; 164 165 mutex_enter(&zevent_lock); 166 while ((ev = 
list_head(&zevent_list)) != NULL) 167 zfs_zevent_drain(ev); 168 169 *count = zevent_len_cur; 170 zevent_len_cur = 0; 171 mutex_exit(&zevent_lock); 172 } 173 174 /* 175 * New zevents are inserted at the head. If the maximum queue 176 * length is exceeded a zevent will be drained from the tail. 177 * As part of this any user space processes which currently have 178 * a reference to this zevent_t in their private data will have 179 * this reference set to NULL. 180 */ 181 static void 182 zfs_zevent_insert(zevent_t *ev) 183 { 184 ASSERT(MUTEX_HELD(&zevent_lock)); 185 list_insert_head(&zevent_list, ev); 186 187 if (zevent_len_cur >= zfs_zevent_len_max) 188 zfs_zevent_drain(list_tail(&zevent_list)); 189 else 190 zevent_len_cur++; 191 } 192 193 /* 194 * Post a zevent. The cb will be called when nvl and detector are no longer 195 * needed, i.e.: 196 * - An error happened and a zevent can't be posted. In this case, cb is called 197 * before zfs_zevent_post() returns. 198 * - The event is being drained and freed. 
199 */ 200 int 201 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) 202 { 203 inode_timespec_t tv; 204 int64_t tv_array[2]; 205 uint64_t eid; 206 size_t nvl_size = 0; 207 zevent_t *ev; 208 int error; 209 210 ASSERT(cb != NULL); 211 212 gethrestime(&tv); 213 tv_array[0] = tv.tv_sec; 214 tv_array[1] = tv.tv_nsec; 215 216 error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2); 217 if (error) { 218 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 219 goto out; 220 } 221 222 eid = atomic_inc_64_nv(&zevent_eid); 223 error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid); 224 if (error) { 225 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 226 goto out; 227 } 228 229 error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); 230 if (error) { 231 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 232 goto out; 233 } 234 235 if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { 236 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 237 error = EOVERFLOW; 238 goto out; 239 } 240 241 ev = zfs_zevent_alloc(); 242 if (ev == NULL) { 243 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 244 error = ENOMEM; 245 goto out; 246 } 247 248 ev->ev_nvl = nvl; 249 ev->ev_detector = detector; 250 ev->ev_cb = cb; 251 ev->ev_eid = eid; 252 253 mutex_enter(&zevent_lock); 254 zfs_zevent_insert(ev); 255 cv_broadcast(&zevent_cv); 256 mutex_exit(&zevent_lock); 257 258 out: 259 if (error) 260 cb(nvl, detector); 261 262 return (error); 263 } 264 265 void 266 zfs_zevent_track_duplicate(void) 267 { 268 atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64); 269 } 270 271 static int 272 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze) 273 { 274 *ze = zfsdev_get_state(minor, ZST_ZEVENT); 275 if (*ze == NULL) 276 return (SET_ERROR(EBADF)); 277 278 return (0); 279 } 280 281 zfs_file_t * 282 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) 283 { 284 zfs_file_t *fp = zfs_file_get(fd); 285 if (fp == NULL) 286 return 
(NULL); 287 288 int error = zfsdev_getminor(fp, minorp); 289 if (error == 0) 290 error = zfs_zevent_minor_to_state(*minorp, ze); 291 292 if (error) { 293 zfs_zevent_fd_rele(fp); 294 fp = NULL; 295 } 296 297 return (fp); 298 } 299 300 void 301 zfs_zevent_fd_rele(zfs_file_t *fp) 302 { 303 zfs_file_put(fp); 304 } 305 306 /* 307 * Get the next zevent in the stream and place a copy in 'event'. This 308 * may fail with ENOMEM if the encoded nvlist size exceeds the passed 309 * 'event_size'. In this case the stream pointer is not advanced and 310 * and 'event_size' is set to the minimum required buffer size. 311 */ 312 int 313 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, 314 uint64_t *dropped) 315 { 316 zevent_t *ev; 317 size_t size; 318 int error = 0; 319 320 mutex_enter(&zevent_lock); 321 if (ze->ze_zevent == NULL) { 322 /* New stream start at the beginning/tail */ 323 ev = list_tail(&zevent_list); 324 if (ev == NULL) { 325 error = ENOENT; 326 goto out; 327 } 328 } else { 329 /* 330 * Existing stream continue with the next element and remove 331 * ourselves from the wait queue for the previous element 332 */ 333 ev = list_prev(&zevent_list, ze->ze_zevent); 334 if (ev == NULL) { 335 error = ENOENT; 336 goto out; 337 } 338 } 339 340 VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); 341 if (size > *event_size) { 342 *event_size = size; 343 error = ENOMEM; 344 goto out; 345 } 346 347 if (ze->ze_zevent) 348 list_remove(&ze->ze_zevent->ev_ze_list, ze); 349 350 ze->ze_zevent = ev; 351 list_insert_head(&ev->ev_ze_list, ze); 352 (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP); 353 *dropped = ze->ze_dropped; 354 355 #ifdef _KERNEL 356 /* Include events dropped due to rate limiting */ 357 *dropped += atomic_swap_64(&ratelimit_dropped, 0); 358 #endif 359 ze->ze_dropped = 0; 360 out: 361 mutex_exit(&zevent_lock); 362 363 return (error); 364 } 365 366 /* 367 * Wait in an interruptible state for any new events. 
368 */ 369 int 370 zfs_zevent_wait(zfs_zevent_t *ze) 371 { 372 int error = EAGAIN; 373 374 mutex_enter(&zevent_lock); 375 zevent_waiters++; 376 377 while (error == EAGAIN) { 378 if (zevent_flags & ZEVENT_SHUTDOWN) { 379 error = SET_ERROR(ESHUTDOWN); 380 break; 381 } 382 383 error = cv_wait_sig(&zevent_cv, &zevent_lock); 384 if (signal_pending(current)) { 385 error = SET_ERROR(EINTR); 386 break; 387 } else if (!list_is_empty(&zevent_list)) { 388 error = 0; 389 continue; 390 } else { 391 error = EAGAIN; 392 } 393 } 394 395 zevent_waiters--; 396 mutex_exit(&zevent_lock); 397 398 return (error); 399 } 400 401 /* 402 * The caller may seek to a specific EID by passing that EID. If the EID 403 * is still available in the posted list of events the cursor is positioned 404 * there. Otherwise ENOENT is returned and the cursor is not moved. 405 * 406 * There are two reserved EIDs which may be passed and will never fail. 407 * ZEVENT_SEEK_START positions the cursor at the start of the list, and 408 * ZEVENT_SEEK_END positions the cursor at the end of the list. 
409 */ 410 int 411 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid) 412 { 413 zevent_t *ev; 414 int error = 0; 415 416 mutex_enter(&zevent_lock); 417 418 if (eid == ZEVENT_SEEK_START) { 419 if (ze->ze_zevent) 420 list_remove(&ze->ze_zevent->ev_ze_list, ze); 421 422 ze->ze_zevent = NULL; 423 goto out; 424 } 425 426 if (eid == ZEVENT_SEEK_END) { 427 if (ze->ze_zevent) 428 list_remove(&ze->ze_zevent->ev_ze_list, ze); 429 430 ev = list_head(&zevent_list); 431 if (ev) { 432 ze->ze_zevent = ev; 433 list_insert_head(&ev->ev_ze_list, ze); 434 } else { 435 ze->ze_zevent = NULL; 436 } 437 438 goto out; 439 } 440 441 for (ev = list_tail(&zevent_list); ev != NULL; 442 ev = list_prev(&zevent_list, ev)) { 443 if (ev->ev_eid == eid) { 444 if (ze->ze_zevent) 445 list_remove(&ze->ze_zevent->ev_ze_list, ze); 446 447 ze->ze_zevent = ev; 448 list_insert_head(&ev->ev_ze_list, ze); 449 break; 450 } 451 } 452 453 if (ev == NULL) 454 error = ENOENT; 455 456 out: 457 mutex_exit(&zevent_lock); 458 459 return (error); 460 } 461 462 void 463 zfs_zevent_init(zfs_zevent_t **zep) 464 { 465 zfs_zevent_t *ze; 466 467 ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP); 468 list_link_init(&ze->ze_node); 469 } 470 471 void 472 zfs_zevent_destroy(zfs_zevent_t *ze) 473 { 474 mutex_enter(&zevent_lock); 475 if (ze->ze_zevent) 476 list_remove(&ze->ze_zevent->ev_ze_list, ze); 477 mutex_exit(&zevent_lock); 478 479 kmem_free(ze, sizeof (zfs_zevent_t)); 480 } 481 #endif /* _KERNEL */ 482 483 /* 484 * Wrappers for FM nvlist allocators 485 */ 486 static void * 487 i_fm_alloc(nv_alloc_t *nva, size_t size) 488 { 489 (void) nva; 490 return (kmem_zalloc(size, KM_SLEEP)); 491 } 492 493 static void 494 i_fm_free(nv_alloc_t *nva, void *buf, size_t size) 495 { 496 (void) nva; 497 kmem_free(buf, size); 498 } 499 500 static const nv_alloc_ops_t fm_mem_alloc_ops = { 501 .nv_ao_init = NULL, 502 .nv_ao_fini = NULL, 503 .nv_ao_alloc = i_fm_alloc, 504 .nv_ao_free = i_fm_free, 505 .nv_ao_reset = NULL 506 }; 507 508 /* 509 
* Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer 510 * to the newly allocated nv_alloc_t structure is returned upon success or NULL 511 * is returned to indicate that the nv_alloc structure could not be created. 512 */ 513 nv_alloc_t * 514 fm_nva_xcreate(char *buf, size_t bufsz) 515 { 516 nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); 517 518 if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) { 519 kmem_free(nvhdl, sizeof (nv_alloc_t)); 520 return (NULL); 521 } 522 523 return (nvhdl); 524 } 525 526 /* 527 * Destroy a previously allocated nv_alloc structure. The fixed buffer 528 * associated with nva must be freed by the caller. 529 */ 530 void 531 fm_nva_xdestroy(nv_alloc_t *nva) 532 { 533 nv_alloc_fini(nva); 534 kmem_free(nva, sizeof (nv_alloc_t)); 535 } 536 537 /* 538 * Create a new nv list. A pointer to a new nv list structure is returned 539 * upon success or NULL is returned to indicate that the structure could 540 * not be created. The newly created nv list is created and managed by the 541 * operations installed in nva. If nva is NULL, the default FMA nva 542 * operations are installed and used. 543 * 544 * When called from the kernel and nva == NULL, this function must be called 545 * from passive kernel context with no locks held that can prevent a 546 * sleeping memory allocation from occurring. Otherwise, this function may 547 * be called from other kernel contexts as long a valid nva created via 548 * fm_nva_create() is supplied. 
549 */ 550 nvlist_t * 551 fm_nvlist_create(nv_alloc_t *nva) 552 { 553 int hdl_alloced = 0; 554 nvlist_t *nvl; 555 nv_alloc_t *nvhdl; 556 557 if (nva == NULL) { 558 nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); 559 560 if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) { 561 kmem_free(nvhdl, sizeof (nv_alloc_t)); 562 return (NULL); 563 } 564 hdl_alloced = 1; 565 } else { 566 nvhdl = nva; 567 } 568 569 if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) { 570 if (hdl_alloced) { 571 nv_alloc_fini(nvhdl); 572 kmem_free(nvhdl, sizeof (nv_alloc_t)); 573 } 574 return (NULL); 575 } 576 577 return (nvl); 578 } 579 580 /* 581 * Destroy a previously allocated nvlist structure. flag indicates whether 582 * or not the associated nva structure should be freed (FM_NVA_FREE) or 583 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows 584 * it to be re-used for future nvlist creation operations. 585 */ 586 void 587 fm_nvlist_destroy(nvlist_t *nvl, int flag) 588 { 589 nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl); 590 591 nvlist_free(nvl); 592 593 if (nva != NULL) { 594 if (flag == FM_NVA_FREE) 595 fm_nva_xdestroy(nva); 596 } 597 } 598 599 int 600 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap) 601 { 602 int nelem, ret = 0; 603 data_type_t type; 604 605 while (ret == 0 && name != NULL) { 606 type = va_arg(ap, data_type_t); 607 switch (type) { 608 case DATA_TYPE_BYTE: 609 ret = nvlist_add_byte(payload, name, 610 va_arg(ap, uint_t)); 611 break; 612 case DATA_TYPE_BYTE_ARRAY: 613 nelem = va_arg(ap, int); 614 ret = nvlist_add_byte_array(payload, name, 615 va_arg(ap, uchar_t *), nelem); 616 break; 617 case DATA_TYPE_BOOLEAN_VALUE: 618 ret = nvlist_add_boolean_value(payload, name, 619 va_arg(ap, boolean_t)); 620 break; 621 case DATA_TYPE_BOOLEAN_ARRAY: 622 nelem = va_arg(ap, int); 623 ret = nvlist_add_boolean_array(payload, name, 624 va_arg(ap, boolean_t *), nelem); 625 break; 626 case DATA_TYPE_INT8: 627 ret = nvlist_add_int8(payload, name, 
628 va_arg(ap, int)); 629 break; 630 case DATA_TYPE_INT8_ARRAY: 631 nelem = va_arg(ap, int); 632 ret = nvlist_add_int8_array(payload, name, 633 va_arg(ap, int8_t *), nelem); 634 break; 635 case DATA_TYPE_UINT8: 636 ret = nvlist_add_uint8(payload, name, 637 va_arg(ap, uint_t)); 638 break; 639 case DATA_TYPE_UINT8_ARRAY: 640 nelem = va_arg(ap, int); 641 ret = nvlist_add_uint8_array(payload, name, 642 va_arg(ap, uint8_t *), nelem); 643 break; 644 case DATA_TYPE_INT16: 645 ret = nvlist_add_int16(payload, name, 646 va_arg(ap, int)); 647 break; 648 case DATA_TYPE_INT16_ARRAY: 649 nelem = va_arg(ap, int); 650 ret = nvlist_add_int16_array(payload, name, 651 va_arg(ap, int16_t *), nelem); 652 break; 653 case DATA_TYPE_UINT16: 654 ret = nvlist_add_uint16(payload, name, 655 va_arg(ap, uint_t)); 656 break; 657 case DATA_TYPE_UINT16_ARRAY: 658 nelem = va_arg(ap, int); 659 ret = nvlist_add_uint16_array(payload, name, 660 va_arg(ap, uint16_t *), nelem); 661 break; 662 case DATA_TYPE_INT32: 663 ret = nvlist_add_int32(payload, name, 664 va_arg(ap, int32_t)); 665 break; 666 case DATA_TYPE_INT32_ARRAY: 667 nelem = va_arg(ap, int); 668 ret = nvlist_add_int32_array(payload, name, 669 va_arg(ap, int32_t *), nelem); 670 break; 671 case DATA_TYPE_UINT32: 672 ret = nvlist_add_uint32(payload, name, 673 va_arg(ap, uint32_t)); 674 break; 675 case DATA_TYPE_UINT32_ARRAY: 676 nelem = va_arg(ap, int); 677 ret = nvlist_add_uint32_array(payload, name, 678 va_arg(ap, uint32_t *), nelem); 679 break; 680 case DATA_TYPE_INT64: 681 ret = nvlist_add_int64(payload, name, 682 va_arg(ap, int64_t)); 683 break; 684 case DATA_TYPE_INT64_ARRAY: 685 nelem = va_arg(ap, int); 686 ret = nvlist_add_int64_array(payload, name, 687 va_arg(ap, int64_t *), nelem); 688 break; 689 case DATA_TYPE_UINT64: 690 ret = nvlist_add_uint64(payload, name, 691 va_arg(ap, uint64_t)); 692 break; 693 case DATA_TYPE_UINT64_ARRAY: 694 nelem = va_arg(ap, int); 695 ret = nvlist_add_uint64_array(payload, name, 696 va_arg(ap, uint64_t *), 
nelem); 697 break; 698 case DATA_TYPE_STRING: 699 ret = nvlist_add_string(payload, name, 700 va_arg(ap, char *)); 701 break; 702 case DATA_TYPE_STRING_ARRAY: 703 nelem = va_arg(ap, int); 704 ret = nvlist_add_string_array(payload, name, 705 va_arg(ap, const char **), nelem); 706 break; 707 case DATA_TYPE_NVLIST: 708 ret = nvlist_add_nvlist(payload, name, 709 va_arg(ap, nvlist_t *)); 710 break; 711 case DATA_TYPE_NVLIST_ARRAY: 712 nelem = va_arg(ap, int); 713 ret = nvlist_add_nvlist_array(payload, name, 714 va_arg(ap, const nvlist_t **), nelem); 715 break; 716 default: 717 ret = EINVAL; 718 } 719 720 name = va_arg(ap, char *); 721 } 722 return (ret); 723 } 724 725 void 726 fm_payload_set(nvlist_t *payload, ...) 727 { 728 int ret; 729 const char *name; 730 va_list ap; 731 732 va_start(ap, payload); 733 name = va_arg(ap, char *); 734 ret = i_fm_payload_set(payload, name, ap); 735 va_end(ap); 736 737 if (ret) 738 atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64); 739 } 740 741 /* 742 * Set-up and validate the members of an ereport event according to: 743 * 744 * Member name Type Value 745 * ==================================================== 746 * class string ereport 747 * version uint8_t 0 748 * ena uint64_t <ena> 749 * detector nvlist_t <detector> 750 * ereport-payload nvlist_t <var args> 751 * 752 * We don't actually add a 'version' member to the payload. Really, 753 * the version quoted to us by our caller is that of the category 1 754 * "ereport" event class (and we require FM_EREPORT_VERS0) but 755 * the payload version of the actual leaf class event under construction 756 * may be something else. Callers should supply a version in the varargs, 757 * or (better) we could take two version arguments - one for the 758 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one 759 * for the leaf class. 760 */ 761 void 762 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, 763 uint64_t ena, const nvlist_t *detector, ...) 
764 { 765 char ereport_class[FM_MAX_CLASS]; 766 const char *name; 767 va_list ap; 768 int ret; 769 770 if (version != FM_EREPORT_VERS0) { 771 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 772 return; 773 } 774 775 (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s", 776 FM_EREPORT_CLASS, erpt_class); 777 if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) { 778 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 779 return; 780 } 781 782 if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) { 783 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 784 } 785 786 if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR, 787 (nvlist_t *)detector) != 0) { 788 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 789 } 790 791 va_start(ap, detector); 792 name = va_arg(ap, const char *); 793 ret = i_fm_payload_set(ereport, name, ap); 794 va_end(ap); 795 796 if (ret) 797 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 798 } 799 800 /* 801 * Set-up and validate the members of an hc fmri according to; 802 * 803 * Member name Type Value 804 * =================================================== 805 * version uint8_t 0 806 * auth nvlist_t <auth> 807 * hc-name string <name> 808 * hc-id string <id> 809 * 810 * Note that auth and hc-id are optional members. 
811 */ 812 813 #define HC_MAXPAIRS 20 814 #define HC_MAXNAMELEN 50 815 816 static int 817 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth) 818 { 819 if (version != FM_HC_SCHEME_VERSION) { 820 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 821 return (0); 822 } 823 824 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 || 825 nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) { 826 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 827 return (0); 828 } 829 830 if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, 831 (nvlist_t *)auth) != 0) { 832 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 833 return (0); 834 } 835 836 return (1); 837 } 838 839 void 840 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, 841 nvlist_t *snvl, int npairs, ...) 842 { 843 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); 844 nvlist_t *pairs[HC_MAXPAIRS]; 845 va_list ap; 846 int i; 847 848 if (!fm_fmri_hc_set_common(fmri, version, auth)) 849 return; 850 851 npairs = MIN(npairs, HC_MAXPAIRS); 852 853 va_start(ap, npairs); 854 for (i = 0; i < npairs; i++) { 855 const char *name = va_arg(ap, const char *); 856 uint32_t id = va_arg(ap, uint32_t); 857 char idstr[11]; 858 859 (void) snprintf(idstr, sizeof (idstr), "%u", id); 860 861 pairs[i] = fm_nvlist_create(nva); 862 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || 863 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { 864 atomic_inc_64( 865 &erpt_kstat_data.fmri_set_failed.value.ui64); 866 } 867 } 868 va_end(ap); 869 870 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, 871 (const nvlist_t **)pairs, npairs) != 0) { 872 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 873 } 874 875 for (i = 0; i < npairs; i++) 876 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); 877 878 if (snvl != NULL) { 879 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { 880 atomic_inc_64( 881 
&erpt_kstat_data.fmri_set_failed.value.ui64); 882 } 883 } 884 } 885 886 void 887 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, 888 nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) 889 { 890 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); 891 nvlist_t *pairs[HC_MAXPAIRS]; 892 nvlist_t **hcl; 893 uint_t n; 894 int i, j; 895 va_list ap; 896 char *hcname, *hcid; 897 898 if (!fm_fmri_hc_set_common(fmri, version, auth)) 899 return; 900 901 /* 902 * copy the bboard nvpairs to the pairs array 903 */ 904 if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) 905 != 0) { 906 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 907 return; 908 } 909 910 for (i = 0; i < n; i++) { 911 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, 912 &hcname) != 0) { 913 atomic_inc_64( 914 &erpt_kstat_data.fmri_set_failed.value.ui64); 915 return; 916 } 917 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { 918 atomic_inc_64( 919 &erpt_kstat_data.fmri_set_failed.value.ui64); 920 return; 921 } 922 923 pairs[i] = fm_nvlist_create(nva); 924 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || 925 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { 926 for (j = 0; j <= i; j++) { 927 if (pairs[j] != NULL) 928 fm_nvlist_destroy(pairs[j], 929 FM_NVA_RETAIN); 930 } 931 atomic_inc_64( 932 &erpt_kstat_data.fmri_set_failed.value.ui64); 933 return; 934 } 935 } 936 937 /* 938 * create the pairs from passed in pairs 939 */ 940 npairs = MIN(npairs, HC_MAXPAIRS); 941 942 va_start(ap, npairs); 943 for (i = n; i < npairs + n; i++) { 944 const char *name = va_arg(ap, const char *); 945 uint32_t id = va_arg(ap, uint32_t); 946 char idstr[11]; 947 (void) snprintf(idstr, sizeof (idstr), "%u", id); 948 pairs[i] = fm_nvlist_create(nva); 949 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || 950 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { 951 for (j = 0; j <= i; j++) { 952 if (pairs[j] != NULL) 953 
fm_nvlist_destroy(pairs[j], 954 FM_NVA_RETAIN); 955 } 956 atomic_inc_64( 957 &erpt_kstat_data.fmri_set_failed.value.ui64); 958 return; 959 } 960 } 961 va_end(ap); 962 963 /* 964 * Create the fmri hc list 965 */ 966 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, 967 (const nvlist_t **)pairs, npairs + n) != 0) { 968 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 969 return; 970 } 971 972 for (i = 0; i < npairs + n; i++) { 973 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); 974 } 975 976 if (snvl != NULL) { 977 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { 978 atomic_inc_64( 979 &erpt_kstat_data.fmri_set_failed.value.ui64); 980 return; 981 } 982 } 983 } 984 985 /* 986 * Set-up and validate the members of an dev fmri according to: 987 * 988 * Member name Type Value 989 * ==================================================== 990 * version uint8_t 0 991 * auth nvlist_t <auth> 992 * devpath string <devpath> 993 * [devid] string <devid> 994 * [target-port-l0id] string <target-port-lun0-id> 995 * 996 * Note that auth and devid are optional members. 
997 */ 998 void 999 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth, 1000 const char *devpath, const char *devid, const char *tpl0) 1001 { 1002 int err = 0; 1003 1004 if (version != DEV_SCHEME_VERSION0) { 1005 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1006 return; 1007 } 1008 1009 err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version); 1010 err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); 1011 1012 if (auth != NULL) { 1013 err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, 1014 (nvlist_t *)auth); 1015 } 1016 1017 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath); 1018 1019 if (devid != NULL) 1020 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid); 1021 1022 if (tpl0 != NULL) 1023 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0); 1024 1025 if (err) 1026 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1027 1028 } 1029 1030 /* 1031 * Set-up and validate the members of an cpu fmri according to: 1032 * 1033 * Member name Type Value 1034 * ==================================================== 1035 * version uint8_t 0 1036 * auth nvlist_t <auth> 1037 * cpuid uint32_t <cpu_id> 1038 * cpumask uint8_t <cpu_mask> 1039 * serial uint64_t <serial_id> 1040 * 1041 * Note that auth, cpumask, serial are optional members. 
1042 * 1043 */ 1044 void 1045 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth, 1046 uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp) 1047 { 1048 uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64; 1049 1050 if (version < CPU_SCHEME_VERSION1) { 1051 atomic_inc_64(failedp); 1052 return; 1053 } 1054 1055 if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) { 1056 atomic_inc_64(failedp); 1057 return; 1058 } 1059 1060 if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME, 1061 FM_FMRI_SCHEME_CPU) != 0) { 1062 atomic_inc_64(failedp); 1063 return; 1064 } 1065 1066 if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY, 1067 (nvlist_t *)auth) != 0) 1068 atomic_inc_64(failedp); 1069 1070 if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0) 1071 atomic_inc_64(failedp); 1072 1073 if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK, 1074 *cpu_maskp) != 0) 1075 atomic_inc_64(failedp); 1076 1077 if (serial_idp == NULL || nvlist_add_string(fmri_cpu, 1078 FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0) 1079 atomic_inc_64(failedp); 1080 } 1081 1082 /* 1083 * Set-up and validate the members of a mem according to: 1084 * 1085 * Member name Type Value 1086 * ==================================================== 1087 * version uint8_t 0 1088 * auth nvlist_t <auth> [optional] 1089 * unum string <unum> 1090 * serial string <serial> [optional*] 1091 * offset uint64_t <offset> [optional] 1092 * 1093 * * serial is required if offset is present 1094 */ 1095 void 1096 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth, 1097 const char *unum, const char *serial, uint64_t offset) 1098 { 1099 if (version != MEM_SCHEME_VERSION0) { 1100 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1101 return; 1102 } 1103 1104 if (!serial && (offset != (uint64_t)-1)) { 1105 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1106 return; 1107 } 1108 1109 if (nvlist_add_uint8(fmri, FM_VERSION, 
version) != 0) { 1110 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1111 return; 1112 } 1113 1114 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) { 1115 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1116 return; 1117 } 1118 1119 if (auth != NULL) { 1120 if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, 1121 (nvlist_t *)auth) != 0) { 1122 atomic_inc_64( 1123 &erpt_kstat_data.fmri_set_failed.value.ui64); 1124 } 1125 } 1126 1127 if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) { 1128 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1129 } 1130 1131 if (serial != NULL) { 1132 if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID, 1133 (const char **)&serial, 1) != 0) { 1134 atomic_inc_64( 1135 &erpt_kstat_data.fmri_set_failed.value.ui64); 1136 } 1137 if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri, 1138 FM_FMRI_MEM_OFFSET, offset) != 0) { 1139 atomic_inc_64( 1140 &erpt_kstat_data.fmri_set_failed.value.ui64); 1141 } 1142 } 1143 } 1144 1145 void 1146 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid, 1147 uint64_t vdev_guid) 1148 { 1149 if (version != ZFS_SCHEME_VERSION0) { 1150 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1151 return; 1152 } 1153 1154 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { 1155 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1156 return; 1157 } 1158 1159 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) { 1160 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1161 return; 1162 } 1163 1164 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) { 1165 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1166 } 1167 1168 if (vdev_guid != 0) { 1169 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) { 1170 atomic_inc_64( 1171 &erpt_kstat_data.fmri_set_failed.value.ui64); 1172 } 1173 } 1174 } 1175 1176 uint64_t 1177 fm_ena_increment(uint64_t ena) 1178 { 1179 uint64_t 
new_ena; 1180 1181 switch (ENA_FORMAT(ena)) { 1182 case FM_ENA_FMT1: 1183 new_ena = ena + (1 << ENA_FMT1_GEN_SHFT); 1184 break; 1185 case FM_ENA_FMT2: 1186 new_ena = ena + (1 << ENA_FMT2_GEN_SHFT); 1187 break; 1188 default: 1189 new_ena = 0; 1190 } 1191 1192 return (new_ena); 1193 } 1194 1195 uint64_t 1196 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) 1197 { 1198 uint64_t ena = 0; 1199 1200 switch (format) { 1201 case FM_ENA_FMT1: 1202 if (timestamp) { 1203 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1204 ((cpuid << ENA_FMT1_CPUID_SHFT) & 1205 ENA_FMT1_CPUID_MASK) | 1206 ((timestamp << ENA_FMT1_TIME_SHFT) & 1207 ENA_FMT1_TIME_MASK)); 1208 } else { 1209 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1210 ((cpuid << ENA_FMT1_CPUID_SHFT) & 1211 ENA_FMT1_CPUID_MASK) | 1212 ((gethrtime() << ENA_FMT1_TIME_SHFT) & 1213 ENA_FMT1_TIME_MASK)); 1214 } 1215 break; 1216 case FM_ENA_FMT2: 1217 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1218 ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)); 1219 break; 1220 default: 1221 break; 1222 } 1223 1224 return (ena); 1225 } 1226 1227 uint64_t 1228 fm_ena_generate(uint64_t timestamp, uchar_t format) 1229 { 1230 uint64_t ena; 1231 1232 kpreempt_disable(); 1233 ena = fm_ena_generate_cpu(timestamp, getcpuid(), format); 1234 kpreempt_enable(); 1235 1236 return (ena); 1237 } 1238 1239 uint64_t 1240 fm_ena_generation_get(uint64_t ena) 1241 { 1242 uint64_t gen; 1243 1244 switch (ENA_FORMAT(ena)) { 1245 case FM_ENA_FMT1: 1246 gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT; 1247 break; 1248 case FM_ENA_FMT2: 1249 gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT; 1250 break; 1251 default: 1252 gen = 0; 1253 break; 1254 } 1255 1256 return (gen); 1257 } 1258 1259 uchar_t 1260 fm_ena_format_get(uint64_t ena) 1261 { 1262 1263 return (ENA_FORMAT(ena)); 1264 } 1265 1266 uint64_t 1267 fm_ena_id_get(uint64_t ena) 1268 { 1269 uint64_t id; 1270 1271 switch (ENA_FORMAT(ena)) { 1272 case FM_ENA_FMT1: 1273 id 
= (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT; 1274 break; 1275 case FM_ENA_FMT2: 1276 id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT; 1277 break; 1278 default: 1279 id = 0; 1280 } 1281 1282 return (id); 1283 } 1284 1285 uint64_t 1286 fm_ena_time_get(uint64_t ena) 1287 { 1288 uint64_t time; 1289 1290 switch (ENA_FORMAT(ena)) { 1291 case FM_ENA_FMT1: 1292 time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT; 1293 break; 1294 case FM_ENA_FMT2: 1295 time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT; 1296 break; 1297 default: 1298 time = 0; 1299 } 1300 1301 return (time); 1302 } 1303 1304 #ifdef _KERNEL 1305 /* 1306 * Helper function to increment ereport dropped count. Used by the event 1307 * rate limiting code to give feedback to the user about how many events were 1308 * rate limited by including them in the 'dropped' count. 1309 */ 1310 void 1311 fm_erpt_dropped_increment(void) 1312 { 1313 atomic_inc_64(&ratelimit_dropped); 1314 } 1315 1316 void 1317 fm_init(void) 1318 { 1319 zevent_len_cur = 0; 1320 zevent_flags = 0; 1321 1322 /* Initialize zevent allocation and generation kstats */ 1323 fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, 1324 sizeof (struct erpt_kstat) / sizeof (kstat_named_t), 1325 KSTAT_FLAG_VIRTUAL); 1326 1327 if (fm_ksp != NULL) { 1328 fm_ksp->ks_data = &erpt_kstat_data; 1329 kstat_install(fm_ksp); 1330 } else { 1331 cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); 1332 } 1333 1334 mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); 1335 list_create(&zevent_list, sizeof (zevent_t), 1336 offsetof(zevent_t, ev_node)); 1337 cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); 1338 1339 zfs_ereport_init(); 1340 } 1341 1342 void 1343 fm_fini(void) 1344 { 1345 int count; 1346 1347 zfs_ereport_fini(); 1348 1349 zfs_zevent_drain_all(&count); 1350 1351 mutex_enter(&zevent_lock); 1352 cv_broadcast(&zevent_cv); 1353 1354 zevent_flags |= ZEVENT_SHUTDOWN; 1355 while (zevent_waiters > 0) { 1356 mutex_exit(&zevent_lock); 1357 
schedule(); 1358 mutex_enter(&zevent_lock); 1359 } 1360 mutex_exit(&zevent_lock); 1361 1362 cv_destroy(&zevent_cv); 1363 list_destroy(&zevent_list); 1364 mutex_destroy(&zevent_lock); 1365 1366 if (fm_ksp != NULL) { 1367 kstat_delete(fm_ksp); 1368 fm_ksp = NULL; 1369 } 1370 } 1371 #endif /* _KERNEL */ 1372 1373 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW, 1374 "Max event queue length"); 1375