/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators. Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2. Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */
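
/*
 * Illustrative sketch of the typical producer flow (the event class, guids,
 * payload member and example_zevent_cb callback are hypothetical; calls such
 * as zfs_zevent_post() exist only under _KERNEL):
 *
 *	nvlist_t *ereport = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "fs.zfs.example",
 *	    fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
 *	fm_payload_set(ereport, "pool", DATA_TYPE_STRING, "tank", NULL);
 *
 *	(void) zfs_zevent_post(ereport, detector, example_zevent_cb);
 */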

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

static uint_t zfs_zevent_len_max = 512;

static uint_t zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted. The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(uint_t *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head. If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}
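
/*
 * Illustrative sketch of a cleanup callback suitable for zfs_zevent_post().
 * The callback receives the event nvlist and detector once they are no
 * longer needed and is responsible for releasing them; this hypothetical
 * example simply destroys both with fm_nvlist_destroy().
 *
 *	static void
 *	example_zevent_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 */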

void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	zfs_file_t *fp = zfs_file_get(fd);
	if (fp == NULL)
		return (NULL);

	int error = zfsdev_getminor(fp, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error) {
		zfs_zevent_fd_rele(fp);
		fp = NULL;
	}

	return (fp);
}

void
zfs_zevent_fd_rele(zfs_file_t *fp)
{
	zfs_file_put(fp);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'. This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'. In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream, start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream, continue with the next element and remove
		 * ourselves from the wait queue for the previous element
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}
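
/*
 * Illustrative sketch of a consumer loop over the zevent stream (the buffer
 * limit 'max_size' and the consumption step are hypothetical). On ENOMEM the
 * stream is not advanced and 'size' holds the minimum required buffer size;
 * on ENOENT there is nothing newer, so the consumer blocks in
 * zfs_zevent_wait() until another event is posted or the wait is interrupted.
 *
 *	uint64_t size, dropped;
 *	nvlist_t *event;
 *	int error;
 *
 *	for (;;) {
 *		size = max_size;
 *		event = NULL;
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *		if (error == 0) {
 *			// consume 'event', then nvlist_free(event)
 *		} else if (error == ENOENT) {
 *			if (zfs_zevent_wait(ze) != 0)
 *				break;	// EINTR or ESHUTDOWN
 *		} else {
 *			break;	// e.g. ENOMEM with 'size' set as above
 *		}
 *	}
 */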

/*
 * The caller may seek to a specific EID by passing that EID. If the EID
 * is still available in the posted list of events the cursor is positioned
 * there. Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}
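
/*
 * Illustrative sketch (hypothetical follow-mode consumer): a reader that only
 * cares about events posted after it attaches can position its cursor at the
 * end of the list before entering the wait/next loop sketched above.
 *
 *	(void) zfs_zevent_seek(ze, ZEVENT_SEEK_END);
 */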

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	(void) nva;
	return (kmem_alloc(size, KM_SLEEP));
}

static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	(void) nva;
	kmem_free(buf, size);
}

static const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure. The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}

/*
 * Create a new nv list. A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created. The newly created nv list is managed by the
 * operations installed in nva. If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring. Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure. flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}
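
/*
 * Illustrative sketch: building an nvlist on top of a preallocated buffer,
 * for example in a context where sleeping allocations are undesirable. The
 * static buffer name is hypothetical; FM_NVA_RETAIN keeps the nv_alloc_t so
 * it can be reused for another nvlist, and the backing buffer itself remains
 * the caller's to manage.
 *
 *	static char erpt_buf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(erpt_buf, sizeof (erpt_buf));
 *	nvlist_t *nvl = fm_nvlist_create(nva);
 *	...
 *	fm_nvlist_destroy(nvl, FM_NVA_RETAIN);
 *	fm_nva_xdestroy(nva);
 */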

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, const char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, const nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
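
/*
 * Illustrative sketch: fm_payload_set() consumes (name, DATA_TYPE_*, value)
 * triplets, with array types additionally taking an element count ahead of
 * the data pointer, and stops at a NULL name. The member names and values
 * below are hypothetical.
 *
 *	fm_payload_set(ereport,
 *	    "vdev_path", DATA_TYPE_STRING, "/dev/sda1",
 *	    "zio_size", DATA_TYPE_UINT64, (uint64_t)131072,
 *	    NULL);
 */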

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload. Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else. Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}
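
/*
 * Illustrative sketch: an hc FMRI is built from (name, id) varargs pairs,
 * outermost component first. The component names here are hypothetical and
 * no authority or hc-specific nvlist is attached.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
 *	    "motherboard", 0, "chip", 1);
 */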

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	const char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			va_end(ap);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

}

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 *
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (const char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}
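
/*
 * Illustrative sketch: generating a format 1 ENA (a zero timestamp is
 * replaced with gethrtime() for the current CPU) and deriving a related
 * ENA for a follow-on ereport by bumping the generation field.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	uint64_t related = fm_ena_increment(ena);
 *	// fm_ena_generation_get(related) is now greater than that of ena
 */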

uchar_t
fm_ena_format_get(uint64_t ena)
{

	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count. Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	uint_t count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		kpreempt(KPREEMPT_SYNC);
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
	"Max event queue length");