/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators.  Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

static uint_t zfs_zevent_len_max = 512;

static uint_t zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(uint_t *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head.  If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent.  The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted.  In this case, cb is
 *   called before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}

void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	zfs_file_t *fp = zfs_file_get(fd);
	if (fp == NULL)
		return (NULL);

	int error = zfsdev_getminor(fp, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error) {
		zfs_zevent_fd_rele(fp);
		fp = NULL;
	}

	return (fp);
}

void
zfs_zevent_fd_rele(zfs_file_t *fp)
{
	zfs_file_put(fp);
}
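
/*
 * Example (illustrative sketch, not part of the FMA API): a typical
 * producer builds the event and detector nvlists with fm_nvlist_create(),
 * then hands ownership to zfs_zevent_post() along with a callback that
 * frees both lists once the event is dropped or drained.  The callback
 * name below is invented for the example:
 *
 *	static void
 *	example_post_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		if (nvl != NULL)
 *			fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	nvlist_t *det = fm_nvlist_create(NULL);
 *	... populate nvl (class, ena, payload) and det (detector FMRI) ...
 *	(void) zfs_zevent_post(nvl, det, example_post_cb);
 *
 * The callback also runs on failure, so the caller never frees nvl or
 * detector directly once they have been handed off.
 */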

/*
 * Get the next zevent in the stream and place a copy in 'event'.  This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'.  In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream, start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream, continue with the next element and remove
		 * ourselves from the wait queue for the previous element
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}
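
/*
 * Example (illustrative sketch): a consumer holding a zfs_zevent_t stream
 * handle typically alternates zfs_zevent_next() and zfs_zevent_wait(),
 * retrying when ENOMEM reports the minimum required buffer size:
 *
 *	uint64_t size = 1024, dropped = 0;
 *	nvlist_t *event = NULL;
 *
 *	for (;;) {
 *		int error = zfs_zevent_next(ze, &event, &size, &dropped);
 *		if (error == 0) {
 *			... consume event, then nvlist_free(event) ...
 *		} else if (error == ENOMEM) {
 *			continue;	('size' now holds the minimum)
 *		} else if (error == ENOENT) {
 *			if (zfs_zevent_wait(ze) != 0)
 *				break;	(EINTR or ESHUTDOWN)
 *		} else {
 *			break;
 *		}
 *	}
 */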

/*
 * The caller may seek to a specific EID by passing that EID.  If the EID
 * is still available in the posted list of events the cursor is positioned
 * there.  Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}

void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	(void) nva;
	return (kmem_alloc(size, KM_SLEEP));
}

static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	(void) nva;
	kmem_free(buf, size);
}

static const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure.  The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}

/*
 * Create a new nv list.  A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created.  The newly created nv list is created and managed by the
 * operations installed in nva.  If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring.  Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure.  flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}
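
/*
 * Example (illustrative sketch): constructing an nvlist from a preallocated
 * buffer, e.g. in a context where sleeping allocations must be avoided.
 * 'buf' and 'bufsz' stand for a caller-owned buffer and its size and are
 * assumptions of the example:
 *
 *	nv_alloc_t *nva = fm_nva_xcreate(buf, bufsz);
 *	if (nva != NULL) {
 *		nvlist_t *nvl = fm_nvlist_create(nva);
 *		... build and use nvl ...
 *		fm_nvlist_destroy(nvl, FM_NVA_RETAIN);
 *		fm_nva_xdestroy(nva);
 *	}
 *
 * FM_NVA_RETAIN keeps the nv_alloc_t usable for another fm_nvlist_create()
 * call; fm_nva_xdestroy() then tears it down, while the fixed buffer itself
 * remains owned by the caller.
 */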

int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, const char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, const nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
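
/*
 * Example (illustrative sketch): fm_payload_set() consumes name/type/value
 * tuples terminated by a NULL name; array types add an element count before
 * the array pointer, matching the switch in i_fm_payload_set() above.  The
 * member names below are invented for the example:
 *
 *	fm_payload_set(ereport,
 *	    "example_offset", DATA_TYPE_UINT64, offset,
 *	    "example_msg", DATA_TYPE_STRING, "checksum mismatch",
 *	    NULL);
 */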

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload.  Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else.  Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
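
/*
 * Example (illustrative sketch): building an ereport for a hypothetical
 * leaf class "io.example.timeout".  The leaf class and payload member name
 * are assumptions of the example:
 *
 *	nvlist_t *ereport = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *	... populate detector, e.g. with fm_fmri_dev_set() or
 *	    fm_fmri_zfs_set() ...
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "io.example.timeout",
 *	    ena, detector, "example_retries", DATA_TYPE_UINT32, 3, NULL);
 *
 * The resulting class member becomes "ereport.io.example.timeout".
 */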

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	const char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			va_end(ap);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}
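
/*
 * Example (illustrative sketch): an hc FMRI naming a hypothetical
 * "motherboard 0 / chip 1" location as two name/id pairs:
 *
 *	nvlist_t *fmri = fm_nvlist_create(NULL);
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
 *	    "motherboard", 0, "chip", 1);
 *
 * Each pair becomes an hc-list element carrying FM_FMRI_HC_NAME and a
 * string FM_FMRI_HC_ID ("0", "1"), capped at HC_MAXPAIRS entries.
 */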

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
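
/*
 * Example (illustrative sketch): a dev FMRI for a hypothetical disk with
 * no authority and no target-port-l0id; the devpath and devid strings are
 * invented for the example:
 *
 *	nvlist_t *fmri = fm_nvlist_create(NULL);
 *
 *	fm_fmri_dev_set(fmri, DEV_SCHEME_VERSION0, NULL,
 *	    "/pci@0,0/pci1028,1f44@2/disk@0,0",
 *	    "id1,sd@n5000c500a1b2c3d4", NULL);
 */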

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (const char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}
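
/*
 * Example (illustrative sketch): generating a format 1 ENA and pulling its
 * fields back out with the accessors below.  Whether the generation counter
 * is bumped with fm_ena_increment() is up to the caller's retry policy:
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *
 *	uchar_t  fmt  = fm_ena_format_get(ena);		(FM_ENA_FMT1)
 *	uint64_t time = fm_ena_time_get(ena);		(truncated hrtime)
 *	uint64_t id   = fm_ena_id_get(ena);		(cpuid for format 1)
 *	uint64_t gen  = fm_ena_generation_get(ena);
 *
 *	ena = fm_ena_increment(ena);			(generation + 1)
 */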

uchar_t
fm_ena_format_get(uint64_t ena)
{
	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count.  Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	uint_t count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		kpreempt(KPREEMPT_SYNC);
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
	"Max event queue length");