1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Fault Management Architecture (FMA) Resource and Protocol Support 27 * 28 * The routines contained herein provide services to support kernel subsystems 29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089). 30 * 31 * Name-Value Pair Lists 32 * 33 * The embodiment of an FMA protocol element (event, fmri or authority) is a 34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and 35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used 36 * to create an nvpair list using custom allocators. Callers may choose to 37 * allocate either from the kernel memory allocator, or from a preallocated 38 * buffer, useful in constrained contexts like high-level interrupt routines. 39 * 40 * Protocol Event and FMRI Construction 41 * 42 * Convenience routines are provided to construct nvlist events according to 43 * the FMA Event Protocol and Naming Schema specification for ereports and 44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes. 45 * 46 * ENA Manipulation 47 * 48 * Routines to generate ENA formats 0, 1 and 2 are available as well as 49 * routines to increment formats 1 and 2. Individual fields within the 50 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(), 51 * fm_ena_format_get() and fm_ena_gen_get(). 52 */ 53 54 #include <sys/types.h> 55 #include <sys/time.h> 56 #include <sys/list.h> 57 #include <sys/nvpair.h> 58 #include <sys/cmn_err.h> 59 #include <sys/sysmacros.h> 60 #include <sys/sunddi.h> 61 #include <sys/systeminfo.h> 62 #include <sys/fm/util.h> 63 #include <sys/fm/protocol.h> 64 #include <sys/kstat.h> 65 #include <sys/zfs_context.h> 66 #ifdef _KERNEL 67 #include <sys/atomic.h> 68 #include <sys/condvar.h> 69 #include <sys/zfs_ioctl.h> 70 71 static uint_t zfs_zevent_len_max = 512; 72 73 static uint_t zevent_len_cur = 0; 74 static int zevent_waiters = 0; 75 static int zevent_flags = 0; 76 77 /* Num events rate limited since the last time zfs_zevent_next() was called */ 78 static uint64_t ratelimit_dropped = 0; 79 80 /* 81 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is 82 * posted. The posted EIDs are monotonically increasing but not persistent. 83 * They will be reset to the initial value (1) each time the kernel module is 84 * loaded. 85 */ 86 static uint64_t zevent_eid = 0; 87 88 static kmutex_t zevent_lock; 89 static list_t zevent_list; 90 static kcondvar_t zevent_cv; 91 #endif /* _KERNEL */ 92 93 94 /* 95 * Common fault management kstats to record event generation failures 96 */ 97 98 struct erpt_kstat { 99 kstat_named_t erpt_dropped; /* num erpts dropped on post */ 100 kstat_named_t erpt_set_failed; /* num erpt set failures */ 101 kstat_named_t fmri_set_failed; /* num fmri set failures */ 102 kstat_named_t payload_set_failed; /* num payload set failures */ 103 kstat_named_t erpt_duplicates; /* num duplicate erpts */ 104 }; 105 106 static struct erpt_kstat erpt_kstat_data = { 107 { "erpt-dropped", KSTAT_DATA_UINT64 }, 108 { "erpt-set-failed", KSTAT_DATA_UINT64 }, 109 { "fmri-set-failed", KSTAT_DATA_UINT64 }, 110 { "payload-set-failed", KSTAT_DATA_UINT64 }, 111 { "erpt-duplicates", KSTAT_DATA_UINT64 } 112 }; 113 114 kstat_t *fm_ksp; 115 116 #ifdef _KERNEL 117 118 static zevent_t * 119 zfs_zevent_alloc(void) 120 { 121 zevent_t *ev; 122 123 ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP); 124 125 list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t), 126 offsetof(zfs_zevent_t, ze_node)); 127 list_link_init(&ev->ev_node); 128 129 return (ev); 130 } 131 132 static void 133 zfs_zevent_free(zevent_t *ev) 134 { 135 /* Run provided cleanup callback */ 136 ev->ev_cb(ev->ev_nvl, ev->ev_detector); 137 138 list_destroy(&ev->ev_ze_list); 139 kmem_free(ev, sizeof (zevent_t)); 140 } 141 142 static void 143 zfs_zevent_drain(zevent_t *ev) 144 { 145 zfs_zevent_t *ze; 146 147 ASSERT(MUTEX_HELD(&zevent_lock)); 148 list_remove(&zevent_list, ev); 149 150 /* Remove references to this event in all private file data */ 151 while ((ze = list_head(&ev->ev_ze_list)) != NULL) { 152 list_remove(&ev->ev_ze_list, ze); 153 ze->ze_zevent = NULL; 154 ze->ze_dropped++; 155 } 156 157 zfs_zevent_free(ev); 158 } 159 160 void 161 zfs_zevent_drain_all(uint_t *count) 162 { 163 zevent_t *ev; 164 165 mutex_enter(&zevent_lock); 166 while ((ev = list_head(&zevent_list)) != NULL) 167 zfs_zevent_drain(ev); 168 169 *count = zevent_len_cur; 170 zevent_len_cur = 0; 171 mutex_exit(&zevent_lock); 172 } 173 174 /* 175 * New zevents are inserted at the head. If the maximum queue 176 * length is exceeded a zevent will be drained from the tail. 177 * As part of this any user space processes which currently have 178 * a reference to this zevent_t in their private data will have 179 * this reference set to NULL. 180 */ 181 static void 182 zfs_zevent_insert(zevent_t *ev) 183 { 184 ASSERT(MUTEX_HELD(&zevent_lock)); 185 list_insert_head(&zevent_list, ev); 186 187 if (zevent_len_cur >= zfs_zevent_len_max) 188 zfs_zevent_drain(list_tail(&zevent_list)); 189 else 190 zevent_len_cur++; 191 } 192 193 /* 194 * Post a zevent. The cb will be called when nvl and detector are no longer 195 * needed, i.e.: 196 * - An error happened and a zevent can't be posted. In this case, cb is called 197 * before zfs_zevent_post() returns. 198 * - The event is being drained and freed. 199 */ 200 int 201 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) 202 { 203 inode_timespec_t tv; 204 int64_t tv_array[2]; 205 uint64_t eid; 206 size_t nvl_size = 0; 207 zevent_t *ev; 208 int error; 209 210 ASSERT(cb != NULL); 211 212 gethrestime(&tv); 213 tv_array[0] = tv.tv_sec; 214 tv_array[1] = tv.tv_nsec; 215 216 error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2); 217 if (error) { 218 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 219 goto out; 220 } 221 222 eid = atomic_inc_64_nv(&zevent_eid); 223 error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid); 224 if (error) { 225 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 226 goto out; 227 } 228 229 error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); 230 if (error) { 231 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 232 goto out; 233 } 234 235 if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { 236 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 237 error = EOVERFLOW; 238 goto out; 239 } 240 241 ev = zfs_zevent_alloc(); 242 if (ev == NULL) { 243 atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64); 244 error = ENOMEM; 245 goto out; 246 } 247 248 ev->ev_nvl = nvl; 249 ev->ev_detector = detector; 250 ev->ev_cb = cb; 251 ev->ev_eid = eid; 252 253 mutex_enter(&zevent_lock); 254 zfs_zevent_insert(ev); 255 cv_broadcast(&zevent_cv); 256 mutex_exit(&zevent_lock); 257 258 out: 259 if (error) 260 cb(nvl, detector); 261 262 return (error); 263 } 264 265 void 266 zfs_zevent_track_duplicate(void) 267 { 268 atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64); 269 } 270 271 static int 272 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze) 273 { 274 *ze = zfsdev_get_state(minor, ZST_ZEVENT); 275 if (*ze == NULL) 276 return (SET_ERROR(EBADF)); 277 278 return (0); 279 } 280 281 zfs_file_t * 282 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) 283 { 284 zfs_file_t *fp = zfs_file_get(fd); 285 if (fp == NULL) 286 return (NULL); 287 288 int error = zfsdev_getminor(fp, minorp); 289 if (error == 0) 290 error = zfs_zevent_minor_to_state(*minorp, ze); 291 292 if (error) { 293 zfs_zevent_fd_rele(fp); 294 fp = NULL; 295 } 296 297 return (fp); 298 } 299 300 void 301 zfs_zevent_fd_rele(zfs_file_t *fp) 302 { 303 zfs_file_put(fp); 304 } 305 306 /* 307 * Get the next zevent in the stream and place a copy in 'event'. This 308 * may fail with ENOMEM if the encoded nvlist size exceeds the passed 309 * 'event_size'. In this case the stream pointer is not advanced and 310 * and 'event_size' is set to the minimum required buffer size. 311 */ 312 int 313 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, 314 uint64_t *dropped) 315 { 316 zevent_t *ev; 317 size_t size; 318 int error = 0; 319 320 mutex_enter(&zevent_lock); 321 if (ze->ze_zevent == NULL) { 322 /* New stream start at the beginning/tail */ 323 ev = list_tail(&zevent_list); 324 if (ev == NULL) { 325 error = ENOENT; 326 goto out; 327 } 328 } else { 329 /* 330 * Existing stream continue with the next element and remove 331 * ourselves from the wait queue for the previous element 332 */ 333 ev = list_prev(&zevent_list, ze->ze_zevent); 334 if (ev == NULL) { 335 error = ENOENT; 336 goto out; 337 } 338 } 339 340 VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); 341 if (size > *event_size) { 342 *event_size = size; 343 error = ENOMEM; 344 goto out; 345 } 346 347 if (ze->ze_zevent) 348 list_remove(&ze->ze_zevent->ev_ze_list, ze); 349 350 ze->ze_zevent = ev; 351 list_insert_head(&ev->ev_ze_list, ze); 352 (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP); 353 *dropped = ze->ze_dropped; 354 355 #ifdef _KERNEL 356 /* Include events dropped due to rate limiting */ 357 *dropped += atomic_swap_64(&ratelimit_dropped, 0); 358 #endif 359 ze->ze_dropped = 0; 360 out: 361 mutex_exit(&zevent_lock); 362 363 return (error); 364 } 365 366 /* 367 * Wait in an interruptible state for any new events. 368 */ 369 int 370 zfs_zevent_wait(zfs_zevent_t *ze) 371 { 372 int error = EAGAIN; 373 374 mutex_enter(&zevent_lock); 375 zevent_waiters++; 376 377 while (error == EAGAIN) { 378 if (zevent_flags & ZEVENT_SHUTDOWN) { 379 error = SET_ERROR(ESHUTDOWN); 380 break; 381 } 382 383 error = cv_wait_sig(&zevent_cv, &zevent_lock); 384 if (signal_pending(current)) { 385 error = SET_ERROR(EINTR); 386 break; 387 } else if (!list_is_empty(&zevent_list)) { 388 error = 0; 389 continue; 390 } else { 391 error = EAGAIN; 392 } 393 } 394 395 zevent_waiters--; 396 mutex_exit(&zevent_lock); 397 398 return (error); 399 } 400 401 /* 402 * The caller may seek to a specific EID by passing that EID. If the EID 403 * is still available in the posted list of events the cursor is positioned 404 * there. Otherwise ENOENT is returned and the cursor is not moved. 405 * 406 * There are two reserved EIDs which may be passed and will never fail. 407 * ZEVENT_SEEK_START positions the cursor at the start of the list, and 408 * ZEVENT_SEEK_END positions the cursor at the end of the list. 409 */ 410 int 411 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid) 412 { 413 zevent_t *ev; 414 int error = 0; 415 416 mutex_enter(&zevent_lock); 417 418 if (eid == ZEVENT_SEEK_START) { 419 if (ze->ze_zevent) 420 list_remove(&ze->ze_zevent->ev_ze_list, ze); 421 422 ze->ze_zevent = NULL; 423 goto out; 424 } 425 426 if (eid == ZEVENT_SEEK_END) { 427 if (ze->ze_zevent) 428 list_remove(&ze->ze_zevent->ev_ze_list, ze); 429 430 ev = list_head(&zevent_list); 431 if (ev) { 432 ze->ze_zevent = ev; 433 list_insert_head(&ev->ev_ze_list, ze); 434 } else { 435 ze->ze_zevent = NULL; 436 } 437 438 goto out; 439 } 440 441 for (ev = list_tail(&zevent_list); ev != NULL; 442 ev = list_prev(&zevent_list, ev)) { 443 if (ev->ev_eid == eid) { 444 if (ze->ze_zevent) 445 list_remove(&ze->ze_zevent->ev_ze_list, ze); 446 447 ze->ze_zevent = ev; 448 list_insert_head(&ev->ev_ze_list, ze); 449 break; 450 } 451 } 452 453 if (ev == NULL) 454 error = ENOENT; 455 456 out: 457 mutex_exit(&zevent_lock); 458 459 return (error); 460 } 461 462 void 463 zfs_zevent_init(zfs_zevent_t **zep) 464 { 465 zfs_zevent_t *ze; 466 467 ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP); 468 list_link_init(&ze->ze_node); 469 } 470 471 void 472 zfs_zevent_destroy(zfs_zevent_t *ze) 473 { 474 mutex_enter(&zevent_lock); 475 if (ze->ze_zevent) 476 list_remove(&ze->ze_zevent->ev_ze_list, ze); 477 mutex_exit(&zevent_lock); 478 479 kmem_free(ze, sizeof (zfs_zevent_t)); 480 } 481 #endif /* _KERNEL */ 482 483 /* 484 * Wrappers for FM nvlist allocators 485 */ 486 static void * 487 i_fm_alloc(nv_alloc_t *nva, size_t size) 488 { 489 (void) nva; 490 return (kmem_alloc(size, KM_SLEEP)); 491 } 492 493 static void 494 i_fm_free(nv_alloc_t *nva, void *buf, size_t size) 495 { 496 (void) nva; 497 kmem_free(buf, size); 498 } 499 500 static const nv_alloc_ops_t fm_mem_alloc_ops = { 501 .nv_ao_init = NULL, 502 .nv_ao_fini = NULL, 503 .nv_ao_alloc = i_fm_alloc, 504 .nv_ao_free = i_fm_free, 505 .nv_ao_reset = NULL 506 }; 507 508 /* 509 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer 510 * to the newly allocated nv_alloc_t structure is returned upon success or NULL 511 * is returned to indicate that the nv_alloc structure could not be created. 512 */ 513 nv_alloc_t * 514 fm_nva_xcreate(char *buf, size_t bufsz) 515 { 516 nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); 517 518 if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) { 519 kmem_free(nvhdl, sizeof (nv_alloc_t)); 520 return (NULL); 521 } 522 523 return (nvhdl); 524 } 525 526 /* 527 * Destroy a previously allocated nv_alloc structure. The fixed buffer 528 * associated with nva must be freed by the caller. 529 */ 530 void 531 fm_nva_xdestroy(nv_alloc_t *nva) 532 { 533 nv_alloc_fini(nva); 534 kmem_free(nva, sizeof (nv_alloc_t)); 535 } 536 537 /* 538 * Create a new nv list. A pointer to a new nv list structure is returned 539 * upon success or NULL is returned to indicate that the structure could 540 * not be created. The newly created nv list is created and managed by the 541 * operations installed in nva. If nva is NULL, the default FMA nva 542 * operations are installed and used. 543 * 544 * When called from the kernel and nva == NULL, this function must be called 545 * from passive kernel context with no locks held that can prevent a 546 * sleeping memory allocation from occurring. Otherwise, this function may 547 * be called from other kernel contexts as long a valid nva created via 548 * fm_nva_create() is supplied. 549 */ 550 nvlist_t * 551 fm_nvlist_create(nv_alloc_t *nva) 552 { 553 int hdl_alloced = 0; 554 nvlist_t *nvl; 555 nv_alloc_t *nvhdl; 556 557 if (nva == NULL) { 558 nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); 559 560 if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) { 561 kmem_free(nvhdl, sizeof (nv_alloc_t)); 562 return (NULL); 563 } 564 hdl_alloced = 1; 565 } else { 566 nvhdl = nva; 567 } 568 569 if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) { 570 if (hdl_alloced) { 571 nv_alloc_fini(nvhdl); 572 kmem_free(nvhdl, sizeof (nv_alloc_t)); 573 } 574 return (NULL); 575 } 576 577 return (nvl); 578 } 579 580 /* 581 * Destroy a previously allocated nvlist structure. flag indicates whether 582 * or not the associated nva structure should be freed (FM_NVA_FREE) or 583 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows 584 * it to be re-used for future nvlist creation operations. 585 */ 586 void 587 fm_nvlist_destroy(nvlist_t *nvl, int flag) 588 { 589 nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl); 590 591 nvlist_free(nvl); 592 593 if (nva != NULL) { 594 if (flag == FM_NVA_FREE) 595 fm_nva_xdestroy(nva); 596 } 597 } 598 599 int 600 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap) 601 { 602 int nelem, ret = 0; 603 data_type_t type; 604 605 while (ret == 0 && name != NULL) { 606 type = va_arg(ap, data_type_t); 607 switch (type) { 608 case DATA_TYPE_BYTE: 609 ret = nvlist_add_byte(payload, name, 610 va_arg(ap, uint_t)); 611 break; 612 case DATA_TYPE_BYTE_ARRAY: 613 nelem = va_arg(ap, int); 614 ret = nvlist_add_byte_array(payload, name, 615 va_arg(ap, uchar_t *), nelem); 616 break; 617 case DATA_TYPE_BOOLEAN_VALUE: 618 ret = nvlist_add_boolean_value(payload, name, 619 va_arg(ap, boolean_t)); 620 break; 621 case DATA_TYPE_BOOLEAN_ARRAY: 622 nelem = va_arg(ap, int); 623 ret = nvlist_add_boolean_array(payload, name, 624 va_arg(ap, boolean_t *), nelem); 625 break; 626 case DATA_TYPE_INT8: 627 ret = nvlist_add_int8(payload, name, 628 va_arg(ap, int)); 629 break; 630 case DATA_TYPE_INT8_ARRAY: 631 nelem = va_arg(ap, int); 632 ret = nvlist_add_int8_array(payload, name, 633 va_arg(ap, int8_t *), nelem); 634 break; 635 case DATA_TYPE_UINT8: 636 ret = nvlist_add_uint8(payload, name, 637 va_arg(ap, uint_t)); 638 break; 639 case DATA_TYPE_UINT8_ARRAY: 640 nelem = va_arg(ap, int); 641 ret = nvlist_add_uint8_array(payload, name, 642 va_arg(ap, uint8_t *), nelem); 643 break; 644 case DATA_TYPE_INT16: 645 ret = nvlist_add_int16(payload, name, 646 va_arg(ap, int)); 647 break; 648 case DATA_TYPE_INT16_ARRAY: 649 nelem = va_arg(ap, int); 650 ret = nvlist_add_int16_array(payload, name, 651 va_arg(ap, int16_t *), nelem); 652 break; 653 case DATA_TYPE_UINT16: 654 ret = nvlist_add_uint16(payload, name, 655 va_arg(ap, uint_t)); 656 break; 657 case DATA_TYPE_UINT16_ARRAY: 658 nelem = va_arg(ap, int); 659 ret = nvlist_add_uint16_array(payload, name, 660 va_arg(ap, uint16_t *), nelem); 661 break; 662 case DATA_TYPE_INT32: 663 ret = nvlist_add_int32(payload, name, 664 va_arg(ap, int32_t)); 665 break; 666 case DATA_TYPE_INT32_ARRAY: 667 nelem = va_arg(ap, int); 668 ret = nvlist_add_int32_array(payload, name, 669 va_arg(ap, int32_t *), nelem); 670 break; 671 case DATA_TYPE_UINT32: 672 ret = nvlist_add_uint32(payload, name, 673 va_arg(ap, uint32_t)); 674 break; 675 case DATA_TYPE_UINT32_ARRAY: 676 nelem = va_arg(ap, int); 677 ret = nvlist_add_uint32_array(payload, name, 678 va_arg(ap, uint32_t *), nelem); 679 break; 680 case DATA_TYPE_INT64: 681 ret = nvlist_add_int64(payload, name, 682 va_arg(ap, int64_t)); 683 break; 684 case DATA_TYPE_INT64_ARRAY: 685 nelem = va_arg(ap, int); 686 ret = nvlist_add_int64_array(payload, name, 687 va_arg(ap, int64_t *), nelem); 688 break; 689 case DATA_TYPE_UINT64: 690 ret = nvlist_add_uint64(payload, name, 691 va_arg(ap, uint64_t)); 692 break; 693 case DATA_TYPE_UINT64_ARRAY: 694 nelem = va_arg(ap, int); 695 ret = nvlist_add_uint64_array(payload, name, 696 va_arg(ap, uint64_t *), nelem); 697 break; 698 case DATA_TYPE_STRING: 699 ret = nvlist_add_string(payload, name, 700 va_arg(ap, char *)); 701 break; 702 case DATA_TYPE_STRING_ARRAY: 703 nelem = va_arg(ap, int); 704 ret = nvlist_add_string_array(payload, name, 705 va_arg(ap, const char **), nelem); 706 break; 707 case DATA_TYPE_NVLIST: 708 ret = nvlist_add_nvlist(payload, name, 709 va_arg(ap, nvlist_t *)); 710 break; 711 case DATA_TYPE_NVLIST_ARRAY: 712 nelem = va_arg(ap, int); 713 ret = nvlist_add_nvlist_array(payload, name, 714 va_arg(ap, const nvlist_t **), nelem); 715 break; 716 default: 717 ret = EINVAL; 718 } 719 720 name = va_arg(ap, char *); 721 } 722 return (ret); 723 } 724 725 void 726 fm_payload_set(nvlist_t *payload, ...) 727 { 728 int ret; 729 const char *name; 730 va_list ap; 731 732 va_start(ap, payload); 733 name = va_arg(ap, char *); 734 ret = i_fm_payload_set(payload, name, ap); 735 va_end(ap); 736 737 if (ret) 738 atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64); 739 } 740 741 /* 742 * Set-up and validate the members of an ereport event according to: 743 * 744 * Member name Type Value 745 * ==================================================== 746 * class string ereport 747 * version uint8_t 0 748 * ena uint64_t <ena> 749 * detector nvlist_t <detector> 750 * ereport-payload nvlist_t <var args> 751 * 752 * We don't actually add a 'version' member to the payload. Really, 753 * the version quoted to us by our caller is that of the category 1 754 * "ereport" event class (and we require FM_EREPORT_VERS0) but 755 * the payload version of the actual leaf class event under construction 756 * may be something else. Callers should supply a version in the varargs, 757 * or (better) we could take two version arguments - one for the 758 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one 759 * for the leaf class. 760 */ 761 void 762 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, 763 uint64_t ena, const nvlist_t *detector, ...) 764 { 765 char ereport_class[FM_MAX_CLASS]; 766 const char *name; 767 va_list ap; 768 int ret; 769 770 if (version != FM_EREPORT_VERS0) { 771 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 772 return; 773 } 774 775 (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s", 776 FM_EREPORT_CLASS, erpt_class); 777 if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) { 778 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 779 return; 780 } 781 782 if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) { 783 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 784 } 785 786 if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR, 787 (nvlist_t *)detector) != 0) { 788 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 789 } 790 791 va_start(ap, detector); 792 name = va_arg(ap, const char *); 793 ret = i_fm_payload_set(ereport, name, ap); 794 va_end(ap); 795 796 if (ret) 797 atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64); 798 } 799 800 /* 801 * Set-up and validate the members of an hc fmri according to; 802 * 803 * Member name Type Value 804 * =================================================== 805 * version uint8_t 0 806 * auth nvlist_t <auth> 807 * hc-name string <name> 808 * hc-id string <id> 809 * 810 * Note that auth and hc-id are optional members. 811 */ 812 813 #define HC_MAXPAIRS 20 814 #define HC_MAXNAMELEN 50 815 816 static int 817 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth) 818 { 819 if (version != FM_HC_SCHEME_VERSION) { 820 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 821 return (0); 822 } 823 824 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 || 825 nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) { 826 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 827 return (0); 828 } 829 830 if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, 831 (nvlist_t *)auth) != 0) { 832 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 833 return (0); 834 } 835 836 return (1); 837 } 838 839 void 840 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, 841 nvlist_t *snvl, int npairs, ...) 842 { 843 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); 844 nvlist_t *pairs[HC_MAXPAIRS]; 845 va_list ap; 846 int i; 847 848 if (!fm_fmri_hc_set_common(fmri, version, auth)) 849 return; 850 851 npairs = MIN(npairs, HC_MAXPAIRS); 852 853 va_start(ap, npairs); 854 for (i = 0; i < npairs; i++) { 855 const char *name = va_arg(ap, const char *); 856 uint32_t id = va_arg(ap, uint32_t); 857 char idstr[11]; 858 859 (void) snprintf(idstr, sizeof (idstr), "%u", id); 860 861 pairs[i] = fm_nvlist_create(nva); 862 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || 863 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { 864 atomic_inc_64( 865 &erpt_kstat_data.fmri_set_failed.value.ui64); 866 } 867 } 868 va_end(ap); 869 870 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, 871 (const nvlist_t **)pairs, npairs) != 0) { 872 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 873 } 874 875 for (i = 0; i < npairs; i++) 876 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); 877 878 if (snvl != NULL) { 879 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { 880 atomic_inc_64( 881 &erpt_kstat_data.fmri_set_failed.value.ui64); 882 } 883 } 884 } 885 886 void 887 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, 888 nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) 889 { 890 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); 891 nvlist_t *pairs[HC_MAXPAIRS]; 892 nvlist_t **hcl; 893 uint_t n; 894 int i, j; 895 va_list ap; 896 char *hcname, *hcid; 897 898 if (!fm_fmri_hc_set_common(fmri, version, auth)) 899 return; 900 901 /* 902 * copy the bboard nvpairs to the pairs array 903 */ 904 if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) 905 != 0) { 906 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 907 return; 908 } 909 910 for (i = 0; i < n; i++) { 911 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, 912 &hcname) != 0) { 913 atomic_inc_64( 914 &erpt_kstat_data.fmri_set_failed.value.ui64); 915 return; 916 } 917 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { 918 atomic_inc_64( 919 &erpt_kstat_data.fmri_set_failed.value.ui64); 920 return; 921 } 922 923 pairs[i] = fm_nvlist_create(nva); 924 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || 925 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { 926 for (j = 0; j <= i; j++) { 927 if (pairs[j] != NULL) 928 fm_nvlist_destroy(pairs[j], 929 FM_NVA_RETAIN); 930 } 931 atomic_inc_64( 932 &erpt_kstat_data.fmri_set_failed.value.ui64); 933 return; 934 } 935 } 936 937 /* 938 * create the pairs from passed in pairs 939 */ 940 npairs = MIN(npairs, HC_MAXPAIRS); 941 942 va_start(ap, npairs); 943 for (i = n; i < npairs + n; i++) { 944 const char *name = va_arg(ap, const char *); 945 uint32_t id = va_arg(ap, uint32_t); 946 char idstr[11]; 947 (void) snprintf(idstr, sizeof (idstr), "%u", id); 948 pairs[i] = fm_nvlist_create(nva); 949 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || 950 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { 951 for (j = 0; j <= i; j++) { 952 if (pairs[j] != NULL) 953 fm_nvlist_destroy(pairs[j], 954 FM_NVA_RETAIN); 955 } 956 atomic_inc_64( 957 &erpt_kstat_data.fmri_set_failed.value.ui64); 958 va_end(ap); 959 return; 960 } 961 } 962 va_end(ap); 963 964 /* 965 * Create the fmri hc list 966 */ 967 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, 968 (const nvlist_t **)pairs, npairs + n) != 0) { 969 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 970 return; 971 } 972 973 for (i = 0; i < npairs + n; i++) { 974 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); 975 } 976 977 if (snvl != NULL) { 978 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { 979 atomic_inc_64( 980 &erpt_kstat_data.fmri_set_failed.value.ui64); 981 return; 982 } 983 } 984 } 985 986 /* 987 * Set-up and validate the members of an dev fmri according to: 988 * 989 * Member name Type Value 990 * ==================================================== 991 * version uint8_t 0 992 * auth nvlist_t <auth> 993 * devpath string <devpath> 994 * [devid] string <devid> 995 * [target-port-l0id] string <target-port-lun0-id> 996 * 997 * Note that auth and devid are optional members. 998 */ 999 void 1000 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth, 1001 const char *devpath, const char *devid, const char *tpl0) 1002 { 1003 int err = 0; 1004 1005 if (version != DEV_SCHEME_VERSION0) { 1006 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1007 return; 1008 } 1009 1010 err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version); 1011 err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); 1012 1013 if (auth != NULL) { 1014 err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, 1015 (nvlist_t *)auth); 1016 } 1017 1018 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath); 1019 1020 if (devid != NULL) 1021 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid); 1022 1023 if (tpl0 != NULL) 1024 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0); 1025 1026 if (err) 1027 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1028 1029 } 1030 1031 /* 1032 * Set-up and validate the members of an cpu fmri according to: 1033 * 1034 * Member name Type Value 1035 * ==================================================== 1036 * version uint8_t 0 1037 * auth nvlist_t <auth> 1038 * cpuid uint32_t <cpu_id> 1039 * cpumask uint8_t <cpu_mask> 1040 * serial uint64_t <serial_id> 1041 * 1042 * Note that auth, cpumask, serial are optional members. 1043 * 1044 */ 1045 void 1046 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth, 1047 uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp) 1048 { 1049 uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64; 1050 1051 if (version < CPU_SCHEME_VERSION1) { 1052 atomic_inc_64(failedp); 1053 return; 1054 } 1055 1056 if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) { 1057 atomic_inc_64(failedp); 1058 return; 1059 } 1060 1061 if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME, 1062 FM_FMRI_SCHEME_CPU) != 0) { 1063 atomic_inc_64(failedp); 1064 return; 1065 } 1066 1067 if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY, 1068 (nvlist_t *)auth) != 0) 1069 atomic_inc_64(failedp); 1070 1071 if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0) 1072 atomic_inc_64(failedp); 1073 1074 if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK, 1075 *cpu_maskp) != 0) 1076 atomic_inc_64(failedp); 1077 1078 if (serial_idp == NULL || nvlist_add_string(fmri_cpu, 1079 FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0) 1080 atomic_inc_64(failedp); 1081 } 1082 1083 /* 1084 * Set-up and validate the members of a mem according to: 1085 * 1086 * Member name Type Value 1087 * ==================================================== 1088 * version uint8_t 0 1089 * auth nvlist_t <auth> [optional] 1090 * unum string <unum> 1091 * serial string <serial> [optional*] 1092 * offset uint64_t <offset> [optional] 1093 * 1094 * * serial is required if offset is present 1095 */ 1096 void 1097 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth, 1098 const char *unum, const char *serial, uint64_t offset) 1099 { 1100 if (version != MEM_SCHEME_VERSION0) { 1101 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1102 return; 1103 } 1104 1105 if (!serial && (offset != (uint64_t)-1)) { 1106 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1107 return; 1108 } 1109 1110 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { 1111 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1112 return; 1113 } 1114 1115 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) { 1116 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1117 return; 1118 } 1119 1120 if (auth != NULL) { 1121 if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY, 1122 (nvlist_t *)auth) != 0) { 1123 atomic_inc_64( 1124 &erpt_kstat_data.fmri_set_failed.value.ui64); 1125 } 1126 } 1127 1128 if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) { 1129 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1130 } 1131 1132 if (serial != NULL) { 1133 if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID, 1134 (const char **)&serial, 1) != 0) { 1135 atomic_inc_64( 1136 &erpt_kstat_data.fmri_set_failed.value.ui64); 1137 } 1138 if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri, 1139 FM_FMRI_MEM_OFFSET, offset) != 0) { 1140 atomic_inc_64( 1141 &erpt_kstat_data.fmri_set_failed.value.ui64); 1142 } 1143 } 1144 } 1145 1146 void 1147 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid, 1148 uint64_t vdev_guid) 1149 { 1150 if (version != ZFS_SCHEME_VERSION0) { 1151 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1152 return; 1153 } 1154 1155 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) { 1156 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1157 return; 1158 } 1159 1160 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) { 1161 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1162 return; 1163 } 1164 1165 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) { 1166 atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64); 1167 } 1168 1169 if (vdev_guid != 0) { 1170 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) { 1171 atomic_inc_64( 1172 &erpt_kstat_data.fmri_set_failed.value.ui64); 1173 } 1174 } 1175 } 1176 1177 uint64_t 1178 fm_ena_increment(uint64_t ena) 1179 { 1180 uint64_t new_ena; 1181 1182 switch (ENA_FORMAT(ena)) { 1183 case FM_ENA_FMT1: 1184 new_ena = ena + (1 << ENA_FMT1_GEN_SHFT); 1185 break; 1186 case FM_ENA_FMT2: 1187 new_ena = ena + (1 << ENA_FMT2_GEN_SHFT); 1188 break; 1189 default: 1190 new_ena = 0; 1191 } 1192 1193 return (new_ena); 1194 } 1195 1196 uint64_t 1197 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) 1198 { 1199 uint64_t ena = 0; 1200 1201 switch (format) { 1202 case FM_ENA_FMT1: 1203 if (timestamp) { 1204 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1205 ((cpuid << ENA_FMT1_CPUID_SHFT) & 1206 ENA_FMT1_CPUID_MASK) | 1207 ((timestamp << ENA_FMT1_TIME_SHFT) & 1208 ENA_FMT1_TIME_MASK)); 1209 } else { 1210 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1211 ((cpuid << ENA_FMT1_CPUID_SHFT) & 1212 ENA_FMT1_CPUID_MASK) | 1213 ((gethrtime() << ENA_FMT1_TIME_SHFT) & 1214 ENA_FMT1_TIME_MASK)); 1215 } 1216 break; 1217 case FM_ENA_FMT2: 1218 ena = (uint64_t)((format & ENA_FORMAT_MASK) | 1219 ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)); 1220 break; 1221 default: 1222 break; 1223 } 1224 1225 return (ena); 1226 } 1227 1228 uint64_t 1229 fm_ena_generate(uint64_t timestamp, uchar_t format) 1230 { 1231 uint64_t ena; 1232 1233 kpreempt_disable(); 1234 ena = fm_ena_generate_cpu(timestamp, getcpuid(), format); 1235 kpreempt_enable(); 1236 1237 return (ena); 1238 } 1239 1240 uint64_t 1241 fm_ena_generation_get(uint64_t ena) 1242 { 1243 uint64_t gen; 1244 1245 switch (ENA_FORMAT(ena)) { 1246 case FM_ENA_FMT1: 1247 gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT; 1248 break; 1249 case FM_ENA_FMT2: 1250 gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT; 1251 break; 1252 default: 1253 gen = 0; 1254 break; 1255 } 1256 1257 return (gen); 1258 } 1259 1260 uchar_t 1261 fm_ena_format_get(uint64_t ena) 1262 { 1263 1264 return (ENA_FORMAT(ena)); 1265 } 1266 1267 uint64_t 1268 fm_ena_id_get(uint64_t ena) 1269 { 1270 uint64_t id; 1271 1272 switch (ENA_FORMAT(ena)) { 1273 case FM_ENA_FMT1: 1274 id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT; 1275 break; 1276 case FM_ENA_FMT2: 1277 id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT; 1278 break; 1279 default: 1280 id = 0; 1281 } 1282 1283 return (id); 1284 } 1285 1286 uint64_t 1287 fm_ena_time_get(uint64_t ena) 1288 { 1289 uint64_t time; 1290 1291 switch (ENA_FORMAT(ena)) { 1292 case FM_ENA_FMT1: 1293 time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT; 1294 break; 1295 case FM_ENA_FMT2: 1296 time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT; 1297 break; 1298 default: 1299 time = 0; 1300 } 1301 1302 return (time); 1303 } 1304 1305 #ifdef _KERNEL 1306 /* 1307 * Helper function to increment ereport dropped count. Used by the event 1308 * rate limiting code to give feedback to the user about how many events were 1309 * rate limited by including them in the 'dropped' count. 1310 */ 1311 void 1312 fm_erpt_dropped_increment(void) 1313 { 1314 atomic_inc_64(&ratelimit_dropped); 1315 } 1316 1317 void 1318 fm_init(void) 1319 { 1320 zevent_len_cur = 0; 1321 zevent_flags = 0; 1322 1323 /* Initialize zevent allocation and generation kstats */ 1324 fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, 1325 sizeof (struct erpt_kstat) / sizeof (kstat_named_t), 1326 KSTAT_FLAG_VIRTUAL); 1327 1328 if (fm_ksp != NULL) { 1329 fm_ksp->ks_data = &erpt_kstat_data; 1330 kstat_install(fm_ksp); 1331 } else { 1332 cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); 1333 } 1334 1335 mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); 1336 list_create(&zevent_list, sizeof (zevent_t), 1337 offsetof(zevent_t, ev_node)); 1338 cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); 1339 1340 zfs_ereport_init(); 1341 } 1342 1343 void 1344 fm_fini(void) 1345 { 1346 uint_t count; 1347 1348 zfs_ereport_fini(); 1349 1350 zfs_zevent_drain_all(&count); 1351 1352 mutex_enter(&zevent_lock); 1353 cv_broadcast(&zevent_cv); 1354 1355 zevent_flags |= ZEVENT_SHUTDOWN; 1356 while (zevent_waiters > 0) { 1357 mutex_exit(&zevent_lock); 1358 kpreempt(KPREEMPT_SYNC); 1359 mutex_enter(&zevent_lock); 1360 } 1361 mutex_exit(&zevent_lock); 1362 1363 cv_destroy(&zevent_cv); 1364 list_destroy(&zevent_list); 1365 mutex_destroy(&zevent_lock); 1366 1367 if (fm_ksp != NULL) { 1368 kstat_delete(fm_ksp); 1369 fm_ksp = NULL; 1370 } 1371 } 1372 #endif /* _KERNEL */ 1373 1374 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW, 1375 "Max event queue length"); 1376