xref: /freebsd/sys/contrib/openzfs/module/zfs/fm.c (revision f126d349810fdb512c0b01e101342d430b947488)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Fault Management Architecture (FMA) Resource and Protocol Support
27  *
28  * The routines contained herein provide services to support kernel subsystems
29  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
30  *
31  * Name-Value Pair Lists
32  *
33  * The embodiment of an FMA protocol element (event, fmri or authority) is a
34  * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
35  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36  * to create an nvpair list using custom allocators.  Callers may choose to
37  * allocate either from the kernel memory allocator, or from a preallocated
38  * buffer, useful in constrained contexts like high-level interrupt routines.
39  *
40  * Protocol Event and FMRI Construction
41  *
42  * Convenience routines are provided to construct nvlist events according to
43  * the FMA Event Protocol and Naming Schema specification for ereports and
44  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
45  *
46  * ENA Manipulation
47  *
48  * Routines to generate ENA formats 0, 1 and 2 are available as well as
49  * routines to increment formats 1 and 2.  Individual fields within the
50  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51  * fm_ena_format_get() and fm_ena_gen_get().
52  */
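
/*
 * For illustration, a hypothetical consumer of the ENA routines below might
 * generate and then decode a format 1 ENA roughly as follows (sketch only;
 * the local variable names are made up):
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	uint64_t t = fm_ena_time_get(ena);	(high-resolution time bits)
 *	uchar_t fmt = fm_ena_format_get(ena);	(FM_ENA_FMT1)
 *
 * Passing a timestamp of 0 lets fm_ena_generate() capture gethrtime() itself.
 */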
53 
54 #include <sys/types.h>
55 #include <sys/time.h>
56 #include <sys/list.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/sunddi.h>
61 #include <sys/systeminfo.h>
62 #include <sys/fm/util.h>
63 #include <sys/fm/protocol.h>
64 #include <sys/kstat.h>
65 #include <sys/zfs_context.h>
66 #ifdef _KERNEL
67 #include <sys/atomic.h>
68 #include <sys/condvar.h>
69 #include <sys/zfs_ioctl.h>
70 
71 static uint_t zfs_zevent_len_max = 512;
72 
73 static uint_t zevent_len_cur = 0;
74 static int zevent_waiters = 0;
75 static int zevent_flags = 0;
76 
77 /* Num events rate limited since the last time zfs_zevent_next() was called */
78 static uint64_t ratelimit_dropped = 0;
79 
80 /*
81  * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
82  * posted.  The posted EIDs are monotonically increasing but not persistent.
83  * They will be reset to the initial value (1) each time the kernel module is
84  * loaded.
85  */
86 static uint64_t zevent_eid = 0;
87 
88 static kmutex_t zevent_lock;
89 static list_t zevent_list;
90 static kcondvar_t zevent_cv;
91 #endif /* _KERNEL */
92 
93 
94 /*
95  * Common fault management kstats to record event generation failures
96  */
97 
98 struct erpt_kstat {
99 	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
100 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
101 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
102 	kstat_named_t	payload_set_failed;	/* num payload set failures */
103 	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
104 };
105 
106 static struct erpt_kstat erpt_kstat_data = {
107 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
108 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
109 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
110 	{ "payload-set-failed", KSTAT_DATA_UINT64 },
111 	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
112 };
113 
114 kstat_t *fm_ksp;
115 
116 #ifdef _KERNEL
117 
118 static zevent_t *
119 zfs_zevent_alloc(void)
120 {
121 	zevent_t *ev;
122 
123 	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
124 
125 	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
126 	    offsetof(zfs_zevent_t, ze_node));
127 	list_link_init(&ev->ev_node);
128 
129 	return (ev);
130 }
131 
132 static void
133 zfs_zevent_free(zevent_t *ev)
134 {
135 	/* Run provided cleanup callback */
136 	ev->ev_cb(ev->ev_nvl, ev->ev_detector);
137 
138 	list_destroy(&ev->ev_ze_list);
139 	kmem_free(ev, sizeof (zevent_t));
140 }
141 
142 static void
143 zfs_zevent_drain(zevent_t *ev)
144 {
145 	zfs_zevent_t *ze;
146 
147 	ASSERT(MUTEX_HELD(&zevent_lock));
148 	list_remove(&zevent_list, ev);
149 
150 	/* Remove references to this event in all private file data */
151 	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
152 		list_remove(&ev->ev_ze_list, ze);
153 		ze->ze_zevent = NULL;
154 		ze->ze_dropped++;
155 	}
156 
157 	zfs_zevent_free(ev);
158 }
159 
160 void
161 zfs_zevent_drain_all(uint_t *count)
162 {
163 	zevent_t *ev;
164 
165 	mutex_enter(&zevent_lock);
166 	while ((ev = list_head(&zevent_list)) != NULL)
167 		zfs_zevent_drain(ev);
168 
169 	*count = zevent_len_cur;
170 	zevent_len_cur = 0;
171 	mutex_exit(&zevent_lock);
172 }
173 
174 /*
175  * New zevents are inserted at the head.  If the maximum queue
176  * length is exceeded, a zevent will be drained from the tail.
177  * As part of this, any user space processes which currently have
178  * a reference to the drained zevent_t in their private data will have
179  * that reference set to NULL.
180  */
181 static void
182 zfs_zevent_insert(zevent_t *ev)
183 {
184 	ASSERT(MUTEX_HELD(&zevent_lock));
185 	list_insert_head(&zevent_list, ev);
186 
187 	if (zevent_len_cur >= zfs_zevent_len_max)
188 		zfs_zevent_drain(list_tail(&zevent_list));
189 	else
190 		zevent_len_cur++;
191 }
192 
193 /*
194  * Post a zevent. The cb will be called when nvl and detector are no longer
195  * needed, i.e.:
196  * - An error happened and a zevent can't be posted. In this case, cb is called
197  *   before zfs_zevent_post() returns.
198  * - The event is being drained and freed.
199  */
200 int
201 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
202 {
203 	inode_timespec_t tv;
204 	int64_t tv_array[2];
205 	uint64_t eid;
206 	size_t nvl_size = 0;
207 	zevent_t *ev;
208 	int error;
209 
210 	ASSERT(cb != NULL);
211 
212 	gethrestime(&tv);
213 	tv_array[0] = tv.tv_sec;
214 	tv_array[1] = tv.tv_nsec;
215 
216 	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
217 	if (error) {
218 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
219 		goto out;
220 	}
221 
222 	eid = atomic_inc_64_nv(&zevent_eid);
223 	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
224 	if (error) {
225 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
226 		goto out;
227 	}
228 
229 	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
230 	if (error) {
231 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
232 		goto out;
233 	}
234 
235 	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
236 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
237 		error = EOVERFLOW;
238 		goto out;
239 	}
240 
241 	ev = zfs_zevent_alloc();
242 	if (ev == NULL) {
243 		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
244 		error = ENOMEM;
245 		goto out;
246 	}
247 
248 	ev->ev_nvl = nvl;
249 	ev->ev_detector = detector;
250 	ev->ev_cb = cb;
251 	ev->ev_eid = eid;
252 
253 	mutex_enter(&zevent_lock);
254 	zfs_zevent_insert(ev);
255 	cv_broadcast(&zevent_cv);
256 	mutex_exit(&zevent_lock);
257 
258 out:
259 	if (error)
260 		cb(nvl, detector);
261 
262 	return (error);
263 }
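
/*
 * For illustration, a hypothetical caller of zfs_zevent_post() might look
 * roughly like this (sketch only; 'my_free_cb' is a made-up callback that
 * frees both nvlists):
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	... populate nvl and detector ...
 *	(void) zfs_zevent_post(nvl, detector, my_free_cb);
 *
 * Whether or not the post succeeds, 'my_free_cb' is eventually invoked with
 * (nvl, detector), so the caller never frees them directly.
 */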
264 
265 void
266 zfs_zevent_track_duplicate(void)
267 {
268 	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
269 }
270 
271 static int
272 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
273 {
274 	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
275 	if (*ze == NULL)
276 		return (SET_ERROR(EBADF));
277 
278 	return (0);
279 }
280 
281 zfs_file_t *
282 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
283 {
284 	zfs_file_t *fp = zfs_file_get(fd);
285 	if (fp == NULL)
286 		return (NULL);
287 
288 	int error = zfsdev_getminor(fp, minorp);
289 	if (error == 0)
290 		error = zfs_zevent_minor_to_state(*minorp, ze);
291 
292 	if (error) {
293 		zfs_zevent_fd_rele(fp);
294 		fp = NULL;
295 	}
296 
297 	return (fp);
298 }
299 
300 void
301 zfs_zevent_fd_rele(zfs_file_t *fp)
302 {
303 	zfs_file_put(fp);
304 }
305 
306 /*
307  * Get the next zevent in the stream and place a copy in 'event'.  This
308  * may fail with ENOMEM if the encoded nvlist size exceeds the passed
309  * 'event_size'.  In this case the stream pointer is not advanced and
310  * 'event_size' is set to the minimum required buffer size.
311  */
312 int
313 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
314     uint64_t *dropped)
315 {
316 	zevent_t *ev;
317 	size_t size;
318 	int error = 0;
319 
320 	mutex_enter(&zevent_lock);
321 	if (ze->ze_zevent == NULL) {
322 		/* A new stream starts at the beginning/tail */
323 		ev = list_tail(&zevent_list);
324 		if (ev == NULL) {
325 			error = ENOENT;
326 			goto out;
327 		}
328 	} else {
329 		/*
330 		 * An existing stream continues with the next element; remove
331 		 * ourselves from the wait queue for the previous element.
332 		 */
333 		ev = list_prev(&zevent_list, ze->ze_zevent);
334 		if (ev == NULL) {
335 			error = ENOENT;
336 			goto out;
337 		}
338 	}
339 
340 	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
341 	if (size > *event_size) {
342 		*event_size = size;
343 		error = ENOMEM;
344 		goto out;
345 	}
346 
347 	if (ze->ze_zevent)
348 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
349 
350 	ze->ze_zevent = ev;
351 	list_insert_head(&ev->ev_ze_list, ze);
352 	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
353 	*dropped = ze->ze_dropped;
354 
355 #ifdef _KERNEL
356 	/* Include events dropped due to rate limiting */
357 	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
358 #endif
359 	ze->ze_dropped = 0;
360 out:
361 	mutex_exit(&zevent_lock);
362 
363 	return (error);
364 }
365 
366 /*
367  * Wait in an interruptible state for any new events.
368  */
369 int
370 zfs_zevent_wait(zfs_zevent_t *ze)
371 {
372 	int error = EAGAIN;
373 
374 	mutex_enter(&zevent_lock);
375 	zevent_waiters++;
376 
377 	while (error == EAGAIN) {
378 		if (zevent_flags & ZEVENT_SHUTDOWN) {
379 			error = SET_ERROR(ESHUTDOWN);
380 			break;
381 		}
382 
383 		error = cv_wait_sig(&zevent_cv, &zevent_lock);
384 		if (signal_pending(current)) {
385 			error = SET_ERROR(EINTR);
386 			break;
387 		} else if (!list_is_empty(&zevent_list)) {
388 			error = 0;
389 			continue;
390 		} else {
391 			error = EAGAIN;
392 		}
393 	}
394 
395 	zevent_waiters--;
396 	mutex_exit(&zevent_lock);
397 
398 	return (error);
399 }
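
/*
 * Together, zfs_zevent_next() and zfs_zevent_wait() support a blocking
 * consumer loop of roughly the following shape (sketch only; the
 * ENOMEM/'event_size' resize handshake and final cleanup are elided):
 *
 *	for (;;) {
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 *		if (error == ENOENT) {
 *			if (zfs_zevent_wait(ze) != 0)
 *				break;
 *			continue;
 *		}
 *		... consume and free 'event' ...
 *	}
 */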
400 
401 /*
402  * The caller may seek to a specific EID by passing that EID.  If the EID
403  * is still available in the posted list of events the cursor is positioned
404  * there.  Otherwise ENOENT is returned and the cursor is not moved.
405  *
406  * There are two reserved EIDs which may be passed and will never fail.
407  * ZEVENT_SEEK_START positions the cursor at the start of the list, and
408  * ZEVENT_SEEK_END positions the cursor at the end of the list.
409  */
410 int
411 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
412 {
413 	zevent_t *ev;
414 	int error = 0;
415 
416 	mutex_enter(&zevent_lock);
417 
418 	if (eid == ZEVENT_SEEK_START) {
419 		if (ze->ze_zevent)
420 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
421 
422 		ze->ze_zevent = NULL;
423 		goto out;
424 	}
425 
426 	if (eid == ZEVENT_SEEK_END) {
427 		if (ze->ze_zevent)
428 			list_remove(&ze->ze_zevent->ev_ze_list, ze);
429 
430 		ev = list_head(&zevent_list);
431 		if (ev) {
432 			ze->ze_zevent = ev;
433 			list_insert_head(&ev->ev_ze_list, ze);
434 		} else {
435 			ze->ze_zevent = NULL;
436 		}
437 
438 		goto out;
439 	}
440 
441 	for (ev = list_tail(&zevent_list); ev != NULL;
442 	    ev = list_prev(&zevent_list, ev)) {
443 		if (ev->ev_eid == eid) {
444 			if (ze->ze_zevent)
445 				list_remove(&ze->ze_zevent->ev_ze_list, ze);
446 
447 			ze->ze_zevent = ev;
448 			list_insert_head(&ev->ev_ze_list, ze);
449 			break;
450 		}
451 	}
452 
453 	if (ev == NULL)
454 		error = ENOENT;
455 
456 out:
457 	mutex_exit(&zevent_lock);
458 
459 	return (error);
460 }
461 
462 void
463 zfs_zevent_init(zfs_zevent_t **zep)
464 {
465 	zfs_zevent_t *ze;
466 
467 	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
468 	list_link_init(&ze->ze_node);
469 }
470 
471 void
472 zfs_zevent_destroy(zfs_zevent_t *ze)
473 {
474 	mutex_enter(&zevent_lock);
475 	if (ze->ze_zevent)
476 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
477 	mutex_exit(&zevent_lock);
478 
479 	kmem_free(ze, sizeof (zfs_zevent_t));
480 }
481 #endif /* _KERNEL */
482 
483 /*
484  * Wrappers for FM nvlist allocators
485  */
486 static void *
487 i_fm_alloc(nv_alloc_t *nva, size_t size)
488 {
489 	(void) nva;
490 	return (kmem_alloc(size, KM_SLEEP));
491 }
492 
493 static void
494 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
495 {
496 	(void) nva;
497 	kmem_free(buf, size);
498 }
499 
500 static const nv_alloc_ops_t fm_mem_alloc_ops = {
501 	.nv_ao_init = NULL,
502 	.nv_ao_fini = NULL,
503 	.nv_ao_alloc = i_fm_alloc,
504 	.nv_ao_free = i_fm_free,
505 	.nv_ao_reset = NULL
506 };
507 
508 /*
509  * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
510  * to the newly allocated nv_alloc_t structure is returned upon success or NULL
511  * is returned to indicate that the nv_alloc structure could not be created.
512  */
513 nv_alloc_t *
514 fm_nva_xcreate(char *buf, size_t bufsz)
515 {
516 	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
517 
518 	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
519 		kmem_free(nvhdl, sizeof (nv_alloc_t));
520 		return (NULL);
521 	}
522 
523 	return (nvhdl);
524 }
525 
526 /*
527  * Destroy a previously allocated nv_alloc structure.  The fixed buffer
528  * associated with nva must be freed by the caller.
529  */
530 void
531 fm_nva_xdestroy(nv_alloc_t *nva)
532 {
533 	nv_alloc_fini(nva);
534 	kmem_free(nva, sizeof (nv_alloc_t));
535 }
536 
537 /*
538  * Create a new nv list.  A pointer to a new nv list structure is returned
539  * upon success or NULL is returned to indicate that the structure could
540  * not be created.  The newly created nv list is created and managed by the
541  * operations installed in nva.   If nva is NULL, the default FMA nva
542  * operations are installed and used.
543  *
544  * When called from the kernel and nva == NULL, this function must be called
545  * from passive kernel context with no locks held that can prevent a
546  * sleeping memory allocation from occurring.  Otherwise, this function may
547  * be called from other kernel contexts as long as a valid nva created via
548  * fm_nva_xcreate() is supplied.
549  */
550 nvlist_t *
551 fm_nvlist_create(nv_alloc_t *nva)
552 {
553 	int hdl_alloced = 0;
554 	nvlist_t *nvl;
555 	nv_alloc_t *nvhdl;
556 
557 	if (nva == NULL) {
558 		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
559 
560 		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
561 			kmem_free(nvhdl, sizeof (nv_alloc_t));
562 			return (NULL);
563 		}
564 		hdl_alloced = 1;
565 	} else {
566 		nvhdl = nva;
567 	}
568 
569 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
570 		if (hdl_alloced) {
571 			nv_alloc_fini(nvhdl);
572 			kmem_free(nvhdl, sizeof (nv_alloc_t));
573 		}
574 		return (NULL);
575 	}
576 
577 	return (nvl);
578 }
579 
580 /*
581  * Destroy a previously allocated nvlist structure.  flag indicates whether
582  * or not the associated nva structure should be freed (FM_NVA_FREE) or
583  * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
584  * it to be re-used for future nvlist creation operations.
585  */
586 void
587 fm_nvlist_destroy(nvlist_t *nvl, int flag)
588 {
589 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
590 
591 	nvlist_free(nvl);
592 
593 	if (nva != NULL) {
594 		if (flag == FM_NVA_FREE)
595 			fm_nva_xdestroy(nva);
596 	}
597 }
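
/*
 * For illustration, a typical fixed-buffer lifecycle looks roughly like this
 * (sketch only; 'buf' is a made-up local buffer):
 *
 *	char buf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(buf, sizeof (buf));
 *	nvlist_t *nvl = fm_nvlist_create(nva);
 *	... build the protocol element in nvl ...
 *	fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *
 * FM_NVA_FREE tears down 'nva' as well; the fixed buffer itself remains the
 * caller's responsibility.
 */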
598 
599 int
600 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
601 {
602 	int nelem, ret = 0;
603 	data_type_t type;
604 
605 	while (ret == 0 && name != NULL) {
606 		type = va_arg(ap, data_type_t);
607 		switch (type) {
608 		case DATA_TYPE_BYTE:
609 			ret = nvlist_add_byte(payload, name,
610 			    va_arg(ap, uint_t));
611 			break;
612 		case DATA_TYPE_BYTE_ARRAY:
613 			nelem = va_arg(ap, int);
614 			ret = nvlist_add_byte_array(payload, name,
615 			    va_arg(ap, uchar_t *), nelem);
616 			break;
617 		case DATA_TYPE_BOOLEAN_VALUE:
618 			ret = nvlist_add_boolean_value(payload, name,
619 			    va_arg(ap, boolean_t));
620 			break;
621 		case DATA_TYPE_BOOLEAN_ARRAY:
622 			nelem = va_arg(ap, int);
623 			ret = nvlist_add_boolean_array(payload, name,
624 			    va_arg(ap, boolean_t *), nelem);
625 			break;
626 		case DATA_TYPE_INT8:
627 			ret = nvlist_add_int8(payload, name,
628 			    va_arg(ap, int));
629 			break;
630 		case DATA_TYPE_INT8_ARRAY:
631 			nelem = va_arg(ap, int);
632 			ret = nvlist_add_int8_array(payload, name,
633 			    va_arg(ap, int8_t *), nelem);
634 			break;
635 		case DATA_TYPE_UINT8:
636 			ret = nvlist_add_uint8(payload, name,
637 			    va_arg(ap, uint_t));
638 			break;
639 		case DATA_TYPE_UINT8_ARRAY:
640 			nelem = va_arg(ap, int);
641 			ret = nvlist_add_uint8_array(payload, name,
642 			    va_arg(ap, uint8_t *), nelem);
643 			break;
644 		case DATA_TYPE_INT16:
645 			ret = nvlist_add_int16(payload, name,
646 			    va_arg(ap, int));
647 			break;
648 		case DATA_TYPE_INT16_ARRAY:
649 			nelem = va_arg(ap, int);
650 			ret = nvlist_add_int16_array(payload, name,
651 			    va_arg(ap, int16_t *), nelem);
652 			break;
653 		case DATA_TYPE_UINT16:
654 			ret = nvlist_add_uint16(payload, name,
655 			    va_arg(ap, uint_t));
656 			break;
657 		case DATA_TYPE_UINT16_ARRAY:
658 			nelem = va_arg(ap, int);
659 			ret = nvlist_add_uint16_array(payload, name,
660 			    va_arg(ap, uint16_t *), nelem);
661 			break;
662 		case DATA_TYPE_INT32:
663 			ret = nvlist_add_int32(payload, name,
664 			    va_arg(ap, int32_t));
665 			break;
666 		case DATA_TYPE_INT32_ARRAY:
667 			nelem = va_arg(ap, int);
668 			ret = nvlist_add_int32_array(payload, name,
669 			    va_arg(ap, int32_t *), nelem);
670 			break;
671 		case DATA_TYPE_UINT32:
672 			ret = nvlist_add_uint32(payload, name,
673 			    va_arg(ap, uint32_t));
674 			break;
675 		case DATA_TYPE_UINT32_ARRAY:
676 			nelem = va_arg(ap, int);
677 			ret = nvlist_add_uint32_array(payload, name,
678 			    va_arg(ap, uint32_t *), nelem);
679 			break;
680 		case DATA_TYPE_INT64:
681 			ret = nvlist_add_int64(payload, name,
682 			    va_arg(ap, int64_t));
683 			break;
684 		case DATA_TYPE_INT64_ARRAY:
685 			nelem = va_arg(ap, int);
686 			ret = nvlist_add_int64_array(payload, name,
687 			    va_arg(ap, int64_t *), nelem);
688 			break;
689 		case DATA_TYPE_UINT64:
690 			ret = nvlist_add_uint64(payload, name,
691 			    va_arg(ap, uint64_t));
692 			break;
693 		case DATA_TYPE_UINT64_ARRAY:
694 			nelem = va_arg(ap, int);
695 			ret = nvlist_add_uint64_array(payload, name,
696 			    va_arg(ap, uint64_t *), nelem);
697 			break;
698 		case DATA_TYPE_STRING:
699 			ret = nvlist_add_string(payload, name,
700 			    va_arg(ap, char *));
701 			break;
702 		case DATA_TYPE_STRING_ARRAY:
703 			nelem = va_arg(ap, int);
704 			ret = nvlist_add_string_array(payload, name,
705 			    va_arg(ap, const char **), nelem);
706 			break;
707 		case DATA_TYPE_NVLIST:
708 			ret = nvlist_add_nvlist(payload, name,
709 			    va_arg(ap, nvlist_t *));
710 			break;
711 		case DATA_TYPE_NVLIST_ARRAY:
712 			nelem = va_arg(ap, int);
713 			ret = nvlist_add_nvlist_array(payload, name,
714 			    va_arg(ap, const nvlist_t **), nelem);
715 			break;
716 		default:
717 			ret = EINVAL;
718 		}
719 
720 		name = va_arg(ap, char *);
721 	}
722 	return (ret);
723 }
724 
725 void
726 fm_payload_set(nvlist_t *payload, ...)
727 {
728 	int ret;
729 	const char *name;
730 	va_list ap;
731 
732 	va_start(ap, payload);
733 	name = va_arg(ap, char *);
734 	ret = i_fm_payload_set(payload, name, ap);
735 	va_end(ap);
736 
737 	if (ret)
738 		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
739 }
740 
741 /*
742  * Set-up and validate the members of an ereport event according to:
743  *
744  *	Member name		Type		Value
745  *	====================================================
746  *	class			string		ereport
747  *	version			uint8_t		0
748  *	ena			uint64_t	<ena>
749  *	detector		nvlist_t	<detector>
750  *	ereport-payload		nvlist_t	<var args>
751  *
752  * We don't actually add a 'version' member to the payload.  Really,
753  * the version quoted to us by our caller is that of the category 1
754  * "ereport" event class (and we require FM_EREPORT_VERS0) but
755  * the payload version of the actual leaf class event under construction
756  * may be something else.  Callers should supply a version in the varargs,
757  * or (better) we could take two version arguments - one for the
758  * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
759  * for the leaf class.
760  */
761 void
762 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
763     uint64_t ena, const nvlist_t *detector, ...)
764 {
765 	char ereport_class[FM_MAX_CLASS];
766 	const char *name;
767 	va_list ap;
768 	int ret;
769 
770 	if (version != FM_EREPORT_VERS0) {
771 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
772 		return;
773 	}
774 
775 	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
776 	    FM_EREPORT_CLASS, erpt_class);
777 	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
778 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
779 		return;
780 	}
781 
782 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
783 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
784 	}
785 
786 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
787 	    (nvlist_t *)detector) != 0) {
788 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
789 	}
790 
791 	va_start(ap, detector);
792 	name = va_arg(ap, const char *);
793 	ret = i_fm_payload_set(ereport, name, ap);
794 	va_end(ap);
795 
796 	if (ret)
797 		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
798 }
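
/*
 * For example, a caller might assemble an ereport roughly as follows
 * (sketch only; the class and payload member names are hypothetical):
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "io.example.timeout",
 *	    ena, detector, "example-retries", DATA_TYPE_UINT32, 3, NULL);
 *
 * The trailing varargs form a NULL-terminated sequence of (name, type,
 * value) tuples which i_fm_payload_set() above adds to the payload.
 */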
799 
800 /*
801  * Set-up and validate the members of an hc fmri according to:
802  *
803  *	Member name		Type		Value
804  *	===================================================
805  *	version			uint8_t		0
806  *	auth			nvlist_t	<auth>
807  *	hc-name			string		<name>
808  *	hc-id			string		<id>
809  *
810  * Note that auth and hc-id are optional members.
811  */
812 
813 #define	HC_MAXPAIRS	20
814 #define	HC_MAXNAMELEN	50
815 
816 static int
817 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
818 {
819 	if (version != FM_HC_SCHEME_VERSION) {
820 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
821 		return (0);
822 	}
823 
824 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
825 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
826 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
827 		return (0);
828 	}
829 
830 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
831 	    (nvlist_t *)auth) != 0) {
832 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
833 		return (0);
834 	}
835 
836 	return (1);
837 }
838 
839 void
840 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
841     nvlist_t *snvl, int npairs, ...)
842 {
843 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
844 	nvlist_t *pairs[HC_MAXPAIRS];
845 	va_list ap;
846 	int i;
847 
848 	if (!fm_fmri_hc_set_common(fmri, version, auth))
849 		return;
850 
851 	npairs = MIN(npairs, HC_MAXPAIRS);
852 
853 	va_start(ap, npairs);
854 	for (i = 0; i < npairs; i++) {
855 		const char *name = va_arg(ap, const char *);
856 		uint32_t id = va_arg(ap, uint32_t);
857 		char idstr[11];
858 
859 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
860 
861 		pairs[i] = fm_nvlist_create(nva);
862 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
863 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
864 			atomic_inc_64(
865 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
866 		}
867 	}
868 	va_end(ap);
869 
870 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
871 	    (const nvlist_t **)pairs, npairs) != 0) {
872 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
873 	}
874 
875 	for (i = 0; i < npairs; i++)
876 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
877 
878 	if (snvl != NULL) {
879 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
880 			atomic_inc_64(
881 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
882 		}
883 	}
884 }
885 
886 void
887 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
888     nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
889 {
890 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
891 	nvlist_t *pairs[HC_MAXPAIRS];
892 	nvlist_t **hcl;
893 	uint_t n;
894 	int i, j;
895 	va_list ap;
896 	char *hcname, *hcid;
897 
898 	if (!fm_fmri_hc_set_common(fmri, version, auth))
899 		return;
900 
901 	/*
902 	 * copy the bboard nvpairs to the pairs array
903 	 */
904 	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
905 	    != 0) {
906 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
907 		return;
908 	}
909 
910 	for (i = 0; i < n; i++) {
911 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
912 		    &hcname) != 0) {
913 			atomic_inc_64(
914 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
915 			return;
916 		}
917 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
918 			atomic_inc_64(
919 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
920 			return;
921 		}
922 
923 		pairs[i] = fm_nvlist_create(nva);
924 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
925 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
926 			for (j = 0; j <= i; j++) {
927 				if (pairs[j] != NULL)
928 					fm_nvlist_destroy(pairs[j],
929 					    FM_NVA_RETAIN);
930 			}
931 			atomic_inc_64(
932 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
933 			return;
934 		}
935 	}
936 
937 	/*
938 	 * append pairs built from the passed-in name/id varargs
939 	 */
940 	npairs = MIN(npairs, HC_MAXPAIRS);
941 
942 	va_start(ap, npairs);
943 	for (i = n; i < npairs + n; i++) {
944 		const char *name = va_arg(ap, const char *);
945 		uint32_t id = va_arg(ap, uint32_t);
946 		char idstr[11];
947 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
948 		pairs[i] = fm_nvlist_create(nva);
949 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
950 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
951 			for (j = 0; j <= i; j++) {
952 				if (pairs[j] != NULL)
953 					fm_nvlist_destroy(pairs[j],
954 					    FM_NVA_RETAIN);
955 			}
956 			atomic_inc_64(
957 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
958 			va_end(ap);
959 			return;
960 		}
961 	}
962 	va_end(ap);
963 
964 	/*
965 	 * Create the fmri hc list
966 	 */
967 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
968 	    (const nvlist_t **)pairs, npairs + n) != 0) {
969 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
970 		return;
971 	}
972 
973 	for (i = 0; i < npairs + n; i++) {
974 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
975 	}
976 
977 	if (snvl != NULL) {
978 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
979 			atomic_inc_64(
980 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
981 			return;
982 		}
983 	}
984 }
985 
986 /*
987  * Set-up and validate the members of a dev fmri according to:
988  *
989  *	Member name		Type		Value
990  *	====================================================
991  *	version			uint8_t		0
992  *	auth			nvlist_t	<auth>
993  *	devpath			string		<devpath>
994  *	[devid]			string		<devid>
995  *	[target-port-l0id]	string		<target-port-lun0-id>
996  *
997  * Note that auth and devid are optional members.
998  */
999 void
1000 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
1001     const char *devpath, const char *devid, const char *tpl0)
1002 {
1003 	int err = 0;
1004 
1005 	if (version != DEV_SCHEME_VERSION0) {
1006 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1007 		return;
1008 	}
1009 
1010 	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1011 	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
1012 
1013 	if (auth != NULL) {
1014 		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1015 		    (nvlist_t *)auth);
1016 	}
1017 
1018 	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
1019 
1020 	if (devid != NULL)
1021 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1022 
1023 	if (tpl0 != NULL)
1024 		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1025 
1026 	if (err)
1027 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1028 
1029 }
1030 
1031 /*
1032  * Set-up and validate the members of a cpu fmri according to:
1033  *
1034  *	Member name		Type		Value
1035  *	====================================================
1036  *	version			uint8_t		0
1037  *	auth			nvlist_t	<auth>
1038  *	cpuid			uint32_t	<cpu_id>
1039  *	cpumask			uint8_t		<cpu_mask>
1040  *	serial			uint64_t	<serial_id>
1041  *
1042  * Note that auth, cpumask, serial are optional members.
1043  *
1044  */
1045 void
1046 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1047     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1048 {
1049 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1050 
1051 	if (version < CPU_SCHEME_VERSION1) {
1052 		atomic_inc_64(failedp);
1053 		return;
1054 	}
1055 
1056 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
1057 		atomic_inc_64(failedp);
1058 		return;
1059 	}
1060 
1061 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1062 	    FM_FMRI_SCHEME_CPU) != 0) {
1063 		atomic_inc_64(failedp);
1064 		return;
1065 	}
1066 
1067 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1068 	    (nvlist_t *)auth) != 0)
1069 		atomic_inc_64(failedp);
1070 
1071 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
1072 		atomic_inc_64(failedp);
1073 
1074 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1075 	    *cpu_maskp) != 0)
1076 		atomic_inc_64(failedp);
1077 
1078 	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1079 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1080 		atomic_inc_64(failedp);
1081 }
1082 
1083 /*
1084  * Set-up and validate the members of a mem fmri according to:
1085  *
1086  *	Member name		Type		Value
1087  *	====================================================
1088  *	version			uint8_t		0
1089  *	auth			nvlist_t	<auth>		[optional]
1090  *	unum			string		<unum>
1091  *	serial			string		<serial>	[optional*]
1092  *	offset			uint64_t	<offset>	[optional]
1093  *
1094  *	* serial is required if offset is present
1095  */
1096 void
1097 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1098     const char *unum, const char *serial, uint64_t offset)
1099 {
1100 	if (version != MEM_SCHEME_VERSION0) {
1101 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1102 		return;
1103 	}
1104 
1105 	if (!serial && (offset != (uint64_t)-1)) {
1106 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1107 		return;
1108 	}
1109 
1110 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1111 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1112 		return;
1113 	}
1114 
1115 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1116 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1117 		return;
1118 	}
1119 
1120 	if (auth != NULL) {
1121 		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1122 		    (nvlist_t *)auth) != 0) {
1123 			atomic_inc_64(
1124 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1125 		}
1126 	}
1127 
1128 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1129 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1130 	}
1131 
1132 	if (serial != NULL) {
1133 		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1134 		    (const char **)&serial, 1) != 0) {
1135 			atomic_inc_64(
1136 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1137 		}
1138 		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
1139 		    FM_FMRI_MEM_OFFSET, offset) != 0) {
1140 			atomic_inc_64(
1141 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1142 		}
1143 	}
1144 }
1145 
1146 void
1147 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1148     uint64_t vdev_guid)
1149 {
1150 	if (version != ZFS_SCHEME_VERSION0) {
1151 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1152 		return;
1153 	}
1154 
1155 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1156 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1157 		return;
1158 	}
1159 
1160 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1161 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1162 		return;
1163 	}
1164 
1165 	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1166 		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
1167 	}
1168 
1169 	if (vdev_guid != 0) {
1170 		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1171 			atomic_inc_64(
1172 			    &erpt_kstat_data.fmri_set_failed.value.ui64);
1173 		}
1174 	}
1175 }
1176 
1177 uint64_t
1178 fm_ena_increment(uint64_t ena)
1179 {
1180 	uint64_t new_ena;
1181 
1182 	switch (ENA_FORMAT(ena)) {
1183 	case FM_ENA_FMT1:
1184 		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1185 		break;
1186 	case FM_ENA_FMT2:
1187 		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1188 		break;
1189 	default:
1190 		new_ena = 0;
1191 	}
1192 
1193 	return (new_ena);
1194 }
1195 
1196 uint64_t
1197 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1198 {
1199 	uint64_t ena = 0;
1200 
1201 	switch (format) {
1202 	case FM_ENA_FMT1:
1203 		if (timestamp) {
1204 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1205 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1206 			    ENA_FMT1_CPUID_MASK) |
1207 			    ((timestamp << ENA_FMT1_TIME_SHFT) &
1208 			    ENA_FMT1_TIME_MASK));
1209 		} else {
1210 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1211 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1212 			    ENA_FMT1_CPUID_MASK) |
1213 			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
1214 			    ENA_FMT1_TIME_MASK));
1215 		}
1216 		break;
1217 	case FM_ENA_FMT2:
1218 		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1219 		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1220 		break;
1221 	default:
1222 		break;
1223 	}
1224 
1225 	return (ena);
1226 }
1227 
1228 uint64_t
1229 fm_ena_generate(uint64_t timestamp, uchar_t format)
1230 {
1231 	uint64_t ena;
1232 
1233 	kpreempt_disable();
1234 	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
1235 	kpreempt_enable();
1236 
1237 	return (ena);
1238 }
1239 
1240 uint64_t
1241 fm_ena_generation_get(uint64_t ena)
1242 {
1243 	uint64_t gen;
1244 
1245 	switch (ENA_FORMAT(ena)) {
1246 	case FM_ENA_FMT1:
1247 		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1248 		break;
1249 	case FM_ENA_FMT2:
1250 		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1251 		break;
1252 	default:
1253 		gen = 0;
1254 		break;
1255 	}
1256 
1257 	return (gen);
1258 }
1259 
1260 uchar_t
1261 fm_ena_format_get(uint64_t ena)
1262 {
1263 
1264 	return (ENA_FORMAT(ena));
1265 }
1266 
1267 uint64_t
1268 fm_ena_id_get(uint64_t ena)
1269 {
1270 	uint64_t id;
1271 
1272 	switch (ENA_FORMAT(ena)) {
1273 	case FM_ENA_FMT1:
1274 		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1275 		break;
1276 	case FM_ENA_FMT2:
1277 		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1278 		break;
1279 	default:
1280 		id = 0;
1281 	}
1282 
1283 	return (id);
1284 }
1285 
1286 uint64_t
1287 fm_ena_time_get(uint64_t ena)
1288 {
1289 	uint64_t time;
1290 
1291 	switch (ENA_FORMAT(ena)) {
1292 	case FM_ENA_FMT1:
1293 		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1294 		break;
1295 	case FM_ENA_FMT2:
1296 		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1297 		break;
1298 	default:
1299 		time = 0;
1300 	}
1301 
1302 	return (time);
1303 }
1304 
1305 #ifdef _KERNEL
1306 /*
1307  * Helper function to increment ereport dropped count.  Used by the event
1308  * rate limiting code to give feedback to the user about how many events were
1309  * rate limited by including them in the 'dropped' count.
1310  */
1311 void
1312 fm_erpt_dropped_increment(void)
1313 {
1314 	atomic_inc_64(&ratelimit_dropped);
1315 }
1316 
1317 void
1318 fm_init(void)
1319 {
1320 	zevent_len_cur = 0;
1321 	zevent_flags = 0;
1322 
1323 	/* Initialize zevent allocation and generation kstats */
1324 	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1325 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1326 	    KSTAT_FLAG_VIRTUAL);
1327 
1328 	if (fm_ksp != NULL) {
1329 		fm_ksp->ks_data = &erpt_kstat_data;
1330 		kstat_install(fm_ksp);
1331 	} else {
1332 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1333 	}
1334 
1335 	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
1336 	list_create(&zevent_list, sizeof (zevent_t),
1337 	    offsetof(zevent_t, ev_node));
1338 	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1339 
1340 	zfs_ereport_init();
1341 }
1342 
1343 void
1344 fm_fini(void)
1345 {
1346 	uint_t count;
1347 
1348 	zfs_ereport_fini();
1349 
1350 	zfs_zevent_drain_all(&count);
1351 
1352 	mutex_enter(&zevent_lock);
1353 	cv_broadcast(&zevent_cv);
1354 
1355 	zevent_flags |= ZEVENT_SHUTDOWN;
1356 	while (zevent_waiters > 0) {
1357 		mutex_exit(&zevent_lock);
1358 		kpreempt(KPREEMPT_SYNC);
1359 		mutex_enter(&zevent_lock);
1360 	}
1361 	mutex_exit(&zevent_lock);
1362 
1363 	cv_destroy(&zevent_cv);
1364 	list_destroy(&zevent_list);
1365 	mutex_destroy(&zevent_lock);
1366 
1367 	if (fm_ksp != NULL) {
1368 		kstat_delete(fm_ksp);
1369 		fm_ksp = NULL;
1370 	}
1371 }
1372 #endif /* _KERNEL */
1373 
1374 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
1375 	"Max event queue length");
1376