xref: /titanic_44/usr/src/uts/common/os/fm.c (revision b369f4b871a39ef94e220443957975f445f52eb6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Fault Management Architecture (FMA) Resource and Protocol Support
30  *
31  * The routines contained herein provide services to support kernel subsystems
32  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
33  *
34  * Name-Value Pair Lists
35  *
36  * The embodiment of an FMA protocol element (event, fmri or authority) is a
37  * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
38  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
39  * to create an nvpair list using custom allocators.  Callers may choose to
40  * allocate either from the kernel memory allocator, or from a preallocated
41  * buffer, useful in constrained contexts like high-level interrupt routines.
42  *
43  * Protocol Event and FMRI Construction
44  *
45  * Convenience routines are provided to construct nvlist events according to
46  * the FMA Event Protocol and Naming Schema specification for ereports and
47  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
48  *
49  * ENA Manipulation
50  *
51  * Routines to generate ENA formats 0, 1 and 2 are available as well as
52  * routines to increment formats 1 and 2.  Individual fields within the
53  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
54  * fm_ena_format_get() and fm_ena_gen_get().
55  */
56 
57 #include <sys/types.h>
58 #include <sys/time.h>
59 #include <sys/sysevent.h>
60 #include <sys/sysevent_impl.h>
61 #include <sys/nvpair.h>
62 #include <sys/cmn_err.h>
63 #include <sys/cpuvar.h>
64 #include <sys/sysmacros.h>
65 #include <sys/systm.h>
66 #include <sys/ddifm.h>
67 #include <sys/ddifm_impl.h>
68 #include <sys/spl.h>
69 #include <sys/dumphdr.h>
70 #include <sys/compress.h>
71 #include <sys/cpuvar.h>
72 #include <sys/console.h>
73 #include <sys/panic.h>
74 #include <sys/kobj.h>
75 #include <sys/sunddi.h>
76 #include <sys/systeminfo.h>
77 #include <sys/sysevent/eventdefs.h>
78 #include <sys/fm/util.h>
79 #include <sys/fm/protocol.h>
80 
81 /*
82  * URL and SUNW-MSG-ID value to display for fm_panic(), defined below.  These
83  * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
84  */
85 static const char *fm_url = "http://www.sun.com/msg";
86 static const char *fm_msgid = "SUNOS-8000-0G";
/* Set by fm_panic(); fm_banner() returns early while this is NULL. */
87 static char *volatile fm_panicstr = NULL;
88 
/* Ereport error queue created in fm_init(); fm_drain() is its callback. */
89 errorq_t *ereport_errorq;
/* Buffer of ereport_size bytes allocated in fm_init(), and its length. */
90 void *ereport_dumpbuf;
91 size_t ereport_dumplen;
92 
/* Value handed to sysevent_evc_control(EVCH_SET_CHAN_LEN) by fm_init(). */
93 static uint_t ereport_chanlen = ERPT_EVCH_MAX;
/* Error channel bound in fm_init() and walked by fm_ereport_dump(). */
94 static evchan_t *ereport_chan = NULL;
/* Queue length and element size; 0 makes fm_init() choose its defaults. */
95 static ulong_t ereport_qlen = 0;
96 static size_t ereport_size = 0;
/* Column count fm_nvprint() passes down as the console wrap width. */
97 static int ereport_cols = 80;
98 
99 /*
100  * Common fault management kstats to record ereport generation
101  * failures
102  */
103 
/*
 * Layout of the named kstat entries.  fm_init() derives the entry count as
 * sizeof (struct erpt_kstat) / sizeof (kstat_named_t), so every member must
 * be a kstat_named_t.
 */
104 struct erpt_kstat {
105 	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
106 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
107 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
108 	kstat_named_t	payload_set_failed;	/* num payload set failures */
109 };
110 
/*
 * Counter storage.  fm_init() installs this object as the ks_data of the
 * virtual kstat it creates; the routines in this file bump the counters
 * with atomic_add_64().  Initializer order must match struct erpt_kstat.
 */
111 static struct erpt_kstat erpt_kstat_data = {
112 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
113 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
114 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
115 	{ "payload-set-failed", KSTAT_DATA_UINT64 }
116 };
117 
118 /*ARGSUSED*/
119 static void
120 fm_drain(void *private, void *data, errorq_elem_t *eep)
121 {
122 	nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep);
123 
124 	if (!panicstr)
125 		(void) fm_ereport_post(nvl, EVCH_TRYHARD);
126 	else
127 		fm_nvprint(nvl);
128 }
129 
130 void
131 fm_init(void)
132 {
133 	kstat_t *ksp;
134 
135 	(void) sysevent_evc_bind(FM_ERROR_CHAN,
136 	    &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
137 
138 	(void) sysevent_evc_control(ereport_chan,
139 	    EVCH_SET_CHAN_LEN, &ereport_chanlen);
140 
141 	if (ereport_qlen == 0)
142 		ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
143 
144 	if (ereport_size == 0)
145 		ereport_size = ERPT_DATA_SZ;
146 
147 	ereport_errorq = errorq_nvcreate("fm_ereport_queue",
148 	    (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
149 	    FM_ERR_PIL, ERRORQ_VITAL);
150 	if (ereport_errorq == NULL)
151 		panic("failed to create required ereport error queue");
152 
153 	ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
154 	ereport_dumplen = ereport_size;
155 
156 	/* Initialize ereport allocation and generation kstats */
157 	ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
158 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
159 	    KSTAT_FLAG_VIRTUAL);
160 
161 	if (ksp != NULL) {
162 		ksp->ks_data = &erpt_kstat_data;
163 		kstat_install(ksp);
164 	} else {
165 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
166 
167 	}
168 }
169 
/*
 * Console formatting helper for fm_nvprintr() and fm_nvprint().
 *
 * depth  - nesting depth of the item being printed
 * c      - column the cursor is currently in
 * cols   - width of the console in columns
 * format - printf-style format, followed by its arguments
 *
 * The formatted length is measured first.  If the chunk would reach or pass
 * the last column, a line break is emitted before it; in that case a single
 * leading space is added when depth > 0 and the format does not already
 * start with one.  Returns the column the cursor ends up in, modulo cols.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list args;
	char probe;
	int len;

	/* Format into a one-byte buffer purely to learn the output length. */
	va_start(args, format);
	len = vsnprintf(&probe, sizeof (probe), format, args);
	va_end(args);

	if (c + len >= cols) {
		console_printf("\n\r");
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c = 1;
		} else {
			c = 0;
		}
	}

	va_start(args, format);
	console_vprintf(format, args);
	va_end(args);

	return ((c + len) % cols);
}
201 
202 /*
203  * Recursively print a nvlist in the specified column width and return the
204  * column we end up in.  This function is called recursively by fm_nvprint(),
205  * below.  We generically format the entire nvpair using hexadecimal
206  * integers and strings, and elide any integer arrays.  Arrays are basically
207  * used for cache dumps right now, so we suppress them so as not to overwhelm
208  * the amount of console output we produce at panic time.  This can be further
209  * enhanced as FMA technology grows based upon the needs of consumers.  All
210  * FMA telemetry is logged using the dump device transport, so the console
211  * output serves only as a fallback in case this procedure is unsuccessful.
212  */
213 static int
214 fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
215 {
216 	nvpair_t *nvp;
217 
218 	for (nvp = nvlist_next_nvpair(nvl, NULL);
219 	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
220 
221 		data_type_t type = nvpair_type(nvp);
222 		const char *name = nvpair_name(nvp);
223 
224 		boolean_t b;
225 		uint8_t i8;
226 		uint16_t i16;
227 		uint32_t i32;
228 		uint64_t i64;
229 		char *str;
230 		nvlist_t *cnv;
231 
232 		if (strcmp(name, FM_CLASS) == 0)
233 			continue; /* already printed by caller */
234 
235 		c = fm_printf(d, c, cols, " %s=", name);
236 
237 		switch (type) {
238 		case DATA_TYPE_BOOLEAN:
239 			c = fm_printf(d + 1, c, cols, " 1");
240 			break;
241 
242 		case DATA_TYPE_BOOLEAN_VALUE:
243 			(void) nvpair_value_boolean_value(nvp, &b);
244 			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
245 			break;
246 
247 		case DATA_TYPE_BYTE:
248 			(void) nvpair_value_byte(nvp, &i8);
249 			c = fm_printf(d + 1, c, cols, "%x", i8);
250 			break;
251 
252 		case DATA_TYPE_INT8:
253 			(void) nvpair_value_int8(nvp, (void *)&i8);
254 			c = fm_printf(d + 1, c, cols, "%x", i8);
255 			break;
256 
257 		case DATA_TYPE_UINT8:
258 			(void) nvpair_value_uint8(nvp, &i8);
259 			c = fm_printf(d + 1, c, cols, "%x", i8);
260 			break;
261 
262 		case DATA_TYPE_INT16:
263 			(void) nvpair_value_int16(nvp, (void *)&i16);
264 			c = fm_printf(d + 1, c, cols, "%x", i16);
265 			break;
266 
267 		case DATA_TYPE_UINT16:
268 			(void) nvpair_value_uint16(nvp, &i16);
269 			c = fm_printf(d + 1, c, cols, "%x", i16);
270 			break;
271 
272 		case DATA_TYPE_INT32:
273 			(void) nvpair_value_int32(nvp, (void *)&i32);
274 			c = fm_printf(d + 1, c, cols, "%x", i32);
275 			break;
276 
277 		case DATA_TYPE_UINT32:
278 			(void) nvpair_value_uint32(nvp, &i32);
279 			c = fm_printf(d + 1, c, cols, "%x", i32);
280 			break;
281 
282 		case DATA_TYPE_INT64:
283 			(void) nvpair_value_int64(nvp, (void *)&i64);
284 			c = fm_printf(d + 1, c, cols, "%llx",
285 			    (u_longlong_t)i64);
286 			break;
287 
288 		case DATA_TYPE_UINT64:
289 			(void) nvpair_value_uint64(nvp, &i64);
290 			c = fm_printf(d + 1, c, cols, "%llx",
291 			    (u_longlong_t)i64);
292 			break;
293 
294 		case DATA_TYPE_HRTIME:
295 			(void) nvpair_value_hrtime(nvp, (void *)&i64);
296 			c = fm_printf(d + 1, c, cols, "%llx",
297 			    (u_longlong_t)i64);
298 			break;
299 
300 		case DATA_TYPE_STRING:
301 			(void) nvpair_value_string(nvp, &str);
302 			c = fm_printf(d + 1, c, cols, "\"%s\"",
303 			    str ? str : "<NULL>");
304 			break;
305 
306 		case DATA_TYPE_NVLIST:
307 			c = fm_printf(d + 1, c, cols, "[");
308 			(void) nvpair_value_nvlist(nvp, &cnv);
309 			c = fm_nvprintr(cnv, d + 1, c, cols);
310 			c = fm_printf(d + 1, c, cols, " ]");
311 			break;
312 
313 		case DATA_TYPE_BOOLEAN_ARRAY:
314 		case DATA_TYPE_BYTE_ARRAY:
315 		case DATA_TYPE_INT8_ARRAY:
316 		case DATA_TYPE_UINT8_ARRAY:
317 		case DATA_TYPE_INT16_ARRAY:
318 		case DATA_TYPE_UINT16_ARRAY:
319 		case DATA_TYPE_INT32_ARRAY:
320 		case DATA_TYPE_UINT32_ARRAY:
321 		case DATA_TYPE_INT64_ARRAY:
322 		case DATA_TYPE_UINT64_ARRAY:
323 		case DATA_TYPE_STRING_ARRAY:
324 		case DATA_TYPE_NVLIST_ARRAY:
325 			c = fm_printf(d + 1, c, cols, "[...]");
326 			break;
327 		case DATA_TYPE_UNKNOWN:
328 			c = fm_printf(d + 1, c, cols, "<unknown>");
329 			break;
330 		}
331 	}
332 
333 	return (c);
334 }
335 
336 void
337 fm_nvprint(nvlist_t *nvl)
338 {
339 	char *class;
340 	int c = 0;
341 
342 	console_printf("\r");
343 
344 	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
345 		c = fm_printf(0, c, ereport_cols, "%s", class);
346 
347 	if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0)
348 		console_printf("\n");
349 
350 	console_printf("\n");
351 }
352 
353 /*
354  * Wrapper for panic() that first produces an FMA-style message for admins.
355  * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
356  * is the one exception to that rule and the only error that gets messaged.
357  * This function is intended for use by subsystems that have detected a fatal
358  * error and enqueued appropriate ereports and wish to then force a panic.
359  */
360 /*PRINTFLIKE1*/
361 void
362 fm_panic(const char *format, ...)
363 {
364 	va_list ap;
365 
366 	(void) casptr((void *)&fm_panicstr, NULL, (void *)format);
367 	va_start(ap, format);
368 	vpanic(format, ap);
369 	va_end(ap);
370 }
371 
372 /*
373  * Print any appropriate FMA banner message before the panic message.  This
374  * function is called by panicsys() and prints the message for fm_panic().
375  * We print the message here so that it comes after the system is quiesced.
376  * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
377  * The rest of the message is for the console only and not needed in the log,
378  * so it is printed using console_printf().  We break it up into multiple
379  * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
380  */
381 void
382 fm_banner(void)
383 {
384 	timespec_t tod;
385 	hrtime_t now;
386 
387 	if (!fm_panicstr)
388 		return; /* panic was not initiated by fm_panic(); do nothing */
389 
390 	if (panicstr) {
391 		tod = panic_hrestime;
392 		now = panic_hrtime;
393 	} else {
394 		gethrestime(&tod);
395 		now = gethrtime_waitfree();
396 	}
397 
398 	cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
399 	    "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
400 
401 	console_printf(
402 "\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
403 "EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
404 	    fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
405 
406 	console_printf(
407 "PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
408 "SOURCE: %s, REV: %s %s\n",
409 	    platform, utsname.nodename, utsname.sysname,
410 	    utsname.release, utsname.version);
411 
412 	console_printf(
413 "DESC: Errors have been detected that require a reboot to ensure system\n"
414 "integrity.  See %s/%s for more information.\n",
415 	    fm_url, fm_msgid);
416 
417 	console_printf(
418 "AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
419 "IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
420 "REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
421 
422 	console_printf("\n");
423 }
424 
425 /*
426  * Utility function to write all of the pending ereports to the dump device.
427  * This function is called at either normal reboot or panic time, and simply
428  * iterates over the in-transit messages in the ereport sysevent channel.
429  */
430 void
431 fm_ereport_dump(void)
432 {
433 	evchanq_t *chq;
434 	sysevent_t *sep;
435 	erpt_dump_t ed;
436 
437 	timespec_t tod;
438 	hrtime_t now;
439 	char *buf;
440 	size_t len;
441 
442 	if (panicstr) {
443 		tod = panic_hrestime;
444 		now = panic_hrtime;
445 	} else {
446 		if (ereport_errorq != NULL)
447 			errorq_drain(ereport_errorq);
448 		gethrestime(&tod);
449 		now = gethrtime_waitfree();
450 	}
451 
452 	/*
453 	 * In the panic case, sysevent_evc_walk_init() will return NULL.
454 	 */
455 	if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL &&
456 	    !panicstr)
457 		return; /* event channel isn't initialized yet */
458 
459 	while ((sep = sysevent_evc_walk_step(chq)) != NULL) {
460 		if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL)
461 			break;
462 
463 		ed.ed_magic = ERPT_MAGIC;
464 		ed.ed_chksum = checksum32(buf, len);
465 		ed.ed_size = (uint32_t)len;
466 		ed.ed_pad = 0;
467 		ed.ed_hrt_nsec = SE_TIME(sep);
468 		ed.ed_hrt_base = now;
469 		ed.ed_tod_base.sec = tod.tv_sec;
470 		ed.ed_tod_base.nsec = tod.tv_nsec;
471 
472 		dumpvp_write(&ed, sizeof (ed));
473 		dumpvp_write(buf, len);
474 	}
475 
476 	sysevent_evc_walk_fini(chq);
477 }
478 
479 /*
480  * Post an error report (ereport) to the sysevent error channel.  The error
481  * channel must be established with a prior call to sysevent_evc_create()
482  * before publication may occur.
483  */
484 void
485 fm_ereport_post(nvlist_t *ereport, int evc_flag)
486 {
487 	size_t nvl_size = 0;
488 	evchan_t *error_chan;
489 
490 	(void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE);
491 	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
492 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
493 		return;
494 	}
495 
496 	if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan,
497 	    EVCH_CREAT|EVCH_HOLD_PEND) != 0) {
498 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
499 		return;
500 	}
501 
502 	if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
503 	    SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
504 		atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
505 		sysevent_evc_unbind(error_chan);
506 		return;
507 	}
508 	sysevent_evc_unbind(error_chan);
509 }
510 
511 /*
512  * Wrappers for FM nvlist allocators
513  */
514 /* ARGSUSED */
515 static void *
516 i_fm_alloc(nv_alloc_t *nva, size_t size)
517 {
518 	return (kmem_zalloc(size, KM_SLEEP));
519 }
520 
/*
 * Free hook listed in fm_mem_alloc_ops: hands buf (of the given size) back
 * to kmem_free().  The nva argument is unused.
 */
521 /* ARGSUSED */
522 static void
523 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
524 {
525 	kmem_free(buf, size);
526 }
527 
/*
 * nv_alloc operations vector that routes nvlist memory through i_fm_alloc()
 * and i_fm_free(); the other three slots are left NULL.  fm_nvlist_create()
 * uses it when the caller does not supply an allocator.
 * NOTE(review): slot meanings follow nv_alloc_ops_t in <sys/nvpair.h> --
 * confirm the order there before adding or moving entries.
 */
528 const nv_alloc_ops_t fm_mem_alloc_ops = {
529 	NULL,
530 	NULL,
531 	i_fm_alloc,
532 	i_fm_free,
533 	NULL
534 };
535 
536 /*
537  * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
538  * to the newly allocated nv_alloc_t structure is returned upon success or NULL
539  * is returned to indicate that the nv_alloc structure could not be created.
540  */
541 nv_alloc_t *
542 fm_nva_xcreate(char *buf, size_t bufsz)
543 {
544 	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
545 
546 	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
547 		kmem_free(nvhdl, sizeof (nv_alloc_t));
548 		return (NULL);
549 	}
550 
551 	return (nvhdl);
552 }
553 
554 /*
555  * Destroy a previously allocated nv_alloc structure.  The fixed buffer
556  * associated with nva must be freed by the caller.
557  */
558 void
559 fm_nva_xdestroy(nv_alloc_t *nva)
560 {
561 	nv_alloc_fini(nva);
562 	kmem_free(nva, sizeof (nv_alloc_t));
563 }
564 
565 /*
566  * Create a new nv list.  A pointer to a new nv list structure is returned
567  * upon success or NULL is returned to indicate that the structure could
568  * not be created.  The newly created nv list is created and managed by the
569  * operations installed in nva.   If nva is NULL, the default FMA nva
570  * operations are installed and used.
571  *
572  * When called from the kernel and nva == NULL, this function must be called
573  * from passive kernel context with no locks held that can prevent a
574  * sleeping memory allocation from occurring.  Otherwise, this function may
575  * be called from other kernel contexts as long a valid nva created via
576  * fm_nva_create() is supplied.
577  */
578 nvlist_t *
579 fm_nvlist_create(nv_alloc_t *nva)
580 {
581 	int hdl_alloced = 0;
582 	nvlist_t *nvl;
583 	nv_alloc_t *nvhdl;
584 
585 	if (nva == NULL) {
586 		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
587 
588 		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
589 			kmem_free(nvhdl, sizeof (nv_alloc_t));
590 			return (NULL);
591 		}
592 		hdl_alloced = 1;
593 	} else {
594 		nvhdl = nva;
595 	}
596 
597 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
598 		if (hdl_alloced) {
599 			kmem_free(nvhdl, sizeof (nv_alloc_t));
600 			nv_alloc_fini(nvhdl);
601 		}
602 		return (NULL);
603 	}
604 
605 	return (nvl);
606 }
607 
608 /*
609  * Destroy a previously allocated nvlist structure.  flag indicates whether
610  * or not the associated nva structure should be freed (FM_NVA_FREE) or
611  * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
612  * it to be re-used for future nvlist creation operations.
613  */
614 void
615 fm_nvlist_destroy(nvlist_t *nvl, int flag)
616 {
617 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
618 
619 	nvlist_free(nvl);
620 
621 	if (nva != NULL) {
622 		if (flag == FM_NVA_FREE)
623 			fm_nva_xdestroy(nva);
624 	}
625 }
626 
627 int
628 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
629 {
630 	int nelem, ret = 0;
631 	data_type_t type;
632 
633 	while (ret == 0 && name != NULL) {
634 		type = va_arg(ap, data_type_t);
635 		switch (type) {
636 		case DATA_TYPE_BYTE:
637 			ret = nvlist_add_byte(payload, name,
638 			    va_arg(ap, uint_t));
639 			break;
640 		case DATA_TYPE_BYTE_ARRAY:
641 			nelem = va_arg(ap, int);
642 			ret = nvlist_add_byte_array(payload, name,
643 			    va_arg(ap, uchar_t *), nelem);
644 			break;
645 		case DATA_TYPE_BOOLEAN_VALUE:
646 			ret = nvlist_add_boolean_value(payload, name,
647 			    va_arg(ap, boolean_t));
648 			break;
649 		case DATA_TYPE_BOOLEAN_ARRAY:
650 			nelem = va_arg(ap, int);
651 			ret = nvlist_add_boolean_array(payload, name,
652 			    va_arg(ap, boolean_t *), nelem);
653 			break;
654 		case DATA_TYPE_INT8:
655 			ret = nvlist_add_int8(payload, name,
656 			    va_arg(ap, int));
657 			break;
658 		case DATA_TYPE_INT8_ARRAY:
659 			nelem = va_arg(ap, int);
660 			ret = nvlist_add_int8_array(payload, name,
661 			    va_arg(ap, int8_t *), nelem);
662 			break;
663 		case DATA_TYPE_UINT8:
664 			ret = nvlist_add_uint8(payload, name,
665 			    va_arg(ap, uint_t));
666 			break;
667 		case DATA_TYPE_UINT8_ARRAY:
668 			nelem = va_arg(ap, int);
669 			ret = nvlist_add_uint8_array(payload, name,
670 			    va_arg(ap, uint8_t *), nelem);
671 			break;
672 		case DATA_TYPE_INT16:
673 			ret = nvlist_add_int16(payload, name,
674 			    va_arg(ap, int));
675 			break;
676 		case DATA_TYPE_INT16_ARRAY:
677 			nelem = va_arg(ap, int);
678 			ret = nvlist_add_int16_array(payload, name,
679 			    va_arg(ap, int16_t *), nelem);
680 			break;
681 		case DATA_TYPE_UINT16:
682 			ret = nvlist_add_uint16(payload, name,
683 			    va_arg(ap, uint_t));
684 			break;
685 		case DATA_TYPE_UINT16_ARRAY:
686 			nelem = va_arg(ap, int);
687 			ret = nvlist_add_uint16_array(payload, name,
688 			    va_arg(ap, uint16_t *), nelem);
689 			break;
690 		case DATA_TYPE_INT32:
691 			ret = nvlist_add_int32(payload, name,
692 			    va_arg(ap, int32_t));
693 			break;
694 		case DATA_TYPE_INT32_ARRAY:
695 			nelem = va_arg(ap, int);
696 			ret = nvlist_add_int32_array(payload, name,
697 			    va_arg(ap, int32_t *), nelem);
698 			break;
699 		case DATA_TYPE_UINT32:
700 			ret = nvlist_add_uint32(payload, name,
701 			    va_arg(ap, uint32_t));
702 			break;
703 		case DATA_TYPE_UINT32_ARRAY:
704 			nelem = va_arg(ap, int);
705 			ret = nvlist_add_uint32_array(payload, name,
706 			    va_arg(ap, uint32_t *), nelem);
707 			break;
708 		case DATA_TYPE_INT64:
709 			ret = nvlist_add_int64(payload, name,
710 			    va_arg(ap, int64_t));
711 			break;
712 		case DATA_TYPE_INT64_ARRAY:
713 			nelem = va_arg(ap, int);
714 			ret = nvlist_add_int64_array(payload, name,
715 			    va_arg(ap, int64_t *), nelem);
716 			break;
717 		case DATA_TYPE_UINT64:
718 			ret = nvlist_add_uint64(payload, name,
719 			    va_arg(ap, uint64_t));
720 			break;
721 		case DATA_TYPE_UINT64_ARRAY:
722 			nelem = va_arg(ap, int);
723 			ret = nvlist_add_uint64_array(payload, name,
724 			    va_arg(ap, uint64_t *), nelem);
725 			break;
726 		case DATA_TYPE_STRING:
727 			ret = nvlist_add_string(payload, name,
728 			    va_arg(ap, char *));
729 			break;
730 		case DATA_TYPE_STRING_ARRAY:
731 			nelem = va_arg(ap, int);
732 			ret = nvlist_add_string_array(payload, name,
733 			    va_arg(ap, char **), nelem);
734 			break;
735 		case DATA_TYPE_NVLIST:
736 			ret = nvlist_add_nvlist(payload, name,
737 			    va_arg(ap, nvlist_t *));
738 			break;
739 		case DATA_TYPE_NVLIST_ARRAY:
740 			nelem = va_arg(ap, int);
741 			ret = nvlist_add_nvlist_array(payload, name,
742 			    va_arg(ap, nvlist_t **), nelem);
743 			break;
744 		default:
745 			ret = EINVAL;
746 		}
747 
748 		name = va_arg(ap, char *);
749 	}
750 	return (ret);
751 }
752 
753 void
754 fm_payload_set(nvlist_t *payload, ...)
755 {
756 	int ret;
757 	const char *name;
758 	va_list ap;
759 
760 	va_start(ap, payload);
761 	name = va_arg(ap, char *);
762 	ret = i_fm_payload_set(payload, name, ap);
763 	va_end(ap);
764 
765 	if (ret)
766 		atomic_add_64(
767 		    &erpt_kstat_data.payload_set_failed.value.ui64, 1);
768 }
769 
770 /*
771  * Set-up and validate the members of an ereport event according to:
772  *
773  *	Member name		Type		Value
774  *	====================================================
775  *	class			string		ereport
776  *	version			uint8_t		0
777  *	ena			uint64_t	<ena>
778  *	detector		nvlist_t	<detector>
779  *	ereport-payload		nvlist_t	<var args>
780  *
781  */
782 void
783 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
784     uint64_t ena, const nvlist_t *detector, ...)
785 {
786 	char ereport_class[FM_MAX_CLASS];
787 	const char *name;
788 	va_list ap;
789 	int ret;
790 
791 	if (version != FM_EREPORT_VERS0) {
792 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
793 		return;
794 	}
795 
796 	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
797 	    FM_EREPORT_CLASS, erpt_class);
798 	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
799 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
800 		return;
801 	}
802 
803 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
804 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
805 	}
806 
807 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
808 	    (nvlist_t *)detector) != 0) {
809 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
810 	}
811 
812 	va_start(ap, detector);
813 	name = va_arg(ap, const char *);
814 	ret = i_fm_payload_set(ereport, name, ap);
815 	va_end(ap);
816 
817 	if (ret)
818 		atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
819 }
820 
821 /*
822  * Set-up and validate the members of an hc fmri according to:
823  *
824  *	Member name		Type		Value
825  *	===================================================
826  *	version			uint8_t		0
827  *	auth			nvlist_t	<auth>
828  *	hc-name			string		<name>
829  *	hc-id			string		<id>
830  *
831  * Note that auth and hc-id are optional members.
832  */
833 
/* Most hc-name/hc-id pairs fm_fmri_hc_set() will place in one hc fmri. */
834 #define	HC_MAXPAIRS	20
/* NOTE(review): no user of HC_MAXNAMELEN is visible nearby -- confirm. */
835 #define	HC_MAXNAMELEN	50
836 
837 static int
838 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
839 {
840 	if (version != FM_HC_SCHEME_VERSION) {
841 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
842 		return (0);
843 	}
844 
845 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
846 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
847 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
848 		return (0);
849 	}
850 
851 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
852 	    (nvlist_t *)auth) != 0) {
853 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
854 		return (0);
855 	}
856 
857 	return (1);
858 }
859 
860 void
861 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
862     nvlist_t *snvl, int npairs, ...)
863 {
864 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
865 	nvlist_t *pairs[HC_MAXPAIRS];
866 	va_list ap;
867 	int i;
868 
869 	if (!fm_fmri_hc_set_common(fmri, version, auth))
870 		return;
871 
872 	npairs = MIN(npairs, HC_MAXPAIRS);
873 
874 	va_start(ap, npairs);
875 	for (i = 0; i < npairs; i++) {
876 		const char *name = va_arg(ap, const char *);
877 		uint32_t id = va_arg(ap, uint32_t);
878 		char idstr[11];
879 
880 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
881 
882 		pairs[i] = fm_nvlist_create(nva);
883 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
884 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
885 			atomic_add_64(
886 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
887 		}
888 	}
889 	va_end(ap);
890 
891 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
892 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
893 
894 	for (i = 0; i < npairs; i++)
895 		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
896 
897 	if (snvl != NULL) {
898 		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
899 			atomic_add_64(
900 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
901 		}
902 	}
903 }
904 
905 /*
906  * Set-up and validate the members of an dev fmri according to:
907  *
908  *	Member name		Type		Value
909  *	====================================================
910  *	version			uint8_t		0
911  *	auth			nvlist_t	<auth>
912  *	devpath			string		<devpath>
913  *	devid			string		<devid>
914  *
915  * Note that auth and devid are optional members.
916  */
917 void
918 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
919     const char *devpath, const char *devid)
920 {
921 	if (version != DEV_SCHEME_VERSION0) {
922 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
923 		return;
924 	}
925 
926 	if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) {
927 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
928 		return;
929 	}
930 
931 	if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
932 	    FM_FMRI_SCHEME_DEV) != 0) {
933 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
934 		return;
935 	}
936 
937 	if (auth != NULL) {
938 		if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
939 		    (nvlist_t *)auth) != 0) {
940 			atomic_add_64(
941 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
942 		}
943 	}
944 
945 	if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) {
946 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
947 	}
948 
949 	if (devid != NULL)
950 		if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
951 			atomic_add_64(
952 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
953 }
954 
955 /*
956  * Set-up and validate the members of an cpu fmri according to:
957  *
958  *	Member name		Type		Value
959  *	====================================================
960  *	version			uint8_t		0
961  *	auth			nvlist_t	<auth>
962  *	cpuid			uint32_t	<cpu_id>
963  *	cpumask			uint8_t		<cpu_mask>
964  *	serial			uint64_t	<serial_id>
965  *
966  * Note that auth, cpumask, serial are optional members.
967  *
968  */
969 void
970 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
971     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
972 {
973 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
974 
975 	if (version < CPU_SCHEME_VERSION1) {
976 		atomic_add_64(failedp, 1);
977 		return;
978 	}
979 
980 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
981 		atomic_add_64(failedp, 1);
982 		return;
983 	}
984 
985 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
986 	    FM_FMRI_SCHEME_CPU) != 0) {
987 		atomic_add_64(failedp, 1);
988 		return;
989 	}
990 
991 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
992 	    (nvlist_t *)auth) != 0)
993 		atomic_add_64(failedp, 1);
994 
995 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
996 		atomic_add_64(failedp, 1);
997 
998 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
999 	    *cpu_maskp) != 0)
1000 		atomic_add_64(failedp, 1);
1001 
1002 	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1003 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1004 			atomic_add_64(failedp, 1);
1005 }
1006 
1007 /*
1008  * Set-up and validate the members of a mem according to:
1009  *
1010  *	Member name		Type		Value
1011  *	====================================================
1012  *	version			uint8_t		0
1013  *	auth			nvlist_t	<auth>		[optional]
1014  *	unum			string		<unum>
1015  *	serial			string		<serial>	[optional*]
1016  *	offset			uint64_t	<offset>	[optional]
1017  *
1018  *	* serial is required if offset is present
1019  */
1020 void
1021 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1022     const char *unum, const char *serial, uint64_t offset)
1023 {
1024 	if (version != MEM_SCHEME_VERSION0) {
1025 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1026 		return;
1027 	}
1028 
1029 	if (!serial && (offset != (uint64_t)-1)) {
1030 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1031 		return;
1032 	}
1033 
1034 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1035 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1036 		return;
1037 	}
1038 
1039 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1040 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1041 		return;
1042 	}
1043 
1044 	if (auth != NULL) {
1045 		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1046 		    (nvlist_t *)auth) != 0) {
1047 			atomic_add_64(
1048 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1049 		}
1050 	}
1051 
1052 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1053 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1054 	}
1055 
1056 	if (serial != NULL) {
1057 		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1058 		    (char **)&serial, 1) != 0) {
1059 			atomic_add_64(
1060 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1061 		}
1062 		if (offset != (uint64_t)-1) {
1063 			if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET,
1064 			    offset) != 0) {
1065 				atomic_add_64(&erpt_kstat_data.
1066 				    fmri_set_failed.value.ui64, 1);
1067 			}
1068 		}
1069 	}
1070 }
1071 
1072 void
1073 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1074     uint64_t vdev_guid)
1075 {
1076 	if (version != ZFS_SCHEME_VERSION0) {
1077 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1078 		return;
1079 	}
1080 
1081 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1082 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1083 		return;
1084 	}
1085 
1086 	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1087 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1088 		return;
1089 	}
1090 
1091 	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1092 		atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1093 	}
1094 
1095 	if (vdev_guid != 0) {
1096 		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1097 			atomic_add_64(
1098 			    &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1099 		}
1100 	}
1101 }
1102 
1103 uint64_t
1104 fm_ena_increment(uint64_t ena)
1105 {
1106 	uint64_t new_ena;
1107 
1108 	switch (ENA_FORMAT(ena)) {
1109 	case FM_ENA_FMT1:
1110 		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1111 		break;
1112 	case FM_ENA_FMT2:
1113 		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1114 		break;
1115 	default:
1116 		new_ena = 0;
1117 	}
1118 
1119 	return (new_ena);
1120 }
1121 
1122 uint64_t
1123 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1124 {
1125 	uint64_t ena = 0;
1126 
1127 	switch (format) {
1128 	case FM_ENA_FMT1:
1129 		if (timestamp) {
1130 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1131 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1132 			    ENA_FMT1_CPUID_MASK) |
1133 			    ((timestamp << ENA_FMT1_TIME_SHFT) &
1134 			    ENA_FMT1_TIME_MASK));
1135 		} else {
1136 			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1137 			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
1138 			    ENA_FMT1_CPUID_MASK) |
1139 			    ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) &
1140 			    ENA_FMT1_TIME_MASK));
1141 		}
1142 		break;
1143 	case FM_ENA_FMT2:
1144 		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1145 		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1146 		break;
1147 	default:
1148 		break;
1149 	}
1150 
1151 	return (ena);
1152 }
1153 
1154 uint64_t
1155 fm_ena_generate(uint64_t timestamp, uchar_t format)
1156 {
1157 	return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format));
1158 }
1159 
1160 uint64_t
1161 fm_ena_generation_get(uint64_t ena)
1162 {
1163 	uint64_t gen;
1164 
1165 	switch (ENA_FORMAT(ena)) {
1166 	case FM_ENA_FMT1:
1167 		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1168 		break;
1169 	case FM_ENA_FMT2:
1170 		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1171 		break;
1172 	default:
1173 		gen = 0;
1174 		break;
1175 	}
1176 
1177 	return (gen);
1178 }
1179 
1180 uchar_t
1181 fm_ena_format_get(uint64_t ena)
1182 {
1183 
1184 	return (ENA_FORMAT(ena));
1185 }
1186 
1187 uint64_t
1188 fm_ena_id_get(uint64_t ena)
1189 {
1190 	uint64_t id;
1191 
1192 	switch (ENA_FORMAT(ena)) {
1193 	case FM_ENA_FMT1:
1194 		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1195 		break;
1196 	case FM_ENA_FMT2:
1197 		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1198 		break;
1199 	default:
1200 		id = 0;
1201 	}
1202 
1203 	return (id);
1204 }
1205 
1206 uint64_t
1207 fm_ena_time_get(uint64_t ena)
1208 {
1209 	uint64_t time;
1210 
1211 	switch (ENA_FORMAT(ena)) {
1212 	case FM_ENA_FMT1:
1213 		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1214 		break;
1215 	case FM_ENA_FMT2:
1216 		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1217 		break;
1218 	default:
1219 		time = 0;
1220 	}
1221 
1222 	return (time);
1223 }
1224 
1225 /*
1226  * Convert a getpcstack() trace to symbolic name+offset, and add the resulting
1227  * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK.
1228  */
1229 void
1230 fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth)
1231 {
1232 	int i;
1233 	char *sym;
1234 	ulong_t off;
1235 	char *stkpp[FM_STK_DEPTH];
1236 	char buf[FM_STK_DEPTH * FM_SYM_SZ];
1237 	char *stkp = buf;
1238 
1239 	for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) {
1240 		if ((sym = kobj_getsymname(stack[i], &off)) != NULL)
1241 			(void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off);
1242 		else
1243 			(void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]);
1244 		stkpp[i] = stkp;
1245 	}
1246 
1247 	fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK,
1248 	    DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL);
1249 }
1250