xref: /titanic_44/usr/src/cmd/fm/fmadm/common/faulty.c (revision c5d54b671ea36a2cdc29e488d3a5c42e7b569851)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <fmadm.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <strings.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <sys/wait.h>
33 #include <sys/stat.h>
34 #include <fcntl.h>
35 #include <fm/fmd_log.h>
36 #include <sys/fm/protocol.h>
37 #include <fm/libtopo.h>
38 #include <fm/fmd_adm.h>
39 #include <fm/fmd_msg.h>
40 #include <dlfcn.h>
41 #include <sys/systeminfo.h>
42 #include <sys/utsname.h>
43 #include <libintl.h>
44 #include <locale.h>
45 #include <sys/smbios.h>
46 #include <libdevinfo.h>
47 #include <stdlib.h>
48 #include <stddef.h>
49 
50 /*
51  * Fault records are added to the catalog by calling
52  * add_fault_record_to_catalog(); records are stored in order of importance.
53  * If -g flag is set or not_suppressed is not set and the class fru, fault,
54  * type are the same then details are merged into an existing record, with uuid
55  * records are stored in time order.
56  * For each record information is extracted from nvlist and merged into linked
57  * list each is checked for identical records for which percentage certainty are
58  * added together.
59  * print_catalog() is called to print out catalog and release external resources
60  *
61  *                         /---------------\
62  *	status_rec_list -> |               | -|
63  *                         \---------------/
64  *                                \/
65  *                         /---------------\    /-------\    /-------\
66  *      status_fru_list    | status_record | -> | uurec | -> | uurec | -|
67  *            \/           |               | |- |       | <- |       |
68  *      /-------------\    |               |    \-------/    \-------/
69  *      |             | -> |               |       \/           \/
70  *      \-------------/    |               |    /-------\    /-------\
71  *            \/           |               | -> | asru  | -> | asru  |
72  *            ---          |               |    |       | <- |       |
73  *                         |               |    \-------/    \-------/
74  *      status_asru_list   |  class        |
75  *            \/           |  resource     |    /-------\    /-------\
76  *      /-------------\    |  fru          | -> | list  | -> | list  |
77  *      |             | -> |  serial       |    |       | <- |       |
78  *      \-------------/    |               |    \-------/    \-------/
79  *            \/           \---------------/
80  *            ---               \/    /\
81  *                         /---------------\
82  *                         | status_record |
83  *                         \---------------/
84  *
85  * Fmadm faulty takes a number of options which affect the format of the
86  * output displayed. By default, the display reports the FRU and ASRU along
87  * with other information on per-case basis as in the example below.
88  *
89  * --------------- ------------------------------------  -------------- -------
90  * TIME            EVENT-ID                              MSG-ID         SEVERITY
91  * --------------- ------------------------------------  -------------- -------
92  * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c  AMD-8000-2F    Major
93  *
94  * Fault class	: fault.memory.dimm_sb
95  * Affects	: mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
96  *		    faulted but still in service
97  * FRU		: "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
98  *		    faulty
99  *
100  * Description	: The number of errors associated with this memory module has
101  *		exceeded acceptable levels.  Refer to
102  *		http://illumos.org/msg/AMD-8000-2F for more information.
103  *
104  * Response	: Pages of memory associated with this memory module are being
105  *		removed from service as errors are reported.
106  *
107  * Impact	: Total system memory capacity will be reduced as pages are
108  *		retired.
109  *
110  * Action	: Schedule a repair procedure to replace the affected memory
111  *		module.  Use fmdump -v -u <EVENT_ID> to identify the module.
112  *
113  * The -v flag is similar, but adds some additional information such as the
114  * resource. The -s flag is also similar but just gives the top line summary.
115  * All these options (ie without the -f or -r flags) use the print_catalog()
116  * function to do the display.
117  *
118  * The -f flag changes the output so that it appears sorted on a per-fru basis.
119  * The output is somewhat cut down compared to the default output. If -f is
120  * used, then print_fru() is used to print the output.
121  *
122  * -----------------------------------------------------------------------------
123  * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
124  * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
125  *
126  * Description	: A problem was detected for a PCI device.
127  *		Refer to http://illumos.org/msg/PCI-8000-7J
128  *		for more information.
129  *
130  * Response	: One or more device instances may be disabled
131  *
132  * Impact	: Possible loss of services provided by the device instances
133  *		associated with this fault
134  *
135  * Action	: Schedule a repair procedure to replace the affected device.
136  * 		Use fmdump -v -u <EVENT_ID> to identify the device or contact
137  *		Sun for support.
138  *
139  * The -r flag changes the output so that it appears sorted on a per-asru basis.
140  * The output is very much cut down compared to the default output, just giving
141  * the asru fmri and state. Here print_asru() is used to print the output.
142  *
143  * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0	degraded
144  *
145  * For all fmadm faulty options, the sequence of events is
146  *
147  * 1) Walk through all the cases in the system using fmd_adm_case_iter() and
148  * for each case call dfault_rec(). This will call add_fault_record_to_catalog()
149  * This will extract the data from the nvlist and call catalog_new_record() to
150  * save the data away in various linked lists in the catalogue.
151  *
152  * 2) Once this is done, the data can be supplemented by using
153  * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option.
154  *
155  * 3) Finally print_catalog(), print_fru() or print_asru() are called as
156  * appropriate to display the information from the catalogue sorted in the
157  * requested way.
158  *
159  */
160 
161 typedef struct name_list {
162 	struct name_list *next;
163 	struct name_list *prev;
164 	char *name;
165 	uint8_t pct;
166 	uint8_t max_pct;
167 	ushort_t count;
168 	int status;
169 	char *label;
170 } name_list_t;
171 
/*
 * Singly linked list of uuid strings hung off a uurec; entries are pushed
 * on the front by update_asru_state_in_catalog().
 */
struct ari_list {
	char *ari_uuid;		/* strdup'd uuid string */
	struct ari_list *next;	/* next entry, NULL at the end */
};
typedef struct ari_list ari_list_t;
176 
177 typedef struct uurec {
178 	struct uurec *next;
179 	struct uurec *prev;
180 	char *uuid;
181 	ari_list_t *ari_uuid_list;
182 	name_list_t *asru;
183 	uint64_t sec;
184 	nvlist_t *event;
185 } uurec_t;
186 
/*
 * Singly linked list of uuid strings.  It is not referenced in this part
 * of the file; presumably it carries a user-supplied uuid selection.
 */
struct uurec_select {
	struct uurec_select *next;	/* next entry, NULL at the end */
	char *uuid;			/* uuid string */
};
typedef struct uurec_select uurec_select_t;
191 
/*
 * Identity of the system a diagnosis came from, as taken from the FMRI
 * authority of the diagnosis engine (see find_hostid()).
 */
struct host_id {
	char *chassis;		/* chassis id, may be NULL */
	char *server;		/* server name, "-" when not supplied */
	char *platform;		/* product name, "-" when not supplied */
	char *domain;		/* domain id, may be NULL */
	char *product_sn;	/* product serial number, may be NULL */
};
typedef struct host_id hostid_t;
199 
200 typedef struct host_id_list {
201 	hostid_t hostid;
202 	struct host_id_list *next;
203 } host_id_list_t;
204 
205 typedef struct status_record {
206 	hostid_t *host;
207 	int nrecs;
208 	uurec_t *uurec;
209 	char *severity;			/* in C locale */
210 	char *msgid;
211 	name_list_t *class;
212 	name_list_t *resource;
213 	name_list_t *asru;
214 	name_list_t *fru;
215 	name_list_t *serial;
216 	uint8_t not_suppressed;
217 	uint8_t injected;
218 } status_record_t;
219 
/*
 * Node of a circular, doubly linked list of status records, kept in
 * priority order by add_rec_list().
 */
struct sr_list {
	struct sr_list *next;			/* following node */
	struct sr_list *prev;			/* preceding node */
	struct status_record *status_record;	/* the record referenced */
};
typedef struct sr_list sr_list_t;
225 
226 typedef struct resource_list {
227 	struct resource_list *next;
228 	struct resource_list *prev;
229 	sr_list_t *status_rec_list;
230 	char *resource;
231 	uint8_t not_suppressed;
232 	uint8_t injected;
233 	uint8_t max_pct;
234 } resource_list_t;
235 
236 sr_list_t *status_rec_list;
237 resource_list_t *status_fru_list;
238 resource_list_t *status_asru_list;
239 
240 static int max_display;
241 static int max_fault = 0;
242 static topo_hdl_t *topo_handle;
243 static host_id_list_t *host_list;
244 static int n_server;
245 static int opt_g;
246 static fmd_msg_hdl_t *fmadm_msghdl = NULL; /* handle for libfmd_msg calls */
247 
/*
 * Render the time 'sec' (seconds since the epoch) into buf, which holds len
 * bytes, and return buf.
 *
 * Times more than 60 seconds in the future or more than roughly six months
 * in the past are shown with the year ("%b %d %Y    "); anything else is
 * shown with the time of day ("%b %d %T").  A value that does not fit in a
 * long, cannot be broken down by localtime(), or does not fit in buf is
 * shown as a hexadecimal number instead.
 */
static char *
format_date(char *buf, size_t len, uint64_t sec)
{
	if (sec > LONG_MAX) {
		(void) fprintf(stderr,
		    "record time is too large for 32-bit utility\n");
		/* uint64_t is not necessarily unsigned long long: cast it */
		(void) snprintf(buf, len, "0x%llx", (unsigned long long)sec);
	} else {
		time_t tod = (time_t)sec;
		time_t now = time(NULL);
		struct tm *tm = localtime(&tod);
		const char *fmt;

		if (tod > now + 60 ||
		    tod < now - 6L*30L*24L*60L*60L) { /* 6 months ago */
			fmt = "%b %d %Y    ";
		} else {
			fmt = "%b %d %T";
		}

		/*
		 * localtime() may fail, and strftime() leaves buf undefined
		 * when the result does not fit; never hand back garbage.
		 */
		if (tm == NULL || strftime(buf, len, fmt, tm) == 0) {
			(void) snprintf(buf, len, "0x%llx",
			    (unsigned long long)sec);
		}
	}

	return (buf);
}
269 
270 static hostid_t *
find_hostid_in_list(char * platform,char * chassis,char * server,char * domain,char * product_sn)271 find_hostid_in_list(char *platform, char *chassis, char *server, char *domain,
272     char *product_sn)
273 {
274 	hostid_t *rt = NULL;
275 	host_id_list_t *hostp;
276 
277 	if (platform == NULL)
278 		platform = "-";
279 	if (server == NULL)
280 		server = "-";
281 	hostp = host_list;
282 	while (hostp) {
283 		if (hostp->hostid.platform &&
284 		    strcmp(hostp->hostid.platform, platform) == 0 &&
285 		    hostp->hostid.server &&
286 		    strcmp(hostp->hostid.server, server) == 0 &&
287 		    (chassis == NULL || hostp->hostid.chassis == NULL ||
288 		    strcmp(chassis, hostp->hostid.chassis) == 0) &&
289 		    (product_sn == NULL || hostp->hostid.product_sn == NULL ||
290 		    strcmp(product_sn, hostp->hostid.product_sn) == 0) &&
291 		    (domain == NULL || hostp->hostid.domain == NULL ||
292 		    strcmp(domain, hostp->hostid.domain) == 0)) {
293 			rt = &hostp->hostid;
294 			break;
295 		}
296 		hostp = hostp->next;
297 	}
298 	if (rt == NULL) {
299 		hostp = malloc(sizeof (host_id_list_t));
300 		hostp->hostid.platform = strdup(platform);
301 		hostp->hostid.product_sn =
302 		    product_sn ? strdup(product_sn) : NULL;
303 		hostp->hostid.server = strdup(server);
304 		hostp->hostid.chassis = chassis ? strdup(chassis) : NULL;
305 		hostp->hostid.domain = domain ? strdup(domain) : NULL;
306 		hostp->next = host_list;
307 		host_list = hostp;
308 		rt = &hostp->hostid;
309 		n_server++;
310 	}
311 	return (rt);
312 }
313 
314 static hostid_t *
find_hostid(nvlist_t * nvl)315 find_hostid(nvlist_t *nvl)
316 {
317 	char *platform = NULL, *chassis = NULL, *server = NULL, *domain = NULL;
318 	char *product_sn = NULL;
319 	nvlist_t *auth, *fmri;
320 	hostid_t *rt = NULL;
321 
322 	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 &&
323 	    nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) {
324 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT,
325 		    &platform);
326 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT_SN,
327 		    &product_sn);
328 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server);
329 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS,
330 		    &chassis);
331 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_DOMAIN, &domain);
332 		rt = find_hostid_in_list(platform, chassis, server,
333 		    domain, product_sn);
334 	}
335 	return (rt);
336 }
337 
338 static char *
get_nvl2str_topo(nvlist_t * nvl)339 get_nvl2str_topo(nvlist_t *nvl)
340 {
341 	char *name = NULL;
342 	char *tname;
343 	int err;
344 	char *scheme = NULL;
345 	char *mod_name = NULL;
346 	char buf[128];
347 
348 	if (topo_handle == NULL)
349 		topo_handle = topo_open(TOPO_VERSION, 0, &err);
350 	if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) {
351 		name = strdup(tname);
352 		topo_hdl_strfree(topo_handle, tname);
353 	} else {
354 		(void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme);
355 		(void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name);
356 		if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 &&
357 		    mod_name) {
358 			(void) snprintf(buf, sizeof (buf), "%s:///module/%s",
359 			    scheme, mod_name);
360 			name = strdup(buf);
361 		}
362 	}
363 	return (name);
364 }
365 
/*
 * Map a severity string to a numeric rank: "Minor" is 1, "Major" is 10 and
 * "Critical" is 100.  NULL and any other string rank as 0.  The comparison
 * is case sensitive.
 */
static int
set_priority(char *s)
{
	static const struct {
		const char *severity;
		int rank;
	} ranks[] = {
		{ "Minor", 1 },
		{ "Major", 10 },
		{ "Critical", 100 }
	};
	size_t i;

	if (s == NULL)
		return (0);

	for (i = 0; i < sizeof (ranks) / sizeof (ranks[0]); i++) {
		if (strcmp(s, ranks[i].severity) == 0)
			return (ranks[i].rank);
	}
	return (0);
}
381 
/*
 * Three-level comparison of two entries.  Severity (s1 against s2, ranked by
 * set_priority()) decides first; when the ranks are equal the later time
 * wins (t1 against t2); when the times are equal too, the result is p1 - p2.
 *
 * Returns a positive value when the first entry ranks higher, a negative
 * value when the second does, and 0 when they are equal on all three.
 */
static int
cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1,
    uint8_t p2)
{
	int diff = set_priority(s1) - set_priority(s2);

	if (diff != 0)
		return (diff);
	if (t1 != t2)
		return (t1 > t2 ? 1 : -1);
	return (p1 - p2);
}
402 
403 /*
404  * merge two lists into one, by comparing enties in new and moving into list if
405  * name is not there or free off memory for names which are already there
406  * add_pct indicates if pct is the sum or highest pct
407  */
408 static name_list_t *
merge_name_list(name_list_t ** list,name_list_t * new,int add_pct)409 merge_name_list(name_list_t **list, name_list_t *new, int add_pct)
410 {
411 	name_list_t *lp, *np, *sp, *rt = NULL;
412 	int max_pct;
413 
414 	rt = *list;
415 	np = new;
416 	while (np) {
417 		lp = *list;
418 		while (lp) {
419 			if (strcmp(lp->name, np->name) == 0)
420 				break;
421 			lp = lp->next;
422 			if (lp == *list)
423 				lp = NULL;
424 		}
425 		if (np->next == new)
426 			sp = NULL;
427 		else
428 			sp = np->next;
429 		if (lp) {
430 			lp->status |= (np->status & FM_SUSPECT_FAULTY);
431 			if (add_pct) {
432 				lp->pct += np->pct;
433 				lp->count += np->count;
434 			} else if (np->pct > lp->pct) {
435 				lp->pct = np->pct;
436 			}
437 			max_pct = np->max_pct;
438 			if (np->label)
439 				free(np->label);
440 			free(np->name);
441 			free(np);
442 			np = NULL;
443 			if (max_pct > lp->max_pct) {
444 				lp->max_pct = max_pct;
445 				if (lp->max_pct > lp->prev->max_pct &&
446 				    lp != *list) {
447 					lp->prev->next = lp->next;
448 					lp->next->prev = lp->prev;
449 					np = lp;
450 				}
451 			}
452 		}
453 		if (np) {
454 			lp = *list;
455 			if (lp) {
456 				if (np->max_pct > lp->max_pct) {
457 					np->next = lp;
458 					np->prev = lp->prev;
459 					lp->prev->next = np;
460 					lp->prev = np;
461 					*list = np;
462 					rt = np;
463 				} else {
464 					lp = lp->next;
465 					while (lp != *list &&
466 					    np->max_pct < lp->max_pct) {
467 						lp = lp->next;
468 					}
469 					np->next = lp;
470 					np->prev = lp->prev;
471 					lp->prev->next = np;
472 					lp->prev = np;
473 				}
474 			} else {
475 				*list = np;
476 				np->next = np;
477 				np->prev = np;
478 				rt = np;
479 			}
480 		}
481 		np = sp;
482 	}
483 	return (rt);
484 }
485 
486 static name_list_t *
alloc_name_list(char * name,uint8_t pct)487 alloc_name_list(char *name, uint8_t pct)
488 {
489 	name_list_t *nlp;
490 
491 	nlp = malloc(sizeof (*nlp));
492 	nlp->name = strdup(name);
493 	nlp->pct = pct;
494 	nlp->max_pct = pct;
495 	nlp->count = 1;
496 	nlp->next = nlp;
497 	nlp->prev = nlp;
498 	nlp->status = 0;
499 	nlp->label = NULL;
500 	return (nlp);
501 }
502 
503 static status_record_t *
new_record_init(uurec_t * uurec_p,char * msgid,name_list_t * class,name_list_t * fru,name_list_t * asru,name_list_t * resource,name_list_t * serial,boolean_t not_suppressed,hostid_t * hostid,boolean_t injected)504 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class,
505     name_list_t *fru, name_list_t *asru, name_list_t *resource,
506     name_list_t *serial, boolean_t not_suppressed,
507     hostid_t *hostid, boolean_t injected)
508 {
509 	status_record_t *status_rec_p;
510 
511 	status_rec_p = (status_record_t *)malloc(sizeof (status_record_t));
512 	status_rec_p->nrecs = 1;
513 	status_rec_p->host = hostid;
514 	status_rec_p->uurec = uurec_p;
515 	uurec_p->next = NULL;
516 	uurec_p->prev = NULL;
517 	uurec_p->asru = asru;
518 	if ((status_rec_p->severity = fmd_msg_getitem_id(fmadm_msghdl, NULL,
519 	    msgid, FMD_MSG_ITEM_SEVERITY)) == NULL)
520 		status_rec_p->severity = strdup("unknown");
521 	status_rec_p->class = class;
522 	status_rec_p->fru = fru;
523 	status_rec_p->asru = asru;
524 	status_rec_p->resource = resource;
525 	status_rec_p->serial = serial;
526 	status_rec_p->msgid = strdup(msgid);
527 	status_rec_p->not_suppressed = not_suppressed;
528 	status_rec_p->injected = injected;
529 	return (status_rec_p);
530 }
531 
532 /*
533  * add record to given list maintaining order higher priority first.
534  */
535 static void
add_rec_list(status_record_t * status_rec_p,sr_list_t ** list_pp)536 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp)
537 {
538 	sr_list_t *tp, *np, *sp;
539 	int order;
540 	uint64_t sec;
541 
542 	np = malloc(sizeof (sr_list_t));
543 	np->status_record = status_rec_p;
544 	sec = status_rec_p->uurec->sec;
545 	if ((sp = *list_pp) == NULL) {
546 		*list_pp = np;
547 		np->next = np;
548 		np->prev = np;
549 	} else {
550 		/* insert new record in front of lower priority */
551 		tp = sp;
552 		order = cmp_priority(status_rec_p->severity,
553 		    sp->status_record->severity, sec,
554 		    tp->status_record->uurec->sec, 0, 0);
555 		if (order > 0) {
556 			*list_pp = np;
557 		} else {
558 			tp = sp->next;
559 			while (tp != sp &&
560 			    cmp_priority(status_rec_p->severity,
561 			    tp->status_record->severity, sec,
562 			    tp->status_record->uurec->sec, 0, 0)) {
563 				tp = tp->next;
564 			}
565 		}
566 		np->next = tp;
567 		np->prev = tp->prev;
568 		tp->prev->next = np;
569 		tp->prev = np;
570 	}
571 }
572 
573 static void
add_resource(status_record_t * status_rec_p,resource_list_t ** rp,resource_list_t * np)574 add_resource(status_record_t *status_rec_p, resource_list_t **rp,
575     resource_list_t *np)
576 {
577 	int order;
578 	uint64_t sec;
579 	resource_list_t *sp, *tp;
580 	status_record_t *srp;
581 	char *severity = status_rec_p->severity;
582 
583 	add_rec_list(status_rec_p, &np->status_rec_list);
584 	if ((sp = *rp) == NULL) {
585 		np->next = np;
586 		np->prev = np;
587 		*rp = np;
588 	} else {
589 		/*
590 		 * insert new record in front of lower priority
591 		 */
592 		tp = sp->next;
593 		srp = sp->status_rec_list->status_record;
594 		sec = status_rec_p->uurec->sec;
595 		order = cmp_priority(severity, srp->severity, sec,
596 		    srp->uurec->sec, np->max_pct, sp->max_pct);
597 		if (order > 0) {
598 			*rp = np;
599 		} else {
600 			srp = tp->status_rec_list->status_record;
601 			while (tp != sp &&
602 			    cmp_priority(severity, srp->severity, sec,
603 			    srp->uurec->sec, np->max_pct, sp->max_pct) < 0) {
604 				tp = tp->next;
605 				srp = tp->status_rec_list->status_record;
606 			}
607 		}
608 		np->next = tp;
609 		np->prev = tp->prev;
610 		tp->prev->next = np;
611 		tp->prev = np;
612 	}
613 }
614 
615 static void
add_resource_list(status_record_t * status_rec_p,name_list_t * fp,resource_list_t ** rpp)616 add_resource_list(status_record_t *status_rec_p, name_list_t *fp,
617     resource_list_t **rpp)
618 {
619 	int order;
620 	resource_list_t *np, *end;
621 	status_record_t *srp;
622 
623 	np = *rpp;
624 	end = np;
625 	while (np) {
626 		if (strcmp(fp->name, np->resource) == 0) {
627 			np->not_suppressed |= status_rec_p->not_suppressed;
628 			np->injected |= status_rec_p->injected;
629 			srp = np->status_rec_list->status_record;
630 			order = cmp_priority(status_rec_p->severity,
631 			    srp->severity, status_rec_p->uurec->sec,
632 			    srp->uurec->sec, fp->max_pct, np->max_pct);
633 			if (order > 0 && np != end) {
634 				/*
635 				 * remove from list and add again using
636 				 * new priority
637 				 */
638 				np->prev->next = np->next;
639 				np->next->prev = np->prev;
640 				add_resource(status_rec_p,
641 				    rpp, np);
642 			} else {
643 				add_rec_list(status_rec_p,
644 				    &np->status_rec_list);
645 			}
646 			break;
647 		}
648 		np = np->next;
649 		if (np == end) {
650 			np = NULL;
651 			break;
652 		}
653 	}
654 	if (np == NULL) {
655 		np = malloc(sizeof (resource_list_t));
656 		np->resource = fp->name;
657 		np->not_suppressed = status_rec_p->not_suppressed;
658 		np->injected = status_rec_p->injected;
659 		np->status_rec_list = NULL;
660 		np->max_pct = fp->max_pct;
661 		add_resource(status_rec_p, rpp, np);
662 	}
663 }
664 
665 static void
add_list(status_record_t * status_rec_p,name_list_t * listp,resource_list_t ** glistp)666 add_list(status_record_t *status_rec_p, name_list_t *listp,
667     resource_list_t **glistp)
668 {
669 	name_list_t *fp, *end;
670 
671 	fp = listp;
672 	end = fp;
673 	while (fp) {
674 		add_resource_list(status_rec_p, fp, glistp);
675 		fp = fp->next;
676 		if (fp == end)
677 			break;
678 	}
679 }
680 
681 /*
682  * add record to rec, fru and asru lists.
683  */
684 static void
catalog_new_record(uurec_t * uurec_p,char * msgid,name_list_t * class,name_list_t * fru,name_list_t * asru,name_list_t * resource,name_list_t * serial,boolean_t not_suppressed,hostid_t * hostid,boolean_t injected,boolean_t dummy_fru)685 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class,
686     name_list_t *fru, name_list_t *asru, name_list_t *resource,
687     name_list_t *serial, boolean_t not_suppressed,
688     hostid_t *hostid, boolean_t injected, boolean_t dummy_fru)
689 {
690 	status_record_t *status_rec_p;
691 
692 	status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru,
693 	    resource, serial, not_suppressed, hostid, injected);
694 	add_rec_list(status_rec_p, &status_rec_list);
695 	if (status_rec_p->fru && !dummy_fru)
696 		add_list(status_rec_p, status_rec_p->fru, &status_fru_list);
697 	if (status_rec_p->asru)
698 		add_list(status_rec_p, status_rec_p->asru, &status_asru_list);
699 }
700 
701 static void
get_serial_no(nvlist_t * nvl,name_list_t ** serial_p,uint8_t pct)702 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct)
703 {
704 	char *name;
705 	char *serial = NULL;
706 	char **lserial = NULL;
707 	uint64_t serint;
708 	name_list_t *nlp;
709 	int j;
710 	uint_t nelem;
711 	char buf[64];
712 
713 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) {
714 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
715 			if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
716 			    &serint) == 0) {
717 				(void) snprintf(buf, sizeof (buf), "%llX",
718 				    serint);
719 				nlp = alloc_name_list(buf, pct);
720 				(void) merge_name_list(serial_p, nlp, 1);
721 			}
722 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
723 			if (nvlist_lookup_string_array(nvl,
724 			    FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) {
725 				nlp = alloc_name_list(lserial[0], pct);
726 				for (j = 1; j < nelem; j++) {
727 					name_list_t *n1lp;
728 					n1lp = alloc_name_list(lserial[j], pct);
729 					(void) merge_name_list(&nlp, n1lp, 1);
730 				}
731 				(void) merge_name_list(serial_p, nlp, 1);
732 			}
733 		} else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) {
734 			if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID,
735 			    &serial) == 0) {
736 				nlp = alloc_name_list(serial, pct);
737 				(void) merge_name_list(serial_p, nlp, 1);
738 			}
739 		}
740 	}
741 }
742 
743 static void
extract_record_info(nvlist_t * nvl,name_list_t ** class_p,name_list_t ** fru_p,name_list_t ** serial_p,name_list_t ** resource_p,name_list_t ** asru_p,boolean_t * dummy_fru,uint8_t status)744 extract_record_info(nvlist_t *nvl, name_list_t **class_p,
745     name_list_t **fru_p, name_list_t **serial_p, name_list_t **resource_p,
746     name_list_t **asru_p, boolean_t *dummy_fru, uint8_t status)
747 {
748 	nvlist_t *lfru, *lasru, *rsrc;
749 	name_list_t *nlp;
750 	char *name;
751 	uint8_t lpct = 0;
752 	char *lclass = NULL;
753 	char *label;
754 
755 	(void) nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct);
756 	if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) {
757 		nlp = alloc_name_list(lclass, lpct);
758 		(void) merge_name_list(class_p, nlp, 1);
759 	}
760 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) {
761 		name = get_nvl2str_topo(lfru);
762 		if (name != NULL) {
763 			nlp = alloc_name_list(name, lpct);
764 			nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
765 			    FM_SUSPECT_DEGRADED);
766 			free(name);
767 			if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION,
768 			    &label) == 0)
769 				nlp->label = strdup(label);
770 			(void) merge_name_list(fru_p, nlp, 1);
771 		}
772 		get_serial_no(lfru, serial_p, lpct);
773 	} else if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) != 0) {
774 		/*
775 		 * No FRU or resource. But we want to display the repair status
776 		 * somehow, so create a dummy FRU field.
777 		 */
778 		*dummy_fru = 1;
779 		nlp = alloc_name_list(dgettext("FMD", "None"), lpct);
780 		nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
781 		    FM_SUSPECT_DEGRADED);
782 		(void) merge_name_list(fru_p, nlp, 1);
783 	}
784 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) {
785 		name = get_nvl2str_topo(lasru);
786 		if (name != NULL) {
787 			nlp = alloc_name_list(name, lpct);
788 			nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT |
789 			    FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED |
790 			    FM_SUSPECT_ACQUITTED);
791 			free(name);
792 			(void) merge_name_list(asru_p, nlp, 1);
793 		}
794 		get_serial_no(lasru, serial_p, lpct);
795 	}
796 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
797 		name = get_nvl2str_topo(rsrc);
798 		if (name != NULL) {
799 			nlp = alloc_name_list(name, lpct);
800 			nlp->status = status;
801 			free(name);
802 			if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION,
803 			    &label) == 0)
804 				nlp->label = strdup(label);
805 			(void) merge_name_list(resource_p, nlp, 1);
806 		}
807 	}
808 }
809 
810 static void
add_fault_record_to_catalog(nvlist_t * nvl,uint64_t sec,char * uuid)811 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid)
812 {
813 	char *msgid = "-";
814 	uint_t i, size = 0;
815 	name_list_t *class = NULL, *resource = NULL;
816 	name_list_t *asru = NULL, *fru = NULL, *serial = NULL;
817 	nvlist_t **nva;
818 	uint8_t *ba;
819 	uurec_t *uurec_p;
820 	hostid_t *host;
821 	boolean_t not_suppressed = 1;
822 	boolean_t any_present = 0;
823 	boolean_t injected = 0;
824 	boolean_t dummy_fru = 0;
825 
826 	(void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid);
827 	(void) nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size);
828 	(void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE,
829 	    &not_suppressed);
830 	(void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED, &injected);
831 
832 	if (size != 0) {
833 		(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
834 		    &nva, &size);
835 		(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
836 		    &ba, &size);
837 		for (i = 0; i < size; i++) {
838 			extract_record_info(nva[i], &class, &fru, &serial,
839 			    &resource, &asru, &dummy_fru, ba[i]);
840 			if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) &&
841 			    (ba[i] & FM_SUSPECT_FAULTY))
842 				any_present = 1;
843 		}
844 		/*
845 		 * also suppress if no resources present
846 		 */
847 		if (any_present == 0)
848 			not_suppressed = 0;
849 	}
850 
851 	uurec_p = (uurec_t *)malloc(sizeof (uurec_t));
852 	uurec_p->uuid = strdup(uuid);
853 	uurec_p->sec = sec;
854 	uurec_p->ari_uuid_list = NULL;
855 	uurec_p->event = NULL;
856 	(void) nvlist_dup(nvl, &uurec_p->event, 0);
857 	host = find_hostid(nvl);
858 	catalog_new_record(uurec_p, msgid, class, fru, asru,
859 	    resource, serial, not_suppressed, host, injected, dummy_fru);
860 }
861 
862 static void
update_asru_state_in_catalog(const char * uuid,const char * ari_uuid)863 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid)
864 {
865 	sr_list_t *srp;
866 	uurec_t *uurp;
867 	ari_list_t *ari_list;
868 
869 	srp = status_rec_list;
870 	if (srp) {
871 		for (;;) {
872 			uurp = srp->status_record->uurec;
873 			while (uurp) {
874 				if (strcmp(uuid, uurp->uuid) == 0) {
875 					ari_list = (ari_list_t *)
876 					    malloc(sizeof (ari_list_t));
877 					ari_list->ari_uuid = strdup(ari_uuid);
878 					ari_list->next = uurp->ari_uuid_list;
879 					uurp->ari_uuid_list = ari_list;
880 					return;
881 				}
882 				uurp = uurp->next;
883 			}
884 			if (srp->next == status_rec_list)
885 				break;
886 			srp = srp->next;
887 		}
888 	}
889 }
890 
/*
 * Print the text in buf under the heading 'label', wrapped to fit roughly
 * 80 columns.
 *
 * A blank line is printed first.  The first output line starts with label;
 * every following line is indented by strlen(label) spaces so the text
 * lines up.  A line is broken at the last space seen once the column count
 * reaches 80 (a line with no space is never broken), and at every newline
 * in buf, after which leading spaces are skipped.
 *
 * buf is modified in place: each break point is overwritten with '\0'.
 */
static void
print_line(char *label, char *buf)
{
	char *cp, *ep, *wp;
	char c;
	int i;
	int lsz;
	int first = 1;

	lsz = strlen(label);
	cp = buf;
	ep = buf;
	c = *ep;
	(void) printf("\n");
	while (c) {
		i = lsz;
		wp = NULL;
		while ((c = *ep) != '\0' && (wp == NULL || i < 80)) {
			if (c == ' ')
				wp = ep;
			else if (c == '\n') {
				i = 0;
				*ep = 0;
				do {
					ep++;
				} while ((c = *ep) != '\0' && c == ' ');
				break;
			}
			ep++;
			i++;
		}
		if (i >= 80 && wp) {
			*wp = 0;
			ep = wp + 1;
			c = *ep;
		}
		if (first) {
			(void) printf("%s%s\n", label, cp);
			first = 0;
		} else {
			/* pad with lsz spaces; no scratch buffer is needed */
			(void) printf("%*s%s\n", lsz, "", cp);
		}
		cp = ep;
	}
}
937 
938 static void
print_dict_info_line(nvlist_t * e,fmd_msg_item_t what,const char * linehdr)939 print_dict_info_line(nvlist_t *e, fmd_msg_item_t what, const char *linehdr)
940 {
941 	char *cp = fmd_msg_getitem_nv(fmadm_msghdl, NULL, e, what);
942 
943 	if (cp) {
944 		print_line(dgettext("FMD", linehdr), cp);
945 		free(cp);
946 	}
947 }
948 
949 static void
print_dict_info(nvlist_t * nvl)950 print_dict_info(nvlist_t *nvl)
951 {
952 	print_dict_info_line(nvl, FMD_MSG_ITEM_DESC, "Description : ");
953 	print_dict_info_line(nvl, FMD_MSG_ITEM_RESPONSE, "Response    : ");
954 	print_dict_info_line(nvl, FMD_MSG_ITEM_IMPACT, "Impact      : ");
955 	print_dict_info_line(nvl, FMD_MSG_ITEM_ACTION, "Action      : ");
956 }
957 
958 static void
print_name(name_list_t * list,char * padding,int * np,int pct,int full)959 print_name(name_list_t *list, char *padding, int *np, int pct, int full)
960 {
961 	char *name;
962 
963 	name = list->name;
964 	if (list->label) {
965 		(void) printf("%s \"%s\" (%s)", padding, list->label, name);
966 		*np += 1;
967 	} else {
968 		(void) printf("%s %s", padding, name);
969 		*np += 1;
970 	}
971 	if (list->pct && pct > 0 && pct < 100) {
972 		if (list->count > 1) {
973 			if (full) {
974 				(void) printf(" %d @ %s %d%%\n", list->count,
975 				    dgettext("FMD", "max"),
976 				    list->max_pct);
977 			} else {
978 				(void) printf(" %s %d%%\n",
979 				    dgettext("FMD", "max"),
980 				    list->max_pct);
981 			}
982 		} else {
983 			(void) printf(" %d%%\n", list->pct);
984 		}
985 	} else {
986 		(void) printf("\n");
987 	}
988 }
989 
990 static void
print_asru_status(int status,char * label)991 print_asru_status(int status, char *label)
992 {
993 	char *msg = NULL;
994 
995 	switch (status) {
996 	case 0:
997 		msg = dgettext("FMD", "ok and in service");
998 		break;
999 	case FM_SUSPECT_DEGRADED:
1000 		msg = dgettext("FMD", "service degraded, "
1001 		    "but associated components no longer faulty");
1002 		break;
1003 	case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
1004 		msg = dgettext("FMD", "faulted but still "
1005 		    "providing degraded service");
1006 		break;
1007 	case FM_SUSPECT_FAULTY:
1008 		msg = dgettext("FMD", "faulted but still in service");
1009 		break;
1010 	case FM_SUSPECT_UNUSABLE:
1011 		msg = dgettext("FMD", "out of service, "
1012 		    "but associated components no longer faulty");
1013 		break;
1014 	case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
1015 		msg = dgettext("FMD", "faulted and taken out of service");
1016 		break;
1017 	default:
1018 		break;
1019 	}
1020 	if (msg) {
1021 		(void) printf("%s     %s\n", label, msg);
1022 	}
1023 }
1024 
1025 static void
print_fru_status(int status,char * label)1026 print_fru_status(int status, char *label)
1027 {
1028 	char *msg = NULL;
1029 
1030 	if (status & FM_SUSPECT_NOT_PRESENT)
1031 		msg = dgettext("FMD", "not present");
1032 	else if (status & FM_SUSPECT_FAULTY)
1033 		msg = dgettext("FMD", "faulty");
1034 	else if (status & FM_SUSPECT_REPLACED)
1035 		msg = dgettext("FMD", "replaced");
1036 	else if (status & FM_SUSPECT_REPAIRED)
1037 		msg = dgettext("FMD", "repair attempted");
1038 	else if (status & FM_SUSPECT_ACQUITTED)
1039 		msg = dgettext("FMD", "acquitted");
1040 	else
1041 		msg = dgettext("FMD", "removed");
1042 	(void) printf("%s     %s\n", label, msg);
1043 }
1044 
1045 static void
print_rsrc_status(int status,char * label)1046 print_rsrc_status(int status, char *label)
1047 {
1048 	char *msg = "";
1049 
1050 	if (status & FM_SUSPECT_NOT_PRESENT)
1051 		msg = dgettext("FMD", "not present");
1052 	else if (status & FM_SUSPECT_FAULTY) {
1053 		if (status & FM_SUSPECT_DEGRADED)
1054 			msg = dgettext("FMD",
1055 			    "faulted but still providing degraded service");
1056 		else if (status & FM_SUSPECT_UNUSABLE)
1057 			msg = dgettext("FMD",
1058 			    "faulted and taken out of service");
1059 		else
1060 			msg = dgettext("FMD", "faulted but still in service");
1061 	} else if (status & FM_SUSPECT_REPLACED)
1062 		msg = dgettext("FMD", "replaced");
1063 	else if (status & FM_SUSPECT_REPAIRED)
1064 		msg = dgettext("FMD", "repair attempted");
1065 	else if (status & FM_SUSPECT_ACQUITTED)
1066 		msg = dgettext("FMD", "acquitted");
1067 	else
1068 		msg = dgettext("FMD", "removed");
1069 	(void) printf("%s     %s\n", label, msg);
1070 }
1071 
1072 static void
print_name_list(name_list_t * list,char * label,int limit,int pct,void (func1)(int,char *),int full)1073 print_name_list(name_list_t *list, char *label,
1074     int limit, int pct, void (func1)(int, char *), int full)
1075 {
1076 	char *name;
1077 	char *padding;
1078 	int i, j, l, n;
1079 	name_list_t *end = list;
1080 
1081 	l = strlen(label);
1082 	padding = malloc(l + 1);
1083 	for (i = 0; i < l; i++)
1084 		padding[i] = ' ';
1085 	padding[l] = 0;
1086 	(void) printf("%s", label);
1087 	name = list->name;
1088 	if (list->label)
1089 		(void) printf(" \"%s\" (%s)", list->label, name);
1090 	else
1091 		(void) printf(" %s", name);
1092 	if (list->pct && pct > 0 && pct < 100) {
1093 		if (list->count > 1) {
1094 			if (full) {
1095 				(void) printf(" %d @ %s %d%%\n", list->count,
1096 				    dgettext("FMD", "max"), list->max_pct);
1097 			} else {
1098 				(void) printf(" %s %d%%\n",
1099 				    dgettext("FMD", "max"), list->max_pct);
1100 			}
1101 		} else {
1102 			(void) printf(" %d%%\n", list->pct);
1103 		}
1104 	} else {
1105 		(void) printf("\n");
1106 	}
1107 	if (func1)
1108 		func1(list->status, padding);
1109 	n = 1;
1110 	j = 0;
1111 	while ((list = list->next) != end) {
1112 		if (limit == 0 || n < limit) {
1113 			print_name(list, padding, &n, pct, full);
1114 			if (func1)
1115 				func1(list->status, padding);
1116 		} else
1117 			j++;
1118 	}
1119 	if (j == 1) {
1120 		print_name(list->prev, padding, &n, pct, full);
1121 	} else if (j > 1) {
1122 		(void) printf("%s... %d %s\n", padding, j,
1123 		    dgettext("FMD", "more entries suppressed,"
1124 		    " use -v option for full list"));
1125 	}
1126 	free(padding);
1127 }
1128 
1129 static int
asru_same_status(name_list_t * list)1130 asru_same_status(name_list_t *list)
1131 {
1132 	name_list_t *end = list;
1133 	int status = list->status;
1134 
1135 	while ((list = list->next) != end) {
1136 		if (status == -1) {
1137 			status = list->status;
1138 			continue;
1139 		}
1140 		if (list->status != -1 && status != list->status) {
1141 			status = -1;
1142 			break;
1143 		}
1144 	}
1145 	return (status);
1146 }
1147 
1148 static int
serial_in_fru(name_list_t * fru,name_list_t * serial)1149 serial_in_fru(name_list_t *fru, name_list_t *serial)
1150 {
1151 	name_list_t *sp = serial;
1152 	name_list_t *fp;
1153 	int nserial = 0;
1154 	int found = 0;
1155 	char buf[128];
1156 
1157 	while (sp) {
1158 		fp = fru;
1159 		nserial++;
1160 		(void) snprintf(buf, sizeof (buf), "serial=%s", sp->name);
1161 		buf[sizeof (buf) - 1] = 0;
1162 		while (fp) {
1163 			if (strstr(fp->name, buf) != NULL) {
1164 				found++;
1165 				break;
1166 			}
1167 			fp = fp->next;
1168 			if (fp == fru)
1169 				break;
1170 		}
1171 		sp = sp->next;
1172 		if (sp == serial)
1173 			break;
1174 	}
1175 	return (found == nserial ? 1 : 0);
1176 }
1177 
1178 static void
print_sup_record(status_record_t * srp,int opt_i,int full)1179 print_sup_record(status_record_t *srp, int opt_i, int full)
1180 {
1181 	char buf[32];
1182 	uurec_t *uurp = srp->uurec;
1183 	int n, j, k, max;
1184 	int status;
1185 	ari_list_t *ari_list;
1186 
1187 	n = 0;
1188 	max = max_fault;
1189 	if (max < 0) {
1190 		max = 0;
1191 	}
1192 	j = max / 2;
1193 	max -= j;
1194 	k = srp->nrecs - max;
1195 	while ((uurp = uurp->next) != NULL) {
1196 		if (full || n < j || n >= k || max_fault == 0 ||
1197 		    srp->nrecs == max_fault+1) {
1198 			if (opt_i) {
1199 				ari_list = uurp->ari_uuid_list;
1200 				while (ari_list) {
1201 					(void) printf("%-15s %s\n",
1202 					    format_date(buf, sizeof (buf),
1203 					    uurp->sec), ari_list->ari_uuid);
1204 					ari_list = ari_list->next;
1205 				}
1206 			} else {
1207 				(void) printf("%-15s %s\n",
1208 				    format_date(buf, sizeof (buf), uurp->sec),
1209 				    uurp->uuid);
1210 			}
1211 		} else if (n == j)
1212 			(void) printf("... %d %s\n", srp->nrecs - max_fault,
1213 			    dgettext("FMD", "more entries suppressed"));
1214 		n++;
1215 	}
1216 	(void) printf("\n");
1217 	(void) printf("%s %s", dgettext("FMD", "Host        :"),
1218 	    srp->host->server);
1219 	if (srp->host->domain)
1220 		(void) printf("\t%s %s", dgettext("FMD", "Domain      :"),
1221 		    srp->host->domain);
1222 	(void) printf("\n%s %s", dgettext("FMD", "Platform    :"),
1223 	    srp->host->platform);
1224 	(void) printf("\t%s %s", dgettext("FMD", "Chassis_id  :"),
1225 	    srp->host->chassis ? srp->host->chassis : "");
1226 	(void) printf("\n%s %s\n\n", dgettext("FMD", "Product_sn  :"),
1227 	    srp->host->product_sn? srp->host->product_sn : "");
1228 	if (srp->class)
1229 		print_name_list(srp->class,
1230 		    dgettext("FMD", "Fault class :"), 0, srp->class->pct,
1231 		    NULL, full);
1232 	if (srp->asru) {
1233 		status = asru_same_status(srp->asru);
1234 		if (status != -1) {
1235 			print_name_list(srp->asru,
1236 			    dgettext("FMD", "Affects     :"),
1237 			    full ? 0 : max_display, 0, NULL, full);
1238 			print_asru_status(status, "             ");
1239 		} else
1240 			print_name_list(srp->asru,
1241 			    dgettext("FMD", "Affects     :"),
1242 			    full ? 0 : max_display, 0, print_asru_status, full);
1243 	}
1244 	if (full || srp->fru == NULL || srp->asru == NULL) {
1245 		if (srp->resource) {
1246 			status = asru_same_status(srp->resource);
1247 			if (status != -1) {
1248 				print_name_list(srp->resource,
1249 				    dgettext("FMD", "Problem in  :"),
1250 				    full ? 0 : max_display, 0, NULL, full);
1251 				print_rsrc_status(status, "             ");
1252 			} else
1253 				print_name_list(srp->resource,
1254 				    dgettext("FMD", "Problem in  :"),
1255 				    full ? 0 : max_display, 0,
1256 				    print_rsrc_status, full);
1257 		}
1258 	}
1259 	if (srp->fru) {
1260 		status = asru_same_status(srp->fru);
1261 		if (status != -1) {
1262 			print_name_list(srp->fru, dgettext("FMD",
1263 			    "FRU         :"), 0,
1264 			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
1265 			    NULL, full);
1266 			print_fru_status(status, "             ");
1267 		} else
1268 			print_name_list(srp->fru, dgettext("FMD",
1269 			    "FRU         :"), 0,
1270 			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
1271 			    print_fru_status, full);
1272 	}
1273 	if (srp->serial && !serial_in_fru(srp->fru, srp->serial) &&
1274 	    !serial_in_fru(srp->asru, srp->serial)) {
1275 		print_name_list(srp->serial, dgettext("FMD", "Serial ID.  :"),
1276 		    0, 0, NULL, full);
1277 	}
1278 	print_dict_info(srp->uurec->event);
1279 	(void) printf("\n");
1280 }
1281 
1282 static void
print_status_record(status_record_t * srp,int summary,int opt_i,int full)1283 print_status_record(status_record_t *srp, int summary, int opt_i, int full)
1284 {
1285 	char buf[32];
1286 	uurec_t *uurp = srp->uurec;
1287 	static int header = 0;
1288 	char *head;
1289 	ari_list_t *ari_list;
1290 
1291 	if (!summary || !header) {
1292 		if (opt_i) {
1293 			head = "--------------- "
1294 			    "------------------------------------  "
1295 			    "-------------- ---------\n"
1296 			    "TIME            CACHE-ID"
1297 			    "                              MSG-ID"
1298 			    "         SEVERITY\n--------------- "
1299 			    "------------------------------------ "
1300 			    " -------------- ---------";
1301 		} else {
1302 			head = "--------------- "
1303 			    "------------------------------------  "
1304 			    "-------------- ---------\n"
1305 			    "TIME            EVENT-ID"
1306 			    "                              MSG-ID"
1307 			    "         SEVERITY\n--------------- "
1308 			    "------------------------------------ "
1309 			    " -------------- ---------";
1310 		}
1311 		(void) printf("%s\n", dgettext("FMD", head));
1312 		header = 1;
1313 	}
1314 	if (opt_i) {
1315 		ari_list = uurp->ari_uuid_list;
1316 		while (ari_list) {
1317 			(void) printf("%-15s %-37s %-14s %-9s %s\n",
1318 			    format_date(buf, sizeof (buf), uurp->sec),
1319 			    ari_list->ari_uuid, srp->msgid, srp->severity,
1320 			    srp->injected ? dgettext("FMD", "injected") : "");
1321 			ari_list = ari_list->next;
1322 		}
1323 	} else {
1324 		(void) printf("%-15s %-37s %-14s %-9s %s\n",
1325 		    format_date(buf, sizeof (buf), uurp->sec),
1326 		    uurp->uuid, srp->msgid, srp->severity,
1327 		    srp->injected ? dgettext("FMD", "injected") : "");
1328 	}
1329 
1330 	if (!summary)
1331 		print_sup_record(srp, opt_i, full);
1332 }
1333 
1334 static void
print_catalog(int summary,int opt_a,int full,int opt_i,int page_feed)1335 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed)
1336 {
1337 	status_record_t *srp;
1338 	sr_list_t *slp;
1339 
1340 	slp = status_rec_list;
1341 	if (slp) {
1342 		for (;;) {
1343 			srp = slp->status_record;
1344 			if (opt_a || srp->not_suppressed) {
1345 				if (page_feed)
1346 					(void) printf("\f\n");
1347 				print_status_record(srp, summary, opt_i, full);
1348 			}
1349 			if (slp->next == status_rec_list)
1350 				break;
1351 			slp = slp->next;
1352 		}
1353 	}
1354 }
1355 
1356 static name_list_t *
find_fru(status_record_t * srp,char * resource)1357 find_fru(status_record_t *srp, char *resource)
1358 {
1359 	name_list_t *rt = NULL;
1360 	name_list_t *fru = srp->fru;
1361 
1362 	while (fru) {
1363 		if (strcmp(resource, fru->name) == 0) {
1364 			rt = fru;
1365 			break;
1366 		}
1367 		fru = fru->next;
1368 		if (fru == srp->fru)
1369 			break;
1370 	}
1371 	return (rt);
1372 }
1373 
1374 static void
print_fru_line(name_list_t * fru,char * uuid)1375 print_fru_line(name_list_t *fru, char *uuid)
1376 {
1377 	if (fru->pct == 100) {
1378 		(void) printf("%s %d %s %d%%\n", uuid, fru->count,
1379 		    dgettext("FMD", "suspects in this FRU total certainty"),
1380 		    100);
1381 	} else {
1382 		(void) printf("%s %d %s %d%%\n", uuid, fru->count,
1383 		    dgettext("FMD", "suspects in this FRU max certainty"),
1384 		    fru->max_pct);
1385 	}
1386 }
1387 
1388 static void
print_fru(int summary,int opt_a,int opt_i,int page_feed)1389 print_fru(int summary, int opt_a, int opt_i, int page_feed)
1390 {
1391 	resource_list_t *tp = status_fru_list;
1392 	status_record_t *srp;
1393 	sr_list_t *slp, *end;
1394 	uurec_t *uurp;
1395 	name_list_t *fru;
1396 	int status;
1397 	ari_list_t *ari_list;
1398 
1399 	while (tp) {
1400 		if (opt_a || tp->not_suppressed) {
1401 			if (page_feed)
1402 				(void) printf("\f\n");
1403 			if (!summary)
1404 				(void) printf("-----------------------------"
1405 				    "---------------------------------------"
1406 				    "----------\n");
1407 			slp = tp->status_rec_list;
1408 			end = slp;
1409 			do {
1410 				srp = slp->status_record;
1411 				if (!srp->not_suppressed) {
1412 					slp = slp->next;
1413 					continue;
1414 				}
1415 				fru = find_fru(srp, tp->resource);
1416 				if (fru) {
1417 					if (fru->label)
1418 						(void) printf("\"%s\" (%s) ",
1419 						    fru->label, fru->name);
1420 					else
1421 						(void) printf("%s ",
1422 						    fru->name);
1423 					break;
1424 				}
1425 				slp = slp->next;
1426 			} while (slp != end);
1427 
1428 			slp = tp->status_rec_list;
1429 			end = slp;
1430 			status = 0;
1431 			do {
1432 				srp = slp->status_record;
1433 				if (!srp->not_suppressed) {
1434 					slp = slp->next;
1435 					continue;
1436 				}
1437 				fru = srp->fru;
1438 				while (fru) {
1439 					if (strcmp(tp->resource,
1440 					    fru->name) == 0)
1441 						status |= fru->status;
1442 					fru = fru->next;
1443 					if (fru == srp->fru)
1444 						break;
1445 				}
1446 				slp = slp->next;
1447 			} while (slp != end);
1448 			if (status & FM_SUSPECT_NOT_PRESENT)
1449 				(void) printf(dgettext("FMD", "not present"));
1450 			else if (status & FM_SUSPECT_FAULTY)
1451 				(void) printf(dgettext("FMD", "faulty"));
1452 			else if (status & FM_SUSPECT_REPLACED)
1453 				(void) printf(dgettext("FMD", "replaced"));
1454 			else if (status & FM_SUSPECT_REPAIRED)
1455 				(void) printf(dgettext("FMD",
1456 				    "repair attempted"));
1457 			else if (status & FM_SUSPECT_ACQUITTED)
1458 				(void) printf(dgettext("FMD", "acquitted"));
1459 			else
1460 				(void) printf(dgettext("FMD", "removed"));
1461 
1462 			if (tp->injected)
1463 				(void) printf(dgettext("FMD", " injected\n"));
1464 			else
1465 				(void) printf(dgettext("FMD", "\n"));
1466 
1467 			slp = tp->status_rec_list;
1468 			end = slp;
1469 			do {
1470 				srp = slp->status_record;
1471 				if (!srp->not_suppressed) {
1472 					slp = slp->next;
1473 					continue;
1474 				}
1475 				uurp = srp->uurec;
1476 				fru = find_fru(srp, tp->resource);
1477 				if (fru) {
1478 					if (opt_i) {
1479 						ari_list = uurp->ari_uuid_list;
1480 						while (ari_list) {
1481 							print_fru_line(fru,
1482 							    ari_list->ari_uuid);
1483 							ari_list =
1484 							    ari_list->next;
1485 						}
1486 					} else {
1487 						print_fru_line(fru, uurp->uuid);
1488 					}
1489 				}
1490 				slp = slp->next;
1491 			} while (slp != end);
1492 			if (!summary) {
1493 				slp = tp->status_rec_list;
1494 				end = slp;
1495 				do {
1496 					srp = slp->status_record;
1497 					if (!srp->not_suppressed) {
1498 						slp = slp->next;
1499 						continue;
1500 					}
1501 					if (srp->serial &&
1502 					    !serial_in_fru(srp->fru,
1503 					    srp->serial)) {
1504 						print_name_list(srp->serial,
1505 						    dgettext("FMD",
1506 						    "Serial ID.  :"),
1507 						    0, 0, NULL, 1);
1508 						break;
1509 					}
1510 					slp = slp->next;
1511 				} while (slp != end);
1512 			}
1513 		}
1514 		tp = tp->next;
1515 		if (tp == status_fru_list)
1516 			break;
1517 	}
1518 }
1519 
1520 static void
print_asru(int opt_a)1521 print_asru(int opt_a)
1522 {
1523 	resource_list_t *tp = status_asru_list;
1524 	status_record_t *srp;
1525 	sr_list_t *slp, *end;
1526 	char *msg;
1527 	int status;
1528 	name_list_t *asru;
1529 
1530 	while (tp) {
1531 		if (opt_a || tp->not_suppressed) {
1532 			status = 0;
1533 			slp = tp->status_rec_list;
1534 			end = slp;
1535 			do {
1536 				srp = slp->status_record;
1537 				if (!srp->not_suppressed) {
1538 					slp = slp->next;
1539 					continue;
1540 				}
1541 				asru = srp->asru;
1542 				while (asru) {
1543 					if (strcmp(tp->resource,
1544 					    asru->name) == 0)
1545 						status |= asru->status;
1546 					asru = asru->next;
1547 					if (asru == srp->asru)
1548 						break;
1549 				}
1550 				slp = slp->next;
1551 			} while (slp != end);
1552 			switch (status) {
1553 			case 0:
1554 				msg = dgettext("FMD", "ok");
1555 				break;
1556 			case FM_SUSPECT_DEGRADED:
1557 				msg = dgettext("FMD", "degraded");
1558 				break;
1559 			case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
1560 				msg = dgettext("FMD", "degraded");
1561 				break;
1562 			case FM_SUSPECT_FAULTY:
1563 				msg = dgettext("FMD", "degraded");
1564 				break;
1565 			case FM_SUSPECT_UNUSABLE:
1566 				msg = dgettext("FMD", "unknown");
1567 				break;
1568 			case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
1569 				msg = dgettext("FMD", "faulted");
1570 				break;
1571 			default:
1572 				msg = "";
1573 				break;
1574 			}
1575 			(void) printf("%-69s %s", tp->resource, msg);
1576 			if (tp->injected)
1577 				(void) printf(dgettext("FMD", " injected\n"));
1578 			else
1579 				(void) printf(dgettext("FMD", "\n"));
1580 		}
1581 		tp = tp->next;
1582 		if (tp == status_asru_list)
1583 			break;
1584 	}
1585 }
1586 
1587 static int
uuid_in_list(char * uuid,uurec_select_t * uurecp)1588 uuid_in_list(char *uuid, uurec_select_t *uurecp)
1589 {
1590 	while (uurecp) {
1591 		if (strcmp(uuid, uurecp->uuid) == 0)
1592 			return (1);
1593 		uurecp = uurecp->next;
1594 	}
1595 	return (0);
1596 }
1597 
1598 static int
dfault_rec(const fmd_adm_caseinfo_t * acp,void * arg)1599 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg)
1600 {
1601 	int64_t *diag_time;
1602 	uint_t nelem;
1603 	int rt = 0;
1604 	char *uuid = "-";
1605 	uurec_select_t *uurecp = (uurec_select_t *)arg;
1606 
1607 	if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME,
1608 	    &diag_time, &nelem) == 0 && nelem >= 2) {
1609 		(void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID,
1610 		    &uuid);
1611 		if (uurecp == NULL || uuid_in_list(uuid, uurecp))
1612 			add_fault_record_to_catalog(acp->aci_event, *diag_time,
1613 			    uuid);
1614 	} else {
1615 		rt = -1;
1616 	}
1617 	return (rt);
1618 }
1619 
1620 /*ARGSUSED*/
1621 static int
dstatus_rec(const fmd_adm_rsrcinfo_t * ari,void * unused)1622 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused)
1623 {
1624 	update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid);
1625 	return (0);
1626 }
1627 
1628 static int
get_cases_from_fmd(fmd_adm_t * adm,uurec_select_t * uurecp,int opt_i)1629 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i)
1630 {
1631 	int rt = FMADM_EXIT_SUCCESS;
1632 
1633 	/*
1634 	 * These calls may fail with Protocol error if message payload is
1635 	 * too big
1636 	 */
1637 	if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0)
1638 		die("failed to get case list from fmd");
1639 	if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0)
1640 		die("failed to get case status from fmd");
1641 	return (rt);
1642 }
1643 
1644 /*
1645  * fmadm faulty command
1646  *
1647  *	-a		show hidden fault records
1648  *	-f		show faulty fru's
1649  *	-g		force grouping of similar faults on the same fru
1650  *	-n		number of fault records to display
1651  *	-p		pipe output through pager
1652  *	-r		show faulty asru's
1653  *	-s		print summary of first fault
1654  *	-u		print listed uuid's only
1655  *	-v		full output
1656  */
1657 
1658 int
cmd_faulty(fmd_adm_t * adm,int argc,char * argv[])1659 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[])
1660 {
1661 	int opt_a = 0, opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0;
1662 	int opt_i = 0;
1663 	char *pager;
1664 	FILE *fp;
1665 	int rt, c, stat;
1666 	uurec_select_t *tp;
1667 	uurec_select_t *uurecp = NULL;
1668 
1669 	while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) {
1670 		switch (c) {
1671 		case 'a':
1672 			opt_a++;
1673 			break;
1674 		case 'f':
1675 			opt_f++;
1676 			break;
1677 		case 'g':
1678 			opt_g++;
1679 			break;
1680 		case 'i':
1681 			opt_i++;
1682 			break;
1683 		case 'n':
1684 			max_fault = atoi(optarg);
1685 			break;
1686 		case 'p':
1687 			opt_p++;
1688 			break;
1689 		case 'r':
1690 			opt_r++;
1691 			break;
1692 		case 's':
1693 			opt_s++;
1694 			break;
1695 		case 'u':
1696 			tp = (uurec_select_t *)malloc(sizeof (uurec_select_t));
1697 			tp->uuid = optarg;
1698 			tp->next = uurecp;
1699 			uurecp = tp;
1700 			opt_a = 1;
1701 			break;
1702 		case 'v':
1703 			opt_v++;
1704 			break;
1705 		default:
1706 			return (FMADM_EXIT_USAGE);
1707 		}
1708 	}
1709 	if (optind < argc)
1710 		return (FMADM_EXIT_USAGE);
1711 
1712 	if ((fmadm_msghdl = fmd_msg_init(NULL, FMD_MSG_VERSION)) == NULL)
1713 		return (FMADM_EXIT_ERROR);
1714 	rt = get_cases_from_fmd(adm, uurecp, opt_i);
1715 	if (opt_p) {
1716 		if ((pager = getenv("PAGER")) == NULL)
1717 			pager = "/usr/bin/more";
1718 		fp = popen(pager, "w");
1719 		if (fp == NULL) {
1720 			rt = FMADM_EXIT_ERROR;
1721 			opt_p = 0;
1722 		} else {
1723 			(void) dup2(fileno(fp), 1);
1724 			setbuf(stdout, NULL);
1725 			(void) fclose(fp);
1726 		}
1727 	}
1728 	max_display = max_fault;
1729 	if (opt_f)
1730 		print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s);
1731 	if (opt_r)
1732 		print_asru(opt_a);
1733 	if (opt_f == 0 && opt_r == 0)
1734 		print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s);
1735 	fmd_msg_fini(fmadm_msghdl);
1736 	if (topo_handle)
1737 		topo_close(topo_handle);
1738 	if (opt_p) {
1739 		(void) fclose(stdout);
1740 		(void) wait(&stat);
1741 	}
1742 	return (rt);
1743 }
1744 
1745 int
cmd_flush(fmd_adm_t * adm,int argc,char * argv[])1746 cmd_flush(fmd_adm_t *adm, int argc, char *argv[])
1747 {
1748 	int i, status = FMADM_EXIT_SUCCESS;
1749 
1750 	if (argc < 2 || (i = getopt(argc, argv, "")) != EOF)
1751 		return (FMADM_EXIT_USAGE);
1752 
1753 	for (i = 1; i < argc; i++) {
1754 		if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) {
1755 			warn("failed to flush %s", argv[i]);
1756 			status = FMADM_EXIT_ERROR;
1757 		} else
1758 			note("flushed resource history for %s\n", argv[i]);
1759 	}
1760 
1761 	return (status);
1762 }
1763 
1764 int
cmd_repair(fmd_adm_t * adm,int argc,char * argv[])1765 cmd_repair(fmd_adm_t *adm, int argc, char *argv[])
1766 {
1767 	int err;
1768 
1769 	if (getopt(argc, argv, "") != EOF)
1770 		return (FMADM_EXIT_USAGE);
1771 
1772 	if (argc - optind != 1)
1773 		return (FMADM_EXIT_USAGE);
1774 
1775 	/*
1776 	 * argument could be a uuid, an fmri (asru, fru or resource)
1777 	 * or a label. Try uuid first, If that fails try the others.
1778 	 */
1779 	err = fmd_adm_case_repair(adm, argv[optind]);
1780 	if (err != 0)
1781 		err = fmd_adm_rsrc_repaired(adm, argv[optind]);
1782 
1783 	if (err != 0)
1784 		die("failed to record repair to %s", argv[optind]);
1785 
1786 	note("recorded repair to %s\n", argv[optind]);
1787 	return (FMADM_EXIT_SUCCESS);
1788 }
1789 
1790 int
cmd_repaired(fmd_adm_t * adm,int argc,char * argv[])1791 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[])
1792 {
1793 	int err;
1794 
1795 	if (getopt(argc, argv, "") != EOF)
1796 		return (FMADM_EXIT_USAGE);
1797 
1798 	if (argc - optind != 1)
1799 		return (FMADM_EXIT_USAGE);
1800 
1801 	/*
1802 	 * argument could be an fmri (asru, fru or resource) or a label.
1803 	 */
1804 	err = fmd_adm_rsrc_repaired(adm, argv[optind]);
1805 	if (err != 0)
1806 		die("failed to record repair to %s", argv[optind]);
1807 
1808 	note("recorded repair to of %s\n", argv[optind]);
1809 	return (FMADM_EXIT_SUCCESS);
1810 }
1811 
1812 int
cmd_replaced(fmd_adm_t * adm,int argc,char * argv[])1813 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[])
1814 {
1815 	int err;
1816 
1817 	if (getopt(argc, argv, "") != EOF)
1818 		return (FMADM_EXIT_USAGE);
1819 
1820 	if (argc - optind != 1)
1821 		return (FMADM_EXIT_USAGE);
1822 
1823 	/*
1824 	 * argument could be an fmri (asru, fru or resource) or a label.
1825 	 */
1826 	err = fmd_adm_rsrc_replaced(adm, argv[optind]);
1827 	if (err != 0)
1828 		die("failed to record replacement of %s", argv[optind]);
1829 
1830 	note("recorded replacement of %s\n", argv[optind]);
1831 	return (FMADM_EXIT_SUCCESS);
1832 }
1833 
1834 int
cmd_acquit(fmd_adm_t * adm,int argc,char * argv[])1835 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[])
1836 {
1837 	int err;
1838 
1839 	if (getopt(argc, argv, "") != EOF)
1840 		return (FMADM_EXIT_USAGE);
1841 
1842 	if (argc - optind != 1 && argc - optind != 2)
1843 		return (FMADM_EXIT_USAGE);
1844 
1845 	/*
1846 	 * argument could be a uuid, an fmri (asru, fru or resource)
1847 	 * or a label. Or it could be a uuid and an fmri or label.
1848 	 */
1849 	if (argc - optind == 2) {
1850 		err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]);
1851 		if (err != 0)
1852 			err = fmd_adm_rsrc_acquit(adm, argv[optind + 1],
1853 			    argv[optind]);
1854 	} else {
1855 		err = fmd_adm_case_acquit(adm, argv[optind]);
1856 		if (err != 0)
1857 			err = fmd_adm_rsrc_acquit(adm, argv[optind], "");
1858 	}
1859 
1860 	if (err != 0)
1861 		die("failed to record acquital of %s", argv[optind]);
1862 
1863 	note("recorded acquital of %s\n", argv[optind]);
1864 	return (FMADM_EXIT_SUCCESS);
1865 }
1866