1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <fmadm.h> 28 #include <errno.h> 29 #include <limits.h> 30 #include <strings.h> 31 #include <stdio.h> 32 #include <unistd.h> 33 #include <sys/wait.h> 34 #include <sys/stat.h> 35 #include <fcntl.h> 36 #include <fm/fmd_log.h> 37 #include <sys/fm/protocol.h> 38 #include <fm/libtopo.h> 39 #include <fm/fmd_adm.h> 40 #include <fm/fmd_msg.h> 41 #include <dlfcn.h> 42 #include <sys/systeminfo.h> 43 #include <sys/utsname.h> 44 #include <libintl.h> 45 #include <locale.h> 46 #include <sys/smbios.h> 47 #include <libdevinfo.h> 48 #include <stdlib.h> 49 50 #define offsetof(s, m) ((size_t)(&(((s*)0)->m))) 51 52 /* 53 * Fault records are added to catalog by calling add_fault_record_to_catalog() 54 * records are stored in order of importance to the system. 55 * If -g flag is set or not_suppressed is not set and the class fru, fault, 56 * type are the same then details are merged into an existing record, with uuid 57 * records are stored in time order. 
58 * For each record information is extracted from nvlist and merged into linked 59 * list each is checked for identical records for which percentage certainty are 60 * added together. 61 * print_catalog() is called to print out catalog and release external resources 62 * 63 * /---------------\ 64 * status_rec_list -> | | -| 65 * \---------------/ 66 * \/ 67 * /---------------\ /-------\ /-------\ 68 * status_fru_list | status_record | -> | uurec | -> | uurec | -| 69 * \/ | | |- | | <- | | 70 * /-------------\ | | \-------/ \-------/ 71 * | | -> | | \/ \/ 72 * \-------------/ | | /-------\ /-------\ 73 * \/ | | -> | asru | -> | asru | 74 * --- | | | | <- | | 75 * | | \-------/ \-------/ 76 * status_asru_list | class | 77 * \/ | resource | /-------\ /-------\ 78 * /-------------\ | fru | -> | list | -> | list | 79 * | | -> | serial | | | <- | | 80 * \-------------/ | | \-------/ \-------/ 81 * \/ \---------------/ 82 * --- \/ /\ 83 * /---------------\ 84 * | status_record | 85 * \---------------/ 86 * 87 * Fmadm faulty takes a number of options which affect the format of the 88 * output displayed. By default, the display reports the FRU and ASRU along 89 * with other information on per-case basis as in the example below. 90 * 91 * --------------- ------------------------------------ -------------- ------- 92 * TIME EVENT-ID MSG-ID SEVERITY 93 * --------------- ------------------------------------ -------------- ------- 94 * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c AMD-8000-2F Major 95 * 96 * Fault class : fault.memory.dimm_sb 97 * Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 98 * faulted but still in service 99 * FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0) 100 * faulty 101 * 102 * Description : The number of errors associated with this memory module has 103 * exceeded acceptable levels. Refer to 104 * http://sun.com/msg/AMD-8000-2F for more information. 
 *
 * Response : Pages of memory associated with this memory module are being
 *		removed from service as errors are reported.
 *
 * Impact : Total system memory capacity will be reduced as pages are
 *		retired.
 *
 * Action : Schedule a repair procedure to replace the affected memory
 *		module. Use fmdump -v -u <EVENT_ID> to identify the module.
 *
 * The -v flag is similar, but adds some additional information such as the
 * resource. The -s flag is also similar but just gives the top line summary.
 * All these options (ie without the -f or -r flags) use the print_catalog()
 * function to do the display.
 *
 * The -f flag changes the output so that it appears sorted on a per-fru basis.
 * The output is somewhat cut down compared to the default output. If -f is
 * used, then print_fru() is used to print the output.
 *
 * -----------------------------------------------------------------------------
 * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
 * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
 *
 * Description : A problem was detected for a PCI device.
 *		Refer to http://sun.com/msg/PCI-8000-7J for more information.
 *
 * Response : One or more device instances may be disabled
 *
 * Impact : Possible loss of services provided by the device instances
 *		associated with this fault
 *
 * Action : Schedule a repair procedure to replace the affected device.
 *		Use fmdump -v -u <EVENT_ID> to identify the device or contact
 *		Sun for support.
 *
 * The -r flag changes the output so that it appears sorted on a per-asru basis.
 * The output is very much cut down compared to the default output, just giving
 * the asru fmri and state. Here print_asru() is used to print the output.
 *
 * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded
 *
 * For all fmadm faulty options, the sequence of events is
 *
 * 1) Walk through all the cases in the system using fmd_adm_case_iter() and
 * for each case call dfault_rec(). This will call add_fault_record_to_catalog()
 * This will extract the data from the nvlist and call catalog_new_record() to
 * save the data away in various linked lists in the catalogue.
 *
 * 2) Once this is done, the data can be supplemented by using
 * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option.
 *
 * 3) Finally print_catalog(), print_fru() or print_asru() are called as
 * appropriate to display the information from the catalogue sorted in the
 * requested way.
 *
 */

/*
 * One named entry (class, fru, asru, resource or serial number) of a suspect
 * list.  Entries are kept on a circular doubly linked list; a freshly
 * allocated entry points at itself (see alloc_name_list()).
 */
typedef struct name_list {
	struct name_list *next;
	struct name_list *prev;
	char *name;		/* strdup()ed name, freed with the entry */
	uint8_t pct;		/* certainty; summed or maximised on merge */
	uint8_t max_pct;	/* highest single certainty seen for name */
	ushort_t count;		/* number of entries merged into this one */
	int status;		/* FM_SUSPECT_* status bits */
	char *label;		/* optional location label, may be NULL */
} name_list_t;

/*
 * Singly linked list of uuid strings attached to a uurec by
 * update_asru_state_in_catalog(); new entries are pushed on the front.
 */
typedef struct ari_list {
	char *ari_uuid;
	struct ari_list *next;
} ari_list_t;

/*
 * Per-case record.  The uurecs of one status_record form a NULL terminated
 * doubly linked list kept in ascending time order by catalog_merge_record().
 */
typedef struct uurec {
	struct uurec *next;
	struct uurec *prev;
	char *uuid;			/* strdup()ed case uuid */
	ari_list_t *ari_uuid_list;
	name_list_t *asru;
	uint64_t sec;			/* time of the case, in seconds */
	nvlist_t *event;		/* nvlist_dup() copy of the event */
} uurec_t;

/*
 * Singly linked list of uuids.
 * NOTE(review): not referenced in the part of the file visible here -
 * presumably a uuid selection filter; confirm against the rest of the file.
 */
typedef struct uurec_select {
	struct uurec_select *next;
	char *uuid;
} uurec_select_t;

/*
 * Identity of the host a fault was diagnosed on, taken from the authority
 * of the diagnosis engine FMRI (see find_hostid()).  platform and server
 * are stored as "-" when absent; chassis and domain are NULL when absent.
 */
typedef struct host_id {
	char *chassis;
	char *server;
	char *platform;
	char *domain;
} hostid_t;

/*
 * Singly linked list of every distinct host seen so far (host_list).
 */
typedef struct host_id_list {
	hostid_t hostid;
	struct host_id_list *next;
} host_id_list_t;

/*
 * One catalogue entry: a fault (or a set of merged, similar faults) together
 * with the name lists describing its suspects.
 */
typedef struct status_record {
	hostid_t *host;		/* may be NULL, see find_hostid() */
	int nrecs;		/* number of uurecs on the uurec list */
	uurec_t *uurec;
	char *severity;		/* in C locale */
	char *msgid;
	name_list_t *class;
	name_list_t *resource;
	name_list_t *asru;
	name_list_t *fru;
	name_list_t *serial;
	uint8_t not_suppressed;
} status_record_t;
218 219 typedef struct sr_list { 220 struct sr_list *next; 221 struct sr_list *prev; 222 struct status_record *status_record; 223 } sr_list_t; 224 225 typedef struct resource_list { 226 struct resource_list *next; 227 struct resource_list *prev; 228 sr_list_t *status_rec_list; 229 char *resource; 230 uint8_t not_suppressed; 231 uint8_t max_pct; 232 } resource_list_t; 233 234 typedef struct tgetlabel_data { 235 char *label; 236 char *fru; 237 } tgetlabel_data_t; 238 239 sr_list_t *status_rec_list; 240 resource_list_t *status_fru_list; 241 resource_list_t *status_asru_list; 242 243 static int max_display; 244 static int max_fault = 0; 245 static topo_hdl_t *topo_handle; 246 static char *topo_handle_uuid; 247 static host_id_list_t *host_list; 248 static int n_server; 249 static int opt_g; 250 static fmd_msg_hdl_t *fmadm_msghdl = NULL; /* handle for libfmd_msg calls */ 251 252 static char * 253 format_date(char *buf, size_t len, uint64_t sec) 254 { 255 if (sec > LONG_MAX) { 256 (void) fprintf(stderr, 257 "record time is too large for 32-bit utility\n"); 258 (void) snprintf(buf, len, "0x%llx", sec); 259 } else { 260 time_t tod = (time_t)sec; 261 (void) strftime(buf, len, "%b %d %T", localtime(&tod)); 262 } 263 264 return (buf); 265 } 266 267 static hostid_t * 268 find_hostid_in_list(char *platform, char *chassis, char *server, char *domain) 269 { 270 hostid_t *rt = NULL; 271 host_id_list_t *hostp; 272 273 if (platform == NULL) 274 platform = "-"; 275 if (server == NULL) 276 server = "-"; 277 hostp = host_list; 278 while (hostp) { 279 if (hostp->hostid.platform && 280 strcmp(hostp->hostid.platform, platform) == 0 && 281 hostp->hostid.server && 282 strcmp(hostp->hostid.server, server) == 0 && 283 (chassis == NULL || hostp->hostid.chassis == NULL || 284 strcmp(chassis, hostp->hostid.chassis) == 0) && 285 (domain == NULL || hostp->hostid.domain == NULL || 286 strcmp(domain, hostp->hostid.domain) == 0)) { 287 rt = &hostp->hostid; 288 break; 289 } 290 hostp = hostp->next; 291 
} 292 if (rt == NULL) { 293 hostp = malloc(sizeof (host_id_list_t)); 294 hostp->hostid.platform = strdup(platform); 295 hostp->hostid.server = strdup(server); 296 hostp->hostid.chassis = chassis ? strdup(chassis) : NULL; 297 hostp->hostid.domain = domain ? strdup(domain) : NULL; 298 hostp->next = host_list; 299 host_list = hostp; 300 rt = &hostp->hostid; 301 n_server++; 302 } 303 return (rt); 304 } 305 306 static hostid_t * 307 find_hostid(nvlist_t *nvl) 308 { 309 char *platform = NULL, *chassis = NULL, *server = NULL, *domain = NULL; 310 nvlist_t *auth, *fmri; 311 hostid_t *rt = NULL; 312 313 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 && 314 nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) { 315 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT, 316 &platform); 317 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server); 318 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS, 319 &chassis); 320 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_DOMAIN, &domain); 321 rt = find_hostid_in_list(platform, chassis, server, domain); 322 } 323 return (rt); 324 } 325 326 /* 327 * compare two fru strings which are made up of substrings seperated by '/' 328 * return true if every substring is the same in the two strings, or if a 329 * substring is null in one. 330 */ 331 332 static int 333 frucmp(char *f1, char *f2) 334 { 335 char c1, c2; 336 int i = 0; 337 338 for (;;) { 339 c1 = *f1; 340 c2 = *f2; 341 if (c1 == c2) { 342 i = (c1 == '/') ? 
0 : i + 1; 343 } else if (i == 0) { 344 if (c1 == '/') { 345 do { 346 f2++; 347 } while ((c2 = *f2) != 0 && c2 != '/'); 348 if (c2 == NULL) 349 break; 350 } else if (c2 == '/') { 351 do { 352 f1++; 353 } while ((c1 = *f1) != 0 && c1 != '/'); 354 if (c1 == NULL) 355 break; 356 } else 357 break; 358 } else 359 break; 360 if (c1 == NULL) 361 return (0); 362 f1++; 363 f2++; 364 } 365 return (1); 366 } 367 368 static int 369 tgetlabel(topo_hdl_t *thp, tnode_t *node, void *arg) 370 { 371 int err; 372 char *fru_name, *lname; 373 nvlist_t *fru = NULL; 374 int rt = TOPO_WALK_NEXT; 375 tgetlabel_data_t *tdp = (tgetlabel_data_t *)arg; 376 377 if (topo_node_fru(node, &fru, NULL, &err) == 0) { 378 if (topo_fmri_nvl2str(thp, fru, &fru_name, &err) == 0) { 379 if (frucmp(tdp->fru, fru_name) == 0 && 380 topo_node_label(node, &lname, &err) == 0) { 381 tdp->label = strdup(lname); 382 topo_hdl_strfree(thp, lname); 383 rt = TOPO_WALK_TERMINATE; 384 } 385 topo_hdl_strfree(thp, fru_name); 386 } 387 nvlist_free(fru); 388 } 389 return (rt); 390 } 391 392 static void 393 label_get_topo(void) 394 { 395 int err; 396 397 topo_handle = topo_open(TOPO_VERSION, 0, &err); 398 if (topo_handle) { 399 topo_handle_uuid = topo_snap_hold(topo_handle, NULL, &err); 400 } 401 } 402 403 static void 404 label_release_topo(void) 405 { 406 if (topo_handle_uuid) 407 topo_hdl_strfree(topo_handle, topo_handle_uuid); 408 if (topo_handle) { 409 topo_snap_release(topo_handle); 410 topo_close(topo_handle); 411 } 412 } 413 414 static char * 415 get_fmri_label(char *fru) 416 { 417 topo_walk_t *twp; 418 tgetlabel_data_t td; 419 int err; 420 421 td.label = NULL; 422 td.fru = fru; 423 if (topo_handle == NULL) 424 label_get_topo(); 425 if (topo_handle_uuid) { 426 twp = topo_walk_init(topo_handle, FM_FMRI_SCHEME_HC, 427 tgetlabel, &td, &err); 428 if (twp) { 429 topo_walk_step(twp, TOPO_WALK_CHILD); 430 topo_walk_fini(twp); 431 } 432 } 433 return (td.label); 434 } 435 436 static char * 437 get_nvl2str_topo(nvlist_t *nvl) 438 
{ 439 char *name = NULL; 440 char *tname; 441 int err; 442 char *scheme = NULL; 443 char *mod_name = NULL; 444 char buf[128]; 445 446 if (topo_handle == NULL) 447 label_get_topo(); 448 if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) { 449 name = strdup(tname); 450 topo_hdl_strfree(topo_handle, tname); 451 } else { 452 (void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme); 453 (void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name); 454 if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 && 455 mod_name) { 456 (void) snprintf(buf, sizeof (buf), "%s:///module/%s", 457 scheme, mod_name); 458 name = strdup(buf); 459 } 460 } 461 return (name); 462 } 463 464 static int 465 set_priority(char *s) 466 { 467 int rt = 0; 468 469 if (s) { 470 if (strcmp(s, "Minor") == 0) 471 rt = 1; 472 else if (strcmp(s, "Major") == 0) 473 rt = 10; 474 else if (strcmp(s, "Critical") == 0) 475 rt = 100; 476 } 477 return (rt); 478 } 479 480 static int 481 cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1, 482 uint8_t p2) 483 { 484 int r1, r2; 485 int rt; 486 487 r1 = set_priority(s1); 488 r2 = set_priority(s2); 489 rt = r1 - r2; 490 if (rt == 0) { 491 if (t1 > t2) 492 rt = 1; 493 else if (t1 < t2) 494 rt = -1; 495 else 496 rt = p1 - p2; 497 } 498 return (rt); 499 } 500 501 /* 502 * merge two lists into one, by comparing enties in new and moving into list if 503 * name is not there or free off memory for names which are already there 504 * add_pct indicates if pct is the sum or highest pct 505 */ 506 static name_list_t * 507 merge_name_list(name_list_t **list, name_list_t *new, int add_pct) 508 { 509 name_list_t *lp, *np, *sp, *rt = NULL; 510 int max_pct; 511 512 rt = *list; 513 np = new; 514 while (np) { 515 lp = *list; 516 while (lp) { 517 if (strcmp(lp->name, np->name) == 0) 518 break; 519 lp = lp->next; 520 if (lp == *list) 521 lp = NULL; 522 } 523 if (np->next == new) 524 sp = NULL; 525 else 526 sp = np->next; 527 if (lp) { 528 lp->status |= 
(np->status & FM_SUSPECT_FAULTY); 529 if (add_pct) { 530 lp->pct += np->pct; 531 lp->count += np->count; 532 } else if (np->pct > lp->pct) { 533 lp->pct = np->pct; 534 } 535 max_pct = np->max_pct; 536 if (np->label) 537 free(np->label); 538 free(np->name); 539 free(np); 540 np = NULL; 541 if (max_pct > lp->max_pct) { 542 lp->max_pct = max_pct; 543 if (lp->max_pct > lp->prev->max_pct && 544 lp != *list) { 545 lp->prev->next = lp->next; 546 lp->next->prev = lp->prev; 547 np = lp; 548 } 549 } 550 } 551 if (np) { 552 lp = *list; 553 if (lp) { 554 if (np->max_pct > lp->max_pct) { 555 np->next = lp; 556 np->prev = lp->prev; 557 lp->prev->next = np; 558 lp->prev = np; 559 *list = np; 560 rt = np; 561 } else { 562 lp = lp->next; 563 while (lp != *list && 564 np->max_pct < lp->max_pct) { 565 lp = lp->next; 566 } 567 np->next = lp; 568 np->prev = lp->prev; 569 lp->prev->next = np; 570 lp->prev = np; 571 } 572 } else { 573 *list = np; 574 np->next = np; 575 np->prev = np; 576 rt = np; 577 } 578 } 579 np = sp; 580 } 581 return (rt); 582 } 583 584 /* 585 * compare entries in two lists return true if the two lists have identical 586 * content. The two lists may not have entries in the same order, so we compare 587 * the size of the list as well as trying to find every entry from one list in 588 * the other. 
589 */ 590 static int 591 cmp_name_list(name_list_t *lxp1, name_list_t *lxp2) 592 { 593 name_list_t *lp1, *lp2; 594 int l1 = 0, l2 = 0, common = 0; 595 596 lp2 = lxp2; 597 while (lp2) { 598 l2++; 599 lp2 = lp2->next; 600 if (lp2 == lxp2) 601 break; 602 } 603 lp1 = lxp1; 604 while (lp1) { 605 l1++; 606 lp2 = lxp2; 607 while (lp2) { 608 if (strcmp(lp2->name, lp1->name) == 0) { 609 common++; 610 break; 611 } 612 lp2 = lp2->next; 613 if (lp2 == lxp2) 614 break; 615 } 616 lp1 = lp1->next; 617 if (lp1 == lxp1) 618 break; 619 } 620 if (l1 == l2 && l2 == common) 621 return (0); 622 else 623 return (1); 624 } 625 626 static name_list_t * 627 alloc_name_list(char *name, uint8_t pct) 628 { 629 name_list_t *nlp; 630 631 nlp = malloc(sizeof (*nlp)); 632 nlp->name = strdup(name); 633 nlp->pct = pct; 634 nlp->max_pct = pct; 635 nlp->count = 1; 636 nlp->next = nlp; 637 nlp->prev = nlp; 638 nlp->status = 0; 639 nlp->label = NULL; 640 return (nlp); 641 } 642 643 static void 644 free_name_list(name_list_t *list) 645 { 646 name_list_t *next = list; 647 name_list_t *lp; 648 649 if (list) { 650 do { 651 lp = next; 652 next = lp->next; 653 if (lp->label) 654 free(lp->label); 655 free(lp->name); 656 free(lp); 657 } while (next != list); 658 } 659 } 660 661 static status_record_t * 662 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class, 663 name_list_t *fru, name_list_t *asru, name_list_t *resource, 664 name_list_t *serial, boolean_t not_suppressed, 665 hostid_t *hostid) 666 { 667 status_record_t *status_rec_p; 668 669 status_rec_p = (status_record_t *)malloc(sizeof (status_record_t)); 670 status_rec_p->nrecs = 1; 671 status_rec_p->host = hostid; 672 status_rec_p->uurec = uurec_p; 673 uurec_p->next = NULL; 674 uurec_p->prev = NULL; 675 uurec_p->asru = asru; 676 if ((status_rec_p->severity = fmd_msg_getitem_id(fmadm_msghdl, NULL, 677 msgid, FMD_MSG_ITEM_SEVERITY)) == NULL) 678 status_rec_p->severity = strdup("unknown"); 679 status_rec_p->class = class; 680 status_rec_p->fru = 
fru; 681 status_rec_p->asru = asru; 682 status_rec_p->resource = resource; 683 status_rec_p->serial = serial; 684 status_rec_p->msgid = strdup(msgid); 685 status_rec_p->not_suppressed = not_suppressed; 686 return (status_rec_p); 687 } 688 689 /* 690 * add record to given list maintaining order higher priority first. 691 */ 692 static void 693 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp) 694 { 695 sr_list_t *tp, *np, *sp; 696 int order; 697 uint64_t sec; 698 699 np = malloc(sizeof (sr_list_t)); 700 np->status_record = status_rec_p; 701 sec = status_rec_p->uurec->sec; 702 if ((sp = *list_pp) == NULL) { 703 *list_pp = np; 704 np->next = np; 705 np->prev = np; 706 } else { 707 /* insert new record in front of lower priority */ 708 tp = sp; 709 order = cmp_priority(status_rec_p->severity, 710 sp->status_record->severity, sec, 711 tp->status_record->uurec->sec, 0, 0); 712 if (order > 0) { 713 *list_pp = np; 714 } else { 715 tp = sp->next; 716 while (tp != sp && 717 cmp_priority(status_rec_p->severity, 718 tp->status_record->severity, sec, 719 tp->status_record->uurec->sec, 0, 0)) { 720 tp = tp->next; 721 } 722 } 723 np->next = tp; 724 np->prev = tp->prev; 725 tp->prev->next = np; 726 tp->prev = np; 727 } 728 } 729 730 static void 731 add_resource(status_record_t *status_rec_p, resource_list_t **rp, 732 resource_list_t *np) 733 { 734 int order; 735 uint64_t sec; 736 resource_list_t *sp, *tp; 737 status_record_t *srp; 738 char *severity = status_rec_p->severity; 739 740 add_rec_list(status_rec_p, &np->status_rec_list); 741 if ((sp = *rp) == NULL) { 742 np->next = np; 743 np->prev = np; 744 *rp = np; 745 } else { 746 /* 747 * insert new record in front of lower priority 748 */ 749 tp = sp->next; 750 srp = sp->status_rec_list->status_record; 751 sec = status_rec_p->uurec->sec; 752 order = cmp_priority(severity, srp->severity, sec, 753 srp->uurec->sec, np->max_pct, sp->max_pct); 754 if (order > 0) { 755 *rp = np; 756 } else { 757 srp = 
tp->status_rec_list->status_record; 758 while (tp != sp && 759 cmp_priority(severity, srp->severity, sec, 760 srp->uurec->sec, np->max_pct, sp->max_pct) < 0) { 761 tp = tp->next; 762 srp = tp->status_rec_list->status_record; 763 } 764 } 765 np->next = tp; 766 np->prev = tp->prev; 767 tp->prev->next = np; 768 tp->prev = np; 769 } 770 } 771 772 static void 773 add_resource_list(status_record_t *status_rec_p, name_list_t *fp, 774 resource_list_t **rpp) 775 { 776 int order; 777 resource_list_t *np, *end; 778 status_record_t *srp; 779 780 np = *rpp; 781 end = np; 782 while (np) { 783 if (strcmp(fp->name, np->resource) == 0) { 784 np->not_suppressed |= status_rec_p->not_suppressed; 785 srp = np->status_rec_list->status_record; 786 order = cmp_priority(status_rec_p->severity, 787 srp->severity, status_rec_p->uurec->sec, 788 srp->uurec->sec, fp->max_pct, np->max_pct); 789 if (order > 0 && np != end) { 790 /* 791 * remove from list and add again using 792 * new priority 793 */ 794 np->prev->next = np->next; 795 np->next->prev = np->prev; 796 add_resource(status_rec_p, 797 rpp, np); 798 } else { 799 add_rec_list(status_rec_p, 800 &np->status_rec_list); 801 } 802 break; 803 } 804 np = np->next; 805 if (np == end) { 806 np = NULL; 807 break; 808 } 809 } 810 if (np == NULL) { 811 np = malloc(sizeof (resource_list_t)); 812 np->resource = fp->name; 813 np->not_suppressed = status_rec_p->not_suppressed; 814 np->status_rec_list = NULL; 815 np->max_pct = fp->max_pct; 816 add_resource(status_rec_p, rpp, np); 817 } 818 } 819 820 static void 821 add_list(status_record_t *status_rec_p, name_list_t *listp, 822 resource_list_t **glistp) 823 { 824 name_list_t *fp, *end; 825 826 fp = listp; 827 end = fp; 828 while (fp) { 829 add_resource_list(status_rec_p, fp, glistp); 830 fp = fp->next; 831 if (fp == end) 832 break; 833 } 834 } 835 836 /* 837 * add record to rec, fru and asru lists. 
838 */ 839 static void 840 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class, 841 name_list_t *fru, name_list_t *asru, name_list_t *resource, 842 name_list_t *serial, boolean_t not_suppressed, 843 hostid_t *hostid) 844 { 845 status_record_t *status_rec_p; 846 847 status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru, 848 resource, serial, not_suppressed, hostid); 849 add_rec_list(status_rec_p, &status_rec_list); 850 if (status_rec_p->fru) 851 add_list(status_rec_p, status_rec_p->fru, &status_fru_list); 852 if (status_rec_p->asru) 853 add_list(status_rec_p, status_rec_p->asru, &status_asru_list); 854 } 855 856 /* 857 * add uuid and diagnoses time to an existing record for similar fault on the 858 * same fru 859 */ 860 static void 861 catalog_merge_record(status_record_t *status_rec_p, uurec_t *uurec_p, 862 name_list_t *asru, name_list_t *resource, name_list_t *serial, 863 boolean_t not_suppressed) 864 { 865 uurec_t *uurec1_p; 866 867 status_rec_p->nrecs++; 868 /* add uurec in time order */ 869 if (status_rec_p->uurec->sec > uurec_p->sec) { 870 uurec_p->next = status_rec_p->uurec; 871 uurec_p->prev = NULL; 872 status_rec_p->uurec = uurec_p; 873 } else { 874 uurec1_p = status_rec_p->uurec; 875 while (uurec1_p->next && uurec1_p->next->sec <= uurec_p->sec) 876 uurec1_p = uurec1_p->next; 877 if (uurec1_p->next) 878 uurec1_p->next->prev = uurec_p; 879 uurec_p->next = uurec1_p->next; 880 uurec_p->prev = uurec1_p; 881 uurec1_p->next = uurec_p; 882 } 883 status_rec_p->not_suppressed |= not_suppressed; 884 uurec_p->asru = merge_name_list(&status_rec_p->asru, asru, 0); 885 (void) merge_name_list(&status_rec_p->resource, resource, 0); 886 (void) merge_name_list(&status_rec_p->serial, serial, 0); 887 } 888 889 static status_record_t * 890 record_in_catalog(name_list_t *class, name_list_t *fru, 891 char *msgid, hostid_t *host) 892 { 893 sr_list_t *status_rec_p; 894 status_record_t *srp = NULL; 895 896 status_rec_p = status_rec_list; 897 while 
(status_rec_p) { 898 srp = status_rec_p->status_record; 899 if (host == srp->host && 900 cmp_name_list(class, srp->class) == 0 && 901 cmp_name_list(fru, srp->fru) == 0 && 902 strcmp(msgid, srp->msgid) == 0) 903 break; 904 if (status_rec_p->next == status_rec_list) { 905 srp = NULL; 906 break; 907 } else { 908 status_rec_p = status_rec_p->next; 909 } 910 } 911 return (srp); 912 } 913 914 static void 915 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct) 916 { 917 char *name; 918 char *serial = NULL; 919 char **lserial = NULL; 920 uint64_t serint; 921 name_list_t *nlp; 922 int j; 923 uint_t nelem; 924 char buf[64]; 925 926 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) { 927 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 928 if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID, 929 &serint) == 0) { 930 (void) snprintf(buf, sizeof (buf), "%llX", 931 serint); 932 nlp = alloc_name_list(buf, pct); 933 (void) merge_name_list(serial_p, nlp, 1); 934 } 935 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 936 if (nvlist_lookup_string_array(nvl, 937 FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) { 938 nlp = alloc_name_list(lserial[0], pct); 939 for (j = 1; j < nelem; j++) { 940 name_list_t *n1lp; 941 n1lp = alloc_name_list(lserial[j], pct); 942 (void) merge_name_list(&nlp, n1lp, 1); 943 } 944 (void) merge_name_list(serial_p, nlp, 1); 945 } 946 } else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) { 947 if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID, 948 &serial) == 0) { 949 nlp = alloc_name_list(serial, pct); 950 (void) merge_name_list(serial_p, nlp, 1); 951 } 952 } 953 } 954 } 955 956 static void 957 extract_record_info(nvlist_t *nvl, name_list_t **class_p, 958 name_list_t **fru_p, name_list_t **serial_p, 959 name_list_t **resource_p, name_list_t **asru_p, uint8_t status) 960 { 961 nvlist_t *lfru, *lasru, *rsrc; 962 name_list_t *nlp; 963 char *name; 964 uint8_t lpct = 0; 965 char *lclass = NULL; 966 char *label; 967 968 (void) 
nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct); 969 if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) { 970 nlp = alloc_name_list(lclass, lpct); 971 (void) merge_name_list(class_p, nlp, 1); 972 } 973 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) { 974 name = get_nvl2str_topo(lfru); 975 if (name != NULL) { 976 nlp = alloc_name_list(name, lpct); 977 nlp->status = status & ~(FM_SUSPECT_UNUSABLE | 978 FM_SUSPECT_DEGRADED); 979 free(name); 980 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 981 &label) == 0) 982 nlp->label = strdup(label); 983 (void) merge_name_list(fru_p, nlp, 1); 984 } 985 get_serial_no(lfru, serial_p, lpct); 986 } 987 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) { 988 name = get_nvl2str_topo(lasru); 989 if (name != NULL) { 990 nlp = alloc_name_list(name, lpct); 991 nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT | 992 FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED | 993 FM_SUSPECT_ACQUITTED); 994 free(name); 995 (void) merge_name_list(asru_p, nlp, 1); 996 } 997 get_serial_no(lasru, serial_p, lpct); 998 } 999 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) { 1000 name = get_nvl2str_topo(rsrc); 1001 if (name != NULL) { 1002 nlp = alloc_name_list(name, lpct); 1003 nlp->status = status; 1004 free(name); 1005 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1006 &label) == 0) 1007 nlp->label = strdup(label); 1008 (void) merge_name_list(resource_p, nlp, 1); 1009 } 1010 } 1011 } 1012 1013 static void 1014 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid) 1015 { 1016 char *msgid = "-"; 1017 uint_t i, size = 0; 1018 name_list_t *class = NULL, *resource = NULL; 1019 name_list_t *asru = NULL, *fru = NULL, *serial = NULL; 1020 nvlist_t **nva; 1021 uint8_t *ba; 1022 status_record_t *status_rec_p; 1023 uurec_t *uurec_p; 1024 hostid_t *host; 1025 boolean_t not_suppressed = 1; 1026 boolean_t any_present = 0; 1027 1028 (void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid); 1029 (void) 
nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size); 1030 (void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, 1031 ¬_suppressed); 1032 1033 if (size != 0) { 1034 (void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 1035 &nva, &size); 1036 (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, 1037 &ba, &size); 1038 for (i = 0; i < size; i++) { 1039 extract_record_info(nva[i], &class, &fru, &serial, 1040 &resource, &asru, ba[i]); 1041 if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) && 1042 (ba[i] & FM_SUSPECT_FAULTY)) 1043 any_present = 1; 1044 } 1045 /* 1046 * also suppress if no resources present 1047 */ 1048 if (any_present == 0) 1049 not_suppressed = 0; 1050 } 1051 1052 uurec_p = (uurec_t *)malloc(sizeof (uurec_t)); 1053 uurec_p->uuid = strdup(uuid); 1054 uurec_p->sec = sec; 1055 uurec_p->ari_uuid_list = NULL; 1056 uurec_p->event = NULL; 1057 (void) nvlist_dup(nvl, &uurec_p->event, 0); 1058 host = find_hostid(nvl); 1059 if (not_suppressed && !opt_g) 1060 status_rec_p = NULL; 1061 else 1062 status_rec_p = record_in_catalog(class, fru, msgid, host); 1063 if (status_rec_p) { 1064 catalog_merge_record(status_rec_p, uurec_p, asru, resource, 1065 serial, not_suppressed); 1066 free_name_list(class); 1067 free_name_list(fru); 1068 } else { 1069 catalog_new_record(uurec_p, msgid, class, fru, asru, 1070 resource, serial, not_suppressed, host); 1071 } 1072 } 1073 1074 static void 1075 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid) 1076 { 1077 sr_list_t *srp; 1078 uurec_t *uurp; 1079 ari_list_t *ari_list; 1080 1081 srp = status_rec_list; 1082 if (srp) { 1083 for (;;) { 1084 uurp = srp->status_record->uurec; 1085 while (uurp) { 1086 if (strcmp(uuid, uurp->uuid) == 0) { 1087 ari_list = (ari_list_t *) 1088 malloc(sizeof (ari_list_t)); 1089 ari_list->ari_uuid = strdup(ari_uuid); 1090 ari_list->next = uurp->ari_uuid_list; 1091 uurp->ari_uuid_list = ari_list; 1092 return; 1093 } 1094 uurp = uurp->next; 1095 } 1096 if (srp->next == 
status_rec_list) 1097 break; 1098 srp = srp->next; 1099 } 1100 } 1101 } 1102 1103 static void 1104 print_line(char *label, char *buf) 1105 { 1106 char *cp, *ep, *wp; 1107 char c; 1108 int i; 1109 int lsz; 1110 char *padding; 1111 1112 lsz = strlen(label); 1113 padding = malloc(lsz + 1); 1114 for (i = 0; i < lsz; i++) 1115 padding[i] = ' '; 1116 padding[i] = 0; 1117 cp = buf; 1118 ep = buf; 1119 c = *ep; 1120 (void) printf("\n"); 1121 while (c) { 1122 i = lsz; 1123 wp = NULL; 1124 while ((c = *ep) != NULL && (wp == NULL || i < 80)) { 1125 if (c == ' ') 1126 wp = ep; 1127 else if (c == '\n') { 1128 i = 0; 1129 *ep = 0; 1130 do { 1131 ep++; 1132 } while ((c = *ep) != NULL && c == ' '); 1133 break; 1134 } 1135 ep++; 1136 i++; 1137 } 1138 if (i >= 80 && wp) { 1139 *wp = 0; 1140 ep = wp + 1; 1141 c = *ep; 1142 } 1143 (void) printf("%s%s\n", label, cp); 1144 cp = ep; 1145 label = padding; 1146 } 1147 free(padding); 1148 } 1149 1150 static void 1151 print_dict_info_line(nvlist_t *e, fmd_msg_item_t what, const char *linehdr) 1152 { 1153 char *cp = fmd_msg_getitem_nv(fmadm_msghdl, NULL, e, what); 1154 1155 if (cp) { 1156 print_line(dgettext("FMD", linehdr), cp); 1157 free(cp); 1158 } 1159 } 1160 1161 static void 1162 print_dict_info(nvlist_t *nvl) 1163 { 1164 print_dict_info_line(nvl, FMD_MSG_ITEM_DESC, "Description : "); 1165 print_dict_info_line(nvl, FMD_MSG_ITEM_RESPONSE, "Response : "); 1166 print_dict_info_line(nvl, FMD_MSG_ITEM_IMPACT, "Impact : "); 1167 print_dict_info_line(nvl, FMD_MSG_ITEM_ACTION, "Action : "); 1168 } 1169 1170 static void 1171 print_name(name_list_t *list, char *(func)(char *), char *padding, int *np, 1172 int pct, int full) 1173 { 1174 char *name, *fru_label = NULL; 1175 1176 name = list->name; 1177 if (list->label) { 1178 (void) printf("%s \"%s\" (%s)", padding, list->label, name); 1179 *np += 1; 1180 } else if (func && (fru_label = func(list->name)) != NULL) { 1181 (void) printf("%s \"%s\" (%s)", padding, fru_label, name); 1182 *np += 1; 1183 
free(fru_label); 1184 } else { 1185 (void) printf("%s %s", padding, name); 1186 *np += 1; 1187 } 1188 if (list->pct && pct > 0 && pct < 100) { 1189 if (list->count > 1) { 1190 if (full) { 1191 (void) printf(" %d @ %s %d%%\n", list->count, 1192 dgettext("FMD", "max"), 1193 list->max_pct); 1194 } else { 1195 (void) printf(" %s %d%%\n", 1196 dgettext("FMD", "max"), 1197 list->max_pct); 1198 } 1199 } else { 1200 (void) printf(" %d%%\n", list->pct); 1201 } 1202 } else { 1203 (void) printf("\n"); 1204 } 1205 } 1206 1207 static void 1208 print_asru_status(int status, char *label) 1209 { 1210 char *msg = NULL; 1211 1212 switch (status) { 1213 case 0: 1214 msg = dgettext("FMD", "ok and in service"); 1215 break; 1216 case FM_SUSPECT_DEGRADED: 1217 msg = dgettext("FMD", "service degraded, " 1218 "but associated components no longer faulty"); 1219 break; 1220 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1221 msg = dgettext("FMD", "faulted but still " 1222 "providing degraded service"); 1223 break; 1224 case FM_SUSPECT_FAULTY: 1225 msg = dgettext("FMD", "faulted but still in service"); 1226 break; 1227 case FM_SUSPECT_UNUSABLE: 1228 msg = dgettext("FMD", "out of service, " 1229 "but associated components no longer faulty"); 1230 break; 1231 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1232 msg = dgettext("FMD", "faulted and taken out of service"); 1233 break; 1234 default: 1235 break; 1236 } 1237 if (msg) { 1238 (void) printf("%s %s\n", label, msg); 1239 } 1240 } 1241 1242 static void 1243 print_fru_status(int status, char *label) 1244 { 1245 char *msg = NULL; 1246 1247 if (status & FM_SUSPECT_NOT_PRESENT) 1248 msg = dgettext("FMD", "not present"); 1249 else if (status & FM_SUSPECT_FAULTY) 1250 msg = dgettext("FMD", "faulty"); 1251 else if (status & FM_SUSPECT_REPLACED) 1252 msg = dgettext("FMD", "replaced"); 1253 else if (status & FM_SUSPECT_REPAIRED) 1254 msg = dgettext("FMD", "repair attempted"); 1255 else if (status & FM_SUSPECT_ACQUITTED) 1256 msg = dgettext("FMD", 
"acquitted"); 1257 else 1258 msg = dgettext("FMD", "removed"); 1259 (void) printf("%s %s\n", label, msg); 1260 } 1261 1262 static void 1263 print_rsrc_status(int status, char *label) 1264 { 1265 char *msg = ""; 1266 1267 if (status & FM_SUSPECT_NOT_PRESENT) 1268 msg = dgettext("FMD", "not present"); 1269 else if (status & FM_SUSPECT_FAULTY) { 1270 if (status & FM_SUSPECT_DEGRADED) 1271 msg = dgettext("FMD", 1272 "faulted but still providing degraded service"); 1273 else if (status & FM_SUSPECT_UNUSABLE) 1274 msg = dgettext("FMD", 1275 "faulted and taken out of service"); 1276 else 1277 msg = dgettext("FMD", "faulted but still in service"); 1278 } else if (status & FM_SUSPECT_REPLACED) 1279 msg = dgettext("FMD", "replaced"); 1280 else if (status & FM_SUSPECT_REPAIRED) 1281 msg = dgettext("FMD", "repair attempted"); 1282 else if (status & FM_SUSPECT_ACQUITTED) 1283 msg = dgettext("FMD", "acquitted"); 1284 else 1285 msg = dgettext("FMD", "removed"); 1286 (void) printf("%s %s\n", label, msg); 1287 } 1288 1289 static void 1290 print_name_list(name_list_t *list, char *label, char *(func)(char *), 1291 int limit, int pct, void (func1)(int, char *), int full) 1292 { 1293 char *name, *fru_label = NULL; 1294 char *padding; 1295 int i, j, l, n; 1296 name_list_t *end = list; 1297 1298 l = strlen(label); 1299 padding = malloc(l + 1); 1300 for (i = 0; i < l; i++) 1301 padding[i] = ' '; 1302 padding[l] = 0; 1303 (void) printf("%s", label); 1304 name = list->name; 1305 if (list->label) 1306 (void) printf(" \"%s\" (%s)", list->label, name); 1307 else if (func && (fru_label = func(list->name)) != NULL) { 1308 (void) printf(" \"%s\" (%s)", fru_label, name); 1309 free(fru_label); 1310 } else 1311 (void) printf(" %s", name); 1312 if (list->pct && pct > 0 && pct < 100) { 1313 if (list->count > 1) { 1314 if (full) { 1315 (void) printf(" %d @ %s %d%%\n", list->count, 1316 dgettext("FMD", "max"), list->max_pct); 1317 } else { 1318 (void) printf(" %s %d%%\n", 1319 dgettext("FMD", "max"), 
list->max_pct); 1320 } 1321 } else { 1322 (void) printf(" %d%%\n", list->pct); 1323 } 1324 } else { 1325 (void) printf("\n"); 1326 } 1327 if (func1) 1328 func1(list->status, padding); 1329 n = 1; 1330 j = 0; 1331 while ((list = list->next) != end) { 1332 if (limit == 0 || n < limit) { 1333 print_name(list, func, padding, &n, pct, full); 1334 if (func1) 1335 func1(list->status, padding); 1336 } else 1337 j++; 1338 } 1339 if (j == 1) { 1340 print_name(list->prev, func, padding, &n, pct, full); 1341 } else if (j > 1) { 1342 (void) printf("%s... %d %s\n", padding, j, 1343 dgettext("FMD", "more entries suppressed," 1344 " use -v option for full list")); 1345 } 1346 free(padding); 1347 } 1348 1349 static int 1350 asru_same_status(name_list_t *list) 1351 { 1352 name_list_t *end = list; 1353 int status = list->status; 1354 1355 while ((list = list->next) != end) { 1356 if (status == -1) { 1357 status = list->status; 1358 continue; 1359 } 1360 if (list->status != -1 && status != list->status) { 1361 status = -1; 1362 break; 1363 } 1364 } 1365 return (status); 1366 } 1367 1368 static int 1369 serial_in_fru(name_list_t *fru, name_list_t *serial) 1370 { 1371 name_list_t *sp = serial; 1372 name_list_t *fp; 1373 int nserial = 0; 1374 int found = 0; 1375 char buf[128]; 1376 1377 while (sp) { 1378 fp = fru; 1379 nserial++; 1380 (void) snprintf(buf, sizeof (buf), "serial=%s", sp->name); 1381 buf[sizeof (buf) - 1] = 0; 1382 while (fp) { 1383 if (strstr(fp->name, buf) != NULL) { 1384 found++; 1385 break; 1386 } 1387 fp = fp->next; 1388 if (fp == fru) 1389 break; 1390 } 1391 sp = sp->next; 1392 if (sp == serial) 1393 break; 1394 } 1395 return (found == nserial ? 
/*
 * Print the detailed (non-summary) part of a status record: the remaining
 * case lines, the host information, the fault class, ASRU, resource, FRU
 * and serial lists, and finally the dictionary text for the record.
 *
 * srp   - record to print
 * opt_i - when set, each case line shows the entries of the case's
 *         ari_uuid_list instead of the case uuid
 * full  - when set, nothing is abbreviated: every case line is printed and
 *         the name lists are printed without a display limit
 */
static void
print_sup_record(status_record_t *srp, int opt_i, int full)
{
	char buf[32];
	uurec_t *uurp = srp->uurec;
	int n, j, k, max;
	int status;
	ari_list_t *ari_list;

	/*
	 * Work out which case lines to show when the list is abbreviated:
	 * the first j entries and the entries from index k onwards are
	 * printed, the ones in between are replaced by a single "..." line.
	 * max_fault is the file-scope limit set by the -n option.
	 */
	n = 0;
	max = max_fault;
	if (max < 0) {
		max = 0;
	}
	j = max / 2;
	max -= j;
	k = srp->nrecs - max;
	/*
	 * The loop advances before its first iteration, so the first uurec
	 * (already printed by print_status_record) is skipped here.
	 */
	while ((uurp = uurp->next) != NULL) {
		if (full || n < j || n >= k || max_fault == 0 ||
		    srp->nrecs == max_fault+1) {
			if (opt_i) {
				ari_list = uurp->ari_uuid_list;
				while (ari_list) {
					(void) printf("%-15s %s\n",
					    format_date(buf, sizeof (buf),
					    uurp->sec), ari_list->ari_uuid);
					ari_list = ari_list->next;
				}
			} else {
				(void) printf("%-15s %s\n",
				    format_date(buf, sizeof (buf), uurp->sec),
				    uurp->uuid);
			}
		} else if (n == j)
			/* first suppressed entry: emit the marker once */
			(void) printf("... %d %s\n", srp->nrecs - max_fault,
			    dgettext("FMD", "more entries suppressed"));
		n++;
	}
	(void) printf("\n");
	(void) printf("%s %s", dgettext("FMD", "Host :"),
	    srp->host->server);
	if (srp->host->domain)
		(void) printf("\t%s %s", dgettext("FMD", "Domain :"),
		    srp->host->domain);
	(void) printf("\n%s %s", dgettext("FMD", "Platform :"),
	    srp->host->platform);
	(void) printf("\t%s %s\n\n", dgettext("FMD", "Chassis_id :"),
	    srp->host->chassis ? srp->host->chassis : "");
	if (srp->class)
		print_name_list(srp->class,
		    dgettext("FMD", "Fault class :"), NULL, 0, srp->class->pct,
		    NULL, full);
	if (srp->asru) {
		/*
		 * When all ASRUs share one status, print the list without
		 * per-entry status and follow it with a single status line;
		 * otherwise print the status after each entry.
		 */
		status = asru_same_status(srp->asru);
		if (status != -1) {
			print_name_list(srp->asru,
			    dgettext("FMD", "Affects :"), NULL,
			    full ? 0 : max_display, 0, NULL, full);
			print_asru_status(status, " ");
		} else
			print_name_list(srp->asru,
			    dgettext("FMD", "Affects :"), NULL,
			    full ? 0 : max_display, 0, print_asru_status, full);
	}
	/*
	 * The resource list is shown in full mode, or whenever the record
	 * lacks either a FRU or an ASRU list.
	 */
	if (full || srp->fru == NULL || srp->asru == NULL) {
		if (srp->resource) {
			print_name_list(srp->resource,
			    dgettext("FMD", "Problem in :"),
			    NULL, full ? 0 : max_display, 0, print_rsrc_status,
			    full);
		}
	}
	if (srp->fru) {
		/* same single-status / per-entry-status choice as above */
		status = asru_same_status(srp->fru);
		if (status != -1) {
			print_name_list(srp->fru, dgettext("FMD",
			    "FRU :"), get_fmri_label, 0,
			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
			    NULL, full);
			print_fru_status(status, " ");
		} else
			print_name_list(srp->fru, dgettext("FMD",
			    "FRU :"), get_fmri_label, 0,
			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
			    print_fru_status, full);
	}
	/*
	 * Serial numbers are listed separately only when they are not
	 * already part of the FRU or ASRU names.
	 */
	if (srp->serial && !serial_in_fru(srp->fru, srp->serial) &&
	    !serial_in_fru(srp->asru, srp->serial)) {
		print_name_list(srp->serial, dgettext("FMD", "Serial ID. :"),
		    NULL, 0, 0, NULL, full);
	}
	/* Description / Response / Impact / Action for the first case */
	print_dict_info(srp->uurec->event);
	(void) printf("\n");
}
uurp->ari_uuid_list; 1527 while (ari_list) { 1528 (void) printf("%-15s %-37s %-14s %-9s\n", 1529 format_date(buf, sizeof (buf), uurp->sec), 1530 ari_list->ari_uuid, srp->msgid, srp->severity); 1531 ari_list = ari_list->next; 1532 } 1533 } else { 1534 (void) printf("%-15s %-37s %-14s %-9s\n", 1535 format_date(buf, sizeof (buf), uurp->sec), 1536 uurp->uuid, srp->msgid, srp->severity); 1537 } 1538 1539 if (!summary) 1540 print_sup_record(srp, opt_i, full); 1541 } 1542 1543 static void 1544 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed) 1545 { 1546 status_record_t *srp; 1547 sr_list_t *slp; 1548 1549 slp = status_rec_list; 1550 if (slp) { 1551 for (;;) { 1552 srp = slp->status_record; 1553 if (opt_a || srp->not_suppressed) { 1554 if (page_feed) 1555 (void) printf("\f\n"); 1556 print_status_record(srp, summary, opt_i, full); 1557 } 1558 if (slp->next == status_rec_list) 1559 break; 1560 slp = slp->next; 1561 } 1562 } 1563 } 1564 1565 static name_list_t * 1566 find_fru(status_record_t *srp, char *resource) 1567 { 1568 name_list_t *rt = NULL; 1569 name_list_t *fru = srp->fru; 1570 1571 while (fru) { 1572 if (strcmp(resource, fru->name) == 0) { 1573 rt = fru; 1574 break; 1575 } 1576 fru = fru->next; 1577 if (fru == srp->fru) 1578 break; 1579 } 1580 return (rt); 1581 } 1582 1583 static void 1584 print_fru_line(name_list_t *fru, char *uuid) 1585 { 1586 if (fru->pct == 100) { 1587 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1588 dgettext("FMD", "suspects in this FRU total certainty"), 1589 100); 1590 } else { 1591 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1592 dgettext("FMD", "suspects in this FRU max certainty"), 1593 fru->max_pct); 1594 } 1595 } 1596 1597 static void 1598 print_fru(int summary, int opt_a, int opt_i, int page_feed) 1599 { 1600 resource_list_t *tp = status_fru_list; 1601 status_record_t *srp; 1602 sr_list_t *slp, *end; 1603 char *msgid, *fru_label; 1604 uurec_t *uurp; 1605 name_list_t *fru; 1606 int status; 1607 
ari_list_t *ari_list; 1608 1609 while (tp) { 1610 if (opt_a || tp->not_suppressed) { 1611 if (page_feed) 1612 (void) printf("\f\n"); 1613 if (!summary) 1614 (void) printf("-----------------------------" 1615 "---------------------------------------" 1616 "----------\n"); 1617 slp = tp->status_rec_list; 1618 end = slp; 1619 do { 1620 srp = slp->status_record; 1621 fru = find_fru(srp, tp->resource); 1622 if (fru) { 1623 if (fru->label) 1624 (void) printf("\"%s\" (%s) ", 1625 fru->label, fru->name); 1626 else if ((fru_label = get_fmri_label( 1627 fru->name)) != NULL) { 1628 (void) printf("\"%s\" (%s) ", 1629 fru_label, fru->name); 1630 free(fru_label); 1631 } else 1632 (void) printf("%s ", 1633 fru->name); 1634 break; 1635 } 1636 slp = slp->next; 1637 } while (slp != end); 1638 1639 slp = tp->status_rec_list; 1640 end = slp; 1641 status = 0; 1642 do { 1643 srp = slp->status_record; 1644 fru = srp->fru; 1645 while (fru) { 1646 if (strcmp(tp->resource, 1647 fru->name) == 0) 1648 status |= fru->status; 1649 fru = fru->next; 1650 if (fru == srp->fru) 1651 break; 1652 } 1653 slp = slp->next; 1654 } while (slp != end); 1655 if (status & FM_SUSPECT_NOT_PRESENT) 1656 (void) printf(dgettext("FMD", "not present\n")); 1657 else if (status & FM_SUSPECT_FAULTY) 1658 (void) printf(dgettext("FMD", "faulty\n")); 1659 else if (status & FM_SUSPECT_REPLACED) 1660 (void) printf(dgettext("FMD", "replaced\n")); 1661 else if (status & FM_SUSPECT_REPAIRED) 1662 (void) printf(dgettext("FMD", 1663 "repair attempted\n")); 1664 else if (status & FM_SUSPECT_ACQUITTED) 1665 (void) printf(dgettext("FMD", "acquitted\n")); 1666 else 1667 (void) printf(dgettext("FMD", "removed\n")); 1668 1669 slp = tp->status_rec_list; 1670 end = slp; 1671 do { 1672 srp = slp->status_record; 1673 uurp = srp->uurec; 1674 fru = find_fru(srp, tp->resource); 1675 if (fru) { 1676 if (opt_i) { 1677 ari_list = uurp->ari_uuid_list; 1678 while (ari_list) { 1679 print_fru_line(fru, 1680 ari_list->ari_uuid); 1681 ari_list = 1682 
ari_list->next; 1683 } 1684 } else { 1685 print_fru_line(fru, uurp->uuid); 1686 } 1687 } 1688 slp = slp->next; 1689 } while (slp != end); 1690 if (!summary) { 1691 slp = tp->status_rec_list; 1692 end = slp; 1693 srp = slp->status_record; 1694 if (srp->serial && 1695 !serial_in_fru(srp->fru, srp->serial)) { 1696 print_name_list(srp->serial, 1697 dgettext("FMD", "Serial ID. :"), 1698 NULL, 0, 0, NULL, 1); 1699 } 1700 msgid = NULL; 1701 do { 1702 if (msgid == NULL || 1703 strcmp(msgid, srp->msgid) != 0) { 1704 msgid = srp->msgid; 1705 print_dict_info(uurp->event); 1706 } 1707 slp = slp->next; 1708 } while (slp != end); 1709 } 1710 } 1711 tp = tp->next; 1712 if (tp == status_fru_list) 1713 break; 1714 } 1715 } 1716 1717 static void 1718 print_asru(int opt_a) 1719 { 1720 resource_list_t *tp = status_asru_list; 1721 status_record_t *srp; 1722 sr_list_t *slp, *end; 1723 char *msg; 1724 int status; 1725 name_list_t *asru; 1726 1727 while (tp) { 1728 if (opt_a || tp->not_suppressed) { 1729 status = 0; 1730 slp = tp->status_rec_list; 1731 end = slp; 1732 do { 1733 srp = slp->status_record; 1734 asru = srp->asru; 1735 while (asru) { 1736 if (strcmp(tp->resource, 1737 asru->name) == 0) 1738 status |= asru->status; 1739 asru = asru->next; 1740 if (asru == srp->asru) 1741 break; 1742 } 1743 slp = slp->next; 1744 } while (slp != end); 1745 switch (status) { 1746 case 0: 1747 msg = dgettext("FMD", "ok"); 1748 break; 1749 case FM_SUSPECT_DEGRADED: 1750 msg = dgettext("FMD", "degraded"); 1751 break; 1752 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1753 msg = dgettext("FMD", "degraded"); 1754 break; 1755 case FM_SUSPECT_FAULTY: 1756 msg = dgettext("FMD", "degraded"); 1757 break; 1758 case FM_SUSPECT_UNUSABLE: 1759 msg = dgettext("FMD", "unknown"); 1760 break; 1761 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1762 msg = dgettext("FMD", "faulted"); 1763 break; 1764 default: 1765 msg = ""; 1766 break; 1767 } 1768 (void) printf("%-69s %s\n", tp->resource, msg); 1769 } 1770 tp = 
tp->next; 1771 if (tp == status_asru_list) 1772 break; 1773 } 1774 } 1775 1776 static int 1777 uuid_in_list(char *uuid, uurec_select_t *uurecp) 1778 { 1779 while (uurecp) { 1780 if (strcmp(uuid, uurecp->uuid) == 0) 1781 return (1); 1782 uurecp = uurecp->next; 1783 } 1784 return (0); 1785 } 1786 1787 static int 1788 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg) 1789 { 1790 int64_t *diag_time; 1791 uint_t nelem; 1792 int rt = 0; 1793 char *uuid = "-"; 1794 uurec_select_t *uurecp = (uurec_select_t *)arg; 1795 1796 if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME, 1797 &diag_time, &nelem) == 0 && nelem >= 2) { 1798 (void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID, 1799 &uuid); 1800 if (uurecp == NULL || uuid_in_list(uuid, uurecp)) 1801 add_fault_record_to_catalog(acp->aci_event, *diag_time, 1802 uuid); 1803 } else { 1804 rt = -1; 1805 } 1806 return (rt); 1807 } 1808 1809 /*ARGSUSED*/ 1810 static int 1811 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused) 1812 { 1813 update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid); 1814 return (0); 1815 } 1816 1817 static int 1818 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i) 1819 { 1820 int rt = FMADM_EXIT_SUCCESS; 1821 1822 /* 1823 * These calls may fail with Protocol error if message payload is 1824 * too big 1825 */ 1826 if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0) 1827 die("failed to get case list from fmd"); 1828 if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0) 1829 die("failed to get case status from fmd"); 1830 return (rt); 1831 } 1832 1833 /* 1834 * fmadm faulty command 1835 * 1836 * -a show hidden fault records 1837 * -f show faulty fru's 1838 * -g force grouping of similar faults on the same fru 1839 * -n number of fault records to display 1840 * -p pipe output through pager 1841 * -r show faulty asru's 1842 * -s print summary of first fault 1843 * -u print listed uuid's only 1844 * -v full output 1845 */ 1846 1847 int 
1848 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[]) 1849 { 1850 int opt_a = 0, opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0; 1851 int opt_i = 0; 1852 char *pager; 1853 FILE *fp; 1854 int rt, c, stat; 1855 uurec_select_t *tp; 1856 uurec_select_t *uurecp = NULL; 1857 1858 while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) { 1859 switch (c) { 1860 case 'a': 1861 opt_a++; 1862 break; 1863 case 'f': 1864 opt_f++; 1865 break; 1866 case 'g': 1867 opt_g++; 1868 break; 1869 case 'i': 1870 opt_i++; 1871 break; 1872 case 'n': 1873 max_fault = atoi(optarg); 1874 break; 1875 case 'p': 1876 opt_p++; 1877 break; 1878 case 'r': 1879 opt_r++; 1880 break; 1881 case 's': 1882 opt_s++; 1883 break; 1884 case 'u': 1885 tp = (uurec_select_t *)malloc(sizeof (uurec_select_t)); 1886 tp->uuid = optarg; 1887 tp->next = uurecp; 1888 uurecp = tp; 1889 opt_a = 1; 1890 break; 1891 case 'v': 1892 opt_v++; 1893 break; 1894 default: 1895 return (FMADM_EXIT_USAGE); 1896 } 1897 } 1898 if (optind < argc) 1899 return (FMADM_EXIT_USAGE); 1900 1901 if ((fmadm_msghdl = fmd_msg_init(NULL, FMD_MSG_VERSION)) == NULL) 1902 return (FMADM_EXIT_ERROR); 1903 rt = get_cases_from_fmd(adm, uurecp, opt_i); 1904 if (opt_p) { 1905 if ((pager = getenv("PAGER")) == NULL) 1906 pager = "/usr/bin/more"; 1907 fp = popen(pager, "w"); 1908 if (fp == NULL) { 1909 rt = FMADM_EXIT_ERROR; 1910 opt_p = 0; 1911 } else { 1912 dup2(fileno(fp), 1); 1913 setbuf(stdout, NULL); 1914 (void) fclose(fp); 1915 } 1916 } 1917 max_display = max_fault; 1918 if (opt_f) 1919 print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s); 1920 if (opt_r) 1921 print_asru(opt_a); 1922 if (opt_f == 0 && opt_r == 0) 1923 print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s); 1924 fmd_msg_fini(fmadm_msghdl); 1925 label_release_topo(); 1926 if (opt_p) { 1927 (void) fclose(stdout); 1928 (void) wait(&stat); 1929 } 1930 return (rt); 1931 } 1932 1933 int 1934 cmd_flush(fmd_adm_t *adm, int argc, char *argv[]) 1935 { 1936 int i, status = 
FMADM_EXIT_SUCCESS; 1937 1938 if (argc < 2 || (i = getopt(argc, argv, "")) != EOF) 1939 return (FMADM_EXIT_USAGE); 1940 1941 for (i = 1; i < argc; i++) { 1942 if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) { 1943 warn("failed to flush %s", argv[i]); 1944 status = FMADM_EXIT_ERROR; 1945 } else 1946 note("flushed resource history for %s\n", argv[i]); 1947 } 1948 1949 return (status); 1950 } 1951 1952 int 1953 cmd_repair(fmd_adm_t *adm, int argc, char *argv[]) 1954 { 1955 int err; 1956 1957 if (getopt(argc, argv, "") != EOF) 1958 return (FMADM_EXIT_USAGE); 1959 1960 if (argc - optind != 1) 1961 return (FMADM_EXIT_USAGE); 1962 1963 /* 1964 * argument could be a uuid, an fmri (asru, fru or resource) 1965 * or a label. Try uuid first, If that fails try the others. 1966 */ 1967 err = fmd_adm_case_repair(adm, argv[optind]); 1968 if (err != 0) 1969 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 1970 1971 if (err != 0) 1972 die("failed to record repair to %s", argv[optind]); 1973 1974 note("recorded repair to %s\n", argv[optind]); 1975 return (FMADM_EXIT_SUCCESS); 1976 } 1977 1978 int 1979 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[]) 1980 { 1981 int err; 1982 1983 if (getopt(argc, argv, "") != EOF) 1984 return (FMADM_EXIT_USAGE); 1985 1986 if (argc - optind != 1) 1987 return (FMADM_EXIT_USAGE); 1988 1989 /* 1990 * argument could be an fmri (asru, fru or resource) or a label. 1991 */ 1992 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 1993 if (err != 0) 1994 die("failed to record repair to %s", argv[optind]); 1995 1996 note("recorded repair to of %s\n", argv[optind]); 1997 return (FMADM_EXIT_SUCCESS); 1998 } 1999 2000 int 2001 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[]) 2002 { 2003 int err; 2004 2005 if (getopt(argc, argv, "") != EOF) 2006 return (FMADM_EXIT_USAGE); 2007 2008 if (argc - optind != 1) 2009 return (FMADM_EXIT_USAGE); 2010 2011 /* 2012 * argument could be an fmri (asru, fru or resource) or a label. 
2013 */ 2014 err = fmd_adm_rsrc_replaced(adm, argv[optind]); 2015 if (err != 0) 2016 die("failed to record replacement of %s", argv[optind]); 2017 2018 note("recorded replacement of %s\n", argv[optind]); 2019 return (FMADM_EXIT_SUCCESS); 2020 } 2021 2022 int 2023 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[]) 2024 { 2025 int err; 2026 2027 if (getopt(argc, argv, "") != EOF) 2028 return (FMADM_EXIT_USAGE); 2029 2030 if (argc - optind != 1 && argc - optind != 2) 2031 return (FMADM_EXIT_USAGE); 2032 2033 /* 2034 * argument could be a uuid, an fmri (asru, fru or resource) 2035 * or a label. Or it could be a uuid and an fmri or label. 2036 */ 2037 if (argc - optind == 2) { 2038 err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]); 2039 if (err != 0) 2040 err = fmd_adm_rsrc_acquit(adm, argv[optind + 1], 2041 argv[optind]); 2042 } else { 2043 err = fmd_adm_case_acquit(adm, argv[optind]); 2044 if (err != 0) 2045 err = fmd_adm_rsrc_acquit(adm, argv[optind], ""); 2046 } 2047 2048 if (err != 0) 2049 die("failed to record acquital of %s", argv[optind]); 2050 2051 note("recorded acquital of %s\n", argv[optind]); 2052 return (FMADM_EXIT_SUCCESS); 2053 } 2054