1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <fmadm.h> 28 #include <errno.h> 29 #include <limits.h> 30 #include <strings.h> 31 #include <stdio.h> 32 #include <unistd.h> 33 #include <sys/wait.h> 34 #include <sys/stat.h> 35 #include <fcntl.h> 36 #include <fm/fmd_log.h> 37 #include <sys/fm/protocol.h> 38 #include <fm/libtopo.h> 39 #include <fm/fmd_adm.h> 40 #include <dlfcn.h> 41 #include <sys/systeminfo.h> 42 #include <sys/utsname.h> 43 #include <libintl.h> 44 #include <locale.h> 45 #include <sys/smbios.h> 46 #include <libdevinfo.h> 47 #include <stdlib.h> 48 49 #define offsetof(s, m) ((size_t)(&(((s*)0)->m))) 50 51 /* 52 * catalog_setup() must be called to setup support functions. 53 * Fault records are added to catalog by calling add_fault_record_to_catalog() 54 * records are stored in order of importance to the system. 55 * If -g flag is set or not_suppressed is not set and the class fru, fault, 56 * type are the same then details are merged into an existing record, with uuid 57 * records are stored in time order. 
58 * For each record information is extracted from nvlist and merged into linked 59 * list each is checked for identical records for which percentage certainty are 60 * added together. 61 * print_catalog() is called to print out catalog and release external resources 62 * 63 * /---------------\ 64 * status_rec_list -> | | -| 65 * \---------------/ 66 * \/ 67 * /---------------\ /-------\ /-------\ 68 * status_fru_list | status_record | -> | uurec | -> | uurec | -| 69 * \/ | | |- | | <- | | 70 * /-------------\ | | \-------/ \-------/ 71 * | | -> | | \/ \/ 72 * \-------------/ | | /-------\ /-------\ 73 * \/ | | -> | asru | -> | asru | 74 * --- | | | | <- | | 75 * | | \-------/ \-------/ 76 * status_asru_list | class | 77 * \/ | resource | /-------\ /-------\ 78 * /-------------\ | fru | -> | list | -> | list | 79 * | | -> | serial | | | <- | | 80 * \-------------/ | | \-------/ \-------/ 81 * \/ \---------------/ 82 * --- \/ /\ 83 * /---------------\ 84 * | status_record | 85 * \---------------/ 86 * 87 * Fmadm faulty takes a number of options which affect the format of the 88 * output displayed. By default, the display reports the FRU and ASRU along 89 * with other information on per-case basis as in the example below. 90 * 91 * --------------- ------------------------------------ -------------- ------- 92 * TIME EVENT-ID MSG-ID SEVERITY 93 * --------------- ------------------------------------ -------------- ------- 94 * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c AMD-8000-2F Major 95 * 96 * Fault class : fault.memory.dimm_sb 97 * Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 98 * faulted but still in service 99 * FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0) 100 * faulty 101 * 102 * Description : The number of errors associated with this memory module has 103 * exceeded acceptable levels. Refer to 104 * http://sun.com/msg/AMD-8000-2F for more information. 
 *
 * Response : Pages of memory associated with this memory module are being
 *            removed from service as errors are reported.
 *
 * Impact : Total system memory capacity will be reduced as pages are
 *          retired.
 *
 * Action : Schedule a repair procedure to replace the affected memory
 *          module. Use fmdump -v -u <EVENT_ID> to identify the module.
 *
 * The -v flag is similar, but adds some additional information such as the
 * resource. The -s flag is also similar but just gives the top line summary.
 * All these options (i.e. without the -f or -r flags) use the print_catalog()
 * function to do the display.
 *
 * The -f flag changes the output so that it appears sorted on a per-fru basis.
 * The output is somewhat cut down compared to the default output. If -f is
 * used, then print_fru() is used to print the output.
 *
 * -----------------------------------------------------------------------------
 * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
 * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
 *
 * Description : A problem was detected for a PCI device.
 *               Refer to http://sun.com/msg/PCI-8000-7J for more information.
 *
 * Response : One or more device instances may be disabled
 *
 * Impact : Possible loss of services provided by the device instances
 *          associated with this fault
 *
 * Action : Schedule a repair procedure to replace the affected device.
 *          Use fmdump -v -u <EVENT_ID> to identify the device or contact
 *          Sun for support.
 *
 * The -r flag changes the output so that it appears sorted on a per-asru basis.
 * The output is very much cut down compared to the default output, just giving
 * the asru fmri and state. Here print_asru() is used to print the output.
143 * 144 * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded 145 * 146 * For all fmadm faulty options, the sequence of events is 147 * 148 * 1) Walk through all the cases in the system using fmd_adm_case_iter() and 149 * for each case call dfault_rec(). This will call add_fault_record_to_catalog() 150 * This will extract the data from the nvlist and call catalog_new_record() to 151 * save the data away in various linked lists in the catalogue. 152 * 153 * 2) Once this is done, the data can be supplemented by using 154 * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option. 155 * 156 * 3) Finally print_catalog(), print_fru() or print_asru() are called as 157 * appropriate to display the information from the catalogue sorted in the 158 * requested way. 159 * 160 */ 161 162 typedef struct name_list { 163 struct name_list *next; 164 struct name_list *prev; 165 char *name; 166 uint8_t pct; 167 uint8_t max_pct; 168 ushort_t count; 169 int status; 170 char *label; 171 } name_list_t; 172 173 typedef struct ari_list { 174 char *ari_uuid; 175 struct ari_list *next; 176 } ari_list_t; 177 178 typedef struct uurec { 179 struct uurec *next; 180 struct uurec *prev; 181 char *uuid; 182 ari_list_t *ari_uuid_list; 183 name_list_t *asru; 184 uint64_t sec; 185 } uurec_t; 186 187 typedef struct uurec_select { 188 struct uurec_select *next; 189 char *uuid; 190 } uurec_select_t; 191 192 typedef struct host_id { 193 char *chassis; 194 char *server; 195 char *platform; 196 } hostid_t; 197 198 typedef struct host_id_list { 199 hostid_t hostid; 200 struct host_id_list *next; 201 } host_id_list_t; 202 203 typedef struct status_record { 204 hostid_t *host; 205 int nrecs; 206 uurec_t *uurec; 207 char *severity; /* in C locale */ 208 char *msgid; 209 name_list_t *class; 210 name_list_t *resource; 211 name_list_t *asru; 212 name_list_t *fru; 213 name_list_t *serial; 214 char *url; 215 uint8_t not_suppressed; 216 } status_record_t; 217 218 typedef struct 
sr_list { 219 struct sr_list *next; 220 struct sr_list *prev; 221 struct status_record *status_record; 222 } sr_list_t; 223 224 typedef struct resource_list { 225 struct resource_list *next; 226 struct resource_list *prev; 227 sr_list_t *status_rec_list; 228 char *resource; 229 uint8_t not_suppressed; 230 uint8_t max_pct; 231 } resource_list_t; 232 233 typedef struct tgetlabel_data { 234 char *label; 235 char *fru; 236 } tgetlabel_data_t; 237 238 sr_list_t *status_rec_list; 239 resource_list_t *status_fru_list; 240 resource_list_t *status_asru_list; 241 242 static char *locale; 243 static char *nlspath; 244 static int max_display; 245 static int max_fault = 0; 246 static topo_hdl_t *topo_handle; 247 static char *topo_handle_uuid; 248 static host_id_list_t *host_list; 249 static int n_server; 250 static int opt_g; 251 252 static char * 253 format_date(char *buf, size_t len, uint64_t sec) 254 { 255 if (sec > LONG_MAX) { 256 (void) fprintf(stderr, 257 "record time is too large for 32-bit utility\n"); 258 (void) snprintf(buf, len, "0x%llx", sec); 259 } else { 260 time_t tod = (time_t)sec; 261 (void) strftime(buf, len, "%b %d %T", localtime(&tod)); 262 } 263 264 return (buf); 265 } 266 267 static hostid_t * 268 find_hostid_in_list(char *platform, char *chassis, char *server) 269 { 270 hostid_t *rt = NULL; 271 host_id_list_t *hostp; 272 273 if (platform == NULL) 274 platform = "-"; 275 if (server == NULL) 276 server = "-"; 277 hostp = host_list; 278 while (hostp) { 279 if (hostp->hostid.platform && 280 strcmp(hostp->hostid.platform, platform) == 0 && 281 hostp->hostid.server && 282 strcmp(hostp->hostid.server, server) == 0 && 283 (chassis == NULL || hostp->hostid.chassis == NULL || 284 strcmp(chassis, hostp->hostid.chassis) == 0)) { 285 rt = &hostp->hostid; 286 break; 287 } 288 hostp = hostp->next; 289 } 290 if (rt == NULL) { 291 hostp = malloc(sizeof (host_id_list_t)); 292 hostp->hostid.platform = strdup(platform); 293 hostp->hostid.server = strdup(server); 294 
hostp->hostid.chassis = chassis ? strdup(chassis) : NULL; 295 hostp->next = host_list; 296 host_list = hostp; 297 rt = &hostp->hostid; 298 n_server++; 299 } 300 return (rt); 301 } 302 303 static hostid_t * 304 find_hostid(nvlist_t *nvl) 305 { 306 char *platform = NULL, *chassis = NULL, *server = NULL; 307 nvlist_t *auth, *fmri; 308 hostid_t *rt = NULL; 309 310 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 && 311 nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) { 312 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT, 313 &platform); 314 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server); 315 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS, 316 &chassis); 317 rt = find_hostid_in_list(platform, chassis, server); 318 } 319 return (rt); 320 } 321 322 static void 323 catalog_setup(void) 324 { 325 char *tp; 326 int pl; 327 328 /* 329 * All FMA event dictionaries use msgfmt(1) message objects to produce 330 * messages, even for the C locale. We therefore want to use dgettext 331 * for all message lookups, but its defined behavior in the C locale is 332 * to return the input string. Since our input strings are event codes 333 * and not format strings, this doesn't help us. We resolve this nit 334 * by setting NLSPATH to a non-existent file: the presence of NLSPATH 335 * is defined to force dgettext(3C) to do a full lookup even for C. 
336 */ 337 nlspath = getenv("NLSPATH"); 338 if (nlspath == NULL) 339 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 340 else { 341 pl = strlen(nlspath) + sizeof ("NLSPATH=") + 1; 342 tp = malloc(pl); 343 (void) snprintf(tp, pl, "NLSPATH=%s", nlspath); 344 nlspath = tp; 345 } 346 347 locale = setlocale(LC_MESSAGES, ""); 348 } 349 350 static char * 351 get_dict_url(char *id) 352 { 353 char *url = "http://sun.com/msg/"; 354 int msz = sizeof (url) + strlen(id) + 1; 355 char *cp; 356 357 cp = malloc(msz); 358 (void) snprintf(cp, msz, "%s%s", url, id); 359 return (cp); 360 } 361 362 static char * 363 get_dict_msg(char *id, char *idx, int unknown, int translate) 364 { 365 char mbuf[128]; 366 char *msg; 367 char dbuf[32]; 368 char *p; 369 int restore_env = 0; 370 int restore_locale = 0; 371 372 p = strchr(id, '-'); 373 if (p == NULL || p == id || (p - id) >= 32) { 374 msg = mbuf; 375 } else { 376 strncpy(dbuf, id, (size_t)(p - id)); 377 dbuf[(size_t)(p - id)] = 0; 378 379 (void) snprintf(mbuf, sizeof (mbuf), "%s.%s", id, idx); 380 if (translate == 0 || nlspath == NULL) { 381 (void) setlocale(LC_MESSAGES, "C"); 382 restore_locale = 1; 383 } 384 bindtextdomain("FMD", "/usr/lib/locale"); 385 msg = dgettext(dbuf, mbuf); 386 if (msg == mbuf) { 387 (void) setlocale(LC_MESSAGES, "C"); 388 restore_locale = 1; 389 msg = dgettext(dbuf, mbuf); 390 } 391 if (msg == mbuf) { 392 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 393 restore_env = 1; 394 (void) setlocale(LC_MESSAGES, "C"); 395 msg = dgettext(dbuf, mbuf); 396 } 397 if (restore_locale) 398 (void) setlocale(LC_MESSAGES, locale); 399 if (restore_env && nlspath) 400 putenv(nlspath); 401 } 402 if (msg == mbuf) { 403 if (unknown) 404 msg = "unknown"; 405 else 406 msg = NULL; 407 } 408 return (msg); 409 } 410 411 /* 412 * compare two fru strings which are made up of substrings seperated by '/' 413 * return true if every substring is the same in the two strings, or if a 414 * substring is null in one. 
415 */ 416 417 static int 418 frucmp(char *f1, char *f2) 419 { 420 char c1, c2; 421 int i = 0; 422 423 for (;;) { 424 c1 = *f1; 425 c2 = *f2; 426 if (c1 == c2) { 427 i = (c1 == '/') ? 0 : i + 1; 428 } else if (i == 0) { 429 if (c1 == '/') { 430 do { 431 f2++; 432 } while ((c2 = *f2) != 0 && c2 != '/'); 433 if (c2 == NULL) 434 break; 435 } else if (c2 == '/') { 436 do { 437 f1++; 438 } while ((c1 = *f1) != 0 && c1 != '/'); 439 if (c1 == NULL) 440 break; 441 } else 442 break; 443 } else 444 break; 445 if (c1 == NULL) 446 return (0); 447 f1++; 448 f2++; 449 } 450 return (1); 451 } 452 453 static int 454 tgetlabel(topo_hdl_t *thp, tnode_t *node, void *arg) 455 { 456 int err; 457 char *fru_name, *lname; 458 nvlist_t *fru = NULL; 459 int rt = TOPO_WALK_NEXT; 460 tgetlabel_data_t *tdp = (tgetlabel_data_t *)arg; 461 462 if (topo_node_fru(node, &fru, NULL, &err) == 0) { 463 if (topo_fmri_nvl2str(thp, fru, &fru_name, &err) == 0) { 464 if (frucmp(tdp->fru, fru_name) == 0 && 465 topo_node_label(node, &lname, &err) == 0) { 466 tdp->label = strdup(lname); 467 topo_hdl_strfree(thp, lname); 468 rt = TOPO_WALK_TERMINATE; 469 } 470 topo_hdl_strfree(thp, fru_name); 471 } 472 nvlist_free(fru); 473 } 474 return (rt); 475 } 476 477 static void 478 label_get_topo(void) 479 { 480 int err; 481 482 topo_handle = topo_open(TOPO_VERSION, 0, &err); 483 if (topo_handle) { 484 topo_handle_uuid = topo_snap_hold(topo_handle, NULL, &err); 485 } 486 } 487 488 static void 489 label_release_topo(void) 490 { 491 if (topo_handle_uuid) 492 topo_hdl_strfree(topo_handle, topo_handle_uuid); 493 if (topo_handle) { 494 topo_snap_release(topo_handle); 495 topo_close(topo_handle); 496 } 497 } 498 499 static char * 500 get_fmri_label(char *fru) 501 { 502 topo_walk_t *twp; 503 tgetlabel_data_t td; 504 int err; 505 506 td.label = NULL; 507 td.fru = fru; 508 if (topo_handle == NULL) 509 label_get_topo(); 510 if (topo_handle_uuid) { 511 twp = topo_walk_init(topo_handle, FM_FMRI_SCHEME_HC, 512 tgetlabel, &td, &err); 
513 if (twp) { 514 topo_walk_step(twp, TOPO_WALK_CHILD); 515 topo_walk_fini(twp); 516 } 517 } 518 return (td.label); 519 } 520 521 static char * 522 get_nvl2str_topo(nvlist_t *nvl) 523 { 524 char *name = NULL; 525 char *tname; 526 int err; 527 char *scheme = NULL; 528 char *mod_name = NULL; 529 char buf[128]; 530 531 if (topo_handle == NULL) 532 label_get_topo(); 533 if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) { 534 name = strdup(tname); 535 topo_hdl_strfree(topo_handle, tname); 536 } else { 537 (void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme); 538 (void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name); 539 if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 && 540 mod_name) { 541 (void) snprintf(buf, sizeof (buf), "%s:///module/%s", 542 scheme, mod_name); 543 name = strdup(buf); 544 } 545 } 546 return (name); 547 } 548 549 static int 550 set_priority(char *s) 551 { 552 int rt = 0; 553 554 if (s) { 555 if (strcmp(s, "Minor") == 0) 556 rt = 1; 557 else if (strcmp(s, "Major") == 0) 558 rt = 10; 559 else if (strcmp(s, "Critical") == 0) 560 rt = 100; 561 } 562 return (rt); 563 } 564 565 static int 566 cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1, 567 uint8_t p2) 568 { 569 int r1, r2; 570 int rt; 571 572 r1 = set_priority(s1); 573 r2 = set_priority(s2); 574 rt = r1 - r2; 575 if (rt == 0) { 576 if (t1 > t2) 577 rt = 1; 578 else if (t1 < t2) 579 rt = -1; 580 else 581 rt = p1 - p2; 582 } 583 return (rt); 584 } 585 586 /* 587 * merge two lists into one, by comparing enties in new and moving into list if 588 * name is not there or free off memory for names which are already there 589 * add_pct indicates if pct is the sum or highest pct 590 */ 591 static name_list_t * 592 merge_name_list(name_list_t **list, name_list_t *new, int add_pct) 593 { 594 name_list_t *lp, *np, *sp, *rt = NULL; 595 int max_pct; 596 597 rt = *list; 598 np = new; 599 while (np) { 600 lp = *list; 601 while (lp) { 602 if (strcmp(lp->name, 
np->name) == 0) 603 break; 604 lp = lp->next; 605 if (lp == *list) 606 lp = NULL; 607 } 608 if (np->next == new) 609 sp = NULL; 610 else 611 sp = np->next; 612 if (lp) { 613 lp->status |= (np->status & FM_SUSPECT_FAULTY); 614 if (add_pct) { 615 lp->pct += np->pct; 616 lp->count += np->count; 617 } else if (np->pct > lp->pct) { 618 lp->pct = np->pct; 619 } 620 max_pct = np->max_pct; 621 if (np->label) 622 free(np->label); 623 free(np->name); 624 free(np); 625 np = NULL; 626 if (max_pct > lp->max_pct) { 627 lp->max_pct = max_pct; 628 if (lp->max_pct > lp->prev->max_pct && 629 lp != *list) { 630 lp->prev->next = lp->next; 631 lp->next->prev = lp->prev; 632 np = lp; 633 } 634 } 635 } 636 if (np) { 637 lp = *list; 638 if (lp) { 639 if (np->max_pct > lp->max_pct) { 640 np->next = lp; 641 np->prev = lp->prev; 642 lp->prev->next = np; 643 lp->prev = np; 644 *list = np; 645 rt = np; 646 } else { 647 lp = lp->next; 648 while (lp != *list && 649 np->max_pct < lp->max_pct) { 650 lp = lp->next; 651 } 652 np->next = lp; 653 np->prev = lp->prev; 654 lp->prev->next = np; 655 lp->prev = np; 656 } 657 } else { 658 *list = np; 659 np->next = np; 660 np->prev = np; 661 rt = np; 662 } 663 } 664 np = sp; 665 } 666 return (rt); 667 } 668 669 /* 670 * compare entries in two lists return true if the two lists have identical 671 * content. The two lists may not have entries in the same order, so we compare 672 * the size of the list as well as trying to find every entry from one list in 673 * the other. 
674 */ 675 static int 676 cmp_name_list(name_list_t *lxp1, name_list_t *lxp2) 677 { 678 name_list_t *lp1, *lp2; 679 int l1 = 0, l2 = 0, common = 0; 680 681 lp2 = lxp2; 682 while (lp2) { 683 l2++; 684 lp2 = lp2->next; 685 if (lp2 == lxp2) 686 break; 687 } 688 lp1 = lxp1; 689 while (lp1) { 690 l1++; 691 lp2 = lxp2; 692 while (lp2) { 693 if (strcmp(lp2->name, lp1->name) == 0) { 694 common++; 695 break; 696 } 697 lp2 = lp2->next; 698 if (lp2 == lxp2) 699 break; 700 } 701 lp1 = lp1->next; 702 if (lp1 == lxp1) 703 break; 704 } 705 if (l1 == l2 && l2 == common) 706 return (0); 707 else 708 return (1); 709 } 710 711 static name_list_t * 712 alloc_name_list(char *name, uint8_t pct) 713 { 714 name_list_t *nlp; 715 716 nlp = malloc(sizeof (*nlp)); 717 nlp->name = strdup(name); 718 nlp->pct = pct; 719 nlp->max_pct = pct; 720 nlp->count = 1; 721 nlp->next = nlp; 722 nlp->prev = nlp; 723 nlp->status = 0; 724 nlp->label = NULL; 725 return (nlp); 726 } 727 728 static void 729 free_name_list(name_list_t *list) 730 { 731 name_list_t *next = list; 732 name_list_t *lp; 733 734 if (list) { 735 do { 736 lp = next; 737 next = lp->next; 738 if (lp->label) 739 free(lp->label); 740 free(lp->name); 741 free(lp); 742 } while (next != list); 743 } 744 } 745 746 static status_record_t * 747 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class, 748 name_list_t *fru, name_list_t *asru, name_list_t *resource, 749 name_list_t *serial, const char *url, boolean_t not_suppressed, 750 hostid_t *hostid) 751 { 752 status_record_t *status_rec_p; 753 754 status_rec_p = (status_record_t *)malloc(sizeof (status_record_t)); 755 status_rec_p->nrecs = 1; 756 status_rec_p->host = hostid; 757 status_rec_p->uurec = uurec_p; 758 uurec_p->next = NULL; 759 uurec_p->prev = NULL; 760 uurec_p->asru = asru; 761 status_rec_p->severity = get_dict_msg(msgid, "severity", 1, 0); 762 status_rec_p->class = class; 763 status_rec_p->fru = fru; 764 status_rec_p->asru = asru; 765 status_rec_p->resource = resource; 766 
status_rec_p->serial = serial; 767 status_rec_p->url = url ? strdup(url) : NULL; 768 status_rec_p->msgid = strdup(msgid); 769 status_rec_p->not_suppressed = not_suppressed; 770 return (status_rec_p); 771 } 772 773 /* 774 * add record to given list maintaining order higher priority first. 775 */ 776 static void 777 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp) 778 { 779 sr_list_t *tp, *np, *sp; 780 int order; 781 uint64_t sec; 782 783 np = malloc(sizeof (sr_list_t)); 784 np->status_record = status_rec_p; 785 sec = status_rec_p->uurec->sec; 786 if ((sp = *list_pp) == NULL) { 787 *list_pp = np; 788 np->next = np; 789 np->prev = np; 790 } else { 791 /* insert new record in front of lower priority */ 792 tp = sp; 793 order = cmp_priority(status_rec_p->severity, 794 sp->status_record->severity, sec, 795 tp->status_record->uurec->sec, 0, 0); 796 if (order > 0) { 797 *list_pp = np; 798 } else { 799 tp = sp->next; 800 while (tp != sp && 801 cmp_priority(status_rec_p->severity, 802 tp->status_record->severity, sec, 803 tp->status_record->uurec->sec, 0, 0)) { 804 tp = tp->next; 805 } 806 } 807 np->next = tp; 808 np->prev = tp->prev; 809 tp->prev->next = np; 810 tp->prev = np; 811 } 812 } 813 814 static void 815 add_resource(status_record_t *status_rec_p, resource_list_t **rp, 816 resource_list_t *np) 817 { 818 int order; 819 uint64_t sec; 820 resource_list_t *sp, *tp; 821 status_record_t *srp; 822 char *severity = status_rec_p->severity; 823 824 add_rec_list(status_rec_p, &np->status_rec_list); 825 if ((sp = *rp) == NULL) { 826 np->next = np; 827 np->prev = np; 828 *rp = np; 829 } else { 830 /* 831 * insert new record in front of lower priority 832 */ 833 tp = sp->next; 834 srp = sp->status_rec_list->status_record; 835 sec = status_rec_p->uurec->sec; 836 order = cmp_priority(severity, srp->severity, sec, 837 srp->uurec->sec, np->max_pct, sp->max_pct); 838 if (order > 0) { 839 *rp = np; 840 } else { 841 srp = tp->status_rec_list->status_record; 842 while (tp 
!= sp && 843 cmp_priority(severity, srp->severity, sec, 844 srp->uurec->sec, np->max_pct, sp->max_pct) < 0) { 845 tp = tp->next; 846 srp = tp->status_rec_list->status_record; 847 } 848 } 849 np->next = tp; 850 np->prev = tp->prev; 851 tp->prev->next = np; 852 tp->prev = np; 853 } 854 } 855 856 static void 857 add_resource_list(status_record_t *status_rec_p, name_list_t *fp, 858 resource_list_t **rpp) 859 { 860 int order; 861 resource_list_t *np, *end; 862 status_record_t *srp; 863 864 np = *rpp; 865 end = np; 866 while (np) { 867 if (strcmp(fp->name, np->resource) == 0) { 868 np->not_suppressed |= status_rec_p->not_suppressed; 869 srp = np->status_rec_list->status_record; 870 order = cmp_priority(status_rec_p->severity, 871 srp->severity, status_rec_p->uurec->sec, 872 srp->uurec->sec, fp->max_pct, np->max_pct); 873 if (order > 0 && np != end) { 874 /* 875 * remove from list and add again using 876 * new priority 877 */ 878 np->prev->next = np->next; 879 np->next->prev = np->prev; 880 add_resource(status_rec_p, 881 rpp, np); 882 } else { 883 add_rec_list(status_rec_p, 884 &np->status_rec_list); 885 } 886 break; 887 } 888 np = np->next; 889 if (np == end) { 890 np = NULL; 891 break; 892 } 893 } 894 if (np == NULL) { 895 np = malloc(sizeof (resource_list_t)); 896 np->resource = fp->name; 897 np->not_suppressed = status_rec_p->not_suppressed; 898 np->status_rec_list = NULL; 899 np->max_pct = fp->max_pct; 900 add_resource(status_rec_p, rpp, np); 901 } 902 } 903 904 static void 905 add_list(status_record_t *status_rec_p, name_list_t *listp, 906 resource_list_t **glistp) 907 { 908 name_list_t *fp, *end; 909 910 fp = listp; 911 end = fp; 912 while (fp) { 913 add_resource_list(status_rec_p, fp, glistp); 914 fp = fp->next; 915 if (fp == end) 916 break; 917 } 918 } 919 920 /* 921 * add record to rec, fru and asru lists. 
922 */ 923 static void 924 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class, 925 name_list_t *fru, name_list_t *asru, name_list_t *resource, 926 name_list_t *serial, const char *url, boolean_t not_suppressed, 927 hostid_t *hostid) 928 { 929 status_record_t *status_rec_p; 930 931 status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru, 932 resource, serial, url, not_suppressed, hostid); 933 add_rec_list(status_rec_p, &status_rec_list); 934 if (status_rec_p->fru) 935 add_list(status_rec_p, status_rec_p->fru, &status_fru_list); 936 if (status_rec_p->asru) 937 add_list(status_rec_p, status_rec_p->asru, &status_asru_list); 938 } 939 940 /* 941 * add uuid and diagnoses time to an existing record for similar fault on the 942 * same fru 943 */ 944 static void 945 catalog_merge_record(status_record_t *status_rec_p, uurec_t *uurec_p, 946 name_list_t *asru, name_list_t *resource, name_list_t *serial, 947 const char *url, boolean_t not_suppressed) 948 { 949 uurec_t *uurec1_p; 950 951 status_rec_p->nrecs++; 952 /* add uurec in time order */ 953 if (status_rec_p->uurec->sec > uurec_p->sec) { 954 uurec_p->next = status_rec_p->uurec; 955 uurec_p->prev = NULL; 956 status_rec_p->uurec = uurec_p; 957 } else { 958 uurec1_p = status_rec_p->uurec; 959 while (uurec1_p->next && uurec1_p->next->sec <= uurec_p->sec) 960 uurec1_p = uurec1_p->next; 961 if (uurec1_p->next) 962 uurec1_p->next->prev = uurec_p; 963 uurec_p->next = uurec1_p->next; 964 uurec_p->prev = uurec1_p; 965 uurec1_p->next = uurec_p; 966 } 967 if (status_rec_p->url == NULL && url != NULL) 968 status_rec_p->url = strdup(url); 969 status_rec_p->not_suppressed |= not_suppressed; 970 uurec_p->asru = merge_name_list(&status_rec_p->asru, asru, 0); 971 (void) merge_name_list(&status_rec_p->resource, resource, 0); 972 (void) merge_name_list(&status_rec_p->serial, serial, 0); 973 } 974 975 static status_record_t * 976 record_in_catalog(name_list_t *class, name_list_t *fru, 977 char *msgid, hostid_t *host) 
978 { 979 sr_list_t *status_rec_p; 980 status_record_t *srp = NULL; 981 982 status_rec_p = status_rec_list; 983 while (status_rec_p) { 984 srp = status_rec_p->status_record; 985 if (host == srp->host && 986 cmp_name_list(class, srp->class) == 0 && 987 cmp_name_list(fru, srp->fru) == 0 && 988 strcmp(msgid, srp->msgid) == 0) 989 break; 990 if (status_rec_p->next == status_rec_list) { 991 srp = NULL; 992 break; 993 } else { 994 status_rec_p = status_rec_p->next; 995 } 996 } 997 return (srp); 998 } 999 1000 static void 1001 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct) 1002 { 1003 char *name; 1004 char *serial = NULL; 1005 char **lserial = NULL; 1006 uint64_t serint; 1007 name_list_t *nlp; 1008 int j; 1009 uint_t nelem; 1010 char buf[64]; 1011 1012 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) { 1013 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 1014 if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID, 1015 &serint) == 0) { 1016 (void) snprintf(buf, sizeof (buf), "%llX", 1017 serint); 1018 nlp = alloc_name_list(buf, pct); 1019 (void) merge_name_list(serial_p, nlp, 1); 1020 } 1021 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 1022 if (nvlist_lookup_string_array(nvl, 1023 FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) { 1024 nlp = alloc_name_list(lserial[0], pct); 1025 for (j = 1; j < nelem; j++) { 1026 name_list_t *n1lp; 1027 n1lp = alloc_name_list(lserial[j], pct); 1028 (void) merge_name_list(&nlp, n1lp, 1); 1029 } 1030 (void) merge_name_list(serial_p, nlp, 1); 1031 } 1032 } else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) { 1033 if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID, 1034 &serial) == 0) { 1035 nlp = alloc_name_list(serial, pct); 1036 (void) merge_name_list(serial_p, nlp, 1); 1037 } 1038 } 1039 } 1040 } 1041 1042 static void 1043 extract_record_info(nvlist_t *nvl, name_list_t **class_p, 1044 name_list_t **fru_p, name_list_t **serial_p, 1045 name_list_t **resource_p, name_list_t **asru_p, uint8_t status) 1046 { 1047 
nvlist_t *lfru, *lasru, *rsrc; 1048 name_list_t *nlp; 1049 char *name; 1050 uint8_t lpct = 0; 1051 char *lclass = NULL; 1052 char *label; 1053 1054 (void) nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct); 1055 if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) { 1056 nlp = alloc_name_list(lclass, lpct); 1057 (void) merge_name_list(class_p, nlp, 1); 1058 } 1059 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) { 1060 name = get_nvl2str_topo(lfru); 1061 if (name != NULL) { 1062 nlp = alloc_name_list(name, lpct); 1063 nlp->status = status & ~(FM_SUSPECT_UNUSABLE | 1064 FM_SUSPECT_DEGRADED); 1065 free(name); 1066 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1067 &label) == 0) 1068 nlp->label = strdup(label); 1069 (void) merge_name_list(fru_p, nlp, 1); 1070 } 1071 get_serial_no(lfru, serial_p, lpct); 1072 } 1073 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) { 1074 name = get_nvl2str_topo(lasru); 1075 if (name != NULL) { 1076 nlp = alloc_name_list(name, lpct); 1077 nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT | 1078 FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED | 1079 FM_SUSPECT_ACQUITTED); 1080 free(name); 1081 (void) merge_name_list(asru_p, nlp, 1); 1082 } 1083 get_serial_no(lasru, serial_p, lpct); 1084 } 1085 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) { 1086 name = get_nvl2str_topo(rsrc); 1087 if (name != NULL) { 1088 nlp = alloc_name_list(name, lpct); 1089 nlp->status = status; 1090 free(name); 1091 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1092 &label) == 0) 1093 nlp->label = strdup(label); 1094 (void) merge_name_list(resource_p, nlp, 1); 1095 } 1096 } 1097 } 1098 1099 static void 1100 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid, 1101 const char *url) 1102 { 1103 char *msgid = "-"; 1104 uint_t i, size = 0; 1105 name_list_t *class = NULL, *resource = NULL; 1106 name_list_t *asru = NULL, *fru = NULL, *serial = NULL; 1107 nvlist_t **nva; 1108 uint8_t *ba; 1109 status_record_t *status_rec_p; 
1110 uurec_t *uurec_p; 1111 hostid_t *host; 1112 boolean_t not_suppressed = 1; 1113 boolean_t any_present = 0; 1114 1115 (void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid); 1116 (void) nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size); 1117 (void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, 1118 ¬_suppressed); 1119 1120 if (size != 0) { 1121 (void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 1122 &nva, &size); 1123 (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, 1124 &ba, &size); 1125 for (i = 0; i < size; i++) { 1126 extract_record_info(nva[i], &class, &fru, &serial, 1127 &resource, &asru, ba[i]); 1128 if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) && 1129 (ba[i] & FM_SUSPECT_FAULTY)) 1130 any_present = 1; 1131 } 1132 /* 1133 * also suppress if no resources present 1134 */ 1135 if (any_present == 0) 1136 not_suppressed = 0; 1137 } 1138 1139 uurec_p = (uurec_t *)malloc(sizeof (uurec_t)); 1140 uurec_p->uuid = strdup(uuid); 1141 uurec_p->sec = sec; 1142 uurec_p->ari_uuid_list = NULL; 1143 host = find_hostid(nvl); 1144 if (not_suppressed && !opt_g) 1145 status_rec_p = NULL; 1146 else 1147 status_rec_p = record_in_catalog(class, fru, msgid, host); 1148 if (status_rec_p) { 1149 catalog_merge_record(status_rec_p, uurec_p, asru, resource, 1150 serial, url, not_suppressed); 1151 free_name_list(class); 1152 free_name_list(fru); 1153 } else { 1154 catalog_new_record(uurec_p, msgid, class, fru, asru, 1155 resource, serial, url, not_suppressed, host); 1156 } 1157 } 1158 1159 static void 1160 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid) 1161 { 1162 sr_list_t *srp; 1163 uurec_t *uurp; 1164 ari_list_t *ari_list; 1165 1166 srp = status_rec_list; 1167 if (srp) { 1168 for (;;) { 1169 uurp = srp->status_record->uurec; 1170 while (uurp) { 1171 if (strcmp(uuid, uurp->uuid) == 0) { 1172 ari_list = (ari_list_t *) 1173 malloc(sizeof (ari_list_t)); 1174 ari_list->ari_uuid = strdup(ari_uuid); 1175 ari_list->next = 
uurp->ari_uuid_list; 1176 uurp->ari_uuid_list = ari_list; 1177 return; 1178 } 1179 uurp = uurp->next; 1180 } 1181 if (srp->next == status_rec_list) 1182 break; 1183 srp = srp->next; 1184 } 1185 } 1186 } 1187 1188 static void 1189 print_line(char *label, char *buf) 1190 { 1191 char *cp, *ep, *wp; 1192 char c; 1193 int i; 1194 int lsz; 1195 char *padding; 1196 1197 lsz = strlen(label); 1198 padding = malloc(lsz + 1); 1199 for (i = 0; i < lsz; i++) 1200 padding[i] = ' '; 1201 padding[i] = 0; 1202 cp = buf; 1203 ep = buf; 1204 c = *ep; 1205 (void) printf("\n"); 1206 while (c) { 1207 i = lsz; 1208 wp = NULL; 1209 while ((c = *ep) != NULL && (wp == NULL || i < 80)) { 1210 if (c == ' ') 1211 wp = ep; 1212 else if (c == '\n') { 1213 i = 0; 1214 *ep = 0; 1215 do { 1216 ep++; 1217 } while ((c = *ep) != NULL && c == ' '); 1218 break; 1219 } 1220 ep++; 1221 i++; 1222 } 1223 if (i >= 80 && wp) { 1224 *wp = 0; 1225 ep = wp + 1; 1226 c = *ep; 1227 } 1228 (void) printf("%s%s\n", label, cp); 1229 cp = ep; 1230 label = padding; 1231 } 1232 free(padding); 1233 } 1234 1235 static void 1236 print_dict_info(char *msgid, char *url) 1237 { 1238 const char *cp; 1239 char *l_url; 1240 char *buf; 1241 int bufsz; 1242 1243 cp = get_dict_msg(msgid, "description", 0, 1); 1244 if (cp) { 1245 if (url) 1246 l_url = url; 1247 else 1248 l_url = get_dict_url(msgid); 1249 bufsz = strlen(cp) + strlen(l_url) + 1; 1250 buf = malloc(bufsz); 1251 (void) snprintf(buf, bufsz, cp, l_url); 1252 print_line(dgettext("FMD", "Description : "), buf); 1253 free(buf); 1254 if (!url) 1255 free(l_url); 1256 } 1257 cp = get_dict_msg(msgid, "response", 0, 1); 1258 if (cp) { 1259 buf = strdup(cp); 1260 print_line(dgettext("FMD", "Response : "), buf); 1261 free(buf); 1262 } 1263 cp = get_dict_msg(msgid, "impact", 0, 1); 1264 if (cp) { 1265 buf = strdup(cp); 1266 print_line(dgettext("FMD", "Impact : "), buf); 1267 free(buf); 1268 } 1269 cp = get_dict_msg(msgid, "action", 0, 1); 1270 if (cp) { 1271 buf = strdup(cp); 1272 
print_line(dgettext("FMD", "Action : "), buf); 1273 free(buf); 1274 } 1275 } 1276 1277 static void 1278 print_name(name_list_t *list, char *(func)(char *), char *padding, int *np, 1279 int pct, int full) 1280 { 1281 char *name, *fru_label = NULL; 1282 1283 name = list->name; 1284 if (list->label) { 1285 (void) printf("%s \"%s\" (%s)", padding, list->label, name); 1286 *np += 1; 1287 } else if (func && (fru_label = func(list->name)) != NULL) { 1288 (void) printf("%s \"%s\" (%s)", padding, fru_label, name); 1289 *np += 1; 1290 free(fru_label); 1291 } else { 1292 (void) printf("%s %s", padding, name); 1293 *np += 1; 1294 } 1295 if (list->pct && pct > 0 && pct < 100) { 1296 if (list->count > 1) { 1297 if (full) { 1298 (void) printf(" %d @ %s %d%%\n", list->count, 1299 dgettext("FMD", "max"), 1300 list->max_pct); 1301 } else { 1302 (void) printf(" %s %d%%\n", 1303 dgettext("FMD", "max"), 1304 list->max_pct); 1305 } 1306 } else { 1307 (void) printf(" %d%%\n", list->pct); 1308 } 1309 } else { 1310 (void) printf("\n"); 1311 } 1312 } 1313 1314 static void 1315 print_asru_status(int status, char *label) 1316 { 1317 char *msg = NULL; 1318 1319 switch (status) { 1320 case 0: 1321 msg = dgettext("FMD", "ok and in service"); 1322 break; 1323 case FM_SUSPECT_DEGRADED: 1324 msg = dgettext("FMD", "service degraded, " 1325 "but associated components no longer faulty"); 1326 break; 1327 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1328 msg = dgettext("FMD", "faulted but still " 1329 "providing degraded service"); 1330 break; 1331 case FM_SUSPECT_FAULTY: 1332 msg = dgettext("FMD", "faulted but still in service"); 1333 break; 1334 case FM_SUSPECT_UNUSABLE: 1335 msg = dgettext("FMD", "out of service, " 1336 "but associated components no longer faulty"); 1337 break; 1338 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1339 msg = dgettext("FMD", "faulted and taken out of service"); 1340 break; 1341 default: 1342 break; 1343 } 1344 if (msg) { 1345 (void) printf("%s %s\n", label, msg); 1346 
} 1347 } 1348 1349 static void 1350 print_fru_status(int status, char *label) 1351 { 1352 char *msg = NULL; 1353 1354 if (status & FM_SUSPECT_NOT_PRESENT) 1355 msg = dgettext("FMD", "not present"); 1356 else if (status & FM_SUSPECT_FAULTY) 1357 msg = dgettext("FMD", "faulty"); 1358 else if (status & FM_SUSPECT_REPLACED) 1359 msg = dgettext("FMD", "replaced"); 1360 else if (status & FM_SUSPECT_REPAIRED) 1361 msg = dgettext("FMD", "repair attempted"); 1362 else if (status & FM_SUSPECT_ACQUITTED) 1363 msg = dgettext("FMD", "acquitted"); 1364 else 1365 msg = dgettext("FMD", "removed"); 1366 (void) printf("%s %s\n", label, msg); 1367 } 1368 1369 static void 1370 print_name_list(name_list_t *list, char *label, char *(func)(char *), 1371 int limit, int pct, void (func1)(int, char *), int full) 1372 { 1373 char *name, *fru_label = NULL; 1374 char *padding; 1375 int i, j, l, n; 1376 name_list_t *end = list; 1377 1378 l = strlen(label); 1379 padding = malloc(l + 1); 1380 for (i = 0; i < l; i++) 1381 padding[i] = ' '; 1382 padding[l] = 0; 1383 (void) printf("%s", label); 1384 name = list->name; 1385 if (list->label) 1386 (void) printf(" \"%s\" (%s)", list->label, name); 1387 else if (func && (fru_label = func(list->name)) != NULL) { 1388 (void) printf(" \"%s\" (%s)", fru_label, name); 1389 free(fru_label); 1390 } else 1391 (void) printf(" %s", name); 1392 if (list->pct && pct > 0 && pct < 100) { 1393 if (list->count > 1) { 1394 if (full) { 1395 (void) printf(" %d @ %s %d%%\n", list->count, 1396 dgettext("FMD", "max"), list->max_pct); 1397 } else { 1398 (void) printf(" %s %d%%\n", 1399 dgettext("FMD", "max"), list->max_pct); 1400 } 1401 } else { 1402 (void) printf(" %d%%\n", list->pct); 1403 } 1404 } else { 1405 (void) printf("\n"); 1406 } 1407 if (func1) 1408 func1(list->status, padding); 1409 n = 1; 1410 j = 0; 1411 while ((list = list->next) != end) { 1412 if (limit == 0 || n < limit) { 1413 print_name(list, func, padding, &n, pct, full); 1414 if (func1) 1415 
func1(list->status, padding); 1416 } else 1417 j++; 1418 } 1419 if (j == 1) { 1420 print_name(list->prev, func, padding, &n, pct, full); 1421 } else if (j > 1) { 1422 (void) printf("%s... %d %s\n", padding, j, 1423 dgettext("FMD", "more entries suppressed," 1424 " use -v option for full list")); 1425 } 1426 free(padding); 1427 } 1428 1429 static int 1430 asru_same_status(name_list_t *list) 1431 { 1432 name_list_t *end = list; 1433 int status = list->status; 1434 1435 while ((list = list->next) != end) { 1436 if (status == -1) { 1437 status = list->status; 1438 continue; 1439 } 1440 if (list->status != -1 && status != list->status) { 1441 status = -1; 1442 break; 1443 } 1444 } 1445 return (status); 1446 } 1447 1448 static int 1449 serial_in_fru(name_list_t *fru, name_list_t *serial) 1450 { 1451 name_list_t *sp = serial; 1452 name_list_t *fp; 1453 int nserial = 0; 1454 int found = 0; 1455 char buf[128]; 1456 1457 while (sp) { 1458 fp = fru; 1459 nserial++; 1460 (void) snprintf(buf, sizeof (buf), "serial=%s", sp->name); 1461 buf[sizeof (buf) - 1] = 0; 1462 while (fp) { 1463 if (strstr(fp->name, buf) != NULL) { 1464 found++; 1465 break; 1466 } 1467 fp = fp->next; 1468 if (fp == fru) 1469 break; 1470 } 1471 sp = sp->next; 1472 if (sp == serial) 1473 break; 1474 } 1475 return (found == nserial ? 1 : 0); 1476 } 1477 1478 static void 1479 print_server_name(hostid_t *host, char *label) 1480 { 1481 (void) printf("%s %s %s %s\n", label, host->server, host->platform, 1482 host->chassis ? 
host->chassis : ""); 1483 } 1484 1485 static void 1486 print_sup_record(status_record_t *srp, int opt_i, int full) 1487 { 1488 char buf[32]; 1489 uurec_t *uurp = srp->uurec; 1490 int n, j, k, max; 1491 int status; 1492 ari_list_t *ari_list; 1493 1494 n = 0; 1495 max = max_fault; 1496 if (max < 0) { 1497 max = 0; 1498 } 1499 j = max / 2; 1500 max -= j; 1501 k = srp->nrecs - max; 1502 while ((uurp = uurp->next) != NULL) { 1503 if (full || n < j || n >= k || max_fault == 0 || 1504 srp->nrecs == max_fault+1) { 1505 if (opt_i) { 1506 ari_list = uurp->ari_uuid_list; 1507 while (ari_list) { 1508 (void) printf("%-15s %s\n", 1509 format_date(buf, sizeof (buf), 1510 uurp->sec), ari_list->ari_uuid); 1511 ari_list = ari_list->next; 1512 } 1513 } else { 1514 (void) printf("%-15s %s\n", 1515 format_date(buf, sizeof (buf), uurp->sec), 1516 uurp->uuid); 1517 } 1518 } else if (n == j) 1519 (void) printf("... %d %s\n", srp->nrecs - max_fault, 1520 dgettext("FMD", "more entries suppressed")); 1521 n++; 1522 } 1523 (void) printf("\n"); 1524 if (n_server > 1) 1525 print_server_name(srp->host, dgettext("FMD", "Host :")); 1526 if (srp->class) 1527 print_name_list(srp->class, 1528 dgettext("FMD", "Fault class :"), NULL, 0, srp->class->pct, 1529 NULL, full); 1530 if (srp->asru) { 1531 status = asru_same_status(srp->asru); 1532 if (status != -1) { 1533 print_name_list(srp->asru, 1534 dgettext("FMD", "Affects :"), NULL, 1535 full ? 0 : max_display, 0, NULL, full); 1536 print_asru_status(status, " "); 1537 } else 1538 print_name_list(srp->asru, 1539 dgettext("FMD", "Affects :"), NULL, 1540 full ? 0 : max_display, 0, print_asru_status, full); 1541 } 1542 if (full || srp->fru == NULL) { 1543 if (srp->resource) { 1544 print_name_list(srp->resource, 1545 dgettext("FMD", "Problem in :"), 1546 NULL, full ? 
0 : max_display, 0, print_fru_status, 1547 full); 1548 } 1549 } 1550 if (srp->fru) { 1551 status = asru_same_status(srp->fru); 1552 if (status != -1) { 1553 print_name_list(srp->fru, dgettext("FMD", 1554 "FRU :"), get_fmri_label, 0, 1555 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1556 NULL, full); 1557 print_fru_status(status, " "); 1558 } else 1559 print_name_list(srp->fru, dgettext("FMD", 1560 "FRU :"), get_fmri_label, 0, 1561 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1562 print_fru_status, full); 1563 } 1564 if (srp->serial && !serial_in_fru(srp->fru, srp->serial) && 1565 !serial_in_fru(srp->asru, srp->serial)) { 1566 print_name_list(srp->serial, dgettext("FMD", "Serial ID. :"), 1567 NULL, 0, 0, NULL, full); 1568 } 1569 print_dict_info(srp->msgid, srp->url); 1570 (void) printf("\n"); 1571 } 1572 1573 static void 1574 print_status_record(status_record_t *srp, int summary, int opt_i, int full) 1575 { 1576 char buf[32]; 1577 uurec_t *uurp = srp->uurec; 1578 char *severity; 1579 static int header = 0; 1580 char *head; 1581 ari_list_t *ari_list; 1582 1583 if (nlspath) 1584 severity = get_dict_msg(srp->msgid, "severity", 1, 1); 1585 else 1586 severity = srp->severity; 1587 1588 if (!summary || !header) { 1589 if (opt_i) { 1590 head = "--------------- " 1591 "------------------------------------ " 1592 "-------------- ---------\n" 1593 "TIME CACHE-ID" 1594 " MSG-ID" 1595 " SEVERITY\n--------------- " 1596 "------------------------------------ " 1597 " -------------- ---------"; 1598 } else { 1599 head = "--------------- " 1600 "------------------------------------ " 1601 "-------------- ---------\n" 1602 "TIME EVENT-ID" 1603 " MSG-ID" 1604 " SEVERITY\n--------------- " 1605 "------------------------------------ " 1606 " -------------- ---------"; 1607 } 1608 (void) printf("%s\n", dgettext("FMD", head)); 1609 header = 1; 1610 } 1611 if (opt_i) { 1612 ari_list = uurp->ari_uuid_list; 1613 while (ari_list) { 1614 (void) printf("%-15s %-37s %-14s %-9s\n", 1615 
format_date(buf, sizeof (buf), uurp->sec), 1616 ari_list->ari_uuid, srp->msgid, severity); 1617 ari_list = ari_list->next; 1618 } 1619 } else { 1620 (void) printf("%-15s %-37s %-14s %-9s\n", 1621 format_date(buf, sizeof (buf), uurp->sec), 1622 uurp->uuid, srp->msgid, severity); 1623 } 1624 1625 if (!summary) 1626 print_sup_record(srp, opt_i, full); 1627 } 1628 1629 static void 1630 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed) 1631 { 1632 status_record_t *srp; 1633 sr_list_t *slp; 1634 1635 slp = status_rec_list; 1636 if (slp) { 1637 for (;;) { 1638 srp = slp->status_record; 1639 if (opt_a || srp->not_suppressed) { 1640 if (page_feed) 1641 (void) printf("\f\n"); 1642 print_status_record(srp, summary, opt_i, full); 1643 } 1644 if (slp->next == status_rec_list) 1645 break; 1646 slp = slp->next; 1647 } 1648 } 1649 } 1650 1651 static name_list_t * 1652 find_fru(status_record_t *srp, char *resource) 1653 { 1654 name_list_t *rt = NULL; 1655 name_list_t *fru = srp->fru; 1656 1657 while (fru) { 1658 if (strcmp(resource, fru->name) == 0) { 1659 rt = fru; 1660 break; 1661 } 1662 fru = fru->next; 1663 if (fru == srp->fru) 1664 break; 1665 } 1666 return (rt); 1667 } 1668 1669 static void 1670 print_fru_line(name_list_t *fru, char *uuid) 1671 { 1672 if (fru->pct == 100) { 1673 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1674 dgettext("FMD", "suspects in this FRU total certainty"), 1675 100); 1676 } else { 1677 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1678 dgettext("FMD", "suspects in this FRU max certainty"), 1679 fru->max_pct); 1680 } 1681 } 1682 1683 static void 1684 print_fru(int summary, int opt_a, int opt_i, int page_feed) 1685 { 1686 resource_list_t *tp = status_fru_list; 1687 status_record_t *srp; 1688 sr_list_t *slp, *end; 1689 char *msgid, *fru_label; 1690 uurec_t *uurp; 1691 name_list_t *fru; 1692 int status; 1693 ari_list_t *ari_list; 1694 1695 while (tp) { 1696 if (opt_a || tp->not_suppressed) { 1697 if (page_feed) 1698 
(void) printf("\f\n"); 1699 if (!summary) 1700 (void) printf("-----------------------------" 1701 "---------------------------------------" 1702 "----------\n"); 1703 slp = tp->status_rec_list; 1704 end = slp; 1705 do { 1706 srp = slp->status_record; 1707 fru = find_fru(srp, tp->resource); 1708 if (fru) { 1709 if (fru->label) 1710 (void) printf("\"%s\" (%s) ", 1711 fru->label, fru->name); 1712 else if ((fru_label = get_fmri_label( 1713 fru->name)) != NULL) { 1714 (void) printf("\"%s\" (%s) ", 1715 fru_label, fru->name); 1716 free(fru_label); 1717 } else 1718 (void) printf("%s ", 1719 fru->name); 1720 break; 1721 } 1722 slp = slp->next; 1723 } while (slp != end); 1724 1725 slp = tp->status_rec_list; 1726 end = slp; 1727 status = 0; 1728 do { 1729 srp = slp->status_record; 1730 fru = srp->fru; 1731 while (fru) { 1732 if (strcmp(tp->resource, 1733 fru->name) == 0) 1734 status |= fru->status; 1735 fru = fru->next; 1736 if (fru == srp->fru) 1737 break; 1738 } 1739 slp = slp->next; 1740 } while (slp != end); 1741 if (status & FM_SUSPECT_NOT_PRESENT) 1742 (void) printf(dgettext("FMD", "not present\n")); 1743 else if (status & FM_SUSPECT_FAULTY) 1744 (void) printf(dgettext("FMD", "faulty\n")); 1745 else if (status & FM_SUSPECT_REPLACED) 1746 (void) printf(dgettext("FMD", "replaced\n")); 1747 else if (status & FM_SUSPECT_REPAIRED) 1748 (void) printf(dgettext("FMD", 1749 "repair attempted\n")); 1750 else if (status & FM_SUSPECT_ACQUITTED) 1751 (void) printf(dgettext("FMD", "acquitted\n")); 1752 else 1753 (void) printf(dgettext("FMD", "removed\n")); 1754 1755 slp = tp->status_rec_list; 1756 end = slp; 1757 do { 1758 srp = slp->status_record; 1759 uurp = srp->uurec; 1760 fru = find_fru(srp, tp->resource); 1761 if (fru) { 1762 if (opt_i) { 1763 ari_list = uurp->ari_uuid_list; 1764 while (ari_list) { 1765 print_fru_line(fru, 1766 ari_list->ari_uuid); 1767 ari_list = 1768 ari_list->next; 1769 } 1770 } else { 1771 print_fru_line(fru, uurp->uuid); 1772 } 1773 } 1774 slp = 
slp->next; 1775 } while (slp != end); 1776 if (!summary) { 1777 slp = tp->status_rec_list; 1778 end = slp; 1779 srp = slp->status_record; 1780 if (srp->serial && 1781 !serial_in_fru(srp->fru, srp->serial)) { 1782 print_name_list(srp->serial, 1783 dgettext("FMD", "Serial ID. :"), 1784 NULL, 0, 0, NULL, 1); 1785 } 1786 msgid = NULL; 1787 do { 1788 if (msgid == NULL || 1789 strcmp(msgid, srp->msgid) != 0) { 1790 msgid = srp->msgid; 1791 print_dict_info(srp->msgid, 1792 srp->url); 1793 } 1794 slp = slp->next; 1795 } while (slp != end); 1796 } 1797 } 1798 tp = tp->next; 1799 if (tp == status_fru_list) 1800 break; 1801 } 1802 } 1803 1804 static void 1805 print_asru(int opt_a) 1806 { 1807 resource_list_t *tp = status_asru_list; 1808 status_record_t *srp; 1809 sr_list_t *slp, *end; 1810 char *msg; 1811 int status; 1812 name_list_t *asru; 1813 1814 while (tp) { 1815 if (opt_a || tp->not_suppressed) { 1816 status = 0; 1817 slp = tp->status_rec_list; 1818 end = slp; 1819 do { 1820 srp = slp->status_record; 1821 asru = srp->asru; 1822 while (asru) { 1823 if (strcmp(tp->resource, 1824 asru->name) == 0) 1825 status |= asru->status; 1826 asru = asru->next; 1827 if (asru == srp->asru) 1828 break; 1829 } 1830 slp = slp->next; 1831 } while (slp != end); 1832 switch (status) { 1833 case 0: 1834 msg = dgettext("FMD", "ok"); 1835 break; 1836 case FM_SUSPECT_DEGRADED: 1837 msg = dgettext("FMD", "degraded"); 1838 break; 1839 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1840 msg = dgettext("FMD", "degraded"); 1841 break; 1842 case FM_SUSPECT_FAULTY: 1843 msg = dgettext("FMD", "degraded"); 1844 break; 1845 case FM_SUSPECT_UNUSABLE: 1846 msg = dgettext("FMD", "unknown"); 1847 break; 1848 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1849 msg = dgettext("FMD", "faulted"); 1850 break; 1851 default: 1852 msg = ""; 1853 break; 1854 } 1855 (void) printf("%-69s %s\n", tp->resource, msg); 1856 } 1857 tp = tp->next; 1858 if (tp == status_asru_list) 1859 break; 1860 } 1861 } 1862 1863 static int 
1864 uuid_in_list(char *uuid, uurec_select_t *uurecp) 1865 { 1866 while (uurecp) { 1867 if (strcmp(uuid, uurecp->uuid) == 0) 1868 return (1); 1869 uurecp = uurecp->next; 1870 } 1871 return (0); 1872 } 1873 1874 static int 1875 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg) 1876 { 1877 int64_t *diag_time; 1878 uint_t nelem; 1879 int rt = 0; 1880 char *uuid = "-"; 1881 uurec_select_t *uurecp = (uurec_select_t *)arg; 1882 1883 if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME, 1884 &diag_time, &nelem) == 0 && nelem >= 2) { 1885 (void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID, 1886 &uuid); 1887 if (uurecp == NULL || uuid_in_list(uuid, uurecp)) 1888 add_fault_record_to_catalog(acp->aci_event, *diag_time, 1889 uuid, acp->aci_url); 1890 } else { 1891 rt = -1; 1892 } 1893 return (rt); 1894 } 1895 1896 /*ARGSUSED*/ 1897 static int 1898 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused) 1899 { 1900 update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid); 1901 return (0); 1902 } 1903 1904 static int 1905 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i) 1906 { 1907 int rt = FMADM_EXIT_SUCCESS; 1908 1909 /* 1910 * These calls may fail with Protocol error if message payload is to big 1911 */ 1912 if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0) 1913 die("failed to get case list from fmd"); 1914 if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0) 1915 die("failed to get case status from fmd"); 1916 return (rt); 1917 } 1918 1919 /* 1920 * fmadm faulty command 1921 * 1922 * -a show hidden fault records 1923 * -f show faulty fru's 1924 * -g force grouping of similar faults on the same fru 1925 * -n number of fault records to display 1926 * -p pipe output through pager 1927 * -r show faulty asru's 1928 * -s print summary of first fault 1929 * -u print listed uuid's only 1930 * -v full output 1931 */ 1932 1933 int 1934 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[]) 1935 { 1936 int opt_a = 0, 
opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0; 1937 int opt_i = 0; 1938 char *pager; 1939 FILE *fp; 1940 int rt, c, stat; 1941 uurec_select_t *tp; 1942 uurec_select_t *uurecp = NULL; 1943 1944 catalog_setup(); 1945 while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) { 1946 switch (c) { 1947 case 'a': 1948 opt_a++; 1949 break; 1950 case 'f': 1951 opt_f++; 1952 break; 1953 case 'g': 1954 opt_g++; 1955 break; 1956 case 'i': 1957 opt_i++; 1958 break; 1959 case 'n': 1960 max_fault = atoi(optarg); 1961 break; 1962 case 'p': 1963 opt_p++; 1964 break; 1965 case 'r': 1966 opt_r++; 1967 break; 1968 case 's': 1969 opt_s++; 1970 break; 1971 case 'u': 1972 tp = (uurec_select_t *)malloc(sizeof (uurec_select_t)); 1973 tp->uuid = optarg; 1974 tp->next = uurecp; 1975 uurecp = tp; 1976 opt_a = 1; 1977 break; 1978 case 'v': 1979 opt_v++; 1980 break; 1981 default: 1982 return (FMADM_EXIT_USAGE); 1983 } 1984 } 1985 if (optind < argc) 1986 return (FMADM_EXIT_USAGE); 1987 1988 rt = get_cases_from_fmd(adm, uurecp, opt_i); 1989 if (opt_p) { 1990 if ((pager = getenv("PAGER")) == NULL) 1991 pager = "/usr/bin/more"; 1992 fp = popen(pager, "w"); 1993 if (fp == NULL) { 1994 rt = FMADM_EXIT_ERROR; 1995 opt_p = 0; 1996 } else { 1997 dup2(fileno(fp), 1); 1998 setbuf(stdout, NULL); 1999 (void) fclose(fp); 2000 } 2001 } 2002 max_display = max_fault; 2003 if (opt_f) 2004 print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s); 2005 if (opt_r) 2006 print_asru(opt_a); 2007 if (opt_f == 0 && opt_r == 0) 2008 print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s); 2009 label_release_topo(); 2010 if (opt_p) { 2011 (void) fclose(stdout); 2012 (void) wait(&stat); 2013 } 2014 return (rt); 2015 } 2016 2017 int 2018 cmd_flush(fmd_adm_t *adm, int argc, char *argv[]) 2019 { 2020 int i, status = FMADM_EXIT_SUCCESS; 2021 2022 if (argc < 2 || (i = getopt(argc, argv, "")) != EOF) 2023 return (FMADM_EXIT_USAGE); 2024 2025 for (i = 1; i < argc; i++) { 2026 if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) { 2027 
warn("failed to flush %s", argv[i]); 2028 status = FMADM_EXIT_ERROR; 2029 } else 2030 note("flushed resource history for %s\n", argv[i]); 2031 } 2032 2033 return (status); 2034 } 2035 2036 int 2037 cmd_repair(fmd_adm_t *adm, int argc, char *argv[]) 2038 { 2039 int err; 2040 2041 if (getopt(argc, argv, "") != EOF) 2042 return (FMADM_EXIT_USAGE); 2043 2044 if (argc - optind != 1) 2045 return (FMADM_EXIT_USAGE); 2046 2047 /* 2048 * argument could be a uuid, an fmri (asru, fru or resource) 2049 * or a label. Try uuid first, If that fails try the others. 2050 */ 2051 err = fmd_adm_case_repair(adm, argv[optind]); 2052 if (err != 0) 2053 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2054 2055 if (err != 0) 2056 die("failed to record repair to %s", argv[optind]); 2057 2058 note("recorded repair to %s\n", argv[optind]); 2059 return (FMADM_EXIT_SUCCESS); 2060 } 2061 2062 int 2063 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[]) 2064 { 2065 int err; 2066 2067 if (getopt(argc, argv, "") != EOF) 2068 return (FMADM_EXIT_USAGE); 2069 2070 if (argc - optind != 1) 2071 return (FMADM_EXIT_USAGE); 2072 2073 /* 2074 * argument could be an fmri (asru, fru or resource) or a label. 2075 */ 2076 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2077 if (err != 0) 2078 die("failed to record repair to %s", argv[optind]); 2079 2080 note("recorded repair to of %s\n", argv[optind]); 2081 return (FMADM_EXIT_SUCCESS); 2082 } 2083 2084 int 2085 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[]) 2086 { 2087 int err; 2088 2089 if (getopt(argc, argv, "") != EOF) 2090 return (FMADM_EXIT_USAGE); 2091 2092 if (argc - optind != 1) 2093 return (FMADM_EXIT_USAGE); 2094 2095 /* 2096 * argument could be an fmri (asru, fru or resource) or a label. 
2097 */ 2098 err = fmd_adm_rsrc_replaced(adm, argv[optind]); 2099 if (err != 0) 2100 die("failed to record replacement of %s", argv[optind]); 2101 2102 note("recorded replacement of %s\n", argv[optind]); 2103 return (FMADM_EXIT_SUCCESS); 2104 } 2105 2106 int 2107 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[]) 2108 { 2109 int err; 2110 2111 if (getopt(argc, argv, "") != EOF) 2112 return (FMADM_EXIT_USAGE); 2113 2114 if (argc - optind != 1 && argc - optind != 2) 2115 return (FMADM_EXIT_USAGE); 2116 2117 /* 2118 * argument could be a uuid, an fmri (asru, fru or resource) 2119 * or a label. Or it could be a uuid and an fmri or label. 2120 */ 2121 if (argc - optind == 2) { 2122 err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]); 2123 if (err != 0) 2124 err = fmd_adm_rsrc_acquit(adm, argv[optind + 1], 2125 argv[optind]); 2126 } else { 2127 err = fmd_adm_case_acquit(adm, argv[optind]); 2128 if (err != 0) 2129 err = fmd_adm_rsrc_acquit(adm, argv[optind], ""); 2130 } 2131 2132 if (err != 0) 2133 die("failed to record acquital of %s", argv[optind]); 2134 2135 note("recorded acquital of %s\n", argv[optind]); 2136 return (FMADM_EXIT_SUCCESS); 2137 } 2138