1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <fmadm.h> 30 #include <errno.h> 31 #include <limits.h> 32 #include <strings.h> 33 #include <stdio.h> 34 #include <unistd.h> 35 #include <sys/wait.h> 36 #include <sys/stat.h> 37 #include <fcntl.h> 38 #include <fm/fmd_log.h> 39 #include <sys/fm/protocol.h> 40 #include <fm/libtopo.h> 41 #include <fm/fmd_adm.h> 42 #include <dlfcn.h> 43 #include <sys/systeminfo.h> 44 #include <sys/utsname.h> 45 #include <libintl.h> 46 #include <locale.h> 47 #include <sys/smbios.h> 48 #include <libdevinfo.h> 49 #include <stdlib.h> 50 51 #define offsetof(s, m) ((size_t)(&(((s*)0)->m))) 52 53 /* 54 * catalog_setup() must be called to setup support functions. 55 * Fault records are added to catalog by calling add_fault_record_to_catalog() 56 * records are stored in order of importance to the system. 
 * If -g flag is set or not_suppressed is not set and the class fru, fault,
 * type are the same then details are merged into an existing record, with uuid
 * records are stored in time order.
 * For each record information is extracted from nvlist and merged into linked
 * list each is checked for identical records for which percentage certainty are
 * added together.
 * print_catalog() is called to print out catalog and release external resources
 *
 *                               /---------------\
 *     status_rec_list ->        |               | -|
 *                               \---------------/
 *                                      \/
 *                               /---------------\    /-------\    /-------\
 *     status_fru_list           | status_record | -> | uurec | -> | uurec | -|
 *           \/                  |               | |- |       | <- |       |
 *     /-------------\           |               |    \-------/    \-------/
 *     |             | ->        |               |       \/           \/
 *     \-------------/           |               |    /-------\    /-------\
 *           \/                  |               | -> | asru  | -> | asru  |
 *           ---                 |               |    |       | <- |       |
 *                               |               |    \-------/    \-------/
 *     status_asru_list          | class         |
 *           \/                  | resource      |    /-------\    /-------\
 *     /-------------\           | fru           | -> | list  | -> | list  |
 *     |             | ->        | serial        |    |       | <- |       |
 *     \-------------/           |               |    \-------/    \-------/
 *           \/                  \---------------/
 *           ---                      \/    /\
 *                               /---------------\
 *                               | status_record |
 *                               \---------------/
 *
 * Fmadm faulty takes a number of options which affect the format of the
 * output displayed. By default, the display reports the FRU and ASRU along
 * with other information on per-case basis as in the example below.
 *
 * --------------- ------------------------------------  -------------- -------
 * TIME            EVENT-ID                              MSG-ID         SEVERITY
 * --------------- ------------------------------------  -------------- -------
 * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c  AMD-8000-2F    Major
 *
 * Fault class  : fault.memory.dimm_sb
 * Affects      : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
 *                faulted but still in service
 * FRU          : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
 *                faulty
 *
 * Description  : The number of errors associated with this memory module has
 *                exceeded acceptable levels.  Refer to
 *                http://sun.com/msg/AMD-8000-2F for more information.
 *
 * Response     : Pages of memory associated with this memory module are being
 *                removed from service as errors are reported.
 *
 * Impact       : Total system memory capacity will be reduced as pages are
 *                retired.
 *
 * Action       : Schedule a repair procedure to replace the affected memory
 *                module.  Use fmdump -v -u <EVENT_ID> to identify the module.
 *
 * The -v flag is similar, but adds some additional information such as the
 * resource. The -s flag is also similar but just gives the top line summary.
 * All these options (ie without the -f or -r flags) use the print_catalog()
 * function to do the display.
 *
 * The -f flag changes the output so that it appears sorted on a per-fru basis.
 * The output is somewhat cut down compared to the default output. If -f is
 * used, then print_fru() is used to print the output.
 *
 * -----------------------------------------------------------------------------
 * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
 * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
 *
 * Description  : A problem was detected for a PCI device.
 *                Refer to http://sun.com/msg/PCI-8000-7J for more information.
132 * 133 * Response : One or more device instances may be disabled 134 * 135 * Impact : Possible loss of services provided by the device instances 136 * associated with this fault 137 * 138 * Action : Schedule a repair procedure to replace the affected device. 139 * Use fmdump -v -u <EVENT_ID> to identify the device or contact 140 * Sun for support. 141 * 142 * The -r flag changes the output so that it appears sorted on a per-asru basis. 143 * The output is very much cut down compared to the default output, just giving 144 * the asru fmri and state. Here print_asru() is used to print the output. 145 * 146 * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded 147 * 148 * For all fmadm faulty options, the sequence of events is 149 * 150 * 1) Walk through all the cases in the system using fmd_adm_case_iter() and 151 * for each case call dfault_rec(). This will call add_fault_record_to_catalog() 152 * This will extract the data from the nvlist and call catalog_new_record() to 153 * save the data away in various linked lists in the catalogue. 154 * 155 * 2) Once this is done, the data can be supplemented by using 156 * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option. 157 * 158 * 3) Finally print_catalog(), print_fru() or print_asru() are called as 159 * appropriate to display the information from the catalogue sorted in the 160 * requested way. 
 *
 */

/*
 * Node of a circular, doubly linked list of names.  The same node type is
 * used for the class, fru, asru, resource and serial lists of a record.
 */
typedef struct name_list {
	struct name_list *next;
	struct name_list *prev;
	char *name;		/* heap copy made by alloc_name_list() */
	uint8_t pct;		/* certainty; summed or maximised on merge */
	uint8_t max_pct;	/* largest single certainty seen */
	ushort_t count;		/* number of entries merged into this node */
	int status;		/* FM_SUSPECT_* flag bits */
	char *label;		/* optional heap-allocated label, or NULL */
} name_list_t;

/* Singly linked list of uuids pushed by update_asru_state_in_catalog(). */
typedef struct ari_list {
	char *ari_uuid;
	struct ari_list *next;
} ari_list_t;

/*
 * One case (uuid + diagnosis time) belonging to a status record.  The list
 * is NULL-terminated and kept in ascending `sec' order by
 * catalog_merge_record().
 */
typedef struct uurec {
	struct uurec *next;
	struct uurec *prev;
	char *uuid;
	ari_list_t *ari_uuid_list;
	name_list_t *asru;
	uint64_t sec;
} uurec_t;

typedef struct uurec_select {
	struct uurec_select *next;
	char *uuid;
} uurec_select_t;

/* Host identity strings taken from the FMRI authority of a suspect list. */
typedef struct host_id {
	char *chassis;
	char *server;
	char *platform;
} hostid_t;

/* Singly linked list of every distinct host seen so far (see host_list). */
typedef struct host_id_list {
	hostid_t hostid;
	struct host_id_list *next;
} host_id_list_t;

/*
 * One catalogue entry: a fault (or several merged similar faults) together
 * with the name lists extracted from its suspect list.
 */
typedef struct status_record {
	hostid_t *host;		/* entry in host_list, may be NULL */
	int nrecs;		/* number of uurecs merged into this record */
	uurec_t *uurec;		/* head of the uurec list */
	char *severity;		/* in C locale */
	char *msgid;
	name_list_t *class;
	name_list_t *resource;
	name_list_t *asru;
	name_list_t *fru;
	name_list_t *serial;
	char *url;
	uint8_t not_suppressed;
} status_record_t;

/* Circular, doubly linked list of status records, highest priority first. */
typedef struct sr_list {
	struct sr_list *next;
	struct sr_list *prev;
	struct status_record *status_record;
} sr_list_t;

/*
 * Per-FRU / per-ASRU index entry: a circular list node that carries the
 * status records which mention `resource'.
 */
typedef struct resource_list {
	struct resource_list *next;
	struct resource_list *prev;
	sr_list_t *status_rec_list;
	char *resource;		/* borrowed from a name_list_t, not a copy */
	uint8_t not_suppressed;
	uint8_t max_pct;
} resource_list_t;

/* Argument block handed to the tgetlabel() topology walker callback. */
typedef struct tgetlabel_data {
	char *label;		/* out: strdup'd label, or NULL if not found */
	char *fru;		/* in: fru fmri string to look for */
} tgetlabel_data_t;

/* Heads of the three catalogue lists. */
sr_list_t *status_rec_list;
resource_list_t *status_fru_list;
resource_list_t *status_asru_list;

static char *locale;		/* LC_MESSAGES locale saved by catalog_setup */
static char *nlspath;		/* "NLSPATH=..." string, or NULL if unset */
static int max_display;
static int max_fault = 0;
static topo_hdl_t *topo_handle;
static char *topo_handle_uuid;
static host_id_list_t *host_list;
static int n_server;		/* number of entries on host_list */
252 static int opt_g; 253 254 static char * 255 format_date(char *buf, size_t len, uint64_t sec) 256 { 257 if (sec > LONG_MAX) { 258 (void) fprintf(stderr, 259 "record time is too large for 32-bit utility\n"); 260 (void) snprintf(buf, len, "0x%llx", sec); 261 } else { 262 time_t tod = (time_t)sec; 263 (void) strftime(buf, len, "%b %d %T", localtime(&tod)); 264 } 265 266 return (buf); 267 } 268 269 static hostid_t * 270 find_hostid_in_list(char *platform, char *chassis, char *server) 271 { 272 hostid_t *rt = NULL; 273 host_id_list_t *hostp; 274 275 if (platform == NULL) 276 platform = "-"; 277 if (server == NULL) 278 server = "-"; 279 hostp = host_list; 280 while (hostp) { 281 if (hostp->hostid.platform && 282 strcmp(hostp->hostid.platform, platform) == 0 && 283 hostp->hostid.server && 284 strcmp(hostp->hostid.server, server) == 0 && 285 (chassis == NULL || hostp->hostid.chassis == NULL || 286 strcmp(chassis, hostp->hostid.chassis) == 0)) { 287 rt = &hostp->hostid; 288 break; 289 } 290 hostp = hostp->next; 291 } 292 if (rt == NULL) { 293 hostp = malloc(sizeof (host_id_list_t)); 294 hostp->hostid.platform = strdup(platform); 295 hostp->hostid.server = strdup(server); 296 hostp->hostid.chassis = chassis ? 
strdup(chassis) : NULL; 297 hostp->next = host_list; 298 host_list = hostp; 299 rt = &hostp->hostid; 300 n_server++; 301 } 302 return (rt); 303 } 304 305 static hostid_t * 306 find_hostid(nvlist_t *nvl) 307 { 308 char *platform = NULL, *chassis = NULL, *server = NULL; 309 nvlist_t *auth, *fmri; 310 hostid_t *rt = NULL; 311 312 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 && 313 nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) { 314 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT, 315 &platform); 316 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server); 317 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS, 318 &chassis); 319 rt = find_hostid_in_list(platform, chassis, server); 320 } 321 return (rt); 322 } 323 324 static void 325 catalog_setup(void) 326 { 327 char *tp; 328 int pl; 329 330 /* 331 * All FMA event dictionaries use msgfmt(1) message objects to produce 332 * messages, even for the C locale. We therefore want to use dgettext 333 * for all message lookups, but its defined behavior in the C locale is 334 * to return the input string. Since our input strings are event codes 335 * and not format strings, this doesn't help us. We resolve this nit 336 * by setting NLSPATH to a non-existent file: the presence of NLSPATH 337 * is defined to force dgettext(3C) to do a full lookup even for C. 
338 */ 339 nlspath = getenv("NLSPATH"); 340 if (nlspath == NULL) 341 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 342 else { 343 pl = strlen(nlspath) + sizeof ("NLSPATH=") + 1; 344 tp = malloc(pl); 345 (void) snprintf(tp, pl, "NLSPATH=%s", nlspath); 346 nlspath = tp; 347 } 348 349 locale = setlocale(LC_MESSAGES, ""); 350 } 351 352 static char * 353 get_dict_url(char *id) 354 { 355 char *url = "http://sun.com/msg/"; 356 int msz = sizeof (url) + strlen(id) + 1; 357 char *cp; 358 359 cp = malloc(msz); 360 (void) snprintf(cp, msz, "%s%s", url, id); 361 return (cp); 362 } 363 364 static char * 365 get_dict_msg(char *id, char *idx, int unknown, int translate) 366 { 367 char mbuf[128]; 368 char *msg; 369 char dbuf[32]; 370 char *p; 371 int restore_env = 0; 372 int restore_locale = 0; 373 374 p = strchr(id, '-'); 375 if (p == NULL || p == id || (p - id) >= 32) { 376 msg = mbuf; 377 } else { 378 strncpy(dbuf, id, (size_t)(p - id)); 379 dbuf[(size_t)(p - id)] = 0; 380 381 (void) snprintf(mbuf, sizeof (mbuf), "%s.%s", id, idx); 382 if (translate == 0 || nlspath == NULL) { 383 (void) setlocale(LC_MESSAGES, "C"); 384 restore_locale = 1; 385 } 386 bindtextdomain("FMD", "/usr/lib/locale"); 387 msg = dgettext(dbuf, mbuf); 388 if (msg == mbuf) { 389 (void) setlocale(LC_MESSAGES, "C"); 390 restore_locale = 1; 391 msg = dgettext(dbuf, mbuf); 392 } 393 if (msg == mbuf) { 394 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 395 restore_env = 1; 396 (void) setlocale(LC_MESSAGES, "C"); 397 msg = dgettext(dbuf, mbuf); 398 } 399 if (restore_locale) 400 (void) setlocale(LC_MESSAGES, locale); 401 if (restore_env && nlspath) 402 putenv(nlspath); 403 } 404 if (msg == mbuf) { 405 if (unknown) 406 msg = "unknown"; 407 else 408 msg = NULL; 409 } 410 return (msg); 411 } 412 413 /* 414 * compare two fru strings which are made up of substrings seperated by '/' 415 * return true if every substring is the same in the two strings, or if a 416 * substring is null in one. 
417 */ 418 419 static int 420 frucmp(char *f1, char *f2) 421 { 422 char c1, c2; 423 int i = 0; 424 425 for (;;) { 426 c1 = *f1; 427 c2 = *f2; 428 if (c1 == c2) { 429 i = (c1 == '/') ? 0 : i + 1; 430 } else if (i == 0) { 431 if (c1 == '/') { 432 do { 433 f2++; 434 } while ((c2 = *f2) != 0 && c2 != '/'); 435 if (c2 == NULL) 436 break; 437 } else if (c2 == '/') { 438 do { 439 f1++; 440 } while ((c1 = *f1) != 0 && c1 != '/'); 441 if (c1 == NULL) 442 break; 443 } else 444 break; 445 } else 446 break; 447 if (c1 == NULL) 448 return (0); 449 f1++; 450 f2++; 451 } 452 return (1); 453 } 454 455 static int 456 tgetlabel(topo_hdl_t *thp, tnode_t *node, void *arg) 457 { 458 int err; 459 char *fru_name, *lname; 460 nvlist_t *fru = NULL; 461 int rt = TOPO_WALK_NEXT; 462 tgetlabel_data_t *tdp = (tgetlabel_data_t *)arg; 463 464 if (topo_node_fru(node, &fru, NULL, &err) == 0) { 465 if (topo_fmri_nvl2str(thp, fru, &fru_name, &err) == 0) { 466 if (frucmp(tdp->fru, fru_name) == 0 && 467 topo_node_label(node, &lname, &err) == 0) { 468 tdp->label = strdup(lname); 469 topo_hdl_strfree(thp, lname); 470 rt = TOPO_WALK_TERMINATE; 471 } 472 topo_hdl_strfree(thp, fru_name); 473 } 474 nvlist_free(fru); 475 } 476 return (rt); 477 } 478 479 static void 480 label_get_topo(void) 481 { 482 int err; 483 484 topo_handle = topo_open(TOPO_VERSION, 0, &err); 485 if (topo_handle) { 486 topo_handle_uuid = topo_snap_hold(topo_handle, NULL, &err); 487 } 488 } 489 490 static void 491 label_release_topo(void) 492 { 493 if (topo_handle_uuid) 494 topo_hdl_strfree(topo_handle, topo_handle_uuid); 495 if (topo_handle) { 496 topo_snap_release(topo_handle); 497 topo_close(topo_handle); 498 } 499 } 500 501 static char * 502 get_fmri_label(char *fru) 503 { 504 topo_walk_t *twp; 505 tgetlabel_data_t td; 506 int err; 507 508 td.label = NULL; 509 td.fru = fru; 510 if (topo_handle == NULL) 511 label_get_topo(); 512 if (topo_handle_uuid) { 513 twp = topo_walk_init(topo_handle, FM_FMRI_SCHEME_HC, 514 tgetlabel, &td, &err); 
515 if (twp) { 516 topo_walk_step(twp, TOPO_WALK_CHILD); 517 topo_walk_fini(twp); 518 } 519 } 520 return (td.label); 521 } 522 523 static char * 524 get_nvl2str_topo(nvlist_t *nvl) 525 { 526 char *name = NULL; 527 char *tname; 528 int err; 529 char *scheme = NULL; 530 char *mod_name = NULL; 531 char buf[128]; 532 533 if (topo_handle == NULL) 534 label_get_topo(); 535 if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) { 536 name = strdup(tname); 537 topo_hdl_strfree(topo_handle, tname); 538 } else { 539 (void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme); 540 (void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name); 541 if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 && 542 mod_name) { 543 (void) snprintf(buf, sizeof (buf), "%s:///module/%s", 544 scheme, mod_name); 545 name = strdup(buf); 546 } 547 } 548 return (name); 549 } 550 551 static int 552 set_priority(char *s) 553 { 554 int rt = 0; 555 556 if (s) { 557 if (strcmp(s, "Minor") == 0) 558 rt = 1; 559 else if (strcmp(s, "Major") == 0) 560 rt = 10; 561 else if (strcmp(s, "Critical") == 0) 562 rt = 100; 563 } 564 return (rt); 565 } 566 567 static int 568 cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1, 569 uint8_t p2) 570 { 571 int r1, r2; 572 int rt; 573 574 r1 = set_priority(s1); 575 r2 = set_priority(s2); 576 rt = r1 - r2; 577 if (rt == 0) { 578 if (t1 > t2) 579 rt = 1; 580 else if (t1 < t2) 581 rt = -1; 582 else 583 rt = p1 - p2; 584 } 585 return (rt); 586 } 587 588 /* 589 * merge two lists into one, by comparing enties in new and moving into list if 590 * name is not there or free off memory for names which are already there 591 * add_pct indicates if pct is the sum or highest pct 592 */ 593 static name_list_t * 594 merge_name_list(name_list_t **list, name_list_t *new, int add_pct) 595 { 596 name_list_t *lp, *np, *sp, *rt = NULL; 597 int max_pct; 598 599 rt = *list; 600 np = new; 601 while (np) { 602 lp = *list; 603 while (lp) { 604 if (strcmp(lp->name, 
np->name) == 0) 605 break; 606 lp = lp->next; 607 if (lp == *list) 608 lp = NULL; 609 } 610 if (np->next == new) 611 sp = NULL; 612 else 613 sp = np->next; 614 if (lp) { 615 lp->status |= (np->status & FM_SUSPECT_FAULTY); 616 if (add_pct) { 617 lp->pct += np->pct; 618 lp->count += np->count; 619 } else if (np->pct > lp->pct) { 620 lp->pct = np->pct; 621 } 622 max_pct = np->max_pct; 623 if (np->label) 624 free(np->label); 625 free(np->name); 626 free(np); 627 np = NULL; 628 if (max_pct > lp->max_pct) { 629 lp->max_pct = max_pct; 630 if (lp->max_pct > lp->prev->max_pct && 631 lp != *list) { 632 lp->prev->next = lp->next; 633 lp->next->prev = lp->prev; 634 np = lp; 635 } 636 } 637 } 638 if (np) { 639 lp = *list; 640 if (lp) { 641 if (np->max_pct > lp->max_pct) { 642 np->next = lp; 643 np->prev = lp->prev; 644 lp->prev->next = np; 645 lp->prev = np; 646 *list = np; 647 rt = np; 648 } else { 649 lp = lp->next; 650 while (lp != *list && 651 np->max_pct < lp->max_pct) { 652 lp = lp->next; 653 } 654 np->next = lp; 655 np->prev = lp->prev; 656 lp->prev->next = np; 657 lp->prev = np; 658 } 659 } else { 660 *list = np; 661 np->next = np; 662 np->prev = np; 663 rt = np; 664 } 665 } 666 np = sp; 667 } 668 return (rt); 669 } 670 671 /* 672 * compare entries in two lists return true if the two lists have identical 673 * content. The two lists may not have entries in the same order, so we compare 674 * the size of the list as well as trying to find every entry from one list in 675 * the other. 
676 */ 677 static int 678 cmp_name_list(name_list_t *lxp1, name_list_t *lxp2) 679 { 680 name_list_t *lp1, *lp2; 681 int l1 = 0, l2 = 0, common = 0; 682 683 lp2 = lxp2; 684 while (lp2) { 685 l2++; 686 lp2 = lp2->next; 687 if (lp2 == lxp2) 688 break; 689 } 690 lp1 = lxp1; 691 while (lp1) { 692 l1++; 693 lp2 = lxp2; 694 while (lp2) { 695 if (strcmp(lp2->name, lp1->name) == 0) { 696 common++; 697 break; 698 } 699 lp2 = lp2->next; 700 if (lp2 == lxp2) 701 break; 702 } 703 lp1 = lp1->next; 704 if (lp1 == lxp1) 705 break; 706 } 707 if (l1 == l2 && l2 == common) 708 return (0); 709 else 710 return (1); 711 } 712 713 static name_list_t * 714 alloc_name_list(char *name, uint8_t pct) 715 { 716 name_list_t *nlp; 717 718 nlp = malloc(sizeof (*nlp)); 719 nlp->name = strdup(name); 720 nlp->pct = pct; 721 nlp->max_pct = pct; 722 nlp->count = 1; 723 nlp->next = nlp; 724 nlp->prev = nlp; 725 nlp->status = 0; 726 nlp->label = NULL; 727 return (nlp); 728 } 729 730 static void 731 free_name_list(name_list_t *list) 732 { 733 name_list_t *next = list; 734 name_list_t *lp; 735 736 if (list) { 737 do { 738 lp = next; 739 next = lp->next; 740 if (lp->label) 741 free(lp->label); 742 free(lp->name); 743 free(lp); 744 } while (next != list); 745 } 746 } 747 748 static status_record_t * 749 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class, 750 name_list_t *fru, name_list_t *asru, name_list_t *resource, 751 name_list_t *serial, const char *url, boolean_t not_suppressed, 752 hostid_t *hostid) 753 { 754 status_record_t *status_rec_p; 755 756 status_rec_p = (status_record_t *)malloc(sizeof (status_record_t)); 757 status_rec_p->nrecs = 1; 758 status_rec_p->host = hostid; 759 status_rec_p->uurec = uurec_p; 760 uurec_p->next = NULL; 761 uurec_p->prev = NULL; 762 uurec_p->asru = asru; 763 status_rec_p->severity = get_dict_msg(msgid, "severity", 1, 0); 764 status_rec_p->class = class; 765 status_rec_p->fru = fru; 766 status_rec_p->asru = asru; 767 status_rec_p->resource = resource; 768 
status_rec_p->serial = serial; 769 status_rec_p->url = url ? strdup(url) : NULL; 770 status_rec_p->msgid = strdup(msgid); 771 status_rec_p->not_suppressed = not_suppressed; 772 return (status_rec_p); 773 } 774 775 /* 776 * add record to given list maintaining order higher priority first. 777 */ 778 static void 779 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp) 780 { 781 sr_list_t *tp, *np, *sp; 782 int order; 783 uint64_t sec; 784 785 np = malloc(sizeof (sr_list_t)); 786 np->status_record = status_rec_p; 787 sec = status_rec_p->uurec->sec; 788 if ((sp = *list_pp) == NULL) { 789 *list_pp = np; 790 np->next = np; 791 np->prev = np; 792 } else { 793 /* insert new record in front of lower priority */ 794 tp = sp; 795 order = cmp_priority(status_rec_p->severity, 796 sp->status_record->severity, sec, 797 tp->status_record->uurec->sec, 0, 0); 798 if (order > 0) { 799 *list_pp = np; 800 } else { 801 tp = sp->next; 802 while (tp != sp && 803 cmp_priority(status_rec_p->severity, 804 tp->status_record->severity, sec, 805 tp->status_record->uurec->sec, 0, 0)) { 806 tp = tp->next; 807 } 808 } 809 np->next = tp; 810 np->prev = tp->prev; 811 tp->prev->next = np; 812 tp->prev = np; 813 } 814 } 815 816 static void 817 add_resource(status_record_t *status_rec_p, resource_list_t **rp, 818 resource_list_t *np) 819 { 820 int order; 821 uint64_t sec; 822 resource_list_t *sp, *tp; 823 status_record_t *srp; 824 char *severity = status_rec_p->severity; 825 826 add_rec_list(status_rec_p, &np->status_rec_list); 827 if ((sp = *rp) == NULL) { 828 np->next = np; 829 np->prev = np; 830 *rp = np; 831 } else { 832 /* 833 * insert new record in front of lower priority 834 */ 835 tp = sp->next; 836 srp = sp->status_rec_list->status_record; 837 sec = status_rec_p->uurec->sec; 838 order = cmp_priority(severity, srp->severity, sec, 839 srp->uurec->sec, np->max_pct, sp->max_pct); 840 if (order > 0) { 841 *rp = np; 842 } else { 843 srp = tp->status_rec_list->status_record; 844 while (tp 
!= sp && 845 cmp_priority(severity, srp->severity, sec, 846 srp->uurec->sec, np->max_pct, sp->max_pct) < 0) { 847 tp = tp->next; 848 srp = tp->status_rec_list->status_record; 849 } 850 } 851 np->next = tp; 852 np->prev = tp->prev; 853 tp->prev->next = np; 854 tp->prev = np; 855 } 856 } 857 858 static void 859 add_resource_list(status_record_t *status_rec_p, name_list_t *fp, 860 resource_list_t **rpp) 861 { 862 int order; 863 resource_list_t *np, *end; 864 status_record_t *srp; 865 866 np = *rpp; 867 end = np; 868 while (np) { 869 if (strcmp(fp->name, np->resource) == 0) { 870 np->not_suppressed |= status_rec_p->not_suppressed; 871 srp = np->status_rec_list->status_record; 872 order = cmp_priority(status_rec_p->severity, 873 srp->severity, status_rec_p->uurec->sec, 874 srp->uurec->sec, fp->max_pct, np->max_pct); 875 if (order > 0 && np != end) { 876 /* 877 * remove from list and add again using 878 * new priority 879 */ 880 np->prev->next = np->next; 881 np->next->prev = np->prev; 882 add_resource(status_rec_p, 883 rpp, np); 884 } else { 885 add_rec_list(status_rec_p, 886 &np->status_rec_list); 887 } 888 break; 889 } 890 np = np->next; 891 if (np == end) { 892 np = NULL; 893 break; 894 } 895 } 896 if (np == NULL) { 897 np = malloc(sizeof (resource_list_t)); 898 np->resource = fp->name; 899 np->not_suppressed = status_rec_p->not_suppressed; 900 np->status_rec_list = NULL; 901 np->max_pct = fp->max_pct; 902 add_resource(status_rec_p, rpp, np); 903 } 904 } 905 906 static void 907 add_list(status_record_t *status_rec_p, name_list_t *listp, 908 resource_list_t **glistp) 909 { 910 name_list_t *fp, *end; 911 912 fp = listp; 913 end = fp; 914 while (fp) { 915 add_resource_list(status_rec_p, fp, glistp); 916 fp = fp->next; 917 if (fp == end) 918 break; 919 } 920 } 921 922 /* 923 * add record to rec, fru and asru lists. 
924 */ 925 static void 926 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class, 927 name_list_t *fru, name_list_t *asru, name_list_t *resource, 928 name_list_t *serial, const char *url, boolean_t not_suppressed, 929 hostid_t *hostid) 930 { 931 status_record_t *status_rec_p; 932 933 status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru, 934 resource, serial, url, not_suppressed, hostid); 935 add_rec_list(status_rec_p, &status_rec_list); 936 if (status_rec_p->fru) 937 add_list(status_rec_p, status_rec_p->fru, &status_fru_list); 938 if (status_rec_p->asru) 939 add_list(status_rec_p, status_rec_p->asru, &status_asru_list); 940 } 941 942 /* 943 * add uuid and diagnoses time to an existing record for similar fault on the 944 * same fru 945 */ 946 static void 947 catalog_merge_record(status_record_t *status_rec_p, uurec_t *uurec_p, 948 name_list_t *asru, name_list_t *resource, name_list_t *serial, 949 const char *url, boolean_t not_suppressed) 950 { 951 uurec_t *uurec1_p; 952 953 status_rec_p->nrecs++; 954 /* add uurec in time order */ 955 if (status_rec_p->uurec->sec > uurec_p->sec) { 956 uurec_p->next = status_rec_p->uurec; 957 uurec_p->prev = NULL; 958 status_rec_p->uurec = uurec_p; 959 } else { 960 uurec1_p = status_rec_p->uurec; 961 while (uurec1_p->next && uurec1_p->next->sec <= uurec_p->sec) 962 uurec1_p = uurec1_p->next; 963 if (uurec1_p->next) 964 uurec1_p->next->prev = uurec_p; 965 uurec_p->next = uurec1_p->next; 966 uurec_p->prev = uurec1_p; 967 uurec1_p->next = uurec_p; 968 } 969 if (status_rec_p->url == NULL && url != NULL) 970 status_rec_p->url = strdup(url); 971 status_rec_p->not_suppressed |= not_suppressed; 972 uurec_p->asru = merge_name_list(&status_rec_p->asru, asru, 0); 973 (void) merge_name_list(&status_rec_p->resource, resource, 0); 974 (void) merge_name_list(&status_rec_p->serial, serial, 0); 975 } 976 977 static status_record_t * 978 record_in_catalog(name_list_t *class, name_list_t *fru, 979 char *msgid, hostid_t *host) 
980 { 981 sr_list_t *status_rec_p; 982 status_record_t *srp = NULL; 983 984 status_rec_p = status_rec_list; 985 while (status_rec_p) { 986 srp = status_rec_p->status_record; 987 if (host == srp->host && 988 cmp_name_list(class, srp->class) == 0 && 989 cmp_name_list(fru, srp->fru) == 0 && 990 strcmp(msgid, srp->msgid) == 0) 991 break; 992 if (status_rec_p->next == status_rec_list) { 993 srp = NULL; 994 break; 995 } else { 996 status_rec_p = status_rec_p->next; 997 } 998 } 999 return (srp); 1000 } 1001 1002 static void 1003 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct) 1004 { 1005 char *name; 1006 char *serial = NULL; 1007 char **lserial = NULL; 1008 uint64_t serint; 1009 name_list_t *nlp; 1010 int j; 1011 uint_t nelem; 1012 char buf[64]; 1013 1014 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) { 1015 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 1016 if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID, 1017 &serint) == 0) { 1018 (void) snprintf(buf, sizeof (buf), "%llX", 1019 serint); 1020 nlp = alloc_name_list(buf, pct); 1021 (void) merge_name_list(serial_p, nlp, 1); 1022 } 1023 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 1024 if (nvlist_lookup_string_array(nvl, 1025 FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) { 1026 nlp = alloc_name_list(lserial[0], pct); 1027 for (j = 1; j < nelem; j++) { 1028 name_list_t *n1lp; 1029 n1lp = alloc_name_list(lserial[j], pct); 1030 (void) merge_name_list(&nlp, n1lp, 1); 1031 } 1032 (void) merge_name_list(serial_p, nlp, 1); 1033 } 1034 } else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) { 1035 if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID, 1036 &serial) == 0) { 1037 nlp = alloc_name_list(serial, pct); 1038 (void) merge_name_list(serial_p, nlp, 1); 1039 } 1040 } 1041 } 1042 } 1043 1044 static void 1045 extract_record_info(nvlist_t *nvl, name_list_t **class_p, 1046 name_list_t **fru_p, name_list_t **serial_p, 1047 name_list_t **resource_p, name_list_t **asru_p, uint8_t status) 1048 { 1049 
nvlist_t *lfru, *lasru, *rsrc; 1050 name_list_t *nlp; 1051 char *name; 1052 uint8_t lpct = 0; 1053 char *lclass = NULL; 1054 char *label; 1055 1056 (void) nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct); 1057 if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) { 1058 nlp = alloc_name_list(lclass, lpct); 1059 (void) merge_name_list(class_p, nlp, 1); 1060 } 1061 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) { 1062 name = get_nvl2str_topo(lfru); 1063 if (name != NULL) { 1064 nlp = alloc_name_list(name, lpct); 1065 nlp->status = status & ~(FM_SUSPECT_UNUSABLE | 1066 FM_SUSPECT_DEGRADED); 1067 free(name); 1068 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1069 &label) == 0) 1070 nlp->label = strdup(label); 1071 (void) merge_name_list(fru_p, nlp, 1); 1072 } 1073 get_serial_no(lfru, serial_p, lpct); 1074 } 1075 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) { 1076 name = get_nvl2str_topo(lasru); 1077 if (name != NULL) { 1078 nlp = alloc_name_list(name, lpct); 1079 nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT | 1080 FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED | 1081 FM_SUSPECT_ACQUITTED); 1082 free(name); 1083 (void) merge_name_list(asru_p, nlp, 1); 1084 } 1085 get_serial_no(lasru, serial_p, lpct); 1086 } 1087 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) { 1088 name = get_nvl2str_topo(rsrc); 1089 if (name != NULL) { 1090 nlp = alloc_name_list(name, lpct); 1091 nlp->status = status; 1092 free(name); 1093 (void) merge_name_list(resource_p, nlp, 1); 1094 } 1095 } 1096 } 1097 1098 static void 1099 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid, 1100 const char *url) 1101 { 1102 char *msgid = "-"; 1103 uint_t i, size = 0; 1104 name_list_t *class = NULL, *resource = NULL; 1105 name_list_t *asru = NULL, *fru = NULL, *serial = NULL; 1106 nvlist_t **nva; 1107 uint8_t *ba; 1108 status_record_t *status_rec_p; 1109 uurec_t *uurec_p; 1110 hostid_t *host; 1111 boolean_t not_suppressed = 1; 1112 boolean_t any_present 
= 0; 1113 1114 (void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid); 1115 (void) nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size); 1116 (void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, 1117 ¬_suppressed); 1118 1119 if (size != 0) { 1120 (void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 1121 &nva, &size); 1122 (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, 1123 &ba, &size); 1124 for (i = 0; i < size; i++) { 1125 extract_record_info(nva[i], &class, &fru, &serial, 1126 &resource, &asru, ba[i]); 1127 if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) && 1128 (ba[i] & FM_SUSPECT_FAULTY)) 1129 any_present = 1; 1130 } 1131 /* 1132 * also suppress if no resources present 1133 */ 1134 if (any_present == 0) 1135 not_suppressed = 0; 1136 } 1137 1138 uurec_p = (uurec_t *)malloc(sizeof (uurec_t)); 1139 uurec_p->uuid = strdup(uuid); 1140 uurec_p->sec = sec; 1141 uurec_p->ari_uuid_list = NULL; 1142 host = find_hostid(nvl); 1143 if (not_suppressed && !opt_g) 1144 status_rec_p = NULL; 1145 else 1146 status_rec_p = record_in_catalog(class, fru, msgid, host); 1147 if (status_rec_p) { 1148 catalog_merge_record(status_rec_p, uurec_p, asru, resource, 1149 serial, url, not_suppressed); 1150 free_name_list(class); 1151 free_name_list(fru); 1152 } else { 1153 catalog_new_record(uurec_p, msgid, class, fru, asru, 1154 resource, serial, url, not_suppressed, host); 1155 } 1156 } 1157 1158 static void 1159 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid) 1160 { 1161 sr_list_t *srp; 1162 uurec_t *uurp; 1163 ari_list_t *ari_list; 1164 1165 srp = status_rec_list; 1166 if (srp) { 1167 for (;;) { 1168 uurp = srp->status_record->uurec; 1169 while (uurp) { 1170 if (strcmp(uuid, uurp->uuid) == 0) { 1171 ari_list = (ari_list_t *) 1172 malloc(sizeof (ari_list_t)); 1173 ari_list->ari_uuid = strdup(ari_uuid); 1174 ari_list->next = uurp->ari_uuid_list; 1175 uurp->ari_uuid_list = ari_list; 1176 return; 1177 } 1178 uurp = uurp->next; 1179 } 1180 if 
(srp->next == status_rec_list) 1181 break; 1182 srp = srp->next; 1183 } 1184 } 1185 } 1186 1187 static void 1188 print_line(char *label, char *buf) 1189 { 1190 char *cp, *ep, *wp; 1191 char c; 1192 int i; 1193 int lsz; 1194 char *padding; 1195 1196 lsz = strlen(label); 1197 padding = malloc(lsz + 1); 1198 for (i = 0; i < lsz; i++) 1199 padding[i] = ' '; 1200 padding[i] = 0; 1201 cp = buf; 1202 ep = buf; 1203 c = *ep; 1204 (void) printf("\n"); 1205 while (c) { 1206 i = lsz; 1207 wp = NULL; 1208 while ((c = *ep) != NULL && (wp == NULL || i < 80)) { 1209 if (c == ' ') 1210 wp = ep; 1211 else if (c == '\n') { 1212 i = 0; 1213 *ep = 0; 1214 do { 1215 ep++; 1216 } while ((c = *ep) != NULL && c == ' '); 1217 break; 1218 } 1219 ep++; 1220 i++; 1221 } 1222 if (i >= 80 && wp) { 1223 *wp = 0; 1224 ep = wp + 1; 1225 c = *ep; 1226 } 1227 (void) printf("%s%s\n", label, cp); 1228 cp = ep; 1229 label = padding; 1230 } 1231 free(padding); 1232 } 1233 1234 static void 1235 print_dict_info(char *msgid, char *url) 1236 { 1237 const char *cp; 1238 char *l_url; 1239 char *buf; 1240 int bufsz; 1241 1242 cp = get_dict_msg(msgid, "description", 0, 1); 1243 if (cp) { 1244 if (url) 1245 l_url = url; 1246 else 1247 l_url = get_dict_url(msgid); 1248 bufsz = strlen(cp) + strlen(l_url) + 1; 1249 buf = malloc(bufsz); 1250 (void) snprintf(buf, bufsz, cp, l_url); 1251 print_line(dgettext("FMD", "Description : "), buf); 1252 free(buf); 1253 if (!url) 1254 free(l_url); 1255 } 1256 cp = get_dict_msg(msgid, "response", 0, 1); 1257 if (cp) { 1258 buf = strdup(cp); 1259 print_line(dgettext("FMD", "Response : "), buf); 1260 free(buf); 1261 } 1262 cp = get_dict_msg(msgid, "impact", 0, 1); 1263 if (cp) { 1264 buf = strdup(cp); 1265 print_line(dgettext("FMD", "Impact : "), buf); 1266 free(buf); 1267 } 1268 cp = get_dict_msg(msgid, "action", 0, 1); 1269 if (cp) { 1270 buf = strdup(cp); 1271 print_line(dgettext("FMD", "Action : "), buf); 1272 free(buf); 1273 } 1274 } 1275 1276 static void 1277 
print_name(name_list_t *list, char *(func)(char *), char *padding, int *np, 1278 int pct, int full) 1279 { 1280 char *name, *fru = NULL; 1281 1282 name = list->name; 1283 if (func) 1284 fru = func(list->name); 1285 if (fru) { 1286 (void) printf("%s \"%s\" (%s)", padding, fru, name); 1287 *np += 1; 1288 free(fru); 1289 } else { 1290 (void) printf("%s %s", padding, name); 1291 *np += 1; 1292 } 1293 if (list->pct && pct > 0 && pct < 100) { 1294 if (list->count > 1) { 1295 if (full) { 1296 (void) printf(" %d @ %s %d%%\n", list->count, 1297 dgettext("FMD", "max"), 1298 list->max_pct); 1299 } else { 1300 (void) printf(" %s %d%%\n", 1301 dgettext("FMD", "max"), 1302 list->max_pct); 1303 } 1304 } else { 1305 (void) printf(" %d%%\n", list->pct); 1306 } 1307 } else { 1308 (void) printf("\n"); 1309 } 1310 } 1311 1312 static void 1313 print_asru_status(int status, char *label) 1314 { 1315 char *msg = NULL; 1316 1317 switch (status) { 1318 case 0: 1319 msg = dgettext("FMD", "ok and in service"); 1320 break; 1321 case FM_SUSPECT_DEGRADED: 1322 msg = dgettext("FMD", "service degraded, " 1323 "but associated components no longer faulty"); 1324 break; 1325 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1326 msg = dgettext("FMD", "faulted but still " 1327 "providing degraded service"); 1328 break; 1329 case FM_SUSPECT_FAULTY: 1330 msg = dgettext("FMD", "faulted but still in service"); 1331 break; 1332 case FM_SUSPECT_UNUSABLE: 1333 msg = dgettext("FMD", "out of service, " 1334 "but associated components no longer faulty"); 1335 break; 1336 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1337 msg = dgettext("FMD", "faulted and taken out of service"); 1338 break; 1339 default: 1340 break; 1341 } 1342 if (msg) { 1343 (void) printf("%s %s\n", label, msg); 1344 } 1345 } 1346 1347 static void 1348 print_fru_status(int status, char *label) 1349 { 1350 char *msg = NULL; 1351 1352 if (status & FM_SUSPECT_NOT_PRESENT) 1353 msg = dgettext("FMD", "not present"); 1354 else if (status & 
FM_SUSPECT_FAULTY) 1355 msg = dgettext("FMD", "faulty"); 1356 else if (status & FM_SUSPECT_REPLACED) 1357 msg = dgettext("FMD", "replaced"); 1358 else if (status & FM_SUSPECT_REPAIRED) 1359 msg = dgettext("FMD", "repair attempted"); 1360 else if (status & FM_SUSPECT_ACQUITTED) 1361 msg = dgettext("FMD", "acquitted"); 1362 else 1363 msg = dgettext("FMD", "removed"); 1364 (void) printf("%s %s\n", label, msg); 1365 } 1366 1367 static void 1368 print_name_list(name_list_t *list, char *label, char *(func)(char *), 1369 int limit, int pct, void (func1)(int, char *), int full) 1370 { 1371 char *name, *fru = NULL; 1372 char *padding; 1373 int i, j, l, n; 1374 name_list_t *end = list; 1375 1376 l = strlen(label); 1377 padding = malloc(l + 1); 1378 for (i = 0; i < l; i++) 1379 padding[i] = ' '; 1380 padding[l] = 0; 1381 (void) printf("%s", label); 1382 name = list->name; 1383 if (func == NULL) 1384 (void) printf(" %s", name); 1385 else if (list->label) 1386 (void) printf(" \"%s\" (%s)", list->label, name); 1387 else { 1388 fru = func(list->name); 1389 if (fru) { 1390 (void) printf(" \"%s\" (%s)", fru, name); 1391 free(fru); 1392 } else 1393 (void) printf(" %s", name); 1394 } 1395 if (list->pct && pct > 0 && pct < 100) { 1396 if (list->count > 1) { 1397 if (full) { 1398 (void) printf(" %d @ %s %d%%\n", list->count, 1399 dgettext("FMD", "max"), list->max_pct); 1400 } else { 1401 (void) printf(" %s %d%%\n", 1402 dgettext("FMD", "max"), list->max_pct); 1403 } 1404 } else { 1405 (void) printf(" %d%%\n", list->pct); 1406 } 1407 } else { 1408 (void) printf("\n"); 1409 } 1410 if (func1) 1411 func1(list->status, padding); 1412 n = 1; 1413 j = 0; 1414 while ((list = list->next) != end) { 1415 if (limit == 0 || n < limit) { 1416 print_name(list, func, padding, &n, pct, full); 1417 if (func1) 1418 func1(list->status, padding); 1419 } else 1420 j++; 1421 } 1422 if (j == 1) { 1423 print_name(list->prev, func, padding, &n, pct, full); 1424 } else if (j > 1) { 1425 (void) printf("%s... 
%d %s\n", padding, j, 1426 dgettext("FMD", "more entries suppressed," 1427 " use -v option for full list")); 1428 } 1429 free(padding); 1430 } 1431 1432 static int 1433 asru_same_status(name_list_t *list) 1434 { 1435 name_list_t *end = list; 1436 int status = list->status; 1437 1438 while ((list = list->next) != end) { 1439 if (status == -1) { 1440 status = list->status; 1441 continue; 1442 } 1443 if (list->status != -1 && status != list->status) { 1444 status = -1; 1445 break; 1446 } 1447 } 1448 return (status); 1449 } 1450 1451 static int 1452 serial_in_fru(name_list_t *fru, name_list_t *serial) 1453 { 1454 name_list_t *sp = serial; 1455 name_list_t *fp; 1456 int nserial = 0; 1457 int found = 0; 1458 char buf[128]; 1459 1460 while (sp) { 1461 fp = fru; 1462 nserial++; 1463 (void) snprintf(buf, sizeof (buf), "serial=%s", sp->name); 1464 buf[sizeof (buf) - 1] = 0; 1465 while (fp) { 1466 if (strstr(fp->name, buf) != NULL) { 1467 found++; 1468 break; 1469 } 1470 fp = fp->next; 1471 if (fp == fru) 1472 break; 1473 } 1474 sp = sp->next; 1475 if (sp == serial) 1476 break; 1477 } 1478 return (found == nserial ? 1 : 0); 1479 } 1480 1481 static void 1482 print_server_name(hostid_t *host, char *label) 1483 { 1484 (void) printf("%s %s %s %s\n", label, host->server, host->platform, 1485 host->chassis ? 
host->chassis : ""); 1486 } 1487 1488 static void 1489 print_sup_record(status_record_t *srp, int opt_i, int full) 1490 { 1491 char buf[32]; 1492 uurec_t *uurp = srp->uurec; 1493 int n, j, k, max; 1494 int status; 1495 ari_list_t *ari_list; 1496 1497 n = 0; 1498 max = max_fault; 1499 if (max < 0) { 1500 max = 0; 1501 } 1502 j = max / 2; 1503 max -= j; 1504 k = srp->nrecs - max; 1505 while ((uurp = uurp->next) != NULL) { 1506 if (full || n < j || n >= k || max_fault == 0 || 1507 srp->nrecs == max_fault+1) { 1508 if (opt_i) { 1509 ari_list = uurp->ari_uuid_list; 1510 while (ari_list) { 1511 (void) printf("%-15s %s\n", 1512 format_date(buf, sizeof (buf), 1513 uurp->sec), ari_list->ari_uuid); 1514 ari_list = ari_list->next; 1515 } 1516 } else { 1517 (void) printf("%-15s %s\n", 1518 format_date(buf, sizeof (buf), uurp->sec), 1519 uurp->uuid); 1520 } 1521 } else if (n == j) 1522 (void) printf("... %d %s\n", srp->nrecs - max_fault, 1523 dgettext("FMD", "more entries suppressed")); 1524 n++; 1525 } 1526 (void) printf("\n"); 1527 if (n_server > 1) 1528 print_server_name(srp->host, dgettext("FMD", "Host :")); 1529 if (srp->class) 1530 print_name_list(srp->class, 1531 dgettext("FMD", "Fault class :"), NULL, 0, srp->class->pct, 1532 NULL, full); 1533 if (srp->asru) { 1534 status = asru_same_status(srp->asru); 1535 if (status != -1) { 1536 print_name_list(srp->asru, 1537 dgettext("FMD", "Affects :"), NULL, 1538 full ? 0 : max_display, 0, NULL, full); 1539 print_asru_status(status, " "); 1540 } else 1541 print_name_list(srp->asru, 1542 dgettext("FMD", "Affects :"), NULL, 1543 full ? 0 : max_display, 0, print_asru_status, full); 1544 } 1545 if (full || srp->fru == NULL) { 1546 if (srp->resource) { 1547 print_name_list(srp->resource, 1548 dgettext("FMD", "Problem in :"), 1549 NULL, full ? 
0 : max_display, 0, print_fru_status, 1550 full); 1551 } 1552 } 1553 if (srp->fru) { 1554 status = asru_same_status(srp->fru); 1555 if (status != -1) { 1556 print_name_list(srp->fru, dgettext("FMD", 1557 "FRU :"), get_fmri_label, 0, 1558 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1559 NULL, full); 1560 print_fru_status(status, " "); 1561 } else 1562 print_name_list(srp->fru, dgettext("FMD", 1563 "FRU :"), get_fmri_label, 0, 1564 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1565 print_fru_status, full); 1566 } 1567 if (srp->serial && !serial_in_fru(srp->fru, srp->serial) && 1568 !serial_in_fru(srp->asru, srp->serial)) { 1569 print_name_list(srp->serial, dgettext("FMD", "Serial ID. :"), 1570 NULL, 0, 0, NULL, full); 1571 } 1572 print_dict_info(srp->msgid, srp->url); 1573 (void) printf("\n"); 1574 } 1575 1576 static void 1577 print_status_record(status_record_t *srp, int summary, int opt_i, int full) 1578 { 1579 char buf[32]; 1580 uurec_t *uurp = srp->uurec; 1581 char *severity; 1582 static int header = 0; 1583 char *head; 1584 ari_list_t *ari_list; 1585 1586 if (nlspath) 1587 severity = get_dict_msg(srp->msgid, "severity", 1, 1); 1588 else 1589 severity = srp->severity; 1590 1591 if (!summary || !header) { 1592 if (opt_i) { 1593 head = "--------------- " 1594 "------------------------------------ " 1595 "-------------- ---------\n" 1596 "TIME CACHE-ID" 1597 " MSG-ID" 1598 " SEVERITY\n--------------- " 1599 "------------------------------------ " 1600 " -------------- ---------"; 1601 } else { 1602 head = "--------------- " 1603 "------------------------------------ " 1604 "-------------- ---------\n" 1605 "TIME EVENT-ID" 1606 " MSG-ID" 1607 " SEVERITY\n--------------- " 1608 "------------------------------------ " 1609 " -------------- ---------"; 1610 } 1611 (void) printf("%s\n", dgettext("FMD", head)); 1612 header = 1; 1613 } 1614 if (opt_i) { 1615 ari_list = uurp->ari_uuid_list; 1616 while (ari_list) { 1617 (void) printf("%-15s %-37s %-14s %-9s\n", 1618 
format_date(buf, sizeof (buf), uurp->sec), 1619 ari_list->ari_uuid, srp->msgid, severity); 1620 ari_list = ari_list->next; 1621 } 1622 } else { 1623 (void) printf("%-15s %-37s %-14s %-9s\n", 1624 format_date(buf, sizeof (buf), uurp->sec), 1625 uurp->uuid, srp->msgid, severity); 1626 } 1627 1628 if (!summary) 1629 print_sup_record(srp, opt_i, full); 1630 } 1631 1632 static void 1633 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed) 1634 { 1635 status_record_t *srp; 1636 sr_list_t *slp; 1637 1638 slp = status_rec_list; 1639 if (slp) { 1640 for (;;) { 1641 srp = slp->status_record; 1642 if (opt_a || srp->not_suppressed) { 1643 if (page_feed) 1644 (void) printf("\f\n"); 1645 print_status_record(srp, summary, opt_i, full); 1646 } 1647 if (slp->next == status_rec_list) 1648 break; 1649 slp = slp->next; 1650 } 1651 } 1652 } 1653 1654 static name_list_t * 1655 find_fru(status_record_t *srp, char *resource) 1656 { 1657 name_list_t *rt = NULL; 1658 name_list_t *fru = srp->fru; 1659 1660 while (fru) { 1661 if (strcmp(resource, fru->name) == 0) { 1662 rt = fru; 1663 break; 1664 } 1665 fru = fru->next; 1666 if (fru == srp->fru) 1667 break; 1668 } 1669 return (rt); 1670 } 1671 1672 static void 1673 print_fru_line(name_list_t *fru, char *uuid) 1674 { 1675 if (fru->pct == 100) { 1676 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1677 dgettext("FMD", "suspects in this FRU total certainty"), 1678 100); 1679 } else { 1680 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1681 dgettext("FMD", "suspects in this FRU max certainty"), 1682 fru->max_pct); 1683 } 1684 } 1685 1686 static void 1687 print_fru(int summary, int opt_a, int opt_i, int page_feed) 1688 { 1689 resource_list_t *tp = status_fru_list; 1690 status_record_t *srp; 1691 sr_list_t *slp, *end; 1692 char *msgid, *fru_label; 1693 uurec_t *uurp; 1694 name_list_t *fru; 1695 int status; 1696 ari_list_t *ari_list; 1697 1698 while (tp) { 1699 if (opt_a || tp->not_suppressed) { 1700 if (page_feed) 1701 
(void) printf("\f\n"); 1702 if (!summary) 1703 (void) printf("-----------------------------" 1704 "---------------------------------------" 1705 "----------\n"); 1706 slp = tp->status_rec_list; 1707 end = slp; 1708 do { 1709 srp = slp->status_record; 1710 fru = find_fru(srp, tp->resource); 1711 if (fru) { 1712 if (fru->label) 1713 (void) printf("\"%s\" (%s) ", 1714 fru->label, fru->name); 1715 else if ((fru_label = get_fmri_label( 1716 fru->name)) != NULL) { 1717 (void) printf("\"%s\" (%s) ", 1718 fru_label, fru->name); 1719 free(fru_label); 1720 } else 1721 (void) printf("%s ", 1722 fru->name); 1723 break; 1724 } 1725 slp = slp->next; 1726 } while (slp != end); 1727 1728 slp = tp->status_rec_list; 1729 end = slp; 1730 status = 0; 1731 do { 1732 srp = slp->status_record; 1733 fru = srp->fru; 1734 while (fru) { 1735 if (strcmp(tp->resource, 1736 fru->name) == 0) 1737 status |= fru->status; 1738 fru = fru->next; 1739 if (fru == srp->fru) 1740 break; 1741 } 1742 slp = slp->next; 1743 } while (slp != end); 1744 if (status & FM_SUSPECT_NOT_PRESENT) 1745 (void) printf(dgettext("FMD", "not present\n")); 1746 else if (status & FM_SUSPECT_FAULTY) 1747 (void) printf(dgettext("FMD", "faulty\n")); 1748 else if (status & FM_SUSPECT_REPLACED) 1749 (void) printf(dgettext("FMD", "replaced\n")); 1750 else if (status & FM_SUSPECT_REPAIRED) 1751 (void) printf(dgettext("FMD", 1752 "repair attempted\n")); 1753 else if (status & FM_SUSPECT_ACQUITTED) 1754 (void) printf(dgettext("FMD", "acquitted\n")); 1755 else 1756 (void) printf(dgettext("FMD", "removed\n")); 1757 1758 slp = tp->status_rec_list; 1759 end = slp; 1760 do { 1761 srp = slp->status_record; 1762 uurp = srp->uurec; 1763 fru = find_fru(srp, tp->resource); 1764 if (fru) { 1765 if (opt_i) { 1766 ari_list = uurp->ari_uuid_list; 1767 while (ari_list) { 1768 print_fru_line(fru, 1769 ari_list->ari_uuid); 1770 ari_list = 1771 ari_list->next; 1772 } 1773 } else { 1774 print_fru_line(fru, uurp->uuid); 1775 } 1776 } 1777 slp = 
slp->next; 1778 } while (slp != end); 1779 if (!summary) { 1780 slp = tp->status_rec_list; 1781 end = slp; 1782 srp = slp->status_record; 1783 if (srp->serial && 1784 !serial_in_fru(srp->fru, srp->serial)) { 1785 print_name_list(srp->serial, 1786 dgettext("FMD", "Serial ID. :"), 1787 NULL, 0, 0, NULL, 1); 1788 } 1789 msgid = NULL; 1790 do { 1791 if (msgid == NULL || 1792 strcmp(msgid, srp->msgid) != 0) { 1793 msgid = srp->msgid; 1794 print_dict_info(srp->msgid, 1795 srp->url); 1796 } 1797 slp = slp->next; 1798 } while (slp != end); 1799 } 1800 } 1801 tp = tp->next; 1802 if (tp == status_fru_list) 1803 break; 1804 } 1805 } 1806 1807 static void 1808 print_asru(int opt_a) 1809 { 1810 resource_list_t *tp = status_asru_list; 1811 status_record_t *srp; 1812 sr_list_t *slp, *end; 1813 char *msg; 1814 int status; 1815 name_list_t *asru; 1816 1817 while (tp) { 1818 if (opt_a || tp->not_suppressed) { 1819 status = 0; 1820 slp = tp->status_rec_list; 1821 end = slp; 1822 do { 1823 srp = slp->status_record; 1824 asru = srp->asru; 1825 while (asru) { 1826 if (strcmp(tp->resource, 1827 asru->name) == 0) 1828 status |= asru->status; 1829 asru = asru->next; 1830 if (asru == srp->asru) 1831 break; 1832 } 1833 slp = slp->next; 1834 } while (slp != end); 1835 switch (status) { 1836 case 0: 1837 msg = dgettext("FMD", "ok"); 1838 break; 1839 case FM_SUSPECT_DEGRADED: 1840 msg = dgettext("FMD", "degraded"); 1841 break; 1842 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1843 msg = dgettext("FMD", "degraded"); 1844 break; 1845 case FM_SUSPECT_FAULTY: 1846 msg = dgettext("FMD", "degraded"); 1847 break; 1848 case FM_SUSPECT_UNUSABLE: 1849 msg = dgettext("FMD", "unknown"); 1850 break; 1851 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1852 msg = dgettext("FMD", "faulted"); 1853 break; 1854 default: 1855 msg = ""; 1856 break; 1857 } 1858 (void) printf("%-69s %s\n", tp->resource, msg); 1859 } 1860 tp = tp->next; 1861 if (tp == status_asru_list) 1862 break; 1863 } 1864 } 1865 1866 static int 
1867 uuid_in_list(char *uuid, uurec_select_t *uurecp) 1868 { 1869 while (uurecp) { 1870 if (strcmp(uuid, uurecp->uuid) == 0) 1871 return (1); 1872 uurecp = uurecp->next; 1873 } 1874 return (0); 1875 } 1876 1877 static int 1878 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg) 1879 { 1880 int64_t *diag_time; 1881 uint_t nelem; 1882 int rt = 0; 1883 char *uuid = "-"; 1884 uurec_select_t *uurecp = (uurec_select_t *)arg; 1885 1886 if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME, 1887 &diag_time, &nelem) == 0 && nelem >= 2) { 1888 (void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID, 1889 &uuid); 1890 if (uurecp == NULL || uuid_in_list(uuid, uurecp)) 1891 add_fault_record_to_catalog(acp->aci_event, *diag_time, 1892 uuid, acp->aci_url); 1893 } else { 1894 rt = -1; 1895 } 1896 return (rt); 1897 } 1898 1899 /*ARGSUSED*/ 1900 static int 1901 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused) 1902 { 1903 update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid); 1904 return (0); 1905 } 1906 1907 static int 1908 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i) 1909 { 1910 int rt = FMADM_EXIT_SUCCESS; 1911 1912 /* 1913 * These calls may fail with Protocol error if message payload is to big 1914 */ 1915 if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0) 1916 die("failed to get case list from fmd"); 1917 if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0) 1918 die("failed to get case status from fmd"); 1919 return (rt); 1920 } 1921 1922 /* 1923 * fmadm faulty command 1924 * 1925 * -a show hidden fault records 1926 * -f show faulty fru's 1927 * -g force grouping of similar faults on the same fru 1928 * -n number of fault records to display 1929 * -p pipe output through pager 1930 * -r show faulty asru's 1931 * -s print summary of first fault 1932 * -u print listed uuid's only 1933 * -v full output 1934 */ 1935 1936 int 1937 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[]) 1938 { 1939 int opt_a = 0, 
opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0; 1940 int opt_i = 0; 1941 char *pager; 1942 FILE *fp; 1943 int rt, c, stat; 1944 uurec_select_t *tp; 1945 uurec_select_t *uurecp = NULL; 1946 1947 catalog_setup(); 1948 while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) { 1949 switch (c) { 1950 case 'a': 1951 opt_a++; 1952 break; 1953 case 'f': 1954 opt_f++; 1955 break; 1956 case 'g': 1957 opt_g++; 1958 break; 1959 case 'i': 1960 opt_i++; 1961 break; 1962 case 'n': 1963 max_fault = atoi(optarg); 1964 break; 1965 case 'p': 1966 opt_p++; 1967 break; 1968 case 'r': 1969 opt_r++; 1970 break; 1971 case 's': 1972 opt_s++; 1973 break; 1974 case 'u': 1975 tp = (uurec_select_t *)malloc(sizeof (uurec_select_t)); 1976 tp->uuid = optarg; 1977 tp->next = uurecp; 1978 uurecp = tp; 1979 opt_a = 1; 1980 break; 1981 case 'v': 1982 opt_v++; 1983 break; 1984 default: 1985 return (FMADM_EXIT_USAGE); 1986 } 1987 } 1988 if (optind < argc) 1989 return (FMADM_EXIT_USAGE); 1990 1991 rt = get_cases_from_fmd(adm, uurecp, opt_i); 1992 if (opt_p) { 1993 if ((pager = getenv("PAGER")) == NULL) 1994 pager = "/usr/bin/more"; 1995 fp = popen(pager, "w"); 1996 if (fp == NULL) { 1997 rt = FMADM_EXIT_ERROR; 1998 opt_p = 0; 1999 } else { 2000 dup2(fileno(fp), 1); 2001 setbuf(stdout, NULL); 2002 (void) fclose(fp); 2003 } 2004 } 2005 max_display = max_fault; 2006 if (opt_f) 2007 print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s); 2008 if (opt_r) 2009 print_asru(opt_a); 2010 if (opt_f == 0 && opt_r == 0) 2011 print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s); 2012 label_release_topo(); 2013 if (opt_p) { 2014 (void) fclose(stdout); 2015 (void) wait(&stat); 2016 } 2017 return (rt); 2018 } 2019 2020 int 2021 cmd_flush(fmd_adm_t *adm, int argc, char *argv[]) 2022 { 2023 int i, status = FMADM_EXIT_SUCCESS; 2024 2025 if (argc < 2 || (i = getopt(argc, argv, "")) != EOF) 2026 return (FMADM_EXIT_USAGE); 2027 2028 for (i = 1; i < argc; i++) { 2029 if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) { 2030 
warn("failed to flush %s", argv[i]); 2031 status = FMADM_EXIT_ERROR; 2032 } else 2033 note("flushed resource history for %s\n", argv[i]); 2034 } 2035 2036 return (status); 2037 } 2038 2039 int 2040 cmd_repair(fmd_adm_t *adm, int argc, char *argv[]) 2041 { 2042 int err; 2043 2044 if (getopt(argc, argv, "") != EOF) 2045 return (FMADM_EXIT_USAGE); 2046 2047 if (argc - optind != 1) 2048 return (FMADM_EXIT_USAGE); 2049 2050 /* 2051 * argument could be a uuid, an fmri (asru, fru or resource) 2052 * or a label. Try uuid first, If that fails try the others. 2053 */ 2054 err = fmd_adm_case_repair(adm, argv[optind]); 2055 if (err != 0) 2056 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2057 2058 if (err != 0) 2059 die("failed to record repair to %s", argv[optind]); 2060 2061 note("recorded repair to %s\n", argv[optind]); 2062 return (FMADM_EXIT_SUCCESS); 2063 } 2064 2065 int 2066 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[]) 2067 { 2068 int err; 2069 2070 if (getopt(argc, argv, "") != EOF) 2071 return (FMADM_EXIT_USAGE); 2072 2073 if (argc - optind != 1) 2074 return (FMADM_EXIT_USAGE); 2075 2076 /* 2077 * argument could be an fmri (asru, fru or resource) or a label. 2078 */ 2079 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2080 if (err != 0) 2081 die("failed to record repair to %s", argv[optind]); 2082 2083 note("recorded repair to of %s\n", argv[optind]); 2084 return (FMADM_EXIT_SUCCESS); 2085 } 2086 2087 int 2088 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[]) 2089 { 2090 int err; 2091 2092 if (getopt(argc, argv, "") != EOF) 2093 return (FMADM_EXIT_USAGE); 2094 2095 if (argc - optind != 1) 2096 return (FMADM_EXIT_USAGE); 2097 2098 /* 2099 * argument could be an fmri (asru, fru or resource) or a label. 
2100 */ 2101 err = fmd_adm_rsrc_replaced(adm, argv[optind]); 2102 if (err != 0) 2103 die("failed to record replacement of %s", argv[optind]); 2104 2105 note("recorded replacement of %s\n", argv[optind]); 2106 return (FMADM_EXIT_SUCCESS); 2107 } 2108 2109 int 2110 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[]) 2111 { 2112 int err; 2113 2114 if (getopt(argc, argv, "") != EOF) 2115 return (FMADM_EXIT_USAGE); 2116 2117 if (argc - optind != 1 && argc - optind != 2) 2118 return (FMADM_EXIT_USAGE); 2119 2120 /* 2121 * argument could be a uuid, an fmri (asru, fru or resource) 2122 * or a label. Or it could be a uuid and an fmri or label. 2123 */ 2124 if (argc - optind == 2) { 2125 err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]); 2126 if (err != 0) 2127 err = fmd_adm_rsrc_acquit(adm, argv[optind + 1], 2128 argv[optind]); 2129 } else { 2130 err = fmd_adm_case_acquit(adm, argv[optind]); 2131 if (err != 0) 2132 err = fmd_adm_rsrc_acquit(adm, argv[optind], ""); 2133 } 2134 2135 if (err != 0) 2136 die("failed to record acquital of %s", argv[optind]); 2137 2138 note("recorded acquital of %s\n", argv[optind]); 2139 return (FMADM_EXIT_SUCCESS); 2140 } 2141