1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <fmadm.h> 28 #include <errno.h> 29 #include <limits.h> 30 #include <strings.h> 31 #include <stdio.h> 32 #include <unistd.h> 33 #include <sys/wait.h> 34 #include <sys/stat.h> 35 #include <fcntl.h> 36 #include <fm/fmd_log.h> 37 #include <sys/fm/protocol.h> 38 #include <fm/libtopo.h> 39 #include <fm/fmd_adm.h> 40 #include <dlfcn.h> 41 #include <sys/systeminfo.h> 42 #include <sys/utsname.h> 43 #include <libintl.h> 44 #include <locale.h> 45 #include <sys/smbios.h> 46 #include <libdevinfo.h> 47 #include <stdlib.h> 48 49 #define offsetof(s, m) ((size_t)(&(((s*)0)->m))) 50 51 /* 52 * catalog_setup() must be called to setup support functions. 53 * Fault records are added to catalog by calling add_fault_record_to_catalog() 54 * records are stored in order of importance to the system. 55 * If -g flag is set or not_suppressed is not set and the class fru, fault, 56 * type are the same then details are merged into an existing record, with uuid 57 * records are stored in time order. 
58 * For each record information is extracted from nvlist and merged into linked 59 * list each is checked for identical records for which percentage certainty are 60 * added together. 61 * print_catalog() is called to print out catalog and release external resources 62 * 63 * /---------------\ 64 * status_rec_list -> | | -| 65 * \---------------/ 66 * \/ 67 * /---------------\ /-------\ /-------\ 68 * status_fru_list | status_record | -> | uurec | -> | uurec | -| 69 * \/ | | |- | | <- | | 70 * /-------------\ | | \-------/ \-------/ 71 * | | -> | | \/ \/ 72 * \-------------/ | | /-------\ /-------\ 73 * \/ | | -> | asru | -> | asru | 74 * --- | | | | <- | | 75 * | | \-------/ \-------/ 76 * status_asru_list | class | 77 * \/ | resource | /-------\ /-------\ 78 * /-------------\ | fru | -> | list | -> | list | 79 * | | -> | serial | | | <- | | 80 * \-------------/ | | \-------/ \-------/ 81 * \/ \---------------/ 82 * --- \/ /\ 83 * /---------------\ 84 * | status_record | 85 * \---------------/ 86 * 87 * Fmadm faulty takes a number of options which affect the format of the 88 * output displayed. By default, the display reports the FRU and ASRU along 89 * with other information on per-case basis as in the example below. 90 * 91 * --------------- ------------------------------------ -------------- ------- 92 * TIME EVENT-ID MSG-ID SEVERITY 93 * --------------- ------------------------------------ -------------- ------- 94 * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c AMD-8000-2F Major 95 * 96 * Fault class : fault.memory.dimm_sb 97 * Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 98 * faulted but still in service 99 * FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0) 100 * faulty 101 * 102 * Description : The number of errors associated with this memory module has 103 * exceeded acceptable levels. Refer to 104 * http://sun.com/msg/AMD-8000-2F for more information. 
105 * 106 * Response : Pages of memory associated with this memory module are being 107 * removed from service as errors are reported. 108 * 109 * Impact : Total system memory capacity will be reduced as pages are 110 * retired. 111 * 112 * Action : Schedule a repair procedure to replace the affected memory 113 * module. Use fmdump -v -u <EVENT_ID> to identify the module. 114 * 115 * The -v flag is similar, but adds some additional information such as the 116 * resource. The -s flag is also similar but just gives the top line summary. 117 * All these options (i.e. without the -f or -r flags) use the print_catalog() 118 * function to do the display. 119 * 120 * The -f flag changes the output so that it appears sorted on a per-fru basis. 121 * The output is somewhat cut down compared to the default output. If -f is 122 * used, then print_fru() is used to print the output. 123 * 124 * ----------------------------------------------------------------------------- 125 * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty 126 * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100% 127 * 128 * Description : A problem was detected for a PCI device. 129 * Refer to http://sun.com/msg/PCI-8000-7J for more information. 130 * 131 * Response : One or more device instances may be disabled 132 * 133 * Impact : Possible loss of services provided by the device instances 134 * associated with this fault 135 * 136 * Action : Schedule a repair procedure to replace the affected device. 137 * Use fmdump -v -u <EVENT_ID> to identify the device or contact 138 * Sun for support. 139 * 140 * The -r flag changes the output so that it appears sorted on a per-asru basis. 141 * The output is very much cut down compared to the default output, just giving 142 * the asru fmri and state. Here print_asru() is used to print the output. 
143 * 144 * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded 145 * 146 * For all fmadm faulty options, the sequence of events is 147 * 148 * 1) Walk through all the cases in the system using fmd_adm_case_iter() and 149 * for each case call dfault_rec(). This will call add_fault_record_to_catalog() 150 * This will extract the data from the nvlist and call catalog_new_record() to 151 * save the data away in various linked lists in the catalogue. 152 * 153 * 2) Once this is done, the data can be supplemented by using 154 * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option. 155 * 156 * 3) Finally print_catalog(), print_fru() or print_asru() are called as 157 * appropriate to display the information from the catalogue sorted in the 158 * requested way. 159 * 160 */ 161 162 typedef struct name_list { 163 struct name_list *next; 164 struct name_list *prev; 165 char *name; 166 uint8_t pct; 167 uint8_t max_pct; 168 ushort_t count; 169 int status; 170 char *label; 171 } name_list_t; 172 173 typedef struct ari_list { 174 char *ari_uuid; 175 struct ari_list *next; 176 } ari_list_t; 177 178 typedef struct uurec { 179 struct uurec *next; 180 struct uurec *prev; 181 char *uuid; 182 ari_list_t *ari_uuid_list; 183 name_list_t *asru; 184 uint64_t sec; 185 } uurec_t; 186 187 typedef struct uurec_select { 188 struct uurec_select *next; 189 char *uuid; 190 } uurec_select_t; 191 192 typedef struct host_id { 193 char *chassis; 194 char *server; 195 char *platform; 196 char *domain; 197 } hostid_t; 198 199 typedef struct host_id_list { 200 hostid_t hostid; 201 struct host_id_list *next; 202 } host_id_list_t; 203 204 typedef struct status_record { 205 hostid_t *host; 206 int nrecs; 207 uurec_t *uurec; 208 char *severity; /* in C locale */ 209 char *msgid; 210 name_list_t *class; 211 name_list_t *resource; 212 name_list_t *asru; 213 name_list_t *fru; 214 name_list_t *serial; 215 char *url; 216 uint8_t not_suppressed; 217 } status_record_t; 218 
219 typedef struct sr_list { 220 struct sr_list *next; 221 struct sr_list *prev; 222 struct status_record *status_record; 223 } sr_list_t; 224 225 typedef struct resource_list { 226 struct resource_list *next; 227 struct resource_list *prev; 228 sr_list_t *status_rec_list; 229 char *resource; 230 uint8_t not_suppressed; 231 uint8_t max_pct; 232 } resource_list_t; 233 234 typedef struct tgetlabel_data { 235 char *label; 236 char *fru; 237 } tgetlabel_data_t; 238 239 sr_list_t *status_rec_list; 240 resource_list_t *status_fru_list; 241 resource_list_t *status_asru_list; 242 243 static char *locale; 244 static char *nlspath; 245 static int max_display; 246 static int max_fault = 0; 247 static topo_hdl_t *topo_handle; 248 static char *topo_handle_uuid; 249 static host_id_list_t *host_list; 250 static int n_server; 251 static int opt_g; 252 253 static char * 254 format_date(char *buf, size_t len, uint64_t sec) 255 { 256 if (sec > LONG_MAX) { 257 (void) fprintf(stderr, 258 "record time is too large for 32-bit utility\n"); 259 (void) snprintf(buf, len, "0x%llx", sec); 260 } else { 261 time_t tod = (time_t)sec; 262 (void) strftime(buf, len, "%b %d %T", localtime(&tod)); 263 } 264 265 return (buf); 266 } 267 268 static hostid_t * 269 find_hostid_in_list(char *platform, char *chassis, char *server, char *domain) 270 { 271 hostid_t *rt = NULL; 272 host_id_list_t *hostp; 273 274 if (platform == NULL) 275 platform = "-"; 276 if (server == NULL) 277 server = "-"; 278 hostp = host_list; 279 while (hostp) { 280 if (hostp->hostid.platform && 281 strcmp(hostp->hostid.platform, platform) == 0 && 282 hostp->hostid.server && 283 strcmp(hostp->hostid.server, server) == 0 && 284 (chassis == NULL || hostp->hostid.chassis == NULL || 285 strcmp(chassis, hostp->hostid.chassis) == 0) && 286 (domain == NULL || hostp->hostid.domain == NULL || 287 strcmp(domain, hostp->hostid.domain) == 0)) { 288 rt = &hostp->hostid; 289 break; 290 } 291 hostp = hostp->next; 292 } 293 if (rt == NULL) { 294 hostp 
= malloc(sizeof (host_id_list_t)); 295 hostp->hostid.platform = strdup(platform); 296 hostp->hostid.server = strdup(server); 297 hostp->hostid.chassis = chassis ? strdup(chassis) : NULL; 298 hostp->hostid.domain = domain ? strdup(domain) : NULL; 299 hostp->next = host_list; 300 host_list = hostp; 301 rt = &hostp->hostid; 302 n_server++; 303 } 304 return (rt); 305 } 306 307 static hostid_t * 308 find_hostid(nvlist_t *nvl) 309 { 310 char *platform = NULL, *chassis = NULL, *server = NULL, *domain = NULL; 311 nvlist_t *auth, *fmri; 312 hostid_t *rt = NULL; 313 314 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 && 315 nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) { 316 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT, 317 &platform); 318 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server); 319 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS, 320 &chassis); 321 (void) nvlist_lookup_string(auth, FM_FMRI_AUTH_DOMAIN, &domain); 322 rt = find_hostid_in_list(platform, chassis, server, domain); 323 } 324 return (rt); 325 } 326 327 static void 328 catalog_setup(void) 329 { 330 char *tp; 331 int pl; 332 333 /* 334 * All FMA event dictionaries use msgfmt(1) message objects to produce 335 * messages, even for the C locale. We therefore want to use dgettext 336 * for all message lookups, but its defined behavior in the C locale is 337 * to return the input string. Since our input strings are event codes 338 * and not format strings, this doesn't help us. We resolve this nit 339 * by setting NLSPATH to a non-existent file: the presence of NLSPATH 340 * is defined to force dgettext(3C) to do a full lookup even for C. 
341 */ 342 nlspath = getenv("NLSPATH"); 343 if (nlspath == NULL) 344 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 345 else { 346 pl = strlen(nlspath) + sizeof ("NLSPATH=") + 1; 347 tp = malloc(pl); 348 (void) snprintf(tp, pl, "NLSPATH=%s", nlspath); 349 nlspath = tp; 350 } 351 352 locale = setlocale(LC_MESSAGES, ""); 353 } 354 355 static char * 356 get_dict_url(char *id) 357 { 358 char *url = "http://sun.com/msg/"; 359 int msz = sizeof (url) + strlen(id) + 1; 360 char *cp; 361 362 cp = malloc(msz); 363 (void) snprintf(cp, msz, "%s%s", url, id); 364 return (cp); 365 } 366 367 static char * 368 get_dict_msg(char *id, char *idx, int unknown, int translate) 369 { 370 char mbuf[128]; 371 char *msg; 372 char dbuf[32]; 373 char *p; 374 int restore_env = 0; 375 int restore_locale = 0; 376 377 p = strchr(id, '-'); 378 if (p == NULL || p == id || (p - id) >= 32) { 379 msg = mbuf; 380 } else { 381 strncpy(dbuf, id, (size_t)(p - id)); 382 dbuf[(size_t)(p - id)] = 0; 383 384 (void) snprintf(mbuf, sizeof (mbuf), "%s.%s", id, idx); 385 if (translate == 0 || nlspath == NULL) { 386 (void) setlocale(LC_MESSAGES, "C"); 387 restore_locale = 1; 388 } 389 bindtextdomain("FMD", "/usr/lib/locale"); 390 msg = dgettext(dbuf, mbuf); 391 if (msg == mbuf) { 392 (void) setlocale(LC_MESSAGES, "C"); 393 restore_locale = 1; 394 msg = dgettext(dbuf, mbuf); 395 } 396 if (msg == mbuf) { 397 putenv("NLSPATH=/usr/lib/fm/fmd/fmd.cat"); 398 restore_env = 1; 399 (void) setlocale(LC_MESSAGES, "C"); 400 msg = dgettext(dbuf, mbuf); 401 } 402 if (restore_locale) 403 (void) setlocale(LC_MESSAGES, locale); 404 if (restore_env && nlspath) 405 putenv(nlspath); 406 } 407 if (msg == mbuf) { 408 if (unknown) 409 msg = "unknown"; 410 else 411 msg = NULL; 412 } 413 return (msg); 414 } 415 416 /* 417 * compare two fru strings which are made up of substrings seperated by '/' 418 * return true if every substring is the same in the two strings, or if a 419 * substring is null in one. 
420 */ 421 422 static int 423 frucmp(char *f1, char *f2) 424 { 425 char c1, c2; 426 int i = 0; 427 428 for (;;) { 429 c1 = *f1; 430 c2 = *f2; 431 if (c1 == c2) { 432 i = (c1 == '/') ? 0 : i + 1; 433 } else if (i == 0) { 434 if (c1 == '/') { 435 do { 436 f2++; 437 } while ((c2 = *f2) != 0 && c2 != '/'); 438 if (c2 == NULL) 439 break; 440 } else if (c2 == '/') { 441 do { 442 f1++; 443 } while ((c1 = *f1) != 0 && c1 != '/'); 444 if (c1 == NULL) 445 break; 446 } else 447 break; 448 } else 449 break; 450 if (c1 == NULL) 451 return (0); 452 f1++; 453 f2++; 454 } 455 return (1); 456 } 457 458 static int 459 tgetlabel(topo_hdl_t *thp, tnode_t *node, void *arg) 460 { 461 int err; 462 char *fru_name, *lname; 463 nvlist_t *fru = NULL; 464 int rt = TOPO_WALK_NEXT; 465 tgetlabel_data_t *tdp = (tgetlabel_data_t *)arg; 466 467 if (topo_node_fru(node, &fru, NULL, &err) == 0) { 468 if (topo_fmri_nvl2str(thp, fru, &fru_name, &err) == 0) { 469 if (frucmp(tdp->fru, fru_name) == 0 && 470 topo_node_label(node, &lname, &err) == 0) { 471 tdp->label = strdup(lname); 472 topo_hdl_strfree(thp, lname); 473 rt = TOPO_WALK_TERMINATE; 474 } 475 topo_hdl_strfree(thp, fru_name); 476 } 477 nvlist_free(fru); 478 } 479 return (rt); 480 } 481 482 static void 483 label_get_topo(void) 484 { 485 int err; 486 487 topo_handle = topo_open(TOPO_VERSION, 0, &err); 488 if (topo_handle) { 489 topo_handle_uuid = topo_snap_hold(topo_handle, NULL, &err); 490 } 491 } 492 493 static void 494 label_release_topo(void) 495 { 496 if (topo_handle_uuid) 497 topo_hdl_strfree(topo_handle, topo_handle_uuid); 498 if (topo_handle) { 499 topo_snap_release(topo_handle); 500 topo_close(topo_handle); 501 } 502 } 503 504 static char * 505 get_fmri_label(char *fru) 506 { 507 topo_walk_t *twp; 508 tgetlabel_data_t td; 509 int err; 510 511 td.label = NULL; 512 td.fru = fru; 513 if (topo_handle == NULL) 514 label_get_topo(); 515 if (topo_handle_uuid) { 516 twp = topo_walk_init(topo_handle, FM_FMRI_SCHEME_HC, 517 tgetlabel, &td, &err); 
518 if (twp) { 519 topo_walk_step(twp, TOPO_WALK_CHILD); 520 topo_walk_fini(twp); 521 } 522 } 523 return (td.label); 524 } 525 526 static char * 527 get_nvl2str_topo(nvlist_t *nvl) 528 { 529 char *name = NULL; 530 char *tname; 531 int err; 532 char *scheme = NULL; 533 char *mod_name = NULL; 534 char buf[128]; 535 536 if (topo_handle == NULL) 537 label_get_topo(); 538 if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) { 539 name = strdup(tname); 540 topo_hdl_strfree(topo_handle, tname); 541 } else { 542 (void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme); 543 (void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name); 544 if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 && 545 mod_name) { 546 (void) snprintf(buf, sizeof (buf), "%s:///module/%s", 547 scheme, mod_name); 548 name = strdup(buf); 549 } 550 } 551 return (name); 552 } 553 554 static int 555 set_priority(char *s) 556 { 557 int rt = 0; 558 559 if (s) { 560 if (strcmp(s, "Minor") == 0) 561 rt = 1; 562 else if (strcmp(s, "Major") == 0) 563 rt = 10; 564 else if (strcmp(s, "Critical") == 0) 565 rt = 100; 566 } 567 return (rt); 568 } 569 570 static int 571 cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1, 572 uint8_t p2) 573 { 574 int r1, r2; 575 int rt; 576 577 r1 = set_priority(s1); 578 r2 = set_priority(s2); 579 rt = r1 - r2; 580 if (rt == 0) { 581 if (t1 > t2) 582 rt = 1; 583 else if (t1 < t2) 584 rt = -1; 585 else 586 rt = p1 - p2; 587 } 588 return (rt); 589 } 590 591 /* 592 * merge two lists into one, by comparing enties in new and moving into list if 593 * name is not there or free off memory for names which are already there 594 * add_pct indicates if pct is the sum or highest pct 595 */ 596 static name_list_t * 597 merge_name_list(name_list_t **list, name_list_t *new, int add_pct) 598 { 599 name_list_t *lp, *np, *sp, *rt = NULL; 600 int max_pct; 601 602 rt = *list; 603 np = new; 604 while (np) { 605 lp = *list; 606 while (lp) { 607 if (strcmp(lp->name, 
np->name) == 0) 608 break; 609 lp = lp->next; 610 if (lp == *list) 611 lp = NULL; 612 } 613 if (np->next == new) 614 sp = NULL; 615 else 616 sp = np->next; 617 if (lp) { 618 lp->status |= (np->status & FM_SUSPECT_FAULTY); 619 if (add_pct) { 620 lp->pct += np->pct; 621 lp->count += np->count; 622 } else if (np->pct > lp->pct) { 623 lp->pct = np->pct; 624 } 625 max_pct = np->max_pct; 626 if (np->label) 627 free(np->label); 628 free(np->name); 629 free(np); 630 np = NULL; 631 if (max_pct > lp->max_pct) { 632 lp->max_pct = max_pct; 633 if (lp->max_pct > lp->prev->max_pct && 634 lp != *list) { 635 lp->prev->next = lp->next; 636 lp->next->prev = lp->prev; 637 np = lp; 638 } 639 } 640 } 641 if (np) { 642 lp = *list; 643 if (lp) { 644 if (np->max_pct > lp->max_pct) { 645 np->next = lp; 646 np->prev = lp->prev; 647 lp->prev->next = np; 648 lp->prev = np; 649 *list = np; 650 rt = np; 651 } else { 652 lp = lp->next; 653 while (lp != *list && 654 np->max_pct < lp->max_pct) { 655 lp = lp->next; 656 } 657 np->next = lp; 658 np->prev = lp->prev; 659 lp->prev->next = np; 660 lp->prev = np; 661 } 662 } else { 663 *list = np; 664 np->next = np; 665 np->prev = np; 666 rt = np; 667 } 668 } 669 np = sp; 670 } 671 return (rt); 672 } 673 674 /* 675 * compare entries in two lists return true if the two lists have identical 676 * content. The two lists may not have entries in the same order, so we compare 677 * the size of the list as well as trying to find every entry from one list in 678 * the other. 
679 */ 680 static int 681 cmp_name_list(name_list_t *lxp1, name_list_t *lxp2) 682 { 683 name_list_t *lp1, *lp2; 684 int l1 = 0, l2 = 0, common = 0; 685 686 lp2 = lxp2; 687 while (lp2) { 688 l2++; 689 lp2 = lp2->next; 690 if (lp2 == lxp2) 691 break; 692 } 693 lp1 = lxp1; 694 while (lp1) { 695 l1++; 696 lp2 = lxp2; 697 while (lp2) { 698 if (strcmp(lp2->name, lp1->name) == 0) { 699 common++; 700 break; 701 } 702 lp2 = lp2->next; 703 if (lp2 == lxp2) 704 break; 705 } 706 lp1 = lp1->next; 707 if (lp1 == lxp1) 708 break; 709 } 710 if (l1 == l2 && l2 == common) 711 return (0); 712 else 713 return (1); 714 } 715 716 static name_list_t * 717 alloc_name_list(char *name, uint8_t pct) 718 { 719 name_list_t *nlp; 720 721 nlp = malloc(sizeof (*nlp)); 722 nlp->name = strdup(name); 723 nlp->pct = pct; 724 nlp->max_pct = pct; 725 nlp->count = 1; 726 nlp->next = nlp; 727 nlp->prev = nlp; 728 nlp->status = 0; 729 nlp->label = NULL; 730 return (nlp); 731 } 732 733 static void 734 free_name_list(name_list_t *list) 735 { 736 name_list_t *next = list; 737 name_list_t *lp; 738 739 if (list) { 740 do { 741 lp = next; 742 next = lp->next; 743 if (lp->label) 744 free(lp->label); 745 free(lp->name); 746 free(lp); 747 } while (next != list); 748 } 749 } 750 751 static status_record_t * 752 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class, 753 name_list_t *fru, name_list_t *asru, name_list_t *resource, 754 name_list_t *serial, const char *url, boolean_t not_suppressed, 755 hostid_t *hostid) 756 { 757 status_record_t *status_rec_p; 758 759 status_rec_p = (status_record_t *)malloc(sizeof (status_record_t)); 760 status_rec_p->nrecs = 1; 761 status_rec_p->host = hostid; 762 status_rec_p->uurec = uurec_p; 763 uurec_p->next = NULL; 764 uurec_p->prev = NULL; 765 uurec_p->asru = asru; 766 status_rec_p->severity = get_dict_msg(msgid, "severity", 1, 0); 767 status_rec_p->class = class; 768 status_rec_p->fru = fru; 769 status_rec_p->asru = asru; 770 status_rec_p->resource = resource; 771 
status_rec_p->serial = serial; 772 status_rec_p->url = url ? strdup(url) : NULL; 773 status_rec_p->msgid = strdup(msgid); 774 status_rec_p->not_suppressed = not_suppressed; 775 return (status_rec_p); 776 } 777 778 /* 779 * add record to given list maintaining order higher priority first. 780 */ 781 static void 782 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp) 783 { 784 sr_list_t *tp, *np, *sp; 785 int order; 786 uint64_t sec; 787 788 np = malloc(sizeof (sr_list_t)); 789 np->status_record = status_rec_p; 790 sec = status_rec_p->uurec->sec; 791 if ((sp = *list_pp) == NULL) { 792 *list_pp = np; 793 np->next = np; 794 np->prev = np; 795 } else { 796 /* insert new record in front of lower priority */ 797 tp = sp; 798 order = cmp_priority(status_rec_p->severity, 799 sp->status_record->severity, sec, 800 tp->status_record->uurec->sec, 0, 0); 801 if (order > 0) { 802 *list_pp = np; 803 } else { 804 tp = sp->next; 805 while (tp != sp && 806 cmp_priority(status_rec_p->severity, 807 tp->status_record->severity, sec, 808 tp->status_record->uurec->sec, 0, 0)) { 809 tp = tp->next; 810 } 811 } 812 np->next = tp; 813 np->prev = tp->prev; 814 tp->prev->next = np; 815 tp->prev = np; 816 } 817 } 818 819 static void 820 add_resource(status_record_t *status_rec_p, resource_list_t **rp, 821 resource_list_t *np) 822 { 823 int order; 824 uint64_t sec; 825 resource_list_t *sp, *tp; 826 status_record_t *srp; 827 char *severity = status_rec_p->severity; 828 829 add_rec_list(status_rec_p, &np->status_rec_list); 830 if ((sp = *rp) == NULL) { 831 np->next = np; 832 np->prev = np; 833 *rp = np; 834 } else { 835 /* 836 * insert new record in front of lower priority 837 */ 838 tp = sp->next; 839 srp = sp->status_rec_list->status_record; 840 sec = status_rec_p->uurec->sec; 841 order = cmp_priority(severity, srp->severity, sec, 842 srp->uurec->sec, np->max_pct, sp->max_pct); 843 if (order > 0) { 844 *rp = np; 845 } else { 846 srp = tp->status_rec_list->status_record; 847 while (tp 
!= sp && 848 cmp_priority(severity, srp->severity, sec, 849 srp->uurec->sec, np->max_pct, sp->max_pct) < 0) { 850 tp = tp->next; 851 srp = tp->status_rec_list->status_record; 852 } 853 } 854 np->next = tp; 855 np->prev = tp->prev; 856 tp->prev->next = np; 857 tp->prev = np; 858 } 859 } 860 861 static void 862 add_resource_list(status_record_t *status_rec_p, name_list_t *fp, 863 resource_list_t **rpp) 864 { 865 int order; 866 resource_list_t *np, *end; 867 status_record_t *srp; 868 869 np = *rpp; 870 end = np; 871 while (np) { 872 if (strcmp(fp->name, np->resource) == 0) { 873 np->not_suppressed |= status_rec_p->not_suppressed; 874 srp = np->status_rec_list->status_record; 875 order = cmp_priority(status_rec_p->severity, 876 srp->severity, status_rec_p->uurec->sec, 877 srp->uurec->sec, fp->max_pct, np->max_pct); 878 if (order > 0 && np != end) { 879 /* 880 * remove from list and add again using 881 * new priority 882 */ 883 np->prev->next = np->next; 884 np->next->prev = np->prev; 885 add_resource(status_rec_p, 886 rpp, np); 887 } else { 888 add_rec_list(status_rec_p, 889 &np->status_rec_list); 890 } 891 break; 892 } 893 np = np->next; 894 if (np == end) { 895 np = NULL; 896 break; 897 } 898 } 899 if (np == NULL) { 900 np = malloc(sizeof (resource_list_t)); 901 np->resource = fp->name; 902 np->not_suppressed = status_rec_p->not_suppressed; 903 np->status_rec_list = NULL; 904 np->max_pct = fp->max_pct; 905 add_resource(status_rec_p, rpp, np); 906 } 907 } 908 909 static void 910 add_list(status_record_t *status_rec_p, name_list_t *listp, 911 resource_list_t **glistp) 912 { 913 name_list_t *fp, *end; 914 915 fp = listp; 916 end = fp; 917 while (fp) { 918 add_resource_list(status_rec_p, fp, glistp); 919 fp = fp->next; 920 if (fp == end) 921 break; 922 } 923 } 924 925 /* 926 * add record to rec, fru and asru lists. 
927 */ 928 static void 929 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class, 930 name_list_t *fru, name_list_t *asru, name_list_t *resource, 931 name_list_t *serial, const char *url, boolean_t not_suppressed, 932 hostid_t *hostid) 933 { 934 status_record_t *status_rec_p; 935 936 status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru, 937 resource, serial, url, not_suppressed, hostid); 938 add_rec_list(status_rec_p, &status_rec_list); 939 if (status_rec_p->fru) 940 add_list(status_rec_p, status_rec_p->fru, &status_fru_list); 941 if (status_rec_p->asru) 942 add_list(status_rec_p, status_rec_p->asru, &status_asru_list); 943 } 944 945 /* 946 * add uuid and diagnoses time to an existing record for similar fault on the 947 * same fru 948 */ 949 static void 950 catalog_merge_record(status_record_t *status_rec_p, uurec_t *uurec_p, 951 name_list_t *asru, name_list_t *resource, name_list_t *serial, 952 const char *url, boolean_t not_suppressed) 953 { 954 uurec_t *uurec1_p; 955 956 status_rec_p->nrecs++; 957 /* add uurec in time order */ 958 if (status_rec_p->uurec->sec > uurec_p->sec) { 959 uurec_p->next = status_rec_p->uurec; 960 uurec_p->prev = NULL; 961 status_rec_p->uurec = uurec_p; 962 } else { 963 uurec1_p = status_rec_p->uurec; 964 while (uurec1_p->next && uurec1_p->next->sec <= uurec_p->sec) 965 uurec1_p = uurec1_p->next; 966 if (uurec1_p->next) 967 uurec1_p->next->prev = uurec_p; 968 uurec_p->next = uurec1_p->next; 969 uurec_p->prev = uurec1_p; 970 uurec1_p->next = uurec_p; 971 } 972 if (status_rec_p->url == NULL && url != NULL) 973 status_rec_p->url = strdup(url); 974 status_rec_p->not_suppressed |= not_suppressed; 975 uurec_p->asru = merge_name_list(&status_rec_p->asru, asru, 0); 976 (void) merge_name_list(&status_rec_p->resource, resource, 0); 977 (void) merge_name_list(&status_rec_p->serial, serial, 0); 978 } 979 980 static status_record_t * 981 record_in_catalog(name_list_t *class, name_list_t *fru, 982 char *msgid, hostid_t *host) 
983 { 984 sr_list_t *status_rec_p; 985 status_record_t *srp = NULL; 986 987 status_rec_p = status_rec_list; 988 while (status_rec_p) { 989 srp = status_rec_p->status_record; 990 if (host == srp->host && 991 cmp_name_list(class, srp->class) == 0 && 992 cmp_name_list(fru, srp->fru) == 0 && 993 strcmp(msgid, srp->msgid) == 0) 994 break; 995 if (status_rec_p->next == status_rec_list) { 996 srp = NULL; 997 break; 998 } else { 999 status_rec_p = status_rec_p->next; 1000 } 1001 } 1002 return (srp); 1003 } 1004 1005 static void 1006 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct) 1007 { 1008 char *name; 1009 char *serial = NULL; 1010 char **lserial = NULL; 1011 uint64_t serint; 1012 name_list_t *nlp; 1013 int j; 1014 uint_t nelem; 1015 char buf[64]; 1016 1017 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) { 1018 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 1019 if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID, 1020 &serint) == 0) { 1021 (void) snprintf(buf, sizeof (buf), "%llX", 1022 serint); 1023 nlp = alloc_name_list(buf, pct); 1024 (void) merge_name_list(serial_p, nlp, 1); 1025 } 1026 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 1027 if (nvlist_lookup_string_array(nvl, 1028 FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) { 1029 nlp = alloc_name_list(lserial[0], pct); 1030 for (j = 1; j < nelem; j++) { 1031 name_list_t *n1lp; 1032 n1lp = alloc_name_list(lserial[j], pct); 1033 (void) merge_name_list(&nlp, n1lp, 1); 1034 } 1035 (void) merge_name_list(serial_p, nlp, 1); 1036 } 1037 } else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) { 1038 if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID, 1039 &serial) == 0) { 1040 nlp = alloc_name_list(serial, pct); 1041 (void) merge_name_list(serial_p, nlp, 1); 1042 } 1043 } 1044 } 1045 } 1046 1047 static void 1048 extract_record_info(nvlist_t *nvl, name_list_t **class_p, 1049 name_list_t **fru_p, name_list_t **serial_p, 1050 name_list_t **resource_p, name_list_t **asru_p, uint8_t status) 1051 { 1052 
nvlist_t *lfru, *lasru, *rsrc; 1053 name_list_t *nlp; 1054 char *name; 1055 uint8_t lpct = 0; 1056 char *lclass = NULL; 1057 char *label; 1058 1059 (void) nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct); 1060 if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) { 1061 nlp = alloc_name_list(lclass, lpct); 1062 (void) merge_name_list(class_p, nlp, 1); 1063 } 1064 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) { 1065 name = get_nvl2str_topo(lfru); 1066 if (name != NULL) { 1067 nlp = alloc_name_list(name, lpct); 1068 nlp->status = status & ~(FM_SUSPECT_UNUSABLE | 1069 FM_SUSPECT_DEGRADED); 1070 free(name); 1071 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1072 &label) == 0) 1073 nlp->label = strdup(label); 1074 (void) merge_name_list(fru_p, nlp, 1); 1075 } 1076 get_serial_no(lfru, serial_p, lpct); 1077 } 1078 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) { 1079 name = get_nvl2str_topo(lasru); 1080 if (name != NULL) { 1081 nlp = alloc_name_list(name, lpct); 1082 nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT | 1083 FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED | 1084 FM_SUSPECT_ACQUITTED); 1085 free(name); 1086 (void) merge_name_list(asru_p, nlp, 1); 1087 } 1088 get_serial_no(lasru, serial_p, lpct); 1089 } 1090 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) { 1091 name = get_nvl2str_topo(rsrc); 1092 if (name != NULL) { 1093 nlp = alloc_name_list(name, lpct); 1094 nlp->status = status; 1095 free(name); 1096 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, 1097 &label) == 0) 1098 nlp->label = strdup(label); 1099 (void) merge_name_list(resource_p, nlp, 1); 1100 } 1101 } 1102 } 1103 1104 static void 1105 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid, 1106 const char *url) 1107 { 1108 char *msgid = "-"; 1109 uint_t i, size = 0; 1110 name_list_t *class = NULL, *resource = NULL; 1111 name_list_t *asru = NULL, *fru = NULL, *serial = NULL; 1112 nvlist_t **nva; 1113 uint8_t *ba; 1114 status_record_t *status_rec_p; 
1115 uurec_t *uurec_p; 1116 hostid_t *host; 1117 boolean_t not_suppressed = 1; 1118 boolean_t any_present = 0; 1119 1120 (void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid); 1121 (void) nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size); 1122 (void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, 1123 ¬_suppressed); 1124 1125 if (size != 0) { 1126 (void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 1127 &nva, &size); 1128 (void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, 1129 &ba, &size); 1130 for (i = 0; i < size; i++) { 1131 extract_record_info(nva[i], &class, &fru, &serial, 1132 &resource, &asru, ba[i]); 1133 if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) && 1134 (ba[i] & FM_SUSPECT_FAULTY)) 1135 any_present = 1; 1136 } 1137 /* 1138 * also suppress if no resources present 1139 */ 1140 if (any_present == 0) 1141 not_suppressed = 0; 1142 } 1143 1144 uurec_p = (uurec_t *)malloc(sizeof (uurec_t)); 1145 uurec_p->uuid = strdup(uuid); 1146 uurec_p->sec = sec; 1147 uurec_p->ari_uuid_list = NULL; 1148 host = find_hostid(nvl); 1149 if (not_suppressed && !opt_g) 1150 status_rec_p = NULL; 1151 else 1152 status_rec_p = record_in_catalog(class, fru, msgid, host); 1153 if (status_rec_p) { 1154 catalog_merge_record(status_rec_p, uurec_p, asru, resource, 1155 serial, url, not_suppressed); 1156 free_name_list(class); 1157 free_name_list(fru); 1158 } else { 1159 catalog_new_record(uurec_p, msgid, class, fru, asru, 1160 resource, serial, url, not_suppressed, host); 1161 } 1162 } 1163 1164 static void 1165 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid) 1166 { 1167 sr_list_t *srp; 1168 uurec_t *uurp; 1169 ari_list_t *ari_list; 1170 1171 srp = status_rec_list; 1172 if (srp) { 1173 for (;;) { 1174 uurp = srp->status_record->uurec; 1175 while (uurp) { 1176 if (strcmp(uuid, uurp->uuid) == 0) { 1177 ari_list = (ari_list_t *) 1178 malloc(sizeof (ari_list_t)); 1179 ari_list->ari_uuid = strdup(ari_uuid); 1180 ari_list->next = 
/*
 * Print buf on stdout as a word-wrapped paragraph.
 *
 *	label	prefix of the first output line; continuation lines are
 *		indented by the same number of blanks
 *	buf	text to print; it is modified in place, because every
 *		chosen break point is overwritten with a NUL
 *
 * A blank line is written first.  The column counter starts at the label
 * width; once it reaches 80 the line is broken at the last blank seen.
 * An embedded newline always ends the current line and any blanks that
 * directly follow it are skipped.  A word with no blank before column 80
 * is never split.
 */
static void
print_line(char *label, char *buf)
{
	char *cp, *ep, *wp;
	char *padding, *cont;
	char c;
	int i;
	int lsz;

	lsz = strlen(label);
	padding = malloc(lsz + 1);
	if (padding != NULL) {
		(void) memset(padding, ' ', lsz);
		padding[lsz] = '\0';
		cont = padding;
	} else {
		/* out of memory: still print, just without the indent */
		cont = "";
	}

	cp = buf;
	ep = buf;
	c = *ep;
	(void) printf("\n");
	while (c != '\0') {
		i = lsz;
		wp = NULL;
		/* keep scanning until a break candidate exists past col 80 */
		while ((c = *ep) != '\0' && (wp == NULL || i < 80)) {
			if (c == ' ') {
				wp = ep;
			} else if (c == '\n') {
				i = 0;
				*ep = '\0';
				do {
					ep++;
				} while ((c = *ep) != '\0' && c == ' ');
				break;
			}
			ep++;
			i++;
		}
		if (i >= 80 && wp != NULL) {
			/* cut at the last blank and resume just after it */
			*wp = '\0';
			ep = wp + 1;
			c = *ep;
		}
		(void) printf("%s%s\n", label, cp);
		cp = ep;
		label = cont;
	}
	free(padding);
}
print_line(dgettext("FMD", "Action : "), buf); 1278 free(buf); 1279 } 1280 } 1281 1282 static void 1283 print_name(name_list_t *list, char *(func)(char *), char *padding, int *np, 1284 int pct, int full) 1285 { 1286 char *name, *fru_label = NULL; 1287 1288 name = list->name; 1289 if (list->label) { 1290 (void) printf("%s \"%s\" (%s)", padding, list->label, name); 1291 *np += 1; 1292 } else if (func && (fru_label = func(list->name)) != NULL) { 1293 (void) printf("%s \"%s\" (%s)", padding, fru_label, name); 1294 *np += 1; 1295 free(fru_label); 1296 } else { 1297 (void) printf("%s %s", padding, name); 1298 *np += 1; 1299 } 1300 if (list->pct && pct > 0 && pct < 100) { 1301 if (list->count > 1) { 1302 if (full) { 1303 (void) printf(" %d @ %s %d%%\n", list->count, 1304 dgettext("FMD", "max"), 1305 list->max_pct); 1306 } else { 1307 (void) printf(" %s %d%%\n", 1308 dgettext("FMD", "max"), 1309 list->max_pct); 1310 } 1311 } else { 1312 (void) printf(" %d%%\n", list->pct); 1313 } 1314 } else { 1315 (void) printf("\n"); 1316 } 1317 } 1318 1319 static void 1320 print_asru_status(int status, char *label) 1321 { 1322 char *msg = NULL; 1323 1324 switch (status) { 1325 case 0: 1326 msg = dgettext("FMD", "ok and in service"); 1327 break; 1328 case FM_SUSPECT_DEGRADED: 1329 msg = dgettext("FMD", "service degraded, " 1330 "but associated components no longer faulty"); 1331 break; 1332 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1333 msg = dgettext("FMD", "faulted but still " 1334 "providing degraded service"); 1335 break; 1336 case FM_SUSPECT_FAULTY: 1337 msg = dgettext("FMD", "faulted but still in service"); 1338 break; 1339 case FM_SUSPECT_UNUSABLE: 1340 msg = dgettext("FMD", "out of service, " 1341 "but associated components no longer faulty"); 1342 break; 1343 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1344 msg = dgettext("FMD", "faulted and taken out of service"); 1345 break; 1346 default: 1347 break; 1348 } 1349 if (msg) { 1350 (void) printf("%s %s\n", label, msg); 1351 
} 1352 } 1353 1354 static void 1355 print_fru_status(int status, char *label) 1356 { 1357 char *msg = NULL; 1358 1359 if (status & FM_SUSPECT_NOT_PRESENT) 1360 msg = dgettext("FMD", "not present"); 1361 else if (status & FM_SUSPECT_FAULTY) 1362 msg = dgettext("FMD", "faulty"); 1363 else if (status & FM_SUSPECT_REPLACED) 1364 msg = dgettext("FMD", "replaced"); 1365 else if (status & FM_SUSPECT_REPAIRED) 1366 msg = dgettext("FMD", "repair attempted"); 1367 else if (status & FM_SUSPECT_ACQUITTED) 1368 msg = dgettext("FMD", "acquitted"); 1369 else 1370 msg = dgettext("FMD", "removed"); 1371 (void) printf("%s %s\n", label, msg); 1372 } 1373 1374 static void 1375 print_rsrc_status(int status, char *label) 1376 { 1377 char *msg = ""; 1378 1379 if (status & FM_SUSPECT_NOT_PRESENT) 1380 msg = dgettext("FMD", "not present"); 1381 else if (status & FM_SUSPECT_FAULTY) { 1382 if (status & FM_SUSPECT_DEGRADED) 1383 msg = dgettext("FMD", 1384 "faulted but still providing degraded service"); 1385 else if (status & FM_SUSPECT_UNUSABLE) 1386 msg = dgettext("FMD", 1387 "faulted and taken out of service"); 1388 else 1389 msg = dgettext("FMD", "faulted but still in service"); 1390 } else if (status & FM_SUSPECT_REPLACED) 1391 msg = dgettext("FMD", "replaced"); 1392 else if (status & FM_SUSPECT_REPAIRED) 1393 msg = dgettext("FMD", "repair attempted"); 1394 else if (status & FM_SUSPECT_ACQUITTED) 1395 msg = dgettext("FMD", "acquitted"); 1396 else 1397 msg = dgettext("FMD", "removed"); 1398 (void) printf("%s %s\n", label, msg); 1399 } 1400 1401 static void 1402 print_name_list(name_list_t *list, char *label, char *(func)(char *), 1403 int limit, int pct, void (func1)(int, char *), int full) 1404 { 1405 char *name, *fru_label = NULL; 1406 char *padding; 1407 int i, j, l, n; 1408 name_list_t *end = list; 1409 1410 l = strlen(label); 1411 padding = malloc(l + 1); 1412 for (i = 0; i < l; i++) 1413 padding[i] = ' '; 1414 padding[l] = 0; 1415 (void) printf("%s", label); 1416 name = list->name; 
1417 if (list->label) 1418 (void) printf(" \"%s\" (%s)", list->label, name); 1419 else if (func && (fru_label = func(list->name)) != NULL) { 1420 (void) printf(" \"%s\" (%s)", fru_label, name); 1421 free(fru_label); 1422 } else 1423 (void) printf(" %s", name); 1424 if (list->pct && pct > 0 && pct < 100) { 1425 if (list->count > 1) { 1426 if (full) { 1427 (void) printf(" %d @ %s %d%%\n", list->count, 1428 dgettext("FMD", "max"), list->max_pct); 1429 } else { 1430 (void) printf(" %s %d%%\n", 1431 dgettext("FMD", "max"), list->max_pct); 1432 } 1433 } else { 1434 (void) printf(" %d%%\n", list->pct); 1435 } 1436 } else { 1437 (void) printf("\n"); 1438 } 1439 if (func1) 1440 func1(list->status, padding); 1441 n = 1; 1442 j = 0; 1443 while ((list = list->next) != end) { 1444 if (limit == 0 || n < limit) { 1445 print_name(list, func, padding, &n, pct, full); 1446 if (func1) 1447 func1(list->status, padding); 1448 } else 1449 j++; 1450 } 1451 if (j == 1) { 1452 print_name(list->prev, func, padding, &n, pct, full); 1453 } else if (j > 1) { 1454 (void) printf("%s... 
%d %s\n", padding, j, 1455 dgettext("FMD", "more entries suppressed," 1456 " use -v option for full list")); 1457 } 1458 free(padding); 1459 } 1460 1461 static int 1462 asru_same_status(name_list_t *list) 1463 { 1464 name_list_t *end = list; 1465 int status = list->status; 1466 1467 while ((list = list->next) != end) { 1468 if (status == -1) { 1469 status = list->status; 1470 continue; 1471 } 1472 if (list->status != -1 && status != list->status) { 1473 status = -1; 1474 break; 1475 } 1476 } 1477 return (status); 1478 } 1479 1480 static int 1481 serial_in_fru(name_list_t *fru, name_list_t *serial) 1482 { 1483 name_list_t *sp = serial; 1484 name_list_t *fp; 1485 int nserial = 0; 1486 int found = 0; 1487 char buf[128]; 1488 1489 while (sp) { 1490 fp = fru; 1491 nserial++; 1492 (void) snprintf(buf, sizeof (buf), "serial=%s", sp->name); 1493 buf[sizeof (buf) - 1] = 0; 1494 while (fp) { 1495 if (strstr(fp->name, buf) != NULL) { 1496 found++; 1497 break; 1498 } 1499 fp = fp->next; 1500 if (fp == fru) 1501 break; 1502 } 1503 sp = sp->next; 1504 if (sp == serial) 1505 break; 1506 } 1507 return (found == nserial ? 
1 : 0); 1508 } 1509 1510 static void 1511 print_sup_record(status_record_t *srp, int opt_i, int full) 1512 { 1513 char buf[32]; 1514 uurec_t *uurp = srp->uurec; 1515 int n, j, k, max; 1516 int status; 1517 ari_list_t *ari_list; 1518 1519 n = 0; 1520 max = max_fault; 1521 if (max < 0) { 1522 max = 0; 1523 } 1524 j = max / 2; 1525 max -= j; 1526 k = srp->nrecs - max; 1527 while ((uurp = uurp->next) != NULL) { 1528 if (full || n < j || n >= k || max_fault == 0 || 1529 srp->nrecs == max_fault+1) { 1530 if (opt_i) { 1531 ari_list = uurp->ari_uuid_list; 1532 while (ari_list) { 1533 (void) printf("%-15s %s\n", 1534 format_date(buf, sizeof (buf), 1535 uurp->sec), ari_list->ari_uuid); 1536 ari_list = ari_list->next; 1537 } 1538 } else { 1539 (void) printf("%-15s %s\n", 1540 format_date(buf, sizeof (buf), uurp->sec), 1541 uurp->uuid); 1542 } 1543 } else if (n == j) 1544 (void) printf("... %d %s\n", srp->nrecs - max_fault, 1545 dgettext("FMD", "more entries suppressed")); 1546 n++; 1547 } 1548 (void) printf("\n"); 1549 (void) printf("%s %s", dgettext("FMD", "Host :"), 1550 srp->host->server); 1551 if (srp->host->domain) 1552 (void) printf("\t%s %s", dgettext("FMD", "Domain :"), 1553 srp->host->domain); 1554 (void) printf("\n%s %s", dgettext("FMD", "Platform :"), 1555 srp->host->platform); 1556 (void) printf("\t%s %s\n\n", dgettext("FMD", "Chassis_id :"), 1557 srp->host->chassis ? srp->host->chassis : ""); 1558 if (srp->class) 1559 print_name_list(srp->class, 1560 dgettext("FMD", "Fault class :"), NULL, 0, srp->class->pct, 1561 NULL, full); 1562 if (srp->asru) { 1563 status = asru_same_status(srp->asru); 1564 if (status != -1) { 1565 print_name_list(srp->asru, 1566 dgettext("FMD", "Affects :"), NULL, 1567 full ? 0 : max_display, 0, NULL, full); 1568 print_asru_status(status, " "); 1569 } else 1570 print_name_list(srp->asru, 1571 dgettext("FMD", "Affects :"), NULL, 1572 full ? 
0 : max_display, 0, print_asru_status, full); 1573 } 1574 if (full || srp->fru == NULL || srp->asru == NULL) { 1575 if (srp->resource) { 1576 print_name_list(srp->resource, 1577 dgettext("FMD", "Problem in :"), 1578 NULL, full ? 0 : max_display, 0, print_rsrc_status, 1579 full); 1580 } 1581 } 1582 if (srp->fru) { 1583 status = asru_same_status(srp->fru); 1584 if (status != -1) { 1585 print_name_list(srp->fru, dgettext("FMD", 1586 "FRU :"), get_fmri_label, 0, 1587 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1588 NULL, full); 1589 print_fru_status(status, " "); 1590 } else 1591 print_name_list(srp->fru, dgettext("FMD", 1592 "FRU :"), get_fmri_label, 0, 1593 srp->fru->pct == 100 ? 100 : srp->fru->max_pct, 1594 print_fru_status, full); 1595 } 1596 if (srp->serial && !serial_in_fru(srp->fru, srp->serial) && 1597 !serial_in_fru(srp->asru, srp->serial)) { 1598 print_name_list(srp->serial, dgettext("FMD", "Serial ID. :"), 1599 NULL, 0, 0, NULL, full); 1600 } 1601 print_dict_info(srp->msgid, srp->url); 1602 (void) printf("\n"); 1603 } 1604 1605 static void 1606 print_status_record(status_record_t *srp, int summary, int opt_i, int full) 1607 { 1608 char buf[32]; 1609 uurec_t *uurp = srp->uurec; 1610 char *severity; 1611 static int header = 0; 1612 char *head; 1613 ari_list_t *ari_list; 1614 1615 if (nlspath) 1616 severity = get_dict_msg(srp->msgid, "severity", 1, 1); 1617 else 1618 severity = srp->severity; 1619 1620 if (!summary || !header) { 1621 if (opt_i) { 1622 head = "--------------- " 1623 "------------------------------------ " 1624 "-------------- ---------\n" 1625 "TIME CACHE-ID" 1626 " MSG-ID" 1627 " SEVERITY\n--------------- " 1628 "------------------------------------ " 1629 " -------------- ---------"; 1630 } else { 1631 head = "--------------- " 1632 "------------------------------------ " 1633 "-------------- ---------\n" 1634 "TIME EVENT-ID" 1635 " MSG-ID" 1636 " SEVERITY\n--------------- " 1637 "------------------------------------ " 1638 " 
-------------- ---------"; 1639 } 1640 (void) printf("%s\n", dgettext("FMD", head)); 1641 header = 1; 1642 } 1643 if (opt_i) { 1644 ari_list = uurp->ari_uuid_list; 1645 while (ari_list) { 1646 (void) printf("%-15s %-37s %-14s %-9s\n", 1647 format_date(buf, sizeof (buf), uurp->sec), 1648 ari_list->ari_uuid, srp->msgid, severity); 1649 ari_list = ari_list->next; 1650 } 1651 } else { 1652 (void) printf("%-15s %-37s %-14s %-9s\n", 1653 format_date(buf, sizeof (buf), uurp->sec), 1654 uurp->uuid, srp->msgid, severity); 1655 } 1656 1657 if (!summary) 1658 print_sup_record(srp, opt_i, full); 1659 } 1660 1661 static void 1662 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed) 1663 { 1664 status_record_t *srp; 1665 sr_list_t *slp; 1666 1667 slp = status_rec_list; 1668 if (slp) { 1669 for (;;) { 1670 srp = slp->status_record; 1671 if (opt_a || srp->not_suppressed) { 1672 if (page_feed) 1673 (void) printf("\f\n"); 1674 print_status_record(srp, summary, opt_i, full); 1675 } 1676 if (slp->next == status_rec_list) 1677 break; 1678 slp = slp->next; 1679 } 1680 } 1681 } 1682 1683 static name_list_t * 1684 find_fru(status_record_t *srp, char *resource) 1685 { 1686 name_list_t *rt = NULL; 1687 name_list_t *fru = srp->fru; 1688 1689 while (fru) { 1690 if (strcmp(resource, fru->name) == 0) { 1691 rt = fru; 1692 break; 1693 } 1694 fru = fru->next; 1695 if (fru == srp->fru) 1696 break; 1697 } 1698 return (rt); 1699 } 1700 1701 static void 1702 print_fru_line(name_list_t *fru, char *uuid) 1703 { 1704 if (fru->pct == 100) { 1705 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1706 dgettext("FMD", "suspects in this FRU total certainty"), 1707 100); 1708 } else { 1709 (void) printf("%s %d %s %d%%\n", uuid, fru->count, 1710 dgettext("FMD", "suspects in this FRU max certainty"), 1711 fru->max_pct); 1712 } 1713 } 1714 1715 static void 1716 print_fru(int summary, int opt_a, int opt_i, int page_feed) 1717 { 1718 resource_list_t *tp = status_fru_list; 1719 status_record_t 
*srp; 1720 sr_list_t *slp, *end; 1721 char *msgid, *fru_label; 1722 uurec_t *uurp; 1723 name_list_t *fru; 1724 int status; 1725 ari_list_t *ari_list; 1726 1727 while (tp) { 1728 if (opt_a || tp->not_suppressed) { 1729 if (page_feed) 1730 (void) printf("\f\n"); 1731 if (!summary) 1732 (void) printf("-----------------------------" 1733 "---------------------------------------" 1734 "----------\n"); 1735 slp = tp->status_rec_list; 1736 end = slp; 1737 do { 1738 srp = slp->status_record; 1739 fru = find_fru(srp, tp->resource); 1740 if (fru) { 1741 if (fru->label) 1742 (void) printf("\"%s\" (%s) ", 1743 fru->label, fru->name); 1744 else if ((fru_label = get_fmri_label( 1745 fru->name)) != NULL) { 1746 (void) printf("\"%s\" (%s) ", 1747 fru_label, fru->name); 1748 free(fru_label); 1749 } else 1750 (void) printf("%s ", 1751 fru->name); 1752 break; 1753 } 1754 slp = slp->next; 1755 } while (slp != end); 1756 1757 slp = tp->status_rec_list; 1758 end = slp; 1759 status = 0; 1760 do { 1761 srp = slp->status_record; 1762 fru = srp->fru; 1763 while (fru) { 1764 if (strcmp(tp->resource, 1765 fru->name) == 0) 1766 status |= fru->status; 1767 fru = fru->next; 1768 if (fru == srp->fru) 1769 break; 1770 } 1771 slp = slp->next; 1772 } while (slp != end); 1773 if (status & FM_SUSPECT_NOT_PRESENT) 1774 (void) printf(dgettext("FMD", "not present\n")); 1775 else if (status & FM_SUSPECT_FAULTY) 1776 (void) printf(dgettext("FMD", "faulty\n")); 1777 else if (status & FM_SUSPECT_REPLACED) 1778 (void) printf(dgettext("FMD", "replaced\n")); 1779 else if (status & FM_SUSPECT_REPAIRED) 1780 (void) printf(dgettext("FMD", 1781 "repair attempted\n")); 1782 else if (status & FM_SUSPECT_ACQUITTED) 1783 (void) printf(dgettext("FMD", "acquitted\n")); 1784 else 1785 (void) printf(dgettext("FMD", "removed\n")); 1786 1787 slp = tp->status_rec_list; 1788 end = slp; 1789 do { 1790 srp = slp->status_record; 1791 uurp = srp->uurec; 1792 fru = find_fru(srp, tp->resource); 1793 if (fru) { 1794 if (opt_i) { 1795 
ari_list = uurp->ari_uuid_list; 1796 while (ari_list) { 1797 print_fru_line(fru, 1798 ari_list->ari_uuid); 1799 ari_list = 1800 ari_list->next; 1801 } 1802 } else { 1803 print_fru_line(fru, uurp->uuid); 1804 } 1805 } 1806 slp = slp->next; 1807 } while (slp != end); 1808 if (!summary) { 1809 slp = tp->status_rec_list; 1810 end = slp; 1811 srp = slp->status_record; 1812 if (srp->serial && 1813 !serial_in_fru(srp->fru, srp->serial)) { 1814 print_name_list(srp->serial, 1815 dgettext("FMD", "Serial ID. :"), 1816 NULL, 0, 0, NULL, 1); 1817 } 1818 msgid = NULL; 1819 do { 1820 if (msgid == NULL || 1821 strcmp(msgid, srp->msgid) != 0) { 1822 msgid = srp->msgid; 1823 print_dict_info(srp->msgid, 1824 srp->url); 1825 } 1826 slp = slp->next; 1827 } while (slp != end); 1828 } 1829 } 1830 tp = tp->next; 1831 if (tp == status_fru_list) 1832 break; 1833 } 1834 } 1835 1836 static void 1837 print_asru(int opt_a) 1838 { 1839 resource_list_t *tp = status_asru_list; 1840 status_record_t *srp; 1841 sr_list_t *slp, *end; 1842 char *msg; 1843 int status; 1844 name_list_t *asru; 1845 1846 while (tp) { 1847 if (opt_a || tp->not_suppressed) { 1848 status = 0; 1849 slp = tp->status_rec_list; 1850 end = slp; 1851 do { 1852 srp = slp->status_record; 1853 asru = srp->asru; 1854 while (asru) { 1855 if (strcmp(tp->resource, 1856 asru->name) == 0) 1857 status |= asru->status; 1858 asru = asru->next; 1859 if (asru == srp->asru) 1860 break; 1861 } 1862 slp = slp->next; 1863 } while (slp != end); 1864 switch (status) { 1865 case 0: 1866 msg = dgettext("FMD", "ok"); 1867 break; 1868 case FM_SUSPECT_DEGRADED: 1869 msg = dgettext("FMD", "degraded"); 1870 break; 1871 case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED: 1872 msg = dgettext("FMD", "degraded"); 1873 break; 1874 case FM_SUSPECT_FAULTY: 1875 msg = dgettext("FMD", "degraded"); 1876 break; 1877 case FM_SUSPECT_UNUSABLE: 1878 msg = dgettext("FMD", "unknown"); 1879 break; 1880 case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE: 1881 msg = dgettext("FMD", 
"faulted"); 1882 break; 1883 default: 1884 msg = ""; 1885 break; 1886 } 1887 (void) printf("%-69s %s\n", tp->resource, msg); 1888 } 1889 tp = tp->next; 1890 if (tp == status_asru_list) 1891 break; 1892 } 1893 } 1894 1895 static int 1896 uuid_in_list(char *uuid, uurec_select_t *uurecp) 1897 { 1898 while (uurecp) { 1899 if (strcmp(uuid, uurecp->uuid) == 0) 1900 return (1); 1901 uurecp = uurecp->next; 1902 } 1903 return (0); 1904 } 1905 1906 static int 1907 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg) 1908 { 1909 int64_t *diag_time; 1910 uint_t nelem; 1911 int rt = 0; 1912 char *uuid = "-"; 1913 uurec_select_t *uurecp = (uurec_select_t *)arg; 1914 1915 if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME, 1916 &diag_time, &nelem) == 0 && nelem >= 2) { 1917 (void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID, 1918 &uuid); 1919 if (uurecp == NULL || uuid_in_list(uuid, uurecp)) 1920 add_fault_record_to_catalog(acp->aci_event, *diag_time, 1921 uuid, acp->aci_url); 1922 } else { 1923 rt = -1; 1924 } 1925 return (rt); 1926 } 1927 1928 /*ARGSUSED*/ 1929 static int 1930 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused) 1931 { 1932 update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid); 1933 return (0); 1934 } 1935 1936 static int 1937 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i) 1938 { 1939 int rt = FMADM_EXIT_SUCCESS; 1940 1941 /* 1942 * These calls may fail with Protocol error if message payload is to big 1943 */ 1944 if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0) 1945 die("failed to get case list from fmd"); 1946 if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0) 1947 die("failed to get case status from fmd"); 1948 return (rt); 1949 } 1950 1951 /* 1952 * fmadm faulty command 1953 * 1954 * -a show hidden fault records 1955 * -f show faulty fru's 1956 * -g force grouping of similar faults on the same fru 1957 * -n number of fault records to display 1958 * -p pipe output through pager 
1959 * -r show faulty asru's 1960 * -s print summary of first fault 1961 * -u print listed uuid's only 1962 * -v full output 1963 */ 1964 1965 int 1966 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[]) 1967 { 1968 int opt_a = 0, opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0; 1969 int opt_i = 0; 1970 char *pager; 1971 FILE *fp; 1972 int rt, c, stat; 1973 uurec_select_t *tp; 1974 uurec_select_t *uurecp = NULL; 1975 1976 catalog_setup(); 1977 while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) { 1978 switch (c) { 1979 case 'a': 1980 opt_a++; 1981 break; 1982 case 'f': 1983 opt_f++; 1984 break; 1985 case 'g': 1986 opt_g++; 1987 break; 1988 case 'i': 1989 opt_i++; 1990 break; 1991 case 'n': 1992 max_fault = atoi(optarg); 1993 break; 1994 case 'p': 1995 opt_p++; 1996 break; 1997 case 'r': 1998 opt_r++; 1999 break; 2000 case 's': 2001 opt_s++; 2002 break; 2003 case 'u': 2004 tp = (uurec_select_t *)malloc(sizeof (uurec_select_t)); 2005 tp->uuid = optarg; 2006 tp->next = uurecp; 2007 uurecp = tp; 2008 opt_a = 1; 2009 break; 2010 case 'v': 2011 opt_v++; 2012 break; 2013 default: 2014 return (FMADM_EXIT_USAGE); 2015 } 2016 } 2017 if (optind < argc) 2018 return (FMADM_EXIT_USAGE); 2019 2020 rt = get_cases_from_fmd(adm, uurecp, opt_i); 2021 if (opt_p) { 2022 if ((pager = getenv("PAGER")) == NULL) 2023 pager = "/usr/bin/more"; 2024 fp = popen(pager, "w"); 2025 if (fp == NULL) { 2026 rt = FMADM_EXIT_ERROR; 2027 opt_p = 0; 2028 } else { 2029 dup2(fileno(fp), 1); 2030 setbuf(stdout, NULL); 2031 (void) fclose(fp); 2032 } 2033 } 2034 max_display = max_fault; 2035 if (opt_f) 2036 print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s); 2037 if (opt_r) 2038 print_asru(opt_a); 2039 if (opt_f == 0 && opt_r == 0) 2040 print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s); 2041 label_release_topo(); 2042 if (opt_p) { 2043 (void) fclose(stdout); 2044 (void) wait(&stat); 2045 } 2046 return (rt); 2047 } 2048 2049 int 2050 cmd_flush(fmd_adm_t *adm, int argc, char *argv[]) 2051 { 2052 
int i, status = FMADM_EXIT_SUCCESS; 2053 2054 if (argc < 2 || (i = getopt(argc, argv, "")) != EOF) 2055 return (FMADM_EXIT_USAGE); 2056 2057 for (i = 1; i < argc; i++) { 2058 if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) { 2059 warn("failed to flush %s", argv[i]); 2060 status = FMADM_EXIT_ERROR; 2061 } else 2062 note("flushed resource history for %s\n", argv[i]); 2063 } 2064 2065 return (status); 2066 } 2067 2068 int 2069 cmd_repair(fmd_adm_t *adm, int argc, char *argv[]) 2070 { 2071 int err; 2072 2073 if (getopt(argc, argv, "") != EOF) 2074 return (FMADM_EXIT_USAGE); 2075 2076 if (argc - optind != 1) 2077 return (FMADM_EXIT_USAGE); 2078 2079 /* 2080 * argument could be a uuid, an fmri (asru, fru or resource) 2081 * or a label. Try uuid first, If that fails try the others. 2082 */ 2083 err = fmd_adm_case_repair(adm, argv[optind]); 2084 if (err != 0) 2085 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2086 2087 if (err != 0) 2088 die("failed to record repair to %s", argv[optind]); 2089 2090 note("recorded repair to %s\n", argv[optind]); 2091 return (FMADM_EXIT_SUCCESS); 2092 } 2093 2094 int 2095 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[]) 2096 { 2097 int err; 2098 2099 if (getopt(argc, argv, "") != EOF) 2100 return (FMADM_EXIT_USAGE); 2101 2102 if (argc - optind != 1) 2103 return (FMADM_EXIT_USAGE); 2104 2105 /* 2106 * argument could be an fmri (asru, fru or resource) or a label. 2107 */ 2108 err = fmd_adm_rsrc_repaired(adm, argv[optind]); 2109 if (err != 0) 2110 die("failed to record repair to %s", argv[optind]); 2111 2112 note("recorded repair to of %s\n", argv[optind]); 2113 return (FMADM_EXIT_SUCCESS); 2114 } 2115 2116 int 2117 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[]) 2118 { 2119 int err; 2120 2121 if (getopt(argc, argv, "") != EOF) 2122 return (FMADM_EXIT_USAGE); 2123 2124 if (argc - optind != 1) 2125 return (FMADM_EXIT_USAGE); 2126 2127 /* 2128 * argument could be an fmri (asru, fru or resource) or a label. 
2129 */ 2130 err = fmd_adm_rsrc_replaced(adm, argv[optind]); 2131 if (err != 0) 2132 die("failed to record replacement of %s", argv[optind]); 2133 2134 note("recorded replacement of %s\n", argv[optind]); 2135 return (FMADM_EXIT_SUCCESS); 2136 } 2137 2138 int 2139 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[]) 2140 { 2141 int err; 2142 2143 if (getopt(argc, argv, "") != EOF) 2144 return (FMADM_EXIT_USAGE); 2145 2146 if (argc - optind != 1 && argc - optind != 2) 2147 return (FMADM_EXIT_USAGE); 2148 2149 /* 2150 * argument could be a uuid, an fmri (asru, fru or resource) 2151 * or a label. Or it could be a uuid and an fmri or label. 2152 */ 2153 if (argc - optind == 2) { 2154 err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]); 2155 if (err != 0) 2156 err = fmd_adm_rsrc_acquit(adm, argv[optind + 1], 2157 argv[optind]); 2158 } else { 2159 err = fmd_adm_case_acquit(adm, argv[optind]); 2160 if (err != 0) 2161 err = fmd_adm_rsrc_acquit(adm, argv[optind], ""); 2162 } 2163 2164 if (err != 0) 2165 die("failed to record acquital of %s", argv[optind]); 2166 2167 note("recorded acquital of %s\n", argv[optind]); 2168 return (FMADM_EXIT_SUCCESS); 2169 } 2170