/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Sunfire Platform specific functions.
 *
 * called when :
 * machine_type == MTYPE_SUNFIRE
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * NOTE(review): the header names of the 19 directives below are missing
 * from this copy of the file; they are kept exactly as found.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include <unistd.h>
#include "pdevinfo.h"
#include "display.h"
#include "pdevinfo_sun4u.h"
#include "display_sun4u.h"
#include "libprtdiag.h"

#if !defined(TEXT_DOMAIN)
#define TEXT_DOMAIN "SYS_TEST"
#endif

/* Macros for manipulating UPA IDs and board numbers on Sunfire.
 */ #define bd_to_upa(bd) ((bd) << 1) #define upa_to_bd(upa) ((upa) >> 1) #define MAX_MSGS 64 /* size of the per-board error message vector in disp_err_log() */ /* error_check() clears print_flag while it runs its checks and sets it to 1 afterwards */ extern int print_flag; /* * These functions will overlay the symbol table of libprtdiag * at runtime (sunfire systems only) */ int error_check(Sys_tree *tree, struct system_kstat_data *kstats); void display_memoryconf(Sys_tree *tree, struct grp_info *grps); int disp_fail_parts(Sys_tree *tree); void display_memorysize(Sys_tree *tree, struct system_kstat_data *kstats, struct grp_info *grps, struct mem_total *memory_total); void display_hp_fail_fault(Sys_tree *tree, struct system_kstat_data *kstats); void display_diaginfo(int flag, Prom_node *root, Sys_tree *tree, struct system_kstat_data *kstats); void display_mid(int mid); void display_pci(Board_node *); void display_ffb(Board_node *, int); void add_node(Sys_tree *, Prom_node *); void resolve_board_types(Sys_tree *); /* local functions */ static void build_mem_tables(Sys_tree *, struct system_kstat_data *, struct grp_info *); static void get_mem_total(struct mem_total *, struct grp_info *); static int disp_fault_list(Sys_tree *, struct system_kstat_data *); static int disp_err_log(struct system_kstat_data *); static int disp_env_status(struct system_kstat_data *); static int disp_keysw_and_leds(struct system_kstat_data *); static void sunfire_disp_prom_versions(Sys_tree *); static void erase_msgs(char **); static void display_msgs(char **msgs, int board); static void sunfire_disp_asic_revs(Sys_tree *, struct system_kstat_data *); static void display_hp_boards(struct system_kstat_data *); static int disp_parts(char **, u_longlong_t, int); /* * Error analysis routines. These routines decode data from specified * error registers. They are meant to be used for decoding the fatal * hardware reset data passed to the kernel by sun4u POST. 
 */ static int analyze_cpu(char **, int, u_longlong_t); static int analyze_ac(char **, u_longlong_t); static int analyze_dc(int, char **, u_longlong_t); #define RESERVED_STR "Reserved" #define MAX_PARTS 5 #define MAX_FRUS 5 #define MAXSTRLEN 256 /* Define special bits */ #define UPA_PORT_A 0x1 #define UPA_PORT_B 0x2 /* * These defines come from async.h, but it does not get exported from * uts/sun4u/sys, so they must be redefined. */ #define P_AFSR_ISAP 0x0000000040000000ULL /* incoming addr. parity err */ #define P_AFSR_ETP 0x0000000020000000ULL /* ecache tag parity */ #define P_AFSR_ETS 0x00000000000F0000ULL /* cache tag parity syndrome */ #define ETS_SHIFT 16 /* * Identifiers of the parts that can be reported. Each value is the * index of the matching description in part_str[] below. */ #define RSVD_PART 1 #define UPA_PART 2 #define UPA_A_PART 3 #define UPA_B_PART 4 #define SOFTWARE_PART 5 #define AC_PART 6 #define AC_ANY_PART 7 #define DTAG_PART 8 #define DTAG_A_PART 9 #define DTAG_B_PART 10 #define FHC_PART 11 #define BOARD_PART 12 #define BOARD_ANY_PART 13 #define BOARD_CONN_PART 14 #define BACK_PIN_PART 15 #define BACK_TERM_PART 16 #define CPU_PART 17 /* Descriptions of the possible parts, indexed by the *_PART values above */ static char *part_str[] = { "", /* 0, a placeholder for indexing */ "", /* 1, reserved strings shouldn't be printed */ "UPA devices", /* 2 */ "UPA Port A device", /* 3 */ "UPA Port B device", /* 4 */ "Software error", /* 5 */ "Address Controller", /* 6 */ "Undetermined Address Controller in system", /* 7 */ "Data Tags", /* 8 */ "Data Tags for UPA Port A", /* 9 */ "Data Tags for UPA Port B", /* 10 */ "Firehose Controller", /* 11 */ "This Board", /* 12 */ "Undetermined Board in system", /* 13 */ "Board Connector", /* 14 */ "Centerplane pins ", /* 15 */ "Centerplane terminators", /* 16 */ "CPU", /* 17 */ }; /* Ecache parity error messages. Tells which bits are bad. 
 */ static char *ecache_parity[] = { "Bits 7:0 ", "Bits 15:8 ", "Bits 21:16 ", "Bits 24:22 " }; /* * One entry of the error table below: the message for one error * register bit (NULL for reserved bits) and up to MAX_PARTS part * identifiers (*_PART values), with unused slots set to 0. */ struct ac_error { char *error; int part[MAX_PARTS]; }; typedef struct ac_error ac_err; /* * Hardware error register meanings, failed parts and FRUs. The * following strings are indexed for the bit positions of the * corresponding bits in the hardware. The code checks bit x of * the hardware error register and prints out string[x] if the bit * is turned on. * * This database of parts which are probably failed and which FRU's * to replace was based on knowledge of the Sunfire Programmers Spec. * and discussions with the hardware designers. The order of the part * lists and consequently the FRU lists are in the order of most * likely cause first. */ static ac_err ac_errors[] = { { /* 0 */ "UPA Port A Error", { UPA_A_PART, 0, 0, 0, 0 }, }, { /* 1 */ "UPA Port B Error", { UPA_B_PART, 0, 0, 0, 0 }, }, { /* 2 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 3 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 4 */ "UPA Interrupt to unmapped destination", { BOARD_PART, 0, 0, 0, 0 }, }, { /* 5 */ "UPA Non-cacheable write to unmapped destination", { BOARD_PART, 0, 0, 0, 0 }, }, { /* 6 */ "UPA Cacheable write to unmapped destination", { BOARD_PART, 0, 0, 0, 0 }, }, { /* 7 */ "Illegal Write Received", { BOARD_PART, 0, 0, 0, 0 }, }, { /* 8 */ "Local Writeback match with line in state S", { AC_PART, DTAG_PART, 0, 0, 0 }, }, { /* 9 */ "Local Read match with valid line in Tags", { AC_PART, DTAG_PART, 0, 0, 0 }, }, { /* 10 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 11 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 12 */ "Tag and Victim were valid during lookup", { AC_PART, DTAG_PART, 0, 0, 0 }, }, { /* 13 */ "Local Writeback matches a victim in state S", { AC_PART, CPU_PART, 0, 0, 0 }, }, { /* 14 */ "Local Read matches valid line in victim buffer", { AC_PART, CPU_PART, 0, 0, 0 }, }, { /* 15 */ "Local Read victim bit set and victim is S state", { AC_PART, CPU_PART, 0, 0, 0 }, }, { /* 16 */ "Local Read 
Victim bit set and Valid Victim Buffer", { AC_PART, CPU_PART, 0, 0, 0 }, }, { /* 17 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 18 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 19 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 20 */ "UPA Transaction received in Sleep mode", { AC_PART, 0, 0, 0, 0 }, }, { /* 21 */ "P_FERR error P_REPLY received from UPA Port", { CPU_PART, AC_PART, 0, 0, 0 }, }, { /* 22 */ "Illegal P_REPLY received from UPA Port", { CPU_PART, AC_PART, 0, 0, 0 }, }, { /* 23 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 24 */ "Timeout on a UPA Master Port", { AC_ANY_PART, BOARD_ANY_PART, 0, 0, 0 }, }, { /* 25 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 26 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 27 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 28 */ "Coherent Transactions Queue Overflow Error", { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, AC_ANY_PART, 0 }, }, { /* 29 */ "Non-cacheable Request Queue Overflow Error", { AC_PART, AC_ANY_PART, 0, 0, 0 }, }, { /* 30 */ "Non-cacheable Reply Queue Overflow Error", { AC_PART, 0, 0, 0, 0 }, }, { /* 31 */ "PREQ Queue Overflow Error", { CPU_PART, AC_PART, 0, 0, 0 }, }, { /* 32 */ "Foreign DID CAM Overflow Error", { AC_PART, AC_ANY_PART, 0, 0, 0 }, }, { /* 33 */ "FT->UPA Queue Overflow Error", { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, AC_ANY_PART, 0 }, }, { /* 34 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 35 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 36 */ "UPA Port B Dtag Parity Error", { DTAG_B_PART, AC_PART, 0, 0, 0 }, }, { /* 37 */ "UPA Port A Dtag Parity Error", { DTAG_A_PART, AC_PART, 0, 0, 0 }, }, { /* 38 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 39 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 40 */ "UPA Bus Parity Error", { UPA_PART, AC_PART, 0, 0, 0 }, }, { /* 41 */ "Data ID Line Mismatch", { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, 0, 0 }, }, { /* 42 */ "Arbitration Line Mismatch", { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, 0, 0 }, }, { /* 43 */ "Shared Line Parity Mismatch", { BACK_PIN_PART, BOARD_CONN_PART, 
AC_PART, 0, 0 }, }, { /* 44 */ "FireTruck Control Line Parity Error", { AC_PART, BACK_PIN_PART, 0, 0, 0 }, }, { /* 45 */ "FireTruck Address Bus Parity Error", { AC_PART, BACK_PIN_PART, 0, 0, 0 }, }, { /* 46 */ "Internal RAM Parity Error", { AC_PART, 0, 0, 0, 0 }, }, { /* 47 */ NULL, { RSVD_PART, 0, 0, 0, 0 }, }, { /* 48 */ "Internal Hardware Error", { AC_PART, 0, 0, 0, 0 }, }, { /* 49 */ "FHC Communications Error", { FHC_PART, AC_PART, 0, 0, 0 }, }, /* Bits 50-63 are reserved in this implementation. */ }; #define MAX_BITS (sizeof (ac_errors)/ sizeof (ac_err)) /* * There are only two error bits in the DC shadow chain that are * important. They indicate an overflow error and a parity error, * respectively. The other bits are not error bits and should not * be checked for. */ #define DC_OVERFLOW 0x2 #define DC_PARITY 0x4 static char dc_overflow_txt[] = "Board %d DC %d Overflow Error"; static char dc_parity_txt[] = "Board %d DC %d Parity Error"; /* defines for the sysio */ #define UPA_APERR 0x4 int error_check(Sys_tree *tree, struct system_kstat_data *kstats) { int exit_code = 0; /* init to all OK */ /* * silently check for any types of machine errors */ print_flag = 0; if (disp_fail_parts(tree) || disp_fault_list(tree, kstats) || disp_err_log(kstats) || disp_env_status(kstats)) { /* set exit_code to show failures */ exit_code = 1; } print_flag = 1; return (exit_code); } /* * disp_fail_parts * * Display the failed parts in the system. This function looks for * the status property in all PROM nodes. On systems where * the PROM does not supports passing diagnostic information * thruogh the device tree, this routine will be silent. */ int disp_fail_parts(Sys_tree *tree) { int exit_code; int system_failed = 0; Board_node *bnode = tree->bd_list; Prom_node *pnode; exit_code = 0; /* go through all of the boards looking for failed units. 
*/ while (bnode != NULL) { /* find failed chips */ pnode = find_failed_node(bnode->nodes); if ((pnode != NULL) && !system_failed) { system_failed = 1; exit_code = 1; if (print_flag == 0) { return (exit_code); } log_printf("\n", 0); log_printf(dgettext(TEXT_DOMAIN, "Failed Field Replaceable Units (FRU) " "in System:\n"), 0); log_printf("==========================" "====================\n", 0); } while (pnode != NULL) { void *value; char *name; /* node name string */ char *type; /* node type string */ char *board_type = NULL; value = get_prop_val(find_prop(pnode, "status")); name = get_node_name(pnode); /* sanity check of data retreived from PROM */ if ((value == NULL) || (name == NULL)) { pnode = next_failed_node(pnode); continue; } /* Find the board type of this board */ if (bnode->board_type == CPU_BOARD) { board_type = "CPU"; } else { board_type = "IO"; } log_printf(dgettext(TEXT_DOMAIN, "%s unavailable on %s Board #%d\n"), name, board_type, bnode->board_num, 0); log_printf(dgettext(TEXT_DOMAIN, "\tPROM fault string: %s\n"), value, 0); log_printf(dgettext(TEXT_DOMAIN, "\tFailed Field Replaceable Unit is "), 0); /* * Determine whether FRU is CPU module, system * board, or SBus card. 
*/ if ((name != NULL) && (strstr(name, "sbus"))) { log_printf(dgettext(TEXT_DOMAIN, "SBus Card %d\n"), get_sbus_slot(pnode), 0); } else if (((name = get_node_name(pnode->parent)) != NULL) && (strstr(name, "pci"))) { log_printf(dgettext(TEXT_DOMAIN, "PCI Card %d"), get_pci_device(pnode), 0); } else if (((type = get_node_type(pnode)) != NULL) && (strstr(type, "cpu"))) { log_printf(dgettext(TEXT_DOMAIN, "UltraSPARC module " "Board %d Module %d\n"), get_id(pnode) >> 1, get_id(pnode) & 0x1); } else { log_printf(dgettext(TEXT_DOMAIN, "%s board %d\n"), board_type, bnode->board_num, 0); } pnode = next_failed_node(pnode); } bnode = bnode->next; } if (!system_failed) { log_printf("\n", 0); log_printf(dgettext(TEXT_DOMAIN, "No failures found in System\n"), 0); log_printf("===========================\n", 0); } if (system_failed) return (1); else return (0); } void display_memorysize(Sys_tree *tree, struct system_kstat_data *kstats, struct grp_info *grps, struct mem_total *memory_total) { /* Build the memory group tables and interleave data */ build_mem_tables(tree, kstats, grps); /* display total usable installed memory */ get_mem_total(memory_total, grps); (void) log_printf(dgettext(TEXT_DOMAIN, "Memory size: %4dMb\n"), memory_total->dram, 0); /* We display the NVSIMM size totals separately. */ if (memory_total->nvsimm != 0) { (void) log_printf(dgettext(TEXT_DOMAIN, "NVSIMM size: %4dMb\n"), memory_total->nvsimm); } } /* * This routine displays the memory configuration for all boards in the * system. */ void display_memoryconf(Sys_tree *tree, struct grp_info *grps) { int group; char *status_str[] = { "Unknown", " Empty ", " Failed", " Active", " Spare " }; char *cond_str[] = { " Unknown ", " OK ", " Failing ", " Failed ", " Uninit. " }; #ifdef lint tree = tree; #endif /* Print the header for the memory section. 
 */ log_printf("\n", 0); log_printf("=========================", 0); log_printf(dgettext(TEXT_DOMAIN, " Memory "), 0); log_printf("=========================", 0); log_printf("\n", 0); log_printf("\n", 0); log_printf(" Intrlv. " "Intrlv.\n", 0); log_printf("Brd Bank MB Status Condition Speed Factor " " With\n", 0); log_printf("--- ----- ---- ------- ---------- ----- ------- " "-------\n", 0); /* Print the Memory groups information. */ for (group = 0; group < MAX_GROUPS; group++) { struct grp *grp; grp = &grps->grp[group]; /* If this board is not a CPU or MEM board, skip it. */ if ((grp->type != MEM_BOARD) && (grp->type != CPU_BOARD)) { continue; } if (grp->valid) { log_printf("%2d ", grp->board, 0); log_printf(" %1d ", grp->group, 0); log_printf("%4d ", grp->size, 0); log_printf("%7s ", status_str[grp->status], 0); log_printf("%10s ", cond_str[grp->condition], 0); log_printf("%3dns ", grp->speed, 0); log_printf("%3d-way ", grp->factor, 0); /* the interleave partner column is only printed for interleaved groups */ if (grp->factor > 1) { log_printf("%4c", grp->groupid, 0); } log_printf("\n", 0); } } } /* * display_hp_fail_fault * * Display the hot plugged, disabled and failed boards, then the failed * units, then the fault list. The return values of the last two calls * are ignored. */ void display_hp_fail_fault(Sys_tree *tree, struct system_kstat_data *kstats) { /* Display Hot plugged, disabled and failed boards */ (void) display_hp_boards(kstats); /* Display failed units */ (void) disp_fail_parts(tree); /* Display fault info */ (void) disp_fault_list(tree, kstats); } /* * display_diaginfo * * Does nothing unless flag is non-zero. */ void display_diaginfo(int flag, Prom_node *root, Sys_tree *tree, struct system_kstat_data *kstats) { /* * Now display the last powerfail time and the fatal hardware * reset information. We do this under a couple of conditions. * First if the user asks for it. The second is if the user * told us to do logging, and we found a system failure. */ if (flag) { /* * display time of latest powerfail. Not all systems * have this capability. For those that do not, this * is just a no-op. */ disp_powerfail(root); /* Display system environmental conditions. */ (void) disp_env_status(kstats); /* Display ASIC Chip revs for all boards. 
*/ sunfire_disp_asic_revs(tree, kstats); /* Print the PROM revisions here */ sunfire_disp_prom_versions(tree); /* * Display the latest system fatal hardware * error data, if any. The system holds this * data in SRAM, so it does not persist * across power-on resets. */ (void) disp_err_log(kstats); } } void display_mid(int mid) { log_printf(" %2d ", mid % 2, 0); } /* * display_pci * Call the generic psycho version of this function. */ void display_pci(Board_node *board) { display_psycho_pci(board); } /* * display_ffb * Display all FFBs on this board. It can either be in tabular format, * or a more verbose format. */ void display_ffb(Board_node *board, int table) { Prom_node *ffb; void *value; struct io_card *card_list = NULL; struct io_card card; if (board == NULL) return; /* Fill in common information */ card.display = 1; card.board = board->board_num; (void) sprintf(card.bus_type, "UPA"); card.freq = sys_clk; for (ffb = dev_find_node(board->nodes, FFB_NAME); ffb != NULL; ffb = dev_next_node(ffb, FFB_NAME)) { if (table == 1) { /* Print out in table format */ /* XXX - Get the slot number (hack) */ card.slot = get_id(ffb); /* Find out if it's single or double buffered */ (void) sprintf(card.name, "FFB"); value = get_prop_val(find_prop(ffb, "board_type")); if (value != NULL) if ((*(int *)value) & FFB_B_BUFF) (void) sprintf(card.name, "FFB, " "Double Buffered"); else (void) sprintf(card.name, "FFB, " "Single Buffered"); /* Print model number */ card.model[0] = '\0'; value = get_prop_val(find_prop(ffb, "model")); if (value != NULL) (void) sprintf(card.model, "%s", (char *)value); card_list = insert_io_card(card_list, &card); } else { /* print in long format */ char device[MAXSTRLEN]; int fd = -1; struct dirent *direntp; DIR *dirp; union strap_un strap; struct ffb_sys_info fsi; /* Find the device node using upa address */ value = get_prop_val(find_prop(ffb, "upa-portid")); if (value == NULL) continue; (void) sprintf(device, "%s@%x", FFB_NAME, *(int *)value); if ((dirp = 
opendir("/devices")) == NULL) continue; while ((direntp = readdir(dirp)) != NULL) { if (strstr(direntp->d_name, device) != NULL) { (void) sprintf(device, "/devices/%s", direntp->d_name); fd = open(device, O_RDWR, 0666); break; } } (void) closedir(dirp); if (fd == -1) continue; if (ioctl(fd, FFB_SYS_INFO, &fsi) < 0) continue; log_printf("Board %d FFB Hardware Configuration:\n", board->board_num, 0); log_printf("-----------------------------------\n", 0); strap.ffb_strap_bits = fsi.ffb_strap_bits; log_printf("\tBoard rev: %d\n", (int)strap.fld.board_rev, 0); log_printf("\tFBC version: 0x%x\n", fsi.fbc_version, 0); log_printf("\tDAC: %s\n", fmt_manf_id(fsi.dac_version, device), 0); log_printf("\t3DRAM: %s\n", fmt_manf_id(fsi.fbram_version, device), 0); log_printf("\n", 0); } } display_io_cards(card_list); free_io_cards(card_list); } /* * add_node * * This function adds a board node to the board structure where that * that node's physical component lives. */ void add_node(Sys_tree *root, Prom_node *pnode) { int board; Board_node *bnode; char *name = get_node_name(pnode); Prom_node *p; /* add this node to the Board list of the appropriate board */ if ((board = get_board_num(pnode)) == -1) { void *value; /* * if it is a server, pci nodes and ffb nodes never have * board number properties and software can find the board * number from the reg property. It is derived from the * high word of the 'reg' property, which contains the * mid. */ if ((name != NULL) && ((strcmp(name, FFB_NAME) == 0) || (strcmp(name, "pci") == 0) || (strcmp(name, "counter-timer") == 0))) { /* extract the board number from the 'reg' prop. 
*/ if ((value = get_prop_val(find_prop(pnode, "reg"))) == NULL) { (void) printf("add_node() no reg property\n"); exit(2); } board = (*(int *)value - 0x1c0) / 4; } } /* find the node with the same board number */ if ((bnode = find_board(root, board)) == NULL) { bnode = insert_board(root, board); bnode->board_type = UNKNOWN_BOARD; } /* now attach this prom node to the board list */ /* Insert this node at the end of the list */ pnode->sibling = NULL; if (bnode->nodes == NULL) bnode->nodes = pnode; else { p = bnode->nodes; while (p->sibling != NULL) p = p->sibling; p->sibling = pnode; } } /* * Function resolve_board_types * * After the tree is walked and all the information is gathered, this * function is called to resolve the type of each board. */ void resolve_board_types(Sys_tree *tree) { Board_node *bnode; Prom_node *pnode; char *type; bnode = tree->bd_list; while (bnode != NULL) { bnode->board_type = UNKNOWN_BOARD; pnode = dev_find_node(bnode->nodes, "fhc"); type = get_prop_val(find_prop(pnode, "board-type")); if (type == NULL) { bnode = bnode->next; continue; } if (strcmp(type, CPU_BD_NAME) == 0) { bnode->board_type = CPU_BOARD; } else if (strcmp(type, MEM_BD_NAME) == 0) { bnode->board_type = MEM_BOARD; } else if (strcmp(type, DISK_BD_NAME) == 0) { bnode->board_type = DISK_BOARD; } else if (strcmp(type, IO_SBUS_FFB_BD_NAME) == 0) { bnode->board_type = IO_SBUS_FFB_BOARD; } else if (strcmp(type, IO_2SBUS_BD_NAME) == 0) { bnode->board_type = IO_2SBUS_BOARD; } else if (strcmp(type, IO_PCI_BD_NAME) == 0) { bnode->board_type = IO_PCI_BOARD; } else if (strcmp(type, IO_2SBUS_SOCPLUS_BD_NAME) == 0) { bnode->board_type = IO_2SBUS_SOCPLUS_BOARD; } else if (strcmp(type, IO_SBUS_FFB_SOCPLUS_BD_NAME) == 0) { bnode->board_type = IO_SBUS_FFB_SOCPLUS_BOARD; } bnode = bnode->next; } } /* * local functions */ static void sunfire_disp_prom_versions(Sys_tree *tree) { Board_node *bnode; /* Display Prom revision header */ log_printf("System Board PROM revisions:\n", 0); 
log_printf("----------------------------\n", 0); /* For each board, print the POST and OBP versions */ for (bnode = tree->bd_list; bnode != NULL; bnode = bnode->next) { Prom_node *flashprom; /* flashprom device node */ /* find a flashprom node for this board */ flashprom = dev_find_node(bnode->nodes, "flashprom"); /* If no flashprom node found, continue */ if (flashprom == NULL) continue; /* flashprom node found, display board# */ log_printf("Board %2d: ", bnode->board_num, 0); disp_prom_version(flashprom); } } /* * functions that are only needed inside this library */ /* * build_mem_tables * * This routine builds the memory table which tells how much memory * is present in each SIMM group of each board, what the interleave * factors are, and the group ID of the interleave group. * * The algorithms used are: * First fill in the sizes of groups. * Next build lists of all groups with same physical base. * From #of members in each list, interleave factor is * determined. * All members of a certain list get the same interleave * group ID. */ static void build_mem_tables(Sys_tree *tree, struct system_kstat_data *kstats, struct grp_info *grps) { struct mem_inter inter_grps; /* temp structure for interleaves */ struct inter_grp *intrp; int group; int i; /* initialize the interleave lists */ for (i = 0, intrp = &inter_grps.i_grp[0]; i < MAX_GROUPS; i++, intrp++) { intrp->valid = 0; intrp->count = 0; intrp->groupid = '\0'; intrp->base = 0; } for (group = 0; group < MAX_GROUPS; group++) { int found; int board; struct grp *grp; struct bd_kstat_data *bksp; uchar_t simm_reg; Board_node *bnode; board = group/2; bksp = &kstats->bd_ksp_list[board]; grp = &grps->grp[group]; grp->group = group % 2; /* * Copy the board type field into the group record. 
*/ if ((bnode = find_board(tree, board)) != NULL) { grp->type = bnode->board_type; } else { grp->type = UNKNOWN_BOARD; continue; } /* Make sure we have kstats for this board */ if (bksp->ac_kstats_ok == 0) { /* Mark this group as invalid and move to next one */ grp->valid = 0; continue; } /* Find the bank status property */ if (bksp->ac_memstat_ok) { grp->status = bksp->mem_stat[grp->group].status; grp->condition = bksp->mem_stat[grp->group].condition; } else { grp->status = StUnknown; grp->condition = ConUnknown; } switch (grp->status) { case StBad: case StActive: case StSpare: break; default: grp->status = StUnknown; break; } switch (grp->condition) { case ConOK: case ConFailing: case ConFailed: case ConTest: case ConBad: break; default: grp->condition = ConUnknown; break; } /* base the group size off of the simmstat kstat. */ if (bksp->simmstat_kstats_ok == 0) { grp->valid = 0; continue; } /* Is it bank 0 or bank 1 */ if (grp->group == 0) { simm_reg = bksp->simm_status[0]; } else { simm_reg = bksp->simm_status[1]; } /* Now decode the size field. */ switch (simm_reg & 0x1f) { case MEM_SIZE_64M: grp->size = 64; break; case MEM_SIZE_256M: grp->size = 256; break; case MEM_SIZE_1G: grp->size = 1024; break; case MEM_SIZE_2G: grp->size = 2048; break; default: grp->valid = 0; continue; } /* Decode the speed field */ switch ((simm_reg & 0x60) >> 5) { case MEM_SPEED_50ns: grp->speed = 50; break; case MEM_SPEED_60ns: grp->speed = 60; break; case MEM_SPEED_70ns: grp->speed = 70; break; case MEM_SPEED_80ns: grp->speed = 80; break; } grp->valid = 1; grp->base = GRP_BASE(bksp->ac_memdecode[grp->group]); grp->board = board; if (grp->group == 0) { grp->factor = INTLV0(bksp->ac_memctl); } else { /* assume it is group 1 */ grp->factor = INTLV1(bksp->ac_memctl); } grp->groupid = '\0'; /* Not in a group yet */ /* * find the interleave list this group belongs on. If the * interleave list corresponding to this base address is * not found, then create a new one. 
*/ i = 0; intrp = &inter_grps.i_grp[0]; found = 0; while ((i < MAX_GROUPS) && !found && (intrp->valid != 0)) { if ((intrp->valid != 0) && (intrp->base == grp->base)) { grp->groupid = intrp->groupid; intrp->count++; found = 1; } i++; intrp++; } /* * We did not find a matching base. So now i and intrp * now point to the next interleave group in the list. */ if (!found) { intrp->count++; intrp->valid = 1; intrp->groupid = 'A' + (char)i; intrp->base = grp->base; grp->groupid = intrp->groupid; } } } static void get_mem_total(struct mem_total *mem_total, struct grp_info *grps) { struct grp *grp; int i; /* Start with total of zero */ mem_total->dram = 0; mem_total->nvsimm = 0; /* For now we ignore NVSIMMs. We might want to fix this later. */ for (i = 0, grp = &grps->grp[0]; i < MAX_GROUPS; i++, grp++) { if (grp->valid == 1 && grp->status == StActive) { mem_total->dram += grp->size; } } } static int disp_fault_list(Sys_tree *tree, struct system_kstat_data *kstats) { struct ft_list *ftp; int i; int result = 0; time_t t; if (!kstats->ft_kstat_ok) { return (result); } for (i = 0, ftp = kstats->ft_array; i < kstats->nfaults; i++, ftp++) { if (!result) { log_printf("\n", 0); log_printf("Detected System Faults\n", 0); log_printf("======================\n", 0); } result = 1; if (ftp->fclass == FT_BOARD) { log_printf("Board %d fault: %s\n", ftp->unit, ftp->msg, 0); /* * If the fault on this board is PROM inherited, see * if we can find some failed component information * in the PROM device tree. The general solution * would be to fix the fhc driver and have it put in * more descriptive messages, but that's for another * day. */ if (ftp->type == FT_PROM) { Board_node *bn; Prom_node *pn; char *str; bn = find_board(tree, ftp->unit); /* * If any nodes under this board have a * status containing "fail", print it out. 
*/ pn = find_failed_node(bn->nodes); while (pn) { str = get_prop_val(find_prop(pn, "status")); if (str != NULL) { log_printf("Fault: %s\n", str, 0); } pn = next_failed_node(pn); } } } else if ((ftp->type == FT_CORE_PS) || (ftp->type == FT_PPS)) { log_printf("Unit %d %s failure\n", ftp->unit, ftp->msg, 0); } else if ((ftp->type == FT_OVERTEMP) && (ftp->fclass == FT_SYSTEM)) { log_printf("Clock board %s\n", ftp->msg, 0); } else { log_printf("%s failure\n", ftp->msg, 0); } t = (time_t)ftp->create_time; log_printf("\tDetected %s", asctime(localtime(&t)), 0); } if (!result) { log_printf("\n", 0); log_printf("No System Faults found\n", 0); log_printf("======================\n", 0); } log_printf("\n", 0); return (result); } /* * disp_err_log * * Display the fatal hardware reset system error logs. These logs are * collected by POST and passed up through the kernel to userland. * They will not necessarily be present in all systems. Their form * might also be different in different systems. * * NOTE - We are comparing POST defined board types here. Do not confuse * them with kernel board types. The structure being analyzed in this * function is created by POST. All the defines for it are in reset_info.h, * which was ported from POST header files. */ static int disp_err_log(struct system_kstat_data *kstats) { int exit_code = 0; int i; struct reset_info *rst_info; struct board_info *bdp; char *err_msgs[MAX_MSGS]; /* holds all messages for a system board */ int msg_idx; /* current msg number */ int count; /* number added by last analyze call */ char **msgs; /* start by initializing the err_msgs array to all NULLs */ for (i = 0; i < MAX_MSGS; i++) { err_msgs[i] = NULL; } /* First check to see that the reset-info kstats are present. 
*/ if (kstats->reset_kstats_ok == 0) { return (exit_code); } rst_info = &kstats->reset_info; /* Everything is OK, so print out time/date stamp first */ log_printf("\n", 0); log_printf( dgettext(TEXT_DOMAIN, "Analysis of most recent Fatal Hardware Watchdog:\n"), 0); log_printf("======================================================\n", 0); log_printf("Log Date: %s\n", get_time(&kstats->reset_info.tod_timestamp[0]), 0); /* initialize the vector and the message index. */ msgs = err_msgs; msg_idx = 0; /* Loop Through all of the boards. */ bdp = &rst_info->bd_reset_info[0]; for (i = 0; i < MAX_BOARDS; i++, bdp++) { /* Is there data for this board? */ if ((bdp->board_desc & BD_STATE_MASK) == BD_NOT_PRESENT) { continue; } /* If it is a CPU Board, look for CPU data. */ if (BOARD_TYPE(bdp->board_desc) == CPU_TYPE) { /* analyze CPU 0 if present */ if (bdp->board_desc & CPU0_OK) { count = analyze_cpu(msgs, 0, bdp->cpu[0].afsr); msgs += count; msg_idx += count; } /* analyze CPU1 if present. */ if (bdp->board_desc & CPU1_OK) { count = analyze_cpu(msgs, 1, bdp->cpu[1].afsr); msgs += count; msg_idx += count; } } /* Always Analyze the AC and the DCs on a board. 
*/ count = analyze_ac(msgs, bdp->ac_error_status); msgs += count; msg_idx += count; count = analyze_dc(i, msgs, bdp->dc_shadow_chain); msgs += count; msg_idx += count; if (msg_idx != 0) display_msgs(err_msgs, i); erase_msgs(err_msgs); /* If any messages are logged, we have errors */ if (msg_idx != 0) { exit_code = 1; } /* reset the vector and the message index */ msg_idx = 0; msgs = &err_msgs[0]; } return (exit_code); } static void erase_msgs(char **msgs) { int i; for (i = 0; (*msgs != NULL) && (i < MAX_MSGS); i++, msgs++) { free(*msgs); *msgs = NULL; } } static void display_msgs(char **msgs, int board) { int i; /* display the header for this board */ print_header(board); for (i = 0; (*msgs != NULL) && (i < MAX_MSGS); i++, msgs++) { log_printf(*msgs, 0); } } /* * disp_keysw_and_leds * * This routine displays the position of the keyswitch and the front panel * system LEDs. The keyswitch can be in either normal, diagnostic, or * secure position. The three front panel LEDs are of importance because * the center LED indicates component failure on the system. */ static int disp_keysw_and_leds(struct system_kstat_data *kstats) { int board; int diag_mode = 0; int secure_mode = 0; int result = 0; /* Check the first valid board to determeine the diag bit */ /* Find the first valid board */ for (board = 0; board < MAX_BOARDS; board++) { if (kstats->bd_ksp_list[board].fhc_kstats_ok != 0) { /* If this was successful, break out of loop */ if ((kstats->bd_ksp_list[board].fhc_bsr & FHC_DIAG_MODE) == 0) diag_mode = 1; break; } } /* * Check the register on the clock-board to determine the * secure bit. */ if (kstats->sys_kstats_ok) { /* The secure bit is negative logic. */ if (kstats->keysw_status == KEY_SECURE) { secure_mode = 1; } } /* * The system cannot be in diag and secure mode. This is * illegal. */ if (secure_mode && diag_mode) { result = 2; return (result); } /* Now print the keyswitch position. 
*/ log_printf("Keyswitch position is in ", 0); if (diag_mode) { log_printf("Diagnostic Mode\n"); } else if (secure_mode) { log_printf("Secure Mode\n", 0); } else { log_printf("Normal Mode\n"); } /* display the redundant power status */ if (kstats->sys_kstats_ok) { log_printf("System Power Status: ", 0); switch (kstats->power_state) { case REDUNDANT: log_printf("Redundant\n", 0); break; case MINIMUM: log_printf("Minimum Available\n", 0); break; case BELOW_MINIMUM: log_printf("Insufficient Power Available\n", 0); break; default: log_printf("Unknown\n", 0); break; } } if (kstats->sys_kstats_ok) { /* * If the center LED is on, then we return a non-zero * result. */ log_printf("System LED Status: GREEN YELLOW " "GREEN\n", 0); if ((kstats->sysctrl & SYS_LED_MID) != 0) { log_printf("WARNING ", 0); } else { log_printf("Normal ", 0); } /* * Left LED is negative logic, center and right LEDs * are positive logic. */ if ((kstats->sysctrl & SYS_LED_LEFT) == 0) { log_printf("ON ", 0); } else { log_printf("OFF", 0); } log_printf(" ", 0); if ((kstats->sysctrl & SYS_LED_MID) != 0) { log_printf("ON ", 0); } else { log_printf("OFF", 0); } log_printf(" BLINKING", 0); } log_printf("\n", 0); return (result); } /* * disp_env_status * * This routine displays the environmental status passed up from * device drivers via kstats. The kstat names are defined in * kernel header files included by this module. */ static int disp_env_status(struct system_kstat_data *kstats) { struct bd_kstat_data *bksp; int exit_code = 0; int i; uchar_t curr_temp; int is4slot = 0; /* * Define some message arrays to make life simpler. These * messages correspond to definitions in for * temperature trend (enum temp_trend) and temperature state * (enum temp_state). 
*/ static char *temp_trend_msg[] = { "unknown", "rapidly falling", "falling", "stable", "rising", "rapidly rising", "unknown (noisy)" }; static char *temp_state_msg[] = { " OK ", "WARNING ", " DANGER " }; log_printf("\n", 0); log_printf("=========================", 0); log_printf(dgettext(TEXT_DOMAIN, " Environmental Status "), 0); log_printf("=========================", 0); log_printf("\n", 0); exit_code = disp_keysw_and_leds(kstats); if (!kstats->sys_kstats_ok) { log_printf(dgettext(TEXT_DOMAIN, "*** Error: Unavailable ***\n\n")); return (1); } /* * for purposes within this routine, * 5 slot behaves the same as a 4 slot */ if (SYS_TYPE(kstats->sysstat1) == SYS_4_SLOT) is4slot = 1; log_printf("\n", 0); log_printf("\nFans:\n", 0); log_printf("-----\n", 0); log_printf("Unit Status\n", 0); log_printf("---- ------\n", 0); log_printf("%-4s ", is4slot ? "Disk" : "Rack", 0); /* Check the status of the Rack Fans */ if ((kstats->fan_status & SYS_RACK_FANFAIL) == 0) { log_printf("OK\n", 0); } else { log_printf("FAIL\n", 0); exit_code = 1; } if (!is4slot) { /* * keyswitch and ac box are on 8 & 16 slot only */ /* Check the status of the Keyswitch Fan assembly. 
*/ log_printf("%-4s ", "Key", 0); if ((kstats->fan_status & SYS_KEYSW_FAN_OK) != 0) { log_printf("OK\n", 0); } else { log_printf("FAIL\n", 0); exit_code = 1; } log_printf("%-4s ", "AC", 0); if ((kstats->fan_status & SYS_AC_FAN_OK) != 0) { log_printf("OK\n", 0); } else { log_printf("FAIL\n", 0); exit_code = 1; } } else { /* * peripheral fan is on 4 slot only * XXX might want to indicate transient states too */ if (kstats->psstat_kstat_ok) { if (kstats->ps_shadow[SYS_P_FAN_INDEX] == PS_OK) { log_printf("PPS OK\n", 0); } else if (kstats->ps_shadow[SYS_P_FAN_INDEX] == PS_FAIL) { log_printf("PPS FAIL\n", 0); exit_code = 1; } } } log_printf("\n", 0); log_printf("System Temperatures (Celsius):\n", 0); log_printf("------------------------------\n", 0); log_printf("Brd State Current Min Max Trend\n", 0); log_printf("--- ------- ------- --- --- -----\n", 0); for (i = 0, bksp = &kstats->bd_ksp_list[0]; i < MAX_BOARDS; i++, bksp++) { /* Make sure we have kstats for this board first */ if (!bksp->temp_kstat_ok) { continue; } log_printf("%2d ", i, 0); /* Print the current state of the temperature */ log_printf("%s", temp_state_msg[bksp->tempstat.state], 0); /* Set exit code for WARNING and DANGER */ if (bksp->tempstat.state != 0) exit_code = 1; /* Print the current temperature */ curr_temp = bksp->tempstat.l1[bksp->tempstat.index % L1_SZ]; log_printf(" %2d ", curr_temp, 0); /* Print the minimum recorded temperature */ log_printf(" %2d ", bksp->tempstat.min, 0); /* Print the maximum recorded temperature */ log_printf(" %2d ", bksp->tempstat.max, 0); /* Print the current trend in temperature (if available) */ if (bksp->tempstat.version < 2) log_printf("unknown\n", 0); else log_printf("%s\n", temp_trend_msg[bksp->tempstat.trend], 0); } if (kstats->temp_kstat_ok) { log_printf("CLK ", 0); /* Print the current state of the temperature */ log_printf("%s", temp_state_msg[kstats->tempstat.state], 0); /* Set exit code for WARNING or DANGER */ if (kstats->tempstat.state != 0) exit_code = 
1; /* Print the current temperature */ curr_temp = kstats->tempstat.l1[kstats->tempstat.index % L1_SZ]; log_printf(" %2d ", curr_temp, 0); /* Print the minimum recorded temperature */ log_printf(" %2d ", kstats->tempstat.min, 0); /* Print the maximum recorded temperature */ log_printf(" %2d ", kstats->tempstat.max, 0); /* Print the current trend in temperature (if available) */ if (kstats->tempstat.version < 2) log_printf("unknown\n\n", 0); else log_printf("%s\n\n", temp_trend_msg[kstats->tempstat.trend], 0); } else { log_printf("\n"); } log_printf("\n", 0); log_printf("Power Supplies:\n", 0); log_printf("---------------\n", 0); log_printf("Supply Status\n", 0); log_printf("--------- ------\n", 0); if (kstats->psstat_kstat_ok) { for (i = 0; i < SYS_PS_COUNT; i++) { char *ps, *state; /* skip core power supplies that are not present */ if (i <= SYS_PPS0_INDEX && kstats->ps_shadow[i] == PS_OUT) continue; /* Display the unit Number */ switch (i) { case 0: ps = "0"; break; case 1: ps = "1"; break; case 2: ps = "2"; break; case 3: ps = "3"; break; case 4: ps = "4"; break; case 5: ps = "5"; break; case 6: ps = "6"; break; case 7: ps = is4slot ? "2nd PPS" : "7"; break; case SYS_PPS0_INDEX: ps = "PPS"; break; case SYS_CLK_33_INDEX: ps = " System 3.3v"; break; case SYS_CLK_50_INDEX: ps = " System 5.0v"; break; case SYS_V5_P_INDEX: ps = " Peripheral 5.0v"; break; case SYS_V12_P_INDEX: ps = " Peripheral 12v"; break; case SYS_V5_AUX_INDEX: ps = " Auxiliary 5.0v"; break; case SYS_V5_P_PCH_INDEX: ps = " Peripheral 5.0v precharge"; break; case SYS_V12_P_PCH_INDEX: ps = " Peripheral 12v precharge"; break; case SYS_V3_PCH_INDEX: ps = " System 3.3v precharge"; break; case SYS_V5_PCH_INDEX: ps = " System 5.0v precharge"; break; /* skip the peripheral fan here */ case SYS_P_FAN_INDEX: continue; } /* what is the state? */ switch (kstats->ps_shadow[i]) { case PS_OK: state = "OK"; break; case PS_FAIL: state = "FAIL"; exit_code = 1; break; /* XXX is this an exit_code condition? 
*/ case PS_OUT: state = "PPS Out"; exit_code = 1; break; case PS_UNKNOWN: state = "Unknown"; break; default: state = "Illegal State"; break; } log_printf("%-32s %s\n", ps, state, 0); } } /* Check status of the system AC Power Source */ log_printf("%-32s ", "AC Power", 0); if ((kstats->sysstat2 & SYS_AC_FAIL) == 0) { log_printf("OK\n", 0); } else { log_printf("failed\n", 0); exit_code = 1; } log_printf("\n", 0); return (exit_code); } /* * Many of the ASICs present in fusion machines have implementation and * version numbers stored in the OBP device tree. These codes are displayed * in this routine in an effort to aid Engineering and Field service * in detecting old ASICs which may have bugs in them. */ static void sunfire_disp_asic_revs(Sys_tree *tree, struct system_kstat_data *kstats) { Board_node *bnode; Prom_node *pnode; int isplusbrd; char *board_str[] = { "Uninitialized", "Unknown", "CPU", "Memory", "Dual-SBus", "UPA-SBus", "Dual-PCI", "Disk", "Clock", "Dual-SBus-SOC+", "UPA-SBus-SOC+"}; /* Print the header */ log_printf("\n", 0); log_printf("=========================", 0); log_printf(" HW Revisions ", 0); log_printf("=========================", 0); log_printf("\n", 0); log_printf("\n", 0); /* Else this is a Sunfire or campfire */ log_printf("ASIC Revisions:\n", 0); log_printf("---------------\n", 0); /* Display Firetruck ASIC Revisions first */ log_printf("Brd FHC AC SBus0 SBus1 PCI0 PCI1 FEPS", 0); log_printf(" Board Type Attributes", 0); log_printf("\n", 0); log_printf("--- --- -- ----- ----- ---- ---- ----", 0); log_printf(" ---------- ----------", 0); log_printf("\n", 0); /* * Display all of the FHC, AC, and chip revisions for the entire * machine. The AC anf FHC chip revs are available from the device * tree that was read out of the PROM, but the DC chip revs will be * read via a kstat. The interfaces for this are not completely * available at this time. 
*/ bnode = tree->bd_list; while (bnode != NULL) { int *version; int upa = bd_to_upa(bnode->board_num); /* Display the header with the board number */ log_printf("%2d ", bnode->board_num, 0); /* display the FHC version */ if ((pnode = dev_find_node(bnode->nodes, "fhc")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* display the AC version */ if ((pnode = dev_find_node(bnode->nodes, "ac")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* Find sysio 0 on board and print rev */ if ((pnode = find_device(bnode, upa, "sbus")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* Find sysio 1 on board and print rev */ if ((pnode = find_device(bnode, upa+1, "sbus")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* Find Psycho 0 on board and print rev */ if ((pnode = find_device(bnode, upa, "pci")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* Find Psycho 1 on board and print rev */ if ((pnode = find_device(bnode, upa+1, "pci")) == NULL) { log_printf(" ", 0); } else { if ((version = (int *)get_prop_val(find_prop(pnode, "version#"))) == NULL) { log_printf(" ", 0); } else { log_printf(" %d ", *version, 0); } } /* Find the FEPS on board and print rev */ if ((pnode = dev_find_node(bnode->nodes, "SUNW,hme")) != NULL) { if ((version = (int *)get_prop_val(find_prop(pnode, "hm-rev"))) != NULL) { if (*version == 
0xa0) { log_printf(" 2.0 ", 0); } else if (*version == 0x20) { log_printf(" 2.1 ", 0); } else { log_printf(" %2x ", *version, 0); } } } else log_printf(" ", 0); /* print out the board type */ isplusbrd = ISPLUSBRD(kstats->bd_ksp_list [bnode->board_num].fhc_bsr); log_printf("%-16s", board_str[bnode->board_type], 0); if (isplusbrd) log_printf("100MHz Capable", 0); else log_printf("84MHz Capable", 0); log_printf("\n", 0); bnode = bnode->next; } log_printf("\n", 0); /* Now display the FFB board component revisions */ for (bnode = tree->bd_list; bnode != NULL; bnode = bnode->next) { display_ffb(bnode, 0); } } static void display_hp_boards(struct system_kstat_data *kstats) { int i; int j; int hp_found = 0; struct hp_info *hp; char *state; for (i = 0, hp = &kstats->hp_info[0]; i < MAX_BOARDS; i++, hp++) { if (!hp->kstat_ok) { continue; } hp_found = 1; } /* return if there are no hotplug boards in the system. */ if (!hp_found) { return; } if (hp_found != 0) { log_printf("\n", 0); log_printf("Detached Boards\n", 0); log_printf("===============\n", 0); log_printf(" Slot State Type Info\n", 0); log_printf(" ---- --------- ------ ----" "-------------------------------------\n", 0); } /* Display all detached boards */ for (i = 0, hp = &kstats->hp_info[0]; i < MAX_BOARDS; i++, hp++) { struct cpu_info *cpu; if (hp->kstat_ok == 0) { continue; } switch (hp->bd_info.state) { case UNKNOWN_STATE: state = "unknown"; break; case ACTIVE_STATE: state = "active"; break; case LOWPOWER_STATE: state = "low-power"; break; case HOTPLUG_STATE: state = "hot-plug"; break; case DISABLED_STATE: state = "disabled"; break; case FAILED_STATE: state = "failed"; break; default: state = "unknown"; break; } log_printf(" %2d %9s ", i, state, 0); switch (hp->bd_info.type) { case MEM_BOARD: log_printf("%-14s ", MEM_BD_NAME, 0); break; case CPU_BOARD: log_printf("%-14s ", CPU_BD_NAME, 0); /* Cannot display CPU info for disabled boards */ if ((hp->bd_info.state == DISABLED_STATE) || (hp->bd_info.state == 
FAILED_STATE)) { break; } /* Display both CPUs if present */ cpu = &hp->bd_info.bd.cpu[0]; for (j = 0; j < 2; j++, cpu++) { log_printf("CPU %d: ", j, 0); /* Print the rated speed of the CPU. */ if (cpu->cpu_speed > 1) { log_printf("%3d MHz", cpu->cpu_speed, 0); } else { log_printf("no CPU ", 0); continue; } /* Display the size of the cache */ if (cpu->cache_size != 0) { log_printf(" %0.1fM ", (float)cpu->cache_size / (float)(1024*1024), 0); } else { log_printf(" ", 0); } } break; case IO_2SBUS_BOARD: log_printf("%-14s ", IO_2SBUS_BD_NAME, 0); break; case IO_2SBUS_SOCPLUS_BOARD: log_printf("%-14s ", IO_2SBUS_SOCPLUS_BD_NAME, 0); break; case IO_SBUS_FFB_BOARD: log_printf("%-14s ", IO_SBUS_FFB_BD_NAME, 0); switch (hp->bd_info.bd.io2.ffb_size) { case FFB_SINGLE: log_printf("Single buffered FFB", 0); break; case FFB_DOUBLE: log_printf("Double buffered FFB", 0); break; case FFB_NOT_FOUND: log_printf("No FFB installed", 0); break; default: log_printf("Illegal FFB size", 0); break; } break; case IO_SBUS_FFB_SOCPLUS_BOARD: log_printf("%-14s ", IO_SBUS_FFB_SOCPLUS_BD_NAME, 0); switch (hp->bd_info.bd.io2.ffb_size) { case FFB_SINGLE: log_printf("Single buffered FFB", 0); break; case FFB_DOUBLE: log_printf("Double buffered FFB", 0); break; case FFB_NOT_FOUND: log_printf("No FFB installed", 0); break; default: log_printf("Illegal FFB size", 0); break; } break; case IO_PCI_BOARD: log_printf("%-14s ", IO_PCI_BD_NAME, 0); break; case DISK_BOARD: log_printf("%-14s ", "disk", 0); for (j = 0; j < 2; j++) { log_printf("Disk %d:", j, 0); if (hp->bd_info.bd.dsk.disk_pres[j]) { log_printf(" Target: %2d ", hp->bd_info.bd.dsk.disk_id[j], 0); } else { log_printf(" no disk ", 0); } } break; case UNKNOWN_BOARD: case UNINIT_BOARD: default: log_printf("UNKNOWN ", 0); break; } log_printf("\n"); } } /* * Analysis functions: * * Most of the Fatal error data analyzed from error registers is not * very complicated. 
This is because the FRUs for errors detected by * most parts is either a CPU module, a FFB, or the system board * itself. * The analysis of the Address Controller errors is the most complicated. * These errors can be caused by other boards as well as the local board. */ /* * analyze_cpu * * Analyze the CPU MFSR passed in and determine what type of fatal * hardware errors occurred at the time of the crash. This function * returns a pointer to a string to the calling routine. */ static int analyze_cpu(char **msgs, int cpu_id, u_longlong_t afsr) { int count = 0; int i; int syndrome; char msgbuf[MAXSTRLEN]; if (msgs == NULL) { return (count); } if (afsr & P_AFSR_ETP) { (void) sprintf(msgbuf, "CPU %d Ecache Tag Parity Error, ", cpu_id); /* extract syndrome for afsr */ syndrome = (afsr & P_AFSR_ETS) >> ETS_SHIFT; /* now concat the parity syndrome msg */ for (i = 0; i < 4; i++) { if ((0x1 << i) & syndrome) { (void) strcat(msgbuf, ecache_parity[i]); } } (void) strcat(msgbuf, "\n"); *msgs++ = strdup(msgbuf); count++; } if (afsr & P_AFSR_ISAP) { (void) sprintf(msgbuf, "CPU %d Incoming System Address Parity Error\n", cpu_id); *msgs++ = strdup(msgbuf); count++; } return (count); } /* * analyze_ac * * This function checks the AC error register passed in and checks * for any errors that occured during the fatal hardware reset. 
*/ static int analyze_ac(char **msgs, u_longlong_t ac_error) { int i; int count = 0; char msgbuf[MAXSTRLEN]; int tmp_cnt; if (msgs == NULL) { return (count); } for (i = 2; i < MAX_BITS; i++) { if ((((u_longlong_t)0x1 << i) & ac_error) != 0) { if (ac_errors[i].error != NULL) { (void) sprintf(msgbuf, "AC: %s\n", ac_errors[i].error); *msgs++ = strdup(msgbuf); count++; /* display the part that might cause this */ tmp_cnt = disp_parts(msgs, ac_error, i); count += tmp_cnt; msgs += tmp_cnt; } } } return (count); } /* * analyze_dc * * This routine checks the DC shdow chain and tries to determine * what type of error might have caused the fatal hardware reset * error. */ static int analyze_dc(int board, char **msgs, u_longlong_t dc_error) { int i; int count = 0; char msgbuf[MAXSTRLEN]; if (msgs == NULL) { return (count); } /* * The DC scan data is contained in 8 bytes, one byte per * DC. There are 8 DCs on a system board. */ for (i = 0; i < 8; i++) { if (dc_error & DC_OVERFLOW) { (void) sprintf(msgbuf, dc_overflow_txt, board, i); *msgs++ = strdup(msgbuf); count++; } if (dc_error & DC_PARITY) { (void) sprintf(msgbuf, dc_parity_txt, board, i); *msgs++ = strdup(msgbuf); count++; } dc_error = dc_error >> 8; /* shift over to next byte */ } return (count); } static int disp_parts(char **msgs, u_longlong_t ac_error, int type) { int count = 0; int part; char msgbuf[MAXSTRLEN]; int i; if (msgs == NULL) { return (count); } (void) sprintf(msgbuf, "\tThe error could be caused by:\n"); *msgs++ = strdup(msgbuf); count++; for (i = 0; (i < MAX_FRUS) && ac_errors[type].part[i]; i++) { part = ac_errors[type].part[i]; if (part == UPA_PART) { if (ac_error & UPA_PORT_A) { part = UPA_A_PART; } else if (ac_error & UPA_PORT_B) { part = UPA_B_PART; } } if (part == DTAG_PART) { if (ac_error & UPA_PORT_A) { part = DTAG_A_PART; } else if (ac_error & UPA_PORT_B) { part = DTAG_B_PART; } } (void) sprintf(msgbuf, "\t\t%s\n", part_str[part]); *msgs++ = strdup(msgbuf); count++; } return (count); }