1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Sunfire Platform specific functions.
27 *
28 * called when :
29 * machine_type == MTYPE_SUNFIRE
30 */
31
32 #pragma ident "%Z%%M% %I% %E% SMI"
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <unistd.h>
37 #include <ctype.h>
38 #include <string.h>
39 #include <kvm.h>
40 #include <varargs.h>
41 #include <time.h>
42 #include <dirent.h>
43 #include <fcntl.h>
44 #include <errno.h>
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 #include <sys/types.h>
48 #include <sys/utsname.h>
49 #include <sys/openpromio.h>
50 #include <libintl.h>
51 #include <syslog.h>
52 #include <sys/dkio.h>
53 #include "pdevinfo.h"
54 #include "display.h"
55 #include "pdevinfo_sun4u.h"
56 #include "display_sun4u.h"
57 #include "libprtdiag.h"
58
59 #if !defined(TEXT_DOMAIN)
60 #define TEXT_DOMAIN "SYS_TEST"
61 #endif
62
/*
 * Conversions between Sunfire board numbers and UPA IDs: a board number
 * shifted left by one bit gives a UPA ID, and a UPA ID shifted right by
 * one bit gives back the board number.
 */
#define	bd_to_upa(board)	((board) << 1)
#define	upa_to_bd(upa_id)	((upa_id) >> 1)

/* Upper bound on queued messages. */
#define	MAX_MSGS	64
68
69 extern int print_flag;
70
71 /*
72 * these functions will overlay the symbol table of libprtdiag
73 * at runtime (sunfire systems only)
74 */
75 int error_check(Sys_tree *tree, struct system_kstat_data *kstats);
76 void display_memoryconf(Sys_tree *tree, struct grp_info *grps);
77 int disp_fail_parts(Sys_tree *tree);
78 void display_memorysize(Sys_tree *tree, struct system_kstat_data *kstats,
79 struct grp_info *grps, struct mem_total *memory_total);
80 void display_hp_fail_fault(Sys_tree *tree, struct system_kstat_data *kstats);
81 void display_diaginfo(int flag, Prom_node *root, Sys_tree *tree,
82 struct system_kstat_data *kstats);
83 void display_mid(int mid);
84 void display_pci(Board_node *);
85 void display_ffb(Board_node *, int);
86 void add_node(Sys_tree *, Prom_node *);
87 void resolve_board_types(Sys_tree *);
88
89 /* local functions */
90 static void build_mem_tables(Sys_tree *, struct system_kstat_data *,
91 struct grp_info *);
92 static void get_mem_total(struct mem_total *, struct grp_info *);
93 static int disp_fault_list(Sys_tree *, struct system_kstat_data *);
94 static int disp_err_log(struct system_kstat_data *);
95 static int disp_env_status(struct system_kstat_data *);
96 static int disp_keysw_and_leds(struct system_kstat_data *);
97 static void sunfire_disp_prom_versions(Sys_tree *);
98 static void erase_msgs(char **);
99 static void display_msgs(char **msgs, int board);
100 static void sunfire_disp_asic_revs(Sys_tree *, struct system_kstat_data *);
101 static void display_hp_boards(struct system_kstat_data *);
102 static int disp_parts(char **, u_longlong_t, int);
103 /*
104 * Error analysis routines. These routines decode data from specified
105 * error registers. They are meant to be used for decoding the fatal
106 * hardware reset data passed to the kernel by sun4u POST.
107 */
108 static int analyze_cpu(char **, int, u_longlong_t);
109 static int analyze_ac(char **, u_longlong_t);
110 static int analyze_dc(int, char **, u_longlong_t);
111
#define	RESERVED_STR	"Reserved"

/* Length of the part[] list carried by each ac_error entry. */
#define	MAX_PARTS	5
#define	MAX_FRUS	5

/* Size used for local string buffers. */
#define	MAXSTRLEN	256

/* Define special bits */
#define	UPA_PORT_A	0x1
#define	UPA_PORT_B	0x2


/*
 * These defines come from async.h, but it does not get exported from
 * uts/sun4u/sys, so they must be redefined.
 */
#define	P_AFSR_ISAP	0x0000000040000000ULL /* incoming addr. parity err */
#define	P_AFSR_ETP	0x0000000020000000ULL /* ecache tag parity */
#define	P_AFSR_ETS	0x00000000000F0000ULL /* cache tag parity syndrome */
#define	ETS_SHIFT	16

/*
 * List of parts possible.  Each value is the index of the matching
 * description in part_str[]; 0 is not a part and terminates a part list.
 */
enum {
	RSVD_PART = 1,
	UPA_PART = 2,
	UPA_A_PART = 3,
	UPA_B_PART = 4,
	SOFTWARE_PART = 5,
	AC_PART = 6,
	AC_ANY_PART = 7,
	DTAG_PART = 8,
	DTAG_A_PART = 9,
	DTAG_B_PART = 10,
	FHC_PART = 11,
	BOARD_PART = 12,
	BOARD_ANY_PART = 13,
	BOARD_CONN_PART = 14,
	BACK_PIN_PART = 15,
	BACK_TERM_PART = 16,
	CPU_PART = 17
};
151
/*
 * Descriptions of the possible parts, indexed by the *_PART constants.
 * Slot 0 is only a placeholder so the indices line up, and the reserved
 * slot is empty because reserved strings shouldn't be printed.
 */
static char *part_str[] = {
	"",						/* 0: placeholder */
	"",						/* RSVD_PART */
	"UPA devices",					/* UPA_PART */
	"UPA Port A device",				/* UPA_A_PART */
	"UPA Port B device",				/* UPA_B_PART */
	"Software error",				/* SOFTWARE_PART */
	"Address Controller",				/* AC_PART */
	"Undetermined Address Controller in system",	/* AC_ANY_PART */
	"Data Tags",					/* DTAG_PART */
	"Data Tags for UPA Port A",			/* DTAG_A_PART */
	"Data Tags for UPA Port B",			/* DTAG_B_PART */
	"Firehose Controller",				/* FHC_PART */
	"This Board",					/* BOARD_PART */
	"Undetermined Board in system",			/* BOARD_ANY_PART */
	"Board Connector",				/* BOARD_CONN_PART */
	"Centerplane pins ",				/* BACK_PIN_PART */
	"Centerplane terminators",			/* BACK_TERM_PART */
	"CPU",						/* CPU_PART */
};

/* Ecache parity error messages. Tells which bits are bad. */
static char *ecache_parity[] = {
	"Bits 7:0 ",
	"Bits 15:8 ",
	"Bits 21:16 ",
	"Bits 24:22 "
};
181
182
183 struct ac_error {
184 char *error;
185 int part[MAX_PARTS];
186 };
187
188 typedef struct ac_error ac_err;
189
190 /*
191 * Hardware error register meanings, failed parts and FRUs. The
192 * following strings are indexed for the bit positions of the
193 * corresponding bits in the hardware. The code checks bit x of
194 * the hardware error register and prints out string[x] if the bit
195 * is turned on.
196 *
197 * This database of parts which are probably failed and which FRU's
198 * to replace was based on knowledge of the Sunfire Programmers Spec.
199 * and discussions with the hardware designers. The order of the part
200 * lists and consequently the FRU lists are in the order of most
201 * likely cause first.
202 */
203 static ac_err ac_errors[] = {
204 { /* 0 */
205 "UPA Port A Error",
206 { UPA_A_PART, 0, 0, 0, 0 },
207 },
208 { /* 1 */
209 "UPA Port B Error",
210 { UPA_B_PART, 0, 0, 0, 0 },
211 },
212 { /* 2 */
213 NULL,
214 { RSVD_PART, 0, 0, 0, 0 },
215 },
216 { /* 3 */
217 NULL,
218 { RSVD_PART, 0, 0, 0, 0 },
219 },
220 { /* 4 */
221 "UPA Interrupt to unmapped destination",
222 { BOARD_PART, 0, 0, 0, 0 },
223 },
224 { /* 5 */
225 "UPA Non-cacheable write to unmapped destination",
226 { BOARD_PART, 0, 0, 0, 0 },
227 },
228 { /* 6 */
229 "UPA Cacheable write to unmapped destination",
230 { BOARD_PART, 0, 0, 0, 0 },
231 },
232 { /* 7 */
233 "Illegal Write Received",
234 { BOARD_PART, 0, 0, 0, 0 },
235 },
236 { /* 8 */
237 "Local Writeback match with line in state S",
238 { AC_PART, DTAG_PART, 0, 0, 0 },
239 },
240 { /* 9 */
241 "Local Read match with valid line in Tags",
242 { AC_PART, DTAG_PART, 0, 0, 0 },
243 },
244 { /* 10 */
245 NULL,
246 { RSVD_PART, 0, 0, 0, 0 },
247 },
248 { /* 11 */
249 NULL,
250 { RSVD_PART, 0, 0, 0, 0 },
251 },
252 { /* 12 */
253 "Tag and Victim were valid during lookup",
254 { AC_PART, DTAG_PART, 0, 0, 0 },
255 },
256 { /* 13 */
257 "Local Writeback matches a victim in state S",
258 { AC_PART, CPU_PART, 0, 0, 0 },
259 },
260 { /* 14 */
261 "Local Read matches valid line in victim buffer",
262 { AC_PART, CPU_PART, 0, 0, 0 },
263 },
264 { /* 15 */
265 "Local Read victim bit set and victim is S state",
266 { AC_PART, CPU_PART, 0, 0, 0 },
267 },
268 { /* 16 */
269 "Local Read Victim bit set and Valid Victim Buffer",
270 { AC_PART, CPU_PART, 0, 0, 0 },
271 },
272 { /* 17 */
273 NULL,
274 { RSVD_PART, 0, 0, 0, 0 },
275 },
276 { /* 18 */
277 NULL,
278 { RSVD_PART, 0, 0, 0, 0 },
279 },
280 { /* 19 */
281 NULL,
282 { RSVD_PART, 0, 0, 0, 0 },
283 },
284 { /* 20 */
285 "UPA Transaction received in Sleep mode",
286 { AC_PART, 0, 0, 0, 0 },
287 },
288 { /* 21 */
289 "P_FERR error P_REPLY received from UPA Port",
290 { CPU_PART, AC_PART, 0, 0, 0 },
291 },
292 { /* 22 */
293 "Illegal P_REPLY received from UPA Port",
294 { CPU_PART, AC_PART, 0, 0, 0 },
295 },
296 { /* 23 */
297 NULL,
298 { RSVD_PART, 0, 0, 0, 0 },
299 },
300 { /* 24 */
301 "Timeout on a UPA Master Port",
302 { AC_ANY_PART, BOARD_ANY_PART, 0, 0, 0 },
303 },
304 { /* 25 */
305 NULL,
306 { RSVD_PART, 0, 0, 0, 0 },
307 },
308 { /* 26 */
309 NULL,
310 { RSVD_PART, 0, 0, 0, 0 },
311 },
312 { /* 27 */
313 NULL,
314 { RSVD_PART, 0, 0, 0, 0 },
315 },
316 { /* 28 */
317 "Coherent Transactions Queue Overflow Error",
318 { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, AC_ANY_PART, 0 },
319 },
320 { /* 29 */
321 "Non-cacheable Request Queue Overflow Error",
322 { AC_PART, AC_ANY_PART, 0, 0, 0 },
323 },
324 { /* 30 */
325 "Non-cacheable Reply Queue Overflow Error",
326 { AC_PART, 0, 0, 0, 0 },
327 },
328 { /* 31 */
329 "PREQ Queue Overflow Error",
330 { CPU_PART, AC_PART, 0, 0, 0 },
331 },
332 { /* 32 */
333 "Foreign DID CAM Overflow Error",
334 { AC_PART, AC_ANY_PART, 0, 0, 0 },
335 },
336 { /* 33 */
337 "FT->UPA Queue Overflow Error",
338 { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, AC_ANY_PART, 0 },
339 },
340 { /* 34 */
341 NULL,
342 { RSVD_PART, 0, 0, 0, 0 },
343 },
344 { /* 35 */
345 NULL,
346 { RSVD_PART, 0, 0, 0, 0 },
347 },
348 { /* 36 */
349 "UPA Port B Dtag Parity Error",
350 { DTAG_B_PART, AC_PART, 0, 0, 0 },
351 },
352 { /* 37 */
353 "UPA Port A Dtag Parity Error",
354 { DTAG_A_PART, AC_PART, 0, 0, 0 },
355 },
356 { /* 38 */
357 NULL,
358 { RSVD_PART, 0, 0, 0, 0 },
359 },
360 { /* 39 */
361 NULL,
362 { RSVD_PART, 0, 0, 0, 0 },
363 },
364 { /* 40 */
365 "UPA Bus Parity Error",
366 { UPA_PART, AC_PART, 0, 0, 0 },
367 },
368 { /* 41 */
369 "Data ID Line Mismatch",
370 { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, 0, 0 },
371 },
372 { /* 42 */
373 "Arbitration Line Mismatch",
374 { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, 0, 0 },
375 },
376 { /* 43 */
377 "Shared Line Parity Mismatch",
378 { BACK_PIN_PART, BOARD_CONN_PART, AC_PART, 0, 0 },
379 },
380 { /* 44 */
381 "FireTruck Control Line Parity Error",
382 { AC_PART, BACK_PIN_PART, 0, 0, 0 },
383 },
384 { /* 45 */
385 "FireTruck Address Bus Parity Error",
386 { AC_PART, BACK_PIN_PART, 0, 0, 0 },
387 },
388 { /* 46 */
389 "Internal RAM Parity Error",
390 { AC_PART, 0, 0, 0, 0 },
391 },
392 { /* 47 */
393 NULL,
394 { RSVD_PART, 0, 0, 0, 0 },
395 },
396 { /* 48 */
397 "Internal Hardware Error",
398 { AC_PART, 0, 0, 0, 0 },
399 },
400 { /* 49 */
401 "FHC Communications Error",
402 { FHC_PART, AC_PART, 0, 0, 0 },
403 },
404 /* Bits 50-63 are reserved in this implementation. */
405 };
406
407
408 #define MAX_BITS (sizeof (ac_errors)/ sizeof (ac_err))
409
/*
 * There are only two error bits in the DC shadow chain that are
 * important. They indicate an overflow error and a parity error,
 * respectively. The other bits are not error bits and should not
 * be checked for.
 */
#define	DC_OVERFLOW	0x2
#define	DC_PARITY	0x4

/* Message formats for the two DC errors; both take a board and a DC number. */
static char dc_overflow_txt[] = "Board %d DC %d Overflow Error";
static char dc_parity_txt[] = "Board %d DC %d Parity Error";

/* defines for the sysio */
#define	UPA_APERR	0x4
424
425 int
error_check(Sys_tree * tree,struct system_kstat_data * kstats)426 error_check(Sys_tree *tree, struct system_kstat_data *kstats)
427 {
428 int exit_code = 0; /* init to all OK */
429
430 /*
431 * silently check for any types of machine errors
432 */
433 print_flag = 0;
434 if (disp_fail_parts(tree) || disp_fault_list(tree, kstats) ||
435 disp_err_log(kstats) || disp_env_status(kstats)) {
436 /* set exit_code to show failures */
437 exit_code = 1;
438 }
439 print_flag = 1;
440
441 return (exit_code);
442 }
443
444 /*
445 * disp_fail_parts
446 *
447 * Display the failed parts in the system. This function looks for
448 * the status property in all PROM nodes. On systems where
449 * the PROM does not supports passing diagnostic information
450 * thruogh the device tree, this routine will be silent.
451 */
452 int
disp_fail_parts(Sys_tree * tree)453 disp_fail_parts(Sys_tree *tree)
454 {
455 int exit_code;
456 int system_failed = 0;
457 Board_node *bnode = tree->bd_list;
458 Prom_node *pnode;
459
460 exit_code = 0;
461
462 /* go through all of the boards looking for failed units. */
463 while (bnode != NULL) {
464 /* find failed chips */
465 pnode = find_failed_node(bnode->nodes);
466 if ((pnode != NULL) && !system_failed) {
467 system_failed = 1;
468 exit_code = 1;
469 if (print_flag == 0) {
470 return (exit_code);
471 }
472 log_printf("\n", 0);
473 log_printf(dgettext(TEXT_DOMAIN,
474 "Failed Field Replaceable Units (FRU) "
475 "in System:\n"), 0);
476 log_printf("=========================="
477 "====================\n", 0);
478 }
479
480 while (pnode != NULL) {
481 void *value;
482 char *name; /* node name string */
483 char *type; /* node type string */
484 char *board_type = NULL;
485
486 value = get_prop_val(find_prop(pnode, "status"));
487 name = get_node_name(pnode);
488
489 /* sanity check of data retreived from PROM */
490 if ((value == NULL) || (name == NULL)) {
491 pnode = next_failed_node(pnode);
492 continue;
493 }
494
495 /* Find the board type of this board */
496 if (bnode->board_type == CPU_BOARD) {
497 board_type = "CPU";
498 } else {
499 board_type = "IO";
500 }
501
502 log_printf(dgettext(TEXT_DOMAIN,
503 "%s unavailable on %s Board #%d\n"),
504 name, board_type, bnode->board_num, 0);
505
506 log_printf(dgettext(TEXT_DOMAIN,
507 "\tPROM fault string: %s\n"), value, 0);
508
509 log_printf(dgettext(TEXT_DOMAIN,
510 "\tFailed Field Replaceable Unit is "), 0);
511
512 /*
513 * Determine whether FRU is CPU module, system
514 * board, or SBus card.
515 */
516 if ((name != NULL) && (strstr(name, "sbus"))) {
517
518 log_printf(dgettext(TEXT_DOMAIN,
519 "SBus Card %d\n"),
520 get_sbus_slot(pnode), 0);
521
522 } else if (((name = get_node_name(pnode->parent)) !=
523 NULL) && (strstr(name, "pci"))) {
524
525 log_printf(dgettext(TEXT_DOMAIN,
526 "PCI Card %d"),
527 get_pci_device(pnode), 0);
528
529 } else if (((type = get_node_type(pnode)) != NULL) &&
530 (strstr(type, "cpu"))) {
531
532 log_printf(dgettext(TEXT_DOMAIN,
533 "UltraSPARC module "
534 "Board %d Module %d\n"),
535 get_id(pnode) >> 1,
536 get_id(pnode) & 0x1);
537
538 } else {
539 log_printf(dgettext(TEXT_DOMAIN,
540 "%s board %d\n"), board_type,
541 bnode->board_num, 0);
542 }
543 pnode = next_failed_node(pnode);
544 }
545 bnode = bnode->next;
546 }
547
548 if (!system_failed) {
549 log_printf("\n", 0);
550 log_printf(dgettext(TEXT_DOMAIN,
551 "No failures found in System\n"), 0);
552 log_printf("===========================\n", 0);
553 }
554
555 if (system_failed)
556 return (1);
557 else
558 return (0);
559 }
560
561 void
display_memorysize(Sys_tree * tree,struct system_kstat_data * kstats,struct grp_info * grps,struct mem_total * memory_total)562 display_memorysize(Sys_tree *tree, struct system_kstat_data *kstats,
563 struct grp_info *grps, struct mem_total *memory_total) {
564
565 /* Build the memory group tables and interleave data */
566 build_mem_tables(tree, kstats, grps);
567
568 /* display total usable installed memory */
569 get_mem_total(memory_total, grps);
570 (void) log_printf(dgettext(TEXT_DOMAIN,
571 "Memory size: %4dMb\n"), memory_total->dram, 0);
572
573 /* We display the NVSIMM size totals separately. */
574 if (memory_total->nvsimm != 0) {
575 (void) log_printf(dgettext(TEXT_DOMAIN,
576 "NVSIMM size: %4dMb\n"), memory_total->nvsimm);
577 }
578 }
579
580 /*
581 * This routine displays the memory configuration for all boards in the
582 * system.
583 */
584 void
display_memoryconf(Sys_tree * tree,struct grp_info * grps)585 display_memoryconf(Sys_tree *tree, struct grp_info *grps)
586 {
587 int group;
588 char *status_str[] = { "Unknown", " Empty ", " Failed", " Active",
589 " Spare " };
590 char *cond_str[] = { " Unknown ", " OK ", " Failing ",
591 " Failed ", " Uninit. " };
592
593 #ifdef lint
594 tree = tree;
595 #endif
596 /* Print the header for the memory section. */
597 log_printf("\n", 0);
598 log_printf("=========================", 0);
599 log_printf(dgettext(TEXT_DOMAIN, " Memory "), 0);
600 log_printf("=========================", 0);
601 log_printf("\n", 0);
602 log_printf("\n", 0);
603 log_printf(" Intrlv. "
604 "Intrlv.\n", 0);
605 log_printf("Brd Bank MB Status Condition Speed Factor "
606 " With\n", 0);
607 log_printf("--- ----- ---- ------- ---------- ----- ------- "
608 "-------\n", 0);
609
610 /* Print the Memory groups information. */
611 for (group = 0; group < MAX_GROUPS; group++) {
612 struct grp *grp;
613
614 grp = &grps->grp[group];
615
616 /* If this board is not a CPU or MEM board, skip it. */
617 if ((grp->type != MEM_BOARD) && (grp->type != CPU_BOARD)) {
618 continue;
619 }
620
621 if (grp->valid) {
622 log_printf("%2d ", grp->board, 0);
623 log_printf(" %1d ", grp->group, 0);
624 log_printf("%4d ", grp->size, 0);
625 log_printf("%7s ", status_str[grp->status], 0);
626 log_printf("%10s ", cond_str[grp->condition], 0);
627 log_printf("%3dns ", grp->speed, 0);
628 log_printf("%3d-way ", grp->factor, 0);
629 if (grp->factor > 1) {
630 log_printf("%4c", grp->groupid, 0);
631 }
632 log_printf("\n", 0);
633 }
634 }
635
636 }
637
638
639 void
display_hp_fail_fault(Sys_tree * tree,struct system_kstat_data * kstats)640 display_hp_fail_fault(Sys_tree *tree, struct system_kstat_data *kstats)
641 {
642 /* Display Hot plugged, disabled and failed boards */
643 (void) display_hp_boards(kstats);
644
645 /* Display failed units */
646 (void) disp_fail_parts(tree);
647
648 /* Display fault info */
649 (void) disp_fault_list(tree, kstats);
650 }
651
652 void
display_diaginfo(int flag,Prom_node * root,Sys_tree * tree,struct system_kstat_data * kstats)653 display_diaginfo(int flag, Prom_node *root, Sys_tree *tree,
654 struct system_kstat_data *kstats)
655 {
656 /*
657 * Now display the last powerfail time and the fatal hardware
658 * reset information. We do this under a couple of conditions.
659 * First if the user asks for it. The second is iof the user
660 * told us to do logging, and we found a system failure.
661 */
662 if (flag) {
663 /*
664 * display time of latest powerfail. Not all systems
665 * have this capability. For those that do not, this
666 * is just a no-op.
667 */
668 disp_powerfail(root);
669
670 /* Display system environmental conditions. */
671 (void) disp_env_status(kstats);
672
673 /* Display ASIC Chip revs for all boards. */
674 sunfire_disp_asic_revs(tree, kstats);
675
676 /* Print the PROM revisions here */
677 sunfire_disp_prom_versions(tree);
678
679 /*
680 * Display the latest system fatal hardware
681 * error data, if any. The system holds this
682 * data in SRAM, so it does not persist
683 * across power-on resets.
684 */
685 (void) disp_err_log(kstats);
686 }
687 }
688
/*
 * display_mid
 *
 * Print the remainder of mid divided by two.
 * NOTE(review): presumably this is the module number within a board;
 * confirm against the callers.
 */
void
display_mid(int mid)
{
	int remainder = mid % 2;

	log_printf(" %2d ", remainder, 0);
}
694
695 /*
696 * display_pci
697 * Call the generic psycho version of this function.
698 */
699 void
display_pci(Board_node * board)700 display_pci(Board_node *board)
701 {
702 display_psycho_pci(board);
703 }
704
705 /*
706 * display_ffb
707 * Display all FFBs on this board. It can either be in tabular format,
708 * or a more verbose format.
709 */
710 void
display_ffb(Board_node * board,int table)711 display_ffb(Board_node *board, int table)
712 {
713 Prom_node *ffb;
714 void *value;
715 struct io_card *card_list = NULL;
716 struct io_card card;
717
718 if (board == NULL)
719 return;
720
721 /* Fill in common information */
722 card.display = 1;
723 card.board = board->board_num;
724 (void) sprintf(card.bus_type, "UPA");
725 card.freq = sys_clk;
726
727 for (ffb = dev_find_node(board->nodes, FFB_NAME); ffb != NULL;
728 ffb = dev_next_node(ffb, FFB_NAME)) {
729 if (table == 1) {
730 /* Print out in table format */
731
732 /* XXX - Get the slot number (hack) */
733 card.slot = get_id(ffb);
734
735 /* Find out if it's single or double buffered */
736 (void) sprintf(card.name, "FFB");
737 value = get_prop_val(find_prop(ffb, "board_type"));
738 if (value != NULL)
739 if ((*(int *)value) & FFB_B_BUFF)
740 (void) sprintf(card.name, "FFB, "
741 "Double Buffered");
742 else
743 (void) sprintf(card.name, "FFB, "
744 "Single Buffered");
745
746 /* Print model number */
747 card.model[0] = '\0';
748 value = get_prop_val(find_prop(ffb, "model"));
749 if (value != NULL)
750 (void) sprintf(card.model, "%s",
751 (char *)value);
752
753 card_list = insert_io_card(card_list, &card);
754 } else {
755 /* print in long format */
756 char device[MAXSTRLEN];
757 int fd = -1;
758 struct dirent *direntp;
759 DIR *dirp;
760 union strap_un strap;
761 struct ffb_sys_info fsi;
762
763 /* Find the device node using upa address */
764 value = get_prop_val(find_prop(ffb, "upa-portid"));
765 if (value == NULL)
766 continue;
767
768 (void) sprintf(device, "%s@%x", FFB_NAME,
769 *(int *)value);
770 if ((dirp = opendir("/devices")) == NULL)
771 continue;
772
773 while ((direntp = readdir(dirp)) != NULL) {
774 if (strstr(direntp->d_name, device) != NULL) {
775 (void) sprintf(device, "/devices/%s",
776 direntp->d_name);
777 fd = open(device, O_RDWR, 0666);
778 break;
779 }
780 }
781 (void) closedir(dirp);
782
783 if (fd == -1)
784 continue;
785
786 if (ioctl(fd, FFB_SYS_INFO, &fsi) < 0)
787 continue;
788
789 log_printf("Board %d FFB Hardware Configuration:\n",
790 board->board_num, 0);
791 log_printf("-----------------------------------\n", 0);
792
793 strap.ffb_strap_bits = fsi.ffb_strap_bits;
794 log_printf("\tBoard rev: %d\n",
795 (int)strap.fld.board_rev, 0);
796 log_printf("\tFBC version: 0x%x\n", fsi.fbc_version, 0);
797 log_printf("\tDAC: %s\n",
798 fmt_manf_id(fsi.dac_version, device), 0);
799 log_printf("\t3DRAM: %s\n",
800 fmt_manf_id(fsi.fbram_version, device), 0);
801 log_printf("\n", 0);
802 }
803 }
804
805 display_io_cards(card_list);
806 free_io_cards(card_list);
807 }
808
809 /*
810 * add_node
811 *
812 * This function adds a board node to the board structure where that
813 * that node's physical component lives.
814 */
815 void
add_node(Sys_tree * root,Prom_node * pnode)816 add_node(Sys_tree *root, Prom_node *pnode)
817 {
818 int board;
819 Board_node *bnode;
820 char *name = get_node_name(pnode);
821 Prom_node *p;
822
823 /* add this node to the Board list of the appropriate board */
824 if ((board = get_board_num(pnode)) == -1) {
825 void *value;
826
827 /*
828 * if it is a server, pci nodes and ffb nodes never have
829 * board number properties and software can find the board
830 * number from the reg property. It is derived from the
831 * high word of the 'reg' property, which contains the
832 * mid.
833 */
834 if ((name != NULL) &&
835 ((strcmp(name, FFB_NAME) == 0) ||
836 (strcmp(name, "pci") == 0) ||
837 (strcmp(name, "counter-timer") == 0))) {
838 /* extract the board number from the 'reg' prop. */
839 if ((value = get_prop_val(find_prop(pnode,
840 "reg"))) == NULL) {
841 (void) printf("add_node() no reg property\n");
842 exit(2);
843 }
844 board = (*(int *)value - 0x1c0) / 4;
845 }
846 }
847
848 /* find the node with the same board number */
849 if ((bnode = find_board(root, board)) == NULL) {
850 bnode = insert_board(root, board);
851 bnode->board_type = UNKNOWN_BOARD;
852 }
853
854 /* now attach this prom node to the board list */
855 /* Insert this node at the end of the list */
856 pnode->sibling = NULL;
857 if (bnode->nodes == NULL)
858 bnode->nodes = pnode;
859 else {
860 p = bnode->nodes;
861 while (p->sibling != NULL)
862 p = p->sibling;
863 p->sibling = pnode;
864 }
865
866 }
867
868 /*
869 * Function resolve_board_types
870 *
871 * After the tree is walked and all the information is gathered, this
872 * function is called to resolve the type of each board.
873 */
874 void
resolve_board_types(Sys_tree * tree)875 resolve_board_types(Sys_tree *tree)
876 {
877 Board_node *bnode;
878 Prom_node *pnode;
879 char *type;
880
881 bnode = tree->bd_list;
882 while (bnode != NULL) {
883 bnode->board_type = UNKNOWN_BOARD;
884
885 pnode = dev_find_node(bnode->nodes, "fhc");
886 type = get_prop_val(find_prop(pnode, "board-type"));
887 if (type == NULL) {
888 bnode = bnode->next;
889 continue;
890 }
891
892 if (strcmp(type, CPU_BD_NAME) == 0) {
893 bnode->board_type = CPU_BOARD;
894 } else if (strcmp(type, MEM_BD_NAME) == 0) {
895 bnode->board_type = MEM_BOARD;
896 } else if (strcmp(type, DISK_BD_NAME) == 0) {
897 bnode->board_type = DISK_BOARD;
898 } else if (strcmp(type, IO_SBUS_FFB_BD_NAME) == 0) {
899 bnode->board_type = IO_SBUS_FFB_BOARD;
900 } else if (strcmp(type, IO_2SBUS_BD_NAME) == 0) {
901 bnode->board_type = IO_2SBUS_BOARD;
902 } else if (strcmp(type, IO_PCI_BD_NAME) == 0) {
903 bnode->board_type = IO_PCI_BOARD;
904 } else if (strcmp(type, IO_2SBUS_SOCPLUS_BD_NAME) == 0) {
905 bnode->board_type = IO_2SBUS_SOCPLUS_BOARD;
906 } else if (strcmp(type, IO_SBUS_FFB_SOCPLUS_BD_NAME) == 0) {
907 bnode->board_type = IO_SBUS_FFB_SOCPLUS_BOARD;
908 }
909
910 bnode = bnode->next;
911 }
912
913 }
914
915 /*
916 * local functions
917 */
918
919 static void
sunfire_disp_prom_versions(Sys_tree * tree)920 sunfire_disp_prom_versions(Sys_tree *tree)
921 {
922 Board_node *bnode;
923
924 /* Display Prom revision header */
925 log_printf("System Board PROM revisions:\n", 0);
926 log_printf("----------------------------\n", 0);
927
928 /* For each board, print the POST and OBP versions */
929 for (bnode = tree->bd_list; bnode != NULL; bnode = bnode->next) {
930 Prom_node *flashprom; /* flashprom device node */
931
932 /* find a flashprom node for this board */
933 flashprom = dev_find_node(bnode->nodes, "flashprom");
934
935 /* If no flashprom node found, continue */
936 if (flashprom == NULL)
937 continue;
938
939 /* flashprom node found, display board# */
940 log_printf("Board %2d: ", bnode->board_num, 0);
941
942 disp_prom_version(flashprom);
943 }
944 }
945
946
947 /*
948 * functions that are only needed inside this library
949 */
950
951 /*
952 * build_mem_tables
953 *
954 * This routine builds the memory table which tells how much memory
955 * is present in each SIMM group of each board, what the interleave
956 * factors are, and the group ID of the interleave group.
957 *
958 * The algorithms used are:
959 * First fill in the sizes of groups.
960 * Next build lists of all groups with same physical base.
961 * From #of members in each list, interleave factor is
962 * determined.
963 * All members of a certain list get the same interleave
964 * group ID.
965 */
966 static void
build_mem_tables(Sys_tree * tree,struct system_kstat_data * kstats,struct grp_info * grps)967 build_mem_tables(Sys_tree *tree,
968 struct system_kstat_data *kstats,
969 struct grp_info *grps)
970 {
971 struct mem_inter inter_grps; /* temp structure for interleaves */
972 struct inter_grp *intrp;
973 int group;
974 int i;
975
976 /* initialize the interleave lists */
977 for (i = 0, intrp = &inter_grps.i_grp[0]; i < MAX_GROUPS; i++,
978 intrp++) {
979 intrp->valid = 0;
980 intrp->count = 0;
981 intrp->groupid = '\0';
982 intrp->base = 0;
983 }
984
985 for (group = 0; group < MAX_GROUPS; group++) {
986 int found;
987 int board;
988 struct grp *grp;
989 struct bd_kstat_data *bksp;
990 uchar_t simm_reg;
991 Board_node *bnode;
992
993 board = group/2;
994 bksp = &kstats->bd_ksp_list[board];
995 grp = &grps->grp[group];
996 grp->group = group % 2;
997
998 /*
999 * Copy the board type field into the group record.
1000 */
1001 if ((bnode = find_board(tree, board)) != NULL) {
1002 grp->type = bnode->board_type;
1003 } else {
1004 grp->type = UNKNOWN_BOARD;
1005 continue;
1006 }
1007
1008 /* Make sure we have kstats for this board */
1009 if (bksp->ac_kstats_ok == 0) {
1010 /* Mark this group as invalid and move to next one */
1011 grp->valid = 0;
1012 continue;
1013 }
1014
1015 /* Find the bank status property */
1016 if (bksp->ac_memstat_ok) {
1017 grp->status = bksp->mem_stat[grp->group].status;
1018 grp->condition = bksp->mem_stat[grp->group].condition;
1019 } else {
1020 grp->status = StUnknown;
1021 grp->condition = ConUnknown;
1022 }
1023
1024 switch (grp->status) {
1025 case StBad:
1026 case StActive:
1027 case StSpare:
1028 break;
1029 default:
1030 grp->status = StUnknown;
1031 break;
1032 }
1033
1034 switch (grp->condition) {
1035 case ConOK:
1036 case ConFailing:
1037 case ConFailed:
1038 case ConTest:
1039 case ConBad:
1040 break;
1041 default:
1042 grp->condition = ConUnknown;
1043 break;
1044 }
1045
1046 /* base the group size off of the simmstat kstat. */
1047 if (bksp->simmstat_kstats_ok == 0) {
1048 grp->valid = 0;
1049 continue;
1050 }
1051
1052 /* Is it bank 0 or bank 1 */
1053 if (grp->group == 0) {
1054 simm_reg = bksp->simm_status[0];
1055 } else {
1056 simm_reg = bksp->simm_status[1];
1057 }
1058
1059 /* Now decode the size field. */
1060 switch (simm_reg & 0x1f) {
1061 case MEM_SIZE_64M:
1062 grp->size = 64;
1063 break;
1064 case MEM_SIZE_256M:
1065 grp->size = 256;
1066 break;
1067 case MEM_SIZE_1G:
1068 grp->size = 1024;
1069 break;
1070 case MEM_SIZE_2G:
1071 grp->size = 2048;
1072 break;
1073 default:
1074 grp->valid = 0;
1075 continue;
1076 }
1077
1078 /* Decode the speed field */
1079 switch ((simm_reg & 0x60) >> 5) {
1080 case MEM_SPEED_50ns:
1081 grp->speed = 50;
1082 break;
1083 case MEM_SPEED_60ns:
1084 grp->speed = 60;
1085 break;
1086 case MEM_SPEED_70ns:
1087 grp->speed = 70;
1088 break;
1089 case MEM_SPEED_80ns:
1090 grp->speed = 80;
1091 break;
1092 }
1093
1094 grp->valid = 1;
1095 grp->base = GRP_BASE(bksp->ac_memdecode[grp->group]);
1096 grp->board = board;
1097 if (grp->group == 0) {
1098 grp->factor = INTLV0(bksp->ac_memctl);
1099 } else { /* assume it is group 1 */
1100 grp->factor = INTLV1(bksp->ac_memctl);
1101 }
1102 grp->groupid = '\0'; /* Not in a group yet */
1103
1104 /*
1105 * find the interleave list this group belongs on. If the
1106 * interleave list corresponding to this base address is
1107 * not found, then create a new one.
1108 */
1109
1110 i = 0;
1111 intrp = &inter_grps.i_grp[0];
1112 found = 0;
1113 while ((i < MAX_GROUPS) && !found && (intrp->valid != 0)) {
1114 if ((intrp->valid != 0) &&
1115 (intrp->base == grp->base)) {
1116 grp->groupid = intrp->groupid;
1117 intrp->count++;
1118 found = 1;
1119 }
1120 i++;
1121 intrp++;
1122 }
1123 /*
1124 * We did not find a matching base. So now i and intrp
1125 * now point to the next interleave group in the list.
1126 */
1127 if (!found) {
1128 intrp->count++;
1129 intrp->valid = 1;
1130 intrp->groupid = 'A' + (char)i;
1131 intrp->base = grp->base;
1132 grp->groupid = intrp->groupid;
1133 }
1134 }
1135 }
1136
1137
1138 static void
get_mem_total(struct mem_total * mem_total,struct grp_info * grps)1139 get_mem_total(struct mem_total *mem_total, struct grp_info *grps)
1140 {
1141 struct grp *grp;
1142 int i;
1143
1144 /* Start with total of zero */
1145 mem_total->dram = 0;
1146 mem_total->nvsimm = 0;
1147
1148 /* For now we ignore NVSIMMs. We might want to fix this later. */
1149 for (i = 0, grp = &grps->grp[0]; i < MAX_GROUPS; i++, grp++) {
1150 if (grp->valid == 1 && grp->status == StActive) {
1151 mem_total->dram += grp->size;
1152 }
1153 }
1154 }
1155
1156 static int
disp_fault_list(Sys_tree * tree,struct system_kstat_data * kstats)1157 disp_fault_list(Sys_tree *tree, struct system_kstat_data *kstats)
1158 {
1159 struct ft_list *ftp;
1160 int i;
1161 int result = 0;
1162 time_t t;
1163
1164 if (!kstats->ft_kstat_ok) {
1165 return (result);
1166 }
1167
1168 for (i = 0, ftp = kstats->ft_array; i < kstats->nfaults; i++, ftp++) {
1169 if (!result) {
1170 log_printf("\n", 0);
1171 log_printf("Detected System Faults\n", 0);
1172 log_printf("======================\n", 0);
1173 }
1174 result = 1;
1175 if (ftp->fclass == FT_BOARD) {
1176 log_printf("Board %d fault: %s\n", ftp->unit,
1177 ftp->msg, 0);
1178
1179 /*
1180 * If the fault on this board is PROM inherited, see
1181 * if we can find some failed component information
1182 * in the PROM device tree. The general solution
1183 * would be to fix the fhc driver and have it put in
1184 * more descriptive messages, but that's for another
1185 * day.
1186 */
1187
1188 if (ftp->type == FT_PROM) {
1189 Board_node *bn;
1190 Prom_node *pn;
1191 char *str;
1192
1193 bn = find_board(tree, ftp->unit);
1194 /*
1195 * If any nodes under this board have a
1196 * status containing "fail", print it out.
1197 */
1198 pn = find_failed_node(bn->nodes);
1199 while (pn) {
1200 str = get_prop_val(find_prop(pn,
1201 "status"));
1202 if (str != NULL) {
1203 log_printf("Fault: %s\n", str,
1204 0);
1205 }
1206
1207 pn = next_failed_node(pn);
1208 }
1209 }
1210 } else if ((ftp->type == FT_CORE_PS) || (ftp->type == FT_PPS)) {
1211 log_printf("Unit %d %s failure\n", ftp->unit,
1212 ftp->msg, 0);
1213 } else if ((ftp->type == FT_OVERTEMP) &&
1214 (ftp->fclass == FT_SYSTEM)) {
1215 log_printf("Clock board %s\n", ftp->msg, 0);
1216 } else {
1217 log_printf("%s failure\n", ftp->msg, 0);
1218 }
1219
1220 t = (time_t)ftp->create_time;
1221 log_printf("\tDetected %s",
1222 asctime(localtime(&t)), 0);
1223 }
1224
1225 if (!result) {
1226 log_printf("\n", 0);
1227 log_printf("No System Faults found\n", 0);
1228 log_printf("======================\n", 0);
1229 }
1230
1231 log_printf("\n", 0);
1232
1233 return (result);
1234 }
1235
1236
1237 /*
1238 * disp_err_log
1239 *
1240 * Display the fatal hardware reset system error logs. These logs are
1241 * collected by POST and passed up through the kernel to userland.
1242 * They will not necessarily be present in all systems. Their form
1243 * might also be different in different systems.
1244 *
1245 * NOTE - We are comparing POST defined board types here. Do not confuse
1246 * them with kernel board types. The structure being analyzed in this
1247 * function is created by POST. All the defines for it are in reset_info.h,
1248 * which was ported from POST header files.
1249 */
1250 static int
disp_err_log(struct system_kstat_data * kstats)1251 disp_err_log(struct system_kstat_data *kstats)
1252 {
1253 int exit_code = 0;
1254 int i;
1255 struct reset_info *rst_info;
1256 struct board_info *bdp;
1257 char *err_msgs[MAX_MSGS]; /* holds all messages for a system board */
1258 int msg_idx; /* current msg number */
1259 int count; /* number added by last analyze call */
1260 char **msgs;
1261
1262 /* start by initializing the err_msgs array to all NULLs */
1263 for (i = 0; i < MAX_MSGS; i++) {
1264 err_msgs[i] = NULL;
1265 }
1266
1267 /* First check to see that the reset-info kstats are present. */
1268 if (kstats->reset_kstats_ok == 0) {
1269 return (exit_code);
1270 }
1271
1272 rst_info = &kstats->reset_info;
1273
1274 /* Everything is OK, so print out time/date stamp first */
1275 log_printf("\n", 0);
1276 log_printf(
1277 dgettext(TEXT_DOMAIN,
1278 "Analysis of most recent Fatal Hardware Watchdog:\n"),
1279 0);
1280 log_printf("======================================================\n",
1281 0);
1282 log_printf("Log Date: %s\n",
1283 get_time(&kstats->reset_info.tod_timestamp[0]), 0);
1284
1285 /* initialize the vector and the message index. */
1286 msgs = err_msgs;
1287 msg_idx = 0;
1288
1289 /* Loop Through all of the boards. */
1290 bdp = &rst_info->bd_reset_info[0];
1291 for (i = 0; i < MAX_BOARDS; i++, bdp++) {
1292
1293 /* Is there data for this board? */
1294 if ((bdp->board_desc & BD_STATE_MASK) == BD_NOT_PRESENT) {
1295 continue;
1296 }
1297
1298 /* If it is a CPU Board, look for CPU data. */
1299 if (BOARD_TYPE(bdp->board_desc) == CPU_TYPE) {
1300 /* analyze CPU 0 if present */
1301 if (bdp->board_desc & CPU0_OK) {
1302 count = analyze_cpu(msgs, 0,
1303 bdp->cpu[0].afsr);
1304 msgs += count;
1305 msg_idx += count;
1306 }
1307
1308 /* analyze CPU1 if present. */
1309 if (bdp->board_desc & CPU1_OK) {
1310 count = analyze_cpu(msgs, 1,
1311 bdp->cpu[1].afsr);
1312 msgs += count;
1313 msg_idx += count;
1314 }
1315 }
1316
1317 /* Always Analyze the AC and the DCs on a board. */
1318 count = analyze_ac(msgs, bdp->ac_error_status);
1319 msgs += count;
1320 msg_idx += count;
1321
1322 count = analyze_dc(i, msgs, bdp->dc_shadow_chain);
1323 msgs += count;
1324 msg_idx += count;
1325
1326 if (msg_idx != 0)
1327 display_msgs(err_msgs, i);
1328
1329 erase_msgs(err_msgs);
1330
1331 /* If any messages are logged, we have errors */
1332 if (msg_idx != 0) {
1333 exit_code = 1;
1334 }
1335
1336 /* reset the vector and the message index */
1337 msg_idx = 0;
1338 msgs = &err_msgs[0];
1339 }
1340
1341 return (exit_code);
1342 }
1343
1344 static void
erase_msgs(char ** msgs)1345 erase_msgs(char **msgs)
1346 {
1347 int i;
1348
1349 for (i = 0; (*msgs != NULL) && (i < MAX_MSGS); i++, msgs++) {
1350 free(*msgs);
1351 *msgs = NULL;
1352 }
1353 }
1354
1355
1356 static void
display_msgs(char ** msgs,int board)1357 display_msgs(char **msgs, int board)
1358 {
1359 int i;
1360
1361 /* display the header for this board */
1362 print_header(board);
1363
1364 for (i = 0; (*msgs != NULL) && (i < MAX_MSGS); i++, msgs++) {
1365 log_printf(*msgs, 0);
1366 }
1367 }
1368
1369
1370
1371 /*
1372 * disp_keysw_and_leds
1373 *
1374 * This routine displays the position of the keyswitch and the front panel
1375 * system LEDs. The keyswitch can be in either normal, diagnostic, or
1376 * secure position. The three front panel LEDs are of importance because
1377 * the center LED indicates component failure on the system.
1378 */
1379 static int
disp_keysw_and_leds(struct system_kstat_data * kstats)1380 disp_keysw_and_leds(struct system_kstat_data *kstats)
1381 {
1382 int board;
1383 int diag_mode = 0;
1384 int secure_mode = 0;
1385 int result = 0;
1386
1387 /* Check the first valid board to determeine the diag bit */
1388 /* Find the first valid board */
1389 for (board = 0; board < MAX_BOARDS; board++) {
1390 if (kstats->bd_ksp_list[board].fhc_kstats_ok != 0) {
1391 /* If this was successful, break out of loop */
1392 if ((kstats->bd_ksp_list[board].fhc_bsr &
1393 FHC_DIAG_MODE) == 0)
1394 diag_mode = 1;
1395 break;
1396 }
1397 }
1398
1399 /*
1400 * Check the register on the clock-board to determine the
1401 * secure bit.
1402 */
1403 if (kstats->sys_kstats_ok) {
1404 /* The secure bit is negative logic. */
1405 if (kstats->keysw_status == KEY_SECURE) {
1406 secure_mode = 1;
1407 }
1408 }
1409
1410 /*
1411 * The system cannot be in diag and secure mode. This is
1412 * illegal.
1413 */
1414 if (secure_mode && diag_mode) {
1415 result = 2;
1416 return (result);
1417 }
1418
1419 /* Now print the keyswitch position. */
1420 log_printf("Keyswitch position is in ", 0);
1421
1422 if (diag_mode) {
1423 log_printf("Diagnostic Mode\n");
1424 } else if (secure_mode) {
1425 log_printf("Secure Mode\n", 0);
1426 } else {
1427 log_printf("Normal Mode\n");
1428 }
1429
1430 /* display the redundant power status */
1431 if (kstats->sys_kstats_ok) {
1432 log_printf("System Power Status: ", 0);
1433
1434 switch (kstats->power_state) {
1435 case REDUNDANT:
1436 log_printf("Redundant\n", 0);
1437 break;
1438
1439 case MINIMUM:
1440 log_printf("Minimum Available\n", 0);
1441 break;
1442
1443 case BELOW_MINIMUM:
1444 log_printf("Insufficient Power Available\n", 0);
1445 break;
1446
1447 default:
1448 log_printf("Unknown\n", 0);
1449 break;
1450 }
1451 }
1452
1453 if (kstats->sys_kstats_ok) {
1454 /*
1455 * If the center LED is on, then we return a non-zero
1456 * result.
1457 */
1458 log_printf("System LED Status: GREEN YELLOW "
1459 "GREEN\n", 0);
1460 if ((kstats->sysctrl & SYS_LED_MID) != 0) {
1461 log_printf("WARNING ", 0);
1462 } else {
1463 log_printf("Normal ", 0);
1464 }
1465
1466 /*
1467 * Left LED is negative logic, center and right LEDs
1468 * are positive logic.
1469 */
1470 if ((kstats->sysctrl & SYS_LED_LEFT) == 0) {
1471 log_printf("ON ", 0);
1472 } else {
1473 log_printf("OFF", 0);
1474 }
1475
1476 log_printf(" ", 0);
1477 if ((kstats->sysctrl & SYS_LED_MID) != 0) {
1478 log_printf("ON ", 0);
1479 } else {
1480 log_printf("OFF", 0);
1481 }
1482
1483 log_printf(" BLINKING", 0);
1484 }
1485
1486 log_printf("\n", 0);
1487 return (result);
1488 }
1489
1490 /*
1491 * disp_env_status
1492 *
1493 * This routine displays the environmental status passed up from
1494 * device drivers via kstats. The kstat names are defined in
1495 * kernel header files included by this module.
1496 */
1497 static int
disp_env_status(struct system_kstat_data * kstats)1498 disp_env_status(struct system_kstat_data *kstats)
1499 {
1500 struct bd_kstat_data *bksp;
1501 int exit_code = 0;
1502 int i;
1503 uchar_t curr_temp;
1504 int is4slot = 0;
1505
1506 /*
1507 * Define some message arrays to make life simpler. These
1508 * messages correspond to definitions in <sys/fhc.c> for
1509 * temperature trend (enum temp_trend) and temperature state
1510 * (enum temp_state).
1511 */
1512 static char *temp_trend_msg[] = { "unknown",
1513 "rapidly falling",
1514 "falling",
1515 "stable",
1516 "rising",
1517 "rapidly rising",
1518 "unknown (noisy)"
1519 };
1520 static char *temp_state_msg[] = { " OK ",
1521 "WARNING ",
1522 " DANGER "
1523 };
1524
1525 log_printf("\n", 0);
1526 log_printf("=========================", 0);
1527 log_printf(dgettext(TEXT_DOMAIN, " Environmental Status "), 0);
1528 log_printf("=========================", 0);
1529 log_printf("\n", 0);
1530
1531 exit_code = disp_keysw_and_leds(kstats);
1532
1533 if (!kstats->sys_kstats_ok) {
1534 log_printf(dgettext(TEXT_DOMAIN,
1535 "*** Error: Unavailable ***\n\n"));
1536 return (1);
1537 }
1538
1539 /*
1540 * for purposes within this routine,
1541 * 5 slot behaves the same as a 4 slot
1542 */
1543 if (SYS_TYPE(kstats->sysstat1) == SYS_4_SLOT)
1544 is4slot = 1;
1545
1546 log_printf("\n", 0);
1547 log_printf("\nFans:\n", 0);
1548 log_printf("-----\n", 0);
1549
1550 log_printf("Unit Status\n", 0);
1551 log_printf("---- ------\n", 0);
1552
1553 log_printf("%-4s ", is4slot ? "Disk" : "Rack", 0);
1554 /* Check the status of the Rack Fans */
1555 if ((kstats->fan_status & SYS_RACK_FANFAIL) == 0) {
1556 log_printf("OK\n", 0);
1557 } else {
1558 log_printf("FAIL\n", 0);
1559 exit_code = 1;
1560 }
1561
1562 if (!is4slot) {
1563 /*
1564 * keyswitch and ac box are on 8 & 16 slot only
1565 */
1566 /* Check the status of the Keyswitch Fan assembly. */
1567 log_printf("%-4s ", "Key", 0);
1568 if ((kstats->fan_status & SYS_KEYSW_FAN_OK) != 0) {
1569 log_printf("OK\n", 0);
1570 } else {
1571 log_printf("FAIL\n", 0);
1572 exit_code = 1;
1573 }
1574
1575 log_printf("%-4s ", "AC", 0);
1576 if ((kstats->fan_status & SYS_AC_FAN_OK) != 0) {
1577 log_printf("OK\n", 0);
1578 } else {
1579 log_printf("FAIL\n", 0);
1580 exit_code = 1;
1581 }
1582 } else {
1583 /*
1584 * peripheral fan is on 4 slot only
1585 * XXX might want to indicate transient states too
1586 */
1587 if (kstats->psstat_kstat_ok) {
1588 if (kstats->ps_shadow[SYS_P_FAN_INDEX] == PS_OK) {
1589 log_printf("PPS OK\n", 0);
1590 } else if (kstats->ps_shadow[SYS_P_FAN_INDEX] ==
1591 PS_FAIL) {
1592 log_printf("PPS FAIL\n", 0);
1593 exit_code = 1;
1594 }
1595 }
1596 }
1597
1598 log_printf("\n", 0);
1599
1600
1601 log_printf("System Temperatures (Celsius):\n", 0);
1602 log_printf("------------------------------\n", 0);
1603 log_printf("Brd State Current Min Max Trend\n", 0);
1604 log_printf("--- ------- ------- --- --- -----\n", 0);
1605
1606 for (i = 0, bksp = &kstats->bd_ksp_list[0]; i < MAX_BOARDS;
1607 i++, bksp++) {
1608
1609 /* Make sure we have kstats for this board first */
1610 if (!bksp->temp_kstat_ok) {
1611 continue;
1612 }
1613 log_printf("%2d ", i, 0);
1614
1615 /* Print the current state of the temperature */
1616 log_printf("%s", temp_state_msg[bksp->tempstat.state], 0);
1617 /* Set exit code for WARNING and DANGER */
1618 if (bksp->tempstat.state != 0)
1619 exit_code = 1;
1620
1621 /* Print the current temperature */
1622 curr_temp = bksp->tempstat.l1[bksp->tempstat.index % L1_SZ];
1623 log_printf(" %2d ", curr_temp, 0);
1624
1625 /* Print the minimum recorded temperature */
1626 log_printf(" %2d ", bksp->tempstat.min, 0);
1627
1628 /* Print the maximum recorded temperature */
1629 log_printf(" %2d ", bksp->tempstat.max, 0);
1630
1631 /* Print the current trend in temperature (if available) */
1632 if (bksp->tempstat.version < 2)
1633 log_printf("unknown\n", 0);
1634 else
1635 log_printf("%s\n", temp_trend_msg[bksp->tempstat.trend], 0);
1636 }
1637 if (kstats->temp_kstat_ok) {
1638 log_printf("CLK ", 0);
1639
1640 /* Print the current state of the temperature */
1641 log_printf("%s", temp_state_msg[kstats->tempstat.state], 0);
1642 /* Set exit code for WARNING or DANGER */
1643 if (kstats->tempstat.state != 0)
1644 exit_code = 1;
1645
1646 /* Print the current temperature */
1647 curr_temp = kstats->tempstat.l1[kstats->tempstat.index % L1_SZ];
1648 log_printf(" %2d ", curr_temp, 0);
1649
1650 /* Print the minimum recorded temperature */
1651 log_printf(" %2d ", kstats->tempstat.min, 0);
1652
1653 /* Print the maximum recorded temperature */
1654 log_printf(" %2d ", kstats->tempstat.max, 0);
1655
1656 /* Print the current trend in temperature (if available) */
1657 if (kstats->tempstat.version < 2)
1658 log_printf("unknown\n\n", 0);
1659 else
1660 log_printf("%s\n\n",
1661 temp_trend_msg[kstats->tempstat.trend], 0);
1662 } else {
1663 log_printf("\n");
1664 }
1665
1666 log_printf("\n", 0);
1667 log_printf("Power Supplies:\n", 0);
1668 log_printf("---------------\n", 0);
1669 log_printf("Supply Status\n", 0);
1670 log_printf("--------- ------\n", 0);
1671 if (kstats->psstat_kstat_ok) {
1672 for (i = 0; i < SYS_PS_COUNT; i++) {
1673 char *ps, *state;
1674
1675 /* skip core power supplies that are not present */
1676 if (i <= SYS_PPS0_INDEX && kstats->ps_shadow[i] ==
1677 PS_OUT)
1678 continue;
1679
1680 /* Display the unit Number */
1681 switch (i) {
1682 case 0: ps = "0"; break;
1683 case 1: ps = "1"; break;
1684 case 2: ps = "2"; break;
1685 case 3: ps = "3"; break;
1686 case 4: ps = "4"; break;
1687 case 5: ps = "5"; break;
1688 case 6: ps = "6"; break;
1689 case 7: ps = is4slot ? "2nd PPS" : "7"; break;
1690
1691 case SYS_PPS0_INDEX: ps = "PPS"; break;
1692 case SYS_CLK_33_INDEX: ps = " System 3.3v"; break;
1693 case SYS_CLK_50_INDEX: ps = " System 5.0v"; break;
1694 case SYS_V5_P_INDEX: ps = " Peripheral 5.0v"; break;
1695 case SYS_V12_P_INDEX: ps = " Peripheral 12v"; break;
1696 case SYS_V5_AUX_INDEX: ps = " Auxiliary 5.0v"; break;
1697 case SYS_V5_P_PCH_INDEX: ps =
1698 " Peripheral 5.0v precharge";
1699 break;
1700 case SYS_V12_P_PCH_INDEX: ps =
1701 " Peripheral 12v precharge";
1702 break;
1703 case SYS_V3_PCH_INDEX: ps =
1704 " System 3.3v precharge"; break;
1705 case SYS_V5_PCH_INDEX: ps =
1706 " System 5.0v precharge"; break;
1707
1708 /* skip the peripheral fan here */
1709 case SYS_P_FAN_INDEX:
1710 continue;
1711 }
1712
1713 /* what is the state? */
1714 switch (kstats->ps_shadow[i]) {
1715 case PS_OK:
1716 state = "OK";
1717 break;
1718
1719 case PS_FAIL:
1720 state = "FAIL";
1721 exit_code = 1;
1722 break;
1723
1724 /* XXX is this an exit_code condition? */
1725 case PS_OUT:
1726 state = "PPS Out";
1727 exit_code = 1;
1728 break;
1729
1730 case PS_UNKNOWN:
1731 state = "Unknown";
1732 break;
1733
1734 default:
1735 state = "Illegal State";
1736 break;
1737 }
1738
1739 log_printf("%-32s %s\n", ps, state, 0);
1740 }
1741 }
1742
1743 /* Check status of the system AC Power Source */
1744 log_printf("%-32s ", "AC Power", 0);
1745 if ((kstats->sysstat2 & SYS_AC_FAIL) == 0) {
1746 log_printf("OK\n", 0);
1747 } else {
1748 log_printf("failed\n", 0);
1749 exit_code = 1;
1750 }
1751 log_printf("\n", 0);
1752
1753 return (exit_code);
1754 }
1755
1756
1757 /*
1758 * Many of the ASICs present in fusion machines have implementation and
1759 * version numbers stored in the OBP device tree. These codes are displayed
1760 * in this routine in an effort to aid Engineering and Field service
1761 * in detecting old ASICs which may have bugs in them.
1762 */
1763 static void
sunfire_disp_asic_revs(Sys_tree * tree,struct system_kstat_data * kstats)1764 sunfire_disp_asic_revs(Sys_tree *tree, struct system_kstat_data *kstats)
1765 {
1766 Board_node *bnode;
1767 Prom_node *pnode;
1768 int isplusbrd;
1769 char *board_str[] = { "Uninitialized", "Unknown", "CPU",
1770 "Memory", "Dual-SBus", "UPA-SBus",
1771 "Dual-PCI", "Disk", "Clock",
1772 "Dual-SBus-SOC+", "UPA-SBus-SOC+"};
1773
1774 /* Print the header */
1775 log_printf("\n", 0);
1776 log_printf("=========================", 0);
1777 log_printf(" HW Revisions ", 0);
1778 log_printf("=========================", 0);
1779 log_printf("\n", 0);
1780 log_printf("\n", 0);
1781
1782 /* Else this is a Sunfire or campfire */
1783 log_printf("ASIC Revisions:\n", 0);
1784 log_printf("---------------\n", 0);
1785
1786 /* Display Firetruck ASIC Revisions first */
1787 log_printf("Brd FHC AC SBus0 SBus1 PCI0 PCI1 FEPS", 0);
1788 log_printf(" Board Type Attributes", 0);
1789 log_printf("\n", 0);
1790 log_printf("--- --- -- ----- ----- ---- ---- ----", 0);
1791 log_printf(" ---------- ----------", 0);
1792 log_printf("\n", 0);
1793
1794 /*
1795 * Display all of the FHC, AC, and chip revisions for the entire
1796 * machine. The AC anf FHC chip revs are available from the device
1797 * tree that was read out of the PROM, but the DC chip revs will be
1798 * read via a kstat. The interfaces for this are not completely
1799 * available at this time.
1800 */
1801 bnode = tree->bd_list;
1802 while (bnode != NULL) {
1803 int *version;
1804 int upa = bd_to_upa(bnode->board_num);
1805
1806 /* Display the header with the board number */
1807 log_printf("%2d ", bnode->board_num, 0);
1808
1809 /* display the FHC version */
1810 if ((pnode = dev_find_node(bnode->nodes, "fhc")) == NULL) {
1811 log_printf(" ", 0);
1812 } else {
1813 if ((version = (int *)get_prop_val(find_prop(pnode,
1814 "version#"))) == NULL) {
1815 log_printf(" ", 0);
1816 } else {
1817 log_printf(" %d ", *version, 0);
1818 }
1819 }
1820
1821 /* display the AC version */
1822 if ((pnode = dev_find_node(bnode->nodes, "ac")) == NULL) {
1823 log_printf(" ", 0);
1824 } else {
1825 if ((version = (int *)get_prop_val(find_prop(pnode,
1826 "version#"))) == NULL) {
1827 log_printf(" ", 0);
1828 } else {
1829 log_printf(" %d ", *version, 0);
1830 }
1831 }
1832
1833 /* Find sysio 0 on board and print rev */
1834 if ((pnode = find_device(bnode, upa, "sbus")) == NULL) {
1835 log_printf(" ", 0);
1836 } else {
1837 if ((version = (int *)get_prop_val(find_prop(pnode,
1838 "version#"))) == NULL) {
1839 log_printf(" ", 0);
1840 } else {
1841 log_printf(" %d ", *version, 0);
1842 }
1843 }
1844
1845 /* Find sysio 1 on board and print rev */
1846 if ((pnode = find_device(bnode, upa+1, "sbus")) == NULL) {
1847 log_printf(" ", 0);
1848 } else {
1849 if ((version = (int *)get_prop_val(find_prop(pnode,
1850 "version#"))) == NULL) {
1851 log_printf(" ", 0);
1852 } else {
1853 log_printf(" %d ", *version, 0);
1854 }
1855 }
1856
1857 /* Find Psycho 0 on board and print rev */
1858 if ((pnode = find_device(bnode, upa, "pci")) == NULL) {
1859 log_printf(" ", 0);
1860 } else {
1861 if ((version = (int *)get_prop_val(find_prop(pnode,
1862 "version#"))) == NULL) {
1863 log_printf(" ", 0);
1864 } else {
1865 log_printf(" %d ", *version, 0);
1866 }
1867 }
1868
1869 /* Find Psycho 1 on board and print rev */
1870 if ((pnode = find_device(bnode, upa+1, "pci")) == NULL) {
1871 log_printf(" ", 0);
1872 } else {
1873 if ((version = (int *)get_prop_val(find_prop(pnode,
1874 "version#"))) == NULL) {
1875 log_printf(" ", 0);
1876 } else {
1877 log_printf(" %d ", *version, 0);
1878 }
1879 }
1880
1881 /* Find the FEPS on board and print rev */
1882 if ((pnode = dev_find_node(bnode->nodes, "SUNW,hme")) != NULL) {
1883 if ((version = (int *)get_prop_val(find_prop(pnode,
1884 "hm-rev"))) != NULL) {
1885 if (*version == 0xa0) {
1886 log_printf(" 2.0 ", 0);
1887 } else if (*version == 0x20) {
1888 log_printf(" 2.1 ", 0);
1889 } else {
1890 log_printf(" %2x ", *version, 0);
1891 }
1892 }
1893 } else
1894 log_printf(" ", 0);
1895
1896 /* print out the board type */
1897 isplusbrd = ISPLUSBRD(kstats->bd_ksp_list
1898 [bnode->board_num].fhc_bsr);
1899
1900 log_printf("%-16s", board_str[bnode->board_type], 0);
1901 if (isplusbrd)
1902 log_printf("100MHz Capable", 0);
1903 else
1904 log_printf("84MHz Capable", 0);
1905
1906 log_printf("\n", 0);
1907 bnode = bnode->next;
1908 }
1909 log_printf("\n", 0);
1910
1911 /* Now display the FFB board component revisions */
1912 for (bnode = tree->bd_list; bnode != NULL; bnode = bnode->next) {
1913 display_ffb(bnode, 0);
1914 }
1915 }
1916
1917 static void
display_hp_boards(struct system_kstat_data * kstats)1918 display_hp_boards(struct system_kstat_data *kstats)
1919 {
1920 int i;
1921 int j;
1922 int hp_found = 0;
1923 struct hp_info *hp;
1924 char *state;
1925
1926 for (i = 0, hp = &kstats->hp_info[0]; i < MAX_BOARDS; i++, hp++) {
1927 if (!hp->kstat_ok) {
1928 continue;
1929 }
1930
1931 hp_found = 1;
1932 }
1933
1934 /* return if there are no hotplug boards in the system. */
1935 if (!hp_found) {
1936 return;
1937 }
1938
1939 if (hp_found != 0) {
1940 log_printf("\n", 0);
1941 log_printf("Detached Boards\n", 0);
1942 log_printf("===============\n", 0);
1943 log_printf(" Slot State Type Info\n", 0);
1944 log_printf(" ---- --------- ------ ----"
1945 "-------------------------------------\n", 0);
1946 }
1947
1948 /* Display all detached boards */
1949 for (i = 0, hp = &kstats->hp_info[0]; i < MAX_BOARDS; i++, hp++) {
1950 struct cpu_info *cpu;
1951
1952 if (hp->kstat_ok == 0) {
1953 continue;
1954 }
1955
1956
1957 switch (hp->bd_info.state) {
1958 case UNKNOWN_STATE:
1959 state = "unknown";
1960 break;
1961
1962 case ACTIVE_STATE:
1963 state = "active";
1964 break;
1965
1966 case LOWPOWER_STATE:
1967 state = "low-power";
1968 break;
1969
1970 case HOTPLUG_STATE:
1971 state = "hot-plug";
1972 break;
1973
1974 case DISABLED_STATE:
1975 state = "disabled";
1976 break;
1977
1978 case FAILED_STATE:
1979 state = "failed";
1980 break;
1981
1982 default:
1983 state = "unknown";
1984 break;
1985 }
1986
1987 log_printf(" %2d %9s ", i, state, 0);
1988
1989 switch (hp->bd_info.type) {
1990 case MEM_BOARD:
1991 log_printf("%-14s ", MEM_BD_NAME, 0);
1992 break;
1993
1994 case CPU_BOARD:
1995 log_printf("%-14s ", CPU_BD_NAME, 0);
1996
1997 /* Cannot display CPU info for disabled boards */
1998 if ((hp->bd_info.state == DISABLED_STATE) ||
1999 (hp->bd_info.state == FAILED_STATE)) {
2000 break;
2001 }
2002
2003 /* Display both CPUs if present */
2004 cpu = &hp->bd_info.bd.cpu[0];
2005 for (j = 0; j < 2; j++, cpu++) {
2006 log_printf("CPU %d: ", j, 0);
2007 /* Print the rated speed of the CPU. */
2008 if (cpu->cpu_speed > 1) {
2009 log_printf("%3d MHz", cpu->cpu_speed,
2010 0);
2011 } else {
2012 log_printf("no CPU ", 0);
2013 continue;
2014 }
2015
2016 /* Display the size of the cache */
2017 if (cpu->cache_size != 0) {
2018 log_printf(" %0.1fM ",
2019 (float)cpu->cache_size /
2020 (float)(1024*1024), 0);
2021 } else {
2022 log_printf(" ", 0);
2023 }
2024 }
2025 break;
2026
2027 case IO_2SBUS_BOARD:
2028 log_printf("%-14s ", IO_2SBUS_BD_NAME, 0);
2029 break;
2030
2031 case IO_2SBUS_SOCPLUS_BOARD:
2032 log_printf("%-14s ", IO_2SBUS_SOCPLUS_BD_NAME, 0);
2033 break;
2034
2035 case IO_SBUS_FFB_BOARD:
2036 log_printf("%-14s ", IO_SBUS_FFB_BD_NAME, 0);
2037 switch (hp->bd_info.bd.io2.ffb_size) {
2038 case FFB_SINGLE:
2039 log_printf("Single buffered FFB", 0);
2040 break;
2041
2042 case FFB_DOUBLE:
2043 log_printf("Double buffered FFB", 0);
2044 break;
2045
2046 case FFB_NOT_FOUND:
2047 log_printf("No FFB installed", 0);
2048 break;
2049
2050 default:
2051 log_printf("Illegal FFB size", 0);
2052 break;
2053 }
2054 break;
2055
2056 case IO_SBUS_FFB_SOCPLUS_BOARD:
2057 log_printf("%-14s ", IO_SBUS_FFB_SOCPLUS_BD_NAME, 0);
2058 switch (hp->bd_info.bd.io2.ffb_size) {
2059 case FFB_SINGLE:
2060 log_printf("Single buffered FFB", 0);
2061 break;
2062
2063 case FFB_DOUBLE:
2064 log_printf("Double buffered FFB", 0);
2065 break;
2066
2067 case FFB_NOT_FOUND:
2068 log_printf("No FFB installed", 0);
2069 break;
2070
2071 default:
2072 log_printf("Illegal FFB size", 0);
2073 break;
2074 }
2075 break;
2076
2077 case IO_PCI_BOARD:
2078 log_printf("%-14s ", IO_PCI_BD_NAME, 0);
2079 break;
2080
2081 case DISK_BOARD:
2082 log_printf("%-14s ", "disk", 0);
2083 for (j = 0; j < 2; j++) {
2084 log_printf("Disk %d:", j, 0);
2085 if (hp->bd_info.bd.dsk.disk_pres[j]) {
2086 log_printf(" Target: %2d ",
2087 hp->bd_info.bd.dsk.disk_id[j],
2088 0);
2089 } else {
2090 log_printf(" no disk ", 0);
2091 }
2092 }
2093 break;
2094
2095 case UNKNOWN_BOARD:
2096 case UNINIT_BOARD:
2097 default:
2098 log_printf("UNKNOWN ", 0);
2099 break;
2100 }
2101 log_printf("\n");
2102 }
2103 }
2104
/*
 * Analysis functions:
 *
 * Most of the fatal error data analyzed from the error registers is not
 * very complicated. This is because the FRU for errors detected by
 * most parts is either a CPU module, an FFB, or the system board
 * itself.
 * The analysis of the Address Controller errors is the most complicated.
 * These errors can be caused by other boards as well as the local board.
 */
2115
2116 /*
2117 * analyze_cpu
2118 *
2119 * Analyze the CPU MFSR passed in and determine what type of fatal
2120 * hardware errors occurred at the time of the crash. This function
2121 * returns a pointer to a string to the calling routine.
2122 */
2123 static int
analyze_cpu(char ** msgs,int cpu_id,u_longlong_t afsr)2124 analyze_cpu(char **msgs, int cpu_id, u_longlong_t afsr)
2125 {
2126 int count = 0;
2127 int i;
2128 int syndrome;
2129 char msgbuf[MAXSTRLEN];
2130
2131 if (msgs == NULL) {
2132 return (count);
2133 }
2134
2135 if (afsr & P_AFSR_ETP) {
2136 (void) sprintf(msgbuf, "CPU %d Ecache Tag Parity Error, ",
2137 cpu_id);
2138
2139 /* extract syndrome for afsr */
2140 syndrome = (afsr & P_AFSR_ETS) >> ETS_SHIFT;
2141
2142 /* now concat the parity syndrome msg */
2143 for (i = 0; i < 4; i++) {
2144 if ((0x1 << i) & syndrome) {
2145 (void) strcat(msgbuf, ecache_parity[i]);
2146 }
2147 }
2148 (void) strcat(msgbuf, "\n");
2149 *msgs++ = strdup(msgbuf);
2150 count++;
2151 }
2152
2153 if (afsr & P_AFSR_ISAP) {
2154 (void) sprintf(msgbuf,
2155 "CPU %d Incoming System Address Parity Error\n",
2156 cpu_id);
2157 *msgs++ = strdup(msgbuf);
2158 count++;
2159 }
2160
2161 return (count);
2162 }
2163
2164 /*
2165 * analyze_ac
2166 *
2167 * This function checks the AC error register passed in and checks
2168 * for any errors that occured during the fatal hardware reset.
2169 */
2170 static int
analyze_ac(char ** msgs,u_longlong_t ac_error)2171 analyze_ac(char **msgs, u_longlong_t ac_error)
2172 {
2173 int i;
2174 int count = 0;
2175 char msgbuf[MAXSTRLEN];
2176 int tmp_cnt;
2177
2178 if (msgs == NULL) {
2179 return (count);
2180 }
2181
2182 for (i = 2; i < MAX_BITS; i++) {
2183 if ((((u_longlong_t)0x1 << i) & ac_error) != 0) {
2184 if (ac_errors[i].error != NULL) {
2185 (void) sprintf(msgbuf, "AC: %s\n",
2186 ac_errors[i].error);
2187 *msgs++ = strdup(msgbuf);
2188 count++;
2189
2190 /* display the part that might cause this */
2191 tmp_cnt = disp_parts(msgs, ac_error, i);
2192 count += tmp_cnt;
2193 msgs += tmp_cnt;
2194 }
2195 }
2196 }
2197
2198 return (count);
2199 }
2200
2201 /*
2202 * analyze_dc
2203 *
2204 * This routine checks the DC shdow chain and tries to determine
2205 * what type of error might have caused the fatal hardware reset
2206 * error.
2207 */
2208 static int
analyze_dc(int board,char ** msgs,u_longlong_t dc_error)2209 analyze_dc(int board, char **msgs, u_longlong_t dc_error)
2210 {
2211 int i;
2212 int count = 0;
2213 char msgbuf[MAXSTRLEN];
2214
2215 if (msgs == NULL) {
2216 return (count);
2217 }
2218
2219 /*
2220 * The DC scan data is contained in 8 bytes, one byte per
2221 * DC. There are 8 DCs on a system board.
2222 */
2223
2224 for (i = 0; i < 8; i++) {
2225 if (dc_error & DC_OVERFLOW) {
2226 (void) sprintf(msgbuf, dc_overflow_txt, board, i);
2227 *msgs++ = strdup(msgbuf);
2228 count++;
2229 }
2230
2231 if (dc_error & DC_PARITY) {
2232 (void) sprintf(msgbuf, dc_parity_txt, board, i);
2233 *msgs++ = strdup(msgbuf);
2234 count++;
2235 }
2236 dc_error = dc_error >> 8; /* shift over to next byte */
2237 }
2238
2239 return (count);
2240 }
2241
2242 static int
disp_parts(char ** msgs,u_longlong_t ac_error,int type)2243 disp_parts(char **msgs, u_longlong_t ac_error, int type)
2244 {
2245 int count = 0;
2246 int part;
2247 char msgbuf[MAXSTRLEN];
2248 int i;
2249
2250 if (msgs == NULL) {
2251 return (count);
2252 }
2253
2254 (void) sprintf(msgbuf, "\tThe error could be caused by:\n");
2255 *msgs++ = strdup(msgbuf);
2256 count++;
2257
2258 for (i = 0; (i < MAX_FRUS) && ac_errors[type].part[i]; i++) {
2259 part = ac_errors[type].part[i];
2260
2261 if (part == UPA_PART) {
2262 if (ac_error & UPA_PORT_A) {
2263 part = UPA_A_PART;
2264 } else if (ac_error & UPA_PORT_B) {
2265 part = UPA_B_PART;
2266 }
2267 }
2268
2269 if (part == DTAG_PART) {
2270 if (ac_error & UPA_PORT_A) {
2271 part = DTAG_A_PART;
2272 } else if (ac_error & UPA_PORT_B) {
2273 part = DTAG_B_PART;
2274 }
2275 }
2276
2277 (void) sprintf(msgbuf, "\t\t%s\n", part_str[part]);
2278
2279 *msgs++ = strdup(msgbuf);
2280 count++;
2281 }
2282
2283 return (count);
2284 }
2285