xref: /illumos-gate/usr/src/uts/sun4u/sunfire/io/fhc.c (revision 5422785d352a2bb398daceab3d1898a8aa64d006)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/ddi_impldefs.h>
33 #include <sys/obpdefs.h>
34 #include <sys/promif.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/vmem.h>
39 #include <sys/debug.h>
40 #include <sys/sysmacros.h>
41 #include <sys/intreg.h>
42 #include <sys/autoconf.h>
43 #include <sys/modctl.h>
44 #include <sys/spl.h>
45 #include <sys/time.h>
46 #include <sys/systm.h>
47 #include <sys/machsystm.h>
48 #include <sys/cpu.h>
49 #include <sys/cpuvar.h>
50 #include <sys/x_call.h>		/* xt_one() */
51 #include <sys/membar.h>
52 #include <sys/vm.h>
53 #include <vm/seg_kmem.h>
54 #include <vm/hat_sfmmu.h>
55 #include <sys/promimpl.h>
56 #include <sys/prom_plat.h>
57 #include <sys/cpu_module.h>	/* flush_instr_mem() */
58 #include <sys/procset.h>
59 #include <sys/fhc.h>
60 #include <sys/ac.h>
61 #include <sys/environ.h>
62 #include <sys/jtag.h>
63 #include <sys/nexusdebug.h>
64 #include <sys/ac.h>
65 #include <sys/ddi_subrdefs.h>
66 #include <sys/eeprom.h>
67 #include <sys/sdt.h>
68 #include <sys/ddi_implfuncs.h>
69 #include <sys/ontrap.h>
70 
71 #ifndef TRUE
72 #define	TRUE (1)
73 #endif
74 #ifndef FALSE
75 #define	FALSE (0)
76 #endif
77 
78 /*
79  * Function to register and deregister callbacks, for sunfire only.
80  */
81 extern void plat_register_tod_fault(void (*func)(enum tod_fault_type));
82 
83 /*
84  * This table represents the FHC interrupt priorities.  They range from
85  * 1-15, and have been modeled after the sun4d interrupts. The mondo
86  * number anded with 0x7 is used to index into this table. This was
87  * done to save table space.
88  */
/*
 * PIL assigned to each FHC interrupt source, one entry per source.
 * NOTE(review): the table holds four entries, so the lookup index must
 * stay within 0-3 even though the comment above talks about masking the
 * mondo with 0x7 -- confirm against the code that indexes this table.
 */
static int fhc_int_priorities[] = {
	PIL_15,			/* System interrupt priority */
	PIL_12,			/* zs interrupt priority */
	PIL_15,			/* TOD interrupt priority */
	PIL_15			/* Fan Fail priority */
};
95 
96 static void fhc_tod_fault(enum tod_fault_type tod_bad);
97 static void fhc_cpu_shutdown_self(void);
98 static void os_completes_shutdown(void);
99 
100 /*
101  * The dont_calibrate variable is meant to be set to one in /etc/system
102  * or by boot -h so that the calibration tables are not used. This
103  * is useful for checking thermistors whose output seems to be incorrect.
104  */
105 static int dont_calibrate = 0;
106 
107 /* Only one processor should powerdown the system. */
108 static int powerdown_started = 0;
109 
110 /* Let user disable overtemp powerdown. */
111 int enable_overtemp_powerdown = 1;
112 
/*
 * The following tables correspond to the degrees Celsius for each count
 * value possible from the 8-bit A/D converters on each type of system
 * board for the UltraSPARC Server systems. To access a temperature,
 * just index into the correct table using the count from the A/D converter
 * register, and that is the correct temperature in degrees Celsius. These
 * values can be negative.
 */
120  */
/*
 * Count-to-temperature table, indexed by the raw A/D count; the trailing
 * comment on each row gives the range of counts that row covers (0-168).
 * CPU_MX_CNT is the number of entries in the table.
 */
static short cpu_table[] = {
-16,	-14,	-12,	-10,	-8,	-6,	-4,	-2,	/* 0-7 */
1,	4,	6,	8,	10,	12,	13,	15,	/* 8-15 */
16,	18,	19,	20,	22,	23,	24,	25,	/* 16-23 */
26,	27,	28,	29,	30,	31,	32,	33,	/* 24-31 */
34,	35,	35,	36,	37,	38,	39,	39,	/* 32-39 */
40,	41,	41,	42,	43,	44,	44,	45,	/* 40-47 */
46,	46,	47,	47,	48,	49,	49,	50,	/* 48-55 */
51,	51,	52,	53,	53,	54,	54,	55,	/* 56-63 */
55,	56,	56,	57,	57,	58,	58,	59,	/* 64-71 */
60,	60,	61,	61,	62,	62,	63,	63,	/* 72-79 */
64,	64,	65,	65,	66,	66,	67,	67,	/* 80-87 */
68,	68,	69,	69,	70,	70,	71,	71,	/* 88-95 */
72,	72,	73,	73,	74,	74,	75,	75,	/* 96-103 */
76,	76,	77,	77,	78,	78,	79,	79,	/* 104-111 */
80,	80,	81,	81,	82,	82,	83,	83,	/* 112-119 */
84,	84,	85,	85,	86,	86,	87,	87,	/* 120-127 */
88,	88,	89,	89,	90,	90,	91,	91,	/* 128-135 */
92,	92,	93,	93,	94,	94,	95,	95,	/* 136-143 */
96,	96,	97,	98,	98,	99,	99,	100,	/* 144-151 */
100,	101,	101,	102,	103,	103,	104,	104,	/* 152-159 */
105,	106,	106,	107,	107,	108,	109,	109,	/* 160-167 */
110,								/* 168 */
};

/* Number of entries in cpu_table. */
#define	CPU_MX_CNT	(sizeof (cpu_table)/sizeof (short))
147 
/*
 * Second CPU count-to-temperature table, indexed by the raw A/D count
 * (rows are annotated with the counts they cover, 0-209).
 * NOTE(review): which CPU board variant selects this table instead of
 * cpu_table is decided elsewhere -- confirm in calibrate_temp().
 */
static short cpu2_table[] = {
-17,	-16,	-15,	-14,	-13,	-12,	-11,	-10,	/* 0-7 */
-9,	-8,	-7,	-6,	-5,	-4,	-3,	-2,	/* 8-15 */
-1,	0,	1,	2,	3,	4,	5,	6,	/* 16-23 */
7,	8,	9,	10,	11,	12,	13,	13,	/* 24-31 */
14,	15,	16,	16,	17,	18,	18,	19,	/* 32-39 */
20,	20,	21,	22,	22,	23,	24,	24,	/* 40-47 */
25,	25,	26,	26,	27,	27,	28,	28,	/* 48-55 */
29,	30,	30,	31,	31,	32,	32,	33,	/* 56-63 */
33,	34,	34,	35,	35,	36,	36,	37,	/* 64-71 */
37,	37,	38,	38,	39,	39,	40,	40,	/* 72-79 */
41,	41,	42,	42,	43,	43,	43,	44,	/* 80-87 */
44,	45,	45,	46,	46,	46,	47,	47,	/* 88-95 */
48,	48,	49,	49,	50,	50,	50,	51,	/* 96-103 */
51,	52,	52,	53,	53,	53,	54,	54,	/* 104-111 */
55,	55,	56,	56,	56,	57,	57,	58,	/* 112-119 */
58,	59,	59,	59,	60,	60,	61,	61,	/* 120-127 */
62,	62,	63,	63,	63,	64,	64,	65,	/* 128-135 */
65,	66,	66,	67,	67,	68,	68,	68,	/* 136-143 */
69,	69,	70,	70,	71,	71,	72,	72,	/* 144-151 */
73,	73,	74,	74,	75,	75,	76,	76,	/* 152-159 */
77,	77,	78,	78,	79,	79,	80,	80,	/* 160-167 */
81,	81,	82,	83,	83,	84,	84,	85,	/* 168-175 */
85,	86,	87,	87,	88,	88,	89,	90,	/* 176-183 */
90,	91,	92,	92,	93,	94,	94,	95,	/* 184-191 */
96,	96,	97,	98,	99,	99,	100,	101,	/* 192-199 */
102,	103,	103,	104,	105,	106,	107,	108,	/* 200-207 */
109,	110,							/* 208-209 */
};

/* Number of entries in cpu2_table. */
#define	CPU2_MX_CNT	(sizeof (cpu2_table)/sizeof (short))
179 
/*
 * IO board count-to-temperature table, indexed by the raw A/D count
 * (rows are annotated with the counts they cover, 0-119).  Counts 0-40
 * all map to 0.
 */
static short io_table[] = {
0,	0,	0,	0,	0,	0,	0,	0,	/* 0-7 */
0,	0,	0,	0,	0,	0,	0,	0,	/* 8-15 */
0,	0,	0,	0,	0,	0,	0,	0,	/* 16-23 */
0,	0,	0,	0,	0,	0,	0,	0,	/* 24-31 */
0,	0,	0,	0,	0,	0,	0,	0,	/* 32-39 */
0,	3,	7,	10,	13,	15,	17,	19,	/* 40-47 */
21,	23,	25,	27,	28,	30,	31,	32,	/* 48-55 */
34,	35,	36,	37,	38,	39,	41,	42,	/* 56-63 */
43,	44,	45,	46,	46,	47,	48,	49,	/* 64-71 */
50,	51,	52,	53,	53,	54,	55,	56,	/* 72-79 */
57,	57,	58,	59,	60,	60,	61,	62,	/* 80-87 */
62,	63,	64,	64,	65,	66,	66,	67,	/* 88-95 */
68,	68,	69,	70,	70,	71,	72,	72,	/* 96-103 */
73,	73,	74,	75,	75,	76,	77,	77,	/* 104-111 */
78,	78,	79,	80,	80,	81,	81,	82,	/* 112-119 */
};

/*
 * IO_MN_CNT matches the last count whose table entry is still 0.
 * NOTE(review): presumably the lowest count treated as meaningful --
 * confirm against the calibration code.  IO_MX_CNT is the number of
 * entries in io_table.
 */
#define	IO_MN_CNT	40
#define	IO_MX_CNT	(sizeof (io_table)/sizeof (short))
200 
/*
 * Clock board count-to-temperature table, indexed by the raw A/D count
 * (rows are annotated with the counts they cover, 0-111).  Counts 0-11
 * all map to 0.
 */
static short clock_table[] = {
0,	0,	0,	0,	0,	0,	0,	0,	/* 0-7 */
0,	0,	0,	0,	1,	2,	4,	5,	/* 8-15 */
7,	8,	10,	11,	12,	13,	14,	15,	/* 16-23 */
17,	18,	19,	20,	21,	22,	23,	24,	/* 24-31 */
24,	25,	26,	27,	28,	29,	29,	30,	/* 32-39 */
31,	32,	32,	33,	34,	35,	35,	36,	/* 40-47 */
37,	38,	38,	39,	40,	40,	41,	42,	/* 48-55 */
42,	43,	44,	44,	45,	46,	46,	47,	/* 56-63 */
48,	48,	49,	50,	50,	51,	52,	52,	/* 64-71 */
53,	54,	54,	55,	56,	57,	57,	58,	/* 72-79 */
59,	59,	60,	60,	61,	62,	63,	63,	/* 80-87 */
64,	65,	65,	66,	67,	68,	68,	69,	/* 88-95 */
70,	70,	71,	72,	73,	74,	74,	75,	/* 96-103 */
76,	77,	78,	78,	79,	80,	81,	82,	/* 104-111 */
};

/*
 * CLK_MN_CNT matches the last count whose table entry is still 0.
 * NOTE(review): presumably the lowest count treated as meaningful --
 * confirm against the calibration code.  CLK_MX_CNT is the number of
 * entries in clock_table.
 */
#define	CLK_MN_CNT	11
#define	CLK_MX_CNT	(sizeof (clock_table)/sizeof (short))
220 
221 /*
222  * System temperature limits.
223  *
224  * The following variables are the warning and danger limits for the
225  * different types of system boards. The limits are different because
226  * the various boards reach different nominal temperatures because
227  * of the different components that they contain.
228  *
229  * The warning limit is the temperature at which the user is warned.
230  * The danger limit is the temperature at which the system is shutdown.
231  * In the case of CPU/Memory system boards, the system will attempt
232  * to offline and power down processors on a board in an attempt to
233  * bring the board back into the nominal temperature range before
234  * shutting down the system.
235  *
236  * These values can be tuned via /etc/system or boot -h.
237  */
238 short cpu_warn_temp = 73;	/* CPU/Memory Warning Temperature */
239 short cpu_danger_temp = 83;	/* CPU/Memory Danger Temperature */
240 short io_warn_temp = 60;	/* IO Board Warning Temperature */
241 short io_danger_temp = 68;	/* IO Board Danger Temperature */
242 short clk_warn_temp = 60;	/* Clock Board Warning Temperature */
243 short clk_danger_temp = 68;	/* Clock Board Danger Temperature */
244 
245 short dft_warn_temp = 60;	/* default warning temp value */
246 short dft_danger_temp = 68;	/* default danger temp value */
247 
248 short cpu_warn_temp_4x = 60; 	/* CPU/Memory warning temp for 400 MHZ */
249 short cpu_danger_temp_4x = 68;	/* CPU/Memory danger temp for 400 MHZ */
250 
251 /*
252  * This variable tells us if we are in a heat chamber. It is set
253  * early on in boot, after we check the OBP 'mfg-mode' property in
254  * the options node.
255  */
256 static int temperature_chamber = -1;
257 
258 /*
259  * The fhc memloc structure is protected under the bdlist lock
260  */
261 static struct fhc_memloc *fhc_base_memloc = NULL;
262 
263 /*
264  * Driver global fault list mutex and list head pointer. The list is
265  * protected by the mutex and contains a record of all known faults.
266  * Faults can be inherited from the PROM or detected by the kernel.
267  */
268 static kmutex_t ftlist_mutex;
269 static struct ft_link_list *ft_list = NULL;
270 static int ft_nfaults = 0;
271 
272 /*
273  * Table of all known fault strings. This table is indexed by the fault
274  * type. Do not change the ordering of the table without redefining the
275  * fault type enum list on fhc.h.
276  */
/*
 * Human-readable name for each fault type; the comment on each entry
 * names the fault type enumerator that is expected to index it.
 */
char *ft_str_table[] = {
	"Core Power Supply",		/* FT_CORE_PS */
	"Overtemp",			/* FT_OVERTEMP */
	"AC Power",			/* FT_AC_PWR */
	"Peripheral Power Supply",	/* FT_PPS */
	"System 3.3 Volt Power",	/* FT_CLK_33 */
	"System 5.0 Volt Power",	/* FT_CLK_50 */
	"Peripheral 5.0 Volt Power",	/* FT_V5_P */
	"Peripheral 12 Volt Power",	/* FT_V12_P */
	"Auxiliary 5.0 Volt Power",	/* FT_V5_AUX */
	"Peripheral 5.0 Volt Precharge", /* FT_V5_P_PCH */
	"Peripheral 12 Volt Precharge",	/* FT_V12_P_PCH */
	"System 3.3 Volt Precharge",	/* FT_V3_PCH */
	"System 5.0 Volt Precharge",	/* FT_V5_PCH */
	"Peripheral Power Supply Fans",	/* FT_PPS_FAN */
	"Rack Exhaust Fan",		/* FT_RACK_EXH */
	"Disk Drive Fan",		/* FT_DSK_FAN */
	"AC Box Fan",			/* FT_AC_FAN */
	"Key Switch Fan",		/* FT_KEYSW_FAN */
	"Minimum Power",		/* FT_INSUFFICIENT_POWER */
	"PROM detected",		/* FT_PROM */
	"Hot Plug Support System",	/* FT_HOT_PLUG */
	"TOD"				/* FT_TODFAULT */
};

/* Number of entries in ft_str_table. */
static int ft_max_index = (sizeof (ft_str_table) / sizeof (char *));
303 
304 /*
305  * Function prototypes
306  */
307 static int fhc_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
308 	void *, void *);
309 static int fhc_intr_ops(dev_info_t *dip, dev_info_t *rdip,
310 	ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
311 
312 static int fhc_add_intr_impl(dev_info_t *dip, dev_info_t *rdip,
313 	ddi_intr_handle_impl_t *hdlp);
314 static void fhc_remove_intr_impl(dev_info_t *dip, dev_info_t *rdip,
315 	ddi_intr_handle_impl_t *hdlp);
316 
317 static int fhc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
318 static int fhc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
319 static int fhc_init(struct fhc_soft_state *softsp);
320 static void fhc_unmap_regs(struct fhc_soft_state *softsp);
321 static enum board_type fhc_board_type(struct fhc_soft_state *, int);
322 
323 static void
324 fhc_xlate_intrs(ddi_intr_handle_impl_t *hdlp, uint32_t ign);
325 
326 static int
327 fhc_ctlops_peekpoke(ddi_ctl_enum_t, peekpoke_ctlops_t *, void *result);
328 
329 static void fhc_add_kstats(struct fhc_soft_state *);
330 static int fhc_kstat_update(kstat_t *, int);
331 static int check_for_chamber(void);
332 static int ft_ks_snapshot(struct kstat *, void *, int);
333 static int ft_ks_update(struct kstat *, int);
334 static int check_central(int board);
335 
336 /*
337  * board type and A/D convertor output passed in and real temperature
338  * is returned.
339  */
340 static short calibrate_temp(enum board_type, uchar_t, uint_t);
341 static enum temp_state get_temp_state(enum board_type, short, int);
342 
343 /* Routine to determine if there are CPUs on this board. */
344 static int cpu_on_board(int);
345 
346 static void build_bd_display_str(char *, enum board_type, int);
347 
348 /* Interrupt distribution callback function. */
349 static void fhc_intrdist(void *);
350 
351 /* CPU power control */
352 int fhc_cpu_poweroff(struct cpu *);	/* cpu_poweroff()->platform */
353 int fhc_cpu_poweron(struct cpu *);	/* cpu_poweron()->platform */
354 
355 extern struct cpu_node cpunodes[];
356 extern void halt(char *);
357 
358 /*
359  * Configuration data structures
360  */
/*
 * Nexus bus operations vector.  The driver supplies its own ctl
 * (fhc_ctlops) and interrupt-op (fhc_intr_ops) handlers; mapping and
 * property handling use the generic DDI routines, the DMA handle slots
 * are filled with the ddi_no_dma_* routines, and the remaining slots
 * are left empty.
 */
static struct bus_ops fhc_bus_ops = {
	BUSO_REV,
	ddi_bus_map,		/* map */
	0,			/* get_intrspec */
	0,			/* add_intrspec */
	0,			/* remove_intrspec */
	i_ddi_map_fault,	/* map_fault */
	ddi_no_dma_map,		/* dma_map */
	ddi_no_dma_allochdl,	/* dma_allochdl */
	ddi_no_dma_freehdl,	/* dma_freehdl */
	ddi_no_dma_bindhdl,	/* dma_bindhdl */
	ddi_no_dma_unbindhdl,	/* dma_unbindhdl */
	ddi_no_dma_flush,	/* dma_flush */
	ddi_no_dma_win,		/* dma_win */
	ddi_dma_mctl,		/* dma_ctl */
	fhc_ctlops,		/* ctl */
	ddi_bus_prop_op,	/* prop_op */
	0,			/* (*bus_get_eventcookie)();	*/
	0,			/* (*bus_add_eventcall)();	*/
	0,			/* (*bus_remove_eventcall)();	*/
	0,			/* (*bus_post_event)();		*/
	0,			/* (*bus_intr_control)();	*/
	0,			/* (*bus_config)();		*/
	0,			/* (*bus_unconfig)();		*/
	0,			/* (*bus_fm_init)();		*/
	0,			/* (*bus_fm_fini)();		*/
	0,			/* (*bus_fm_access_enter)();	*/
	0,			/* (*bus_fm_access_exit)();	*/
	0,			/* (*bus_power)();		*/
	fhc_intr_ops		/* (*bus_intr_op)();		*/
};
392 
/*
 * Character/block entry points.  Every slot is filled with one of the
 * stock nulldev/nodev/nochpoll routines (plus ddi_prop_op); the driver
 * provides no entry points of its own in this vector.
 */
static struct cb_ops fhc_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nulldev,		/* dump */
	nulldev,		/* read */
	nulldev,		/* write */
	nulldev, 		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_MP|D_NEW|D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,			/* rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};
413 
/*
 * Device operations vector.  attach and detach are the only
 * driver-specific routines; it also points at fhc_cb_ops and
 * fhc_bus_ops defined above.
 */
static struct dev_ops fhc_ops = {
	DEVO_REV,		/* rev */
	0,			/* refcnt  */
	ddi_no_info,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	fhc_attach,		/* attach */
	fhc_detach,		/* detach */
	nulldev,		/* reset */
	&fhc_cb_ops,		/* cb_ops */
	&fhc_bus_ops,		/* bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
428 
429 /*
430  * Driver globals
431  * TODO - We need to investigate what locking needs to be done here.
432  */
433 void *fhcp;				/* fhc soft state hook */
434 
435 extern struct mod_ops mod_driverops;
436 
/* Driver linkage record: ties the module name to fhc_ops. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"FHC Nexus",		/* Name of module. */
	&fhc_ops,		/* driver ops */
};
442 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info();
 * it lists the single modldrv above, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
448 
449 
450 /*
451  * These are the module initialization routines.
452  */
453 
454 static caddr_t shutdown_va;
455 
456 int
457 _init(void)
458 {
459 	int error;
460 
461 	if ((error = ddi_soft_state_init(&fhcp,
462 	    sizeof (struct fhc_soft_state), 1)) != 0)
463 		return (error);
464 
465 	fhc_bdlist_init();
466 	mutex_init(&ftlist_mutex, NULL, MUTEX_DEFAULT, NULL);
467 
468 	shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
469 	ASSERT(shutdown_va != NULL);
470 
471 	plat_register_tod_fault(fhc_tod_fault);
472 
473 	return (mod_install(&modlinkage));
474 }
475 
476 int
477 _fini(void)
478 {
479 	int error;
480 
481 	if ((error = mod_remove(&modlinkage)) != 0)
482 		return (error);
483 
484 	plat_register_tod_fault(NULL);
485 
486 	mutex_destroy(&ftlist_mutex);
487 
488 	fhc_bdlist_fini();
489 
490 	ddi_soft_state_fini(&fhcp);
491 
492 	return (0);
493 }
494 
495 int
496 _info(struct modinfo *modinfop)
497 {
498 	return (mod_info(&modlinkage, modinfop));
499 }
500 
501 /*
502  * Reset the interrupt mapping registers.
503  * This function resets the values during DDI_RESUME.
504  *
505  * NOTE: This function will not work for a full CPR cycle
506  * and is currently designed to handle the RESUME after a connect.
507  *
508  * Note about the PROM handling of moving CENTRAL to another board:
509  * The PROM moves the IGN identity (igr register) from the
510  * original CENTRAL to the new one. This means that we do not
511  * duplicate the fhc_attach code that sets it to (board number * 2).
512  * We rely on only using FHC interrupts from one board only
513  * (the UART and SYS interrupts) so that the values of the other IGNs
514  * are irrelevant. The benefit of this approach is that we don't
515  * have to have to tear down and rebuild the interrupt records
516  * for UART and SYS. It is also why we don't try to change the
517  * board number in the fhc instance for the clock board.
518  */
519 static void
520 fhc_handle_imr(struct fhc_soft_state *softsp)
521 {
522 	int i;
523 	int cent;
524 	uint_t tmp_reg;
525 
526 
527 	if (softsp->is_central) {
528 		uint_t want_igr, act_igr;
529 
530 		want_igr = softsp->list->sc.board << 1;
531 		act_igr = *softsp->igr & 0x1f;
532 		if (want_igr != act_igr) {
533 			*softsp->igr = want_igr;
534 			tmp_reg = *softsp->igr;
535 #ifdef lint
536 			tmp_reg = tmp_reg;
537 #endif
538 			/* We must now re-issue any pending interrupts. */
539 			for (i = 0; i < FHC_MAX_INO; i++) {
540 				if (*(softsp->intr_regs[i].clear_reg) == 3) {
541 					*(softsp->intr_regs[i].clear_reg) =
542 					    ISM_IDLE;
543 
544 					tmp_reg =
545 					    *(softsp->intr_regs[i].clear_reg);
546 #ifdef lint
547 					tmp_reg = tmp_reg;
548 #endif
549 				}
550 			}
551 			cmn_err(CE_NOTE, "central IGN corruption fixed: "
552 			    "got %x wanted %x", act_igr, want_igr);
553 		}
554 		return;
555 	}
556 
557 	ASSERT(softsp->list->sc.board == FHC_BSR_TO_BD(*(softsp->bsr)));
558 	cent = check_central(softsp->list->sc.board);
559 
560 	/* Loop through all 4 FHC interrupt mapping registers */
561 	for (i = 0; i < FHC_MAX_INO; i++) {
562 
563 		if (i == FHC_SYS_INO &&
564 		    *(softsp->intr_regs[i].clear_reg) == 3) {
565 			cmn_err(CE_NOTE,
566 			    "found lost system interrupt, resetting..");
567 
568 			*(softsp->intr_regs[i].clear_reg) = ISM_IDLE;
569 
570 			/*
571 			 * ensure atomic write with this read.
572 			 */
573 			tmp_reg = *(softsp->intr_regs[i].clear_reg);
574 #ifdef lint
575 			tmp_reg = tmp_reg;
576 #endif
577 		}
578 
579 		/*
580 		 * The mapping registers on the board with the "central" bit
581 		 * set should not be touched as it has been taken care by POST.
582 		 */
583 
584 		if (cent)
585 			continue;
586 
587 		*(softsp->intr_regs[i].mapping_reg) = 0;
588 
589 		/*
590 		 * ensure atomic write with this read.
591 		 */
592 		tmp_reg = *(softsp->intr_regs[i].mapping_reg);
593 #ifdef lint
594 		tmp_reg = tmp_reg;
595 #endif
596 
597 	}
598 }
599 
600 static int
601 check_central(int board)
602 {
603 	uint_t cs_value;
604 
605 	/*
606 	 * This is the value of AC configuration and status reg
607 	 * in the Local Devices space. We access it as a physical
608 	 * address.
609 	 */
610 	cs_value = ldphysio(AC_BCSR(board));
611 	if (cs_value & AC_CENTRAL)
612 		return (TRUE);
613 	else
614 		return (FALSE);
615 }
616 
617 static int
618 fhc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
619 {
620 	struct fhc_soft_state *softsp;
621 	int instance;
622 
623 	instance = ddi_get_instance(devi);
624 
625 	switch (cmd) {
626 	case DDI_ATTACH:
627 		break;
628 
629 	case DDI_RESUME:
630 		softsp = ddi_get_soft_state(fhcp, instance);
631 		/* IGR, NOT_BRD_PRES handled by prom */
632 		/* reset interrupt mapping registers */
633 		fhc_handle_imr(softsp);
634 
635 		return (DDI_SUCCESS);
636 
637 	default:
638 		return (DDI_FAILURE);
639 	}
640 
641 
642 	if (ddi_soft_state_zalloc(fhcp, instance) != DDI_SUCCESS)
643 		return (DDI_FAILURE);
644 
645 	softsp = ddi_get_soft_state(fhcp, instance);
646 
647 	/* Set the dip in the soft state */
648 	softsp->dip = devi;
649 
650 	if (fhc_init(softsp) != DDI_SUCCESS)
651 		goto bad;
652 
653 	ddi_report_dev(devi);
654 
655 	return (DDI_SUCCESS);
656 
657 bad:
658 	ddi_soft_state_free(fhcp, instance);
659 	return (DDI_FAILURE);
660 }
661 
662 static int
663 fhc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
664 {
665 	int board;
666 	int instance;
667 	struct fhc_soft_state *softsp;
668 	fhc_bd_t *list = NULL;
669 
670 	/* get the instance of this devi */
671 	instance = ddi_get_instance(devi);
672 
673 	/* get the soft state pointer for this device node */
674 	softsp = ddi_get_soft_state(fhcp, instance);
675 
676 	board = softsp->list->sc.board;
677 
678 	switch (cmd) {
679 	case DDI_SUSPEND:
680 
681 		return (DDI_SUCCESS);
682 
683 	case DDI_DETACH:
684 		/* grab the lock on the board list */
685 		list = fhc_bdlist_lock(board);
686 
687 		if (fhc_bd_detachable(board) &&
688 		    !fhc_bd_is_jtag_master(board))
689 			break;
690 		else
691 			fhc_bdlist_unlock();
692 		/* FALLTHROUGH */
693 
694 	default:
695 		return (DDI_FAILURE);
696 	}
697 
698 	/* Remove the interrupt redistribution callback. */
699 	intr_dist_rem(fhc_intrdist, (void *)devi);
700 
701 	/* remove the soft state pointer from the board list */
702 	list->softsp = NULL;
703 
704 	/* clear inherited faults from the PROM. */
705 	clear_fault(list->sc.board, FT_PROM, FT_BOARD);
706 
707 	/* remove the kstat for this board */
708 	kstat_delete(softsp->fhc_ksp);
709 
710 	/* destroy the mutexes in this soft state structure */
711 	mutex_destroy(&softsp->poll_list_lock);
712 	mutex_destroy(&softsp->ctrl_lock);
713 
714 	/* unmap all the register sets */
715 	fhc_unmap_regs(softsp);
716 
717 	/* release the board list lock now */
718 	fhc_bdlist_unlock();
719 
720 	/* free the soft state structure */
721 	ddi_soft_state_free(fhcp, instance);
722 
723 	return (DDI_SUCCESS);
724 }
725 
726 static enum board_type
727 fhc_board_type(struct fhc_soft_state *softsp, int board)
728 {
729 	int proplen;
730 	char *board_type;
731 	enum board_type type;
732 
733 	if (softsp->is_central)
734 		type = CLOCK_BOARD;
735 	else if (ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip,
736 	    DDI_PROP_DONTPASS, "board-type", (caddr_t)&board_type,
737 	    &proplen) == DDI_PROP_SUCCESS) {
738 		/* match the board-type string */
739 		if (strcmp(CPU_BD_NAME, board_type) == 0) {
740 			type = CPU_BOARD;
741 		} else if (strcmp(MEM_BD_NAME, board_type) == 0) {
742 			type = MEM_BOARD;
743 		} else if (strcmp(IO_2SBUS_BD_NAME, board_type) == 0) {
744 			type = IO_2SBUS_BOARD;
745 		} else if (strcmp(IO_SBUS_FFB_BD_NAME, board_type) == 0) {
746 			type = IO_SBUS_FFB_BOARD;
747 		} else if (strcmp(IO_2SBUS_SOCPLUS_BD_NAME, board_type) == 0) {
748 			type = IO_2SBUS_SOCPLUS_BOARD;
749 		} else if (strcmp(IO_SBUS_FFB_SOCPLUS_BD_NAME, board_type)
750 		    == 0) {
751 			type = IO_SBUS_FFB_SOCPLUS_BOARD;
752 		} else if (strcmp(IO_PCI_BD_NAME, board_type) == 0) {
753 			type = IO_PCI_BOARD;
754 		} else {
755 			type = UNKNOWN_BOARD;
756 		}
757 		kmem_free(board_type, proplen);
758 	} else
759 		type = UNKNOWN_BOARD;
760 
761 	/*
762 	 * if the board type is indeterminate, it must be determined.
763 	 */
764 	if (type == UNKNOWN_BOARD) {
765 		/*
766 		 * Use the UPA64 bits from the FHC.
767 		 * This is not the best solution since we
768 		 * cannot fully type the IO boards.
769 		 */
770 		if (cpu_on_board(board))
771 			type = CPU_BOARD;
772 		else if ((*(softsp->bsr) & FHC_UPADATA64A) ||
773 		    (*(softsp->bsr) & FHC_UPADATA64B))
774 			type = IO_2SBUS_BOARD;
775 		else
776 			type = MEM_BOARD;
777 	}
778 
779 	return (type);
780 }
781 
782 static void
783 fhc_unmap_regs(struct fhc_soft_state *softsp)
784 {
785 	dev_info_t *dip = softsp->dip;
786 
787 	if (softsp->id) {
788 		ddi_unmap_regs(dip, 0, (caddr_t *)&softsp->id, 0, 0);
789 		softsp->id = NULL;
790 	}
791 	if (softsp->igr) {
792 		ddi_unmap_regs(dip, 1, (caddr_t *)&softsp->igr, 0, 0);
793 		softsp->igr = NULL;
794 	}
795 	if (softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg) {
796 		ddi_unmap_regs(dip, 2,
797 		    (caddr_t *)&softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg,
798 		    0, 0);
799 		softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg = NULL;
800 	}
801 	if (softsp->intr_regs[FHC_SYS_INO].mapping_reg) {
802 		ddi_unmap_regs(dip, 3,
803 		    (caddr_t *)&softsp->intr_regs[FHC_SYS_INO].mapping_reg,
804 		    0, 0);
805 		softsp->intr_regs[FHC_SYS_INO].mapping_reg = NULL;
806 	}
807 	if (softsp->intr_regs[FHC_UART_INO].mapping_reg) {
808 		ddi_unmap_regs(dip, 4,
809 		    (caddr_t *)&softsp->intr_regs[FHC_UART_INO].mapping_reg,
810 		    0, 0);
811 		softsp->intr_regs[FHC_UART_INO].mapping_reg = NULL;
812 	}
813 	if (softsp->intr_regs[FHC_TOD_INO].mapping_reg) {
814 		ddi_unmap_regs(dip, 5,
815 		    (caddr_t *)&softsp->intr_regs[FHC_TOD_INO].mapping_reg,
816 		    0, 0);
817 		softsp->intr_regs[FHC_TOD_INO].mapping_reg = NULL;
818 	}
819 }
820 
821 static int
822 fhc_init(struct fhc_soft_state *softsp)
823 {
824 	int i;
825 	uint_t tmp_reg;
826 	int board;
827 
828 	/*
829 	 * Map in the FHC registers. Specifying length and offset of
830 	 * zero maps in the entire OBP register set.
831 	 */
832 
833 	/* map in register set 0 */
834 	if (ddi_map_regs(softsp->dip, 0,
835 	    (caddr_t *)&softsp->id, 0, 0)) {
836 		cmn_err(CE_WARN, "fhc%d: unable to map internal "
837 		    "registers", ddi_get_instance(softsp->dip));
838 		goto bad;
839 	}
840 
841 	/*
842 	 * Fill in the virtual addresses of the registers in the
843 	 * fhc_soft_state structure.
844 	 */
845 	softsp->rctrl = (uint_t *)((char *)(softsp->id) +
846 	    FHC_OFF_RCTRL);
847 	softsp->ctrl = (uint_t *)((char *)(softsp->id) +
848 	    FHC_OFF_CTRL);
849 	softsp->bsr = (uint_t *)((char *)(softsp->id) +
850 	    FHC_OFF_BSR);
851 	softsp->jtag_ctrl = (uint_t *)((char *)(softsp->id) +
852 	    FHC_OFF_JTAG_CTRL);
853 	softsp->jt_master.jtag_cmd = (uint_t *)((char *)(softsp->id) +
854 	    FHC_OFF_JTAG_CMD);
855 
856 	/* map in register set 1 */
857 	if (ddi_map_regs(softsp->dip, 1,
858 	    (caddr_t *)&softsp->igr, 0, 0)) {
859 		cmn_err(CE_WARN, "fhc%d: unable to map IGR "
860 		    "register", ddi_get_instance(softsp->dip));
861 		goto bad;
862 	}
863 
864 	/*
865 	 * map in register set 2
866 	 * XXX this can never be used as an interrupt generator
867 	 * (hardware queue overflow in fhc)
868 	 */
869 	if (ddi_map_regs(softsp->dip, 2,
870 	    (caddr_t *)&softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg,
871 	    0, 0)) {
872 		cmn_err(CE_WARN, "fhc%d: unable to map Fan Fail "
873 		    "IMR register", ddi_get_instance(softsp->dip));
874 		goto bad;
875 	}
876 
877 	/* map in register set 3 */
878 	if (ddi_map_regs(softsp->dip, 3,
879 	    (caddr_t *)&softsp->intr_regs[FHC_SYS_INO].mapping_reg,
880 	    0, 0)) {
881 		cmn_err(CE_WARN, "fhc%d: unable to map System "
882 		    "IMR register\n", ddi_get_instance(softsp->dip));
883 		goto bad;
884 	}
885 
886 	/* map in register set 4 */
887 	if (ddi_map_regs(softsp->dip, 4,
888 	    (caddr_t *)&softsp->intr_regs[FHC_UART_INO].mapping_reg,
889 	    0, 0)) {
890 		cmn_err(CE_WARN, "fhc%d: unable to map UART "
891 		    "IMR register\n", ddi_get_instance(softsp->dip));
892 		goto bad;
893 	}
894 
895 	/* map in register set 5 */
896 	if (ddi_map_regs(softsp->dip, 5,
897 	    (caddr_t *)&softsp->intr_regs[FHC_TOD_INO].mapping_reg,
898 	    0, 0)) {
899 		cmn_err(CE_WARN, "fhc%d: unable to map FHC TOD "
900 		    "IMR register", ddi_get_instance(softsp->dip));
901 		goto bad;
902 	}
903 
904 	/* Loop over all intr sets and setup the VAs for the ISMR */
905 	/* TODO - Make sure we are calculating the ISMR correctly. */
906 	for (i = 0; i < FHC_MAX_INO; i++) {
907 		softsp->intr_regs[i].clear_reg =
908 		    (uint_t *)((char *)(softsp->intr_regs[i].mapping_reg) +
909 		    FHC_OFF_ISMR);
910 
911 		/* Now clear the state machines to idle */
912 		*(softsp->intr_regs[i].clear_reg) = ISM_IDLE;
913 	}
914 
915 	/*
916 	 * It is OK to not have a OBP_BOARDNUM property. This happens for
917 	 * the board which is a child of central. However this FHC
918 	 * still needs a proper Interrupt Group Number programmed
919 	 * into the Interrupt Group register, because the other
920 	 * instance of FHC, which is not under central, will properly
921 	 * program the IGR. The numbers from the two settings of the
922 	 * IGR need to be the same. One driver cannot wait for the
923 	 * other to program the IGR, because there is no guarantee
924 	 * which instance of FHC will get attached first.
925 	 */
926 	if ((board = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
927 	    DDI_PROP_DONTPASS, OBP_BOARDNUM, -1)) == -1) {
928 		/*
929 		 * Now determine the board number by reading the
930 		 * hardware register.
931 		 */
932 		board = FHC_BSR_TO_BD(*(softsp->bsr));
933 		softsp->is_central = 1;
934 	}
935 
936 	/*
937 	 * If this fhc holds JTAG master line, and is not the central fhc,
938 	 * (this avoids two JTAG master nodes) then initialize the
939 	 * mutex and set the flag in the structure.
940 	 */
941 	if ((*(softsp->jtag_ctrl) & JTAG_MASTER_EN) && !softsp->is_central) {
942 		mutex_init(&(softsp->jt_master.lock), NULL, MUTEX_DEFAULT,
943 		    NULL);
944 		softsp->jt_master.is_master = 1;
945 	} else {
946 		softsp->jt_master.is_master = 0;
947 	}
948 
949 	fhc_bd_init(softsp, board, fhc_board_type(softsp, board));
950 
951 	/* Initialize the mutex guarding the poll_list. */
952 	mutex_init(&softsp->poll_list_lock, NULL, MUTEX_DRIVER, NULL);
953 
954 	/* Initialize the mutex guarding the FHC CSR */
955 	mutex_init(&softsp->ctrl_lock, NULL, MUTEX_DRIVER, NULL);
956 
957 	/* Initialize the poll_list to be empty */
958 	for (i = 0; i < MAX_ZS_CNT; i++) {
959 		softsp->poll_list[i].funcp = NULL;
960 	}
961 
962 	/* Modify the various registers in the FHC now */
963 
964 	/*
965 	 * We know this board to be present now, record that state and
966 	 * remove the NOT_BRD_PRES condition
967 	 */
968 	if (!(softsp->is_central)) {
969 		mutex_enter(&softsp->ctrl_lock);
970 		*(softsp->ctrl) |= FHC_NOT_BRD_PRES;
971 		/* Now flush the hardware store buffers. */
972 		tmp_reg = *(softsp->ctrl);
973 #ifdef lint
974 		tmp_reg = tmp_reg;
975 #endif
976 		/* XXX record the board state in global space */
977 		mutex_exit(&softsp->ctrl_lock);
978 
979 		/* Add kstats for all non-central instances of the FHC. */
980 		fhc_add_kstats(softsp);
981 	}
982 
983 	/*
984 	 * Read the device tree to see if this system is in an environmental
985 	 * chamber.
986 	 */
987 	if (temperature_chamber == -1) {
988 		temperature_chamber = check_for_chamber();
989 	}
990 
991 	/* Check for inherited faults from the PROM. */
992 	if (*softsp->ctrl & FHC_LED_MID) {
993 		reg_fault(softsp->list->sc.board, FT_PROM, FT_BOARD);
994 	}
995 
996 	/*
997 	 * setup the IGR. Shift the board number over by one to get
998 	 * the UPA MID.
999 	 */
1000 	*(softsp->igr) = (softsp->list->sc.board) << 1;
1001 
1002 	/* Now flush the hardware store buffers. */
1003 	tmp_reg = *(softsp->id);
1004 #ifdef lint
1005 	tmp_reg = tmp_reg;
1006 #endif
1007 
1008 	/* Add the interrupt redistribution callback. */
1009 	intr_dist_add(fhc_intrdist, (void *)softsp->dip);
1010 
1011 	return (DDI_SUCCESS);
1012 bad:
1013 	fhc_unmap_regs(softsp);
1014 	return (DDI_FAILURE);
1015 }
1016 
1017 static uint_t
1018 fhc_intr_wrapper(caddr_t arg)
1019 {
1020 	uint_t intr_return;
1021 	uint_t tmpreg;
1022 	struct fhc_wrapper_arg *intr_info = (struct fhc_wrapper_arg *)arg;
1023 	uint_t (*funcp)(caddr_t, caddr_t) = intr_info->funcp;
1024 	caddr_t iarg1 = intr_info->arg1;
1025 	caddr_t iarg2 = intr_info->arg2;
1026 	dev_info_t *dip = intr_info->child;
1027 
1028 	tmpreg = ISM_IDLE;
1029 
1030 	DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
1031 	    void *, funcp, caddr_t, iarg1, caddr_t, iarg2);
1032 
1033 	intr_return = (*funcp)(iarg1, iarg2);
1034 
1035 	DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
1036 	    void *, funcp, caddr_t, iarg1, int, intr_return);
1037 
1038 	/* Idle the state machine. */
1039 	*(intr_info->clear_reg) = tmpreg;
1040 
1041 	/* Flush the hardware store buffers. */
1042 	tmpreg = *(intr_info->clear_reg);
1043 #ifdef lint
1044 	tmpreg = tmpreg;
1045 #endif	/* lint */
1046 
1047 	return (intr_return);
1048 }
1049 
1050 /*
1051  * fhc_zs_intr_wrapper
1052  *
1053  * This function handles intrerrupts where more than one device may interupt
1054  * the fhc with the same mondo.
1055  */
1056 
1057 #define	MAX_INTR_CNT 10
1058 
1059 static uint_t
1060 fhc_zs_intr_wrapper(caddr_t arg)
1061 {
1062 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)arg;
1063 	uint_t (*funcp0)(caddr_t, caddr_t);
1064 	uint_t (*funcp1)(caddr_t, caddr_t);
1065 	caddr_t funcp0_arg1, funcp0_arg2, funcp1_arg1, funcp1_arg2;
1066 	uint_t tmp_reg;
1067 	uint_t result = DDI_INTR_UNCLAIMED;
1068 	volatile uint_t *clear_reg;
1069 	uchar_t *spurious_cntr = &softsp->spurious_zs_cntr;
1070 
1071 	funcp0 = softsp->poll_list[0].funcp;
1072 	funcp1 = softsp->poll_list[1].funcp;
1073 	funcp0_arg1 = softsp->poll_list[0].arg1;
1074 	funcp0_arg2 = softsp->poll_list[0].arg2;
1075 	funcp1_arg1 = softsp->poll_list[1].arg1;
1076 	funcp1_arg2 = softsp->poll_list[1].arg2;
1077 	clear_reg = softsp->intr_regs[FHC_UART_INO].clear_reg;
1078 
1079 	if (funcp0 != NULL) {
1080 		if ((funcp0)(funcp0_arg1, funcp0_arg2) == DDI_INTR_CLAIMED) {
1081 			result = DDI_INTR_CLAIMED;
1082 		}
1083 	}
1084 
1085 	if (funcp1 != NULL) {
1086 		if ((funcp1)(funcp1_arg1, funcp1_arg2) == DDI_INTR_CLAIMED) {
1087 			result = DDI_INTR_CLAIMED;
1088 		}
1089 	}
1090 
1091 	if (result == DDI_INTR_UNCLAIMED) {
1092 		(*spurious_cntr)++;
1093 
1094 		if (*spurious_cntr < MAX_INTR_CNT) {
1095 			result = DDI_INTR_CLAIMED;
1096 		} else {
1097 			*spurious_cntr = (uchar_t)0;
1098 		}
1099 	} else {
1100 		*spurious_cntr = (uchar_t)0;
1101 	}
1102 
1103 	/* Idle the state machine. */
1104 	*(clear_reg) = ISM_IDLE;
1105 
1106 	/* flush the store buffers. */
1107 	tmp_reg = *(clear_reg);
1108 #ifdef lint
1109 	tmp_reg = tmp_reg;
1110 #endif
1111 
1112 	return (result);
1113 }
1114 
1115 
1116 /*
1117  * add_intrspec - Add an interrupt specification.
1118  */
1119 static int
1120 fhc_add_intr_impl(dev_info_t *dip, dev_info_t *rdip,
1121     ddi_intr_handle_impl_t *hdlp)
1122 {
1123 	int ino;
1124 	struct fhc_wrapper_arg *fhc_arg;
1125 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)
1126 	    ddi_get_soft_state(fhcp, ddi_get_instance(dip));
1127 	volatile uint_t *mondo_vec_reg;
1128 	uint_t tmp_mondo_vec;
1129 	uint_t tmpreg; /* HW flush reg */
1130 	uint_t cpu_id;
1131 	int ret = DDI_SUCCESS;
1132 
1133 	/* Xlate the interrupt */
1134 	fhc_xlate_intrs(hdlp,
1135 	    (softsp->list->sc.board << BD_IVINTR_SHFT));
1136 
1137 	/* get the mondo number */
1138 	ino = FHC_INO(hdlp->ih_vector);
1139 	mondo_vec_reg = softsp->intr_regs[ino].mapping_reg;
1140 
1141 	ASSERT(ino < FHC_MAX_INO);
1142 
1143 	/* We don't use the two spare interrupts. */
1144 	if (ino >= FHC_MAX_INO) {
1145 		cmn_err(CE_WARN, "fhc%d: Spare interrupt %d not usable",
1146 		    ddi_get_instance(dip), ino);
1147 		return (DDI_FAILURE);
1148 	}
1149 
1150 	/* TOD and Fan Fail interrupts are not usable */
1151 	if (ino == FHC_TOD_INO) {
1152 		cmn_err(CE_WARN, "fhc%d: TOD interrupt not usable",
1153 		    ddi_get_instance(dip));
1154 		return (DDI_FAILURE);
1155 	}
1156 	if (ino == FHC_FANFAIL_INO) {
1157 		cmn_err(CE_WARN, "fhc%d: Fan fail interrupt not usable",
1158 		    ddi_get_instance(dip));
1159 		return (DDI_FAILURE);
1160 	}
1161 
1162 	/*
1163 	 * If the interrupt is for the zs chips, use the vector
1164 	 * polling lists. Otherwise use a straight handler.
1165 	 */
1166 	if (ino == FHC_UART_INO) {
1167 		int32_t zs_inst;
1168 		/* First lock the mutex for this poll_list */
1169 		mutex_enter(&softsp->poll_list_lock);
1170 
1171 		/*
1172 		 * Add this interrupt to the polling list.
1173 		 */
1174 
1175 		/* figure out where to add this item in the list */
1176 		for (zs_inst = 0; zs_inst < MAX_ZS_CNT; zs_inst++) {
1177 			if (softsp->poll_list[zs_inst].funcp == NULL) {
1178 				softsp->poll_list[zs_inst].arg1 =
1179 				    hdlp->ih_cb_arg1;
1180 				softsp->poll_list[zs_inst].arg2 =
1181 				    hdlp->ih_cb_arg2;
1182 				softsp->poll_list[zs_inst].funcp =
1183 				    (ddi_intr_handler_t *)
1184 				    hdlp->ih_cb_func;
1185 				softsp->poll_list[zs_inst].inum =
1186 				    hdlp->ih_inum;
1187 				softsp->poll_list[zs_inst].child = rdip;
1188 
1189 				break;
1190 			}
1191 		}
1192 
1193 		if (zs_inst >= MAX_ZS_CNT) {
1194 			cmn_err(CE_WARN,
1195 			    "fhc%d: poll list overflow",
1196 			    ddi_get_instance(dip));
1197 			mutex_exit(&softsp->poll_list_lock);
1198 			ret = DDI_FAILURE;
1199 			goto done;
1200 		}
1201 
1202 		/*
1203 		 * If polling list is empty, then install handler
1204 		 * and enable interrupts for this ino.
1205 		 */
1206 		if (zs_inst == 0) {
1207 			DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1208 			    (ddi_intr_handler_t *)fhc_zs_intr_wrapper,
1209 			    (caddr_t)softsp, NULL);
1210 
1211 			ret = i_ddi_add_ivintr(hdlp);
1212 
1213 			DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1214 			    softsp->poll_list[zs_inst].funcp,
1215 			    softsp->poll_list[zs_inst].arg1,
1216 			    softsp->poll_list[zs_inst].arg2);
1217 
1218 			if (ret != DDI_SUCCESS)
1219 				goto done;
1220 		}
1221 
1222 		/*
1223 		 * If both zs handlers are active, then this is the
1224 		 * second add_intrspec called, so do not enable
1225 		 * the IMR_VALID bit, it is already on.
1226 		 */
1227 		if (zs_inst > 0) {
1228 				/* now release the mutex and return */
1229 			mutex_exit(&softsp->poll_list_lock);
1230 
1231 			goto done;
1232 		} else {
1233 			/* just release the mutex */
1234 			mutex_exit(&softsp->poll_list_lock);
1235 		}
1236 	} else {	/* normal interrupt installation */
1237 		int32_t i;
1238 
1239 		/* Allocate a nexus interrupt data structure */
1240 		fhc_arg = kmem_alloc(sizeof (struct fhc_wrapper_arg), KM_SLEEP);
1241 		fhc_arg->child = rdip;
1242 		fhc_arg->mapping_reg = mondo_vec_reg;
1243 		fhc_arg->clear_reg = (softsp->intr_regs[ino].clear_reg);
1244 		fhc_arg->softsp = softsp;
1245 		fhc_arg->funcp =
1246 		    (ddi_intr_handler_t *)hdlp->ih_cb_func;
1247 		fhc_arg->arg1 = hdlp->ih_cb_arg1;
1248 		fhc_arg->arg2 = hdlp->ih_cb_arg2;
1249 		fhc_arg->inum = hdlp->ih_inum;
1250 
1251 		for (i = 0; i < FHC_MAX_INO; i++) {
1252 			if (softsp->intr_list[i] == 0) {
1253 				softsp->intr_list[i] = fhc_arg;
1254 				break;
1255 			}
1256 		}
1257 
1258 		/*
1259 		 * Save the fhc_arg in the ispec so we can use this info
1260 		 * later to uninstall this interrupt spec.
1261 		 */
1262 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1263 		    (ddi_intr_handler_t *)fhc_intr_wrapper,
1264 		    (caddr_t)fhc_arg, NULL);
1265 
1266 		ret = i_ddi_add_ivintr(hdlp);
1267 
1268 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, fhc_arg->funcp,
1269 		    fhc_arg->arg1, fhc_arg->arg2);
1270 
1271 		if (ret != DDI_SUCCESS)
1272 			goto done;
1273 	}
1274 
1275 	/*
1276 	 * Clear out a stale 'pending' or 'transmit' state in
1277 	 * this device's ISM that might have been left from a
1278 	 * previous session.
1279 	 *
1280 	 * Since all FHC interrupts are level interrupts, any
1281 	 * real interrupting condition will immediately transition
1282 	 * the ISM back to pending.
1283 	 */
1284 	*(softsp->intr_regs[ino].clear_reg) = ISM_IDLE;
1285 
1286 	/*
1287 	 * Program the mondo vector accordingly.  This MUST be the
1288 	 * last thing we do.  Once we program the ino, the device
1289 	 * may begin to interrupt.
1290 	 */
1291 	cpu_id = intr_dist_cpuid();
1292 
1293 	tmp_mondo_vec = cpu_id << INR_PID_SHIFT;
1294 
1295 	/* don't do this for fan because fan has a special control */
1296 	if (ino == FHC_FANFAIL_INO)
1297 		panic("fhc%d: enabling fanfail interrupt",
1298 		    ddi_get_instance(dip));
1299 	else
1300 		tmp_mondo_vec |= IMR_VALID;
1301 
1302 	DPRINTF(FHC_INTERRUPT_DEBUG,
1303 	    ("Mondo 0x%x mapping reg: 0x%p", hdlp->ih_vector,
1304 	    (void *)mondo_vec_reg));
1305 
1306 	/* Store it in the hardware reg. */
1307 	*mondo_vec_reg = tmp_mondo_vec;
1308 
1309 	/* Read a FHC register to flush store buffers */
1310 	tmpreg = *(softsp->id);
1311 #ifdef lint
1312 	tmpreg = tmpreg;
1313 #endif
1314 
1315 done:
1316 	return (ret);
1317 }
1318 
1319 /*
1320  * remove_intrspec - Remove an interrupt specification.
1321  */
1322 static void
1323 fhc_remove_intr_impl(dev_info_t *dip, dev_info_t *rdip,
1324 	ddi_intr_handle_impl_t *hdlp)
1325 {
1326 	volatile uint_t *mondo_vec_reg;
1327 	volatile uint_t tmpreg;
1328 	int i;
1329 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)
1330 	    ddi_get_soft_state(fhcp, ddi_get_instance(dip));
1331 	int ino;
1332 
1333 	/* Xlate the interrupt */
1334 	fhc_xlate_intrs(hdlp,
1335 	    (softsp->list->sc.board << BD_IVINTR_SHFT));
1336 
1337 	/* get the mondo number */
1338 	ino = FHC_INO(hdlp->ih_vector);
1339 
1340 	if (ino == FHC_UART_INO) {
1341 		int intr_found = 0;
1342 
1343 		/* Lock the poll_list first */
1344 		mutex_enter(&softsp->poll_list_lock);
1345 
1346 		/*
1347 		 * Find which entry in the poll list belongs to this
1348 		 * intrspec.
1349 		 */
1350 		for (i = 0; i < MAX_ZS_CNT; i++) {
1351 			if (softsp->poll_list[i].child == rdip &&
1352 			    softsp->poll_list[i].inum == hdlp->ih_inum) {
1353 				softsp->poll_list[i].funcp = NULL;
1354 				intr_found++;
1355 			}
1356 		}
1357 
1358 		/* If we did not find an entry, then we have a problem */
1359 		if (!intr_found) {
1360 			cmn_err(CE_WARN, "fhc%d: Intrspec not found in"
1361 			    " poll list", ddi_get_instance(dip));
1362 			mutex_exit(&softsp->poll_list_lock);
1363 			goto done;
1364 		}
1365 
1366 		/*
1367 		 * If we have removed all active entries for the poll
1368 		 * list, then we have to disable interupts at this point.
1369 		 */
1370 		if ((softsp->poll_list[0].funcp == NULL) &&
1371 		    (softsp->poll_list[1].funcp == NULL)) {
1372 			mondo_vec_reg =
1373 			    softsp->intr_regs[FHC_UART_INO].mapping_reg;
1374 			*mondo_vec_reg &= ~IMR_VALID;
1375 
1376 			/* flush the hardware buffers */
1377 			tmpreg = *(softsp->ctrl);
1378 
1379 			/* Eliminate the particular handler from the system. */
1380 			i_ddi_rem_ivintr(hdlp);
1381 		}
1382 
1383 		mutex_exit(&softsp->poll_list_lock);
1384 	} else {
1385 		int32_t i;
1386 
1387 
1388 		for (i = 0; i < FHC_MAX_INO; i++)
1389 			if (softsp->intr_list[i]->child == rdip &&
1390 			    softsp->intr_list[i]->inum == hdlp->ih_inum)
1391 				break;
1392 
1393 		if (i >= FHC_MAX_INO)
1394 			goto done;
1395 
1396 		mondo_vec_reg = softsp->intr_list[i]->mapping_reg;
1397 
1398 		/* Turn off the valid bit in the mapping register. */
1399 		/* XXX what about FHC_FANFAIL owned imr? */
1400 		*mondo_vec_reg &= ~IMR_VALID;
1401 
1402 		/* flush the hardware store buffers */
1403 		tmpreg = *(softsp->id);
1404 #ifdef lint
1405 		tmpreg = tmpreg;
1406 #endif
1407 
1408 		/* Eliminate the particular handler from the system. */
1409 		i_ddi_rem_ivintr(hdlp);
1410 
1411 		kmem_free(softsp->intr_list[i],
1412 		    sizeof (struct fhc_wrapper_arg));
1413 		softsp->intr_list[i] = 0;
1414 	}
1415 
1416 done:
1417 	;
1418 }
1419 
1420 /* new intr_ops structure */
1421 static int
1422 fhc_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1423     ddi_intr_handle_impl_t *hdlp, void *result)
1424 {
1425 	int	ret = DDI_SUCCESS;
1426 
1427 	switch (intr_op) {
1428 	case DDI_INTROP_GETCAP:
1429 		*(int *)result = DDI_INTR_FLAG_LEVEL;
1430 		break;
1431 	case DDI_INTROP_ALLOC:
1432 		*(int *)result = hdlp->ih_scratch1;
1433 		break;
1434 	case DDI_INTROP_FREE:
1435 		break;
1436 	case DDI_INTROP_GETPRI:
1437 		if (hdlp->ih_pri == 0) {
1438 			struct fhc_soft_state *softsp =
1439 			    (struct fhc_soft_state *)ddi_get_soft_state(fhcp,
1440 			    ddi_get_instance(dip));
1441 
1442 			/* Xlate the interrupt */
1443 			fhc_xlate_intrs(hdlp,
1444 			    (softsp->list->sc.board << BD_IVINTR_SHFT));
1445 		}
1446 
1447 		*(int *)result = hdlp->ih_pri;
1448 		break;
1449 	case DDI_INTROP_SETPRI:
1450 		break;
1451 	case DDI_INTROP_ADDISR:
1452 		ret = fhc_add_intr_impl(dip, rdip, hdlp);
1453 		break;
1454 	case DDI_INTROP_REMISR:
1455 		fhc_remove_intr_impl(dip, rdip, hdlp);
1456 		break;
1457 	case DDI_INTROP_ENABLE:
1458 	case DDI_INTROP_DISABLE:
1459 		break;
1460 	case DDI_INTROP_NINTRS:
1461 	case DDI_INTROP_NAVAIL:
1462 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
1463 		break;
1464 	case DDI_INTROP_SETCAP:
1465 	case DDI_INTROP_SETMASK:
1466 	case DDI_INTROP_CLRMASK:
1467 	case DDI_INTROP_GETPENDING:
1468 		ret = DDI_ENOTSUP;
1469 		break;
1470 	case DDI_INTROP_SUPPORTED_TYPES:
1471 		/* only support fixed interrupts */
1472 		*(int *)result = i_ddi_get_intx_nintrs(rdip) ?
1473 		    DDI_INTR_TYPE_FIXED : 0;
1474 		break;
1475 	default:
1476 		ret = i_ddi_intr_ops(dip, rdip, intr_op, hdlp, result);
1477 		break;
1478 	}
1479 
1480 	return (ret);
1481 }
1482 
1483 /*
1484  * FHC Control Ops routine
1485  *
1486  * Requests handled here:
1487  *	DDI_CTLOPS_INITCHILD	see impl_ddi_sunbus_initchild() for details
1488  *	DDI_CTLOPS_UNINITCHILD	see fhc_uninit_child() for details
1489  *	DDI_CTLOPS_REPORTDEV	TODO - need to implement this.
1490  */
1491 static int
1492 fhc_ctlops(dev_info_t *dip, dev_info_t *rdip,
1493 	ddi_ctl_enum_t op, void *arg, void *result)
1494 {
1495 
1496 	switch (op) {
1497 	case DDI_CTLOPS_INITCHILD:
1498 		DPRINTF(FHC_CTLOPS_DEBUG, ("DDI_CTLOPS_INITCHILD\n"));
1499 		return (impl_ddi_sunbus_initchild((dev_info_t *)arg));
1500 
1501 	case DDI_CTLOPS_UNINITCHILD:
1502 		impl_ddi_sunbus_removechild((dev_info_t *)arg);
1503 		return (DDI_SUCCESS);
1504 
1505 	case DDI_CTLOPS_REPORTDEV:
1506 		/*
1507 		 * TODO - Figure out what makes sense to report here.
1508 		 */
1509 		return (DDI_SUCCESS);
1510 
1511 	case DDI_CTLOPS_POKE:
1512 	case DDI_CTLOPS_PEEK:
1513 		return (fhc_ctlops_peekpoke(op, (peekpoke_ctlops_t *)arg,
1514 		    result));
1515 
1516 	default:
1517 		return (ddi_ctlops(dip, rdip, op, arg, result));
1518 	}
1519 }
1520 
1521 
1522 /*
1523  * We're prepared to claim that the interrupt string is in
1524  * the form of a list of <FHCintr> specifications, or we're dealing
1525  * with on-board devices and we have an interrupt_number property which
1526  * gives us our mondo number.
1527  * Translate the mondos into fhcintrspecs.
1528  */
1529 /* ARGSUSED */
1530 static void
1531 fhc_xlate_intrs(ddi_intr_handle_impl_t *hdlp, uint32_t ign)
1532 
1533 {
1534 	uint32_t mondo;
1535 
1536 	mondo = hdlp->ih_vector;
1537 
1538 	hdlp->ih_vector = (mondo | ign);
1539 	if (hdlp->ih_pri == 0)
1540 		hdlp->ih_pri = fhc_int_priorities[FHC_INO(mondo)];
1541 }
1542 
1543 static int
1544 fhc_ctlops_peekpoke(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args,
1545     void *result)
1546 {
1547 	int err = DDI_SUCCESS;
1548 	on_trap_data_t otd;
1549 
1550 	/* No safe access except for peek/poke is supported. */
1551 	if (in_args->handle != NULL)
1552 		return (DDI_FAILURE);
1553 
1554 	/* Set up protected environment. */
1555 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
1556 		uintptr_t tramp = otd.ot_trampoline;
1557 
1558 		if (cmd == DDI_CTLOPS_POKE) {
1559 			otd.ot_trampoline = (uintptr_t)&poke_fault;
1560 			err = do_poke(in_args->size, (void *)in_args->dev_addr,
1561 			    (void *)in_args->host_addr);
1562 		} else {
1563 			otd.ot_trampoline = (uintptr_t)&peek_fault;
1564 			err = do_peek(in_args->size, (void *)in_args->dev_addr,
1565 			    (void *)in_args->host_addr);
1566 			result = (void *)in_args->host_addr;
1567 		}
1568 		otd.ot_trampoline = tramp;
1569 	} else
1570 		err = DDI_FAILURE;
1571 
1572 	/* Take down protected environment. */
1573 	no_trap();
1574 
1575 	return (err);
1576 }
1577 
1578 /*
1579  * This function initializes the temperature arrays for use. All
1580  * temperatures are set in to invalid value to start.
1581  */
1582 void
1583 init_temp_arrays(struct temp_stats *envstat)
1584 {
1585 	int i;
1586 
1587 	envstat->index = 0;
1588 
1589 	for (i = 0; i < L1_SZ; i++) {
1590 		envstat->l1[i] = NA_TEMP;
1591 	}
1592 
1593 	for (i = 0; i < L2_SZ; i++) {
1594 		envstat->l2[i] = NA_TEMP;
1595 	}
1596 
1597 	for (i = 0; i < L3_SZ; i++) {
1598 		envstat->l3[i] = NA_TEMP;
1599 	}
1600 
1601 	for (i = 0; i < L4_SZ; i++) {
1602 		envstat->l4[i] = NA_TEMP;
1603 	}
1604 
1605 	for (i = 0; i < L5_SZ; i++) {
1606 		envstat->l5[i] = NA_TEMP;
1607 	}
1608 
1609 	envstat->max = NA_TEMP;
1610 	envstat->min = NA_TEMP;
1611 	envstat->trend = TREND_UNKNOWN;
1612 	envstat->version = TEMP_KSTAT_VERSION;
1613 	envstat->override = NA_TEMP;
1614 }
1615 
1616 /* Inhibit warning messages below this temperature, eg for CPU poweron. */
1617 static uint_t fhc_cpu_warning_temp_threshold = FHC_CPU_WARNING_TEMP_THRESHOLD;
1618 
1619 /*
1620  * This function manages the temperature history in the temperature
1621  * statistics buffer passed in. It calls the temperature calibration
1622  * routines and maintains the time averaged temperature data.
1623  */
1624 void
1625 update_temp(dev_info_t *pdip, struct temp_stats *envstat, uchar_t value)
1626 {
1627 	uint_t index;		    /* The absolute temperature counter */
1628 	uint_t tmp_index;	    /* temp index into upper level array */
1629 	int count;		    /* Count of non-zero values in array */
1630 	int total;		    /* sum total of non-zero values in array */
1631 	short real_temp;	    /* calibrated temperature */
1632 	int i;
1633 	struct fhc_soft_state *softsp;
1634 	char buffer[256];	    /* buffer for warning of overtemp */
1635 	enum temp_state temp_state; /* Temperature state */
1636 
1637 	/*
1638 	 * NOTE: This global counter is not protected since we're called
1639 	 * serially for each board.
1640 	 */
1641 	static int shutdown_msg = 0; /* Flag if shutdown warning issued */
1642 
1643 	/* determine soft state pointer of parent */
1644 	softsp = ddi_get_soft_state(fhcp, ddi_get_instance(pdip));
1645 
1646 	envstat->index++;
1647 	index = envstat->index;
1648 
1649 	/*
1650 	 * You need to update the level 5 intervals first, since
1651 	 * they are based on the data from the level 4 intervals,
1652 	 * and so on, down to the level 1 intervals.
1653 	 */
1654 
1655 	/* update the level 5 intervals if it is time */
1656 	if (((tmp_index = L5_INDEX(index)) > 0) && (L5_REM(index) == 0)) {
1657 		/* Generate the index within the level 5 array */
1658 		tmp_index -= 1;		/* decrement by 1 for indexing */
1659 		tmp_index = tmp_index % L5_SZ;
1660 
1661 		/* take an average of the level 4 array */
1662 		for (i = 0, count = 0, total = 0; i < L4_SZ; i++) {
1663 			/* Do not include zero values in average */
1664 			if (envstat->l4[i] != NA_TEMP) {
1665 				total += (int)envstat->l4[i];
1666 				count++;
1667 			}
1668 		}
1669 
1670 		/*
1671 		 * If there were any level 4 data points to average,
1672 		 * do so.
1673 		 */
1674 		if (count != 0) {
1675 			envstat->l5[tmp_index] = total/count;
1676 		} else {
1677 			envstat->l5[tmp_index] = NA_TEMP;
1678 		}
1679 	}
1680 
1681 	/* update the level 4 intervals if it is time */
1682 	if (((tmp_index = L4_INDEX(index)) > 0) && (L4_REM(index) == 0)) {
1683 		/* Generate the index within the level 4 array */
1684 		tmp_index -= 1;		/* decrement by 1 for indexing */
1685 		tmp_index = tmp_index % L4_SZ;
1686 
1687 		/* take an average of the level 3 array */
1688 		for (i = 0, count = 0, total = 0; i < L3_SZ; i++) {
1689 			/* Do not include zero values in average */
1690 			if (envstat->l3[i] != NA_TEMP) {
1691 				total += (int)envstat->l3[i];
1692 				count++;
1693 			}
1694 		}
1695 
1696 		/*
1697 		 * If there were any level 3 data points to average,
1698 		 * do so.
1699 		 */
1700 		if (count != 0) {
1701 			envstat->l4[tmp_index] = total/count;
1702 		} else {
1703 			envstat->l4[tmp_index] = NA_TEMP;
1704 		}
1705 	}
1706 
1707 	/* update the level 3 intervals if it is time */
1708 	if (((tmp_index = L3_INDEX(index)) > 0) && (L3_REM(index) == 0)) {
1709 		/* Generate the index within the level 3 array */
1710 		tmp_index -= 1;		/* decrement by 1 for indexing */
1711 		tmp_index = tmp_index % L3_SZ;
1712 
1713 		/* take an average of the level 2 array */
1714 		for (i = 0, count = 0, total = 0; i < L2_SZ; i++) {
1715 			/* Do not include zero values in average */
1716 			if (envstat->l2[i] != NA_TEMP) {
1717 				total += (int)envstat->l2[i];
1718 				count++;
1719 			}
1720 		}
1721 
1722 		/*
1723 		 * If there were any level 2 data points to average,
1724 		 * do so.
1725 		 */
1726 		if (count != 0) {
1727 			envstat->l3[tmp_index] = total/count;
1728 		} else {
1729 			envstat->l3[tmp_index] = NA_TEMP;
1730 		}
1731 	}
1732 
1733 	/* update the level 2 intervals if it is time */
1734 	if (((tmp_index = L2_INDEX(index)) > 0) && (L2_REM(index) == 0)) {
1735 		/* Generate the index within the level 2 array */
1736 		tmp_index -= 1;		/* decrement by 1 for indexing */
1737 		tmp_index = tmp_index % L2_SZ;
1738 
1739 		/* take an average of the level 1 array */
1740 		for (i = 0, count = 0, total = 0; i < L1_SZ; i++) {
1741 			/* Do not include zero values in average */
1742 			if (envstat->l1[i] != NA_TEMP) {
1743 				total += (int)envstat->l1[i];
1744 				count++;
1745 			}
1746 		}
1747 
1748 		/*
1749 		 * If there were any level 1 data points to average,
1750 		 * do so.
1751 		 */
1752 		if (count != 0) {
1753 			envstat->l2[tmp_index] = total/count;
1754 		} else {
1755 			envstat->l2[tmp_index] = NA_TEMP;
1756 		}
1757 	}
1758 
1759 	/* determine the current temperature in degrees Celcius */
1760 	if (envstat->override != NA_TEMP) {
1761 		/* use override temperature for this board */
1762 		real_temp = envstat->override;
1763 	} else {
1764 		/* Run the calibration function using this board type */
1765 		real_temp = calibrate_temp(softsp->list->sc.type, value,
1766 		    softsp->list->sc.ac_compid);
1767 	}
1768 
1769 	envstat->l1[index % L1_SZ] = real_temp;
1770 
1771 	/* check if the temperature state for this device needs to change */
1772 	temp_state = get_temp_state(softsp->list->sc.type, real_temp,
1773 	    softsp->list->sc.board);
1774 
1775 	/* has the state changed? Then get the board string ready */
1776 	if (temp_state != envstat->state) {
1777 		int board = softsp->list->sc.board;
1778 		enum board_type type = softsp->list->sc.type;
1779 
1780 		build_bd_display_str(buffer, type, board);
1781 
1782 		if (temp_state > envstat->state) {
1783 			if (envstat->state == TEMP_OK) {
1784 				if (type == CLOCK_BOARD) {
1785 					reg_fault(0, FT_OVERTEMP, FT_SYSTEM);
1786 				} else {
1787 					reg_fault(board, FT_OVERTEMP,
1788 					    FT_BOARD);
1789 				}
1790 			}
1791 
1792 			/* heating up, change state now */
1793 			envstat->temp_cnt = 0;
1794 			envstat->state = temp_state;
1795 
1796 			if (temp_state == TEMP_WARN) {
1797 				/* now warn the user of the problem */
1798 				cmn_err(CE_WARN,
1799 				    "%s is warm (temperature: %dC). "
1800 				    "Please check system cooling", buffer,
1801 				    real_temp);
1802 				fhc_bd_update(board, SYSC_EVT_BD_OVERTEMP);
1803 				if (temperature_chamber == -1)
1804 					temperature_chamber =
1805 					    check_for_chamber();
1806 			} else if (temp_state == TEMP_DANGER) {
1807 				cmn_err(CE_WARN,
1808 				    "%s is very hot (temperature: %dC)",
1809 				    buffer, real_temp);
1810 
1811 				envstat->shutdown_cnt = 1;
1812 				if (temperature_chamber == -1)
1813 					temperature_chamber =
1814 					    check_for_chamber();
1815 				if ((temperature_chamber == 0) &&
1816 				    enable_overtemp_powerdown) {
1817 					/*
1818 					 * NOTE: The "%d seconds" is not
1819 					 * necessarily accurate in the case
1820 					 * where we have multiple boards
1821 					 * overheating and subsequently cooling
1822 					 * down.
1823 					 */
1824 					if (shutdown_msg == 0) {
1825 						cmn_err(CE_WARN, "System "
1826 						    "shutdown scheduled "
1827 						    "in %d seconds due to "
1828 						    "over-temperature "
1829 						    "condition on %s",
1830 						    SHUTDOWN_TIMEOUT_SEC,
1831 						    buffer);
1832 					}
1833 					shutdown_msg++;
1834 				}
1835 			}
1836 
1837 			/*
1838 			 * If this is a cpu board, power them off.
1839 			 */
1840 			if (temperature_chamber == 0) {
1841 				mutex_enter(&cpu_lock);
1842 				(void) fhc_board_poweroffcpus(board, NULL,
1843 				    CPU_FORCED);
1844 				mutex_exit(&cpu_lock);
1845 			}
1846 		} else if (temp_state < envstat->state) {
1847 			/*
1848 			 * Avert the sigpower that would
1849 			 * otherwise be sent to init.
1850 			 */
1851 			envstat->shutdown_cnt = 0;
1852 
1853 			/* cooling down, use state counter */
1854 			if (envstat->temp_cnt == 0) {
1855 				envstat->temp_cnt = TEMP_STATE_COUNT;
1856 			} else if (--envstat->temp_cnt == 0) {
1857 				if (temp_state == TEMP_WARN) {
1858 					cmn_err(CE_NOTE,
1859 					    "%s is cooling "
1860 					    "(temperature: %dC)", buffer,
1861 					    real_temp);
1862 
1863 				} else if (temp_state == TEMP_OK) {
1864 					cmn_err(CE_NOTE,
1865 					    "%s has cooled down "
1866 					    "(temperature: %dC), system OK",
1867 					    buffer, real_temp);
1868 
1869 					if (type == CLOCK_BOARD) {
1870 						clear_fault(0, FT_OVERTEMP,
1871 						    FT_SYSTEM);
1872 					} else {
1873 						clear_fault(board, FT_OVERTEMP,
1874 						    FT_BOARD);
1875 					}
1876 				}
1877 
1878 				/*
1879 				 * If we just came out of TEMP_DANGER, and
1880 				 * a warning was issued about shutting down,
1881 				 * let the user know it's been cancelled
1882 				 */
1883 				if (envstat->state == TEMP_DANGER &&
1884 				    (temperature_chamber == 0) &&
1885 				    enable_overtemp_powerdown &&
1886 				    (powerdown_started == 0) &&
1887 				    (--shutdown_msg == 0)) {
1888 					cmn_err(CE_NOTE, "System "
1889 					    "shutdown due to over-"
1890 					    "temperature "
1891 					    "condition cancelled");
1892 				}
1893 				envstat->state = temp_state;
1894 
1895 				fhc_bd_update(board, SYSC_EVT_BD_TEMP_OK);
1896 			}
1897 		}
1898 	} else {
1899 		envstat->temp_cnt = 0;
1900 
1901 		if (temp_state == TEMP_DANGER) {
1902 			if (temperature_chamber == -1) {
1903 				temperature_chamber = check_for_chamber();
1904 			}
1905 
1906 			if ((envstat->shutdown_cnt++ >= SHUTDOWN_COUNT) &&
1907 			    (temperature_chamber == 0) &&
1908 			    enable_overtemp_powerdown &&
1909 			    (powerdown_started == 0)) {
1910 				powerdown_started = 1;
1911 
1912 				/* the system is still too hot */
1913 				build_bd_display_str(buffer,
1914 				    softsp->list->sc.type,
1915 				    softsp->list->sc.board);
1916 
1917 				cmn_err(CE_WARN, "%s still too hot "
1918 				    "(temperature: %dC)."
1919 				    " Overtemp shutdown started", buffer,
1920 				    real_temp);
1921 
1922 				fhc_reboot();
1923 			}
1924 		}
1925 	}
1926 
1927 	/* update the maximum and minimum temperatures if necessary */
1928 	if ((envstat->max == NA_TEMP) || (real_temp > envstat->max)) {
1929 		envstat->max = real_temp;
1930 	}
1931 
1932 	if ((envstat->min == NA_TEMP) || (real_temp < envstat->min)) {
1933 		envstat->min = real_temp;
1934 	}
1935 
1936 	/*
1937 	 * Update the temperature trend.  Currently, the temperature
1938 	 * trend algorithm is based on the level 2 stats.  So, we
1939 	 * only need to run every time the level 2 stats get updated.
1940 	 */
1941 	if (((tmp_index = L2_INDEX(index)) > 0) && (L2_REM(index) == 0))  {
1942 		enum board_type type = softsp->list->sc.type;
1943 
1944 		envstat->trend = temp_trend(envstat);
1945 
1946 		/* Issue a warning if the temperature is rising rapidly. */
1947 		/* For CPU boards, don't warn if CPUs just powered on. */
1948 		if (envstat->trend == TREND_RAPID_RISE &&
1949 		    (type != CPU_BOARD || real_temp >
1950 		    fhc_cpu_warning_temp_threshold))  {
1951 			int board = softsp->list->sc.board;
1952 
1953 			build_bd_display_str(buffer, type, board);
1954 			cmn_err(CE_WARN, "%s temperature is rising rapidly!  "
1955 			    "Current temperature is %dC", buffer,
1956 			    real_temp);
1957 		}
1958 	}
1959 }
1960 
1961 #define	PREV_L2_INDEX(x)    ((x) ? ((x) - 1) : (L2_SZ - 1))
1962 
1963 /*
1964  * This routine determines if the temp of the device passed in is heating
1965  * up, cooling down, or staying stable.
1966  */
1967 enum temp_trend
1968 temp_trend(struct temp_stats *tempstat)
1969 {
1970 	int		ii;
1971 	uint_t		curr_index;
1972 	int		curr_temp;
1973 	uint_t		prev_index;
1974 	int		prev_temp;
1975 	int		trail_temp;
1976 	int		delta;
1977 	int		read_cnt;
1978 	enum temp_trend	result = TREND_STABLE;
1979 
1980 	if (tempstat == NULL)
1981 		return (TREND_UNKNOWN);
1982 
1983 	curr_index = (L2_INDEX(tempstat->index) - 1) % L2_SZ;
1984 	curr_temp = tempstat->l2[curr_index];
1985 
1986 	/* Count how many temperature readings are available */
1987 	prev_index = curr_index;
1988 	for (read_cnt = 0; read_cnt < L2_SZ - 1; read_cnt++) {
1989 		if (tempstat->l2[prev_index] == NA_TEMP)
1990 			break;
1991 		prev_index = PREV_L2_INDEX(prev_index);
1992 	}
1993 
1994 	switch (read_cnt) {
1995 	case 0:
1996 	case 1:
1997 		result = TREND_UNKNOWN;
1998 		break;
1999 
2000 	default:
2001 		delta = curr_temp - tempstat->l2[PREV_L2_INDEX(curr_index)];
2002 		prev_index = curr_index;
2003 		trail_temp = prev_temp = curr_temp;
2004 		if (delta >= RAPID_RISE_THRESH) {	    /* rapid rise? */
2005 			result = TREND_RAPID_RISE;
2006 		} else if (delta > 0) {			    /* rise? */
2007 			for (ii = 1; ii < read_cnt; ii++) {
2008 				prev_index = PREV_L2_INDEX(prev_index);
2009 				prev_temp = tempstat->l2[prev_index];
2010 				if (prev_temp > trail_temp) {
2011 					break;
2012 				}
2013 				trail_temp = prev_temp;
2014 				if (prev_temp <= curr_temp - NOISE_THRESH) {
2015 					result = TREND_RISE;
2016 					break;
2017 				}
2018 			}
2019 		} else if (delta <= -RAPID_FALL_THRESH) {   /* rapid fall? */
2020 			result = TREND_RAPID_FALL;
2021 		} else if (delta < 0) {			    /* fall? */
2022 			for (ii = 1; ii < read_cnt; ii++) {
2023 				prev_index = PREV_L2_INDEX(prev_index);
2024 				prev_temp = tempstat->l2[prev_index];
2025 				if (prev_temp < trail_temp) {
2026 					break;
2027 				}
2028 				trail_temp = prev_temp;
2029 				if (prev_temp >= curr_temp + NOISE_THRESH) {
2030 					result = TREND_FALL;
2031 					break;
2032 				}
2033 			}
2034 		}
2035 	}
2036 	return (result);
2037 }
2038 
2039 /*
2040  * Reboot the system if we can, otherwise attempt a power down
2041  */
2042 void
2043 fhc_reboot(void)
2044 {
2045 	proc_t *initpp;
2046 
2047 	/* send a SIGPWR to init process */
2048 	mutex_enter(&pidlock);
2049 	initpp = prfind(P_INITPID);
2050 	mutex_exit(&pidlock);
2051 
2052 	/*
2053 	 * If we're still booting and init(1) isn't
2054 	 * set up yet, simply halt.
2055 	 */
2056 	if (initpp != NULL) {
2057 		psignal(initpp, SIGFPE);	/* init 6 */
2058 	} else {
2059 		power_down("Environmental Shutdown");
2060 		halt("Power off the System");
2061 	}
2062 }
2063 
2064 int
2065 overtemp_kstat_update(kstat_t *ksp, int rw)
2066 {
2067 	struct temp_stats *tempstat;
2068 	char *kstatp;
2069 	int i;
2070 
2071 	kstatp = (char *)ksp->ks_data;
2072 	tempstat = (struct temp_stats *)ksp->ks_private;
2073 
2074 	/*
2075 	 * Kstat reads are used to retrieve the current system temperature
2076 	 * history. Kstat writes are used to reset the max and min
2077 	 * temperatures.
2078 	 */
2079 	if (rw == KSTAT_WRITE) {
2080 		short max;	/* temporary copy of max temperature */
2081 		short min;	/* temporary copy of min temperature */
2082 
2083 		/*
2084 		 * search for and reset the max and min to the current
2085 		 * array contents. Old max and min values will get
2086 		 * averaged out as they move into the higher level arrays.
2087 		 */
2088 		max = tempstat->l1[0];
2089 		min = tempstat->l1[0];
2090 
2091 		/* Pull the max and min from Level 1 array */
2092 		for (i = 0; i < L1_SZ; i++) {
2093 			if ((tempstat->l1[i] != NA_TEMP) &&
2094 			    (tempstat->l1[i] > max)) {
2095 				max = tempstat->l1[i];
2096 			}
2097 
2098 			if ((tempstat->l1[i] != NA_TEMP) &&
2099 			    (tempstat->l1[i] < min)) {
2100 				min = tempstat->l1[i];
2101 			}
2102 		}
2103 
2104 		/* Pull the max and min from Level 2 array */
2105 		for (i = 0; i < L2_SZ; i++) {
2106 			if ((tempstat->l2[i] != NA_TEMP) &&
2107 			    (tempstat->l2[i] > max)) {
2108 				max = tempstat->l2[i];
2109 			}
2110 
2111 			if ((tempstat->l2[i] != NA_TEMP) &&
2112 			    (tempstat->l2[i] < min)) {
2113 				min = tempstat->l2[i];
2114 			}
2115 		}
2116 
2117 		/* Pull the max and min from Level 3 array */
2118 		for (i = 0; i < L3_SZ; i++) {
2119 			if ((tempstat->l3[i] != NA_TEMP) &&
2120 			    (tempstat->l3[i] > max)) {
2121 				max = tempstat->l3[i];
2122 			}
2123 
2124 			if ((tempstat->l3[i] != NA_TEMP) &&
2125 			    (tempstat->l3[i] < min)) {
2126 				min = tempstat->l3[i];
2127 			}
2128 		}
2129 
2130 		/* Pull the max and min from Level 4 array */
2131 		for (i = 0; i < L4_SZ; i++) {
2132 			if ((tempstat->l4[i] != NA_TEMP) &&
2133 			    (tempstat->l4[i] > max)) {
2134 				max = tempstat->l4[i];
2135 			}
2136 
2137 			if ((tempstat->l4[i] != NA_TEMP) &&
2138 			    (tempstat->l4[i] < min)) {
2139 				min = tempstat->l4[i];
2140 			}
2141 		}
2142 
2143 		/* Pull the max and min from Level 5 array */
2144 		for (i = 0; i < L5_SZ; i++) {
2145 			if ((tempstat->l5[i] != NA_TEMP) &&
2146 			    (tempstat->l5[i] > max)) {
2147 				max = tempstat->l5[i];
2148 			}
2149 
2150 			if ((tempstat->l5[i] != NA_TEMP) &&
2151 			    (tempstat->l5[i] < min)) {
2152 				min = tempstat->l5[i];
2153 			}
2154 		}
2155 	} else {
2156 		/*
2157 		 * copy the temperature history buffer into the
2158 		 * kstat structure.
2159 		 */
2160 		bcopy(tempstat, kstatp, sizeof (struct temp_stats));
2161 	}
2162 	return (0);
2163 }
2164 
2165 int
2166 temp_override_kstat_update(kstat_t *ksp, int rw)
2167 {
2168 	short *over;
2169 	short *kstatp;
2170 
2171 	kstatp = (short *)ksp->ks_data;
2172 	over = (short *)ksp->ks_private;
2173 
2174 	/*
2175 	 * Kstat reads are used to get the temperature override setting.
2176 	 * Kstat writes are used to set the temperature override setting.
2177 	 */
2178 	if (rw == KSTAT_WRITE) {
2179 		*over = *kstatp;
2180 	} else {
2181 		*kstatp = *over;
2182 	}
2183 	return (0);
2184 }
2185 
2186 /*
2187  * This function uses the calibration tables at the beginning of this file
2188  * to lookup the actual temperature of the thermistor in degrees Celcius.
2189  * If the measurement is out of the bounds of the acceptable values, the
2190  * closest boundary value is used instead.
2191  */
2192 static short
2193 calibrate_temp(enum board_type type, uchar_t temp, uint_t ac_comp)
2194 {
2195 	short result = NA_TEMP;
2196 
2197 	if (dont_calibrate == 1) {
2198 		return ((short)temp);
2199 	}
2200 
2201 	switch (type) {
2202 	case CPU_BOARD:
2203 		/*
2204 		 * If AC chip revision is >= 4 or if it is unitialized,
2205 		 * then use the new calibration tables.
2206 		 */
2207 		if ((CHIP_REV(ac_comp) >= 4) || (CHIP_REV(ac_comp) == 0)) {
2208 			if (temp >= CPU2_MX_CNT) {
2209 				result = cpu2_table[CPU2_MX_CNT-1];
2210 			} else {
2211 				result = cpu2_table[temp];
2212 			}
2213 		} else {
2214 			if (temp >= CPU_MX_CNT) {
2215 				result = cpu_table[CPU_MX_CNT-1];
2216 			} else {
2217 				result = cpu_table[temp];
2218 			}
2219 		}
2220 		break;
2221 
2222 	case IO_2SBUS_BOARD:
2223 	case IO_SBUS_FFB_BOARD:
2224 	case IO_PCI_BOARD:
2225 	case IO_2SBUS_SOCPLUS_BOARD:
2226 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2227 		if (temp < IO_MN_CNT) {
2228 			result = io_table[IO_MN_CNT];
2229 		} else if (temp >= IO_MX_CNT) {
2230 			result = io_table[IO_MX_CNT-1];
2231 		} else {
2232 			result = io_table[temp];
2233 		}
2234 		break;
2235 
2236 	case CLOCK_BOARD:
2237 		if (temp < CLK_MN_CNT) {
2238 			result = clock_table[CLK_MN_CNT];
2239 		} else if (temp >= CLK_MX_CNT) {
2240 			result = clock_table[CLK_MX_CNT-1];
2241 		} else {
2242 			result = clock_table[temp];
2243 		}
2244 		break;
2245 
2246 	default:
2247 		break;
2248 	}
2249 
2250 	return (result);
2251 }
2252 
2253 /*
2254  * Determine the temperature state of this board based on its type and
2255  * the actual temperature in degrees Celcius.
2256  */
2257 static enum temp_state
2258 get_temp_state(enum board_type type, short temp, int board)
2259 {
2260 	enum temp_state state = TEMP_OK;
2261 	short warn_limit;
2262 	short danger_limit;
2263 	struct cpu *cpa, *cpb;
2264 
2265 	switch (type) {
2266 	case CPU_BOARD:
2267 		warn_limit = cpu_warn_temp;
2268 		danger_limit = cpu_danger_temp;
2269 
2270 		/*
2271 		 * For CPU boards with frequency >= 400 MHZ,
2272 		 * temperature zones are different.
2273 		 */
2274 
2275 		mutex_enter(&cpu_lock);
2276 
2277 		if ((cpa = cpu_get(FHC_BOARD2CPU_A(board))) != NULL) {
2278 			if ((cpa->cpu_type_info.pi_clock) >= 400) {
2279 				warn_limit = cpu_warn_temp_4x;
2280 				danger_limit = cpu_danger_temp_4x;
2281 			}
2282 		}
2283 		if ((cpb = cpu_get(FHC_BOARD2CPU_B(board))) != NULL) {
2284 			if ((cpb->cpu_type_info.pi_clock) >= 400) {
2285 				warn_limit = cpu_warn_temp_4x;
2286 				danger_limit = cpu_danger_temp_4x;
2287 			}
2288 		}
2289 
2290 		mutex_exit(&cpu_lock);
2291 
2292 		break;
2293 
2294 	case IO_2SBUS_BOARD:
2295 	case IO_SBUS_FFB_BOARD:
2296 	case IO_PCI_BOARD:
2297 	case IO_2SBUS_SOCPLUS_BOARD:
2298 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2299 		warn_limit = io_warn_temp;
2300 		danger_limit = io_danger_temp;
2301 		break;
2302 
2303 	case CLOCK_BOARD:
2304 		warn_limit = clk_warn_temp;
2305 		danger_limit = clk_danger_temp;
2306 		break;
2307 
2308 	case UNINIT_BOARD:
2309 	case UNKNOWN_BOARD:
2310 	case MEM_BOARD:
2311 	default:
2312 		warn_limit = dft_warn_temp;
2313 		danger_limit = dft_danger_temp;
2314 		break;
2315 	}
2316 
2317 	if (temp >= danger_limit) {
2318 		state = TEMP_DANGER;
2319 	} else if (temp >= warn_limit) {
2320 		state = TEMP_WARN;
2321 	}
2322 
2323 	return (state);
2324 }
2325 
2326 static void
2327 fhc_add_kstats(struct fhc_soft_state *softsp)
2328 {
2329 	struct kstat *fhc_ksp;
2330 	struct fhc_kstat *fhc_named_ksp;
2331 
2332 	if ((fhc_ksp = kstat_create("unix", softsp->list->sc.board,
2333 	    FHC_KSTAT_NAME, "misc", KSTAT_TYPE_NAMED,
2334 	    sizeof (struct fhc_kstat) / sizeof (kstat_named_t),
2335 	    KSTAT_FLAG_PERSISTENT)) == NULL) {
2336 		cmn_err(CE_WARN, "fhc%d kstat_create failed",
2337 		    ddi_get_instance(softsp->dip));
2338 		return;
2339 	}
2340 
2341 	fhc_named_ksp = (struct fhc_kstat *)(fhc_ksp->ks_data);
2342 
2343 	/* initialize the named kstats */
2344 	kstat_named_init(&fhc_named_ksp->csr,
2345 	    CSR_KSTAT_NAMED,
2346 	    KSTAT_DATA_UINT32);
2347 
2348 	kstat_named_init(&fhc_named_ksp->bsr,
2349 	    BSR_KSTAT_NAMED,
2350 	    KSTAT_DATA_UINT32);
2351 
2352 	fhc_ksp->ks_update = fhc_kstat_update;
2353 	fhc_ksp->ks_private = (void *)softsp;
2354 	softsp->fhc_ksp = fhc_ksp;
2355 	kstat_install(fhc_ksp);
2356 }
2357 
2358 static int
2359 fhc_kstat_update(kstat_t *ksp, int rw)
2360 {
2361 	struct fhc_kstat *fhcksp;
2362 	struct fhc_soft_state *softsp;
2363 
2364 	fhcksp = (struct fhc_kstat *)ksp->ks_data;
2365 	softsp = (struct fhc_soft_state *)ksp->ks_private;
2366 
2367 	/* this is a read-only kstat. Bail out on a write */
2368 	if (rw == KSTAT_WRITE) {
2369 		return (EACCES);
2370 	} else {
2371 		/*
2372 		 * copy the current state of the hardware into the
2373 		 * kstat structure.
2374 		 */
2375 		fhcksp->csr.value.ui32 = *softsp->ctrl;
2376 		fhcksp->bsr.value.ui32 = *softsp->bsr;
2377 	}
2378 	return (0);
2379 }
2380 
2381 static int
2382 cpu_on_board(int board)
2383 {
2384 	int upa_a = board << 1;
2385 	int upa_b = (board << 1) + 1;
2386 
2387 	if ((cpunodes[upa_a].nodeid != NULL) ||
2388 	    (cpunodes[upa_b].nodeid != NULL)) {
2389 		return (1);
2390 	} else {
2391 		return (0);
2392 	}
2393 }
2394 
2395 /*
2396  * This function uses the board list and toggles the OS green board
2397  * LED. The mask input tells which bit fields are being modified,
2398  * and the value input tells the states of the bits.
2399  */
2400 void
2401 update_board_leds(fhc_bd_t *board, uint_t mask, uint_t value)
2402 {
2403 	volatile uint_t temp;
2404 
2405 	ASSERT(fhc_bdlist_locked());
2406 
2407 	/* mask off mask and value for only the LED bits */
2408 	mask &= (FHC_LED_LEFT|FHC_LED_MID|FHC_LED_RIGHT);
2409 	value &= (FHC_LED_LEFT|FHC_LED_MID|FHC_LED_RIGHT);
2410 
2411 	if (board != NULL) {
2412 		mutex_enter(&board->softsp->ctrl_lock);
2413 
2414 		/* read the current register state */
2415 		temp = *board->softsp->ctrl;
2416 
2417 		/*
2418 		 * The EPDA bits are special since the register is
2419 		 * special.  We don't want to set them, since setting
2420 		 * the bits on a shutdown cpu keeps the cpu permanently
2421 		 * powered off.  Also, the CSR_SYNC bit must always be
2422 		 * set to 0 as it is an OBP semaphore that is expected to
2423 		 * be clear for cpu restart.
2424 		 */
2425 		temp &= ~(FHC_CSR_SYNC | FHC_EPDA_OFF | FHC_EPDB_OFF);
2426 
2427 		/* mask off the bits to change */
2428 		temp &= ~mask;
2429 
2430 		/* or in the new values of the bits. */
2431 		temp |= value;
2432 
2433 		/* update the register */
2434 		*board->softsp->ctrl = temp;
2435 
2436 		/* flush the hardware registers */
2437 		temp = *board->softsp->ctrl;
2438 #ifdef lint
2439 		temp = temp;
2440 #endif
2441 
2442 		mutex_exit(&board->softsp->ctrl_lock);
2443 	}
2444 }
2445 
2446 static int
2447 check_for_chamber(void)
2448 {
2449 	int chamber = 0;
2450 	dev_info_t *options_dip;
2451 	pnode_t options_node_id;
2452 	int mfgmode_len;
2453 	int retval;
2454 	char *mfgmode;
2455 
2456 	/*
2457 	 * The operator can disable overtemp powerdown from /etc/system or
2458 	 * boot -h.
2459 	 */
2460 	if (!enable_overtemp_powerdown) {
2461 		cmn_err(CE_WARN, "Operator has disabled overtemp powerdown");
2462 		return (1);
2463 	}
2464 
2465 	/*
2466 	 * An OBP option, 'mfg-mode' is being used to inform us as to
2467 	 * whether we are in an enviromental chamber. It exists in
2468 	 * the 'options' node. This is where all OBP 'setenv' (eeprom)
2469 	 * parameters live.
2470 	 */
2471 	if ((options_dip = ddi_find_devinfo("options", -1, 0)) != NULL) {
2472 		options_node_id = (pnode_t)ddi_get_nodeid(options_dip);
2473 		mfgmode_len = prom_getproplen(options_node_id, "mfg-mode");
2474 		if (mfgmode_len == -1) {
2475 			return (chamber);
2476 		}
2477 		mfgmode = kmem_alloc(mfgmode_len+1, KM_SLEEP);
2478 
2479 		retval = prom_getprop(options_node_id, "mfg-mode", mfgmode);
2480 		if (retval != -1) {
2481 			mfgmode[retval] = 0;
2482 			if (strcmp(mfgmode, CHAMBER_VALUE) == 0) {
2483 				chamber = 1;
2484 				cmn_err(CE_WARN, "System in Temperature"
2485 				    " Chamber Mode. Overtemperature"
2486 				    " Shutdown disabled");
2487 			}
2488 		}
2489 		kmem_free(mfgmode, mfgmode_len+1);
2490 	}
2491 	return (chamber);
2492 }
2493 
2494 static void
2495 build_bd_display_str(char *buffer, enum board_type type, int board)
2496 {
2497 	if (buffer == NULL) {
2498 		return;
2499 	}
2500 
2501 	/* fill in board type to display */
2502 	switch (type) {
2503 	case UNINIT_BOARD:
2504 		(void) sprintf(buffer, "Uninitialized Board type board %d",
2505 		    board);
2506 		break;
2507 
2508 	case UNKNOWN_BOARD:
2509 		(void) sprintf(buffer, "Unknown Board type board %d", board);
2510 		break;
2511 
2512 	case CPU_BOARD:
2513 	case MEM_BOARD:
2514 		(void) sprintf(buffer, "CPU/Memory board %d", board);
2515 		break;
2516 
2517 	case IO_2SBUS_BOARD:
2518 		(void) sprintf(buffer, "2 SBus IO board %d", board);
2519 		break;
2520 
2521 	case IO_SBUS_FFB_BOARD:
2522 		(void) sprintf(buffer, "SBus FFB IO board %d", board);
2523 		break;
2524 
2525 	case IO_PCI_BOARD:
2526 		(void) sprintf(buffer, "PCI IO board %d", board);
2527 		break;
2528 
2529 	case CLOCK_BOARD:
2530 		(void) sprintf(buffer, "Clock board");
2531 		break;
2532 
2533 	case IO_2SBUS_SOCPLUS_BOARD:
2534 		(void) sprintf(buffer, "2 SBus SOC+ IO board %d", board);
2535 		break;
2536 
2537 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2538 		(void) sprintf(buffer, "SBus FFB SOC+ IO board %d", board);
2539 		break;
2540 
2541 	default:
2542 		(void) sprintf(buffer, "Unrecognized board type board %d",
2543 		    board);
2544 		break;
2545 	}
2546 }
2547 
2548 void
2549 fhc_intrdist(void *arg)
2550 {
2551 	struct fhc_soft_state *softsp;
2552 	dev_info_t *dip = (dev_info_t *)arg;
2553 	volatile uint_t *mondo_vec_reg;
2554 	volatile uint_t *intr_state_reg;
2555 	uint_t mondo_vec;
2556 	uint_t tmp_reg;
2557 	uint_t cpu_id;
2558 	uint_t i;
2559 
2560 	/* extract the soft state pointer */
2561 	softsp = ddi_get_soft_state(fhcp, ddi_get_instance(dip));
2562 
2563 	/*
2564 	 * Loop through all the interrupt mapping registers and reprogram
2565 	 * the target CPU for all valid registers.
2566 	 */
2567 	for (i = 0; i < FHC_MAX_INO; i++) {
2568 		mondo_vec_reg = softsp->intr_regs[i].mapping_reg;
2569 		intr_state_reg = softsp->intr_regs[i].clear_reg;
2570 
2571 		if ((*mondo_vec_reg & IMR_VALID) == 0)
2572 			continue;
2573 
2574 		cpu_id = intr_dist_cpuid();
2575 
2576 		/* Check the current target of the mondo */
2577 		if (((*mondo_vec_reg & INR_PID_MASK) >> INR_PID_SHIFT) ==
2578 		    cpu_id) {
2579 			/* It is the same, don't reprogram */
2580 			return;
2581 		}
2582 
2583 		/* So it's OK to reprogram the CPU target */
2584 
2585 		/* turn off the valid bit */
2586 		*mondo_vec_reg &= ~IMR_VALID;
2587 
2588 		/* flush the hardware registers */
2589 		tmp_reg = *softsp->id;
2590 
2591 		/*
2592 		 * wait for the state machine to idle. Do not loop on panic, so
2593 		 * that system does not hang.
2594 		 */
2595 		while (((*intr_state_reg & INT_PENDING) == INT_PENDING) &&
2596 		    !panicstr)
2597 			;
2598 
2599 		/* re-target the mondo and turn it on */
2600 		mondo_vec = (cpu_id << INR_PID_SHIFT) | IMR_VALID;
2601 
2602 		/* write it back to the hardware. */
2603 		*mondo_vec_reg = mondo_vec;
2604 
2605 		/* flush the hardware buffers. */
2606 		tmp_reg = *(softsp->id);
2607 
2608 #ifdef	lint
2609 		tmp_reg = tmp_reg;
2610 #endif	/* lint */
2611 	}
2612 }
2613 
2614 /*
2615  * reg_fault
2616  *
2617  * This routine registers a fault in the fault list. If the fault
2618  * is unique (does not exist in fault list) then a new fault is
2619  * added to the fault list, with the appropriate structure elements
2620  * filled in.
2621  */
2622 void
2623 reg_fault(int unit, enum ft_type type, enum ft_class fclass)
2624 {
2625 	struct ft_link_list *list;	/* temporary list pointer */
2626 
2627 	if (type >= ft_max_index) {
2628 		cmn_err(CE_WARN, "Illegal Fault type %x", type);
2629 		return;
2630 	}
2631 
2632 	mutex_enter(&ftlist_mutex);
2633 
2634 	/* Search for the requested fault. If it already exists, return. */
2635 	for (list = ft_list; list != NULL; list = list->next) {
2636 		if ((list->f.unit == unit) && (list->f.type == type) &&
2637 		    (list->f.fclass == fclass)) {
2638 			mutex_exit(&ftlist_mutex);
2639 			return;
2640 		}
2641 	}
2642 
2643 	/* Allocate a new fault structure. */
2644 	list = kmem_zalloc(sizeof (struct ft_link_list), KM_SLEEP);
2645 
2646 	/* fill in the fault list elements */
2647 	list->f.unit = unit;
2648 	list->f.type = type;
2649 	list->f.fclass = fclass;
2650 	list->f.create_time = (time32_t)gethrestime_sec(); /* XX64 */
2651 	(void) strncpy(list->f.msg, ft_str_table[type], MAX_FT_DESC);
2652 
2653 	/* link it into the list. */
2654 	list->next = ft_list;
2655 	ft_list = list;
2656 
2657 	/* Update the total fault count */
2658 	ft_nfaults++;
2659 
2660 	mutex_exit(&ftlist_mutex);
2661 }
2662 
2663 /*
2664  * clear_fault
2665  *
2666  * This routine finds the fault list entry specified by the caller,
2667  * deletes it from the fault list, and frees up the memory used for
2668  * the entry. If the requested fault is not found, it exits silently.
2669  */
2670 void
2671 clear_fault(int unit, enum ft_type type, enum ft_class fclass)
2672 {
2673 	struct ft_link_list *list;		/* temporary list pointer */
2674 	struct ft_link_list **vect;
2675 
2676 	mutex_enter(&ftlist_mutex);
2677 
2678 	list = ft_list;
2679 	vect = &ft_list;
2680 
2681 	/*
2682 	 * Search for the requested fault. If it exists, delete it
2683 	 * and relink the fault list.
2684 	 */
2685 	for (; list != NULL; vect = &list->next, list = list->next) {
2686 		if ((list->f.unit == unit) && (list->f.type == type) &&
2687 		    (list->f.fclass == fclass)) {
2688 			/* remove the item from the list */
2689 			*vect = list->next;
2690 
2691 			/* free the memory allocated */
2692 			kmem_free(list, sizeof (struct ft_link_list));
2693 
2694 			/* Update the total fault count */
2695 			ft_nfaults--;
2696 			break;
2697 		}
2698 	}
2699 	mutex_exit(&ftlist_mutex);
2700 }
2701 
2702 /*
2703  * process_fault_list
2704  *
2705  * This routine walks the global fault list and updates the board list
2706  * with the current status of each Yellow LED. If any faults are found
2707  * in the system, then a non-zero value is returned. Else zero is returned.
2708  */
2709 int
2710 process_fault_list(void)
2711 {
2712 	int fault = 0;
2713 	struct ft_link_list *ftlist;	/* fault list pointer */
2714 	fhc_bd_t *bdlist;		/* board list pointer */
2715 
2716 	/*
2717 	 * Note on locking. The bdlist mutex is always acquired and
2718 	 * held around the ftlist mutex when both are needed for an
2719 	 * operation. This is to avoid deadlock.
2720 	 */
2721 
2722 	/* First lock the board list */
2723 	(void) fhc_bdlist_lock(-1);
2724 
2725 	/* Grab the fault list lock first */
2726 	mutex_enter(&ftlist_mutex);
2727 
2728 	/* clear the board list of all faults first */
2729 	for (bdlist = fhc_bd_first(); bdlist; bdlist = fhc_bd_next(bdlist))
2730 		bdlist->fault = 0;
2731 
2732 	/* walk the fault list here */
2733 	for (ftlist = ft_list; ftlist != NULL; ftlist = ftlist->next) {
2734 		fault++;
2735 
2736 		/*
2737 		 * If this is a board level fault, find the board, The
2738 		 * unit number for all board class faults must be the
2739 		 * actual board number. The caller of reg_fault must
2740 		 * ensure this for FT_BOARD class faults.
2741 		 */
2742 		if (ftlist->f.fclass == FT_BOARD) {
2743 			/* Sanity check the board first */
2744 			if (fhc_bd_valid(ftlist->f.unit)) {
2745 				bdlist = fhc_bd(ftlist->f.unit);
2746 				bdlist->fault = 1;
2747 			} else {
2748 				cmn_err(CE_WARN, "No board %d list entry found",
2749 				    ftlist->f.unit);
2750 			}
2751 		}
2752 	}
2753 
2754 	/* now unlock the fault list */
2755 	mutex_exit(&ftlist_mutex);
2756 
2757 	/* unlock the board list before leaving */
2758 	fhc_bdlist_unlock();
2759 
2760 	return (fault);
2761 }
2762 
2763 /*
2764  * Add a new memloc to the database (and keep 'em sorted by PA)
2765  */
2766 void
2767 fhc_add_memloc(int board, uint64_t pa, uint_t size)
2768 {
2769 	struct fhc_memloc *p, **pp;
2770 	uint_t ipa = pa >> FHC_MEMLOC_SHIFT;
2771 
2772 	ASSERT(fhc_bdlist_locked());
2773 	ASSERT((size & (size-1)) == 0);		/* size must be power of 2 */
2774 
2775 	/* look for a comparable memloc (as long as new PA smaller) */
2776 	for (p = fhc_base_memloc, pp = &fhc_base_memloc;
2777 	    p != NULL; pp = &p->next, p = p->next) {
2778 		/* have we passed our place in the sort? */
2779 		if (ipa < p->pa) {
2780 			break;
2781 		}
2782 	}
2783 	p = kmem_alloc(sizeof (struct fhc_memloc), KM_SLEEP);
2784 	p->next = *pp;
2785 	p->board = board;
2786 	p->pa = ipa;
2787 	p->size = size;
2788 #ifdef DEBUG_MEMDEC
2789 	cmn_err(CE_NOTE, "fhc_add_memloc: adding %d 0x%x 0x%x",
2790 	    p->board, p->pa, p->size);
2791 #endif /* DEBUG_MEMDEC */
2792 	*pp = p;
2793 }
2794 
2795 /*
2796  * Delete all memloc records for a board from the database
2797  */
2798 void
2799 fhc_del_memloc(int board)
2800 {
2801 	struct fhc_memloc *p, **pp;
2802 
2803 	ASSERT(fhc_bdlist_locked());
2804 
2805 	/* delete all entries that match board */
2806 	pp = &fhc_base_memloc;
2807 	while ((p = *pp) != NULL) {
2808 		if (p->board == board) {
2809 #ifdef DEBUG_MEMDEC
2810 			cmn_err(CE_NOTE, "fhc_del_memloc: removing %d "
2811 			    "0x%x 0x%x", board, p->pa, p->size);
2812 #endif /* DEBUG_MEMDEC */
2813 			*pp = p->next;
2814 			kmem_free(p, sizeof (struct fhc_memloc));
2815 		} else {
2816 			pp = &(p->next);
2817 		}
2818 	}
2819 }
2820 
2821 /*
2822  * Find a physical address range of sufficient size and return a starting PA
2823  */
2824 uint64_t
2825 fhc_find_memloc_gap(uint_t size)
2826 {
2827 	struct fhc_memloc *p;
2828 	uint_t base_pa = 0;
2829 	uint_t mask = ~(size-1);
2830 
2831 	ASSERT(fhc_bdlist_locked());
2832 	ASSERT((size & (size-1)) == 0);		/* size must be power of 2 */
2833 
2834 	/*
2835 	 * walk the list of known memlocs and measure the 'gaps'.
2836 	 * we will need a hole that can align the 'size' requested.
2837 	 * (e.g. a 256mb bank needs to be on a 256mb boundary).
2838 	 */
2839 	for (p = fhc_base_memloc; p != NULL; p = p->next) {
2840 		if (base_pa != (base_pa & mask))
2841 			base_pa = (base_pa + size) & mask;
2842 		if (base_pa + size <= p->pa)
2843 			break;
2844 		base_pa = p->pa + p->size;
2845 	}
2846 
2847 	/*
2848 	 * At this point, we assume that base_pa is good enough.
2849 	 */
2850 	ASSERT((base_pa + size) <= FHC_MEMLOC_MAX);
2851 	if (base_pa != (base_pa & mask))
2852 		base_pa = (base_pa + size) & mask;	/* align */
2853 	return ((uint64_t)base_pa << FHC_MEMLOC_SHIFT);
2854 }
2855 
2856 /*
2857  * This simple function to write the MCRs can only be used when
2858  * the contents of memory are not valid as there is a bug in the AC
2859  * ASIC concerning refresh.
2860  */
2861 static void
2862 fhc_write_mcrs(
2863 	uint64_t cpa,
2864 	uint64_t dpa0,
2865 	uint64_t dpa1,
2866 	uint64_t c,
2867 	uint64_t d0,
2868 	uint64_t d1)
2869 {
2870 	stdphysio(cpa, c & ~AC_CSR_REFEN);
2871 	(void) lddphysio(cpa);
2872 	if (GRP_SIZE_IS_SET(d0)) {
2873 		stdphysio(dpa0, d0);
2874 		(void) lddphysio(dpa0);
2875 	}
2876 	if (GRP_SIZE_IS_SET(d1)) {
2877 		stdphysio(dpa1, d1);
2878 		(void) lddphysio(dpa1);
2879 	}
2880 	stdphysio(cpa, c);
2881 	(void) lddphysio(cpa);
2882 }
2883 
/*
 * Compute the RASIZE encoding for a bank size.
 *
 * The result is half (rounded down) of the number of significant bits
 * in bsz / 64; e.g. 64 -> 0, 128 and 256 -> 1, 512 and 1024 -> 2.
 * Sizes below 64 yield 0.
 */
static uint_t
fhc_cvt_size(uint64_t bsz)
{
	uint_t nbits = 0;

	/* count the significant bits left after dividing by 64 */
	for (bsz >>= 6; bsz != 0; bsz >>= 1)
		nbits++;

	return (nbits >> 1);
}
2900 
2901 void
2902 fhc_program_memory(int board, uint64_t pa)
2903 {
2904 	uint64_t cpa, dpa0, dpa1;
2905 	uint64_t c, d0, d1;
2906 	uint64_t b0_pa, b1_pa;
2907 	uint64_t memdec0, memdec1;
2908 	uint_t b0_size, b1_size;
2909 
2910 	/* XXX gross hack to get to board via board number */
2911 	cpa = 0x1c0f9000060ull + (board * 0x400000000ull);
2912 #ifdef DEBUG_MEMDEC
2913 	prom_printf("cpa = 0x%llx\n", cpa);
2914 #endif /* DEBUG_MEMDEC */
2915 	dpa0 = cpa + 0x10;
2916 	dpa1 = cpa + 0x20;
2917 
2918 /* assume size is set by connect */
2919 	memdec0 = lddphysio(dpa0);
2920 #ifdef DEBUG_MEMDEC
2921 	prom_printf("memdec0 = 0x%llx\n", memdec0);
2922 #endif /* DEBUG_MEMDEC */
2923 	memdec1 = lddphysio(dpa1);
2924 #ifdef DEBUG_MEMDEC
2925 	prom_printf("memdec1 = 0x%llx\n", memdec1);
2926 #endif /* DEBUG_MEMDEC */
2927 	if (GRP_SIZE_IS_SET(memdec0)) {
2928 		b0_size = GRP_SPANMB(memdec0);
2929 	} else {
2930 		b0_size = 0;
2931 	}
2932 	if (GRP_SIZE_IS_SET(memdec1)) {
2933 		b1_size = GRP_SPANMB(memdec1);
2934 	} else {
2935 		b1_size = 0;
2936 	}
2937 
2938 	c = lddphysio(cpa);
2939 #ifdef DEBUG_MEMDEC
2940 	prom_printf("c = 0x%llx\n", c);
2941 #endif /* DEBUG_MEMDEC */
2942 	if (b0_size) {
2943 		b0_pa = pa;
2944 		d0 = SETUP_DECODE(b0_pa, b0_size, 0, 0);
2945 		d0 |= AC_MEM_VALID;
2946 
2947 		c &= ~0x7;
2948 		c |= 0;
2949 		c &= ~(0x7 << 8);
2950 		c |= (fhc_cvt_size(b0_size) << 8);  /* match row size */
2951 	} else {
2952 		d0 = memdec0;
2953 	}
2954 	if (b1_size) {
2955 		b1_pa = pa + 0x80000000ull; /* XXX 2gb */
2956 		d1 = SETUP_DECODE(b1_pa, b1_size, 0, 0);
2957 		d1 |= AC_MEM_VALID;
2958 
2959 		c &= ~(0x7 << 3);
2960 		c |= (0 << 3);
2961 		c &= ~(0x7 << 11);
2962 		c |= (fhc_cvt_size(b1_size) << 11); /* match row size */
2963 	} else {
2964 		d1 = memdec1;
2965 	}
2966 #ifdef DEBUG_MEMDEC
2967 	prom_printf("c 0x%llx, d0 0x%llx, d1 0x%llx\n", c, d0, d1);
2968 #endif /* DEBUG_MEMDEC */
2969 	fhc_write_mcrs(cpa, dpa0, dpa1, c, d0, d1);
2970 }
2971 
2972 /*
2973  * Creates a variable sized virtual kstat with a snapshot routine in order
2974  * to pass the linked list fault list up to userland. Also creates a
2975  * virtual kstat to pass up the string table for faults.
2976  */
2977 void
2978 create_ft_kstats(int instance)
2979 {
2980 	struct kstat *ksp;
2981 
2982 	ksp = kstat_create("unix", instance, FT_LIST_KSTAT_NAME, "misc",
2983 	    KSTAT_TYPE_RAW, 1, KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_VAR_SIZE);
2984 
2985 	if (ksp != NULL) {
2986 		ksp->ks_data = NULL;
2987 		ksp->ks_update = ft_ks_update;
2988 		ksp->ks_snapshot = ft_ks_snapshot;
2989 		ksp->ks_data_size = 1;
2990 		ksp->ks_lock = &ftlist_mutex;
2991 		kstat_install(ksp);
2992 	}
2993 }
2994 
2995 /*
2996  * This routine creates a snapshot of all the fault list data. It is
2997  * called by the kstat framework when a kstat read is done.
2998  */
2999 static int
3000 ft_ks_snapshot(struct kstat *ksp, void *buf, int rw)
3001 {
3002 	struct ft_link_list *ftlist;
3003 
3004 	if (rw == KSTAT_WRITE) {
3005 		return (EACCES);
3006 	}
3007 
3008 	ksp->ks_snaptime = gethrtime();
3009 
3010 	for (ftlist = ft_list; ftlist != NULL; ftlist = ftlist->next) {
3011 		bcopy(&ftlist->f, buf, sizeof (struct ft_list));
3012 		buf = ((struct ft_list *)buf) + 1;
3013 	}
3014 	return (0);
3015 }
3016 
3017 /*
3018  * Setup the kstat data size for the kstat framework. This is used in
3019  * conjunction with the ks_snapshot routine. This routine sets the size,
3020  * the kstat framework allocates the memory, and ks_shapshot does the
3021  * data transfer.
3022  */
3023 static int
3024 ft_ks_update(struct kstat *ksp, int rw)
3025 {
3026 	if (rw == KSTAT_WRITE) {
3027 		return (EACCES);
3028 	} else {
3029 		if (ft_nfaults) {
3030 			ksp->ks_data_size = ft_nfaults *
3031 			    sizeof (struct ft_list);
3032 		} else {
3033 			ksp->ks_data_size = 1;
3034 		}
3035 	}
3036 
3037 	return (0);
3038 }
3039 
3040 /*
3041  * Power off any cpus on the board.
3042  */
3043 int
3044 fhc_board_poweroffcpus(int board, char *errbuf, int cpu_flags)
3045 {
3046 	cpu_t *cpa, *cpb;
3047 	enum board_type type;
3048 	int error = 0;
3049 
3050 	ASSERT(MUTEX_HELD(&cpu_lock));
3051 
3052 	/*
3053 	 * what type of board are we dealing with?
3054 	 */
3055 	type = fhc_bd_type(board);
3056 
3057 	switch (type) {
3058 	case CPU_BOARD:
3059 
3060 		/*
3061 		 * the shutdown sequence will be:
3062 		 *
3063 		 * idle both cpus then shut them off.
3064 		 * it looks like the hardware gets corrupted if one
3065 		 * cpu is busy while the other is shutting down...
3066 		 */
3067 
3068 		if ((cpa = cpu_get(FHC_BOARD2CPU_A(board))) != NULL &&
3069 		    cpu_is_active(cpa)) {
3070 			if (!cpu_intr_on(cpa)) {
3071 				cpu_intr_enable(cpa);
3072 			}
3073 			if ((error = cpu_offline(cpa, cpu_flags)) != 0) {
3074 				cmn_err(CE_WARN,
3075 				    "Processor %d failed to offline.",
3076 				    cpa->cpu_id);
3077 				if (errbuf != NULL) {
3078 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3079 					    "processor %d failed to offline",
3080 					    cpa->cpu_id);
3081 				}
3082 			}
3083 		}
3084 
3085 		if (error == 0 &&
3086 		    (cpb = cpu_get(FHC_BOARD2CPU_B(board))) != NULL &&
3087 		    cpu_is_active(cpb)) {
3088 			if (!cpu_intr_on(cpb)) {
3089 				cpu_intr_enable(cpb);
3090 			}
3091 			if ((error = cpu_offline(cpb, cpu_flags)) != 0) {
3092 				cmn_err(CE_WARN,
3093 				    "Processor %d failed to offline.",
3094 				    cpb->cpu_id);
3095 
3096 				if (errbuf != NULL) {
3097 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3098 					    "processor %d failed to offline",
3099 					    cpb->cpu_id);
3100 				}
3101 			}
3102 		}
3103 
3104 		if (error == 0 && cpa != NULL && cpu_is_offline(cpa)) {
3105 			if ((error = cpu_poweroff(cpa)) != 0) {
3106 				cmn_err(CE_WARN,
3107 				    "Processor %d failed to power off.",
3108 				    cpa->cpu_id);
3109 				if (errbuf != NULL) {
3110 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3111 					    "processor %d failed to power off",
3112 					    cpa->cpu_id);
3113 				}
3114 			} else {
3115 				cmn_err(CE_NOTE, "Processor %d powered off.",
3116 				    cpa->cpu_id);
3117 			}
3118 		}
3119 
3120 		if (error == 0 && cpb != NULL && cpu_is_offline(cpb)) {
3121 			if ((error = cpu_poweroff(cpb)) != 0) {
3122 				cmn_err(CE_WARN,
3123 				    "Processor %d failed to power off.",
3124 				    cpb->cpu_id);
3125 
3126 				if (errbuf != NULL) {
3127 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3128 					    "processor %d failed to power off",
3129 					    cpb->cpu_id);
3130 				}
3131 			} else {
3132 				cmn_err(CE_NOTE, "Processor %d powered off.",
3133 				    cpb->cpu_id);
3134 			}
3135 		}
3136 
3137 		/*
3138 		 * If all the shutdowns completed, ONLY THEN, clear the
3139 		 * incorrectly valid dtags...
3140 		 *
3141 		 * IMPORTANT: it is an error to read or write dtags while
3142 		 * they are 'active'
3143 		 */
3144 		if (error == 0 && (cpa != NULL || cpb != NULL)) {
3145 			u_longlong_t base = 0;
3146 			int i;
3147 #ifdef DEBUG
3148 			int nonz0 = 0;
3149 			int nonz1 = 0;
3150 #endif
3151 			if (cpa != NULL)
3152 				base = FHC_DTAG_BASE(cpa->cpu_id);
3153 			if (cpb != NULL)
3154 				base = FHC_DTAG_BASE(cpb->cpu_id);
3155 			ASSERT(base != 0);
3156 
3157 			for (i = 0; i < FHC_DTAG_SIZE; i += FHC_DTAG_SKIP) {
3158 				u_longlong_t value = lddphysio(base+i);
3159 #ifdef lint
3160 				value = value;
3161 #endif
3162 #ifdef DEBUG
3163 				if (cpa != NULL && (value & FHC_DTAG_LOW))
3164 					nonz0++;
3165 				if (cpb != NULL && (value & FHC_DTAG_HIGH))
3166 					nonz1++;
3167 #endif
3168 				/* always clear the dtags */
3169 				stdphysio(base + i, 0ull);
3170 			}
3171 #ifdef DEBUG
3172 			if (nonz0 || nonz1) {
3173 				cmn_err(CE_NOTE, "!dtag results: "
3174 				    "cpua valid %d, cpub valid %d",
3175 				    nonz0, nonz1);
3176 			}
3177 #endif
3178 		}
3179 
3180 		break;
3181 
3182 	default:
3183 		break;
3184 	}
3185 
3186 	return (error);
3187 }
3188 
3189 /*
3190  * platform code for shutting down cpus.
3191  */
3192 int
3193 fhc_cpu_poweroff(struct cpu *cp)
3194 {
3195 	int board;
3196 	fhc_bd_t *bd_list;
3197 	int delays;
3198 	extern void idle_stop_xcall(void);
3199 
3200 	ASSERT(MUTEX_HELD(&cpu_lock));
3201 	ASSERT((cp->cpu_flags & (CPU_EXISTS | CPU_OFFLINE | CPU_QUIESCED)) ==
3202 	    (CPU_EXISTS | CPU_OFFLINE | CPU_QUIESCED));
3203 
3204 	/*
3205 	 * Lock the board so that we can safely access the
3206 	 * registers. This cannot be done inside the pause_cpus().
3207 	 */
3208 	board = FHC_CPU2BOARD(cp->cpu_id);
3209 	bd_list = fhc_bdlist_lock(board);
3210 	ASSERT(fhc_bd_valid(board) && (bd_list->sc.type == CPU_BOARD));
3211 
3212 	/*
3213 	 * Capture all CPUs (except for detaching proc) to prevent
3214 	 * crosscalls to the detaching proc until it has cleared its
3215 	 * bit in cpu_ready_set.
3216 	 *
3217 	 * The CPU's remain paused and the prom_mutex is known to be free.
3218 	 * This prevents the x-trap victim from blocking when doing prom
3219 	 * IEEE-1275 calls at a high PIL level.
3220 	 */
3221 	promsafe_pause_cpus();
3222 
3223 	/*
3224 	 * Quiesce interrupts on the target CPU. We do this by setting
3225 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3226 	 * prevent it from receiving cross calls and cross traps.
3227 	 * This prevents the processor from receiving any new soft interrupts.
3228 	 */
3229 	mp_cpu_quiesce(cp);
3230 
3231 	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
3232 	    (uint64_t)fhc_cpu_shutdown_self, (uint64_t)NULL);
3233 
3234 	/*
3235 	 * Wait for slave cpu to shutdown.
3236 	 * Sense this by watching the hardware EPDx bit.
3237 	 */
3238 	for (delays = FHC_SHUTDOWN_WAIT_MSEC; delays != 0; delays--) {
3239 		uint_t temp;
3240 
3241 		DELAY(1000);
3242 
3243 		/* get the current cpu power status */
3244 		temp = *bd_list->softsp->ctrl;
3245 
3246 		/* has the cpu actually signalled shutdown? */
3247 		if (FHC_CPU_IS_A(cp->cpu_id)) {
3248 			if (temp & FHC_EPDA_OFF)
3249 				break;
3250 		} else {
3251 			if (temp & FHC_EPDB_OFF)
3252 				break;
3253 		}
3254 	}
3255 
3256 	start_cpus();
3257 
3258 	fhc_bdlist_unlock();
3259 
3260 	/* A timeout means we've lost control of the cpu. */
3261 	if (delays == 0)
3262 		panic("Processor %d failed during shutdown", cp->cpu_id);
3263 
3264 	return (0);
3265 }
3266 
3267 /*
3268  * shutdown_self
3269  * slave side shutdown.  clean up and execute the shutdown sequence.
3270  */
3271 static void
3272 fhc_cpu_shutdown_self(void)
3273 {
3274 	extern void flush_windows(void);
3275 
3276 	flush_windows();
3277 
3278 	ASSERT(CPU->cpu_intr_actv == 0);
3279 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread ||
3280 	    CPU->cpu_thread == CPU->cpu_startup_thread);
3281 
3282 	CPU->cpu_flags = CPU_POWEROFF | CPU_OFFLINE | CPU_QUIESCED;
3283 
3284 	(void) prom_sunfire_cpu_off();	/* inform Ultra Enterprise prom */
3285 
3286 	os_completes_shutdown();
3287 
3288 	panic("fhc_cpu_shutdown_self: cannot return");
3289 	/*NOTREACHED*/
3290 }
3291 
3292 /*
3293  * Warm start CPU.
3294  */
3295 static int
3296 fhc_cpu_start(struct cpu *cp)
3297 {
3298 	int rv;
3299 	int cpuid = cp->cpu_id;
3300 	pnode_t nodeid;
3301 	extern void restart_other_cpu(int);
3302 
3303 	ASSERT(MUTEX_HELD(&cpu_lock));
3304 
3305 	/* power on cpu */
3306 	nodeid = cpunodes[cpuid].nodeid;
3307 	ASSERT(nodeid != (pnode_t)0);
3308 	rv = prom_wakeupcpu(nodeid);
3309 	if (rv != 0) {
3310 		cmn_err(CE_WARN, "Processor %d failed to power on.", cpuid);
3311 		return (EBUSY);
3312 	}
3313 
3314 	cp->cpu_flags &= ~CPU_POWEROFF;
3315 
3316 	/*
3317 	 * NOTE: restart_other_cpu pauses cpus during the slave cpu start.
3318 	 * This helps to quiesce the bus traffic a bit which makes
3319 	 * the tick sync routine in the prom more robust.
3320 	 */
3321 	restart_other_cpu(cpuid);
3322 
3323 	return (0);
3324 }
3325 
3326 /*
3327  * Power on CPU.
3328  */
3329 int
3330 fhc_cpu_poweron(struct cpu *cp)
3331 {
3332 	fhc_bd_t *bd_list;
3333 	enum temp_state state;
3334 	int board;
3335 	int status;
3336 	int status_other;
3337 	struct cpu *cp_other;
3338 
3339 	ASSERT(MUTEX_HELD(&cpu_lock));
3340 	ASSERT(cpu_is_poweredoff(cp));
3341 
3342 	/* do not power on overtemperature cpu */
3343 	board = FHC_CPU2BOARD(cp->cpu_id);
3344 	bd_list = fhc_bdlist_lock(board);
3345 
3346 	ASSERT(bd_list != NULL);
3347 	ASSERT(bd_list->sc.type == CPU_BOARD);
3348 	ASSERT(bd_list->dev_softsp != NULL);
3349 
3350 	state = ((struct environ_soft_state *)
3351 	    bd_list->dev_softsp)->tempstat.state;
3352 
3353 	fhc_bdlist_unlock();
3354 	if ((state == TEMP_WARN) || (state == TEMP_DANGER))
3355 		return (EBUSY);
3356 
3357 	status = fhc_cpu_start(cp);
3358 
3359 	/* policy for dual cpu boards */
3360 
3361 	if ((status == 0) &&
3362 	    ((cp_other = cpu_get(FHC_OTHER_CPU_ID(cp->cpu_id))) != NULL)) {
3363 		/*
3364 		 * Do not leave board's other cpu idling in the prom.
3365 		 * Start the other cpu and set its state to P_OFFLINE.
3366 		 */
3367 		status_other = fhc_cpu_start(cp_other);
3368 		if (status_other != 0) {
3369 			panic("fhc: failed to start second CPU"
3370 			    " in pair %d & %d, error %d",
3371 			    cp->cpu_id, cp_other->cpu_id, status_other);
3372 		}
3373 	}
3374 
3375 	return (status);
3376 }
3377 
3378 /*
3379  * complete the shutdown sequence in case the firmware doesn't.
3380  *
3381  * If the firmware returns, then complete the shutdown code.
3382  * (sunfire firmware presently only updates its status.  the
3383  * OS must flush the D-tags and execute the shutdown instruction.)
3384  */
3385 static void
3386 os_completes_shutdown(void)
3387 {
3388 	pfn_t 			pfn;
3389 	tte_t			tte;
3390 	volatile uint_t		*src;
3391 	volatile uint_t		*dst;
3392 	caddr_t			copy_addr;
3393 	extern void fhc_shutdown_asm(u_longlong_t, int);
3394 	extern void fhc_shutdown_asm_end(void);
3395 
3396 	copy_addr = shutdown_va + FHC_SRAM_OS_OFFSET;
3397 
3398 	/* compute sram global address for this operation */
3399 	pfn = FHC_LOCAL_OS_PAGEBASE >> MMU_PAGESHIFT;
3400 
3401 	/* force load i and d translations */
3402 	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
3403 	    TTE_PFN_INTHI(pfn);
3404 	tte.tte_intlo = TTE_PFN_INTLO(pfn) |
3405 	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT; /* un$ */
3406 	sfmmu_dtlb_ld_kva(shutdown_va, &tte);	/* load dtlb */
3407 	sfmmu_itlb_ld_kva(shutdown_va, &tte);	/* load itlb */
3408 
3409 	/*
3410 	 * copy the special shutdown function to sram
3411 	 * (this is a special integer copy that synchronizes with localspace
3412 	 * accesses.  we need special throttling to ensure copy integrity)
3413 	 */
3414 	for (src = (uint_t *)fhc_shutdown_asm, dst = (uint_t *)copy_addr;
3415 	    src < (uint_t *)fhc_shutdown_asm_end;
3416 	    src++, dst++) {
3417 		volatile uint_t dummy;
3418 
3419 		*dst = *src;
3420 		/*
3421 		 * ensure non corrupting single write operations to
3422 		 * localspace sram by interleaving reads with writes.
3423 		 */
3424 		dummy = *dst;
3425 #ifdef lint
3426 		dummy = dummy;
3427 #endif
3428 	}
3429 
3430 	/*
3431 	 * Call the shutdown sequencer.
3432 	 * NOTE: the base flush address must be unique for each MID.
3433 	 */
3434 	((void (*)(u_longlong_t, int))copy_addr)(
3435 	    FHC_BASE_NOMEM + CPU->cpu_id * FHC_MAX_ECACHE_SIZE,
3436 	    cpunodes[CPU->cpu_id].ecache_size);
3437 }
3438 
3439 enum temp_state
3440 fhc_env_temp_state(int board)
3441 {
3442 	fhc_bd_t *bdp;
3443 	struct environ_soft_state *envp;
3444 
3445 	ASSERT(fhc_bd_valid(board));
3446 
3447 	bdp = fhc_bd(board);
3448 
3449 	/*
3450 	 * Due to asynchronous attach of environ, environ may
3451 	 * not be attached by the time we start calling this routine
3452 	 * to check the temperature state.  Environ not attaching is
3453 	 * pathological so this will only cover the time between
3454 	 * board connect and environ attach.
3455 	 */
3456 	if (!bdp->dev_softsp) {
3457 		return (TEMP_OK);
3458 	}
3459 	envp = (struct environ_soft_state *)bdp->dev_softsp;
3460 
3461 	return (envp->tempstat.state);
3462 }
3463 
3464 static void
3465 fhc_tod_fault(enum tod_fault_type tod_bad)
3466 {
3467 	int board_num = 0;
3468 	enum ft_class class = FT_SYSTEM;
3469 	uint64_t addr;
3470 
3471 	addr = (va_to_pa((void *)v_eeprom_addr)) >> BOARD_PHYADDR_SHIFT;
3472 
3473 	if ((addr & CLOCKBOARD_PHYADDR_BITS) != CLOCKBOARD_PHYADDR_BITS) {
3474 		/* if tod is not on clock board, */
3475 		/* it'd be on one of io boards */
3476 		board_num = (addr >> IO_BOARD_NUMBER_SHIFT)
3477 		    & IO_BOARD_NUMBER_MASK;
3478 		class = FT_BOARD;
3479 	}
3480 
3481 	switch (tod_bad) {
3482 	case TOD_NOFAULT:
3483 		clear_fault(board_num, FT_TODFAULT, class);
3484 		break;
3485 	case TOD_REVERSED:
3486 	case TOD_STALLED:
3487 	case TOD_JUMPED:
3488 	case TOD_RATECHANGED:
3489 		reg_fault(board_num, FT_TODFAULT, class);
3490 		break;
3491 	default:
3492 		break;
3493 	}
3494 }
3495