xref: /titanic_41/usr/src/uts/sun4u/sunfire/io/fhc.c (revision bdb9230ac765cb7af3fc1f4119caf2c5720dceb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/ddi_impldefs.h>
33 #include <sys/obpdefs.h>
34 #include <sys/promif.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/vmem.h>
39 #include <sys/debug.h>
40 #include <sys/sysmacros.h>
41 #include <sys/intreg.h>
42 #include <sys/autoconf.h>
43 #include <sys/modctl.h>
44 #include <sys/spl.h>
45 #include <sys/time.h>
46 #include <sys/systm.h>
47 #include <sys/machsystm.h>
48 #include <sys/cpu.h>
49 #include <sys/cpuvar.h>
50 #include <sys/x_call.h>		/* xt_one() */
51 #include <sys/membar.h>
52 #include <sys/vm.h>
53 #include <vm/seg_kmem.h>
54 #include <vm/hat_sfmmu.h>
55 #include <sys/promimpl.h>
56 #include <sys/prom_plat.h>
57 #include <sys/cpu_module.h>	/* flush_instr_mem() */
58 #include <sys/procset.h>
59 #include <sys/fhc.h>
60 #include <sys/ac.h>
61 #include <sys/environ.h>
62 #include <sys/jtag.h>
63 #include <sys/nexusdebug.h>
64 #include <sys/ac.h>
65 #include <sys/ddi_subrdefs.h>
66 #include <sys/eeprom.h>
67 #include <sys/sdt.h>
68 #include <sys/ddi_implfuncs.h>
69 #include <sys/ontrap.h>
70 
71 #ifndef TRUE
72 #define	TRUE (1)
73 #endif
74 #ifndef FALSE
75 #define	FALSE (0)
76 #endif
77 
78 /*
79  * Function to register and deregister callbacks, for sunfire only.
80  */
81 extern void plat_register_tod_fault(void (*func)(enum tod_fault_type));
82 
83 /*
84  * This table represents the FHC interrupt priorities.  They range from
85  * 1-15, and have been modeled after the sun4d interrupts. The mondo
86  * number anded with 0x7 is used to index into this table. This was
87  * done to save table space.
88  */
89 static int fhc_int_priorities[] = {
90 	PIL_15,			/* System interrupt priority */
91 	PIL_12,			/* zs interrupt priority */
92 	PIL_15,			/* TOD interrupt priority */
93 	PIL_15			/* Fan Fail priority */
94 };
95 
96 static void fhc_tod_fault(enum tod_fault_type tod_bad);
97 
98 /*
99  * The dont_calibrate variable is meant to be set to one in /etc/system
100  * or by boot -h so that the calibration tables are not used. This
101  * is useful for checking thermistors whose output seems to be incorrect.
102  */
103 static int dont_calibrate = 0;
104 
105 /* Only one processor should powerdown the system. */
106 static int powerdown_started = 0;
107 
108 /* Let user disable overtemp powerdown. */
109 int enable_overtemp_powerdown = 1;
110 
/*
 * The following tables correspond to the degrees Celsius for each count
 * value possible from the 8-bit A/D converters on each type of system
 * board for the UltraSPARC Server systems. To access a temperature,
 * just index into the correct table using the count from the A/D converter
 * register, and that is the correct temperature in degrees Celsius. These
 * values can be negative.
 */
119 static short cpu_table[] = {
120 -16,	-14,	-12,	-10,	-8,	-6,	-4,	-2,	/* 0-7 */
121 1,	4,	6,	8,	10,	12,	13,	15,	/* 8-15 */
122 16,	18,	19,	20,	22,	23,	24,	25,	/* 16-23 */
123 26,	27,	28,	29,	30,	31,	32,	33,	/* 24-31 */
124 34,	35,	35,	36,	37,	38,	39,	39,	/* 32-39 */
125 40,	41,	41,	42,	43,	44,	44,	45,	/* 40-47 */
126 46,	46,	47,	47,	48,	49,	49,	50,	/* 48-55 */
127 51,	51,	52,	53,	53,	54,	54,	55,	/* 56-63 */
128 55,	56,	56,	57,	57,	58,	58,	59,	/* 64-71 */
129 60,	60,	61,	61,	62,	62,	63,	63,	/* 72-79 */
130 64,	64,	65,	65,	66,	66,	67,	67,	/* 80-87 */
131 68,	68,	69,	69,	70,	70,	71,	71,	/* 88-95 */
132 72,	72,	73,	73,	74,	74,	75,	75,	/* 96-103 */
133 76,	76,	77,	77,	78,	78,	79,	79,	/* 104-111 */
134 80,	80,	81,	81,	82,	82,	83,	83,	/* 112-119 */
135 84,	84,	85,	85,	86,	86,	87,	87,	/* 120-127 */
136 88,	88,	89,	89,	90,	90,	91,	91,	/* 128-135 */
137 92,	92,	93,	93,	94,	94,	95,	95,	/* 136-143 */
138 96,	96,	97,	98,	98,	99,	99,	100,	/* 144-151 */
139 100,	101,	101,	102,	103,	103,	104,	104,	/* 152-159 */
140 105,	106,	106,	107,	107,	108,	109,	109,	/* 160-167 */
141 110,								/* 168 */
142 };
143 
144 #define	CPU_MX_CNT	(sizeof (cpu_table)/sizeof (short))
145 
146 static short cpu2_table[] = {
147 -17,	-16,	-15,	-14,	-13,	-12,	-11,	-10,	/* 0-7 */
148 -9,	-8,	-7,	-6,	-5,	-4,	-3,	-2,	/* 8-15 */
149 -1,	0,	1,	2,	3,	4,	5,	6,	/* 16-23 */
150 7,	8,	9,	10,	11,	12,	13,	13,	/* 24-31 */
151 14,	15,	16,	16,	17,	18,	18,	19,	/* 32-39 */
152 20,	20,	21,	22,	22,	23,	24,	24,	/* 40-47 */
153 25,	25,	26,	26,	27,	27,	28,	28,	/* 48-55 */
154 29,	30,	30,	31,	31,	32,	32,	33,	/* 56-63 */
155 33,	34,	34,	35,	35,	36,	36,	37,	/* 64-71 */
156 37,	37,	38,	38,	39,	39,	40,	40,	/* 72-79 */
157 41,	41,	42,	42,	43,	43,	43,	44,	/* 80-87 */
158 44,	45,	45,	46,	46,	46,	47,	47,	/* 88-95 */
159 48,	48,	49,	49,	50,	50,	50,	51,	/* 96-103 */
160 51,	52,	52,	53,	53,	53,	54,	54,	/* 104-111 */
161 55,	55,	56,	56,	56,	57,	57,	58,	/* 112-119 */
162 58,	59,	59,	59,	60,	60,	61,	61,	/* 120-127 */
163 62,	62,	63,	63,	63,	64,	64,	65,	/* 128-135 */
164 65,	66,	66,	67,	67,	68,	68,	68,	/* 136-143 */
165 69,	69,	70,	70,	71,	71,	72,	72,	/* 144-151 */
166 73,	73,	74,	74,	75,	75,	76,	76,	/* 152-159 */
167 77,	77,	78,	78,	79,	79,	80,	80,	/* 160-167 */
168 81,	81,	82,	83,	83,	84,	84,	85,	/* 168-175 */
169 85,	86,	87,	87,	88,	88,	89,	90,	/* 176-183 */
170 90,	91,	92,	92,	93,	94,	94,	95,	/* 184-191 */
171 96,	96,	97,	98,	99,	99,	100,	101,	/* 192-199 */
172 102,	103,	103,	104,	105,	106,	107,	108,	/* 200-207 */
173 109,	110,							/* 208-209 */
174 };
175 
176 #define	CPU2_MX_CNT	(sizeof (cpu2_table)/sizeof (short))
177 
178 static short io_table[] = {
179 0,	0,	0,	0,	0,	0,	0,	0,	/* 0-7 */
180 0,	0,	0,	0,	0,	0,	0,	0,	/* 8-15 */
181 0,	0,	0,	0,	0,	0,	0,	0,	/* 16-23 */
182 0,	0,	0,	0,	0,	0,	0,	0,	/* 24-31 */
183 0,	0,	0,	0,	0,	0,	0,	0,	/* 32-39 */
184 0,	3,	7,	10,	13,	15,	17,	19,	/* 40-47 */
185 21,	23,	25,	27,	28,	30,	31,	32,	/* 48-55 */
186 34,	35,	36,	37,	38,	39,	41,	42,	/* 56-63 */
187 43,	44,	45,	46,	46,	47,	48,	49,	/* 64-71 */
188 50,	51,	52,	53,	53,	54,	55,	56,	/* 72-79 */
189 57,	57,	58,	59,	60,	60,	61,	62,	/* 80-87 */
190 62,	63,	64,	64,	65,	66,	66,	67,	/* 88-95 */
191 68,	68,	69,	70,	70,	71,	72,	72,	/* 96-103 */
192 73,	73,	74,	75,	75,	76,	77,	77,	/* 104-111 */
193 78,	78,	79,	80,	80,	81,	81,	82,	/* 112-119 */
194 };
195 
196 #define	IO_MN_CNT	40
197 #define	IO_MX_CNT	(sizeof (io_table)/sizeof (short))
198 
199 static short clock_table[] = {
200 0,	0,	0,	0,	0,	0,	0,	0,	/* 0-7 */
201 0,	0,	0,	0,	1,	2,	4,	5,	/* 8-15 */
202 7,	8,	10,	11,	12,	13,	14,	15,	/* 16-23 */
203 17,	18,	19,	20,	21,	22,	23,	24,	/* 24-31 */
204 24,	25,	26,	27,	28,	29,	29,	30,	/* 32-39 */
205 31,	32,	32,	33,	34,	35,	35,	36,	/* 40-47 */
206 37,	38,	38,	39,	40,	40,	41,	42,	/* 48-55 */
207 42,	43,	44,	44,	45,	46,	46,	47,	/* 56-63 */
208 48,	48,	49,	50,	50,	51,	52,	52,	/* 64-71 */
209 53,	54,	54,	55,	56,	57,	57,	58,	/* 72-79 */
210 59,	59,	60,	60,	61,	62,	63,	63,	/* 80-87 */
211 64,	65,	65,	66,	67,	68,	68,	69,	/* 88-95 */
212 70,	70,	71,	72,	73,	74,	74,	75,	/* 96-103 */
213 76,	77,	78,	78,	79,	80,	81,	82,	/* 104-111 */
214 };
215 
216 #define	CLK_MN_CNT	11
217 #define	CLK_MX_CNT	(sizeof (clock_table)/sizeof (short))
218 
219 /*
220  * System temperature limits.
221  *
222  * The following variables are the warning and danger limits for the
223  * different types of system boards. The limits are different because
224  * the various boards reach different nominal temperatures because
225  * of the different components that they contain.
226  *
227  * The warning limit is the temperature at which the user is warned.
228  * The danger limit is the temperature at which the system is shutdown.
229  * In the case of CPU/Memory system boards, the system will attempt
230  * to offline and power down processors on a board in an attempt to
231  * bring the board back into the nominal temperature range before
232  * shutting down the system.
233  *
234  * These values can be tuned via /etc/system or boot -h.
235  */
236 short cpu_warn_temp = 73;	/* CPU/Memory Warning Temperature */
237 short cpu_danger_temp = 83;	/* CPU/Memory Danger Temperature */
238 short io_warn_temp = 60;	/* IO Board Warning Temperature */
239 short io_danger_temp = 68;	/* IO Board Danger Temperature */
240 short clk_warn_temp = 60;	/* Clock Board Warning Temperature */
241 short clk_danger_temp = 68;	/* Clock Board Danger Temperature */
242 
243 short dft_warn_temp = 60;	/* default warning temp value */
244 short dft_danger_temp = 68;	/* default danger temp value */
245 
246 short cpu_warn_temp_4x = 60; 	/* CPU/Memory warning temp for 400 MHZ */
247 short cpu_danger_temp_4x = 68;	/* CPU/Memory danger temp for 400 MHZ */
248 
249 /*
250  * This variable tells us if we are in a heat chamber. It is set
251  * early on in boot, after we check the OBP 'mfg-mode' property in
252  * the options node.
253  */
254 static int temperature_chamber = -1;
255 
256 /*
257  * The fhc memloc structure is protected under the bdlist lock
258  */
259 static struct fhc_memloc *fhc_base_memloc = NULL;
260 
261 /*
262  * Driver global fault list mutex and list head pointer. The list is
263  * protected by the mutex and contains a record of all known faults.
264  * Faults can be inherited from the PROM or detected by the kernel.
265  */
266 static kmutex_t ftlist_mutex;
267 static struct ft_link_list *ft_list = NULL;
268 static int ft_nfaults = 0;
269 
270 /*
271  * Table of all known fault strings. This table is indexed by the fault
272  * type. Do not change the ordering of the table without redefining the
273  * fault type enum list on fhc.h.
274  */
275 char *ft_str_table[] = {
276 	"Core Power Supply",		/* FT_CORE_PS */
277 	"Overtemp",			/* FT_OVERTEMP */
278 	"AC Power",			/* FT_AC_PWR */
279 	"Peripheral Power Supply",	/* FT_PPS */
280 	"System 3.3 Volt Power",	/* FT_CLK_33 */
281 	"System 5.0 Volt Power",	/* FT_CLK_50 */
282 	"Peripheral 5.0 Volt Power",	/* FT_V5_P */
283 	"Peripheral 12 Volt Power",	/* FT_V12_P */
284 	"Auxiliary 5.0 Volt Power",	/* FT_V5_AUX */
285 	"Peripheral 5.0 Volt Precharge", /* FT_V5_P_PCH */
286 	"Peripheral 12 Volt Precharge",	/* FT_V12_P_PCH */
287 	"System 3.3 Volt Precharge",	/* FT_V3_PCH */
288 	"System 5.0 Volt Precharge",	/* FT_V5_PCH */
289 	"Peripheral Power Supply Fans",	/* FT_PPS_FAN */
290 	"Rack Exhaust Fan",		/* FT_RACK_EXH */
291 	"Disk Drive Fan",		/* FT_DSK_FAN */
292 	"AC Box Fan",			/* FT_AC_FAN */
293 	"Key Switch Fan",		/* FT_KEYSW_FAN */
294 	"Minimum Power",		/* FT_INSUFFICIENT_POWER */
295 	"PROM detected",		/* FT_PROM */
296 	"Hot Plug Support System",	/* FT_HOT_PLUG */
297 	"TOD"				/* FT_TODFAULT */
298 };
299 
300 static int ft_max_index = (sizeof (ft_str_table) / sizeof (char *));
301 
302 /*
303  * Function prototypes
304  */
305 static int fhc_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t,
306 	void *, void *);
307 static int fhc_intr_ops(dev_info_t *dip, dev_info_t *rdip,
308 	ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
309 
310 static int fhc_add_intr_impl(dev_info_t *dip, dev_info_t *rdip,
311 	ddi_intr_handle_impl_t *hdlp);
312 static void fhc_remove_intr_impl(dev_info_t *dip, dev_info_t *rdip,
313 	ddi_intr_handle_impl_t *hdlp);
314 
315 static int fhc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
316 static int fhc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
317 static int fhc_init(struct fhc_soft_state *softsp);
318 static void fhc_unmap_regs(struct fhc_soft_state *softsp);
319 static enum board_type fhc_board_type(struct fhc_soft_state *, int);
320 
321 static void
322 fhc_xlate_intrs(ddi_intr_handle_impl_t *hdlp, uint32_t ign);
323 
324 static int
325 fhc_ctlops_peekpoke(ddi_ctl_enum_t, peekpoke_ctlops_t *, void *result);
326 
327 static void fhc_add_kstats(struct fhc_soft_state *);
328 static int fhc_kstat_update(kstat_t *, int);
329 static int check_for_chamber(void);
330 static int ft_ks_snapshot(struct kstat *, void *, int);
331 static int ft_ks_update(struct kstat *, int);
332 static int check_central(int board);
333 
334 /*
335  * board type and A/D convertor output passed in and real temperature
336  * is returned.
337  */
338 static short calibrate_temp(enum board_type, uchar_t, uint_t);
339 static enum temp_state get_temp_state(enum board_type, short, int);
340 
341 /* Routine to determine if there are CPUs on this board. */
342 static int cpu_on_board(int);
343 
344 static void build_bd_display_str(char *, enum board_type, int);
345 
346 /* Interrupt distribution callback function. */
347 static void fhc_intrdist(void *);
348 
349 /* CPU power control */
350 int fhc_cpu_poweroff(struct cpu *);	/* cpu_poweroff()->platform */
351 int fhc_cpu_poweron(struct cpu *);	/* cpu_poweron()->platform */
352 
353 extern struct cpu_node cpunodes[];
354 extern void halt(char *);
355 
356 /*
357  * Configuration data structures
358  */
359 static struct bus_ops fhc_bus_ops = {
360 	BUSO_REV,
361 	ddi_bus_map,		/* map */
362 	0,			/* get_intrspec */
363 	0,			/* add_intrspec */
364 	0,			/* remove_intrspec */
365 	i_ddi_map_fault,	/* map_fault */
366 	ddi_no_dma_map,		/* dma_map */
367 	ddi_no_dma_allochdl,
368 	ddi_no_dma_freehdl,
369 	ddi_no_dma_bindhdl,
370 	ddi_no_dma_unbindhdl,
371 	ddi_no_dma_flush,
372 	ddi_no_dma_win,
373 	ddi_dma_mctl,		/* dma_ctl */
374 	fhc_ctlops,		/* ctl */
375 	ddi_bus_prop_op,	/* prop_op */
376 	0,			/* (*bus_get_eventcookie)();	*/
377 	0,			/* (*bus_add_eventcall)();	*/
378 	0,			/* (*bus_remove_eventcall)();	*/
379 	0,			/* (*bus_post_event)();		*/
380 	0,			/* (*bus_intr_control)();	*/
381 	0,			/* (*bus_config)();		*/
382 	0,			/* (*bus_unconfig)();		*/
383 	0,			/* (*bus_fm_init)();		*/
384 	0,			/* (*bus_fm_fini)();		*/
385 	0,			/* (*bus_fm_access_enter)();	*/
386 	0,			/* (*bus_fm_access_exit)();	*/
387 	0,			/* (*bus_power)();		*/
388 	fhc_intr_ops		/* (*bus_intr_op)();		*/
389 };
390 
391 static struct cb_ops fhc_cb_ops = {
392 	nulldev,		/* open */
393 	nulldev,		/* close */
394 	nulldev,		/* strategy */
395 	nulldev,		/* print */
396 	nulldev,		/* dump */
397 	nulldev,		/* read */
398 	nulldev,		/* write */
399 	nulldev, 		/* ioctl */
400 	nodev,			/* devmap */
401 	nodev,			/* mmap */
402 	nodev,			/* segmap */
403 	nochpoll,		/* poll */
404 	ddi_prop_op,		/* cb_prop_op */
405 	0,			/* streamtab */
406 	D_MP|D_NEW|D_HOTPLUG,	/* Driver compatibility flag */
407 	CB_REV,			/* rev */
408 	nodev,			/* cb_aread */
409 	nodev			/* cb_awrite */
410 };
411 
412 static struct dev_ops fhc_ops = {
413 	DEVO_REV,		/* rev */
414 	0,			/* refcnt  */
415 	ddi_no_info,		/* getinfo */
416 	nulldev,		/* identify */
417 	nulldev,		/* probe */
418 	fhc_attach,		/* attach */
419 	fhc_detach,		/* detach */
420 	nulldev,		/* reset */
421 	&fhc_cb_ops,		/* cb_ops */
422 	&fhc_bus_ops,		/* bus_ops */
423 	nulldev,		/* power */
424 	ddi_quiesce_not_needed,		/* quiesce */
425 };
426 
427 /*
428  * Driver globals
429  * TODO - We need to investigate what locking needs to be done here.
430  */
431 void *fhcp;				/* fhc soft state hook */
432 
433 extern struct mod_ops mod_driverops;
434 
435 static struct modldrv modldrv = {
436 	&mod_driverops,		/* Type of module.  This one is a driver */
437 	"FHC Nexus",		/* Name of module. */
438 	&fhc_ops,		/* driver ops */
439 };
440 
441 static struct modlinkage modlinkage = {
442 	MODREV_1,		/* rev */
443 	(void *)&modldrv,
444 	NULL
445 };
446 
447 
448 /*
449  * These are the module initialization routines.
450  */
451 
452 static caddr_t shutdown_va;
453 
454 int
455 _init(void)
456 {
457 	int error;
458 
459 	if ((error = ddi_soft_state_init(&fhcp,
460 	    sizeof (struct fhc_soft_state), 1)) != 0)
461 		return (error);
462 
463 	fhc_bdlist_init();
464 	mutex_init(&ftlist_mutex, NULL, MUTEX_DEFAULT, NULL);
465 
466 	shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
467 	ASSERT(shutdown_va != NULL);
468 
469 	plat_register_tod_fault(fhc_tod_fault);
470 
471 	return (mod_install(&modlinkage));
472 }
473 
474 int
475 _fini(void)
476 {
477 	int error;
478 
479 	if ((error = mod_remove(&modlinkage)) != 0)
480 		return (error);
481 
482 	plat_register_tod_fault(NULL);
483 
484 	mutex_destroy(&ftlist_mutex);
485 
486 	fhc_bdlist_fini();
487 
488 	ddi_soft_state_fini(&fhcp);
489 
490 	return (0);
491 }
492 
493 int
494 _info(struct modinfo *modinfop)
495 {
496 	return (mod_info(&modlinkage, modinfop));
497 }
498 
/*
 * Reset the interrupt mapping registers.
 * This function resets the values during DDI_RESUME.
 *
 * NOTE: This function will not work for a full CPR cycle
 * and is currently designed to handle the RESUME after a connect.
 *
 * Note about the PROM handling of moving CENTRAL to another board:
 * The PROM moves the IGN identity (igr register) from the
 * original CENTRAL to the new one. This means that we do not
 * duplicate the fhc_attach code that sets it to (board number * 2).
 * We rely on only using FHC interrupts from one board only
 * (the UART and SYS interrupts) so that the values of the other IGNs
 * are irrelevant. The benefit of this approach is that we don't
 * have to tear down and rebuild the interrupt records
 * for UART and SYS. It is also why we don't try to change the
 * board number in the fhc instance for the clock board.
 */
517 static void
518 fhc_handle_imr(struct fhc_soft_state *softsp)
519 {
520 	int i;
521 	int cent;
522 	uint_t tmp_reg;
523 
524 
525 	if (softsp->is_central) {
526 		uint_t want_igr, act_igr;
527 
528 		want_igr = softsp->list->sc.board << 1;
529 		act_igr = *softsp->igr & 0x1f;
530 		if (want_igr != act_igr) {
531 			*softsp->igr = want_igr;
532 			tmp_reg = *softsp->igr;
533 #ifdef lint
534 			tmp_reg = tmp_reg;
535 #endif
536 			/* We must now re-issue any pending interrupts. */
537 			for (i = 0; i < FHC_MAX_INO; i++) {
538 				if (*(softsp->intr_regs[i].clear_reg) == 3) {
539 					*(softsp->intr_regs[i].clear_reg) =
540 					    ISM_IDLE;
541 
542 					tmp_reg =
543 					    *(softsp->intr_regs[i].clear_reg);
544 #ifdef lint
545 					tmp_reg = tmp_reg;
546 #endif
547 				}
548 			}
549 			cmn_err(CE_NOTE, "central IGN corruption fixed: "
550 			    "got %x wanted %x", act_igr, want_igr);
551 		}
552 		return;
553 	}
554 
555 	ASSERT(softsp->list->sc.board == FHC_BSR_TO_BD(*(softsp->bsr)));
556 	cent = check_central(softsp->list->sc.board);
557 
558 	/* Loop through all 4 FHC interrupt mapping registers */
559 	for (i = 0; i < FHC_MAX_INO; i++) {
560 
561 		if (i == FHC_SYS_INO &&
562 		    *(softsp->intr_regs[i].clear_reg) == 3) {
563 			cmn_err(CE_NOTE,
564 			    "found lost system interrupt, resetting..");
565 
566 			*(softsp->intr_regs[i].clear_reg) = ISM_IDLE;
567 
568 			/*
569 			 * ensure atomic write with this read.
570 			 */
571 			tmp_reg = *(softsp->intr_regs[i].clear_reg);
572 #ifdef lint
573 			tmp_reg = tmp_reg;
574 #endif
575 		}
576 
577 		/*
578 		 * The mapping registers on the board with the "central" bit
579 		 * set should not be touched as it has been taken care by POST.
580 		 */
581 
582 		if (cent)
583 			continue;
584 
585 		*(softsp->intr_regs[i].mapping_reg) = 0;
586 
587 		/*
588 		 * ensure atomic write with this read.
589 		 */
590 		tmp_reg = *(softsp->intr_regs[i].mapping_reg);
591 #ifdef lint
592 		tmp_reg = tmp_reg;
593 #endif
594 
595 	}
596 }
597 
598 static int
599 check_central(int board)
600 {
601 	uint_t cs_value;
602 
603 	/*
604 	 * This is the value of AC configuration and status reg
605 	 * in the Local Devices space. We access it as a physical
606 	 * address.
607 	 */
608 	cs_value = ldphysio(AC_BCSR(board));
609 	if (cs_value & AC_CENTRAL)
610 		return (TRUE);
611 	else
612 		return (FALSE);
613 }
614 
615 static int
616 fhc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
617 {
618 	struct fhc_soft_state *softsp;
619 	int instance;
620 
621 	instance = ddi_get_instance(devi);
622 
623 	switch (cmd) {
624 	case DDI_ATTACH:
625 		break;
626 
627 	case DDI_RESUME:
628 		softsp = ddi_get_soft_state(fhcp, instance);
629 		/* IGR, NOT_BRD_PRES handled by prom */
630 		/* reset interrupt mapping registers */
631 		fhc_handle_imr(softsp);
632 
633 		return (DDI_SUCCESS);
634 
635 	default:
636 		return (DDI_FAILURE);
637 	}
638 
639 
640 	if (ddi_soft_state_zalloc(fhcp, instance) != DDI_SUCCESS)
641 		return (DDI_FAILURE);
642 
643 	softsp = ddi_get_soft_state(fhcp, instance);
644 
645 	/* Set the dip in the soft state */
646 	softsp->dip = devi;
647 
648 	if (fhc_init(softsp) != DDI_SUCCESS)
649 		goto bad;
650 
651 	ddi_report_dev(devi);
652 
653 	return (DDI_SUCCESS);
654 
655 bad:
656 	ddi_soft_state_free(fhcp, instance);
657 	return (DDI_FAILURE);
658 }
659 
660 static int
661 fhc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
662 {
663 	int board;
664 	int instance;
665 	struct fhc_soft_state *softsp;
666 	fhc_bd_t *list = NULL;
667 
668 	/* get the instance of this devi */
669 	instance = ddi_get_instance(devi);
670 
671 	/* get the soft state pointer for this device node */
672 	softsp = ddi_get_soft_state(fhcp, instance);
673 
674 	board = softsp->list->sc.board;
675 
676 	switch (cmd) {
677 	case DDI_SUSPEND:
678 
679 		return (DDI_SUCCESS);
680 
681 	case DDI_DETACH:
682 		/* grab the lock on the board list */
683 		list = fhc_bdlist_lock(board);
684 
685 		if (fhc_bd_detachable(board) &&
686 		    !fhc_bd_is_jtag_master(board))
687 			break;
688 		else
689 			fhc_bdlist_unlock();
690 		/* FALLTHROUGH */
691 
692 	default:
693 		return (DDI_FAILURE);
694 	}
695 
696 	/* Remove the interrupt redistribution callback. */
697 	intr_dist_rem(fhc_intrdist, (void *)devi);
698 
699 	/* remove the soft state pointer from the board list */
700 	list->softsp = NULL;
701 
702 	/* clear inherited faults from the PROM. */
703 	clear_fault(list->sc.board, FT_PROM, FT_BOARD);
704 
705 	/* remove the kstat for this board */
706 	kstat_delete(softsp->fhc_ksp);
707 
708 	/* destroy the mutexes in this soft state structure */
709 	mutex_destroy(&softsp->poll_list_lock);
710 	mutex_destroy(&softsp->ctrl_lock);
711 
712 	/* unmap all the register sets */
713 	fhc_unmap_regs(softsp);
714 
715 	/* release the board list lock now */
716 	fhc_bdlist_unlock();
717 
718 	/* free the soft state structure */
719 	ddi_soft_state_free(fhcp, instance);
720 
721 	return (DDI_SUCCESS);
722 }
723 
724 static enum board_type
725 fhc_board_type(struct fhc_soft_state *softsp, int board)
726 {
727 	int proplen;
728 	char *board_type;
729 	enum board_type type;
730 
731 	if (softsp->is_central)
732 		type = CLOCK_BOARD;
733 	else if (ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip,
734 	    DDI_PROP_DONTPASS, "board-type", (caddr_t)&board_type,
735 	    &proplen) == DDI_PROP_SUCCESS) {
736 		/* match the board-type string */
737 		if (strcmp(CPU_BD_NAME, board_type) == 0) {
738 			type = CPU_BOARD;
739 		} else if (strcmp(MEM_BD_NAME, board_type) == 0) {
740 			type = MEM_BOARD;
741 		} else if (strcmp(IO_2SBUS_BD_NAME, board_type) == 0) {
742 			type = IO_2SBUS_BOARD;
743 		} else if (strcmp(IO_SBUS_FFB_BD_NAME, board_type) == 0) {
744 			type = IO_SBUS_FFB_BOARD;
745 		} else if (strcmp(IO_2SBUS_SOCPLUS_BD_NAME, board_type) == 0) {
746 			type = IO_2SBUS_SOCPLUS_BOARD;
747 		} else if (strcmp(IO_SBUS_FFB_SOCPLUS_BD_NAME, board_type)
748 		    == 0) {
749 			type = IO_SBUS_FFB_SOCPLUS_BOARD;
750 		} else if (strcmp(IO_PCI_BD_NAME, board_type) == 0) {
751 			type = IO_PCI_BOARD;
752 		} else {
753 			type = UNKNOWN_BOARD;
754 		}
755 		kmem_free(board_type, proplen);
756 	} else
757 		type = UNKNOWN_BOARD;
758 
759 	/*
760 	 * if the board type is indeterminate, it must be determined.
761 	 */
762 	if (type == UNKNOWN_BOARD) {
763 		/*
764 		 * Use the UPA64 bits from the FHC.
765 		 * This is not the best solution since we
766 		 * cannot fully type the IO boards.
767 		 */
768 		if (cpu_on_board(board))
769 			type = CPU_BOARD;
770 		else if ((*(softsp->bsr) & FHC_UPADATA64A) ||
771 		    (*(softsp->bsr) & FHC_UPADATA64B))
772 			type = IO_2SBUS_BOARD;
773 		else
774 			type = MEM_BOARD;
775 	}
776 
777 	return (type);
778 }
779 
780 static void
781 fhc_unmap_regs(struct fhc_soft_state *softsp)
782 {
783 	dev_info_t *dip = softsp->dip;
784 
785 	if (softsp->id) {
786 		ddi_unmap_regs(dip, 0, (caddr_t *)&softsp->id, 0, 0);
787 		softsp->id = NULL;
788 	}
789 	if (softsp->igr) {
790 		ddi_unmap_regs(dip, 1, (caddr_t *)&softsp->igr, 0, 0);
791 		softsp->igr = NULL;
792 	}
793 	if (softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg) {
794 		ddi_unmap_regs(dip, 2,
795 		    (caddr_t *)&softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg,
796 		    0, 0);
797 		softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg = NULL;
798 	}
799 	if (softsp->intr_regs[FHC_SYS_INO].mapping_reg) {
800 		ddi_unmap_regs(dip, 3,
801 		    (caddr_t *)&softsp->intr_regs[FHC_SYS_INO].mapping_reg,
802 		    0, 0);
803 		softsp->intr_regs[FHC_SYS_INO].mapping_reg = NULL;
804 	}
805 	if (softsp->intr_regs[FHC_UART_INO].mapping_reg) {
806 		ddi_unmap_regs(dip, 4,
807 		    (caddr_t *)&softsp->intr_regs[FHC_UART_INO].mapping_reg,
808 		    0, 0);
809 		softsp->intr_regs[FHC_UART_INO].mapping_reg = NULL;
810 	}
811 	if (softsp->intr_regs[FHC_TOD_INO].mapping_reg) {
812 		ddi_unmap_regs(dip, 5,
813 		    (caddr_t *)&softsp->intr_regs[FHC_TOD_INO].mapping_reg,
814 		    0, 0);
815 		softsp->intr_regs[FHC_TOD_INO].mapping_reg = NULL;
816 	}
817 }
818 
819 static int
820 fhc_init(struct fhc_soft_state *softsp)
821 {
822 	int i;
823 	uint_t tmp_reg;
824 	int board;
825 
826 	/*
827 	 * Map in the FHC registers. Specifying length and offset of
828 	 * zero maps in the entire OBP register set.
829 	 */
830 
831 	/* map in register set 0 */
832 	if (ddi_map_regs(softsp->dip, 0,
833 	    (caddr_t *)&softsp->id, 0, 0)) {
834 		cmn_err(CE_WARN, "fhc%d: unable to map internal "
835 		    "registers", ddi_get_instance(softsp->dip));
836 		goto bad;
837 	}
838 
839 	/*
840 	 * Fill in the virtual addresses of the registers in the
841 	 * fhc_soft_state structure.
842 	 */
843 	softsp->rctrl = (uint_t *)((char *)(softsp->id) +
844 	    FHC_OFF_RCTRL);
845 	softsp->ctrl = (uint_t *)((char *)(softsp->id) +
846 	    FHC_OFF_CTRL);
847 	softsp->bsr = (uint_t *)((char *)(softsp->id) +
848 	    FHC_OFF_BSR);
849 	softsp->jtag_ctrl = (uint_t *)((char *)(softsp->id) +
850 	    FHC_OFF_JTAG_CTRL);
851 	softsp->jt_master.jtag_cmd = (uint_t *)((char *)(softsp->id) +
852 	    FHC_OFF_JTAG_CMD);
853 
854 	/* map in register set 1 */
855 	if (ddi_map_regs(softsp->dip, 1,
856 	    (caddr_t *)&softsp->igr, 0, 0)) {
857 		cmn_err(CE_WARN, "fhc%d: unable to map IGR "
858 		    "register", ddi_get_instance(softsp->dip));
859 		goto bad;
860 	}
861 
862 	/*
863 	 * map in register set 2
864 	 * XXX this can never be used as an interrupt generator
865 	 * (hardware queue overflow in fhc)
866 	 */
867 	if (ddi_map_regs(softsp->dip, 2,
868 	    (caddr_t *)&softsp->intr_regs[FHC_FANFAIL_INO].mapping_reg,
869 	    0, 0)) {
870 		cmn_err(CE_WARN, "fhc%d: unable to map Fan Fail "
871 		    "IMR register", ddi_get_instance(softsp->dip));
872 		goto bad;
873 	}
874 
875 	/* map in register set 3 */
876 	if (ddi_map_regs(softsp->dip, 3,
877 	    (caddr_t *)&softsp->intr_regs[FHC_SYS_INO].mapping_reg,
878 	    0, 0)) {
879 		cmn_err(CE_WARN, "fhc%d: unable to map System "
880 		    "IMR register\n", ddi_get_instance(softsp->dip));
881 		goto bad;
882 	}
883 
884 	/* map in register set 4 */
885 	if (ddi_map_regs(softsp->dip, 4,
886 	    (caddr_t *)&softsp->intr_regs[FHC_UART_INO].mapping_reg,
887 	    0, 0)) {
888 		cmn_err(CE_WARN, "fhc%d: unable to map UART "
889 		    "IMR register\n", ddi_get_instance(softsp->dip));
890 		goto bad;
891 	}
892 
893 	/* map in register set 5 */
894 	if (ddi_map_regs(softsp->dip, 5,
895 	    (caddr_t *)&softsp->intr_regs[FHC_TOD_INO].mapping_reg,
896 	    0, 0)) {
897 		cmn_err(CE_WARN, "fhc%d: unable to map FHC TOD "
898 		    "IMR register", ddi_get_instance(softsp->dip));
899 		goto bad;
900 	}
901 
902 	/* Loop over all intr sets and setup the VAs for the ISMR */
903 	/* TODO - Make sure we are calculating the ISMR correctly. */
904 	for (i = 0; i < FHC_MAX_INO; i++) {
905 		softsp->intr_regs[i].clear_reg =
906 		    (uint_t *)((char *)(softsp->intr_regs[i].mapping_reg) +
907 		    FHC_OFF_ISMR);
908 
909 		/* Now clear the state machines to idle */
910 		*(softsp->intr_regs[i].clear_reg) = ISM_IDLE;
911 	}
912 
913 	/*
914 	 * It is OK to not have a OBP_BOARDNUM property. This happens for
915 	 * the board which is a child of central. However this FHC
916 	 * still needs a proper Interrupt Group Number programmed
917 	 * into the Interrupt Group register, because the other
918 	 * instance of FHC, which is not under central, will properly
919 	 * program the IGR. The numbers from the two settings of the
920 	 * IGR need to be the same. One driver cannot wait for the
921 	 * other to program the IGR, because there is no guarantee
922 	 * which instance of FHC will get attached first.
923 	 */
924 	if ((board = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
925 	    DDI_PROP_DONTPASS, OBP_BOARDNUM, -1)) == -1) {
926 		/*
927 		 * Now determine the board number by reading the
928 		 * hardware register.
929 		 */
930 		board = FHC_BSR_TO_BD(*(softsp->bsr));
931 		softsp->is_central = 1;
932 	}
933 
934 	/*
935 	 * If this fhc holds JTAG master line, and is not the central fhc,
936 	 * (this avoids two JTAG master nodes) then initialize the
937 	 * mutex and set the flag in the structure.
938 	 */
939 	if ((*(softsp->jtag_ctrl) & JTAG_MASTER_EN) && !softsp->is_central) {
940 		mutex_init(&(softsp->jt_master.lock), NULL, MUTEX_DEFAULT,
941 		    NULL);
942 		softsp->jt_master.is_master = 1;
943 	} else {
944 		softsp->jt_master.is_master = 0;
945 	}
946 
947 	fhc_bd_init(softsp, board, fhc_board_type(softsp, board));
948 
949 	/* Initialize the mutex guarding the poll_list. */
950 	mutex_init(&softsp->poll_list_lock, NULL, MUTEX_DRIVER, NULL);
951 
952 	/* Initialize the mutex guarding the FHC CSR */
953 	mutex_init(&softsp->ctrl_lock, NULL, MUTEX_DRIVER, NULL);
954 
955 	/* Initialize the poll_list to be empty */
956 	for (i = 0; i < MAX_ZS_CNT; i++) {
957 		softsp->poll_list[i].funcp = NULL;
958 	}
959 
960 	/* Modify the various registers in the FHC now */
961 
962 	/*
963 	 * We know this board to be present now, record that state and
964 	 * remove the NOT_BRD_PRES condition
965 	 */
966 	if (!(softsp->is_central)) {
967 		mutex_enter(&softsp->ctrl_lock);
968 		*(softsp->ctrl) |= FHC_NOT_BRD_PRES;
969 		/* Now flush the hardware store buffers. */
970 		tmp_reg = *(softsp->ctrl);
971 #ifdef lint
972 		tmp_reg = tmp_reg;
973 #endif
974 		/* XXX record the board state in global space */
975 		mutex_exit(&softsp->ctrl_lock);
976 
977 		/* Add kstats for all non-central instances of the FHC. */
978 		fhc_add_kstats(softsp);
979 	}
980 
981 	/*
982 	 * Read the device tree to see if this system is in an environmental
983 	 * chamber.
984 	 */
985 	if (temperature_chamber == -1) {
986 		temperature_chamber = check_for_chamber();
987 	}
988 
989 	/* Check for inherited faults from the PROM. */
990 	if (*softsp->ctrl & FHC_LED_MID) {
991 		reg_fault(softsp->list->sc.board, FT_PROM, FT_BOARD);
992 	}
993 
994 	/*
995 	 * setup the IGR. Shift the board number over by one to get
996 	 * the UPA MID.
997 	 */
998 	*(softsp->igr) = (softsp->list->sc.board) << 1;
999 
1000 	/* Now flush the hardware store buffers. */
1001 	tmp_reg = *(softsp->id);
1002 #ifdef lint
1003 	tmp_reg = tmp_reg;
1004 #endif
1005 
1006 	/* Add the interrupt redistribution callback. */
1007 	intr_dist_add(fhc_intrdist, (void *)softsp->dip);
1008 
1009 	return (DDI_SUCCESS);
1010 bad:
1011 	fhc_unmap_regs(softsp);
1012 	return (DDI_FAILURE);
1013 }
1014 
1015 static uint_t
1016 fhc_intr_wrapper(caddr_t arg)
1017 {
1018 	uint_t intr_return;
1019 	uint_t tmpreg;
1020 	struct fhc_wrapper_arg *intr_info = (struct fhc_wrapper_arg *)arg;
1021 	uint_t (*funcp)(caddr_t, caddr_t) = intr_info->funcp;
1022 	caddr_t iarg1 = intr_info->arg1;
1023 	caddr_t iarg2 = intr_info->arg2;
1024 	dev_info_t *dip = intr_info->child;
1025 
1026 	tmpreg = ISM_IDLE;
1027 
1028 	DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
1029 	    void *, funcp, caddr_t, iarg1, caddr_t, iarg2);
1030 
1031 	intr_return = (*funcp)(iarg1, iarg2);
1032 
1033 	DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
1034 	    void *, funcp, caddr_t, iarg1, int, intr_return);
1035 
1036 	/* Idle the state machine. */
1037 	*(intr_info->clear_reg) = tmpreg;
1038 
1039 	/* Flush the hardware store buffers. */
1040 	tmpreg = *(intr_info->clear_reg);
1041 #ifdef lint
1042 	tmpreg = tmpreg;
1043 #endif	/* lint */
1044 
1045 	return (intr_return);
1046 }
1047 
1048 /*
1049  * fhc_zs_intr_wrapper
1050  *
1051  * This function handles intrerrupts where more than one device may interupt
1052  * the fhc with the same mondo.
1053  */
1054 
1055 #define	MAX_INTR_CNT 10
1056 
1057 static uint_t
1058 fhc_zs_intr_wrapper(caddr_t arg)
1059 {
1060 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)arg;
1061 	uint_t (*funcp0)(caddr_t, caddr_t);
1062 	uint_t (*funcp1)(caddr_t, caddr_t);
1063 	caddr_t funcp0_arg1, funcp0_arg2, funcp1_arg1, funcp1_arg2;
1064 	uint_t tmp_reg;
1065 	uint_t result = DDI_INTR_UNCLAIMED;
1066 	volatile uint_t *clear_reg;
1067 	uchar_t *spurious_cntr = &softsp->spurious_zs_cntr;
1068 
1069 	funcp0 = softsp->poll_list[0].funcp;
1070 	funcp1 = softsp->poll_list[1].funcp;
1071 	funcp0_arg1 = softsp->poll_list[0].arg1;
1072 	funcp0_arg2 = softsp->poll_list[0].arg2;
1073 	funcp1_arg1 = softsp->poll_list[1].arg1;
1074 	funcp1_arg2 = softsp->poll_list[1].arg2;
1075 	clear_reg = softsp->intr_regs[FHC_UART_INO].clear_reg;
1076 
1077 	if (funcp0 != NULL) {
1078 		if ((funcp0)(funcp0_arg1, funcp0_arg2) == DDI_INTR_CLAIMED) {
1079 			result = DDI_INTR_CLAIMED;
1080 		}
1081 	}
1082 
1083 	if (funcp1 != NULL) {
1084 		if ((funcp1)(funcp1_arg1, funcp1_arg2) == DDI_INTR_CLAIMED) {
1085 			result = DDI_INTR_CLAIMED;
1086 		}
1087 	}
1088 
1089 	if (result == DDI_INTR_UNCLAIMED) {
1090 		(*spurious_cntr)++;
1091 
1092 		if (*spurious_cntr < MAX_INTR_CNT) {
1093 			result = DDI_INTR_CLAIMED;
1094 		} else {
1095 			*spurious_cntr = (uchar_t)0;
1096 		}
1097 	} else {
1098 		*spurious_cntr = (uchar_t)0;
1099 	}
1100 
1101 	/* Idle the state machine. */
1102 	*(clear_reg) = ISM_IDLE;
1103 
1104 	/* flush the store buffers. */
1105 	tmp_reg = *(clear_reg);
1106 #ifdef lint
1107 	tmp_reg = tmp_reg;
1108 #endif
1109 
1110 	return (result);
1111 }
1112 
1113 
1114 /*
1115  * add_intrspec - Add an interrupt specification.
1116  */
1117 static int
1118 fhc_add_intr_impl(dev_info_t *dip, dev_info_t *rdip,
1119     ddi_intr_handle_impl_t *hdlp)
1120 {
1121 	int ino;
1122 	struct fhc_wrapper_arg *fhc_arg;
1123 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)
1124 	    ddi_get_soft_state(fhcp, ddi_get_instance(dip));
1125 	volatile uint_t *mondo_vec_reg;
1126 	uint_t tmp_mondo_vec;
1127 	uint_t tmpreg; /* HW flush reg */
1128 	uint_t cpu_id;
1129 	int ret = DDI_SUCCESS;
1130 
1131 	/* Xlate the interrupt */
1132 	fhc_xlate_intrs(hdlp,
1133 	    (softsp->list->sc.board << BD_IVINTR_SHFT));
1134 
1135 	/* get the mondo number */
1136 	ino = FHC_INO(hdlp->ih_vector);
1137 	mondo_vec_reg = softsp->intr_regs[ino].mapping_reg;
1138 
1139 	ASSERT(ino < FHC_MAX_INO);
1140 
1141 	/* We don't use the two spare interrupts. */
1142 	if (ino >= FHC_MAX_INO) {
1143 		cmn_err(CE_WARN, "fhc%d: Spare interrupt %d not usable",
1144 		    ddi_get_instance(dip), ino);
1145 		return (DDI_FAILURE);
1146 	}
1147 
1148 	/* TOD and Fan Fail interrupts are not usable */
1149 	if (ino == FHC_TOD_INO) {
1150 		cmn_err(CE_WARN, "fhc%d: TOD interrupt not usable",
1151 		    ddi_get_instance(dip));
1152 		return (DDI_FAILURE);
1153 	}
1154 	if (ino == FHC_FANFAIL_INO) {
1155 		cmn_err(CE_WARN, "fhc%d: Fan fail interrupt not usable",
1156 		    ddi_get_instance(dip));
1157 		return (DDI_FAILURE);
1158 	}
1159 
1160 	/*
1161 	 * If the interrupt is for the zs chips, use the vector
1162 	 * polling lists. Otherwise use a straight handler.
1163 	 */
1164 	if (ino == FHC_UART_INO) {
1165 		int32_t zs_inst;
1166 		/* First lock the mutex for this poll_list */
1167 		mutex_enter(&softsp->poll_list_lock);
1168 
1169 		/*
1170 		 * Add this interrupt to the polling list.
1171 		 */
1172 
1173 		/* figure out where to add this item in the list */
1174 		for (zs_inst = 0; zs_inst < MAX_ZS_CNT; zs_inst++) {
1175 			if (softsp->poll_list[zs_inst].funcp == NULL) {
1176 				softsp->poll_list[zs_inst].arg1 =
1177 				    hdlp->ih_cb_arg1;
1178 				softsp->poll_list[zs_inst].arg2 =
1179 				    hdlp->ih_cb_arg2;
1180 				softsp->poll_list[zs_inst].funcp =
1181 				    (ddi_intr_handler_t *)
1182 				    hdlp->ih_cb_func;
1183 				softsp->poll_list[zs_inst].inum =
1184 				    hdlp->ih_inum;
1185 				softsp->poll_list[zs_inst].child = rdip;
1186 
1187 				break;
1188 			}
1189 		}
1190 
1191 		if (zs_inst >= MAX_ZS_CNT) {
1192 			cmn_err(CE_WARN,
1193 			    "fhc%d: poll list overflow",
1194 			    ddi_get_instance(dip));
1195 			mutex_exit(&softsp->poll_list_lock);
1196 			ret = DDI_FAILURE;
1197 			goto done;
1198 		}
1199 
1200 		/*
1201 		 * If polling list is empty, then install handler
1202 		 * and enable interrupts for this ino.
1203 		 */
1204 		if (zs_inst == 0) {
1205 			DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1206 			    (ddi_intr_handler_t *)fhc_zs_intr_wrapper,
1207 			    (caddr_t)softsp, NULL);
1208 
1209 			ret = i_ddi_add_ivintr(hdlp);
1210 
1211 			DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1212 			    softsp->poll_list[zs_inst].funcp,
1213 			    softsp->poll_list[zs_inst].arg1,
1214 			    softsp->poll_list[zs_inst].arg2);
1215 
1216 			if (ret != DDI_SUCCESS)
1217 				goto done;
1218 		}
1219 
1220 		/*
1221 		 * If both zs handlers are active, then this is the
1222 		 * second add_intrspec called, so do not enable
1223 		 * the IMR_VALID bit, it is already on.
1224 		 */
1225 		if (zs_inst > 0) {
1226 				/* now release the mutex and return */
1227 			mutex_exit(&softsp->poll_list_lock);
1228 
1229 			goto done;
1230 		} else {
1231 			/* just release the mutex */
1232 			mutex_exit(&softsp->poll_list_lock);
1233 		}
1234 	} else {	/* normal interrupt installation */
1235 		int32_t i;
1236 
1237 		/* Allocate a nexus interrupt data structure */
1238 		fhc_arg = kmem_alloc(sizeof (struct fhc_wrapper_arg), KM_SLEEP);
1239 		fhc_arg->child = rdip;
1240 		fhc_arg->mapping_reg = mondo_vec_reg;
1241 		fhc_arg->clear_reg = (softsp->intr_regs[ino].clear_reg);
1242 		fhc_arg->softsp = softsp;
1243 		fhc_arg->funcp =
1244 		    (ddi_intr_handler_t *)hdlp->ih_cb_func;
1245 		fhc_arg->arg1 = hdlp->ih_cb_arg1;
1246 		fhc_arg->arg2 = hdlp->ih_cb_arg2;
1247 		fhc_arg->inum = hdlp->ih_inum;
1248 
1249 		for (i = 0; i < FHC_MAX_INO; i++) {
1250 			if (softsp->intr_list[i] == 0) {
1251 				softsp->intr_list[i] = fhc_arg;
1252 				break;
1253 			}
1254 		}
1255 
1256 		/*
1257 		 * Save the fhc_arg in the ispec so we can use this info
1258 		 * later to uninstall this interrupt spec.
1259 		 */
1260 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1261 		    (ddi_intr_handler_t *)fhc_intr_wrapper,
1262 		    (caddr_t)fhc_arg, NULL);
1263 
1264 		ret = i_ddi_add_ivintr(hdlp);
1265 
1266 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, fhc_arg->funcp,
1267 		    fhc_arg->arg1, fhc_arg->arg2);
1268 
1269 		if (ret != DDI_SUCCESS)
1270 			goto done;
1271 	}
1272 
1273 	/*
1274 	 * Clear out a stale 'pending' or 'transmit' state in
1275 	 * this device's ISM that might have been left from a
1276 	 * previous session.
1277 	 *
1278 	 * Since all FHC interrupts are level interrupts, any
1279 	 * real interrupting condition will immediately transition
1280 	 * the ISM back to pending.
1281 	 */
1282 	*(softsp->intr_regs[ino].clear_reg) = ISM_IDLE;
1283 
1284 	/*
1285 	 * Program the mondo vector accordingly.  This MUST be the
1286 	 * last thing we do.  Once we program the ino, the device
1287 	 * may begin to interrupt.
1288 	 */
1289 	cpu_id = intr_dist_cpuid();
1290 
1291 	tmp_mondo_vec = cpu_id << INR_PID_SHIFT;
1292 
1293 	/* don't do this for fan because fan has a special control */
1294 	if (ino == FHC_FANFAIL_INO)
1295 		panic("fhc%d: enabling fanfail interrupt",
1296 		    ddi_get_instance(dip));
1297 	else
1298 		tmp_mondo_vec |= IMR_VALID;
1299 
1300 	DPRINTF(FHC_INTERRUPT_DEBUG,
1301 	    ("Mondo 0x%x mapping reg: 0x%p", hdlp->ih_vector, mondo_vec_reg));
1302 
1303 	/* Store it in the hardware reg. */
1304 	*mondo_vec_reg = tmp_mondo_vec;
1305 
1306 	/* Read a FHC register to flush store buffers */
1307 	tmpreg = *(softsp->id);
1308 #ifdef lint
1309 	tmpreg = tmpreg;
1310 #endif
1311 
1312 done:
1313 	return (ret);
1314 }
1315 
1316 /*
1317  * remove_intrspec - Remove an interrupt specification.
1318  */
1319 static void
1320 fhc_remove_intr_impl(dev_info_t *dip, dev_info_t *rdip,
1321 	ddi_intr_handle_impl_t *hdlp)
1322 {
1323 	volatile uint_t *mondo_vec_reg;
1324 	volatile uint_t tmpreg;
1325 	int i;
1326 	struct fhc_soft_state *softsp = (struct fhc_soft_state *)
1327 	    ddi_get_soft_state(fhcp, ddi_get_instance(dip));
1328 	int ino;
1329 
1330 	/* Xlate the interrupt */
1331 	fhc_xlate_intrs(hdlp,
1332 	    (softsp->list->sc.board << BD_IVINTR_SHFT));
1333 
1334 	/* get the mondo number */
1335 	ino = FHC_INO(hdlp->ih_vector);
1336 
1337 	if (ino == FHC_UART_INO) {
1338 		int intr_found = 0;
1339 
1340 		/* Lock the poll_list first */
1341 		mutex_enter(&softsp->poll_list_lock);
1342 
1343 		/*
1344 		 * Find which entry in the poll list belongs to this
1345 		 * intrspec.
1346 		 */
1347 		for (i = 0; i < MAX_ZS_CNT; i++) {
1348 			if (softsp->poll_list[i].child == rdip &&
1349 			    softsp->poll_list[i].inum == hdlp->ih_inum) {
1350 				softsp->poll_list[i].funcp = NULL;
1351 				intr_found++;
1352 			}
1353 		}
1354 
1355 		/* If we did not find an entry, then we have a problem */
1356 		if (!intr_found) {
1357 			cmn_err(CE_WARN, "fhc%d: Intrspec not found in"
1358 			    " poll list", ddi_get_instance(dip));
1359 			mutex_exit(&softsp->poll_list_lock);
1360 			goto done;
1361 		}
1362 
1363 		/*
1364 		 * If we have removed all active entries for the poll
1365 		 * list, then we have to disable interupts at this point.
1366 		 */
1367 		if ((softsp->poll_list[0].funcp == NULL) &&
1368 		    (softsp->poll_list[1].funcp == NULL)) {
1369 			mondo_vec_reg =
1370 			    softsp->intr_regs[FHC_UART_INO].mapping_reg;
1371 			*mondo_vec_reg &= ~IMR_VALID;
1372 
1373 			/* flush the hardware buffers */
1374 			tmpreg = *(softsp->ctrl);
1375 
1376 			/* Eliminate the particular handler from the system. */
1377 			i_ddi_rem_ivintr(hdlp);
1378 		}
1379 
1380 		mutex_exit(&softsp->poll_list_lock);
1381 	} else {
1382 		int32_t i;
1383 
1384 
1385 		for (i = 0; i < FHC_MAX_INO; i++)
1386 			if (softsp->intr_list[i]->child == rdip &&
1387 			    softsp->intr_list[i]->inum == hdlp->ih_inum)
1388 				break;
1389 
1390 		if (i >= FHC_MAX_INO)
1391 			goto done;
1392 
1393 		mondo_vec_reg = softsp->intr_list[i]->mapping_reg;
1394 
1395 		/* Turn off the valid bit in the mapping register. */
1396 		/* XXX what about FHC_FANFAIL owned imr? */
1397 		*mondo_vec_reg &= ~IMR_VALID;
1398 
1399 		/* flush the hardware store buffers */
1400 		tmpreg = *(softsp->id);
1401 #ifdef lint
1402 		tmpreg = tmpreg;
1403 #endif
1404 
1405 		/* Eliminate the particular handler from the system. */
1406 		i_ddi_rem_ivintr(hdlp);
1407 
1408 		kmem_free(softsp->intr_list[i],
1409 		    sizeof (struct fhc_wrapper_arg));
1410 		softsp->intr_list[i] = 0;
1411 	}
1412 
1413 done:
1414 	;
1415 }
1416 
1417 /* new intr_ops structure */
1418 static int
1419 fhc_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1420     ddi_intr_handle_impl_t *hdlp, void *result)
1421 {
1422 	int	ret = DDI_SUCCESS;
1423 
1424 	switch (intr_op) {
1425 	case DDI_INTROP_GETCAP:
1426 		*(int *)result = DDI_INTR_FLAG_LEVEL;
1427 		break;
1428 	case DDI_INTROP_ALLOC:
1429 		*(int *)result = hdlp->ih_scratch1;
1430 		break;
1431 	case DDI_INTROP_FREE:
1432 		break;
1433 	case DDI_INTROP_GETPRI:
1434 		if (hdlp->ih_pri == 0) {
1435 			struct fhc_soft_state *softsp =
1436 			    (struct fhc_soft_state *)ddi_get_soft_state(fhcp,
1437 			    ddi_get_instance(dip));
1438 
1439 			/* Xlate the interrupt */
1440 			fhc_xlate_intrs(hdlp,
1441 			    (softsp->list->sc.board << BD_IVINTR_SHFT));
1442 		}
1443 
1444 		*(int *)result = hdlp->ih_pri;
1445 		break;
1446 	case DDI_INTROP_SETPRI:
1447 		break;
1448 	case DDI_INTROP_ADDISR:
1449 		ret = fhc_add_intr_impl(dip, rdip, hdlp);
1450 		break;
1451 	case DDI_INTROP_REMISR:
1452 		fhc_remove_intr_impl(dip, rdip, hdlp);
1453 		break;
1454 	case DDI_INTROP_ENABLE:
1455 	case DDI_INTROP_DISABLE:
1456 		break;
1457 	case DDI_INTROP_NINTRS:
1458 	case DDI_INTROP_NAVAIL:
1459 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
1460 		break;
1461 	case DDI_INTROP_SETCAP:
1462 	case DDI_INTROP_SETMASK:
1463 	case DDI_INTROP_CLRMASK:
1464 	case DDI_INTROP_GETPENDING:
1465 		ret = DDI_ENOTSUP;
1466 		break;
1467 	case DDI_INTROP_SUPPORTED_TYPES:
1468 		/* only support fixed interrupts */
1469 		*(int *)result = i_ddi_get_intx_nintrs(rdip) ?
1470 		    DDI_INTR_TYPE_FIXED : 0;
1471 		break;
1472 	default:
1473 		ret = i_ddi_intr_ops(dip, rdip, intr_op, hdlp, result);
1474 		break;
1475 	}
1476 
1477 	return (ret);
1478 }
1479 
1480 /*
1481  * FHC Control Ops routine
1482  *
1483  * Requests handled here:
1484  *	DDI_CTLOPS_INITCHILD	see impl_ddi_sunbus_initchild() for details
1485  *	DDI_CTLOPS_UNINITCHILD	see fhc_uninit_child() for details
1486  *	DDI_CTLOPS_REPORTDEV	TODO - need to implement this.
1487  */
1488 static int
1489 fhc_ctlops(dev_info_t *dip, dev_info_t *rdip,
1490 	ddi_ctl_enum_t op, void *arg, void *result)
1491 {
1492 
1493 	switch (op) {
1494 	case DDI_CTLOPS_INITCHILD:
1495 		DPRINTF(FHC_CTLOPS_DEBUG, ("DDI_CTLOPS_INITCHILD\n"));
1496 		return (impl_ddi_sunbus_initchild((dev_info_t *)arg));
1497 
1498 	case DDI_CTLOPS_UNINITCHILD:
1499 		impl_ddi_sunbus_removechild((dev_info_t *)arg);
1500 		return (DDI_SUCCESS);
1501 
1502 	case DDI_CTLOPS_REPORTDEV:
1503 		/*
1504 		 * TODO - Figure out what makes sense to report here.
1505 		 */
1506 		return (DDI_SUCCESS);
1507 
1508 	case DDI_CTLOPS_POKE:
1509 	case DDI_CTLOPS_PEEK:
1510 		return (fhc_ctlops_peekpoke(op, (peekpoke_ctlops_t *)arg,
1511 		    result));
1512 
1513 	default:
1514 		return (ddi_ctlops(dip, rdip, op, arg, result));
1515 	}
1516 }
1517 
1518 
1519 /*
1520  * We're prepared to claim that the interrupt string is in
1521  * the form of a list of <FHCintr> specifications, or we're dealing
1522  * with on-board devices and we have an interrupt_number property which
1523  * gives us our mondo number.
1524  * Translate the mondos into fhcintrspecs.
1525  */
1526 /* ARGSUSED */
1527 static void
1528 fhc_xlate_intrs(ddi_intr_handle_impl_t *hdlp, uint32_t ign)
1529 
1530 {
1531 	uint32_t mondo;
1532 
1533 	mondo = hdlp->ih_vector;
1534 
1535 	hdlp->ih_vector = (mondo | ign);
1536 	if (hdlp->ih_pri == 0)
1537 		hdlp->ih_pri = fhc_int_priorities[FHC_INO(mondo)];
1538 }
1539 
1540 static int
1541 fhc_ctlops_peekpoke(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args,
1542     void *result)
1543 {
1544 	int err = DDI_SUCCESS;
1545 	on_trap_data_t otd;
1546 
1547 	/* No safe access except for peek/poke is supported. */
1548 	if (in_args->handle != NULL)
1549 		return (DDI_FAILURE);
1550 
1551 	/* Set up protected environment. */
1552 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
1553 		uintptr_t tramp = otd.ot_trampoline;
1554 
1555 		if (cmd == DDI_CTLOPS_POKE) {
1556 			otd.ot_trampoline = (uintptr_t)&poke_fault;
1557 			err = do_poke(in_args->size, (void *)in_args->dev_addr,
1558 			    (void *)in_args->host_addr);
1559 		} else {
1560 			otd.ot_trampoline = (uintptr_t)&peek_fault;
1561 			err = do_peek(in_args->size, (void *)in_args->dev_addr,
1562 			    (void *)in_args->host_addr);
1563 			result = (void *)in_args->host_addr;
1564 		}
1565 		otd.ot_trampoline = tramp;
1566 	} else
1567 		err = DDI_FAILURE;
1568 
1569 	/* Take down protected environment. */
1570 	no_trap();
1571 
1572 	return (err);
1573 }
1574 
1575 /*
1576  * This function initializes the temperature arrays for use. All
1577  * temperatures are set in to invalid value to start.
1578  */
1579 void
1580 init_temp_arrays(struct temp_stats *envstat)
1581 {
1582 	int i;
1583 
1584 	envstat->index = 0;
1585 
1586 	for (i = 0; i < L1_SZ; i++) {
1587 		envstat->l1[i] = NA_TEMP;
1588 	}
1589 
1590 	for (i = 0; i < L2_SZ; i++) {
1591 		envstat->l2[i] = NA_TEMP;
1592 	}
1593 
1594 	for (i = 0; i < L3_SZ; i++) {
1595 		envstat->l3[i] = NA_TEMP;
1596 	}
1597 
1598 	for (i = 0; i < L4_SZ; i++) {
1599 		envstat->l4[i] = NA_TEMP;
1600 	}
1601 
1602 	for (i = 0; i < L5_SZ; i++) {
1603 		envstat->l5[i] = NA_TEMP;
1604 	}
1605 
1606 	envstat->max = NA_TEMP;
1607 	envstat->min = NA_TEMP;
1608 	envstat->trend = TREND_UNKNOWN;
1609 	envstat->version = TEMP_KSTAT_VERSION;
1610 	envstat->override = NA_TEMP;
1611 }
1612 
1613 /* Inhibit warning messages below this temperature, eg for CPU poweron. */
1614 static uint_t fhc_cpu_warning_temp_threshold = FHC_CPU_WARNING_TEMP_THRESHOLD;
1615 
1616 /*
1617  * This function manages the temperature history in the temperature
1618  * statistics buffer passed in. It calls the temperature calibration
1619  * routines and maintains the time averaged temperature data.
1620  */
1621 void
1622 update_temp(dev_info_t *pdip, struct temp_stats *envstat, uchar_t value)
1623 {
1624 	uint_t index;		    /* The absolute temperature counter */
1625 	uint_t tmp_index;	    /* temp index into upper level array */
1626 	int count;		    /* Count of non-zero values in array */
1627 	int total;		    /* sum total of non-zero values in array */
1628 	short real_temp;	    /* calibrated temperature */
1629 	int i;
1630 	struct fhc_soft_state *softsp;
1631 	char buffer[256];	    /* buffer for warning of overtemp */
1632 	enum temp_state temp_state; /* Temperature state */
1633 
1634 	/*
1635 	 * NOTE: This global counter is not protected since we're called
1636 	 * serially for each board.
1637 	 */
1638 	static int shutdown_msg = 0; /* Flag if shutdown warning issued */
1639 
1640 	/* determine soft state pointer of parent */
1641 	softsp = ddi_get_soft_state(fhcp, ddi_get_instance(pdip));
1642 
1643 	envstat->index++;
1644 	index = envstat->index;
1645 
1646 	/*
1647 	 * You need to update the level 5 intervals first, since
1648 	 * they are based on the data from the level 4 intervals,
1649 	 * and so on, down to the level 1 intervals.
1650 	 */
1651 
1652 	/* update the level 5 intervals if it is time */
1653 	if (((tmp_index = L5_INDEX(index)) > 0) && (L5_REM(index) == 0)) {
1654 		/* Generate the index within the level 5 array */
1655 		tmp_index -= 1;		/* decrement by 1 for indexing */
1656 		tmp_index = tmp_index % L5_SZ;
1657 
1658 		/* take an average of the level 4 array */
1659 		for (i = 0, count = 0, total = 0; i < L4_SZ; i++) {
1660 			/* Do not include zero values in average */
1661 			if (envstat->l4[i] != NA_TEMP) {
1662 				total += (int)envstat->l4[i];
1663 				count++;
1664 			}
1665 		}
1666 
1667 		/*
1668 		 * If there were any level 4 data points to average,
1669 		 * do so.
1670 		 */
1671 		if (count != 0) {
1672 			envstat->l5[tmp_index] = total/count;
1673 		} else {
1674 			envstat->l5[tmp_index] = NA_TEMP;
1675 		}
1676 	}
1677 
1678 	/* update the level 4 intervals if it is time */
1679 	if (((tmp_index = L4_INDEX(index)) > 0) && (L4_REM(index) == 0)) {
1680 		/* Generate the index within the level 4 array */
1681 		tmp_index -= 1;		/* decrement by 1 for indexing */
1682 		tmp_index = tmp_index % L4_SZ;
1683 
1684 		/* take an average of the level 3 array */
1685 		for (i = 0, count = 0, total = 0; i < L3_SZ; i++) {
1686 			/* Do not include zero values in average */
1687 			if (envstat->l3[i] != NA_TEMP) {
1688 				total += (int)envstat->l3[i];
1689 				count++;
1690 			}
1691 		}
1692 
1693 		/*
1694 		 * If there were any level 3 data points to average,
1695 		 * do so.
1696 		 */
1697 		if (count != 0) {
1698 			envstat->l4[tmp_index] = total/count;
1699 		} else {
1700 			envstat->l4[tmp_index] = NA_TEMP;
1701 		}
1702 	}
1703 
1704 	/* update the level 3 intervals if it is time */
1705 	if (((tmp_index = L3_INDEX(index)) > 0) && (L3_REM(index) == 0)) {
1706 		/* Generate the index within the level 3 array */
1707 		tmp_index -= 1;		/* decrement by 1 for indexing */
1708 		tmp_index = tmp_index % L3_SZ;
1709 
1710 		/* take an average of the level 2 array */
1711 		for (i = 0, count = 0, total = 0; i < L2_SZ; i++) {
1712 			/* Do not include zero values in average */
1713 			if (envstat->l2[i] != NA_TEMP) {
1714 				total += (int)envstat->l2[i];
1715 				count++;
1716 			}
1717 		}
1718 
1719 		/*
1720 		 * If there were any level 2 data points to average,
1721 		 * do so.
1722 		 */
1723 		if (count != 0) {
1724 			envstat->l3[tmp_index] = total/count;
1725 		} else {
1726 			envstat->l3[tmp_index] = NA_TEMP;
1727 		}
1728 	}
1729 
1730 	/* update the level 2 intervals if it is time */
1731 	if (((tmp_index = L2_INDEX(index)) > 0) && (L2_REM(index) == 0)) {
1732 		/* Generate the index within the level 2 array */
1733 		tmp_index -= 1;		/* decrement by 1 for indexing */
1734 		tmp_index = tmp_index % L2_SZ;
1735 
1736 		/* take an average of the level 1 array */
1737 		for (i = 0, count = 0, total = 0; i < L1_SZ; i++) {
1738 			/* Do not include zero values in average */
1739 			if (envstat->l1[i] != NA_TEMP) {
1740 				total += (int)envstat->l1[i];
1741 				count++;
1742 			}
1743 		}
1744 
1745 		/*
1746 		 * If there were any level 1 data points to average,
1747 		 * do so.
1748 		 */
1749 		if (count != 0) {
1750 			envstat->l2[tmp_index] = total/count;
1751 		} else {
1752 			envstat->l2[tmp_index] = NA_TEMP;
1753 		}
1754 	}
1755 
1756 	/* determine the current temperature in degrees Celcius */
1757 	if (envstat->override != NA_TEMP) {
1758 		/* use override temperature for this board */
1759 		real_temp = envstat->override;
1760 	} else {
1761 		/* Run the calibration function using this board type */
1762 		real_temp = calibrate_temp(softsp->list->sc.type, value,
1763 		    softsp->list->sc.ac_compid);
1764 	}
1765 
1766 	envstat->l1[index % L1_SZ] = real_temp;
1767 
1768 	/* check if the temperature state for this device needs to change */
1769 	temp_state = get_temp_state(softsp->list->sc.type, real_temp,
1770 	    softsp->list->sc.board);
1771 
1772 	/* has the state changed? Then get the board string ready */
1773 	if (temp_state != envstat->state) {
1774 		int board = softsp->list->sc.board;
1775 		enum board_type type = softsp->list->sc.type;
1776 
1777 		build_bd_display_str(buffer, type, board);
1778 
1779 		if (temp_state > envstat->state) {
1780 			if (envstat->state == TEMP_OK) {
1781 				if (type == CLOCK_BOARD) {
1782 					reg_fault(0, FT_OVERTEMP, FT_SYSTEM);
1783 				} else {
1784 					reg_fault(board, FT_OVERTEMP,
1785 					    FT_BOARD);
1786 				}
1787 			}
1788 
1789 			/* heating up, change state now */
1790 			envstat->temp_cnt = 0;
1791 			envstat->state = temp_state;
1792 
1793 			if (temp_state == TEMP_WARN) {
1794 				/* now warn the user of the problem */
1795 				cmn_err(CE_WARN,
1796 				    "%s is warm (temperature: %dC). "
1797 				    "Please check system cooling", buffer,
1798 				    real_temp);
1799 				fhc_bd_update(board, SYSC_EVT_BD_OVERTEMP);
1800 				if (temperature_chamber == -1)
1801 					temperature_chamber =
1802 					    check_for_chamber();
1803 			} else if (temp_state == TEMP_DANGER) {
1804 				cmn_err(CE_WARN,
1805 				    "%s is very hot (temperature: %dC)",
1806 				    buffer, real_temp);
1807 
1808 				envstat->shutdown_cnt = 1;
1809 				if (temperature_chamber == -1)
1810 					temperature_chamber =
1811 					    check_for_chamber();
1812 				if ((temperature_chamber == 0) &&
1813 				    enable_overtemp_powerdown) {
1814 					/*
1815 					 * NOTE: The "%d seconds" is not
1816 					 * necessarily accurate in the case
1817 					 * where we have multiple boards
1818 					 * overheating and subsequently cooling
1819 					 * down.
1820 					 */
1821 					if (shutdown_msg == 0) {
1822 						cmn_err(CE_WARN, "System "
1823 						    "shutdown scheduled "
1824 						    "in %d seconds due to "
1825 						    "over-temperature "
1826 						    "condition on %s",
1827 						    SHUTDOWN_TIMEOUT_SEC,
1828 						    buffer);
1829 					}
1830 					shutdown_msg++;
1831 				}
1832 			}
1833 
1834 			/*
1835 			 * If this is a cpu board, power them off.
1836 			 */
1837 			if (temperature_chamber == 0) {
1838 				mutex_enter(&cpu_lock);
1839 				(void) fhc_board_poweroffcpus(board, NULL,
1840 				    CPU_FORCED);
1841 				mutex_exit(&cpu_lock);
1842 			}
1843 		} else if (temp_state < envstat->state) {
1844 			/*
1845 			 * Avert the sigpower that would
1846 			 * otherwise be sent to init.
1847 			 */
1848 			envstat->shutdown_cnt = 0;
1849 
1850 			/* cooling down, use state counter */
1851 			if (envstat->temp_cnt == 0) {
1852 				envstat->temp_cnt = TEMP_STATE_COUNT;
1853 			} else if (--envstat->temp_cnt == 0) {
1854 				if (temp_state == TEMP_WARN) {
1855 					cmn_err(CE_NOTE,
1856 					    "%s is cooling "
1857 					    "(temperature: %dC)", buffer,
1858 					    real_temp);
1859 
1860 				} else if (temp_state == TEMP_OK) {
1861 					cmn_err(CE_NOTE,
1862 					    "%s has cooled down "
1863 					    "(temperature: %dC), system OK",
1864 					    buffer, real_temp);
1865 
1866 					if (type == CLOCK_BOARD) {
1867 						clear_fault(0, FT_OVERTEMP,
1868 						    FT_SYSTEM);
1869 					} else {
1870 						clear_fault(board, FT_OVERTEMP,
1871 						    FT_BOARD);
1872 					}
1873 				}
1874 
1875 				/*
1876 				 * If we just came out of TEMP_DANGER, and
1877 				 * a warning was issued about shutting down,
1878 				 * let the user know it's been cancelled
1879 				 */
1880 				if (envstat->state == TEMP_DANGER &&
1881 				    (temperature_chamber == 0) &&
1882 				    enable_overtemp_powerdown &&
1883 				    (powerdown_started == 0) &&
1884 				    (--shutdown_msg == 0)) {
1885 					cmn_err(CE_NOTE, "System "
1886 					    "shutdown due to over-"
1887 					    "temperature "
1888 					    "condition cancelled");
1889 				}
1890 				envstat->state = temp_state;
1891 
1892 				fhc_bd_update(board, SYSC_EVT_BD_TEMP_OK);
1893 			}
1894 		}
1895 	} else {
1896 		envstat->temp_cnt = 0;
1897 
1898 		if (temp_state == TEMP_DANGER) {
1899 			if (temperature_chamber == -1) {
1900 				temperature_chamber = check_for_chamber();
1901 			}
1902 
1903 			if ((envstat->shutdown_cnt++ >= SHUTDOWN_COUNT) &&
1904 			    (temperature_chamber == 0) &&
1905 			    enable_overtemp_powerdown &&
1906 			    (powerdown_started == 0)) {
1907 				powerdown_started = 1;
1908 
1909 				/* the system is still too hot */
1910 				build_bd_display_str(buffer,
1911 				    softsp->list->sc.type,
1912 				    softsp->list->sc.board);
1913 
1914 				cmn_err(CE_WARN, "%s still too hot "
1915 				    "(temperature: %dC)."
1916 				    " Overtemp shutdown started", buffer,
1917 				    real_temp);
1918 
1919 				fhc_reboot();
1920 			}
1921 		}
1922 	}
1923 
1924 	/* update the maximum and minimum temperatures if necessary */
1925 	if ((envstat->max == NA_TEMP) || (real_temp > envstat->max)) {
1926 		envstat->max = real_temp;
1927 	}
1928 
1929 	if ((envstat->min == NA_TEMP) || (real_temp < envstat->min)) {
1930 		envstat->min = real_temp;
1931 	}
1932 
1933 	/*
1934 	 * Update the temperature trend.  Currently, the temperature
1935 	 * trend algorithm is based on the level 2 stats.  So, we
1936 	 * only need to run every time the level 2 stats get updated.
1937 	 */
1938 	if (((tmp_index = L2_INDEX(index)) > 0) && (L2_REM(index) == 0))  {
1939 		enum board_type type = softsp->list->sc.type;
1940 
1941 		envstat->trend = temp_trend(envstat);
1942 
1943 		/* Issue a warning if the temperature is rising rapidly. */
1944 		/* For CPU boards, don't warn if CPUs just powered on. */
1945 		if (envstat->trend == TREND_RAPID_RISE &&
1946 		    (type != CPU_BOARD || real_temp >
1947 		    fhc_cpu_warning_temp_threshold))  {
1948 			int board = softsp->list->sc.board;
1949 
1950 			build_bd_display_str(buffer, type, board);
1951 			cmn_err(CE_WARN, "%s temperature is rising rapidly!  "
1952 			    "Current temperature is %dC", buffer,
1953 			    real_temp);
1954 		}
1955 	}
1956 }
1957 
1958 #define	PREV_L2_INDEX(x)    ((x) ? ((x) - 1) : (L2_SZ - 1))
1959 
1960 /*
1961  * This routine determines if the temp of the device passed in is heating
1962  * up, cooling down, or staying stable.
1963  */
1964 enum temp_trend
1965 temp_trend(struct temp_stats *tempstat)
1966 {
1967 	int		ii;
1968 	uint_t		curr_index;
1969 	int		curr_temp;
1970 	uint_t		prev_index;
1971 	int		prev_temp;
1972 	int		trail_temp;
1973 	int		delta;
1974 	int		read_cnt;
1975 	enum temp_trend	result = TREND_STABLE;
1976 
1977 	if (tempstat == NULL)
1978 		return (TREND_UNKNOWN);
1979 
1980 	curr_index = (L2_INDEX(tempstat->index) - 1) % L2_SZ;
1981 	curr_temp = tempstat->l2[curr_index];
1982 
1983 	/* Count how many temperature readings are available */
1984 	prev_index = curr_index;
1985 	for (read_cnt = 0; read_cnt < L2_SZ - 1; read_cnt++) {
1986 		if (tempstat->l2[prev_index] == NA_TEMP)
1987 			break;
1988 		prev_index = PREV_L2_INDEX(prev_index);
1989 	}
1990 
1991 	switch (read_cnt) {
1992 	case 0:
1993 	case 1:
1994 		result = TREND_UNKNOWN;
1995 		break;
1996 
1997 	default:
1998 		delta = curr_temp - tempstat->l2[PREV_L2_INDEX(curr_index)];
1999 		prev_index = curr_index;
2000 		trail_temp = prev_temp = curr_temp;
2001 		if (delta >= RAPID_RISE_THRESH) {	    /* rapid rise? */
2002 			result = TREND_RAPID_RISE;
2003 		} else if (delta > 0) {			    /* rise? */
2004 			for (ii = 1; ii < read_cnt; ii++) {
2005 				prev_index = PREV_L2_INDEX(prev_index);
2006 				prev_temp = tempstat->l2[prev_index];
2007 				if (prev_temp > trail_temp) {
2008 					break;
2009 				}
2010 				trail_temp = prev_temp;
2011 				if (prev_temp <= curr_temp - NOISE_THRESH) {
2012 					result = TREND_RISE;
2013 					break;
2014 				}
2015 			}
2016 		} else if (delta <= -RAPID_FALL_THRESH) {   /* rapid fall? */
2017 			result = TREND_RAPID_FALL;
2018 		} else if (delta < 0) {			    /* fall? */
2019 			for (ii = 1; ii < read_cnt; ii++) {
2020 				prev_index = PREV_L2_INDEX(prev_index);
2021 				prev_temp = tempstat->l2[prev_index];
2022 				if (prev_temp < trail_temp) {
2023 					break;
2024 				}
2025 				trail_temp = prev_temp;
2026 				if (prev_temp >= curr_temp + NOISE_THRESH) {
2027 					result = TREND_FALL;
2028 					break;
2029 				}
2030 			}
2031 		}
2032 	}
2033 	return (result);
2034 }
2035 
2036 /*
2037  * Reboot the system if we can, otherwise attempt a power down
2038  */
2039 void
2040 fhc_reboot(void)
2041 {
2042 	proc_t *initpp;
2043 
2044 	/* send a SIGPWR to init process */
2045 	mutex_enter(&pidlock);
2046 	initpp = prfind(P_INITPID);
2047 	mutex_exit(&pidlock);
2048 
2049 	/*
2050 	 * If we're still booting and init(1) isn't
2051 	 * set up yet, simply halt.
2052 	 */
2053 	if (initpp != NULL) {
2054 		psignal(initpp, SIGFPE);	/* init 6 */
2055 	} else {
2056 		power_down("Environmental Shutdown");
2057 		halt("Power off the System");
2058 	}
2059 }
2060 
2061 int
2062 overtemp_kstat_update(kstat_t *ksp, int rw)
2063 {
2064 	struct temp_stats *tempstat;
2065 	char *kstatp;
2066 	int i;
2067 
2068 	kstatp = (char *)ksp->ks_data;
2069 	tempstat = (struct temp_stats *)ksp->ks_private;
2070 
2071 	/*
2072 	 * Kstat reads are used to retrieve the current system temperature
2073 	 * history. Kstat writes are used to reset the max and min
2074 	 * temperatures.
2075 	 */
2076 	if (rw == KSTAT_WRITE) {
2077 		short max;	/* temporary copy of max temperature */
2078 		short min;	/* temporary copy of min temperature */
2079 
2080 		/*
2081 		 * search for and reset the max and min to the current
2082 		 * array contents. Old max and min values will get
2083 		 * averaged out as they move into the higher level arrays.
2084 		 */
2085 		max = tempstat->l1[0];
2086 		min = tempstat->l1[0];
2087 
2088 		/* Pull the max and min from Level 1 array */
2089 		for (i = 0; i < L1_SZ; i++) {
2090 			if ((tempstat->l1[i] != NA_TEMP) &&
2091 			    (tempstat->l1[i] > max)) {
2092 				max = tempstat->l1[i];
2093 			}
2094 
2095 			if ((tempstat->l1[i] != NA_TEMP) &&
2096 			    (tempstat->l1[i] < min)) {
2097 				min = tempstat->l1[i];
2098 			}
2099 		}
2100 
2101 		/* Pull the max and min from Level 2 array */
2102 		for (i = 0; i < L2_SZ; i++) {
2103 			if ((tempstat->l2[i] != NA_TEMP) &&
2104 			    (tempstat->l2[i] > max)) {
2105 				max = tempstat->l2[i];
2106 			}
2107 
2108 			if ((tempstat->l2[i] != NA_TEMP) &&
2109 			    (tempstat->l2[i] < min)) {
2110 				min = tempstat->l2[i];
2111 			}
2112 		}
2113 
2114 		/* Pull the max and min from Level 3 array */
2115 		for (i = 0; i < L3_SZ; i++) {
2116 			if ((tempstat->l3[i] != NA_TEMP) &&
2117 			    (tempstat->l3[i] > max)) {
2118 				max = tempstat->l3[i];
2119 			}
2120 
2121 			if ((tempstat->l3[i] != NA_TEMP) &&
2122 			    (tempstat->l3[i] < min)) {
2123 				min = tempstat->l3[i];
2124 			}
2125 		}
2126 
2127 		/* Pull the max and min from Level 4 array */
2128 		for (i = 0; i < L4_SZ; i++) {
2129 			if ((tempstat->l4[i] != NA_TEMP) &&
2130 			    (tempstat->l4[i] > max)) {
2131 				max = tempstat->l4[i];
2132 			}
2133 
2134 			if ((tempstat->l4[i] != NA_TEMP) &&
2135 			    (tempstat->l4[i] < min)) {
2136 				min = tempstat->l4[i];
2137 			}
2138 		}
2139 
2140 		/* Pull the max and min from Level 5 array */
2141 		for (i = 0; i < L5_SZ; i++) {
2142 			if ((tempstat->l5[i] != NA_TEMP) &&
2143 			    (tempstat->l5[i] > max)) {
2144 				max = tempstat->l5[i];
2145 			}
2146 
2147 			if ((tempstat->l5[i] != NA_TEMP) &&
2148 			    (tempstat->l5[i] < min)) {
2149 				min = tempstat->l5[i];
2150 			}
2151 		}
2152 	} else {
2153 		/*
2154 		 * copy the temperature history buffer into the
2155 		 * kstat structure.
2156 		 */
2157 		bcopy(tempstat, kstatp, sizeof (struct temp_stats));
2158 	}
2159 	return (0);
2160 }
2161 
2162 int
2163 temp_override_kstat_update(kstat_t *ksp, int rw)
2164 {
2165 	short *over;
2166 	short *kstatp;
2167 
2168 	kstatp = (short *)ksp->ks_data;
2169 	over = (short *)ksp->ks_private;
2170 
2171 	/*
2172 	 * Kstat reads are used to get the temperature override setting.
2173 	 * Kstat writes are used to set the temperature override setting.
2174 	 */
2175 	if (rw == KSTAT_WRITE) {
2176 		*over = *kstatp;
2177 	} else {
2178 		*kstatp = *over;
2179 	}
2180 	return (0);
2181 }
2182 
2183 /*
2184  * This function uses the calibration tables at the beginning of this file
2185  * to lookup the actual temperature of the thermistor in degrees Celcius.
2186  * If the measurement is out of the bounds of the acceptable values, the
2187  * closest boundary value is used instead.
2188  */
2189 static short
2190 calibrate_temp(enum board_type type, uchar_t temp, uint_t ac_comp)
2191 {
2192 	short result = NA_TEMP;
2193 
2194 	if (dont_calibrate == 1) {
2195 		return ((short)temp);
2196 	}
2197 
2198 	switch (type) {
2199 	case CPU_BOARD:
2200 		/*
2201 		 * If AC chip revision is >= 4 or if it is unitialized,
2202 		 * then use the new calibration tables.
2203 		 */
2204 		if ((CHIP_REV(ac_comp) >= 4) || (CHIP_REV(ac_comp) == 0)) {
2205 			if (temp >= CPU2_MX_CNT) {
2206 				result = cpu2_table[CPU2_MX_CNT-1];
2207 			} else {
2208 				result = cpu2_table[temp];
2209 			}
2210 		} else {
2211 			if (temp >= CPU_MX_CNT) {
2212 				result = cpu_table[CPU_MX_CNT-1];
2213 			} else {
2214 				result = cpu_table[temp];
2215 			}
2216 		}
2217 		break;
2218 
2219 	case IO_2SBUS_BOARD:
2220 	case IO_SBUS_FFB_BOARD:
2221 	case IO_PCI_BOARD:
2222 	case IO_2SBUS_SOCPLUS_BOARD:
2223 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2224 		if (temp < IO_MN_CNT) {
2225 			result = io_table[IO_MN_CNT];
2226 		} else if (temp >= IO_MX_CNT) {
2227 			result = io_table[IO_MX_CNT-1];
2228 		} else {
2229 			result = io_table[temp];
2230 		}
2231 		break;
2232 
2233 	case CLOCK_BOARD:
2234 		if (temp < CLK_MN_CNT) {
2235 			result = clock_table[CLK_MN_CNT];
2236 		} else if (temp >= CLK_MX_CNT) {
2237 			result = clock_table[CLK_MX_CNT-1];
2238 		} else {
2239 			result = clock_table[temp];
2240 		}
2241 		break;
2242 
2243 	default:
2244 		break;
2245 	}
2246 
2247 	return (result);
2248 }
2249 
2250 /*
2251  * Determine the temperature state of this board based on its type and
2252  * the actual temperature in degrees Celcius.
2253  */
2254 static enum temp_state
2255 get_temp_state(enum board_type type, short temp, int board)
2256 {
2257 	enum temp_state state = TEMP_OK;
2258 	short warn_limit;
2259 	short danger_limit;
2260 	struct cpu *cpa, *cpb;
2261 
2262 	switch (type) {
2263 	case CPU_BOARD:
2264 		warn_limit = cpu_warn_temp;
2265 		danger_limit = cpu_danger_temp;
2266 
2267 		/*
2268 		 * For CPU boards with frequency >= 400 MHZ,
2269 		 * temperature zones are different.
2270 		 */
2271 
2272 		mutex_enter(&cpu_lock);
2273 
2274 		if ((cpa = cpu_get(FHC_BOARD2CPU_A(board))) != NULL) {
2275 			if ((cpa->cpu_type_info.pi_clock) >= 400) {
2276 				warn_limit = cpu_warn_temp_4x;
2277 				danger_limit = cpu_danger_temp_4x;
2278 			}
2279 		}
2280 		if ((cpb = cpu_get(FHC_BOARD2CPU_B(board))) != NULL) {
2281 			if ((cpb->cpu_type_info.pi_clock) >= 400) {
2282 				warn_limit = cpu_warn_temp_4x;
2283 				danger_limit = cpu_danger_temp_4x;
2284 			}
2285 		}
2286 
2287 		mutex_exit(&cpu_lock);
2288 
2289 		break;
2290 
2291 	case IO_2SBUS_BOARD:
2292 	case IO_SBUS_FFB_BOARD:
2293 	case IO_PCI_BOARD:
2294 	case IO_2SBUS_SOCPLUS_BOARD:
2295 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2296 		warn_limit = io_warn_temp;
2297 		danger_limit = io_danger_temp;
2298 		break;
2299 
2300 	case CLOCK_BOARD:
2301 		warn_limit = clk_warn_temp;
2302 		danger_limit = clk_danger_temp;
2303 		break;
2304 
2305 	case UNINIT_BOARD:
2306 	case UNKNOWN_BOARD:
2307 	case MEM_BOARD:
2308 	default:
2309 		warn_limit = dft_warn_temp;
2310 		danger_limit = dft_danger_temp;
2311 		break;
2312 	}
2313 
2314 	if (temp >= danger_limit) {
2315 		state = TEMP_DANGER;
2316 	} else if (temp >= warn_limit) {
2317 		state = TEMP_WARN;
2318 	}
2319 
2320 	return (state);
2321 }
2322 
2323 static void
2324 fhc_add_kstats(struct fhc_soft_state *softsp)
2325 {
2326 	struct kstat *fhc_ksp;
2327 	struct fhc_kstat *fhc_named_ksp;
2328 
2329 	if ((fhc_ksp = kstat_create("unix", softsp->list->sc.board,
2330 	    FHC_KSTAT_NAME, "misc", KSTAT_TYPE_NAMED,
2331 	    sizeof (struct fhc_kstat) / sizeof (kstat_named_t),
2332 	    KSTAT_FLAG_PERSISTENT)) == NULL) {
2333 		cmn_err(CE_WARN, "fhc%d kstat_create failed",
2334 		    ddi_get_instance(softsp->dip));
2335 		return;
2336 	}
2337 
2338 	fhc_named_ksp = (struct fhc_kstat *)(fhc_ksp->ks_data);
2339 
2340 	/* initialize the named kstats */
2341 	kstat_named_init(&fhc_named_ksp->csr,
2342 	    CSR_KSTAT_NAMED,
2343 	    KSTAT_DATA_UINT32);
2344 
2345 	kstat_named_init(&fhc_named_ksp->bsr,
2346 	    BSR_KSTAT_NAMED,
2347 	    KSTAT_DATA_UINT32);
2348 
2349 	fhc_ksp->ks_update = fhc_kstat_update;
2350 	fhc_ksp->ks_private = (void *)softsp;
2351 	softsp->fhc_ksp = fhc_ksp;
2352 	kstat_install(fhc_ksp);
2353 }
2354 
2355 static int
2356 fhc_kstat_update(kstat_t *ksp, int rw)
2357 {
2358 	struct fhc_kstat *fhcksp;
2359 	struct fhc_soft_state *softsp;
2360 
2361 	fhcksp = (struct fhc_kstat *)ksp->ks_data;
2362 	softsp = (struct fhc_soft_state *)ksp->ks_private;
2363 
2364 	/* this is a read-only kstat. Bail out on a write */
2365 	if (rw == KSTAT_WRITE) {
2366 		return (EACCES);
2367 	} else {
2368 		/*
2369 		 * copy the current state of the hardware into the
2370 		 * kstat structure.
2371 		 */
2372 		fhcksp->csr.value.ui32 = *softsp->ctrl;
2373 		fhcksp->bsr.value.ui32 = *softsp->bsr;
2374 	}
2375 	return (0);
2376 }
2377 
2378 static int
2379 cpu_on_board(int board)
2380 {
2381 	int upa_a = board << 1;
2382 	int upa_b = (board << 1) + 1;
2383 
2384 	if ((cpunodes[upa_a].nodeid != NULL) ||
2385 	    (cpunodes[upa_b].nodeid != NULL)) {
2386 		return (1);
2387 	} else {
2388 		return (0);
2389 	}
2390 }
2391 
2392 /*
2393  * This function uses the board list and toggles the OS green board
2394  * LED. The mask input tells which bit fields are being modified,
2395  * and the value input tells the states of the bits.
2396  */
2397 void
2398 update_board_leds(fhc_bd_t *board, uint_t mask, uint_t value)
2399 {
2400 	volatile uint_t temp;
2401 
2402 	ASSERT(fhc_bdlist_locked());
2403 
2404 	/* mask off mask and value for only the LED bits */
2405 	mask &= (FHC_LED_LEFT|FHC_LED_MID|FHC_LED_RIGHT);
2406 	value &= (FHC_LED_LEFT|FHC_LED_MID|FHC_LED_RIGHT);
2407 
2408 	if (board != NULL) {
2409 		mutex_enter(&board->softsp->ctrl_lock);
2410 
2411 		/* read the current register state */
2412 		temp = *board->softsp->ctrl;
2413 
2414 		/*
2415 		 * The EPDA bits are special since the register is
2416 		 * special.  We don't want to set them, since setting
2417 		 * the bits on a shutdown cpu keeps the cpu permanently
2418 		 * powered off.  Also, the CSR_SYNC bit must always be
2419 		 * set to 0 as it is an OBP semaphore that is expected to
2420 		 * be clear for cpu restart.
2421 		 */
2422 		temp &= ~(FHC_CSR_SYNC | FHC_EPDA_OFF | FHC_EPDB_OFF);
2423 
2424 		/* mask off the bits to change */
2425 		temp &= ~mask;
2426 
2427 		/* or in the new values of the bits. */
2428 		temp |= value;
2429 
2430 		/* update the register */
2431 		*board->softsp->ctrl = temp;
2432 
2433 		/* flush the hardware registers */
2434 		temp = *board->softsp->ctrl;
2435 #ifdef lint
2436 		temp = temp;
2437 #endif
2438 
2439 		mutex_exit(&board->softsp->ctrl_lock);
2440 	}
2441 }
2442 
2443 static int
2444 check_for_chamber(void)
2445 {
2446 	int chamber = 0;
2447 	dev_info_t *options_dip;
2448 	pnode_t options_node_id;
2449 	int mfgmode_len;
2450 	int retval;
2451 	char *mfgmode;
2452 
2453 	/*
2454 	 * The operator can disable overtemp powerdown from /etc/system or
2455 	 * boot -h.
2456 	 */
2457 	if (!enable_overtemp_powerdown) {
2458 		cmn_err(CE_WARN, "Operator has disabled overtemp powerdown");
2459 		return (1);
2460 	}
2461 
2462 	/*
2463 	 * An OBP option, 'mfg-mode' is being used to inform us as to
2464 	 * whether we are in an enviromental chamber. It exists in
2465 	 * the 'options' node. This is where all OBP 'setenv' (eeprom)
2466 	 * parameters live.
2467 	 */
2468 	if ((options_dip = ddi_find_devinfo("options", -1, 0)) != NULL) {
2469 		options_node_id = (pnode_t)ddi_get_nodeid(options_dip);
2470 		mfgmode_len = prom_getproplen(options_node_id, "mfg-mode");
2471 		if (mfgmode_len == -1) {
2472 			return (chamber);
2473 		}
2474 		mfgmode = kmem_alloc(mfgmode_len+1, KM_SLEEP);
2475 
2476 		retval = prom_getprop(options_node_id, "mfg-mode", mfgmode);
2477 		if (retval != -1) {
2478 			mfgmode[retval] = 0;
2479 			if (strcmp(mfgmode, CHAMBER_VALUE) == 0) {
2480 				chamber = 1;
2481 				cmn_err(CE_WARN, "System in Temperature"
2482 				    " Chamber Mode. Overtemperature"
2483 				    " Shutdown disabled");
2484 			}
2485 		}
2486 		kmem_free(mfgmode, mfgmode_len+1);
2487 	}
2488 	return (chamber);
2489 }
2490 
2491 static void
2492 build_bd_display_str(char *buffer, enum board_type type, int board)
2493 {
2494 	if (buffer == NULL) {
2495 		return;
2496 	}
2497 
2498 	/* fill in board type to display */
2499 	switch (type) {
2500 	case UNINIT_BOARD:
2501 		(void) sprintf(buffer, "Uninitialized Board type board %d",
2502 		    board);
2503 		break;
2504 
2505 	case UNKNOWN_BOARD:
2506 		(void) sprintf(buffer, "Unknown Board type board %d", board);
2507 		break;
2508 
2509 	case CPU_BOARD:
2510 	case MEM_BOARD:
2511 		(void) sprintf(buffer, "CPU/Memory board %d", board);
2512 		break;
2513 
2514 	case IO_2SBUS_BOARD:
2515 		(void) sprintf(buffer, "2 SBus IO board %d", board);
2516 		break;
2517 
2518 	case IO_SBUS_FFB_BOARD:
2519 		(void) sprintf(buffer, "SBus FFB IO board %d", board);
2520 		break;
2521 
2522 	case IO_PCI_BOARD:
2523 		(void) sprintf(buffer, "PCI IO board %d", board);
2524 		break;
2525 
2526 	case CLOCK_BOARD:
2527 		(void) sprintf(buffer, "Clock board");
2528 		break;
2529 
2530 	case IO_2SBUS_SOCPLUS_BOARD:
2531 		(void) sprintf(buffer, "2 SBus SOC+ IO board %d", board);
2532 		break;
2533 
2534 	case IO_SBUS_FFB_SOCPLUS_BOARD:
2535 		(void) sprintf(buffer, "SBus FFB SOC+ IO board %d", board);
2536 		break;
2537 
2538 	default:
2539 		(void) sprintf(buffer, "Unrecognized board type board %d",
2540 		    board);
2541 		break;
2542 	}
2543 }
2544 
2545 void
2546 fhc_intrdist(void *arg)
2547 {
2548 	struct fhc_soft_state *softsp;
2549 	dev_info_t *dip = (dev_info_t *)arg;
2550 	volatile uint_t *mondo_vec_reg;
2551 	volatile uint_t *intr_state_reg;
2552 	uint_t mondo_vec;
2553 	uint_t tmp_reg;
2554 	uint_t cpu_id;
2555 	uint_t i;
2556 
2557 	/* extract the soft state pointer */
2558 	softsp = ddi_get_soft_state(fhcp, ddi_get_instance(dip));
2559 
2560 	/*
2561 	 * Loop through all the interrupt mapping registers and reprogram
2562 	 * the target CPU for all valid registers.
2563 	 */
2564 	for (i = 0; i < FHC_MAX_INO; i++) {
2565 		mondo_vec_reg = softsp->intr_regs[i].mapping_reg;
2566 		intr_state_reg = softsp->intr_regs[i].clear_reg;
2567 
2568 		if ((*mondo_vec_reg & IMR_VALID) == 0)
2569 			continue;
2570 
2571 		cpu_id = intr_dist_cpuid();
2572 
2573 		/* Check the current target of the mondo */
2574 		if (((*mondo_vec_reg & INR_PID_MASK) >> INR_PID_SHIFT) ==
2575 		    cpu_id) {
2576 			/* It is the same, don't reprogram */
2577 			return;
2578 		}
2579 
2580 		/* So it's OK to reprogram the CPU target */
2581 
2582 		/* turn off the valid bit */
2583 		*mondo_vec_reg &= ~IMR_VALID;
2584 
2585 		/* flush the hardware registers */
2586 		tmp_reg = *softsp->id;
2587 
2588 		/*
2589 		 * wait for the state machine to idle. Do not loop on panic, so
2590 		 * that system does not hang.
2591 		 */
2592 		while (((*intr_state_reg & INT_PENDING) == INT_PENDING) &&
2593 		    !panicstr)
2594 			;
2595 
2596 		/* re-target the mondo and turn it on */
2597 		mondo_vec = (cpu_id << INR_PID_SHIFT) | IMR_VALID;
2598 
2599 		/* write it back to the hardware. */
2600 		*mondo_vec_reg = mondo_vec;
2601 
2602 		/* flush the hardware buffers. */
2603 		tmp_reg = *(softsp->id);
2604 
2605 #ifdef	lint
2606 		tmp_reg = tmp_reg;
2607 #endif	/* lint */
2608 	}
2609 }
2610 
2611 /*
2612  * reg_fault
2613  *
2614  * This routine registers a fault in the fault list. If the fault
2615  * is unique (does not exist in fault list) then a new fault is
2616  * added to the fault list, with the appropriate structure elements
2617  * filled in.
2618  */
2619 void
2620 reg_fault(int unit, enum ft_type type, enum ft_class fclass)
2621 {
2622 	struct ft_link_list *list;	/* temporary list pointer */
2623 
2624 	if (type >= ft_max_index) {
2625 		cmn_err(CE_WARN, "Illegal Fault type %x", type);
2626 		return;
2627 	}
2628 
2629 	mutex_enter(&ftlist_mutex);
2630 
2631 	/* Search for the requested fault. If it already exists, return. */
2632 	for (list = ft_list; list != NULL; list = list->next) {
2633 		if ((list->f.unit == unit) && (list->f.type == type) &&
2634 		    (list->f.fclass == fclass)) {
2635 			mutex_exit(&ftlist_mutex);
2636 			return;
2637 		}
2638 	}
2639 
2640 	/* Allocate a new fault structure. */
2641 	list = kmem_zalloc(sizeof (struct ft_link_list), KM_SLEEP);
2642 
2643 	/* fill in the fault list elements */
2644 	list->f.unit = unit;
2645 	list->f.type = type;
2646 	list->f.fclass = fclass;
2647 	list->f.create_time = (time32_t)gethrestime_sec(); /* XX64 */
2648 	(void) strncpy(list->f.msg, ft_str_table[type], MAX_FT_DESC);
2649 
2650 	/* link it into the list. */
2651 	list->next = ft_list;
2652 	ft_list = list;
2653 
2654 	/* Update the total fault count */
2655 	ft_nfaults++;
2656 
2657 	mutex_exit(&ftlist_mutex);
2658 }
2659 
2660 /*
2661  * clear_fault
2662  *
2663  * This routine finds the fault list entry specified by the caller,
2664  * deletes it from the fault list, and frees up the memory used for
2665  * the entry. If the requested fault is not found, it exits silently.
2666  */
2667 void
2668 clear_fault(int unit, enum ft_type type, enum ft_class fclass)
2669 {
2670 	struct ft_link_list *list;		/* temporary list pointer */
2671 	struct ft_link_list **vect;
2672 
2673 	mutex_enter(&ftlist_mutex);
2674 
2675 	list = ft_list;
2676 	vect = &ft_list;
2677 
2678 	/*
2679 	 * Search for the requested fault. If it exists, delete it
2680 	 * and relink the fault list.
2681 	 */
2682 	for (; list != NULL; vect = &list->next, list = list->next) {
2683 		if ((list->f.unit == unit) && (list->f.type == type) &&
2684 		    (list->f.fclass == fclass)) {
2685 			/* remove the item from the list */
2686 			*vect = list->next;
2687 
2688 			/* free the memory allocated */
2689 			kmem_free(list, sizeof (struct ft_link_list));
2690 
2691 			/* Update the total fault count */
2692 			ft_nfaults--;
2693 			break;
2694 		}
2695 	}
2696 	mutex_exit(&ftlist_mutex);
2697 }
2698 
2699 /*
2700  * process_fault_list
2701  *
2702  * This routine walks the global fault list and updates the board list
2703  * with the current status of each Yellow LED. If any faults are found
2704  * in the system, then a non-zero value is returned. Else zero is returned.
2705  */
2706 int
2707 process_fault_list(void)
2708 {
2709 	int fault = 0;
2710 	struct ft_link_list *ftlist;	/* fault list pointer */
2711 	fhc_bd_t *bdlist;		/* board list pointer */
2712 
2713 	/*
2714 	 * Note on locking. The bdlist mutex is always acquired and
2715 	 * held around the ftlist mutex when both are needed for an
2716 	 * operation. This is to avoid deadlock.
2717 	 */
2718 
2719 	/* First lock the board list */
2720 	(void) fhc_bdlist_lock(-1);
2721 
2722 	/* Grab the fault list lock first */
2723 	mutex_enter(&ftlist_mutex);
2724 
2725 	/* clear the board list of all faults first */
2726 	for (bdlist = fhc_bd_first(); bdlist; bdlist = fhc_bd_next(bdlist))
2727 		bdlist->fault = 0;
2728 
2729 	/* walk the fault list here */
2730 	for (ftlist = ft_list; ftlist != NULL; ftlist = ftlist->next) {
2731 		fault++;
2732 
2733 		/*
2734 		 * If this is a board level fault, find the board, The
2735 		 * unit number for all board class faults must be the
2736 		 * actual board number. The caller of reg_fault must
2737 		 * ensure this for FT_BOARD class faults.
2738 		 */
2739 		if (ftlist->f.fclass == FT_BOARD) {
2740 			/* Sanity check the board first */
2741 			if (fhc_bd_valid(ftlist->f.unit)) {
2742 				bdlist = fhc_bd(ftlist->f.unit);
2743 				bdlist->fault = 1;
2744 			} else {
2745 				cmn_err(CE_WARN, "No board %d list entry found",
2746 				    ftlist->f.unit);
2747 			}
2748 		}
2749 	}
2750 
2751 	/* now unlock the fault list */
2752 	mutex_exit(&ftlist_mutex);
2753 
2754 	/* unlock the board list before leaving */
2755 	fhc_bdlist_unlock();
2756 
2757 	return (fault);
2758 }
2759 
2760 /*
2761  * Add a new memloc to the database (and keep 'em sorted by PA)
2762  */
2763 void
2764 fhc_add_memloc(int board, uint64_t pa, uint_t size)
2765 {
2766 	struct fhc_memloc *p, **pp;
2767 	uint_t ipa = pa >> FHC_MEMLOC_SHIFT;
2768 
2769 	ASSERT(fhc_bdlist_locked());
2770 	ASSERT((size & (size-1)) == 0);		/* size must be power of 2 */
2771 
2772 	/* look for a comparable memloc (as long as new PA smaller) */
2773 	for (p = fhc_base_memloc, pp = &fhc_base_memloc;
2774 	    p != NULL; pp = &p->next, p = p->next) {
2775 		/* have we passed our place in the sort? */
2776 		if (ipa < p->pa) {
2777 			break;
2778 		}
2779 	}
2780 	p = kmem_alloc(sizeof (struct fhc_memloc), KM_SLEEP);
2781 	p->next = *pp;
2782 	p->board = board;
2783 	p->pa = ipa;
2784 	p->size = size;
2785 #ifdef DEBUG_MEMDEC
2786 	cmn_err(CE_NOTE, "fhc_add_memloc: adding %d 0x%x 0x%x",
2787 	    p->board, p->pa, p->size);
2788 #endif /* DEBUG_MEMDEC */
2789 	*pp = p;
2790 }
2791 
2792 /*
2793  * Delete all memloc records for a board from the database
2794  */
2795 void
2796 fhc_del_memloc(int board)
2797 {
2798 	struct fhc_memloc *p, **pp;
2799 
2800 	ASSERT(fhc_bdlist_locked());
2801 
2802 	/* delete all entries that match board */
2803 	pp = &fhc_base_memloc;
2804 	while ((p = *pp) != NULL) {
2805 		if (p->board == board) {
2806 #ifdef DEBUG_MEMDEC
2807 			cmn_err(CE_NOTE, "fhc_del_memloc: removing %d "
2808 			    "0x%x 0x%x", board, p->pa, p->size);
2809 #endif /* DEBUG_MEMDEC */
2810 			*pp = p->next;
2811 			kmem_free(p, sizeof (struct fhc_memloc));
2812 		} else {
2813 			pp = &(p->next);
2814 		}
2815 	}
2816 }
2817 
2818 /*
2819  * Find a physical address range of sufficient size and return a starting PA
2820  */
2821 uint64_t
2822 fhc_find_memloc_gap(uint_t size)
2823 {
2824 	struct fhc_memloc *p;
2825 	uint_t base_pa = 0;
2826 	uint_t mask = ~(size-1);
2827 
2828 	ASSERT(fhc_bdlist_locked());
2829 	ASSERT((size & (size-1)) == 0);		/* size must be power of 2 */
2830 
2831 	/*
2832 	 * walk the list of known memlocs and measure the 'gaps'.
2833 	 * we will need a hole that can align the 'size' requested.
2834 	 * (e.g. a 256mb bank needs to be on a 256mb boundary).
2835 	 */
2836 	for (p = fhc_base_memloc; p != NULL; p = p->next) {
2837 		if (base_pa != (base_pa & mask))
2838 			base_pa = (base_pa + size) & mask;
2839 		if (base_pa + size <= p->pa)
2840 			break;
2841 		base_pa = p->pa + p->size;
2842 	}
2843 
2844 	/*
2845 	 * At this point, we assume that base_pa is good enough.
2846 	 */
2847 	ASSERT((base_pa + size) <= FHC_MEMLOC_MAX);
2848 	if (base_pa != (base_pa & mask))
2849 		base_pa = (base_pa + size) & mask;	/* align */
2850 	return ((uint64_t)base_pa << FHC_MEMLOC_SHIFT);
2851 }
2852 
2853 /*
2854  * This simple function to write the MCRs can only be used when
2855  * the contents of memory are not valid as there is a bug in the AC
2856  * ASIC concerning refresh.
2857  */
2858 static void
2859 fhc_write_mcrs(
2860 	uint64_t cpa,
2861 	uint64_t dpa0,
2862 	uint64_t dpa1,
2863 	uint64_t c,
2864 	uint64_t d0,
2865 	uint64_t d1)
2866 {
2867 	stdphysio(cpa, c & ~AC_CSR_REFEN);
2868 	(void) lddphysio(cpa);
2869 	if (GRP_SIZE_IS_SET(d0)) {
2870 		stdphysio(dpa0, d0);
2871 		(void) lddphysio(dpa0);
2872 	}
2873 	if (GRP_SIZE_IS_SET(d1)) {
2874 		stdphysio(dpa1, d1);
2875 		(void) lddphysio(dpa1);
2876 	}
2877 	stdphysio(cpa, c);
2878 	(void) lddphysio(cpa);
2879 }
2880 
2881 /* compute the appropriate RASIZE for bank size */
2882 static uint_t
2883 fhc_cvt_size(uint64_t bsz)
2884 {
2885 	uint_t csz;
2886 
2887 	csz = 0;
2888 	bsz /= 64;
2889 	while (bsz) {
2890 		csz++;
2891 		bsz /= 2;
2892 	}
2893 	csz /= 2;
2894 
2895 	return (csz);
2896 }
2897 
2898 void
2899 fhc_program_memory(int board, uint64_t pa)
2900 {
2901 	uint64_t cpa, dpa0, dpa1;
2902 	uint64_t c, d0, d1;
2903 	uint64_t b0_pa, b1_pa;
2904 	uint64_t memdec0, memdec1;
2905 	uint_t b0_size, b1_size;
2906 
2907 	/* XXX gross hack to get to board via board number */
2908 	cpa = 0x1c0f9000060ull + (board * 0x400000000ull);
2909 #ifdef DEBUG_MEMDEC
2910 	prom_printf("cpa = 0x%llx\n", cpa);
2911 #endif /* DEBUG_MEMDEC */
2912 	dpa0 = cpa + 0x10;
2913 	dpa1 = cpa + 0x20;
2914 
2915 /* assume size is set by connect */
2916 	memdec0 = lddphysio(dpa0);
2917 #ifdef DEBUG_MEMDEC
2918 	prom_printf("memdec0 = 0x%llx\n", memdec0);
2919 #endif /* DEBUG_MEMDEC */
2920 	memdec1 = lddphysio(dpa1);
2921 #ifdef DEBUG_MEMDEC
2922 	prom_printf("memdec1 = 0x%llx\n", memdec1);
2923 #endif /* DEBUG_MEMDEC */
2924 	if (GRP_SIZE_IS_SET(memdec0)) {
2925 		b0_size = GRP_SPANMB(memdec0);
2926 	} else {
2927 		b0_size = 0;
2928 	}
2929 	if (GRP_SIZE_IS_SET(memdec1)) {
2930 		b1_size = GRP_SPANMB(memdec1);
2931 	} else {
2932 		b1_size = 0;
2933 	}
2934 
2935 	c = lddphysio(cpa);
2936 #ifdef DEBUG_MEMDEC
2937 	prom_printf("c = 0x%llx\n", c);
2938 #endif /* DEBUG_MEMDEC */
2939 	if (b0_size) {
2940 		b0_pa = pa;
2941 		d0 = SETUP_DECODE(b0_pa, b0_size, 0, 0);
2942 		d0 |= AC_MEM_VALID;
2943 
2944 		c &= ~0x7;
2945 		c |= 0;
2946 		c &= ~(0x7 << 8);
2947 		c |= (fhc_cvt_size(b0_size) << 8);  /* match row size */
2948 	} else {
2949 		d0 = memdec0;
2950 	}
2951 	if (b1_size) {
2952 		b1_pa = pa + 0x80000000ull; /* XXX 2gb */
2953 		d1 = SETUP_DECODE(b1_pa, b1_size, 0, 0);
2954 		d1 |= AC_MEM_VALID;
2955 
2956 		c &= ~(0x7 << 3);
2957 		c |= (0 << 3);
2958 		c &= ~(0x7 << 11);
2959 		c |= (fhc_cvt_size(b1_size) << 11); /* match row size */
2960 	} else {
2961 		d1 = memdec1;
2962 	}
2963 #ifdef DEBUG_MEMDEC
2964 	prom_printf("c 0x%llx, d0 0x%llx, d1 0x%llx\n", c, d0, d1);
2965 #endif /* DEBUG_MEMDEC */
2966 	fhc_write_mcrs(cpa, dpa0, dpa1, c, d0, d1);
2967 }
2968 
2969 /*
2970  * Creates a variable sized virtual kstat with a snapshot routine in order
2971  * to pass the linked list fault list up to userland. Also creates a
2972  * virtual kstat to pass up the string table for faults.
2973  */
2974 void
2975 create_ft_kstats(int instance)
2976 {
2977 	struct kstat *ksp;
2978 
2979 	ksp = kstat_create("unix", instance, FT_LIST_KSTAT_NAME, "misc",
2980 	    KSTAT_TYPE_RAW, 1, KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_VAR_SIZE);
2981 
2982 	if (ksp != NULL) {
2983 		ksp->ks_data = NULL;
2984 		ksp->ks_update = ft_ks_update;
2985 		ksp->ks_snapshot = ft_ks_snapshot;
2986 		ksp->ks_data_size = 1;
2987 		ksp->ks_lock = &ftlist_mutex;
2988 		kstat_install(ksp);
2989 	}
2990 }
2991 
2992 /*
2993  * This routine creates a snapshot of all the fault list data. It is
2994  * called by the kstat framework when a kstat read is done.
2995  */
2996 static int
2997 ft_ks_snapshot(struct kstat *ksp, void *buf, int rw)
2998 {
2999 	struct ft_link_list *ftlist;
3000 
3001 	if (rw == KSTAT_WRITE) {
3002 		return (EACCES);
3003 	}
3004 
3005 	ksp->ks_snaptime = gethrtime();
3006 
3007 	for (ftlist = ft_list; ftlist != NULL; ftlist = ftlist->next) {
3008 		bcopy(&ftlist->f, buf, sizeof (struct ft_list));
3009 		buf = ((struct ft_list *)buf) + 1;
3010 	}
3011 	return (0);
3012 }
3013 
3014 /*
3015  * Setup the kstat data size for the kstat framework. This is used in
3016  * conjunction with the ks_snapshot routine. This routine sets the size,
3017  * the kstat framework allocates the memory, and ks_shapshot does the
3018  * data transfer.
3019  */
3020 static int
3021 ft_ks_update(struct kstat *ksp, int rw)
3022 {
3023 	if (rw == KSTAT_WRITE) {
3024 		return (EACCES);
3025 	} else {
3026 		if (ft_nfaults) {
3027 			ksp->ks_data_size = ft_nfaults *
3028 			    sizeof (struct ft_list);
3029 		} else {
3030 			ksp->ks_data_size = 1;
3031 		}
3032 	}
3033 
3034 	return (0);
3035 }
3036 
3037 /*
3038  * Power off any cpus on the board.
3039  */
3040 int
3041 fhc_board_poweroffcpus(int board, char *errbuf, int cpu_flags)
3042 {
3043 	cpu_t *cpa, *cpb;
3044 	enum board_type type;
3045 	int error = 0;
3046 
3047 	ASSERT(MUTEX_HELD(&cpu_lock));
3048 
3049 	/*
3050 	 * what type of board are we dealing with?
3051 	 */
3052 	type = fhc_bd_type(board);
3053 
3054 	switch (type) {
3055 	case CPU_BOARD:
3056 
3057 		/*
3058 		 * the shutdown sequence will be:
3059 		 *
3060 		 * idle both cpus then shut them off.
3061 		 * it looks like the hardware gets corrupted if one
3062 		 * cpu is busy while the other is shutting down...
3063 		 */
3064 
3065 		if ((cpa = cpu_get(FHC_BOARD2CPU_A(board))) != NULL &&
3066 		    cpu_is_active(cpa)) {
3067 			if (!cpu_intr_on(cpa)) {
3068 				cpu_intr_enable(cpa);
3069 			}
3070 			if ((error = cpu_offline(cpa, cpu_flags)) != 0) {
3071 				cmn_err(CE_WARN,
3072 				    "Processor %d failed to offline.",
3073 				    cpa->cpu_id);
3074 				if (errbuf != NULL) {
3075 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3076 					    "processor %d failed to offline",
3077 					    cpa->cpu_id);
3078 				}
3079 			}
3080 		}
3081 
3082 		if (error == 0 &&
3083 		    (cpb = cpu_get(FHC_BOARD2CPU_B(board))) != NULL &&
3084 		    cpu_is_active(cpb)) {
3085 			if (!cpu_intr_on(cpb)) {
3086 				cpu_intr_enable(cpb);
3087 			}
3088 			if ((error = cpu_offline(cpb, cpu_flags)) != 0) {
3089 				cmn_err(CE_WARN,
3090 				    "Processor %d failed to offline.",
3091 				    cpb->cpu_id);
3092 
3093 				if (errbuf != NULL) {
3094 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3095 					    "processor %d failed to offline",
3096 					    cpb->cpu_id);
3097 				}
3098 			}
3099 		}
3100 
3101 		if (error == 0 && cpa != NULL && cpu_is_offline(cpa)) {
3102 			if ((error = cpu_poweroff(cpa)) != 0) {
3103 				cmn_err(CE_WARN,
3104 				    "Processor %d failed to power off.",
3105 				    cpa->cpu_id);
3106 				if (errbuf != NULL) {
3107 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3108 					    "processor %d failed to power off",
3109 					    cpa->cpu_id);
3110 				}
3111 			} else {
3112 				cmn_err(CE_NOTE, "Processor %d powered off.",
3113 				    cpa->cpu_id);
3114 			}
3115 		}
3116 
3117 		if (error == 0 && cpb != NULL && cpu_is_offline(cpb)) {
3118 			if ((error = cpu_poweroff(cpb)) != 0) {
3119 				cmn_err(CE_WARN,
3120 				    "Processor %d failed to power off.",
3121 				    cpb->cpu_id);
3122 
3123 				if (errbuf != NULL) {
3124 					(void) snprintf(errbuf, SYSC_OUTPUT_LEN,
3125 					    "processor %d failed to power off",
3126 					    cpb->cpu_id);
3127 				}
3128 			} else {
3129 				cmn_err(CE_NOTE, "Processor %d powered off.",
3130 				    cpb->cpu_id);
3131 			}
3132 		}
3133 
3134 		/*
3135 		 * If all the shutdowns completed, ONLY THEN, clear the
3136 		 * incorrectly valid dtags...
3137 		 *
3138 		 * IMPORTANT: it is an error to read or write dtags while
3139 		 * they are 'active'
3140 		 */
3141 		if (error == 0 && (cpa != NULL || cpb != NULL)) {
3142 			u_longlong_t base = 0;
3143 			int i;
3144 #ifdef DEBUG
3145 			int nonz0 = 0;
3146 			int nonz1 = 0;
3147 #endif
3148 			if (cpa != NULL)
3149 				base = FHC_DTAG_BASE(cpa->cpu_id);
3150 			if (cpb != NULL)
3151 				base = FHC_DTAG_BASE(cpb->cpu_id);
3152 			ASSERT(base != 0);
3153 
3154 			for (i = 0; i < FHC_DTAG_SIZE; i += FHC_DTAG_SKIP) {
3155 				u_longlong_t value = lddphysio(base+i);
3156 #ifdef lint
3157 				value = value;
3158 #endif
3159 #ifdef DEBUG
3160 				if (cpa != NULL && (value & FHC_DTAG_LOW))
3161 					nonz0++;
3162 				if (cpb != NULL && (value & FHC_DTAG_HIGH))
3163 					nonz1++;
3164 #endif
3165 				/* always clear the dtags */
3166 				stdphysio(base + i, 0ull);
3167 			}
3168 #ifdef DEBUG
3169 			if (nonz0 || nonz1) {
3170 				cmn_err(CE_NOTE, "!dtag results: "
3171 				    "cpua valid %d, cpub valid %d",
3172 				    nonz0, nonz1);
3173 			}
3174 #endif
3175 		}
3176 
3177 		break;
3178 
3179 	default:
3180 		break;
3181 	}
3182 
3183 	return (error);
3184 }
3185 
3186 /*
3187  * platform code for shutting down cpus.
3188  */
3189 int
3190 fhc_cpu_poweroff(struct cpu *cp)
3191 {
3192 	int board;
3193 	fhc_bd_t *bd_list;
3194 	int delays;
3195 	extern void idle_stop_xcall(void);
3196 	static void fhc_cpu_shutdown_self(void);
3197 
3198 	ASSERT(MUTEX_HELD(&cpu_lock));
3199 	ASSERT((cp->cpu_flags & (CPU_EXISTS | CPU_OFFLINE | CPU_QUIESCED)) ==
3200 	    (CPU_EXISTS | CPU_OFFLINE | CPU_QUIESCED));
3201 
3202 	/*
3203 	 * Lock the board so that we can safely access the
3204 	 * registers. This cannot be done inside the pause_cpus().
3205 	 */
3206 	board = FHC_CPU2BOARD(cp->cpu_id);
3207 	bd_list = fhc_bdlist_lock(board);
3208 	ASSERT(fhc_bd_valid(board) && (bd_list->sc.type == CPU_BOARD));
3209 
3210 	/*
3211 	 * Capture all CPUs (except for detaching proc) to prevent
3212 	 * crosscalls to the detaching proc until it has cleared its
3213 	 * bit in cpu_ready_set.
3214 	 *
3215 	 * The CPU's remain paused and the prom_mutex is known to be free.
3216 	 * This prevents the x-trap victim from blocking when doing prom
3217 	 * IEEE-1275 calls at a high PIL level.
3218 	 */
3219 	promsafe_pause_cpus();
3220 
3221 	/*
3222 	 * Quiesce interrupts on the target CPU. We do this by setting
3223 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3224 	 * prevent it from receiving cross calls and cross traps.
3225 	 * This prevents the processor from receiving any new soft interrupts.
3226 	 */
3227 	mp_cpu_quiesce(cp);
3228 
3229 	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
3230 	    (uint64_t)fhc_cpu_shutdown_self, (uint64_t)NULL);
3231 
3232 	/*
3233 	 * Wait for slave cpu to shutdown.
3234 	 * Sense this by watching the hardware EPDx bit.
3235 	 */
3236 	for (delays = FHC_SHUTDOWN_WAIT_MSEC; delays != 0; delays--) {
3237 		uint_t temp;
3238 
3239 		DELAY(1000);
3240 
3241 		/* get the current cpu power status */
3242 		temp = *bd_list->softsp->ctrl;
3243 
3244 		/* has the cpu actually signalled shutdown? */
3245 		if (FHC_CPU_IS_A(cp->cpu_id)) {
3246 			if (temp & FHC_EPDA_OFF)
3247 				break;
3248 		} else {
3249 			if (temp & FHC_EPDB_OFF)
3250 				break;
3251 		}
3252 	}
3253 
3254 	start_cpus();
3255 
3256 	fhc_bdlist_unlock();
3257 
3258 	/* A timeout means we've lost control of the cpu. */
3259 	if (delays == 0)
3260 		panic("Processor %d failed during shutdown", cp->cpu_id);
3261 
3262 	return (0);
3263 }
3264 
3265 /*
3266  * shutdown_self
3267  * slave side shutdown.  clean up and execute the shutdown sequence.
3268  */
3269 static void
3270 fhc_cpu_shutdown_self(void)
3271 {
3272 	extern void flush_windows(void);
3273 	static void os_completes_shutdown(void);
3274 
3275 	flush_windows();
3276 
3277 	ASSERT(CPU->cpu_intr_actv == 0);
3278 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread ||
3279 	    CPU->cpu_thread == CPU->cpu_startup_thread);
3280 
3281 	CPU->cpu_flags = CPU_POWEROFF | CPU_OFFLINE | CPU_QUIESCED;
3282 
3283 	(void) prom_sunfire_cpu_off();	/* inform Ultra Enterprise prom */
3284 
3285 	os_completes_shutdown();
3286 
3287 	panic("fhc_cpu_shutdown_self: cannot return");
3288 	/*NOTREACHED*/
3289 }
3290 
3291 /*
3292  * Warm start CPU.
3293  */
3294 static int
3295 fhc_cpu_start(struct cpu *cp)
3296 {
3297 	int rv;
3298 	int cpuid = cp->cpu_id;
3299 	pnode_t nodeid;
3300 	extern void restart_other_cpu(int);
3301 
3302 	ASSERT(MUTEX_HELD(&cpu_lock));
3303 
3304 	/* power on cpu */
3305 	nodeid = cpunodes[cpuid].nodeid;
3306 	ASSERT(nodeid != (pnode_t)0);
3307 	rv = prom_wakeupcpu(nodeid);
3308 	if (rv != 0) {
3309 		cmn_err(CE_WARN, "Processor %d failed to power on.", cpuid);
3310 		return (EBUSY);
3311 	}
3312 
3313 	cp->cpu_flags &= ~CPU_POWEROFF;
3314 
3315 	/*
3316 	 * NOTE: restart_other_cpu pauses cpus during the slave cpu start.
3317 	 * This helps to quiesce the bus traffic a bit which makes
3318 	 * the tick sync routine in the prom more robust.
3319 	 */
3320 	restart_other_cpu(cpuid);
3321 
3322 	return (0);
3323 }
3324 
3325 /*
3326  * Power on CPU.
3327  */
3328 int
3329 fhc_cpu_poweron(struct cpu *cp)
3330 {
3331 	fhc_bd_t *bd_list;
3332 	enum temp_state state;
3333 	int board;
3334 	int status;
3335 	int status_other;
3336 	struct cpu *cp_other;
3337 
3338 	ASSERT(MUTEX_HELD(&cpu_lock));
3339 	ASSERT(cpu_is_poweredoff(cp));
3340 
3341 	/* do not power on overtemperature cpu */
3342 	board = FHC_CPU2BOARD(cp->cpu_id);
3343 	bd_list = fhc_bdlist_lock(board);
3344 
3345 	ASSERT(bd_list != NULL);
3346 	ASSERT(bd_list->sc.type == CPU_BOARD);
3347 	ASSERT(bd_list->dev_softsp != NULL);
3348 
3349 	state = ((struct environ_soft_state *)
3350 	    bd_list->dev_softsp)->tempstat.state;
3351 
3352 	fhc_bdlist_unlock();
3353 	if ((state == TEMP_WARN) || (state == TEMP_DANGER))
3354 		return (EBUSY);
3355 
3356 	status = fhc_cpu_start(cp);
3357 
3358 	/* policy for dual cpu boards */
3359 
3360 	if ((status == 0) &&
3361 	    ((cp_other = cpu_get(FHC_OTHER_CPU_ID(cp->cpu_id))) != NULL)) {
3362 		/*
3363 		 * Do not leave board's other cpu idling in the prom.
3364 		 * Start the other cpu and set its state to P_OFFLINE.
3365 		 */
3366 		status_other = fhc_cpu_start(cp_other);
3367 		if (status_other != 0) {
3368 			panic("fhc: failed to start second CPU"
3369 			    " in pair %d & %d, error %d",
3370 			    cp->cpu_id, cp_other->cpu_id, status_other);
3371 		}
3372 	}
3373 
3374 	return (status);
3375 }
3376 
/*
 * complete the shutdown sequence in case the firmware doesn't.
 *
 * If the firmware returns, then complete the shutdown code.
 * (sunfire firmware presently only updates its status.  the
 * OS must flush the D-tags and execute the shutdown instruction.)
 *
 * Steps performed, in order:
 *   1. Build a TTE for the page at FHC_LOCAL_OS_PAGEBASE and load it into
 *      both the D-TLB and the I-TLB at shutdown_va.
 *   2. Copy the code lying between fhc_shutdown_asm and
 *      fhc_shutdown_asm_end, one uint_t at a time, to
 *      shutdown_va + FHC_SRAM_OS_OFFSET.
 *   3. Call the copied code with a per-cpu flush base address and this
 *      cpu's ecache size.
 *
 * The caller, fhc_cpu_shutdown_self(), panics if this function returns.
 */
static void
os_completes_shutdown(void)
{
	pfn_t 			pfn;
	tte_t			tte;
	volatile uint_t		*src;
	volatile uint_t		*dst;
	caddr_t			copy_addr;
	extern void fhc_shutdown_asm(u_longlong_t, int);
	extern void fhc_shutdown_asm_end(void);

	/* destination of the copied routine inside the mapped page */
	copy_addr = shutdown_va + FHC_SRAM_OS_OFFSET;

	/* compute sram global address for this operation */
	pfn = FHC_LOCAL_OS_PAGEBASE >> MMU_PAGESHIFT;

	/* force load i and d translations */
	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
	    TTE_PFN_INTHI(pfn);
	tte.tte_intlo = TTE_PFN_INTLO(pfn) |
	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT; /* un$ */
	sfmmu_dtlb_ld_kva(shutdown_va, &tte);	/* load dtlb */
	sfmmu_itlb_ld_kva(shutdown_va, &tte);	/* load itlb */

	/*
	 * copy the special shutdown function to sram
	 * (this is a special integer copy that synchronizes with localspace
	 * accesses.  we need special throttling to ensure copy integrity)
	 *
	 * src, dst and dummy are volatile so that each word is written and
	 * then read back individually, exactly as spelled out below.
	 */
	for (src = (uint_t *)fhc_shutdown_asm, dst = (uint_t *)copy_addr;
	    src < (uint_t *)fhc_shutdown_asm_end;
	    src++, dst++) {
		volatile uint_t dummy;

		*dst = *src;
		/*
		 * ensure non corrupting single write operations to
		 * localspace sram by interleaving reads with writes.
		 */
		dummy = *dst;
#ifdef lint
		dummy = dummy;
#endif
	}

	/*
	 * Call the shutdown sequencer.
	 * NOTE: the base flush address must be unique for each MID.
	 *
	 * The copy in sram is what gets executed, not fhc_shutdown_asm
	 * itself; the flush base is offset by cpu_id * FHC_MAX_ECACHE_SIZE
	 * to give each cpu its own range.
	 */
	((void (*)(u_longlong_t, int))copy_addr)(
	    FHC_BASE_NOMEM + CPU->cpu_id * FHC_MAX_ECACHE_SIZE,
	    cpunodes[CPU->cpu_id].ecache_size);
}
3437 
3438 enum temp_state
3439 fhc_env_temp_state(int board)
3440 {
3441 	fhc_bd_t *bdp;
3442 	struct environ_soft_state *envp;
3443 
3444 	ASSERT(fhc_bd_valid(board));
3445 
3446 	bdp = fhc_bd(board);
3447 
3448 	/*
3449 	 * Due to asynchronous attach of environ, environ may
3450 	 * not be attached by the time we start calling this routine
3451 	 * to check the temperature state.  Environ not attaching is
3452 	 * pathological so this will only cover the time between
3453 	 * board connect and environ attach.
3454 	 */
3455 	if (!bdp->dev_softsp) {
3456 		return (TEMP_OK);
3457 	}
3458 	envp = (struct environ_soft_state *)bdp->dev_softsp;
3459 
3460 	return (envp->tempstat.state);
3461 }
3462 
3463 static void
3464 fhc_tod_fault(enum tod_fault_type tod_bad)
3465 {
3466 	int board_num = 0;
3467 	enum ft_class class = FT_SYSTEM;
3468 	uint64_t addr;
3469 
3470 	addr = (va_to_pa((void *)v_eeprom_addr)) >> BOARD_PHYADDR_SHIFT;
3471 
3472 	if ((addr & CLOCKBOARD_PHYADDR_BITS) != CLOCKBOARD_PHYADDR_BITS) {
3473 		/* if tod is not on clock board, */
3474 		/* it'd be on one of io boards */
3475 		board_num = (addr >> IO_BOARD_NUMBER_SHIFT)
3476 		    & IO_BOARD_NUMBER_MASK;
3477 		class = FT_BOARD;
3478 	}
3479 
3480 	switch (tod_bad) {
3481 	case TOD_NOFAULT:
3482 		clear_fault(board_num, FT_TODFAULT, class);
3483 		break;
3484 	case TOD_REVERSED:
3485 	case TOD_STALLED:
3486 	case TOD_JUMPED:
3487 	case TOD_RATECHANGED:
3488 		reg_fault(board_num, FT_TODFAULT, class);
3489 		break;
3490 	default:
3491 		break;
3492 	}
3493 }
3494