xref: /titanic_52/usr/src/uts/sun4u/starcat/io/drmach.c (revision 2f172c55ef76964744bc62b4500ece87f3089b4d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/debug.h>
28 #include <sys/types.h>
29 #include <sys/varargs.h>
30 #include <sys/errno.h>
31 #include <sys/cred.h>
32 #include <sys/dditypes.h>
33 #include <sys/devops.h>
34 #include <sys/modctl.h>
35 #include <sys/poll.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/ndi_impldefs.h>
41 #include <sys/stat.h>
42 #include <sys/kmem.h>
43 #include <sys/vmem.h>
44 #include <sys/disp.h>
45 #include <sys/processor.h>
46 #include <sys/cheetahregs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/mem_config.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/systm.h>
51 #include <sys/machsystm.h>
52 #include <sys/autoconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sysmacros.h>
55 #include <sys/x_call.h>
56 #include <sys/promif.h>
57 #include <sys/prom_plat.h>
58 #include <sys/membar.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/mem_cage.h>
61 #include <sys/stack.h>
62 #include <sys/archsystm.h>
63 #include <vm/hat_sfmmu.h>
64 #include <sys/pte.h>
65 #include <sys/mmu.h>
66 #include <sys/cpu_module.h>
67 #include <sys/obpdefs.h>
68 #include <sys/mboxsc.h>
69 #include <sys/plat_ecc_dimm.h>
70 
71 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
72 #include <sys/schpc.h>
73 #include <sys/pci.h>
74 
75 #include <sys/starcat.h>
76 #include <sys/cpu_sgnblk_defs.h>
77 #include <sys/drmach.h>
78 #include <sys/dr_util.h>
79 #include <sys/dr_mbx.h>
80 #include <sys/sc_gptwocfg.h>
81 #include <sys/iosramreg.h>
82 #include <sys/iosramio.h>
83 #include <sys/iosramvar.h>
84 #include <sys/axq.h>
85 #include <sys/post/scat_dcd.h>
86 #include <sys/kobj.h>
87 #include <sys/taskq.h>
88 #include <sys/cmp.h>
89 #include <sys/sbd_ioctl.h>
90 
91 #include <sys/sysevent.h>
92 #include <sys/sysevent/dr.h>
93 #include <sys/sysevent/eventdefs.h>
94 
95 #include <sys/pci/pcisch.h>
96 #include <sys/pci/pci_regs.h>
97 
98 #include <sys/ontrap.h>
99 
100 /* defined in ../ml/drmach.il.cpp */
101 extern void		bcopy32_il(uint64_t, uint64_t);
102 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
103 extern void		flush_dcache_il(void);
104 extern void		flush_icache_il(void);
105 extern void		flush_pcache_il(void);
106 
107 /* defined in ../ml/drmach_asm.s */
108 extern uint64_t		lddmcdecode(uint64_t physaddr);
109 extern uint64_t		lddsafconfig(void);
110 
111 /* XXX here until provided by sys/dman.h */
112 extern int man_dr_attach(dev_info_t *);
113 extern int man_dr_detach(dev_info_t *);
114 
115 #define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
116 #define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
117 #define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
118 
119 #define	DRMACH_SLICE_MASK		0x1Full
120 #define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
121 #define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
122 
123 /*
124  * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
125  * available address space and the usable address space for every slice.
126  * There must be a distinction between the available and usable do to a
127  * restriction imposed by CDC memory size.
128  */
129 
130 #define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
131 #define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
132 
133 #define	DRMACH_MC_NBANKS		4
134 
135 #define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
136 #define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
137 
138 #define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
139 #define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
140 	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
141 
142 /*
143  * The Cheetah's Safari Configuration Register and the Schizo's
144  * Safari Control/Status Register place the LPA base and bound fields in
145  * same bit locations with in their register word. This source code takes
146  * advantage of this by defining only one set of LPA encoding/decoding macros
147  * which are shared by various Cheetah and Schizo drmach routines.
148  */
149 #define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
150 #define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
151 
152 #define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
153 #define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
154 #define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
155 #define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
156 
157 #define	DRMACH_L1_SET_LPA(b)		\
158 	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
159 
160 #define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
161 #define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
162 
163 /*
164  * Name properties for frequently accessed device nodes.
165  */
166 #define	DRMACH_CPU_NAMEPROP		"cpu"
167 #define	DRMACH_CMP_NAMEPROP		"cmp"
168 #define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
169 #define	DRMACH_PCI_NAMEPROP		"pci"
170 
171 /*
172  * Maximum value of processor Safari Timeout Log (TOL) field of
173  * Safari Config reg (7 secs).
174  */
175 #define	DRMACH_SAF_TOL_MAX		7 * 1000000
176 
177 /*
178  * drmach_board_t flag definitions
179  */
180 #define	DRMACH_NULL_PROC_LPA		0x1
181 
182 typedef struct {
183 	uint32_t	reg_addr_hi;
184 	uint32_t	reg_addr_lo;
185 	uint32_t	reg_size_hi;
186 	uint32_t	reg_size_lo;
187 } drmach_reg_t;
188 
189 typedef struct {
190 	struct drmach_node	*node;
191 	void			*data;
192 } drmach_node_walk_args_t;
193 
194 typedef struct drmach_node {
195 	void		*here;
196 
197 	pnode_t		 (*get_dnode)(struct drmach_node *node);
198 	int		 (*walk)(struct drmach_node *node, void *data,
199 				int (*cb)(drmach_node_walk_args_t *args));
200 	dev_info_t	*(*n_getdip)(struct drmach_node *node);
201 	int		 (*n_getproplen)(struct drmach_node *node, char *name,
202 				int *len);
203 	int		 (*n_getprop)(struct drmach_node *node, char *name,
204 				void *buf, int len);
205 	int		 (*get_parent)(struct drmach_node *node,
206 				struct drmach_node *pnode);
207 } drmach_node_t;
208 
209 typedef struct {
210 	int		 min_index;
211 	int		 max_index;
212 	int		 arr_sz;
213 	drmachid_t	*arr;
214 } drmach_array_t;
215 
216 typedef struct {
217 	void		*isa;
218 
219 	void		 (*dispose)(drmachid_t);
220 	sbd_error_t	*(*release)(drmachid_t);
221 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
222 
223 	char		 name[MAXNAMELEN];
224 } drmach_common_t;
225 
226 struct drmach_board;
227 typedef struct drmach_board drmach_board_t;
228 
229 typedef struct {
230 	drmach_common_t	 cm;
231 	const char	*type;
232 	drmach_board_t	*bp;
233 	drmach_node_t	*node;
234 	int		 portid;
235 	int		 unum;
236 	int		 busy;
237 	int		 powered;
238 } drmach_device_t;
239 
240 typedef struct drmach_cpu {
241 	drmach_device_t	 dev;
242 	uint64_t	 scr_pa;
243 	processorid_t	 cpuid;
244 	int		 coreid;
245 } drmach_cpu_t;
246 
247 typedef struct drmach_mem {
248 	drmach_device_t	 dev;
249 	struct drmach_mem *next;
250 	uint64_t	 nbytes;
251 	uint64_t	 madr_pa;
252 } drmach_mem_t;
253 
254 typedef struct drmach_io {
255 	drmach_device_t	 dev;
256 	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
257 } drmach_io_t;
258 
/*
 * Per-board soft state.  Board ids are distinguished from other
 * drmachid_t values by cm.isa == drmach_board_new (see
 * DRMACH_IS_BOARD_ID) and live in the drmach_boards array.
 */
struct drmach_board {
	drmach_common_t	 cm;		/* common header; isa/dispose/status */
	int		 bnum;		/* board number */
	int		 assigned;
	int		 powered;
	int		 connected;
	int		 empty;
	int		 cond;		/* board condition — presumably an sbd_cond_t value; verify against callers */
	uint_t		 cpu_impl;	/* CPU implementation on this board */
	uint_t		 flags;		/* DRMACH_NULL_PROC_LPA (see DRMACH_L1_SET_LPA) */
	drmach_node_t	*tree;		/* device tree accessor for this board */
	drmach_array_t	*devices;	/* ids of devices on this board */
	drmach_mem_t	*mem;		/* head of the board's memory unit list */
	uint64_t	 stardrb_offset;
	char		 type[BD_TYPELEN];
};
275 
276 typedef struct {
277 	int		 flags;
278 	drmach_device_t	*dp;
279 	sbd_error_t	*err;
280 	dev_info_t	*fdip;
281 } drmach_config_args_t;
282 
283 typedef struct {
284 	drmach_board_t	*obj;
285 	int		 ndevs;
286 	void		*a;
287 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
288 	sbd_error_t	*err;
289 } drmach_board_cb_data_t;
290 
291 typedef struct drmach_casmslot {
292 	int	valid;
293 	int	slice;
294 } drmach_casmslot_t;
295 
296 typedef enum {
297 	DRMACH_CR_OK,
298 	DRMACH_CR_MC_IDLE_ERR,
299 	DRMACH_CR_IOPAUSE_ERR,
300 	DRMACH_CR_ONTRAP_ERR
301 } drmach_cr_err_t;
302 
303 typedef struct {
304 	void		*isa;
305 	caddr_t		 data;
306 	drmach_mem_t	*s_mp;
307 	drmach_mem_t	*t_mp;
308 	struct memlist	*c_ml;
309 	uint64_t	 s_copybasepa;
310 	uint64_t	 t_copybasepa;
311 	drmach_cr_err_t	 ecode;
312 	void		*earg;
313 } drmach_copy_rename_t;
314 
315 /*
316  * The following global is read as a boolean value, non-zero is true.
317  * If zero, DR copy-rename and cpu poweron will not set the processor
318  * LPA settings (CBASE, CBND of Safari config register) to correspond
319  * to the current memory slice map. LPAs of processors present at boot
320  * will remain as programmed by POST. LPAs of processors on boards added
321  * by DR will remain NULL, as programmed by POST. This can be used to
322  * to override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
323  * POST in the LDCD (and copied to the GDCD by SMS).
324  *
325  * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
326  * to Schizo device LPAs. These are always set by DR.
327  */
328 static int		 drmach_reprogram_lpa = 1;
329 
330 /*
331  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
332  * can fail to receive an XIR. To workaround this issue until a hardware
333  * fix is implemented, we will exclude the selection of these CPUs.
334  * Setting this to 0 will allow their selection again.
335  */
336 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
337 
338 static int		 drmach_initialized;
339 static drmach_array_t	*drmach_boards;
340 
341 static int		 drmach_cpu_delay = 1000;
342 static int		 drmach_cpu_ntries = 50000;
343 
344 static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
345 static kmutex_t		 drmach_slice_table_lock;
346 
347 tte_t			 drmach_cpu_sram_tte[NCPU];
348 caddr_t			 drmach_cpu_sram_va;
349 
350 /*
351  * Setting to non-zero will enable delay before all disconnect ops.
352  */
353 static int		 drmach_unclaim_delay_all;
354 /*
355  * Default delay is slightly greater than the max processor Safari timeout.
356  * This delay is intended to ensure the outstanding Safari activity has
357  * retired on this board prior to a board disconnect.
358  */
359 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
360 
361 /*
362  * By default, DR of non-Panther procs is not allowed into a Panther
363  * domain with large page sizes enabled.  Setting this to 0 will remove
364  * the restriction.
365  */
366 static int		 drmach_large_page_restriction = 1;
367 
368 /*
369  * Used to pass updated LPA values to procs.
370  * Protocol is to clear the array before use.
371  */
372 volatile uchar_t	*drmach_xt_mb;
373 volatile uint64_t	 drmach_xt_ready;
374 static kmutex_t		 drmach_xt_mb_lock;
375 static int		 drmach_xt_mb_size;
376 
377 uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
378 static kmutex_t		 drmach_bus_sync_lock;
379 
380 static sbd_error_t	*drmach_device_new(drmach_node_t *,
381 				drmach_board_t *, int, drmachid_t *);
382 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
383 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
384 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
385 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
386 
387 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
388 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
389 				char *name, void *buf, int len);
390 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
391 				char *name, int *len);
392 
393 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
394 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
395 				char *name, void *buf, int len);
396 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
397 				char *name, int *len);
398 
399 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
400 				caddr_t obufp, int olen,
401 				caddr_t ibufp, int ilen);
402 
403 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
404 sbd_error_t		*drmach_io_post_release(drmachid_t id);
405 
406 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
407 				drmach_device_t **dpp, cpu_flag_t *oflags);
408 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
409 				cpu_flag_t oflags);
410 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
411 void			drmach_iocage_mem_scrub(uint64_t nbytes);
412 
413 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
414 
415 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
416 
417 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
418 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
419 
420 static void		 drmach_bus_sync_list_update(void);
421 static void		 drmach_slice_table_update(drmach_board_t *, int);
422 static int		 drmach_portid2bnum(int);
423 
424 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
425 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
426 
427 static int		drmach_panther_boards(void);
428 
429 static int		drmach_name2type_idx(char *);
430 
431 #ifdef DEBUG
432 
433 #define	DRMACH_PR		if (drmach_debug) printf
434 #define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
435 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
436 #else
437 
438 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
439 #define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
440 #endif /* DEBUG */
441 
442 #define	DRMACH_OBJ(id)		((drmach_common_t *)id)
443 
444 #define	DRMACH_IS_BOARD_ID(id)	\
445 	((id != 0) &&		\
446 	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))
447 
448 #define	DRMACH_IS_CPU_ID(id)	\
449 	((id != 0) &&		\
450 	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))
451 
452 #define	DRMACH_IS_MEM_ID(id)	\
453 	((id != 0) &&		\
454 	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))
455 
456 #define	DRMACH_IS_IO_ID(id)	\
457 	((id != 0) &&		\
458 	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
459 
460 #define	DRMACH_IS_DEVICE_ID(id)					\
461 	((id != 0) &&						\
462 	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
463 	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
464 	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
465 
466 #define	DRMACH_IS_ID(id)					\
467 	((id != 0) &&						\
468 	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
469 	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
470 	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
471 	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
472 
473 #define	DRMACH_INTERNAL_ERROR() \
474 	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
475 static char		*drmach_ie_fmt = "drmach.c %d";
476 
477 static struct {
478 	const char	 *name;
479 	const char	 *type;
480 	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
481 } drmach_name2type[] = {
482 	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
483 	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
484 	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
485 	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
486 	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
487 	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
488 	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
489 };
490 
491 /*
492  * drmach autoconfiguration data structures and interfaces
493  */
494 
495 extern struct mod_ops mod_miscops;
496 
497 static struct modlmisc modlmisc = {
498 	&mod_miscops,
499 	"Sun Fire 15000 DR"
500 };
501 
502 static struct modlinkage modlinkage = {
503 	MODREV_1,
504 	(void *)&modlmisc,
505 	NULL
506 };
507 
508 /*
509  * drmach_boards_rwlock is used to synchronize read/write
510  * access to drmach_boards array between status and board lookup
511  * as READERS, and assign, and unassign threads as WRITERS.
512  */
513 static krwlock_t	drmach_boards_rwlock;
514 
515 static kmutex_t		drmach_i_lock;
516 static kmutex_t		drmach_iocage_lock;
517 static kcondvar_t 	drmach_iocage_cv;
518 static int		drmach_iocage_is_busy = 0;
519 uint64_t		drmach_iocage_paddr;
520 static caddr_t		drmach_iocage_vaddr;
521 static int		drmach_iocage_size = 0;
522 static int		drmach_is_cheetah = -1;
523 
524 int
525 _init(void)
526 {
527 	int	err;
528 
529 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
530 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
531 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
532 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
533 	    drmach_xt_mb_size, VM_SLEEP);
534 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
535 	if ((err = mod_install(&modlinkage)) != 0) {
536 		mutex_destroy(&drmach_i_lock);
537 		rw_destroy(&drmach_boards_rwlock);
538 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
539 		    drmach_xt_mb_size);
540 	}
541 
542 	return (err);
543 }
544 
545 int
546 _fini(void)
547 {
548 	static void	drmach_fini(void);
549 	int		err;
550 
551 	if ((err = mod_remove(&modlinkage)) == 0)
552 		drmach_fini();
553 
554 	return (err);
555 }
556 
/*
 * Module info entry point; reports the modlinkage registered in _init().
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
562 
563 /*
564  * drmach_node_* routines serve the purpose of separating the
565  * rest of the code from the device tree and OBP.  This is necessary
566  * because of In-Kernel-Probing.  Devices probed after stod, are probed
567  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
568  * have dnode ids.
569  */
570 
571 static int
572 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
573 {
574 	pnode_t		nodeid;
575 	static char	*fn = "drmach_node_obp_get_parent";
576 
577 	nodeid = np->get_dnode(np);
578 	if (nodeid == OBP_NONODE) {
579 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
580 		return (-1);
581 	}
582 
583 	bcopy(np, pp, sizeof (drmach_node_t));
584 
585 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
586 	if (pp->here == OBP_NONODE) {
587 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
588 		return (-1);
589 	}
590 
591 	return (0);
592 }
593 
/*
 * OBP flavor of get_dnode: the PROM node id is stored directly in 'here'.
 */
static pnode_t
drmach_node_obp_get_dnode(drmach_node_t *np)
{
	return ((pnode_t)(uintptr_t)np->here);
}
599 
/*
 * Glue structure used to adapt a drmach node-walk callback to the
 * ddi_walk_devs() callback signature.
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;	/* args handed to each cb */
	int 			(*cb)(drmach_node_walk_args_t *args);
	int			err;	/* first non-zero cb result */
} drmach_node_ddi_walk_args_t;
605 
606 int
607 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
608 {
609 	drmach_node_ddi_walk_args_t	*nargs;
610 
611 	nargs = (drmach_node_ddi_walk_args_t *)arg;
612 
613 	/*
614 	 * dip doesn't have to be held here as we are called
615 	 * from ddi_walk_devs() which holds the dip.
616 	 */
617 	nargs->nwargs->node->here = (void *)dip;
618 
619 	nargs->err = nargs->cb(nargs->nwargs);
620 
621 	/*
622 	 * Set "here" to NULL so that unheld dip is not accessible
623 	 * outside ddi_walk_devs()
624 	 */
625 	nargs->nwargs->node->here = NULL;
626 
627 	if (nargs->err)
628 		return (DDI_WALK_TERMINATE);
629 	else
630 		return (DDI_WALK_CONTINUE);
631 }
632 
633 static int
634 drmach_node_ddi_walk(drmach_node_t *np, void *data,
635 		int (*cb)(drmach_node_walk_args_t *args))
636 {
637 	drmach_node_walk_args_t		args;
638 	drmach_node_ddi_walk_args_t	nargs;
639 
640 	/* initialized args structure for callback */
641 	args.node = np;
642 	args.data = data;
643 
644 	nargs.nwargs = &args;
645 	nargs.cb = cb;
646 	nargs.err = 0;
647 
648 	/*
649 	 * Root node doesn't have to be held in any way.
650 	 */
651 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
652 
653 	return (nargs.err);
654 }
655 
656 static int
657 drmach_node_obp_walk(drmach_node_t *np, void *data,
658 		int (*cb)(drmach_node_walk_args_t *args))
659 {
660 	pnode_t			nodeid;
661 	int			rv;
662 	drmach_node_walk_args_t	args;
663 
664 	/* initialized args structure for callback */
665 	args.node = np;
666 	args.data = data;
667 
668 	nodeid = prom_childnode(prom_rootnode());
669 
670 	/* save our new position within the tree */
671 	np->here = (void *)(uintptr_t)nodeid;
672 
673 	rv = 0;
674 	while (nodeid != OBP_NONODE) {
675 
676 		pnode_t child;
677 
678 		rv = (*cb)(&args);
679 		if (rv)
680 			break;
681 
682 		child = prom_childnode(nodeid);
683 		np->here = (void *)(uintptr_t)child;
684 
685 		while (child != OBP_NONODE) {
686 			rv = (*cb)(&args);
687 			if (rv)
688 				break;
689 
690 			child = prom_nextnode(child);
691 			np->here = (void *)(uintptr_t)child;
692 		}
693 
694 		nodeid = prom_nextnode(nodeid);
695 
696 		/* save our new position within the tree */
697 		np->here = (void *)(uintptr_t)nodeid;
698 	}
699 
700 	return (rv);
701 }
702 
703 static int
704 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
705 {
706 	dev_info_t	*ndip;
707 	static char	*fn = "drmach_node_ddi_get_parent";
708 
709 	ndip = np->n_getdip(np);
710 	if (ndip == NULL) {
711 		cmn_err(CE_WARN, "%s: NULL dip", fn);
712 		return (-1);
713 	}
714 
715 	bcopy(np, pp, sizeof (drmach_node_t));
716 
717 	pp->here = (void *)ddi_get_parent(ndip);
718 	if (pp->here == NULL) {
719 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
720 		return (-1);
721 	}
722 
723 	return (0);
724 }
725 
/*
 * DDI flavor of get_dnode: devinfo-based nodes carry no PROM node id.
 */
/*ARGSUSED*/
static pnode_t
drmach_node_ddi_get_dnode(drmach_node_t *np)
{
	return ((pnode_t)NULL);
}
732 
733 static drmach_node_t *
734 drmach_node_new(void)
735 {
736 	drmach_node_t *np;
737 
738 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
739 
740 	if (drmach_initialized) {
741 		np->get_dnode = drmach_node_ddi_get_dnode;
742 		np->walk = drmach_node_ddi_walk;
743 		np->n_getdip = drmach_node_ddi_get_dip;
744 		np->n_getproplen = drmach_node_ddi_get_proplen;
745 		np->n_getprop = drmach_node_ddi_get_prop;
746 		np->get_parent = drmach_node_ddi_get_parent;
747 	} else {
748 		np->get_dnode = drmach_node_obp_get_dnode;
749 		np->walk = drmach_node_obp_walk;
750 		np->n_getdip = drmach_node_obp_get_dip;
751 		np->n_getproplen = drmach_node_obp_get_proplen;
752 		np->n_getprop = drmach_node_obp_get_prop;
753 		np->get_parent = drmach_node_obp_get_parent;
754 	}
755 
756 	return (np);
757 }
758 
/*
 * Free a node allocated by drmach_node_new() or drmach_node_dup().
 */
static void
drmach_node_dispose(drmach_node_t *np)
{
	kmem_free(np, sizeof (*np));
}
764 
765 /*
766  * Check if a CPU node is part of a CMP.
767  */
768 static int
769 drmach_is_cmp_child(dev_info_t *dip)
770 {
771 	dev_info_t *pdip;
772 
773 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
774 		return (0);
775 	}
776 
777 	pdip = ddi_get_parent(dip);
778 
779 	ASSERT(pdip);
780 
781 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
782 		return (1);
783 	}
784 
785 	return (0);
786 }
787 
/*
 * OBP flavor of n_getdip: map np's PROM node to its devinfo
 * counterpart.  Returns NULL if np has no valid PROM node or no
 * matching dip exists.
 */
static dev_info_t *
drmach_node_obp_get_dip(drmach_node_t *np)
{
	pnode_t		nodeid;
	dev_info_t	*dip;

	nodeid = np->get_dnode(np);
	if (nodeid == OBP_NONODE)
		return (NULL);

	dip = e_ddi_nodeid_to_dip(nodeid);
	if (dip) {
		/*
		 * The branch rooted at dip will have been previously
		 * held, or it will be the child of a CMP. In either
		 * case, the hold acquired in e_ddi_nodeid_to_dip()
		 * is not needed.
		 */
		ddi_release_devi(dip);
		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
	}

	return (dip);
}
812 
/*
 * DDI flavor of n_getdip: 'here' holds the dip itself.
 */
static dev_info_t *
drmach_node_ddi_get_dip(drmach_node_t *np)
{
	return ((dev_info_t *)np->here);
}
818 
/*
 * Walk the device tree via np's walk method (OBP or DDI flavor),
 * applying cb to each visited node; returns the walk's result.
 */
static int
drmach_node_walk(drmach_node_t *np, void *param,
		int (*cb)(drmach_node_walk_args_t *args))
{
	return (np->walk(np, param, cb));
}
825 
826 static int
827 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
828 {
829 	int		rv = 0;
830 	dev_info_t	*ndip;
831 	static char	*fn = "drmach_node_ddi_get_prop";
832 
833 	ndip = np->n_getdip(np);
834 	if (ndip == NULL) {
835 		cmn_err(CE_WARN, "%s: NULL dip", fn);
836 		rv = -1;
837 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
838 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
839 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
840 		rv = -1;
841 	}
842 
843 	return (rv);
844 }
845 
/*
 * OBP flavor of n_getprop: fetch property 'name' from np's PROM node
 * into buf.  Returns 0 on success, -1 if the node or property is
 * invalid.  Note that 'len' is unused (ARGSUSED): prom_getprop()
 * copies the full property value, so callers must size buf using
 * n_getproplen() beforehand.
 */
/* ARGSUSED */
static int
drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
{
	int		rv = 0;
	pnode_t		nodeid;
	static char	*fn = "drmach_node_obp_get_prop";

	nodeid = np->get_dnode(np);
	if (nodeid == OBP_NONODE) {
		cmn_err(CE_WARN, "%s: invalid dnode", fn);
		rv = -1;
	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
		/* property does not exist */
		rv = -1;
	} else {
		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
	}

	return (rv);
}
866 
867 static int
868 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
869 {
870 	int		rv = 0;
871 	dev_info_t	*ndip;
872 
873 	ndip = np->n_getdip(np);
874 	if (ndip == NULL) {
875 		rv = -1;
876 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
877 	    name, len) != DDI_PROP_SUCCESS) {
878 		rv = -1;
879 	}
880 
881 	return (rv);
882 }
883 
884 static int
885 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
886 {
887 	pnode_t	 nodeid;
888 	int	 rv;
889 
890 	nodeid = np->get_dnode(np);
891 	if (nodeid == OBP_NONODE)
892 		rv = -1;
893 	else {
894 		*len = prom_getproplen(nodeid, (caddr_t)name);
895 		rv = (*len < 0 ? -1 : 0);
896 	}
897 
898 	return (rv);
899 }
900 
901 static drmachid_t
902 drmach_node_dup(drmach_node_t *np)
903 {
904 	drmach_node_t *dup;
905 
906 	dup = drmach_node_new();
907 	dup->here = np->here;
908 	dup->get_dnode = np->get_dnode;
909 	dup->walk = np->walk;
910 	dup->n_getdip = np->n_getdip;
911 	dup->n_getproplen = np->n_getproplen;
912 	dup->n_getprop = np->n_getprop;
913 	dup->get_parent = np->get_parent;
914 
915 	return (dup);
916 }
917 
918 /*
919  * drmach_array provides convenient array construction, access,
920  * bounds checking and array destruction logic.
921  */
922 
923 static drmach_array_t *
924 drmach_array_new(int min_index, int max_index)
925 {
926 	drmach_array_t *arr;
927 
928 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
929 
930 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
931 	if (arr->arr_sz > 0) {
932 		arr->min_index = min_index;
933 		arr->max_index = max_index;
934 
935 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
936 		return (arr);
937 	} else {
938 		kmem_free(arr, sizeof (*arr));
939 		return (0);
940 	}
941 }
942 
943 static int
944 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
945 {
946 	if (idx < arr->min_index || idx > arr->max_index)
947 		return (-1);
948 	else {
949 		arr->arr[idx - arr->min_index] = val;
950 		return (0);
951 	}
952 	/*NOTREACHED*/
953 }
954 
955 static int
956 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
957 {
958 	if (idx < arr->min_index || idx > arr->max_index)
959 		return (-1);
960 	else {
961 		*val = arr->arr[idx - arr->min_index];
962 		return (0);
963 	}
964 	/*NOTREACHED*/
965 }
966 
967 static int
968 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
969 {
970 	int rv;
971 
972 	*idx = arr->min_index;
973 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
974 		*idx += 1;
975 
976 	return (rv);
977 }
978 
979 static int
980 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
981 {
982 	int rv;
983 
984 	*idx += 1;
985 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
986 		*idx += 1;
987 
988 	return (rv);
989 }
990 
991 static void
992 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
993 {
994 	drmachid_t	val;
995 	int		idx;
996 	int		rv;
997 
998 	rv = drmach_array_first(arr, &idx, &val);
999 	while (rv == 0) {
1000 		(*disposer)(val);
1001 
1002 		/* clear the array entry */
1003 		rv = drmach_array_set(arr, idx, NULL);
1004 		ASSERT(rv == 0);
1005 
1006 		rv = drmach_array_next(arr, &idx, &val);
1007 	}
1008 
1009 	kmem_free(arr->arr, arr->arr_sz);
1010 	kmem_free(arr, sizeof (*arr));
1011 }
1012 
1013 
1014 static gdcd_t *
1015 drmach_gdcd_new()
1016 {
1017 	gdcd_t *gdcd;
1018 
1019 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1020 
1021 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1022 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1023 bail:
1024 		kmem_free(gdcd, sizeof (gdcd_t));
1025 		return (NULL);
1026 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1027 		goto bail;
1028 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1029 		goto bail;
1030 	}
1031 
1032 	return (gdcd);
1033 }
1034 
/*
 * Free a gdcd_t returned by drmach_gdcd_new().
 */
static void
drmach_gdcd_dispose(gdcd_t *gdcd)
{
	kmem_free(gdcd, sizeof (gdcd_t));
}
1040 
/*
 * Configure (attach) the DDI branch rooted at the device's dip.  For
 * a memory id, every drmach_mem_t chained on the 'next' list is
 * configured as well.  The first configuration failure is recorded
 * and eventually returned, but the remaining devices are still
 * attempted.  CPU ids are a no-op since Starcat has no CPU driver.
 * Returns NULL on complete success, otherwise an sbd_error_t the
 * caller must dispose of.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_configure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	dev_info_t	*rdip;
	sbd_error_t	*err = NULL;

	/*
	 * On Starcat, there is no CPU driver, so it is
	 * not necessary to configure any CPU nodes.
	 */
	if (DRMACH_IS_CPU_ID(id)) {
		return (NULL);
	}

	/* iterate: a mem id walks its 'next' chain; others loop once */
	for (; id; ) {
		dev_info_t	*fdip = NULL;

		if (!DRMACH_IS_DEVICE_ID(id))
			return (drerr_new(0, ESTC_INAPPROP, NULL));
		dp = id;

		rdip = dp->node->n_getdip(dp->node);

		/*
		 * We held this branch earlier, so at a minimum its
		 * root should still be present in the device tree.
		 */
		ASSERT(rdip);

		DRMACH_PR("drmach_configure: configuring DDI branch");

		ASSERT(e_ddi_branch_held(rdip));
		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
			if (err == NULL) {
				/*
				 * Record first failure but don't stop
				 */
				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				/* report the failed dip if we have one */
				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;

				(void) ddi_pathname(dip, path);
				err = drerr_new(1, ESTC_DRVFAIL, path);

				kmem_free(path, MAXPATHLEN);
			}

			/*
			 * If non-NULL, fdip is returned held and must be
			 * released.
			 */
			if (fdip != NULL) {
				ddi_release_devi(fdip);
			}
		}

		if (DRMACH_IS_MEM_ID(id)) {
			drmach_mem_t	*mp = id;
			id = mp->next;
		} else {
			id = NULL;
		}
	}

	return (err);
}
1108 
/*
 * Construct a drmach device id for the device described by 'node',
 * dispatching to the type-specific constructor (drmach_cpu_new,
 * drmach_mem_new, drmach_pci_new, drmach_io_new) selected via the
 * node's "name" property and the drmach_name2type[] table.  Nodes of
 * no interest to DR (including "cmp") yield *idp == 0 and a NULL
 * error.  On success *idp holds the new id; on failure an sbd_error_t
 * is returned.
 */
static sbd_error_t *
drmach_device_new(drmach_node_t *node,
	drmach_board_t *bp, int portid, drmachid_t *idp)
{
	int		i, rv, device_id, unum;
	char		name[OBP_MAXDRVNAME];
	drmach_device_t	proto;

	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
	if (rv) {
		sbd_error_t *err;

		/* every node is expected to have a name */
		err = drerr_new(1, ESTC_GETPROP,
		    "dip: 0x%p: property %s",
		    node->n_getdip(node), OBP_NAME);

		return (err);
	}

	i = drmach_name2type_idx(name);

	if (i < 0 || strcmp(name, "cmp") == 0) {
		/*
		 * Not a node of interest to dr - including "cmp",
		 * but it is in drmach_name2type[], which lets gptwocfg
		 * driver to check if node is OBP created.
		 */
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Derive a best-guess unit number from the portid value.
	 * Some drmach_*_new constructors (drmach_pci_new, for example)
	 * will overwrite the prototype unum value with one that is more
	 * appropriate for the device.
	 */
	device_id = portid & 0x1f;	/* low 5 bits select the device */
	if (device_id < 4)
		unum = device_id;
	else if (device_id == 8) {
		unum = 0;
	} else if (device_id == 9) {
		unum = 1;
	} else if (device_id == 0x1c) {
		unum = 0;
	} else if (device_id == 0x1d) {
		unum = 1;
	} else {
		/* unrecognized device id; no unit number can be derived */
		return (DRMACH_INTERNAL_ERROR());
	}

	/* prototype handed to the type-specific constructor */
	bzero(&proto, sizeof (proto));
	proto.type = drmach_name2type[i].type;
	proto.bp = bp;
	proto.node = node;
	proto.portid = portid;
	proto.unum = unum;

	return (drmach_name2type[i].new(&proto, idp));
}
1171 
1172 static void
1173 drmach_device_dispose(drmachid_t id)
1174 {
1175 	drmach_device_t *self = id;
1176 
1177 	self->cm.dispose(id);
1178 }
1179 
1180 static drmach_board_t *
1181 drmach_board_new(int bnum)
1182 {
1183 	static sbd_error_t *drmach_board_release(drmachid_t);
1184 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1185 
1186 	drmach_board_t	*bp;
1187 
1188 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1189 
1190 	bp->cm.isa = (void *)drmach_board_new;
1191 	bp->cm.release = drmach_board_release;
1192 	bp->cm.status = drmach_board_status;
1193 
1194 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1195 
1196 	bp->bnum = bnum;
1197 	bp->devices = NULL;
1198 	bp->tree = drmach_node_new();
1199 
1200 	drmach_array_set(drmach_boards, bnum, bp);
1201 	return (bp);
1202 }
1203 
1204 static void
1205 drmach_board_dispose(drmachid_t id)
1206 {
1207 	drmach_board_t *bp;
1208 
1209 	ASSERT(DRMACH_IS_BOARD_ID(id));
1210 	bp = id;
1211 
1212 	if (bp->tree)
1213 		drmach_node_dispose(bp->tree);
1214 
1215 	if (bp->devices)
1216 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1217 
1218 	kmem_free(bp, sizeof (*bp));
1219 }
1220 
1221 static sbd_error_t *
1222 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1223 {
1224 	sbd_error_t	*err = NULL;
1225 	drmach_board_t	*bp;
1226 	caddr_t		obufp;
1227 	dr_showboard_t	shb;
1228 
1229 	if (!DRMACH_IS_BOARD_ID(id))
1230 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1231 
1232 	bp = id;
1233 
1234 	/*
1235 	 * we need to know if the board's connected before
1236 	 * issuing a showboard message.  If it's connected, we just
1237 	 * reply with status composed of cached info
1238 	 */
1239 
1240 	if (!bp->connected) {
1241 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1242 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1243 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1244 		    sizeof (dr_showboard_t));
1245 
1246 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1247 		if (err)
1248 			return (err);
1249 
1250 		bp->connected = (shb.bd_assigned && shb.bd_active);
1251 		strncpy(bp->type, shb.board_type, sizeof (bp->type));
1252 		stat->assigned = bp->assigned = shb.bd_assigned;
1253 		stat->powered = bp->powered = shb.power_on;
1254 		stat->empty = bp->empty = shb.slot_empty;
1255 
1256 		switch (shb.test_status) {
1257 			case DR_TEST_STATUS_UNKNOWN:
1258 			case DR_TEST_STATUS_IPOST:
1259 			case DR_TEST_STATUS_ABORTED:
1260 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1261 				break;
1262 			case DR_TEST_STATUS_PASSED:
1263 				stat->cond = bp->cond = SBD_COND_OK;
1264 				break;
1265 			case DR_TEST_STATUS_FAILED:
1266 				stat->cond = bp->cond = SBD_COND_FAILED;
1267 				break;
1268 			default:
1269 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1270 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1271 				    shb.test_status);
1272 				break;
1273 
1274 		}
1275 
1276 		strncpy(stat->type, shb.board_type, sizeof (stat->type));
1277 		snprintf(stat->info, sizeof (stat->info), "Test Level=%d",
1278 		    shb.test_level);
1279 	} else {
1280 		stat->assigned = bp->assigned;
1281 		stat->powered = bp->powered;
1282 		stat->empty = bp->empty;
1283 		stat->cond = bp->cond;
1284 		strncpy(stat->type, bp->type, sizeof (stat->type));
1285 	}
1286 
1287 	stat->busy = 0;			/* assume not busy */
1288 	stat->configured = 0;		/* assume not configured */
1289 	if (bp->devices) {
1290 		int		 rv;
1291 		int		 d_idx;
1292 		drmachid_t	 d_id;
1293 
1294 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1295 		while (rv == 0) {
1296 			drmach_status_t	d_stat;
1297 
1298 			err = drmach_i_status(d_id, &d_stat);
1299 			if (err)
1300 				break;
1301 
1302 			stat->busy |= d_stat.busy;
1303 			stat->configured |= d_stat.configured;
1304 
1305 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1306 		}
1307 	}
1308 
1309 	return (err);
1310 }
1311 
/*
 * One outstanding mailbox transaction with the SC.  Entries are kept on
 * the doubly-linked list headed by drmach_msglist_first/_last, protected
 * by drmach_msglist_mutex.  The sendmsg thread transmits the request
 * (waiters block on s_cv/s_lock until p_flag is set) and the getmsg
 * thread matches the reply by msgid and wakes the waiter via g_cv/g_lock.
 */
typedef struct drmach_msglist {
	kcondvar_t		s_cv; 		/* condvar for sending msg */
	kmutex_t		s_lock;		/* mutex for sending */
	kcondvar_t		g_cv;		/* condvar for getting reply */
	kmutex_t		g_lock;		/* mutex for getting reply */
	struct drmach_msglist	*prev;		/* link to previous entry */
	struct drmach_msglist	*next;		/* link to next entry */
	struct drmach_msglist	*link;		/* link to related entry */
	caddr_t			o_buf;		/* address of output buffer */
	caddr_t			i_buf; 		/* address of input buffer */
	uint32_t		o_buflen;	/* output buffer length */
	uint32_t		i_buflen;	/* input buffer length */
	uint32_t		msgid;		/* message identifier */
	int			o_nretry;	/* number of sending retries */
	int			f_error;	/* mailbox framework error */
	uint8_t			e_code;		/* error code returned by SC */
	uint8_t			p_flag	:1,	/* successfully putmsg */
				m_reply	:1,	/* msg reply received */
				unused	:6;
} drmach_msglist_t;
1332 
/*
 * Global state for the DR mailbox: locks, the transaction list head and
 * tail, message-id counter, the two service threads (sendmsg/getmsg) and
 * their run flags, and the mailbox initialization state flags.
 */
kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
uint32_t		drmach_msgid;		/* current message id */
kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
int			drmach_mbox_istate;	/* mailbox init state */
int			drmach_mbox_iflag;	/* set if init'd with SC */
int			drmach_mbox_ipending;	/* set if reinit scheduled */

/*
 * Timeout values (in seconds) used when waiting for replies (from the SC) to
 * requests that we sent.  Since we only receive boardevent messages, and they
 * are events rather than replies, there is no boardevent timeout.
 */
int	drmach_to_mbxinit	= 60;		/* 1 minute */
int	drmach_to_assign	= 60;		/* 1 minute */
int	drmach_to_unassign	= 60;		/* 1 minute */
int	drmach_to_claim		= 3600;		/* 1 hour */
int	drmach_to_unclaim	= 3600;		/* 1 hour */
int	drmach_to_poweron	= 480;		/* 8 minutes */
int	drmach_to_poweroff	= 480;		/* 8 minutes */
int	drmach_to_testboard	= 43200;	/* 12 hours */
int	drmach_to_aborttest	= 180;		/* 3 minutes */
int	drmach_to_showboard	= 180;		/* 3 minutes */
int	drmach_to_unconfig	= 180;		/* 3 minutes */

/*
 * Delay (in seconds) used after receiving a non-transient error indication
 * from an mboxsc_getmsg call in the thread that loops waiting for incoming
 * messages.
 */
int	drmach_mbxerr_delay	= 15;		/* 15 seconds */

/*
 * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
 */
clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */

/*
 * Normally, drmach_to_putmsg is set dynamically during initialization in
 * drmach_mbox_init.  This has the potentially undesirable side effect of
 * clobbering any value that might have been set in /etc/system.  To prevent
 * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
 * /etc/system), set drmach_use_tuned_putmsg_to to 1.
 */
int	drmach_use_tuned_putmsg_to	= 0;


/* maximum conceivable message size for future mailbox protocol versions */
#define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1390 
1391 /*ARGSUSED*/
1392 void
1393 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1394 {
1395 	int		i, j;
1396 	dr_memregs_t	*memregs;
1397 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1398 	dr_msg_t	*mp = &mbp->msgdata;
1399 
1400 #ifdef DEBUG
1401 	switch (php->command) {
1402 		case DRMSG_BOARDEVENT:
1403 			if (dir) {
1404 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1405 			} else {
1406 				DRMACH_PR("BOARDEVENT received:\n");
1407 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1408 				    mp->dm_be.initialized,
1409 				    mp->dm_be.board_insertion,
1410 				    mp->dm_be.board_removal,
1411 				    mp->dm_be.slot_assign);
1412 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1413 				    mp->dm_be.slot_unassign,
1414 				    mp->dm_be.slot_avail,
1415 				    mp->dm_be.slot_unavail);
1416 			}
1417 			break;
1418 		case DRMSG_MBOX_INIT:
1419 			if (dir) {
1420 				DRMACH_PR("MBOX_INIT Request:\n");
1421 			} else {
1422 				DRMACH_PR("MBOX_INIT Reply:\n");
1423 			}
1424 			break;
1425 		case DRMSG_ASSIGN:
1426 			if (dir) {
1427 				DRMACH_PR("ASSIGN Request:\n");
1428 			} else {
1429 				DRMACH_PR("ASSIGN Reply:\n");
1430 			}
1431 			break;
1432 		case DRMSG_UNASSIGN:
1433 			if (dir) {
1434 				DRMACH_PR("UNASSIGN Request:\n");
1435 			} else {
1436 				DRMACH_PR("UNASSIGN Reply:\n");
1437 			}
1438 			break;
1439 		case DRMSG_CLAIM:
1440 			if (!dir) {
1441 				DRMACH_PR("CLAIM Reply:\n");
1442 				break;
1443 			}
1444 
1445 			DRMACH_PR("CLAIM Request:\n");
1446 			for (i = 0; i < 18; ++i) {
1447 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1448 				    mp->dm_cr.mem_slice[i].valid,
1449 				    mp->dm_cr.mem_slice[i].slice);
1450 				memregs = &(mp->dm_cr.mem_regs[i]);
1451 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1452 					DRMACH_PR("  MC %2d: "
1453 					    "MADR[%d] = 0x%lx, "
1454 					    "MADR[%d] = 0x%lx\n", j,
1455 					    0, DRMACH_MCREG_TO_U64(
1456 					    memregs->madr[j][0]),
1457 					    1, DRMACH_MCREG_TO_U64(
1458 					    memregs->madr[j][1]));
1459 					DRMACH_PR("       : "
1460 					    "MADR[%d] = 0x%lx, "
1461 					    "MADR[%d] = 0x%lx\n",
1462 					    2, DRMACH_MCREG_TO_U64(
1463 					    memregs->madr[j][2]),
1464 					    3, DRMACH_MCREG_TO_U64(
1465 					    memregs->madr[j][3]));
1466 				}
1467 			}
1468 			break;
1469 		case DRMSG_UNCLAIM:
1470 			if (!dir) {
1471 				DRMACH_PR("UNCLAIM Reply:\n");
1472 				break;
1473 			}
1474 
1475 			DRMACH_PR("UNCLAIM Request:\n");
1476 			for (i = 0; i < 18; ++i) {
1477 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1478 				    mp->dm_ur.mem_slice[i].valid,
1479 				    mp->dm_ur.mem_slice[i].slice);
1480 				memregs = &(mp->dm_ur.mem_regs[i]);
1481 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1482 					DRMACH_PR("  MC %2d: "
1483 					    "MADR[%d] = 0x%lx, "
1484 					    "MADR[%d] = 0x%lx\n", j,
1485 					    0, DRMACH_MCREG_TO_U64(
1486 					    memregs->madr[j][0]),
1487 					    1, DRMACH_MCREG_TO_U64(
1488 					    memregs->madr[j][1]));
1489 					DRMACH_PR("       : "
1490 					    "MADR[%d] = 0x%lx, "
1491 					    "MADR[%d] = 0x%lx\n",
1492 					    2, DRMACH_MCREG_TO_U64(
1493 					    memregs->madr[j][2]),
1494 					    3, DRMACH_MCREG_TO_U64(
1495 					    memregs->madr[j][3]));
1496 				}
1497 			}
1498 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1499 			break;
1500 		case DRMSG_UNCONFIG:
1501 			if (!dir) {
1502 				DRMACH_PR("UNCONFIG Reply:\n");
1503 				break;
1504 			}
1505 
1506 			DRMACH_PR("UNCONFIG Request:\n");
1507 			for (i = 0; i < 18; ++i) {
1508 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1509 				    mp->dm_uc.mem_slice[i].valid,
1510 				    mp->dm_uc.mem_slice[i].slice);
1511 				memregs = &(mp->dm_uc.mem_regs[i]);
1512 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1513 					DRMACH_PR("  MC %2d: "
1514 					    "MADR[%d] = 0x%lx, "
1515 					    "MADR[%d] = 0x%lx\n", j,
1516 					    0, DRMACH_MCREG_TO_U64(
1517 					    memregs->madr[j][0]),
1518 					    1, DRMACH_MCREG_TO_U64(
1519 					    memregs->madr[j][1]));
1520 					DRMACH_PR("       : "
1521 					    "MADR[%d] = 0x%lx, "
1522 					    "MADR[%d] = 0x%lx\n",
1523 					    2, DRMACH_MCREG_TO_U64(
1524 					    memregs->madr[j][2]),
1525 					    3, DRMACH_MCREG_TO_U64(
1526 					    memregs->madr[j][3]));
1527 				}
1528 			}
1529 			break;
1530 		case DRMSG_POWERON:
1531 			if (dir) {
1532 				DRMACH_PR("POWERON Request:\n");
1533 			} else {
1534 				DRMACH_PR("POWERON Reply:\n");
1535 			}
1536 			break;
1537 		case DRMSG_POWEROFF:
1538 			if (dir) {
1539 				DRMACH_PR("POWEROFF Request:\n");
1540 			} else {
1541 				DRMACH_PR("POWEROFF Reply:\n");
1542 			}
1543 			break;
1544 		case DRMSG_TESTBOARD:
1545 			if (dir) {
1546 				DRMACH_PR("TESTBOARD Request:\n");
1547 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1548 				    mp->dm_tb.memaddrhi,
1549 				    mp->dm_tb.memaddrlo);
1550 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1551 				    mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1552 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1553 				    mp->dm_tb.force, mp->dm_tb.immediate);
1554 			} else {
1555 				DRMACH_PR("TESTBOARD Reply:\n");
1556 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1557 				    mp->dm_tr.memaddrhi,
1558 				    mp->dm_tr.memaddrlo);
1559 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1560 				    mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1561 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1562 				    mp->dm_tr.cpu_recovered,
1563 				    mp->dm_tr.test_status);
1564 
1565 			}
1566 			break;
1567 		case DRMSG_ABORT_TEST:
1568 			if (dir) {
1569 				DRMACH_PR("ABORT_TEST Request:\n");
1570 			} else {
1571 				DRMACH_PR("ABORT_TEST Reply:\n");
1572 			}
1573 
1574 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1575 			    mp->dm_ta.memaddrhi,
1576 			    mp->dm_ta.memaddrlo);
1577 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1578 			    mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1579 			break;
1580 		case DRMSG_SHOWBOARD:
1581 			if (dir) {
1582 				DRMACH_PR("SHOWBOARD Request:\n");
1583 			} else {
1584 				DRMACH_PR("SHOWBOARD Reply:\n");
1585 
1586 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1587 				    mp->dm_sb.slot_empty,
1588 				    mp->dm_sb.power_on,
1589 				    mp->dm_sb.bd_assigned);
1590 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1591 				    mp->dm_sb.bd_active,
1592 				    mp->dm_sb.test_status,
1593 				    mp->dm_sb.test_level);
1594 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1595 			}
1596 			break;
1597 		default:
1598 			DRMACH_PR("Unknown message type\n");
1599 			break;
1600 	}
1601 
1602 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1603 	    php->message_id, php->drproto_version, php->command,
1604 	    php->expbrd, php->slot);
1605 #endif
1606 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1607 	    php->error_code);
1608 }
1609 
1610 /*
1611  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1612  * handshake needs to be scheduled.  The handshake can't be performed by the
1613  * thread that determines it is needed, in most cases, so this function is
1614  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1615  * otherwise ignored, since any situation that requires a mailbox initialization
1616  * handshake will continue to request the handshake until it succeeds.
1617  */
1618 static void
1619 drmach_mbox_reinit(void *unused)
1620 {
1621 	_NOTE(ARGUNUSED(unused))
1622 
1623 	caddr_t		obufp = NULL;
1624 	sbd_error_t	*serr = NULL;
1625 
1626 	DRMACH_PR("scheduled mailbox reinit running\n");
1627 
1628 	mutex_enter(&drmach_ri_mbox_mutex);
1629 	mutex_enter(&drmach_g_mbox_mutex);
1630 	if (drmach_mbox_iflag == 0) {
1631 		/* need to initialize the mailbox */
1632 		mutex_exit(&drmach_g_mbox_mutex);
1633 
1634 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1635 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1636 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1637 		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1638 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1639 
1640 		if (serr) {
1641 			cmn_err(CE_WARN,
1642 			    "mbox_init: MBOX_INIT failed ecode=0x%x",
1643 			    serr->e_code);
1644 			sbd_err_clear(&serr);
1645 		}
1646 		mutex_enter(&drmach_g_mbox_mutex);
1647 		if (!serr) {
1648 			drmach_mbox_iflag = 1;
1649 		}
1650 	}
1651 	drmach_mbox_ipending = 0;
1652 	mutex_exit(&drmach_g_mbox_mutex);
1653 	mutex_exit(&drmach_ri_mbox_mutex);
1654 }
1655 
1656 /*
1657  * To ensure sufficient compatibility with future versions of the DR mailbox
1658  * protocol, we use a buffer that is large enough to receive the largest message
1659  * that could possibly be sent to us.  However, since that ends up being fairly
1660  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1661  * does not need to be MT-safe since it is only invoked by the mailbox
1662  * framework, which will never invoke it multiple times concurrently.  Since
1663  * that is the case, we can use a static buffer.
1664  */
1665 void
1666 drmach_mbox_event(void)
1667 {
1668 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1669 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1670 	int		err;
1671 	uint32_t	type = MBOXSC_MSG_EVENT;
1672 	uint32_t	command = DRMSG_BOARDEVENT;
1673 	uint64_t	transid = 0;
1674 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1675 	char		*hint = "";
1676 	int		logsys = 0;
1677 
1678 	do {
1679 		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1680 		    &length, (void *)msg, 0);
1681 	} while (err == EAGAIN);
1682 
1683 	/* don't try to interpret anything with the wrong version number */
1684 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1685 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1686 		    msg->p_hdr.drproto_version, DRMBX_VERSION);
1687 		mutex_enter(&drmach_g_mbox_mutex);
1688 		drmach_mbox_iflag = 0;
1689 		/* schedule a reinit handshake if one isn't pending */
1690 		if (!drmach_mbox_ipending) {
1691 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1692 			    NULL, TQ_NOSLEEP) != NULL) {
1693 				drmach_mbox_ipending = 1;
1694 			} else {
1695 				cmn_err(CE_WARN,
1696 				    "failed to schedule mailbox reinit");
1697 			}
1698 		}
1699 		mutex_exit(&drmach_g_mbox_mutex);
1700 		return;
1701 	}
1702 
1703 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1704 		cmn_err(CE_WARN,
1705 		    "Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1706 		    err, msg->p_hdr.error_code);
1707 	} else {
1708 		dr_boardevent_t	*be;
1709 		be = (dr_boardevent_t *)&msg->msgdata;
1710 
1711 		/* check for initialization event */
1712 		if (be->initialized) {
1713 			mutex_enter(&drmach_g_mbox_mutex);
1714 			drmach_mbox_iflag = 0;
1715 			/* schedule a reinit handshake if one isn't pending */
1716 			if (!drmach_mbox_ipending) {
1717 				if (taskq_dispatch(system_taskq,
1718 				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1719 				    != NULL) {
1720 					drmach_mbox_ipending = 1;
1721 				} else {
1722 					cmn_err(CE_WARN, "failed to schedule "
1723 					    "mailbox reinit");
1724 				}
1725 			}
1726 			mutex_exit(&drmach_g_mbox_mutex);
1727 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1728 		}
1729 
1730 		/* anything else will be a log_sysevent call */
1731 
1732 		if (be->board_insertion) {
1733 			DRMACH_PR("Board Insertion event received");
1734 			hint = DR_HINT_INSERT;
1735 			logsys++;
1736 	}
1737 		if (be->board_removal) {
1738 			DRMACH_PR("Board Removal event received");
1739 			hint = DR_HINT_REMOVE;
1740 			logsys++;
1741 		}
1742 		if (be->slot_assign) {
1743 			DRMACH_PR("Slot Assign event received");
1744 			logsys++;
1745 		}
1746 		if (be->slot_unassign) {
1747 			DRMACH_PR("Slot Unassign event received");
1748 			logsys++;
1749 		}
1750 		if (be->slot_avail) {
1751 			DRMACH_PR("Slot Available event received");
1752 			logsys++;
1753 		}
1754 		if (be->slot_unavail) {
1755 			DRMACH_PR("Slot Unavailable event received");
1756 			logsys++;
1757 		}
1758 		if (be->power_on) {
1759 			DRMACH_PR("Power ON event received");
1760 			logsys++;
1761 		}
1762 		if (be->power_off) {
1763 			DRMACH_PR("Power OFF event received");
1764 			logsys++;
1765 		}
1766 
1767 		if (logsys)
1768 			drmach_log_sysevent(
1769 			    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1770 			    msg->p_hdr.slot), hint, SE_NOSLEEP, 1);
1771 	}
1772 }
1773 
1774 static uint32_t
1775 drmach_get_msgid()
1776 {
1777 	uint32_t	rv;
1778 	mutex_enter(&drmach_msglist_mutex);
1779 	if (!(++drmach_msgid))
1780 		++drmach_msgid;
1781 	rv = drmach_msgid;
1782 	mutex_exit(&drmach_msglist_mutex);
1783 	return (rv);
1784 }
1785 
1786 /*
1787  *	unlink an entry from the message transaction list
1788  *
1789  *	caller must hold drmach_msglist_mutex
1790  */
1791 void
1792 drmach_msglist_unlink(drmach_msglist_t *entry)
1793 {
1794 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1795 	if (entry->prev) {
1796 		entry->prev->next = entry->next;
1797 		if (entry->next)
1798 			entry->next->prev = entry->prev;
1799 	} else {
1800 		drmach_msglist_first = entry->next;
1801 		if (entry->next)
1802 			entry->next->prev = NULL;
1803 	}
1804 	if (entry == drmach_msglist_last) {
1805 		drmach_msglist_last = entry->prev;
1806 	}
1807 }
1808 
1809 void
1810 drmach_msglist_link(drmach_msglist_t *entry)
1811 {
1812 	mutex_enter(&drmach_msglist_mutex);
1813 	if (drmach_msglist_last) {
1814 		entry->prev = drmach_msglist_last;
1815 		drmach_msglist_last->next = entry;
1816 		drmach_msglist_last = entry;
1817 	} else {
1818 		drmach_msglist_last = drmach_msglist_first = entry;
1819 	}
1820 	mutex_exit(&drmach_msglist_mutex);
1821 }
1822 
/*
 * Body of the getmsg service thread.  Loops pulling reply messages from
 * the SC mailbox, matching each against the transaction list by message
 * id and waking the waiting requester.  Runs until another thread clears
 * drmach_getmsg_thread_run; on exit it fails every transaction that has
 * already been transmitted (p_flag == 1), then sets the run flag to -1
 * to acknowledge shutdown and exits.
 */
void
drmach_mbox_getmsg()
{
	int			err;
	/*
	 * NOTE(review): msgid is a plain int compared below against the
	 * uint32_t entry->msgid; presumably fine on this platform, but a
	 * uint32_t would match the field exactly.
	 */
	register int		msgid;
	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
	dr_proto_hdr_t		*php;
	drmach_msglist_t	*found, *entry;
	uint32_t		type = MBOXSC_MSG_REPLY;
	uint32_t		command;
	uint64_t		transid;
	uint32_t		length;

	php = &msg->p_hdr;

	while (drmach_getmsg_thread_run != 0) {
		/* get a reply message */
		command = 0;
		transid = 0;
		length = DRMACH_MAX_MBOX_MSG_SIZE;
		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
		    &length, (void *)msg, drmach_to_getmsg);

		if (err) {
			/*
			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
			 * the "error" is really just a normal, transient
			 * condition and we can retry the operation right away.
			 * Any other error suggests a more serious problem,
			 * ranging from a message being too big for our buffer
			 * (EMSGSIZE) to total failure of the mailbox layer.
			 * This second class of errors is much less "transient",
			 * so rather than retrying over and over (and getting
			 * the same error over and over) as fast as we can,
			 * we'll sleep for a while before retrying.
			 */
			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
				cmn_err(CE_WARN,
				    "mboxsc_getmsg failed, err=0x%x", err);
				delay(drmach_mbxerr_delay * hz);
			}
			continue;
		}

		/* log the incoming reply (decoded in DEBUG builds) */
		drmach_mbox_prmsg(msg, 0);

		if (php->drproto_version != DRMBX_VERSION) {
			cmn_err(CE_WARN,
			    "mailbox version mismatch 0x%x vs 0x%x",
			    php->drproto_version, DRMBX_VERSION);

			/*
			 * Protocol mismatch: mark the mailbox uninitialized
			 * and schedule a reinit handshake on the taskq,
			 * then drop this message.
			 */
			mutex_enter(&drmach_g_mbox_mutex);
			drmach_mbox_iflag = 0;
			/* schedule a reinit handshake if one isn't pending */
			if (!drmach_mbox_ipending) {
				if (taskq_dispatch(system_taskq,
				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
				    != NULL) {
					drmach_mbox_ipending = 1;
				} else {
					cmn_err(CE_WARN, "failed to schedule "
					    "mailbox reinit");
				}
			}
			mutex_exit(&drmach_g_mbox_mutex);

			continue;
		}

		/* match the reply against the outstanding-transaction list */
		msgid = php->message_id;
		found = NULL;
		mutex_enter(&drmach_msglist_mutex);
		entry = drmach_msglist_first;
		while (entry != NULL) {
			if (entry->msgid == msgid) {
				found = entry;
				drmach_msglist_unlink(entry);
				entry = NULL;
			} else
				entry = entry->next;
		}

		if (found) {
			/* copy the reply payload and wake the waiter */
			mutex_enter(&found->g_lock);

			found->e_code = php->error_code;
			if (found->i_buflen > 0)
				bcopy((caddr_t)&msg->msgdata, found->i_buf,
				    found->i_buflen);
			found->m_reply = 1;

			cv_signal(&found->g_cv);
			mutex_exit(&found->g_lock);
		} else {
			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
			    msgid);
			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
			    php->command, php->expbrd, php->slot);
		}

		mutex_exit(&drmach_msglist_mutex);
	}
	cmn_err(CE_WARN, "mbox_getmsg: exiting");
	/*
	 * Shutdown: fail every transaction that was already sent (p_flag
	 * set) so its waiter does not block forever waiting for a reply.
	 * drmach_msglist_unlink leaves entry->next intact, so continuing
	 * the walk from the unlinked entry is safe.
	 */
	mutex_enter(&drmach_msglist_mutex);
	entry = drmach_msglist_first;
	while (entry != NULL) {
		if (entry->p_flag == 1) {
			entry->f_error = -1;
			mutex_enter(&entry->g_lock);
			cv_signal(&entry->g_cv);
			mutex_exit(&entry->g_lock);
			drmach_msglist_unlink(entry);
		}
		entry = entry->next;
	}
	mutex_exit(&drmach_msglist_mutex);
	drmach_getmsg_thread_run = -1;
	thread_exit();
}
1943 
/*
 * Body of the sendmsg service thread.  Repeatedly scans the transaction
 * list for entries not yet transmitted (p_flag == 0), sends each to the
 * SC via mboxsc_putmsg, and signals the requester through s_cv.  Between
 * scans it naps on drmach_sendmsg_cv (signaled when new work is queued)
 * with a 5-second backstop.  Runs until drmach_sendmsg_thread_run is
 * cleared; on exit it fails all untransmitted entries, destroys its cv
 * and mutex, sets the run flag to -1 to acknowledge shutdown, and exits.
 */
void
drmach_mbox_sendmsg()
{
	int		err, retry;
	drmach_msglist_t *entry;
	dr_mbox_msg_t   *mp;
	dr_proto_hdr_t  *php;

	while (drmach_sendmsg_thread_run != 0) {
		/*
		 * Search through the list to find entries awaiting
		 * transmission to the SC
		 */
		mutex_enter(&drmach_msglist_mutex);
		entry = drmach_msglist_first;
		retry = 0;
		while (entry != NULL) {
			if (entry->p_flag == 1) {
				/* already transmitted; skip it */
				entry = entry->next;
				continue;
			}

			mutex_exit(&drmach_msglist_mutex);

			/* on a retry pass we already hold s_lock */
			if (!retry)
				mutex_enter(&entry->s_lock);
			mp = (dr_mbox_msg_t *)entry->o_buf;
			php = &mp->p_hdr;

			/* log the outgoing request */
			drmach_mbox_prmsg(mp, 1);

			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
			    php->command, NULL, entry->o_buflen, (void *)mp,
			    drmach_to_putmsg);

			if (err) {
				switch (err) {

				/*
				 * Transient: retry the same entry (the
				 * continue targets the inner while loop,
				 * with s_lock still held and retry set).
				 */
				case EAGAIN:
				case EBUSY:
					++retry;
					mutex_enter(&drmach_msglist_mutex);
					continue;

				/*
				 * Timed out: retry until the entry's
				 * retry budget is exhausted, then fail
				 * the transaction and wake the waiter.
				 */
				case ETIMEDOUT:
					if (--entry->o_nretry <= 0) {
						mutex_enter(
						    &drmach_msglist_mutex);
						drmach_msglist_unlink(entry);
						mutex_exit(
						    &drmach_msglist_mutex);
						entry->f_error = err;
						entry->p_flag = 1;
						cv_signal(&entry->s_cv);
					} else {
						++retry;
						mutex_enter(
						    &drmach_msglist_mutex);
						continue;
					}
					break;
				/* hard failure: fail the transaction now */
				default:
					mutex_enter(&drmach_msglist_mutex);
					drmach_msglist_unlink(entry);
					mutex_exit(&drmach_msglist_mutex);
					entry->f_error = err;
					entry->p_flag = 1;
					cv_signal(&entry->s_cv);
					break;
				}
			} else {
				/* sent; let the requester await its reply */
				entry->p_flag = 1;
				cv_signal(&entry->s_cv);
			}

			mutex_exit(&entry->s_lock);
			retry = 0;
			/* restart the scan; the list may have changed */
			mutex_enter(&drmach_msglist_mutex);
			entry = drmach_msglist_first;
		}
		mutex_exit(&drmach_msglist_mutex);

		/* nap until new work is queued (or 5s backstop elapses) */
		mutex_enter(&drmach_sendmsg_mutex);
		(void) cv_reltimedwait(&drmach_sendmsg_cv,
		    &drmach_sendmsg_mutex, (5 * hz), TR_CLOCK_TICK);
		mutex_exit(&drmach_sendmsg_mutex);
	}
	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
	/*
	 * Shutdown: fail every entry that was never transmitted so its
	 * waiter is not left blocked on s_cv.  Unlink leaves the entry's
	 * own next pointer intact, so the walk may continue from it.
	 */
	mutex_enter(&drmach_msglist_mutex);
	entry = drmach_msglist_first;
	while (entry != NULL) {
		if (entry->p_flag == 0) {
			entry->f_error = -1;
			mutex_enter(&entry->s_lock);
			cv_signal(&entry->s_cv);
			mutex_exit(&entry->s_lock);
			drmach_msglist_unlink(entry);
		}
		entry = entry->next;
	}
	mutex_exit(&drmach_msglist_mutex);
	cv_destroy(&drmach_sendmsg_cv);
	mutex_destroy(&drmach_sendmsg_mutex);

	drmach_sendmsg_thread_run = -1;
	thread_exit();
}
2051 
2052 void
2053 drmach_msglist_destroy(drmach_msglist_t *listp)
2054 {
2055 	if (listp != NULL) {
2056 		drmach_msglist_t	*entry;
2057 
2058 		mutex_enter(&drmach_msglist_mutex);
2059 		entry = drmach_msglist_first;
2060 		while (entry) {
2061 			if (listp == entry) {
2062 				drmach_msglist_unlink(listp);
2063 				entry = NULL;
2064 			} else
2065 				entry = entry->next;
2066 		}
2067 
2068 		mutex_destroy(&listp->s_lock);
2069 		cv_destroy(&listp->s_cv);
2070 		mutex_destroy(&listp->g_lock);
2071 		cv_destroy(&listp->g_cv);
2072 		kmem_free(listp, sizeof (drmach_msglist_t));
2073 
2074 		mutex_exit(&drmach_msglist_mutex);
2075 	}
2076 }
2077 
2078 static drmach_msglist_t	*
2079 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2080 	uint32_t olen, int nrtry)
2081 {
2082 	drmach_msglist_t	*listp;
2083 
2084 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2085 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2086 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2087 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2088 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2089 	listp->o_buf = (caddr_t)hdrp;
2090 	listp->o_buflen = olen;
2091 	listp->i_buf = ibufp;
2092 	listp->i_buflen = ilen;
2093 	listp->o_nretry = nrtry;
2094 	listp->msgid = hdrp->message_id;
2095 
2096 	return (listp);
2097 }
2098 
/*
 * drmach_mbox_req_rply:
 * Queue an outgoing mailbox message for the sendmsg thread and wait
 * for the SC's reply (or a failure/timeout).  Returns the transaction
 * list entry, which records the outcome in f_error/e_code; the caller
 * is responsible for freeing it via drmach_msglist_destroy().
 *
 * If 'link' is non-NULL, this transaction is linked to a prior one and
 * the reply to the linked entry is also awaited (used by ABORT_TEST to
 * collect the outstanding TESTBOARD reply).
 */
static drmach_msglist_t *
drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
	uint32_t ilen, int timeout, int nrtry, int nosig,
	drmach_msglist_t *link)
{
	int		crv;
	drmach_msglist_t *listp;
	clock_t		to_val;
	dr_proto_hdr_t	*php;

	/* setup transaction list entry */
	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);

	/* send mailbox message, await reply */
	mutex_enter(&listp->s_lock);
	mutex_enter(&listp->g_lock);

	listp->link = link;
	drmach_msglist_link(listp);

	/* wake the sendmsg thread to transmit the queued message */
	mutex_enter(&drmach_sendmsg_mutex);
	cv_signal(&drmach_sendmsg_cv);
	mutex_exit(&drmach_sendmsg_mutex);

	/*
	 * Wait until p_flag is set, i.e. until the sendmsg thread has
	 * attempted mboxsc_putmsg() for this entry.
	 * NOTE(review): p_flag/f_error are produced by the sendmsg
	 * thread (not visible here) — confirm against drmach_mbox_sendmsg.
	 */
	while (listp->p_flag == 0) {
		cv_wait(&listp->s_cv, &listp->s_lock);
	}

	/* absolute deadline for the reply wait, measured from now */
	to_val = ddi_get_lbolt() + (timeout * hz);

	if (listp->f_error) {
		/* the send itself failed; do not wait for a reply */
		listp->p_flag = 0;
		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x", listp->f_error);
		php = (dr_proto_hdr_t *)listp->o_buf;
		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
		    php->command, php->expbrd, php->slot);
	} else {
		/* wait for reply (m_reply set by getmsg thread) or error */
		while (listp->m_reply == 0 && listp->f_error == 0) {
			if (nosig)
				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
				    to_val);
			else
				crv = cv_timedwait_sig(&listp->g_cv,
				    &listp->g_lock, to_val);
			switch (crv) {
				case -1: /* timed out */
					cmn_err(CE_WARN,
					    "!msgid=0x%x reply timed out",
					    hdrp->message_id);
					php = (dr_proto_hdr_t *)listp->o_buf;
					cmn_err(CE_WARN, "!    cmd = 0x%x, "
					    "exb = %d, slot = %d", php->command,
					    php->expbrd, php->slot);
					listp->f_error = ETIMEDOUT;
					break;
				case 0: /* signal received */
					cmn_err(CE_WARN,
					    "operation interrupted by signal");
					listp->f_error = EINTR;
					break;
				default:
					break;
				}
		}

		/*
		 * If link is set for this entry, check to see if
		 * the linked entry has been replied to.  If not,
		 * wait for the response.
		 * Currently, this is only used for ABORT_TEST functionality,
		 * wherein a check is made for the TESTBOARD reply when
		 * the ABORT_TEST reply is received.
		 */

		if (link) {
			mutex_enter(&link->g_lock);
			/*
			 * If the reply to the linked entry hasn't been
			 * received, clear the existing link->f_error,
			 * and await the reply.
			 */
			if (link->m_reply == 0) {
				link->f_error = 0;
			}
			/* fresh deadline for the linked entry's reply */
			to_val =  ddi_get_lbolt() + (timeout * hz);
			while (link->m_reply == 0 && link->f_error == 0) {
				crv = cv_timedwait(&link->g_cv, &link->g_lock,
				    to_val);
				switch (crv) {
				case -1: /* timed out */
					cmn_err(CE_NOTE,
					    "!link msgid=0x%x reply timed out",
					    link->msgid);
					link->f_error = ETIMEDOUT;
					break;
				default:
					break;
				}
			}
			mutex_exit(&link->g_lock);
		}
	}
	mutex_exit(&listp->g_lock);
	mutex_exit(&listp->s_lock);
	return (listp);
}
2205 
2206 static sbd_error_t *
2207 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2208 {
2209 	char		a_pnt[MAXNAMELEN];
2210 	dr_proto_hdr_t	*php;
2211 	int		bnum;
2212 
2213 	if (mlp->f_error) {
2214 		/*
2215 		 * If framework failure is due to signal, return "no error"
2216 		 * error.
2217 		 */
2218 		if (mlp->f_error == EINTR)
2219 			return (drerr_new(0, ESTC_NONE, NULL));
2220 
2221 		mutex_enter(&drmach_g_mbox_mutex);
2222 		drmach_mbox_iflag = 0;
2223 		mutex_exit(&drmach_g_mbox_mutex);
2224 		if (!mlp->p_flag)
2225 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2226 		else
2227 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2228 	}
2229 	php = (dr_proto_hdr_t *)mlp->o_buf;
2230 	bnum = 2 * php->expbrd + php->slot;
2231 	a_pnt[0] = '\0';
2232 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2233 
2234 	switch (mlp->e_code) {
2235 		case 0:
2236 			return (NULL);
2237 		case DRERR_NOACL:
2238 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2239 		case DRERR_NOT_ASSIGNED:
2240 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2241 		case DRERR_NOT_ACTIVE:
2242 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2243 		case DRERR_EMPTY_SLOT:
2244 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2245 		case DRERR_POWER_OFF:
2246 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2247 		case DRERR_TEST_IN_PROGRESS:
2248 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS, "%s",
2249 			    a_pnt));
2250 		case DRERR_TESTING_BUSY:
2251 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2252 		case DRERR_TEST_REQUIRED:
2253 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2254 		case DRERR_UNAVAILABLE:
2255 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2256 		case DRERR_RECOVERABLE:
2257 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE, "%s",
2258 			    a_pnt));
2259 		case DRERR_UNRECOVERABLE:
2260 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE, "%s",
2261 			    a_pnt));
2262 		default:
2263 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2264 	}
2265 }
2266 
/*
 * drmach_mbox_trans:
 * Perform one complete DR mailbox transaction with the SC: fill in the
 * protocol header in obufp, send it, and wait for the reply (payload
 * copied into ibufp).  Per-message timeouts/retry/signal policy are
 * selected from the message type.  If the mailbox has not yet been
 * initialized (drmach_mbox_iflag clear), a DRMSG_MBOX_INIT exchange is
 * performed first.  Returns NULL on success or an sbd_error_t.
 */
static sbd_error_t *
drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
	caddr_t ibufp, int ilen)
{
	int			timeout = 0;
	int			ntries = 0;
	int			nosignals = 0;
	dr_proto_hdr_t 		*hdrp;
	drmach_msglist_t 	*mlp;
	sbd_error_t		*err = NULL;

	if (msgtype != DRMSG_MBOX_INIT) {
		/*
		 * Lock order: drmach_ri_mbox_mutex (serializes re-init)
		 * before drmach_g_mbox_mutex (guards iflag).
		 */
		mutex_enter(&drmach_ri_mbox_mutex);
		mutex_enter(&drmach_g_mbox_mutex);
		if (drmach_mbox_iflag == 0) {
			/* need to initialize the mailbox */
			dr_proto_hdr_t	imsg;

			/* drop g_mbox; ri_mbox still serializes re-init */
			mutex_exit(&drmach_g_mbox_mutex);

			imsg.command = DRMSG_MBOX_INIT;

			imsg.message_id = drmach_get_msgid();
			imsg.drproto_version = DRMBX_VERSION;
			imsg.expbrd = 0;
			imsg.slot = 0;

			cmn_err(CE_WARN, "!reinitializing DR mailbox");
			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
			    10, 5, 0, NULL);
			err = drmach_mbx2sbderr(mlp);
			/*
			 * If framework failure incoming is encountered on
			 * the MBOX_INIT [timeout on SMS reply], the error
			 * type must be changed before returning to caller.
			 * This is to prevent drmach_board_connect() and
			 * drmach_board_disconnect() from marking boards
			 * UNUSABLE based on MBOX_INIT failures.
			 */
			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
				cmn_err(CE_WARN,
				    "!Changed mbox incoming to outgoing"
				    " failure on reinit");
				sbd_err_clear(&err);
				err = drerr_new(0, ESTC_MBXRQST, NULL);
			}
			drmach_msglist_destroy(mlp);
			if (err) {
				mutex_exit(&drmach_ri_mbox_mutex);
				return (err);
			}
			/* re-init succeeded; mark the mailbox usable */
			mutex_enter(&drmach_g_mbox_mutex);
			drmach_mbox_iflag = 1;
		}
		mutex_exit(&drmach_g_mbox_mutex);
		mutex_exit(&drmach_ri_mbox_mutex);
	}

	hdrp = (dr_proto_hdr_t *)obufp;

	/* setup outgoing mailbox header */
	hdrp->command = msgtype;
	hdrp->message_id = drmach_get_msgid();
	hdrp->drproto_version = DRMBX_VERSION;
	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
	hdrp->slot = DRMACH_BNUM2SLOT(bnum);

	/*
	 * Select the per-message timeout and whether the reply wait may
	 * be interrupted by signals (CLAIM/UNCLAIM must not be).
	 */
	switch (msgtype) {

		case DRMSG_MBOX_INIT:
			timeout = drmach_to_mbxinit;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_ASSIGN:
			timeout = drmach_to_assign;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_UNASSIGN:
			timeout = drmach_to_unassign;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_POWERON:
			timeout = drmach_to_poweron;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_POWEROFF:
			timeout = drmach_to_poweroff;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_SHOWBOARD:
			timeout = drmach_to_showboard;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_CLAIM:
			timeout = drmach_to_claim;
			ntries = 1;
			nosignals = 1;
			break;

		case DRMSG_UNCLAIM:
			timeout = drmach_to_unclaim;
			ntries = 1;
			nosignals = 1;
			break;

		case DRMSG_UNCONFIG:
			timeout = drmach_to_unconfig;
			ntries = 1;
			nosignals = 0;
			break;

		case DRMSG_TESTBOARD:
			timeout = drmach_to_testboard;
			ntries = 1;
			nosignals = 0;
			break;

		default:
			cmn_err(CE_WARN, "Unknown outgoing message type 0x%x",
			    msgtype);
			err = DRMACH_INTERNAL_ERROR();
			break;
	}

	if (err == NULL) {
		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen, timeout,
		    ntries, nosignals, NULL);
		err = drmach_mbx2sbderr(mlp);

		/*
		 * For DRMSG_TESTBOARD attempts which have timed out, or
		 * been aborted due to a signal received after mboxsc_putmsg()
		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
		 * must be sent.
		 */
		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
		    (mlp->p_flag != 0)))) {
			drmach_msglist_t	*abmlp;
			dr_abort_test_t		abibuf;

			/* reuse the TESTBOARD header, link the entries */
			hdrp->command = DRMSG_ABORT_TEST;
			hdrp->message_id = drmach_get_msgid();
			abmlp = drmach_mbox_req_rply(hdrp,
			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
			cmn_err(CE_WARN, "test aborted");
			drmach_msglist_destroy(abmlp);
		}

		drmach_msglist_destroy(mlp);
	}

	return (err);
}
2434 
/*
 * drmach_mbox_init:
 * Bring up both DR<->SC mailboxes and supporting state, then perform
 * the initial DRMSG_MBOX_INIT handshake.  Progress is tracked in
 * drmach_mbox_istate (0..3) so drmach_mbox_fini() can unwind a partial
 * init.  Returns 0 on success, -1 on failure.
 */
static int
drmach_mbox_init()
{
	int			err;
	caddr_t			obufp;
	sbd_error_t		*serr = NULL;
	mboxsc_timeout_range_t	mbxtoz;

	drmach_mbox_istate = 0;
	/* register the outgoing mailbox */
	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
	    NULL)) != 0) {
		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
		return (-1);
	}
	drmach_mbox_istate = 1;

	/* setup the mboxsc_putmsg timeout value */
	if (drmach_use_tuned_putmsg_to) {
		/* operator supplied an explicit timeout; honor it */
		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
		    drmach_to_putmsg);
	} else {
		if ((err = mboxsc_ctrl(KEY_DRSC,
		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
			/* fall back to a fixed timeout on query failure */
			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
			drmach_to_putmsg = 60000;
		} else {
			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
			    " is 0x%lx\n", mbxtoz.min_timeout,
			    mbxtoz.max_timeout, drmach_to_putmsg);
		}
	}

	/* register the incoming mailbox */
	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
	    drmach_mbox_event)) != 0) {
		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
		return (-1);
	}
	drmach_mbox_istate = 2;

	/* initialize mutex for mailbox globals */
	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);

	/* initialize mutex for mailbox re-init */
	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);

	/* initialize mailbox message list elements */
	drmach_msglist_first = drmach_msglist_last = NULL;
	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);

	drmach_mbox_istate = 3;

	/* start mailbox sendmsg thread (only if not already running) */
	drmach_sendmsg_thread_run = 1;
	if (drmach_sendmsg_thread == NULL)
		drmach_sendmsg_thread = thread_create(NULL, 0,
		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

	/* start mailbox getmsg thread (only if not already running) */
	drmach_getmsg_thread_run = 1;
	if (drmach_getmsg_thread == NULL)
		drmach_getmsg_thread = thread_create(NULL, 0,
		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

	/* perform the initial MBOX_INIT handshake with the SC */
	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_proto_hdr_t));
	if (serr) {
		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
		    serr->e_code);
		sbd_err_clear(&serr);
		return (-1);
	}
	mutex_enter(&drmach_g_mbox_mutex);
	drmach_mbox_iflag = 1;
	drmach_mbox_ipending = 0;
	mutex_exit(&drmach_g_mbox_mutex);

	return (0);
}
2523 
/*
 * drmach_mbox_fini:
 * Tear down the DR<->SC mailboxes, unwinding only the stages recorded
 * in drmach_mbox_istate by drmach_mbox_init().  Returns 0 on success,
 * -1 if either mboxsc_fini() fails.
 */
static int
drmach_mbox_fini()
{
	int err, rv = 0;

	if (drmach_mbox_istate > 2) {
		/*
		 * Ask both mailbox threads to exit by clearing their run
		 * flags, then busy-wait for the acknowledgement.
		 * NOTE(review): the loop spins while either flag is still
		 * 0 — presumably the threads set their flag to a non-zero
		 * value (e.g. -1) on exit; confirm against
		 * drmach_mbox_sendmsg/drmach_mbox_getmsg.
		 */
		drmach_getmsg_thread_run = 0;
		drmach_sendmsg_thread_run = 0;
		cmn_err(CE_WARN,
		    "drmach_mbox_fini: waiting for mbox threads...");
		while ((drmach_getmsg_thread_run == 0) ||
		    (drmach_sendmsg_thread_run == 0)) {
			continue;
		}
		cmn_err(CE_WARN, "drmach_mbox_fini: mbox threads done.");
		mutex_destroy(&drmach_msglist_mutex);

	}
	if (drmach_mbox_istate) {
		/* de-register the outgoing mailbox */
		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
			    err);
			rv = -1;
		}
	}
	if (drmach_mbox_istate > 1) {
		/* de-register the incoming mailbox */
		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
			    err);
			rv = -1;
		}
	}
	/* destroyed unconditionally; init'd at istate 2 in mbox_init */
	mutex_destroy(&drmach_g_mbox_mutex);
	mutex_destroy(&drmach_ri_mbox_mutex);
	return (rv);
}
2562 
/*
 * drmach_portid2bnum:
 * Decode a Safari port id into a board number.  The low five bits of
 * the portid identify the agent within an expander and determine the
 * slot (0 or 1); the remaining upper bits carry the expander number,
 * giving bnum = 2 * expander + slot.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		/*
		 * ASSERT() compiles away in non-DEBUG kernels; default
		 * to slot 0 rather than returning a value derived from
		 * an uninitialized variable.
		 */
		slot = 0;
		break;
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2586 
2587 extern int axq_suspend_iopause;
2588 
2589 static int
2590 hold_rele_branch(dev_info_t *rdip, void *arg)
2591 {
2592 	int	i;
2593 	int	*holdp	= (int *)arg;
2594 	char	*name = ddi_node_name(rdip);
2595 
2596 	/*
2597 	 * For Starcat, we must be children of the root devinfo node
2598 	 */
2599 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2600 
2601 	i = drmach_name2type_idx(name);
2602 
2603 	/*
2604 	 * Only children of the root devinfo node need to be
2605 	 * held/released since they are the only valid targets
2606 	 * of tree operations. This corresponds to the node types
2607 	 * listed in the drmach_name2type array.
2608 	 */
2609 	if (i < 0) {
2610 		/* Not of interest to us */
2611 		return (DDI_WALK_PRUNECHILD);
2612 	}
2613 
2614 	if (*holdp) {
2615 		ASSERT(!e_ddi_branch_held(rdip));
2616 		e_ddi_branch_hold(rdip);
2617 	} else {
2618 		ASSERT(e_ddi_branch_held(rdip));
2619 		e_ddi_branch_rele(rdip);
2620 	}
2621 
2622 	return (DDI_WALK_PRUNECHILD);
2623 }
2624 
/*
 * drmach_init:
 * One-time platform initialization for Starcat DR: discover boards
 * from the OBP tree and the GDCD, map the IO cage test area, set up
 * locks, scrub the IO cage memory, initialize the SC mailbox, and
 * hold all devinfo branches of interest.  Idempotent: returns 0
 * immediately if already initialized.  Returns -1 on failure.
 */
static int
drmach_init(void)
{
	pnode_t 	nodeid;
	gdcd_t		*gdcd;
	int		bnum;
	dev_info_t	*rdip;
	int		hold, circ;

	mutex_enter(&drmach_i_lock);
	if (drmach_initialized) {
		mutex_exit(&drmach_i_lock);
		return (0);
	}

	/* snapshot the golden DCD; disposed before return */
	gdcd = drmach_gdcd_new();
	if (gdcd == NULL) {
		mutex_exit(&drmach_i_lock);
		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
		return (-1);
	}

	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);

	/*
	 * Walk the children of the OBP root; every node with a valid
	 * "portid" property maps to a board number.
	 */
	nodeid = prom_childnode(prom_rootnode());
	do {
		int		 len;
		int		 portid;
		drmachid_t	 id;

		len = prom_getproplen(nodeid, "portid");
		if (len != sizeof (portid))
			continue;

		portid = -1;
		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
		if (portid == -1)
			continue;

		bnum = drmach_portid2bnum(portid);

		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
			/* portid translated to an invalid board number */
			cmn_err(CE_WARN, "OBP node 0x%x has"
			    " invalid property value, %s=%u",
			    nodeid, "portid", portid);

			/* clean up */
			drmach_array_dispose(drmach_boards,
			    drmach_board_dispose);
			drmach_gdcd_dispose(gdcd);
			mutex_exit(&drmach_i_lock);
			return (-1);
		} else if (id == NULL) {
			/* first node seen for this board; create it */
			drmach_board_t	*bp;
			l1_slot_stat_t	*dcd;
			int		exp, slot;

			bp = drmach_board_new(bnum);
			bp->assigned = !drmach_initialized;
			bp->powered = !drmach_initialized;

			/* record per-slot data from the GDCD */
			exp = DRMACH_BNUM2EXP(bnum);
			slot = DRMACH_BNUM2SLOT(bnum);
			dcd = &gdcd->dcd_slot[exp][slot];
			bp->stardrb_offset =
			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
			    bp->stardrb_offset);

			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
				bp->flags |= DRMACH_NULL_PROC_LPA;
				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
			}
		}
	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);

	/* one page of VA reserved for CPU SRAM mappings */
	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);

	/* map the IO cage test area described by the GDCD, if enabled */
	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
		    gdcd->dcd_testcage_log2_mbytes_align);
		drmach_iocage_paddr =
		    (uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
		drmach_iocage_size =
		    1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);

		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
		    drmach_iocage_size, VM_SLEEP);
		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
		    mmu_btop(drmach_iocage_paddr),
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
		    gdcd->dcd_testcage_log2_mbytes_size,
		    gdcd->dcd_testcage_log2_mbytes_align,
		    gdcd->dcd_testcage_mbyte_PA);
		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
		    drmach_iocage_size, drmach_iocage_paddr,
		    drmach_iocage_vaddr);
	}

	/* DR cannot operate without the IO cage; fail and unwind */
	if (drmach_iocage_size == 0) {
		drmach_array_dispose(drmach_boards, drmach_board_dispose);
		drmach_boards = NULL;
		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
		drmach_gdcd_dispose(gdcd);
		mutex_exit(&drmach_i_lock);
		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
		return (-1);
	}

	drmach_gdcd_dispose(gdcd);

	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Scrub the IO cage memory once at startup.  cpu_lock is taken
	 * before drmach_iocage_lock, matching the lock order used by
	 * the rest of this file.
	 */
	mutex_enter(&cpu_lock);
	mutex_enter(&drmach_iocage_lock);
	ASSERT(drmach_iocage_is_busy == 0);
	drmach_iocage_is_busy = 1;
	drmach_iocage_mem_scrub(drmach_iocage_size);
	drmach_iocage_is_busy = 0;
	cv_signal(&drmach_iocage_cv);
	mutex_exit(&drmach_iocage_lock);
	mutex_exit(&cpu_lock);


	/* mailbox failure is reported but is not fatal to drmach_init */
	if (drmach_mbox_init() == -1) {
		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
	}

	/*
	 * Walk immediate children of devinfo root node and hold
	 * all devinfo branches of interest.
	 */
	hold = 1;
	rdip = ddi_root_node();

	ndi_devi_enter(rdip, &circ);
	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
	ndi_devi_exit(rdip, circ);

	drmach_initialized = 1;

	/*
	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
	 * rev introducing the axq_iopause_*_all interfaces should not regress
	 * when installed without the DR rev using those interfaces. The default
	 * is for iopause to be enabled/disabled during axq suspend/resume. By
	 * setting the following axq flag to zero, axq will not enable iopause
	 * during suspend/resume, instead DR will call the axq_iopause_*_all
	 * interfaces during drmach_copy_rename.
	 */
	axq_suspend_iopause = 0;

	mutex_exit(&drmach_i_lock);

	return (0);
}
2790 
/*
 * drmach_fini:
 * Undo drmach_init(): dispose board state, destroy locks, release the
 * devinfo branch holds, and shut down the SC mailbox.  The mailbox and
 * drmach_xt_mb teardown run even if drmach_init() never completed.
 */
static void
drmach_fini(void)
{
	dev_info_t	*rdip;
	int		hold, circ;

	if (drmach_initialized) {
		/* dispose all board objects under the writer lock */
		rw_enter(&drmach_boards_rwlock, RW_WRITER);
		drmach_array_dispose(drmach_boards, drmach_board_dispose);
		drmach_boards = NULL;
		rw_exit(&drmach_boards_rwlock);

		mutex_destroy(&drmach_slice_table_lock);
		mutex_destroy(&drmach_xt_mb_lock);
		mutex_destroy(&drmach_bus_sync_lock);
		cv_destroy(&drmach_iocage_cv);
		mutex_destroy(&drmach_iocage_lock);

		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);

		/*
		 * Walk immediate children of the root devinfo node
		 * releasing holds acquired on branches in drmach_init()
		 */
		hold = 0;
		rdip = ddi_root_node();

		ndi_devi_enter(rdip, &circ);
		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
		ndi_devi_exit(rdip, circ);

		drmach_initialized = 0;
	}

	/* return value intentionally ignored during teardown */
	drmach_mbox_fini();
	if (drmach_xt_mb != NULL) {
		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
		    drmach_xt_mb_size);
	}
	rw_destroy(&drmach_boards_rwlock);
	mutex_destroy(&drmach_i_lock);
}
2833 
2834 static void
2835 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2836 {
2837 	kpreempt_disable();
2838 
2839 	/* get register address, read madr value */
2840 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2841 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2842 	} else {
2843 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2844 	}
2845 
2846 	kpreempt_enable();
2847 }
2848 
2849 
2850 static uint64_t *
2851 drmach_prep_mc_rename(uint64_t *p, int local,
2852 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2853 {
2854 	int bank;
2855 
2856 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2857 		uint64_t madr, bank_offset;
2858 
2859 		/* fetch mc's bank madr register value */
2860 		drmach_mem_read_madr(mp, bank, &madr);
2861 		if (madr & DRMACH_MC_VALID_MASK) {
2862 			uint64_t bankpa;
2863 
2864 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2865 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2866 			bankpa = new_basepa + bank_offset;
2867 
2868 			/* encode new base pa into madr */
2869 			madr &= ~DRMACH_MC_UM_MASK;
2870 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2871 			madr &= ~DRMACH_MC_LM_MASK;
2872 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2873 
2874 			if (local)
2875 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2876 			else
2877 				*p++ = DRMACH_MC_ADDR(mp, bank);
2878 
2879 			*p++ = madr;
2880 		}
2881 	}
2882 
2883 	return (p);
2884 }
2885 
/*
 * drmach_prep_schizo_script:
 * Append addr/value pairs to the rename script that reprogram the
 * LPA_BASE/LPA_BND fields in the Schizo Control/Status Register of
 * every Schizo IO device on the slot 1 board sharing mp's expander,
 * so their local-physical-address window follows the memory slice to
 * new_basepa.  Returns the advanced script pointer; a no-op when no
 * such board or devices exist.
 */
static uint64_t *
drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
{
	drmach_board_t	*bp;
	int		 rv;
	int		 idx;
	drmachid_t	 id;
	uint64_t	 last_scsr_pa = 0;

	/* memory is always in slot 0 */
	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);

	/* look up slot 1 board on same expander */
	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
	rv = drmach_array_get(drmach_boards, idx, &id);
	bp = id; /* bp will be NULL if board not found */

	/* look up should never be out of bounds */
	ASSERT(rv == 0);

	/* nothing to do when board is not found or has no devices */
	if (rv == -1 || bp == NULL || bp->devices == NULL)
		return (p);

	rv = drmach_array_first(bp->devices, &idx, &id);
	while (rv == 0) {
		if (DRMACH_IS_IO_ID(id)) {
			drmach_io_t *io = id;

			/*
			 * Skip all non-Schizo IO devices (only IO nodes
			 * that are Schizo devices have non-zero scsr_pa).
			 * Filter out "other" leaf to avoid writing to the
			 * same Schizo Control/Status Register twice.
			 */
			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
				uint64_t scsr;

				/* read-modify-write only the LPA fields */
				scsr  = lddphysio(io->scsr_pa);
				scsr &= ~(DRMACH_LPA_BASE_MASK |
				    DRMACH_LPA_BND_MASK);
				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
				scsr |= DRMACH_PA_TO_LPA_BND(
				    new_basepa + DRMACH_MEM_SLICE_SIZE);

				*p++ = io->scsr_pa;
				*p++ = scsr;

				last_scsr_pa = io->scsr_pa;
			}
		}
		rv = drmach_array_next(bp->devices, &idx, &id);
	}

	return (p);
}
2942 
2943 /*
2944  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2945  * The latter is returned when drmach_rename fails to idle a Panther MC and
2946  * is used to identify the MC for error reporting.
2947  */
2948 static uint64_t *
2949 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2950 {
2951 	/* only slot 0 has memory */
2952 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2953 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2954 
2955 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2956 		ASSERT(DRMACH_IS_MEM_ID(mp));
2957 
2958 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2959 			if (local) {
2960 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2961 				*p++ = (uintptr_t)mp;
2962 			}
2963 		} else if (!local) {
2964 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2965 			*p++ = (uintptr_t)mp;
2966 		}
2967 	}
2968 
2969 	return (p);
2970 }
2971 
/*
 * drmach_prep_rename_script:
 * Build the copy-rename script executed by drmach_rename to exchange
 * the memory slices of the source (s_mp) and target (t_mp) boards.
 *
 * Script layout in buf (each section is a list terminated by one 0
 * entry; the whole script ends with a second 0):
 *	1. MC bank base PAs used for bus sync (from drmach_bus_sync_list)
 *	2. addr/id tuples for the local Panther MC idle register
 *	3. addr/id tuples for all other Panther MC idle registers
 *	4. addr/val MADR tuples for the MC local to the executing CPU
 *	5. addr/val tuples for all remaining MCs, then the Schizo
 *	   LPA_BASE/LPA_BND registers
 *	6. addr/val tuples for the AXQ CASM rename
 *
 * Returns NULL on success or an internal error when the AXQ script
 * cannot be generated.
 */
static sbd_error_t *
drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
	uint64_t t_slice_offset, caddr_t buf, int buflen)
{
	_NOTE(ARGUNUSED(buflen))

	uint64_t		*p = (uint64_t *)buf, *q;
	sbd_error_t		*err;
	int			 rv;
	drmach_mem_t		*mp, *skip_mp;
	uint64_t		 s_basepa, t_basepa;
	uint64_t		 s_new_basepa, t_new_basepa;

	/* verify supplied buffer space is adequate */
	ASSERT(buflen >=
	    /* addr for all possible MC banks */
	    (sizeof (uint64_t) * 4 * 4 * 18) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/id tuple for local Panther MC idle reg */
	    (sizeof (uint64_t) * 2) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/id tuple for 2 boards with 4 Panther MC idle regs */
	    (sizeof (uint64_t) * 2 * 2 * 4) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 1 proc with 4 MC banks */
	    (sizeof (uint64_t) * 2 * 4) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 2 boards w/ 2 schizos each */
	    (sizeof (uint64_t) * 2 * 2 * 2) +
	    /* addr/val tuple for 2 boards w/ 16 MC banks each */
	    (sizeof (uint64_t) * 2 * 2 * 16) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* addr/val tuple for 18 AXQs w/ two slots each */
	    (sizeof (uint64_t) * 2 * 2 * 18) +
	    /* list section terminator */
	    (sizeof (uint64_t) * 1) +
	    /* list terminator */
	    (sizeof (uint64_t) * 1));

	/* copy bank list to rename script */
	mutex_enter(&drmach_bus_sync_lock);
	for (q = drmach_bus_sync_list; *q; q++, p++)
		*p = *q;
	mutex_exit(&drmach_bus_sync_lock);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for MC on this processor.  A script will be
	 * produced only if this is a Panther processor on the source or
	 * target board.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 1);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 1);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for all other MCs on source and target
	 * Panther boards.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 0);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 0);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Step 1:	Write source base address to target MC
	 *		with present bit off.
	 * Step 2:	Now rewrite target reg with present bit on.
	 */
	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
	ASSERT(err == NULL);
	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
	ASSERT(err == NULL);

	/* exchange base pa. include slice offset in new target base pa */
	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
	    t_slice_offset;

	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);

	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
	    CPU->cpu_id);

	/*
	 * Write rename script for MC on this processor.  A script will
	 * be produced only if this processor is on the source or target
	 * board.
	 */

	/* skip_mp remembers the local MC so it is not emitted twice */
	skip_mp = NULL;
	mp = s_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
			    s_new_basepa);
		}

		mp = mp->next;
	}

	mp = t_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
			    t_new_basepa);
		}

		mp = mp->next;
	}

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write rename script for all other MCs on source and target
	 * boards.
	 */

	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
	}

	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
	}

	/* Write rename script for Schizo LPA_BASE/LPA_BND */
	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);

	/* list section terminator */
	*p++ = 0;

	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
	    DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
	    DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));

	rv = axq_do_casm_rename_script(&p,
	    DRMACH_PA_TO_SLICE(s_new_basepa),
	    DRMACH_PA_TO_SLICE(t_new_basepa));
	if (rv == DDI_FAILURE)
		return (DRMACH_INTERNAL_ERROR());

	/* list section & final terminator */
	*p++ = 0;
	*p++ = 0;

#ifdef DEBUG
	{
		uint64_t *q = (uint64_t *)buf;

		/* paranoia */
		ASSERT((caddr_t)p <= buf + buflen);

		DRMACH_PR("MC bank base pa list:\n");
		while (*q) {
			uint64_t a = *q++;

			DRMACH_PR("0x%lx\n", a);
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
		while (*q) {
			uint64_t r = *q++;	/* register address */
			uint64_t v = *q++;	/* new register value */

			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
			    r, v, DRMACH_MC_UM_TO_PA(v)|DRMACH_MC_LM_TO_PA(v));
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("AXQ reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* verify final terminator is present */
		ASSERT(*(q + 1) == 0);

		DRMACH_PR("copy-rename script 0x%p, len %d\n", buf,
		    (int)((intptr_t)p - (intptr_t)buf));

		if (drmach_debug)
			DELAY(10000000);
	}
#endif

	return (NULL);
}
3218 
/*
 * Post a new LPA slice value into the drmach_xt_mb mailbox slot of
 * every present CPU on board bp.  Each entry is encoded as 0x80 | slice
 * (0x80 presumably serving as a "valid" flag consumed during the rename
 * cross-call).  When bp is a slot 0 board, the update is also applied
 * to the companion slot 1 board in the same expander, if one is
 * registered, since both slots share the expander's slice routing.
 *
 * Caller must hold drmach_xt_mb_lock.
 */
static void
drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
{
	int		 rv;

	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));

	if (bp->devices) {
		int		 d_idx;
		drmachid_t	 d_id;

		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
		while (rv == 0) {
			if (DRMACH_IS_CPU_ID(d_id)) {
				drmach_cpu_t	*cp = d_id;
				processorid_t	 cpuid = cp->cpuid;

				/*
				 * Only post to CPUs that exist and have
				 * non-zero cpu_flags; cpu_lock guards the
				 * cpu[] array against concurrent changes.
				 */
				mutex_enter(&cpu_lock);
				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
					drmach_xt_mb[cpuid] = 0x80 | slice;
				mutex_exit(&cpu_lock);
			}
			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}
	}
	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
		drmach_board_t	*s1bp = NULL;

		/* recurse into the adjacent slot 1 board, if registered */
		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
		    (void *) &s1bp);
		if (rv == 0 && s1bp != NULL) {
			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
		}
	}
}
3256 
/*
 * First phase of a memory copy-rename from source memory unit s_id to
 * target memory unit t_id:
 *
 *  - computes the source/target copy base physical addresses and
 *    rebases the copy memlist c_ml to be relative to the source base
 *  - locks this cpu's SRAM page into its I- and D-TLBs, copies the
 *    drmach_rename routine's text into it, and appends the rename
 *    script built by drmach_prep_rename_script()
 *  - disables and flushes the CDC srams
 *  - marks both memory units busy and allocates the
 *    drmach_copy_rename_t handle returned via *cr_id
 *
 * On success returns NULL with drmach_slice_table_lock and
 * drmach_xt_mb_lock HELD; they stay held across drmach_copy_rename()
 * and are released by drmach_copy_rename_fini().  On failure, the SRAM
 * TLB entries are flushed and the error is returned with no locks held.
 */
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
{
	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
	extern void drmach_rename_end(void);

	drmach_mem_t	*s_mp, *t_mp;
	struct memlist	*x_ml;
	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
	int		 len;
	caddr_t		 bp, wp;
	uint_t		*p, *q;
	sbd_error_t	*err;
	tte_t		*tte;
	drmach_copy_rename_t *cr;

	if (!DRMACH_IS_MEM_ID(s_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	if (!DRMACH_IS_MEM_ID(t_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	s_mp = s_id;
	t_mp = t_id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
	if (err)
		return (err);

	/* calculate slice offset mask from slice size */
	off_mask = DRMACH_MEM_SLICE_SIZE - 1;

	/* calculate source and target base pa */
	s_copybasepa = c_ml->address;
	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);

	/* paranoia */
	ASSERT((c_ml->address & off_mask) >= t_slice_offset);

	/* adjust copy memlist addresses to be relative to copy base pa */
	x_ml = c_ml;
	while (x_ml != NULL) {
		x_ml->address -= s_copybasepa;
		x_ml = x_ml->next;
	}

#ifdef DEBUG
	{
	uint64_t s_basepa, s_size, t_size;

	/* walk to the last memlist entry to report the copy span end */
	x_ml = c_ml;
	while (x_ml->next != NULL)
		x_ml = x_ml->next;

	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
	    s_copybasepa,
	    s_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
	    t_copybasepa,
	    t_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("copy memlist (relative to copy base pa):\n");
	DRMACH_MEMLIST_DUMP(c_ml);

	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(s_id, &s_size);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(t_id, &t_size);
	ASSERT(err == NULL);

	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
	    s_basepa, s_size);
	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
	    t_basepa, t_size);
	}
#endif /* DEBUG */

	/* Map in appropriate cpu sram page */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	bp = wp = drmach_cpu_sram_va;

	/* Make sure the rename routine will fit */
	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
	ASSERT(wp + len < bp + PAGESIZE);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)wp;
	q = (uint_t *)drmach_rename;
	while (q < (uint_t *)drmach_rename_end)
		*p++ = *q++;

	/* zero remainder. standard bzero not designed to work in nc space */
	while (p < (uint_t *)(bp + PAGESIZE))
		*p++ = 0;

	DRMACH_PR("drmach_rename function 0x%p, len %d\n", wp, len);
	/* place the rename script just past the text, 16-byte aligned */
	wp += (len + 15) & ~15;

	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset, wp,
	    PAGESIZE - (wp - bp));
	if (err) {
cleanup:
		/* flush the sram TLB entries loaded above and bail */
		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
		    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
		return (err);
	}

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
		goto cleanup;
	}

	/* mark both memory units busy */
	t_mp->dev.busy++;
	s_mp->dev.busy++;

	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
	    VM_SLEEP);
	cr->isa = (void *)drmach_copy_rename_init;
	cr->data = wp;
	cr->c_ml = c_ml;
	cr->s_mp = s_mp;
	cr->t_mp = t_mp;
	cr->s_copybasepa = s_copybasepa;
	cr->t_copybasepa = t_copybasepa;
	cr->ecode = DRMACH_CR_OK;

	/* both locks are deliberately held on return; see block comment */
	mutex_enter(&drmach_slice_table_lock);

	mutex_enter(&drmach_xt_mb_lock);
	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

	/*
	 * After the rename, each board's memory lives in the other's
	 * slice, so CPUs on the source board get the target's slice
	 * posted in their mailboxes and vice versa.
	 */
	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(t_copybasepa));
	}
	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(s_copybasepa));
	}

	*cr_id = cr;
	return (NULL);
}
3412 
/*
 * Debug statistics recorded by drmach_copy_rename(): the number of
 * slave cpus cross-called and the number of poll iterations spent
 * waiting for them; reported by drmach_copy_rename_fini().
 */
int drmach_rename_count;
int drmach_rename_ntries;
3415 
/*
 * Final phase of copy-rename.  Re-enables the CDC srams disabled by
 * drmach_copy_rename_init(), flushes this cpu's TLB mapping of the
 * sram page that held the rename routine, translates the rename error
 * code (cr->ecode) into an sbd_error_t, releases the two locks left
 * held by the init step (drmach_xt_mb_lock, drmach_slice_table_lock),
 * unbusies both memory units, refreshes the casm shadow and bus sync
 * list, and makes a best-effort UNCONFIG report to the SC.  Frees the
 * copy-rename handle before returning.
 */
sbd_error_t *
drmach_copy_rename_fini(drmachid_t id)
{
	drmach_copy_rename_t	*cr = id;
	sbd_error_t		*err = NULL;
	dr_mbox_msg_t		*obufp;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);

	/* undo the CDC disable done in drmach_copy_rename_init */
	axq_cdc_enable_all();

	/* flush the sram page mapping loaded by the init step */
	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
	    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);

	switch (cr->ecode) {
	case DRMACH_CR_OK:
		break;
	case DRMACH_CR_MC_IDLE_ERR: {
		/* earg identifies the memory controller that failed to idle */
		dev_info_t	*dip = NULL;
		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		ASSERT(DRMACH_IS_MEM_ID(mp));

		err = drmach_get_dip(mp, &dip);

		ASSERT(err == NULL);
		ASSERT(dip != NULL);

		err = drerr_new(0, ESBD_MEMFAIL, NULL);
		(void) ddi_pathname(dip, path);
		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
		    "copy-rename aborted", path, mp->dev.bp->cm.name);
		kmem_free(path, MAXPATHLEN);
		break;
	}
	case DRMACH_CR_IOPAUSE_ERR:
		/* earg is the expander number; >= 0 is vacuous for unsigned */
		ASSERT((uintptr_t)cr->earg >= 0 &&
		    (uintptr_t)cr->earg < AXQ_MAX_EXP);

		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
		    " to copy-rename", (uintptr_t)cr->earg);
		break;
	case DRMACH_CR_ONTRAP_ERR:
		err = drerr_new(0, ESBD_MEMFAIL, NULL);
		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
		    "memory error");
		break;
	default:
		err = DRMACH_INTERNAL_ERROR();
		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
		    cr->ecode);
		break;
	}

#ifdef DEBUG
	/* report any cpus that never consumed their LPA mailbox entry */
	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
		int	i;
		for (i = 0; i < NCPU; i++) {
			if (drmach_xt_mb[i])
				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
		}
	}
#endif
	mutex_exit(&drmach_xt_mb_lock);

	if (cr->c_ml != NULL)
		memlist_delete(cr->c_ml);

	cr->t_mp->dev.busy--;
	cr->s_mp->dev.busy--;

	if (err) {
		mutex_exit(&drmach_slice_table_lock);
		goto done;
	}

	/* update casm shadow for target and source board */
	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
	mutex_exit(&drmach_slice_table_lock);

	mutex_enter(&drmach_bus_sync_lock);
	drmach_bus_sync_list_update();
	mutex_exit(&drmach_bus_sync_lock);

	/*
	 * Make a good-faith effort to notify the SC about the copy-rename, but
	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
	 * will duplicate the update.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
	mutex_exit(&drmach_slice_table_lock);
	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
	    (caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

done:
	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));

	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
	    drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);

	return (err);
}
3526 
/* debug knob: when nonzero, insert a spin delay after each 32-byte copy */
int drmach_slow_copy = 0;
3528 
/*
 * Second phase of copy-rename, executed on the master cpu with
 * cpu_lock held and a handle built by drmach_copy_rename_init().
 * Pauses slot1 IO, parks all other cpus in drmach_rename_wait via
 * cross-call, disables CE reporting and interrupts, copies the source
 * memory to the target 32 bytes at a time under on_trap protection,
 * flushes E$ (and D$/I$/P$), then jumps to the SRAM-resident rename
 * routine to swap the address slices.  Errors are reported through
 * cr->ecode / cr->earg rather than a return value; they are translated
 * by drmach_copy_rename_fini().  On exit, slave cpus are released via
 * drmach_rename_done (or drmach_rename_abort on error) and IO pause
 * is lifted.
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;
	uint64_t		 caddr;
	cpuset_t		 cpuset;
	uint_t			 pstate;
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* park every ready cpu except this one */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/* poll until all slaves check in (or we exhaust our tries) */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];

	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base pas */
		s_pa = cr->s_copybasepa + ml->address;
		t_pa = cr->t_copybasepa + ml->address;
		nbytes = ml->size;

		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				uint64_t i = 13 * 50;
				while (i--)
					;
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename: jump to the drmach_rename copy placed in cpu sram
	 * by drmach_copy_rename_init, passing the script address.
	 */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

copy_rename_end:

	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the parked slave cpus */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3764 
/* drmachid common ops installed on io instances by drmach_io_new() */
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3768 
3769 static sbd_error_t *
3770 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3771 {
3772 	drmach_node_t	*node = proto->node;
3773 	sbd_error_t	*err;
3774 	drmach_reg_t	 regs[3];
3775 	int		 rv;
3776 	int		 len = 0;
3777 
3778 	rv = node->n_getproplen(node, "reg", &len);
3779 	if (rv != 0 || len != sizeof (regs)) {
3780 		sbd_error_t *err;
3781 
3782 		/* pci nodes are expected to have regs */
3783 		err = drerr_new(1, ESTC_GETPROP,
3784 		    "Device Node 0x%x: property %s",
3785 		    (uint_t)node->get_dnode(node), "reg");
3786 		return (err);
3787 	}
3788 
3789 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3790 	if (rv) {
3791 		sbd_error_t *err;
3792 
3793 		err = drerr_new(1, ESTC_GETPROP,
3794 		    "Device Node 0x%x: property %s",
3795 		    (uint_t)node->get_dnode(node), "reg");
3796 
3797 		return (err);
3798 	}
3799 
3800 	/*
3801 	 * Fix up unit number so that Leaf A has a lower unit number
3802 	 * than Leaf B.
3803 	 */
3804 	if ((proto->portid % 2) != 0) {
3805 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3806 			proto->unum = 0;
3807 		else
3808 			proto->unum = 1;
3809 	} else {
3810 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3811 			proto->unum = 2;
3812 		else
3813 			proto->unum = 3;
3814 	}
3815 
3816 	err = drmach_io_new(proto, idp);
3817 	if (err == NULL) {
3818 		drmach_io_t *self = *idp;
3819 
3820 		/* reassemble 64-bit base address */
3821 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3822 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3823 	}
3824 
3825 	return (err);
3826 }
3827 
3828 static sbd_error_t *
3829 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3830 {
3831 	drmach_io_t	*ip;
3832 
3833 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3834 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3835 	ip->dev.node = drmach_node_dup(proto->node);
3836 	ip->dev.cm.isa = (void *)drmach_io_new;
3837 	ip->dev.cm.dispose = drmach_io_dispose;
3838 	ip->dev.cm.release = drmach_io_release;
3839 	ip->dev.cm.status = drmach_io_status;
3840 
3841 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3842 	    ip->dev.type, ip->dev.unum);
3843 
3844 	*idp = (drmachid_t)ip;
3845 	return (NULL);
3846 }
3847 
3848 static void
3849 drmach_io_dispose(drmachid_t id)
3850 {
3851 	drmach_io_t *self;
3852 
3853 	ASSERT(DRMACH_IS_IO_ID(id));
3854 
3855 	self = id;
3856 	if (self->dev.node)
3857 		drmach_node_dispose(self->dev.node);
3858 
3859 	kmem_free(self, sizeof (*self));
3860 }
3861 
3862 /*ARGSUSED*/
3863 sbd_error_t *
3864 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3865 {
3866 	drmach_board_t	*bp = (drmach_board_t *)id;
3867 	sbd_error_t	*err = NULL;
3868 
3869 	if (id && DRMACH_IS_BOARD_ID(id)) {
3870 		switch (cmd) {
3871 			case SBD_CMD_TEST:
3872 			case SBD_CMD_STATUS:
3873 			case SBD_CMD_GETNCM:
3874 				break;
3875 			case SBD_CMD_CONNECT:
3876 				if (bp->connected)
3877 					err = drerr_new(0, ESBD_STATE, NULL);
3878 
3879 				if (bp->cond == SBD_COND_UNUSABLE)
3880 					err = drerr_new(0,
3881 					    ESBD_FATAL_STATE, NULL);
3882 				break;
3883 			case SBD_CMD_DISCONNECT:
3884 				if (!bp->connected)
3885 					err = drerr_new(0, ESBD_STATE, NULL);
3886 
3887 				if (bp->cond == SBD_COND_UNUSABLE)
3888 					err = drerr_new(0,
3889 					    ESBD_FATAL_STATE, NULL);
3890 				break;
3891 			default:
3892 				if (bp->cond == SBD_COND_UNUSABLE)
3893 					err = drerr_new(0,
3894 					    ESBD_FATAL_STATE, NULL);
3895 				break;
3896 
3897 		}
3898 	}
3899 
3900 	return (err);
3901 }
3902 
/*
 * Machine-dependent post-processing hook for an sbd command.
 * Starcat requires no post-op work; always succeeds.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
	return (NULL);
}
3909 
/*
 * Assign board bnum to this domain.  Performs lazy one-time drmach
 * initialization, validates bnum against the board array, sends an
 * ASSIGN mailbox request to the SC, and on success creates (or
 * reuses) the drmach_board_t and sets its assigned flag.
 *
 * Locking: the boards rwlock is taken as WRITER so a new board entry
 * can be inserted; when the board already exists the lock is
 * downgraded to READER before the (potentially slow) mailbox
 * transaction.  NOTE(review): in that path bp->assigned is later set
 * while holding only the read lock — presumably safe because the flag
 * is only toggled by assign/unassign under this rwlock; confirm
 * before relying on it.
 */
sbd_error_t *
drmach_board_assign(int bnum, drmachid_t *id)
{
	sbd_error_t	*err = NULL;
	caddr_t		obufp;

	if (!drmach_initialized && drmach_init() == -1) {
		err = DRMACH_INTERNAL_ERROR();
	}

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	if (!err) {
		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
		} else {
			drmach_board_t	*bp;

			/* board already known: no insert needed, allow readers */
			if (*id)
				rw_downgrade(&drmach_boards_rwlock);

			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
			kmem_free(obufp, sizeof (dr_proto_hdr_t));

			if (!err) {
				bp = *id;
				if (!*id)
					bp = *id  =
					    (drmachid_t)drmach_board_new(bnum);
				bp->assigned = 1;
			}
		}
	}
	rw_exit(&drmach_boards_rwlock);
	return (err);
}
3948 
3949 static uint_t
3950 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3951 {
3952 	uint_t	port, port_start, port_end;
3953 	uint_t	non_panther_cpus = 0;
3954 	uint_t	impl;
3955 
3956 	ASSERT(gdcd != NULL);
3957 
3958 	/*
3959 	 * Determine PRD port indices based on slot location.
3960 	 */
3961 	switch (slot) {
3962 	case 0:
3963 		port_start = 0;
3964 		port_end = 3;
3965 		break;
3966 	case 1:
3967 		port_start = 4;
3968 		port_end = 5;
3969 		break;
3970 	default:
3971 		ASSERT(0);
3972 		/* check all */
3973 		port_start = 0;
3974 		port_end = 5;
3975 		break;
3976 	}
3977 
3978 	for (port = port_start; port <= port_end; port++) {
3979 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3980 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3981 			/*
3982 			 * This Safari port passed POST and represents a
3983 			 * cpu, so check the implementation.
3984 			 */
3985 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3986 			    & 0xffff;
3987 
3988 			switch (impl) {
3989 			case CHEETAH_IMPL:
3990 			case CHEETAH_PLUS_IMPL:
3991 			case JAGUAR_IMPL:
3992 				non_panther_cpus++;
3993 				break;
3994 			case PANTHER_IMPL:
3995 				break;
3996 			default:
3997 				ASSERT(0);
3998 				non_panther_cpus++;
3999 				break;
4000 			}
4001 		}
4002 	}
4003 
4004 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4005 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4006 
4007 	return (non_panther_cpus);
4008 }
4009 
/*
 * Connect (CLAIM) a board.  Sends the CLAIM mailbox message carrying
 * the current casm slice/memory-region info, then reads per-slot data
 * from the GDCD: the CPU SRAM DR buffer offset and the board's
 * NULL-proc-LPA flag.  Refuses to add non-Panther cpus to an
 * all-Panther domain when large pages are enabled, then runs the saf
 * configurator probe.  Any failure after the CLAIM triggers a
 * compensating UNCLAIM.  On success, slot 0 boards also request DIMM
 * serial ids from the SC.
 */
sbd_error_t *
drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
{
	_NOTE(ARGUNUSED(opts))

	drmach_board_t		*bp = (drmach_board_t *)id;
	sbd_error_t		*err;
	dr_mbox_msg_t		*obufp;
	gdcd_t			*gdcd = NULL;
	uint_t			exp, slot;
	sc_gptwocfg_cookie_t	scc;
	int			panther_pages_enabled;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	/*
	 * Build the casm info portion of the CLAIM message.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
	mutex_exit(&drmach_slice_table_lock);
	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
	    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	if (err) {
		/*
		 * if mailbox timeout or unrecoverable error from SC,
		 * board cannot be touched.  Mark the status as
		 * unusable.
		 */
		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
		    (err->e_code == ESTC_MBXRPLY))
			bp->cond = SBD_COND_UNUSABLE;
		return (err);
	}

	/*
	 * NOTE(review): this failure path returns without sending a
	 * compensating UNCLAIM, unlike the later failure paths —
	 * confirm whether that is intended.
	 */
	gdcd = drmach_gdcd_new();
	if (gdcd == NULL) {
		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
		    bp->cm.name);
		return (DRMACH_INTERNAL_ERROR());
	}

	/*
	 * Read CPU SRAM DR buffer offset from GDCD.
	 */
	exp = DRMACH_BNUM2EXP(bp->bnum);
	slot = DRMACH_BNUM2SLOT(bp->bnum);
	bp->stardrb_offset =
	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
	    bp->stardrb_offset);

	/*
	 * Read board LPA setting from GDCD.
	 */
	bp->flags &= ~DRMACH_NULL_PROC_LPA;
	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
		bp->flags |= DRMACH_NULL_PROC_LPA;
		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
	}

	/*
	 * XXX Until the Solaris large pages support heterogeneous cpu
	 * domains, DR needs to prevent the addition of non-Panther cpus
	 * to an all-Panther domain with large pages enabled.
	 */
	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
	    panther_pages_enabled && drmach_large_page_restriction) {
		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
		err = drerr_new(0, ESTC_SUPPORT, NULL);
	}

	if (err == NULL) {
		/* do saf configurator stuff */
		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
		scc = sc_probe_board(bp->bnum);
		if (scc == NULL)
			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
	}

	if (err) {
		/* flush CDC srams */
		if (axq_cdc_flush_all() != DDI_SUCCESS) {
			goto out;
		}

		/*
		 * Build the casm info portion of the UNCLAIM message.
		 */
		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
		mutex_enter(&drmach_slice_table_lock);
		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
		mutex_exit(&drmach_slice_table_lock);
		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
		    (caddr_t)obufp, sizeof (dr_mbox_msg_t),
		    (caddr_t)NULL, 0);

		kmem_free(obufp, sizeof (dr_mbox_msg_t));

		/*
		 * we clear the connected flag just in case it would have
		 * been set by a concurrent drmach_board_status() thread
		 * before the UNCLAIM completed.
		 */
		bp->connected = 0;
		goto out;
	}

	/*
	 * Now that the board has been successfully attached, obtain
	 * platform-specific DIMM serial id information for the board.
	 */
	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
	}

out:
	if (gdcd != NULL)
		drmach_gdcd_dispose(gdcd);

	return (err);
}
4142 
/*
 * Refresh the cached memory-slice routing entry (drmach_slice_table)
 * for the expander containing board bp, by scanning the CASM through
 * an attached slot 0 AXQ instance.  The AXQ devinfo handle, its
 * expander and slot number are cached in static locals across calls;
 * drmach_slice_table_lock (asserted held) serializes access to them.
 * When 'invalidate' is nonzero, the cached table entry — and any
 * cached AXQ belonging to the same expander — is cleared instead of
 * updated.
 */
static void
drmach_slice_table_update(drmach_board_t *bp, int invalidate)
{
	static char		*axq_name = "address-extender-queue";
	static dev_info_t	*axq_dip = NULL;
	static int		 axq_exp = -1;
	static int		 axq_slot;
	int			 e, s, slice;

	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));

	e = DRMACH_BNUM2EXP(bp->bnum);
	if (invalidate) {
		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);

		/* invalidate cached casm value */
		drmach_slice_table[e] = 0;

		/* invalidate cached axq info if for same exp */
		if (e == axq_exp && axq_dip) {
			ndi_rele_devi(axq_dip);
			axq_dip = NULL;
		}
	}

	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
		int i, portid;

		/* search for an attached slot0 axq instance */
		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
			if (axq_dip)
				ndi_rele_devi(axq_dip);
			axq_dip = ddi_find_devinfo(axq_name, i, 0);
			if (axq_dip && DDI_CF2(axq_dip)) {
				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
				    DDI_PROP_DONTPASS, "portid", -1);
				if (portid == -1) {
					DRMACH_PR("cant get portid of axq "
					    "instance %d\n", i);
					continue;
				}

				/* portid encodes expander and slot */
				axq_exp = (portid >> 5) & 0x1f;
				axq_slot = portid & 1;

				/* skip the axq being invalidated */
				if (invalidate && axq_exp == e)
					continue;

				if (axq_slot == 0)
					break;	/* found */
			}
		}

		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
			/* no usable axq found; drop any stale handle */
			if (axq_dip) {
				ndi_rele_devi(axq_dip);
				axq_dip = NULL;
			}
			DRMACH_PR("drmach_slice_table_update: failed to "
			    "update axq dip\n");
			return;
		}

	}

	ASSERT(axq_dip);
	ASSERT(axq_slot == 0);

	if (invalidate)
		return;

	s = DRMACH_BNUM2SLOT(bp->bnum);
	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n", axq_exp, axq_slot,
	    e, s);

	/* invalidate entry */
	drmach_slice_table[e] &= ~0x20;

	/*
	 * find a slice that routes to expander e. If no match
	 * is found, drmach_slice_table[e] will remain invalid.
	 *
	 * The CASM is a routing table indexed by slice number.
	 * Each element in the table contains permission bits,
	 * a destination expander number and a valid bit. The
	 * valid bit must true for the element to be meaningful.
	 *
	 * CASM entry structure
	 *   Bits 15..6 ignored
	 *   Bit  5	valid
	 *   Bits 0..4	expander number
	 *
	 * NOTE: the for loop is really enumerating the range of slices,
	 * which is ALWAYS equal to the range of expanders. Hence,
	 * AXQ_MAX_EXP is okay to use in this loop.
	 */
	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);

		if ((casm & 0x20) && (casm & 0x1f) == e)
			drmach_slice_table[e] = 0x20 | slice;
	}
}
4246 
4247 /*
4248  * Get base and bound PAs for slot 1 board lpa programming
4249  * If a cpu/mem board is present in the same expander, use slice
4250  * information corresponding to the CASM.  Otherwise, set base and
4251  * bound PAs to 0.
4252  */
4253 static void
4254 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4255 {
4256 	drmachid_t s0id;
4257 
4258 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4259 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4260 
4261 	*basep = *boundp = 0;
4262 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4263 	    s0id != 0) {
4264 
4265 		uint32_t slice;
4266 		if ((slice = drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4267 		    & 0x20) {
4268 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4269 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4270 		}
4271 	}
4272 }
4273 
4274 
4275 /*
4276  * Reprogram slot 1 lpa's as required.
4277  * The purpose of this routine is maintain the LPA settings of the devices
4278  * in slot 1. To date we know Schizo and Cheetah are the only devices that
4279  * require this attention. The LPA setting must match the slice field in the
4280  * CASM element for the local expander. This field is guaranteed to be
4281  * programmed in accordance with the cacheable address space on the slot 0
4282  * board of the local expander. If no memory is present on the slot 0 board,
4283  * there is no cacheable address space and, hence, the CASM slice field will
4284  * be zero or its valid bit will be false (or both).
4285  */
4286 
static void
drmach_slot1_lpa_set(drmach_board_t *bp)
{
	drmachid_t	id;
	drmach_board_t	*s1bp = NULL;
	int		rv, idx, is_maxcat = 1;
	uint64_t	last_scsr_pa = 0;
	uint64_t	new_basepa, new_boundpa;

	/* locate the slot 1 board; bp may be either slot of the expander */
	if (DRMACH_BNUM2SLOT(bp->bnum)) {
		s1bp = bp;
		if (s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum);
			return;
		}
	} else {
		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
		/* nothing to do when board is not found or has no devices */
		s1bp = id;
		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum + 1);
			return;
		}
		ASSERT(DRMACH_IS_BOARD_ID(id));
	}
	mutex_enter(&drmach_slice_table_lock);
	/* derive new LPA base/bound from the cached CASM slice entry */
	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
	    s1bp->bnum, new_basepa, new_boundpa);

	/* pass 1: reprogram Schizo LPA registers on I/O devices */
	rv = drmach_array_first(s1bp->devices, &idx, &id);
	while (rv == 0) {
		if (DRMACH_IS_IO_ID(id)) {
			drmach_io_t *io = id;

			/* presence of an I/O device means not a MAXCAT */
			is_maxcat = 0;

			/*
			 * Skip all non-Schizo IO devices (only IO nodes
			 * that are Schizo devices have non-zero scsr_pa).
			 * Filter out "other" leaf to avoid writing to the
			 * same Schizo Control/Status Register twice.
			 */
			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
				uint64_t scsr;

				scsr  = lddphysio(io->scsr_pa);
				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
				    scsr);
				scsr &= ~(DRMACH_LPA_BASE_MASK |
				    DRMACH_LPA_BND_MASK);
				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);

				stdphysio(io->scsr_pa, scsr);
				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
				    scsr);

				last_scsr_pa = io->scsr_pa;
			}
		}
		rv = drmach_array_next(s1bp->devices, &idx, &id);
	}

	/* pass 2: reprogram cpu LPAs on MAXCAT (cpu/mem slot 1) boards */
	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
		extern xcfunc_t	drmach_set_lpa;

		DRMACH_PR("reprogramming maxcat lpa's");

		mutex_enter(&cpu_lock);
		rv = drmach_array_first(s1bp->devices, &idx, &id);
		while (rv == 0 && id != NULL) {
			if (DRMACH_IS_CPU_ID(id)) {
				int ntries;
				processorid_t cpuid;

				cpuid = ((drmach_cpu_t *)id)->cpuid;

				/*
				 * Check for unconfigured or powered-off
				 * MCPUs.  If CPU_READY flag is clear, the
				 * MCPU cannot be xcalled.
				 */
				if ((cpu[cpuid] == NULL) ||
				    (cpu[cpuid]->cpu_flags &
				    CPU_READY) == 0) {

					rv = drmach_array_next(s1bp->devices,
					    &idx, &id);
					continue;
				}

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it will be used for e$ flush
				 * in drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					while (drmach_iocage_is_busy)
						cv_wait(&drmach_iocage_cv,
						    &drmach_iocage_lock);
					drmach_iocage_is_busy = 1;
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					mutex_exit(&drmach_iocage_lock);
				}

				/*
				 * drmach_slice_table[*]
				 *	bit 5	valid
				 *	bit 0:4	slice number
				 *
				 * drmach_xt_mb[*] format for drmach_set_lpa
				 *	bit 7	valid
				 *	bit 6	set null LPA
				 *			(overrides bits 0:4)
				 *	bit 0:4	slice number
				 *
				 * drmach_set_lpa derives processor CBASE and
				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
				 * If bit 6 is set, then CBASE = CBND = 0.
				 * Otherwise, CBASE = slice number;
				 * CBND = slice number + 1.
				 * No action is taken if bit 7 is zero.
				 */

				mutex_enter(&drmach_xt_mb_lock);
				bzero((void *)drmach_xt_mb,
				    drmach_xt_mb_size);

				if (new_basepa == 0 && new_boundpa == 0)
					drmach_xt_mb[cpuid] = 0x80 | 0x40;
				else
					drmach_xt_mb[cpuid] = 0x80 |
					    DRMACH_PA_TO_SLICE(new_basepa);

				drmach_xt_ready = 0;

				/* cross-trap the target cpu to set its LPA */
				xt_one(cpuid, drmach_set_lpa, NULL, NULL);

				/* poll for the target cpu's acknowledgement */
				ntries = drmach_cpu_ntries;
				while (!drmach_xt_ready && ntries) {
					DELAY(drmach_cpu_delay);
					ntries--;
				}
				mutex_exit(&drmach_xt_mb_lock);
				drmach_xt_ready = 0;

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it was used for e$ flush
				 * by the just-performed drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					drmach_iocage_is_busy = 0;
					cv_signal(&drmach_iocage_cv);
					mutex_exit(&drmach_iocage_lock);
				}
			}
			rv = drmach_array_next(s1bp->devices, &idx, &id);
		}
		mutex_exit(&cpu_lock);
	}
	mutex_exit(&drmach_slice_table_lock);
}
4460 
4461 /*
4462  * Return the number of connected Panther boards in the domain.
4463  */
4464 static int
4465 drmach_panther_boards(void)
4466 {
4467 	int		rv;
4468 	int		b_idx;
4469 	drmachid_t	b_id;
4470 	drmach_board_t	*bp;
4471 	int		npanther = 0;
4472 
4473 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4474 	while (rv == 0) {
4475 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4476 		bp = b_id;
4477 
4478 		if (IS_PANTHER(bp->cpu_impl))
4479 			npanther++;
4480 
4481 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4482 	}
4483 
4484 	return (npanther);
4485 }
4486 
/*
 * Disconnect a board from the domain.  Builds the CASM portion of the
 * UNCLAIM mailbox message, deprobes the board via the saf
 * configurator (with the CDC disabled and flushed), then sends
 * DRMSG_UNCLAIM to the SC.  On UNCLAIM failure the board is marked
 * unusable (dead mailbox) or re-probed to restore consistent state.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t	*bp;
	dr_mbox_msg_t	*obufp;
	sbd_error_t	*err = NULL;

	sc_gptwocfg_cookie_t	scc;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	/*
	 * Build the casm info portion of the UNCLAIM message.
	 * This must be done prior to calling for saf configurator
	 * deprobe, to ensure that the associated axq instance
	 * is not detached.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);

	/*
	 * If disconnecting slot 0 board, update the casm slice table
	 * info now, for use by drmach_slot1_lpa_set()
	 */
	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
		drmach_slice_table_update(bp, 1);

	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
	mutex_exit(&drmach_slice_table_lock);

	/*
	 * Update LPA information for slot1 board
	 */
	drmach_slot1_lpa_set(bp);

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
	}

	/*
	 * call saf configurator for deprobe
	 * It's done now before sending an UNCLAIM message because
	 * IKP will probe boards it doesn't know about <present at boot>
	 * prior to unprobing them.  If this happens after sending the
	 * UNCLAIM, it will cause a dstop for domain transgression error.
	 */

	if (!err) {
		scc = sc_unprobe_board(bp->bnum);
		axq_cdc_enable_all();
		if (scc != NULL) {
			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
		}
	}

	/*
	 * If disconnecting a board from a Panther domain, wait a fixed-
	 * time delay for pending Safari transactions to complete on the
	 * disconnecting board's processors.  The bus sync list read used
	 * in drmach_shutdown_asm to synchronize with outstanding Safari
	 * transactions assumes no read-bypass-write mode for all memory
	 * controllers.  Since Panther supports read-bypass-write, a
	 * delay is used that is slightly larger than the maximum Safari
	 * timeout value in the Safari/Fireplane Config Reg.
	 */
	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
		clock_t	stime = ddi_get_lbolt();

		delay(drv_usectohz(drmach_unclaim_usec_delay));

		stime = ddi_get_lbolt() - stime;
		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
	}

	if (!err) {
		obufp->msgdata.dm_ur.mem_clear = 0;

		/* ask the SC to release the board from the domain */
		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
		    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);

		if (err) {
			/*
			 * if mailbox timeout or unrecoverable error from SC,
			 * board cannot be touched.  Mark the status as
			 * unusable.
			 */
			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
			    (err->e_code == ESTC_MBXRPLY))
				bp->cond = SBD_COND_UNUSABLE;
			else {
				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
				    bp->bnum);
				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
				    bp->bnum);
				scc = sc_probe_board(bp->bnum);
				if (scc == NULL) {
					cmn_err(CE_WARN,
					"sc_probe_board failed for bnum=%d",
					    bp->bnum);
				} else {
					/*
					 * Re-probe succeeded; restore the
					 * cached CASM slice and slot 1 LPA
					 * state to match.
					 */
					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
						mutex_enter(
						    &drmach_slice_table_lock);
						drmach_slice_table_update(bp,
						    0);
						mutex_exit(
						    &drmach_slice_table_lock);
					}
					drmach_slot1_lpa_set(bp);
				}
			}
		} else {
			bp->connected = 0;
			/*
			 * Now that the board has been successfully detached,
			 * discard platform-specific DIMM serial id information
			 * for the board.
			 */
			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
			    plat_ecc_capability_sc_get(
			    PLAT_ECC_DIMM_SID_MESSAGE)) {
				(void) plat_discard_mem_sids(
				    DRMACH_BNUM2EXP(bp->bnum));
			}
		}
	}
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	return (err);
}
4624 
4625 static int
4626 drmach_get_portid(drmach_node_t *np)
4627 {
4628 	drmach_node_t	pp;
4629 	int		portid;
4630 	char		type[OBP_MAXPROPNAME];
4631 
4632 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4633 		return (portid);
4634 
4635 	/*
4636 	 * Get the device_type property to see if we should
4637 	 * continue processing this node.
4638 	 */
4639 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4640 		return (-1);
4641 
4642 	/*
4643 	 * If the device is a CPU without a 'portid' property,
4644 	 * it is a CMP core. For such cases, the parent node
4645 	 * has the portid.
4646 	 */
4647 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4648 		if (np->get_parent(np, &pp) != 0)
4649 			return (-1);
4650 
4651 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4652 			return (portid);
4653 	}
4654 
4655 	return (-1);
4656 }
4657 
4658 /*
4659  * This is a helper function to determine if a given
4660  * node should be considered for a dr operation according
4661  * to predefined dr type nodes and the node's name.
4662  * Formal Parameter : The name of a device node.
4663  * Return Value: -1, name does not map to a valid dr type.
4664  *		 A value greater or equal to 0, name is a valid dr type.
4665  */
4666 static int
4667 drmach_name2type_idx(char *name)
4668 {
4669 	int 	index, ntypes;
4670 
4671 	if (name == NULL)
4672 		return (-1);
4673 
4674 	/*
4675 	 * Determine how many possible types are currently supported
4676 	 * for dr.
4677 	 */
4678 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4679 
4680 	/* Determine if the node's name correspond to a predefined type. */
4681 	for (index = 0; index < ntypes; index++) {
4682 		if (strcmp(drmach_name2type[index].name, name) == 0)
4683 			/* The node is an allowed type for dr. */
4684 			return (index);
4685 	}
4686 
4687 	/*
4688 	 * If the name of the node does not map to any of the
4689 	 * types in the array drmach_name2type then the node is not of
4690 	 * interest to dr.
4691 	 */
4692 	return (-1);
4693 }
4694 
4695 static int
4696 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4697 {
4698 	drmach_node_t			*node = args->node;
4699 	drmach_board_cb_data_t		*data = args->data;
4700 	drmach_board_t			*obj = data->obj;
4701 
4702 	int		rv, portid;
4703 	drmachid_t	id;
4704 	drmach_device_t	*device;
4705 	char	name[OBP_MAXDRVNAME];
4706 
4707 	portid = drmach_get_portid(node);
4708 	if (portid == -1) {
4709 		/*
4710 		 * if the node does not have a portid property, then
4711 		 * by that information alone it is known that drmach
4712 		 * is not interested in it.
4713 		 */
4714 		return (0);
4715 	}
4716 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4717 
4718 	/* The node must have a name */
4719 	if (rv)
4720 		return (0);
4721 
4722 	/*
4723 	 * Ignore devices whose portid do not map to this board,
4724 	 * or that their name property is not mapped to a valid
4725 	 * dr device name.
4726 	 */
4727 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4728 	    (drmach_name2type_idx(name) < 0))
4729 		return (0);
4730 
4731 	/*
4732 	 * Create a device data structure from this node data.
4733 	 * The call may yield nothing if the node is not of interest
4734 	 * to drmach.
4735 	 */
4736 	data->err = drmach_device_new(node, obj, portid, &id);
4737 	if (data->err)
4738 		return (-1);
4739 	else if (!id) {
4740 		/*
4741 		 * drmach_device_new examined the node we passed in
4742 		 * and determined that it was either one not of
4743 		 * interest to drmach or the PIM dr layer.
4744 		 * So, it is skipped.
4745 		 */
4746 		return (0);
4747 	}
4748 
4749 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4750 	if (rv) {
4751 		data->err = DRMACH_INTERNAL_ERROR();
4752 		return (-1);
4753 	}
4754 
4755 	device = id;
4756 
4757 #ifdef DEBUG
4758 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4759 	if (DRMACH_IS_IO_ID(id))
4760 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4761 #endif
4762 
4763 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4764 	return (data->err == NULL ? 0 : -1);
4765 }
4766 
4767 sbd_error_t *
4768 drmach_board_find_devices(drmachid_t id, void *a,
4769 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4770 {
4771 	drmach_board_t		*bp = (drmach_board_t *)id;
4772 	sbd_error_t		*err;
4773 	int			 max_devices;
4774 	int			 rv;
4775 	drmach_board_cb_data_t	data;
4776 
4777 	if (!DRMACH_IS_BOARD_ID(id))
4778 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4779 
4780 	max_devices  = plat_max_cpu_units_per_board();
4781 	max_devices += plat_max_mem_units_per_board();
4782 	max_devices += plat_max_io_units_per_board();
4783 
4784 	bp->devices = drmach_array_new(0, max_devices);
4785 
4786 	if (bp->tree == NULL)
4787 		bp->tree = drmach_node_new();
4788 
4789 	data.obj = bp;
4790 	data.ndevs = 0;
4791 	data.found = found;
4792 	data.a = a;
4793 	data.err = NULL;
4794 
4795 	mutex_enter(&drmach_slice_table_lock);
4796 	mutex_enter(&drmach_bus_sync_lock);
4797 
4798 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4799 
4800 	drmach_slice_table_update(bp, 0);
4801 	drmach_bus_sync_list_update();
4802 
4803 	mutex_exit(&drmach_bus_sync_lock);
4804 	mutex_exit(&drmach_slice_table_lock);
4805 
4806 	if (rv == 0) {
4807 		err = NULL;
4808 		drmach_slot1_lpa_set(bp);
4809 	} else {
4810 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4811 		bp->devices = NULL;
4812 
4813 		if (data.err)
4814 			err = data.err;
4815 		else
4816 			err = DRMACH_INTERNAL_ERROR();
4817 	}
4818 
4819 	return (err);
4820 }
4821 
4822 int
4823 drmach_board_lookup(int bnum, drmachid_t *id)
4824 {
4825 	int	rv = 0;
4826 
4827 	if (!drmach_initialized && drmach_init() == -1) {
4828 		*id = 0;
4829 		return (-1);
4830 	}
4831 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4832 	if (drmach_array_get(drmach_boards, bnum, id)) {
4833 		*id = 0;
4834 		rv = -1;
4835 	} else {
4836 		caddr_t		obufp;
4837 		dr_showboard_t	shb;
4838 		sbd_error_t	*err = NULL;
4839 		drmach_board_t	*bp;
4840 
4841 		bp = *id;
4842 
4843 		if (bp)
4844 			rw_downgrade(&drmach_boards_rwlock);
4845 
4846 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4847 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4848 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4849 		    sizeof (dr_showboard_t));
4850 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4851 
4852 		if (err) {
4853 			if (err->e_code == ESTC_UNAVAILABLE) {
4854 				*id = 0;
4855 				rv = -1;
4856 			}
4857 			sbd_err_clear(&err);
4858 		} else {
4859 			if (!bp)
4860 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4861 			bp->connected = (shb.bd_assigned && shb.bd_active);
4862 			bp->empty = shb.slot_empty;
4863 
4864 			switch (shb.test_status) {
4865 				case DR_TEST_STATUS_UNKNOWN:
4866 				case DR_TEST_STATUS_IPOST:
4867 				case DR_TEST_STATUS_ABORTED:
4868 					bp->cond = SBD_COND_UNKNOWN;
4869 					break;
4870 				case DR_TEST_STATUS_PASSED:
4871 					bp->cond = SBD_COND_OK;
4872 					break;
4873 				case DR_TEST_STATUS_FAILED:
4874 					bp->cond = SBD_COND_FAILED;
4875 					break;
4876 				default:
4877 					bp->cond = SBD_COND_UNKNOWN;
4878 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4879 				    shb.test_status);
4880 					break;
4881 			}
4882 			strncpy(bp->type, shb.board_type, sizeof (bp->type));
4883 			bp->assigned = shb.bd_assigned;
4884 			bp->powered = shb.power_on;
4885 		}
4886 	}
4887 	rw_exit(&drmach_boards_rwlock);
4888 	return (rv);
4889 }
4890 
4891 sbd_error_t *
4892 drmach_board_name(int bnum, char *buf, int buflen)
4893 {
4894 	snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4895 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4896 
4897 	return (NULL);
4898 }
4899 
4900 sbd_error_t *
4901 drmach_board_poweroff(drmachid_t id)
4902 {
4903 	drmach_board_t	*bp;
4904 	sbd_error_t	*err;
4905 	drmach_status_t	 stat;
4906 
4907 	if (!DRMACH_IS_BOARD_ID(id))
4908 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4909 	bp = id;
4910 
4911 	err = drmach_board_status(id, &stat);
4912 	if (!err) {
4913 		if (stat.configured || stat.busy)
4914 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4915 		else {
4916 			caddr_t	obufp;
4917 
4918 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4919 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4920 			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4921 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4922 			if (!err)
4923 				bp->powered = 0;
4924 		}
4925 	}
4926 	return (err);
4927 }
4928 
4929 sbd_error_t *
4930 drmach_board_poweron(drmachid_t id)
4931 {
4932 	drmach_board_t	*bp;
4933 	caddr_t		obufp;
4934 	sbd_error_t	*err;
4935 
4936 	if (!DRMACH_IS_BOARD_ID(id))
4937 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4938 	bp = id;
4939 
4940 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4941 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4942 	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4943 	if (!err)
4944 		bp->powered = 1;
4945 
4946 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4947 
4948 	return (err);
4949 }
4950 
4951 static sbd_error_t *
4952 drmach_board_release(drmachid_t id)
4953 {
4954 	if (!DRMACH_IS_BOARD_ID(id))
4955 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4956 	return (NULL);
4957 }
4958 
/*
 * Ask the SC to test a board (DRMSG_TESTBOARD) and translate the
 * result into the board's condition.  For slot 1 (I/O or MAXCAT)
 * boards, an I/O cage is set up first: a cpu and memory are donated
 * for hpost's use and reclaimed after the test completes.
 */
sbd_error_t *
drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
{
	drmach_board_t		*bp;
	drmach_device_t		*dp[MAX_CORES_PER_CMP];
	dr_mbox_msg_t		*obufp;
	sbd_error_t		*err;
	dr_testboard_reply_t	tbr;
	int			cpylen;
	char			*copts;
	int			is_io;
	cpu_flag_t		oflags[MAX_CORES_PER_CMP];

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	/*
	 * If the board is an I/O or MAXCAT board, setup I/O cage for
	 * testing. Slot 1 indicates I/O or MAXCAT board.
	 */

	is_io = DRMACH_BNUM2SLOT(bp->bnum);

	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);

	if (force)
		obufp->msgdata.dm_tb.force = 1;

	obufp->msgdata.dm_tb.immediate = 1;

	/* pass any caller-supplied hpost options along, bounded */
	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
	}

	if (is_io) {
		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);

		if (err) {
			kmem_free(obufp, sizeof (dr_mbox_msg_t));
			return (err);
		}
	}

	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
	    sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));

	if (!err)
		bp->cond = SBD_COND_OK;
	else
		bp->cond = SBD_COND_UNKNOWN;

	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
		/* examine test status */
		switch (tbr.test_status) {
			case DR_TEST_STATUS_IPOST:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(0, ESTC_TEST_IN_PROGRESS, NULL);
				break;
			case DR_TEST_STATUS_UNKNOWN:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1,
				    ESTC_TEST_STATUS_UNKNOWN, NULL);
				break;
			case DR_TEST_STATUS_FAILED:
				bp->cond = SBD_COND_FAILED;
				err = drerr_new(1, ESTC_TEST_FAILED, NULL);
				break;
			case DR_TEST_STATUS_ABORTED:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1, ESTC_TEST_ABORTED, NULL);
				break;
			default:
				bp->cond = SBD_COND_UNKNOWN;
				err = drerr_new(1, ESTC_TEST_RESULT_UNKNOWN,
				    NULL);
				break;
		}
	}

	/*
	 * If I/O cage test was performed, check for availability of the
	 * cpu used.  If cpu has been returned, it's OK to proceed with
	 * reconfiguring it for use.
	 */
	if (is_io) {
		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
		    tbr.cpu_recovered);
		DRMACH_PR("drmach_board_test: port id: %d",
		    tbr.cpu_portid);

		/*
		 * Check the cpu_recovered flag in the testboard reply, or
		 * if the testboard request message was not sent to SMS due
		 * to an mboxsc_putmsg() failure, it's OK to recover the
		 * cpu since hpost hasn't touched it.
		 */
		if ((tbr.cpu_recovered && tbr.cpu_portid ==
		    obufp->msgdata.dm_tb.cpu_portid) ||
		    ((err) && (err->e_code == ESTC_MBXRQST))) {

			int i;

			mutex_enter(&cpu_lock);
			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
				if (dp[i] != NULL) {
					(void) drmach_iocage_cpu_return(dp[i],
					    oflags[i]);
				}
			}
			mutex_exit(&cpu_lock);
		} else {
			cmn_err(CE_WARN, "Unable to recover port id %d "
			    "after I/O cage test: cpu_recovered=%d, "
			    "returned portid=%d",
			    obufp->msgdata.dm_tb.cpu_portid,
			    tbr.cpu_recovered, tbr.cpu_portid);
		}
		drmach_iocage_mem_return(&tbr);
	}
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	return (err);
}
5084 
/*
 * Unassign a board from the domain via a DRMSG_UNASSIGN request to
 * the SC.  A board that is configured or busy cannot be unassigned.
 * On success the board object is removed from the drmach_boards
 * array and destroyed.
 */
sbd_error_t *
drmach_board_unassign(drmachid_t id)
{
	drmach_board_t	*bp;
	sbd_error_t	*err;
	drmach_status_t	 stat;
	caddr_t		obufp;

	rw_enter(&drmach_boards_rwlock, RW_WRITER);

	if (!DRMACH_IS_BOARD_ID(id)) {
		rw_exit(&drmach_boards_rwlock);
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	}
	bp = id;

	err = drmach_board_status(id, &stat);
	if (err) {
		rw_exit(&drmach_boards_rwlock);
		return (err);
	}

	if (stat.configured || stat.busy) {
		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
	} else {

		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
		kmem_free(obufp, sizeof (dr_proto_hdr_t));
		if (!err) {
			/* the SC released the board; drop our object */
			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
				err = DRMACH_INTERNAL_ERROR();
			else
				drmach_board_dispose(bp);
		}
	}
	rw_exit(&drmach_boards_rwlock);
	return (err);
}
5125 
5126 static sbd_error_t *
5127 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5128 {
5129 	int		len;
5130 	drmach_reg_t	reg;
5131 	drmach_node_t	pp;
5132 	drmach_node_t	*np = dp->node;
5133 
5134 	/*
5135 	 * If the node does not have a portid property,
5136 	 * it represents a CMP device. For a CMP, the reg
5137 	 * property of the parent holds the information of
5138 	 * interest.
5139 	 */
5140 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5141 
5142 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5143 			return (DRMACH_INTERNAL_ERROR());
5144 		}
5145 		np = &pp;
5146 	}
5147 
5148 	if (np->n_getproplen(np, "reg", &len) != 0)
5149 		return (DRMACH_INTERNAL_ERROR());
5150 
5151 	if (len != sizeof (reg))
5152 		return (DRMACH_INTERNAL_ERROR());
5153 
5154 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5155 		return (DRMACH_INTERNAL_ERROR());
5156 
5157 	/* reassemble 64-bit base address */
5158 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5159 
5160 	return (NULL);
5161 }
5162 
/*
 * Cross-call target: read the Safari config register on the executing
 * cpu.  arg1 points at the destination for the register value, arg2
 * at a flag that is set to 1 to signal completion to the caller.
 */
static void
drmach_cpu_read(uint64_t arg1, uint64_t arg2)
{
	uint64_t	*saf_config_reg = (uint64_t *)arg1;
	uint_t		*reg_read = (uint_t *)arg2;

	*saf_config_reg = lddsafconfig();
	*reg_read = 0x1;
}
5172 
5173 /*
5174  * A return value of 1 indicates success and 0 indicates a failure
5175  */
5176 static int
5177 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5178 {
5179 
5180 	int 	rv = 0x0;
5181 
5182 	*scr = 0x0;
5183 
5184 	/*
5185 	 * Confirm cpu was in ready set when xc was issued.
5186 	 * This is done by verifying rv which is
5187 	 * set to 0x1 when xc_one is successful.
5188 	 */
5189 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5190 	    (uint64_t)scr, (uint64_t)&rv);
5191 
5192 	return (rv);
5193 
5194 }
5195 
5196 static sbd_error_t *
5197 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5198 {
5199 	drmach_node_t	*np;
5200 
5201 	np = cp->dev.node;
5202 
5203 	/*
5204 	 * If a CPU does not have a portid property, it must
5205 	 * be a CMP device with a cpuid property.
5206 	 */
5207 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5208 
5209 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5210 			return (DRMACH_INTERNAL_ERROR());
5211 		}
5212 	}
5213 
5214 	return (NULL);
5215 }
5216 
5217 /* Starcat CMP core id is bit 2 of the cpuid */
5218 #define	DRMACH_COREID_MASK	(1u << 2)
5219 #define	DRMACH_CPUID2SRAM_IDX(id) \
5220 		((id & DRMACH_COREID_MASK) >> 1 | (id & 0x1))
5221 
/*
 * Construct a drmach_cpu_t for the cpu device described by proto.
 * Reads the cpu's register base (scr_pa), cpuid and implementation,
 * records the board's cpu type, and primes the locked 8K TTE used to
 * map this cpu's STARDRB buffer in CPU SRAM.  On success *idp holds
 * the new object; on failure *idp is zeroed and an error is returned.
 */
static sbd_error_t *
drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
{
	/* forward declarations of the common-object operations */
	static void drmach_cpu_dispose(drmachid_t);
	static sbd_error_t *drmach_cpu_release(drmachid_t);
	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	scr_pa;
	drmach_cpu_t	*cp = NULL;
	pfn_t		pfn;
	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
	int		idx;
	int		impl;
	processorid_t	cpuid;

	err = drmach_read_reg_addr(proto, &scr_pa);
	if (err) {
		goto fail;
	}

	/* clone the prototype and install the cpu object operations */
	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
	bcopy(proto, &cp->dev, sizeof (cp->dev));
	cp->dev.node = drmach_node_dup(proto->node);
	cp->dev.cm.isa = (void *)drmach_cpu_new;
	cp->dev.cm.dispose = drmach_cpu_dispose;
	cp->dev.cm.release = drmach_cpu_release;
	cp->dev.cm.status = drmach_cpu_status;
	cp->scr_pa = scr_pa;

	err = drmach_cpu_read_cpuid(cp, &cpuid);
	if (err) {
		goto fail;
	}

	err = drmach_cpu_get_impl(cp, &impl);
	if (err) {
		goto fail;
	}

	cp->cpuid = cpuid;
	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);

	/*
	 * Init the board cpu type.  Assumes all board cpus are the same type.
	 */
	if (cp->dev.bp->cpu_impl == 0) {
		cp->dev.bp->cpu_impl = impl;
	}
	ASSERT(cp->dev.bp->cpu_impl == impl);

	/*
	 * XXX CHEETAH SUPPORT
	 * determine if the domain uses Cheetah procs
	 */
	if (drmach_is_cheetah < 0) {
		drmach_is_cheetah = IS_CHEETAH(impl);
	}

	/*
	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
	 * pair. Each cpu uses 8KB according to the following layout:
	 *
	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 2:	even numbered Panther/Jaguar core 1's
	 * Page 3:	odd numbered Panther/Jaguar core 1's
	 */
	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
	pfn = cpu_sram_pa >> PAGESHIFT;

	/* the slot must not already be occupied by another cpu's TTE */
	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
	    TTE_VALID_INT | TTE_SZ_INT(TTE8K);
	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;

	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);

	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
	    cp->dev.type, cp->dev.unum);

	*idp = (drmachid_t)cp;
	return (NULL);

fail:
	if (cp) {
		drmach_node_dispose(cp->dev.node);
		kmem_free(cp, sizeof (*cp));
	}

	*idp = (drmachid_t)0;
	return (err);
}
5324 
5325 static void
5326 drmach_cpu_dispose(drmachid_t id)
5327 {
5328 	drmach_cpu_t	*self;
5329 	processorid_t	cpuid;
5330 
5331 	ASSERT(DRMACH_IS_CPU_ID(id));
5332 
5333 	self = id;
5334 	if (self->dev.node)
5335 		drmach_node_dispose(self->dev.node);
5336 
5337 	cpuid = self->cpuid;
5338 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5339 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5340 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5341 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5342 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5343 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5344 
5345 	kmem_free(self, sizeof (*self));
5346 }
5347 
/*
 * Power on and restart the given (currently powered-off) cpu through
 * the PROM, then, if the board requires it, reprogram the new cpu's
 * LPA range via an xtrap to drmach_set_lpa.  Called with cpu_lock
 * held.  Always returns 0.
 */
static int
drmach_cpu_start(struct cpu *cp)
{
	extern xcfunc_t	drmach_set_lpa;
	extern void	restart_other_cpu(int);
	int		cpuid = cp->cpu_id;
	int		rv, bnum;
	drmach_board_t	*bp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);

	cp->cpu_flags &= ~CPU_POWEROFF;

	/*
	 * NOTE: restart_other_cpu pauses cpus during the
	 *	 slave cpu start.  This helps to quiesce the
	 *	 bus traffic a bit which makes the tick sync
	 *	 routine in the prom more robust.
	 */
	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);

	if (prom_hotaddcpu(cpuid) != 0) {
		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
		    cpuid);
	}

	restart_other_cpu(cpuid);

	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
	if (rv == -1 || bp == NULL) {
		DRMACH_PR("drmach_cpu_start: cannot read board info for "
		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, bp);
	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
		int exp;
		int ntries;

		mutex_enter(&drmach_xt_mb_lock);
		mutex_enter(&drmach_slice_table_lock);
		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

		/*
		 * drmach_slice_table[*]
		 *	bit 5	valid
		 *	bit 0:4	slice number
		 *
		 * drmach_xt_mb[*] format for drmach_set_lpa
		 *	bit 7	valid
		 *	bit 6	set null LPA (overrides bits 0:4)
		 *	bit 0:4	slice number
		 *
		 * drmach_set_lpa derives processor CBASE and CBND
		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
		 * number; CBND = slice number + 1.
		 * No action is taken if bit 7 is zero.
		 */
		exp = (cpuid >> 5) & 0x1f;
		if (drmach_slice_table[exp] & 0x20) {
			drmach_xt_mb[cpuid] = 0x80 |
			    (drmach_slice_table[exp] & 0x1f);
		} else {
			drmach_xt_mb[cpuid] = 0x80 | 0x40;
		}

		drmach_xt_ready = 0;

		xt_one(cpuid, drmach_set_lpa, NULL, NULL);

		/* poll for the target cpu to acknowledge the LPA update */
		ntries = drmach_cpu_ntries;
		while (!drmach_xt_ready && ntries) {
			DELAY(drmach_cpu_delay);
			ntries--;
		}

		mutex_exit(&drmach_slice_table_lock);
		mutex_exit(&drmach_xt_mb_lock);

		DRMACH_PR(
		    "waited %d out of %d tries for drmach_set_lpa on cpu%d",
		    drmach_cpu_ntries - ntries, drmach_cpu_ntries,
		    cp->cpu_id);
	}

	/* flush any stale CPU SRAM page mapping from the new cpu's TLBs */
	xt_one(cpuid, vtag_flushpage_tl1, (uint64_t)drmach_cpu_sram_va,
	    (uint64_t)ksfmmup);

	return (0);
}
5438 
5439 /*
5440  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5441  * it has been offlined. The function of this routine is to get the cpu
5442  * spinning in a safe place. The requirement is that the system will not
5443  * reference anything on the detaching board (memory and i/o is detached
5444  * elsewhere) and that the CPU not reference anything on any other board
5445  * in the system.  This isolation is required during and after the writes
5446  * to the domain masks to remove the board from the domain.
5447  *
5448  * To accomplish this isolation the following is done:
5449  *	1) Create a locked mapping to the STARDRB data buffer located
5450  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5451  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5452  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5453  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5454  *	   boards. Each STARDRB buffer is logically divided by DR into one
5455  *	   8KB page per cpu (or Jaguar core).
5456  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5457  *	3) Jump to function now in the cpu sram.
5458  *	   Function will:
5459  *	   3.1) Flush its Ecache (displacement).
5460  *	   3.2) Flush its Dcache with HW mechanism.
5461  *	   3.3) Flush its Icache with HW mechanism.
5462  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5463  *	   3.5) Set LPA to NULL
5464  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5465  *	        recovered by drmach_cpu_poweroff().
5466  *	4) Jump into an infinite loop.
5467  */
5468 
static void
drmach_cpu_stop_self(void)
{
	extern void drmach_shutdown_asm(uint64_t, uint64_t, int, int, uint64_t);
	extern void drmach_shutdown_asm_end(void);

	tte_t		*tte;
	uint_t		*p, *q;
	uint64_t	 stack_pointer;

	/* the shutdown code must fit in this cpu's single 8KB SRAM page */
	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
	    (ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);

	/*
	 * Lock the mapping of this cpu's SRAM page (built by
	 * drmach_cpu_new()) into both the D-TLB and I-TLB so it
	 * survives the TLB flush done by the shutdown code.
	 */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) && TTE_IS_PRIVILEGED(tte) &&
	    TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)drmach_cpu_sram_va;
	q = (uint_t *)drmach_shutdown_asm;
	while (q < (uint_t *)drmach_shutdown_asm_end)
		*p++ = *q++;

	/* zero to assist debug */
	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
	while (p < q)
		*p++ = 0;

	/* a parking spot for the stack pointer */
	stack_pointer = (uint64_t)q;

	/* call copy of drmach_shutdown_asm */
	(*(void (*)())drmach_cpu_sram_va)(
	    stack_pointer,
	    drmach_iocage_paddr,
	    cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize,
	    va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
}
5510 
/*
 * Final path for a detaching cpu: flush register windows, raise IPL,
 * mark the cpu offline/quiesced/powered-off, and jump to the
 * SRAM-resident shutdown code via drmach_cpu_stop_self(), which does
 * not return.  Reaching the cmn_err() below indicates failure.
 */
static void
drmach_cpu_shutdown_self(void)
{
	cpu_t		*cp = CPU;
	int		cpuid = cp->cpu_id;
	extern void	flush_windows(void);

	flush_windows();

	(void) spl8();

	/* must be quiesced: no pending interrupts, idle/startup thread only */
	ASSERT(cp->cpu_intr_actv == 0);
	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
	    cp->cpu_thread == cp->cpu_startup_thread);

	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;

	drmach_cpu_stop_self();

	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
}
5532 
5533 static sbd_error_t *
5534 drmach_cpu_release(drmachid_t id)
5535 {
5536 	drmach_cpu_t	*cp;
5537 	struct cpu	*cpu;
5538 	sbd_error_t	*err;
5539 
5540 	if (!DRMACH_IS_CPU_ID(id))
5541 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5542 	cp = id;
5543 
5544 	ASSERT(MUTEX_HELD(&cpu_lock));
5545 
5546 	cpu = cpu_get(cp->cpuid);
5547 	if (cpu == NULL)
5548 		err = DRMACH_INTERNAL_ERROR();
5549 	else
5550 		err = NULL;
5551 
5552 	return (err);
5553 }
5554 
5555 static sbd_error_t *
5556 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5557 {
5558 	drmach_cpu_t	*cp;
5559 	drmach_device_t	*dp;
5560 
5561 	ASSERT(DRMACH_IS_CPU_ID(id));
5562 	cp = id;
5563 	dp = &cp->dev;
5564 
5565 	stat->assigned = dp->bp->assigned;
5566 	stat->powered = dp->bp->powered;
5567 	mutex_enter(&cpu_lock);
5568 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5569 	mutex_exit(&cpu_lock);
5570 	stat->busy = dp->busy;
5571 	strncpy(stat->type, dp->type, sizeof (stat->type));
5572 	stat->info[0] = '\0';
5573 
5574 	return (NULL);
5575 }
5576 
5577 sbd_error_t *
5578 drmach_cpu_disconnect(drmachid_t id)
5579 {
5580 	if (!DRMACH_IS_CPU_ID(id))
5581 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5582 
5583 	return (NULL);
5584 }
5585 
5586 sbd_error_t *
5587 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5588 {
5589 	drmach_cpu_t	*cpu;
5590 
5591 	if (!DRMACH_IS_CPU_ID(id))
5592 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5593 	cpu = id;
5594 
5595 	*cpuid = cpu->cpuid;
5596 	return (NULL);
5597 }
5598 
5599 sbd_error_t *
5600 drmach_cpu_get_impl(drmachid_t id, int *ip)
5601 {
5602 	drmach_node_t	*np;
5603 	int		impl;
5604 
5605 	if (!DRMACH_IS_CPU_ID(id))
5606 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5607 
5608 	np = ((drmach_device_t *)id)->node;
5609 
5610 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5611 		return (DRMACH_INTERNAL_ERROR());
5612 	}
5613 
5614 	*ip = impl;
5615 
5616 	return (NULL);
5617 }
5618 
5619 /*
5620  * Flush this cpu's ecache, then ensure all outstanding safari
5621  * transactions have retired.
5622  */
5623 void
5624 drmach_cpu_flush_ecache_sync(void)
5625 {
5626 	uint64_t *p;
5627 
5628 	ASSERT(curthread->t_bound_cpu == CPU);
5629 
5630 	cpu_flush_ecache();
5631 
5632 	mutex_enter(&drmach_bus_sync_lock);
5633 	for (p = drmach_bus_sync_list; *p; p++)
5634 		(void) ldphys(*p);
5635 	mutex_exit(&drmach_bus_sync_lock);
5636 
5637 	cpu_flush_ecache();
5638 }
5639 
5640 sbd_error_t *
5641 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5642 {
5643 	drmach_device_t	*dp;
5644 
5645 	if (!DRMACH_IS_DEVICE_ID(id))
5646 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5647 	dp = id;
5648 
5649 	*dip = dp->node->n_getdip(dp->node);
5650 	return (NULL);
5651 }
5652 
5653 sbd_error_t *
5654 drmach_io_is_attached(drmachid_t id, int *yes)
5655 {
5656 	drmach_device_t *dp;
5657 	dev_info_t	*dip;
5658 	int state;
5659 
5660 	if (!DRMACH_IS_IO_ID(id))
5661 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5662 	dp = id;
5663 
5664 	dip = dp->node->n_getdip(dp->node);
5665 	if (dip == NULL) {
5666 		*yes = 0;
5667 		return (NULL);
5668 	}
5669 
5670 	state = ddi_get_devstate(dip);
5671 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5672 
5673 	return (NULL);
5674 }
5675 
5676 static int
5677 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5678 {
5679 	char			dtype[OBP_MAXPROPNAME];
5680 	int			portid;
5681 	uint_t			pci_csr_base;
5682 	struct pci_phys_spec	*regbuf = NULL;
5683 	int			rv, len;
5684 
5685 	ASSERT(dip != NULL);
5686 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5687 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5688 		return (0);
5689 
5690 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5691 	    (caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5692 
5693 		if (strncmp(dtype, "pci", 3) == 0) {
5694 
5695 			/*
5696 			 * Get safari portid. All schizo/xmits 0
5697 			 * safari IDs end in 0x1C.
5698 			 */
5699 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid",
5700 			    &len);
5701 
5702 			if ((rv != DDI_PROP_SUCCESS) ||
5703 			    (len > sizeof (portid)))
5704 				return (0);
5705 
5706 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5707 			    "portid", (caddr_t)&portid, &len);
5708 
5709 			if (rv != DDI_PROP_SUCCESS)
5710 				return (0);
5711 
5712 			if ((portid & 0x1F) != 0x1C)
5713 				return (0);
5714 
5715 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5716 			    DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5717 			    &len) == DDI_PROP_SUCCESS) {
5718 
5719 				pci_csr_base = regbuf[0].pci_phys_mid &
5720 				    PCI_CONF_ADDR_MASK;
5721 				kmem_free(regbuf, len);
5722 				/*
5723 				 * All PCI B-Leafs are at configspace 0x70.0000.
5724 				 */
5725 				if (pci_csr_base == 0x700000)
5726 					return (1);
5727 			}
5728 		}
5729 	}
5730 	return (0);
5731 }
5732 
/* PCI bridge binding names used to recognize schizo and xmits leaves */
#define	SCHIZO_BINDING_NAME		"pci108e,8001"
#define	XMITS_BINDING_NAME		"pci108e,8002"
5735 
5736 /*
5737  * Verify if the dip is an instance of MAN 'eri'.
5738  */
5739 static int
5740 drmach_dip_is_man_eri(dev_info_t *dip)
5741 {
5742 	struct pci_phys_spec	*regbuf = NULL;
5743 	dev_info_t		*parent_dip;
5744 	char			*name;
5745 	uint_t			pci_device;
5746 	uint_t			pci_function;
5747 	int			len;
5748 
5749 	if (dip == NULL)
5750 		return (0);
5751 	/*
5752 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5753 	 */
5754 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5755 	    ((name = ddi_binding_name(parent_dip)) == NULL))
5756 		return (0);
5757 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5758 		/*
5759 		 * This RIO could be on XMITS, so get the dip to
5760 		 * XMITS PCI Leaf.
5761 		 */
5762 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5763 			return (0);
5764 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5765 		    (strcmp(name, XMITS_BINDING_NAME) != 0)) {
5766 			return (0);
5767 		}
5768 	}
5769 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5770 		return (0);
5771 	/*
5772 	 * Finally make sure it is the MAN eri.
5773 	 */
5774 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5775 	    "reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5776 
5777 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5778 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5779 		kmem_free(regbuf, len);
5780 
5781 		/*
5782 		 * The network function of the RIO ASIC will always be
5783 		 * device 3 and function 1 ("network@3,1").
5784 		 */
5785 		if ((pci_device == 3) && (pci_function == 1))
5786 			return (1);
5787 	}
5788 	return (0);
5789 }
5790 
/*
 * Walk context for drmach_board_find_io_insts().
 */
typedef struct {
	int		iosram_inst;	/* in: <0 to search; out: instance */
	dev_info_t	*eri_dip;	/* out: MAN eri dip, held, or NULL */
	int		bnum;		/* in: board number to search */
} drmach_io_inst_t;
5796 
5797 int
5798 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5799 {
5800 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5801 
5802 	int	rv;
5803 	int	len;
5804 	int	portid;
5805 	char	name[OBP_MAXDRVNAME];
5806 
5807 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5808 
5809 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5810 		return (DDI_WALK_CONTINUE);
5811 	}
5812 
5813 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5814 	    "portid", (caddr_t)&portid, &len);
5815 	if (rv != DDI_PROP_SUCCESS)
5816 		return (DDI_WALK_CONTINUE);
5817 
5818 	/* ignore devices that are not on this board */
5819 	if (drmach_portid2bnum(portid) != ios->bnum)
5820 		return (DDI_WALK_CONTINUE);
5821 
5822 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5823 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "name", &len);
5824 		if (rv == DDI_PROP_SUCCESS) {
5825 
5826 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5827 			    0, "name",
5828 			    (caddr_t)name, &len);
5829 			if (rv != DDI_PROP_SUCCESS)
5830 				return (DDI_WALK_CONTINUE);
5831 
5832 			if (strncmp("iosram", name, 6) == 0) {
5833 				ios->iosram_inst = ddi_get_instance(dip);
5834 				if (ios->eri_dip == NULL)
5835 					return (DDI_WALK_CONTINUE);
5836 				else
5837 					return (DDI_WALK_TERMINATE);
5838 			} else {
5839 				if (drmach_dip_is_man_eri(dip)) {
5840 					ASSERT(ios->eri_dip == NULL);
5841 					ndi_hold_devi(dip);
5842 					ios->eri_dip = dip;
5843 					if (ios->iosram_inst < 0)
5844 						return (DDI_WALK_CONTINUE);
5845 					else
5846 						return (DDI_WALK_TERMINATE);
5847 				}
5848 			}
5849 		}
5850 	}
5851 	return (DDI_WALK_CONTINUE);
5852 }
5853 
/*
 * Prepare an IO board for release: walk the board's device subtree
 * looking for its iosram and MAN eri instances, and if an iosram
 * instance is found, request that the iosram tunnel be switched off
 * this board (retrying while the switch reports EAGAIN).
 */
sbd_error_t *
drmach_io_pre_release(drmachid_t id)
{
	drmach_io_inst_t	ios;
	drmach_board_t		*bp;
	int			rv = 0;
	sbd_error_t		*err = NULL;
	drmach_device_t		*dp;
	dev_info_t		*rdip;
	int			circ;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;
	bp = dp->bp;

	rdip = dp->node->n_getdip(dp->node);

	/* walk device tree to find iosram instance for the board */
	ios.iosram_inst = -1;
	ios.eri_dip = NULL;
	ios.bnum = bp->bnum;

	ndi_devi_enter(rdip, &circ);
	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
	    (void *)&ios);

	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
	    ios.bnum, ios.iosram_inst, ios.eri_dip);
	ndi_devi_exit(rdip, circ);

	if (ios.eri_dip) {
		/*
		 * Release hold acquired in drmach_board_find_io_insts()
		 */
		ndi_rele_devi(ios.eri_dip);
	}
	if (ios.iosram_inst >= 0) {
		/* call for tunnel switch */
		do {
			DRMACH_PR("calling iosram_switchfrom(%d)\n",
			    ios.iosram_inst);
			rv = iosram_switchfrom(ios.iosram_inst);
			if (rv)
				DRMACH_PR("iosram_switchfrom returned %d\n",
				    rv);
		} while (rv == EAGAIN);

		if (rv)
			err = drerr_new(0, ESTC_IOSWITCH, NULL);
	}
	return (err);
}
5907 
/*
 * Undo a prior drmach_io_pre_release(): if the optional man driver is
 * loaded (man_dr_attach resolvable), locate this board's MAN eri dip
 * and call man_dr_attach() on it to switch the MAN network back onto
 * this board.
 */
sbd_error_t *
drmach_io_unrelease(drmachid_t id)
{
	dev_info_t	*dip;
	sbd_error_t	*err = NULL;
	drmach_device_t	*dp;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	if (dip == NULL)
		err = DRMACH_INTERNAL_ERROR();
	else {
		int (*func)(dev_info_t *dip);

		/* man driver is optional; resolve its entry point at runtime */
		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
		    0);

		if (func) {
			drmach_io_inst_t ios;
			dev_info_t	*pdip;
			int		circ;

			/*
			 * Walk device tree to find rio dip for the board
			 * Since we are not interested in iosram instance here,
			 * initialize it to 0, so that the walk terminates as
			 * soon as eri dip is found.
			 */
			ios.iosram_inst = 0;
			ios.eri_dip = NULL;
			ios.bnum = dp->bp->bnum;

			if (pdip = ddi_get_parent(dip)) {
				ndi_hold_devi(pdip);
				ndi_devi_enter(pdip, &circ);
			}
			/*
			 * Root node doesn't have to be held in any way.
			 */
			ddi_walk_devs(dip, drmach_board_find_io_insts,
			    (void *)&ios);

			if (pdip) {
				ndi_devi_exit(pdip, circ);
				ndi_rele_devi(pdip);
			}

			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
			    ios.bnum, ios.eri_dip);

			if (ios.eri_dip) {
				DRMACH_PR("calling man_dr_attach\n");
				if ((*func)(ios.eri_dip))
					err = drerr_new(0, ESTC_NWSWITCH, NULL);
				/*
				 * Release hold acquired in
				 * drmach_board_find_io_insts()
				 */
				ndi_rele_devi(ios.eri_dip);
			}
		} else
			DRMACH_PR("man_dr_attach NOT present\n");
	}
	return (err);
}
5977 
/*
 * Release callback for an IO board: if the optional man driver is
 * loaded (man_dr_detach resolvable), locate this board's MAN eri dip
 * and call man_dr_detach() on it to switch the MAN network away from
 * this board.  Mirrors drmach_io_unrelease().
 */
static sbd_error_t *
drmach_io_release(drmachid_t id)
{
	dev_info_t	*dip;
	sbd_error_t	*err = NULL;
	drmach_device_t	*dp;

	if (!DRMACH_IS_IO_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	if (dip == NULL)
		err = DRMACH_INTERNAL_ERROR();
	else {
		int (*func)(dev_info_t *dip);

		/* man driver is optional; resolve its entry point at runtime */
		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
		    0);

		if (func) {
			drmach_io_inst_t ios;
			dev_info_t	*pdip;
			int		circ;

			/*
			 * Walk device tree to find rio dip for the board
			 * Since we are not interested in iosram instance here,
			 * initialize it to 0, so that the walk terminates as
			 * soon as eri dip is found.
			 */
			ios.iosram_inst = 0;
			ios.eri_dip = NULL;
			ios.bnum = dp->bp->bnum;

			if (pdip = ddi_get_parent(dip)) {
				ndi_hold_devi(pdip);
				ndi_devi_enter(pdip, &circ);
			}
			/*
			 * Root node doesn't have to be held in any way.
			 */
			ddi_walk_devs(dip, drmach_board_find_io_insts,
			    (void *)&ios);

			if (pdip) {
				ndi_devi_exit(pdip, circ);
				ndi_rele_devi(pdip);
			}

			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
			    ios.bnum, ios.eri_dip);

			if (ios.eri_dip) {
				DRMACH_PR("calling man_dr_detach\n");
				if ((*func)(ios.eri_dip))
					err = drerr_new(0, ESTC_NWSWITCH, NULL);
				/*
				 * Release hold acquired in
				 * drmach_board_find_io_insts()
				 */
				ndi_rele_devi(ios.eri_dip);
			}
		} else
			DRMACH_PR("man_dr_detach NOT present\n");
	}
	return (err);
}
6047 
6048 sbd_error_t *
6049 drmach_io_post_release(drmachid_t id)
6050 {
6051 	char 		*path;
6052 	dev_info_t	*rdip;
6053 	drmach_device_t	*dp;
6054 
6055 	if (!DRMACH_IS_DEVICE_ID(id))
6056 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6057 	dp = id;
6058 
6059 	rdip = dp->node->n_getdip(dp->node);
6060 
6061 	/*
6062 	 * Always called after drmach_unconfigure() which on Starcat
6063 	 * unconfigures the branch but doesn't remove it so the
6064 	 * dip must always exist.
6065 	 */
6066 	ASSERT(rdip);
6067 
6068 	ASSERT(e_ddi_branch_held(rdip));
6069 #ifdef DEBUG
6070 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6071 	(void) ddi_pathname(rdip, path);
6072 	DRMACH_PR("post_release dip path is: %s\n", path);
6073 	kmem_free(path, MAXPATHLEN);
6074 #endif
6075 
6076 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6077 		if (schpc_remove_pci(rdip)) {
6078 			DRMACH_PR("schpc_remove_pci failed\n");
6079 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6080 		} else {
6081 			DRMACH_PR("schpc_remove_pci succeeded\n");
6082 		}
6083 	}
6084 
6085 	return (NULL);
6086 }
6087 
/*
 * Post-attach processing for an IO device: register PCI slots with
 * the schpc hotplug driver, then, if the optional man driver is
 * loaded, locate the board's MAN eri dip and call man_dr_attach() on
 * it to bring the MAN network up on this board.
 */
sbd_error_t *
drmach_io_post_attach(drmachid_t id)
{
	int		circ;
	dev_info_t	*dip;
	dev_info_t	*pdip;
	drmach_device_t	*dp;
	drmach_io_inst_t ios;

	if (!DRMACH_IS_DEVICE_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	dp = id;

	dip = dp->node->n_getdip(dp->node);

	/*
	 * We held the branch rooted at dip earlier, so at a minimum the
	 * root i.e. dip must be present in the device tree.
	 */
	ASSERT(dip);

	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
		/* schpc failure is logged but does not fail the attach */
		if (schpc_add_pci(dip)) {
			DRMACH_PR("schpc_add_pci failed\n");
		} else {
			DRMACH_PR("schpc_add_pci succeeded\n");
		}
	}

	/*
	 * Walk device tree to find rio dip for the board
	 * Since we are not interested in iosram instance here,
	 * initialize it to 0, so that the walk terminates as
	 * soon as eri dip is found.
	 */
	ios.iosram_inst = 0;
	ios.eri_dip = NULL;
	ios.bnum = dp->bp->bnum;

	if (pdip = ddi_get_parent(dip)) {
		ndi_hold_devi(pdip);
		ndi_devi_enter(pdip, &circ);
	}
	/*
	 * Root node doesn't have to be held in any way.
	 */
	ddi_walk_devs(dip, drmach_board_find_io_insts, (void *)&ios);
	if (pdip) {
		ndi_devi_exit(pdip, circ);
		ndi_rele_devi(pdip);
	}

	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n", ios.bnum,
	    ios.eri_dip);

	if (ios.eri_dip) {
		int (*func)(dev_info_t *dip);

		/* man driver is optional; resolve its entry point at runtime */
		func =
		    (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);

		if (func) {
			DRMACH_PR("calling man_dr_attach\n");
			(void) (*func)(ios.eri_dip);
		} else {
			DRMACH_PR("man_dr_attach NOT present\n");
		}

		/*
		 * Release hold acquired in drmach_board_find_io_insts()
		 */
		ndi_rele_devi(ios.eri_dip);

	}

	return (NULL);
}
6165 
6166 static sbd_error_t *
6167 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6168 {
6169 	drmach_device_t *dp;
6170 	sbd_error_t	*err;
6171 	int		 configured;
6172 
6173 	ASSERT(DRMACH_IS_IO_ID(id));
6174 	dp = id;
6175 
6176 	err = drmach_io_is_attached(id, &configured);
6177 	if (err)
6178 		return (err);
6179 
6180 	stat->assigned = dp->bp->assigned;
6181 	stat->powered = dp->bp->powered;
6182 	stat->configured = (configured != 0);
6183 	stat->busy = dp->busy;
6184 	strncpy(stat->type, dp->type, sizeof (stat->type));
6185 	stat->info[0] = '\0';
6186 
6187 	return (NULL);
6188 }
6189 
6190 sbd_error_t *
6191 drmach_mem_init_size(drmachid_t id)
6192 {
6193 	drmach_mem_t	*mp;
6194 	sbd_error_t	*err;
6195 	gdcd_t		*gdcd;
6196 	mem_chunk_t	*chunk;
6197 	uint64_t	 chunks, pa, mask, sz;
6198 
6199 	if (!DRMACH_IS_MEM_ID(id))
6200 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6201 	mp = id;
6202 
6203 	err = drmach_mem_get_base_physaddr(id, &pa);
6204 	if (err)
6205 		return (err);
6206 
6207 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6208 	pa &= mask;
6209 
6210 	gdcd = drmach_gdcd_new();
6211 	if (gdcd == NULL)
6212 		return (DRMACH_INTERNAL_ERROR());
6213 
6214 	sz = 0;
6215 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6216 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6217 	while (chunks-- != 0) {
6218 		if ((chunk->mc_base_pa & mask) == pa) {
6219 			sz += chunk->mc_mbytes * 1048576;
6220 		}
6221 
6222 		++chunk;
6223 	}
6224 	mp->nbytes = sz;
6225 
6226 	drmach_gdcd_dispose(gdcd);
6227 	return (NULL);
6228 }
6229 
6230 /*
6231  * Hardware registers are organized into consecutively
6232  * addressed registers.  The reg property's hi and lo fields
6233  * together describe the base address of the register set for
6234  * this memory-controller.  Register descriptions and offsets
6235  * (from the base address) are as follows:
6236  *
6237  * Description				Offset	Size (bytes)
6238  * Memory Timing Control Register I	0x00	8
6239  * Memory Timing Control Register II	0x08	8
6240  * Memory Address Decoding Register I	0x10	8
6241  * Memory Address Decoding Register II	0x18	8
6242  * Memory Address Decoding Register III	0x20	8
6243  * Memory Address Decoding Register IV	0x28	8
6244  * Memory Address Control Register	0x30	8
6245  * Memory Timing Control Register III	0x38	8
6246  * Memory Timing Control Register IV	0x40	8
6247  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6248  * EMU Activity Status Register		0x50	8 (Panther only)
6249  *
6250  * Only the Memory Address Decoding Register and EMU Activity Status
6251  * Register addresses are needed for DRMACH.
6252  */
/*
 * Construct a new drmach mem id from the given prototype device.
 * Only the first mem unit found on a board is exposed to the PIM
 * layer; subsequent units on the same board are linked behind it and
 * hidden (*idp set to 0).  A memory controller with no valid banks
 * was not configured into the domain by POST and is likewise hidden.
 */
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_mem_dispose(drmachid_t);
	static sbd_error_t *drmach_mem_release(drmachid_t);
	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	 madr_pa;
	drmach_mem_t	*mp;
	int		 bank, count;

	err = drmach_read_reg_addr(proto, &madr_pa);
	if (err)
		return (err);

	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
	bcopy(proto, &mp->dev, sizeof (mp->dev));
	mp->dev.node = drmach_node_dup(proto->node);
	mp->dev.cm.isa = (void *)drmach_mem_new;
	mp->dev.cm.dispose = drmach_mem_dispose;
	mp->dev.cm.release = drmach_mem_release;
	mp->dev.cm.status = drmach_mem_status;
	mp->madr_pa = madr_pa;

	snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s", mp->dev.type);

	/* look for at least one valid bank; one is enough, so stop early */
	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
		uint64_t madr;

		drmach_mem_read_madr(mp, bank, &madr);
		if (madr & DRMACH_MC_VALID_MASK) {
			count += 1;
			break;
		}
	}

	/*
	 * If none of the banks had their valid bit set, that means
	 * post did not configure this MC to participate in the
	 * domain.  So, pretend this node does not exist by returning
	 * a drmachid of zero.
	 */
	if (count == 0) {
		/* drmach_mem_dispose frees board mem list */
		drmach_node_dispose(mp->dev.node);
		kmem_free(mp, sizeof (*mp));
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Only one mem unit per board is exposed to the
	 * PIM layer.  The first mem unit encountered during
	 * tree walk is used to represent all mem units on
	 * the same board.
	 */
	if (mp->dev.bp->mem == NULL) {
		/* start list of mem units on this board */
		mp->dev.bp->mem = mp;

		/*
		 * force unum to zero since this is the only mem unit
		 * that will be visible to the PIM layer.
		 */
		mp->dev.unum = 0;

		/*
		 * board memory size kept in this mem unit only
		 */
		err = drmach_mem_init_size(mp);
		if (err) {
			mp->dev.bp->mem = NULL;
			/* drmach_mem_dispose frees board mem list */
			drmach_node_dispose(mp->dev.node);
			kmem_free(mp, sizeof (*mp));
			*idp = (drmachid_t)0;
			return (NULL);
		}

		/*
		 * allow this instance (the first encountered on this board)
		 * to be visible to the PIM layer.
		 */
		*idp = (drmachid_t)mp;
	} else {
		drmach_mem_t *lp;

		/* hide this mem instance behind the first. */
		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
			;
		lp->next = mp;

		/*
		 * hide this instance from the caller.
		 * See drmach_board_find_devices_cb() for details.
		 */
		*idp = (drmachid_t)0;
	}

	return (NULL);
}
6355 
6356 static void
6357 drmach_mem_dispose(drmachid_t id)
6358 {
6359 	drmach_mem_t *mp, *next;
6360 	drmach_board_t *bp;
6361 
6362 	ASSERT(DRMACH_IS_MEM_ID(id));
6363 
6364 	mutex_enter(&drmach_bus_sync_lock);
6365 
6366 	mp = id;
6367 	bp = mp->dev.bp;
6368 
6369 	do {
6370 		if (mp->dev.node)
6371 			drmach_node_dispose(mp->dev.node);
6372 
6373 		next = mp->next;
6374 		kmem_free(mp, sizeof (*mp));
6375 		mp = next;
6376 	} while (mp);
6377 
6378 	bp->mem = NULL;
6379 
6380 	drmach_bus_sync_list_update();
6381 	mutex_exit(&drmach_bus_sync_lock);
6382 }
6383 
6384 sbd_error_t *
6385 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6386 {
6387 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6388 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6389 	int		rv;
6390 
6391 	ASSERT(size != 0);
6392 
6393 	if (!DRMACH_IS_MEM_ID(id))
6394 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6395 
6396 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
6397 	if (rv == ENOMEM) {
6398 		cmn_err(CE_WARN, "%lu megabytes not available"
6399 		    " to kernel cage", size >> 20);
6400 	} else if (rv != 0) {
6401 		/* catch this in debug kernels */
6402 		ASSERT(0);
6403 
6404 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6405 		    " return value %d", rv);
6406 	}
6407 
6408 	return (NULL);
6409 }
6410 
6411 sbd_error_t *
6412 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6413 {
6414 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6415 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6416 	int		 rv;
6417 
6418 	if (!DRMACH_IS_MEM_ID(id))
6419 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6420 
6421 	if (size > 0) {
6422 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6423 		if (rv != 0) {
6424 			cmn_err(CE_WARN,
6425 			    "unexpected kcage_range_delete_post_mem_del"
6426 			    " return value %d", rv);
6427 			return (DRMACH_INTERNAL_ERROR());
6428 		}
6429 	}
6430 
6431 	return (NULL);
6432 }
6433 
6434 sbd_error_t *
6435 drmach_mem_disable(drmachid_t id)
6436 {
6437 	if (!DRMACH_IS_MEM_ID(id))
6438 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6439 	else
6440 		return (NULL);
6441 }
6442 
6443 sbd_error_t *
6444 drmach_mem_enable(drmachid_t id)
6445 {
6446 	if (!DRMACH_IS_MEM_ID(id))
6447 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6448 	else
6449 		return (NULL);
6450 }
6451 
/*
 * Compute the alignment mask for this board's memory: the largest
 * segment size programmed into any valid bank of the board's memory
 * controllers, minus one.  If a bank's UK field does not match any
 * table entry, fall back to the full slice size immediately.
 */
sbd_error_t *
drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
{
#define	MB(mb) ((mb) * 1048576ull)

	/* map of MADR UK field values to the segment size they encode */
	static struct {
		uint_t		uk;
		uint64_t	segsz;
	}  uk2segsz[] = {
		{ 0x003,	MB(256)	  },
		{ 0x007,	MB(512)	  },
		{ 0x00f,	MB(1024)  },
		{ 0x01f,	MB(2048)  },
		{ 0x03f,	MB(4096)  },
		{ 0x07f,	MB(8192)  },
		{ 0x0ff,	MB(16384) },
		{ 0x1ff,	MB(32768) },
		{ 0x3ff,	MB(65536) },
		{ 0x7ff,	MB(131072) }
	};
	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);

#undef MB

	uint64_t	 largest_sz = 0;
	drmach_mem_t	*mp;

	if (!DRMACH_IS_MEM_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	/* prime the result with a default value */
	*mask = (DRMACH_MEM_SLICE_SIZE - 1);

	/* scan every bank of every mem unit chained to this board */
	for (mp = id; mp; mp = mp->next) {
		int bank;

		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
			int		i;
			uint_t		uk;
			uint64_t	madr;

			/* get register value, extract uk and normalize */
			drmach_mem_read_madr(mp, bank, &madr);

			if (!(madr & DRMACH_MC_VALID_MASK))
				continue;

			uk = DRMACH_MC_UK(madr);

			/* match uk value */
			for (i = 0; i < len; i++)
				if (uk == uk2segsz[i].uk)
					break;

			if (i < len) {
				uint64_t sz = uk2segsz[i].segsz;

				/*
				 * remember largest segment size,
				 * update mask result
				 */
				if (sz > largest_sz) {
					largest_sz = sz;
					*mask = sz - 1;
				}
			} else {
				/*
				 * uk not in table, punt using
				 * entire slice size. no longer any
				 * reason to check other banks.
				 */
				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
				return (NULL);
			}
		}
	}

	return (NULL);
}
6531 
6532 sbd_error_t *
6533 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6534 {
6535 	drmach_mem_t *mp;
6536 
6537 	if (!DRMACH_IS_MEM_ID(id))
6538 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6539 
6540 	*base_addr = (uint64_t)-1;
6541 	for (mp = id; mp; mp = mp->next) {
6542 		int bank;
6543 
6544 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6545 			uint64_t addr, madr;
6546 
6547 			drmach_mem_read_madr(mp, bank, &madr);
6548 			if (madr & DRMACH_MC_VALID_MASK) {
6549 				addr = DRMACH_MC_UM_TO_PA(madr) |
6550 				    DRMACH_MC_LM_TO_PA(madr);
6551 
6552 				if (addr < *base_addr)
6553 					*base_addr = addr;
6554 			}
6555 		}
6556 	}
6557 
6558 	/* should not happen, but ... */
6559 	if (*base_addr == (uint64_t)-1)
6560 		return (DRMACH_INTERNAL_ERROR());
6561 
6562 	return (NULL);
6563 }
6564 
/*
 * Rebuild drmach_bus_sync_list from scratch: one entry for each valid
 * memory bank on every board, holding that bank's base physical
 * address biased by one doubleword (see comment below).  Caller must
 * hold drmach_bus_sync_lock.
 */
void
drmach_bus_sync_list_update(void)
{
	int		rv, idx, cnt = 0;
	drmachid_t	id;

	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));

	rv = drmach_array_first(drmach_boards, &idx, &id);
	while (rv == 0) {
		drmach_board_t		*bp = id;
		drmach_mem_t		*mp = bp->mem;

		/* walk every mem unit chained to this board */
		while (mp) {
			int bank;

			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
				uint64_t madr;

				drmach_mem_read_madr(mp, bank, &madr);
				if (madr & DRMACH_MC_VALID_MASK) {
					uint64_t pa;

					pa  = DRMACH_MC_UM_TO_PA(madr);
					pa |= DRMACH_MC_LM_TO_PA(madr);

					/*
					 * The list is zero terminated.
					 * Offset the pa by a doubleword
					 * to avoid confusing a pa value
					 * of zero with the terminator.
					 */
					pa += sizeof (uint64_t);

					drmach_bus_sync_list[cnt++] = pa;
				}
			}

			mp = mp->next;
		}

		rv = drmach_array_next(drmach_boards, &idx, &id);
	}

	drmach_bus_sync_list[cnt] = 0;
}
6611 
/*
 * Build a memlist describing the physical memory chunks (taken from
 * the golden DCD) that fall within the same memory slice as this mem
 * unit's base address, and return it via *ml.
 */
sbd_error_t *
drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
{
	sbd_error_t	*err;
	struct memlist	*mlist;
	gdcd_t		*gdcd;
	mem_chunk_t	*chunk;
	uint64_t	 chunks, pa, mask;

	err = drmach_mem_get_base_physaddr(id, &pa);
	if (err)
		return (err);

	gdcd = drmach_gdcd_new();
	if (gdcd == NULL)
		return (DRMACH_INTERNAL_ERROR());

	/* round the base address down to its slice boundary */
	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
	pa &= mask;

	/* keep only the chunks that live in the same slice as pa */
	mlist = NULL;
	chunk = gdcd->dcd_chunk_list.dcl_chunk;
	chunks = gdcd->dcd_chunk_list.dcl_chunks;
	while (chunks-- != 0) {
		if ((chunk->mc_base_pa & mask) == pa) {
			mlist = memlist_add_span(mlist, chunk->mc_base_pa,
			    chunk->mc_mbytes * 1048576);
		}

		++chunk;
	}

	drmach_gdcd_dispose(gdcd);

#ifdef DEBUG
	DRMACH_PR("GDCD derived memlist:");
	memlist_dump(mlist);
#endif

	*ml = mlist;
	return (NULL);
}
6654 
6655 sbd_error_t *
6656 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6657 {
6658 	drmach_mem_t	*mp;
6659 
6660 	if (!DRMACH_IS_MEM_ID(id))
6661 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6662 	mp = id;
6663 
6664 	ASSERT(mp->nbytes != 0);
6665 	*bytes = mp->nbytes;
6666 
6667 	return (NULL);
6668 }
6669 
6670 sbd_error_t *
6671 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6672 {
6673 	sbd_error_t	*err;
6674 	drmach_device_t	*mp;
6675 
6676 	if (!DRMACH_IS_MEM_ID(id))
6677 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6678 	mp = id;
6679 
6680 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6681 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6682 			err = NULL;
6683 			break;
6684 
6685 		case 1: *bytes = 0;
6686 			err = NULL;
6687 			break;
6688 
6689 		default:
6690 			err = DRMACH_INTERNAL_ERROR();
6691 			break;
6692 	}
6693 
6694 	return (err);
6695 }
6696 
/*
 * Debug/tuning knob: when nonzero, drmach_mem_cpu_affinity() returns
 * this CPU id (after validating it) instead of searching the board.
 */
processorid_t drmach_mem_cpu_affinity_nail;
6698 
6699 processorid_t
6700 drmach_mem_cpu_affinity(drmachid_t id)
6701 {
6702 	drmach_device_t	*mp;
6703 	drmach_board_t	*bp;
6704 	processorid_t	 cpuid;
6705 
6706 	if (!DRMACH_IS_MEM_ID(id))
6707 		return (CPU_CURRENT);
6708 
6709 	if (drmach_mem_cpu_affinity_nail) {
6710 		cpuid = drmach_mem_cpu_affinity_nail;
6711 
6712 		if (cpuid < 0 || cpuid > NCPU)
6713 			return (CPU_CURRENT);
6714 
6715 		mutex_enter(&cpu_lock);
6716 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6717 			cpuid = CPU_CURRENT;
6718 		mutex_exit(&cpu_lock);
6719 
6720 		return (cpuid);
6721 	}
6722 
6723 	/* try to choose a proc on the target board */
6724 	mp = id;
6725 	bp = mp->bp;
6726 	if (bp->devices) {
6727 		int		 rv;
6728 		int		 d_idx;
6729 		drmachid_t	 d_id;
6730 
6731 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6732 		while (rv == 0) {
6733 			if (DRMACH_IS_CPU_ID(d_id)) {
6734 				drmach_cpu_t	*cp = d_id;
6735 
6736 				mutex_enter(&cpu_lock);
6737 				cpuid = cp->cpuid;
6738 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6739 					mutex_exit(&cpu_lock);
6740 					return (cpuid);
6741 				} else {
6742 					mutex_exit(&cpu_lock);
6743 				}
6744 			}
6745 
6746 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6747 		}
6748 	}
6749 
6750 	/* otherwise, this proc, wherever it is */
6751 	return (CPU_CURRENT);
6752 }
6753 
6754 static sbd_error_t *
6755 drmach_mem_release(drmachid_t id)
6756 {
6757 	if (!DRMACH_IS_MEM_ID(id))
6758 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6759 	return (NULL);
6760 }
6761 
/*
 * Fill in the DR status record for a mem unit.  "configured" is true
 * iff some span of installed physical memory falls within the memory
 * slice containing this unit's base address.
 */
static sbd_error_t *
drmach_mem_status(drmachid_t id, drmach_status_t *stat)
{
	drmach_mem_t	*mp;
	sbd_error_t	*err;
	uint64_t	 pa, slice_size;
	struct memlist	*ml;

	ASSERT(DRMACH_IS_MEM_ID(id));
	mp = id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(id, &pa);
	if (err)
		return (err);

	/* round down to slice boundary */
	slice_size = DRMACH_MEM_SLICE_SIZE;
	pa &= ~ (slice_size - 1);

	/* stop at first span that is in slice */
	memlist_read_lock();
	for (ml = phys_install; ml; ml = ml->next)
		if (ml->address >= pa && ml->address < pa + slice_size)
			break;
	memlist_read_unlock();

	stat->assigned = mp->dev.bp->assigned;
	stat->powered = mp->dev.bp->powered;
	stat->configured = (ml != NULL);
	stat->busy = mp->dev.busy;
	/*
	 * NOTE(review): strncpy does not NUL-terminate if dev.type
	 * exactly fills stat->type -- confirm the buffer sizes make
	 * truncation impossible here.
	 */
	strncpy(stat->type, mp->dev.type, sizeof (stat->type));
	stat->info[0] = '\0';

	return (NULL);
}
6798 
/*
 * Tear down the software state created when the board was probed: the
 * cached PROM tree copy and the per-board device array.  The board's
 * mem list head is cleared along with the device array.
 */
sbd_error_t *
drmach_board_deprobe(drmachid_t id)
{
	drmach_board_t	*bp;
	sbd_error_t	*err = NULL;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	if (bp->tree) {
		drmach_node_dispose(bp->tree);
		bp->tree = NULL;
	}
	if (bp->devices) {
		drmach_array_dispose(bp->devices, drmach_device_dispose);
		bp->devices = NULL;
		bp->mem = NULL;  /* TODO: still needed? */
	}
	return (err);
}
6820 
/*
 * Passthru "showlpa": print the LPA base/bound register contents for a
 * CPU (via its SCR) or an IO leaf (via its SCSR) to the invoking
 * user's terminal with uprintf().  Any other id type, or a failed
 * register read, yields an error.
 */
/*ARGSUSED1*/
static sbd_error_t *
drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
{
	drmach_device_t	*dp;
	uint64_t	val;
	int		err = 1;	/* cleared to 0 once val is valid */

	if (DRMACH_IS_CPU_ID(id)) {
		drmach_cpu_t *cp = id;
		if (drmach_cpu_read_scr(cp, &val))
			err = 0;
	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
		drmach_io_t *io = id;
		val = lddphysio(io->scsr_pa);
		err = 0;
	}
	if (err)
		return (drerr_new(0, ESTC_INAPPROP, NULL));

	dp = id;
	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
	    dp->bp->cm.name,
	    dp->cm.name,
	    dp->portid,
	    DRMACH_LPA_BASE_TO_PA(val),
	    DRMACH_LPA_BND_TO_PA(val));

	return (NULL);
}
6851 
6852 /*ARGSUSED*/
6853 static sbd_error_t *
6854 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6855 {
6856 	drmach_board_t		*bp = (drmach_board_t *)id;
6857 	sbd_error_t		*err;
6858 	sc_gptwocfg_cookie_t	scc;
6859 
6860 	if (!DRMACH_IS_BOARD_ID(id))
6861 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6862 
6863 	/* do saf configurator stuff */
6864 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6865 	scc = sc_probe_board(bp->bnum);
6866 	if (scc == NULL) {
6867 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6868 		return (err);
6869 	}
6870 
6871 	return (err);
6872 }
6873 
6874 /*ARGSUSED*/
6875 static sbd_error_t *
6876 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6877 {
6878 	drmach_board_t	*bp;
6879 	sbd_error_t	*err = NULL;
6880 	sc_gptwocfg_cookie_t	scc;
6881 
6882 	if (!DRMACH_IS_BOARD_ID(id))
6883 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6884 	bp = id;
6885 
6886 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6887 	scc = sc_unprobe_board(bp->bnum);
6888 	if (scc != NULL) {
6889 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6890 	}
6891 
6892 	if (err == NULL)
6893 		err = drmach_board_deprobe(id);
6894 
6895 	return (err);
6896 }
6897 
/*
 * Passthru "readmem" (debug aid): touch every 32-byte line of all
 * installed physical memory by copying it, via the bcopy32_il
 * primitive, into a local scratch doubleword.  The data read is
 * discarded.
 *
 * NOTE(review): assumes every phys_install span size is a multiple of
 * 32 bytes; otherwise nbytes would wrap and the loop would not
 * terminate -- confirm against the platform's span granularity.
 */
static sbd_error_t *
drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
{
	_NOTE(ARGUNUSED(id))
	_NOTE(ARGUNUSED(opts))

	struct memlist	*ml;
	uint64_t	src_pa;
	uint64_t	dst_pa;
	uint64_t	dst;

	dst_pa = va_to_pa(&dst);

	memlist_read_lock();
	for (ml = phys_install; ml; ml = ml->next) {
		uint64_t	nbytes;

		src_pa = ml->address;
		nbytes = ml->size;

		while (nbytes != 0ull) {

			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(src_pa, dst_pa);

			/* increment by 32 bytes */
			src_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));
		}
	}
	memlist_read_unlock();

	return (NULL);
}
6934 
6935 static sbd_error_t *
6936 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6937 {
6938 	_NOTE(ARGUNUSED(opts))
6939 
6940 	drmach_cpu_t	*cp;
6941 
6942 	if (!DRMACH_IS_CPU_ID(id))
6943 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6944 	cp = id;
6945 
6946 	mutex_enter(&cpu_lock);
6947 	(void) drmach_iocage_cpu_return(&(cp->dev),
6948 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6949 	mutex_exit(&cpu_lock);
6950 
6951 	return (NULL);
6952 }
6953 
6954 /*
6955  * Starcat DR passthrus are for debugging purposes only.
6956  */
6957 static struct {
6958 	const char	*name;
6959 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
6960 } drmach_pt_arr[] = {
6961 	{ "showlpa",		drmach_pt_showlpa		},
6962 	{ "ikprobe",		drmach_pt_ikprobe		},
6963 	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
6964 	{ "readmem",		drmach_pt_readmem		},
6965 	{ "recovercpu",		drmach_pt_recovercpu		},
6966 
6967 	/* the following line must always be last */
6968 	{ NULL,			NULL				}
6969 };
6970 
6971 /*ARGSUSED*/
6972 sbd_error_t *
6973 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
6974 {
6975 	int		i;
6976 	sbd_error_t	*err;
6977 
6978 	i = 0;
6979 	while (drmach_pt_arr[i].name != NULL) {
6980 		int len = strlen(drmach_pt_arr[i].name);
6981 
6982 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
6983 			break;
6984 
6985 		i += 1;
6986 	}
6987 
6988 	if (drmach_pt_arr[i].name == NULL)
6989 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
6990 	else
6991 		err = (*drmach_pt_arr[i].handler)(id, opts);
6992 
6993 	return (err);
6994 }
6995 
6996 sbd_error_t *
6997 drmach_release(drmachid_t id)
6998 {
6999 	drmach_common_t *cp;
7000 
7001 	if (!DRMACH_IS_DEVICE_ID(id))
7002 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7003 	cp = id;
7004 
7005 	return (cp->release(id));
7006 }
7007 
7008 sbd_error_t *
7009 drmach_status(drmachid_t id, drmach_status_t *stat)
7010 {
7011 	drmach_common_t *cp;
7012 	sbd_error_t	*err;
7013 
7014 	rw_enter(&drmach_boards_rwlock, RW_READER);
7015 
7016 	if (!DRMACH_IS_ID(id)) {
7017 		rw_exit(&drmach_boards_rwlock);
7018 		return (drerr_new(0, ESTC_NOTID, NULL));
7019 	}
7020 
7021 	cp = id;
7022 
7023 	err = cp->status(id, stat);
7024 	rw_exit(&drmach_boards_rwlock);
7025 	return (err);
7026 }
7027 
7028 static sbd_error_t *
7029 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7030 {
7031 	drmach_common_t *cp;
7032 
7033 	if (!DRMACH_IS_ID(id))
7034 		return (drerr_new(0, ESTC_NOTID, NULL));
7035 	cp = id;
7036 
7037 	return (cp->status(id, stat));
7038 }
7039 
/*
 * Unconfigure the DDI branch(es) backing the given device id.  For a
 * mem id, the board's chained mem units are all walked; CPU ids are
 * skipped outright since CPU nodes are never configured.  Returns NULL
 * on success (a node with no "name" property is also treated as
 * success) or an ESTC_DRVFAIL error naming the first branch that
 * failed to detach.  On an IO-branch failure, man_dr_attach is invoked
 * (if loadable) to give the man driver back its eri device.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_unconfigure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	dev_info_t 	*rdip;

	char	name[OBP_MAXDRVNAME];
	int rv;

	/*
	 * Since CPU nodes are not configured, it is
	 * necessary to skip the unconfigure step as
	 * well.
	 */
	if (DRMACH_IS_CPU_ID(id)) {
		return (NULL);
	}

	for (; id; ) {
		dev_info_t	*fdip = NULL;

		if (!DRMACH_IS_DEVICE_ID(id))
			return (drerr_new(0, ESTC_INAPPROP, NULL));
		dp = id;

		rdip = dp->node->n_getdip(dp->node);

		/*
		 * drmach_unconfigure() is always called on a configured branch.
		 * So the root of the branch was held earlier and must exist.
		 */
		ASSERT(rdip);

		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");

		rv = dp->node->n_getprop(dp->node,
		    "name", name, OBP_MAXDRVNAME);

		/* The node must have a name */
		/* (0 here is NULL -- treated as success by callers) */
		if (rv)
			return (0);

		/* nodes of unrecognized type are skipped, not detached */
		if (drmach_name2type_idx(name) < 0) {
			if (DRMACH_IS_MEM_ID(id)) {
				drmach_mem_t	*mp = id;
				id = mp->next;
			} else {
				id = NULL;
			}
			continue;
		}

		/*
		 * NOTE: FORCE flag is no longer needed under devfs
		 */
		ASSERT(e_ddi_branch_held(rdip));
		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
			sbd_error_t *err = NULL;
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * If non-NULL, fdip is returned held and must be
			 * released.
			 */
			if (fdip != NULL) {
				(void) ddi_pathname(fdip, path);
				ddi_release_devi(fdip);
			} else {
				(void) ddi_pathname(rdip, path);
			}

			err = drerr_new(1, ESTC_DRVFAIL, path);

			kmem_free(path, MAXPATHLEN);

			/*
			 * If we were unconfiguring an IO board, a call was
			 * made to man_dr_detach.  We now need to call
			 * man_dr_attach to regain man use of the eri.
			 */
			if (DRMACH_IS_IO_ID(id)) {
				int (*func)(dev_info_t *dip);

				func = (int (*)(dev_info_t *))kobj_getsymvalue\
				    ("man_dr_attach", 0);

				if (func) {
					drmach_io_inst_t ios;
					dev_info_t 	*pdip;
					int		circ;

					/*
					 * Walk device tree to find rio dip for
					 * the board
					 * Since we are not interested in iosram
					 * instance here, initialize it to 0, so
					 * that the walk terminates as soon as
					 * eri dip is found.
					 */
					ios.iosram_inst = 0;
					ios.eri_dip = NULL;
					ios.bnum = dp->bp->bnum;

					if (pdip = ddi_get_parent(rdip)) {
						ndi_hold_devi(pdip);
						ndi_devi_enter(pdip, &circ);
					}
					/*
					 * Root node doesn't have to be held in
					 * any way.
					 */
					ASSERT(e_ddi_branch_held(rdip));
					ddi_walk_devs(rdip,
					    drmach_board_find_io_insts,
					    (void *)&ios);

					DRMACH_PR("drmach_unconfigure: bnum=%d"
					    " eri=0x%p\n",
					    ios.bnum, ios.eri_dip);

					if (pdip) {
						ndi_devi_exit(pdip, circ);
						ndi_rele_devi(pdip);
					}

					if (ios.eri_dip) {
						DRMACH_PR("calling"
						    " man_dr_attach\n");
						(void) (*func)(ios.eri_dip);
						/*
						 * Release hold acquired in
						 * drmach_board_find_io_insts()
						 */
						ndi_rele_devi(ios.eri_dip);
					}
				}
			}
			return (err);
		}

		/* advance to the next mem unit on the board, if any */
		if (DRMACH_IS_MEM_ID(id)) {
			drmach_mem_t	*mp = id;
			id = mp->next;
		} else {
			id = NULL;
		}
	}

	return (NULL);
}
7191 
7192 /*
7193  * drmach interfaces to legacy Starfire platmod logic
7194  * linkage via runtime symbol look up, called from plat_cpu_power*
7195  */
7196 
7197 /*
7198  * Start up a cpu.  It is possible that we're attempting to restart
7199  * the cpu after an UNCONFIGURE in which case the cpu will be
7200  * spinning in its cache.  So, all we have to do is wakeup him up.
7201  * Under normal circumstances the cpu will be coming from a previous
7202  * CONNECT and thus will be spinning in OBP.  In both cases, the
7203  * startup sequence is the same.
7204  */
7205 int
7206 drmach_cpu_poweron(struct cpu *cp)
7207 {
7208 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7209 
7210 	ASSERT(MUTEX_HELD(&cpu_lock));
7211 
7212 	if (drmach_cpu_start(cp) != 0)
7213 		return (EBUSY);
7214 	else
7215 		return (0);
7216 }
7217 
/*
 * Power off a CPU for DR detach.  The target is quiesced, sent a cross
 * trap to run drmach_cpu_shutdown_self, and the per-cpu handshake byte
 * drmach_xt_mb[cpuid] is polled until the target clears it or the
 * drmach_cpu_ntries retry budget is exhausted.  Always returns 0.
 */
int
drmach_cpu_poweroff(struct cpu *cp)
{
	int		ntries;
	processorid_t	cpuid;
	void		drmach_cpu_shutdown_self(void);

	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * XXX CHEETAH SUPPORT
	 * for cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	cpuid = cp->cpu_id;

	/*
	 * Set affinity to ensure consistent reading and writing of
	 * drmach_xt_mb[cpuid] by one "master" CPU directing
	 * the shutdown of the target CPU.
	 */
	affinity_set(CPU->cpu_id);

	/*
	 * Capture all CPUs (except for detaching proc) to prevent
	 * crosscalls to the detaching proc until it has cleared its
	 * bit in cpu_ready_set.
	 *
	 * The CPUs remain paused and the prom_mutex is known to be free.
	 * This prevents blocking when doing prom IEEE-1275 calls at a
	 * high PIL level.
	 */
	promsafe_pause_cpus();

	/*
	 * Quiesce interrupts on the target CPU. We do this by setting
	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
	 * prevent it from receiving cross calls and cross traps.
	 * This prevents the processor from receiving any new soft interrupts.
	 */
	mp_cpu_quiesce(cp);

	prom_hotremovecpu(cpuid);

	start_cpus();

	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
	drmach_xt_mb[cpuid] = 0x80;

	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
	    (uint64_t)drmach_cpu_shutdown_self, NULL);

	/* poll for the target to acknowledge by clearing its mailbox byte */
	ntries = drmach_cpu_ntries;
	while (drmach_xt_mb[cpuid] && ntries) {
		DELAY(drmach_cpu_delay);
		ntries--;
	}

	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */

	membar_sync();			/* make sure copy-back retires */

	affinity_clear();

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	DRMACH_PR("waited %d out of %d tries for "
	    "drmach_cpu_shutdown_self on cpu%d",
	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);

	/*
	 * Do this here instead of drmach_cpu_shutdown_self() to
	 * avoid an assertion failure panic in turnstile.c.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);

	return (0);
}
7316 
/*
 * Zero (scrub) the first nbytes of the I/O cage region with the
 * block-copy bzero primitive.  On failure, retry with the full cage
 * size and panic if that also fails.  Runs with affinity pinned to the
 * calling CPU and flushes its e$ afterward.  Caller holds cpu_lock.
 */
void
drmach_iocage_mem_scrub(uint64_t nbytes)
{
	extern int drmach_bc_bzero(void*, size_t);
	int	rv;

	ASSERT(MUTEX_HELD(&cpu_lock));

	affinity_set(CPU->cpu_id);

	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
	if (rv != 0) {
		DRMACH_PR(
		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
		/* second attempt covers the whole cage before giving up */
		rv = drmach_bc_bzero(drmach_iocage_vaddr, drmach_iocage_size);
		if (rv != 0)
			cmn_err(CE_PANIC,
			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
			    rv);
	}

	cpu_flush_ecache();

	affinity_clear();
}
7342 
/*
 * Round x up to the next multiple of a; a == 0 leaves x unchanged.
 * Assumes a is a power of two when nonzero.
 */
#define	ALIGN(x, a)	((a) == 0 ? (uintptr_t)(x) : \
	(((uintptr_t)(x) + (uintptr_t)(a) - 1l) & ~((uintptr_t)(a) - 1l)))
7345 
/*
 * Hand the preallocated I/O cage region to the SC for board testing:
 * remove the span from the memory scrubber, zero it, and report its
 * base (in megabytes, split into hi/lo 32-bit halves) and size (in
 * megabytes) in the dr_testboard_req_t.
 */
static sbd_error_t *
drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
{
	pfn_t		basepfn;
	pgcnt_t		npages;
	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
	uint64_t	drmach_iocage_paddr_mbytes;

	ASSERT(drmach_iocage_paddr != -1);

	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);

	memscrub_delete_span(basepfn, npages);

	mutex_enter(&cpu_lock);
	drmach_iocage_mem_scrub(drmach_iocage_size);
	mutex_exit(&cpu_lock);

	/*
	 * HPOST wants the address of the cage to be 64 megabyte-aligned
	 * and in megabyte units.
	 * The size of the cage is also in megabyte units.
	 */
	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));

	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;

	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
	tbrq->memlen = drmach_iocage_size / 0x100000;

	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);

	return (NULL);
}
7384 
/*
 * Reclaim the I/O cage region after board testing: re-add the span to
 * the memory scrubber, scrub it, then clear the iocage busy flag and
 * signal any waiter blocked on drmach_iocage_cv.
 */
static sbd_error_t *
drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
{
	_NOTE(ARGUNUSED(tbr))

	pfn_t		basepfn;
	pgcnt_t		npages;
	extern int	memscrub_add_span(pfn_t, pgcnt_t);

	ASSERT(drmach_iocage_paddr != -1);

	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);

	memscrub_add_span(basepfn, npages);

	mutex_enter(&cpu_lock);
	mutex_enter(&drmach_iocage_lock);
	drmach_iocage_mem_scrub(drmach_iocage_size);
	drmach_iocage_is_busy = 0;
	cv_signal(&drmach_iocage_cv);
	mutex_exit(&drmach_iocage_lock);
	mutex_exit(&cpu_lock);

	return (NULL);
}
7411 
7412 static int
7413 drmach_cpu_intr_disable(cpu_t *cp)
7414 {
7415 	if (cpu_intr_disable(cp) != 0)
7416 		return (-1);
7417 	return (0);
7418 }
7419 
/*
 * Sequester the CPU backing dp for the I/O cage test: offline it,
 * power it off, then unconfigure it.  The CPU's prior cpu_flags are
 * saved in *oflags so its user-visible no-intr state can be restored
 * later.  On any failure a best-effort rollback is attempted and -1
 * is returned; 0 on success.  Caller holds cpu_lock.
 */
static int
drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
{
	struct cpu	*cp;
	processorid_t	cpuid;
	static char	*fn = "drmach_iocage_cpu_acquire";
	sbd_error_t 	*err;
	int 		impl;

	ASSERT(DRMACH_IS_CPU_ID(dp));
	ASSERT(MUTEX_HELD(&cpu_lock));

	cpuid = ((drmach_cpu_t *)dp)->cpuid;

	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);

	if (dp->busy)
		return (-1);

	if ((cp = cpu_get(cpuid)) == NULL) {
		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
		return (-1);
	}

	if (!CPU_ACTIVE(cp)) {
		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
		return (-1);
	}

	/*
	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
	 * can fail to receive an XIR. To workaround this issue until a hardware
	 * fix is implemented, we will exclude the selection of these CPUs.
	 *
	 * Once a fix is implemented in hardware, this code should be updated
	 * to allow Jaguar CPUs that have the fix to be used. However, support
	 * must be retained to skip revisions that do not have this fix.
	 */

	err = drmach_cpu_get_impl(dp, &impl);
	if (err) {
		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
		sbd_err_clear(&err);
		return (-1);
	}

	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
	    drmach_iocage_exclude_jaguar_port_zero) {
		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
		    fn, cpuid);
		return (-1);
	}

	/* save the pre-acquire flags for later restoration */
	ASSERT(oflags);
	*oflags = cp->cpu_flags;

	if (cpu_offline(cp, 0)) {
		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
		return (-1);
	}

	if (cpu_poweroff(cp)) {
		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
		/* rollback: bring the CPU back online, restore no-intr */
		if (cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to online CPU id %d "
			    "during I/O cage test selection", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	if (cpu_unconfigure(cpuid)) {
		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
		    cpuid);
		/* rollback: reconfigure, power on, online, restore no-intr */
		(void) cpu_configure(cpuid);
		if ((cp = cpu_get(cpuid)) == NULL) {
			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
			    "during I/O cage test selection", cpuid);
			dp->busy = 1;
			return (-1);
		}
		if (cpu_poweron(cp) || cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to %s CPU id %d "
			    "during I/O cage test selection",
			    cpu_is_poweredoff(cp) ?
			    "poweron" : "online", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	dp->busy = 1;

	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);

	return (0);
}
7525 
7526 /*
7527  * Attempt to acquire all the CPU devices passed in. It is
7528  * assumed that all the devices in the list are the cores of
7529  * a single CMP device. Non CMP devices can be handled as a
7530  * single core CMP by passing in a one element list.
7531  *
7532  * Success is only returned if *all* the devices in the list
7533  * can be acquired. In the failure case, none of the devices
7534  * in the list will be held as acquired.
7535  */
7536 static int
7537 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7538 {
7539 	int	curr;
7540 	int	i;
7541 	int	rv = 0;
7542 
7543 	ASSERT((dpp != NULL) && (*dpp != NULL));
7544 
7545 	/*
7546 	 * Walk the list of CPU devices (cores of a CMP)
7547 	 * and attempt to acquire them. Bail out if an
7548 	 * error is encountered.
7549 	 */
7550 	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7551 
7552 		/* check for the end of the list */
7553 		if (dpp[curr] == NULL) {
7554 			break;
7555 		}
7556 
7557 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7558 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7559 
7560 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7561 		if (rv != 0) {
7562 			break;
7563 		}
7564 	}
7565 
7566 	/*
7567 	 * Check for an error.
7568 	 */
7569 	if (rv != 0) {
7570 		/*
7571 		 * Make a best effort attempt to return any cores
7572 		 * that were already acquired before the error was
7573 		 * encountered.
7574 		 */
7575 		for (i = 0; i < curr; i++) {
7576 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7577 		}
7578 	}
7579 
7580 	return (rv);
7581 }
7582 
/*
 * Return a CPU previously taken by drmach_iocage_cpu_acquire() to
 * service: reconfigure it, power it on, online it, and restore the
 * pre-test no-intr state recorded in oflags.  Returns 0 on success,
 * -1 if any step fails; dp->busy stays set only when the reconfigure
 * itself fails (see comment below).
 */
static int
drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
{
	processorid_t	cpuid;
	struct cpu	*cp;
	int		rv = 0;
	static char	*fn = "drmach_iocage_cpu_return";

	ASSERT(DRMACH_IS_CPU_ID(dp));
	ASSERT(MUTEX_HELD(&cpu_lock));

	cpuid = ((drmach_cpu_t *)dp)->cpuid;

	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);

	if (cpu_configure(cpuid)) {
		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
		    "after I/O cage test", cpuid);
		/*
		 * The component was never set to unconfigured during the IO
		 * cage test, so we need to leave marked as busy to prevent
		 * further DR operations involving this component.
		 */
		return (-1);
	}

	if ((cp = cpu_get(cpuid)) == NULL) {
		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
		    "I/O cage test", cpuid);
		dp->busy = 0;
		return (-1);
	}

	if (cpu_poweron(cp) || cpu_online(cp)) {
		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
		    "cage test", cpu_is_poweredoff(cp) ?
		    "poweron" : "online", cpuid);
		rv = -1;
	}

	/*
	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
	 * P_NOINTR. Need to return to previous user-visible state.
	 */
	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
	    drmach_cpu_intr_disable(cp) != 0) {
		cmn_err(CE_WARN, "failed to restore CPU id %d "
		    "no-intr after I/O cage test", cpuid);
		rv = -1;
	}

	dp->busy = 0;

	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);

	return (rv);
}
7640 
/*
 * Search all connected slot 0 boards for a CMP whose cores can all be
 * acquired for I/O cage testing. On success the acquired cores are
 * returned via dpp (NULL terminated, at most two entries), the
 * original cpu flags of each core via oflags, and the CMP's portid is
 * recorded in tbrq->cpu_portid. Returns NULL on success, or an
 * ESTC_IOCAGE_NO_CPU_AVAIL error if no candidate can be acquired.
 */
static sbd_error_t *
drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
    cpu_flag_t *oflags)
{
	drmach_board_t	*bp;
	int		b_rv;
	int		b_idx;
	drmachid_t	b_id;
	int		found;

	mutex_enter(&cpu_lock);

	ASSERT(drmach_boards != NULL);

	found = 0;

	/*
	 * Walk the board list.
	 */
	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);

	while (b_rv == 0) {

		int		d_rv;
		int		d_idx;
		drmachid_t	d_id;

		bp = b_id;

		/* skip boards that are not connected or have no devices */
		if (bp->connected == 0 || bp->devices == NULL) {
			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
			continue;
		}

		/* An AXQ restriction disqualifies MCPU's as candidates. */
		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
			continue;
		}

		/*
		 * Walk the device list of this board.
		 */
		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);

		while (d_rv == 0) {

			drmach_device_t	*ndp;

			/* only interested in CPU devices */
			if (!DRMACH_IS_CPU_ID(d_id)) {
				d_rv = drmach_array_next(bp->devices, &d_idx,
				    &d_id);
				continue;
			}

			/*
			 * The following code assumes two properties
			 * of a CMP device:
			 *
			 *   1. All cores of a CMP are grouped together
			 *	in the device list.
			 *
			 *   2. There will only be a maximum of two cores
			 *	present in the CMP.
			 *
			 * If either of these two properties change,
			 * this code will have to be revisited.
			 */

			dpp[0] = d_id;
			dpp[1] = NULL;

			/*
			 * Get the next device. It may or may not be used.
			 */
			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
			ndp = d_id;

			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
				/*
				 * The second device is only interesting for
				 * this pass if it has the same portid as the
				 * first device. This implies that both are
				 * cores of the same CMP.
				 */
				if (dpp[0]->portid == ndp->portid) {
					dpp[1] = d_id;
				}
			}

			/*
			 * Attempt to acquire all cores of the CMP.
			 */
			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
				found = 1;
				break;
			}

			/*
			 * Check if the search for the second core was
			 * successful. If not, the next iteration should
			 * use that device.
			 */
			if (dpp[1] == NULL) {
				/* d_id already holds the next candidate */
				continue;
			}

			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
		}

		if (found)
			break;

		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
	}

	mutex_exit(&cpu_lock);

	if (!found) {
		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
	}

	tbrq->cpu_portid = (*dpp)->portid;

	return (NULL);
}
7768 
7769 /*
7770  * Setup an iocage by acquiring a cpu and memory.
7771  */
7772 static sbd_error_t *
7773 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7774     cpu_flag_t *oflags)
7775 {
7776 	sbd_error_t *err;
7777 
7778 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7779 	if (!err) {
7780 		mutex_enter(&drmach_iocage_lock);
7781 		while (drmach_iocage_is_busy)
7782 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7783 		drmach_iocage_is_busy = 1;
7784 		mutex_exit(&drmach_iocage_lock);
7785 		err = drmach_iocage_mem_get(tbrq);
7786 		if (err) {
7787 			mutex_enter(&drmach_iocage_lock);
7788 			drmach_iocage_is_busy = 0;
7789 			cv_signal(&drmach_iocage_cv);
7790 			mutex_exit(&drmach_iocage_lock);
7791 		}
7792 	}
7793 	return (err);
7794 }
7795 
/* Limits for the slot 1 pause state capture below. */
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
#define	DRMACH_S1P_SAMPLE_MAX		2

/* Which of the two register snapshots is being taken. */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/* Saved state of a slot 1 AXQ (address extender queue). */
typedef struct {
	dev_info_t	*dip;		/* not held; see XXX notes below */
	uint32_t	portid;
	uint32_t	pcr_sel_save;	/* saved perf counter select reg */
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX];
	uint64_t	reg_basepa;	/* AXQ register base PA */
} drmach_s1p_axq_t;

/* Saved state of one Schizo PCI leaf. */
typedef struct {
	dev_info_t		*dip;	/* not held; see XXX notes below */
	uint32_t		portid;
	uint64_t		csr_basepa;	/* leaf CSR base PA */
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;
		uint64_t	*intr_map_regs;	/* kmem_zalloc'd */
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/* Saved state of one Schizo: common regs plus its two PCI leaves. */
typedef struct {
	uint64_t		csr_basepa;	/* Schizo CSR base PA */
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/* Per-boardset slot 1 pause state: one AXQ and all its Schizos. */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;

/*
 * Table of saved state for paused slot1 devices.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
static int drmach_slot1_pause_init = 1;

/* Debug knob: enables slot 1 pause state capture and verification. */
#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7851 
/*
 * Determine whether dip is a slot 1 AXQ that should participate in
 * the pause debug facility. Returns 1 and fills in *id (portid) and
 * *reg (register base PA) if so; returns 0 otherwise. Boardsets with
 * a MaxCPU (slot 1 CPUs present) are excluded.
 */
static int
drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
{
	int		portid, exp, slot, i;
	drmach_reg_t	regs[2];
	int		reglen = sizeof (regs);

	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
		return (0);
	}

	/* decode expander and slot number from the portid */
	exp = (portid >> 5) & 0x1f;
	slot = portid & 0x1;

	/* only slot 1 AXQs are of interest */
	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
	    strlen(DRMACH_AXQ_NAMEPROP))) {
		return (0);
	}

	mutex_enter(&cpu_lock);
	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
		if (cpu[MAKE_CPUID(exp, slot, i)]) {
			/* maxcat cpu present */
			mutex_exit(&cpu_lock);
			return (0);
		}
	}
	mutex_exit(&cpu_lock);

	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
		    "axq dip=%p\n", dip);
		return (0);
	}

	ASSERT(id && reg);
	/* first "reg" entry holds the 64-bit register base address */
	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
	*reg |= (uint64_t)regs[0].reg_addr_lo;
	*id = portid;

	return (1);
}
7896 
7897 /*
7898  * Allocate an entry in the slot1_paused state table.
7899  */
7900 static void
7901 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7902     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7903 {
7904 	int	axq_exp;
7905 	drmach_slot1_pause_t *slot1;
7906 
7907 	axq_exp = (axq_portid >> 5) & 0x1f;
7908 
7909 	ASSERT(axq_portid & 0x1);
7910 	ASSERT(slot1_paused[axq_exp] == NULL);
7911 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7912 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7913 
7914 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7915 
7916 	/*
7917 	 * XXX This dip should really be held (via ndi_hold_devi())
7918 	 * before saving it in the axq pause structure. However that
7919 	 * would prevent DR as the pause data structures persist until
7920 	 * the next suspend. drmach code should be modified to free the
7921 	 * the slot 1 pause data structures for a boardset when its
7922 	 * slot 1 board is DRed out. The dip can then be released via
7923 	 * ndi_rele_devi() when the pause data structure is freed
7924 	 * allowing DR to proceed. Until this change is made, drmach
7925 	 * code should be careful about dereferencing the saved dip
7926 	 * as it may no longer exist.
7927 	 */
7928 	slot1->axq.dip = axq_dip;
7929 	slot1->axq.portid = axq_portid;
7930 	slot1->axq.reg_basepa = reg;
7931 	slot1_paused[axq_exp] = slot1;
7932 }
7933 
7934 static void
7935 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7936 {
7937 	int	i;
7938 
7939 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7940 		if (pci->regs[i].intr_map_regs != NULL) {
7941 			ASSERT(pci->regs[i].nmap_regs > 0);
7942 			kmem_free(pci->regs[i].intr_map_regs,
7943 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7944 		}
7945 	}
7946 }
7947 
7948 static void
7949 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7950 {
7951 	int	i, j, k;
7952 	drmach_slot1_pause_t *slot1;
7953 
7954 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7955 		if ((slot1 = slot1_paused[i]) == NULL)
7956 			continue;
7957 
7958 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7959 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7960 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7961 
7962 		kmem_free(slot1, sizeof (*slot1));
7963 		slot1_paused[i] = NULL;
7964 	}
7965 }
7966 
7967 /*
7968  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
7969  * fill in the appropriate info in the slot1_paused state table.
7970  */
static int
drmach_find_slot1_io(dev_info_t *dip, void *arg)
{
	int		portid, exp, ioc_unum, leaf_unum;
	char		buf[OBP_MAXDRVNAME];
	int		buflen = sizeof (buf);
	drmach_reg_t	regs[3];
	int		reglen = sizeof (regs);
	uint32_t	leaf_offset;
	uint64_t	schizo_csr_pa, pci_csr_pa;
	drmach_s1p_pci_t *pci;
	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;

	/* only Schizo PCI nodes are of interest */
	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
		return (DDI_WALK_CONTINUE);
	}

	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
		return (DDI_WALK_CONTINUE);
	}

	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
		    "dip=%p\n", dip);
		return (DDI_WALK_CONTINUE);
	}

	/* decode expander, IO controller unit and leaf from the regs */
	exp = portid >> 5;
	ioc_unum = portid & 0x1;
	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;

	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
	ASSERT(slot1_paused[exp] != NULL);
	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);

	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];

	/*
	 * XXX This dip should really be held (via ndi_hold_devi())
	 * before saving it in the pci pause structure. However that
	 * would prevent DR as the pause data structures persist until
	 * the next suspend. drmach code should be modified to free
	 * the slot 1 pause data structures for a boardset when its
	 * slot 1 board is DRed out. The dip can then be released via
	 * ndi_rele_devi() when the pause data structure is freed
	 * allowing DR to proceed. Until this change is made, drmach
	 * code should be careful about dereferencing the saved dip as
	 * it may no longer exist.
	 */
	pci->dip = dip;
	pci->portid = portid;
	pci->csr_basepa = pci_csr_pa;

	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
	    buf, portid, dip);

	/* children of a recorded PCI leaf need not be visited */
	return (DDI_WALK_PRUNECHILD);
}
8041 
/*
 * Walk the whole device tree recording all slot 1 Schizo PCI leaves
 * into the slot1_paused state table (see drmach_find_slot1_io).
 */
static void
drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
{
	/*
	 * Root node doesn't have to be held
	 */
	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
	    (void *)slot1_paused);
}
8051 
8052 /*
8053  * Save the interrupt mapping registers for each non-idle interrupt
8054  * represented by the bit pairs in the saved interrupt state
8055  * diagnostic registers for this PCI leaf.
8056  */
static void
drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
{
	int	 i, cnt, ino;
	uint64_t reg;
	char	 *dname;
	uchar_t	 Xmits;

	/* Xmits (a Schizo variant) uses different ino assignments */
	dname = ddi_binding_name(pci->dip);
	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;

	/*
	 * 1st pass allocates, 2nd pass populates.
	 * Both passes must count identically: the diag register state
	 * read into pci->regs[iter] does not change between them.
	 */
	for (i = 0; i < 2; i++) {
		cnt = ino = 0;

		/*
		 * PCI slot interrupts
		 */
		reg = pci->regs[iter].slot_intr_state_diag;
		while (reg) {
			/*
			 * Xmits Interrupt Number Offset(ino) Assignments
			 *   00-17 PCI Slot Interrupts
			 *   18-1f Not Used
			 */
			if ((Xmits) && (ino > 0x17))
				break;
			/* two state bits per ino; save if not idle */
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		/*
		 * Xmits Interrupt Number Offset(ino) Assignments
		 *   20-2f Not Used
		 *   30-37 Internal interrupts
		 *   38-3e Not Used
		 */
		ino = (Xmits)  ?  0x30 : 0x20;

		/*
		 * OBIO and internal schizo interrupts
		 * Each PCI leaf has a set of mapping registers for all
		 * possible interrupt sources except the NewLink interrupts.
		 */
		reg = pci->regs[iter].obio_intr_state_diag;
		while (reg && ino <= 0x38) {
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		/* end of 1st pass: allocate space for the 2nd pass */
		if (!i) {
			pci->regs[iter].nmap_regs = cnt;
			pci->regs[iter].intr_map_regs =
			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
		}
	}
}
8136 
/*
 * Sample the AXQ l2_io_q performance counter. On the post-suspend
 * pass the counter select register is saved and pointed at L2_IO_Q;
 * on the pre-resume pass the saved selection is restored. The two
 * samples are later compared by drmach_slot1_pause_verify().
 */
static void
drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
{
	uint32_t	reg;

	/* no AXQ was recorded for this boardset */
	if (axq->reg_basepa == 0x0UL)
		return;

	if (iter == DRMACH_POST_SUSPEND) {
		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
		    AXQ_SLOT1_PERFCNT_SEL);
		/*
		 * Select l2_io_queue counter by writing L2_IO_Q mux
		 * input to bits 0-6 of perf cntr select reg.
		 */
		reg = axq->pcr_sel_save;
		reg &= ~AXQ_PIC_CLEAR_MASK;
		reg |= L2_IO_Q;

		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
	}

	/* read the counter for this snapshot */
	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);

	if (iter == DRMACH_PRE_RESUME) {
		/* restore the original counter selection */
		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
		    axq->pcr_sel_save);
	}

	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
}
8168 }
8169 
/*
 * Snapshot the error and interrupt state of one Schizo and its PCI
 * leaves into the regs[iter] sample, including the interrupt mapping
 * registers for all non-idle interrupts.
 */
static void
drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
{
	int	i;
	drmach_s1p_pci_t *pci;

	/* no Schizo was recorded at this index */
	if (schizo->csr_basepa == 0x0UL)
		return;

	schizo->regs[iter].csr =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
	schizo->regs[iter].errctrl =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
	schizo->regs[iter].errlog =
	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);

	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
		pci = &schizo->pci[i];
		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
			pci->regs[iter].slot_intr_state_diag =
			    lddphysio(pci->csr_basepa +
			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);

			pci->regs[iter].obio_intr_state_diag =
			    lddphysio(pci->csr_basepa +
			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);

			drmach_s1p_intr_map_reg_save(pci, iter);
		}
	}
}
8201 
8202 /*
8203  * Called post-suspend and pre-resume to snapshot the suspend state
8204  * of slot1 AXQs and Schizos.
8205  */
8206 static void
8207 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8208     drmach_sr_iter_t iter)
8209 {
8210 	int	i, j;
8211 	drmach_slot1_pause_t *slot1;
8212 
8213 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8214 		if ((slot1 = slot1_paused[i]) == NULL)
8215 			continue;
8216 
8217 		drmach_s1p_axq_update(&slot1->axq, iter);
8218 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8219 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8220 	}
8221 }
8222 
8223 /*
8224  * Starcat hPCI Schizo devices.
8225  *
8226  * The name field is overloaded. NULL means the slot (interrupt concentrator
8227  * bus) is not used. intr_mask is a bit mask representing the 4 possible
8228  * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
8229  */
/* Per-slot, per-Schizo source device name and valid-interrupt-line mask. */
static struct {
	char	*name;
	uint8_t	intr_mask;
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8244 
8245 /*
8246  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
8247  * "Interrupt Registers", Table 22-69, page 306.
8248  */
/*
 * Map a Schizo internal interrupt number offset (0x30-0x37) to a
 * human readable description of its source.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char *ino_desc[] = {
		"Uncorrectable ECC error",	/* 0x30 */
		"Correctable ECC error",	/* 0x31 */
		"PCI Bus A Error",		/* 0x32 */
		"PCI Bus B Error",		/* 0x33 */
		"Safari Bus Error"		/* 0x34 */
	};
	int	idx;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	idx = ino & 0x7;
	if (idx < sizeof (ino_desc) / sizeof (ino_desc[0]))
		return (ino_desc[idx]);

	return ("Reserved");
}
8266 
/* Each ino occupies two state bits in the interrupt state diag reg. */
#define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)

/*
 * Decode and log a pending PCI slot interrupt (ino 0x00-0x1f): map
 * the ino to the source device and interrupt line using the
 * drmach_schz_slot_intr table, flagging invalid slot/line pairs.
 */
static void
drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
    int ino, drmach_sr_iter_t iter)
{
	uint8_t		intr_mask;
	char		*slot_devname;
	char		namebuf[OBP_MAXDRVNAME];
	int		slot, intr_line, slot_valid, intr_valid;

	ASSERT(ino >= 0 && ino <= 0x1f);
	ASSERT((pci->regs[iter].slot_intr_state_diag &
	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
	    COMMON_CLEAR_INTR_REG_IDLE);

	/* four interrupt lines per slot */
	slot = (ino >> 2) & 0x7;
	intr_line = ino & 0x3;

	slot_devname = drmach_schz_slot_intr[slot][unum].name;
	slot_valid = (slot_devname == NULL) ? 0 : 1;
	if (!slot_valid) {
		snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)", slot);
		slot_devname = namebuf;
	}

	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
	intr_valid = (1 << intr_line) & intr_mask;

	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
}
8300 
8301 /*
8302  * Log interrupt source device info for all valid, pending interrupts
8303  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8304  * error in the error ctrl reg.
8305  */
static void
drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
    int unum, drmach_sr_iter_t iter)
{
	uint64_t	reg;
	int		i, n, ino;
	drmach_s1p_pci_t *pci;

	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
	ASSERT(unum < STARCAT_SLOT1_IO_MAX);

	/*
	 * Check the saved interrupt mapping registers. If interrupt is valid,
	 * map the ino to the Schizo source device and check that the pci
	 * slot and interrupt line are valid.
	 */
	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
		pci = &schizo->pci[i];
		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
			reg = pci->regs[iter].intr_map_regs[n];
			if (reg & COMMON_INTR_MAP_REG_VALID) {
				ino = reg & COMMON_INTR_MAP_REG_INO;

				/* ino ranges per the Schizo spec */
				if (ino <= 0x1f) {
					/*
					 * PCI slot interrupt
					 */
					drmach_s1p_decode_slot_intr(exp, unum,
					    pci, ino, iter);
				} else if (ino <= 0x2f) {
					/*
					 * OBIO interrupt
					 */
					prom_printf("IO%d/P%d OBIO interrupt: "
					    "ino=0x%x\n", exp, unum, ino);
				} else if (ino <= 0x37) {
					/*
					 * Internal interrupt
					 */
					prom_printf("IO%d/P%d Internal "
					    "interrupt: ino=0x%x (%s)\n",
					    exp, unum, ino,
					    drmach_schz_internal_ino2str(ino));
				} else {
					/*
					 * NewLink interrupt
					 */
					prom_printf("IO%d/P%d NewLink "
					    "interrupt: ino=0x%x\n", exp,
					    unum, ino);
				}

				DRMACH_PR("drmach_s1p_schizo_log_intr: "
				    "exp=%d, schizo=%d, pci_leaf=%c, "
				    "ino=0x%x, intr_map_reg=0x%lx\n",
				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
			}
		}
	}
}
8366 
8367 /*
8368  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8369  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8370  */
8371 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8372 
8373 /*
8374  * Check for possible error indicators prior to resuming the
8375  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8376  */
static void
drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
    drmach_sr_iter_t iter)
{
	int	i, j;
	int 	errflag = 0;
	drmach_slot1_pause_t *slot1;

	/*
	 * Check for logged schizo bus error and pending interrupts.
	 */
	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
		if ((slot1 = slot1_paused[i]) == NULL)
			continue;

		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
			if (slot1->schizo[j].csr_basepa == 0x0UL)
				continue;

			if (slot1->schizo[j].regs[iter].errlog &
			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
				/* print the warning banner only once */
				if (!errflag) {
					prom_printf("DR WARNING: interrupt "
					    "attempt detected during "
					    "copy-rename (%s):\n",
					    (iter == DRMACH_POST_SUSPEND) ?
					    "post suspend" : "pre resume");
					++errflag;
				}
				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
				    i, j, iter);
			}
		}
	}

	/*
	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
	 */
	if (iter == DRMACH_PRE_RESUME) {
		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
			if ((slot1 = slot1_paused[i]) == NULL)
				continue;

			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
				prom_printf("DR WARNING: IO transactions "
				    "detected on IO%d during copy-rename: "
				    "AXQ l2_io_q performance counter "
				    "start=%d, end=%d\n", i,
				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
			}
		}
	}
}
8432 
/* Node on a doubly-linked ring of held dips, kept in discovery order. */
struct drmach_sr_list {
	dev_info_t		*dip;
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};

/*
 * Strict suspend ordering for the named drivers; they are resumed in
 * the reverse order. Each entry anchors a ring of matching instances.
 */
static struct drmach_sr_ordered {
	char			*name;
	struct drmach_sr_list	*ring;
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8447 
8448 static void
8449 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8450 {
8451 	struct drmach_sr_list *np;
8452 
8453 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", dip);
8454 
8455 	np = (struct drmach_sr_list *)kmem_alloc(
8456 	    sizeof (struct drmach_sr_list), KM_SLEEP);
8457 
8458 	ndi_hold_devi(dip);
8459 	np->dip = dip;
8460 
8461 	if (*lp == NULL) {
8462 		/* establish list */
8463 		*lp = np->next = np->prev = np;
8464 	} else {
8465 		/* place new node behind head node on ring list */
8466 		np->prev = (*lp)->prev;
8467 		np->next = *lp;
8468 		np->prev->next = np;
8469 		np->next->prev = np;
8470 	}
8471 }
8472 
/*
 * Remove dip from the suspend/resume ring anchored at *lp and release
 * the hold taken in drmach_sr_insert(). The search starts at the ring
 * tail since resume processes entries in reverse insertion order.
 */
static void
drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
{
	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", dip);

	if (*lp) {
		struct drmach_sr_list *xp;

		/* start search with mostly likely node */
		xp = (*lp)->prev;
		do {
			if (xp->dip == dip) {
				xp->prev->next = xp->next;
				xp->next->prev = xp->prev;

				/* move head; a ring of one becomes empty */
				if (xp == *lp)
					*lp = xp->next;
				if (xp == *lp)
					*lp = NULL;
				xp->dip = NULL;
				ndi_rele_devi(dip);
				kmem_free(xp, sizeof (*xp));

				DRMACH_PR("drmach_sr_delete:"
				    " disposed sr node for dip %p", dip);
				return;
			}

			DRMACH_PR("drmach_sr_delete: still searching\n");

			xp = xp->prev;
		} while (xp != (*lp)->prev);
	}

	/* every dip should be found during resume */
	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", dip);
}
8510 
/*
 * Called for each device during suspend (sflag != 0) and resume
 * (sflag == 0). Devices named in drmach_sr_ordered[] are captured on
 * their rings so they can be suspended last and resumed first; slot 1
 * AXQs are also recorded for the pause debug facility. Returns 1 if
 * this dip's suspend/resume is handled here, 0 otherwise.
 */
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	int	rv;
	int	len;
	char    name[OBP_MAXDRVNAME];

	if (drmach_slot1_pause_debug) {
		if (sflag && drmach_slot1_pause_init) {
			/* first device of a new suspend: reset pause state */
			drmach_slot1_pause_free(drmach_slot1_paused);
			drmach_slot1_pause_init = 0;
		} else if (!sflag && !drmach_slot1_pause_init) {
			/* schedule init for next suspend */
			drmach_slot1_pause_init = 1;
		}
	}

	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "name", &len);
	if (rv == DDI_PROP_SUCCESS) {
		int		portid;
		uint64_t	reg;
		struct drmach_sr_ordered *op;

		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
		    DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);

		if (rv != DDI_PROP_SUCCESS)
			return (0);

		if (drmach_slot1_pause_debug && sflag &&
		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
			drmach_slot1_pause_add_axq(dip, name, portid, reg,
			    drmach_slot1_paused);
		}

		/* capture devices with an ordering requirement */
		for (op = drmach_sr_ordered; op->name; op++) {
			if (strncmp(op->name, name, strlen(op->name)) == 0) {
				if (sflag)
					drmach_sr_insert(&op->ring, dip);
				else
					drmach_sr_delete(&op->ring, dip);
				return (1);
			}
		}
	}

	return (0);
}
8560 
8561 static void
8562 drmach_sr_dip(dev_info_t *dip, int suspend)
8563 {
8564 	int	 rv;
8565 	major_t	 maj;
8566 	char	*name, *name_addr, *aka;
8567 
8568 	if ((name = ddi_get_name(dip)) == NULL)
8569 		name = "<null name>";
8570 	else if ((maj = ddi_name_to_major(name)) != -1)
8571 		aka = ddi_major_to_name(maj);
8572 	else
8573 		aka = "<unknown>";
8574 
8575 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8576 		name_addr = "<null>";
8577 
8578 	prom_printf("\t%s %s@%s (aka %s)\n",
8579 	    suspend ? "suspending" : "resuming",
8580 	    name, name_addr, aka);
8581 
8582 	if (suspend) {
8583 		rv = devi_detach(dip, DDI_SUSPEND);
8584 	} else {
8585 		rv = devi_attach(dip, DDI_RESUME);
8586 	}
8587 
8588 	if (rv != DDI_SUCCESS) {
8589 		prom_printf("\tFAILED to %s %s@%s\n",
8590 		    suspend ? "suspend" : "resume",
8591 		    name, name_addr);
8592 	}
8593 }
8594 
8595 void
8596 drmach_suspend_last()
8597 {
8598 	struct drmach_sr_ordered *op;
8599 
8600 	if (drmach_slot1_pause_debug)
8601 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8602 
8603 	/*
8604 	 * The ordering array declares the strict sequence in which
8605 	 * the named drivers are to suspended. Each element in
8606 	 * the array may have a double-linked ring list of driver
8607 	 * instances (dip) in the order in which they were presented
8608 	 * to drmach_verify_sr. If present, walk the list in the
8609 	 * forward direction to suspend each instance.
8610 	 */
8611 	for (op = drmach_sr_ordered; op->name; op++) {
8612 		if (op->ring) {
8613 			struct drmach_sr_list *rp;
8614 
8615 			rp = op->ring;
8616 			do {
8617 				drmach_sr_dip(rp->dip, 1);
8618 				rp = rp->next;
8619 			} while (rp != op->ring);
8620 		}
8621 	}
8622 
8623 	if (drmach_slot1_pause_debug) {
8624 		drmach_slot1_pause_update(drmach_slot1_paused,
8625 		    DRMACH_POST_SUSPEND);
8626 		drmach_slot1_pause_verify(drmach_slot1_paused,
8627 		    DRMACH_POST_SUSPEND);
8628 	}
8629 }
8630 
/*
 * Resume the driver instances captured by drmach_verify_sr() in the
 * reverse of the order in which drmach_suspend_last() suspended them.
 * With the pause debug facility enabled, slot 1 IO state is sampled
 * and verified first, before AXQ_DOMCTRL_PAUSE is de-asserted.
 */
void
drmach_resume_first()
{
	/* start one past the end of the ordering array */
	struct drmach_sr_ordered *op = drmach_sr_ordered +
	    (sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));

	if (drmach_slot1_pause_debug) {
		drmach_slot1_pause_update(drmach_slot1_paused,
		    DRMACH_PRE_RESUME);
		drmach_slot1_pause_verify(drmach_slot1_paused,
		    DRMACH_PRE_RESUME);
	}

	op -= 1;	/* point at terminating element */

	/*
	 * walk ordering array and rings backwards to resume dips
	 * in reverse order in which they were suspended
	 */
	while (--op >= drmach_sr_ordered) {
		if (op->ring) {
			struct drmach_sr_list *rp;

			/* walk the ring backwards from the tail */
			rp = op->ring->prev;
			do {
				drmach_sr_dip(rp->dip, 0);
				rp = rp->prev;
			} while (rp != op->ring->prev);
		}
	}
}
8662 
8663 /*
8664  * Log a DR sysevent.
8665  * Return value: 0 success, non-zero failure.
8666  */
8667 int
8668 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8669 {
8670 	sysevent_t			*ev;
8671 	sysevent_id_t			eid;
8672 	int				rv, km_flag;
8673 	sysevent_value_t		evnt_val;
8674 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8675 	char				attach_pnt[MAXNAMELEN];
8676 
8677 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8678 	attach_pnt[0] = '\0';
8679 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8680 		rv = -1;
8681 		goto logexit;
8682 	}
8683 	if (verbose)
8684 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8685 		    attach_pnt, hint, flag, verbose);
8686 
8687 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8688 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8689 		rv = -2;
8690 		goto logexit;
8691 	}
8692 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8693 	evnt_val.value.sv_string = attach_pnt;
8694 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8695 	    &evnt_val, km_flag)) != 0)
8696 		goto logexit;
8697 
8698 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8699 	evnt_val.value.sv_string = hint;
8700 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8701 	    &evnt_val, km_flag)) != 0) {
8702 		sysevent_free_attr(evnt_attr_list);
8703 		goto logexit;
8704 	}
8705 
8706 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8707 
8708 	/*
8709 	 * Log the event but do not sleep waiting for its
8710 	 * delivery. This provides insulation from syseventd.
8711 	 */
8712 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8713 
8714 logexit:
8715 	if (ev)
8716 		sysevent_free(ev);
8717 	if ((rv != 0) && verbose)
8718 		cmn_err(CE_WARN,
8719 		    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8720 		    rv, attach_pnt, hint);
8721 
8722 	return (rv);
8723 }
8724 
8725 /*
8726  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8727  * Only the valid entries are modified, so the array should be zeroed out
8728  * initially.
8729  */
8730 static void
8731 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8732 	int	i;
8733 	char	c;
8734 
8735 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8736 
8737 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8738 		c = drmach_slice_table[i];
8739 
8740 		if (c & 0x20) {
8741 			slice_arr[i].valid = 1;
8742 			slice_arr[i].slice = c & 0x1f;
8743 		}
8744 	}
8745 }
8746 
8747 /*
8748  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8749  * Only the valid entries are modified, so the array should be zeroed out
8750  * initially.
8751  */
8752 static void
8753 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8754 	int		rv, exp, mcnum, bank;
8755 	uint64_t	madr;
8756 	drmachid_t	id;
8757 	drmach_board_t	*bp;
8758 	drmach_mem_t	*mp;
8759 	dr_memregs_t	*memregs;
8760 
8761 	/* CONSTCOND */
8762 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8763 
8764 	for (exp = 0; exp < 18; exp++) {
8765 		rv = drmach_array_get(drmach_boards,
8766 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8767 		ASSERT(rv == 0);	/* should never be out of bounds */
8768 		if (id == NULL) {
8769 			continue;
8770 		}
8771 
8772 		memregs = &regs_arr[exp];
8773 		bp = (drmach_board_t *)id;
8774 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8775 			mcnum = mp->dev.portid & 0x3;
8776 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8777 				drmach_mem_read_madr(mp, bank, &madr);
8778 				if (madr & DRMACH_MC_VALID_MASK) {
8779 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8780 					    exp, mcnum, bank, madr);
8781 					memregs->madr[mcnum][bank].hi =
8782 					    DRMACH_U64_TO_MCREGHI(madr);
8783 					memregs->madr[mcnum][bank].lo =
8784 					    DRMACH_U64_TO_MCREGLO(madr);
8785 				}
8786 			}
8787 		}
8788 	}
8789 }
8790 
8791 /*
8792  * Do not allow physical address range modification if either board on this
8793  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8794  *
8795  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8796  * install the cache line as owned/dirty as a result of the RTSR transaction.
8797  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8798  * list before the rename after flushing local caches.  When copy-rename
8799  * requires changing the physical address ranges (i.e. smaller memory target),
8800  * the bus sync list contains physical addresses that will not exist after the
8801  * rename.  If these cache lines are owned due to a RTSR, a system error can
8802  * occur following the rename when these cache lines are evicted and a writeback
8803  * is attempted.
8804  *
8805  * Incoming parameter represents either the copy-rename source or a candidate
8806  * target memory board.  On Starcat, only slot0 boards may have memory.
8807  */
8808 int
8809 drmach_allow_memrange_modify(drmachid_t s0id)
8810 {
8811 	drmach_board_t	*s0bp, *s1bp;
8812 	drmachid_t	s1id;
8813 	int		rv;
8814 
8815 	s0bp = s0id;
8816 
8817 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8818 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8819 
8820 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8821 		/*
8822 		 * This is reason enough to fail the request, no need
8823 		 * to check the device list for cpus.
8824 		 */
8825 		return (0);
8826 	}
8827 
8828 	/*
8829 	 * Check for MCPU board on the same expander.
8830 	 *
8831 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8832 	 * types, as it is derived at from the POST gdcd board flag
8833 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8834 	 * ignored) for boards with no processors.  Since NULL proc LPA
8835 	 * applies only to processors, we walk the devices array to detect
8836 	 * MCPUs.
8837 	 */
8838 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8839 	s1bp = s1id;
8840 	if (rv == 0 && s1bp != NULL) {
8841 
8842 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8843 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8844 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8845 		    DRMACH_BNUM2EXP(s1bp->bnum));
8846 
8847 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8848 		    s1bp->devices != NULL) {
8849 			int		d_idx;
8850 			drmachid_t	d_id;
8851 
8852 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8853 			while (rv == 0) {
8854 				if (DRMACH_IS_CPU_ID(d_id)) {
8855 					/*
8856 					 * Fail MCPU in NULL LPA mode.
8857 					 */
8858 					return (0);
8859 				}
8860 
8861 				rv = drmach_array_next(s1bp->devices, &d_idx,
8862 				    &d_id);
8863 			}
8864 		}
8865 	}
8866 
8867 	return (1);
8868 }
8869