xref: /titanic_41/usr/src/uts/sun4u/starcat/io/drmach.c (revision 08278a5e91755ccdb5850c19d21d42fb2e16b50e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/note.h>
28 #include <sys/debug.h>
29 #include <sys/types.h>
30 #include <sys/varargs.h>
31 #include <sys/errno.h>
32 #include <sys/cred.h>
33 #include <sys/dditypes.h>
34 #include <sys/devops.h>
35 #include <sys/modctl.h>
36 #include <sys/poll.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ndi_impldefs.h>
42 #include <sys/stat.h>
43 #include <sys/kmem.h>
44 #include <sys/vmem.h>
45 #include <sys/disp.h>
46 #include <sys/processor.h>
47 #include <sys/cheetahregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/mboxsc.h>
70 #include <sys/plat_ecc_dimm.h>
71 
72 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
73 #include <sys/schpc.h>
74 #include <sys/pci.h>
75 
76 #include <sys/starcat.h>
77 #include <sys/cpu_sgnblk_defs.h>
78 #include <sys/drmach.h>
79 #include <sys/dr_util.h>
80 #include <sys/dr_mbx.h>
81 #include <sys/sc_gptwocfg.h>
82 #include <sys/iosramreg.h>
83 #include <sys/iosramio.h>
84 #include <sys/iosramvar.h>
85 #include <sys/axq.h>
86 #include <sys/post/scat_dcd.h>
87 #include <sys/kobj.h>
88 #include <sys/taskq.h>
89 #include <sys/cmp.h>
90 #include <sys/sbd_ioctl.h>
91 
92 #include <sys/sysevent.h>
93 #include <sys/sysevent/dr.h>
94 #include <sys/sysevent/eventdefs.h>
95 
96 #include <sys/pci/pcisch.h>
97 #include <sys/pci/pci_regs.h>
98 
99 #include <sys/ontrap.h>
100 
101 /* defined in ../ml/drmach.il.cpp */
102 extern void		bcopy32_il(uint64_t, uint64_t);
103 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
104 extern void		flush_dcache_il(void);
105 extern void		flush_icache_il(void);
106 extern void		flush_pcache_il(void);
107 
108 /* defined in ../ml/drmach_asm.s */
109 extern uint64_t		lddmcdecode(uint64_t physaddr);
110 extern uint64_t		lddsafconfig(void);
111 
112 /* XXX here until provided by sys/dman.h */
113 extern int man_dr_attach(dev_info_t *);
114 extern int man_dr_detach(dev_info_t *);
115 
116 #define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
117 #define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
118 #define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
119 
120 #define	DRMACH_SLICE_MASK		0x1Full
121 #define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
122 #define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
123 
/*
 * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
 * available address space and the usable address space for every slice.
 * There must be a distinction between the available and the usable space
 * due to a restriction imposed by CDC memory size.
 */
130 
131 #define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
132 #define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
133 
134 #define	DRMACH_MC_NBANKS		4
135 
136 #define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
137 #define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
138 
139 #define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
140 #define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
141 	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
142 
/*
 * The Cheetah's Safari Configuration Register and the Schizo's
 * Safari Control/Status Register place the LPA base and bound fields in
 * the same bit locations within their register word. This source code
 * takes advantage of this by defining only one set of LPA encoding/decoding
 * macros which are shared by various Cheetah and Schizo drmach routines.
 */
150 #define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
151 #define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
152 
153 #define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
154 #define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
155 #define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
156 #define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
157 
158 #define	DRMACH_L1_SET_LPA(b)		\
159 	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
160 
161 #define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
162 #define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
163 
164 /*
165  * Name properties for frequently accessed device nodes.
166  */
167 #define	DRMACH_CPU_NAMEPROP		"cpu"
168 #define	DRMACH_CMP_NAMEPROP		"cmp"
169 #define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
170 #define	DRMACH_PCI_NAMEPROP		"pci"
171 
/*
 * Maximum value of processor Safari Timeout Log (TOL) field of
 * Safari Config reg (7 secs), expressed in microseconds.
 *
 * The expansion is parenthesized so the macro acts as a single operand
 * wherever it is used (e.g. as a divisor or multiplicand).
 */
#define	DRMACH_SAF_TOL_MAX		(7 * 1000000)
177 
178 /*
179  * drmach_board_t flag definitions
180  */
181 #define	DRMACH_NULL_PROC_LPA		0x1
182 
183 typedef struct {
184 	uint32_t	reg_addr_hi;
185 	uint32_t	reg_addr_lo;
186 	uint32_t	reg_size_hi;
187 	uint32_t	reg_size_lo;
188 } drmach_reg_t;
189 
190 typedef struct {
191 	struct drmach_node	*node;
192 	void			*data;
193 } drmach_node_walk_args_t;
194 
/*
 * A drmach_node_t is a cursor into the device tree plus a vector of
 * operations for accessing the tree at that position. drmach_node_new()
 * fills in the operations with either the OBP (prom_*) or the DDI
 * (dev_info_t) implementations, depending on drmach_initialized.
 */
typedef struct drmach_node {
	/*
	 * Current position: a pnode_t stored as a pointer for OBP-backed
	 * nodes, or a dev_info_t pointer for DDI-backed nodes.
	 */
	void		*here;

	/* return the OBP node id for the current position */
	pnode_t		 (*get_dnode)(struct drmach_node *node);
	/* invoke cb on nodes of the tree; non-zero cb result is returned */
	int		 (*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	/* return the dev_info_t for the current position, or NULL */
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	/* store the length of property "name" in *len; 0 on success, else -1 */
	int		 (*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	/* copy property "name" into buf; 0 on success, else -1 */
	int		 (*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	/* initialize pnode to refer to the parent of node; 0 or -1 */
	int		 (*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
209 
/*
 * Bounds-checked array of drmachid_t values addressed by indices in the
 * inclusive range [min_index, max_index]. See drmach_array_new().
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr in bytes */
	drmachid_t	*arr;		/* storage; slot 0 holds min_index */
} drmach_array_t;
216 
217 typedef struct {
218 	void		*isa;
219 
220 	void		 (*dispose)(drmachid_t);
221 	sbd_error_t	*(*release)(drmachid_t);
222 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
223 
224 	char		 name[MAXNAMELEN];
225 } drmach_common_t;
226 
227 struct drmach_board;
228 typedef struct drmach_board drmach_board_t;
229 
230 typedef struct {
231 	drmach_common_t	 cm;
232 	const char	*type;
233 	drmach_board_t	*bp;
234 	drmach_node_t	*node;
235 	int		 portid;
236 	int		 unum;
237 	int		 busy;
238 	int		 powered;
239 } drmach_device_t;
240 
241 typedef struct drmach_cpu {
242 	drmach_device_t	 dev;
243 	uint64_t	 scr_pa;
244 	processorid_t	 cpuid;
245 	int		 coreid;
246 } drmach_cpu_t;
247 
248 typedef struct drmach_mem {
249 	drmach_device_t	 dev;
250 	struct drmach_mem *next;
251 	uint64_t	 nbytes;
252 	uint64_t	 madr_pa;
253 } drmach_mem_t;
254 
255 typedef struct drmach_io {
256 	drmach_device_t	 dev;
257 	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
258 } drmach_io_t;
259 
260 struct drmach_board {
261 	drmach_common_t	 cm;
262 	int		 bnum;
263 	int		 assigned;
264 	int		 powered;
265 	int		 connected;
266 	int		 empty;
267 	int		 cond;
268 	uint_t		 cpu_impl;
269 	uint_t		 flags;
270 	drmach_node_t	*tree;
271 	drmach_array_t	*devices;
272 	drmach_mem_t	*mem;
273 	uint64_t	 stardrb_offset;
274 	char		 type[BD_TYPELEN];
275 };
276 
277 typedef struct {
278 	int		 flags;
279 	drmach_device_t	*dp;
280 	sbd_error_t	*err;
281 	dev_info_t	*fdip;
282 } drmach_config_args_t;
283 
284 typedef struct {
285 	drmach_board_t	*obj;
286 	int		 ndevs;
287 	void		*a;
288 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
289 	sbd_error_t	*err;
290 } drmach_board_cb_data_t;
291 
292 typedef struct drmach_casmslot {
293 	int	valid;
294 	int	slice;
295 } drmach_casmslot_t;
296 
297 typedef enum {
298 	DRMACH_CR_OK,
299 	DRMACH_CR_MC_IDLE_ERR,
300 	DRMACH_CR_IOPAUSE_ERR,
301 	DRMACH_CR_ONTRAP_ERR
302 } drmach_cr_err_t;
303 
304 typedef struct {
305 	void		*isa;
306 	caddr_t		 data;
307 	drmach_mem_t	*s_mp;
308 	drmach_mem_t	*t_mp;
309 	struct memlist	*c_ml;
310 	uint64_t	 s_copybasepa;
311 	uint64_t	 t_copybasepa;
312 	drmach_cr_err_t	 ecode;
313 	void		*earg;
314 } drmach_copy_rename_t;
315 
/*
 * The following global is read as a boolean value, non-zero is true.
 * If zero, DR copy-rename and cpu poweron will not set the processor
 * LPA settings (CBASE, CBND of Safari config register) to correspond
 * to the current memory slice map. LPAs of processors present at boot
 * will remain as programmed by POST. LPAs of processors on boards added
 * by DR will remain NULL, as programmed by POST. This can be used
 * to override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
 * POST in the LDCD (and copied to the GDCD by SMS).
 *
 * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
 * to Schizo device LPAs. These are always set by DR.
 */
329 static int		 drmach_reprogram_lpa = 1;
330 
331 /*
332  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
333  * can fail to receive an XIR. To workaround this issue until a hardware
334  * fix is implemented, we will exclude the selection of these CPUs.
335  * Setting this to 0 will allow their selection again.
336  */
337 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
338 
339 static int		 drmach_initialized;
340 static drmach_array_t	*drmach_boards;
341 
342 static int		 drmach_cpu_delay = 1000;
343 static int		 drmach_cpu_ntries = 50000;
344 
345 static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
346 static kmutex_t		 drmach_slice_table_lock;
347 
348 tte_t			 drmach_cpu_sram_tte[NCPU];
349 caddr_t			 drmach_cpu_sram_va;
350 
351 /*
352  * Setting to non-zero will enable delay before all disconnect ops.
353  */
354 static int		 drmach_unclaim_delay_all;
355 /*
356  * Default delay is slightly greater than the max processor Safari timeout.
357  * This delay is intended to ensure the outstanding Safari activity has
358  * retired on this board prior to a board disconnect.
359  */
360 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
361 
362 /*
363  * By default, DR of non-Panther procs is not allowed into a Panther
364  * domain with large page sizes enabled.  Setting this to 0 will remove
365  * the restriction.
366  */
367 static int		 drmach_large_page_restriction = 1;
368 
369 /*
370  * Used to pass updated LPA values to procs.
371  * Protocol is to clear the array before use.
372  */
373 volatile uchar_t	*drmach_xt_mb;
374 volatile uint64_t	 drmach_xt_ready;
375 static kmutex_t		 drmach_xt_mb_lock;
376 static int		 drmach_xt_mb_size;
377 
378 uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
379 static kmutex_t		 drmach_bus_sync_lock;
380 
381 static sbd_error_t	*drmach_device_new(drmach_node_t *,
382 				drmach_board_t *, int, drmachid_t *);
383 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
384 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
385 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
386 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
387 
388 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
389 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
390 				char *name, void *buf, int len);
391 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
392 				char *name, int *len);
393 
394 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
395 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
396 				char *name, void *buf, int len);
397 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
398 				char *name, int *len);
399 
400 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
401 				caddr_t obufp, int olen,
402 				caddr_t ibufp, int ilen);
403 
404 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
405 sbd_error_t		*drmach_io_post_release(drmachid_t id);
406 
407 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
408 				drmach_device_t **dpp, cpu_flag_t *oflags);
409 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
410 				cpu_flag_t oflags);
411 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
412 void			drmach_iocage_mem_scrub(uint64_t nbytes);
413 
414 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
415 
416 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
417 
418 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
419 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
420 
421 static void		 drmach_bus_sync_list_update(void);
422 static void		 drmach_slice_table_update(drmach_board_t *, int);
423 static int		 drmach_portid2bnum(int);
424 
425 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
426 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
427 
428 static int		drmach_panther_boards(void);
429 
430 static int		drmach_name2type_idx(char *);
431 
432 #ifdef DEBUG
433 
434 #define	DRMACH_PR		if (drmach_debug) printf
435 #define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
436 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
437 #else
438 
439 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
440 #define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
441 #endif /* DEBUG */
442 
/*
 * Object identification macros.
 *
 * Board and device objects start with a drmach_common_t whose "isa"
 * member is compared here against the address of a constructor
 * (drmach_board_new, drmach_cpu_new, drmach_mem_new, drmach_io_new).
 *
 * The "id" argument is fully parenthesized in every expansion so that
 * compound expressions (conditional expressions, pointer arithmetic, ...)
 * can be passed safely. Note that "id" is still evaluated more than once;
 * do not pass expressions with side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device object (cpu, mem or io), false for boards */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any recognized object: board, cpu, mem or io */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
473 
474 #define	DRMACH_INTERNAL_ERROR() \
475 	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
476 static char		*drmach_ie_fmt = "drmach.c %d";
477 
478 static struct {
479 	const char	 *name;
480 	const char	 *type;
481 	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
482 } drmach_name2type[] = {
483 	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
484 	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
485 	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
486 	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
487 	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
488 	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
489 	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
490 };
491 
492 /*
493  * drmach autoconfiguration data structures and interfaces
494  */
495 
496 extern struct mod_ops mod_miscops;
497 
498 static struct modlmisc modlmisc = {
499 	&mod_miscops,
500 	"Sun Fire 15000 DR"
501 };
502 
503 static struct modlinkage modlinkage = {
504 	MODREV_1,
505 	(void *)&modlmisc,
506 	NULL
507 };
508 
509 /*
510  * drmach_boards_rwlock is used to synchronize read/write
511  * access to drmach_boards array between status and board lookup
512  * as READERS, and assign, and unassign threads as WRITERS.
513  */
514 static krwlock_t	drmach_boards_rwlock;
515 
516 static kmutex_t		drmach_i_lock;
517 static kmutex_t		drmach_iocage_lock;
518 static kcondvar_t 	drmach_iocage_cv;
519 static int		drmach_iocage_is_busy = 0;
520 uint64_t		drmach_iocage_paddr;
521 static caddr_t		drmach_iocage_vaddr;
522 static int		drmach_iocage_size = 0;
523 static int		drmach_is_cheetah = -1;
524 
525 int
526 _init(void)
527 {
528 	int	err;
529 
530 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
531 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
532 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
533 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
534 	    drmach_xt_mb_size, VM_SLEEP);
535 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
536 	if ((err = mod_install(&modlinkage)) != 0) {
537 		mutex_destroy(&drmach_i_lock);
538 		rw_destroy(&drmach_boards_rwlock);
539 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
540 		    drmach_xt_mb_size);
541 	}
542 
543 	return (err);
544 }
545 
546 int
547 _fini(void)
548 {
549 	static void	drmach_fini(void);
550 	int		err;
551 
552 	if ((err = mod_remove(&modlinkage)) == 0)
553 		drmach_fini();
554 
555 	return (err);
556 }
557 
558 int
559 _info(struct modinfo *modinfop)
560 {
561 	return (mod_info(&modlinkage, modinfop));
562 }
563 
564 /*
565  * drmach_node_* routines serve the purpose of separating the
566  * rest of the code from the device tree and OBP.  This is necessary
567  * because of In-Kernel-Probing.  Devices probed after stod, are probed
568  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
569  * have dnode ids.
570  */
571 
572 static int
573 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
574 {
575 	pnode_t		nodeid;
576 	static char	*fn = "drmach_node_obp_get_parent";
577 
578 	nodeid = np->get_dnode(np);
579 	if (nodeid == OBP_NONODE) {
580 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
581 		return (-1);
582 	}
583 
584 	bcopy(np, pp, sizeof (drmach_node_t));
585 
586 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
587 	if (pp->here == OBP_NONODE) {
588 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
589 		return (-1);
590 	}
591 
592 	return (0);
593 }
594 
595 static pnode_t
596 drmach_node_obp_get_dnode(drmach_node_t *np)
597 {
598 	return ((pnode_t)(uintptr_t)np->here);
599 }
600 
/*
 * Argument block handed to drmach_node_ddi_walk_cb() by
 * drmach_node_ddi_walk() through ddi_walk_devs().
 */
typedef struct {
	/* arguments to pass on to cb */
	drmach_node_walk_args_t	*nwargs;
	/* caller's per-node callback */
	int 			(*cb)(drmach_node_walk_args_t *args);
	/* result of the most recent cb invocation */
	int			err;
} drmach_node_ddi_walk_args_t;
606 
607 int
608 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
609 {
610 	drmach_node_ddi_walk_args_t	*nargs;
611 
612 	nargs = (drmach_node_ddi_walk_args_t *)arg;
613 
614 	/*
615 	 * dip doesn't have to be held here as we are called
616 	 * from ddi_walk_devs() which holds the dip.
617 	 */
618 	nargs->nwargs->node->here = (void *)dip;
619 
620 	nargs->err = nargs->cb(nargs->nwargs);
621 
622 	/*
623 	 * Set "here" to NULL so that unheld dip is not accessible
624 	 * outside ddi_walk_devs()
625 	 */
626 	nargs->nwargs->node->here = NULL;
627 
628 	if (nargs->err)
629 		return (DDI_WALK_TERMINATE);
630 	else
631 		return (DDI_WALK_CONTINUE);
632 }
633 
634 static int
635 drmach_node_ddi_walk(drmach_node_t *np, void *data,
636 		int (*cb)(drmach_node_walk_args_t *args))
637 {
638 	drmach_node_walk_args_t		args;
639 	drmach_node_ddi_walk_args_t	nargs;
640 
641 	/* initialized args structure for callback */
642 	args.node = np;
643 	args.data = data;
644 
645 	nargs.nwargs = &args;
646 	nargs.cb = cb;
647 	nargs.err = 0;
648 
649 	/*
650 	 * Root node doesn't have to be held in any way.
651 	 */
652 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
653 
654 	return (nargs.err);
655 }
656 
657 static int
658 drmach_node_obp_walk(drmach_node_t *np, void *data,
659 		int (*cb)(drmach_node_walk_args_t *args))
660 {
661 	pnode_t			nodeid;
662 	int			rv;
663 	drmach_node_walk_args_t	args;
664 
665 	/* initialized args structure for callback */
666 	args.node = np;
667 	args.data = data;
668 
669 	nodeid = prom_childnode(prom_rootnode());
670 
671 	/* save our new position within the tree */
672 	np->here = (void *)(uintptr_t)nodeid;
673 
674 	rv = 0;
675 	while (nodeid != OBP_NONODE) {
676 
677 		pnode_t child;
678 
679 		rv = (*cb)(&args);
680 		if (rv)
681 			break;
682 
683 		child = prom_childnode(nodeid);
684 		np->here = (void *)(uintptr_t)child;
685 
686 		while (child != OBP_NONODE) {
687 			rv = (*cb)(&args);
688 			if (rv)
689 				break;
690 
691 			child = prom_nextnode(child);
692 			np->here = (void *)(uintptr_t)child;
693 		}
694 
695 		nodeid = prom_nextnode(nodeid);
696 
697 		/* save our new position within the tree */
698 		np->here = (void *)(uintptr_t)nodeid;
699 	}
700 
701 	return (rv);
702 }
703 
704 static int
705 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
706 {
707 	dev_info_t	*ndip;
708 	static char	*fn = "drmach_node_ddi_get_parent";
709 
710 	ndip = np->n_getdip(np);
711 	if (ndip == NULL) {
712 		cmn_err(CE_WARN, "%s: NULL dip", fn);
713 		return (-1);
714 	}
715 
716 	bcopy(np, pp, sizeof (drmach_node_t));
717 
718 	pp->here = (void *)ddi_get_parent(ndip);
719 	if (pp->here == NULL) {
720 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
721 		return (-1);
722 	}
723 
724 	return (0);
725 }
726 
727 /*ARGSUSED*/
728 static pnode_t
729 drmach_node_ddi_get_dnode(drmach_node_t *np)
730 {
731 	return ((pnode_t)NULL);
732 }
733 
734 static drmach_node_t *
735 drmach_node_new(void)
736 {
737 	drmach_node_t *np;
738 
739 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
740 
741 	if (drmach_initialized) {
742 		np->get_dnode = drmach_node_ddi_get_dnode;
743 		np->walk = drmach_node_ddi_walk;
744 		np->n_getdip = drmach_node_ddi_get_dip;
745 		np->n_getproplen = drmach_node_ddi_get_proplen;
746 		np->n_getprop = drmach_node_ddi_get_prop;
747 		np->get_parent = drmach_node_ddi_get_parent;
748 	} else {
749 		np->get_dnode = drmach_node_obp_get_dnode;
750 		np->walk = drmach_node_obp_walk;
751 		np->n_getdip = drmach_node_obp_get_dip;
752 		np->n_getproplen = drmach_node_obp_get_proplen;
753 		np->n_getprop = drmach_node_obp_get_prop;
754 		np->get_parent = drmach_node_obp_get_parent;
755 	}
756 
757 	return (np);
758 }
759 
760 static void
761 drmach_node_dispose(drmach_node_t *np)
762 {
763 	kmem_free(np, sizeof (*np));
764 }
765 
766 /*
767  * Check if a CPU node is part of a CMP.
768  */
769 static int
770 drmach_is_cmp_child(dev_info_t *dip)
771 {
772 	dev_info_t *pdip;
773 
774 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
775 		return (0);
776 	}
777 
778 	pdip = ddi_get_parent(dip);
779 
780 	ASSERT(pdip);
781 
782 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
783 		return (1);
784 	}
785 
786 	return (0);
787 }
788 
789 static dev_info_t *
790 drmach_node_obp_get_dip(drmach_node_t *np)
791 {
792 	pnode_t		nodeid;
793 	dev_info_t	*dip;
794 
795 	nodeid = np->get_dnode(np);
796 	if (nodeid == OBP_NONODE)
797 		return (NULL);
798 
799 	dip = e_ddi_nodeid_to_dip(nodeid);
800 	if (dip) {
801 		/*
802 		 * The branch rooted at dip will have been previously
803 		 * held, or it will be the child of a CMP. In either
804 		 * case, the hold acquired in e_ddi_nodeid_to_dip()
805 		 * is not needed.
806 		 */
807 		ddi_release_devi(dip);
808 		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
809 	}
810 
811 	return (dip);
812 }
813 
814 static dev_info_t *
815 drmach_node_ddi_get_dip(drmach_node_t *np)
816 {
817 	return ((dev_info_t *)np->here);
818 }
819 
820 static int
821 drmach_node_walk(drmach_node_t *np, void *param,
822 		int (*cb)(drmach_node_walk_args_t *args))
823 {
824 	return (np->walk(np, param, cb));
825 }
826 
827 static int
828 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
829 {
830 	int		rv = 0;
831 	dev_info_t	*ndip;
832 	static char	*fn = "drmach_node_ddi_get_prop";
833 
834 	ndip = np->n_getdip(np);
835 	if (ndip == NULL) {
836 		cmn_err(CE_WARN, "%s: NULL dip", fn);
837 		rv = -1;
838 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
839 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
840 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
841 		rv = -1;
842 	}
843 
844 	return (rv);
845 }
846 
847 /* ARGSUSED */
848 static int
849 drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
850 {
851 	int		rv = 0;
852 	pnode_t		nodeid;
853 	static char	*fn = "drmach_node_obp_get_prop";
854 
855 	nodeid = np->get_dnode(np);
856 	if (nodeid == OBP_NONODE) {
857 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
858 		rv = -1;
859 	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
860 		rv = -1;
861 	} else {
862 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
863 	}
864 
865 	return (rv);
866 }
867 
868 static int
869 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
870 {
871 	int		rv = 0;
872 	dev_info_t	*ndip;
873 
874 	ndip = np->n_getdip(np);
875 	if (ndip == NULL) {
876 		rv = -1;
877 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
878 	    name, len) != DDI_PROP_SUCCESS) {
879 		rv = -1;
880 	}
881 
882 	return (rv);
883 }
884 
885 static int
886 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
887 {
888 	pnode_t	 nodeid;
889 	int	 rv;
890 
891 	nodeid = np->get_dnode(np);
892 	if (nodeid == OBP_NONODE)
893 		rv = -1;
894 	else {
895 		*len = prom_getproplen(nodeid, (caddr_t)name);
896 		rv = (*len < 0 ? -1 : 0);
897 	}
898 
899 	return (rv);
900 }
901 
902 static drmachid_t
903 drmach_node_dup(drmach_node_t *np)
904 {
905 	drmach_node_t *dup;
906 
907 	dup = drmach_node_new();
908 	dup->here = np->here;
909 	dup->get_dnode = np->get_dnode;
910 	dup->walk = np->walk;
911 	dup->n_getdip = np->n_getdip;
912 	dup->n_getproplen = np->n_getproplen;
913 	dup->n_getprop = np->n_getprop;
914 	dup->get_parent = np->get_parent;
915 
916 	return (dup);
917 }
918 
919 /*
920  * drmach_array provides convenient array construction, access,
921  * bounds checking and array destruction logic.
922  */
923 
924 static drmach_array_t *
925 drmach_array_new(int min_index, int max_index)
926 {
927 	drmach_array_t *arr;
928 
929 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
930 
931 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
932 	if (arr->arr_sz > 0) {
933 		arr->min_index = min_index;
934 		arr->max_index = max_index;
935 
936 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
937 		return (arr);
938 	} else {
939 		kmem_free(arr, sizeof (*arr));
940 		return (0);
941 	}
942 }
943 
944 static int
945 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
946 {
947 	if (idx < arr->min_index || idx > arr->max_index)
948 		return (-1);
949 	else {
950 		arr->arr[idx - arr->min_index] = val;
951 		return (0);
952 	}
953 	/*NOTREACHED*/
954 }
955 
956 static int
957 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
958 {
959 	if (idx < arr->min_index || idx > arr->max_index)
960 		return (-1);
961 	else {
962 		*val = arr->arr[idx - arr->min_index];
963 		return (0);
964 	}
965 	/*NOTREACHED*/
966 }
967 
968 static int
969 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
970 {
971 	int rv;
972 
973 	*idx = arr->min_index;
974 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
975 		*idx += 1;
976 
977 	return (rv);
978 }
979 
980 static int
981 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
982 {
983 	int rv;
984 
985 	*idx += 1;
986 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
987 		*idx += 1;
988 
989 	return (rv);
990 }
991 
992 static void
993 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
994 {
995 	drmachid_t	val;
996 	int		idx;
997 	int		rv;
998 
999 	rv = drmach_array_first(arr, &idx, &val);
1000 	while (rv == 0) {
1001 		(*disposer)(val);
1002 
1003 		/* clear the array entry */
1004 		rv = drmach_array_set(arr, idx, NULL);
1005 		ASSERT(rv == 0);
1006 
1007 		rv = drmach_array_next(arr, &idx, &val);
1008 	}
1009 
1010 	kmem_free(arr->arr, arr->arr_sz);
1011 	kmem_free(arr, sizeof (*arr));
1012 }
1013 
1014 
1015 static gdcd_t *
1016 drmach_gdcd_new()
1017 {
1018 	gdcd_t *gdcd;
1019 
1020 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1021 
1022 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1023 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1024 bail:
1025 		kmem_free(gdcd, sizeof (gdcd_t));
1026 		return (NULL);
1027 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1028 		goto bail;
1029 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1030 		goto bail;
1031 	}
1032 
1033 	return (gdcd);
1034 }
1035 
1036 static void
1037 drmach_gdcd_dispose(gdcd_t *gdcd)
1038 {
1039 	kmem_free(gdcd, sizeof (gdcd_t));
1040 }
1041 
1042 /*ARGSUSED*/
1043 sbd_error_t *
1044 drmach_configure(drmachid_t id, int flags)
1045 {
1046 	drmach_device_t	*dp;
1047 	dev_info_t	*rdip;
1048 	sbd_error_t	*err = NULL;
1049 
1050 	/*
1051 	 * On Starcat, there is no CPU driver, so it is
1052 	 * not necessary to configure any CPU nodes.
1053 	 */
1054 	if (DRMACH_IS_CPU_ID(id)) {
1055 		return (NULL);
1056 	}
1057 
1058 	for (; id; ) {
1059 		dev_info_t	*fdip = NULL;
1060 
1061 		if (!DRMACH_IS_DEVICE_ID(id))
1062 			return (drerr_new(0, ESTC_INAPPROP, NULL));
1063 		dp = id;
1064 
1065 		rdip = dp->node->n_getdip(dp->node);
1066 
1067 		/*
1068 		 * We held this branch earlier, so at a minimum its
1069 		 * root should still be present in the device tree.
1070 		 */
1071 		ASSERT(rdip);
1072 
1073 		DRMACH_PR("drmach_configure: configuring DDI branch");
1074 
1075 		ASSERT(e_ddi_branch_held(rdip));
1076 		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
1077 			if (err == NULL) {
1078 				/*
1079 				 * Record first failure but don't stop
1080 				 */
1081 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1082 				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
1083 
1084 				(void) ddi_pathname(dip, path);
1085 				err = drerr_new(1, ESTC_DRVFAIL, path);
1086 
1087 				kmem_free(path, MAXPATHLEN);
1088 			}
1089 
1090 			/*
1091 			 * If non-NULL, fdip is returned held and must be
1092 			 * released.
1093 			 */
1094 			if (fdip != NULL) {
1095 				ddi_release_devi(fdip);
1096 			}
1097 		}
1098 
1099 		if (DRMACH_IS_MEM_ID(id)) {
1100 			drmach_mem_t	*mp = id;
1101 			id = mp->next;
1102 		} else {
1103 			id = NULL;
1104 		}
1105 	}
1106 
1107 	return (err);
1108 }
1109 
1110 static sbd_error_t *
1111 drmach_device_new(drmach_node_t *node,
1112 	drmach_board_t *bp, int portid, drmachid_t *idp)
1113 {
1114 	int		i, rv, device_id, unum;
1115 	char		name[OBP_MAXDRVNAME];
1116 	drmach_device_t	proto;
1117 
1118 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1119 	if (rv) {
1120 		sbd_error_t *err;
1121 
1122 		/* every node is expected to have a name */
1123 		err = drerr_new(1, ESTC_GETPROP,
1124 		    "dip: 0x%p: property %s",
1125 		    node->n_getdip(node), OBP_NAME);
1126 
1127 		return (err);
1128 	}
1129 
1130 	i = drmach_name2type_idx(name);
1131 
1132 	if (i < 0 || strcmp(name, "cmp") == 0) {
1133 		/*
1134 		 * Not a node of interest to dr - including "cmp",
1135 		 * but it is in drmach_name2type[], which lets gptwocfg
1136 		 * driver to check if node is OBP created.
1137 		 */
1138 		*idp = (drmachid_t)0;
1139 		return (NULL);
1140 	}
1141 
1142 	/*
1143 	 * Derive a best-guess unit number from the portid value.
1144 	 * Some drmach_*_new constructors (drmach_pci_new, for example)
1145 	 * will overwrite the prototype unum value with one that is more
1146 	 * appropriate for the device.
1147 	 */
1148 	device_id = portid & 0x1f;
1149 	if (device_id < 4)
1150 		unum = device_id;
1151 	else if (device_id == 8) {
1152 		unum = 0;
1153 	} else if (device_id == 9) {
1154 		unum = 1;
1155 	} else if (device_id == 0x1c) {
1156 		unum = 0;
1157 	} else if (device_id == 0x1d) {
1158 		unum = 1;
1159 	} else {
1160 		return (DRMACH_INTERNAL_ERROR());
1161 	}
1162 
1163 	bzero(&proto, sizeof (proto));
1164 	proto.type = drmach_name2type[i].type;
1165 	proto.bp = bp;
1166 	proto.node = node;
1167 	proto.portid = portid;
1168 	proto.unum = unum;
1169 
1170 	return (drmach_name2type[i].new(&proto, idp));
1171 }
1172 
1173 static void
1174 drmach_device_dispose(drmachid_t id)
1175 {
1176 	drmach_device_t *self = id;
1177 
1178 	self->cm.dispose(id);
1179 }
1180 
1181 static drmach_board_t *
1182 drmach_board_new(int bnum)
1183 {
1184 	static sbd_error_t *drmach_board_release(drmachid_t);
1185 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1186 
1187 	drmach_board_t	*bp;
1188 
1189 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1190 
1191 	bp->cm.isa = (void *)drmach_board_new;
1192 	bp->cm.release = drmach_board_release;
1193 	bp->cm.status = drmach_board_status;
1194 
1195 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1196 
1197 	bp->bnum = bnum;
1198 	bp->devices = NULL;
1199 	bp->tree = drmach_node_new();
1200 
1201 	(void) drmach_array_set(drmach_boards, bnum, bp);
1202 	return (bp);
1203 }
1204 
1205 static void
1206 drmach_board_dispose(drmachid_t id)
1207 {
1208 	drmach_board_t *bp;
1209 
1210 	ASSERT(DRMACH_IS_BOARD_ID(id));
1211 	bp = id;
1212 
1213 	if (bp->tree)
1214 		drmach_node_dispose(bp->tree);
1215 
1216 	if (bp->devices)
1217 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1218 
1219 	kmem_free(bp, sizeof (*bp));
1220 }
1221 
1222 static sbd_error_t *
1223 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1224 {
1225 	sbd_error_t	*err = NULL;
1226 	drmach_board_t	*bp;
1227 	caddr_t		obufp;
1228 	dr_showboard_t	shb;
1229 
1230 	if (!DRMACH_IS_BOARD_ID(id))
1231 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1232 
1233 	bp = id;
1234 
1235 	/*
1236 	 * we need to know if the board's connected before
1237 	 * issuing a showboard message.  If it's connected, we just
1238 	 * reply with status composed of cached info
1239 	 */
1240 
1241 	if (!bp->connected) {
1242 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1243 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1244 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1245 		    sizeof (dr_showboard_t));
1246 
1247 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1248 		if (err)
1249 			return (err);
1250 
1251 		bp->connected = (shb.bd_assigned && shb.bd_active);
1252 		(void) strncpy(bp->type, shb.board_type, sizeof (bp->type));
1253 		stat->assigned = bp->assigned = shb.bd_assigned;
1254 		stat->powered = bp->powered = shb.power_on;
1255 		stat->empty = bp->empty = shb.slot_empty;
1256 
1257 		switch (shb.test_status) {
1258 			case DR_TEST_STATUS_UNKNOWN:
1259 			case DR_TEST_STATUS_IPOST:
1260 			case DR_TEST_STATUS_ABORTED:
1261 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1262 				break;
1263 			case DR_TEST_STATUS_PASSED:
1264 				stat->cond = bp->cond = SBD_COND_OK;
1265 				break;
1266 			case DR_TEST_STATUS_FAILED:
1267 				stat->cond = bp->cond = SBD_COND_FAILED;
1268 				break;
1269 			default:
1270 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1271 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1272 				    shb.test_status);
1273 				break;
1274 
1275 		}
1276 
1277 		(void) strncpy(stat->type, shb.board_type, sizeof (stat->type));
1278 		(void) snprintf(stat->info, sizeof (stat->info),
1279 		    "Test Level=%d", shb.test_level);
1280 	} else {
1281 		stat->assigned = bp->assigned;
1282 		stat->powered = bp->powered;
1283 		stat->empty = bp->empty;
1284 		stat->cond = bp->cond;
1285 		(void) strncpy(stat->type, bp->type, sizeof (stat->type));
1286 	}
1287 
1288 	stat->busy = 0;			/* assume not busy */
1289 	stat->configured = 0;		/* assume not configured */
1290 	if (bp->devices) {
1291 		int		 rv;
1292 		int		 d_idx;
1293 		drmachid_t	 d_id;
1294 
1295 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1296 		while (rv == 0) {
1297 			drmach_status_t	d_stat;
1298 
1299 			err = drmach_i_status(d_id, &d_stat);
1300 			if (err)
1301 				break;
1302 
1303 			stat->busy |= d_stat.busy;
1304 			stat->configured |= d_stat.configured;
1305 
1306 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1307 		}
1308 	}
1309 
1310 	return (err);
1311 }
1312 
/*
 * One entry of the mailbox transaction list.
 *
 * An entry is allocated by drmach_msglist_new() for each request sent to
 * the SC and appended to the doubly linked list headed by
 * drmach_msglist_first/drmach_msglist_last.  drmach_mbox_sendmsg() hands
 * o_buf to mboxsc_putmsg() and then sets p_flag and signals s_cv;
 * drmach_mbox_getmsg() matches incoming replies against msgid, copies
 * i_buflen bytes of reply data into i_buf, sets m_reply and signals g_cv.
 * f_error is non-zero when the transaction failed in the mailbox layer
 * (an errno value, or -1 when a mailbox thread exited).
 */
typedef struct drmach_msglist {
	kcondvar_t		s_cv; 		/* condvar for sending msg */
	kmutex_t		s_lock;		/* mutex for sending */
	kcondvar_t		g_cv;		/* condvar for getting reply */
	kmutex_t		g_lock;		/* mutex for getting reply */
	struct drmach_msglist	*prev;		/* link to previous entry */
	struct drmach_msglist	*next;		/* link to next entry */
	struct drmach_msglist	*link;		/* link to related entry */
	caddr_t			o_buf;		/* address of output buffer */
	caddr_t			i_buf; 		/* address of input buffer */
	uint32_t		o_buflen;	/* output buffer length */
	uint32_t		i_buflen;	/* input buffer length */
	uint32_t		msgid;		/* message identifier */
	int			o_nretry;	/* number of sending retries */
	int			f_error;	/* mailbox framework error */
	uint8_t			e_code;		/* error code returned by SC */
	uint8_t			p_flag	:1,	/* successfully putmsg */
				m_reply	:1,	/* msg reply received */
				unused	:6;
} drmach_msglist_t;
1333 
/*
 * Global state of the DR mailbox layer.
 *
 * drmach_msglist_mutex protects the transaction list
 * (drmach_msglist_first/drmach_msglist_last) and the message id counter
 * drmach_msgid.  drmach_g_mbox_mutex protects drmach_mbox_iflag and
 * drmach_mbox_ipending; drmach_ri_mbox_mutex is held for the whole of
 * drmach_mbox_reinit().  The *_thread_run flags are polled by the
 * getmsg/sendmsg threads, which set them to -1 just before exiting.
 */
kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
uint32_t		drmach_msgid;		/* current message id */
kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
int			drmach_mbox_istate;	/* mailbox init state */
int			drmach_mbox_iflag;	/* set if init'd with SC */
int			drmach_mbox_ipending;	/* set if reinit scheduled */
1349 
/*
 * Timeout values (in seconds) used when waiting for replies (from the SC) to
 * requests that we sent.  Since we only receive boardevent messages, and they
 * are events rather than replies, there is no boardevent timeout.
 */
int	drmach_to_mbxinit	= 60;		/* 1 minute */
int	drmach_to_assign	= 60;		/* 1 minute */
int	drmach_to_unassign	= 60;		/* 1 minute */
int	drmach_to_claim		= 3600;		/* 1 hour */
int	drmach_to_unclaim	= 3600;		/* 1 hour */
int	drmach_to_poweron	= 480;		/* 8 minutes */
int	drmach_to_poweroff	= 480;		/* 8 minutes */
int	drmach_to_testboard	= 43200;	/* 12 hours */
int	drmach_to_aborttest	= 180;		/* 3 minutes */
int	drmach_to_showboard	= 180;		/* 3 minutes */
int	drmach_to_unconfig	= 180;		/* 3 minutes */

/*
 * Delay (in seconds) used after receiving a non-transient error indication from
 * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
 * drmach_mbox_getmsg() multiplies it by hz before passing it to delay().
 */
int	drmach_mbxerr_delay	= 15;		/* 15 seconds */

/*
 * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
 */
clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */

/*
 * Normally, drmach_to_putmsg is set dynamically during initialization in
 * drmach_mbox_init.  This has the potentially undesirable side effect of
 * clobbering any value that might have been set in /etc/system.  To prevent
 * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
 * /etc/system), set drmach_use_tuned_putmsg_to to 1.
 */
int	drmach_use_tuned_putmsg_to	= 0;


/*
 * maximum conceivable message size for future mailbox protocol versions;
 * sizes the static receive buffers in drmach_mbox_event() and
 * drmach_mbox_getmsg()
 */
#define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1391 
1392 /*ARGSUSED*/
1393 void
1394 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1395 {
1396 	int		i, j;
1397 	dr_memregs_t	*memregs;
1398 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1399 	dr_msg_t	*mp = &mbp->msgdata;
1400 
1401 #ifdef DEBUG
1402 	switch (php->command) {
1403 		case DRMSG_BOARDEVENT:
1404 			if (dir) {
1405 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1406 			} else {
1407 				DRMACH_PR("BOARDEVENT received:\n");
1408 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1409 				    mp->dm_be.initialized,
1410 				    mp->dm_be.board_insertion,
1411 				    mp->dm_be.board_removal,
1412 				    mp->dm_be.slot_assign);
1413 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1414 				    mp->dm_be.slot_unassign,
1415 				    mp->dm_be.slot_avail,
1416 				    mp->dm_be.slot_unavail);
1417 			}
1418 			break;
1419 		case DRMSG_MBOX_INIT:
1420 			if (dir) {
1421 				DRMACH_PR("MBOX_INIT Request:\n");
1422 			} else {
1423 				DRMACH_PR("MBOX_INIT Reply:\n");
1424 			}
1425 			break;
1426 		case DRMSG_ASSIGN:
1427 			if (dir) {
1428 				DRMACH_PR("ASSIGN Request:\n");
1429 			} else {
1430 				DRMACH_PR("ASSIGN Reply:\n");
1431 			}
1432 			break;
1433 		case DRMSG_UNASSIGN:
1434 			if (dir) {
1435 				DRMACH_PR("UNASSIGN Request:\n");
1436 			} else {
1437 				DRMACH_PR("UNASSIGN Reply:\n");
1438 			}
1439 			break;
1440 		case DRMSG_CLAIM:
1441 			if (!dir) {
1442 				DRMACH_PR("CLAIM Reply:\n");
1443 				break;
1444 			}
1445 
1446 			DRMACH_PR("CLAIM Request:\n");
1447 			for (i = 0; i < 18; ++i) {
1448 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1449 				    mp->dm_cr.mem_slice[i].valid,
1450 				    mp->dm_cr.mem_slice[i].slice);
1451 				memregs = &(mp->dm_cr.mem_regs[i]);
1452 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1453 					DRMACH_PR("  MC %2d: "
1454 					    "MADR[%d] = 0x%lx, "
1455 					    "MADR[%d] = 0x%lx\n", j,
1456 					    0, DRMACH_MCREG_TO_U64(
1457 					    memregs->madr[j][0]),
1458 					    1, DRMACH_MCREG_TO_U64(
1459 					    memregs->madr[j][1]));
1460 					DRMACH_PR("       : "
1461 					    "MADR[%d] = 0x%lx, "
1462 					    "MADR[%d] = 0x%lx\n",
1463 					    2, DRMACH_MCREG_TO_U64(
1464 					    memregs->madr[j][2]),
1465 					    3, DRMACH_MCREG_TO_U64(
1466 					    memregs->madr[j][3]));
1467 				}
1468 			}
1469 			break;
1470 		case DRMSG_UNCLAIM:
1471 			if (!dir) {
1472 				DRMACH_PR("UNCLAIM Reply:\n");
1473 				break;
1474 			}
1475 
1476 			DRMACH_PR("UNCLAIM Request:\n");
1477 			for (i = 0; i < 18; ++i) {
1478 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1479 				    mp->dm_ur.mem_slice[i].valid,
1480 				    mp->dm_ur.mem_slice[i].slice);
1481 				memregs = &(mp->dm_ur.mem_regs[i]);
1482 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1483 					DRMACH_PR("  MC %2d: "
1484 					    "MADR[%d] = 0x%lx, "
1485 					    "MADR[%d] = 0x%lx\n", j,
1486 					    0, DRMACH_MCREG_TO_U64(
1487 					    memregs->madr[j][0]),
1488 					    1, DRMACH_MCREG_TO_U64(
1489 					    memregs->madr[j][1]));
1490 					DRMACH_PR("       : "
1491 					    "MADR[%d] = 0x%lx, "
1492 					    "MADR[%d] = 0x%lx\n",
1493 					    2, DRMACH_MCREG_TO_U64(
1494 					    memregs->madr[j][2]),
1495 					    3, DRMACH_MCREG_TO_U64(
1496 					    memregs->madr[j][3]));
1497 				}
1498 			}
1499 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1500 			break;
1501 		case DRMSG_UNCONFIG:
1502 			if (!dir) {
1503 				DRMACH_PR("UNCONFIG Reply:\n");
1504 				break;
1505 			}
1506 
1507 			DRMACH_PR("UNCONFIG Request:\n");
1508 			for (i = 0; i < 18; ++i) {
1509 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1510 				    mp->dm_uc.mem_slice[i].valid,
1511 				    mp->dm_uc.mem_slice[i].slice);
1512 				memregs = &(mp->dm_uc.mem_regs[i]);
1513 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1514 					DRMACH_PR("  MC %2d: "
1515 					    "MADR[%d] = 0x%lx, "
1516 					    "MADR[%d] = 0x%lx\n", j,
1517 					    0, DRMACH_MCREG_TO_U64(
1518 					    memregs->madr[j][0]),
1519 					    1, DRMACH_MCREG_TO_U64(
1520 					    memregs->madr[j][1]));
1521 					DRMACH_PR("       : "
1522 					    "MADR[%d] = 0x%lx, "
1523 					    "MADR[%d] = 0x%lx\n",
1524 					    2, DRMACH_MCREG_TO_U64(
1525 					    memregs->madr[j][2]),
1526 					    3, DRMACH_MCREG_TO_U64(
1527 					    memregs->madr[j][3]));
1528 				}
1529 			}
1530 			break;
1531 		case DRMSG_POWERON:
1532 			if (dir) {
1533 				DRMACH_PR("POWERON Request:\n");
1534 			} else {
1535 				DRMACH_PR("POWERON Reply:\n");
1536 			}
1537 			break;
1538 		case DRMSG_POWEROFF:
1539 			if (dir) {
1540 				DRMACH_PR("POWEROFF Request:\n");
1541 			} else {
1542 				DRMACH_PR("POWEROFF Reply:\n");
1543 			}
1544 			break;
1545 		case DRMSG_TESTBOARD:
1546 			if (dir) {
1547 				DRMACH_PR("TESTBOARD Request:\n");
1548 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1549 				    mp->dm_tb.memaddrhi,
1550 				    mp->dm_tb.memaddrlo);
1551 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1552 				    mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1553 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1554 				    mp->dm_tb.force, mp->dm_tb.immediate);
1555 			} else {
1556 				DRMACH_PR("TESTBOARD Reply:\n");
1557 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1558 				    mp->dm_tr.memaddrhi,
1559 				    mp->dm_tr.memaddrlo);
1560 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1561 				    mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1562 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1563 				    mp->dm_tr.cpu_recovered,
1564 				    mp->dm_tr.test_status);
1565 
1566 			}
1567 			break;
1568 		case DRMSG_ABORT_TEST:
1569 			if (dir) {
1570 				DRMACH_PR("ABORT_TEST Request:\n");
1571 			} else {
1572 				DRMACH_PR("ABORT_TEST Reply:\n");
1573 			}
1574 
1575 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1576 			    mp->dm_ta.memaddrhi,
1577 			    mp->dm_ta.memaddrlo);
1578 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1579 			    mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1580 			break;
1581 		case DRMSG_SHOWBOARD:
1582 			if (dir) {
1583 				DRMACH_PR("SHOWBOARD Request:\n");
1584 			} else {
1585 				DRMACH_PR("SHOWBOARD Reply:\n");
1586 
1587 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1588 				    mp->dm_sb.slot_empty,
1589 				    mp->dm_sb.power_on,
1590 				    mp->dm_sb.bd_assigned);
1591 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1592 				    mp->dm_sb.bd_active,
1593 				    mp->dm_sb.test_status,
1594 				    mp->dm_sb.test_level);
1595 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1596 			}
1597 			break;
1598 		default:
1599 			DRMACH_PR("Unknown message type\n");
1600 			break;
1601 	}
1602 
1603 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1604 	    php->message_id, php->drproto_version, php->command,
1605 	    php->expbrd, php->slot);
1606 #endif
1607 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1608 	    php->error_code);
1609 }
1610 
1611 /*
1612  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1613  * handshake needs to be scheduled.  The handshake can't be performed by the
1614  * thread that determines it is needed, in most cases, so this function is
1615  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1616  * otherwise ignored, since any situation that requires a mailbox initialization
1617  * handshake will continue to request the handshake until it succeeds.
1618  */
1619 static void
1620 drmach_mbox_reinit(void *unused)
1621 {
1622 	_NOTE(ARGUNUSED(unused))
1623 
1624 	caddr_t		obufp = NULL;
1625 	sbd_error_t	*serr = NULL;
1626 
1627 	DRMACH_PR("scheduled mailbox reinit running\n");
1628 
1629 	mutex_enter(&drmach_ri_mbox_mutex);
1630 	mutex_enter(&drmach_g_mbox_mutex);
1631 	if (drmach_mbox_iflag == 0) {
1632 		/* need to initialize the mailbox */
1633 		mutex_exit(&drmach_g_mbox_mutex);
1634 
1635 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1636 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1637 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1638 		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1639 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1640 
1641 		if (serr) {
1642 			cmn_err(CE_WARN,
1643 			    "mbox_init: MBOX_INIT failed ecode=0x%x",
1644 			    serr->e_code);
1645 			sbd_err_clear(&serr);
1646 		}
1647 		mutex_enter(&drmach_g_mbox_mutex);
1648 		if (!serr) {
1649 			drmach_mbox_iflag = 1;
1650 		}
1651 	}
1652 	drmach_mbox_ipending = 0;
1653 	mutex_exit(&drmach_g_mbox_mutex);
1654 	mutex_exit(&drmach_ri_mbox_mutex);
1655 }
1656 
1657 /*
1658  * To ensure sufficient compatibility with future versions of the DR mailbox
1659  * protocol, we use a buffer that is large enough to receive the largest message
1660  * that could possibly be sent to us.  However, since that ends up being fairly
1661  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1662  * does not need to be MT-safe since it is only invoked by the mailbox
1663  * framework, which will never invoke it multiple times concurrently.  Since
1664  * that is the case, we can use a static buffer.
1665  */
1666 void
1667 drmach_mbox_event(void)
1668 {
1669 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1670 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1671 	int		err;
1672 	uint32_t	type = MBOXSC_MSG_EVENT;
1673 	uint32_t	command = DRMSG_BOARDEVENT;
1674 	uint64_t	transid = 0;
1675 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1676 	char		*hint = "";
1677 	int		logsys = 0;
1678 
1679 	do {
1680 		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1681 		    &length, (void *)msg, 0);
1682 	} while (err == EAGAIN);
1683 
1684 	/* don't try to interpret anything with the wrong version number */
1685 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1686 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1687 		    msg->p_hdr.drproto_version, DRMBX_VERSION);
1688 		mutex_enter(&drmach_g_mbox_mutex);
1689 		drmach_mbox_iflag = 0;
1690 		/* schedule a reinit handshake if one isn't pending */
1691 		if (!drmach_mbox_ipending) {
1692 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1693 			    NULL, TQ_NOSLEEP) != NULL) {
1694 				drmach_mbox_ipending = 1;
1695 			} else {
1696 				cmn_err(CE_WARN,
1697 				    "failed to schedule mailbox reinit");
1698 			}
1699 		}
1700 		mutex_exit(&drmach_g_mbox_mutex);
1701 		return;
1702 	}
1703 
1704 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1705 		cmn_err(CE_WARN,
1706 		    "Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1707 		    err, msg->p_hdr.error_code);
1708 	} else {
1709 		dr_boardevent_t	*be;
1710 		be = (dr_boardevent_t *)&msg->msgdata;
1711 
1712 		/* check for initialization event */
1713 		if (be->initialized) {
1714 			mutex_enter(&drmach_g_mbox_mutex);
1715 			drmach_mbox_iflag = 0;
1716 			/* schedule a reinit handshake if one isn't pending */
1717 			if (!drmach_mbox_ipending) {
1718 				if (taskq_dispatch(system_taskq,
1719 				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1720 				    != NULL) {
1721 					drmach_mbox_ipending = 1;
1722 				} else {
1723 					cmn_err(CE_WARN, "failed to schedule "
1724 					    "mailbox reinit");
1725 				}
1726 			}
1727 			mutex_exit(&drmach_g_mbox_mutex);
1728 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1729 		}
1730 
1731 		/* anything else will be a log_sysevent call */
1732 
1733 		if (be->board_insertion) {
1734 			DRMACH_PR("Board Insertion event received");
1735 			hint = DR_HINT_INSERT;
1736 			logsys++;
1737 	}
1738 		if (be->board_removal) {
1739 			DRMACH_PR("Board Removal event received");
1740 			hint = DR_HINT_REMOVE;
1741 			logsys++;
1742 		}
1743 		if (be->slot_assign) {
1744 			DRMACH_PR("Slot Assign event received");
1745 			logsys++;
1746 		}
1747 		if (be->slot_unassign) {
1748 			DRMACH_PR("Slot Unassign event received");
1749 			logsys++;
1750 		}
1751 		if (be->slot_avail) {
1752 			DRMACH_PR("Slot Available event received");
1753 			logsys++;
1754 		}
1755 		if (be->slot_unavail) {
1756 			DRMACH_PR("Slot Unavailable event received");
1757 			logsys++;
1758 		}
1759 		if (be->power_on) {
1760 			DRMACH_PR("Power ON event received");
1761 			logsys++;
1762 		}
1763 		if (be->power_off) {
1764 			DRMACH_PR("Power OFF event received");
1765 			logsys++;
1766 		}
1767 
1768 		if (logsys)
1769 			(void) drmach_log_sysevent(
1770 			    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1771 			    msg->p_hdr.slot), hint, SE_NOSLEEP, 1);
1772 	}
1773 }
1774 
1775 static uint32_t
1776 drmach_get_msgid()
1777 {
1778 	uint32_t	rv;
1779 	mutex_enter(&drmach_msglist_mutex);
1780 	if (!(++drmach_msgid))
1781 		++drmach_msgid;
1782 	rv = drmach_msgid;
1783 	mutex_exit(&drmach_msglist_mutex);
1784 	return (rv);
1785 }
1786 
1787 /*
1788  *	unlink an entry from the message transaction list
1789  *
1790  *	caller must hold drmach_msglist_mutex
1791  */
1792 void
1793 drmach_msglist_unlink(drmach_msglist_t *entry)
1794 {
1795 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1796 	if (entry->prev) {
1797 		entry->prev->next = entry->next;
1798 		if (entry->next)
1799 			entry->next->prev = entry->prev;
1800 	} else {
1801 		drmach_msglist_first = entry->next;
1802 		if (entry->next)
1803 			entry->next->prev = NULL;
1804 	}
1805 	if (entry == drmach_msglist_last) {
1806 		drmach_msglist_last = entry->prev;
1807 	}
1808 }
1809 
1810 void
1811 drmach_msglist_link(drmach_msglist_t *entry)
1812 {
1813 	mutex_enter(&drmach_msglist_mutex);
1814 	if (drmach_msglist_last) {
1815 		entry->prev = drmach_msglist_last;
1816 		drmach_msglist_last->next = entry;
1817 		drmach_msglist_last = entry;
1818 	} else {
1819 		drmach_msglist_last = drmach_msglist_first = entry;
1820 	}
1821 	mutex_exit(&drmach_msglist_mutex);
1822 }
1823 
1824 void
1825 drmach_mbox_getmsg()
1826 {
1827 	int			err;
1828 	register int		msgid;
1829 	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1830 	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1831 	dr_proto_hdr_t		*php;
1832 	drmach_msglist_t	*found, *entry;
1833 	uint32_t		type = MBOXSC_MSG_REPLY;
1834 	uint32_t		command;
1835 	uint64_t		transid;
1836 	uint32_t		length;
1837 
1838 	php = &msg->p_hdr;
1839 
1840 	while (drmach_getmsg_thread_run != 0) {
1841 		/* get a reply message */
1842 		command = 0;
1843 		transid = 0;
1844 		length = DRMACH_MAX_MBOX_MSG_SIZE;
1845 		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1846 		    &length, (void *)msg, drmach_to_getmsg);
1847 
1848 		if (err) {
1849 			/*
1850 			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1851 			 * the "error" is really just a normal, transient
1852 			 * condition and we can retry the operation right away.
1853 			 * Any other error suggests a more serious problem,
1854 			 * ranging from a message being too big for our buffer
1855 			 * (EMSGSIZE) to total failure of the mailbox layer.
1856 			 * This second class of errors is much less "transient",
1857 			 * so rather than retrying over and over (and getting
1858 			 * the same error over and over) as fast as we can,
1859 			 * we'll sleep for a while before retrying.
1860 			 */
1861 			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1862 				cmn_err(CE_WARN,
1863 				    "mboxsc_getmsg failed, err=0x%x", err);
1864 				delay(drmach_mbxerr_delay * hz);
1865 			}
1866 			continue;
1867 		}
1868 
1869 		drmach_mbox_prmsg(msg, 0);
1870 
1871 		if (php->drproto_version != DRMBX_VERSION) {
1872 			cmn_err(CE_WARN,
1873 			    "mailbox version mismatch 0x%x vs 0x%x",
1874 			    php->drproto_version, DRMBX_VERSION);
1875 
1876 			mutex_enter(&drmach_g_mbox_mutex);
1877 			drmach_mbox_iflag = 0;
1878 			/* schedule a reinit handshake if one isn't pending */
1879 			if (!drmach_mbox_ipending) {
1880 				if (taskq_dispatch(system_taskq,
1881 				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1882 				    != NULL) {
1883 					drmach_mbox_ipending = 1;
1884 				} else {
1885 					cmn_err(CE_WARN, "failed to schedule "
1886 					    "mailbox reinit");
1887 				}
1888 			}
1889 			mutex_exit(&drmach_g_mbox_mutex);
1890 
1891 			continue;
1892 		}
1893 
1894 		msgid = php->message_id;
1895 		found = NULL;
1896 		mutex_enter(&drmach_msglist_mutex);
1897 		entry = drmach_msglist_first;
1898 		while (entry != NULL) {
1899 			if (entry->msgid == msgid) {
1900 				found = entry;
1901 				drmach_msglist_unlink(entry);
1902 				entry = NULL;
1903 			} else
1904 				entry = entry->next;
1905 		}
1906 
1907 		if (found) {
1908 			mutex_enter(&found->g_lock);
1909 
1910 			found->e_code = php->error_code;
1911 			if (found->i_buflen > 0)
1912 				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1913 				    found->i_buflen);
1914 			found->m_reply = 1;
1915 
1916 			cv_signal(&found->g_cv);
1917 			mutex_exit(&found->g_lock);
1918 		} else {
1919 			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1920 			    msgid);
1921 			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1922 			    php->command, php->expbrd, php->slot);
1923 		}
1924 
1925 		mutex_exit(&drmach_msglist_mutex);
1926 	}
1927 	cmn_err(CE_WARN, "mbox_getmsg: exiting");
1928 	mutex_enter(&drmach_msglist_mutex);
1929 	entry = drmach_msglist_first;
1930 	while (entry != NULL) {
1931 		if (entry->p_flag == 1) {
1932 			entry->f_error = -1;
1933 			mutex_enter(&entry->g_lock);
1934 			cv_signal(&entry->g_cv);
1935 			mutex_exit(&entry->g_lock);
1936 			drmach_msglist_unlink(entry);
1937 		}
1938 		entry = entry->next;
1939 	}
1940 	mutex_exit(&drmach_msglist_mutex);
1941 	drmach_getmsg_thread_run = -1;
1942 	thread_exit();
1943 }
1944 
1945 void
1946 drmach_mbox_sendmsg()
1947 {
1948 	int		err, retry;
1949 	drmach_msglist_t *entry;
1950 	dr_mbox_msg_t   *mp;
1951 	dr_proto_hdr_t  *php;
1952 
1953 	while (drmach_sendmsg_thread_run != 0) {
1954 		/*
1955 		 * Search through the list to find entries awaiting
1956 		 * transmission to the SC
1957 		 */
1958 		mutex_enter(&drmach_msglist_mutex);
1959 		entry = drmach_msglist_first;
1960 		retry = 0;
1961 		while (entry != NULL) {
1962 			if (entry->p_flag == 1) {
1963 				entry = entry->next;
1964 				continue;
1965 			}
1966 
1967 			mutex_exit(&drmach_msglist_mutex);
1968 
1969 			if (!retry)
1970 				mutex_enter(&entry->s_lock);
1971 			mp = (dr_mbox_msg_t *)entry->o_buf;
1972 			php = &mp->p_hdr;
1973 
1974 			drmach_mbox_prmsg(mp, 1);
1975 
1976 			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
1977 			    php->command, NULL, entry->o_buflen, (void *)mp,
1978 			    drmach_to_putmsg);
1979 
1980 			if (err) {
1981 				switch (err) {
1982 
1983 				case EAGAIN:
1984 				case EBUSY:
1985 					++retry;
1986 					mutex_enter(&drmach_msglist_mutex);
1987 					continue;
1988 
1989 				case ETIMEDOUT:
1990 					if (--entry->o_nretry <= 0) {
1991 						mutex_enter(
1992 						    &drmach_msglist_mutex);
1993 						drmach_msglist_unlink(entry);
1994 						mutex_exit(
1995 						    &drmach_msglist_mutex);
1996 						entry->f_error = err;
1997 						entry->p_flag = 1;
1998 						cv_signal(&entry->s_cv);
1999 					} else {
2000 						++retry;
2001 						mutex_enter(
2002 						    &drmach_msglist_mutex);
2003 						continue;
2004 					}
2005 					break;
2006 				default:
2007 					mutex_enter(&drmach_msglist_mutex);
2008 					drmach_msglist_unlink(entry);
2009 					mutex_exit(&drmach_msglist_mutex);
2010 					entry->f_error = err;
2011 					entry->p_flag = 1;
2012 					cv_signal(&entry->s_cv);
2013 					break;
2014 				}
2015 			} else {
2016 				entry->p_flag = 1;
2017 				cv_signal(&entry->s_cv);
2018 			}
2019 
2020 			mutex_exit(&entry->s_lock);
2021 			retry = 0;
2022 			mutex_enter(&drmach_msglist_mutex);
2023 			entry = drmach_msglist_first;
2024 		}
2025 		mutex_exit(&drmach_msglist_mutex);
2026 
2027 		mutex_enter(&drmach_sendmsg_mutex);
2028 		(void) cv_reltimedwait(&drmach_sendmsg_cv,
2029 		    &drmach_sendmsg_mutex, (5 * hz), TR_CLOCK_TICK);
2030 		mutex_exit(&drmach_sendmsg_mutex);
2031 	}
2032 	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
2033 	mutex_enter(&drmach_msglist_mutex);
2034 	entry = drmach_msglist_first;
2035 	while (entry != NULL) {
2036 		if (entry->p_flag == 0) {
2037 			entry->f_error = -1;
2038 			mutex_enter(&entry->s_lock);
2039 			cv_signal(&entry->s_cv);
2040 			mutex_exit(&entry->s_lock);
2041 			drmach_msglist_unlink(entry);
2042 		}
2043 		entry = entry->next;
2044 	}
2045 	mutex_exit(&drmach_msglist_mutex);
2046 	cv_destroy(&drmach_sendmsg_cv);
2047 	mutex_destroy(&drmach_sendmsg_mutex);
2048 
2049 	drmach_sendmsg_thread_run = -1;
2050 	thread_exit();
2051 }
2052 
2053 void
2054 drmach_msglist_destroy(drmach_msglist_t *listp)
2055 {
2056 	if (listp != NULL) {
2057 		drmach_msglist_t	*entry;
2058 
2059 		mutex_enter(&drmach_msglist_mutex);
2060 		entry = drmach_msglist_first;
2061 		while (entry) {
2062 			if (listp == entry) {
2063 				drmach_msglist_unlink(listp);
2064 				entry = NULL;
2065 			} else
2066 				entry = entry->next;
2067 		}
2068 
2069 		mutex_destroy(&listp->s_lock);
2070 		cv_destroy(&listp->s_cv);
2071 		mutex_destroy(&listp->g_lock);
2072 		cv_destroy(&listp->g_cv);
2073 		kmem_free(listp, sizeof (drmach_msglist_t));
2074 
2075 		mutex_exit(&drmach_msglist_mutex);
2076 	}
2077 }
2078 
2079 static drmach_msglist_t	*
2080 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2081 	uint32_t olen, int nrtry)
2082 {
2083 	drmach_msglist_t	*listp;
2084 
2085 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2086 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2087 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2088 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2089 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2090 	listp->o_buf = (caddr_t)hdrp;
2091 	listp->o_buflen = olen;
2092 	listp->i_buf = ibufp;
2093 	listp->i_buflen = ilen;
2094 	listp->o_nretry = nrtry;
2095 	listp->msgid = hdrp->message_id;
2096 
2097 	return (listp);
2098 }
2099 
2100 static drmach_msglist_t *
2101 drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
2102 	uint32_t ilen, int timeout, int nrtry, int nosig,
2103 	drmach_msglist_t *link)
2104 {
2105 	int		crv;
2106 	drmach_msglist_t *listp;
2107 	clock_t		to_val;
2108 	dr_proto_hdr_t	*php;
2109 
2110 	/* setup transaction list entry */
2111 	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);
2112 
2113 	/* send mailbox message, await reply */
2114 	mutex_enter(&listp->s_lock);
2115 	mutex_enter(&listp->g_lock);
2116 
2117 	listp->link = link;
2118 	drmach_msglist_link(listp);
2119 
2120 	mutex_enter(&drmach_sendmsg_mutex);
2121 	cv_signal(&drmach_sendmsg_cv);
2122 	mutex_exit(&drmach_sendmsg_mutex);
2123 
2124 	while (listp->p_flag == 0) {
2125 		cv_wait(&listp->s_cv, &listp->s_lock);
2126 	}
2127 
2128 	to_val = ddi_get_lbolt() + (timeout * hz);
2129 
2130 	if (listp->f_error) {
2131 		listp->p_flag = 0;
2132 		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x", listp->f_error);
2133 		php = (dr_proto_hdr_t *)listp->o_buf;
2134 		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
2135 		    php->command, php->expbrd, php->slot);
2136 	} else {
2137 		while (listp->m_reply == 0 && listp->f_error == 0) {
2138 			if (nosig)
2139 				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
2140 				    to_val);
2141 			else
2142 				crv = cv_timedwait_sig(&listp->g_cv,
2143 				    &listp->g_lock, to_val);
2144 			switch (crv) {
2145 				case -1: /* timed out */
2146 					cmn_err(CE_WARN,
2147 					    "!msgid=0x%x reply timed out",
2148 					    hdrp->message_id);
2149 					php = (dr_proto_hdr_t *)listp->o_buf;
2150 					cmn_err(CE_WARN, "!    cmd = 0x%x, "
2151 					    "exb = %d, slot = %d", php->command,
2152 					    php->expbrd, php->slot);
2153 					listp->f_error = ETIMEDOUT;
2154 					break;
2155 				case 0: /* signal received */
2156 					cmn_err(CE_WARN,
2157 					    "operation interrupted by signal");
2158 					listp->f_error = EINTR;
2159 					break;
2160 				default:
2161 					break;
2162 				}
2163 		}
2164 
2165 		/*
2166 		 * If link is set for this entry, check to see if
2167 		 * the linked entry has been replied to.  If not,
2168 		 * wait for the response.
2169 		 * Currently, this is only used for ABORT_TEST functionality,
2170 		 * wherein a check is made for the TESTBOARD reply when
2171 		 * the ABORT_TEST reply is received.
2172 		 */
2173 
2174 		if (link) {
2175 			mutex_enter(&link->g_lock);
2176 			/*
2177 			 * If the reply to the linked entry hasn't been
2178 			 * received, clear the existing link->f_error,
2179 			 * and await the reply.
2180 			 */
2181 			if (link->m_reply == 0) {
2182 				link->f_error = 0;
2183 			}
2184 			to_val =  ddi_get_lbolt() + (timeout * hz);
2185 			while (link->m_reply == 0 && link->f_error == 0) {
2186 				crv = cv_timedwait(&link->g_cv, &link->g_lock,
2187 				    to_val);
2188 				switch (crv) {
2189 				case -1: /* timed out */
2190 					cmn_err(CE_NOTE,
2191 					    "!link msgid=0x%x reply timed out",
2192 					    link->msgid);
2193 					link->f_error = ETIMEDOUT;
2194 					break;
2195 				default:
2196 					break;
2197 				}
2198 			}
2199 			mutex_exit(&link->g_lock);
2200 		}
2201 	}
2202 	mutex_exit(&listp->g_lock);
2203 	mutex_exit(&listp->s_lock);
2204 	return (listp);
2205 }
2206 
2207 static sbd_error_t *
2208 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2209 {
2210 	char		a_pnt[MAXNAMELEN];
2211 	dr_proto_hdr_t	*php;
2212 	int		bnum;
2213 
2214 	if (mlp->f_error) {
2215 		/*
2216 		 * If framework failure is due to signal, return "no error"
2217 		 * error.
2218 		 */
2219 		if (mlp->f_error == EINTR)
2220 			return (drerr_new(0, ESTC_NONE, NULL));
2221 
2222 		mutex_enter(&drmach_g_mbox_mutex);
2223 		drmach_mbox_iflag = 0;
2224 		mutex_exit(&drmach_g_mbox_mutex);
2225 		if (!mlp->p_flag)
2226 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2227 		else
2228 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2229 	}
2230 	php = (dr_proto_hdr_t *)mlp->o_buf;
2231 	bnum = 2 * php->expbrd + php->slot;
2232 	a_pnt[0] = '\0';
2233 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2234 
2235 	switch (mlp->e_code) {
2236 		case 0:
2237 			return (NULL);
2238 		case DRERR_NOACL:
2239 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2240 		case DRERR_NOT_ASSIGNED:
2241 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2242 		case DRERR_NOT_ACTIVE:
2243 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2244 		case DRERR_EMPTY_SLOT:
2245 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2246 		case DRERR_POWER_OFF:
2247 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2248 		case DRERR_TEST_IN_PROGRESS:
2249 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS, "%s",
2250 			    a_pnt));
2251 		case DRERR_TESTING_BUSY:
2252 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2253 		case DRERR_TEST_REQUIRED:
2254 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2255 		case DRERR_UNAVAILABLE:
2256 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2257 		case DRERR_RECOVERABLE:
2258 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE, "%s",
2259 			    a_pnt));
2260 		case DRERR_UNRECOVERABLE:
2261 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE, "%s",
2262 			    a_pnt));
2263 		default:
2264 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2265 	}
2266 }
2267 
2268 static sbd_error_t *
2269 drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
2270 	caddr_t ibufp, int ilen)
2271 {
2272 	int			timeout = 0;
2273 	int			ntries = 0;
2274 	int			nosignals = 0;
2275 	dr_proto_hdr_t 		*hdrp;
2276 	drmach_msglist_t 	*mlp;
2277 	sbd_error_t		*err = NULL;
2278 
2279 	if (msgtype != DRMSG_MBOX_INIT) {
2280 		mutex_enter(&drmach_ri_mbox_mutex);
2281 		mutex_enter(&drmach_g_mbox_mutex);
2282 		if (drmach_mbox_iflag == 0) {
2283 			/* need to initialize the mailbox */
2284 			dr_proto_hdr_t	imsg;
2285 
2286 			mutex_exit(&drmach_g_mbox_mutex);
2287 
2288 			imsg.command = DRMSG_MBOX_INIT;
2289 
2290 			imsg.message_id = drmach_get_msgid();
2291 			imsg.drproto_version = DRMBX_VERSION;
2292 			imsg.expbrd = 0;
2293 			imsg.slot = 0;
2294 
2295 			cmn_err(CE_WARN, "!reinitializing DR mailbox");
2296 			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
2297 			    10, 5, 0, NULL);
2298 			err = drmach_mbx2sbderr(mlp);
2299 			/*
2300 			 * If framework failure incoming is encountered on
2301 			 * the MBOX_INIT [timeout on SMS reply], the error
2302 			 * type must be changed before returning to caller.
2303 			 * This is to prevent drmach_board_connect() and
2304 			 * drmach_board_disconnect() from marking boards
2305 			 * UNUSABLE based on MBOX_INIT failures.
2306 			 */
2307 			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
2308 				cmn_err(CE_WARN,
2309 				    "!Changed mbox incoming to outgoing"
2310 				    " failure on reinit");
2311 				sbd_err_clear(&err);
2312 				err = drerr_new(0, ESTC_MBXRQST, NULL);
2313 			}
2314 			drmach_msglist_destroy(mlp);
2315 			if (err) {
2316 				mutex_exit(&drmach_ri_mbox_mutex);
2317 				return (err);
2318 			}
2319 			mutex_enter(&drmach_g_mbox_mutex);
2320 			drmach_mbox_iflag = 1;
2321 		}
2322 		mutex_exit(&drmach_g_mbox_mutex);
2323 		mutex_exit(&drmach_ri_mbox_mutex);
2324 	}
2325 
2326 	hdrp = (dr_proto_hdr_t *)obufp;
2327 
2328 	/* setup outgoing mailbox header */
2329 	hdrp->command = msgtype;
2330 	hdrp->message_id = drmach_get_msgid();
2331 	hdrp->drproto_version = DRMBX_VERSION;
2332 	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
2333 	hdrp->slot = DRMACH_BNUM2SLOT(bnum);
2334 
2335 	switch (msgtype) {
2336 
2337 		case DRMSG_MBOX_INIT:
2338 			timeout = drmach_to_mbxinit;
2339 			ntries = 1;
2340 			nosignals = 0;
2341 			break;
2342 
2343 		case DRMSG_ASSIGN:
2344 			timeout = drmach_to_assign;
2345 			ntries = 1;
2346 			nosignals = 0;
2347 			break;
2348 
2349 		case DRMSG_UNASSIGN:
2350 			timeout = drmach_to_unassign;
2351 			ntries = 1;
2352 			nosignals = 0;
2353 			break;
2354 
2355 		case DRMSG_POWERON:
2356 			timeout = drmach_to_poweron;
2357 			ntries = 1;
2358 			nosignals = 0;
2359 			break;
2360 
2361 		case DRMSG_POWEROFF:
2362 			timeout = drmach_to_poweroff;
2363 			ntries = 1;
2364 			nosignals = 0;
2365 			break;
2366 
2367 		case DRMSG_SHOWBOARD:
2368 			timeout = drmach_to_showboard;
2369 			ntries = 1;
2370 			nosignals = 0;
2371 			break;
2372 
2373 		case DRMSG_CLAIM:
2374 			timeout = drmach_to_claim;
2375 			ntries = 1;
2376 			nosignals = 1;
2377 			break;
2378 
2379 		case DRMSG_UNCLAIM:
2380 			timeout = drmach_to_unclaim;
2381 			ntries = 1;
2382 			nosignals = 1;
2383 			break;
2384 
2385 		case DRMSG_UNCONFIG:
2386 			timeout = drmach_to_unconfig;
2387 			ntries = 1;
2388 			nosignals = 0;
2389 			break;
2390 
2391 		case DRMSG_TESTBOARD:
2392 			timeout = drmach_to_testboard;
2393 			ntries = 1;
2394 			nosignals = 0;
2395 			break;
2396 
2397 		default:
2398 			cmn_err(CE_WARN, "Unknown outgoing message type 0x%x",
2399 			    msgtype);
2400 			err = DRMACH_INTERNAL_ERROR();
2401 			break;
2402 	}
2403 
2404 	if (err == NULL) {
2405 		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen, timeout,
2406 		    ntries, nosignals, NULL);
2407 		err = drmach_mbx2sbderr(mlp);
2408 
2409 		/*
2410 		 * For DRMSG_TESTBOARD attempts which have timed out, or
2411 		 * been aborted due to a signal received after mboxsc_putmsg()
2412 		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
2413 		 * must be sent.
2414 		 */
2415 		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
2416 		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
2417 		    (mlp->p_flag != 0)))) {
2418 			drmach_msglist_t	*abmlp;
2419 			dr_abort_test_t		abibuf;
2420 
2421 			hdrp->command = DRMSG_ABORT_TEST;
2422 			hdrp->message_id = drmach_get_msgid();
2423 			abmlp = drmach_mbox_req_rply(hdrp,
2424 			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
2425 			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
2426 			cmn_err(CE_WARN, "test aborted");
2427 			drmach_msglist_destroy(abmlp);
2428 		}
2429 
2430 		drmach_msglist_destroy(mlp);
2431 	}
2432 
2433 	return (err);
2434 }
2435 
2436 static int
2437 drmach_mbox_init()
2438 {
2439 	int			err;
2440 	caddr_t			obufp;
2441 	sbd_error_t		*serr = NULL;
2442 	mboxsc_timeout_range_t	mbxtoz;
2443 
2444 	drmach_mbox_istate = 0;
2445 	/* register the outgoing mailbox */
2446 	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
2447 	    NULL)) != 0) {
2448 		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
2449 		return (-1);
2450 	}
2451 	drmach_mbox_istate = 1;
2452 
2453 	/* setup the mboxsc_putmsg timeout value */
2454 	if (drmach_use_tuned_putmsg_to) {
2455 		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
2456 		    drmach_to_putmsg);
2457 	} else {
2458 		if ((err = mboxsc_ctrl(KEY_DRSC,
2459 		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
2460 			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
2461 			drmach_to_putmsg = 60000;
2462 		} else {
2463 			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
2464 			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
2465 			    " is 0x%lx\n", mbxtoz.min_timeout,
2466 			    mbxtoz.max_timeout, drmach_to_putmsg);
2467 		}
2468 	}
2469 
2470 	/* register the incoming mailbox */
2471 	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
2472 	    drmach_mbox_event)) != 0) {
2473 		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
2474 		return (-1);
2475 	}
2476 	drmach_mbox_istate = 2;
2477 
2478 	/* initialize mutex for mailbox globals */
2479 	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2480 
2481 	/* initialize mutex for mailbox re-init */
2482 	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2483 
2484 	/* initialize mailbox message list elements */
2485 	drmach_msglist_first = drmach_msglist_last = NULL;
2486 	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);
2487 
2488 	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
2489 	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);
2490 
2491 	drmach_mbox_istate = 3;
2492 
2493 	/* start mailbox sendmsg thread */
2494 	drmach_sendmsg_thread_run = 1;
2495 	if (drmach_sendmsg_thread == NULL)
2496 		drmach_sendmsg_thread = thread_create(NULL, 0,
2497 		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
2498 		    TS_RUN, minclsyspri);
2499 
2500 	/* start mailbox getmsg thread */
2501 	drmach_getmsg_thread_run = 1;
2502 	if (drmach_getmsg_thread == NULL)
2503 		drmach_getmsg_thread = thread_create(NULL, 0,
2504 		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
2505 		    TS_RUN, minclsyspri);
2506 
2507 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
2508 	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
2509 	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
2510 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
2511 	if (serr) {
2512 		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
2513 		    serr->e_code);
2514 		sbd_err_clear(&serr);
2515 		return (-1);
2516 	}
2517 	mutex_enter(&drmach_g_mbox_mutex);
2518 	drmach_mbox_iflag = 1;
2519 	drmach_mbox_ipending = 0;
2520 	mutex_exit(&drmach_g_mbox_mutex);
2521 
2522 	return (0);
2523 }
2524 
2525 static int
2526 drmach_mbox_fini()
2527 {
2528 	int err, rv = 0;
2529 
2530 	if (drmach_mbox_istate > 2) {
2531 		drmach_getmsg_thread_run = 0;
2532 		drmach_sendmsg_thread_run = 0;
2533 		cmn_err(CE_WARN,
2534 		    "drmach_mbox_fini: waiting for mbox threads...");
2535 		while ((drmach_getmsg_thread_run == 0) ||
2536 		    (drmach_sendmsg_thread_run == 0)) {
2537 			continue;
2538 		}
2539 		cmn_err(CE_WARN, "drmach_mbox_fini: mbox threads done.");
2540 		mutex_destroy(&drmach_msglist_mutex);
2541 
2542 	}
2543 	if (drmach_mbox_istate) {
2544 		/* de-register the outgoing mailbox */
2545 		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
2546 			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
2547 			    err);
2548 			rv = -1;
2549 		}
2550 	}
2551 	if (drmach_mbox_istate > 1) {
2552 		/* de-register the incoming mailbox */
2553 		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
2554 			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
2555 			    err);
2556 			rv = -1;
2557 		}
2558 	}
2559 	mutex_destroy(&drmach_g_mbox_mutex);
2560 	mutex_destroy(&drmach_ri_mbox_mutex);
2561 	return (rv);
2562 }
2563 
/*
 * Convert a portid into a board number.
 *
 * The low five bits of the portid select the slot (0 or 1) according to
 * the table below; the remaining bits, shifted down and with bit 0
 * masked off, supply the rest of the board number, so the result is
 * ((portid >> 4) & 0x7e) | slot.
 *
 * An unrecognized value in the low five bits trips an ASSERT in debug
 * kernels.  Where ASSERT compiles to nothing, slot 0 is used so that
 * the result is deterministic rather than derived from an uninitialized
 * variable.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot = 0;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		break;
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2587 
/*
 * Flag belonging to the axq driver.  drmach_init() sets it to zero so
 * that axq does not enable iopause during suspend/resume.
 */
extern int axq_suspend_iopause;
2589 
2590 static int
2591 hold_rele_branch(dev_info_t *rdip, void *arg)
2592 {
2593 	int	i;
2594 	int	*holdp	= (int *)arg;
2595 	char	*name = ddi_node_name(rdip);
2596 
2597 	/*
2598 	 * For Starcat, we must be children of the root devinfo node
2599 	 */
2600 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2601 
2602 	i = drmach_name2type_idx(name);
2603 
2604 	/*
2605 	 * Only children of the root devinfo node need to be
2606 	 * held/released since they are the only valid targets
2607 	 * of tree operations. This corresponds to the node types
2608 	 * listed in the drmach_name2type array.
2609 	 */
2610 	if (i < 0) {
2611 		/* Not of interest to us */
2612 		return (DDI_WALK_PRUNECHILD);
2613 	}
2614 
2615 	if (*holdp) {
2616 		ASSERT(!e_ddi_branch_held(rdip));
2617 		e_ddi_branch_hold(rdip);
2618 	} else {
2619 		ASSERT(e_ddi_branch_held(rdip));
2620 		e_ddi_branch_rele(rdip);
2621 	}
2622 
2623 	return (DDI_WALK_PRUNECHILD);
2624 }
2625 
2626 static int
2627 drmach_init(void)
2628 {
2629 	pnode_t 	nodeid;
2630 	gdcd_t		*gdcd;
2631 	int		bnum;
2632 	dev_info_t	*rdip;
2633 	int		hold, circ;
2634 
2635 	mutex_enter(&drmach_i_lock);
2636 	if (drmach_initialized) {
2637 		mutex_exit(&drmach_i_lock);
2638 		return (0);
2639 	}
2640 
2641 	gdcd = drmach_gdcd_new();
2642 	if (gdcd == NULL) {
2643 		mutex_exit(&drmach_i_lock);
2644 		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
2645 		return (-1);
2646 	}
2647 
2648 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
2649 
2650 	nodeid = prom_childnode(prom_rootnode());
2651 	do {
2652 		int		 len;
2653 		int		 portid;
2654 		drmachid_t	 id;
2655 
2656 		len = prom_getproplen(nodeid, "portid");
2657 		if (len != sizeof (portid))
2658 			continue;
2659 
2660 		portid = -1;
2661 		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
2662 		if (portid == -1)
2663 			continue;
2664 
2665 		bnum = drmach_portid2bnum(portid);
2666 
2667 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
2668 			/* portid translated to an invalid board number */
2669 			cmn_err(CE_WARN, "OBP node 0x%x has"
2670 			    " invalid property value, %s=%u",
2671 			    nodeid, "portid", portid);
2672 
2673 			/* clean up */
2674 			drmach_array_dispose(drmach_boards,
2675 			    drmach_board_dispose);
2676 			drmach_gdcd_dispose(gdcd);
2677 			mutex_exit(&drmach_i_lock);
2678 			return (-1);
2679 		} else if (id == NULL) {
2680 			drmach_board_t	*bp;
2681 			l1_slot_stat_t	*dcd;
2682 			int		exp, slot;
2683 
2684 			bp = drmach_board_new(bnum);
2685 			bp->assigned = !drmach_initialized;
2686 			bp->powered = !drmach_initialized;
2687 
2688 			exp = DRMACH_BNUM2EXP(bnum);
2689 			slot = DRMACH_BNUM2SLOT(bnum);
2690 			dcd = &gdcd->dcd_slot[exp][slot];
2691 			bp->stardrb_offset =
2692 			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
2693 			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
2694 			    bp->stardrb_offset);
2695 
2696 			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
2697 			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
2698 				bp->flags |= DRMACH_NULL_PROC_LPA;
2699 				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
2700 			}
2701 		}
2702 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
2703 
2704 	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
2705 
2706 	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
2707 		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
2708 		    gdcd->dcd_testcage_log2_mbytes_align);
2709 		drmach_iocage_paddr =
2710 		    (uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
2711 		drmach_iocage_size =
2712 		    1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);
2713 
2714 		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
2715 		    drmach_iocage_size, VM_SLEEP);
2716 		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
2717 		    mmu_btop(drmach_iocage_paddr),
2718 		    PROT_READ | PROT_WRITE,
2719 		    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
2720 
2721 		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
2722 		    gdcd->dcd_testcage_log2_mbytes_size,
2723 		    gdcd->dcd_testcage_log2_mbytes_align,
2724 		    gdcd->dcd_testcage_mbyte_PA);
2725 		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
2726 		    drmach_iocage_size, drmach_iocage_paddr,
2727 		    (void *)drmach_iocage_vaddr);
2728 	}
2729 
2730 	if (drmach_iocage_size == 0) {
2731 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2732 		drmach_boards = NULL;
2733 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2734 		drmach_gdcd_dispose(gdcd);
2735 		mutex_exit(&drmach_i_lock);
2736 		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
2737 		return (-1);
2738 	}
2739 
2740 	drmach_gdcd_dispose(gdcd);
2741 
2742 	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
2743 	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
2744 	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
2745 	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
2746 	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);
2747 
2748 	mutex_enter(&cpu_lock);
2749 	mutex_enter(&drmach_iocage_lock);
2750 	ASSERT(drmach_iocage_is_busy == 0);
2751 	drmach_iocage_is_busy = 1;
2752 	drmach_iocage_mem_scrub(drmach_iocage_size);
2753 	drmach_iocage_is_busy = 0;
2754 	cv_signal(&drmach_iocage_cv);
2755 	mutex_exit(&drmach_iocage_lock);
2756 	mutex_exit(&cpu_lock);
2757 
2758 
2759 	if (drmach_mbox_init() == -1) {
2760 		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
2761 	}
2762 
2763 	/*
2764 	 * Walk immediate children of devinfo root node and hold
2765 	 * all devinfo branches of interest.
2766 	 */
2767 	hold = 1;
2768 	rdip = ddi_root_node();
2769 
2770 	ndi_devi_enter(rdip, &circ);
2771 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2772 	ndi_devi_exit(rdip, circ);
2773 
2774 	drmach_initialized = 1;
2775 
2776 	/*
2777 	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
2778 	 * rev introducing the axq_iopause_*_all interfaces should not regress
2779 	 * when installed without the DR rev using those interfaces. The default
2780 	 * is for iopause to be enabled/disabled during axq suspend/resume. By
2781 	 * setting the following axq flag to zero, axq will not enable iopause
2782 	 * during suspend/resume, instead DR will call the axq_iopause_*_all
2783 	 * interfaces during drmach_copy_rename.
2784 	 */
2785 	axq_suspend_iopause = 0;
2786 
2787 	mutex_exit(&drmach_i_lock);
2788 
2789 	return (0);
2790 }
2791 
2792 static void
2793 drmach_fini(void)
2794 {
2795 	dev_info_t	*rdip;
2796 	int		hold, circ;
2797 
2798 	if (drmach_initialized) {
2799 		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2800 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2801 		drmach_boards = NULL;
2802 		rw_exit(&drmach_boards_rwlock);
2803 
2804 		mutex_destroy(&drmach_slice_table_lock);
2805 		mutex_destroy(&drmach_xt_mb_lock);
2806 		mutex_destroy(&drmach_bus_sync_lock);
2807 		cv_destroy(&drmach_iocage_cv);
2808 		mutex_destroy(&drmach_iocage_lock);
2809 
2810 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2811 
2812 		/*
2813 		 * Walk immediate children of the root devinfo node
2814 		 * releasing holds acquired on branches in drmach_init()
2815 		 */
2816 		hold = 0;
2817 		rdip = ddi_root_node();
2818 
2819 		ndi_devi_enter(rdip, &circ);
2820 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2821 		ndi_devi_exit(rdip, circ);
2822 
2823 		drmach_initialized = 0;
2824 	}
2825 
2826 	(void) drmach_mbox_fini();
2827 	if (drmach_xt_mb != NULL) {
2828 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2829 		    drmach_xt_mb_size);
2830 	}
2831 	rw_destroy(&drmach_boards_rwlock);
2832 	mutex_destroy(&drmach_i_lock);
2833 }
2834 
2835 static void
2836 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2837 {
2838 	kpreempt_disable();
2839 
2840 	/* get register address, read madr value */
2841 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2842 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2843 	} else {
2844 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2845 	}
2846 
2847 	kpreempt_enable();
2848 }
2849 
2850 
2851 static uint64_t *
2852 drmach_prep_mc_rename(uint64_t *p, int local,
2853 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2854 {
2855 	int bank;
2856 
2857 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2858 		uint64_t madr, bank_offset;
2859 
2860 		/* fetch mc's bank madr register value */
2861 		drmach_mem_read_madr(mp, bank, &madr);
2862 		if (madr & DRMACH_MC_VALID_MASK) {
2863 			uint64_t bankpa;
2864 
2865 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2866 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2867 			bankpa = new_basepa + bank_offset;
2868 
2869 			/* encode new base pa into madr */
2870 			madr &= ~DRMACH_MC_UM_MASK;
2871 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2872 			madr &= ~DRMACH_MC_LM_MASK;
2873 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2874 
2875 			if (local)
2876 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2877 			else
2878 				*p++ = DRMACH_MC_ADDR(mp, bank);
2879 
2880 			*p++ = madr;
2881 		}
2882 	}
2883 
2884 	return (p);
2885 }
2886 
2887 static uint64_t *
2888 drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2889 {
2890 	drmach_board_t	*bp;
2891 	int		 rv;
2892 	int		 idx;
2893 	drmachid_t	 id;
2894 	uint64_t	 last_scsr_pa = 0;
2895 
2896 	/* memory is always in slot 0 */
2897 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2898 
2899 	/* look up slot 1 board on same expander */
2900 	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2901 	rv = drmach_array_get(drmach_boards, idx, &id);
2902 	bp = id; /* bp will be NULL if board not found */
2903 
2904 	/* look up should never be out of bounds */
2905 	ASSERT(rv == 0);
2906 
2907 	/* nothing to do when board is not found or has no devices */
2908 	if (rv == -1 || bp == NULL || bp->devices == NULL)
2909 		return (p);
2910 
2911 	rv = drmach_array_first(bp->devices, &idx, &id);
2912 	while (rv == 0) {
2913 		if (DRMACH_IS_IO_ID(id)) {
2914 			drmach_io_t *io = id;
2915 
2916 			/*
2917 			 * Skip all non-Schizo IO devices (only IO nodes
2918 			 * that are Schizo devices have non-zero scsr_pa).
2919 			 * Filter out "other" leaf to avoid writing to the
2920 			 * same Schizo Control/Status Register twice.
2921 			 */
2922 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2923 				uint64_t scsr;
2924 
2925 				scsr  = lddphysio(io->scsr_pa);
2926 				scsr &= ~(DRMACH_LPA_BASE_MASK |
2927 				    DRMACH_LPA_BND_MASK);
2928 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2929 				scsr |= DRMACH_PA_TO_LPA_BND(
2930 				    new_basepa + DRMACH_MEM_SLICE_SIZE);
2931 
2932 				*p++ = io->scsr_pa;
2933 				*p++ = scsr;
2934 
2935 				last_scsr_pa = io->scsr_pa;
2936 			}
2937 		}
2938 		rv = drmach_array_next(bp->devices, &idx, &id);
2939 	}
2940 
2941 	return (p);
2942 }
2943 
2944 /*
2945  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2946  * The latter is returned when drmach_rename fails to idle a Panther MC and
2947  * is used to identify the MC for error reporting.
2948  */
2949 static uint64_t *
2950 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2951 {
2952 	/* only slot 0 has memory */
2953 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2954 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2955 
2956 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2957 		ASSERT(DRMACH_IS_MEM_ID(mp));
2958 
2959 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2960 			if (local) {
2961 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2962 				*p++ = (uintptr_t)mp;
2963 			}
2964 		} else if (!local) {
2965 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2966 			*p++ = (uintptr_t)mp;
2967 		}
2968 	}
2969 
2970 	return (p);
2971 }
2972 
2973 static sbd_error_t *
2974 drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
2975 	uint64_t t_slice_offset, caddr_t buf, int buflen)
2976 {
2977 	_NOTE(ARGUNUSED(buflen))
2978 
2979 	uint64_t		*p = (uint64_t *)buf, *q;
2980 	sbd_error_t		*err;
2981 	int			 rv;
2982 	drmach_mem_t		*mp, *skip_mp;
2983 	uint64_t		 s_basepa, t_basepa;
2984 	uint64_t		 s_new_basepa, t_new_basepa;
2985 
2986 	/* verify supplied buffer space is adequate */
2987 	ASSERT(buflen >=
2988 	    /* addr for all possible MC banks */
2989 	    (sizeof (uint64_t) * 4 * 4 * 18) +
2990 	    /* list section terminator */
2991 	    (sizeof (uint64_t) * 1) +
2992 	    /* addr/id tuple for local Panther MC idle reg */
2993 	    (sizeof (uint64_t) * 2) +
2994 	    /* list section terminator */
2995 	    (sizeof (uint64_t) * 1) +
2996 	    /* addr/id tuple for 2 boards with 4 Panther MC idle regs */
2997 	    (sizeof (uint64_t) * 2 * 2 * 4) +
2998 	    /* list section terminator */
2999 	    (sizeof (uint64_t) * 1) +
3000 	    /* addr/val tuple for 1 proc with 4 MC banks */
3001 	    (sizeof (uint64_t) * 2 * 4) +
3002 	    /* list section terminator */
3003 	    (sizeof (uint64_t) * 1) +
3004 	    /* addr/val tuple for 2 boards w/ 2 schizos each */
3005 	    (sizeof (uint64_t) * 2 * 2 * 2) +
3006 	    /* addr/val tuple for 2 boards w/ 16 MC banks each */
3007 	    (sizeof (uint64_t) * 2 * 2 * 16) +
3008 	    /* list section terminator */
3009 	    (sizeof (uint64_t) * 1) +
3010 	    /* addr/val tuple for 18 AXQs w/ two slots each */
3011 	    (sizeof (uint64_t) * 2 * 2 * 18) +
3012 	    /* list section terminator */
3013 	    (sizeof (uint64_t) * 1) +
3014 	    /* list terminator */
3015 	    (sizeof (uint64_t) * 1));
3016 
3017 	/* copy bank list to rename script */
3018 	mutex_enter(&drmach_bus_sync_lock);
3019 	for (q = drmach_bus_sync_list; *q; q++, p++)
3020 		*p = *q;
3021 	mutex_exit(&drmach_bus_sync_lock);
3022 
3023 	/* list section terminator */
3024 	*p++ = 0;
3025 
3026 	/*
3027 	 * Write idle script for MC on this processor.  A script will be
3028 	 * produced only if this is a Panther processor on the source or
3029 	 * target board.
3030 	 */
3031 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3032 		p = drmach_prep_pn_mc_idle(p, s_mp, 1);
3033 
3034 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3035 		p = drmach_prep_pn_mc_idle(p, t_mp, 1);
3036 
3037 	/* list section terminator */
3038 	*p++ = 0;
3039 
3040 	/*
3041 	 * Write idle script for all other MCs on source and target
3042 	 * Panther boards.
3043 	 */
3044 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3045 		p = drmach_prep_pn_mc_idle(p, s_mp, 0);
3046 
3047 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3048 		p = drmach_prep_pn_mc_idle(p, t_mp, 0);
3049 
3050 	/* list section terminator */
3051 	*p++ = 0;
3052 
3053 	/*
3054 	 * Step 1:	Write source base address to target MC
3055 	 *		with present bit off.
3056 	 * Step 2:	Now rewrite target reg with present bit on.
3057 	 */
3058 	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
3059 	ASSERT(err == NULL);
3060 	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
3061 	ASSERT(err == NULL);
3062 
3063 	/* exchange base pa. include slice offset in new target base pa */
3064 	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
3065 	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
3066 	    t_slice_offset;
3067 
3068 	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
3069 	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);
3070 
3071 	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
3072 	    CPU->cpu_id);
3073 
3074 	/*
3075 	 * Write rename script for MC on this processor.  A script will
3076 	 * be produced only if this processor is on the source or target
3077 	 * board.
3078 	 */
3079 
3080 	skip_mp = NULL;
3081 	mp = s_mp->dev.bp->mem;
3082 	while (mp != NULL && skip_mp == NULL) {
3083 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3084 			skip_mp = mp;
3085 			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
3086 			    s_new_basepa);
3087 		}
3088 
3089 		mp = mp->next;
3090 	}
3091 
3092 	mp = t_mp->dev.bp->mem;
3093 	while (mp != NULL && skip_mp == NULL) {
3094 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3095 			skip_mp = mp;
3096 			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
3097 			    t_new_basepa);
3098 		}
3099 
3100 		mp = mp->next;
3101 	}
3102 
3103 	/* list section terminator */
3104 	*p++ = 0;
3105 
3106 	/*
3107 	 * Write rename script for all other MCs on source and target
3108 	 * boards.
3109 	 */
3110 
3111 	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
3112 		if (mp == skip_mp)
3113 			continue;
3114 		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
3115 	}
3116 
3117 	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
3118 		if (mp == skip_mp)
3119 			continue;
3120 		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
3121 	}
3122 
3123 	/* Write rename script for Schizo LPA_BASE/LPA_BND */
3124 	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
3125 	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);
3126 
3127 	/* list section terminator */
3128 	*p++ = 0;
3129 
3130 	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
3131 	    DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
3132 	    DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));
3133 
3134 	rv = axq_do_casm_rename_script(&p,
3135 	    DRMACH_PA_TO_SLICE(s_new_basepa),
3136 	    DRMACH_PA_TO_SLICE(t_new_basepa));
3137 	if (rv == DDI_FAILURE)
3138 		return (DRMACH_INTERNAL_ERROR());
3139 
3140 	/* list section & final terminator */
3141 	*p++ = 0;
3142 	*p++ = 0;
3143 
3144 #ifdef DEBUG
3145 	{
3146 		uint64_t *q = (uint64_t *)buf;
3147 
3148 		/* paranoia */
3149 		ASSERT((caddr_t)p <= buf + buflen);
3150 
3151 		DRMACH_PR("MC bank base pa list:\n");
3152 		while (*q) {
3153 			uint64_t a = *q++;
3154 
3155 			DRMACH_PR("0x%lx\n", a);
3156 		}
3157 
3158 		/* skip terminator */
3159 		q += 1;
3160 
3161 		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
3162 		while (*q) {
3163 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3164 			q += 2;
3165 		}
3166 
3167 		/* skip terminator */
3168 		q += 1;
3169 
3170 		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
3171 		while (*q) {
3172 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3173 			q += 2;
3174 		}
3175 
3176 		/* skip terminator */
3177 		q += 1;
3178 
3179 		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
3180 		while (*q) {
3181 			uint64_t r = *q++;	/* register address */
3182 			uint64_t v = *q++;	/* new register value */
3183 
3184 			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
3185 			    r, v, (long)(DRMACH_MC_UM_TO_PA(v)|
3186 			    DRMACH_MC_LM_TO_PA(v)));
3187 		}
3188 
3189 		/* skip terminator */
3190 		q += 1;
3191 
3192 		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
3193 		while (*q) {
3194 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3195 			q += 2;
3196 		}
3197 
3198 		/* skip terminator */
3199 		q += 1;
3200 
3201 		DRMACH_PR("AXQ reprogramming script:\n");
3202 		while (*q) {
3203 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3204 			q += 2;
3205 		}
3206 
3207 		/* verify final terminator is present */
3208 		ASSERT(*(q + 1) == 0);
3209 
3210 		DRMACH_PR("copy-rename script 0x%p, len %d\n",
3211 		    (void *)buf, (int)((intptr_t)p - (intptr_t)buf));
3212 
3213 		if (drmach_debug)
3214 			DELAY(10000000);
3215 	}
3216 #endif
3217 
3218 	return (NULL);
3219 }
3220 
3221 static void
3222 drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
3223 {
3224 	int		 rv;
3225 
3226 	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));
3227 
3228 	if (bp->devices) {
3229 		int		 d_idx;
3230 		drmachid_t	 d_id;
3231 
3232 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3233 		while (rv == 0) {
3234 			if (DRMACH_IS_CPU_ID(d_id)) {
3235 				drmach_cpu_t	*cp = d_id;
3236 				processorid_t	 cpuid = cp->cpuid;
3237 
3238 				mutex_enter(&cpu_lock);
3239 				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
3240 					drmach_xt_mb[cpuid] = 0x80 | slice;
3241 				mutex_exit(&cpu_lock);
3242 			}
3243 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3244 		}
3245 	}
3246 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
3247 		drmach_board_t	*s1bp = NULL;
3248 
3249 		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
3250 		    (void *) &s1bp);
3251 		if (rv == 0 && s1bp != NULL) {
3252 			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
3253 			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
3254 			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
3255 		}
3256 	}
3257 }
3258 
/*
 * Prepare a copy-rename operation from source memory unit s_id to target
 * memory unit t_id.
 *
 *   t_id            target memory unit (must be a mem id)
 *   t_slice_offset  offset of the target within its slice
 *   s_id            source memory unit (must be a mem id)
 *   c_ml            memlist of spans to copy; its ml_address fields are
 *                   rewritten in place to be relative to the copy base pa
 *   cr_id           out: handle for drmach_copy_rename() and
 *                   drmach_copy_rename_fini()
 *
 * The routine maps the current cpu's SRAM page at drmach_cpu_sram_va,
 * copies the text of drmach_rename into it, zero-fills the rest of the
 * page, and has drmach_prep_rename_script() build the rename script
 * after the (16-byte aligned) end of that text.  It then disables and
 * flushes the CDC, bumps the busy count of both memory units and
 * allocates the drmach_copy_rename_t handle.
 *
 * Returns NULL on success, otherwise an sbd_error_t.  On success the
 * function returns with drmach_slice_table_lock and drmach_xt_mb_lock
 * still held; both are dropped by drmach_copy_rename_fini().  On the
 * error paths taken after the SRAM page was mapped, the mapping is
 * flushed again before returning.  Note that c_ml has already been
 * rebased by then and is not restored.
 */
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
{
	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
	extern void drmach_rename_end(void);

	drmach_mem_t	*s_mp, *t_mp;
	struct memlist	*x_ml;
	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
	int		 len;
	caddr_t		 bp, wp;
	uint_t		*p, *q;
	sbd_error_t	*err;
	tte_t		*tte;
	drmach_copy_rename_t *cr;

	if (!DRMACH_IS_MEM_ID(s_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	if (!DRMACH_IS_MEM_ID(t_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	s_mp = s_id;
	t_mp = t_id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
	if (err)
		return (err);

	/* calculate slice offset mask from slice size */
	off_mask = DRMACH_MEM_SLICE_SIZE - 1;

	/* calculate source and target base pa */
	s_copybasepa = c_ml->ml_address;
	t_copybasepa =
	    t_basepa + ((c_ml->ml_address & off_mask) - t_slice_offset);

	/* paranoia */
	ASSERT((c_ml->ml_address & off_mask) >= t_slice_offset);

	/* adjust copy memlist addresses to be relative to copy base pa */
	x_ml = c_ml;
	while (x_ml != NULL) {
		x_ml->ml_address -= s_copybasepa;
		x_ml = x_ml->ml_next;
	}

#ifdef DEBUG
	{
	uint64_t s_basepa, s_size, t_size;

	/* walk to the last memlist element to report the end of the span */
	x_ml = c_ml;
	while (x_ml->ml_next != NULL)
		x_ml = x_ml->ml_next;

	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
	    s_copybasepa,
	    s_copybasepa + x_ml->ml_address + x_ml->ml_size);

	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
	    t_copybasepa,
	    t_copybasepa + x_ml->ml_address + x_ml->ml_size);

	DRMACH_PR("copy memlist (relative to copy base pa):\n");
	DRMACH_MEMLIST_DUMP(c_ml);

	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(s_id, &s_size);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(t_id, &t_size);
	ASSERT(err == NULL);

	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
	    s_basepa, s_size);
	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
	    t_basepa, t_size);
	}
#endif /* DEBUG */

	/* Map in appropriate cpu sram page */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	/* bp stays at the start of the page; wp advances as it is filled */
	bp = wp = drmach_cpu_sram_va;

	/* Make sure the rename routine will fit */
	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
	ASSERT(wp + len < bp + PAGESIZE);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)wp;
	q = (uint_t *)drmach_rename;
	while (q < (uint_t *)drmach_rename_end)
		*p++ = *q++;

	/* zero remainder. standard bzero not designed to work in nc space */
	while (p < (uint_t *)(bp + PAGESIZE))
		*p++ = 0;

	DRMACH_PR("drmach_rename function 0x%p, len %d\n", (void *)wp, len);
	/* script area begins at the next 16-byte boundary after the text */
	wp += (len + 15) & ~15;

	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset, wp,
	    PAGESIZE - (wp - bp));
	if (err) {
		/*
		 * Shared error exit (also reached by goto from below):
		 * flush the drmach_cpu_sram_va mapping on this cpu and
		 * hand the error back.
		 */
cleanup:
		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
		    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
		return (err);
	}

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
		goto cleanup;
	}

	/* mark both memory units busy */
	t_mp->dev.busy++;
	s_mp->dev.busy++;

	/* isa tags the handle so the other entry points can verify it */
	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
	    VM_SLEEP);
	cr->isa = (void *)drmach_copy_rename_init;
	cr->data = wp;
	cr->c_ml = c_ml;
	cr->s_mp = s_mp;
	cr->t_mp = t_mp;
	cr->s_copybasepa = s_copybasepa;
	cr->t_copybasepa = t_copybasepa;
	cr->ecode = DRMACH_CR_OK;

	/* both locks remain held until drmach_copy_rename_fini() */
	mutex_enter(&drmach_slice_table_lock);

	mutex_enter(&drmach_xt_mb_lock);
	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

	/*
	 * The boards trade slices: cpus on the source board are armed
	 * with the target copy slice and vice versa.
	 */
	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(t_copybasepa));
	}
	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
		    DRMACH_PA_TO_SLICE(s_copybasepa));
	}

	*cr_id = cr;
	return (NULL);
}
3415 
/*
 * Debug statistics recorded by drmach_copy_rename(): the number of other
 * cpus it expected to check in, and the number of polling iterations it
 * spent waiting for them.  Reported by drmach_copy_rename_fini().
 */
int drmach_rename_count;
int drmach_rename_ntries;
3418 
3419 sbd_error_t *
3420 drmach_copy_rename_fini(drmachid_t id)
3421 {
3422 	drmach_copy_rename_t	*cr = id;
3423 	sbd_error_t		*err = NULL;
3424 	dr_mbox_msg_t		*obufp;
3425 
3426 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3427 
3428 	axq_cdc_enable_all();
3429 
3430 	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3431 	    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
3432 
3433 	switch (cr->ecode) {
3434 	case DRMACH_CR_OK:
3435 		break;
3436 	case DRMACH_CR_MC_IDLE_ERR: {
3437 		dev_info_t	*dip = NULL;
3438 		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
3439 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3440 
3441 		ASSERT(DRMACH_IS_MEM_ID(mp));
3442 
3443 		err = drmach_get_dip(mp, &dip);
3444 
3445 		ASSERT(err == NULL);
3446 		ASSERT(dip != NULL);
3447 
3448 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3449 		(void) ddi_pathname(dip, path);
3450 		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
3451 		    "copy-rename aborted", path, mp->dev.bp->cm.name);
3452 		kmem_free(path, MAXPATHLEN);
3453 		break;
3454 	}
3455 	case DRMACH_CR_IOPAUSE_ERR:
3456 		ASSERT((uintptr_t)cr->earg >= 0 &&
3457 		    (uintptr_t)cr->earg < AXQ_MAX_EXP);
3458 
3459 		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
3460 		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
3461 		    " to copy-rename", (uintptr_t)cr->earg);
3462 		break;
3463 	case DRMACH_CR_ONTRAP_ERR:
3464 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3465 		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
3466 		    "memory error");
3467 		break;
3468 	default:
3469 		err = DRMACH_INTERNAL_ERROR();
3470 		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
3471 		    cr->ecode);
3472 		break;
3473 	}
3474 
3475 #ifdef DEBUG
3476 	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
3477 	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
3478 		int	i;
3479 		for (i = 0; i < NCPU; i++) {
3480 			if (drmach_xt_mb[i])
3481 				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
3482 		}
3483 	}
3484 #endif
3485 	mutex_exit(&drmach_xt_mb_lock);
3486 
3487 	if (cr->c_ml != NULL)
3488 		memlist_delete(cr->c_ml);
3489 
3490 	cr->t_mp->dev.busy--;
3491 	cr->s_mp->dev.busy--;
3492 
3493 	if (err) {
3494 		mutex_exit(&drmach_slice_table_lock);
3495 		goto done;
3496 	}
3497 
3498 	/* update casm shadow for target and source board */
3499 	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
3500 	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
3501 	mutex_exit(&drmach_slice_table_lock);
3502 
3503 	mutex_enter(&drmach_bus_sync_lock);
3504 	drmach_bus_sync_list_update();
3505 	mutex_exit(&drmach_bus_sync_lock);
3506 
3507 	/*
3508 	 * Make a good-faith effort to notify the SC about the copy-rename, but
3509 	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
3510 	 * will duplicate the update.
3511 	 */
3512 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
3513 	mutex_enter(&drmach_slice_table_lock);
3514 	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
3515 	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
3516 	mutex_exit(&drmach_slice_table_lock);
3517 	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
3518 	    (caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
3519 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
3520 
3521 done:
3522 	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));
3523 
3524 	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
3525 	    drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);
3526 
3527 	return (err);
3528 }
3529 
/*
 * Debug knob: when non-zero, drmach_copy_rename() runs a short busy loop
 * after every 32-byte copy.
 */
int drmach_slow_copy = 0;
3531 
/*
 * Execute the copy-rename prepared by drmach_copy_rename_init().
 *
 *   id   the drmach_copy_rename_t handle returned through cr_id
 *
 * The caller must hold cpu_lock.  The sequence is:
 *   - pause slot1 IO on all expanders,
 *   - cross-trap every other ready cpu into drmach_rename_wait,
 *   - with CE reporting and interrupts disabled and under on_trap
 *     protection, copy each span of cr->c_ml from source to target
 *     32 bytes at a time,
 *   - flush the E$, run the rename routine previously copied to
 *     drmach_cpu_sram_va, and flush the E$, I$, D$ and P$,
 *   - restore interrupt and error-enable state, resume slot1 IO and
 *     release the waiting cpus with drmach_rename_done, or with
 *     drmach_rename_abort when cr->ecode is no longer DRMACH_CR_OK.
 *
 * There is no return value; failures are reported through cr->ecode
 * and cr->earg and turned into errors by drmach_copy_rename_fini().
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;
	uint64_t		 caddr;
	cpuset_t		 cpuset;
	uint_t			 pstate;
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		/* report the failing expander number through earg */
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* every ready cpu except this one is sent to drmach_rename_wait */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/*
	 * Poll for all cpus to check in.  Note that the loop simply gives
	 * up after drmach_cpu_ntries polls; execution continues whether
	 * or not drmach_xt_ready reached count.
	 */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];

	/* parameters for the inline E$ flushes below */
	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->ml_next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base pa */
		s_pa = cr->s_copybasepa + ml->ml_address;
		t_pa = cr->t_copybasepa + ml->ml_address;
		nbytes = ml->ml_size;

		/*
		 * The loop only terminates when nbytes reaches exactly
		 * zero, so ml_size is expected to be a multiple of 32.
		 */
		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				uint64_t i = 13 * 50;
				while (i--)
					;
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename
	 *
	 * Calls the copy of drmach_rename that drmach_copy_rename_init()
	 * placed at drmach_cpu_sram_va, passing the script (cr->data)
	 * and the locations that receive the error code and argument.
	 */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

	/* common exit for the normal path and the on_trap() abort path */
copy_rename_end:

	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	/* tell the waiting cpus to abort if anything above failed */
	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the cpus parked in drmach_rename_wait */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3767 
/*
 * Forward declarations of the common-method implementations that
 * drmach_io_new() installs in each drmach_io_t.
 */
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3771 
3772 static sbd_error_t *
3773 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3774 {
3775 	drmach_node_t	*node = proto->node;
3776 	sbd_error_t	*err;
3777 	drmach_reg_t	 regs[3];
3778 	int		 rv;
3779 	int		 len = 0;
3780 
3781 	rv = node->n_getproplen(node, "reg", &len);
3782 	if (rv != 0 || len != sizeof (regs)) {
3783 		sbd_error_t *err;
3784 
3785 		/* pci nodes are expected to have regs */
3786 		err = drerr_new(1, ESTC_GETPROP,
3787 		    "Device Node 0x%x: property %s",
3788 		    (uint_t)node->get_dnode(node), "reg");
3789 		return (err);
3790 	}
3791 
3792 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3793 	if (rv) {
3794 		sbd_error_t *err;
3795 
3796 		err = drerr_new(1, ESTC_GETPROP,
3797 		    "Device Node 0x%x: property %s",
3798 		    (uint_t)node->get_dnode(node), "reg");
3799 
3800 		return (err);
3801 	}
3802 
3803 	/*
3804 	 * Fix up unit number so that Leaf A has a lower unit number
3805 	 * than Leaf B.
3806 	 */
3807 	if ((proto->portid % 2) != 0) {
3808 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3809 			proto->unum = 0;
3810 		else
3811 			proto->unum = 1;
3812 	} else {
3813 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3814 			proto->unum = 2;
3815 		else
3816 			proto->unum = 3;
3817 	}
3818 
3819 	err = drmach_io_new(proto, idp);
3820 	if (err == NULL) {
3821 		drmach_io_t *self = *idp;
3822 
3823 		/* reassemble 64-bit base address */
3824 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3825 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3826 	}
3827 
3828 	return (err);
3829 }
3830 
3831 static sbd_error_t *
3832 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3833 {
3834 	drmach_io_t	*ip;
3835 
3836 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3837 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3838 	ip->dev.node = drmach_node_dup(proto->node);
3839 	ip->dev.cm.isa = (void *)drmach_io_new;
3840 	ip->dev.cm.dispose = drmach_io_dispose;
3841 	ip->dev.cm.release = drmach_io_release;
3842 	ip->dev.cm.status = drmach_io_status;
3843 
3844 	(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3845 	    ip->dev.type, ip->dev.unum);
3846 
3847 	*idp = (drmachid_t)ip;
3848 	return (NULL);
3849 }
3850 
3851 static void
3852 drmach_io_dispose(drmachid_t id)
3853 {
3854 	drmach_io_t *self;
3855 
3856 	ASSERT(DRMACH_IS_IO_ID(id));
3857 
3858 	self = id;
3859 	if (self->dev.node)
3860 		drmach_node_dispose(self->dev.node);
3861 
3862 	kmem_free(self, sizeof (*self));
3863 }
3864 
3865 /*ARGSUSED*/
3866 sbd_error_t *
3867 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3868 {
3869 	drmach_board_t	*bp = (drmach_board_t *)id;
3870 	sbd_error_t	*err = NULL;
3871 
3872 	if (id && DRMACH_IS_BOARD_ID(id)) {
3873 		switch (cmd) {
3874 			case SBD_CMD_TEST:
3875 			case SBD_CMD_STATUS:
3876 			case SBD_CMD_GETNCM:
3877 				break;
3878 			case SBD_CMD_CONNECT:
3879 				if (bp->connected)
3880 					err = drerr_new(0, ESBD_STATE, NULL);
3881 
3882 				if (bp->cond == SBD_COND_UNUSABLE)
3883 					err = drerr_new(0,
3884 					    ESBD_FATAL_STATE, NULL);
3885 				break;
3886 			case SBD_CMD_DISCONNECT:
3887 				if (!bp->connected)
3888 					err = drerr_new(0, ESBD_STATE, NULL);
3889 
3890 				if (bp->cond == SBD_COND_UNUSABLE)
3891 					err = drerr_new(0,
3892 					    ESBD_FATAL_STATE, NULL);
3893 				break;
3894 			default:
3895 				if (bp->cond == SBD_COND_UNUSABLE)
3896 					err = drerr_new(0,
3897 					    ESBD_FATAL_STATE, NULL);
3898 				break;
3899 
3900 		}
3901 	}
3902 
3903 	return (err);
3904 }
3905 
3906 /*ARGSUSED*/
3907 sbd_error_t *
3908 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3909 {
3910 	return (NULL);
3911 }
3912 
3913 sbd_error_t *
3914 drmach_board_assign(int bnum, drmachid_t *id)
3915 {
3916 	sbd_error_t	*err = NULL;
3917 	caddr_t		obufp;
3918 
3919 	if (!drmach_initialized && drmach_init() == -1) {
3920 		err = DRMACH_INTERNAL_ERROR();
3921 	}
3922 
3923 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
3924 
3925 	if (!err) {
3926 		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
3927 			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
3928 		} else {
3929 			drmach_board_t	*bp;
3930 
3931 			if (*id)
3932 				rw_downgrade(&drmach_boards_rwlock);
3933 
3934 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
3935 			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
3936 			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
3937 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
3938 
3939 			if (!err) {
3940 				bp = *id;
3941 				if (!*id)
3942 					bp = *id  =
3943 					    (drmachid_t)drmach_board_new(bnum);
3944 				bp->assigned = 1;
3945 			}
3946 		}
3947 	}
3948 	rw_exit(&drmach_boards_rwlock);
3949 	return (err);
3950 }
3951 
3952 static uint_t
3953 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3954 {
3955 	uint_t	port, port_start, port_end;
3956 	uint_t	non_panther_cpus = 0;
3957 	uint_t	impl;
3958 
3959 	ASSERT(gdcd != NULL);
3960 
3961 	/*
3962 	 * Determine PRD port indices based on slot location.
3963 	 */
3964 	switch (slot) {
3965 	case 0:
3966 		port_start = 0;
3967 		port_end = 3;
3968 		break;
3969 	case 1:
3970 		port_start = 4;
3971 		port_end = 5;
3972 		break;
3973 	default:
3974 		ASSERT(0);
3975 		/* check all */
3976 		port_start = 0;
3977 		port_end = 5;
3978 		break;
3979 	}
3980 
3981 	for (port = port_start; port <= port_end; port++) {
3982 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3983 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3984 			/*
3985 			 * This Safari port passed POST and represents a
3986 			 * cpu, so check the implementation.
3987 			 */
3988 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3989 			    & 0xffff;
3990 
3991 			switch (impl) {
3992 			case CHEETAH_IMPL:
3993 			case CHEETAH_PLUS_IMPL:
3994 			case JAGUAR_IMPL:
3995 				non_panther_cpus++;
3996 				break;
3997 			case PANTHER_IMPL:
3998 				break;
3999 			default:
4000 				ASSERT(0);
4001 				non_panther_cpus++;
4002 				break;
4003 			}
4004 		}
4005 	}
4006 
4007 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4008 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4009 
4010 	return (non_panther_cpus);
4011 }
4012 
4013 sbd_error_t *
4014 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
4015 {
4016 	_NOTE(ARGUNUSED(opts))
4017 
4018 	drmach_board_t		*bp = (drmach_board_t *)id;
4019 	sbd_error_t		*err;
4020 	dr_mbox_msg_t		*obufp;
4021 	gdcd_t			*gdcd = NULL;
4022 	uint_t			exp, slot;
4023 	sc_gptwocfg_cookie_t	scc;
4024 	int			panther_pages_enabled;
4025 
4026 	if (!DRMACH_IS_BOARD_ID(id))
4027 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4028 
4029 	/*
4030 	 * Build the casm info portion of the CLAIM message.
4031 	 */
4032 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4033 	mutex_enter(&drmach_slice_table_lock);
4034 	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
4035 	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
4036 	mutex_exit(&drmach_slice_table_lock);
4037 	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
4038 	    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4039 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4040 
4041 	if (err) {
4042 		/*
4043 		 * if mailbox timeout or unrecoverable error from SC,
4044 		 * board cannot be touched.  Mark the status as
4045 		 * unusable.
4046 		 */
4047 		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4048 		    (err->e_code == ESTC_MBXRPLY))
4049 			bp->cond = SBD_COND_UNUSABLE;
4050 		return (err);
4051 	}
4052 
4053 	gdcd = drmach_gdcd_new();
4054 	if (gdcd == NULL) {
4055 		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
4056 		    bp->cm.name);
4057 		return (DRMACH_INTERNAL_ERROR());
4058 	}
4059 
4060 	/*
4061 	 * Read CPU SRAM DR buffer offset from GDCD.
4062 	 */
4063 	exp = DRMACH_BNUM2EXP(bp->bnum);
4064 	slot = DRMACH_BNUM2SLOT(bp->bnum);
4065 	bp->stardrb_offset =
4066 	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
4067 	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
4068 	    bp->stardrb_offset);
4069 
4070 	/*
4071 	 * Read board LPA setting from GDCD.
4072 	 */
4073 	bp->flags &= ~DRMACH_NULL_PROC_LPA;
4074 	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
4075 	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
4076 		bp->flags |= DRMACH_NULL_PROC_LPA;
4077 		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
4078 	}
4079 
4080 	/*
4081 	 * XXX Until the Solaris large pages support heterogeneous cpu
4082 	 * domains, DR needs to prevent the addition of non-Panther cpus
4083 	 * to an all-Panther domain with large pages enabled.
4084 	 */
4085 	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
4086 	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
4087 	    panther_pages_enabled && drmach_large_page_restriction) {
4088 		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
4089 		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
4090 		err = drerr_new(0, ESTC_SUPPORT, NULL);
4091 	}
4092 
4093 	if (err == NULL) {
4094 		/* do saf configurator stuff */
4095 		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
4096 		scc = sc_probe_board(bp->bnum);
4097 		if (scc == NULL)
4098 			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
4099 	}
4100 
4101 	if (err) {
4102 		/* flush CDC srams */
4103 		if (axq_cdc_flush_all() != DDI_SUCCESS) {
4104 			goto out;
4105 		}
4106 
4107 		/*
4108 		 * Build the casm info portion of the UNCLAIM message.
4109 		 */
4110 		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4111 		mutex_enter(&drmach_slice_table_lock);
4112 		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4113 		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4114 		mutex_exit(&drmach_slice_table_lock);
4115 		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
4116 		    (caddr_t)obufp, sizeof (dr_mbox_msg_t),
4117 		    (caddr_t)NULL, 0);
4118 
4119 		kmem_free(obufp, sizeof (dr_mbox_msg_t));
4120 
4121 		/*
4122 		 * we clear the connected flag just in case it would have
4123 		 * been set by a concurrent drmach_board_status() thread
4124 		 * before the UNCLAIM completed.
4125 		 */
4126 		bp->connected = 0;
4127 		goto out;
4128 	}
4129 
4130 	/*
4131 	 * Now that the board has been successfully attached, obtain
4132 	 * platform-specific DIMM serial id information for the board.
4133 	 */
4134 	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4135 	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
4136 		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
4137 	}
4138 
4139 out:
4140 	if (gdcd != NULL)
4141 		drmach_gdcd_dispose(gdcd);
4142 
4143 	return (err);
4144 }
4145 
/*
 * Refresh the cached CASM information for the expander that holds bp.
 *
 *   bp          board whose expander entry in drmach_slice_table is
 *               updated
 *   invalidate  when non-zero, bp must be a slot 0 board; its cached
 *               entry is cleared and the cached AXQ dip is dropped if
 *               it belongs to the same expander.  An AXQ on a
 *               different expander is then selected and the function
 *               returns without reading the CASM.
 *
 * The AXQ device used for reading the CASM is kept in function-static
 * variables and therefore persists across calls; it is looked up again
 * whenever none is cached or the cached one is no longer attached.  If
 * no suitable AXQ instance can be found the function returns without
 * touching the slice table any further.
 *
 * The caller must hold drmach_slice_table_lock.
 */
static void
drmach_slice_table_update(drmach_board_t *bp, int invalidate)
{
	static char		*axq_name = "address-extender-queue";
	static dev_info_t	*axq_dip = NULL;
	static int		 axq_exp = -1;
	static int		 axq_slot;
	int			 e, s, slice;

	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));

	e = DRMACH_BNUM2EXP(bp->bnum);
	if (invalidate) {
		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);

		/* invalidate cached casm value */
		drmach_slice_table[e] = 0;

		/* invalidate cached axq info if for same exp */
		if (e == axq_exp && axq_dip) {
			ndi_rele_devi(axq_dip);
			axq_dip = NULL;
		}
	}

	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
		int i, portid;

		/* search for an attached slot0 axq instance */
		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
			/* drop the candidate rejected on the previous pass */
			if (axq_dip)
				ndi_rele_devi(axq_dip);
			axq_dip = ddi_find_devinfo(axq_name, i, 0);
			if (axq_dip && DDI_CF2(axq_dip)) {
				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
				    DDI_PROP_DONTPASS, "portid", -1);
				if (portid == -1) {
					DRMACH_PR("cant get portid of axq "
					    "instance %d\n", i);
					continue;
				}

				/* expander and slot are encoded in portid */
				axq_exp = (portid >> 5) & 0x1f;
				axq_slot = portid & 1;

				/* never pick the expander being invalidated */
				if (invalidate && axq_exp == e)
					continue;

				if (axq_slot == 0)
					break;	/* found */
			}
		}

		/* loop ran to completion: no usable instance was found */
		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
			if (axq_dip) {
				ndi_rele_devi(axq_dip);
				axq_dip = NULL;
			}
			DRMACH_PR("drmach_slice_table_update: failed to "
			    "update axq dip\n");
			return;
		}

	}

	ASSERT(axq_dip);
	ASSERT(axq_slot == 0);

	if (invalidate)
		return;

	s = DRMACH_BNUM2SLOT(bp->bnum);
	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n", axq_exp, axq_slot,
	    e, s);

	/* invalidate entry */
	drmach_slice_table[e] &= ~0x20;

	/*
	 * find a slice that routes to expander e. If no match
	 * is found, drmach_slice_table[e] will remain invalid.
	 *
	 * The CASM is a routing table indexed by slice number.
	 * Each element in the table contains permission bits,
	 * a destination expander number and a valid bit. The
	 * valid bit must be true for the element to be meaningful.
	 *
	 * CASM entry structure
	 *   Bits 15..6 ignored
	 *   Bit  5	valid
	 *   Bits 0..4	expander number
	 *
	 * NOTE: the for loop is really enumerating the range of slices,
	 * which is ALWAYS equal to the range of expanders. Hence,
	 * AXQ_MAX_EXP is okay to use in this loop.
	 *
	 * The loop does not stop at the first match, so the highest
	 * numbered matching slice is the one recorded.
	 */
	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);

		if ((casm & 0x20) && (casm & 0x1f) == e)
			drmach_slice_table[e] = 0x20 | slice;
	}
}
4249 
4250 /*
4251  * Get base and bound PAs for slot 1 board lpa programming
4252  * If a cpu/mem board is present in the same expander, use slice
4253  * information corresponding to the CASM.  Otherwise, set base and
4254  * bound PAs to 0.
4255  */
4256 static void
4257 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4258 {
4259 	drmachid_t s0id;
4260 
4261 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4262 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4263 
4264 	*basep = *boundp = 0;
4265 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4266 	    s0id != 0) {
4267 
4268 		uint32_t slice;
4269 		if ((slice = drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4270 		    & 0x20) {
4271 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4272 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4273 		}
4274 	}
4275 }
4276 
4277 
4278 /*
4279  * Reprogram slot 1 lpa's as required.
4280  * The purpose of this routine is maintain the LPA settings of the devices
4281  * in slot 1. To date we know Schizo and Cheetah are the only devices that
4282  * require this attention. The LPA setting must match the slice field in the
4283  * CASM element for the local expander. This field is guaranteed to be
4284  * programmed in accordance with the cacheable address space on the slot 0
4285  * board of the local expander. If no memory is present on the slot 0 board,
4286  * there is no cacheable address space and, hence, the CASM slice field will
4287  * be zero or its valid bit will be false (or both).
4288  */
4289 
4290 static void
4291 drmach_slot1_lpa_set(drmach_board_t *bp)
4292 {
4293 	drmachid_t	id;
4294 	drmach_board_t	*s1bp = NULL;
4295 	int		rv, idx, is_maxcat = 1;
4296 	uint64_t	last_scsr_pa = 0;
4297 	uint64_t	new_basepa, new_boundpa;
4298 
4299 	if (DRMACH_BNUM2SLOT(bp->bnum)) {
4300 		s1bp = bp;
4301 		if (s1bp->devices == NULL) {
4302 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4303 			    bp->bnum);
4304 			return;
4305 		}
4306 	} else {
4307 		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
4308 		/* nothing to do when board is not found or has no devices */
4309 		s1bp = id;
4310 		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
4311 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4312 			    bp->bnum + 1);
4313 			return;
4314 		}
4315 		ASSERT(DRMACH_IS_BOARD_ID(id));
4316 	}
4317 	mutex_enter(&drmach_slice_table_lock);
4318 	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
4319 	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
4320 	    s1bp->bnum, new_basepa, new_boundpa);
4321 
4322 	rv = drmach_array_first(s1bp->devices, &idx, &id);
4323 	while (rv == 0) {
4324 		if (DRMACH_IS_IO_ID(id)) {
4325 			drmach_io_t *io = id;
4326 
4327 			is_maxcat = 0;
4328 
4329 			/*
4330 			 * Skip all non-Schizo IO devices (only IO nodes
4331 			 * that are Schizo devices have non-zero scsr_pa).
4332 			 * Filter out "other" leaf to avoid writing to the
4333 			 * same Schizo Control/Status Register twice.
4334 			 */
4335 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
4336 				uint64_t scsr;
4337 
4338 				scsr  = lddphysio(io->scsr_pa);
4339 				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
4340 				    scsr);
4341 				scsr &= ~(DRMACH_LPA_BASE_MASK |
4342 				    DRMACH_LPA_BND_MASK);
4343 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
4344 				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);
4345 
4346 				stdphysio(io->scsr_pa, scsr);
4347 				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
4348 				    scsr);
4349 
4350 				last_scsr_pa = io->scsr_pa;
4351 			}
4352 		}
4353 		rv = drmach_array_next(s1bp->devices, &idx, &id);
4354 	}
4355 
4356 	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
4357 		extern xcfunc_t	drmach_set_lpa;
4358 
4359 		DRMACH_PR("reprogramming maxcat lpa's");
4360 
4361 		mutex_enter(&cpu_lock);
4362 		rv = drmach_array_first(s1bp->devices, &idx, &id);
4363 		while (rv == 0 && id != NULL) {
4364 			if (DRMACH_IS_CPU_ID(id)) {
4365 				int ntries;
4366 				processorid_t cpuid;
4367 
4368 				cpuid = ((drmach_cpu_t *)id)->cpuid;
4369 
4370 				/*
4371 				 * Check for unconfigured or powered-off
4372 				 * MCPUs.  If CPU_READY flag is clear, the
4373 				 * MCPU cannot be xcalled.
4374 				 */
4375 				if ((cpu[cpuid] == NULL) ||
4376 				    (cpu[cpuid]->cpu_flags &
4377 				    CPU_READY) == 0) {
4378 
4379 					rv = drmach_array_next(s1bp->devices,
4380 					    &idx, &id);
4381 					continue;
4382 				}
4383 
4384 				/*
4385 				 * XXX CHEETAH SUPPORT
4386 				 * for cheetah, we need to clear iocage
4387 				 * memory since it will be used for e$ flush
4388 				 * in drmach_set_lpa.
4389 				 */
4390 				if (drmach_is_cheetah) {
4391 					mutex_enter(&drmach_iocage_lock);
4392 					while (drmach_iocage_is_busy)
4393 						cv_wait(&drmach_iocage_cv,
4394 						    &drmach_iocage_lock);
4395 					drmach_iocage_is_busy = 1;
4396 					drmach_iocage_mem_scrub(ecache_size *
4397 					    2);
4398 					mutex_exit(&drmach_iocage_lock);
4399 				}
4400 
4401 				/*
4402 				 * drmach_slice_table[*]
4403 				 *	bit 5	valid
4404 				 *	bit 0:4	slice number
4405 				 *
4406 				 * drmach_xt_mb[*] format for drmach_set_lpa
4407 				 *	bit 7	valid
4408 				 *	bit 6	set null LPA
4409 				 *			(overrides bits 0:4)
4410 				 *	bit 0:4	slice number
4411 				 *
4412 				 * drmach_set_lpa derives processor CBASE and
4413 				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
4414 				 * If bit 6 is set, then CBASE = CBND = 0.
4415 				 * Otherwise, CBASE = slice number;
4416 				 * CBND = slice number + 1.
4417 				 * No action is taken if bit 7 is zero.
4418 				 */
4419 
4420 				mutex_enter(&drmach_xt_mb_lock);
4421 				bzero((void *)drmach_xt_mb,
4422 				    drmach_xt_mb_size);
4423 
4424 				if (new_basepa == 0 && new_boundpa == 0)
4425 					drmach_xt_mb[cpuid] = 0x80 | 0x40;
4426 				else
4427 					drmach_xt_mb[cpuid] = 0x80 |
4428 					    DRMACH_PA_TO_SLICE(new_basepa);
4429 
4430 				drmach_xt_ready = 0;
4431 
4432 				xt_one(cpuid, drmach_set_lpa, NULL, NULL);
4433 
4434 				ntries = drmach_cpu_ntries;
4435 				while (!drmach_xt_ready && ntries) {
4436 					DELAY(drmach_cpu_delay);
4437 					ntries--;
4438 				}
4439 				mutex_exit(&drmach_xt_mb_lock);
4440 				drmach_xt_ready = 0;
4441 
4442 				/*
4443 				 * XXX CHEETAH SUPPORT
4444 				 * for cheetah, we need to clear iocage
4445 				 * memory since it was used for e$ flush
4446 				 * in performed drmach_set_lpa.
4447 				 */
4448 				if (drmach_is_cheetah) {
4449 					mutex_enter(&drmach_iocage_lock);
4450 					drmach_iocage_mem_scrub(ecache_size *
4451 					    2);
4452 					drmach_iocage_is_busy = 0;
4453 					cv_signal(&drmach_iocage_cv);
4454 					mutex_exit(&drmach_iocage_lock);
4455 				}
4456 			}
4457 			rv = drmach_array_next(s1bp->devices, &idx, &id);
4458 		}
4459 		mutex_exit(&cpu_lock);
4460 	}
4461 	mutex_exit(&drmach_slice_table_lock);
4462 }
4463 
4464 /*
4465  * Return the number of connected Panther boards in the domain.
4466  */
4467 static int
4468 drmach_panther_boards(void)
4469 {
4470 	int		rv;
4471 	int		b_idx;
4472 	drmachid_t	b_id;
4473 	drmach_board_t	*bp;
4474 	int		npanther = 0;
4475 
4476 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4477 	while (rv == 0) {
4478 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4479 		bp = b_id;
4480 
4481 		if (IS_PANTHER(bp->cpu_impl))
4482 			npanther++;
4483 
4484 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4485 	}
4486 
4487 	return (npanther);
4488 }
4489 
4490 /*ARGSUSED*/
4491 sbd_error_t *
4492 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
4493 {
4494 	drmach_board_t	*bp;
4495 	dr_mbox_msg_t	*obufp;
4496 	sbd_error_t	*err = NULL;
4497 
4498 	sc_gptwocfg_cookie_t	scc;
4499 
4500 	if (!DRMACH_IS_BOARD_ID(id))
4501 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4502 	bp = id;
4503 
4504 	/*
4505 	 * Build the casm info portion of the UNCLAIM message.
4506 	 * This must be done prior to calling for saf configurator
4507 	 * deprobe, to ensure that the associated axq instance
4508 	 * is not detached.
4509 	 */
4510 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4511 	mutex_enter(&drmach_slice_table_lock);
4512 	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4513 
4514 	/*
4515 	 * If disconnecting slot 0 board, update the casm slice table
4516 	 * info now, for use by drmach_slot1_lpa_set()
4517 	 */
4518 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
4519 		drmach_slice_table_update(bp, 1);
4520 
4521 	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4522 	mutex_exit(&drmach_slice_table_lock);
4523 
4524 	/*
4525 	 * Update LPA information for slot1 board
4526 	 */
4527 	drmach_slot1_lpa_set(bp);
4528 
4529 	/* disable and flush CDC */
4530 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
4531 		axq_cdc_enable_all();	/* paranoia */
4532 		err = DRMACH_INTERNAL_ERROR();
4533 	}
4534 
4535 	/*
4536 	 * call saf configurator for deprobe
4537 	 * It's done now before sending an UNCLAIM message because
4538 	 * IKP will probe boards it doesn't know about <present at boot>
4539 	 * prior to unprobing them.  If this happens after sending the
4540 	 * UNCLAIM, it will cause a dstop for domain transgression error.
4541 	 */
4542 
4543 	if (!err) {
4544 		scc = sc_unprobe_board(bp->bnum);
4545 		axq_cdc_enable_all();
4546 		if (scc != NULL) {
4547 			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
4548 		}
4549 	}
4550 
4551 	/*
4552 	 * If disconnecting a board from a Panther domain, wait a fixed-
4553 	 * time delay for pending Safari transactions to complete on the
4554 	 * disconnecting board's processors.  The bus sync list read used
4555 	 * in drmach_shutdown_asm to synchronize with outstanding Safari
4556 	 * transactions assumes no read-bypass-write mode for all memory
4557 	 * controllers.  Since Panther supports read-bypass-write, a
4558 	 * delay is used that is slightly larger than the maximum Safari
4559 	 * timeout value in the Safari/Fireplane Config Reg.
4560 	 */
4561 	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
4562 		clock_t	stime = ddi_get_lbolt();
4563 
4564 		delay(drv_usectohz(drmach_unclaim_usec_delay));
4565 
4566 		stime = ddi_get_lbolt() - stime;
4567 		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
4568 		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
4569 	}
4570 
4571 	if (!err) {
4572 		obufp->msgdata.dm_ur.mem_clear = 0;
4573 
4574 		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
4575 		    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4576 
4577 		if (err) {
4578 			/*
4579 			 * if mailbox timeout or unrecoverable error from SC,
4580 			 * board cannot be touched.  Mark the status as
4581 			 * unusable.
4582 			 */
4583 			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4584 			    (err->e_code == ESTC_MBXRPLY))
4585 				bp->cond = SBD_COND_UNUSABLE;
4586 			else {
4587 				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
4588 				    bp->bnum);
4589 				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
4590 				    bp->bnum);
4591 				scc = sc_probe_board(bp->bnum);
4592 				if (scc == NULL) {
4593 					cmn_err(CE_WARN,
4594 					"sc_probe_board failed for bnum=%d",
4595 					    bp->bnum);
4596 				} else {
4597 					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
4598 						mutex_enter(
4599 						    &drmach_slice_table_lock);
4600 						drmach_slice_table_update(bp,
4601 						    0);
4602 						mutex_exit(
4603 						    &drmach_slice_table_lock);
4604 					}
4605 					drmach_slot1_lpa_set(bp);
4606 				}
4607 			}
4608 		} else {
4609 			bp->connected = 0;
4610 			/*
4611 			 * Now that the board has been successfully detached,
4612 			 * discard platform-specific DIMM serial id information
4613 			 * for the board.
4614 			 */
4615 			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4616 			    plat_ecc_capability_sc_get(
4617 			    PLAT_ECC_DIMM_SID_MESSAGE)) {
4618 				(void) plat_discard_mem_sids(
4619 				    DRMACH_BNUM2EXP(bp->bnum));
4620 			}
4621 		}
4622 	}
4623 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4624 
4625 	return (err);
4626 }
4627 
4628 static int
4629 drmach_get_portid(drmach_node_t *np)
4630 {
4631 	drmach_node_t	pp;
4632 	int		portid;
4633 	char		type[OBP_MAXPROPNAME];
4634 
4635 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4636 		return (portid);
4637 
4638 	/*
4639 	 * Get the device_type property to see if we should
4640 	 * continue processing this node.
4641 	 */
4642 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4643 		return (-1);
4644 
4645 	/*
4646 	 * If the device is a CPU without a 'portid' property,
4647 	 * it is a CMP core. For such cases, the parent node
4648 	 * has the portid.
4649 	 */
4650 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4651 		if (np->get_parent(np, &pp) != 0)
4652 			return (-1);
4653 
4654 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4655 			return (portid);
4656 	}
4657 
4658 	return (-1);
4659 }
4660 
4661 /*
4662  * This is a helper function to determine if a given
4663  * node should be considered for a dr operation according
4664  * to predefined dr type nodes and the node's name.
4665  * Formal Parameter : The name of a device node.
4666  * Return Value: -1, name does not map to a valid dr type.
4667  *		 A value greater or equal to 0, name is a valid dr type.
4668  */
4669 static int
4670 drmach_name2type_idx(char *name)
4671 {
4672 	int 	index, ntypes;
4673 
4674 	if (name == NULL)
4675 		return (-1);
4676 
4677 	/*
4678 	 * Determine how many possible types are currently supported
4679 	 * for dr.
4680 	 */
4681 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4682 
4683 	/* Determine if the node's name correspond to a predefined type. */
4684 	for (index = 0; index < ntypes; index++) {
4685 		if (strcmp(drmach_name2type[index].name, name) == 0)
4686 			/* The node is an allowed type for dr. */
4687 			return (index);
4688 	}
4689 
4690 	/*
4691 	 * If the name of the node does not map to any of the
4692 	 * types in the array drmach_name2type then the node is not of
4693 	 * interest to dr.
4694 	 */
4695 	return (-1);
4696 }
4697 
4698 static int
4699 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4700 {
4701 	drmach_node_t			*node = args->node;
4702 	drmach_board_cb_data_t		*data = args->data;
4703 	drmach_board_t			*obj = data->obj;
4704 
4705 	int		rv, portid;
4706 	drmachid_t	id;
4707 	drmach_device_t	*device;
4708 	char	name[OBP_MAXDRVNAME];
4709 
4710 	portid = drmach_get_portid(node);
4711 	if (portid == -1) {
4712 		/*
4713 		 * if the node does not have a portid property, then
4714 		 * by that information alone it is known that drmach
4715 		 * is not interested in it.
4716 		 */
4717 		return (0);
4718 	}
4719 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4720 
4721 	/* The node must have a name */
4722 	if (rv)
4723 		return (0);
4724 
4725 	/*
4726 	 * Ignore devices whose portid do not map to this board,
4727 	 * or that their name property is not mapped to a valid
4728 	 * dr device name.
4729 	 */
4730 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4731 	    (drmach_name2type_idx(name) < 0))
4732 		return (0);
4733 
4734 	/*
4735 	 * Create a device data structure from this node data.
4736 	 * The call may yield nothing if the node is not of interest
4737 	 * to drmach.
4738 	 */
4739 	data->err = drmach_device_new(node, obj, portid, &id);
4740 	if (data->err)
4741 		return (-1);
4742 	else if (!id) {
4743 		/*
4744 		 * drmach_device_new examined the node we passed in
4745 		 * and determined that it was either one not of
4746 		 * interest to drmach or the PIM dr layer.
4747 		 * So, it is skipped.
4748 		 */
4749 		return (0);
4750 	}
4751 
4752 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4753 	if (rv) {
4754 		data->err = DRMACH_INTERNAL_ERROR();
4755 		return (-1);
4756 	}
4757 
4758 	device = id;
4759 
4760 #ifdef DEBUG
4761 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4762 	if (DRMACH_IS_IO_ID(id))
4763 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4764 #endif
4765 
4766 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4767 	return (data->err == NULL ? 0 : -1);
4768 }
4769 
4770 sbd_error_t *
4771 drmach_board_find_devices(drmachid_t id, void *a,
4772 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4773 {
4774 	drmach_board_t		*bp = (drmach_board_t *)id;
4775 	sbd_error_t		*err;
4776 	int			 max_devices;
4777 	int			 rv;
4778 	drmach_board_cb_data_t	data;
4779 
4780 	if (!DRMACH_IS_BOARD_ID(id))
4781 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4782 
4783 	max_devices  = plat_max_cpu_units_per_board();
4784 	max_devices += plat_max_mem_units_per_board();
4785 	max_devices += plat_max_io_units_per_board();
4786 
4787 	bp->devices = drmach_array_new(0, max_devices);
4788 
4789 	if (bp->tree == NULL)
4790 		bp->tree = drmach_node_new();
4791 
4792 	data.obj = bp;
4793 	data.ndevs = 0;
4794 	data.found = found;
4795 	data.a = a;
4796 	data.err = NULL;
4797 
4798 	mutex_enter(&drmach_slice_table_lock);
4799 	mutex_enter(&drmach_bus_sync_lock);
4800 
4801 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4802 
4803 	drmach_slice_table_update(bp, 0);
4804 	drmach_bus_sync_list_update();
4805 
4806 	mutex_exit(&drmach_bus_sync_lock);
4807 	mutex_exit(&drmach_slice_table_lock);
4808 
4809 	if (rv == 0) {
4810 		err = NULL;
4811 		drmach_slot1_lpa_set(bp);
4812 	} else {
4813 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4814 		bp->devices = NULL;
4815 
4816 		if (data.err)
4817 			err = data.err;
4818 		else
4819 			err = DRMACH_INTERNAL_ERROR();
4820 	}
4821 
4822 	return (err);
4823 }
4824 
4825 int
4826 drmach_board_lookup(int bnum, drmachid_t *id)
4827 {
4828 	int	rv = 0;
4829 
4830 	if (!drmach_initialized && drmach_init() == -1) {
4831 		*id = 0;
4832 		return (-1);
4833 	}
4834 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4835 	if (drmach_array_get(drmach_boards, bnum, id)) {
4836 		*id = 0;
4837 		rv = -1;
4838 	} else {
4839 		caddr_t		obufp;
4840 		dr_showboard_t	shb;
4841 		sbd_error_t	*err = NULL;
4842 		drmach_board_t	*bp;
4843 
4844 		bp = *id;
4845 
4846 		if (bp)
4847 			rw_downgrade(&drmach_boards_rwlock);
4848 
4849 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4850 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4851 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4852 		    sizeof (dr_showboard_t));
4853 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4854 
4855 		if (err) {
4856 			if (err->e_code == ESTC_UNAVAILABLE) {
4857 				*id = 0;
4858 				rv = -1;
4859 			}
4860 			sbd_err_clear(&err);
4861 		} else {
4862 			if (!bp)
4863 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4864 			bp->connected = (shb.bd_assigned && shb.bd_active);
4865 			bp->empty = shb.slot_empty;
4866 
4867 			switch (shb.test_status) {
4868 				case DR_TEST_STATUS_UNKNOWN:
4869 				case DR_TEST_STATUS_IPOST:
4870 				case DR_TEST_STATUS_ABORTED:
4871 					bp->cond = SBD_COND_UNKNOWN;
4872 					break;
4873 				case DR_TEST_STATUS_PASSED:
4874 					bp->cond = SBD_COND_OK;
4875 					break;
4876 				case DR_TEST_STATUS_FAILED:
4877 					bp->cond = SBD_COND_FAILED;
4878 					break;
4879 				default:
4880 					bp->cond = SBD_COND_UNKNOWN;
4881 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4882 				    shb.test_status);
4883 					break;
4884 			}
4885 			(void) strncpy(bp->type, shb.board_type,
4886 			    sizeof (bp->type));
4887 			bp->assigned = shb.bd_assigned;
4888 			bp->powered = shb.power_on;
4889 		}
4890 	}
4891 	rw_exit(&drmach_boards_rwlock);
4892 	return (rv);
4893 }
4894 
4895 sbd_error_t *
4896 drmach_board_name(int bnum, char *buf, int buflen)
4897 {
4898 	(void) snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4899 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4900 
4901 	return (NULL);
4902 }
4903 
4904 sbd_error_t *
4905 drmach_board_poweroff(drmachid_t id)
4906 {
4907 	drmach_board_t	*bp;
4908 	sbd_error_t	*err;
4909 	drmach_status_t	 stat;
4910 
4911 	if (!DRMACH_IS_BOARD_ID(id))
4912 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4913 	bp = id;
4914 
4915 	err = drmach_board_status(id, &stat);
4916 	if (!err) {
4917 		if (stat.configured || stat.busy)
4918 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4919 		else {
4920 			caddr_t	obufp;
4921 
4922 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4923 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4924 			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4925 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4926 			if (!err)
4927 				bp->powered = 0;
4928 		}
4929 	}
4930 	return (err);
4931 }
4932 
4933 sbd_error_t *
4934 drmach_board_poweron(drmachid_t id)
4935 {
4936 	drmach_board_t	*bp;
4937 	caddr_t		obufp;
4938 	sbd_error_t	*err;
4939 
4940 	if (!DRMACH_IS_BOARD_ID(id))
4941 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4942 	bp = id;
4943 
4944 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4945 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4946 	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4947 	if (!err)
4948 		bp->powered = 1;
4949 
4950 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4951 
4952 	return (err);
4953 }
4954 
4955 static sbd_error_t *
4956 drmach_board_release(drmachid_t id)
4957 {
4958 	if (!DRMACH_IS_BOARD_ID(id))
4959 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4960 	return (NULL);
4961 }
4962 
4963 sbd_error_t *
4964 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
4965 {
4966 	drmach_board_t		*bp;
4967 	drmach_device_t		*dp[MAX_CORES_PER_CMP];
4968 	dr_mbox_msg_t		*obufp;
4969 	sbd_error_t		*err;
4970 	dr_testboard_reply_t	tbr;
4971 	int			cpylen;
4972 	char			*copts;
4973 	int			is_io;
4974 	cpu_flag_t		oflags[MAX_CORES_PER_CMP];
4975 
4976 	if (!DRMACH_IS_BOARD_ID(id))
4977 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4978 	bp = id;
4979 
4980 	/*
4981 	 * If the board is an I/O or MAXCAT board, setup I/O cage for
4982 	 * testing. Slot 1 indicates I/O or MAXCAT board.
4983 	 */
4984 
4985 	is_io = DRMACH_BNUM2SLOT(bp->bnum);
4986 
4987 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4988 
4989 	if (force)
4990 		obufp->msgdata.dm_tb.force = 1;
4991 
4992 	obufp->msgdata.dm_tb.immediate = 1;
4993 
4994 	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
4995 		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
4996 		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
4997 	}
4998 
4999 	if (is_io) {
5000 		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);
5001 
5002 		if (err) {
5003 			kmem_free(obufp, sizeof (dr_mbox_msg_t));
5004 			return (err);
5005 		}
5006 	}
5007 
5008 	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
5009 	    sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));
5010 
5011 	if (!err)
5012 		bp->cond = SBD_COND_OK;
5013 	else
5014 		bp->cond = SBD_COND_UNKNOWN;
5015 
5016 	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
5017 		/* examine test status */
5018 		switch (tbr.test_status) {
5019 			case DR_TEST_STATUS_IPOST:
5020 				bp->cond = SBD_COND_UNKNOWN;
5021 				err = drerr_new(0, ESTC_TEST_IN_PROGRESS, NULL);
5022 				break;
5023 			case DR_TEST_STATUS_UNKNOWN:
5024 				bp->cond = SBD_COND_UNKNOWN;
5025 				err = drerr_new(1,
5026 				    ESTC_TEST_STATUS_UNKNOWN, NULL);
5027 				break;
5028 			case DR_TEST_STATUS_FAILED:
5029 				bp->cond = SBD_COND_FAILED;
5030 				err = drerr_new(1, ESTC_TEST_FAILED, NULL);
5031 				break;
5032 			case DR_TEST_STATUS_ABORTED:
5033 				bp->cond = SBD_COND_UNKNOWN;
5034 				err = drerr_new(1, ESTC_TEST_ABORTED, NULL);
5035 				break;
5036 			default:
5037 				bp->cond = SBD_COND_UNKNOWN;
5038 				err = drerr_new(1, ESTC_TEST_RESULT_UNKNOWN,
5039 				    NULL);
5040 				break;
5041 		}
5042 	}
5043 
5044 	/*
5045 	 * If I/O cage test was performed, check for availability of the
5046 	 * cpu used.  If cpu has been returned, it's OK to proceed with
5047 	 * reconfiguring it for use.
5048 	 */
5049 	if (is_io) {
5050 		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
5051 		    tbr.cpu_recovered);
5052 		DRMACH_PR("drmach_board_test: port id: %d",
5053 		    tbr.cpu_portid);
5054 
5055 		/*
5056 		 * Check the cpu_recovered flag in the testboard reply, or
5057 		 * if the testboard request message was not sent to SMS due
5058 		 * to an mboxsc_putmsg() failure, it's OK to recover the
5059 		 * cpu since hpost hasn't touched it.
5060 		 */
5061 		if ((tbr.cpu_recovered && tbr.cpu_portid ==
5062 		    obufp->msgdata.dm_tb.cpu_portid) ||
5063 		    ((err) && (err->e_code == ESTC_MBXRQST))) {
5064 
5065 			int i;
5066 
5067 			mutex_enter(&cpu_lock);
5068 			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
5069 				if (dp[i] != NULL) {
5070 					(void) drmach_iocage_cpu_return(dp[i],
5071 					    oflags[i]);
5072 				}
5073 			}
5074 			mutex_exit(&cpu_lock);
5075 		} else {
5076 			cmn_err(CE_WARN, "Unable to recover port id %d "
5077 			    "after I/O cage test: cpu_recovered=%d, "
5078 			    "returned portid=%d",
5079 			    obufp->msgdata.dm_tb.cpu_portid,
5080 			    tbr.cpu_recovered, tbr.cpu_portid);
5081 		}
5082 		(void) drmach_iocage_mem_return(&tbr);
5083 	}
5084 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
5085 
5086 	return (err);
5087 }
5088 
5089 sbd_error_t *
5090 drmach_board_unassign(drmachid_t id)
5091 {
5092 	drmach_board_t	*bp;
5093 	sbd_error_t	*err;
5094 	drmach_status_t	 stat;
5095 	caddr_t		obufp;
5096 
5097 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
5098 
5099 	if (!DRMACH_IS_BOARD_ID(id)) {
5100 		rw_exit(&drmach_boards_rwlock);
5101 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5102 	}
5103 	bp = id;
5104 
5105 	err = drmach_board_status(id, &stat);
5106 	if (err) {
5107 		rw_exit(&drmach_boards_rwlock);
5108 		return (err);
5109 	}
5110 
5111 	if (stat.configured || stat.busy) {
5112 		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
5113 	} else {
5114 
5115 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5116 		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
5117 		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5118 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
5119 		if (!err) {
5120 			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
5121 				err = DRMACH_INTERNAL_ERROR();
5122 			else
5123 				drmach_board_dispose(bp);
5124 		}
5125 	}
5126 	rw_exit(&drmach_boards_rwlock);
5127 	return (err);
5128 }
5129 
5130 static sbd_error_t *
5131 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5132 {
5133 	int		len;
5134 	drmach_reg_t	reg;
5135 	drmach_node_t	pp;
5136 	drmach_node_t	*np = dp->node;
5137 
5138 	/*
5139 	 * If the node does not have a portid property,
5140 	 * it represents a CMP device. For a CMP, the reg
5141 	 * property of the parent holds the information of
5142 	 * interest.
5143 	 */
5144 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5145 
5146 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5147 			return (DRMACH_INTERNAL_ERROR());
5148 		}
5149 		np = &pp;
5150 	}
5151 
5152 	if (np->n_getproplen(np, "reg", &len) != 0)
5153 		return (DRMACH_INTERNAL_ERROR());
5154 
5155 	if (len != sizeof (reg))
5156 		return (DRMACH_INTERNAL_ERROR());
5157 
5158 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5159 		return (DRMACH_INTERNAL_ERROR());
5160 
5161 	/* reassemble 64-bit base address */
5162 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5163 
5164 	return (NULL);
5165 }
5166 
5167 static void
5168 drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5169 {
5170 	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5171 	uint_t		*reg_read = (uint_t *)arg2;
5172 
5173 	*saf_config_reg = lddsafconfig();
5174 	*reg_read = 0x1;
5175 }
5176 
5177 /*
5178  * A return value of 1 indicates success and 0 indicates a failure
5179  */
5180 static int
5181 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5182 {
5183 
5184 	int 	rv = 0x0;
5185 
5186 	*scr = 0x0;
5187 
5188 	/*
5189 	 * Confirm cpu was in ready set when xc was issued.
5190 	 * This is done by verifying rv which is
5191 	 * set to 0x1 when xc_one is successful.
5192 	 */
5193 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5194 	    (uint64_t)scr, (uint64_t)&rv);
5195 
5196 	return (rv);
5197 
5198 }
5199 
5200 static sbd_error_t *
5201 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5202 {
5203 	drmach_node_t	*np;
5204 
5205 	np = cp->dev.node;
5206 
5207 	/*
5208 	 * If a CPU does not have a portid property, it must
5209 	 * be a CMP device with a cpuid property.
5210 	 */
5211 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5212 
5213 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5214 			return (DRMACH_INTERNAL_ERROR());
5215 		}
5216 	}
5217 
5218 	return (NULL);
5219 }
5220 
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)
/*
 * Map a cpuid onto a 2-bit index: bit 1 is the core id (cpuid bit 2)
 * and bit 0 is cpuid bit 0.  The argument is fully parenthesized so
 * that expression arguments (e.g. "a | b") are evaluated as a unit.
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((((id) & DRMACH_COREID_MASK) >> 1) | ((id) & 0x1))
5225 
5226 static sbd_error_t *
5227 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
5228 {
5229 	static void drmach_cpu_dispose(drmachid_t);
5230 	static sbd_error_t *drmach_cpu_release(drmachid_t);
5231 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
5232 
5233 	sbd_error_t	*err;
5234 	uint64_t	scr_pa;
5235 	drmach_cpu_t	*cp = NULL;
5236 	pfn_t		pfn;
5237 	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
5238 	int		idx;
5239 	int		impl;
5240 	processorid_t	cpuid;
5241 
5242 	err = drmach_read_reg_addr(proto, &scr_pa);
5243 	if (err) {
5244 		goto fail;
5245 	}
5246 
5247 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
5248 	bcopy(proto, &cp->dev, sizeof (cp->dev));
5249 	cp->dev.node = drmach_node_dup(proto->node);
5250 	cp->dev.cm.isa = (void *)drmach_cpu_new;
5251 	cp->dev.cm.dispose = drmach_cpu_dispose;
5252 	cp->dev.cm.release = drmach_cpu_release;
5253 	cp->dev.cm.status = drmach_cpu_status;
5254 	cp->scr_pa = scr_pa;
5255 
5256 	err = drmach_cpu_read_cpuid(cp, &cpuid);
5257 	if (err) {
5258 		goto fail;
5259 	}
5260 
5261 	err = drmach_cpu_get_impl(cp, &impl);
5262 	if (err) {
5263 		goto fail;
5264 	}
5265 
5266 	cp->cpuid = cpuid;
5267 	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
5268 	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);
5269 
5270 	/*
5271 	 * Init the board cpu type.  Assumes all board cpus are the same type.
5272 	 */
5273 	if (cp->dev.bp->cpu_impl == 0) {
5274 		cp->dev.bp->cpu_impl = impl;
5275 	}
5276 	ASSERT(cp->dev.bp->cpu_impl == impl);
5277 
5278 	/*
5279 	 * XXX CHEETAH SUPPORT
5280 	 * determine if the domain uses Cheetah procs
5281 	 */
5282 	if (drmach_is_cheetah < 0) {
5283 		drmach_is_cheetah = IS_CHEETAH(impl);
5284 	}
5285 
5286 	/*
5287 	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
5288 	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
5289 	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
5290 	 * pair. Each cpu uses 8KB according to the following layout:
5291 	 *
5292 	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
5293 	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
5294 	 * Page 2:	even numbered Panther/Jaguar core 1's
5295 	 * Page 3:	odd numbered Panther/Jaguar core 1's
5296 	 */
5297 	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
5298 	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
5299 	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
5300 	pfn = cpu_sram_pa >> PAGESHIFT;
5301 
5302 	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
5303 	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
5304 	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
5305 	    TTE_VALID_INT | TTE_SZ_INT(TTE8K);
5306 	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
5307 	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
5308 
5309 	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
5310 	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
5311 	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);
5312 
5313 	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
5314 	    cp->dev.type, cp->dev.unum);
5315 
5316 	*idp = (drmachid_t)cp;
5317 	return (NULL);
5318 
5319 fail:
5320 	if (cp) {
5321 		drmach_node_dispose(cp->dev.node);
5322 		kmem_free(cp, sizeof (*cp));
5323 	}
5324 
5325 	*idp = (drmachid_t)0;
5326 	return (err);
5327 }
5328 
5329 static void
5330 drmach_cpu_dispose(drmachid_t id)
5331 {
5332 	drmach_cpu_t	*self;
5333 	processorid_t	cpuid;
5334 
5335 	ASSERT(DRMACH_IS_CPU_ID(id));
5336 
5337 	self = id;
5338 	if (self->dev.node)
5339 		drmach_node_dispose(self->dev.node);
5340 
5341 	cpuid = self->cpuid;
5342 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5343 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5344 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5345 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5346 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5347 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5348 
5349 	kmem_free(self, sizeof (*self));
5350 }
5351 
5352 static int
5353 drmach_cpu_start(struct cpu *cp)
5354 {
5355 	extern xcfunc_t	drmach_set_lpa;
5356 	extern void	restart_other_cpu(int);
5357 	int		cpuid = cp->cpu_id;
5358 	int		rv, bnum;
5359 	drmach_board_t	*bp;
5360 
5361 	ASSERT(MUTEX_HELD(&cpu_lock));
5362 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
5363 
5364 	cp->cpu_flags &= ~CPU_POWEROFF;
5365 
5366 	/*
5367 	 * NOTE: restart_other_cpu pauses cpus during the
5368 	 *	 slave cpu start.  This helps to quiesce the
5369 	 *	 bus traffic a bit which makes the tick sync
5370 	 *	 routine in the prom more robust.
5371 	 */
5372 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
5373 
5374 	if (prom_hotaddcpu(cpuid) != 0) {
5375 		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
5376 		    cpuid);
5377 	}
5378 
5379 	restart_other_cpu(cpuid);
5380 
5381 	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
5382 	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
5383 	if (rv == -1 || bp == NULL) {
5384 		DRMACH_PR("drmach_cpu_start: cannot read board info for "
5385 		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, (void *)bp);
5386 	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
5387 		int exp;
5388 		int ntries;
5389 
5390 		mutex_enter(&drmach_xt_mb_lock);
5391 		mutex_enter(&drmach_slice_table_lock);
5392 		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
5393 
5394 		/*
5395 		 * drmach_slice_table[*]
5396 		 *	bit 5	valid
5397 		 *	bit 0:4	slice number
5398 		 *
5399 		 * drmach_xt_mb[*] format for drmach_set_lpa
5400 		 *	bit 7	valid
5401 		 *	bit 6	set null LPA (overrides bits 0:4)
5402 		 *	bit 0:4	slice number
5403 		 *
5404 		 * drmach_set_lpa derives processor CBASE and CBND
5405 		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
5406 		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
5407 		 * number; CBND = slice number + 1.
5408 		 * No action is taken if bit 7 is zero.
5409 		 */
5410 		exp = (cpuid >> 5) & 0x1f;
5411 		if (drmach_slice_table[exp] & 0x20) {
5412 			drmach_xt_mb[cpuid] = 0x80 |
5413 			    (drmach_slice_table[exp] & 0x1f);
5414 		} else {
5415 			drmach_xt_mb[cpuid] = 0x80 | 0x40;
5416 		}
5417 
5418 		drmach_xt_ready = 0;
5419 
5420 		xt_one(cpuid, drmach_set_lpa, NULL, NULL);
5421 
5422 		ntries = drmach_cpu_ntries;
5423 		while (!drmach_xt_ready && ntries) {
5424 			DELAY(drmach_cpu_delay);
5425 			ntries--;
5426 		}
5427 
5428 		mutex_exit(&drmach_slice_table_lock);
5429 		mutex_exit(&drmach_xt_mb_lock);
5430 
5431 		DRMACH_PR(
5432 		    "waited %d out of %d tries for drmach_set_lpa on cpu%d",
5433 		    drmach_cpu_ntries - ntries, drmach_cpu_ntries,
5434 		    cp->cpu_id);
5435 	}
5436 
5437 	xt_one(cpuid, vtag_flushpage_tl1, (uint64_t)drmach_cpu_sram_va,
5438 	    (uint64_t)ksfmmup);
5439 
5440 	return (0);
5441 }
5442 
5443 /*
5444  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5445  * it has been offlined. The function of this routine is to get the cpu
5446  * spinning in a safe place. The requirement is that the system will not
5447  * reference anything on the detaching board (memory and i/o is detached
5448  * elsewhere) and that the CPU not reference anything on any other board
5449  * in the system.  This isolation is required during and after the writes
5450  * to the domain masks to remove the board from the domain.
5451  *
5452  * To accomplish this isolation the following is done:
5453  *	1) Create a locked mapping to the STARDRB data buffer located
5454  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5455  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5456  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5457  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5458  *	   boards. Each STARDRB buffer is logically divided by DR into one
5459  *	   8KB page per cpu (or Jaguar core).
5460  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5461  *	3) Jump to function now in the cpu sram.
5462  *	   Function will:
5463  *	   3.1) Flush its Ecache (displacement).
5464  *	   3.2) Flush its Dcache with HW mechanism.
5465  *	   3.3) Flush its Icache with HW mechanism.
5466  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5467  *	   3.5) Set LPA to NULL
5468  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5469  *	        recovered by drmach_cpu_poweroff().
5470  *	4) Jump into an infinite loop.
5471  */
5472 
5473 static void
5474 drmach_cpu_stop_self(void)
5475 {
5476 	extern void drmach_shutdown_asm(uint64_t, uint64_t, int, int, uint64_t);
5477 	extern void drmach_shutdown_asm_end(void);
5478 
5479 	tte_t		*tte;
5480 	uint_t		*p, *q;
5481 	uint64_t	 stack_pointer;
5482 
5483 	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
5484 	    (ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);
5485 
5486 	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
5487 	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) && TTE_IS_PRIVILEGED(tte) &&
5488 	    TTE_IS_LOCKED(tte));
5489 	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
5490 	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);
5491 
5492 	/* copy text. standard bcopy not designed to work in nc space */
5493 	p = (uint_t *)drmach_cpu_sram_va;
5494 	q = (uint_t *)drmach_shutdown_asm;
5495 	while (q < (uint_t *)drmach_shutdown_asm_end)
5496 		*p++ = *q++;
5497 
5498 	/* zero to assist debug */
5499 	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
5500 	while (p < q)
5501 		*p++ = 0;
5502 
5503 	/* a parking spot for the stack pointer */
5504 	stack_pointer = (uint64_t)q;
5505 
5506 	/* call copy of drmach_shutdown_asm */
5507 	(*(void (*)())drmach_cpu_sram_va)(
5508 	    stack_pointer,
5509 	    drmach_iocage_paddr,
5510 	    cpunodes[CPU->cpu_id].ecache_size,
5511 	    cpunodes[CPU->cpu_id].ecache_linesize,
5512 	    va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
5513 }
5514 
5515 static void
5516 drmach_cpu_shutdown_self(void)
5517 {
5518 	cpu_t		*cp = CPU;
5519 	int		cpuid = cp->cpu_id;
5520 	extern void	flush_windows(void);
5521 
5522 	flush_windows();
5523 
5524 	(void) spl8();
5525 
5526 	ASSERT(cp->cpu_intr_actv == 0);
5527 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
5528 	    cp->cpu_thread == cp->cpu_startup_thread);
5529 
5530 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
5531 
5532 	drmach_cpu_stop_self();
5533 
5534 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
5535 }
5536 
5537 static sbd_error_t *
5538 drmach_cpu_release(drmachid_t id)
5539 {
5540 	drmach_cpu_t	*cp;
5541 	struct cpu	*cpu;
5542 	sbd_error_t	*err;
5543 
5544 	if (!DRMACH_IS_CPU_ID(id))
5545 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5546 	cp = id;
5547 
5548 	ASSERT(MUTEX_HELD(&cpu_lock));
5549 
5550 	cpu = cpu_get(cp->cpuid);
5551 	if (cpu == NULL)
5552 		err = DRMACH_INTERNAL_ERROR();
5553 	else
5554 		err = NULL;
5555 
5556 	return (err);
5557 }
5558 
5559 static sbd_error_t *
5560 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5561 {
5562 	drmach_cpu_t	*cp;
5563 	drmach_device_t	*dp;
5564 
5565 	ASSERT(DRMACH_IS_CPU_ID(id));
5566 	cp = id;
5567 	dp = &cp->dev;
5568 
5569 	stat->assigned = dp->bp->assigned;
5570 	stat->powered = dp->bp->powered;
5571 	mutex_enter(&cpu_lock);
5572 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5573 	mutex_exit(&cpu_lock);
5574 	stat->busy = dp->busy;
5575 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
5576 	stat->info[0] = '\0';
5577 
5578 	return (NULL);
5579 }
5580 
5581 sbd_error_t *
5582 drmach_cpu_disconnect(drmachid_t id)
5583 {
5584 	if (!DRMACH_IS_CPU_ID(id))
5585 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5586 
5587 	return (NULL);
5588 }
5589 
5590 sbd_error_t *
5591 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5592 {
5593 	drmach_cpu_t	*cpu;
5594 
5595 	if (!DRMACH_IS_CPU_ID(id))
5596 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5597 	cpu = id;
5598 
5599 	*cpuid = cpu->cpuid;
5600 	return (NULL);
5601 }
5602 
5603 sbd_error_t *
5604 drmach_cpu_get_impl(drmachid_t id, int *ip)
5605 {
5606 	drmach_node_t	*np;
5607 	int		impl;
5608 
5609 	if (!DRMACH_IS_CPU_ID(id))
5610 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5611 
5612 	np = ((drmach_device_t *)id)->node;
5613 
5614 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5615 		return (DRMACH_INTERNAL_ERROR());
5616 	}
5617 
5618 	*ip = impl;
5619 
5620 	return (NULL);
5621 }
5622 
5623 /*
5624  * Flush this cpu's ecache, then ensure all outstanding safari
5625  * transactions have retired.
5626  */
5627 void
5628 drmach_cpu_flush_ecache_sync(void)
5629 {
5630 	uint64_t *p;
5631 
5632 	ASSERT(curthread->t_bound_cpu == CPU);
5633 
5634 	cpu_flush_ecache();
5635 
5636 	mutex_enter(&drmach_bus_sync_lock);
5637 	for (p = drmach_bus_sync_list; *p; p++)
5638 		(void) ldphys(*p);
5639 	mutex_exit(&drmach_bus_sync_lock);
5640 
5641 	cpu_flush_ecache();
5642 }
5643 
5644 sbd_error_t *
5645 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5646 {
5647 	drmach_device_t	*dp;
5648 
5649 	if (!DRMACH_IS_DEVICE_ID(id))
5650 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5651 	dp = id;
5652 
5653 	*dip = dp->node->n_getdip(dp->node);
5654 	return (NULL);
5655 }
5656 
5657 sbd_error_t *
5658 drmach_io_is_attached(drmachid_t id, int *yes)
5659 {
5660 	drmach_device_t *dp;
5661 	dev_info_t	*dip;
5662 	int state;
5663 
5664 	if (!DRMACH_IS_IO_ID(id))
5665 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5666 	dp = id;
5667 
5668 	dip = dp->node->n_getdip(dp->node);
5669 	if (dip == NULL) {
5670 		*yes = 0;
5671 		return (NULL);
5672 	}
5673 
5674 	state = ddi_get_devstate(dip);
5675 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5676 
5677 	return (NULL);
5678 }
5679 
5680 static int
5681 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5682 {
5683 	char			dtype[OBP_MAXPROPNAME];
5684 	int			portid;
5685 	uint_t			pci_csr_base;
5686 	struct pci_phys_spec	*regbuf = NULL;
5687 	int			rv, len;
5688 
5689 	ASSERT(dip != NULL);
5690 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5691 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5692 		return (0);
5693 
5694 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5695 	    (caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5696 
5697 		if (strncmp(dtype, "pci", 3) == 0) {
5698 
5699 			/*
5700 			 * Get safari portid. All schizo/xmits 0
5701 			 * safari IDs end in 0x1C.
5702 			 */
5703 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid",
5704 			    &len);
5705 
5706 			if ((rv != DDI_PROP_SUCCESS) ||
5707 			    (len > sizeof (portid)))
5708 				return (0);
5709 
5710 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5711 			    "portid", (caddr_t)&portid, &len);
5712 
5713 			if (rv != DDI_PROP_SUCCESS)
5714 				return (0);
5715 
5716 			if ((portid & 0x1F) != 0x1C)
5717 				return (0);
5718 
5719 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5720 			    DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5721 			    &len) == DDI_PROP_SUCCESS) {
5722 
5723 				pci_csr_base = regbuf[0].pci_phys_mid &
5724 				    PCI_CONF_ADDR_MASK;
5725 				kmem_free(regbuf, len);
5726 				/*
5727 				 * All PCI B-Leafs are at configspace 0x70.0000.
5728 				 */
5729 				if (pci_csr_base == 0x700000)
5730 					return (1);
5731 			}
5732 		}
5733 	}
5734 	return (0);
5735 }
5736 
5737 #define	SCHIZO_BINDING_NAME		"pci108e,8001"
5738 #define	XMITS_BINDING_NAME		"pci108e,8002"
5739 
5740 /*
5741  * Verify if the dip is an instance of MAN 'eri'.
5742  */
5743 static int
5744 drmach_dip_is_man_eri(dev_info_t *dip)
5745 {
5746 	struct pci_phys_spec	*regbuf = NULL;
5747 	dev_info_t		*parent_dip;
5748 	char			*name;
5749 	uint_t			pci_device;
5750 	uint_t			pci_function;
5751 	int			len;
5752 
5753 	if (dip == NULL)
5754 		return (0);
5755 	/*
5756 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5757 	 */
5758 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5759 	    ((name = ddi_binding_name(parent_dip)) == NULL))
5760 		return (0);
5761 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5762 		/*
5763 		 * This RIO could be on XMITS, so get the dip to
5764 		 * XMITS PCI Leaf.
5765 		 */
5766 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5767 			return (0);
5768 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5769 		    (strcmp(name, XMITS_BINDING_NAME) != 0)) {
5770 			return (0);
5771 		}
5772 	}
5773 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5774 		return (0);
5775 	/*
5776 	 * Finally make sure it is the MAN eri.
5777 	 */
5778 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5779 	    "reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5780 
5781 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5782 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5783 		kmem_free(regbuf, len);
5784 
5785 		/*
5786 		 * The network function of the RIO ASIC will always be
5787 		 * device 3 and function 1 ("network@3,1").
5788 		 */
5789 		if ((pci_device == 3) && (pci_function == 1))
5790 			return (1);
5791 	}
5792 	return (0);
5793 }
5794 
5795 typedef struct {
5796 	int		iosram_inst;
5797 	dev_info_t	*eri_dip;
5798 	int		bnum;
5799 } drmach_io_inst_t;
5800 
5801 int
5802 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5803 {
5804 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5805 
5806 	int	rv;
5807 	int	len;
5808 	int	portid;
5809 	char	name[OBP_MAXDRVNAME];
5810 
5811 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5812 
5813 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5814 		return (DDI_WALK_CONTINUE);
5815 	}
5816 
5817 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5818 	    "portid", (caddr_t)&portid, &len);
5819 	if (rv != DDI_PROP_SUCCESS)
5820 		return (DDI_WALK_CONTINUE);
5821 
5822 	/* ignore devices that are not on this board */
5823 	if (drmach_portid2bnum(portid) != ios->bnum)
5824 		return (DDI_WALK_CONTINUE);
5825 
5826 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5827 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "name", &len);
5828 		if (rv == DDI_PROP_SUCCESS) {
5829 
5830 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5831 			    0, "name",
5832 			    (caddr_t)name, &len);
5833 			if (rv != DDI_PROP_SUCCESS)
5834 				return (DDI_WALK_CONTINUE);
5835 
5836 			if (strncmp("iosram", name, 6) == 0) {
5837 				ios->iosram_inst = ddi_get_instance(dip);
5838 				if (ios->eri_dip == NULL)
5839 					return (DDI_WALK_CONTINUE);
5840 				else
5841 					return (DDI_WALK_TERMINATE);
5842 			} else {
5843 				if (drmach_dip_is_man_eri(dip)) {
5844 					ASSERT(ios->eri_dip == NULL);
5845 					ndi_hold_devi(dip);
5846 					ios->eri_dip = dip;
5847 					if (ios->iosram_inst < 0)
5848 						return (DDI_WALK_CONTINUE);
5849 					else
5850 						return (DDI_WALK_TERMINATE);
5851 				}
5852 			}
5853 		}
5854 	}
5855 	return (DDI_WALK_CONTINUE);
5856 }
5857 
5858 sbd_error_t *
5859 drmach_io_pre_release(drmachid_t id)
5860 {
5861 	drmach_io_inst_t	ios;
5862 	drmach_board_t		*bp;
5863 	int			rv = 0;
5864 	sbd_error_t		*err = NULL;
5865 	drmach_device_t		*dp;
5866 	dev_info_t		*rdip;
5867 	int			circ;
5868 
5869 	if (!DRMACH_IS_IO_ID(id))
5870 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5871 	dp = id;
5872 	bp = dp->bp;
5873 
5874 	rdip = dp->node->n_getdip(dp->node);
5875 
5876 	/* walk device tree to find iosram instance for the board */
5877 	ios.iosram_inst = -1;
5878 	ios.eri_dip = NULL;
5879 	ios.bnum = bp->bnum;
5880 
5881 	ndi_devi_enter(rdip, &circ);
5882 	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
5883 	    (void *)&ios);
5884 
5885 	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
5886 	    ios.bnum, ios.iosram_inst, (void *)ios.eri_dip);
5887 	ndi_devi_exit(rdip, circ);
5888 
5889 	if (ios.eri_dip) {
5890 		/*
5891 		 * Release hold acquired in drmach_board_find_io_insts()
5892 		 */
5893 		ndi_rele_devi(ios.eri_dip);
5894 	}
5895 	if (ios.iosram_inst >= 0) {
5896 		/* call for tunnel switch */
5897 		do {
5898 			DRMACH_PR("calling iosram_switchfrom(%d)\n",
5899 			    ios.iosram_inst);
5900 			rv = iosram_switchfrom(ios.iosram_inst);
5901 			if (rv)
5902 				DRMACH_PR("iosram_switchfrom returned %d\n",
5903 				    rv);
5904 		} while (rv == EAGAIN);
5905 
5906 		if (rv)
5907 			err = drerr_new(0, ESTC_IOSWITCH, NULL);
5908 	}
5909 	return (err);
5910 }
5911 
5912 sbd_error_t *
5913 drmach_io_unrelease(drmachid_t id)
5914 {
5915 	dev_info_t	*dip;
5916 	sbd_error_t	*err = NULL;
5917 	drmach_device_t	*dp;
5918 
5919 	if (!DRMACH_IS_IO_ID(id))
5920 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5921 	dp = id;
5922 
5923 	dip = dp->node->n_getdip(dp->node);
5924 
5925 	if (dip == NULL)
5926 		err = DRMACH_INTERNAL_ERROR();
5927 	else {
5928 		int (*func)(dev_info_t *dip);
5929 
5930 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
5931 		    0);
5932 
5933 		if (func) {
5934 			drmach_io_inst_t ios;
5935 			dev_info_t	*pdip;
5936 			int		circ;
5937 
5938 			/*
5939 			 * Walk device tree to find rio dip for the board
5940 			 * Since we are not interested in iosram instance here,
5941 			 * initialize it to 0, so that the walk terminates as
5942 			 * soon as eri dip is found.
5943 			 */
5944 			ios.iosram_inst = 0;
5945 			ios.eri_dip = NULL;
5946 			ios.bnum = dp->bp->bnum;
5947 
5948 			if (pdip = ddi_get_parent(dip)) {
5949 				ndi_hold_devi(pdip);
5950 				ndi_devi_enter(pdip, &circ);
5951 			}
5952 			/*
5953 			 * Root node doesn't have to be held in any way.
5954 			 */
5955 			ddi_walk_devs(dip, drmach_board_find_io_insts,
5956 			    (void *)&ios);
5957 
5958 			if (pdip) {
5959 				ndi_devi_exit(pdip, circ);
5960 				ndi_rele_devi(pdip);
5961 			}
5962 
5963 			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
5964 			    ios.bnum, (void *)ios.eri_dip);
5965 
5966 			if (ios.eri_dip) {
5967 				DRMACH_PR("calling man_dr_attach\n");
5968 				if ((*func)(ios.eri_dip))
5969 					err = drerr_new(0, ESTC_NWSWITCH, NULL);
5970 				/*
5971 				 * Release hold acquired in
5972 				 * drmach_board_find_io_insts()
5973 				 */
5974 				ndi_rele_devi(ios.eri_dip);
5975 			}
5976 		} else
5977 			DRMACH_PR("man_dr_attach NOT present\n");
5978 	}
5979 	return (err);
5980 }
5981 
5982 static sbd_error_t *
5983 drmach_io_release(drmachid_t id)
5984 {
5985 	dev_info_t	*dip;
5986 	sbd_error_t	*err = NULL;
5987 	drmach_device_t	*dp;
5988 
5989 	if (!DRMACH_IS_IO_ID(id))
5990 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5991 	dp = id;
5992 
5993 	dip = dp->node->n_getdip(dp->node);
5994 
5995 	if (dip == NULL)
5996 		err = DRMACH_INTERNAL_ERROR();
5997 	else {
5998 		int (*func)(dev_info_t *dip);
5999 
6000 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
6001 		    0);
6002 
6003 		if (func) {
6004 			drmach_io_inst_t ios;
6005 			dev_info_t	*pdip;
6006 			int		circ;
6007 
6008 			/*
6009 			 * Walk device tree to find rio dip for the board
6010 			 * Since we are not interested in iosram instance here,
6011 			 * initialize it to 0, so that the walk terminates as
6012 			 * soon as eri dip is found.
6013 			 */
6014 			ios.iosram_inst = 0;
6015 			ios.eri_dip = NULL;
6016 			ios.bnum = dp->bp->bnum;
6017 
6018 			if (pdip = ddi_get_parent(dip)) {
6019 				ndi_hold_devi(pdip);
6020 				ndi_devi_enter(pdip, &circ);
6021 			}
6022 			/*
6023 			 * Root node doesn't have to be held in any way.
6024 			 */
6025 			ddi_walk_devs(dip, drmach_board_find_io_insts,
6026 			    (void *)&ios);
6027 
6028 			if (pdip) {
6029 				ndi_devi_exit(pdip, circ);
6030 				ndi_rele_devi(pdip);
6031 			}
6032 
6033 			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
6034 			    ios.bnum, (void *)ios.eri_dip);
6035 
6036 			if (ios.eri_dip) {
6037 				DRMACH_PR("calling man_dr_detach\n");
6038 				if ((*func)(ios.eri_dip))
6039 					err = drerr_new(0, ESTC_NWSWITCH, NULL);
6040 				/*
6041 				 * Release hold acquired in
6042 				 * drmach_board_find_io_insts()
6043 				 */
6044 				ndi_rele_devi(ios.eri_dip);
6045 			}
6046 		} else
6047 			DRMACH_PR("man_dr_detach NOT present\n");
6048 	}
6049 	return (err);
6050 }
6051 
6052 sbd_error_t *
6053 drmach_io_post_release(drmachid_t id)
6054 {
6055 	char 		*path;
6056 	dev_info_t	*rdip;
6057 	drmach_device_t	*dp;
6058 
6059 	if (!DRMACH_IS_DEVICE_ID(id))
6060 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6061 	dp = id;
6062 
6063 	rdip = dp->node->n_getdip(dp->node);
6064 
6065 	/*
6066 	 * Always called after drmach_unconfigure() which on Starcat
6067 	 * unconfigures the branch but doesn't remove it so the
6068 	 * dip must always exist.
6069 	 */
6070 	ASSERT(rdip);
6071 
6072 	ASSERT(e_ddi_branch_held(rdip));
6073 #ifdef DEBUG
6074 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6075 	(void) ddi_pathname(rdip, path);
6076 	DRMACH_PR("post_release dip path is: %s\n", path);
6077 	kmem_free(path, MAXPATHLEN);
6078 #endif
6079 
6080 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6081 		if (schpc_remove_pci(rdip)) {
6082 			DRMACH_PR("schpc_remove_pci failed\n");
6083 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6084 		} else {
6085 			DRMACH_PR("schpc_remove_pci succeeded\n");
6086 		}
6087 	}
6088 
6089 	return (NULL);
6090 }
6091 
6092 sbd_error_t *
6093 drmach_io_post_attach(drmachid_t id)
6094 {
6095 	int		circ;
6096 	dev_info_t	*dip;
6097 	dev_info_t	*pdip;
6098 	drmach_device_t	*dp;
6099 	drmach_io_inst_t ios;
6100 
6101 	if (!DRMACH_IS_DEVICE_ID(id))
6102 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6103 	dp = id;
6104 
6105 	dip = dp->node->n_getdip(dp->node);
6106 
6107 	/*
6108 	 * We held the branch rooted at dip earlier, so at a minimum the
6109 	 * root i.e. dip must be present in the device tree.
6110 	 */
6111 	ASSERT(dip);
6112 
6113 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6114 		if (schpc_add_pci(dip)) {
6115 			DRMACH_PR("schpc_add_pci failed\n");
6116 		} else {
6117 			DRMACH_PR("schpc_add_pci succeeded\n");
6118 		}
6119 	}
6120 
6121 	/*
6122 	 * Walk device tree to find rio dip for the board
6123 	 * Since we are not interested in iosram instance here,
6124 	 * initialize it to 0, so that the walk terminates as
6125 	 * soon as eri dip is found.
6126 	 */
6127 	ios.iosram_inst = 0;
6128 	ios.eri_dip = NULL;
6129 	ios.bnum = dp->bp->bnum;
6130 
6131 	if (pdip = ddi_get_parent(dip)) {
6132 		ndi_hold_devi(pdip);
6133 		ndi_devi_enter(pdip, &circ);
6134 	}
6135 	/*
6136 	 * Root node doesn't have to be held in any way.
6137 	 */
6138 	ddi_walk_devs(dip, drmach_board_find_io_insts, (void *)&ios);
6139 	if (pdip) {
6140 		ndi_devi_exit(pdip, circ);
6141 		ndi_rele_devi(pdip);
6142 	}
6143 
6144 	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
6145 	    ios.bnum, (void *)ios.eri_dip);
6146 
6147 	if (ios.eri_dip) {
6148 		int (*func)(dev_info_t *dip);
6149 
6150 		func =
6151 		    (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);
6152 
6153 		if (func) {
6154 			DRMACH_PR("calling man_dr_attach\n");
6155 			(void) (*func)(ios.eri_dip);
6156 		} else {
6157 			DRMACH_PR("man_dr_attach NOT present\n");
6158 		}
6159 
6160 		/*
6161 		 * Release hold acquired in drmach_board_find_io_insts()
6162 		 */
6163 		ndi_rele_devi(ios.eri_dip);
6164 
6165 	}
6166 
6167 	return (NULL);
6168 }
6169 
6170 static sbd_error_t *
6171 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6172 {
6173 	drmach_device_t *dp;
6174 	sbd_error_t	*err;
6175 	int		 configured;
6176 
6177 	ASSERT(DRMACH_IS_IO_ID(id));
6178 	dp = id;
6179 
6180 	err = drmach_io_is_attached(id, &configured);
6181 	if (err)
6182 		return (err);
6183 
6184 	stat->assigned = dp->bp->assigned;
6185 	stat->powered = dp->bp->powered;
6186 	stat->configured = (configured != 0);
6187 	stat->busy = dp->busy;
6188 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
6189 	stat->info[0] = '\0';
6190 
6191 	return (NULL);
6192 }
6193 
6194 sbd_error_t *
6195 drmach_mem_init_size(drmachid_t id)
6196 {
6197 	drmach_mem_t	*mp;
6198 	sbd_error_t	*err;
6199 	gdcd_t		*gdcd;
6200 	mem_chunk_t	*chunk;
6201 	uint64_t	 chunks, pa, mask, sz;
6202 
6203 	if (!DRMACH_IS_MEM_ID(id))
6204 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6205 	mp = id;
6206 
6207 	err = drmach_mem_get_base_physaddr(id, &pa);
6208 	if (err)
6209 		return (err);
6210 
6211 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6212 	pa &= mask;
6213 
6214 	gdcd = drmach_gdcd_new();
6215 	if (gdcd == NULL)
6216 		return (DRMACH_INTERNAL_ERROR());
6217 
6218 	sz = 0;
6219 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6220 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6221 	while (chunks-- != 0) {
6222 		if ((chunk->mc_base_pa & mask) == pa) {
6223 			sz += chunk->mc_mbytes * 1048576;
6224 		}
6225 
6226 		++chunk;
6227 	}
6228 	mp->nbytes = sz;
6229 
6230 	drmach_gdcd_dispose(gdcd);
6231 	return (NULL);
6232 }
6233 
6234 /*
6235  * Hardware registers are organized into consecutively
6236  * addressed registers.  The reg property's hi and lo fields
6237  * together describe the base address of the register set for
6238  * this memory-controller.  Register descriptions and offsets
6239  * (from the base address) are as follows:
6240  *
6241  * Description				Offset	Size (bytes)
6242  * Memory Timing Control Register I	0x00	8
6243  * Memory Timing Control Register II	0x08	8
6244  * Memory Address Decoding Register I	0x10	8
6245  * Memory Address Decoding Register II	0x18	8
6246  * Memory Address Decoding Register III	0x20	8
6247  * Memory Address Decoding Register IV	0x28	8
6248  * Memory Address Control Register	0x30	8
6249  * Memory Timing Control Register III	0x38	8
6250  * Memory Timing Control Register IV	0x40	8
6251  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6252  * EMU Activity Status Register		0x50	8 (Panther only)
6253  *
6254  * Only the Memory Address Decoding Register and EMU Activity Status
6255  * Register addresses are needed for DRMACH.
6256  */
6257 static sbd_error_t *
6258 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
6259 {
6260 	static void drmach_mem_dispose(drmachid_t);
6261 	static sbd_error_t *drmach_mem_release(drmachid_t);
6262 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
6263 
6264 	sbd_error_t	*err;
6265 	uint64_t	 madr_pa;
6266 	drmach_mem_t	*mp;
6267 	int		 bank, count;
6268 
6269 	err = drmach_read_reg_addr(proto, &madr_pa);
6270 	if (err)
6271 		return (err);
6272 
6273 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
6274 	bcopy(proto, &mp->dev, sizeof (mp->dev));
6275 	mp->dev.node = drmach_node_dup(proto->node);
6276 	mp->dev.cm.isa = (void *)drmach_mem_new;
6277 	mp->dev.cm.dispose = drmach_mem_dispose;
6278 	mp->dev.cm.release = drmach_mem_release;
6279 	mp->dev.cm.status = drmach_mem_status;
6280 	mp->madr_pa = madr_pa;
6281 
6282 	(void) snprintf(mp->dev.cm.name,
6283 	    sizeof (mp->dev.cm.name), "%s", mp->dev.type);
6284 
6285 	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6286 		uint64_t madr;
6287 
6288 		drmach_mem_read_madr(mp, bank, &madr);
6289 		if (madr & DRMACH_MC_VALID_MASK) {
6290 			count += 1;
6291 			break;
6292 		}
6293 	}
6294 
6295 	/*
6296 	 * If none of the banks had their valid bit set, that means
6297 	 * post did not configure this MC to participate in the
6298 	 * domain.  So, pretend this node does not exist by returning
6299 	 * a drmachid of zero.
6300 	 */
6301 	if (count == 0) {
6302 		/* drmach_mem_dispose frees board mem list */
6303 		drmach_node_dispose(mp->dev.node);
6304 		kmem_free(mp, sizeof (*mp));
6305 		*idp = (drmachid_t)0;
6306 		return (NULL);
6307 	}
6308 
6309 	/*
6310 	 * Only one mem unit per board is exposed to the
6311 	 * PIM layer.  The first mem unit encountered during
6312 	 * tree walk is used to represent all mem units on
6313 	 * the same board.
6314 	 */
6315 	if (mp->dev.bp->mem == NULL) {
6316 		/* start list of mem units on this board */
6317 		mp->dev.bp->mem = mp;
6318 
6319 		/*
6320 		 * force unum to zero since this is the only mem unit
6321 		 * that will be visible to the PIM layer.
6322 		 */
6323 		mp->dev.unum = 0;
6324 
6325 		/*
6326 		 * board memory size kept in this mem unit only
6327 		 */
6328 		err = drmach_mem_init_size(mp);
6329 		if (err) {
6330 			mp->dev.bp->mem = NULL;
6331 			/* drmach_mem_dispose frees board mem list */
6332 			drmach_node_dispose(mp->dev.node);
6333 			kmem_free(mp, sizeof (*mp));
6334 			*idp = (drmachid_t)0;
6335 			return (NULL);
6336 		}
6337 
6338 		/*
6339 		 * allow this instance (the first encountered on this board)
6340 		 * to be visible to the PIM layer.
6341 		 */
6342 		*idp = (drmachid_t)mp;
6343 	} else {
6344 		drmach_mem_t *lp;
6345 
6346 		/* hide this mem instance behind the first. */
6347 		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
6348 			;
6349 		lp->next = mp;
6350 
6351 		/*
6352 		 * hide this instance from the caller.
6353 		 * See drmach_board_find_devices_cb() for details.
6354 		 */
6355 		*idp = (drmachid_t)0;
6356 	}
6357 
6358 	return (NULL);
6359 }
6360 
6361 static void
6362 drmach_mem_dispose(drmachid_t id)
6363 {
6364 	drmach_mem_t *mp, *next;
6365 	drmach_board_t *bp;
6366 
6367 	ASSERT(DRMACH_IS_MEM_ID(id));
6368 
6369 	mutex_enter(&drmach_bus_sync_lock);
6370 
6371 	mp = id;
6372 	bp = mp->dev.bp;
6373 
6374 	do {
6375 		if (mp->dev.node)
6376 			drmach_node_dispose(mp->dev.node);
6377 
6378 		next = mp->next;
6379 		kmem_free(mp, sizeof (*mp));
6380 		mp = next;
6381 	} while (mp);
6382 
6383 	bp->mem = NULL;
6384 
6385 	drmach_bus_sync_list_update();
6386 	mutex_exit(&drmach_bus_sync_lock);
6387 }
6388 
6389 sbd_error_t *
6390 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6391 {
6392 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6393 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6394 	int		rv;
6395 
6396 	ASSERT(size != 0);
6397 
6398 	if (!DRMACH_IS_MEM_ID(id))
6399 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6400 
6401 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
6402 	if (rv == ENOMEM) {
6403 		cmn_err(CE_WARN, "%lu megabytes not available"
6404 		    " to kernel cage", size >> 20);
6405 	} else if (rv != 0) {
6406 		/* catch this in debug kernels */
6407 		ASSERT(0);
6408 
6409 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6410 		    " return value %d", rv);
6411 	}
6412 
6413 	return (NULL);
6414 }
6415 
6416 sbd_error_t *
6417 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6418 {
6419 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6420 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6421 	int		 rv;
6422 
6423 	if (!DRMACH_IS_MEM_ID(id))
6424 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6425 
6426 	if (size > 0) {
6427 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6428 		if (rv != 0) {
6429 			cmn_err(CE_WARN,
6430 			    "unexpected kcage_range_delete_post_mem_del"
6431 			    " return value %d", rv);
6432 			return (DRMACH_INTERNAL_ERROR());
6433 		}
6434 	}
6435 
6436 	return (NULL);
6437 }
6438 
6439 sbd_error_t *
6440 drmach_mem_disable(drmachid_t id)
6441 {
6442 	if (!DRMACH_IS_MEM_ID(id))
6443 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6444 	else
6445 		return (NULL);
6446 }
6447 
6448 sbd_error_t *
6449 drmach_mem_enable(drmachid_t id)
6450 {
6451 	if (!DRMACH_IS_MEM_ID(id))
6452 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6453 	else
6454 		return (NULL);
6455 }
6456 
6457 sbd_error_t *
6458 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
6459 {
6460 #define	MB(mb) ((mb) * 1048576ull)
6461 
6462 	static struct {
6463 		uint_t		uk;
6464 		uint64_t	segsz;
6465 	}  uk2segsz[] = {
6466 		{ 0x003,	MB(256)	  },
6467 		{ 0x007,	MB(512)	  },
6468 		{ 0x00f,	MB(1024)  },
6469 		{ 0x01f,	MB(2048)  },
6470 		{ 0x03f,	MB(4096)  },
6471 		{ 0x07f,	MB(8192)  },
6472 		{ 0x0ff,	MB(16384) },
6473 		{ 0x1ff,	MB(32768) },
6474 		{ 0x3ff,	MB(65536) },
6475 		{ 0x7ff,	MB(131072) }
6476 	};
6477 	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);
6478 
6479 #undef MB
6480 
6481 	uint64_t	 largest_sz = 0;
6482 	drmach_mem_t	*mp;
6483 
6484 	if (!DRMACH_IS_MEM_ID(id))
6485 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6486 
6487 	/* prime the result with a default value */
6488 	*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6489 
6490 	for (mp = id; mp; mp = mp->next) {
6491 		int bank;
6492 
6493 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6494 			int		i;
6495 			uint_t		uk;
6496 			uint64_t	madr;
6497 
6498 			/* get register value, extract uk and normalize */
6499 			drmach_mem_read_madr(mp, bank, &madr);
6500 
6501 			if (!(madr & DRMACH_MC_VALID_MASK))
6502 				continue;
6503 
6504 			uk = DRMACH_MC_UK(madr);
6505 
6506 			/* match uk value */
6507 			for (i = 0; i < len; i++)
6508 				if (uk == uk2segsz[i].uk)
6509 					break;
6510 
6511 			if (i < len) {
6512 				uint64_t sz = uk2segsz[i].segsz;
6513 
6514 				/*
6515 				 * remember largest segment size,
6516 				 * update mask result
6517 				 */
6518 				if (sz > largest_sz) {
6519 					largest_sz = sz;
6520 					*mask = sz - 1;
6521 				}
6522 			} else {
6523 				/*
6524 				 * uk not in table, punt using
6525 				 * entire slice size. no longer any
6526 				 * reason to check other banks.
6527 				 */
6528 				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6529 				return (NULL);
6530 			}
6531 		}
6532 	}
6533 
6534 	return (NULL);
6535 }
6536 
6537 sbd_error_t *
6538 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6539 {
6540 	drmach_mem_t *mp;
6541 
6542 	if (!DRMACH_IS_MEM_ID(id))
6543 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6544 
6545 	*base_addr = (uint64_t)-1;
6546 	for (mp = id; mp; mp = mp->next) {
6547 		int bank;
6548 
6549 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6550 			uint64_t addr, madr;
6551 
6552 			drmach_mem_read_madr(mp, bank, &madr);
6553 			if (madr & DRMACH_MC_VALID_MASK) {
6554 				addr = DRMACH_MC_UM_TO_PA(madr) |
6555 				    DRMACH_MC_LM_TO_PA(madr);
6556 
6557 				if (addr < *base_addr)
6558 					*base_addr = addr;
6559 			}
6560 		}
6561 	}
6562 
6563 	/* should not happen, but ... */
6564 	if (*base_addr == (uint64_t)-1)
6565 		return (DRMACH_INTERNAL_ERROR());
6566 
6567 	return (NULL);
6568 }
6569 
6570 void
6571 drmach_bus_sync_list_update(void)
6572 {
6573 	int		rv, idx, cnt = 0;
6574 	drmachid_t	id;
6575 
6576 	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));
6577 
6578 	rv = drmach_array_first(drmach_boards, &idx, &id);
6579 	while (rv == 0) {
6580 		drmach_board_t		*bp = id;
6581 		drmach_mem_t		*mp = bp->mem;
6582 
6583 		while (mp) {
6584 			int bank;
6585 
6586 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6587 				uint64_t madr;
6588 
6589 				drmach_mem_read_madr(mp, bank, &madr);
6590 				if (madr & DRMACH_MC_VALID_MASK) {
6591 					uint64_t pa;
6592 
6593 					pa  = DRMACH_MC_UM_TO_PA(madr);
6594 					pa |= DRMACH_MC_LM_TO_PA(madr);
6595 
6596 					/*
6597 					 * The list is zero terminated.
6598 					 * Offset the pa by a doubleword
6599 					 * to avoid confusing a pa value of
6600 					 * of zero with the terminator.
6601 					 */
6602 					pa += sizeof (uint64_t);
6603 
6604 					drmach_bus_sync_list[cnt++] = pa;
6605 				}
6606 			}
6607 
6608 			mp = mp->next;
6609 		}
6610 
6611 		rv = drmach_array_next(drmach_boards, &idx, &id);
6612 	}
6613 
6614 	drmach_bus_sync_list[cnt] = 0;
6615 }
6616 
6617 sbd_error_t *
6618 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6619 {
6620 	sbd_error_t	*err;
6621 	struct memlist	*mlist;
6622 	gdcd_t		*gdcd;
6623 	mem_chunk_t	*chunk;
6624 	uint64_t	 chunks, pa, mask;
6625 
6626 	err = drmach_mem_get_base_physaddr(id, &pa);
6627 	if (err)
6628 		return (err);
6629 
6630 	gdcd = drmach_gdcd_new();
6631 	if (gdcd == NULL)
6632 		return (DRMACH_INTERNAL_ERROR());
6633 
6634 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6635 	pa &= mask;
6636 
6637 	mlist = NULL;
6638 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6639 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6640 	while (chunks-- != 0) {
6641 		if ((chunk->mc_base_pa & mask) == pa) {
6642 			mlist = memlist_add_span(mlist, chunk->mc_base_pa,
6643 			    chunk->mc_mbytes * 1048576);
6644 		}
6645 
6646 		++chunk;
6647 	}
6648 
6649 	drmach_gdcd_dispose(gdcd);
6650 
6651 #ifdef DEBUG
6652 	DRMACH_PR("GDCD derived memlist:");
6653 	memlist_dump(mlist);
6654 #endif
6655 
6656 	*ml = mlist;
6657 	return (NULL);
6658 }
6659 
6660 sbd_error_t *
6661 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6662 {
6663 	drmach_mem_t	*mp;
6664 
6665 	if (!DRMACH_IS_MEM_ID(id))
6666 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6667 	mp = id;
6668 
6669 	ASSERT(mp->nbytes != 0);
6670 	*bytes = mp->nbytes;
6671 
6672 	return (NULL);
6673 }
6674 
6675 sbd_error_t *
6676 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6677 {
6678 	sbd_error_t	*err;
6679 	drmach_device_t	*mp;
6680 
6681 	if (!DRMACH_IS_MEM_ID(id))
6682 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6683 	mp = id;
6684 
6685 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6686 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6687 			err = NULL;
6688 			break;
6689 
6690 		case 1: *bytes = 0;
6691 			err = NULL;
6692 			break;
6693 
6694 		default:
6695 			err = DRMACH_INTERNAL_ERROR();
6696 			break;
6697 	}
6698 
6699 	return (err);
6700 }
6701 
6702 processorid_t drmach_mem_cpu_affinity_nail;
6703 
6704 processorid_t
6705 drmach_mem_cpu_affinity(drmachid_t id)
6706 {
6707 	drmach_device_t	*mp;
6708 	drmach_board_t	*bp;
6709 	processorid_t	 cpuid;
6710 
6711 	if (!DRMACH_IS_MEM_ID(id))
6712 		return (CPU_CURRENT);
6713 
6714 	if (drmach_mem_cpu_affinity_nail) {
6715 		cpuid = drmach_mem_cpu_affinity_nail;
6716 
6717 		if (cpuid < 0 || cpuid > NCPU)
6718 			return (CPU_CURRENT);
6719 
6720 		mutex_enter(&cpu_lock);
6721 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6722 			cpuid = CPU_CURRENT;
6723 		mutex_exit(&cpu_lock);
6724 
6725 		return (cpuid);
6726 	}
6727 
6728 	/* try to choose a proc on the target board */
6729 	mp = id;
6730 	bp = mp->bp;
6731 	if (bp->devices) {
6732 		int		 rv;
6733 		int		 d_idx;
6734 		drmachid_t	 d_id;
6735 
6736 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6737 		while (rv == 0) {
6738 			if (DRMACH_IS_CPU_ID(d_id)) {
6739 				drmach_cpu_t	*cp = d_id;
6740 
6741 				mutex_enter(&cpu_lock);
6742 				cpuid = cp->cpuid;
6743 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6744 					mutex_exit(&cpu_lock);
6745 					return (cpuid);
6746 				} else {
6747 					mutex_exit(&cpu_lock);
6748 				}
6749 			}
6750 
6751 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6752 		}
6753 	}
6754 
6755 	/* otherwise, this proc, wherever it is */
6756 	return (CPU_CURRENT);
6757 }
6758 
6759 static sbd_error_t *
6760 drmach_mem_release(drmachid_t id)
6761 {
6762 	if (!DRMACH_IS_MEM_ID(id))
6763 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6764 	return (NULL);
6765 }
6766 
6767 static sbd_error_t *
6768 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6769 {
6770 	drmach_mem_t	*mp;
6771 	sbd_error_t	*err;
6772 	uint64_t	 pa, slice_size;
6773 	struct memlist	*ml;
6774 
6775 	ASSERT(DRMACH_IS_MEM_ID(id));
6776 	mp = id;
6777 
6778 	/* get starting physical address of target memory */
6779 	err = drmach_mem_get_base_physaddr(id, &pa);
6780 	if (err)
6781 		return (err);
6782 
6783 	/* round down to slice boundary */
6784 	slice_size = DRMACH_MEM_SLICE_SIZE;
6785 	pa &= ~ (slice_size - 1);
6786 
6787 	/* stop at first span that is in slice */
6788 	memlist_read_lock();
6789 	for (ml = phys_install; ml; ml = ml->ml_next)
6790 		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
6791 			break;
6792 	memlist_read_unlock();
6793 
6794 	stat->assigned = mp->dev.bp->assigned;
6795 	stat->powered = mp->dev.bp->powered;
6796 	stat->configured = (ml != NULL);
6797 	stat->busy = mp->dev.busy;
6798 	(void) strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6799 	stat->info[0] = '\0';
6800 
6801 	return (NULL);
6802 }
6803 
6804 sbd_error_t *
6805 drmach_board_deprobe(drmachid_t id)
6806 {
6807 	drmach_board_t	*bp;
6808 	sbd_error_t	*err = NULL;
6809 
6810 	if (!DRMACH_IS_BOARD_ID(id))
6811 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6812 	bp = id;
6813 
6814 	if (bp->tree) {
6815 		drmach_node_dispose(bp->tree);
6816 		bp->tree = NULL;
6817 	}
6818 	if (bp->devices) {
6819 		drmach_array_dispose(bp->devices, drmach_device_dispose);
6820 		bp->devices = NULL;
6821 		bp->mem = NULL;  /* TODO: still needed? */
6822 	}
6823 	return (err);
6824 }
6825 
6826 /*ARGSUSED1*/
6827 static sbd_error_t *
6828 drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6829 {
6830 	drmach_device_t	*dp;
6831 	uint64_t	val;
6832 	int		err = 1;
6833 
6834 	if (DRMACH_IS_CPU_ID(id)) {
6835 		drmach_cpu_t *cp = id;
6836 		if (drmach_cpu_read_scr(cp, &val))
6837 			err = 0;
6838 	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6839 		drmach_io_t *io = id;
6840 		val = lddphysio(io->scsr_pa);
6841 		err = 0;
6842 	}
6843 	if (err)
6844 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6845 
6846 	dp = id;
6847 	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6848 	    dp->bp->cm.name,
6849 	    dp->cm.name,
6850 	    dp->portid,
6851 	    (long)(DRMACH_LPA_BASE_TO_PA(val)),
6852 	    (long)(DRMACH_LPA_BND_TO_PA(val)));
6853 
6854 	return (NULL);
6855 }
6856 
6857 /*ARGSUSED*/
6858 static sbd_error_t *
6859 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6860 {
6861 	drmach_board_t		*bp = (drmach_board_t *)id;
6862 	sbd_error_t		*err;
6863 	sc_gptwocfg_cookie_t	scc;
6864 
6865 	if (!DRMACH_IS_BOARD_ID(id))
6866 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6867 
6868 	/* do saf configurator stuff */
6869 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6870 	scc = sc_probe_board(bp->bnum);
6871 	if (scc == NULL) {
6872 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6873 		return (err);
6874 	}
6875 
6876 	return (err);
6877 }
6878 
6879 /*ARGSUSED*/
6880 static sbd_error_t *
6881 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6882 {
6883 	drmach_board_t	*bp;
6884 	sbd_error_t	*err = NULL;
6885 	sc_gptwocfg_cookie_t	scc;
6886 
6887 	if (!DRMACH_IS_BOARD_ID(id))
6888 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6889 	bp = id;
6890 
6891 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6892 	scc = sc_unprobe_board(bp->bnum);
6893 	if (scc != NULL) {
6894 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6895 	}
6896 
6897 	if (err == NULL)
6898 		err = drmach_board_deprobe(id);
6899 
6900 	return (err);
6901 }
6902 
6903 static sbd_error_t *
6904 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6905 {
6906 	_NOTE(ARGUNUSED(id))
6907 	_NOTE(ARGUNUSED(opts))
6908 
6909 	struct memlist	*ml;
6910 	uint64_t	src_pa;
6911 	uint64_t	dst_pa;
6912 	uint64_t	dst;
6913 
6914 	dst_pa = va_to_pa(&dst);
6915 
6916 	memlist_read_lock();
6917 	for (ml = phys_install; ml; ml = ml->ml_next) {
6918 		uint64_t	nbytes;
6919 
6920 		src_pa = ml->ml_address;
6921 		nbytes = ml->ml_size;
6922 
6923 		while (nbytes != 0ull) {
6924 
6925 			/* copy 32 bytes at src_pa to dst_pa */
6926 			bcopy32_il(src_pa, dst_pa);
6927 
6928 			/* increment by 32 bytes */
6929 			src_pa += (4 * sizeof (uint64_t));
6930 
6931 			/* decrement by 32 bytes */
6932 			nbytes -= (4 * sizeof (uint64_t));
6933 		}
6934 	}
6935 	memlist_read_unlock();
6936 
6937 	return (NULL);
6938 }
6939 
6940 static sbd_error_t *
6941 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6942 {
6943 	_NOTE(ARGUNUSED(opts))
6944 
6945 	drmach_cpu_t	*cp;
6946 
6947 	if (!DRMACH_IS_CPU_ID(id))
6948 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6949 	cp = id;
6950 
6951 	mutex_enter(&cpu_lock);
6952 	(void) drmach_iocage_cpu_return(&(cp->dev),
6953 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6954 	mutex_exit(&cpu_lock);
6955 
6956 	return (NULL);
6957 }
6958 
6959 /*
6960  * Starcat DR passthrus are for debugging purposes only.
6961  */
6962 static struct {
6963 	const char	*name;
6964 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
6965 } drmach_pt_arr[] = {
6966 	{ "showlpa",		drmach_pt_showlpa		},
6967 	{ "ikprobe",		drmach_pt_ikprobe		},
6968 	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
6969 	{ "readmem",		drmach_pt_readmem		},
6970 	{ "recovercpu",		drmach_pt_recovercpu		},
6971 
6972 	/* the following line must always be last */
6973 	{ NULL,			NULL				}
6974 };
6975 
6976 /*ARGSUSED*/
6977 sbd_error_t *
6978 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
6979 {
6980 	int		i;
6981 	sbd_error_t	*err;
6982 
6983 	i = 0;
6984 	while (drmach_pt_arr[i].name != NULL) {
6985 		int len = strlen(drmach_pt_arr[i].name);
6986 
6987 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
6988 			break;
6989 
6990 		i += 1;
6991 	}
6992 
6993 	if (drmach_pt_arr[i].name == NULL)
6994 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
6995 	else
6996 		err = (*drmach_pt_arr[i].handler)(id, opts);
6997 
6998 	return (err);
6999 }
7000 
7001 sbd_error_t *
7002 drmach_release(drmachid_t id)
7003 {
7004 	drmach_common_t *cp;
7005 
7006 	if (!DRMACH_IS_DEVICE_ID(id))
7007 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7008 	cp = id;
7009 
7010 	return (cp->release(id));
7011 }
7012 
7013 sbd_error_t *
7014 drmach_status(drmachid_t id, drmach_status_t *stat)
7015 {
7016 	drmach_common_t *cp;
7017 	sbd_error_t	*err;
7018 
7019 	rw_enter(&drmach_boards_rwlock, RW_READER);
7020 
7021 	if (!DRMACH_IS_ID(id)) {
7022 		rw_exit(&drmach_boards_rwlock);
7023 		return (drerr_new(0, ESTC_NOTID, NULL));
7024 	}
7025 
7026 	cp = id;
7027 
7028 	err = cp->status(id, stat);
7029 	rw_exit(&drmach_boards_rwlock);
7030 	return (err);
7031 }
7032 
7033 static sbd_error_t *
7034 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7035 {
7036 	drmach_common_t *cp;
7037 
7038 	if (!DRMACH_IS_ID(id))
7039 		return (drerr_new(0, ESTC_NOTID, NULL));
7040 	cp = id;
7041 
7042 	return (cp->status(id, stat));
7043 }
7044 
7045 /*ARGSUSED*/
7046 sbd_error_t *
7047 drmach_unconfigure(drmachid_t id, int flags)
7048 {
7049 	drmach_device_t	*dp;
7050 	dev_info_t 	*rdip;
7051 
7052 	char	name[OBP_MAXDRVNAME];
7053 	int rv;
7054 
7055 	/*
7056 	 * Since CPU nodes are not configured, it is
7057 	 * necessary to skip the unconfigure step as
7058 	 * well.
7059 	 */
7060 	if (DRMACH_IS_CPU_ID(id)) {
7061 		return (NULL);
7062 	}
7063 
7064 	for (; id; ) {
7065 		dev_info_t	*fdip = NULL;
7066 
7067 		if (!DRMACH_IS_DEVICE_ID(id))
7068 			return (drerr_new(0, ESTC_INAPPROP, NULL));
7069 		dp = id;
7070 
7071 		rdip = dp->node->n_getdip(dp->node);
7072 
7073 		/*
7074 		 * drmach_unconfigure() is always called on a configured branch.
7075 		 * So the root of the branch was held earlier and must exist.
7076 		 */
7077 		ASSERT(rdip);
7078 
7079 		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");
7080 
7081 		rv = dp->node->n_getprop(dp->node,
7082 		    "name", name, OBP_MAXDRVNAME);
7083 
7084 		/* The node must have a name */
7085 		if (rv)
7086 			return (0);
7087 
7088 		if (drmach_name2type_idx(name) < 0) {
7089 			if (DRMACH_IS_MEM_ID(id)) {
7090 				drmach_mem_t	*mp = id;
7091 				id = mp->next;
7092 			} else {
7093 				id = NULL;
7094 			}
7095 			continue;
7096 		}
7097 
7098 		/*
7099 		 * NOTE: FORCE flag is no longer needed under devfs
7100 		 */
7101 		ASSERT(e_ddi_branch_held(rdip));
7102 		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
7103 			sbd_error_t *err = NULL;
7104 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7105 
7106 			/*
7107 			 * If non-NULL, fdip is returned held and must be
7108 			 * released.
7109 			 */
7110 			if (fdip != NULL) {
7111 				(void) ddi_pathname(fdip, path);
7112 				ddi_release_devi(fdip);
7113 			} else {
7114 				(void) ddi_pathname(rdip, path);
7115 			}
7116 
7117 			err = drerr_new(1, ESTC_DRVFAIL, path);
7118 
7119 			kmem_free(path, MAXPATHLEN);
7120 
7121 			/*
7122 			 * If we were unconfiguring an IO board, a call was
7123 			 * made to man_dr_detach.  We now need to call
7124 			 * man_dr_attach to regain man use of the eri.
7125 			 */
7126 			if (DRMACH_IS_IO_ID(id)) {
7127 				int (*func)(dev_info_t *dip);
7128 
7129 				func = (int (*)(dev_info_t *))kobj_getsymvalue\
7130 				    ("man_dr_attach", 0);
7131 
7132 				if (func) {
7133 					drmach_io_inst_t ios;
7134 					dev_info_t 	*pdip;
7135 					int		circ;
7136 
7137 					/*
7138 					 * Walk device tree to find rio dip for
7139 					 * the board
7140 					 * Since we are not interested in iosram
7141 					 * instance here, initialize it to 0, so
7142 					 * that the walk terminates as soon as
7143 					 * eri dip is found.
7144 					 */
7145 					ios.iosram_inst = 0;
7146 					ios.eri_dip = NULL;
7147 					ios.bnum = dp->bp->bnum;
7148 
7149 					if (pdip = ddi_get_parent(rdip)) {
7150 						ndi_hold_devi(pdip);
7151 						ndi_devi_enter(pdip, &circ);
7152 					}
7153 					/*
7154 					 * Root node doesn't have to be held in
7155 					 * any way.
7156 					 */
7157 					ASSERT(e_ddi_branch_held(rdip));
7158 					ddi_walk_devs(rdip,
7159 					    drmach_board_find_io_insts,
7160 					    (void *)&ios);
7161 
7162 					DRMACH_PR("drmach_unconfigure: bnum=%d"
7163 					    " eri=0x%p\n",
7164 					    ios.bnum, (void *)ios.eri_dip);
7165 
7166 					if (pdip) {
7167 						ndi_devi_exit(pdip, circ);
7168 						ndi_rele_devi(pdip);
7169 					}
7170 
7171 					if (ios.eri_dip) {
7172 						DRMACH_PR("calling"
7173 						    " man_dr_attach\n");
7174 						(void) (*func)(ios.eri_dip);
7175 						/*
7176 						 * Release hold acquired in
7177 						 * drmach_board_find_io_insts()
7178 						 */
7179 						ndi_rele_devi(ios.eri_dip);
7180 					}
7181 				}
7182 			}
7183 			return (err);
7184 		}
7185 
7186 		if (DRMACH_IS_MEM_ID(id)) {
7187 			drmach_mem_t	*mp = id;
7188 			id = mp->next;
7189 		} else {
7190 			id = NULL;
7191 		}
7192 	}
7193 
7194 	return (NULL);
7195 }
7196 
7197 /*
7198  * drmach interfaces to legacy Starfire platmod logic
7199  * linkage via runtime symbol look up, called from plat_cpu_power*
7200  */
7201 
7202 /*
7203  * Start up a cpu.  It is possible that we're attempting to restart
7204  * the cpu after an UNCONFIGURE in which case the cpu will be
7205  * spinning in its cache.  So, all we have to do is wakeup him up.
7206  * Under normal circumstances the cpu will be coming from a previous
7207  * CONNECT and thus will be spinning in OBP.  In both cases, the
7208  * startup sequence is the same.
7209  */
7210 int
7211 drmach_cpu_poweron(struct cpu *cp)
7212 {
7213 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7214 
7215 	ASSERT(MUTEX_HELD(&cpu_lock));
7216 
7217 	if (drmach_cpu_start(cp) != 0)
7218 		return (EBUSY);
7219 	else
7220 		return (0);
7221 }
7222 
7223 int
7224 drmach_cpu_poweroff(struct cpu *cp)
7225 {
7226 	int		ntries;
7227 	processorid_t	cpuid;
7228 	void		drmach_cpu_shutdown_self(void);
7229 
7230 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
7231 
7232 	ASSERT(MUTEX_HELD(&cpu_lock));
7233 
7234 	/*
7235 	 * XXX CHEETAH SUPPORT
7236 	 * for cheetah, we need to grab the iocage lock since iocage
7237 	 * memory is used for e$ flush.
7238 	 */
7239 	if (drmach_is_cheetah) {
7240 		mutex_enter(&drmach_iocage_lock);
7241 		while (drmach_iocage_is_busy)
7242 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7243 		drmach_iocage_is_busy = 1;
7244 		drmach_iocage_mem_scrub(ecache_size * 2);
7245 		mutex_exit(&drmach_iocage_lock);
7246 	}
7247 
7248 	cpuid = cp->cpu_id;
7249 
7250 	/*
7251 	 * Set affinity to ensure consistent reading and writing of
7252 	 * drmach_xt_mb[cpuid] by one "master" CPU directing
7253 	 * the shutdown of the target CPU.
7254 	 */
7255 	affinity_set(CPU->cpu_id);
7256 
7257 	/*
7258 	 * Capture all CPUs (except for detaching proc) to prevent
7259 	 * crosscalls to the detaching proc until it has cleared its
7260 	 * bit in cpu_ready_set.
7261 	 *
7262 	 * The CPUs remain paused and the prom_mutex is known to be free.
7263 	 * This prevents blocking when doing prom IEEE-1275 calls at a
7264 	 * high PIL level.
7265 	 */
7266 	promsafe_pause_cpus();
7267 
7268 	/*
7269 	 * Quiesce interrupts on the target CPU. We do this by setting
7270 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
7271 	 * prevent it from receiving cross calls and cross traps.
7272 	 * This prevents the processor from receiving any new soft interrupts.
7273 	 */
7274 	mp_cpu_quiesce(cp);
7275 
7276 	(void) prom_hotremovecpu(cpuid);
7277 
7278 	start_cpus();
7279 
7280 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
7281 	drmach_xt_mb[cpuid] = 0x80;
7282 
7283 	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
7284 	    (uint64_t)drmach_cpu_shutdown_self, NULL);
7285 
7286 	ntries = drmach_cpu_ntries;
7287 	while (drmach_xt_mb[cpuid] && ntries) {
7288 		DELAY(drmach_cpu_delay);
7289 		ntries--;
7290 	}
7291 
7292 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
7293 
7294 	membar_sync();			/* make sure copy-back retires */
7295 
7296 	affinity_clear();
7297 
7298 	/*
7299 	 * XXX CHEETAH SUPPORT
7300 	 */
7301 	if (drmach_is_cheetah) {
7302 		mutex_enter(&drmach_iocage_lock);
7303 		drmach_iocage_mem_scrub(ecache_size * 2);
7304 		drmach_iocage_is_busy = 0;
7305 		cv_signal(&drmach_iocage_cv);
7306 		mutex_exit(&drmach_iocage_lock);
7307 	}
7308 
7309 	DRMACH_PR("waited %d out of %d tries for "
7310 	    "drmach_cpu_shutdown_self on cpu%d",
7311 	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
7312 
7313 	/*
7314 	 * Do this here instead of drmach_cpu_shutdown_self() to
7315 	 * avoid an assertion failure panic in turnstile.c.
7316 	 */
7317 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
7318 
7319 	return (0);
7320 }
7321 
7322 void
7323 drmach_iocage_mem_scrub(uint64_t nbytes)
7324 {
7325 	extern uint32_t drmach_bc_bzero(void*, size_t);
7326 	uint32_t	rv;
7327 
7328 	ASSERT(MUTEX_HELD(&cpu_lock));
7329 
7330 	affinity_set(CPU->cpu_id);
7331 
7332 	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
7333 	if (rv != 0) {
7334 		DRMACH_PR(
7335 		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
7336 		rv = drmach_bc_bzero(drmach_iocage_vaddr, drmach_iocage_size);
7337 		if (rv != 0)
7338 			cmn_err(CE_PANIC,
7339 			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
7340 			    rv);
7341 	}
7342 
7343 	cpu_flush_ecache();
7344 
7345 	affinity_clear();
7346 }
7347 
/*
 * Round x up to the next multiple of a, where a is expected to be a
 * power of two.  An alignment of zero leaves x unchanged.  The result
 * has type uintptr_t.
 */
#define	ALIGN(x, a)	((a) != 0 ? \
	(((uintptr_t)(x) + ((uintptr_t)(a) - 1l)) & ~((uintptr_t)(a) - 1l)) : \
	(uintptr_t)(x))
7350 
7351 static sbd_error_t *
7352 drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7353 {
7354 	pfn_t		basepfn;
7355 	pgcnt_t		npages;
7356 	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7357 	uint64_t	drmach_iocage_paddr_mbytes;
7358 
7359 	ASSERT(drmach_iocage_paddr != -1);
7360 
7361 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7362 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7363 
7364 	(void) memscrub_delete_span(basepfn, npages);
7365 
7366 	mutex_enter(&cpu_lock);
7367 	drmach_iocage_mem_scrub(drmach_iocage_size);
7368 	mutex_exit(&cpu_lock);
7369 
7370 	/*
7371 	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7372 	 * and in megabyte units.
7373 	 * The size of the cage is also in megabyte units.
7374 	 */
7375 	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7376 
7377 	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7378 
7379 	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7380 	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7381 	tbrq->memlen = drmach_iocage_size / 0x100000;
7382 
7383 	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7384 	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7385 	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7386 
7387 	return (NULL);
7388 }
7389 
7390 static sbd_error_t *
7391 drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7392 {
7393 	_NOTE(ARGUNUSED(tbr))
7394 
7395 	pfn_t		basepfn;
7396 	pgcnt_t		npages;
7397 	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7398 
7399 	ASSERT(drmach_iocage_paddr != -1);
7400 
7401 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7402 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7403 
7404 	(void) memscrub_add_span(basepfn, npages);
7405 
7406 	mutex_enter(&cpu_lock);
7407 	mutex_enter(&drmach_iocage_lock);
7408 	drmach_iocage_mem_scrub(drmach_iocage_size);
7409 	drmach_iocage_is_busy = 0;
7410 	cv_signal(&drmach_iocage_cv);
7411 	mutex_exit(&drmach_iocage_lock);
7412 	mutex_exit(&cpu_lock);
7413 
7414 	return (NULL);
7415 }
7416 
7417 static int
7418 drmach_cpu_intr_disable(cpu_t *cp)
7419 {
7420 	if (cpu_intr_disable(cp) != 0)
7421 		return (-1);
7422 	return (0);
7423 }
7424 
7425 static int
7426 drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
7427 {
7428 	struct cpu	*cp;
7429 	processorid_t	cpuid;
7430 	static char	*fn = "drmach_iocage_cpu_acquire";
7431 	sbd_error_t 	*err;
7432 	int 		impl;
7433 
7434 	ASSERT(DRMACH_IS_CPU_ID(dp));
7435 	ASSERT(MUTEX_HELD(&cpu_lock));
7436 
7437 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7438 
7439 	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);
7440 
7441 	if (dp->busy)
7442 		return (-1);
7443 
7444 	if ((cp = cpu_get(cpuid)) == NULL) {
7445 		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
7446 		return (-1);
7447 	}
7448 
7449 	if (!CPU_ACTIVE(cp)) {
7450 		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
7451 		return (-1);
7452 	}
7453 
7454 	/*
7455 	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
7456 	 * can fail to receive an XIR. To workaround this issue until a hardware
7457 	 * fix is implemented, we will exclude the selection of these CPUs.
7458 	 *
7459 	 * Once a fix is implemented in hardware, this code should be updated
7460 	 * to allow Jaguar CPUs that have the fix to be used. However, support
7461 	 * must be retained to skip revisions that do not have this fix.
7462 	 */
7463 
7464 	err = drmach_cpu_get_impl(dp, &impl);
7465 	if (err) {
7466 		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
7467 		sbd_err_clear(&err);
7468 		return (-1);
7469 	}
7470 
7471 	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
7472 	    drmach_iocage_exclude_jaguar_port_zero) {
7473 		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
7474 		    fn, cpuid);
7475 		return (-1);
7476 	}
7477 
7478 	ASSERT(oflags);
7479 	*oflags = cp->cpu_flags;
7480 
7481 	if (cpu_offline(cp, 0)) {
7482 		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
7483 		return (-1);
7484 	}
7485 
7486 	if (cpu_poweroff(cp)) {
7487 		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
7488 		if (cpu_online(cp)) {
7489 			cmn_err(CE_WARN, "failed to online CPU id %d "
7490 			    "during I/O cage test selection", cpuid);
7491 		}
7492 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7493 		    drmach_cpu_intr_disable(cp) != 0) {
7494 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7495 			    "no-intr during I/O cage test selection", cpuid);
7496 		}
7497 		return (-1);
7498 	}
7499 
7500 	if (cpu_unconfigure(cpuid)) {
7501 		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
7502 		    cpuid);
7503 		(void) cpu_configure(cpuid);
7504 		if ((cp = cpu_get(cpuid)) == NULL) {
7505 			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7506 			    "during I/O cage test selection", cpuid);
7507 			dp->busy = 1;
7508 			return (-1);
7509 		}
7510 		if (cpu_poweron(cp) || cpu_online(cp)) {
7511 			cmn_err(CE_WARN, "failed to %s CPU id %d "
7512 			    "during I/O cage test selection",
7513 			    cpu_is_poweredoff(cp) ?
7514 			    "poweron" : "online", cpuid);
7515 		}
7516 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7517 		    drmach_cpu_intr_disable(cp) != 0) {
7518 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7519 			    "no-intr during I/O cage test selection", cpuid);
7520 		}
7521 		return (-1);
7522 	}
7523 
7524 	dp->busy = 1;
7525 
7526 	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);
7527 
7528 	return (0);
7529 }
7530 
7531 /*
7532  * Attempt to acquire all the CPU devices passed in. It is
7533  * assumed that all the devices in the list are the cores of
7534  * a single CMP device. Non CMP devices can be handled as a
7535  * single core CMP by passing in a one element list.
7536  *
7537  * Success is only returned if *all* the devices in the list
7538  * can be acquired. In the failure case, none of the devices
7539  * in the list will be held as acquired.
7540  */
7541 static int
7542 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7543 {
7544 	int	curr;
7545 	int	i;
7546 	int	rv = 0;
7547 
7548 	ASSERT((dpp != NULL) && (*dpp != NULL));
7549 
7550 	/*
7551 	 * Walk the list of CPU devices (cores of a CMP)
7552 	 * and attempt to acquire them. Bail out if an
7553 	 * error is encountered.
7554 	 */
7555 	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7556 
7557 		/* check for the end of the list */
7558 		if (dpp[curr] == NULL) {
7559 			break;
7560 		}
7561 
7562 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7563 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7564 
7565 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7566 		if (rv != 0) {
7567 			break;
7568 		}
7569 	}
7570 
7571 	/*
7572 	 * Check for an error.
7573 	 */
7574 	if (rv != 0) {
7575 		/*
7576 		 * Make a best effort attempt to return any cores
7577 		 * that were already acquired before the error was
7578 		 * encountered.
7579 		 */
7580 		for (i = 0; i < curr; i++) {
7581 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7582 		}
7583 	}
7584 
7585 	return (rv);
7586 }
7587 
7588 static int
7589 drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7590 {
7591 	processorid_t	cpuid;
7592 	struct cpu	*cp;
7593 	int		rv = 0;
7594 	static char	*fn = "drmach_iocage_cpu_return";
7595 
7596 	ASSERT(DRMACH_IS_CPU_ID(dp));
7597 	ASSERT(MUTEX_HELD(&cpu_lock));
7598 
7599 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7600 
7601 	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7602 
7603 	if (cpu_configure(cpuid)) {
7604 		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7605 		    "after I/O cage test", cpuid);
7606 		/*
7607 		 * The component was never set to unconfigured during the IO
7608 		 * cage test, so we need to leave marked as busy to prevent
7609 		 * further DR operations involving this component.
7610 		 */
7611 		return (-1);
7612 	}
7613 
7614 	if ((cp = cpu_get(cpuid)) == NULL) {
7615 		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7616 		    "I/O cage test", cpuid);
7617 		dp->busy = 0;
7618 		return (-1);
7619 	}
7620 
7621 	if (cpu_poweron(cp) || cpu_online(cp)) {
7622 		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7623 		    "cage test", cpu_is_poweredoff(cp) ?
7624 		    "poweron" : "online", cpuid);
7625 		rv = -1;
7626 	}
7627 
7628 	/*
7629 	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7630 	 * P_NOINTR. Need to return to previous user-visible state.
7631 	 */
7632 	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7633 	    drmach_cpu_intr_disable(cp) != 0) {
7634 		cmn_err(CE_WARN, "failed to restore CPU id %d "
7635 		    "no-intr after I/O cage test", cpuid);
7636 		rv = -1;
7637 	}
7638 
7639 	dp->busy = 0;
7640 
7641 	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7642 
7643 	return (rv);
7644 }
7645 
7646 static sbd_error_t *
7647 drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7648     cpu_flag_t *oflags)
7649 {
7650 	drmach_board_t	*bp;
7651 	int		b_rv;
7652 	int		b_idx;
7653 	drmachid_t	b_id;
7654 	int		found;
7655 
7656 	mutex_enter(&cpu_lock);
7657 
7658 	ASSERT(drmach_boards != NULL);
7659 
7660 	found = 0;
7661 
7662 	/*
7663 	 * Walk the board list.
7664 	 */
7665 	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
7666 
7667 	while (b_rv == 0) {
7668 
7669 		int		d_rv;
7670 		int		d_idx;
7671 		drmachid_t	d_id;
7672 
7673 		bp = b_id;
7674 
7675 		if (bp->connected == 0 || bp->devices == NULL) {
7676 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7677 			continue;
7678 		}
7679 
7680 		/* An AXQ restriction disqualifies MCPU's as candidates. */
7681 		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
7682 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7683 			continue;
7684 		}
7685 
7686 		/*
7687 		 * Walk the device list of this board.
7688 		 */
7689 		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);
7690 
7691 		while (d_rv == 0) {
7692 
7693 			drmach_device_t	*ndp;
7694 
7695 			/* only interested in CPU devices */
7696 			if (!DRMACH_IS_CPU_ID(d_id)) {
7697 				d_rv = drmach_array_next(bp->devices, &d_idx,
7698 				    &d_id);
7699 				continue;
7700 			}
7701 
7702 			/*
7703 			 * The following code assumes two properties
7704 			 * of a CMP device:
7705 			 *
7706 			 *   1. All cores of a CMP are grouped together
7707 			 *	in the device list.
7708 			 *
7709 			 *   2. There will only be a maximum of two cores
7710 			 *	present in the CMP.
7711 			 *
7712 			 * If either of these two properties change,
7713 			 * this code will have to be revisited.
7714 			 */
7715 
7716 			dpp[0] = d_id;
7717 			dpp[1] = NULL;
7718 
7719 			/*
7720 			 * Get the next device. It may or may not be used.
7721 			 */
7722 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7723 			ndp = d_id;
7724 
7725 			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
7726 				/*
7727 				 * The second device is only interesting for
7728 				 * this pass if it has the same portid as the
7729 				 * first device. This implies that both are
7730 				 * cores of the same CMP.
7731 				 */
7732 				if (dpp[0]->portid == ndp->portid) {
7733 					dpp[1] = d_id;
7734 				}
7735 			}
7736 
7737 			/*
7738 			 * Attempt to acquire all cores of the CMP.
7739 			 */
7740 			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
7741 				found = 1;
7742 				break;
7743 			}
7744 
7745 			/*
7746 			 * Check if the search for the second core was
7747 			 * successful. If not, the next iteration should
7748 			 * use that device.
7749 			 */
7750 			if (dpp[1] == NULL) {
7751 				continue;
7752 			}
7753 
7754 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7755 		}
7756 
7757 		if (found)
7758 			break;
7759 
7760 		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7761 	}
7762 
7763 	mutex_exit(&cpu_lock);
7764 
7765 	if (!found) {
7766 		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
7767 	}
7768 
7769 	tbrq->cpu_portid = (*dpp)->portid;
7770 
7771 	return (NULL);
7772 }
7773 
7774 /*
7775  * Setup an iocage by acquiring a cpu and memory.
7776  */
7777 static sbd_error_t *
7778 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7779     cpu_flag_t *oflags)
7780 {
7781 	sbd_error_t *err;
7782 
7783 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7784 	if (!err) {
7785 		mutex_enter(&drmach_iocage_lock);
7786 		while (drmach_iocage_is_busy)
7787 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7788 		drmach_iocage_is_busy = 1;
7789 		mutex_exit(&drmach_iocage_lock);
7790 		err = drmach_iocage_mem_get(tbrq);
7791 		if (err) {
7792 			mutex_enter(&drmach_iocage_lock);
7793 			drmach_iocage_is_busy = 0;
7794 			cv_signal(&drmach_iocage_cv);
7795 			mutex_exit(&drmach_iocage_lock);
7796 		}
7797 	}
7798 	return (err);
7799 }
7800 
/* number of PCI leaves tracked per Schizo (pci[] in drmach_s1p_schizo_t) */
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
/* number of register snapshots kept; arrays of this size use drmach_sr_iter_t as index */
#define	DRMACH_S1P_SAMPLE_MAX		2

/*
 * Identifies which of the two snapshots is being taken or examined.
 * The values double as indices into the DRMACH_S1P_SAMPLE_MAX sized
 * arrays below.
 */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/*
 * Saved state of a slot 1 AXQ.
 */
typedef struct {
	dev_info_t	*dip;		/* not held; may be stale */
	uint32_t	portid;
	uint32_t	pcr_sel_save;	/* perf cntr select reg to restore */
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX];
	uint64_t	reg_basepa;	/* 0 means no AXQ recorded */
} drmach_s1p_axq_t;

/*
 * Saved state of one Schizo PCI leaf. Each regs[] element holds the two
 * interrupt state diagnostic registers plus an allocated array of
 * nmap_regs interrupt mapping register values (intr_map_regs), which
 * is released by drmach_s1p_pci_free().
 */
typedef struct {
	dev_info_t		*dip;	/* not held; may be stale */
	uint32_t		portid;
	uint64_t		csr_basepa;
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;
		uint64_t	*intr_map_regs;
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/*
 * Saved state of one Schizo: control/error register snapshots and its
 * PCI leaves. csr_basepa == 0 means the Schizo was not found.
 */
typedef struct {
	uint64_t		csr_basepa;
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/*
 * All saved state for the slot 1 board of one boardset.
 */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;

/*
 * Table of saved state for paused slot1 devices, indexed by expander
 * number. A NULL entry means nothing is recorded for that boardset.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
/* non-zero: the table is to be freed at the start of the next suspend */
static int drmach_slot1_pause_init = 1;

/*
 * Enables the slot1 pause bookkeeping and checks; on by default only
 * in DEBUG kernels.
 */
#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7856 
7857 static int
7858 drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7859 {
7860 	int		portid, exp, slot, i;
7861 	drmach_reg_t	regs[2];
7862 	int		reglen = sizeof (regs);
7863 
7864 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7865 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7866 		return (0);
7867 	}
7868 
7869 	exp = (portid >> 5) & 0x1f;
7870 	slot = portid & 0x1;
7871 
7872 	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7873 	    strlen(DRMACH_AXQ_NAMEPROP))) {
7874 		return (0);
7875 	}
7876 
7877 	mutex_enter(&cpu_lock);
7878 	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7879 		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7880 			/* maxcat cpu present */
7881 			mutex_exit(&cpu_lock);
7882 			return (0);
7883 		}
7884 	}
7885 	mutex_exit(&cpu_lock);
7886 
7887 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7888 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7889 		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7890 		    "axq dip=%p\n", (void *)dip);
7891 		return (0);
7892 	}
7893 
7894 	ASSERT(id && reg);
7895 	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7896 	*reg |= (uint64_t)regs[0].reg_addr_lo;
7897 	*id = portid;
7898 
7899 	return (1);
7900 }
7901 
7902 /*
7903  * Allocate an entry in the slot1_paused state table.
7904  */
7905 static void
7906 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7907     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7908 {
7909 	int	axq_exp;
7910 	drmach_slot1_pause_t *slot1;
7911 
7912 	axq_exp = (axq_portid >> 5) & 0x1f;
7913 
7914 	ASSERT(axq_portid & 0x1);
7915 	ASSERT(slot1_paused[axq_exp] == NULL);
7916 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7917 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7918 
7919 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7920 
7921 	/*
7922 	 * XXX This dip should really be held (via ndi_hold_devi())
7923 	 * before saving it in the axq pause structure. However that
7924 	 * would prevent DR as the pause data structures persist until
7925 	 * the next suspend. drmach code should be modified to free the
7926 	 * the slot 1 pause data structures for a boardset when its
7927 	 * slot 1 board is DRed out. The dip can then be released via
7928 	 * ndi_rele_devi() when the pause data structure is freed
7929 	 * allowing DR to proceed. Until this change is made, drmach
7930 	 * code should be careful about dereferencing the saved dip
7931 	 * as it may no longer exist.
7932 	 */
7933 	slot1->axq.dip = axq_dip;
7934 	slot1->axq.portid = axq_portid;
7935 	slot1->axq.reg_basepa = reg;
7936 	slot1_paused[axq_exp] = slot1;
7937 }
7938 
7939 static void
7940 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7941 {
7942 	int	i;
7943 
7944 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7945 		if (pci->regs[i].intr_map_regs != NULL) {
7946 			ASSERT(pci->regs[i].nmap_regs > 0);
7947 			kmem_free(pci->regs[i].intr_map_regs,
7948 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7949 		}
7950 	}
7951 }
7952 
7953 static void
7954 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7955 {
7956 	int	i, j, k;
7957 	drmach_slot1_pause_t *slot1;
7958 
7959 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7960 		if ((slot1 = slot1_paused[i]) == NULL)
7961 			continue;
7962 
7963 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7964 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7965 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7966 
7967 		kmem_free(slot1, sizeof (*slot1));
7968 		slot1_paused[i] = NULL;
7969 	}
7970 }
7971 
7972 /*
7973  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
7974  * fill in the appropriate info in the slot1_paused state table.
7975  */
7976 static int
7977 drmach_find_slot1_io(dev_info_t *dip, void *arg)
7978 {
7979 	int		portid, exp, ioc_unum, leaf_unum;
7980 	char		buf[OBP_MAXDRVNAME];
7981 	int		buflen = sizeof (buf);
7982 	drmach_reg_t	regs[3];
7983 	int		reglen = sizeof (regs);
7984 	uint32_t	leaf_offset;
7985 	uint64_t	schizo_csr_pa, pci_csr_pa;
7986 	drmach_s1p_pci_t *pci;
7987 	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;
7988 
7989 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7990 	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
7991 	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
7992 		return (DDI_WALK_CONTINUE);
7993 	}
7994 
7995 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7996 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7997 		return (DDI_WALK_CONTINUE);
7998 	}
7999 
8000 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8001 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
8002 		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
8003 		    "dip=%p\n", (void *)dip);
8004 		return (DDI_WALK_CONTINUE);
8005 	}
8006 
8007 	exp = portid >> 5;
8008 	ioc_unum = portid & 0x1;
8009 	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
8010 	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
8011 	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
8012 	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
8013 	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;
8014 
8015 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8016 	ASSERT(slot1_paused[exp] != NULL);
8017 	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
8018 	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
8019 	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);
8020 
8021 	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
8022 	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
8023 	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];
8024 
8025 	/*
8026 	 * XXX This dip should really be held (via ndi_hold_devi())
8027 	 * before saving it in the pci pause structure. However that
8028 	 * would prevent DR as the pause data structures persist until
8029 	 * the next suspend. drmach code should be modified to free the
8030 	 * the slot 1 pause data structures for a boardset when its
8031 	 * slot 1 board is DRed out. The dip can then be released via
8032 	 * ndi_rele_devi() when the pause data structure is freed
8033 	 * allowing DR to proceed. Until this change is made, drmach
8034 	 * code should be careful about dereferencing the saved dip as
8035 	 * it may no longer exist.
8036 	 */
8037 	pci->dip = dip;
8038 	pci->portid = portid;
8039 	pci->csr_basepa = pci_csr_pa;
8040 
8041 	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
8042 	    buf, portid, (void *)dip);
8043 
8044 	return (DDI_WALK_PRUNECHILD);
8045 }
8046 
8047 static void
8048 drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
8049 {
8050 	/*
8051 	 * Root node doesn't have to be held
8052 	 */
8053 	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
8054 	    (void *)slot1_paused);
8055 }
8056 
8057 /*
8058  * Save the interrupt mapping registers for each non-idle interrupt
8059  * represented by the bit pairs in the saved interrupt state
8060  * diagnostic registers for this PCI leaf.
8061  */
8062 static void
8063 drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
8064 {
8065 	int	 i, cnt, ino;
8066 	uint64_t reg;
8067 	char	 *dname;
8068 	uchar_t	 Xmits;
8069 
8070 	dname = ddi_binding_name(pci->dip);
8071 	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;
8072 
8073 	/*
8074 	 * 1st pass allocates, 2nd pass populates.
8075 	 */
8076 	for (i = 0; i < 2; i++) {
8077 		cnt = ino = 0;
8078 
8079 		/*
8080 		 * PCI slot interrupts
8081 		 */
8082 		reg = pci->regs[iter].slot_intr_state_diag;
8083 		while (reg) {
8084 			/*
8085 			 * Xmits Interrupt Number Offset(ino) Assignments
8086 			 *   00-17 PCI Slot Interrupts
8087 			 *   18-1f Not Used
8088 			 */
8089 			if ((Xmits) && (ino > 0x17))
8090 				break;
8091 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8092 			    COMMON_CLEAR_INTR_REG_IDLE) {
8093 				if (i) {
8094 					pci->regs[iter].intr_map_regs[cnt] =
8095 					    lddphysio(pci->csr_basepa +
8096 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8097 					    ino * sizeof (reg));
8098 				}
8099 				++cnt;
8100 			}
8101 			++ino;
8102 			reg >>= 2;
8103 		}
8104 
8105 		/*
8106 		 * Xmits Interrupt Number Offset(ino) Assignments
8107 		 *   20-2f Not Used
8108 		 *   30-37 Internal interrupts
8109 		 *   38-3e Not Used
8110 		 */
8111 		ino = (Xmits)  ?  0x30 : 0x20;
8112 
8113 		/*
8114 		 * OBIO and internal schizo interrupts
8115 		 * Each PCI leaf has a set of mapping registers for all
8116 		 * possible interrupt sources except the NewLink interrupts.
8117 		 */
8118 		reg = pci->regs[iter].obio_intr_state_diag;
8119 		while (reg && ino <= 0x38) {
8120 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8121 			    COMMON_CLEAR_INTR_REG_IDLE) {
8122 				if (i) {
8123 					pci->regs[iter].intr_map_regs[cnt] =
8124 					    lddphysio(pci->csr_basepa +
8125 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8126 					    ino * sizeof (reg));
8127 				}
8128 				++cnt;
8129 			}
8130 			++ino;
8131 			reg >>= 2;
8132 		}
8133 
8134 		if (!i) {
8135 			pci->regs[iter].nmap_regs = cnt;
8136 			pci->regs[iter].intr_map_regs =
8137 			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
8138 		}
8139 	}
8140 }
8141 
8142 static void
8143 drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
8144 {
8145 	uint32_t	reg;
8146 
8147 	if (axq->reg_basepa == 0x0UL)
8148 		return;
8149 
8150 	if (iter == DRMACH_POST_SUSPEND) {
8151 		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
8152 		    AXQ_SLOT1_PERFCNT_SEL);
8153 		/*
8154 		 * Select l2_io_queue counter by writing L2_IO_Q mux
8155 		 * input to bits 0-6 of perf cntr select reg.
8156 		 */
8157 		reg = axq->pcr_sel_save;
8158 		reg &= ~AXQ_PIC_CLEAR_MASK;
8159 		reg |= L2_IO_Q;
8160 
8161 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
8162 	}
8163 
8164 	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);
8165 
8166 	if (iter == DRMACH_PRE_RESUME) {
8167 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
8168 		    axq->pcr_sel_save);
8169 	}
8170 
8171 	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
8172 	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
8173 }
8174 
8175 static void
8176 drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
8177 {
8178 	int	i;
8179 	drmach_s1p_pci_t *pci;
8180 
8181 	if (schizo->csr_basepa == 0x0UL)
8182 		return;
8183 
8184 	schizo->regs[iter].csr =
8185 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
8186 	schizo->regs[iter].errctrl =
8187 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
8188 	schizo->regs[iter].errlog =
8189 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);
8190 
8191 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8192 		pci = &schizo->pci[i];
8193 		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
8194 			pci->regs[iter].slot_intr_state_diag =
8195 			    lddphysio(pci->csr_basepa +
8196 			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
8197 
8198 			pci->regs[iter].obio_intr_state_diag =
8199 			    lddphysio(pci->csr_basepa +
8200 			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
8201 
8202 			drmach_s1p_intr_map_reg_save(pci, iter);
8203 		}
8204 	}
8205 }
8206 
8207 /*
8208  * Called post-suspend and pre-resume to snapshot the suspend state
8209  * of slot1 AXQs and Schizos.
8210  */
8211 static void
8212 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8213     drmach_sr_iter_t iter)
8214 {
8215 	int	i, j;
8216 	drmach_slot1_pause_t *slot1;
8217 
8218 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8219 		if ((slot1 = slot1_paused[i]) == NULL)
8220 			continue;
8221 
8222 		drmach_s1p_axq_update(&slot1->axq, iter);
8223 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8224 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8225 	}
8226 }
8227 
/*
 * Starcat hPCI Schizo devices.
 *
 * The name field is overloaded. NULL means the slot (interrupt concentrator
 * bus) is not used. intr_mask is a bit mask representing the 4 possible
 * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
 *
 * The table is indexed as [slot][schizo]; drmach_s1p_decode_slot_intr()
 * derives the slot from bits 2-4 of the ino and tests bit
 * (1 << interrupt line) of intr_mask.
 */
static struct {
	char	*name;
	uint8_t	intr_mask;
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8249 
/*
 * Map a Schizo internal interrupt number (0x30-0x37) to a description
 * of its source. Only the low three bits of ino select the text;
 * values without a defined source yield "Reserved".
 *
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
 * "Interrupt Registers", Table 22-69, page 306.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char	*source[] = {
		"Uncorrectable ECC error",	/* 0x0 */
		"Correctable ECC error",	/* 0x1 */
		"PCI Bus A Error",		/* 0x2 */
		"PCI Bus B Error",		/* 0x3 */
		"Safari Bus Error"		/* 0x4 */
	};
	int	idx;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	idx = ino & 0x7;
	if (idx < (int)(sizeof (source) / sizeof (source[0])))
		return (source[idx]);

	return ("Reserved");
}
8271 
8272 #define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8273 
8274 static void
8275 drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8276     int ino, drmach_sr_iter_t iter)
8277 {
8278 	uint8_t		intr_mask;
8279 	char		*slot_devname;
8280 	char		namebuf[OBP_MAXDRVNAME];
8281 	int		slot, intr_line, slot_valid, intr_valid;
8282 
8283 	ASSERT(ino >= 0 && ino <= 0x1f);
8284 	ASSERT((pci->regs[iter].slot_intr_state_diag &
8285 	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8286 	    COMMON_CLEAR_INTR_REG_IDLE);
8287 
8288 	slot = (ino >> 2) & 0x7;
8289 	intr_line = ino & 0x3;
8290 
8291 	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8292 	slot_valid = (slot_devname == NULL) ? 0 : 1;
8293 	if (!slot_valid) {
8294 		(void) snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)",
8295 		    slot);
8296 		slot_devname = namebuf;
8297 	}
8298 
8299 	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8300 	intr_valid = (1 << intr_line) & intr_mask;
8301 
8302 	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8303 	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8304 	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8305 }
8306 
8307 /*
8308  * Log interrupt source device info for all valid, pending interrupts
8309  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8310  * error in the error ctrl reg.
8311  */
8312 static void
8313 drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
8314     int unum, drmach_sr_iter_t iter)
8315 {
8316 	uint64_t	reg;
8317 	int		i, n, ino;
8318 	drmach_s1p_pci_t *pci;
8319 
8320 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8321 	ASSERT(unum < STARCAT_SLOT1_IO_MAX);
8322 
8323 	/*
8324 	 * Check the saved interrupt mapping registers. If interrupt is valid,
8325 	 * map the ino to the Schizo source device and check that the pci
8326 	 * slot and interrupt line are valid.
8327 	 */
8328 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8329 		pci = &schizo->pci[i];
8330 		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
8331 			reg = pci->regs[iter].intr_map_regs[n];
8332 			if (reg & COMMON_INTR_MAP_REG_VALID) {
8333 				ino = reg & COMMON_INTR_MAP_REG_INO;
8334 
8335 				if (ino <= 0x1f) {
8336 					/*
8337 					 * PCI slot interrupt
8338 					 */
8339 					drmach_s1p_decode_slot_intr(exp, unum,
8340 					    pci, ino, iter);
8341 				} else if (ino <= 0x2f) {
8342 					/*
8343 					 * OBIO interrupt
8344 					 */
8345 					prom_printf("IO%d/P%d OBIO interrupt: "
8346 					    "ino=0x%x\n", exp, unum, ino);
8347 				} else if (ino <= 0x37) {
8348 					/*
8349 					 * Internal interrupt
8350 					 */
8351 					prom_printf("IO%d/P%d Internal "
8352 					    "interrupt: ino=0x%x (%s)\n",
8353 					    exp, unum, ino,
8354 					    drmach_schz_internal_ino2str(ino));
8355 				} else {
8356 					/*
8357 					 * NewLink interrupt
8358 					 */
8359 					prom_printf("IO%d/P%d NewLink "
8360 					    "interrupt: ino=0x%x\n", exp,
8361 					    unum, ino);
8362 				}
8363 
8364 				DRMACH_PR("drmach_s1p_schizo_log_intr: "
8365 				    "exp=%d, schizo=%d, pci_leaf=%c, "
8366 				    "ino=0x%x, intr_map_reg=0x%lx\n",
8367 				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
8368 			}
8369 		}
8370 	}
8371 }
8372 
8373 /*
8374  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8375  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8376  */
8377 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8378 
8379 /*
8380  * Check for possible error indicators prior to resuming the
8381  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8382  */
8383 static void
8384 drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
8385     drmach_sr_iter_t iter)
8386 {
8387 	int	i, j;
8388 	int 	errflag = 0;
8389 	drmach_slot1_pause_t *slot1;
8390 
8391 	/*
8392 	 * Check for logged schizo bus error and pending interrupts.
8393 	 */
8394 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8395 		if ((slot1 = slot1_paused[i]) == NULL)
8396 			continue;
8397 
8398 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
8399 			if (slot1->schizo[j].csr_basepa == 0x0UL)
8400 				continue;
8401 
8402 			if (slot1->schizo[j].regs[iter].errlog &
8403 			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
8404 				if (!errflag) {
8405 					prom_printf("DR WARNING: interrupt "
8406 					    "attempt detected during "
8407 					    "copy-rename (%s):\n",
8408 					    (iter == DRMACH_POST_SUSPEND) ?
8409 					    "post suspend" : "pre resume");
8410 					++errflag;
8411 				}
8412 				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
8413 				    i, j, iter);
8414 			}
8415 		}
8416 	}
8417 
8418 	/*
8419 	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
8420 	 */
8421 	if (iter == DRMACH_PRE_RESUME) {
8422 		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8423 			if ((slot1 = slot1_paused[i]) == NULL)
8424 				continue;
8425 
8426 			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
8427 			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
8428 				prom_printf("DR WARNING: IO transactions "
8429 				    "detected on IO%d during copy-rename: "
8430 				    "AXQ l2_io_q performance counter "
8431 				    "start=%d, end=%d\n", i,
8432 				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
8433 				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
8434 			}
8435 		}
8436 	}
8437 }
8438 
/*
 * Node of a circular, doubly linked list of device instances.
 * drmach_sr_insert() takes a hold on dip when it creates a node and
 * drmach_sr_delete() releases it when the node is removed.
 */
struct drmach_sr_list {
	dev_info_t		*dip;
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};

/*
 * Drivers that are suspended last and resumed first, in the order in
 * which they are to be suspended. name is compared as a prefix of a
 * device's "name" property; ring is the head of the list of matching
 * instances (NULL when empty). The array must end with a NULL name.
 */
static struct drmach_sr_ordered {
	char			*name;
	struct drmach_sr_list	*ring;
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8453 
8454 static void
8455 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8456 {
8457 	struct drmach_sr_list *np;
8458 
8459 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", (void *)dip);
8460 
8461 	np = (struct drmach_sr_list *)kmem_alloc(
8462 	    sizeof (struct drmach_sr_list), KM_SLEEP);
8463 
8464 	ndi_hold_devi(dip);
8465 	np->dip = dip;
8466 
8467 	if (*lp == NULL) {
8468 		/* establish list */
8469 		*lp = np->next = np->prev = np;
8470 	} else {
8471 		/* place new node behind head node on ring list */
8472 		np->prev = (*lp)->prev;
8473 		np->next = *lp;
8474 		np->prev->next = np;
8475 		np->next->prev = np;
8476 	}
8477 }
8478 
8479 static void
8480 drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
8481 {
8482 	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", (void *)dip);
8483 
8484 	if (*lp) {
8485 		struct drmach_sr_list *xp;
8486 
8487 		/* start search with mostly likely node */
8488 		xp = (*lp)->prev;
8489 		do {
8490 			if (xp->dip == dip) {
8491 				xp->prev->next = xp->next;
8492 				xp->next->prev = xp->prev;
8493 
8494 				if (xp == *lp)
8495 					*lp = xp->next;
8496 				if (xp == *lp)
8497 					*lp = NULL;
8498 				xp->dip = NULL;
8499 				ndi_rele_devi(dip);
8500 				kmem_free(xp, sizeof (*xp));
8501 
8502 				DRMACH_PR("drmach_sr_delete:"
8503 				    " disposed sr node for dip %p",
8504 				    (void *)dip);
8505 				return;
8506 			}
8507 
8508 			DRMACH_PR("drmach_sr_delete: still searching\n");
8509 
8510 			xp = xp->prev;
8511 		} while (xp != (*lp)->prev);
8512 	}
8513 
8514 	/* every dip should be found during resume */
8515 	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", (void *)dip);
8516 }
8517 
8518 int
8519 drmach_verify_sr(dev_info_t *dip, int sflag)
8520 {
8521 	int	rv;
8522 	int	len;
8523 	char    name[OBP_MAXDRVNAME];
8524 
8525 	if (drmach_slot1_pause_debug) {
8526 		if (sflag && drmach_slot1_pause_init) {
8527 			drmach_slot1_pause_free(drmach_slot1_paused);
8528 			drmach_slot1_pause_init = 0;
8529 		} else if (!sflag && !drmach_slot1_pause_init) {
8530 			/* schedule init for next suspend */
8531 			drmach_slot1_pause_init = 1;
8532 		}
8533 	}
8534 
8535 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8536 	    "name", &len);
8537 	if (rv == DDI_PROP_SUCCESS) {
8538 		int		portid;
8539 		uint64_t	reg;
8540 		struct drmach_sr_ordered *op;
8541 
8542 		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8543 		    DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8544 
8545 		if (rv != DDI_PROP_SUCCESS)
8546 			return (0);
8547 
8548 		if (drmach_slot1_pause_debug && sflag &&
8549 		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8550 			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8551 			    drmach_slot1_paused);
8552 		}
8553 
8554 		for (op = drmach_sr_ordered; op->name; op++) {
8555 			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8556 				if (sflag)
8557 					drmach_sr_insert(&op->ring, dip);
8558 				else
8559 					drmach_sr_delete(&op->ring, dip);
8560 				return (1);
8561 			}
8562 		}
8563 	}
8564 
8565 	return (0);
8566 }
8567 
8568 static void
8569 drmach_sr_dip(dev_info_t *dip, int suspend)
8570 {
8571 	int	 rv;
8572 	major_t	 maj;
8573 	char	*name, *name_addr, *aka;
8574 
8575 	if ((name = ddi_get_name(dip)) == NULL)
8576 		name = "<null name>";
8577 	else if ((maj = ddi_name_to_major(name)) != -1)
8578 		aka = ddi_major_to_name(maj);
8579 	else
8580 		aka = "<unknown>";
8581 
8582 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8583 		name_addr = "<null>";
8584 
8585 	prom_printf("\t%s %s@%s (aka %s)\n",
8586 	    suspend ? "suspending" : "resuming",
8587 	    name, name_addr, aka);
8588 
8589 	if (suspend) {
8590 		rv = devi_detach(dip, DDI_SUSPEND);
8591 	} else {
8592 		rv = devi_attach(dip, DDI_RESUME);
8593 	}
8594 
8595 	if (rv != DDI_SUCCESS) {
8596 		prom_printf("\tFAILED to %s %s@%s\n",
8597 		    suspend ? "suspend" : "resume",
8598 		    name, name_addr);
8599 	}
8600 }
8601 
8602 void
8603 drmach_suspend_last()
8604 {
8605 	struct drmach_sr_ordered *op;
8606 
8607 	if (drmach_slot1_pause_debug)
8608 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8609 
8610 	/*
8611 	 * The ordering array declares the strict sequence in which
8612 	 * the named drivers are to suspended. Each element in
8613 	 * the array may have a double-linked ring list of driver
8614 	 * instances (dip) in the order in which they were presented
8615 	 * to drmach_verify_sr. If present, walk the list in the
8616 	 * forward direction to suspend each instance.
8617 	 */
8618 	for (op = drmach_sr_ordered; op->name; op++) {
8619 		if (op->ring) {
8620 			struct drmach_sr_list *rp;
8621 
8622 			rp = op->ring;
8623 			do {
8624 				drmach_sr_dip(rp->dip, 1);
8625 				rp = rp->next;
8626 			} while (rp != op->ring);
8627 		}
8628 	}
8629 
8630 	if (drmach_slot1_pause_debug) {
8631 		drmach_slot1_pause_update(drmach_slot1_paused,
8632 		    DRMACH_POST_SUSPEND);
8633 		drmach_slot1_pause_verify(drmach_slot1_paused,
8634 		    DRMACH_POST_SUSPEND);
8635 	}
8636 }
8637 
8638 void
8639 drmach_resume_first()
8640 {
8641 	struct drmach_sr_ordered *op = drmach_sr_ordered +
8642 	    (sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8643 
8644 	if (drmach_slot1_pause_debug) {
8645 		drmach_slot1_pause_update(drmach_slot1_paused,
8646 		    DRMACH_PRE_RESUME);
8647 		drmach_slot1_pause_verify(drmach_slot1_paused,
8648 		    DRMACH_PRE_RESUME);
8649 	}
8650 
8651 	op -= 1;	/* point at terminating element */
8652 
8653 	/*
8654 	 * walk ordering array and rings backwards to resume dips
8655 	 * in reverse order in which they were suspended
8656 	 */
8657 	while (--op >= drmach_sr_ordered) {
8658 		if (op->ring) {
8659 			struct drmach_sr_list *rp;
8660 
8661 			rp = op->ring->prev;
8662 			do {
8663 				drmach_sr_dip(rp->dip, 0);
8664 				rp = rp->prev;
8665 			} while (rp != op->ring->prev);
8666 		}
8667 	}
8668 }
8669 
8670 /*
8671  * Log a DR sysevent.
8672  * Return value: 0 success, non-zero failure.
8673  */
8674 int
8675 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8676 {
8677 	sysevent_t			*ev;
8678 	sysevent_id_t			eid;
8679 	int				rv, km_flag;
8680 	sysevent_value_t		evnt_val;
8681 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8682 	char				attach_pnt[MAXNAMELEN];
8683 
8684 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8685 	attach_pnt[0] = '\0';
8686 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8687 		rv = -1;
8688 		goto logexit;
8689 	}
8690 	if (verbose)
8691 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8692 		    attach_pnt, hint, flag, verbose);
8693 
8694 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8695 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8696 		rv = -2;
8697 		goto logexit;
8698 	}
8699 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8700 	evnt_val.value.sv_string = attach_pnt;
8701 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8702 	    &evnt_val, km_flag)) != 0)
8703 		goto logexit;
8704 
8705 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8706 	evnt_val.value.sv_string = hint;
8707 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8708 	    &evnt_val, km_flag)) != 0) {
8709 		sysevent_free_attr(evnt_attr_list);
8710 		goto logexit;
8711 	}
8712 
8713 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8714 
8715 	/*
8716 	 * Log the event but do not sleep waiting for its
8717 	 * delivery. This provides insulation from syseventd.
8718 	 */
8719 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8720 
8721 logexit:
8722 	if (ev)
8723 		sysevent_free(ev);
8724 	if ((rv != 0) && verbose)
8725 		cmn_err(CE_WARN,
8726 		    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8727 		    rv, attach_pnt, hint);
8728 
8729 	return (rv);
8730 }
8731 
8732 /*
8733  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8734  * Only the valid entries are modified, so the array should be zeroed out
8735  * initially.
8736  */
8737 static void
8738 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8739 	int	i;
8740 	char	c;
8741 
8742 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8743 
8744 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8745 		c = drmach_slice_table[i];
8746 
8747 		if (c & 0x20) {
8748 			slice_arr[i].valid = 1;
8749 			slice_arr[i].slice = c & 0x1f;
8750 		}
8751 	}
8752 }
8753 
8754 /*
8755  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8756  * Only the valid entries are modified, so the array should be zeroed out
8757  * initially.
8758  */
8759 static void
8760 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8761 	int		rv, exp, mcnum, bank;
8762 	uint64_t	madr;
8763 	drmachid_t	id;
8764 	drmach_board_t	*bp;
8765 	drmach_mem_t	*mp;
8766 	dr_memregs_t	*memregs;
8767 
8768 	/* CONSTCOND */
8769 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8770 
8771 	for (exp = 0; exp < 18; exp++) {
8772 		rv = drmach_array_get(drmach_boards,
8773 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8774 		ASSERT(rv == 0);	/* should never be out of bounds */
8775 		if (id == NULL) {
8776 			continue;
8777 		}
8778 
8779 		memregs = &regs_arr[exp];
8780 		bp = (drmach_board_t *)id;
8781 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8782 			mcnum = mp->dev.portid & 0x3;
8783 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8784 				drmach_mem_read_madr(mp, bank, &madr);
8785 				if (madr & DRMACH_MC_VALID_MASK) {
8786 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8787 					    exp, mcnum, bank, madr);
8788 					memregs->madr[mcnum][bank].hi =
8789 					    DRMACH_U64_TO_MCREGHI(madr);
8790 					memregs->madr[mcnum][bank].lo =
8791 					    DRMACH_U64_TO_MCREGLO(madr);
8792 				}
8793 			}
8794 		}
8795 	}
8796 }
8797 
8798 /*
8799  * Do not allow physical address range modification if either board on this
8800  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8801  *
8802  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8803  * install the cache line as owned/dirty as a result of the RTSR transaction.
8804  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8805  * list before the rename after flushing local caches.  When copy-rename
8806  * requires changing the physical address ranges (i.e. smaller memory target),
8807  * the bus sync list contains physical addresses that will not exist after the
8808  * rename.  If these cache lines are owned due to a RTSR, a system error can
8809  * occur following the rename when these cache lines are evicted and a writeback
8810  * is attempted.
8811  *
8812  * Incoming parameter represents either the copy-rename source or a candidate
8813  * target memory board.  On Starcat, only slot0 boards may have memory.
8814  */
8815 int
8816 drmach_allow_memrange_modify(drmachid_t s0id)
8817 {
8818 	drmach_board_t	*s0bp, *s1bp;
8819 	drmachid_t	s1id;
8820 	int		rv;
8821 
8822 	s0bp = s0id;
8823 
8824 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8825 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8826 
8827 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8828 		/*
8829 		 * This is reason enough to fail the request, no need
8830 		 * to check the device list for cpus.
8831 		 */
8832 		return (0);
8833 	}
8834 
8835 	/*
8836 	 * Check for MCPU board on the same expander.
8837 	 *
8838 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8839 	 * types, as it is derived at from the POST gdcd board flag
8840 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8841 	 * ignored) for boards with no processors.  Since NULL proc LPA
8842 	 * applies only to processors, we walk the devices array to detect
8843 	 * MCPUs.
8844 	 */
8845 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8846 	s1bp = s1id;
8847 	if (rv == 0 && s1bp != NULL) {
8848 
8849 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8850 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8851 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8852 		    DRMACH_BNUM2EXP(s1bp->bnum));
8853 
8854 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8855 		    s1bp->devices != NULL) {
8856 			int		d_idx;
8857 			drmachid_t	d_id;
8858 
8859 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8860 			while (rv == 0) {
8861 				if (DRMACH_IS_CPU_ID(d_id)) {
8862 					/*
8863 					 * Fail MCPU in NULL LPA mode.
8864 					 */
8865 					return (0);
8866 				}
8867 
8868 				rv = drmach_array_next(s1bp->devices, &d_idx,
8869 				    &d_id);
8870 			}
8871 		}
8872 	}
8873 
8874 	return (1);
8875 }
8876