xref: /titanic_50/usr/src/uts/sun4u/starcat/io/drmach.c (revision 64d1d4ab72834b7483c7962efc738b568ca8792e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/debug.h>
28 #include <sys/types.h>
29 #include <sys/varargs.h>
30 #include <sys/errno.h>
31 #include <sys/cred.h>
32 #include <sys/dditypes.h>
33 #include <sys/devops.h>
34 #include <sys/modctl.h>
35 #include <sys/poll.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/ndi_impldefs.h>
41 #include <sys/stat.h>
42 #include <sys/kmem.h>
43 #include <sys/vmem.h>
44 #include <sys/disp.h>
45 #include <sys/processor.h>
46 #include <sys/cheetahregs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/mem_config.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/systm.h>
51 #include <sys/machsystm.h>
52 #include <sys/autoconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sysmacros.h>
55 #include <sys/x_call.h>
56 #include <sys/promif.h>
57 #include <sys/prom_plat.h>
58 #include <sys/membar.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/mem_cage.h>
61 #include <sys/stack.h>
62 #include <sys/archsystm.h>
63 #include <vm/hat_sfmmu.h>
64 #include <sys/pte.h>
65 #include <sys/mmu.h>
66 #include <sys/cpu_module.h>
67 #include <sys/obpdefs.h>
68 #include <sys/mboxsc.h>
69 #include <sys/plat_ecc_dimm.h>
70 
71 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
72 #include <sys/schpc.h>
73 #include <sys/pci.h>
74 
75 #include <sys/starcat.h>
76 #include <sys/cpu_sgnblk_defs.h>
77 #include <sys/drmach.h>
78 #include <sys/dr_util.h>
79 #include <sys/dr_mbx.h>
80 #include <sys/sc_gptwocfg.h>
81 #include <sys/iosramreg.h>
82 #include <sys/iosramio.h>
83 #include <sys/iosramvar.h>
84 #include <sys/axq.h>
85 #include <sys/post/scat_dcd.h>
86 #include <sys/kobj.h>
87 #include <sys/taskq.h>
88 #include <sys/cmp.h>
89 #include <sys/sbd_ioctl.h>
90 
91 #include <sys/sysevent.h>
92 #include <sys/sysevent/dr.h>
93 #include <sys/sysevent/eventdefs.h>
94 
95 #include <sys/pci/pcisch.h>
96 #include <sys/pci/pci_regs.h>
97 
98 #include <sys/ontrap.h>
99 
100 /* defined in ../ml/drmach.il.cpp */
101 extern void		bcopy32_il(uint64_t, uint64_t);
102 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
103 extern void		flush_dcache_il(void);
104 extern void		flush_icache_il(void);
105 extern void		flush_pcache_il(void);
106 
107 /* defined in ../ml/drmach_asm.s */
108 extern uint64_t		lddmcdecode(uint64_t physaddr);
109 extern uint64_t		lddsafconfig(void);
110 
111 /* XXX here until provided by sys/dman.h */
112 extern int man_dr_attach(dev_info_t *);
113 extern int man_dr_detach(dev_info_t *);
114 
/*
 * A board number packs the expander number in its upper bits and the
 * slot (0 or 1) in bit 0.
 */
#define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
#define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
#define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))

/*
 * A memory slice number is a 5-bit value held in bits 37 through 41 of
 * a physical address.
 */
#define	DRMACH_SLICE_MASK		0x1Full
#define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
#define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
122 
123 /*
124  * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
125  * available address space and the usable address space for every slice.
 * There must be a distinction between the available and usable due to a
 * restriction imposed by CDC memory size.
128  */
129 
#define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
#define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */

/* bank count; NOTE(review): presumably banks per memory controller */
#define	DRMACH_MC_NBANKS		4

/*
 * Per-bank address for a drmach_mem_t: 8-byte stride, starting 16 bytes
 * past the unit's madr_pa. The ASI form keeps only the low 8 bits.
 */
#define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
#define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)

/* address located 0x50 bytes past a drmach_mem_t's madr_pa */
#define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
#define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
141 
142 /*
143  * The Cheetah's Safari Configuration Register and the Schizo's
 * Safari Control/Status Register place the LPA base and bound fields in
 * the same bit locations within their register word. This source code takes
146  * advantage of this by defining only one set of LPA encoding/decoding macros
147  * which are shared by various Cheetah and Schizo drmach routines.
148  */
/* LPA base is the 6-bit field at bits 3-8, LPA bound the one at bits 9-14 */
#define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
#define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)

/*
 * Convert between a register word and a physical address. Both fields
 * map onto physical address bits 37-42, hence the different shifts.
 */
#define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
#define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
#define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
#define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)

/* true when board b does not have DRMACH_NULL_PROC_LPA set in its flags */
#define	DRMACH_L1_SET_LPA(b)		\
	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)

/* NOTE(review): presumably the PA and byte size of CPU SRAM -- confirm */
#define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
#define	DRMACH_CPU_SRAM_SIZE    	0x20000ull

/*
 * Name properties for frequently accessed device nodes.
 */
#define	DRMACH_CPU_NAMEPROP		"cpu"
#define	DRMACH_CMP_NAMEPROP		"cmp"
#define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
#define	DRMACH_PCI_NAMEPROP		"pci"
170 
/*
 * Maximum value of processor Safari Timeout Log (TOL) field of
 * Safari Config reg (7 secs), expressed in microseconds.
 *
 * The expansion is parenthesized so the macro keeps its value when used
 * inside a larger expression (for example as a divisor).
 */
#define	DRMACH_SAF_TOL_MAX		(7 * 1000000)
176 
177 /*
178  * drmach_board_t flag definitions
179  */
180 #define	DRMACH_NULL_PROC_LPA		0x1
181 
/*
 * An address and a size, each split into separate hi and lo 32-bit words.
 * NOTE(review): presumably mirrors one entry of a "reg" property --
 * confirm against the code that reads it.
 */
typedef struct {
	uint32_t	reg_addr_hi;
	uint32_t	reg_addr_lo;
	uint32_t	reg_size_hi;
	uint32_t	reg_size_lo;
} drmach_reg_t;
188 
/*
 * Argument handed to every tree walk callback: the node handle being
 * walked (its "here" field tracks the current position) and the caller's
 * opaque data pointer.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;
193 
/*
 * Handle onto one device tree node plus the operations used to query it.
 * drmach_node_new() installs either the OBP or the DDI set of operations.
 */
typedef struct drmach_node {
	/* current position: a pnode_t for the OBP ops, a dip for the DDI ops */
	void		*here;

	/* return the node id for "here" */
	pnode_t		 (*get_dnode)(struct drmach_node *node);
	/* invoke cb on tree nodes until it returns non-zero */
	int		 (*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	/* return the dev_info node for "here", or NULL */
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	/* fetch a property length; 0 on success, -1 on failure */
	int		 (*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	/* fetch a property value into buf; 0 on success, -1 on failure */
	int		 (*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	/* fill in pnode as a handle on the parent; 0 on success, -1 if none */
	int		 (*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
208 
/*
 * Bounds-checked array of drmachid_t values, indexed from min_index to
 * max_index inclusive. See the drmach_array_*() routines.
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr in bytes */
	drmachid_t	*arr;		/* slot for index i is arr[i - min_index] */
} drmach_array_t;
215 
/*
 * Header placed first in every drmach object (boards and devices) so a
 * drmachid_t can be inspected through DRMACH_OBJ().
 */
typedef struct {
	/*
	 * Type tag: holds the address of the constructor that built the
	 * object; the DRMACH_IS_*_ID() macros compare against it.
	 */
	void		*isa;

	/* per-type operations */
	void		 (*dispose)(drmachid_t);
	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;
225 
226 struct drmach_board;
227 typedef struct drmach_board drmach_board_t;
228 
/*
 * State common to every device object. drmach_device_new() fills in a
 * prototype (type, bp, node, portid, unum) and hands it to the per-type
 * constructor.
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first; see DRMACH_OBJ() */
	const char	*type;		/* type string from drmach_name2type[] */
	drmach_board_t	*bp;		/* board passed to drmach_device_new() */
	drmach_node_t	*node;		/* device tree handle */
	int		 portid;
	int		 unum;		/* unit number, initially from portid */
	int		 busy;
	int		 powered;
} drmach_device_t;
239 
/* CPU device object. */
typedef struct drmach_cpu {
	drmach_device_t	 dev;		/* must be first */
	/* NOTE(review): presumably PA of the Safari config register */
	uint64_t	 scr_pa;
	processorid_t	 cpuid;
	int		 coreid;
} drmach_cpu_t;
246 
/* Memory device object. */
typedef struct drmach_mem {
	drmach_device_t	 dev;		/* must be first */
	/* next memory unit in the chain that drmach_configure() follows */
	struct drmach_mem *next;
	uint64_t	 nbytes;
	/* base physical address used by the DRMACH_MC_*() macros */
	uint64_t	 madr_pa;
} drmach_mem_t;
253 
/* I/O device object. */
typedef struct drmach_io {
	drmach_device_t	 dev;		/* must be first */
	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
} drmach_io_t;
258 
/* Board object, created by drmach_board_new(). */
struct drmach_board {
	drmach_common_t	 cm;		/* must be first; see DRMACH_OBJ() */
	int		 bnum;		/* board number */
	int		 assigned;
	int		 powered;
	int		 connected;
	int		 empty;
	int		 cond;
	uint_t		 cpu_impl;
	uint_t		 flags;		/* DRMACH_NULL_PROC_LPA, see above */
	drmach_node_t	*tree;
	drmach_array_t	*devices;
	drmach_mem_t	*mem;
	uint64_t	 stardrb_offset;
	char		 type[BD_TYPELEN];
};
275 
/*
 * Argument bundle for a configure operation.
 * NOTE(review): its users are not visible in this part of the file;
 * field meanings below are taken from the names only -- confirm.
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;
	sbd_error_t	*err;
	dev_info_t	*fdip;
} drmach_config_args_t;
282 
/*
 * State carried through a board device search.
 * NOTE(review): its users are not visible in this part of the file;
 * presumably "found" is invoked with "a" for each device -- confirm.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
290 
/*
 * A memory slice number paired with a validity flag.
 * NOTE(review): presumably describes one CASM slot -- confirm with users.
 */
typedef struct drmach_casmslot {
	int	valid;
	int	slice;
} drmach_casmslot_t;
295 
/* Result codes stored in the ecode field of drmach_copy_rename_t. */
typedef enum {
	DRMACH_CR_OK,
	DRMACH_CR_MC_IDLE_ERR,
	DRMACH_CR_IOPAUSE_ERR,
	DRMACH_CR_ONTRAP_ERR
} drmach_cr_err_t;
302 
/*
 * Context for a copy-rename operation.
 * NOTE(review): its users are not visible in this part of the file; the
 * s_ and t_ prefixes presumably mean source and target -- confirm.
 */
typedef struct {
	void		*isa;
	caddr_t		 data;
	drmach_mem_t	*s_mp;
	drmach_mem_t	*t_mp;
	struct memlist	*c_ml;
	uint64_t	 s_copybasepa;
	uint64_t	 t_copybasepa;
	drmach_cr_err_t	 ecode;		/* outcome of the operation */
	void		*earg;
} drmach_copy_rename_t;
314 
315 /*
316  * The following global is read as a boolean value, non-zero is true.
317  * If zero, DR copy-rename and cpu poweron will not set the processor
318  * LPA settings (CBASE, CBND of Safari config register) to correspond
319  * to the current memory slice map. LPAs of processors present at boot
320  * will remain as programmed by POST. LPAs of processors on boards added
 * by DR will remain NULL, as programmed by POST. This can be used to
 * override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
323  * POST in the LDCD (and copied to the GDCD by SMS).
324  *
325  * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
326  * to Schizo device LPAs. These are always set by DR.
327  */
328 static int		 drmach_reprogram_lpa = 1;
329 
330 /*
331  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
332  * can fail to receive an XIR. To workaround this issue until a hardware
333  * fix is implemented, we will exclude the selection of these CPUs.
334  * Setting this to 0 will allow their selection again.
335  */
336 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
337 
/* non-zero makes drmach_node_new() install the DDI ops instead of OBP */
static int		 drmach_initialized;
/* NOTE(review): presumably indexed by board number -- confirm */
static drmach_array_t	*drmach_boards;

/* NOTE(review): presumably per-try delay and retry limit -- confirm */
static int		 drmach_cpu_delay = 1000;
static int		 drmach_cpu_ntries = 50000;

/* one entry per expander; guarded by drmach_slice_table_lock (per name) */
static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
static kmutex_t		 drmach_slice_table_lock;

/* NOTE(review): presumably the per-CPU SRAM mappings -- confirm */
tte_t			 drmach_cpu_sram_tte[NCPU];
caddr_t			 drmach_cpu_sram_va;
349 
350 /*
351  * Setting to non-zero will enable delay before all disconnect ops.
352  */
353 static int		 drmach_unclaim_delay_all;
354 /*
355  * Default delay is slightly greater than the max processor Safari timeout.
356  * This delay is intended to ensure the outstanding Safari activity has
357  * retired on this board prior to a board disconnect.
358  */
359 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
360 
361 /*
362  * By default, DR of non-Panther procs is not allowed into a Panther
363  * domain with large page sizes enabled.  Setting this to 0 will remove
364  * the restriction.
365  */
366 static int		 drmach_large_page_restriction = 1;
367 
368 /*
369  * Used to pass updated LPA values to procs.
370  * Protocol is to clear the array before use.
371  */
372 volatile uchar_t	*drmach_xt_mb;
373 volatile uint64_t	 drmach_xt_ready;
374 static kmutex_t		 drmach_xt_mb_lock;
375 static int		 drmach_xt_mb_size;
376 
/*
 * NOTE(review): the 18 * 4 * 4 + 1 sizing presumably allows one entry per
 * expander/port/bank plus a terminator -- confirm with
 * drmach_bus_sync_list_update().
 */
uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
static kmutex_t		 drmach_bus_sync_lock;
379 
380 static sbd_error_t	*drmach_device_new(drmach_node_t *,
381 				drmach_board_t *, int, drmachid_t *);
382 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
383 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
384 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
385 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
386 
387 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
388 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
389 				char *name, void *buf, int len);
390 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
391 				char *name, int *len);
392 
393 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
394 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
395 				char *name, void *buf, int len);
396 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
397 				char *name, int *len);
398 
399 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
400 				caddr_t obufp, int olen,
401 				caddr_t ibufp, int ilen);
402 
403 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
404 sbd_error_t		*drmach_io_post_release(drmachid_t id);
405 
406 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
407 				drmach_device_t **dpp, cpu_flag_t *oflags);
408 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
409 				cpu_flag_t oflags);
410 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
411 void			drmach_iocage_mem_scrub(uint64_t nbytes);
412 
413 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
414 
415 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
416 
417 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
418 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
419 
420 static void		 drmach_bus_sync_list_update(void);
421 static void		 drmach_slice_table_update(drmach_board_t *, int);
422 static int		 drmach_portid2bnum(int);
423 
424 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
425 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
426 
427 static int		drmach_panther_boards(void);
428 
429 static int		drmach_name2type_idx(char *);
430 
/*
 * Debug output helpers. Each expands to a bare "if (...) fn" prefix, so
 * the call site supplies the argument list. Because the expansion is an
 * unbraced if, never use these directly ahead of an "else".
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
#define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
#else

/* non-DEBUG builds: the condition is constant false, calls never run */
#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
#endif /* DEBUG */
441 
/*
 * Object identification macros.
 *
 * Every drmach object begins with a drmach_common_t whose isa field holds
 * the address of the constructor that created it. These macros test a
 * drmachid_t against those constructor addresses.
 *
 * The id argument is fully parenthesized so that any expression may be
 * passed. It is still evaluated more than once, so do not pass an
 * expression with side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device object (cpu, mem or io), false for boards */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any drmach object, board or device */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
472 
/*
 * Build an ESTC_INTERNAL error whose text is "drmach.c <line>", where
 * <line> is the source line at which the macro is expanded.
 */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
static char		*drmach_ie_fmt = "drmach.c %d";
476 
/*
 * Maps a device node "name" property to a DR device type string and the
 * constructor drmach_device_new() calls for it. The "cmp" entry has no
 * constructor; drmach_device_new() filters that name out before the
 * constructor would be called.
 */
static struct {
	const char	 *name;
	const char	 *type;
	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
} drmach_name2type[] = {
	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
};
490 
491 /*
492  * drmach autoconfiguration data structures and interfaces
493  */
494 
extern struct mod_ops mod_miscops;

/* this module is a "misc" module with no driver entry points of its own */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Sun Fire 15000 DR"
};

/* linkage handed to mod_install(), mod_remove() and mod_info() below */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
507 
508 /*
509  * drmach_boards_rwlock is used to synchronize read/write
510  * access to drmach_boards array between status and board lookup
511  * as READERS, and assign, and unassign threads as WRITERS.
512  */
513 static krwlock_t	drmach_boards_rwlock;
514 
/* initialized in _init(); destroyed there if mod_install() fails */
static kmutex_t		drmach_i_lock;
/*
 * I/O cage state.
 * NOTE(review): the code using these is not visible in this part of the
 * file; presumably the lock and cv serialize use of the cage and
 * drmach_iocage_is_busy marks it in use -- confirm.
 */
static kmutex_t		drmach_iocage_lock;
static kcondvar_t 	drmach_iocage_cv;
static int		drmach_iocage_is_busy = 0;
uint64_t		drmach_iocage_paddr;
static caddr_t		drmach_iocage_vaddr;
static int		drmach_iocage_size = 0;
/* NOTE(review): -1 presumably means "not yet determined" -- confirm */
static int		drmach_is_cheetah = -1;
523 
524 int
525 _init(void)
526 {
527 	int	err;
528 
529 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
530 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
531 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
532 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
533 	    drmach_xt_mb_size, VM_SLEEP);
534 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
535 	if ((err = mod_install(&modlinkage)) != 0) {
536 		mutex_destroy(&drmach_i_lock);
537 		rw_destroy(&drmach_boards_rwlock);
538 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
539 		    drmach_xt_mb_size);
540 	}
541 
542 	return (err);
543 }
544 
545 int
546 _fini(void)
547 {
548 	static void	drmach_fini(void);
549 	int		err;
550 
551 	if ((err = mod_remove(&modlinkage)) == 0)
552 		drmach_fini();
553 
554 	return (err);
555 }
556 
557 int
558 _info(struct modinfo *modinfop)
559 {
560 	return (mod_info(&modlinkage, modinfop));
561 }
562 
563 /*
564  * drmach_node_* routines serve the purpose of separating the
565  * rest of the code from the device tree and OBP.  This is necessary
566  * because of In-Kernel-Probing.  Devices probed after stod, are probed
567  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
568  * have dnode ids.
569  */
570 
571 static int
572 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
573 {
574 	pnode_t		nodeid;
575 	static char	*fn = "drmach_node_obp_get_parent";
576 
577 	nodeid = np->get_dnode(np);
578 	if (nodeid == OBP_NONODE) {
579 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
580 		return (-1);
581 	}
582 
583 	bcopy(np, pp, sizeof (drmach_node_t));
584 
585 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
586 	if (pp->here == OBP_NONODE) {
587 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
588 		return (-1);
589 	}
590 
591 	return (0);
592 }
593 
594 static pnode_t
595 drmach_node_obp_get_dnode(drmach_node_t *np)
596 {
597 	return ((pnode_t)(uintptr_t)np->here);
598 }
599 
/*
 * Context passed through ddi_walk_devs() to drmach_node_ddi_walk_cb():
 * the arguments for the caller's callback, the callback itself, and the
 * value it last returned.
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;
	int 			(*cb)(drmach_node_walk_args_t *args);
	int			err;
} drmach_node_ddi_walk_args_t;
605 
606 int
607 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
608 {
609 	drmach_node_ddi_walk_args_t	*nargs;
610 
611 	nargs = (drmach_node_ddi_walk_args_t *)arg;
612 
613 	/*
614 	 * dip doesn't have to be held here as we are called
615 	 * from ddi_walk_devs() which holds the dip.
616 	 */
617 	nargs->nwargs->node->here = (void *)dip;
618 
619 	nargs->err = nargs->cb(nargs->nwargs);
620 
621 	/*
622 	 * Set "here" to NULL so that unheld dip is not accessible
623 	 * outside ddi_walk_devs()
624 	 */
625 	nargs->nwargs->node->here = NULL;
626 
627 	if (nargs->err)
628 		return (DDI_WALK_TERMINATE);
629 	else
630 		return (DDI_WALK_CONTINUE);
631 }
632 
633 static int
634 drmach_node_ddi_walk(drmach_node_t *np, void *data,
635 		int (*cb)(drmach_node_walk_args_t *args))
636 {
637 	drmach_node_walk_args_t		args;
638 	drmach_node_ddi_walk_args_t	nargs;
639 
640 	/* initialized args structure for callback */
641 	args.node = np;
642 	args.data = data;
643 
644 	nargs.nwargs = &args;
645 	nargs.cb = cb;
646 	nargs.err = 0;
647 
648 	/*
649 	 * Root node doesn't have to be held in any way.
650 	 */
651 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
652 		(void *)&nargs);
653 
654 	return (nargs.err);
655 }
656 
657 static int
658 drmach_node_obp_walk(drmach_node_t *np, void *data,
659 		int (*cb)(drmach_node_walk_args_t *args))
660 {
661 	pnode_t			nodeid;
662 	int			rv;
663 	drmach_node_walk_args_t	args;
664 
665 	/* initialized args structure for callback */
666 	args.node = np;
667 	args.data = data;
668 
669 	nodeid = prom_childnode(prom_rootnode());
670 
671 	/* save our new position within the tree */
672 	np->here = (void *)(uintptr_t)nodeid;
673 
674 	rv = 0;
675 	while (nodeid != OBP_NONODE) {
676 
677 		pnode_t child;
678 
679 		rv = (*cb)(&args);
680 		if (rv)
681 			break;
682 
683 		child = prom_childnode(nodeid);
684 		np->here = (void *)(uintptr_t)child;
685 
686 		while (child != OBP_NONODE) {
687 			rv = (*cb)(&args);
688 			if (rv)
689 				break;
690 
691 			child = prom_nextnode(child);
692 			np->here = (void *)(uintptr_t)child;
693 		}
694 
695 		nodeid = prom_nextnode(nodeid);
696 
697 		/* save our new position within the tree */
698 		np->here = (void *)(uintptr_t)nodeid;
699 	}
700 
701 	return (rv);
702 }
703 
704 static int
705 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
706 {
707 	dev_info_t	*ndip;
708 	static char	*fn = "drmach_node_ddi_get_parent";
709 
710 	ndip = np->n_getdip(np);
711 	if (ndip == NULL) {
712 		cmn_err(CE_WARN, "%s: NULL dip", fn);
713 		return (-1);
714 	}
715 
716 	bcopy(np, pp, sizeof (drmach_node_t));
717 
718 	pp->here = (void *)ddi_get_parent(ndip);
719 	if (pp->here == NULL) {
720 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
721 		return (-1);
722 	}
723 
724 	return (0);
725 }
726 
727 /*ARGSUSED*/
728 static pnode_t
729 drmach_node_ddi_get_dnode(drmach_node_t *np)
730 {
731 	return ((pnode_t)NULL);
732 }
733 
734 static drmach_node_t *
735 drmach_node_new(void)
736 {
737 	drmach_node_t *np;
738 
739 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
740 
741 	if (drmach_initialized) {
742 		np->get_dnode = drmach_node_ddi_get_dnode;
743 		np->walk = drmach_node_ddi_walk;
744 		np->n_getdip = drmach_node_ddi_get_dip;
745 		np->n_getproplen = drmach_node_ddi_get_proplen;
746 		np->n_getprop = drmach_node_ddi_get_prop;
747 		np->get_parent = drmach_node_ddi_get_parent;
748 	} else {
749 		np->get_dnode = drmach_node_obp_get_dnode;
750 		np->walk = drmach_node_obp_walk;
751 		np->n_getdip = drmach_node_obp_get_dip;
752 		np->n_getproplen = drmach_node_obp_get_proplen;
753 		np->n_getprop = drmach_node_obp_get_prop;
754 		np->get_parent = drmach_node_obp_get_parent;
755 	}
756 
757 	return (np);
758 }
759 
760 static void
761 drmach_node_dispose(drmach_node_t *np)
762 {
763 	kmem_free(np, sizeof (*np));
764 }
765 
766 /*
767  * Check if a CPU node is part of a CMP.
768  */
769 static int
770 drmach_is_cmp_child(dev_info_t *dip)
771 {
772 	dev_info_t *pdip;
773 
774 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
775 		return (0);
776 	}
777 
778 	pdip = ddi_get_parent(dip);
779 
780 	ASSERT(pdip);
781 
782 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
783 		return (1);
784 	}
785 
786 	return (0);
787 }
788 
789 static dev_info_t *
790 drmach_node_obp_get_dip(drmach_node_t *np)
791 {
792 	pnode_t		nodeid;
793 	dev_info_t	*dip;
794 
795 	nodeid = np->get_dnode(np);
796 	if (nodeid == OBP_NONODE)
797 		return (NULL);
798 
799 	dip = e_ddi_nodeid_to_dip(nodeid);
800 	if (dip) {
801 		/*
802 		 * The branch rooted at dip will have been previously
803 		 * held, or it will be the child of a CMP. In either
804 		 * case, the hold acquired in e_ddi_nodeid_to_dip()
805 		 * is not needed.
806 		 */
807 		ddi_release_devi(dip);
808 		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
809 	}
810 
811 	return (dip);
812 }
813 
814 static dev_info_t *
815 drmach_node_ddi_get_dip(drmach_node_t *np)
816 {
817 	return ((dev_info_t *)np->here);
818 }
819 
820 static int
821 drmach_node_walk(drmach_node_t *np, void *param,
822 		int (*cb)(drmach_node_walk_args_t *args))
823 {
824 	return (np->walk(np, param, cb));
825 }
826 
827 static int
828 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
829 {
830 	int		rv = 0;
831 	dev_info_t	*ndip;
832 	static char	*fn = "drmach_node_ddi_get_prop";
833 
834 	ndip = np->n_getdip(np);
835 	if (ndip == NULL) {
836 		cmn_err(CE_WARN, "%s: NULL dip", fn);
837 		rv = -1;
838 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
839 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
840 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
841 		rv = -1;
842 	}
843 
844 	return (rv);
845 }
846 
847 /* ARGSUSED */
848 static int
849 drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
850 {
851 	int		rv = 0;
852 	pnode_t		nodeid;
853 	static char	*fn = "drmach_node_obp_get_prop";
854 
855 	nodeid = np->get_dnode(np);
856 	if (nodeid == OBP_NONODE) {
857 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
858 		rv = -1;
859 	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
860 		rv = -1;
861 	} else {
862 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
863 	}
864 
865 	return (rv);
866 }
867 
868 static int
869 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
870 {
871 	int		rv = 0;
872 	dev_info_t	*ndip;
873 
874 	ndip = np->n_getdip(np);
875 	if (ndip == NULL) {
876 		rv = -1;
877 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
878 			name, len) != DDI_PROP_SUCCESS) {
879 		rv = -1;
880 	}
881 
882 	return (rv);
883 }
884 
885 static int
886 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
887 {
888 	pnode_t	 nodeid;
889 	int	 rv;
890 
891 	nodeid = np->get_dnode(np);
892 	if (nodeid == OBP_NONODE)
893 		rv = -1;
894 	else {
895 		*len = prom_getproplen(nodeid, (caddr_t)name);
896 		rv = (*len < 0 ? -1 : 0);
897 	}
898 
899 	return (rv);
900 }
901 
902 static drmachid_t
903 drmach_node_dup(drmach_node_t *np)
904 {
905 	drmach_node_t *dup;
906 
907 	dup = drmach_node_new();
908 	dup->here = np->here;
909 	dup->get_dnode = np->get_dnode;
910 	dup->walk = np->walk;
911 	dup->n_getdip = np->n_getdip;
912 	dup->n_getproplen = np->n_getproplen;
913 	dup->n_getprop = np->n_getprop;
914 	dup->get_parent = np->get_parent;
915 
916 	return (dup);
917 }
918 
919 /*
920  * drmach_array provides convenient array construction, access,
921  * bounds checking and array destruction logic.
922  */
923 
924 static drmach_array_t *
925 drmach_array_new(int min_index, int max_index)
926 {
927 	drmach_array_t *arr;
928 
929 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
930 
931 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
932 	if (arr->arr_sz > 0) {
933 		arr->min_index = min_index;
934 		arr->max_index = max_index;
935 
936 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
937 		return (arr);
938 	} else {
939 		kmem_free(arr, sizeof (*arr));
940 		return (0);
941 	}
942 }
943 
944 static int
945 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
946 {
947 	if (idx < arr->min_index || idx > arr->max_index)
948 		return (-1);
949 	else {
950 		arr->arr[idx - arr->min_index] = val;
951 		return (0);
952 	}
953 	/*NOTREACHED*/
954 }
955 
956 static int
957 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
958 {
959 	if (idx < arr->min_index || idx > arr->max_index)
960 		return (-1);
961 	else {
962 		*val = arr->arr[idx - arr->min_index];
963 		return (0);
964 	}
965 	/*NOTREACHED*/
966 }
967 
968 static int
969 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
970 {
971 	int rv;
972 
973 	*idx = arr->min_index;
974 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
975 		*idx += 1;
976 
977 	return (rv);
978 }
979 
980 static int
981 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
982 {
983 	int rv;
984 
985 	*idx += 1;
986 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
987 		*idx += 1;
988 
989 	return (rv);
990 }
991 
992 static void
993 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
994 {
995 	drmachid_t	val;
996 	int		idx;
997 	int		rv;
998 
999 	rv = drmach_array_first(arr, &idx, &val);
1000 	while (rv == 0) {
1001 		(*disposer)(val);
1002 
1003 		/* clear the array entry */
1004 		rv = drmach_array_set(arr, idx, NULL);
1005 		ASSERT(rv == 0);
1006 
1007 		rv = drmach_array_next(arr, &idx, &val);
1008 	}
1009 
1010 	kmem_free(arr->arr, arr->arr_sz);
1011 	kmem_free(arr, sizeof (*arr));
1012 }
1013 
1014 
1015 static gdcd_t *
1016 drmach_gdcd_new()
1017 {
1018 	gdcd_t *gdcd;
1019 
1020 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1021 
1022 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1023 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1024 bail:
1025 		kmem_free(gdcd, sizeof (gdcd_t));
1026 		return (NULL);
1027 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1028 		goto bail;
1029 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1030 		goto bail;
1031 	}
1032 
1033 	return (gdcd);
1034 }
1035 
1036 static void
1037 drmach_gdcd_dispose(gdcd_t *gdcd)
1038 {
1039 	kmem_free(gdcd, sizeof (gdcd_t));
1040 }
1041 
1042 /*ARGSUSED*/
1043 sbd_error_t *
1044 drmach_configure(drmachid_t id, int flags)
1045 {
1046 	drmach_device_t	*dp;
1047 	dev_info_t	*rdip;
1048 	sbd_error_t	*err = NULL;
1049 
1050 	/*
1051 	 * On Starcat, there is no CPU driver, so it is
1052 	 * not necessary to configure any CPU nodes.
1053 	 */
1054 	if (DRMACH_IS_CPU_ID(id)) {
1055 		return (NULL);
1056 	}
1057 
1058 	for (; id; ) {
1059 		dev_info_t	*fdip = NULL;
1060 
1061 		if (!DRMACH_IS_DEVICE_ID(id))
1062 			return (drerr_new(0, ESTC_INAPPROP, NULL));
1063 		dp = id;
1064 
1065 		rdip = dp->node->n_getdip(dp->node);
1066 
1067 		/*
1068 		 * We held this branch earlier, so at a minimum its
1069 		 * root should still be present in the device tree.
1070 		 */
1071 		ASSERT(rdip);
1072 
1073 		DRMACH_PR("drmach_configure: configuring DDI branch");
1074 
1075 		ASSERT(e_ddi_branch_held(rdip));
1076 		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
1077 			if (err == NULL) {
1078 				/*
1079 				 * Record first failure but don't stop
1080 				 */
1081 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1082 				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
1083 
1084 				(void) ddi_pathname(dip, path);
1085 				err = drerr_new(1, ESTC_DRVFAIL, path);
1086 
1087 				kmem_free(path, MAXPATHLEN);
1088 			}
1089 
1090 			/*
1091 			 * If non-NULL, fdip is returned held and must be
1092 			 * released.
1093 			 */
1094 			if (fdip != NULL) {
1095 				ddi_release_devi(fdip);
1096 			}
1097 		}
1098 
1099 		if (DRMACH_IS_MEM_ID(id)) {
1100 			drmach_mem_t	*mp = id;
1101 			id = mp->next;
1102 		} else {
1103 			id = NULL;
1104 		}
1105 	}
1106 
1107 	return (err);
1108 }
1109 
1110 static sbd_error_t *
1111 drmach_device_new(drmach_node_t *node,
1112 	drmach_board_t *bp, int portid, drmachid_t *idp)
1113 {
1114 	int		i, rv, device_id, unum;
1115 	char		name[OBP_MAXDRVNAME];
1116 	drmach_device_t	proto;
1117 
1118 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1119 	if (rv) {
1120 		sbd_error_t *err;
1121 
1122 		/* every node is expected to have a name */
1123 		err = drerr_new(1, ESTC_GETPROP,
1124 			"dip: 0x%p: property %s",
1125 			node->n_getdip(node), OBP_NAME);
1126 
1127 		return (err);
1128 	}
1129 
1130 	i = drmach_name2type_idx(name);
1131 
1132 	if (i < 0 || strcmp(name, "cmp") == 0) {
1133 		/*
1134 		 * Not a node of interest to dr - including "cmp",
1135 		 * but it is in drmach_name2type[], which lets gptwocfg
1136 		 * driver to check if node is OBP created.
1137 		 */
1138 		*idp = (drmachid_t)0;
1139 		return (NULL);
1140 	}
1141 
1142 	/*
1143 	 * Derive a best-guess unit number from the portid value.
1144 	 * Some drmach_*_new constructors (drmach_pci_new, for example)
1145 	 * will overwrite the prototype unum value with one that is more
1146 	 * appropriate for the device.
1147 	 */
1148 	device_id = portid & 0x1f;
1149 	if (device_id < 4)
1150 		unum = device_id;
1151 	else if (device_id == 8) {
1152 		unum = 0;
1153 	} else if (device_id == 9) {
1154 		unum = 1;
1155 	} else if (device_id == 0x1c) {
1156 		unum = 0;
1157 	} else if (device_id == 0x1d) {
1158 		unum = 1;
1159 	} else {
1160 		return (DRMACH_INTERNAL_ERROR());
1161 	}
1162 
1163 	bzero(&proto, sizeof (proto));
1164 	proto.type = drmach_name2type[i].type;
1165 	proto.bp = bp;
1166 	proto.node = node;
1167 	proto.portid = portid;
1168 	proto.unum = unum;
1169 
1170 	return (drmach_name2type[i].new(&proto, idp));
1171 }
1172 
1173 static void
1174 drmach_device_dispose(drmachid_t id)
1175 {
1176 	drmach_device_t *self = id;
1177 
1178 	self->cm.dispose(id);
1179 }
1180 
1181 static drmach_board_t *
1182 drmach_board_new(int bnum)
1183 {
1184 	static sbd_error_t *drmach_board_release(drmachid_t);
1185 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1186 
1187 	drmach_board_t	*bp;
1188 
1189 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1190 
1191 	bp->cm.isa = (void *)drmach_board_new;
1192 	bp->cm.release = drmach_board_release;
1193 	bp->cm.status = drmach_board_status;
1194 
1195 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1196 
1197 	bp->bnum = bnum;
1198 	bp->devices = NULL;
1199 	bp->tree = drmach_node_new();
1200 
1201 	drmach_array_set(drmach_boards, bnum, bp);
1202 	return (bp);
1203 }
1204 
1205 static void
1206 drmach_board_dispose(drmachid_t id)
1207 {
1208 	drmach_board_t *bp;
1209 
1210 	ASSERT(DRMACH_IS_BOARD_ID(id));
1211 	bp = id;
1212 
1213 	if (bp->tree)
1214 		drmach_node_dispose(bp->tree);
1215 
1216 	if (bp->devices)
1217 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1218 
1219 	kmem_free(bp, sizeof (*bp));
1220 }
1221 
1222 static sbd_error_t *
1223 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1224 {
1225 	sbd_error_t	*err = NULL;
1226 	drmach_board_t	*bp;
1227 	caddr_t		obufp;
1228 	dr_showboard_t	shb;
1229 
1230 	if (!DRMACH_IS_BOARD_ID(id))
1231 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1232 
1233 	bp = id;
1234 
1235 	/*
1236 	 * we need to know if the board's connected before
1237 	 * issuing a showboard message.  If it's connected, we just
1238 	 * reply with status composed of cached info
1239 	 */
1240 
1241 	if (!bp->connected) {
1242 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1243 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1244 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1245 			sizeof (dr_showboard_t));
1246 
1247 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1248 		if (err)
1249 			return (err);
1250 
1251 		bp->connected = (shb.bd_assigned && shb.bd_active);
1252 		strncpy(bp->type, shb.board_type, sizeof (bp->type));
1253 		stat->assigned = bp->assigned = shb.bd_assigned;
1254 		stat->powered = bp->powered = shb.power_on;
1255 		stat->empty = bp->empty = shb.slot_empty;
1256 
1257 		switch (shb.test_status) {
1258 			case DR_TEST_STATUS_UNKNOWN:
1259 			case DR_TEST_STATUS_IPOST:
1260 			case DR_TEST_STATUS_ABORTED:
1261 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1262 				break;
1263 			case DR_TEST_STATUS_PASSED:
1264 				stat->cond = bp->cond = SBD_COND_OK;
1265 				break;
1266 			case DR_TEST_STATUS_FAILED:
1267 				stat->cond = bp->cond = SBD_COND_FAILED;
1268 				break;
1269 			default:
1270 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1271 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1272 					shb.test_status);
1273 				break;
1274 
1275 		}
1276 
1277 		strncpy(stat->type, shb.board_type, sizeof (stat->type));
1278 		snprintf(stat->info, sizeof (stat->info), "Test Level=%d",
1279 			shb.test_level);
1280 	} else {
1281 		stat->assigned = bp->assigned;
1282 		stat->powered = bp->powered;
1283 		stat->empty = bp->empty;
1284 		stat->cond = bp->cond;
1285 		strncpy(stat->type, bp->type, sizeof (stat->type));
1286 	}
1287 
1288 	stat->busy = 0;			/* assume not busy */
1289 	stat->configured = 0;		/* assume not configured */
1290 	if (bp->devices) {
1291 		int		 rv;
1292 		int		 d_idx;
1293 		drmachid_t	 d_id;
1294 
1295 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1296 		while (rv == 0) {
1297 			drmach_status_t	d_stat;
1298 
1299 			err = drmach_i_status(d_id, &d_stat);
1300 			if (err)
1301 				break;
1302 
1303 			stat->busy |= d_stat.busy;
1304 			stat->configured |= d_stat.configured;
1305 
1306 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1307 		}
1308 	}
1309 
1310 	return (err);
1311 }
1312 
/*
 * One entry of the mailbox message transaction list.
 *
 * An entry describes a single request: the output buffer handed to
 * mboxsc_putmsg, the input buffer that receives the reply payload, and
 * the state shared between the requesting thread and the sendmsg and
 * getmsg threads.  The s_lock/s_cv pair is used while the request is
 * being sent and the g_lock/g_cv pair while the reply is awaited.
 *
 * NOTE(review): p_flag and m_reply share one byte but are written while
 * holding different locks (s_lock and g_lock respectively) - confirm
 * that the two updates can never happen concurrently.
 */
typedef struct drmach_msglist {
	kcondvar_t		s_cv; 		/* condvar for sending msg */
	kmutex_t		s_lock;		/* mutex for sending */
	kcondvar_t		g_cv;		/* condvar for getting reply */
	kmutex_t		g_lock;		/* mutex for getting reply */
	struct drmach_msglist	*prev;		/* link to previous entry */
	struct drmach_msglist	*next;		/* link to next entry */
	struct drmach_msglist	*link;		/* link to related entry */
	caddr_t			o_buf;		/* address of output buffer */
	caddr_t			i_buf; 		/* address of input buffer */
	uint32_t		o_buflen;	/* output buffer length */
	uint32_t		i_buflen;	/* input buffer length */
	uint32_t		msgid;		/* message identifier */
	int			o_nretry;	/* number of sending retries */
	int			f_error;	/* mailbox framework error */
	uint8_t			e_code;		/* error code returned by SC */
	uint8_t			p_flag	:1,	/* successfully putmsg */
				m_reply	:1,	/* msg reply received */
				unused	:6;
} drmach_msglist_t;
1333 
/*
 * Global state of the DR mailbox layer: the locks, the doubly linked
 * list of outstanding transactions (drmach_msglist_first/last), the
 * message id counter, the sendmsg and getmsg service threads with their
 * run flags, and the mailbox initialization state.
 */
kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
uint32_t		drmach_msgid;		/* current message id */
kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
int			drmach_mbox_istate;	/* mailbox init state */
int			drmach_mbox_iflag;	/* set if init'd with SC */
int			drmach_mbox_ipending;	/* set if reinit scheduled */
1349 
1350 /*
1351  * Timeout values (in seconds) used when waiting for replies (from the SC) to
1352  * requests that we sent.  Since we only receive boardevent messages, and they
1353  * are events rather than replies, there is no boardevent timeout.
1354  */
1355 int	drmach_to_mbxinit	= 60;		/* 1 minute */
1356 int	drmach_to_assign	= 60;		/* 1 minute */
1357 int	drmach_to_unassign	= 60;		/* 1 minute */
1358 int	drmach_to_claim		= 3600;		/* 1 hour */
1359 int	drmach_to_unclaim	= 3600;		/* 1 hour */
1360 int	drmach_to_poweron	= 480;		/* 8 minutes */
1361 int	drmach_to_poweroff	= 480;		/* 8 minutes */
1362 int	drmach_to_testboard	= 43200;	/* 12 hours */
1363 int	drmach_to_aborttest	= 180;		/* 3 minutes */
1364 int	drmach_to_showboard	= 180;		/* 3 minutes */
1365 int	drmach_to_unconfig	= 180;		/* 3 minutes */
1366 
1367 /*
1368  * Delay (in seconds) used after receiving a non-transient error indication from
1369  * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
1370  */
1371 int	drmach_mbxerr_delay	= 15;		/* 15 seconds */
1372 
1373 /*
1374  * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
1375  */
1376 clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
1377 clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */
1378 
1379 /*
1380  * Normally, drmach_to_putmsg is set dynamically during initialization in
1381  * drmach_mbox_init.  This has the potentially undesirable side effect of
1382  * clobbering any value that might have been set in /etc/system.  To prevent
1383  * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
1384  * /etc/system), set drmach_use_tuned_putmsg_to to 1.
1385  */
1386 int	drmach_use_tuned_putmsg_to	= 0;
1387 
1388 
1389 /* maximum conceivable message size for future mailbox protocol versions */
1390 #define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1391 
1392 /*ARGSUSED*/
1393 void
1394 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1395 {
1396 	int		i, j;
1397 	dr_memregs_t	*memregs;
1398 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1399 	dr_msg_t	*mp = &mbp->msgdata;
1400 
1401 #ifdef DEBUG
1402 	switch (php->command) {
1403 		case DRMSG_BOARDEVENT:
1404 			if (dir) {
1405 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1406 			} else {
1407 				DRMACH_PR("BOARDEVENT received:\n");
1408 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1409 					mp->dm_be.initialized,
1410 					mp->dm_be.board_insertion,
1411 					mp->dm_be.board_removal,
1412 					mp->dm_be.slot_assign);
1413 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1414 					mp->dm_be.slot_unassign,
1415 					mp->dm_be.slot_avail,
1416 					mp->dm_be.slot_unavail);
1417 			}
1418 			break;
1419 		case DRMSG_MBOX_INIT:
1420 			if (dir) {
1421 				DRMACH_PR("MBOX_INIT Request:\n");
1422 			} else {
1423 				DRMACH_PR("MBOX_INIT Reply:\n");
1424 			}
1425 			break;
1426 		case DRMSG_ASSIGN:
1427 			if (dir) {
1428 				DRMACH_PR("ASSIGN Request:\n");
1429 			} else {
1430 				DRMACH_PR("ASSIGN Reply:\n");
1431 			}
1432 			break;
1433 		case DRMSG_UNASSIGN:
1434 			if (dir) {
1435 				DRMACH_PR("UNASSIGN Request:\n");
1436 			} else {
1437 				DRMACH_PR("UNASSIGN Reply:\n");
1438 			}
1439 			break;
1440 		case DRMSG_CLAIM:
1441 			if (!dir) {
1442 				DRMACH_PR("CLAIM Reply:\n");
1443 				break;
1444 			}
1445 
1446 			DRMACH_PR("CLAIM Request:\n");
1447 			for (i = 0; i < 18; ++i) {
1448 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1449 					mp->dm_cr.mem_slice[i].valid,
1450 					mp->dm_cr.mem_slice[i].slice);
1451 				memregs = &(mp->dm_cr.mem_regs[i]);
1452 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1453 					DRMACH_PR("  MC %2d: "
1454 						"MADR[%d] = 0x%lx, "
1455 						"MADR[%d] = 0x%lx\n", j,
1456 						0, DRMACH_MCREG_TO_U64(
1457 						memregs->madr[j][0]),
1458 						1, DRMACH_MCREG_TO_U64(
1459 						memregs->madr[j][1]));
1460 					DRMACH_PR("       : "
1461 						"MADR[%d] = 0x%lx, "
1462 						"MADR[%d] = 0x%lx\n",
1463 						2, DRMACH_MCREG_TO_U64(
1464 						memregs->madr[j][2]),
1465 						3, DRMACH_MCREG_TO_U64(
1466 						memregs->madr[j][3]));
1467 				}
1468 			}
1469 			break;
1470 		case DRMSG_UNCLAIM:
1471 			if (!dir) {
1472 				DRMACH_PR("UNCLAIM Reply:\n");
1473 				break;
1474 			}
1475 
1476 			DRMACH_PR("UNCLAIM Request:\n");
1477 			for (i = 0; i < 18; ++i) {
1478 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1479 					mp->dm_ur.mem_slice[i].valid,
1480 					mp->dm_ur.mem_slice[i].slice);
1481 				memregs = &(mp->dm_ur.mem_regs[i]);
1482 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1483 					DRMACH_PR("  MC %2d: "
1484 						"MADR[%d] = 0x%lx, "
1485 						"MADR[%d] = 0x%lx\n", j,
1486 						0, DRMACH_MCREG_TO_U64(
1487 						memregs->madr[j][0]),
1488 						1, DRMACH_MCREG_TO_U64(
1489 						memregs->madr[j][1]));
1490 					DRMACH_PR("       : "
1491 						"MADR[%d] = 0x%lx, "
1492 						"MADR[%d] = 0x%lx\n",
1493 						2, DRMACH_MCREG_TO_U64(
1494 						memregs->madr[j][2]),
1495 						3, DRMACH_MCREG_TO_U64(
1496 						memregs->madr[j][3]));
1497 				}
1498 			}
1499 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1500 			break;
1501 		case DRMSG_UNCONFIG:
1502 			if (!dir) {
1503 				DRMACH_PR("UNCONFIG Reply:\n");
1504 				break;
1505 			}
1506 
1507 			DRMACH_PR("UNCONFIG Request:\n");
1508 			for (i = 0; i < 18; ++i) {
1509 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1510 					mp->dm_uc.mem_slice[i].valid,
1511 					mp->dm_uc.mem_slice[i].slice);
1512 				memregs = &(mp->dm_uc.mem_regs[i]);
1513 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1514 					DRMACH_PR("  MC %2d: "
1515 						"MADR[%d] = 0x%lx, "
1516 						"MADR[%d] = 0x%lx\n", j,
1517 						0, DRMACH_MCREG_TO_U64(
1518 						memregs->madr[j][0]),
1519 						1, DRMACH_MCREG_TO_U64(
1520 						memregs->madr[j][1]));
1521 					DRMACH_PR("       : "
1522 						"MADR[%d] = 0x%lx, "
1523 						"MADR[%d] = 0x%lx\n",
1524 						2, DRMACH_MCREG_TO_U64(
1525 						memregs->madr[j][2]),
1526 						3, DRMACH_MCREG_TO_U64(
1527 						memregs->madr[j][3]));
1528 				}
1529 			}
1530 			break;
1531 		case DRMSG_POWERON:
1532 			if (dir) {
1533 				DRMACH_PR("POWERON Request:\n");
1534 			} else {
1535 				DRMACH_PR("POWERON Reply:\n");
1536 			}
1537 			break;
1538 		case DRMSG_POWEROFF:
1539 			if (dir) {
1540 				DRMACH_PR("POWEROFF Request:\n");
1541 			} else {
1542 				DRMACH_PR("POWEROFF Reply:\n");
1543 			}
1544 			break;
1545 		case DRMSG_TESTBOARD:
1546 			if (dir) {
1547 				DRMACH_PR("TESTBOARD Request:\n");
1548 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1549 					mp->dm_tb.memaddrhi,
1550 					mp->dm_tb.memaddrlo);
1551 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1552 					mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1553 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1554 					mp->dm_tb.force, mp->dm_tb.immediate);
1555 			} else {
1556 				DRMACH_PR("TESTBOARD Reply:\n");
1557 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1558 					mp->dm_tr.memaddrhi,
1559 					mp->dm_tr.memaddrlo);
1560 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1561 					mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1562 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1563 					mp->dm_tr.cpu_recovered,
1564 					mp->dm_tr.test_status);
1565 
1566 			}
1567 			break;
1568 		case DRMSG_ABORT_TEST:
1569 			if (dir) {
1570 				DRMACH_PR("ABORT_TEST Request:\n");
1571 			} else {
1572 				DRMACH_PR("ABORT_TEST Reply:\n");
1573 			}
1574 
1575 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1576 					mp->dm_ta.memaddrhi,
1577 					mp->dm_ta.memaddrlo);
1578 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1579 					mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1580 			break;
1581 		case DRMSG_SHOWBOARD:
1582 			if (dir) {
1583 				DRMACH_PR("SHOWBOARD Request:\n");
1584 			} else {
1585 				DRMACH_PR("SHOWBOARD Reply:\n");
1586 
1587 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1588 					mp->dm_sb.slot_empty,
1589 					mp->dm_sb.power_on,
1590 					mp->dm_sb.bd_assigned);
1591 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1592 					mp->dm_sb.bd_active,
1593 					mp->dm_sb.test_status,
1594 					mp->dm_sb.test_level);
1595 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1596 			}
1597 			break;
1598 		default:
1599 			DRMACH_PR("Unknown message type\n");
1600 			break;
1601 	}
1602 
1603 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1604 		php->message_id, php->drproto_version, php->command,
1605 		php->expbrd, php->slot);
1606 #endif
1607 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1608 		php->error_code);
1609 
1610 }
1611 
1612 /*
1613  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1614  * handshake needs to be scheduled.  The handshake can't be performed by the
1615  * thread that determines it is needed, in most cases, so this function is
1616  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1617  * otherwise ignored, since any situation that requires a mailbox initialization
1618  * handshake will continue to request the handshake until it succeeds.
1619  */
1620 static void
1621 drmach_mbox_reinit(void *unused)
1622 {
1623 	_NOTE(ARGUNUSED(unused))
1624 
1625 	caddr_t		obufp = NULL;
1626 	sbd_error_t	*serr = NULL;
1627 
1628 	DRMACH_PR("scheduled mailbox reinit running\n");
1629 
1630 	mutex_enter(&drmach_ri_mbox_mutex);
1631 	mutex_enter(&drmach_g_mbox_mutex);
1632 	if (drmach_mbox_iflag == 0) {
1633 		/* need to initialize the mailbox */
1634 		mutex_exit(&drmach_g_mbox_mutex);
1635 
1636 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1637 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1638 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1639 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1640 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1641 
1642 		if (serr) {
1643 			cmn_err(CE_WARN,
1644 				"mbox_init: MBOX_INIT failed ecode=0x%x",
1645 				serr->e_code);
1646 			sbd_err_clear(&serr);
1647 		}
1648 		mutex_enter(&drmach_g_mbox_mutex);
1649 		if (!serr) {
1650 			drmach_mbox_iflag = 1;
1651 		}
1652 	}
1653 	drmach_mbox_ipending = 0;
1654 	mutex_exit(&drmach_g_mbox_mutex);
1655 	mutex_exit(&drmach_ri_mbox_mutex);
1656 }
1657 
1658 /*
1659  * To ensure sufficient compatibility with future versions of the DR mailbox
1660  * protocol, we use a buffer that is large enough to receive the largest message
1661  * that could possibly be sent to us.  However, since that ends up being fairly
1662  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1663  * does not need to be MT-safe since it is only invoked by the mailbox
1664  * framework, which will never invoke it multiple times concurrently.  Since
1665  * that is the case, we can use a static buffer.
1666  */
1667 void
1668 drmach_mbox_event(void)
1669 {
1670 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1671 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1672 	int		err;
1673 	uint32_t	type = MBOXSC_MSG_EVENT;
1674 	uint32_t	command = DRMSG_BOARDEVENT;
1675 	uint64_t	transid = 0;
1676 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1677 	char		*hint = "";
1678 	int		logsys = 0;
1679 
1680 	do {
1681 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1682 			&transid, &length, (void *)msg, 0);
1683 	} while (err == EAGAIN);
1684 
1685 	/* don't try to interpret anything with the wrong version number */
1686 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1687 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1688 			msg->p_hdr.drproto_version, DRMBX_VERSION);
1689 		mutex_enter(&drmach_g_mbox_mutex);
1690 		drmach_mbox_iflag = 0;
1691 		/* schedule a reinit handshake if one isn't pending */
1692 		if (!drmach_mbox_ipending) {
1693 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1694 				NULL, TQ_NOSLEEP) != NULL) {
1695 				drmach_mbox_ipending = 1;
1696 			} else {
1697 				cmn_err(CE_WARN,
1698 					"failed to schedule mailbox reinit");
1699 			}
1700 		}
1701 		mutex_exit(&drmach_g_mbox_mutex);
1702 		return;
1703 	}
1704 
1705 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1706 		cmn_err(CE_WARN,
1707 			"Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1708 				err, msg->p_hdr.error_code);
1709 	} else {
1710 		dr_boardevent_t	*be;
1711 		be = (dr_boardevent_t *)&msg->msgdata;
1712 
1713 		/* check for initialization event */
1714 		if (be->initialized) {
1715 			mutex_enter(&drmach_g_mbox_mutex);
1716 			drmach_mbox_iflag = 0;
1717 			/* schedule a reinit handshake if one isn't pending */
1718 			if (!drmach_mbox_ipending) {
1719 				if (taskq_dispatch(system_taskq,
1720 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1721 					!= NULL) {
1722 					drmach_mbox_ipending = 1;
1723 				} else {
1724 					cmn_err(CE_WARN,
1725 					"failed to schedule mailbox reinit");
1726 				}
1727 			}
1728 			mutex_exit(&drmach_g_mbox_mutex);
1729 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1730 		}
1731 
1732 		/* anything else will be a log_sysevent call */
1733 
1734 		if (be->board_insertion) {
1735 			DRMACH_PR("Board Insertion event received");
1736 			hint = DR_HINT_INSERT;
1737 			logsys++;
1738 	}
1739 		if (be->board_removal) {
1740 			DRMACH_PR("Board Removal event received");
1741 			hint = DR_HINT_REMOVE;
1742 			logsys++;
1743 		}
1744 		if (be->slot_assign) {
1745 			DRMACH_PR("Slot Assign event received");
1746 			logsys++;
1747 		}
1748 		if (be->slot_unassign) {
1749 			DRMACH_PR("Slot Unassign event received");
1750 			logsys++;
1751 		}
1752 		if (be->slot_avail) {
1753 			DRMACH_PR("Slot Available event received");
1754 			logsys++;
1755 		}
1756 		if (be->slot_unavail) {
1757 			DRMACH_PR("Slot Unavailable event received");
1758 			logsys++;
1759 		}
1760 		if (be->power_on) {
1761 			DRMACH_PR("Power ON event received");
1762 			logsys++;
1763 		}
1764 		if (be->power_off) {
1765 			DRMACH_PR("Power OFF event received");
1766 			logsys++;
1767 		}
1768 
1769 		if (logsys)
1770 			drmach_log_sysevent(
1771 				    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1772 							msg->p_hdr.slot),
1773 				    hint, SE_NOSLEEP, 1);
1774 	}
1775 }
1776 
1777 static uint32_t
1778 drmach_get_msgid()
1779 {
1780 	uint32_t	rv;
1781 	mutex_enter(&drmach_msglist_mutex);
1782 	if (!(++drmach_msgid))
1783 		++drmach_msgid;
1784 	rv = drmach_msgid;
1785 	mutex_exit(&drmach_msglist_mutex);
1786 	return (rv);
1787 }
1788 
1789 /*
1790  *	unlink an entry from the message transaction list
1791  *
1792  *	caller must hold drmach_msglist_mutex
1793  */
1794 void
1795 drmach_msglist_unlink(drmach_msglist_t *entry)
1796 {
1797 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1798 	if (entry->prev) {
1799 		entry->prev->next = entry->next;
1800 		if (entry->next)
1801 			entry->next->prev = entry->prev;
1802 	} else {
1803 		drmach_msglist_first = entry->next;
1804 		if (entry->next)
1805 			entry->next->prev = NULL;
1806 	}
1807 	if (entry == drmach_msglist_last) {
1808 		drmach_msglist_last = entry->prev;
1809 	}
1810 }
1811 
1812 void
1813 drmach_msglist_link(drmach_msglist_t *entry)
1814 {
1815 	mutex_enter(&drmach_msglist_mutex);
1816 	if (drmach_msglist_last) {
1817 		entry->prev = drmach_msglist_last;
1818 		drmach_msglist_last->next = entry;
1819 		drmach_msglist_last = entry;
1820 	} else {
1821 		drmach_msglist_last = drmach_msglist_first = entry;
1822 	}
1823 	mutex_exit(&drmach_msglist_mutex);
1824 }
1825 
1826 void
1827 drmach_mbox_getmsg()
1828 {
1829 	int			err;
1830 	register int		msgid;
1831 	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1832 	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1833 	dr_proto_hdr_t		*php;
1834 	drmach_msglist_t	*found, *entry;
1835 	uint32_t		type = MBOXSC_MSG_REPLY;
1836 	uint32_t		command;
1837 	uint64_t		transid;
1838 	uint32_t		length;
1839 
1840 	php = &msg->p_hdr;
1841 
1842 	while (drmach_getmsg_thread_run != 0) {
1843 		/* get a reply message */
1844 		command = 0;
1845 		transid = 0;
1846 		length = DRMACH_MAX_MBOX_MSG_SIZE;
1847 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1848 			&transid, &length, (void *)msg, drmach_to_getmsg);
1849 
1850 		if (err) {
1851 			/*
1852 			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1853 			 * the "error" is really just a normal, transient
1854 			 * condition and we can retry the operation right away.
1855 			 * Any other error suggests a more serious problem,
1856 			 * ranging from a message being too big for our buffer
1857 			 * (EMSGSIZE) to total failure of the mailbox layer.
1858 			 * This second class of errors is much less "transient",
1859 			 * so rather than retrying over and over (and getting
1860 			 * the same error over and over) as fast as we can,
1861 			 * we'll sleep for a while before retrying.
1862 			 */
1863 			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1864 				cmn_err(CE_WARN,
1865 				"mboxsc_getmsg failed, err=0x%x", err);
1866 				delay(drmach_mbxerr_delay * hz);
1867 			}
1868 			continue;
1869 		}
1870 
1871 		drmach_mbox_prmsg(msg, 0);
1872 
1873 		if (php->drproto_version != DRMBX_VERSION) {
1874 			cmn_err(CE_WARN,
1875 				"mailbox version mismatch 0x%x vs 0x%x",
1876 				php->drproto_version, DRMBX_VERSION);
1877 
1878 			mutex_enter(&drmach_g_mbox_mutex);
1879 			drmach_mbox_iflag = 0;
1880 			/* schedule a reinit handshake if one isn't pending */
1881 			if (!drmach_mbox_ipending) {
1882 				if (taskq_dispatch(system_taskq,
1883 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1884 					!= NULL) {
1885 					drmach_mbox_ipending = 1;
1886 				} else {
1887 					cmn_err(CE_WARN,
1888 					"failed to schedule mailbox reinit");
1889 				}
1890 			}
1891 			mutex_exit(&drmach_g_mbox_mutex);
1892 
1893 			continue;
1894 		}
1895 
1896 		msgid = php->message_id;
1897 		found = NULL;
1898 		mutex_enter(&drmach_msglist_mutex);
1899 		entry = drmach_msglist_first;
1900 		while (entry != NULL) {
1901 			if (entry->msgid == msgid) {
1902 				found = entry;
1903 				drmach_msglist_unlink(entry);
1904 				entry = NULL;
1905 			} else
1906 				entry = entry->next;
1907 		}
1908 
1909 		if (found) {
1910 			mutex_enter(&found->g_lock);
1911 
1912 			found->e_code = php->error_code;
1913 			if (found->i_buflen > 0)
1914 				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1915 					found->i_buflen);
1916 			found->m_reply = 1;
1917 
1918 			cv_signal(&found->g_cv);
1919 			mutex_exit(&found->g_lock);
1920 		} else {
1921 			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1922 			    msgid);
1923 			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1924 			    php->command, php->expbrd, php->slot);
1925 		}
1926 
1927 		mutex_exit(&drmach_msglist_mutex);
1928 	}
1929 	cmn_err(CE_WARN, "mbox_getmsg: exiting");
1930 	mutex_enter(&drmach_msglist_mutex);
1931 	entry = drmach_msglist_first;
1932 	while (entry != NULL) {
1933 		if (entry->p_flag == 1) {
1934 			entry->f_error = -1;
1935 			mutex_enter(&entry->g_lock);
1936 			cv_signal(&entry->g_cv);
1937 			mutex_exit(&entry->g_lock);
1938 			drmach_msglist_unlink(entry);
1939 		}
1940 		entry = entry->next;
1941 	}
1942 	mutex_exit(&drmach_msglist_mutex);
1943 	drmach_getmsg_thread_run = -1;
1944 	thread_exit();
1945 }
1946 
1947 void
1948 drmach_mbox_sendmsg()
1949 {
1950 	int		err, retry;
1951 	drmach_msglist_t *entry;
1952 	dr_mbox_msg_t   *mp;
1953 	dr_proto_hdr_t  *php;
1954 
1955 	while (drmach_sendmsg_thread_run != 0) {
1956 		/*
1957 		 * Search through the list to find entries awaiting
1958 		 * transmission to the SC
1959 		 */
1960 		mutex_enter(&drmach_msglist_mutex);
1961 		entry = drmach_msglist_first;
1962 		retry = 0;
1963 		while (entry != NULL) {
1964 			if (entry->p_flag == 1) {
1965 				entry = entry->next;
1966 				continue;
1967 			}
1968 
1969 			mutex_exit(&drmach_msglist_mutex);
1970 
1971 			if (!retry)
1972 				mutex_enter(&entry->s_lock);
1973 			mp = (dr_mbox_msg_t *)entry->o_buf;
1974 			php = &mp->p_hdr;
1975 
1976 			drmach_mbox_prmsg(mp, 1);
1977 
1978 			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
1979 				php->command, NULL, entry->o_buflen, (void *)mp,
1980 				drmach_to_putmsg);
1981 
1982 			if (err) {
1983 				switch (err) {
1984 
1985 				case EAGAIN:
1986 				case EBUSY:
1987 					++retry;
1988 					mutex_enter(&drmach_msglist_mutex);
1989 					continue;
1990 
1991 				case ETIMEDOUT:
1992 					if (--entry->o_nretry <= 0) {
1993 						mutex_enter(
1994 							&drmach_msglist_mutex);
1995 						drmach_msglist_unlink(entry);
1996 						mutex_exit(
1997 							&drmach_msglist_mutex);
1998 						entry->f_error = err;
1999 						entry->p_flag = 1;
2000 						cv_signal(&entry->s_cv);
2001 					} else {
2002 						++retry;
2003 						mutex_enter(
2004 							&drmach_msglist_mutex);
2005 						continue;
2006 					}
2007 					break;
2008 				default:
2009 					mutex_enter(&drmach_msglist_mutex);
2010 					drmach_msglist_unlink(entry);
2011 					mutex_exit(&drmach_msglist_mutex);
2012 					entry->f_error = err;
2013 					entry->p_flag = 1;
2014 					cv_signal(&entry->s_cv);
2015 					break;
2016 				}
2017 			} else {
2018 				entry->p_flag = 1;
2019 				cv_signal(&entry->s_cv);
2020 			}
2021 
2022 			mutex_exit(&entry->s_lock);
2023 			retry = 0;
2024 			mutex_enter(&drmach_msglist_mutex);
2025 			entry = drmach_msglist_first;
2026 		}
2027 		mutex_exit(&drmach_msglist_mutex);
2028 
2029 		mutex_enter(&drmach_sendmsg_mutex);
2030 		(void) cv_timedwait(&drmach_sendmsg_cv,
2031 			&drmach_sendmsg_mutex, ddi_get_lbolt() + (5 * hz));
2032 		mutex_exit(&drmach_sendmsg_mutex);
2033 	}
2034 	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
2035 	mutex_enter(&drmach_msglist_mutex);
2036 	entry = drmach_msglist_first;
2037 	while (entry != NULL) {
2038 		if (entry->p_flag == 0) {
2039 			entry->f_error = -1;
2040 			mutex_enter(&entry->s_lock);
2041 			cv_signal(&entry->s_cv);
2042 			mutex_exit(&entry->s_lock);
2043 			drmach_msglist_unlink(entry);
2044 		}
2045 		entry = entry->next;
2046 	}
2047 	mutex_exit(&drmach_msglist_mutex);
2048 	cv_destroy(&drmach_sendmsg_cv);
2049 	mutex_destroy(&drmach_sendmsg_mutex);
2050 
2051 	drmach_sendmsg_thread_run = -1;
2052 	thread_exit();
2053 
2054 }
2055 
2056 void
2057 drmach_msglist_destroy(drmach_msglist_t *listp)
2058 {
2059 	if (listp != NULL) {
2060 		drmach_msglist_t	*entry;
2061 
2062 		mutex_enter(&drmach_msglist_mutex);
2063 		entry = drmach_msglist_first;
2064 		while (entry) {
2065 			if (listp == entry) {
2066 				drmach_msglist_unlink(listp);
2067 				entry = NULL;
2068 			} else
2069 				entry = entry->next;
2070 		}
2071 
2072 		mutex_destroy(&listp->s_lock);
2073 		cv_destroy(&listp->s_cv);
2074 		mutex_destroy(&listp->g_lock);
2075 		cv_destroy(&listp->g_cv);
2076 		kmem_free(listp, sizeof (drmach_msglist_t));
2077 
2078 		mutex_exit(&drmach_msglist_mutex);
2079 	}
2080 }
2081 
2082 static drmach_msglist_t	*
2083 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2084 	uint32_t olen, int nrtry)
2085 {
2086 	drmach_msglist_t	*listp;
2087 
2088 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2089 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2090 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2091 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2092 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2093 	listp->o_buf = (caddr_t)hdrp;
2094 	listp->o_buflen = olen;
2095 	listp->i_buf = ibufp;
2096 	listp->i_buflen = ilen;
2097 	listp->o_nretry = nrtry;
2098 	listp->msgid = hdrp->message_id;
2099 
2100 	return (listp);
2101 }
2102 
2103 static drmach_msglist_t *
2104 drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
2105 	uint32_t ilen, int timeout, int nrtry, int nosig,
2106 	drmach_msglist_t *link)
2107 {
2108 	int		crv;
2109 	drmach_msglist_t *listp;
2110 	clock_t		to_val;
2111 	dr_proto_hdr_t	*php;
2112 
2113 	/* setup transaction list entry */
2114 	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);
2115 
2116 	/* send mailbox message, await reply */
2117 	mutex_enter(&listp->s_lock);
2118 	mutex_enter(&listp->g_lock);
2119 
2120 	listp->link = link;
2121 	drmach_msglist_link(listp);
2122 
2123 	mutex_enter(&drmach_sendmsg_mutex);
2124 	cv_signal(&drmach_sendmsg_cv);
2125 	mutex_exit(&drmach_sendmsg_mutex);
2126 
2127 	while (listp->p_flag == 0) {
2128 		cv_wait(&listp->s_cv, &listp->s_lock);
2129 	}
2130 
2131 	to_val =  ddi_get_lbolt() + (timeout * hz);
2132 
2133 	if (listp->f_error) {
2134 		listp->p_flag = 0;
2135 		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x",
2136 			listp->f_error);
2137 		php = (dr_proto_hdr_t *)listp->o_buf;
2138 		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
2139 		    php->command, php->expbrd, php->slot);
2140 	} else {
2141 		while (listp->m_reply == 0 && listp->f_error == 0) {
2142 			if (nosig)
2143 				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
2144 					to_val);
2145 			else
2146 				crv = cv_timedwait_sig(&listp->g_cv,
2147 					&listp->g_lock, to_val);
2148 			switch (crv) {
2149 				case -1: /* timed out */
2150 					cmn_err(CE_WARN,
2151 					    "!msgid=0x%x reply timed out",
2152 					    hdrp->message_id);
2153 					php = (dr_proto_hdr_t *)listp->o_buf;
2154 					cmn_err(CE_WARN, "!    cmd = 0x%x, "
2155 					    "exb = %d, slot = %d", php->command,
2156 					    php->expbrd, php->slot);
2157 					listp->f_error = ETIMEDOUT;
2158 					break;
2159 				case 0: /* signal received */
2160 					cmn_err(CE_WARN,
2161 					    "operation interrupted by signal");
2162 					listp->f_error = EINTR;
2163 					break;
2164 				default:
2165 					break;
2166 				}
2167 		}
2168 
2169 		/*
2170 		 * If link is set for this entry, check to see if
2171 		 * the linked entry has been replied to.  If not,
2172 		 * wait for the response.
2173 		 * Currently, this is only used for ABORT_TEST functionality,
2174 		 * wherein a check is made for the TESTBOARD reply when
2175 		 * the ABORT_TEST reply is received.
2176 		 */
2177 
2178 		if (link) {
2179 			mutex_enter(&link->g_lock);
2180 			/*
2181 			 * If the reply to the linked entry hasn't been
2182 			 * received, clear the existing link->f_error,
2183 			 * and await the reply.
2184 			 */
2185 			if (link->m_reply == 0) {
2186 				link->f_error = 0;
2187 			}
2188 			to_val =  ddi_get_lbolt() + (timeout * hz);
2189 			while (link->m_reply == 0 && link->f_error == 0) {
2190 				crv = cv_timedwait(&link->g_cv, &link->g_lock,
2191 					to_val);
2192 				switch (crv) {
2193 				case -1: /* timed out */
2194 					cmn_err(CE_NOTE,
2195 					    "!link msgid=0x%x reply timed out",
2196 					    link->msgid);
2197 					link->f_error = ETIMEDOUT;
2198 					break;
2199 				default:
2200 					break;
2201 				}
2202 			}
2203 			mutex_exit(&link->g_lock);
2204 		}
2205 	}
2206 	mutex_exit(&listp->g_lock);
2207 	mutex_exit(&listp->s_lock);
2208 	return (listp);
2209 }
2210 
2211 static sbd_error_t *
2212 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2213 {
2214 	char		a_pnt[MAXNAMELEN];
2215 	dr_proto_hdr_t	*php;
2216 	int		bnum;
2217 
2218 	if (mlp->f_error) {
2219 		/*
2220 		 * If framework failure is due to signal, return "no error"
2221 		 * error.
2222 		 */
2223 		if (mlp->f_error == EINTR)
2224 			return (drerr_new(0, ESTC_NONE, NULL));
2225 
2226 		mutex_enter(&drmach_g_mbox_mutex);
2227 		drmach_mbox_iflag = 0;
2228 		mutex_exit(&drmach_g_mbox_mutex);
2229 		if (!mlp->p_flag)
2230 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2231 		else
2232 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2233 	}
2234 	php = (dr_proto_hdr_t *)mlp->o_buf;
2235 	bnum = 2 * php->expbrd + php->slot;
2236 	a_pnt[0] = '\0';
2237 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2238 
2239 	switch (mlp->e_code) {
2240 		case 0:
2241 			return (NULL);
2242 		case DRERR_NOACL:
2243 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2244 		case DRERR_NOT_ASSIGNED:
2245 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2246 		case DRERR_NOT_ACTIVE:
2247 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2248 		case DRERR_EMPTY_SLOT:
2249 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2250 		case DRERR_POWER_OFF:
2251 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2252 		case DRERR_TEST_IN_PROGRESS:
2253 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS,
2254 					"%s", a_pnt));
2255 		case DRERR_TESTING_BUSY:
2256 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2257 		case DRERR_TEST_REQUIRED:
2258 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2259 		case DRERR_UNAVAILABLE:
2260 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2261 		case DRERR_RECOVERABLE:
2262 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE,
2263 				"%s", a_pnt));
2264 		case DRERR_UNRECOVERABLE:
2265 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE,
2266 				"%s", a_pnt));
2267 		default:
2268 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2269 	}
2270 }
2271 
2272 static sbd_error_t *
2273 drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
2274 	caddr_t ibufp, int ilen)
2275 {
2276 	int			timeout = 0;
2277 	int			ntries = 0;
2278 	int			nosignals = 0;
2279 	dr_proto_hdr_t 		*hdrp;
2280 	drmach_msglist_t 	*mlp;
2281 	sbd_error_t		*err = NULL;
2282 
2283 	if (msgtype != DRMSG_MBOX_INIT) {
2284 		mutex_enter(&drmach_ri_mbox_mutex);
2285 		mutex_enter(&drmach_g_mbox_mutex);
2286 		if (drmach_mbox_iflag == 0) {
2287 			/* need to initialize the mailbox */
2288 			dr_proto_hdr_t	imsg;
2289 
2290 			mutex_exit(&drmach_g_mbox_mutex);
2291 
2292 			imsg.command = DRMSG_MBOX_INIT;
2293 
2294 			imsg.message_id = drmach_get_msgid();
2295 			imsg.drproto_version = DRMBX_VERSION;
2296 			imsg.expbrd = 0;
2297 			imsg.slot = 0;
2298 
2299 			cmn_err(CE_WARN,
2300 				"!reinitializing DR mailbox");
2301 			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
2302 				10, 5, 0, NULL);
2303 			err = drmach_mbx2sbderr(mlp);
2304 			/*
2305 			 * If framework failure incoming is encountered on
2306 			 * the MBOX_INIT [timeout on SMS reply], the error
2307 			 * type must be changed before returning to caller.
2308 			 * This is to prevent drmach_board_connect() and
2309 			 * drmach_board_disconnect() from marking boards
2310 			 * UNUSABLE based on MBOX_INIT failures.
2311 			 */
2312 			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
2313 				cmn_err(CE_WARN,
2314 				    "!Changed mbox incoming to outgoing"
2315 				    " failure on reinit");
2316 				sbd_err_clear(&err);
2317 				err = drerr_new(0, ESTC_MBXRQST, NULL);
2318 			}
2319 			drmach_msglist_destroy(mlp);
2320 			if (err) {
2321 				mutex_exit(&drmach_ri_mbox_mutex);
2322 				return (err);
2323 			}
2324 			mutex_enter(&drmach_g_mbox_mutex);
2325 			drmach_mbox_iflag = 1;
2326 		}
2327 		mutex_exit(&drmach_g_mbox_mutex);
2328 		mutex_exit(&drmach_ri_mbox_mutex);
2329 	}
2330 
2331 	hdrp = (dr_proto_hdr_t *)obufp;
2332 
2333 	/* setup outgoing mailbox header */
2334 	hdrp->command = msgtype;
2335 	hdrp->message_id = drmach_get_msgid();
2336 	hdrp->drproto_version = DRMBX_VERSION;
2337 	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
2338 	hdrp->slot = DRMACH_BNUM2SLOT(bnum);
2339 
2340 	switch (msgtype) {
2341 
2342 		case DRMSG_MBOX_INIT:
2343 			timeout = drmach_to_mbxinit;
2344 			ntries = 1;
2345 			nosignals = 0;
2346 			break;
2347 
2348 		case DRMSG_ASSIGN:
2349 			timeout = drmach_to_assign;
2350 			ntries = 1;
2351 			nosignals = 0;
2352 			break;
2353 
2354 		case DRMSG_UNASSIGN:
2355 			timeout = drmach_to_unassign;
2356 			ntries = 1;
2357 			nosignals = 0;
2358 			break;
2359 
2360 		case DRMSG_POWERON:
2361 			timeout = drmach_to_poweron;
2362 			ntries = 1;
2363 			nosignals = 0;
2364 			break;
2365 
2366 		case DRMSG_POWEROFF:
2367 			timeout = drmach_to_poweroff;
2368 			ntries = 1;
2369 			nosignals = 0;
2370 			break;
2371 
2372 		case DRMSG_SHOWBOARD:
2373 			timeout = drmach_to_showboard;
2374 			ntries = 1;
2375 			nosignals = 0;
2376 			break;
2377 
2378 		case DRMSG_CLAIM:
2379 			timeout = drmach_to_claim;
2380 			ntries = 1;
2381 			nosignals = 1;
2382 			break;
2383 
2384 		case DRMSG_UNCLAIM:
2385 			timeout = drmach_to_unclaim;
2386 			ntries = 1;
2387 			nosignals = 1;
2388 			break;
2389 
2390 		case DRMSG_UNCONFIG:
2391 			timeout = drmach_to_unconfig;
2392 			ntries = 1;
2393 			nosignals = 0;
2394 			break;
2395 
2396 		case DRMSG_TESTBOARD:
2397 			timeout = drmach_to_testboard;
2398 			ntries = 1;
2399 			nosignals = 0;
2400 			break;
2401 
2402 		default:
2403 			cmn_err(CE_WARN,
2404 				"Unknown outgoing message type 0x%x", msgtype);
2405 			err = DRMACH_INTERNAL_ERROR();
2406 			break;
2407 	}
2408 
2409 	if (err == NULL) {
2410 		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen,
2411 			timeout, ntries, nosignals, NULL);
2412 		err = drmach_mbx2sbderr(mlp);
2413 
2414 		/*
2415 		 * For DRMSG_TESTBOARD attempts which have timed out, or
2416 		 * been aborted due to a signal received after mboxsc_putmsg()
2417 		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
2418 		 * must be sent.
2419 		 */
2420 		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
2421 		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
2422 		    (mlp->p_flag != 0)))) {
2423 			drmach_msglist_t	*abmlp;
2424 			dr_abort_test_t		abibuf;
2425 
2426 			hdrp->command = DRMSG_ABORT_TEST;
2427 			hdrp->message_id = drmach_get_msgid();
2428 			abmlp = drmach_mbox_req_rply(hdrp,
2429 			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
2430 			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
2431 			cmn_err(CE_WARN, "test aborted");
2432 			drmach_msglist_destroy(abmlp);
2433 		}
2434 
2435 		drmach_msglist_destroy(mlp);
2436 	}
2437 
2438 	return (err);
2439 }
2440 
2441 static int
2442 drmach_mbox_init()
2443 {
2444 	int			err;
2445 	caddr_t			obufp;
2446 	sbd_error_t		*serr = NULL;
2447 	mboxsc_timeout_range_t	mbxtoz;
2448 
2449 	drmach_mbox_istate = 0;
2450 	/* register the outgoing mailbox */
2451 	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
2452 		NULL)) != 0) {
2453 		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
2454 		return (-1);
2455 	}
2456 	drmach_mbox_istate = 1;
2457 
2458 	/* setup the mboxsc_putmsg timeout value */
2459 	if (drmach_use_tuned_putmsg_to) {
2460 		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
2461 		    drmach_to_putmsg);
2462 	} else {
2463 		if ((err = mboxsc_ctrl(KEY_DRSC,
2464 		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
2465 			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
2466 			drmach_to_putmsg = 60000;
2467 		} else {
2468 			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
2469 			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
2470 			    " is 0x%lx\n", mbxtoz.min_timeout,
2471 			    mbxtoz.max_timeout, drmach_to_putmsg);
2472 		}
2473 	}
2474 
2475 	/* register the incoming mailbox */
2476 	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
2477 		drmach_mbox_event)) != 0) {
2478 		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
2479 		return (-1);
2480 	}
2481 	drmach_mbox_istate = 2;
2482 
2483 	/* initialize mutex for mailbox globals */
2484 	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2485 
2486 	/* initialize mutex for mailbox re-init */
2487 	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2488 
2489 	/* initialize mailbox message list elements */
2490 	drmach_msglist_first = drmach_msglist_last = NULL;
2491 	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);
2492 
2493 	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
2494 	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);
2495 
2496 	drmach_mbox_istate = 3;
2497 
2498 	/* start mailbox sendmsg thread */
2499 	drmach_sendmsg_thread_run = 1;
2500 	if (drmach_sendmsg_thread == NULL)
2501 		drmach_sendmsg_thread = thread_create(NULL, 0,
2502 		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
2503 		    TS_RUN, minclsyspri);
2504 
2505 	/* start mailbox getmsg thread */
2506 	drmach_getmsg_thread_run = 1;
2507 	if (drmach_getmsg_thread == NULL)
2508 		drmach_getmsg_thread = thread_create(NULL, 0,
2509 		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
2510 		    TS_RUN, minclsyspri);
2511 
2512 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
2513 	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
2514 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
2515 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
2516 	if (serr) {
2517 		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
2518 			serr->e_code);
2519 		sbd_err_clear(&serr);
2520 		return (-1);
2521 	}
2522 	mutex_enter(&drmach_g_mbox_mutex);
2523 	drmach_mbox_iflag = 1;
2524 	drmach_mbox_ipending = 0;
2525 	mutex_exit(&drmach_g_mbox_mutex);
2526 
2527 	return (0);
2528 }
2529 
2530 static int
2531 drmach_mbox_fini()
2532 {
2533 	int err, rv = 0;
2534 
2535 	if (drmach_mbox_istate > 2) {
2536 		drmach_getmsg_thread_run = 0;
2537 		drmach_sendmsg_thread_run = 0;
2538 		cmn_err(CE_WARN,
2539 			"drmach_mbox_fini: waiting for mbox threads...");
2540 		while ((drmach_getmsg_thread_run == 0) ||
2541 			(drmach_sendmsg_thread_run == 0)) {
2542 			continue;
2543 		}
2544 		cmn_err(CE_WARN,
2545 			"drmach_mbox_fini: mbox threads done.");
2546 		mutex_destroy(&drmach_msglist_mutex);
2547 
2548 	}
2549 	if (drmach_mbox_istate) {
2550 		/* de-register the outgoing mailbox */
2551 		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
2552 			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
2553 				err);
2554 			rv = -1;
2555 		}
2556 	}
2557 	if (drmach_mbox_istate > 1) {
2558 		/* de-register the incoming mailbox */
2559 		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
2560 			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
2561 				err);
2562 			rv = -1;
2563 		}
2564 	}
2565 	mutex_destroy(&drmach_g_mbox_mutex);
2566 	mutex_destroy(&drmach_ri_mbox_mutex);
2567 	return (rv);
2568 }
2569 
/*
 * Translate a portid into a board number.
 *
 * The low five bits of the portid identify the device and thereby the
 * slot (0 or 1) it lives in; the board number is formed from bits 5 and
 * up of the portid combined with that slot.
 *
 * Returns the board number, or -1 if the low five bits of the portid do
 * not name a known device.  (DEBUG kernels assert instead.)
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		/*
		 * ASSERT compiles away in non-debug kernels; never fall
		 * through to use an uninitialized slot.
		 */
		return (-1);
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2593 
2594 extern int axq_suspend_iopause;
2595 
2596 static int
2597 hold_rele_branch(dev_info_t *rdip, void *arg)
2598 {
2599 	int	i;
2600 	int	*holdp	= (int *)arg;
2601 	char	*name = ddi_node_name(rdip);
2602 
2603 	/*
2604 	 * For Starcat, we must be children of the root devinfo node
2605 	 */
2606 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2607 
2608 	i = drmach_name2type_idx(name);
2609 
2610 	/*
2611 	 * Only children of the root devinfo node need to be
2612 	 * held/released since they are the only valid targets
2613 	 * of tree operations. This corresponds to the node types
2614 	 * listed in the drmach_name2type array.
2615 	 */
2616 	if (i < 0) {
2617 		/* Not of interest to us */
2618 		return (DDI_WALK_PRUNECHILD);
2619 	}
2620 
2621 	if (*holdp) {
2622 		ASSERT(!e_ddi_branch_held(rdip));
2623 		e_ddi_branch_hold(rdip);
2624 	} else {
2625 		ASSERT(e_ddi_branch_held(rdip));
2626 		e_ddi_branch_rele(rdip);
2627 	}
2628 
2629 	return (DDI_WALK_PRUNECHILD);
2630 }
2631 
2632 static int
2633 drmach_init(void)
2634 {
2635 	pnode_t 	nodeid;
2636 	gdcd_t		*gdcd;
2637 	int		bnum;
2638 	dev_info_t	*rdip;
2639 	int		hold, circ;
2640 
2641 	mutex_enter(&drmach_i_lock);
2642 	if (drmach_initialized) {
2643 		mutex_exit(&drmach_i_lock);
2644 		return (0);
2645 	}
2646 
2647 	gdcd = drmach_gdcd_new();
2648 	if (gdcd == NULL) {
2649 		mutex_exit(&drmach_i_lock);
2650 		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
2651 		return (-1);
2652 	}
2653 
2654 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
2655 
2656 	nodeid = prom_childnode(prom_rootnode());
2657 	do {
2658 		int		 len;
2659 		int		 portid;
2660 		drmachid_t	 id;
2661 
2662 		len = prom_getproplen(nodeid, "portid");
2663 		if (len != sizeof (portid))
2664 			continue;
2665 
2666 		portid = -1;
2667 		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
2668 		if (portid == -1)
2669 			continue;
2670 
2671 		bnum = drmach_portid2bnum(portid);
2672 
2673 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
2674 			/* portid translated to an invalid board number */
2675 			cmn_err(CE_WARN, "OBP node 0x%x has"
2676 				" invalid property value, %s=%u",
2677 				nodeid, "portid", portid);
2678 
2679 			/* clean up */
2680 			drmach_array_dispose(drmach_boards,
2681 			    drmach_board_dispose);
2682 			drmach_gdcd_dispose(gdcd);
2683 			mutex_exit(&drmach_i_lock);
2684 			return (-1);
2685 		} else if (id == NULL) {
2686 			drmach_board_t	*bp;
2687 			l1_slot_stat_t	*dcd;
2688 			int		exp, slot;
2689 
2690 			bp = drmach_board_new(bnum);
2691 			bp->assigned = !drmach_initialized;
2692 			bp->powered = !drmach_initialized;
2693 
2694 			exp = DRMACH_BNUM2EXP(bnum);
2695 			slot = DRMACH_BNUM2SLOT(bnum);
2696 			dcd = &gdcd->dcd_slot[exp][slot];
2697 			bp->stardrb_offset =
2698 			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
2699 			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
2700 			    bp->stardrb_offset);
2701 
2702 			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
2703 			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
2704 				bp->flags |= DRMACH_NULL_PROC_LPA;
2705 				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
2706 			}
2707 		}
2708 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
2709 
2710 	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
2711 
2712 	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
2713 		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
2714 				gdcd->dcd_testcage_log2_mbytes_align);
2715 		drmach_iocage_paddr =
2716 			(uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
2717 		drmach_iocage_size =
2718 			1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);
2719 
2720 		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
2721 			drmach_iocage_size, VM_SLEEP);
2722 		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
2723 			mmu_btop(drmach_iocage_paddr),
2724 			PROT_READ | PROT_WRITE,
2725 			HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
2726 
2727 		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
2728 			gdcd->dcd_testcage_log2_mbytes_size,
2729 			gdcd->dcd_testcage_log2_mbytes_align,
2730 			gdcd->dcd_testcage_mbyte_PA);
2731 		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
2732 			drmach_iocage_size, drmach_iocage_paddr,
2733 			drmach_iocage_vaddr);
2734 	}
2735 
2736 	if (drmach_iocage_size == 0) {
2737 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2738 		drmach_boards = NULL;
2739 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2740 		drmach_gdcd_dispose(gdcd);
2741 		mutex_exit(&drmach_i_lock);
2742 		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
2743 		return (-1);
2744 	}
2745 
2746 	drmach_gdcd_dispose(gdcd);
2747 
2748 	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
2749 	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
2750 	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
2751 	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
2752 	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);
2753 
2754 	mutex_enter(&cpu_lock);
2755 	mutex_enter(&drmach_iocage_lock);
2756 	ASSERT(drmach_iocage_is_busy == 0);
2757 	drmach_iocage_is_busy = 1;
2758 	drmach_iocage_mem_scrub(drmach_iocage_size);
2759 	drmach_iocage_is_busy = 0;
2760 	cv_signal(&drmach_iocage_cv);
2761 	mutex_exit(&drmach_iocage_lock);
2762 	mutex_exit(&cpu_lock);
2763 
2764 
2765 	if (drmach_mbox_init() == -1) {
2766 		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
2767 	}
2768 
2769 	/*
2770 	 * Walk immediate children of devinfo root node and hold
2771 	 * all devinfo branches of interest.
2772 	 */
2773 	hold = 1;
2774 	rdip = ddi_root_node();
2775 
2776 	ndi_devi_enter(rdip, &circ);
2777 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2778 	ndi_devi_exit(rdip, circ);
2779 
2780 	drmach_initialized = 1;
2781 
2782 	/*
2783 	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
2784 	 * rev introducing the axq_iopause_*_all interfaces should not regress
2785 	 * when installed without the DR rev using those interfaces. The default
2786 	 * is for iopause to be enabled/disabled during axq suspend/resume. By
2787 	 * setting the following axq flag to zero, axq will not enable iopause
2788 	 * during suspend/resume, instead DR will call the axq_iopause_*_all
2789 	 * interfaces during drmach_copy_rename.
2790 	 */
2791 	axq_suspend_iopause = 0;
2792 
2793 	mutex_exit(&drmach_i_lock);
2794 
2795 	return (0);
2796 }
2797 
2798 static void
2799 drmach_fini(void)
2800 {
2801 	dev_info_t	*rdip;
2802 	int		hold, circ;
2803 
2804 	if (drmach_initialized) {
2805 		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2806 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2807 		drmach_boards = NULL;
2808 		rw_exit(&drmach_boards_rwlock);
2809 
2810 		mutex_destroy(&drmach_slice_table_lock);
2811 		mutex_destroy(&drmach_xt_mb_lock);
2812 		mutex_destroy(&drmach_bus_sync_lock);
2813 		cv_destroy(&drmach_iocage_cv);
2814 		mutex_destroy(&drmach_iocage_lock);
2815 
2816 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2817 
2818 		/*
2819 		 * Walk immediate children of the root devinfo node
2820 		 * releasing holds acquired on branches in drmach_init()
2821 		 */
2822 		hold = 0;
2823 		rdip = ddi_root_node();
2824 
2825 		ndi_devi_enter(rdip, &circ);
2826 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2827 		ndi_devi_exit(rdip, circ);
2828 
2829 		drmach_initialized = 0;
2830 	}
2831 
2832 	drmach_mbox_fini();
2833 	if (drmach_xt_mb != NULL) {
2834 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2835 		    drmach_xt_mb_size);
2836 	}
2837 	rw_destroy(&drmach_boards_rwlock);
2838 	mutex_destroy(&drmach_i_lock);
2839 }
2840 
2841 static void
2842 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2843 {
2844 	kpreempt_disable();
2845 
2846 	/* get register address, read madr value */
2847 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2848 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2849 	} else {
2850 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2851 	}
2852 
2853 	kpreempt_enable();
2854 }
2855 
2856 
2857 static uint64_t *
2858 drmach_prep_mc_rename(uint64_t *p, int local,
2859 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2860 {
2861 	int bank;
2862 
2863 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2864 		uint64_t madr, bank_offset;
2865 
2866 		/* fetch mc's bank madr register value */
2867 		drmach_mem_read_madr(mp, bank, &madr);
2868 		if (madr & DRMACH_MC_VALID_MASK) {
2869 			uint64_t bankpa;
2870 
2871 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2872 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2873 			bankpa = new_basepa + bank_offset;
2874 
2875 			/* encode new base pa into madr */
2876 			madr &= ~DRMACH_MC_UM_MASK;
2877 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2878 			madr &= ~DRMACH_MC_LM_MASK;
2879 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2880 
2881 			if (local)
2882 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2883 			else
2884 				*p++ = DRMACH_MC_ADDR(mp, bank);
2885 
2886 			*p++ = madr;
2887 		}
2888 	}
2889 
2890 	return (p);
2891 }
2892 
2893 static uint64_t *
2894 drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2895 {
2896 	drmach_board_t	*bp;
2897 	int		 rv;
2898 	int		 idx;
2899 	drmachid_t	 id;
2900 	uint64_t	 last_scsr_pa = 0;
2901 
2902 	/* memory is always in slot 0 */
2903 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2904 
2905 	/* look up slot 1 board on same expander */
2906 	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2907 	rv = drmach_array_get(drmach_boards, idx, &id);
2908 	bp = id; /* bp will be NULL if board not found */
2909 
2910 	/* look up should never be out of bounds */
2911 	ASSERT(rv == 0);
2912 
2913 	/* nothing to do when board is not found or has no devices */
2914 	if (rv == -1 || bp == NULL || bp->devices == NULL)
2915 		return (p);
2916 
2917 	rv = drmach_array_first(bp->devices, &idx, &id);
2918 	while (rv == 0) {
2919 		if (DRMACH_IS_IO_ID(id)) {
2920 			drmach_io_t *io = id;
2921 
2922 			/*
2923 			 * Skip all non-Schizo IO devices (only IO nodes
2924 			 * that are Schizo devices have non-zero scsr_pa).
2925 			 * Filter out "other" leaf to avoid writing to the
2926 			 * same Schizo Control/Status Register twice.
2927 			 */
2928 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2929 				uint64_t scsr;
2930 
2931 				scsr  = lddphysio(io->scsr_pa);
2932 				scsr &= ~(DRMACH_LPA_BASE_MASK |
2933 						DRMACH_LPA_BND_MASK);
2934 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2935 				scsr |= DRMACH_PA_TO_LPA_BND(
2936 					new_basepa + DRMACH_MEM_SLICE_SIZE);
2937 
2938 				*p++ = io->scsr_pa;
2939 				*p++ = scsr;
2940 
2941 				last_scsr_pa = io->scsr_pa;
2942 			}
2943 		}
2944 		rv = drmach_array_next(bp->devices, &idx, &id);
2945 	}
2946 
2947 	return (p);
2948 }
2949 
2950 /*
2951  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2952  * The latter is returned when drmach_rename fails to idle a Panther MC and
2953  * is used to identify the MC for error reporting.
2954  */
2955 static uint64_t *
2956 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2957 {
2958 	/* only slot 0 has memory */
2959 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2960 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2961 
2962 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2963 		ASSERT(DRMACH_IS_MEM_ID(mp));
2964 
2965 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2966 			if (local) {
2967 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2968 				*p++ = (uintptr_t)mp;
2969 			}
2970 		} else if (!local) {
2971 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2972 			*p++ = (uintptr_t)mp;
2973 		}
2974 	}
2975 
2976 	return (p);
2977 }
2978 
2979 static sbd_error_t *
2980 drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
2981 	uint64_t t_slice_offset, caddr_t buf, int buflen)
2982 {
2983 	_NOTE(ARGUNUSED(buflen))
2984 
2985 	uint64_t		*p = (uint64_t *)buf, *q;
2986 	sbd_error_t		*err;
2987 	int			 rv;
2988 	drmach_mem_t		*mp, *skip_mp;
2989 	uint64_t		 s_basepa, t_basepa;
2990 	uint64_t		 s_new_basepa, t_new_basepa;
2991 
2992 	/* verify supplied buffer space is adequate */
2993 	ASSERT(buflen >=
2994 		/* addr for all possible MC banks */
2995 		(sizeof (uint64_t) * 4 * 4 * 18) +
2996 		/* list section terminator */
2997 		(sizeof (uint64_t) * 1) +
2998 		/* addr/id tuple for local Panther MC idle reg */
2999 		(sizeof (uint64_t) * 2) +
3000 		/* list section terminator */
3001 		(sizeof (uint64_t) * 1) +
3002 		/* addr/id tuple for 2 boards with 4 Panther MC idle regs */
3003 		(sizeof (uint64_t) * 2 * 2 * 4) +
3004 		/* list section terminator */
3005 		(sizeof (uint64_t) * 1) +
3006 		/* addr/val tuple for 1 proc with 4 MC banks */
3007 		(sizeof (uint64_t) * 2 * 4) +
3008 		/* list section terminator */
3009 		(sizeof (uint64_t) * 1) +
3010 		/* addr/val tuple for 2 boards w/ 2 schizos each */
3011 		(sizeof (uint64_t) * 2 * 2 * 2) +
3012 		/* addr/val tuple for 2 boards w/ 16 MC banks each */
3013 		(sizeof (uint64_t) * 2 * 2 * 16) +
3014 		/* list section terminator */
3015 		(sizeof (uint64_t) * 1) +
3016 		/* addr/val tuple for 18 AXQs w/ two slots each */
3017 		(sizeof (uint64_t) * 2 * 2 * 18) +
3018 		/* list section terminator */
3019 		(sizeof (uint64_t) * 1) +
3020 		/* list terminator */
3021 		(sizeof (uint64_t) * 1));
3022 
3023 	/* copy bank list to rename script */
3024 	mutex_enter(&drmach_bus_sync_lock);
3025 	for (q = drmach_bus_sync_list; *q; q++, p++)
3026 		*p = *q;
3027 	mutex_exit(&drmach_bus_sync_lock);
3028 
3029 	/* list section terminator */
3030 	*p++ = 0;
3031 
3032 	/*
3033 	 * Write idle script for MC on this processor.  A script will be
3034 	 * produced only if this is a Panther processor on the source or
3035 	 * target board.
3036 	 */
3037 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3038 		p = drmach_prep_pn_mc_idle(p, s_mp, 1);
3039 
3040 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3041 		p = drmach_prep_pn_mc_idle(p, t_mp, 1);
3042 
3043 	/* list section terminator */
3044 	*p++ = 0;
3045 
3046 	/*
3047 	 * Write idle script for all other MCs on source and target
3048 	 * Panther boards.
3049 	 */
3050 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3051 		p = drmach_prep_pn_mc_idle(p, s_mp, 0);
3052 
3053 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3054 		p = drmach_prep_pn_mc_idle(p, t_mp, 0);
3055 
3056 	/* list section terminator */
3057 	*p++ = 0;
3058 
3059 	/*
3060 	 * Step 1:	Write source base address to target MC
3061 	 *		with present bit off.
3062 	 * Step 2:	Now rewrite target reg with present bit on.
3063 	 */
3064 	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
3065 	ASSERT(err == NULL);
3066 	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
3067 	ASSERT(err == NULL);
3068 
3069 	/* exchange base pa. include slice offset in new target base pa */
3070 	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
3071 	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
3072 			t_slice_offset;
3073 
3074 	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
3075 	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);
3076 
3077 	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
3078 		CPU->cpu_id);
3079 
3080 	/*
3081 	 * Write rename script for MC on this processor.  A script will
3082 	 * be produced only if this processor is on the source or target
3083 	 * board.
3084 	 */
3085 
3086 	skip_mp = NULL;
3087 	mp = s_mp->dev.bp->mem;
3088 	while (mp != NULL && skip_mp == NULL) {
3089 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3090 			skip_mp = mp;
3091 			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
3092 			    s_new_basepa);
3093 		}
3094 
3095 		mp = mp->next;
3096 	}
3097 
3098 	mp = t_mp->dev.bp->mem;
3099 	while (mp != NULL && skip_mp == NULL) {
3100 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3101 			skip_mp = mp;
3102 			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
3103 			    t_new_basepa);
3104 		}
3105 
3106 		mp = mp->next;
3107 	}
3108 
3109 	/* list section terminator */
3110 	*p++ = 0;
3111 
3112 	/*
3113 	 * Write rename script for all other MCs on source and target
3114 	 * boards.
3115 	 */
3116 
3117 	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
3118 		if (mp == skip_mp)
3119 			continue;
3120 		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
3121 	}
3122 
3123 	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
3124 		if (mp == skip_mp)
3125 			continue;
3126 		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
3127 	}
3128 
3129 	/* Write rename script for Schizo LPA_BASE/LPA_BND */
3130 	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
3131 	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);
3132 
3133 	/* list section terminator */
3134 	*p++ = 0;
3135 
3136 	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
3137 		DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
3138 		DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));
3139 
3140 	rv = axq_do_casm_rename_script(&p,
3141 		DRMACH_PA_TO_SLICE(s_new_basepa),
3142 		DRMACH_PA_TO_SLICE(t_new_basepa));
3143 	if (rv == DDI_FAILURE)
3144 		return (DRMACH_INTERNAL_ERROR());
3145 
3146 	/* list section & final terminator */
3147 	*p++ = 0;
3148 	*p++ = 0;
3149 
3150 #ifdef DEBUG
3151 	{
3152 		uint64_t *q = (uint64_t *)buf;
3153 
3154 		/* paranoia */
3155 		ASSERT((caddr_t)p <= buf + buflen);
3156 
3157 		DRMACH_PR("MC bank base pa list:\n");
3158 		while (*q) {
3159 			uint64_t a = *q++;
3160 
3161 			DRMACH_PR("0x%lx\n", a);
3162 		}
3163 
3164 		/* skip terminator */
3165 		q += 1;
3166 
3167 		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
3168 		while (*q) {
3169 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3170 			q += 2;
3171 		}
3172 
3173 		/* skip terminator */
3174 		q += 1;
3175 
3176 		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
3177 		while (*q) {
3178 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3179 			q += 2;
3180 		}
3181 
3182 		/* skip terminator */
3183 		q += 1;
3184 
3185 		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
3186 		while (*q) {
3187 			uint64_t r = *q++;	/* register address */
3188 			uint64_t v = *q++;	/* new register value */
3189 
3190 			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
3191 				r,
3192 				v,
3193 				DRMACH_MC_UM_TO_PA(v)|DRMACH_MC_LM_TO_PA(v));
3194 		}
3195 
3196 		/* skip terminator */
3197 		q += 1;
3198 
3199 		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
3200 		while (*q) {
3201 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3202 			q += 2;
3203 		}
3204 
3205 		/* skip terminator */
3206 		q += 1;
3207 
3208 		DRMACH_PR("AXQ reprogramming script:\n");
3209 		while (*q) {
3210 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3211 			q += 2;
3212 		}
3213 
3214 		/* verify final terminator is present */
3215 		ASSERT(*(q + 1) == 0);
3216 
3217 		DRMACH_PR("copy-rename script 0x%p, len %d\n",
3218 			buf, (int)((intptr_t)p - (intptr_t)buf));
3219 
3220 		if (drmach_debug)
3221 			DELAY(10000000);
3222 	}
3223 #endif
3224 
3225 	return (NULL);
3226 }
3227 
3228 static void
3229 drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
3230 {
3231 	int		 rv;
3232 
3233 	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));
3234 
3235 	if (bp->devices) {
3236 		int		 d_idx;
3237 		drmachid_t	 d_id;
3238 
3239 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3240 		while (rv == 0) {
3241 			if (DRMACH_IS_CPU_ID(d_id)) {
3242 				drmach_cpu_t	*cp = d_id;
3243 				processorid_t	 cpuid = cp->cpuid;
3244 
3245 				mutex_enter(&cpu_lock);
3246 				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
3247 					drmach_xt_mb[cpuid] = 0x80 | slice;
3248 				mutex_exit(&cpu_lock);
3249 			}
3250 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3251 		}
3252 	}
3253 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
3254 		drmach_board_t	*s1bp = NULL;
3255 
3256 		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
3257 		    (void *) &s1bp);
3258 		if (rv == 0 && s1bp != NULL) {
3259 			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
3260 			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
3261 			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
3262 		}
3263 	}
3264 }
3265 
/*
 * Prepare a copy-rename operation from source memory unit s_id to target
 * memory unit t_id.
 *
 *   t_id		target memory unit (must be a mem id)
 *   t_slice_offset	offset within the slice subtracted when computing
 *			the target copy base pa
 *   s_id		source memory unit (must be a mem id)
 *   c_ml		copy memlist; rewritten IN PLACE so that every
 *			address is relative to the source copy base pa
 *   cr_id		out: handle for drmach_copy_rename() and
 *			drmach_copy_rename_fini()
 *
 * The drmach_rename text and the script built by
 * drmach_prep_rename_script() are staged in the cpu sram page mapped at
 * drmach_cpu_sram_va, the CDC is disabled and flushed, and both memory
 * units are marked busy.
 *
 * Returns NULL on success.  In that case drmach_slice_table_lock and
 * drmach_xt_mb_lock are STILL HELD on return; drmach_copy_rename_fini()
 * releases them.  On failure an sbd_error_t is returned and neither lock
 * has been taken.
 *
 * NOTE(review): c_ml is dereferenced without a NULL check, and its
 * addresses stay rebased even when an error is returned -- confirm that
 * callers expect this.
 */
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
{
	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
	extern void drmach_rename_end(void);

	drmach_mem_t	*s_mp, *t_mp;
	struct memlist	*x_ml;
	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
	int		 len;
	caddr_t		 bp, wp;
	uint_t		*p, *q;
	sbd_error_t	*err;
	tte_t		*tte;
	drmach_copy_rename_t *cr;

	if (!DRMACH_IS_MEM_ID(s_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	if (!DRMACH_IS_MEM_ID(t_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	s_mp = s_id;
	t_mp = t_id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
	if (err)
		return (err);

	/* calculate slice offset mask from slice size */
	off_mask = DRMACH_MEM_SLICE_SIZE - 1;

	/* calculate source and target base pa */
	s_copybasepa = c_ml->address;
	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);

	/* paranoia */
	ASSERT((c_ml->address & off_mask) >= t_slice_offset);

	/* adjust copy memlist addresses to be relative to copy base pa */
	x_ml = c_ml;
	while (x_ml != NULL) {
		x_ml->address -= s_copybasepa;
		x_ml = x_ml->next;
	}

#ifdef DEBUG
	{
	uint64_t s_basepa, s_size, t_size;

	/* walk to the last memlist element to report the end of the span */
	x_ml = c_ml;
	while (x_ml->next != NULL)
		x_ml = x_ml->next;

	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
		s_copybasepa,
		s_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
		t_copybasepa,
		t_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("copy memlist (relative to copy base pa):\n");
	DRMACH_MEMLIST_DUMP(c_ml);

	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(s_id, &s_size);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(t_id, &t_size);
	ASSERT(err == NULL);

	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
		s_basepa, s_size);
	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
		t_basepa, t_size);
	}
#endif /* DEBUG */

	/* Map in appropriate cpu sram page */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	/* bp stays at the start of the page; wp is the current write point */
	bp = wp = drmach_cpu_sram_va;

	/* Make sure the rename routine will fit */
	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
	ASSERT(wp + len < bp + PAGESIZE);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)wp;
	q = (uint_t *)drmach_rename;
	while (q < (uint_t *)drmach_rename_end)
		*p++ = *q++;

	/* zero remainder. standard bzero not designed to work in nc space */
	while (p < (uint_t *)(bp + PAGESIZE))
		*p++ = 0;

	DRMACH_PR("drmach_rename function 0x%p, len %d\n", wp, len);
	/* the script starts after the text, rounded up to 16 bytes */
	wp += (len + 15) & ~15;

	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset,
		wp, PAGESIZE - (wp - bp));
	if (err) {
		/*
		 * Common error exit (also reached by goto from below):
		 * cross-call vtag_flushpage_tl1 on this cpu for the sram
		 * va loaded above, then return err.
		 */
cleanup:
		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
			(uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
		return (err);
	}

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
		goto cleanup;
	}

	/* mark both memory units busy */
	t_mp->dev.busy++;
	s_mp->dev.busy++;

	/* build the handle; isa tags it for the ASSERTs in the consumers */
	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
	    VM_SLEEP);
	cr->isa = (void *)drmach_copy_rename_init;
	cr->data = wp;
	cr->c_ml = c_ml;
	cr->s_mp = s_mp;
	cr->t_mp = t_mp;
	cr->s_copybasepa = s_copybasepa;
	cr->t_copybasepa = t_copybasepa;
	cr->ecode = DRMACH_CR_OK;

	/* both locks taken here stay held until drmach_copy_rename_fini() */
	mutex_enter(&drmach_slice_table_lock);

	mutex_enter(&drmach_xt_mb_lock);
	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

	/*
	 * Post slice updates for the cpus of each board.  The slices are
	 * crossed: the source board is given the slice of the target copy
	 * base pa and the target board the slice of the source copy base pa.
	 */
	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
			DRMACH_PA_TO_SLICE(t_copybasepa));
	}
	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
			DRMACH_PA_TO_SLICE(s_copybasepa));
	}

	*cr_id = cr;
	return (NULL);
}
3421 
/*
 * Debug statistics written by drmach_copy_rename() and printed by
 * drmach_copy_rename_fini(): the number of cpus asked to wait
 * (ncpus - 1) and the number of polling iterations spent waiting for them.
 */
int drmach_rename_count;
int drmach_rename_ntries;
3424 
3425 sbd_error_t *
3426 drmach_copy_rename_fini(drmachid_t id)
3427 {
3428 	drmach_copy_rename_t	*cr = id;
3429 	sbd_error_t		*err = NULL;
3430 	dr_mbox_msg_t		*obufp;
3431 
3432 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3433 
3434 	axq_cdc_enable_all();
3435 
3436 	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3437 		(uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
3438 
3439 	switch (cr->ecode) {
3440 	case DRMACH_CR_OK:
3441 		break;
3442 	case DRMACH_CR_MC_IDLE_ERR: {
3443 		dev_info_t	*dip = NULL;
3444 		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
3445 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3446 
3447 		ASSERT(DRMACH_IS_MEM_ID(mp));
3448 
3449 		err = drmach_get_dip(mp, &dip);
3450 
3451 		ASSERT(err == NULL);
3452 		ASSERT(dip != NULL);
3453 
3454 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3455 		(void) ddi_pathname(dip, path);
3456 		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
3457 		    "copy-rename aborted", path, mp->dev.bp->cm.name);
3458 		kmem_free(path, MAXPATHLEN);
3459 		break;
3460 	}
3461 	case DRMACH_CR_IOPAUSE_ERR:
3462 		ASSERT((uintptr_t)cr->earg >= 0 &&
3463 		    (uintptr_t)cr->earg < AXQ_MAX_EXP);
3464 
3465 		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
3466 		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
3467 		    " to copy-rename", (uintptr_t)cr->earg);
3468 		break;
3469 	case DRMACH_CR_ONTRAP_ERR:
3470 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3471 		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
3472 		    "memory error");
3473 		break;
3474 	default:
3475 		err = DRMACH_INTERNAL_ERROR();
3476 		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
3477 		    cr->ecode);
3478 		break;
3479 	}
3480 
3481 #ifdef DEBUG
3482 	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
3483 	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
3484 		int	i;
3485 		for (i = 0; i < NCPU; i++) {
3486 			if (drmach_xt_mb[i])
3487 				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
3488 		}
3489 	}
3490 #endif
3491 	mutex_exit(&drmach_xt_mb_lock);
3492 
3493 	if (cr->c_ml != NULL)
3494 		memlist_delete(cr->c_ml);
3495 
3496 	cr->t_mp->dev.busy--;
3497 	cr->s_mp->dev.busy--;
3498 
3499 	if (err) {
3500 		mutex_exit(&drmach_slice_table_lock);
3501 		goto done;
3502 	}
3503 
3504 	/* update casm shadow for target and source board */
3505 	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
3506 	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
3507 	mutex_exit(&drmach_slice_table_lock);
3508 
3509 	mutex_enter(&drmach_bus_sync_lock);
3510 	drmach_bus_sync_list_update();
3511 	mutex_exit(&drmach_bus_sync_lock);
3512 
3513 	/*
3514 	 * Make a good-faith effort to notify the SC about the copy-rename, but
3515 	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
3516 	 * will duplicate the update.
3517 	 */
3518 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
3519 	mutex_enter(&drmach_slice_table_lock);
3520 	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
3521 	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
3522 	mutex_exit(&drmach_slice_table_lock);
3523 	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
3524 		(caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
3525 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
3526 
3527 done:
3528 	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));
3529 
3530 	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
3531 		drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);
3532 
3533 	return (err);
3534 }
3535 
/*
 * Debug knob: when non-zero, drmach_copy_rename() runs a short busy loop
 * after each 32-byte copy.
 */
int drmach_slow_copy = 0;
3537 
/*
 * Execute the copy-rename described by the handle created in
 * drmach_copy_rename_init().  The caller must hold cpu_lock and cr->ecode
 * must be DRMACH_CR_OK on entry.
 *
 * Sequence: pause slot1 IO, cross-call every other ready cpu into
 * drmach_rename_wait, disable CE reporting and interrupts, copy the memlist
 * from source to target 32 bytes at a time under on_trap protection, flush
 * the E$, run the rename routine staged at drmach_cpu_sram_va, flush the
 * caches again, restore the saved state and finally release the waiting
 * cpus with drmach_rename_done (or drmach_rename_abort on failure).
 *
 * Nothing is returned; failures are left in cr->ecode / cr->earg for
 * drmach_copy_rename_fini() to translate.
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;
	uint64_t		 caddr;
	cpuset_t		 cpuset;
	uint_t			 pstate;
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		/* NOTE(review): exp is unsigned, so "exp >= 0" is always true */
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* every ready cpu except this one is asked to wait */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/*
	 * Poll until drmach_xt_ready reaches count or the tries run out.
	 * NOTE(review): the outcome is not checked; execution continues
	 * even if the counter never reached count.
	 */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];

	/* parameters for the inline E$ flushes below */
	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base pa */
		s_pa = cr->s_copybasepa + ml->address;
		t_pa = cr->t_copybasepa + ml->address;
		nbytes = ml->size;

		/*
		 * NOTE(review): the loop ends only when nbytes reaches
		 * exactly zero, so it presumably relies on ml->size being
		 * a multiple of 32 -- confirm against the memlist producer.
		 */
		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				uint64_t i = 13 * 50;
				while (i--)
					;
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename
	 *
	 * Calls the copy of drmach_rename that drmach_copy_rename_init()
	 * placed at drmach_cpu_sram_va, passing the script address and
	 * the locations that receive the error code and argument.
	 */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

copy_rename_end:

	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	/* waiting cpus are told to abort rather than finish on any error */
	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 *
	 * NOTE(review): when the on_trap branch above jumps here, the
	 * earlier cheetah block that sets drmach_iocage_is_busy may not
	 * have run, yet the flag is cleared here regardless -- confirm
	 * this is intended.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the cpus parked in drmach_rename_wait */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3773 
/*
 * Forward declarations of the I/O device operations that drmach_io_new()
 * installs as the dispose, release and status entry points.
 */
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3777 
3778 static sbd_error_t *
3779 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3780 {
3781 	drmach_node_t	*node = proto->node;
3782 	sbd_error_t	*err;
3783 	drmach_reg_t	 regs[3];
3784 	int		 rv;
3785 	int		 len = 0;
3786 
3787 	rv = node->n_getproplen(node, "reg", &len);
3788 	if (rv != 0 || len != sizeof (regs)) {
3789 		sbd_error_t *err;
3790 
3791 		/* pci nodes are expected to have regs */
3792 		err = drerr_new(1, ESTC_GETPROP,
3793 			"Device Node 0x%x: property %s",
3794 			(uint_t)node->get_dnode(node), "reg");
3795 		return (err);
3796 	}
3797 
3798 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3799 	if (rv) {
3800 		sbd_error_t *err;
3801 
3802 		err = drerr_new(1, ESTC_GETPROP,
3803 			"Device Node 0x%x: property %s",
3804 			(uint_t)node->get_dnode(node), "reg");
3805 
3806 		return (err);
3807 	}
3808 
3809 	/*
3810 	 * Fix up unit number so that Leaf A has a lower unit number
3811 	 * than Leaf B.
3812 	 */
3813 	if ((proto->portid % 2) != 0) {
3814 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3815 			proto->unum = 0;
3816 		else
3817 			proto->unum = 1;
3818 	} else {
3819 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3820 			proto->unum = 2;
3821 		else
3822 			proto->unum = 3;
3823 	}
3824 
3825 	err = drmach_io_new(proto, idp);
3826 	if (err == NULL) {
3827 		drmach_io_t *self = *idp;
3828 
3829 		/* reassemble 64-bit base address */
3830 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3831 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3832 	}
3833 
3834 	return (err);
3835 }
3836 
3837 static sbd_error_t *
3838 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3839 {
3840 	drmach_io_t	*ip;
3841 
3842 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3843 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3844 	ip->dev.node = drmach_node_dup(proto->node);
3845 	ip->dev.cm.isa = (void *)drmach_io_new;
3846 	ip->dev.cm.dispose = drmach_io_dispose;
3847 	ip->dev.cm.release = drmach_io_release;
3848 	ip->dev.cm.status = drmach_io_status;
3849 
3850 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3851 		ip->dev.type, ip->dev.unum);
3852 
3853 	*idp = (drmachid_t)ip;
3854 	return (NULL);
3855 }
3856 
3857 static void
3858 drmach_io_dispose(drmachid_t id)
3859 {
3860 	drmach_io_t *self;
3861 
3862 	ASSERT(DRMACH_IS_IO_ID(id));
3863 
3864 	self = id;
3865 	if (self->dev.node)
3866 		drmach_node_dispose(self->dev.node);
3867 
3868 	kmem_free(self, sizeof (*self));
3869 }
3870 
3871 /*ARGSUSED*/
3872 sbd_error_t *
3873 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3874 {
3875 	drmach_board_t	*bp = (drmach_board_t *)id;
3876 	sbd_error_t	*err = NULL;
3877 
3878 	if (id && DRMACH_IS_BOARD_ID(id)) {
3879 		switch (cmd) {
3880 			case SBD_CMD_TEST:
3881 			case SBD_CMD_STATUS:
3882 			case SBD_CMD_GETNCM:
3883 				break;
3884 			case SBD_CMD_CONNECT:
3885 				if (bp->connected)
3886 					err = drerr_new(0, ESBD_STATE, NULL);
3887 
3888 				if (bp->cond == SBD_COND_UNUSABLE)
3889 					err = drerr_new(0,
3890 						ESBD_FATAL_STATE, NULL);
3891 				break;
3892 			case SBD_CMD_DISCONNECT:
3893 				if (!bp->connected)
3894 					err = drerr_new(0, ESBD_STATE, NULL);
3895 
3896 				if (bp->cond == SBD_COND_UNUSABLE)
3897 					err = drerr_new(0,
3898 						ESBD_FATAL_STATE, NULL);
3899 				break;
3900 			default:
3901 				if (bp->cond == SBD_COND_UNUSABLE)
3902 					err = drerr_new(0,
3903 						ESBD_FATAL_STATE, NULL);
3904 				break;
3905 
3906 		}
3907 	}
3908 
3909 	return (err);
3910 }
3911 
3912 /*ARGSUSED*/
3913 sbd_error_t *
3914 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3915 {
3916 	return (NULL);
3917 }
3918 
3919 sbd_error_t *
3920 drmach_board_assign(int bnum, drmachid_t *id)
3921 {
3922 	sbd_error_t	*err = NULL;
3923 	caddr_t		obufp;
3924 
3925 	if (!drmach_initialized && drmach_init() == -1) {
3926 		err = DRMACH_INTERNAL_ERROR();
3927 	}
3928 
3929 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
3930 
3931 	if (!err) {
3932 		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
3933 			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
3934 		} else {
3935 			drmach_board_t	*bp;
3936 
3937 			if (*id)
3938 				rw_downgrade(&drmach_boards_rwlock);
3939 
3940 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
3941 			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
3942 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
3943 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
3944 
3945 			if (!err) {
3946 				bp = *id;
3947 				if (!*id)
3948 					bp = *id  =
3949 					    (drmachid_t)drmach_board_new(bnum);
3950 				bp->assigned = 1;
3951 			}
3952 		}
3953 	}
3954 	rw_exit(&drmach_boards_rwlock);
3955 	return (err);
3956 }
3957 
3958 static uint_t
3959 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3960 {
3961 	uint_t	port, port_start, port_end;
3962 	uint_t	non_panther_cpus = 0;
3963 	uint_t	impl;
3964 
3965 	ASSERT(gdcd != NULL);
3966 
3967 	/*
3968 	 * Determine PRD port indices based on slot location.
3969 	 */
3970 	switch (slot) {
3971 	case 0:
3972 		port_start = 0;
3973 		port_end = 3;
3974 		break;
3975 	case 1:
3976 		port_start = 4;
3977 		port_end = 5;
3978 		break;
3979 	default:
3980 		ASSERT(0);
3981 		/* check all */
3982 		port_start = 0;
3983 		port_end = 5;
3984 		break;
3985 	}
3986 
3987 	for (port = port_start; port <= port_end; port++) {
3988 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3989 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3990 			/*
3991 			 * This Safari port passed POST and represents a
3992 			 * cpu, so check the implementation.
3993 			 */
3994 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3995 			    & 0xffff;
3996 
3997 			switch (impl) {
3998 			case CHEETAH_IMPL:
3999 			case CHEETAH_PLUS_IMPL:
4000 			case JAGUAR_IMPL:
4001 				non_panther_cpus++;
4002 				break;
4003 			case PANTHER_IMPL:
4004 				break;
4005 			default:
4006 				ASSERT(0);
4007 				non_panther_cpus++;
4008 				break;
4009 			}
4010 		}
4011 	}
4012 
4013 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4014 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4015 
4016 	return (non_panther_cpus);
4017 }
4018 
4019 sbd_error_t *
4020 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
4021 {
4022 	_NOTE(ARGUNUSED(opts))
4023 
4024 	drmach_board_t		*bp = (drmach_board_t *)id;
4025 	sbd_error_t		*err;
4026 	dr_mbox_msg_t		*obufp;
4027 	gdcd_t			*gdcd = NULL;
4028 	uint_t			exp, slot;
4029 	sc_gptwocfg_cookie_t	scc;
4030 	int			panther_pages_enabled;
4031 
4032 	if (!DRMACH_IS_BOARD_ID(id))
4033 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4034 
4035 	/*
4036 	 * Build the casm info portion of the CLAIM message.
4037 	 */
4038 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4039 	mutex_enter(&drmach_slice_table_lock);
4040 	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
4041 	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
4042 	mutex_exit(&drmach_slice_table_lock);
4043 	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
4044 		sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4045 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4046 
4047 	if (err) {
4048 		/*
4049 		 * if mailbox timeout or unrecoverable error from SC,
4050 		 * board cannot be touched.  Mark the status as
4051 		 * unusable.
4052 		 */
4053 		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4054 			(err->e_code == ESTC_MBXRPLY))
4055 				bp->cond = SBD_COND_UNUSABLE;
4056 		return (err);
4057 	}
4058 
4059 	gdcd = drmach_gdcd_new();
4060 	if (gdcd == NULL) {
4061 		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
4062 		    bp->cm.name);
4063 		return (DRMACH_INTERNAL_ERROR());
4064 	}
4065 
4066 	/*
4067 	 * Read CPU SRAM DR buffer offset from GDCD.
4068 	 */
4069 	exp = DRMACH_BNUM2EXP(bp->bnum);
4070 	slot = DRMACH_BNUM2SLOT(bp->bnum);
4071 	bp->stardrb_offset =
4072 	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
4073 	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
4074 	    bp->stardrb_offset);
4075 
4076 	/*
4077 	 * Read board LPA setting from GDCD.
4078 	 */
4079 	bp->flags &= ~DRMACH_NULL_PROC_LPA;
4080 	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
4081 	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
4082 		bp->flags |= DRMACH_NULL_PROC_LPA;
4083 		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
4084 	}
4085 
4086 	/*
4087 	 * XXX Until the Solaris large pages support heterogeneous cpu
4088 	 * domains, DR needs to prevent the addition of non-Panther cpus
4089 	 * to an all-Panther domain with large pages enabled.
4090 	 */
4091 	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
4092 	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
4093 	    panther_pages_enabled && drmach_large_page_restriction) {
4094 		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
4095 		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
4096 		err = drerr_new(0, ESTC_SUPPORT, NULL);
4097 	}
4098 
4099 	if (err == NULL) {
4100 		/* do saf configurator stuff */
4101 		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
4102 		scc = sc_probe_board(bp->bnum);
4103 		if (scc == NULL)
4104 			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
4105 	}
4106 
4107 	if (err) {
4108 		/* flush CDC srams */
4109 		if (axq_cdc_flush_all() != DDI_SUCCESS) {
4110 			goto out;
4111 		}
4112 
4113 		/*
4114 		 * Build the casm info portion of the UNCLAIM message.
4115 		 */
4116 		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4117 		mutex_enter(&drmach_slice_table_lock);
4118 		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4119 		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4120 		mutex_exit(&drmach_slice_table_lock);
4121 		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
4122 			(caddr_t)obufp, sizeof (dr_mbox_msg_t),
4123 			(caddr_t)NULL, 0);
4124 
4125 		kmem_free(obufp, sizeof (dr_mbox_msg_t));
4126 
4127 		/*
4128 		 * we clear the connected flag just in case it would have
4129 		 * been set by a concurrent drmach_board_status() thread
4130 		 * before the UNCLAIM completed.
4131 		 */
4132 		bp->connected = 0;
4133 		goto out;
4134 	}
4135 
4136 	/*
4137 	 * Now that the board has been successfully attached, obtain
4138 	 * platform-specific DIMM serial id information for the board.
4139 	 */
4140 	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4141 	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
4142 		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
4143 	}
4144 
4145 out:
4146 	if (gdcd != NULL)
4147 		drmach_gdcd_dispose(gdcd);
4148 
4149 	return (err);
4150 }
4151 
/*
 * Refresh (or invalidate) the cached CASM entry drmach_slice_table[e] for
 * the expander that board bp belongs to.
 *
 *   bp		board whose expander entry is updated
 *   invalidate	non-zero: clear the cached entry, drop any cached axq dip
 *		that belongs to the same expander, and return without
 *		reading the CASM.  bp must be a slot 0 board in this case.
 *
 * The routine keeps a held devinfo for an "address-extender-queue"
 * instance in static storage across calls and searches for a new slot 0
 * instance whenever the cached one is missing or no longer attached.  If
 * no suitable instance is found, the routine returns without updating the
 * table.
 *
 * The caller must hold drmach_slice_table_lock.
 */
static void
drmach_slice_table_update(drmach_board_t *bp, int invalidate)
{
	/* cached across calls; protected by drmach_slice_table_lock */
	static char		*axq_name = "address-extender-queue";
	static dev_info_t	*axq_dip = NULL;
	static int		 axq_exp = -1;
	static int		 axq_slot;
	int			 e, s, slice;

	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));

	e = DRMACH_BNUM2EXP(bp->bnum);
	if (invalidate) {
		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);

		/* invalidate cached casm value */
		drmach_slice_table[e] = 0;

		/* invalidate cached axq info if for same exp */
		if (e == axq_exp && axq_dip) {
			ndi_rele_devi(axq_dip);
			axq_dip = NULL;
		}
	}

	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
		int i, portid;

		/* search for an attached slot0 axq instance */
		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
			/* release the candidate rejected on the previous pass */
			if (axq_dip)
				ndi_rele_devi(axq_dip);
			axq_dip = ddi_find_devinfo(axq_name, i, 0);
			if (axq_dip && DDI_CF2(axq_dip)) {
				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
				    DDI_PROP_DONTPASS, "portid", -1);
				if (portid == -1) {
					DRMACH_PR("cant get portid of axq "
					    "instance %d\n", i);
					continue;
				}

				/* expander is portid bits 9..5, slot is bit 0 */
				axq_exp = (portid >> 5) & 0x1f;
				axq_slot = portid & 1;

				/* never pick the expander being invalidated */
				if (invalidate && axq_exp == e)
					continue;

				if (axq_slot == 0)
					break;	/* found */
			}
		}

		/* loop ran to completion: nothing suitable was found */
		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
			if (axq_dip) {
				ndi_rele_devi(axq_dip);
				axq_dip = NULL;
			}
			DRMACH_PR("drmach_slice_table_update: failed to "
			    "update axq dip\n");
			return;
		}

	}

	ASSERT(axq_dip);
	ASSERT(axq_slot == 0);

	/* for an invalidate request the table entry was cleared above */
	if (invalidate)
		return;

	s = DRMACH_BNUM2SLOT(bp->bnum);
	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n",
		axq_exp, axq_slot, e, s);

	/* invalidate entry */
	drmach_slice_table[e] &= ~0x20;

	/*
	 * find a slice that routes to expander e. If no match
	 * is found, drmach_slice_table[e] will remain invalid.
	 *
	 * The CASM is a routing table indexed by slice number.
	 * Each element in the table contains permission bits,
	 * a destination expander number and a valid bit. The
	 * valid bit must true for the element to be meaningful.
	 *
	 * CASM entry structure
	 *   Bits 15..6 ignored
	 *   Bit  5	valid
	 *   Bits 0..4	expander number
	 *
	 * NOTE: the for loop is really enumerating the range of slices,
	 * which is ALWAYS equal to the range of expanders. Hence,
	 * AXQ_MAX_EXP is okay to use in this loop.
	 */
	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);

		/* no early exit: the last matching slice is the one kept */
		if ((casm & 0x20) && (casm & 0x1f) == e)
			drmach_slice_table[e] = 0x20 | slice;
	}
}
4255 
4256 /*
4257  * Get base and bound PAs for slot 1 board lpa programming
4258  * If a cpu/mem board is present in the same expander, use slice
4259  * information corresponding to the CASM.  Otherwise, set base and
4260  * bound PAs to 0.
4261  */
4262 static void
4263 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4264 {
4265 	drmachid_t s0id;
4266 
4267 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4268 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4269 
4270 	*basep = *boundp = 0;
4271 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4272 		s0id != 0) {
4273 
4274 		uint32_t slice;
4275 		if ((slice =
4276 			drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4277 				& 0x20) {
4278 
4279 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4280 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4281 		}
4282 	}
4283 }
4284 
4285 
4286 /*
4287  * Reprogram slot 1 lpa's as required.
4288  * The purpose of this routine is maintain the LPA settings of the devices
4289  * in slot 1. To date we know Schizo and Cheetah are the only devices that
4290  * require this attention. The LPA setting must match the slice field in the
4291  * CASM element for the local expander. This field is guaranteed to be
4292  * programmed in accordance with the cacheable address space on the slot 0
4293  * board of the local expander. If no memory is present on the slot 0 board,
4294  * there is no cacheable address space and, hence, the CASM slice field will
4295  * be zero or its valid bit will be false (or both).
4296  */
4297 
4298 static void
4299 drmach_slot1_lpa_set(drmach_board_t *bp)
4300 {
4301 	drmachid_t	id;
4302 	drmach_board_t	*s1bp = NULL;
4303 	int		rv, idx, is_maxcat = 1;
4304 	uint64_t	last_scsr_pa = 0;
4305 	uint64_t	new_basepa, new_boundpa;
4306 
4307 	if (DRMACH_BNUM2SLOT(bp->bnum)) {
4308 		s1bp = bp;
4309 		if (s1bp->devices == NULL) {
4310 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4311 				bp->bnum);
4312 			return;
4313 		}
4314 	} else {
4315 		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
4316 		/* nothing to do when board is not found or has no devices */
4317 		s1bp = id;
4318 		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
4319 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4320 				bp->bnum + 1);
4321 			return;
4322 		}
4323 		ASSERT(DRMACH_IS_BOARD_ID(id));
4324 	}
4325 	mutex_enter(&drmach_slice_table_lock);
4326 	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
4327 	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
4328 			s1bp->bnum, new_basepa, new_boundpa);
4329 
4330 	rv = drmach_array_first(s1bp->devices, &idx, &id);
4331 	while (rv == 0) {
4332 		if (DRMACH_IS_IO_ID(id)) {
4333 			drmach_io_t *io = id;
4334 
4335 			is_maxcat = 0;
4336 
4337 			/*
4338 			 * Skip all non-Schizo IO devices (only IO nodes
4339 			 * that are Schizo devices have non-zero scsr_pa).
4340 			 * Filter out "other" leaf to avoid writing to the
4341 			 * same Schizo Control/Status Register twice.
4342 			 */
4343 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
4344 				uint64_t scsr;
4345 
4346 				scsr  = lddphysio(io->scsr_pa);
4347 				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
4348 					scsr);
4349 				scsr &= ~(DRMACH_LPA_BASE_MASK |
4350 						DRMACH_LPA_BND_MASK);
4351 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
4352 				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);
4353 
4354 				stdphysio(io->scsr_pa, scsr);
4355 				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
4356 					scsr);
4357 
4358 				last_scsr_pa = io->scsr_pa;
4359 			}
4360 		}
4361 		rv = drmach_array_next(s1bp->devices, &idx, &id);
4362 	}
4363 
4364 	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
4365 		extern xcfunc_t	drmach_set_lpa;
4366 
4367 		DRMACH_PR("reprogramming maxcat lpa's");
4368 
4369 		mutex_enter(&cpu_lock);
4370 		rv = drmach_array_first(s1bp->devices, &idx, &id);
4371 		while (rv == 0 && id != NULL) {
4372 			if (DRMACH_IS_CPU_ID(id)) {
4373 				int ntries;
4374 				processorid_t cpuid;
4375 
4376 				cpuid = ((drmach_cpu_t *)id)->cpuid;
4377 
4378 				/*
4379 				 * Check for unconfigured or powered-off
4380 				 * MCPUs.  If CPU_READY flag is clear, the
4381 				 * MCPU cannot be xcalled.
4382 				 */
4383 				if ((cpu[cpuid] == NULL) ||
4384 					(cpu[cpuid]->cpu_flags &
4385 					CPU_READY) == 0) {
4386 
4387 					rv = drmach_array_next(s1bp->devices,
4388 						&idx, &id);
4389 					continue;
4390 				}
4391 
4392 				/*
4393 				 * XXX CHEETAH SUPPORT
4394 				 * for cheetah, we need to clear iocage
4395 				 * memory since it will be used for e$ flush
4396 				 * in drmach_set_lpa.
4397 				 */
4398 				if (drmach_is_cheetah) {
4399 					mutex_enter(&drmach_iocage_lock);
4400 					while (drmach_iocage_is_busy)
4401 						cv_wait(&drmach_iocage_cv,
4402 							&drmach_iocage_lock);
4403 					drmach_iocage_is_busy = 1;
4404 					drmach_iocage_mem_scrub(
4405 						ecache_size * 2);
4406 					mutex_exit(&drmach_iocage_lock);
4407 				}
4408 
4409 				/*
4410 				 * drmach_slice_table[*]
4411 				 *	bit 5	valid
4412 				 *	bit 0:4	slice number
4413 				 *
4414 				 * drmach_xt_mb[*] format for drmach_set_lpa
4415 				 *	bit 7	valid
4416 				 *	bit 6	set null LPA
4417 				 *			(overrides bits 0:4)
4418 				 *	bit 0:4	slice number
4419 				 *
4420 				 * drmach_set_lpa derives processor CBASE and
4421 				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
4422 				 * If bit 6 is set, then CBASE = CBND = 0.
4423 				 * Otherwise, CBASE = slice number;
4424 				 * CBND = slice number + 1.
4425 				 * No action is taken if bit 7 is zero.
4426 				 */
4427 
4428 				mutex_enter(&drmach_xt_mb_lock);
4429 				bzero((void *)drmach_xt_mb,
4430 				    drmach_xt_mb_size);
4431 
4432 				if (new_basepa == 0 && new_boundpa == 0)
4433 					drmach_xt_mb[cpuid] = 0x80 | 0x40;
4434 				else
4435 					drmach_xt_mb[cpuid] = 0x80 |
4436 						DRMACH_PA_TO_SLICE(new_basepa);
4437 
4438 				drmach_xt_ready = 0;
4439 
4440 				xt_one(cpuid, drmach_set_lpa, NULL, NULL);
4441 
4442 				ntries = drmach_cpu_ntries;
4443 				while (!drmach_xt_ready && ntries) {
4444 					DELAY(drmach_cpu_delay);
4445 					ntries--;
4446 				}
4447 				mutex_exit(&drmach_xt_mb_lock);
4448 				drmach_xt_ready = 0;
4449 
4450 				/*
4451 				 * XXX CHEETAH SUPPORT
4452 				 * for cheetah, we need to clear iocage
4453 				 * memory since it was used for e$ flush
4454 				 * in performed drmach_set_lpa.
4455 				 */
4456 				if (drmach_is_cheetah) {
4457 					mutex_enter(&drmach_iocage_lock);
4458 					drmach_iocage_mem_scrub(
4459 						ecache_size * 2);
4460 					drmach_iocage_is_busy = 0;
4461 					cv_signal(&drmach_iocage_cv);
4462 					mutex_exit(&drmach_iocage_lock);
4463 				}
4464 			}
4465 			rv = drmach_array_next(s1bp->devices, &idx, &id);
4466 		}
4467 		mutex_exit(&cpu_lock);
4468 	}
4469 	mutex_exit(&drmach_slice_table_lock);
4470 }
4471 
4472 /*
4473  * Return the number of connected Panther boards in the domain.
4474  */
4475 static int
4476 drmach_panther_boards(void)
4477 {
4478 	int		rv;
4479 	int		b_idx;
4480 	drmachid_t	b_id;
4481 	drmach_board_t	*bp;
4482 	int		npanther = 0;
4483 
4484 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4485 	while (rv == 0) {
4486 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4487 		bp = b_id;
4488 
4489 		if (IS_PANTHER(bp->cpu_impl))
4490 			npanther++;
4491 
4492 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4493 	}
4494 
4495 	return (npanther);
4496 }
4497 
4498 /*ARGSUSED*/
4499 sbd_error_t *
4500 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
4501 {
4502 	drmach_board_t	*bp;
4503 	dr_mbox_msg_t	*obufp;
4504 	sbd_error_t	*err = NULL;
4505 
4506 	sc_gptwocfg_cookie_t	scc;
4507 
4508 	if (!DRMACH_IS_BOARD_ID(id))
4509 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4510 	bp = id;
4511 
4512 	/*
4513 	 * Build the casm info portion of the UNCLAIM message.
4514 	 * This must be done prior to calling for saf configurator
4515 	 * deprobe, to ensure that the associated axq instance
4516 	 * is not detached.
4517 	 */
4518 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4519 	mutex_enter(&drmach_slice_table_lock);
4520 	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4521 
4522 	/*
4523 	 * If disconnecting slot 0 board, update the casm slice table
4524 	 * info now, for use by drmach_slot1_lpa_set()
4525 	 */
4526 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
4527 			drmach_slice_table_update(bp, 1);
4528 
4529 	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4530 	mutex_exit(&drmach_slice_table_lock);
4531 
4532 	/*
4533 	 * Update LPA information for slot1 board
4534 	 */
4535 	drmach_slot1_lpa_set(bp);
4536 
4537 	/* disable and flush CDC */
4538 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
4539 		axq_cdc_enable_all();	/* paranoia */
4540 		err = DRMACH_INTERNAL_ERROR();
4541 	}
4542 
4543 	/*
4544 	 * call saf configurator for deprobe
4545 	 * It's done now before sending an UNCLAIM message because
4546 	 * IKP will probe boards it doesn't know about <present at boot>
4547 	 * prior to unprobing them.  If this happens after sending the
4548 	 * UNCLAIM, it will cause a dstop for domain transgression error.
4549 	 */
4550 
4551 	if (!err) {
4552 		scc = sc_unprobe_board(bp->bnum);
4553 		axq_cdc_enable_all();
4554 		if (scc != NULL) {
4555 			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
4556 		}
4557 	}
4558 
4559 	/*
4560 	 * If disconnecting a board from a Panther domain, wait a fixed-
4561 	 * time delay for pending Safari transactions to complete on the
4562 	 * disconnecting board's processors.  The bus sync list read used
4563 	 * in drmach_shutdown_asm to synchronize with outstanding Safari
4564 	 * transactions assumes no read-bypass-write mode for all memory
4565 	 * controllers.  Since Panther supports read-bypass-write, a
4566 	 * delay is used that is slightly larger than the maximum Safari
4567 	 * timeout value in the Safari/Fireplane Config Reg.
4568 	 */
4569 	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
4570 		clock_t	stime = lbolt;
4571 
4572 		delay(drv_usectohz(drmach_unclaim_usec_delay));
4573 
4574 		stime = lbolt - stime;
4575 		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
4576 		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
4577 	}
4578 
4579 	if (!err) {
4580 		obufp->msgdata.dm_ur.mem_clear = 0;
4581 
4582 		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
4583 			sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4584 
4585 		if (err) {
4586 			/*
4587 			 * if mailbox timeout or unrecoverable error from SC,
4588 			 * board cannot be touched.  Mark the status as
4589 			 * unusable.
4590 			 */
4591 			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4592 				(err->e_code == ESTC_MBXRPLY))
4593 					bp->cond = SBD_COND_UNUSABLE;
4594 			else {
4595 				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
4596 					bp->bnum);
4597 				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
4598 					bp->bnum);
4599 				scc = sc_probe_board(bp->bnum);
4600 				if (scc == NULL) {
4601 					cmn_err(CE_WARN,
4602 					"sc_probe_board failed for bnum=%d",
4603 						bp->bnum);
4604 				} else {
4605 					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
4606 						mutex_enter(
4607 						    &drmach_slice_table_lock);
4608 						drmach_slice_table_update(bp,
4609 						    0);
4610 						mutex_exit(
4611 						    &drmach_slice_table_lock);
4612 					}
4613 					drmach_slot1_lpa_set(bp);
4614 				}
4615 			}
4616 		} else {
4617 			bp->connected = 0;
4618 			/*
4619 			 * Now that the board has been successfully detached,
4620 			 * discard platform-specific DIMM serial id information
4621 			 * for the board.
4622 			 */
4623 			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4624 			    plat_ecc_capability_sc_get(
4625 			    PLAT_ECC_DIMM_SID_MESSAGE)) {
4626 				(void) plat_discard_mem_sids(
4627 				    DRMACH_BNUM2EXP(bp->bnum));
4628 			}
4629 		}
4630 	}
4631 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4632 
4633 	return (err);
4634 }
4635 
4636 static int
4637 drmach_get_portid(drmach_node_t *np)
4638 {
4639 	drmach_node_t	pp;
4640 	int		portid;
4641 	char		type[OBP_MAXPROPNAME];
4642 
4643 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4644 		return (portid);
4645 
4646 	/*
4647 	 * Get the device_type property to see if we should
4648 	 * continue processing this node.
4649 	 */
4650 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4651 		return (-1);
4652 
4653 	/*
4654 	 * If the device is a CPU without a 'portid' property,
4655 	 * it is a CMP core. For such cases, the parent node
4656 	 * has the portid.
4657 	 */
4658 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4659 		if (np->get_parent(np, &pp) != 0)
4660 			return (-1);
4661 
4662 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4663 			return (portid);
4664 	}
4665 
4666 	return (-1);
4667 }
4668 
4669 /*
4670  * This is a helper function to determine if a given
4671  * node should be considered for a dr operation according
4672  * to predefined dr type nodes and the node's name.
4673  * Formal Parameter : The name of a device node.
4674  * Return Value: -1, name does not map to a valid dr type.
4675  *		 A value greater or equal to 0, name is a valid dr type.
4676  */
4677 static int
4678 drmach_name2type_idx(char *name)
4679 {
4680 	int 	index, ntypes;
4681 
4682 	if (name == NULL)
4683 		return (-1);
4684 
4685 	/*
4686 	 * Determine how many possible types are currently supported
4687 	 * for dr.
4688 	 */
4689 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4690 
4691 	/* Determine if the node's name correspond to a predefined type. */
4692 	for (index = 0; index < ntypes; index++) {
4693 		if (strcmp(drmach_name2type[index].name, name) == 0)
4694 			/* The node is an allowed type for dr. */
4695 			return (index);
4696 	}
4697 
4698 	/*
4699 	 * If the name of the node does not map to any of the
4700 	 * types in the array drmach_name2type then the node is not of
4701 	 * interest to dr.
4702 	 */
4703 	return (-1);
4704 }
4705 
4706 static int
4707 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4708 {
4709 	drmach_node_t			*node = args->node;
4710 	drmach_board_cb_data_t		*data = args->data;
4711 	drmach_board_t			*obj = data->obj;
4712 
4713 	int		rv, portid;
4714 	drmachid_t	id;
4715 	drmach_device_t	*device;
4716 	char	name[OBP_MAXDRVNAME];
4717 
4718 	portid = drmach_get_portid(node);
4719 	if (portid == -1) {
4720 		/*
4721 		 * if the node does not have a portid property, then
4722 		 * by that information alone it is known that drmach
4723 		 * is not interested in it.
4724 		 */
4725 		return (0);
4726 	}
4727 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4728 
4729 	/* The node must have a name */
4730 	if (rv)
4731 		return (0);
4732 
4733 	/*
4734 	 * Ignore devices whose portid do not map to this board,
4735 	 * or that their name property is not mapped to a valid
4736 	 * dr device name.
4737 	 */
4738 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4739 	    (drmach_name2type_idx(name) < 0))
4740 		return (0);
4741 
4742 	/*
4743 	 * Create a device data structure from this node data.
4744 	 * The call may yield nothing if the node is not of interest
4745 	 * to drmach.
4746 	 */
4747 	data->err = drmach_device_new(node, obj, portid, &id);
4748 	if (data->err)
4749 		return (-1);
4750 	else if (!id) {
4751 		/*
4752 		 * drmach_device_new examined the node we passed in
4753 		 * and determined that it was either one not of
4754 		 * interest to drmach or the PIM dr layer.
4755 		 * So, it is skipped.
4756 		 */
4757 		return (0);
4758 	}
4759 
4760 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4761 	if (rv) {
4762 		data->err = DRMACH_INTERNAL_ERROR();
4763 		return (-1);
4764 	}
4765 
4766 	device = id;
4767 
4768 #ifdef DEBUG
4769 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4770 	if (DRMACH_IS_IO_ID(id))
4771 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4772 #endif
4773 
4774 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4775 	return (data->err == NULL ? 0 : -1);
4776 }
4777 
4778 sbd_error_t *
4779 drmach_board_find_devices(drmachid_t id, void *a,
4780 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4781 {
4782 	drmach_board_t		*bp = (drmach_board_t *)id;
4783 	sbd_error_t		*err;
4784 	int			 max_devices;
4785 	int			 rv;
4786 	drmach_board_cb_data_t	data;
4787 
4788 	if (!DRMACH_IS_BOARD_ID(id))
4789 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4790 
4791 	max_devices  = plat_max_cpu_units_per_board();
4792 	max_devices += plat_max_mem_units_per_board();
4793 	max_devices += plat_max_io_units_per_board();
4794 
4795 	bp->devices = drmach_array_new(0, max_devices);
4796 
4797 	if (bp->tree == NULL)
4798 		bp->tree = drmach_node_new();
4799 
4800 	data.obj = bp;
4801 	data.ndevs = 0;
4802 	data.found = found;
4803 	data.a = a;
4804 	data.err = NULL;
4805 
4806 	mutex_enter(&drmach_slice_table_lock);
4807 	mutex_enter(&drmach_bus_sync_lock);
4808 
4809 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4810 
4811 	drmach_slice_table_update(bp, 0);
4812 	drmach_bus_sync_list_update();
4813 
4814 	mutex_exit(&drmach_bus_sync_lock);
4815 	mutex_exit(&drmach_slice_table_lock);
4816 
4817 	if (rv == 0) {
4818 		err = NULL;
4819 		drmach_slot1_lpa_set(bp);
4820 	} else {
4821 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4822 		bp->devices = NULL;
4823 
4824 		if (data.err)
4825 			err = data.err;
4826 		else
4827 			err = DRMACH_INTERNAL_ERROR();
4828 	}
4829 
4830 	return (err);
4831 }
4832 
4833 int
4834 drmach_board_lookup(int bnum, drmachid_t *id)
4835 {
4836 	int	rv = 0;
4837 
4838 	if (!drmach_initialized && drmach_init() == -1) {
4839 		*id = 0;
4840 		return (-1);
4841 	}
4842 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4843 	if (drmach_array_get(drmach_boards, bnum, id)) {
4844 		*id = 0;
4845 		rv = -1;
4846 	} else {
4847 		caddr_t		obufp;
4848 		dr_showboard_t	shb;
4849 		sbd_error_t	*err = NULL;
4850 		drmach_board_t	*bp;
4851 
4852 		bp = *id;
4853 
4854 		if (bp)
4855 			rw_downgrade(&drmach_boards_rwlock);
4856 
4857 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4858 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4859 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4860 			sizeof (dr_showboard_t));
4861 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4862 
4863 		if (err) {
4864 			if (err->e_code == ESTC_UNAVAILABLE) {
4865 				*id = 0;
4866 				rv = -1;
4867 			}
4868 			sbd_err_clear(&err);
4869 		} else {
4870 			if (!bp)
4871 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4872 			bp->connected = (shb.bd_assigned && shb.bd_active);
4873 			bp->empty = shb.slot_empty;
4874 
4875 			switch (shb.test_status) {
4876 				case DR_TEST_STATUS_UNKNOWN:
4877 				case DR_TEST_STATUS_IPOST:
4878 				case DR_TEST_STATUS_ABORTED:
4879 					bp->cond = SBD_COND_UNKNOWN;
4880 					break;
4881 				case DR_TEST_STATUS_PASSED:
4882 					bp->cond = SBD_COND_OK;
4883 					break;
4884 				case DR_TEST_STATUS_FAILED:
4885 					bp->cond = SBD_COND_FAILED;
4886 					break;
4887 				default:
4888 					bp->cond = SBD_COND_UNKNOWN;
4889 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4890 						shb.test_status);
4891 					break;
4892 			}
4893 			strncpy(bp->type, shb.board_type, sizeof (bp->type));
4894 			bp->assigned = shb.bd_assigned;
4895 			bp->powered = shb.power_on;
4896 		}
4897 	}
4898 	rw_exit(&drmach_boards_rwlock);
4899 	return (rv);
4900 }
4901 
4902 sbd_error_t *
4903 drmach_board_name(int bnum, char *buf, int buflen)
4904 {
4905 	snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4906 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4907 
4908 	return (NULL);
4909 }
4910 
4911 sbd_error_t *
4912 drmach_board_poweroff(drmachid_t id)
4913 {
4914 	drmach_board_t	*bp;
4915 	sbd_error_t	*err;
4916 	drmach_status_t	 stat;
4917 
4918 	if (!DRMACH_IS_BOARD_ID(id))
4919 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4920 	bp = id;
4921 
4922 	err = drmach_board_status(id, &stat);
4923 	if (!err) {
4924 		if (stat.configured || stat.busy)
4925 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4926 		else {
4927 			caddr_t	obufp;
4928 
4929 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4930 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4931 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4932 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4933 			if (!err)
4934 				bp->powered = 0;
4935 		}
4936 	}
4937 	return (err);
4938 }
4939 
4940 sbd_error_t *
4941 drmach_board_poweron(drmachid_t id)
4942 {
4943 	drmach_board_t	*bp;
4944 	caddr_t		obufp;
4945 	sbd_error_t	*err;
4946 
4947 	if (!DRMACH_IS_BOARD_ID(id))
4948 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4949 	bp = id;
4950 
4951 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4952 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4953 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4954 	if (!err)
4955 		bp->powered = 1;
4956 
4957 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4958 
4959 	return (err);
4960 }
4961 
4962 static sbd_error_t *
4963 drmach_board_release(drmachid_t id)
4964 {
4965 	if (!DRMACH_IS_BOARD_ID(id))
4966 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4967 	return (NULL);
4968 }
4969 
4970 sbd_error_t *
4971 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
4972 {
4973 	drmach_board_t		*bp;
4974 	drmach_device_t		*dp[MAX_CORES_PER_CMP];
4975 	dr_mbox_msg_t		*obufp;
4976 	sbd_error_t		*err;
4977 	dr_testboard_reply_t	tbr;
4978 	int			cpylen;
4979 	char			*copts;
4980 	int			is_io;
4981 	cpu_flag_t		oflags[MAX_CORES_PER_CMP];
4982 
4983 	if (!DRMACH_IS_BOARD_ID(id))
4984 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4985 	bp = id;
4986 
4987 	/*
4988 	 * If the board is an I/O or MAXCAT board, setup I/O cage for
4989 	 * testing. Slot 1 indicates I/O or MAXCAT board.
4990 	 */
4991 
4992 	is_io = DRMACH_BNUM2SLOT(bp->bnum);
4993 
4994 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4995 
4996 	if (force)
4997 		obufp->msgdata.dm_tb.force = 1;
4998 
4999 	obufp->msgdata.dm_tb.immediate = 1;
5000 
5001 	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
5002 		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
5003 		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
5004 	}
5005 
5006 	if (is_io) {
5007 		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);
5008 
5009 		if (err) {
5010 			kmem_free(obufp, sizeof (dr_mbox_msg_t));
5011 			return (err);
5012 		}
5013 	}
5014 
5015 	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
5016 		sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));
5017 
5018 	if (!err)
5019 		bp->cond = SBD_COND_OK;
5020 	else
5021 		bp->cond = SBD_COND_UNKNOWN;
5022 
5023 	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
5024 		/* examine test status */
5025 		switch (tbr.test_status) {
5026 			case DR_TEST_STATUS_IPOST:
5027 				bp->cond = SBD_COND_UNKNOWN;
5028 				err = drerr_new(0, ESTC_TEST_IN_PROGRESS,
5029 					NULL);
5030 				break;
5031 			case DR_TEST_STATUS_UNKNOWN:
5032 				bp->cond = SBD_COND_UNKNOWN;
5033 				err = drerr_new(1,
5034 					ESTC_TEST_STATUS_UNKNOWN, NULL);
5035 				break;
5036 			case DR_TEST_STATUS_FAILED:
5037 				bp->cond = SBD_COND_FAILED;
5038 				err = drerr_new(1, ESTC_TEST_FAILED,
5039 					NULL);
5040 				break;
5041 			case DR_TEST_STATUS_ABORTED:
5042 				bp->cond = SBD_COND_UNKNOWN;
5043 				err = drerr_new(1, ESTC_TEST_ABORTED,
5044 					NULL);
5045 				break;
5046 			default:
5047 				bp->cond = SBD_COND_UNKNOWN;
5048 				err = drerr_new(1,
5049 					ESTC_TEST_RESULT_UNKNOWN,
5050 					NULL);
5051 				break;
5052 		}
5053 	}
5054 
5055 	/*
5056 	 * If I/O cage test was performed, check for availability of the
5057 	 * cpu used.  If cpu has been returned, it's OK to proceed with
5058 	 * reconfiguring it for use.
5059 	 */
5060 	if (is_io) {
5061 		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
5062 			tbr.cpu_recovered);
5063 		DRMACH_PR("drmach_board_test: port id: %d",
5064 			tbr.cpu_portid);
5065 
5066 		/*
5067 		 * Check the cpu_recovered flag in the testboard reply, or
5068 		 * if the testboard request message was not sent to SMS due
5069 		 * to an mboxsc_putmsg() failure, it's OK to recover the
5070 		 * cpu since hpost hasn't touched it.
5071 		 */
5072 		if ((tbr.cpu_recovered && tbr.cpu_portid ==
5073 		    obufp->msgdata.dm_tb.cpu_portid) ||
5074 		    ((err) && (err->e_code == ESTC_MBXRQST))) {
5075 
5076 			int i;
5077 
5078 			mutex_enter(&cpu_lock);
5079 			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
5080 				if (dp[i] != NULL) {
5081 					(void) drmach_iocage_cpu_return(dp[i],
5082 					    oflags[i]);
5083 				}
5084 			}
5085 			mutex_exit(&cpu_lock);
5086 		} else {
5087 			cmn_err(CE_WARN, "Unable to recover port id %d "
5088 			    "after I/O cage test: cpu_recovered=%d, "
5089 			    "returned portid=%d",
5090 			    obufp->msgdata.dm_tb.cpu_portid,
5091 			    tbr.cpu_recovered, tbr.cpu_portid);
5092 		}
5093 		drmach_iocage_mem_return(&tbr);
5094 	}
5095 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
5096 
5097 	return (err);
5098 }
5099 
5100 sbd_error_t *
5101 drmach_board_unassign(drmachid_t id)
5102 {
5103 	drmach_board_t	*bp;
5104 	sbd_error_t	*err;
5105 	drmach_status_t	 stat;
5106 	caddr_t		obufp;
5107 
5108 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
5109 
5110 	if (!DRMACH_IS_BOARD_ID(id)) {
5111 		rw_exit(&drmach_boards_rwlock);
5112 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5113 	}
5114 	bp = id;
5115 
5116 	err = drmach_board_status(id, &stat);
5117 	if (err) {
5118 		rw_exit(&drmach_boards_rwlock);
5119 		return (err);
5120 	}
5121 
5122 	if (stat.configured || stat.busy) {
5123 		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
5124 	} else {
5125 
5126 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5127 		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
5128 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5129 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
5130 		if (!err) {
5131 			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
5132 				err = DRMACH_INTERNAL_ERROR();
5133 			else
5134 				drmach_board_dispose(bp);
5135 		}
5136 	}
5137 	rw_exit(&drmach_boards_rwlock);
5138 	return (err);
5139 }
5140 
5141 static sbd_error_t *
5142 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5143 {
5144 	int		len;
5145 	drmach_reg_t	reg;
5146 	drmach_node_t	pp;
5147 	drmach_node_t	*np = dp->node;
5148 
5149 	/*
5150 	 * If the node does not have a portid property,
5151 	 * it represents a CMP device. For a CMP, the reg
5152 	 * property of the parent holds the information of
5153 	 * interest.
5154 	 */
5155 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5156 
5157 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5158 			return (DRMACH_INTERNAL_ERROR());
5159 		}
5160 		np = &pp;
5161 	}
5162 
5163 	if (np->n_getproplen(np, "reg", &len) != 0)
5164 		return (DRMACH_INTERNAL_ERROR());
5165 
5166 	if (len != sizeof (reg))
5167 		return (DRMACH_INTERNAL_ERROR());
5168 
5169 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5170 		return (DRMACH_INTERNAL_ERROR());
5171 
5172 	/* reassemble 64-bit base address */
5173 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5174 
5175 	return (NULL);
5176 }
5177 
5178 static void
5179 drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5180 {
5181 	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5182 	uint_t		*reg_read = (uint_t *)arg2;
5183 
5184 	*saf_config_reg = lddsafconfig();
5185 	*reg_read = 0x1;
5186 }
5187 
5188 /*
5189  * A return value of 1 indicates success and 0 indicates a failure
5190  */
5191 static int
5192 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5193 {
5194 
5195 	int 	rv = 0x0;
5196 
5197 	*scr = 0x0;
5198 
5199 	/*
5200 	 * Confirm cpu was in ready set when xc was issued.
5201 	 * This is done by verifying rv which is
5202 	 * set to 0x1 when xc_one is successful.
5203 	 */
5204 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5205 	    (uint64_t)scr, (uint64_t)&rv);
5206 
5207 	return (rv);
5208 
5209 }
5210 
5211 static sbd_error_t *
5212 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5213 {
5214 	drmach_node_t	*np;
5215 
5216 	np = cp->dev.node;
5217 
5218 	/*
5219 	 * If a CPU does not have a portid property, it must
5220 	 * be a CMP device with a cpuid property.
5221 	 */
5222 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5223 
5224 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5225 			return (DRMACH_INTERNAL_ERROR());
5226 		}
5227 	}
5228 
5229 	return (NULL);
5230 }
5231 
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)
/*
 * Map a cpuid to its SRAM page index: the core id (bit 2) becomes bit 1
 * of the index and bit 0 of the cpuid is kept as bit 0.  The argument
 * is parenthesized so that expression arguments expand correctly.
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((((id) & DRMACH_COREID_MASK) >> 1) | ((id) & 0x1))
5236 
5237 static sbd_error_t *
5238 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
5239 {
5240 	static void drmach_cpu_dispose(drmachid_t);
5241 	static sbd_error_t *drmach_cpu_release(drmachid_t);
5242 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
5243 
5244 	sbd_error_t	*err;
5245 	uint64_t	scr_pa;
5246 	drmach_cpu_t	*cp = NULL;
5247 	pfn_t		pfn;
5248 	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
5249 	int		idx;
5250 	int		impl;
5251 	processorid_t	cpuid;
5252 
5253 	err = drmach_read_reg_addr(proto, &scr_pa);
5254 	if (err) {
5255 		goto fail;
5256 	}
5257 
5258 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
5259 	bcopy(proto, &cp->dev, sizeof (cp->dev));
5260 	cp->dev.node = drmach_node_dup(proto->node);
5261 	cp->dev.cm.isa = (void *)drmach_cpu_new;
5262 	cp->dev.cm.dispose = drmach_cpu_dispose;
5263 	cp->dev.cm.release = drmach_cpu_release;
5264 	cp->dev.cm.status = drmach_cpu_status;
5265 	cp->scr_pa = scr_pa;
5266 
5267 	err = drmach_cpu_read_cpuid(cp, &cpuid);
5268 	if (err) {
5269 		goto fail;
5270 	}
5271 
5272 	err = drmach_cpu_get_impl(cp, &impl);
5273 	if (err) {
5274 		goto fail;
5275 	}
5276 
5277 	cp->cpuid = cpuid;
5278 	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
5279 	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);
5280 
5281 	/*
5282 	 * Init the board cpu type.  Assumes all board cpus are the same type.
5283 	 */
5284 	if (cp->dev.bp->cpu_impl == 0) {
5285 		cp->dev.bp->cpu_impl = impl;
5286 	}
5287 	ASSERT(cp->dev.bp->cpu_impl == impl);
5288 
5289 	/*
5290 	 * XXX CHEETAH SUPPORT
5291 	 * determine if the domain uses Cheetah procs
5292 	 */
5293 	if (drmach_is_cheetah < 0) {
5294 		drmach_is_cheetah = IS_CHEETAH(impl);
5295 	}
5296 
5297 	/*
5298 	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
5299 	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
5300 	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
5301 	 * pair. Each cpu uses 8KB according to the following layout:
5302 	 *
5303 	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
5304 	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
5305 	 * Page 2:	even numbered Panther/Jaguar core 1's
5306 	 * Page 3:	odd numbered Panther/Jaguar core 1's
5307 	 */
5308 	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
5309 	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
5310 	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
5311 	pfn = cpu_sram_pa >> PAGESHIFT;
5312 
5313 	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
5314 	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
5315 	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
5316 		TTE_VALID_INT | TTE_SZ_INT(TTE8K);
5317 	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
5318 		TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
5319 
5320 	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
5321 	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
5322 	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);
5323 
5324 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
5325 	    cp->dev.type, cp->dev.unum);
5326 
5327 	*idp = (drmachid_t)cp;
5328 	return (NULL);
5329 
5330 fail:
5331 	if (cp) {
5332 		drmach_node_dispose(cp->dev.node);
5333 		kmem_free(cp, sizeof (*cp));
5334 	}
5335 
5336 	*idp = (drmachid_t)0;
5337 	return (err);
5338 }
5339 
5340 static void
5341 drmach_cpu_dispose(drmachid_t id)
5342 {
5343 	drmach_cpu_t	*self;
5344 	processorid_t	cpuid;
5345 
5346 	ASSERT(DRMACH_IS_CPU_ID(id));
5347 
5348 	self = id;
5349 	if (self->dev.node)
5350 		drmach_node_dispose(self->dev.node);
5351 
5352 	cpuid = self->cpuid;
5353 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5354 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5355 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5356 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5357 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5358 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5359 
5360 	kmem_free(self, sizeof (*self));
5361 }
5362 
5363 static int
5364 drmach_cpu_start(struct cpu *cp)
5365 {
5366 	extern xcfunc_t	drmach_set_lpa;
5367 	extern void	restart_other_cpu(int);
5368 	int		cpuid = cp->cpu_id;
5369 	int		rv, bnum;
5370 	drmach_board_t	*bp;
5371 
5372 	ASSERT(MUTEX_HELD(&cpu_lock));
5373 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
5374 
5375 	cp->cpu_flags &= ~CPU_POWEROFF;
5376 
5377 	/*
5378 	 * NOTE: restart_other_cpu pauses cpus during the
5379 	 *	 slave cpu start.  This helps to quiesce the
5380 	 *	 bus traffic a bit which makes the tick sync
5381 	 *	 routine in the prom more robust.
5382 	 */
5383 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
5384 
5385 	if (prom_hotaddcpu(cpuid) != 0) {
5386 		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
5387 			cpuid);
5388 	}
5389 
5390 	restart_other_cpu(cpuid);
5391 
5392 	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
5393 	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
5394 	if (rv == -1 || bp == NULL) {
5395 		DRMACH_PR("drmach_cpu_start: cannot read board info for "
5396 		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, bp);
5397 	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
5398 		int exp;
5399 		int ntries;
5400 
5401 		mutex_enter(&drmach_xt_mb_lock);
5402 		mutex_enter(&drmach_slice_table_lock);
5403 		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
5404 
5405 		/*
5406 		 * drmach_slice_table[*]
5407 		 *	bit 5	valid
5408 		 *	bit 0:4	slice number
5409 		 *
5410 		 * drmach_xt_mb[*] format for drmach_set_lpa
5411 		 *	bit 7	valid
5412 		 *	bit 6	set null LPA (overrides bits 0:4)
5413 		 *	bit 0:4	slice number
5414 		 *
5415 		 * drmach_set_lpa derives processor CBASE and CBND
5416 		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
5417 		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
5418 		 * number; CBND = slice number + 1.
5419 		 * No action is taken if bit 7 is zero.
5420 		 */
5421 		exp = (cpuid >> 5) & 0x1f;
5422 		if (drmach_slice_table[exp] & 0x20) {
5423 			drmach_xt_mb[cpuid] = 0x80 |
5424 				(drmach_slice_table[exp] & 0x1f);
5425 		} else {
5426 			drmach_xt_mb[cpuid] = 0x80 | 0x40;
5427 		}
5428 
5429 		drmach_xt_ready = 0;
5430 
5431 		xt_one(cpuid, drmach_set_lpa, NULL, NULL);
5432 
5433 		ntries = drmach_cpu_ntries;
5434 		while (!drmach_xt_ready && ntries) {
5435 			DELAY(drmach_cpu_delay);
5436 			ntries--;
5437 		}
5438 
5439 		mutex_exit(&drmach_slice_table_lock);
5440 		mutex_exit(&drmach_xt_mb_lock);
5441 
5442 		DRMACH_PR(
5443 			"waited %d out of %d tries for drmach_set_lpa on cpu%d",
5444 			drmach_cpu_ntries - ntries, drmach_cpu_ntries,
5445 			cp->cpu_id);
5446 	}
5447 
5448 	xt_one(cpuid, vtag_flushpage_tl1,
5449 		(uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
5450 
5451 	return (0);
5452 }
5453 
5454 /*
5455  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5456  * it has been offlined. The function of this routine is to get the cpu
5457  * spinning in a safe place. The requirement is that the system will not
5458  * reference anything on the detaching board (memory and i/o is detached
5459  * elsewhere) and that the CPU not reference anything on any other board
5460  * in the system.  This isolation is required during and after the writes
5461  * to the domain masks to remove the board from the domain.
5462  *
5463  * To accomplish this isolation the following is done:
5464  *	1) Create a locked mapping to the STARDRB data buffer located
5465  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5466  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5467  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5468  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5469  *	   boards. Each STARDRB buffer is logically divided by DR into one
5470  *	   8KB page per cpu (or Jaguar core).
5471  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5472  *	3) Jump to function now in the cpu sram.
5473  *	   Function will:
5474  *	   3.1) Flush its Ecache (displacement).
5475  *	   3.2) Flush its Dcache with HW mechanism.
5476  *	   3.3) Flush its Icache with HW mechanism.
5477  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5478  *	   3.5) Set LPA to NULL
5479  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5480  *	        recovered by drmach_cpu_poweroff().
5481  *	4) Jump into an infinite loop.
5482  */
5483 
static void
drmach_cpu_stop_self(void)
{
	extern void	drmach_shutdown_asm(
				uint64_t, uint64_t, int, int, uint64_t);
	extern void	drmach_shutdown_asm_end(void);

	tte_t		*tte;
	uint_t		*p, *q;
	uint64_t	 stack_pointer;

	/* the routine being copied must fit in the single page mapped below */
	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
		(ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);

	/*
	 * Use the TTE prepared for this cpu by drmach_cpu_new() and load it
	 * into both the D-TLB and the I-TLB at drmach_cpu_sram_va, since the
	 * page is written as data and then executed.
	 */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)drmach_cpu_sram_va;
	q = (uint_t *)drmach_shutdown_asm;
	while (q < (uint_t *)drmach_shutdown_asm_end)
		*p++ = *q++;

	/* zero to assist debug */
	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
	while (p < q)
		*p++ = 0;

	/* a parking spot for the stack pointer */
	/* (q is the address one page past drmach_cpu_sram_va) */
	stack_pointer = (uint64_t)q;

	/* call copy of drmach_shutdown_asm */
	/*
	 * Arguments, in order: the parking stack pointer, drmach_iocage_paddr,
	 * this cpu's ecache size and line size, and the physical address of
	 * this cpu's drmach_xt_mb[] entry.  The caller treats a return from
	 * this call as a failure to shut down.
	 */
	(*(void (*)())drmach_cpu_sram_va)(
		stack_pointer,
		drmach_iocage_paddr,
		cpunodes[CPU->cpu_id].ecache_size,
		cpunodes[CPU->cpu_id].ecache_linesize,
		va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
}
5526 
/*
 * Final step executed by a cpu that is being shut down: mark the cpu
 * offline/quiesced/powered-off and hand control to drmach_cpu_stop_self().
 * Panics if drmach_cpu_stop_self() ever returns.
 */
static void
drmach_cpu_shutdown_self(void)
{
	cpu_t		*cp = CPU;
	int		cpuid = cp->cpu_id;
	extern void	flush_windows(void);

	flush_windows();

	/* NOTE(review): presumably raises the interrupt level; confirm */
	(void) spl8();

	/* only the idle or startup thread may be running here */
	ASSERT(cp->cpu_intr_actv == 0);
	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
	    cp->cpu_thread == cp->cpu_startup_thread);

	/* overwrites (does not OR into) any previous cpu_flags value */
	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;

	drmach_cpu_stop_self();

	/* reached only if drmach_cpu_stop_self() returned */
	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
}
5548 
5549 static sbd_error_t *
5550 drmach_cpu_release(drmachid_t id)
5551 {
5552 	drmach_cpu_t	*cp;
5553 	struct cpu	*cpu;
5554 	sbd_error_t	*err;
5555 
5556 	if (!DRMACH_IS_CPU_ID(id))
5557 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5558 	cp = id;
5559 
5560 	ASSERT(MUTEX_HELD(&cpu_lock));
5561 
5562 	cpu = cpu_get(cp->cpuid);
5563 	if (cpu == NULL)
5564 		err = DRMACH_INTERNAL_ERROR();
5565 	else
5566 		err = NULL;
5567 
5568 	return (err);
5569 }
5570 
5571 static sbd_error_t *
5572 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5573 {
5574 	drmach_cpu_t	*cp;
5575 	drmach_device_t	*dp;
5576 
5577 	ASSERT(DRMACH_IS_CPU_ID(id));
5578 	cp = id;
5579 	dp = &cp->dev;
5580 
5581 	stat->assigned = dp->bp->assigned;
5582 	stat->powered = dp->bp->powered;
5583 	mutex_enter(&cpu_lock);
5584 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5585 	mutex_exit(&cpu_lock);
5586 	stat->busy = dp->busy;
5587 	strncpy(stat->type, dp->type, sizeof (stat->type));
5588 	stat->info[0] = '\0';
5589 
5590 	return (NULL);
5591 }
5592 
5593 sbd_error_t *
5594 drmach_cpu_disconnect(drmachid_t id)
5595 {
5596 
5597 	if (!DRMACH_IS_CPU_ID(id))
5598 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5599 
5600 	return (NULL);
5601 
5602 }
5603 
5604 sbd_error_t *
5605 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5606 {
5607 	drmach_cpu_t	*cpu;
5608 
5609 	if (!DRMACH_IS_CPU_ID(id))
5610 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5611 	cpu = id;
5612 
5613 	*cpuid = cpu->cpuid;
5614 	return (NULL);
5615 }
5616 
5617 sbd_error_t *
5618 drmach_cpu_get_impl(drmachid_t id, int *ip)
5619 {
5620 	drmach_node_t	*np;
5621 	int		impl;
5622 
5623 	if (!DRMACH_IS_CPU_ID(id))
5624 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5625 
5626 	np = ((drmach_device_t *)id)->node;
5627 
5628 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5629 		return (DRMACH_INTERNAL_ERROR());
5630 	}
5631 
5632 	*ip = impl;
5633 
5634 	return (NULL);
5635 }
5636 
5637 /*
5638  * Flush this cpu's ecache, then ensure all outstanding safari
5639  * transactions have retired.
5640  */
void
drmach_cpu_flush_ecache_sync(void)
{
	uint64_t *p;

	/* the calling thread must be bound to the cpu it is running on */
	ASSERT(curthread->t_bound_cpu == CPU);

	cpu_flush_ecache();

	/*
	 * Issue a physical load from every address in the zero-terminated
	 * drmach_bus_sync_list; the loaded values are discarded.  The list
	 * is walked under drmach_bus_sync_lock.
	 */
	mutex_enter(&drmach_bus_sync_lock);
	for (p = drmach_bus_sync_list; *p; p++)
		(void) ldphys(*p);
	mutex_exit(&drmach_bus_sync_lock);

	/*
	 * NOTE(review): presumably repeated to evict anything the loads
	 * above brought into the ecache; confirm.
	 */
	cpu_flush_ecache();
}
5657 
5658 sbd_error_t *
5659 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5660 {
5661 	drmach_device_t	*dp;
5662 
5663 	if (!DRMACH_IS_DEVICE_ID(id))
5664 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5665 	dp = id;
5666 
5667 	*dip = dp->node->n_getdip(dp->node);
5668 	return (NULL);
5669 }
5670 
5671 sbd_error_t *
5672 drmach_io_is_attached(drmachid_t id, int *yes)
5673 {
5674 	drmach_device_t *dp;
5675 	dev_info_t	*dip;
5676 	int state;
5677 
5678 	if (!DRMACH_IS_IO_ID(id))
5679 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5680 	dp = id;
5681 
5682 	dip = dp->node->n_getdip(dp->node);
5683 	if (dip == NULL) {
5684 		*yes = 0;
5685 		return (NULL);
5686 	}
5687 
5688 	state = ddi_get_devstate(dip);
5689 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5690 
5691 	return (NULL);
5692 }
5693 
5694 static int
5695 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5696 {
5697 	char			dtype[OBP_MAXPROPNAME];
5698 	int			portid;
5699 	uint_t			pci_csr_base;
5700 	struct pci_phys_spec	*regbuf = NULL;
5701 	int			rv, len;
5702 
5703 	ASSERT(dip != NULL);
5704 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5705 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5706 		return (0);
5707 
5708 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5709 		(caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5710 
5711 		if (strncmp(dtype, "pci", 3) == 0) {
5712 
5713 			/*
5714 			 * Get safari portid. All schizo/xmits 0
5715 			 * safari IDs end in 0x1C.
5716 			 */
5717 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5718 				"portid", &len);
5719 
5720 			if ((rv != DDI_PROP_SUCCESS) ||
5721 				(len > sizeof (portid)))
5722 					return (0);
5723 
5724 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5725 				"portid", (caddr_t)&portid, &len);
5726 
5727 			if (rv != DDI_PROP_SUCCESS)
5728 				return (0);
5729 
5730 			if ((portid & 0x1F) != 0x1C)
5731 				return (0);
5732 
5733 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5734 				DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5735 						&len) == DDI_PROP_SUCCESS) {
5736 
5737 				pci_csr_base = regbuf[0].pci_phys_mid &
5738 							PCI_CONF_ADDR_MASK;
5739 				kmem_free(regbuf, len);
5740 				/*
5741 				 * All PCI B-Leafs are at configspace 0x70.0000.
5742 				 */
5743 				if (pci_csr_base == 0x700000)
5744 					return (1);
5745 			}
5746 		}
5747 	}
5748 	return (0);
5749 }
5750 
/*
 * Binding names that drmach_dip_is_man_eri() compares against
 * ddi_binding_name() of the candidate's parent (schizo) or
 * grandparent (xmits) node.
 */
#define	SCHIZO_BINDING_NAME		"pci108e,8001"
#define	XMITS_BINDING_NAME		"pci108e,8002"
5753 
5754 /*
5755  * Verify if the dip is an instance of MAN 'eri'.
5756  */
5757 static int
5758 drmach_dip_is_man_eri(dev_info_t *dip)
5759 {
5760 	struct pci_phys_spec	*regbuf = NULL;
5761 	dev_info_t		*parent_dip;
5762 	char			*name;
5763 	uint_t			pci_device;
5764 	uint_t			pci_function;
5765 	int			len;
5766 
5767 	if (dip == NULL)
5768 		return (0);
5769 	/*
5770 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5771 	 */
5772 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5773 		((name = ddi_binding_name(parent_dip)) == NULL))
5774 		return (0);
5775 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5776 		/*
5777 		 * This RIO could be on XMITS, so get the dip to
5778 		 * XMITS PCI Leaf.
5779 		 */
5780 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5781 			return (0);
5782 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5783 			(strcmp(name, XMITS_BINDING_NAME) != 0)) {
5784 			return (0);
5785 		}
5786 	}
5787 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5788 		return (0);
5789 	/*
5790 	 * Finally make sure it is the MAN eri.
5791 	 */
5792 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5793 			"reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5794 
5795 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5796 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5797 		kmem_free(regbuf, len);
5798 
5799 		/*
5800 		 * The network function of the RIO ASIC will always be
5801 		 * device 3 and function 1 ("network@3,1").
5802 		 */
5803 		if ((pci_device == 3) && (pci_function == 1))
5804 			return (1);
5805 	}
5806 	return (0);
5807 }
5808 
/*
 * Search state handed to drmach_board_find_io_insts() as its `args'
 * cookie during a device tree walk.  Callers set iosram_inst to a
 * negative value to search for the iosram instance, or to 0 to skip
 * that search and stop as soon as the eri node is found.
 */
typedef struct {
	int		iosram_inst;	/* iosram instance; < 0 if not found */
	dev_info_t	*eri_dip;	/* held MAN eri node; NULL if none */
	int		bnum;		/* only nodes on this board match */
} drmach_io_inst_t;
5814 
5815 int
5816 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5817 {
5818 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5819 
5820 	int	rv;
5821 	int	len;
5822 	int	portid;
5823 	char	name[OBP_MAXDRVNAME];
5824 
5825 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5826 
5827 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5828 		return (DDI_WALK_CONTINUE);
5829 	}
5830 
5831 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5832 			"portid", (caddr_t)&portid, &len);
5833 	if (rv != DDI_PROP_SUCCESS)
5834 		return (DDI_WALK_CONTINUE);
5835 
5836 	/* ignore devices that are not on this board */
5837 	if (drmach_portid2bnum(portid) != ios->bnum)
5838 		return (DDI_WALK_CONTINUE);
5839 
5840 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5841 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5842 			"name", &len);
5843 		if (rv == DDI_PROP_SUCCESS) {
5844 
5845 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5846 				0, "name",
5847 				(caddr_t)name, &len);
5848 			if (rv != DDI_PROP_SUCCESS)
5849 				return (DDI_WALK_CONTINUE);
5850 
5851 			if (strncmp("iosram", name, 6) == 0) {
5852 				ios->iosram_inst = ddi_get_instance(dip);
5853 				if (ios->eri_dip == NULL)
5854 					return (DDI_WALK_CONTINUE);
5855 				else
5856 					return (DDI_WALK_TERMINATE);
5857 			} else {
5858 				if (drmach_dip_is_man_eri(dip)) {
5859 					ASSERT(ios->eri_dip == NULL);
5860 					ndi_hold_devi(dip);
5861 					ios->eri_dip = dip;
5862 					if (ios->iosram_inst < 0)
5863 						return (DDI_WALK_CONTINUE);
5864 					else
5865 						return (DDI_WALK_TERMINATE);
5866 				}
5867 			}
5868 		}
5869 	}
5870 	return (DDI_WALK_CONTINUE);
5871 }
5872 
5873 sbd_error_t *
5874 drmach_io_pre_release(drmachid_t id)
5875 {
5876 	drmach_io_inst_t	ios;
5877 	drmach_board_t		*bp;
5878 	int			rv = 0;
5879 	sbd_error_t		*err = NULL;
5880 	drmach_device_t		*dp;
5881 	dev_info_t		*rdip;
5882 	int			circ;
5883 
5884 	if (!DRMACH_IS_IO_ID(id))
5885 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5886 	dp = id;
5887 	bp = dp->bp;
5888 
5889 	rdip = dp->node->n_getdip(dp->node);
5890 
5891 	/* walk device tree to find iosram instance for the board */
5892 	ios.iosram_inst = -1;
5893 	ios.eri_dip = NULL;
5894 	ios.bnum = bp->bnum;
5895 
5896 	ndi_devi_enter(rdip, &circ);
5897 	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
5898 				(void *)&ios);
5899 
5900 	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
5901 			ios.bnum, ios.iosram_inst, ios.eri_dip);
5902 	ndi_devi_exit(rdip, circ);
5903 
5904 	if (ios.eri_dip) {
5905 		/*
5906 		 * Release hold acquired in drmach_board_find_io_insts()
5907 		 */
5908 		ndi_rele_devi(ios.eri_dip);
5909 	}
5910 	if (ios.iosram_inst >= 0) {
5911 		/* call for tunnel switch */
5912 		do {
5913 			DRMACH_PR("calling iosram_switchfrom(%d)\n",
5914 				ios.iosram_inst);
5915 			rv = iosram_switchfrom(ios.iosram_inst);
5916 			if (rv)
5917 				DRMACH_PR("iosram_switchfrom returned %d\n",
5918 					rv);
5919 		} while (rv == EAGAIN);
5920 
5921 		if (rv)
5922 			err = drerr_new(0, ESTC_IOSWITCH, NULL);
5923 	}
5924 	return (err);
5925 }
5926 
5927 sbd_error_t *
5928 drmach_io_unrelease(drmachid_t id)
5929 {
5930 	dev_info_t	*dip;
5931 	sbd_error_t	*err = NULL;
5932 	drmach_device_t	*dp;
5933 
5934 	if (!DRMACH_IS_IO_ID(id))
5935 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5936 	dp = id;
5937 
5938 	dip = dp->node->n_getdip(dp->node);
5939 
5940 	if (dip == NULL)
5941 		err = DRMACH_INTERNAL_ERROR();
5942 	else {
5943 		int (*func)(dev_info_t *dip);
5944 
5945 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
5946 			0);
5947 
5948 		if (func) {
5949 			drmach_io_inst_t ios;
5950 			dev_info_t	*pdip;
5951 			int		circ;
5952 
5953 			/*
5954 			 * Walk device tree to find rio dip for the board
5955 			 * Since we are not interested in iosram instance here,
5956 			 * initialize it to 0, so that the walk terminates as
5957 			 * soon as eri dip is found.
5958 			 */
5959 			ios.iosram_inst = 0;
5960 			ios.eri_dip = NULL;
5961 			ios.bnum = dp->bp->bnum;
5962 
5963 			if (pdip = ddi_get_parent(dip)) {
5964 				ndi_hold_devi(pdip);
5965 				ndi_devi_enter(pdip, &circ);
5966 			}
5967 			/*
5968 			 * Root node doesn't have to be held in any way.
5969 			 */
5970 			ddi_walk_devs(dip,
5971 				drmach_board_find_io_insts, (void *)&ios);
5972 
5973 			if (pdip) {
5974 				ndi_devi_exit(pdip, circ);
5975 				ndi_rele_devi(pdip);
5976 			}
5977 
5978 			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
5979 				ios.bnum, ios.eri_dip);
5980 
5981 			if (ios.eri_dip) {
5982 				DRMACH_PR("calling man_dr_attach\n");
5983 				if ((*func)(ios.eri_dip))
5984 					err = drerr_new(0,
5985 						ESTC_NWSWITCH, NULL);
5986 				/*
5987 				 * Release hold acquired in
5988 				 * drmach_board_find_io_insts()
5989 				 */
5990 				ndi_rele_devi(ios.eri_dip);
5991 			}
5992 		} else
5993 			DRMACH_PR("man_dr_attach NOT present\n");
5994 	}
5995 	return (err);
5996 }
5997 
5998 static sbd_error_t *
5999 drmach_io_release(drmachid_t id)
6000 {
6001 	dev_info_t	*dip;
6002 	sbd_error_t	*err = NULL;
6003 	drmach_device_t	*dp;
6004 
6005 	if (!DRMACH_IS_IO_ID(id))
6006 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6007 	dp = id;
6008 
6009 	dip = dp->node->n_getdip(dp->node);
6010 
6011 	if (dip == NULL)
6012 		err = DRMACH_INTERNAL_ERROR();
6013 	else {
6014 		int (*func)(dev_info_t *dip);
6015 
6016 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
6017 			0);
6018 
6019 		if (func) {
6020 			drmach_io_inst_t ios;
6021 			dev_info_t	*pdip;
6022 			int		circ;
6023 
6024 			/*
6025 			 * Walk device tree to find rio dip for the board
6026 			 * Since we are not interested in iosram instance here,
6027 			 * initialize it to 0, so that the walk terminates as
6028 			 * soon as eri dip is found.
6029 			 */
6030 			ios.iosram_inst = 0;
6031 			ios.eri_dip = NULL;
6032 			ios.bnum = dp->bp->bnum;
6033 
6034 			if (pdip = ddi_get_parent(dip)) {
6035 				ndi_hold_devi(pdip);
6036 				ndi_devi_enter(pdip, &circ);
6037 			}
6038 			/*
6039 			 * Root node doesn't have to be held in any way.
6040 			 */
6041 			ddi_walk_devs(dip,
6042 				drmach_board_find_io_insts, (void *)&ios);
6043 
6044 			if (pdip) {
6045 				ndi_devi_exit(pdip, circ);
6046 				ndi_rele_devi(pdip);
6047 			}
6048 
6049 			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
6050 				ios.bnum, ios.eri_dip);
6051 
6052 			if (ios.eri_dip) {
6053 				DRMACH_PR("calling man_dr_detach\n");
6054 				if ((*func)(ios.eri_dip))
6055 					err = drerr_new(0,
6056 						ESTC_NWSWITCH, NULL);
6057 				/*
6058 				 * Release hold acquired in
6059 				 * drmach_board_find_io_insts()
6060 				 */
6061 				ndi_rele_devi(ios.eri_dip);
6062 			}
6063 		} else
6064 			DRMACH_PR("man_dr_detach NOT present\n");
6065 	}
6066 	return (err);
6067 }
6068 
6069 sbd_error_t *
6070 drmach_io_post_release(drmachid_t id)
6071 {
6072 	char 		*path;
6073 	dev_info_t	*rdip;
6074 	drmach_device_t	*dp;
6075 
6076 	if (!DRMACH_IS_DEVICE_ID(id))
6077 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6078 	dp = id;
6079 
6080 	rdip = dp->node->n_getdip(dp->node);
6081 
6082 	/*
6083 	 * Always called after drmach_unconfigure() which on Starcat
6084 	 * unconfigures the branch but doesn't remove it so the
6085 	 * dip must always exist.
6086 	 */
6087 	ASSERT(rdip);
6088 
6089 	ASSERT(e_ddi_branch_held(rdip));
6090 #ifdef DEBUG
6091 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6092 	(void) ddi_pathname(rdip, path);
6093 	DRMACH_PR("post_release dip path is: %s\n", path);
6094 	kmem_free(path, MAXPATHLEN);
6095 #endif
6096 
6097 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6098 		if (schpc_remove_pci(rdip)) {
6099 			DRMACH_PR("schpc_remove_pci failed\n");
6100 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6101 		} else {
6102 			DRMACH_PR("schpc_remove_pci succeeded\n");
6103 		}
6104 	}
6105 
6106 	return (NULL);
6107 }
6108 
6109 sbd_error_t *
6110 drmach_io_post_attach(drmachid_t id)
6111 {
6112 	int		circ;
6113 	dev_info_t	*dip;
6114 	dev_info_t	*pdip;
6115 	drmach_device_t	*dp;
6116 	drmach_io_inst_t ios;
6117 
6118 	if (!DRMACH_IS_DEVICE_ID(id))
6119 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6120 	dp = id;
6121 
6122 	dip = dp->node->n_getdip(dp->node);
6123 
6124 	/*
6125 	 * We held the branch rooted at dip earlier, so at a minimum the
6126 	 * root i.e. dip must be present in the device tree.
6127 	 */
6128 	ASSERT(dip);
6129 
6130 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6131 		if (schpc_add_pci(dip)) {
6132 			DRMACH_PR("schpc_add_pci failed\n");
6133 		} else {
6134 			DRMACH_PR("schpc_add_pci succeeded\n");
6135 		}
6136 	}
6137 
6138 	/*
6139 	 * Walk device tree to find rio dip for the board
6140 	 * Since we are not interested in iosram instance here,
6141 	 * initialize it to 0, so that the walk terminates as
6142 	 * soon as eri dip is found.
6143 	 */
6144 	ios.iosram_inst = 0;
6145 	ios.eri_dip = NULL;
6146 	ios.bnum = dp->bp->bnum;
6147 
6148 	if (pdip = ddi_get_parent(dip)) {
6149 		ndi_hold_devi(pdip);
6150 		ndi_devi_enter(pdip, &circ);
6151 	}
6152 	/*
6153 	 * Root node doesn't have to be held in any way.
6154 	 */
6155 	ddi_walk_devs(dip, drmach_board_find_io_insts,
6156 				(void *)&ios);
6157 	if (pdip) {
6158 		ndi_devi_exit(pdip, circ);
6159 		ndi_rele_devi(pdip);
6160 	}
6161 
6162 	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
6163 		ios.bnum, ios.eri_dip);
6164 
6165 	if (ios.eri_dip) {
6166 		int (*func)(dev_info_t *dip);
6167 
6168 		func =
6169 		(int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);
6170 
6171 		if (func) {
6172 			DRMACH_PR("calling man_dr_attach\n");
6173 			(void) (*func)(ios.eri_dip);
6174 		} else {
6175 			DRMACH_PR("man_dr_attach NOT present\n");
6176 		}
6177 
6178 		/*
6179 		 * Release hold acquired in drmach_board_find_io_insts()
6180 		 */
6181 		ndi_rele_devi(ios.eri_dip);
6182 
6183 	}
6184 
6185 	return (NULL);
6186 }
6187 
6188 static sbd_error_t *
6189 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6190 {
6191 	drmach_device_t *dp;
6192 	sbd_error_t	*err;
6193 	int		 configured;
6194 
6195 	ASSERT(DRMACH_IS_IO_ID(id));
6196 	dp = id;
6197 
6198 	err = drmach_io_is_attached(id, &configured);
6199 	if (err)
6200 		return (err);
6201 
6202 	stat->assigned = dp->bp->assigned;
6203 	stat->powered = dp->bp->powered;
6204 	stat->configured = (configured != 0);
6205 	stat->busy = dp->busy;
6206 	strncpy(stat->type, dp->type, sizeof (stat->type));
6207 	stat->info[0] = '\0';
6208 
6209 	return (NULL);
6210 }
6211 
6212 sbd_error_t *
6213 drmach_mem_init_size(drmachid_t id)
6214 {
6215 	drmach_mem_t	*mp;
6216 	sbd_error_t	*err;
6217 	gdcd_t		*gdcd;
6218 	mem_chunk_t	*chunk;
6219 	uint64_t	 chunks, pa, mask, sz;
6220 
6221 	if (!DRMACH_IS_MEM_ID(id))
6222 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6223 	mp = id;
6224 
6225 	err = drmach_mem_get_base_physaddr(id, &pa);
6226 	if (err)
6227 		return (err);
6228 
6229 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6230 	pa &= mask;
6231 
6232 	gdcd = drmach_gdcd_new();
6233 	if (gdcd == NULL)
6234 		return (DRMACH_INTERNAL_ERROR());
6235 
6236 	sz = 0;
6237 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6238 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6239 	while (chunks-- != 0) {
6240 		if ((chunk->mc_base_pa & mask) == pa) {
6241 			sz += chunk->mc_mbytes * 1048576;
6242 		}
6243 
6244 		++chunk;
6245 	}
6246 	mp->nbytes = sz;
6247 
6248 	drmach_gdcd_dispose(gdcd);
6249 	return (NULL);
6250 }
6251 
6252 /*
6253  * Hardware registers are organized into consecutively
6254  * addressed registers.  The reg property's hi and lo fields
6255  * together describe the base address of the register set for
6256  * this memory-controller.  Register descriptions and offsets
6257  * (from the base address) are as follows:
6258  *
6259  * Description				Offset	Size (bytes)
6260  * Memory Timing Control Register I	0x00	8
6261  * Memory Timing Control Register II	0x08	8
6262  * Memory Address Decoding Register I	0x10	8
6263  * Memory Address Decoding Register II	0x18	8
6264  * Memory Address Decoding Register III	0x20	8
6265  * Memory Address Decoding Register IV	0x28	8
6266  * Memory Address Control Register	0x30	8
6267  * Memory Timing Control Register III	0x38	8
6268  * Memory Timing Control Register IV	0x40	8
6269  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6270  * EMU Activity Status Register		0x50	8 (Panther only)
6271  *
6272  * Only the Memory Address Decoding Register and EMU Activity Status
6273  * Register addresses are needed for DRMACH.
6274  */
6275 static sbd_error_t *
6276 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
6277 {
6278 	static void drmach_mem_dispose(drmachid_t);
6279 	static sbd_error_t *drmach_mem_release(drmachid_t);
6280 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
6281 
6282 	sbd_error_t	*err;
6283 	uint64_t	 madr_pa;
6284 	drmach_mem_t	*mp;
6285 	int		 bank, count;
6286 
6287 	err = drmach_read_reg_addr(proto, &madr_pa);
6288 	if (err)
6289 		return (err);
6290 
6291 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
6292 	bcopy(proto, &mp->dev, sizeof (mp->dev));
6293 	mp->dev.node = drmach_node_dup(proto->node);
6294 	mp->dev.cm.isa = (void *)drmach_mem_new;
6295 	mp->dev.cm.dispose = drmach_mem_dispose;
6296 	mp->dev.cm.release = drmach_mem_release;
6297 	mp->dev.cm.status = drmach_mem_status;
6298 	mp->madr_pa = madr_pa;
6299 
6300 	snprintf(mp->dev.cm.name,
6301 		sizeof (mp->dev.cm.name), "%s", mp->dev.type);
6302 
6303 	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6304 		uint64_t madr;
6305 
6306 		drmach_mem_read_madr(mp, bank, &madr);
6307 		if (madr & DRMACH_MC_VALID_MASK) {
6308 			count += 1;
6309 			break;
6310 		}
6311 	}
6312 
6313 	/*
6314 	 * If none of the banks had their valid bit set, that means
6315 	 * post did not configure this MC to participate in the
6316 	 * domain.  So, pretend this node does not exist by returning
6317 	 * a drmachid of zero.
6318 	 */
6319 	if (count == 0) {
6320 		/* drmach_mem_dispose frees board mem list */
6321 		drmach_node_dispose(mp->dev.node);
6322 		kmem_free(mp, sizeof (*mp));
6323 		*idp = (drmachid_t)0;
6324 		return (NULL);
6325 	}
6326 
6327 	/*
6328 	 * Only one mem unit per board is exposed to the
6329 	 * PIM layer.  The first mem unit encountered during
6330 	 * tree walk is used to represent all mem units on
6331 	 * the same board.
6332 	 */
6333 	if (mp->dev.bp->mem == NULL) {
6334 		/* start list of mem units on this board */
6335 		mp->dev.bp->mem = mp;
6336 
6337 		/*
6338 		 * force unum to zero since this is the only mem unit
6339 		 * that will be visible to the PIM layer.
6340 		 */
6341 		mp->dev.unum = 0;
6342 
6343 		/*
6344 		 * board memory size kept in this mem unit only
6345 		 */
6346 		err = drmach_mem_init_size(mp);
6347 		if (err) {
6348 			mp->dev.bp->mem = NULL;
6349 			/* drmach_mem_dispose frees board mem list */
6350 			drmach_node_dispose(mp->dev.node);
6351 			kmem_free(mp, sizeof (*mp));
6352 			*idp = (drmachid_t)0;
6353 			return (NULL);
6354 		}
6355 
6356 		/*
6357 		 * allow this instance (the first encountered on this board)
6358 		 * to be visible to the PIM layer.
6359 		 */
6360 		*idp = (drmachid_t)mp;
6361 	} else {
6362 		drmach_mem_t *lp;
6363 
6364 		/* hide this mem instance behind the first. */
6365 		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
6366 			;
6367 		lp->next = mp;
6368 
6369 		/*
6370 		 * hide this instance from the caller.
6371 		 * See drmach_board_find_devices_cb() for details.
6372 		 */
6373 		*idp = (drmachid_t)0;
6374 	}
6375 
6376 	return (NULL);
6377 }
6378 
6379 static void
6380 drmach_mem_dispose(drmachid_t id)
6381 {
6382 	drmach_mem_t *mp, *next;
6383 	drmach_board_t *bp;
6384 
6385 	ASSERT(DRMACH_IS_MEM_ID(id));
6386 
6387 	mutex_enter(&drmach_bus_sync_lock);
6388 
6389 	mp = id;
6390 	bp = mp->dev.bp;
6391 
6392 	do {
6393 		if (mp->dev.node)
6394 			drmach_node_dispose(mp->dev.node);
6395 
6396 		next = mp->next;
6397 		kmem_free(mp, sizeof (*mp));
6398 		mp = next;
6399 	} while (mp);
6400 
6401 	bp->mem = NULL;
6402 
6403 	drmach_bus_sync_list_update();
6404 	mutex_exit(&drmach_bus_sync_lock);
6405 }
6406 
6407 sbd_error_t *
6408 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6409 {
6410 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6411 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6412 	int		rv;
6413 
6414 	ASSERT(size != 0);
6415 
6416 	if (!DRMACH_IS_MEM_ID(id))
6417 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6418 
6419 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
6420 	if (rv == ENOMEM) {
6421 		cmn_err(CE_WARN, "%lu megabytes not available"
6422 			" to kernel cage", size >> 20);
6423 	} else if (rv != 0) {
6424 		/* catch this in debug kernels */
6425 		ASSERT(0);
6426 
6427 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6428 			" return value %d", rv);
6429 	}
6430 
6431 	return (NULL);
6432 }
6433 
6434 sbd_error_t *
6435 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6436 {
6437 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6438 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6439 	int		 rv;
6440 
6441 	if (!DRMACH_IS_MEM_ID(id))
6442 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6443 
6444 	if (size > 0) {
6445 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6446 		if (rv != 0) {
6447 			cmn_err(CE_WARN,
6448 			    "unexpected kcage_range_delete_post_mem_del"
6449 			    " return value %d", rv);
6450 			return (DRMACH_INTERNAL_ERROR());
6451 		}
6452 	}
6453 
6454 	return (NULL);
6455 }
6456 
6457 sbd_error_t *
6458 drmach_mem_disable(drmachid_t id)
6459 {
6460 	if (!DRMACH_IS_MEM_ID(id))
6461 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6462 	else
6463 		return (NULL);
6464 }
6465 
6466 sbd_error_t *
6467 drmach_mem_enable(drmachid_t id)
6468 {
6469 	if (!DRMACH_IS_MEM_ID(id))
6470 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6471 	else
6472 		return (NULL);
6473 }
6474 
6475 sbd_error_t *
6476 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
6477 {
6478 #define	MB(mb) ((mb) * 1048576ull)
6479 
6480 	static struct {
6481 		uint_t		uk;
6482 		uint64_t	segsz;
6483 	}  uk2segsz[] = {
6484 		{ 0x003,	MB(256)	  },
6485 		{ 0x007,	MB(512)	  },
6486 		{ 0x00f,	MB(1024)  },
6487 		{ 0x01f,	MB(2048)  },
6488 		{ 0x03f,	MB(4096)  },
6489 		{ 0x07f,	MB(8192)  },
6490 		{ 0x0ff,	MB(16384) },
6491 		{ 0x1ff,	MB(32768) },
6492 		{ 0x3ff,	MB(65536) },
6493 		{ 0x7ff,	MB(131072) }
6494 	};
6495 	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);
6496 
6497 #undef MB
6498 
6499 	uint64_t	 largest_sz = 0;
6500 	drmach_mem_t	*mp;
6501 
6502 	if (!DRMACH_IS_MEM_ID(id))
6503 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6504 
6505 	/* prime the result with a default value */
6506 	*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6507 
6508 	for (mp = id; mp; mp = mp->next) {
6509 		int bank;
6510 
6511 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6512 			int		i;
6513 			uint_t		uk;
6514 			uint64_t	madr;
6515 
6516 			/* get register value, extract uk and normalize */
6517 			drmach_mem_read_madr(mp, bank, &madr);
6518 
6519 			if (!(madr & DRMACH_MC_VALID_MASK))
6520 				continue;
6521 
6522 			uk = DRMACH_MC_UK(madr);
6523 
6524 			/* match uk value */
6525 			for (i = 0; i < len; i++)
6526 				if (uk == uk2segsz[i].uk)
6527 					break;
6528 
6529 			if (i < len) {
6530 				uint64_t sz = uk2segsz[i].segsz;
6531 
6532 				/*
6533 				 * remember largest segment size,
6534 				 * update mask result
6535 				 */
6536 				if (sz > largest_sz) {
6537 					largest_sz = sz;
6538 					*mask = sz - 1;
6539 				}
6540 			} else {
6541 				/*
6542 				 * uk not in table, punt using
6543 				 * entire slice size. no longer any
6544 				 * reason to check other banks.
6545 				 */
6546 				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6547 				return (NULL);
6548 			}
6549 		}
6550 	}
6551 
6552 	return (NULL);
6553 }
6554 
6555 sbd_error_t *
6556 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6557 {
6558 	drmach_mem_t *mp;
6559 
6560 	if (!DRMACH_IS_MEM_ID(id))
6561 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6562 
6563 	*base_addr = (uint64_t)-1;
6564 	for (mp = id; mp; mp = mp->next) {
6565 		int bank;
6566 
6567 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6568 			uint64_t addr, madr;
6569 
6570 			drmach_mem_read_madr(mp, bank, &madr);
6571 			if (madr & DRMACH_MC_VALID_MASK) {
6572 				addr = DRMACH_MC_UM_TO_PA(madr) |
6573 					DRMACH_MC_LM_TO_PA(madr);
6574 
6575 				if (addr < *base_addr)
6576 					*base_addr = addr;
6577 			}
6578 		}
6579 	}
6580 
6581 	/* should not happen, but ... */
6582 	if (*base_addr == (uint64_t)-1)
6583 		return (DRMACH_INTERNAL_ERROR());
6584 
6585 	return (NULL);
6586 }
6587 
6588 void
6589 drmach_bus_sync_list_update(void)
6590 {
6591 	int		rv, idx, cnt = 0;
6592 	drmachid_t	id;
6593 
6594 	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));
6595 
6596 	rv = drmach_array_first(drmach_boards, &idx, &id);
6597 	while (rv == 0) {
6598 		drmach_board_t		*bp = id;
6599 		drmach_mem_t		*mp = bp->mem;
6600 
6601 		while (mp) {
6602 			int bank;
6603 
6604 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6605 				uint64_t madr;
6606 
6607 				drmach_mem_read_madr(mp, bank, &madr);
6608 				if (madr & DRMACH_MC_VALID_MASK) {
6609 					uint64_t pa;
6610 
6611 					pa  = DRMACH_MC_UM_TO_PA(madr);
6612 					pa |= DRMACH_MC_LM_TO_PA(madr);
6613 
6614 					/*
6615 					 * The list is zero terminated.
6616 					 * Offset the pa by a doubleword
6617 					 * to avoid confusing a pa value of
6618 					 * of zero with the terminator.
6619 					 */
6620 					pa += sizeof (uint64_t);
6621 
6622 					drmach_bus_sync_list[cnt++] = pa;
6623 				}
6624 			}
6625 
6626 			mp = mp->next;
6627 		}
6628 
6629 		rv = drmach_array_next(drmach_boards, &idx, &id);
6630 	}
6631 
6632 	drmach_bus_sync_list[cnt] = 0;
6633 }
6634 
6635 sbd_error_t *
6636 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6637 {
6638 	sbd_error_t	*err;
6639 	struct memlist	*mlist;
6640 	gdcd_t		*gdcd;
6641 	mem_chunk_t	*chunk;
6642 	uint64_t	 chunks, pa, mask;
6643 
6644 	err = drmach_mem_get_base_physaddr(id, &pa);
6645 	if (err)
6646 		return (err);
6647 
6648 	gdcd = drmach_gdcd_new();
6649 	if (gdcd == NULL)
6650 		return (DRMACH_INTERNAL_ERROR());
6651 
6652 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6653 	pa &= mask;
6654 
6655 	mlist = NULL;
6656 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6657 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6658 	while (chunks-- != 0) {
6659 		if ((chunk->mc_base_pa & mask) == pa) {
6660 			mlist = memlist_add_span(mlist,
6661 				chunk->mc_base_pa, chunk->mc_mbytes * 1048576);
6662 		}
6663 
6664 		++chunk;
6665 	}
6666 
6667 	drmach_gdcd_dispose(gdcd);
6668 
6669 #ifdef DEBUG
6670 	DRMACH_PR("GDCD derived memlist:");
6671 	memlist_dump(mlist);
6672 #endif
6673 
6674 	*ml = mlist;
6675 	return (NULL);
6676 }
6677 
6678 sbd_error_t *
6679 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6680 {
6681 	drmach_mem_t	*mp;
6682 
6683 	if (!DRMACH_IS_MEM_ID(id))
6684 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6685 	mp = id;
6686 
6687 	ASSERT(mp->nbytes != 0);
6688 	*bytes = mp->nbytes;
6689 
6690 	return (NULL);
6691 }
6692 
6693 sbd_error_t *
6694 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6695 {
6696 	sbd_error_t	*err;
6697 	drmach_device_t	*mp;
6698 
6699 	if (!DRMACH_IS_MEM_ID(id))
6700 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6701 	mp = id;
6702 
6703 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6704 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6705 			err = NULL;
6706 			break;
6707 
6708 		case 1: *bytes = 0;
6709 			err = NULL;
6710 			break;
6711 
6712 		default:
6713 			err = DRMACH_INTERNAL_ERROR();
6714 			break;
6715 	}
6716 
6717 	return (err);
6718 }
6719 
6720 processorid_t drmach_mem_cpu_affinity_nail;
6721 
6722 processorid_t
6723 drmach_mem_cpu_affinity(drmachid_t id)
6724 {
6725 	drmach_device_t	*mp;
6726 	drmach_board_t	*bp;
6727 	processorid_t	 cpuid;
6728 
6729 	if (!DRMACH_IS_MEM_ID(id))
6730 		return (CPU_CURRENT);
6731 
6732 	if (drmach_mem_cpu_affinity_nail) {
6733 		cpuid = drmach_mem_cpu_affinity_nail;
6734 
6735 		if (cpuid < 0 || cpuid > NCPU)
6736 			return (CPU_CURRENT);
6737 
6738 		mutex_enter(&cpu_lock);
6739 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6740 			cpuid = CPU_CURRENT;
6741 		mutex_exit(&cpu_lock);
6742 
6743 		return (cpuid);
6744 	}
6745 
6746 	/* try to choose a proc on the target board */
6747 	mp = id;
6748 	bp = mp->bp;
6749 	if (bp->devices) {
6750 		int		 rv;
6751 		int		 d_idx;
6752 		drmachid_t	 d_id;
6753 
6754 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6755 		while (rv == 0) {
6756 			if (DRMACH_IS_CPU_ID(d_id)) {
6757 				drmach_cpu_t	*cp = d_id;
6758 
6759 				mutex_enter(&cpu_lock);
6760 				cpuid = cp->cpuid;
6761 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6762 					mutex_exit(&cpu_lock);
6763 					return (cpuid);
6764 				} else {
6765 					mutex_exit(&cpu_lock);
6766 				}
6767 			}
6768 
6769 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6770 		}
6771 	}
6772 
6773 	/* otherwise, this proc, wherever it is */
6774 	return (CPU_CURRENT);
6775 }
6776 
6777 static sbd_error_t *
6778 drmach_mem_release(drmachid_t id)
6779 {
6780 	if (!DRMACH_IS_MEM_ID(id))
6781 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6782 	return (NULL);
6783 }
6784 
6785 static sbd_error_t *
6786 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6787 {
6788 	drmach_mem_t	*mp;
6789 	sbd_error_t	*err;
6790 	uint64_t	 pa, slice_size;
6791 	struct memlist	*ml;
6792 
6793 	ASSERT(DRMACH_IS_MEM_ID(id));
6794 	mp = id;
6795 
6796 	/* get starting physical address of target memory */
6797 	err = drmach_mem_get_base_physaddr(id, &pa);
6798 	if (err)
6799 		return (err);
6800 
6801 	/* round down to slice boundary */
6802 	slice_size = DRMACH_MEM_SLICE_SIZE;
6803 	pa &= ~ (slice_size - 1);
6804 
6805 	/* stop at first span that is in slice */
6806 	memlist_read_lock();
6807 	for (ml = phys_install; ml; ml = ml->next)
6808 		if (ml->address >= pa && ml->address < pa + slice_size)
6809 			break;
6810 	memlist_read_unlock();
6811 
6812 	stat->assigned = mp->dev.bp->assigned;
6813 	stat->powered = mp->dev.bp->powered;
6814 	stat->configured = (ml != NULL);
6815 	stat->busy = mp->dev.busy;
6816 	strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6817 	stat->info[0] = '\0';
6818 
6819 	return (NULL);
6820 }
6821 
6822 sbd_error_t *
6823 drmach_board_deprobe(drmachid_t id)
6824 {
6825 	drmach_board_t	*bp;
6826 	sbd_error_t	*err = NULL;
6827 
6828 	if (!DRMACH_IS_BOARD_ID(id))
6829 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6830 	bp = id;
6831 
6832 	if (bp->tree) {
6833 		drmach_node_dispose(bp->tree);
6834 		bp->tree = NULL;
6835 	}
6836 	if (bp->devices) {
6837 		drmach_array_dispose(bp->devices, drmach_device_dispose);
6838 		bp->devices = NULL;
6839 		bp->mem = NULL;  /* TODO: still needed? */
6840 	}
6841 	return (err);
6842 }
6843 
6844 /*ARGSUSED1*/
6845 static sbd_error_t *
6846 drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6847 {
6848 	drmach_device_t	*dp;
6849 	uint64_t	val;
6850 	int		err = 1;
6851 
6852 	if (DRMACH_IS_CPU_ID(id)) {
6853 		drmach_cpu_t *cp = id;
6854 		if (drmach_cpu_read_scr(cp, &val))
6855 			err = 0;
6856 	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6857 		drmach_io_t *io = id;
6858 		val = lddphysio(io->scsr_pa);
6859 		err = 0;
6860 	}
6861 	if (err)
6862 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6863 
6864 	dp = id;
6865 	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6866 		dp->bp->cm.name,
6867 		dp->cm.name,
6868 		dp->portid,
6869 		DRMACH_LPA_BASE_TO_PA(val),
6870 		DRMACH_LPA_BND_TO_PA(val));
6871 
6872 	return (NULL);
6873 }
6874 
6875 /*ARGSUSED*/
6876 static sbd_error_t *
6877 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6878 {
6879 
6880 	drmach_board_t		*bp = (drmach_board_t *)id;
6881 
6882 	sbd_error_t		*err;
6883 	sc_gptwocfg_cookie_t	scc;
6884 
6885 	if (!DRMACH_IS_BOARD_ID(id))
6886 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6887 
6888 	/* do saf configurator stuff */
6889 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6890 	scc = sc_probe_board(bp->bnum);
6891 	if (scc == NULL) {
6892 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6893 		return (err);
6894 	}
6895 
6896 	return (err);
6897 }
6898 
6899 /*ARGSUSED*/
6900 static sbd_error_t *
6901 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6902 {
6903 
6904 	drmach_board_t	*bp;
6905 	sbd_error_t	*err = NULL;
6906 	sc_gptwocfg_cookie_t	scc;
6907 
6908 	if (!DRMACH_IS_BOARD_ID(id))
6909 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6910 	bp = id;
6911 
6912 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6913 	scc = sc_unprobe_board(bp->bnum);
6914 	if (scc != NULL) {
6915 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6916 	}
6917 
6918 	if (err == NULL)
6919 		err = drmach_board_deprobe(id);
6920 
6921 	return (err);
6922 
6923 }
6924 
6925 static sbd_error_t *
6926 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6927 {
6928 	_NOTE(ARGUNUSED(id))
6929 	_NOTE(ARGUNUSED(opts))
6930 
6931 	struct memlist	*ml;
6932 	uint64_t	src_pa;
6933 	uint64_t	dst_pa;
6934 	uint64_t	dst;
6935 
6936 	dst_pa = va_to_pa(&dst);
6937 
6938 	memlist_read_lock();
6939 	for (ml = phys_install; ml; ml = ml->next) {
6940 		uint64_t	nbytes;
6941 
6942 		src_pa = ml->address;
6943 		nbytes = ml->size;
6944 
6945 		while (nbytes != 0ull) {
6946 
6947 			/* copy 32 bytes at src_pa to dst_pa */
6948 			bcopy32_il(src_pa, dst_pa);
6949 
6950 			/* increment by 32 bytes */
6951 			src_pa += (4 * sizeof (uint64_t));
6952 
6953 			/* decrement by 32 bytes */
6954 			nbytes -= (4 * sizeof (uint64_t));
6955 		}
6956 	}
6957 	memlist_read_unlock();
6958 
6959 	return (NULL);
6960 }
6961 
6962 static sbd_error_t *
6963 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6964 {
6965 	_NOTE(ARGUNUSED(opts))
6966 
6967 	drmach_cpu_t	*cp;
6968 
6969 	if (!DRMACH_IS_CPU_ID(id))
6970 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6971 	cp = id;
6972 
6973 	mutex_enter(&cpu_lock);
6974 	(void) drmach_iocage_cpu_return(&(cp->dev),
6975 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6976 	mutex_exit(&cpu_lock);
6977 
6978 	return (NULL);
6979 }
6980 
6981 /*
6982  * Starcat DR passthrus are for debugging purposes only.
6983  */
6984 static struct {
6985 	const char	*name;
6986 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
6987 } drmach_pt_arr[] = {
6988 	{ "showlpa",		drmach_pt_showlpa		},
6989 	{ "ikprobe",		drmach_pt_ikprobe		},
6990 	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
6991 	{ "readmem",		drmach_pt_readmem		},
6992 	{ "recovercpu",		drmach_pt_recovercpu		},
6993 
6994 	/* the following line must always be last */
6995 	{ NULL,			NULL				}
6996 };
6997 
6998 /*ARGSUSED*/
6999 sbd_error_t *
7000 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
7001 {
7002 	int		i;
7003 	sbd_error_t	*err;
7004 
7005 	i = 0;
7006 	while (drmach_pt_arr[i].name != NULL) {
7007 		int len = strlen(drmach_pt_arr[i].name);
7008 
7009 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
7010 			break;
7011 
7012 		i += 1;
7013 	}
7014 
7015 	if (drmach_pt_arr[i].name == NULL)
7016 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
7017 	else
7018 		err = (*drmach_pt_arr[i].handler)(id, opts);
7019 
7020 	return (err);
7021 }
7022 
7023 sbd_error_t *
7024 drmach_release(drmachid_t id)
7025 {
7026 	drmach_common_t *cp;
7027 
7028 	if (!DRMACH_IS_DEVICE_ID(id))
7029 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7030 	cp = id;
7031 
7032 	return (cp->release(id));
7033 }
7034 
7035 sbd_error_t *
7036 drmach_status(drmachid_t id, drmach_status_t *stat)
7037 {
7038 	drmach_common_t *cp;
7039 	sbd_error_t	*err;
7040 
7041 	rw_enter(&drmach_boards_rwlock, RW_READER);
7042 
7043 	if (!DRMACH_IS_ID(id)) {
7044 		rw_exit(&drmach_boards_rwlock);
7045 		return (drerr_new(0, ESTC_NOTID, NULL));
7046 	}
7047 
7048 	cp = id;
7049 
7050 	err = cp->status(id, stat);
7051 	rw_exit(&drmach_boards_rwlock);
7052 	return (err);
7053 }
7054 
7055 static sbd_error_t *
7056 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7057 {
7058 	drmach_common_t *cp;
7059 
7060 	if (!DRMACH_IS_ID(id))
7061 		return (drerr_new(0, ESTC_NOTID, NULL));
7062 	cp = id;
7063 
7064 	return (cp->status(id, stat));
7065 }
7066 
7067 /*ARGSUSED*/
7068 sbd_error_t *
7069 drmach_unconfigure(drmachid_t id, int flags)
7070 {
7071 	drmach_device_t	*dp;
7072 	dev_info_t 	*rdip;
7073 
7074 	char	name[OBP_MAXDRVNAME];
7075 	int rv;
7076 
7077 	/*
7078 	 * Since CPU nodes are not configured, it is
7079 	 * necessary to skip the unconfigure step as
7080 	 * well.
7081 	 */
7082 	if (DRMACH_IS_CPU_ID(id)) {
7083 		return (NULL);
7084 	}
7085 
7086 	for (; id; ) {
7087 		dev_info_t	*fdip = NULL;
7088 
7089 		if (!DRMACH_IS_DEVICE_ID(id))
7090 			return (drerr_new(0, ESTC_INAPPROP, NULL));
7091 		dp = id;
7092 
7093 		rdip = dp->node->n_getdip(dp->node);
7094 
7095 		/*
7096 		 * drmach_unconfigure() is always called on a configured branch.
7097 		 * So the root of the branch was held earlier and must exist.
7098 		 */
7099 		ASSERT(rdip);
7100 
7101 		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");
7102 
7103 		rv = dp->node->n_getprop(dp->node,
7104 		    "name", name, OBP_MAXDRVNAME);
7105 
7106 		/* The node must have a name */
7107 		if (rv)
7108 			return (0);
7109 
7110 		if (drmach_name2type_idx(name) < 0) {
7111 			if (DRMACH_IS_MEM_ID(id)) {
7112 				drmach_mem_t	*mp = id;
7113 				id = mp->next;
7114 			} else {
7115 				id = NULL;
7116 			}
7117 			continue;
7118 		}
7119 
7120 		/*
7121 		 * NOTE: FORCE flag is no longer needed under devfs
7122 		 */
7123 		ASSERT(e_ddi_branch_held(rdip));
7124 		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
7125 			sbd_error_t	*err = NULL;
7126 			char		*path = kmem_alloc(MAXPATHLEN,
7127 					    KM_SLEEP);
7128 
7129 			/*
7130 			 * If non-NULL, fdip is returned held and must be
7131 			 * released.
7132 			 */
7133 			if (fdip != NULL) {
7134 				(void) ddi_pathname(fdip, path);
7135 				ddi_release_devi(fdip);
7136 			} else {
7137 				(void) ddi_pathname(rdip, path);
7138 			}
7139 
7140 			err = drerr_new(1, ESTC_DRVFAIL, path);
7141 
7142 			kmem_free(path, MAXPATHLEN);
7143 
7144 			/*
7145 			 * If we were unconfiguring an IO board, a call was
7146 			 * made to man_dr_detach.  We now need to call
7147 			 * man_dr_attach to regain man use of the eri.
7148 			 */
7149 			if (DRMACH_IS_IO_ID(id)) {
7150 				int (*func)(dev_info_t *dip);
7151 
7152 				func = (int (*)(dev_info_t *))kobj_getsymvalue\
7153 					("man_dr_attach", 0);
7154 
7155 				if (func) {
7156 					drmach_io_inst_t ios;
7157 					dev_info_t 	*pdip;
7158 					int		circ;
7159 
7160 					/*
7161 					 * Walk device tree to find rio dip for
7162 					 * the board
7163 					 * Since we are not interested in iosram
7164 					 * instance here, initialize it to 0, so
7165 					 * that the walk terminates as soon as
7166 					 * eri dip is found.
7167 					 */
7168 					ios.iosram_inst = 0;
7169 					ios.eri_dip = NULL;
7170 					ios.bnum = dp->bp->bnum;
7171 
7172 					if (pdip = ddi_get_parent(rdip)) {
7173 						ndi_hold_devi(pdip);
7174 						ndi_devi_enter(pdip, &circ);
7175 					}
7176 					/*
7177 					 * Root node doesn't have to be held in
7178 					 * any way.
7179 					 */
7180 					ASSERT(e_ddi_branch_held(rdip));
7181 					ddi_walk_devs(rdip,
7182 						drmach_board_find_io_insts,
7183 						(void *)&ios);
7184 
7185 					DRMACH_PR("drmach_unconfigure: bnum=%d"
7186 						" eri=0x%p\n",
7187 						ios.bnum, ios.eri_dip);
7188 
7189 					if (pdip) {
7190 						ndi_devi_exit(pdip, circ);
7191 						ndi_rele_devi(pdip);
7192 					}
7193 
7194 					if (ios.eri_dip) {
7195 						DRMACH_PR("calling"
7196 							" man_dr_attach\n");
7197 						(void) (*func)(ios.eri_dip);
7198 						/*
7199 						 * Release hold acquired in
7200 						 * drmach_board_find_io_insts()
7201 						 */
7202 						ndi_rele_devi(ios.eri_dip);
7203 					}
7204 				}
7205 			}
7206 			return (err);
7207 		}
7208 
7209 		if (DRMACH_IS_MEM_ID(id)) {
7210 			drmach_mem_t	*mp = id;
7211 			id = mp->next;
7212 		} else {
7213 			id = NULL;
7214 		}
7215 	}
7216 
7217 	return (NULL);
7218 }
7219 
7220 /*
7221  * drmach interfaces to legacy Starfire platmod logic
7222  * linkage via runtime symbol look up, called from plat_cpu_power*
7223  */
7224 
7225 /*
7226  * Start up a cpu.  It is possible that we're attempting to restart
7227  * the cpu after an UNCONFIGURE in which case the cpu will be
7228  * spinning in its cache.  So, all we have to do is wakeup him up.
7229  * Under normal circumstances the cpu will be coming from a previous
7230  * CONNECT and thus will be spinning in OBP.  In both cases, the
7231  * startup sequence is the same.
7232  */
7233 int
7234 drmach_cpu_poweron(struct cpu *cp)
7235 {
7236 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7237 
7238 	ASSERT(MUTEX_HELD(&cpu_lock));
7239 
7240 	if (drmach_cpu_start(cp) != 0)
7241 		return (EBUSY);
7242 	else
7243 		return (0);
7244 }
7245 
7246 int
7247 drmach_cpu_poweroff(struct cpu *cp)
7248 {
7249 	int		ntries;
7250 	processorid_t	cpuid;
7251 	void		drmach_cpu_shutdown_self(void);
7252 
7253 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
7254 
7255 	ASSERT(MUTEX_HELD(&cpu_lock));
7256 
7257 	/*
7258 	 * XXX CHEETAH SUPPORT
7259 	 * for cheetah, we need to grab the iocage lock since iocage
7260 	 * memory is used for e$ flush.
7261 	 */
7262 	if (drmach_is_cheetah) {
7263 		mutex_enter(&drmach_iocage_lock);
7264 		while (drmach_iocage_is_busy)
7265 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7266 		drmach_iocage_is_busy = 1;
7267 		drmach_iocage_mem_scrub(ecache_size * 2);
7268 		mutex_exit(&drmach_iocage_lock);
7269 	}
7270 
7271 	cpuid = cp->cpu_id;
7272 
7273 	/*
7274 	 * Set affinity to ensure consistent reading and writing of
7275 	 * drmach_xt_mb[cpuid] by one "master" CPU directing
7276 	 * the shutdown of the target CPU.
7277 	 */
7278 	affinity_set(CPU->cpu_id);
7279 
7280 	/*
7281 	 * Capture all CPUs (except for detaching proc) to prevent
7282 	 * crosscalls to the detaching proc until it has cleared its
7283 	 * bit in cpu_ready_set.
7284 	 *
7285 	 * The CPUs remain paused and the prom_mutex is known to be free.
7286 	 * This prevents blocking when doing prom IEEE-1275 calls at a
7287 	 * high PIL level.
7288 	 */
7289 	promsafe_pause_cpus();
7290 
7291 	/*
7292 	 * Quiesce interrupts on the target CPU. We do this by setting
7293 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
7294 	 * prevent it from receiving cross calls and cross traps.
7295 	 * This prevents the processor from receiving any new soft interrupts.
7296 	 */
7297 	mp_cpu_quiesce(cp);
7298 
7299 	prom_hotremovecpu(cpuid);
7300 
7301 	start_cpus();
7302 
7303 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
7304 	drmach_xt_mb[cpuid] = 0x80;
7305 
7306 	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
7307 		(uint64_t)drmach_cpu_shutdown_self, NULL);
7308 
7309 	ntries = drmach_cpu_ntries;
7310 	while (drmach_xt_mb[cpuid] && ntries) {
7311 		DELAY(drmach_cpu_delay);
7312 		ntries--;
7313 	}
7314 
7315 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
7316 
7317 	membar_sync();			/* make sure copy-back retires */
7318 
7319 	affinity_clear();
7320 
7321 	/*
7322 	 * XXX CHEETAH SUPPORT
7323 	 */
7324 	if (drmach_is_cheetah) {
7325 		mutex_enter(&drmach_iocage_lock);
7326 		drmach_iocage_mem_scrub(ecache_size * 2);
7327 		drmach_iocage_is_busy = 0;
7328 		cv_signal(&drmach_iocage_cv);
7329 		mutex_exit(&drmach_iocage_lock);
7330 	}
7331 
7332 	DRMACH_PR("waited %d out of %d tries for "
7333 		"drmach_cpu_shutdown_self on cpu%d",
7334 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
7335 
7336 	/*
7337 	 * Do this here instead of drmach_cpu_shutdown_self() to
7338 	 * avoid an assertion failure panic in turnstile.c.
7339 	 */
7340 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
7341 
7342 	return (0);
7343 }
7344 
7345 void
7346 drmach_iocage_mem_scrub(uint64_t nbytes)
7347 {
7348 	extern int drmach_bc_bzero(void*, size_t);
7349 	int	rv;
7350 
7351 	ASSERT(MUTEX_HELD(&cpu_lock));
7352 
7353 	affinity_set(CPU->cpu_id);
7354 
7355 	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
7356 	if (rv != 0) {
7357 		DRMACH_PR(
7358 		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
7359 		rv = drmach_bc_bzero(drmach_iocage_vaddr,
7360 			drmach_iocage_size);
7361 		if (rv != 0)
7362 			cmn_err(CE_PANIC,
7363 			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
7364 			    rv);
7365 	}
7366 
7367 	cpu_flush_ecache();
7368 
7369 	affinity_clear();
7370 }
7371 
7372 #define	ALIGN(x, a)	((a) == 0 ? (uintptr_t)(x) : \
7373 	(((uintptr_t)(x) + (uintptr_t)(a) - 1l) & ~((uintptr_t)(a) - 1l)))
7374 
7375 static sbd_error_t *
7376 drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7377 {
7378 	pfn_t		basepfn;
7379 	pgcnt_t		npages;
7380 	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7381 	uint64_t	drmach_iocage_paddr_mbytes;
7382 
7383 	ASSERT(drmach_iocage_paddr != -1);
7384 
7385 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7386 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7387 
7388 	memscrub_delete_span(basepfn, npages);
7389 
7390 	mutex_enter(&cpu_lock);
7391 	drmach_iocage_mem_scrub(drmach_iocage_size);
7392 	mutex_exit(&cpu_lock);
7393 
7394 	/*
7395 	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7396 	 * and in megabyte units.
7397 	 * The size of the cage is also in megabyte units.
7398 	 */
7399 	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7400 
7401 	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7402 
7403 	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7404 	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7405 	tbrq->memlen = drmach_iocage_size / 0x100000;
7406 
7407 	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7408 	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7409 	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7410 
7411 	return (NULL);
7412 }
7413 
7414 static sbd_error_t *
7415 drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7416 {
7417 	_NOTE(ARGUNUSED(tbr))
7418 
7419 	pfn_t		basepfn;
7420 	pgcnt_t		npages;
7421 	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7422 
7423 	ASSERT(drmach_iocage_paddr != -1);
7424 
7425 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7426 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7427 
7428 	memscrub_add_span(basepfn, npages);
7429 
7430 	mutex_enter(&cpu_lock);
7431 	mutex_enter(&drmach_iocage_lock);
7432 	drmach_iocage_mem_scrub(drmach_iocage_size);
7433 	drmach_iocage_is_busy = 0;
7434 	cv_signal(&drmach_iocage_cv);
7435 	mutex_exit(&drmach_iocage_lock);
7436 	mutex_exit(&cpu_lock);
7437 
7438 	return (NULL);
7439 }
7440 
7441 static int
7442 drmach_cpu_intr_disable(cpu_t *cp)
7443 {
7444 	if (cpu_intr_disable(cp) != 0)
7445 		return (-1);
7446 	return (0);
7447 }
7448 
7449 static int
7450 drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
7451 {
7452 	struct cpu	*cp;
7453 	processorid_t	cpuid;
7454 	static char	*fn = "drmach_iocage_cpu_acquire";
7455 	sbd_error_t 	*err;
7456 	int 		impl;
7457 
7458 	ASSERT(DRMACH_IS_CPU_ID(dp));
7459 	ASSERT(MUTEX_HELD(&cpu_lock));
7460 
7461 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7462 
7463 	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);
7464 
7465 	if (dp->busy)
7466 		return (-1);
7467 
7468 	if ((cp = cpu_get(cpuid)) == NULL) {
7469 		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
7470 		return (-1);
7471 	}
7472 
7473 	if (!CPU_ACTIVE(cp)) {
7474 		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
7475 		return (-1);
7476 	}
7477 
7478 	/*
7479 	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
7480 	 * can fail to receive an XIR. To workaround this issue until a hardware
7481 	 * fix is implemented, we will exclude the selection of these CPUs.
7482 	 *
7483 	 * Once a fix is implemented in hardware, this code should be updated
7484 	 * to allow Jaguar CPUs that have the fix to be used. However, support
7485 	 * must be retained to skip revisions that do not have this fix.
7486 	 */
7487 
7488 	err = drmach_cpu_get_impl(dp, &impl);
7489 	if (err) {
7490 		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
7491 		sbd_err_clear(&err);
7492 		return (-1);
7493 	}
7494 
7495 	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
7496 	    drmach_iocage_exclude_jaguar_port_zero) {
7497 		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
7498 		    fn, cpuid);
7499 		return (-1);
7500 	}
7501 
7502 	ASSERT(oflags);
7503 	*oflags = cp->cpu_flags;
7504 
7505 	if (cpu_offline(cp, 0)) {
7506 		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
7507 		return (-1);
7508 	}
7509 
7510 	if (cpu_poweroff(cp)) {
7511 		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
7512 		if (cpu_online(cp)) {
7513 			cmn_err(CE_WARN, "failed to online CPU id %d "
7514 			    "during I/O cage test selection", cpuid);
7515 		}
7516 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7517 		    drmach_cpu_intr_disable(cp) != 0) {
7518 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7519 			    "no-intr during I/O cage test selection", cpuid);
7520 		}
7521 		return (-1);
7522 	}
7523 
7524 	if (cpu_unconfigure(cpuid)) {
7525 		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
7526 		    cpuid);
7527 		(void) cpu_configure(cpuid);
7528 		if ((cp = cpu_get(cpuid)) == NULL) {
7529 			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7530 			    "during I/O cage test selection", cpuid);
7531 			dp->busy = 1;
7532 			return (-1);
7533 		}
7534 		if (cpu_poweron(cp) || cpu_online(cp)) {
7535 			cmn_err(CE_WARN, "failed to %s CPU id %d "
7536 			    "during I/O cage test selection",
7537 			    cpu_is_poweredoff(cp) ?
7538 			    "poweron" : "online", cpuid);
7539 		}
7540 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7541 		    drmach_cpu_intr_disable(cp) != 0) {
7542 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7543 			    "no-intr during I/O cage test selection", cpuid);
7544 		}
7545 		return (-1);
7546 	}
7547 
7548 	dp->busy = 1;
7549 
7550 	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);
7551 
7552 	return (0);
7553 }
7554 
7555 /*
7556  * Attempt to acquire all the CPU devices passed in. It is
7557  * assumed that all the devices in the list are the cores of
7558  * a single CMP device. Non CMP devices can be handled as a
7559  * single core CMP by passing in a one element list.
7560  *
7561  * Success is only returned if *all* the devices in the list
7562  * can be acquired. In the failure case, none of the devices
7563  * in the list will be held as acquired.
7564  */
7565 static int
7566 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7567 {
7568 	int	curr;
7569 	int	i;
7570 	int	rv = 0;
7571 
7572 	ASSERT((dpp != NULL) && (*dpp != NULL));
7573 
7574 	/*
7575 	 * Walk the list of CPU devices (cores of a CMP)
7576 	 * and attempt to acquire them. Bail out if an
7577 	 * error is encountered.
7578 	 */
7579 	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7580 
7581 		/* check for the end of the list */
7582 		if (dpp[curr] == NULL) {
7583 			break;
7584 		}
7585 
7586 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7587 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7588 
7589 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7590 		if (rv != 0) {
7591 			break;
7592 		}
7593 	}
7594 
7595 	/*
7596 	 * Check for an error.
7597 	 */
7598 	if (rv != 0) {
7599 		/*
7600 		 * Make a best effort attempt to return any cores
7601 		 * that were already acquired before the error was
7602 		 * encountered.
7603 		 */
7604 		for (i = 0; i < curr; i++) {
7605 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7606 		}
7607 	}
7608 
7609 	return (rv);
7610 }
7611 
7612 static int
7613 drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7614 {
7615 	processorid_t	cpuid;
7616 	struct cpu	*cp;
7617 	int		rv = 0;
7618 	static char	*fn = "drmach_iocage_cpu_return";
7619 
7620 	ASSERT(DRMACH_IS_CPU_ID(dp));
7621 	ASSERT(MUTEX_HELD(&cpu_lock));
7622 
7623 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7624 
7625 	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7626 
7627 	if (cpu_configure(cpuid)) {
7628 		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7629 		    "after I/O cage test", cpuid);
7630 		/*
7631 		 * The component was never set to unconfigured during the IO
7632 		 * cage test, so we need to leave marked as busy to prevent
7633 		 * further DR operations involving this component.
7634 		 */
7635 		return (-1);
7636 	}
7637 
7638 	if ((cp = cpu_get(cpuid)) == NULL) {
7639 		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7640 		    "I/O cage test", cpuid);
7641 		dp->busy = 0;
7642 		return (-1);
7643 	}
7644 
7645 	if (cpu_poweron(cp) || cpu_online(cp)) {
7646 		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7647 		    "cage test", cpu_is_poweredoff(cp) ?
7648 		    "poweron" : "online", cpuid);
7649 		rv = -1;
7650 	}
7651 
7652 	/*
7653 	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7654 	 * P_NOINTR. Need to return to previous user-visible state.
7655 	 */
7656 	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7657 	    drmach_cpu_intr_disable(cp) != 0) {
7658 		cmn_err(CE_WARN, "failed to restore CPU id %d "
7659 		    "no-intr after I/O cage test", cpuid);
7660 		rv = -1;
7661 	}
7662 
7663 	dp->busy = 0;
7664 
7665 	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7666 
7667 	return (rv);
7668 }
7669 
7670 static sbd_error_t *
7671 drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7672     cpu_flag_t *oflags)
7673 {
7674 	drmach_board_t	*bp;
7675 	int		b_rv;
7676 	int		b_idx;
7677 	drmachid_t	b_id;
7678 	int		found;
7679 
7680 	mutex_enter(&cpu_lock);
7681 
7682 	ASSERT(drmach_boards != NULL);
7683 
7684 	found = 0;
7685 
7686 	/*
7687 	 * Walk the board list.
7688 	 */
7689 	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
7690 
7691 	while (b_rv == 0) {
7692 
7693 		int		d_rv;
7694 		int		d_idx;
7695 		drmachid_t	d_id;
7696 
7697 		bp = b_id;
7698 
7699 		if (bp->connected == 0 || bp->devices == NULL) {
7700 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7701 			continue;
7702 		}
7703 
7704 		/* An AXQ restriction disqualifies MCPU's as candidates. */
7705 		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
7706 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7707 			continue;
7708 		}
7709 
7710 		/*
7711 		 * Walk the device list of this board.
7712 		 */
7713 		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);
7714 
7715 		while (d_rv == 0) {
7716 
7717 			drmach_device_t	*ndp;
7718 
7719 			/* only interested in CPU devices */
7720 			if (!DRMACH_IS_CPU_ID(d_id)) {
7721 				d_rv = drmach_array_next(bp->devices, &d_idx,
7722 				    &d_id);
7723 				continue;
7724 			}
7725 
7726 			/*
7727 			 * The following code assumes two properties
7728 			 * of a CMP device:
7729 			 *
7730 			 *   1. All cores of a CMP are grouped together
7731 			 *	in the device list.
7732 			 *
7733 			 *   2. There will only be a maximum of two cores
7734 			 *	present in the CMP.
7735 			 *
7736 			 * If either of these two properties change,
7737 			 * this code will have to be revisited.
7738 			 */
7739 
7740 			dpp[0] = d_id;
7741 			dpp[1] = NULL;
7742 
7743 			/*
7744 			 * Get the next device. It may or may not be used.
7745 			 */
7746 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7747 			ndp = d_id;
7748 
7749 			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
7750 				/*
7751 				 * The second device is only interesting for
7752 				 * this pass if it has the same portid as the
7753 				 * first device. This implies that both are
7754 				 * cores of the same CMP.
7755 				 */
7756 				if (dpp[0]->portid == ndp->portid) {
7757 					dpp[1] = d_id;
7758 				}
7759 			}
7760 
7761 			/*
7762 			 * Attempt to acquire all cores of the CMP.
7763 			 */
7764 			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
7765 				found = 1;
7766 				break;
7767 			}
7768 
7769 			/*
7770 			 * Check if the search for the second core was
7771 			 * successful. If not, the next iteration should
7772 			 * use that device.
7773 			 */
7774 			if (dpp[1] == NULL) {
7775 				continue;
7776 			}
7777 
7778 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7779 		}
7780 
7781 		if (found)
7782 			break;
7783 
7784 		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7785 	}
7786 
7787 	mutex_exit(&cpu_lock);
7788 
7789 	if (!found) {
7790 		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
7791 	}
7792 
7793 	tbrq->cpu_portid = (*dpp)->portid;
7794 
7795 	return (NULL);
7796 }
7797 
7798 /*
7799  * Setup an iocage by acquiring a cpu and memory.
7800  */
7801 static sbd_error_t *
7802 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7803     cpu_flag_t *oflags)
7804 {
7805 	sbd_error_t *err;
7806 
7807 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7808 	if (!err) {
7809 		mutex_enter(&drmach_iocage_lock);
7810 		while (drmach_iocage_is_busy)
7811 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7812 		drmach_iocage_is_busy = 1;
7813 		mutex_exit(&drmach_iocage_lock);
7814 		err = drmach_iocage_mem_get(tbrq);
7815 		if (err) {
7816 			mutex_enter(&drmach_iocage_lock);
7817 			drmach_iocage_is_busy = 0;
7818 			cv_signal(&drmach_iocage_cv);
7819 			mutex_exit(&drmach_iocage_lock);
7820 		}
7821 	}
7822 	return (err);
7823 }
7824 
/*
 * Dimensions of the slot 1 pause snapshot structures below.
 *
 * DRMACH_SCHIZO_PCI_LEAF_MAX sizes the pci[] array of each Schizo and
 * DRMACH_S1P_SAMPLE_MAX sizes every per-sample array (one element per
 * drmach_sr_iter_t value). DRMACH_SCHIZO_PCI_SLOT_MAX is not referenced
 * in the nearby code; presumably the number of PCI slots per Schizo --
 * TODO confirm.
 */
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
#define	DRMACH_S1P_SAMPLE_MAX		2

/*
 * Identifies which of the two snapshots is being taken or examined.
 * The value is used directly as an index into the per-sample arrays.
 */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/*
 * Saved state of a slot 1 AXQ.
 *
 * dip is stored without a hold (see drmach_slot1_pause_add_axq()).
 * pcr_sel_save is the performance counter select register value read
 * post-suspend and written back pre-resume. pic_l2_io_q[] holds the
 * counter value read for each sample. A reg_basepa of 0 makes
 * drmach_s1p_axq_update() skip the entry.
 */
typedef struct {
	dev_info_t	*dip;
	uint32_t	portid;
	uint32_t	pcr_sel_save;
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX];
	uint64_t	reg_basepa;
} drmach_s1p_axq_t;

/*
 * Saved state of one Schizo PCI leaf.
 *
 * dip is stored without a hold (see drmach_find_slot1_io()). For each
 * sample, regs[] keeps the two interrupt state diagnostic registers
 * and intr_map_regs, a kmem-allocated array of nmap_regs saved
 * interrupt mapping registers (freed by drmach_s1p_pci_free()).
 */
typedef struct {
	dev_info_t		*dip;
	uint32_t		portid;
	uint64_t		csr_basepa;
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;
		uint64_t	*intr_map_regs;
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/*
 * Saved state of one Schizo: the CSR, error control and error log
 * registers for each sample, plus its PCI leaves. A csr_basepa of 0
 * marks an entry that was never filled in; such entries are skipped.
 */
typedef struct {
	uint64_t		csr_basepa;
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/*
 * Everything saved for the slot 1 board of one boardset: its AXQ and
 * its Schizos.
 */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;

/*
 * Table of saved state for paused slot1 devices, indexed by expander
 * number; NULL entries have nothing recorded.
 *
 * drmach_slot1_pause_init, when non-zero, makes the next suspend-side
 * call of drmach_verify_sr() free the table before it is rebuilt.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
static int drmach_slot1_pause_init = 1;

/*
 * Switch for the slot 1 pause bookkeeping and checks; on by default
 * only in DEBUG kernels.
 */
#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7880 
7881 static int
7882 drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7883 {
7884 	int		portid, exp, slot, i;
7885 	drmach_reg_t	regs[2];
7886 	int		reglen = sizeof (regs);
7887 
7888 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7889 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7890 		return (0);
7891 	}
7892 
7893 	exp = (portid >> 5) & 0x1f;
7894 	slot = portid & 0x1;
7895 
7896 	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7897 	    strlen(DRMACH_AXQ_NAMEPROP))) {
7898 		return (0);
7899 	}
7900 
7901 	mutex_enter(&cpu_lock);
7902 	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7903 		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7904 			/* maxcat cpu present */
7905 			mutex_exit(&cpu_lock);
7906 			return (0);
7907 		}
7908 	}
7909 	mutex_exit(&cpu_lock);
7910 
7911 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7912 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7913 		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7914 		    "axq dip=%p\n", dip);
7915 		return (0);
7916 	}
7917 
7918 	ASSERT(id && reg);
7919 	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7920 	*reg |= (uint64_t)regs[0].reg_addr_lo;
7921 	*id = portid;
7922 
7923 	return (1);
7924 }
7925 
7926 /*
7927  * Allocate an entry in the slot1_paused state table.
7928  */
7929 static void
7930 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7931     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7932 {
7933 	int	axq_exp;
7934 	drmach_slot1_pause_t *slot1;
7935 
7936 	axq_exp = (axq_portid >> 5) & 0x1f;
7937 
7938 	ASSERT(axq_portid & 0x1);
7939 	ASSERT(slot1_paused[axq_exp] == NULL);
7940 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7941 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7942 
7943 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7944 
7945 	/*
7946 	 * XXX This dip should really be held (via ndi_hold_devi())
7947 	 * before saving it in the axq pause structure. However that
7948 	 * would prevent DR as the pause data structures persist until
7949 	 * the next suspend. drmach code should be modified to free the
7950 	 * the slot 1 pause data structures for a boardset when its
7951 	 * slot 1 board is DRed out. The dip can then be released via
7952 	 * ndi_rele_devi() when the pause data structure is freed
7953 	 * allowing DR to proceed. Until this change is made, drmach
7954 	 * code should be careful about dereferencing the saved dip
7955 	 * as it may no longer exist.
7956 	 */
7957 	slot1->axq.dip = axq_dip;
7958 	slot1->axq.portid = axq_portid;
7959 	slot1->axq.reg_basepa = reg;
7960 	slot1_paused[axq_exp] = slot1;
7961 }
7962 
7963 static void
7964 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7965 {
7966 	int	i;
7967 
7968 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7969 		if (pci->regs[i].intr_map_regs != NULL) {
7970 			ASSERT(pci->regs[i].nmap_regs > 0);
7971 			kmem_free(pci->regs[i].intr_map_regs,
7972 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7973 		}
7974 	}
7975 }
7976 
7977 static void
7978 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7979 {
7980 	int	i, j, k;
7981 	drmach_slot1_pause_t *slot1;
7982 
7983 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7984 		if ((slot1 = slot1_paused[i]) == NULL)
7985 			continue;
7986 
7987 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7988 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7989 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7990 
7991 		kmem_free(slot1, sizeof (*slot1));
7992 		slot1_paused[i] = NULL;
7993 	}
7994 }
7995 
7996 /*
7997  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
7998  * fill in the appropriate info in the slot1_paused state table.
7999  */
8000 static int
8001 drmach_find_slot1_io(dev_info_t *dip, void *arg)
8002 {
8003 	int		portid, exp, ioc_unum, leaf_unum;
8004 	char		buf[OBP_MAXDRVNAME];
8005 	int		buflen = sizeof (buf);
8006 	drmach_reg_t	regs[3];
8007 	int		reglen = sizeof (regs);
8008 	uint32_t	leaf_offset;
8009 	uint64_t	schizo_csr_pa, pci_csr_pa;
8010 	drmach_s1p_pci_t *pci;
8011 	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;
8012 
8013 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8014 	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
8015 	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
8016 		return (DDI_WALK_CONTINUE);
8017 	}
8018 
8019 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
8020 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
8021 		return (DDI_WALK_CONTINUE);
8022 	}
8023 
8024 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8025 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
8026 		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
8027 		    "dip=%p\n", dip);
8028 		return (DDI_WALK_CONTINUE);
8029 	}
8030 
8031 	exp = portid >> 5;
8032 	ioc_unum = portid & 0x1;
8033 	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
8034 	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
8035 	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
8036 	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
8037 	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;
8038 
8039 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8040 	ASSERT(slot1_paused[exp] != NULL);
8041 	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
8042 	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
8043 	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);
8044 
8045 	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
8046 	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
8047 	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];
8048 
8049 	/*
8050 	 * XXX This dip should really be held (via ndi_hold_devi())
8051 	 * before saving it in the pci pause structure. However that
8052 	 * would prevent DR as the pause data structures persist until
8053 	 * the next suspend. drmach code should be modified to free the
8054 	 * the slot 1 pause data structures for a boardset when its
8055 	 * slot 1 board is DRed out. The dip can then be released via
8056 	 * ndi_rele_devi() when the pause data structure is freed
8057 	 * allowing DR to proceed. Until this change is made, drmach
8058 	 * code should be careful about dereferencing the saved dip as
8059 	 * it may no longer exist.
8060 	 */
8061 	pci->dip = dip;
8062 	pci->portid = portid;
8063 	pci->csr_basepa = pci_csr_pa;
8064 
8065 	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
8066 	    buf, portid, dip);
8067 
8068 	return (DDI_WALK_PRUNECHILD);
8069 }
8070 
8071 static void
8072 drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
8073 {
8074 	/*
8075 	 * Root node doesn't have to be held
8076 	 */
8077 	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
8078 	    (void *)slot1_paused);
8079 }
8080 
8081 /*
8082  * Save the interrupt mapping registers for each non-idle interrupt
8083  * represented by the bit pairs in the saved interrupt state
8084  * diagnostic registers for this PCI leaf.
8085  */
8086 static void
8087 drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
8088 {
8089 	int	 i, cnt, ino;
8090 	uint64_t reg;
8091 	char	 *dname;
8092 	uchar_t	 Xmits;
8093 
8094 	dname = ddi_binding_name(pci->dip);
8095 	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;
8096 
8097 	/*
8098 	 * 1st pass allocates, 2nd pass populates.
8099 	 */
8100 	for (i = 0; i < 2; i++) {
8101 		cnt = ino = 0;
8102 
8103 		/*
8104 		 * PCI slot interrupts
8105 		 */
8106 		reg = pci->regs[iter].slot_intr_state_diag;
8107 		while (reg) {
8108 			/*
8109 			 * Xmits Interrupt Number Offset(ino) Assignments
8110 			 *   00-17 PCI Slot Interrupts
8111 			 *   18-1f Not Used
8112 			 */
8113 			if ((Xmits) && (ino > 0x17))
8114 				break;
8115 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8116 			    COMMON_CLEAR_INTR_REG_IDLE) {
8117 				if (i) {
8118 					pci->regs[iter].intr_map_regs[cnt] =
8119 					    lddphysio(pci->csr_basepa +
8120 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8121 					    ino * sizeof (reg));
8122 				}
8123 				++cnt;
8124 			}
8125 			++ino;
8126 			reg >>= 2;
8127 		}
8128 
8129 		/*
8130 		 * Xmits Interrupt Number Offset(ino) Assignments
8131 		 *   20-2f Not Used
8132 		 *   30-37 Internal interrupts
8133 		 *   38-3e Not Used
8134 		 */
8135 		ino = (Xmits)  ?  0x30 : 0x20;
8136 
8137 		/*
8138 		 * OBIO and internal schizo interrupts
8139 		 * Each PCI leaf has a set of mapping registers for all
8140 		 * possible interrupt sources except the NewLink interrupts.
8141 		 */
8142 		reg = pci->regs[iter].obio_intr_state_diag;
8143 		while (reg && ino <= 0x38) {
8144 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8145 			    COMMON_CLEAR_INTR_REG_IDLE) {
8146 				if (i) {
8147 					pci->regs[iter].intr_map_regs[cnt] =
8148 					    lddphysio(pci->csr_basepa +
8149 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8150 					    ino * sizeof (reg));
8151 				}
8152 				++cnt;
8153 			}
8154 			++ino;
8155 			reg >>= 2;
8156 		}
8157 
8158 		if (!i) {
8159 			pci->regs[iter].nmap_regs = cnt;
8160 			pci->regs[iter].intr_map_regs =
8161 			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
8162 		}
8163 	}
8164 }
8165 
8166 static void
8167 drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
8168 {
8169 	uint32_t	reg;
8170 
8171 	if (axq->reg_basepa == 0x0UL)
8172 		return;
8173 
8174 	if (iter == DRMACH_POST_SUSPEND) {
8175 		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
8176 		    AXQ_SLOT1_PERFCNT_SEL);
8177 		/*
8178 		 * Select l2_io_queue counter by writing L2_IO_Q mux
8179 		 * input to bits 0-6 of perf cntr select reg.
8180 		 */
8181 		reg = axq->pcr_sel_save;
8182 		reg &= ~AXQ_PIC_CLEAR_MASK;
8183 		reg |= L2_IO_Q;
8184 
8185 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
8186 	}
8187 
8188 	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);
8189 
8190 	if (iter == DRMACH_PRE_RESUME) {
8191 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
8192 		    axq->pcr_sel_save);
8193 	}
8194 
8195 	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
8196 	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
8197 }
8198 
8199 static void
8200 drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
8201 {
8202 	int	i;
8203 	drmach_s1p_pci_t *pci;
8204 
8205 	if (schizo->csr_basepa == 0x0UL)
8206 		return;
8207 
8208 	schizo->regs[iter].csr =
8209 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
8210 	schizo->regs[iter].errctrl =
8211 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
8212 	schizo->regs[iter].errlog =
8213 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);
8214 
8215 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8216 		pci = &schizo->pci[i];
8217 		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
8218 			pci->regs[iter].slot_intr_state_diag =
8219 			    lddphysio(pci->csr_basepa +
8220 			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
8221 
8222 			pci->regs[iter].obio_intr_state_diag =
8223 			    lddphysio(pci->csr_basepa +
8224 			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
8225 
8226 			drmach_s1p_intr_map_reg_save(pci, iter);
8227 		}
8228 	}
8229 }
8230 
8231 /*
8232  * Called post-suspend and pre-resume to snapshot the suspend state
8233  * of slot1 AXQs and Schizos.
8234  */
8235 static void
8236 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8237     drmach_sr_iter_t iter)
8238 {
8239 	int	i, j;
8240 	drmach_slot1_pause_t *slot1;
8241 
8242 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8243 		if ((slot1 = slot1_paused[i]) == NULL)
8244 			continue;
8245 
8246 		drmach_s1p_axq_update(&slot1->axq, iter);
8247 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8248 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8249 	}
8250 }
8251 
/*
 * Starcat hPCI Schizo devices.
 *
 * The table is indexed as [PCI slot][Schizo number]; see
 * drmach_s1p_decode_slot_intr().
 *
 * The name field is overloaded. NULL means the slot (interrupt concentrator
 * bus) is not used. intr_mask is a bit mask representing the 4 possible
 * interrupts per slot: bit n is set if interrupt line n is valid (rio does
 * not use interrupt lines 0, 1).
 */
static struct {
	char	*name;
	uint8_t	intr_mask;
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8273 
/*
 * Translate a Schizo internal interrupt number (0x30-0x37) into a
 * description of its source. Only the low three bits select the
 * description; values without a named source yield "Reserved".
 *
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
 * "Interrupt Registers", Table 22-69, page 306.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char	*intr_names[] = {
		"Uncorrectable ECC error",	/* 0x0 */
		"Correctable ECC error",	/* 0x1 */
		"PCI Bus A Error",		/* 0x2 */
		"PCI Bus B Error",		/* 0x3 */
		"Safari Bus Error"		/* 0x4 */
	};
	int	which;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	which = ino & 0x7;
	if (which < (int)(sizeof (intr_names) / sizeof (intr_names[0])))
		return (intr_names[which]);

	return ("Reserved");
}
8295 
8296 #define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8297 
8298 static void
8299 drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8300     int ino, drmach_sr_iter_t iter)
8301 {
8302 	uint8_t		intr_mask;
8303 	char		*slot_devname;
8304 	char		namebuf[OBP_MAXDRVNAME];
8305 	int		slot, intr_line, slot_valid, intr_valid;
8306 
8307 	ASSERT(ino >= 0 && ino <= 0x1f);
8308 	ASSERT((pci->regs[iter].slot_intr_state_diag &
8309 	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8310 	    COMMON_CLEAR_INTR_REG_IDLE);
8311 
8312 	slot = (ino >> 2) & 0x7;
8313 	intr_line = ino & 0x3;
8314 
8315 	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8316 	slot_valid = (slot_devname == NULL) ? 0 : 1;
8317 	if (!slot_valid) {
8318 		snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)", slot);
8319 		slot_devname = namebuf;
8320 	}
8321 
8322 	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8323 	intr_valid = (1 << intr_line) & intr_mask;
8324 
8325 	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8326 	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8327 	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8328 }
8329 
8330 /*
8331  * Log interrupt source device info for all valid, pending interrupts
8332  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8333  * error in the error ctrl reg.
8334  */
8335 static void
8336 drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
8337     int unum, drmach_sr_iter_t iter)
8338 {
8339 	uint64_t	reg;
8340 	int		i, n, ino;
8341 	drmach_s1p_pci_t *pci;
8342 
8343 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8344 	ASSERT(unum < STARCAT_SLOT1_IO_MAX);
8345 
8346 	/*
8347 	 * Check the saved interrupt mapping registers. If interrupt is valid,
8348 	 * map the ino to the Schizo source device and check that the pci
8349 	 * slot and interrupt line are valid.
8350 	 */
8351 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8352 		pci = &schizo->pci[i];
8353 		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
8354 			reg = pci->regs[iter].intr_map_regs[n];
8355 			if (reg & COMMON_INTR_MAP_REG_VALID) {
8356 				ino = reg & COMMON_INTR_MAP_REG_INO;
8357 
8358 				if (ino <= 0x1f) {
8359 					/*
8360 					 * PCI slot interrupt
8361 					 */
8362 					drmach_s1p_decode_slot_intr(exp, unum,
8363 					    pci, ino, iter);
8364 				} else if (ino <= 0x2f) {
8365 					/*
8366 					 * OBIO interrupt
8367 					 */
8368 					prom_printf("IO%d/P%d OBIO interrupt: "
8369 					    "ino=0x%x\n", exp, unum, ino);
8370 				} else if (ino <= 0x37) {
8371 					/*
8372 					 * Internal interrupt
8373 					 */
8374 					prom_printf("IO%d/P%d Internal "
8375 					    "interrupt: ino=0x%x (%s)\n",
8376 					    exp, unum, ino,
8377 					    drmach_schz_internal_ino2str(ino));
8378 				} else {
8379 					/*
8380 					 * NewLink interrupt
8381 					 */
8382 					prom_printf("IO%d/P%d NewLink "
8383 					    "interrupt: ino=0x%x\n", exp,
8384 					    unum, ino);
8385 				}
8386 
8387 				DRMACH_PR("drmach_s1p_schizo_log_intr: "
8388 				    "exp=%d, schizo=%d, pci_leaf=%c, "
8389 				    "ino=0x%x, intr_map_reg=0x%lx\n",
8390 				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
8391 			}
8392 		}
8393 	}
8394 }
8395 
8396 /*
8397  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8398  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8399  */
8400 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8401 
8402 /*
8403  * Check for possible error indicators prior to resuming the
8404  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8405  */
8406 static void
8407 drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
8408     drmach_sr_iter_t iter)
8409 {
8410 	int	i, j;
8411 	int 	errflag = 0;
8412 	drmach_slot1_pause_t *slot1;
8413 
8414 	/*
8415 	 * Check for logged schizo bus error and pending interrupts.
8416 	 */
8417 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8418 		if ((slot1 = slot1_paused[i]) == NULL)
8419 			continue;
8420 
8421 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
8422 			if (slot1->schizo[j].csr_basepa == 0x0UL)
8423 				continue;
8424 
8425 			if (slot1->schizo[j].regs[iter].errlog &
8426 			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
8427 				if (!errflag) {
8428 					prom_printf("DR WARNING: interrupt "
8429 					    "attempt detected during "
8430 					    "copy-rename (%s):\n",
8431 					    (iter == DRMACH_POST_SUSPEND) ?
8432 					    "post suspend" : "pre resume");
8433 					++errflag;
8434 				}
8435 				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
8436 				    i, j, iter);
8437 			}
8438 		}
8439 	}
8440 
8441 	/*
8442 	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
8443 	 */
8444 	if (iter == DRMACH_PRE_RESUME) {
8445 		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8446 			if ((slot1 = slot1_paused[i]) == NULL)
8447 				continue;
8448 
8449 			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
8450 			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
8451 				prom_printf("DR WARNING: IO transactions "
8452 				    "detected on IO%d during copy-rename: "
8453 				    "AXQ l2_io_q performance counter "
8454 				    "start=%d, end=%d\n", i,
8455 				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
8456 				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
8457 			}
8458 		}
8459 	}
8460 }
8461 
/*
 * Node of a circular, doubly-linked ring of driver instances. Each
 * node's dip is held by drmach_sr_insert() and released by
 * drmach_sr_delete().
 */
struct drmach_sr_list {
	dev_info_t		*dip;
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};

/*
 * Drivers that drmach suspends and resumes itself, in the order in
 * which they are suspended (drmach_suspend_last()); they are resumed
 * in the reverse order (drmach_resume_first()). name is matched as a
 * prefix of a node's "name" property by drmach_verify_sr(). ring is
 * the list of matching instances, or NULL when there are none.
 */
static struct drmach_sr_ordered {
	char			*name;
	struct drmach_sr_list	*ring;
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8476 
8477 static void
8478 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8479 {
8480 	struct drmach_sr_list *np;
8481 
8482 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", dip);
8483 
8484 	np = (struct drmach_sr_list *)kmem_alloc(
8485 		sizeof (struct drmach_sr_list), KM_SLEEP);
8486 
8487 	ndi_hold_devi(dip);
8488 	np->dip = dip;
8489 
8490 	if (*lp == NULL) {
8491 		/* establish list */
8492 		*lp = np->next = np->prev = np;
8493 	} else {
8494 		/* place new node behind head node on ring list */
8495 		np->prev = (*lp)->prev;
8496 		np->next = *lp;
8497 		np->prev->next = np;
8498 		np->next->prev = np;
8499 	}
8500 }
8501 
8502 static void
8503 drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
8504 {
8505 	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", dip);
8506 
8507 	if (*lp) {
8508 		struct drmach_sr_list *xp;
8509 
8510 		/* start search with mostly likely node */
8511 		xp = (*lp)->prev;
8512 		do {
8513 			if (xp->dip == dip) {
8514 				xp->prev->next = xp->next;
8515 				xp->next->prev = xp->prev;
8516 
8517 				if (xp == *lp)
8518 					*lp = xp->next;
8519 				if (xp == *lp)
8520 					*lp = NULL;
8521 				xp->dip = NULL;
8522 				ndi_rele_devi(dip);
8523 				kmem_free(xp, sizeof (*xp));
8524 
8525 				DRMACH_PR("drmach_sr_delete:"
8526 					" disposed sr node for dip %p", dip);
8527 				return;
8528 			}
8529 
8530 			DRMACH_PR("drmach_sr_delete: still searching\n");
8531 
8532 			xp = xp->prev;
8533 		} while (xp != (*lp)->prev);
8534 	}
8535 
8536 	/* every dip should be found during resume */
8537 	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", dip);
8538 }
8539 
8540 int
8541 drmach_verify_sr(dev_info_t *dip, int sflag)
8542 {
8543 	int	rv;
8544 	int	len;
8545 	char    name[OBP_MAXDRVNAME];
8546 
8547 	if (drmach_slot1_pause_debug) {
8548 		if (sflag && drmach_slot1_pause_init) {
8549 			drmach_slot1_pause_free(drmach_slot1_paused);
8550 			drmach_slot1_pause_init = 0;
8551 		} else if (!sflag && !drmach_slot1_pause_init) {
8552 			/* schedule init for next suspend */
8553 			drmach_slot1_pause_init = 1;
8554 		}
8555 	}
8556 
8557 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8558 		"name", &len);
8559 	if (rv == DDI_PROP_SUCCESS) {
8560 		int		portid;
8561 		uint64_t	reg;
8562 		struct drmach_sr_ordered *op;
8563 
8564 		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8565 			DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8566 
8567 		if (rv != DDI_PROP_SUCCESS)
8568 			return (0);
8569 
8570 		if (drmach_slot1_pause_debug && sflag &&
8571 		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8572 			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8573 			    drmach_slot1_paused);
8574 		}
8575 
8576 		for (op = drmach_sr_ordered; op->name; op++) {
8577 			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8578 				if (sflag)
8579 					drmach_sr_insert(&op->ring, dip);
8580 				else
8581 					drmach_sr_delete(&op->ring, dip);
8582 				return (1);
8583 			}
8584 		}
8585 	}
8586 
8587 	return (0);
8588 }
8589 
8590 static void
8591 drmach_sr_dip(dev_info_t *dip, int suspend)
8592 {
8593 	int	 rv;
8594 	major_t	 maj;
8595 	char	*name, *name_addr, *aka;
8596 
8597 	if ((name = ddi_get_name(dip)) == NULL)
8598 		name = "<null name>";
8599 	else if ((maj = ddi_name_to_major(name)) != -1)
8600 		aka = ddi_major_to_name(maj);
8601 	else
8602 		aka = "<unknown>";
8603 
8604 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8605 		name_addr = "<null>";
8606 
8607 	prom_printf("\t%s %s@%s (aka %s)\n",
8608 		suspend ? "suspending" : "resuming",
8609 		name, name_addr, aka);
8610 
8611 	if (suspend) {
8612 		rv = devi_detach(dip, DDI_SUSPEND);
8613 	} else {
8614 		rv = devi_attach(dip, DDI_RESUME);
8615 	}
8616 
8617 	if (rv != DDI_SUCCESS) {
8618 		prom_printf("\tFAILED to %s %s@%s\n",
8619 			suspend ? "suspend" : "resume",
8620 			name, name_addr);
8621 	}
8622 }
8623 
8624 void
8625 drmach_suspend_last()
8626 {
8627 	struct drmach_sr_ordered *op;
8628 
8629 	if (drmach_slot1_pause_debug)
8630 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8631 
8632 	/*
8633 	 * The ordering array declares the strict sequence in which
8634 	 * the named drivers are to suspended. Each element in
8635 	 * the array may have a double-linked ring list of driver
8636 	 * instances (dip) in the order in which they were presented
8637 	 * to drmach_verify_sr. If present, walk the list in the
8638 	 * forward direction to suspend each instance.
8639 	 */
8640 	for (op = drmach_sr_ordered; op->name; op++) {
8641 		if (op->ring) {
8642 			struct drmach_sr_list *rp;
8643 
8644 			rp = op->ring;
8645 			do {
8646 				drmach_sr_dip(rp->dip, 1);
8647 				rp = rp->next;
8648 			} while (rp != op->ring);
8649 		}
8650 	}
8651 
8652 	if (drmach_slot1_pause_debug) {
8653 		drmach_slot1_pause_update(drmach_slot1_paused,
8654 		    DRMACH_POST_SUSPEND);
8655 		drmach_slot1_pause_verify(drmach_slot1_paused,
8656 		    DRMACH_POST_SUSPEND);
8657 	}
8658 }
8659 
8660 void
8661 drmach_resume_first()
8662 {
8663 	struct drmach_sr_ordered *op = drmach_sr_ordered +
8664 		(sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8665 
8666 	if (drmach_slot1_pause_debug) {
8667 		drmach_slot1_pause_update(drmach_slot1_paused,
8668 		    DRMACH_PRE_RESUME);
8669 		drmach_slot1_pause_verify(drmach_slot1_paused,
8670 		    DRMACH_PRE_RESUME);
8671 	}
8672 
8673 	op -= 1;	/* point at terminating element */
8674 
8675 	/*
8676 	 * walk ordering array and rings backwards to resume dips
8677 	 * in reverse order in which they were suspended
8678 	 */
8679 	while (--op >= drmach_sr_ordered) {
8680 		if (op->ring) {
8681 			struct drmach_sr_list *rp;
8682 
8683 			rp = op->ring->prev;
8684 			do {
8685 				drmach_sr_dip(rp->dip, 0);
8686 				rp = rp->prev;
8687 			} while (rp != op->ring->prev);
8688 		}
8689 	}
8690 }
8691 
8692 /*
8693  * Log a DR sysevent.
8694  * Return value: 0 success, non-zero failure.
8695  */
8696 int
8697 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8698 {
8699 	sysevent_t			*ev;
8700 	sysevent_id_t			eid;
8701 	int				rv, km_flag;
8702 	sysevent_value_t		evnt_val;
8703 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8704 	char				attach_pnt[MAXNAMELEN];
8705 
8706 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8707 	attach_pnt[0] = '\0';
8708 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8709 		rv = -1;
8710 		goto logexit;
8711 	}
8712 	if (verbose)
8713 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8714 			    attach_pnt, hint, flag, verbose);
8715 
8716 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8717 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8718 		rv = -2;
8719 		goto logexit;
8720 	}
8721 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8722 	evnt_val.value.sv_string = attach_pnt;
8723 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8724 				    &evnt_val, km_flag)) != 0)
8725 		goto logexit;
8726 
8727 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8728 	evnt_val.value.sv_string = hint;
8729 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8730 				    &evnt_val, km_flag)) != 0) {
8731 		sysevent_free_attr(evnt_attr_list);
8732 		goto logexit;
8733 	}
8734 
8735 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8736 
8737 	/*
8738 	 * Log the event but do not sleep waiting for its
8739 	 * delivery. This provides insulation from syseventd.
8740 	 */
8741 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8742 
8743 logexit:
8744 	if (ev)
8745 		sysevent_free(ev);
8746 	if ((rv != 0) && verbose)
8747 		cmn_err(CE_WARN,
8748 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8749 			    rv, attach_pnt, hint);
8750 
8751 	return (rv);
8752 }
8753 
8754 /*
8755  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8756  * Only the valid entries are modified, so the array should be zeroed out
8757  * initially.
8758  */
8759 static void
8760 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8761 	int	i;
8762 	char	c;
8763 
8764 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8765 
8766 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8767 		c = drmach_slice_table[i];
8768 
8769 		if (c & 0x20) {
8770 			slice_arr[i].valid = 1;
8771 			slice_arr[i].slice = c & 0x1f;
8772 		}
8773 	}
8774 }
8775 
8776 /*
8777  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8778  * Only the valid entries are modified, so the array should be zeroed out
8779  * initially.
8780  */
8781 static void
8782 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8783 	int		rv, exp, mcnum, bank;
8784 	uint64_t	madr;
8785 	drmachid_t	id;
8786 	drmach_board_t	*bp;
8787 	drmach_mem_t	*mp;
8788 	dr_memregs_t	*memregs;
8789 
8790 	/* CONSTCOND */
8791 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8792 
8793 	for (exp = 0; exp < 18; exp++) {
8794 		rv = drmach_array_get(drmach_boards,
8795 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8796 		ASSERT(rv == 0);	/* should never be out of bounds */
8797 		if (id == NULL) {
8798 			continue;
8799 		}
8800 
8801 		memregs = &regs_arr[exp];
8802 		bp = (drmach_board_t *)id;
8803 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8804 			mcnum = mp->dev.portid & 0x3;
8805 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8806 				drmach_mem_read_madr(mp, bank, &madr);
8807 				if (madr & DRMACH_MC_VALID_MASK) {
8808 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8809 						exp, mcnum, bank, madr);
8810 					memregs->madr[mcnum][bank].hi =
8811 					    DRMACH_U64_TO_MCREGHI(madr);
8812 					memregs->madr[mcnum][bank].lo =
8813 					    DRMACH_U64_TO_MCREGLO(madr);
8814 				}
8815 			}
8816 		}
8817 	}
8818 }
8819 
8820 /*
8821  * Do not allow physical address range modification if either board on this
8822  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8823  *
8824  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8825  * install the cache line as owned/dirty as a result of the RTSR transaction.
8826  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8827  * list before the rename after flushing local caches.  When copy-rename
8828  * requires changing the physical address ranges (i.e. smaller memory target),
8829  * the bus sync list contains physical addresses that will not exist after the
8830  * rename.  If these cache lines are owned due to a RTSR, a system error can
8831  * occur following the rename when these cache lines are evicted and a writeback
8832  * is attempted.
8833  *
8834  * Incoming parameter represents either the copy-rename source or a candidate
8835  * target memory board.  On Starcat, only slot0 boards may have memory.
8836  */
8837 int
8838 drmach_allow_memrange_modify(drmachid_t s0id)
8839 {
8840 	drmach_board_t	*s0bp, *s1bp;
8841 	drmachid_t	s1id;
8842 	int		rv;
8843 
8844 	s0bp = s0id;
8845 
8846 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8847 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8848 
8849 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8850 		/*
8851 		 * This is reason enough to fail the request, no need
8852 		 * to check the device list for cpus.
8853 		 */
8854 		return (0);
8855 	}
8856 
8857 	/*
8858 	 * Check for MCPU board on the same expander.
8859 	 *
8860 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8861 	 * types, as it is derived at from the POST gdcd board flag
8862 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8863 	 * ignored) for boards with no processors.  Since NULL proc LPA
8864 	 * applies only to processors, we walk the devices array to detect
8865 	 * MCPUs.
8866 	 */
8867 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8868 	s1bp = s1id;
8869 	if (rv == 0 && s1bp != NULL) {
8870 
8871 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8872 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8873 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8874 		    DRMACH_BNUM2EXP(s1bp->bnum));
8875 
8876 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8877 		    s1bp->devices != NULL) {
8878 			int		d_idx;
8879 			drmachid_t	d_id;
8880 
8881 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8882 			while (rv == 0) {
8883 				if (DRMACH_IS_CPU_ID(d_id)) {
8884 					/*
8885 					 * Fail MCPU in NULL LPA mode.
8886 					 */
8887 					return (0);
8888 				}
8889 
8890 				rv = drmach_array_next(s1bp->devices, &d_idx,
8891 				    &d_id);
8892 			}
8893 		}
8894 	}
8895 
8896 	return (1);
8897 }
8898