xref: /titanic_50/usr/src/uts/sun4u/starcat/io/drmach.c (revision eaca9bbd5f5d1e4e554da4c7108e8a03c8c33481)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/note.h>
30 #include <sys/debug.h>
31 #include <sys/types.h>
32 #include <sys/varargs.h>
33 #include <sys/errno.h>
34 #include <sys/cred.h>
35 #include <sys/dditypes.h>
36 #include <sys/devops.h>
37 #include <sys/modctl.h>
38 #include <sys/poll.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/ndi_impldefs.h>
44 #include <sys/stat.h>
45 #include <sys/kmem.h>
46 #include <sys/vmem.h>
47 #include <sys/disp.h>
48 #include <sys/processor.h>
49 #include <sys/cheetahregs.h>
50 #include <sys/cpuvar.h>
51 #include <sys/mem_config.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/systm.h>
54 #include <sys/machsystm.h>
55 #include <sys/autoconf.h>
56 #include <sys/cmn_err.h>
57 #include <sys/sysmacros.h>
58 #include <sys/x_call.h>
59 #include <sys/promif.h>
60 #include <sys/prom_plat.h>
61 #include <sys/membar.h>
62 #include <vm/seg_kmem.h>
63 #include <sys/mem_cage.h>
64 #include <sys/stack.h>
65 #include <sys/archsystm.h>
66 #include <vm/hat_sfmmu.h>
67 #include <sys/pte.h>
68 #include <sys/mmu.h>
69 #include <sys/cpu_module.h>
70 #include <sys/obpdefs.h>
71 #include <sys/mboxsc.h>
72 #include <sys/plat_ecc_dimm.h>
73 
74 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
75 #include <sys/schpc.h>
76 #include <sys/pci.h>
77 
78 #include <sys/starcat.h>
79 #include <sys/cpu_sgnblk_defs.h>
80 #include <sys/drmach.h>
81 #include <sys/dr_util.h>
82 #include <sys/dr_mbx.h>
83 #include <sys/sc_gptwocfg.h>
84 #include <sys/iosramreg.h>
85 #include <sys/iosramio.h>
86 #include <sys/iosramvar.h>
87 #include <sys/axq.h>
88 #include <sys/post/scat_dcd.h>
89 #include <sys/kobj.h>
90 #include <sys/taskq.h>
91 #include <sys/cmp.h>
92 #include <sys/sbd_ioctl.h>
93 
94 #include <sys/sysevent.h>
95 #include <sys/sysevent/dr.h>
96 #include <sys/sysevent/eventdefs.h>
97 
98 #include <sys/pci/pcisch.h>
99 #include <sys/pci/pci_regs.h>
100 
101 #include <sys/ontrap.h>
102 
103 /* defined in ../ml/drmach.il.cpp */
104 extern void		bcopy32_il(uint64_t, uint64_t);
105 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
106 extern void		flush_dcache_il(void);
107 extern void		flush_icache_il(void);
108 extern void		flush_pcache_il(void);
109 
110 /* defined in ../ml/drmach_asm.s */
111 extern uint64_t		lddmcdecode(uint64_t physaddr);
112 extern uint64_t		lddsafconfig(void);
113 
114 /* XXX here until provided by sys/dman.h */
115 extern int man_dr_attach(dev_info_t *);
116 extern int man_dr_detach(dev_info_t *);
117 
118 #define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
119 #define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
120 #define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
121 
122 #define	DRMACH_SLICE_MASK		0x1Full
123 #define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
124 #define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
125 
126 /*
127  * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
128  * available address space and the usable address space for every slice.
129  * There must be a distinction between the available and usable due to a
130  * restriction imposed by CDC memory size.
131  */
132 
133 #define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
134 #define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
135 
136 #define	DRMACH_MC_NBANKS		4
137 
138 #define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
139 #define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
140 
141 #define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
142 #define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
143 	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
144 
145 /*
146  * The Cheetah's Safari Configuration Register and the Schizo's
147  * Safari Control/Status Register place the LPA base and bound fields in
148  * the same bit locations within their register word. This source code takes
149  * advantage of this by defining only one set of LPA encoding/decoding macros
150  * which are shared by various Cheetah and Schizo drmach routines.
151  */
152 #define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
153 #define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
154 
155 #define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
156 #define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
157 #define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
158 #define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
159 
160 #define	DRMACH_L1_SET_LPA(b)		\
161 	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
162 
163 #define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
164 #define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
165 
166 /*
167  * Name properties for frequently accessed device nodes.
168  */
169 #define	DRMACH_CPU_NAMEPROP		"cpu"
170 #define	DRMACH_CMP_NAMEPROP		"cmp"
171 #define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
172 #define	DRMACH_PCI_NAMEPROP		"pci"
173 
/*
 * Maximum value of processor Safari Timeout Log (TOL) field of
 * Safari Config reg (7 secs), expressed in microseconds.
 *
 * The expansion is parenthesized so the macro evaluates as a single
 * value when it is used inside a larger expression.
 */
#define	DRMACH_SAF_TOL_MAX		(7 * 1000000)
179 
180 /*
181  * drmach_board_t flag definitions
182  */
183 #define	DRMACH_NULL_PROC_LPA		0x1
184 
185 typedef struct {
186 	uint32_t	reg_addr_hi;
187 	uint32_t	reg_addr_lo;
188 	uint32_t	reg_size_hi;
189 	uint32_t	reg_size_lo;
190 } drmach_reg_t;
191 
192 typedef struct {
193 	struct drmach_node	*node;
194 	void			*data;
195 } drmach_node_walk_args_t;
196 
197 typedef struct drmach_node {
198 	void		*here;
199 
200 	pnode_t		 (*get_dnode)(struct drmach_node *node);
201 	int		 (*walk)(struct drmach_node *node, void *data,
202 				int (*cb)(drmach_node_walk_args_t *args));
203 	dev_info_t	*(*n_getdip)(struct drmach_node *node);
204 	int		 (*n_getproplen)(struct drmach_node *node, char *name,
205 				int *len);
206 	int		 (*n_getprop)(struct drmach_node *node, char *name,
207 				void *buf, int len);
208 	int		 (*get_parent)(struct drmach_node *node,
209 				struct drmach_node *pnode);
210 } drmach_node_t;
211 
212 typedef struct {
213 	int		 min_index;
214 	int		 max_index;
215 	int		 arr_sz;
216 	drmachid_t	*arr;
217 } drmach_array_t;
218 
219 typedef struct {
220 	void		*isa;
221 
222 	void		 (*dispose)(drmachid_t);
223 	sbd_error_t	*(*release)(drmachid_t);
224 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
225 
226 	char		 name[MAXNAMELEN];
227 } drmach_common_t;
228 
229 struct drmach_board;
230 typedef struct drmach_board drmach_board_t;
231 
232 typedef struct {
233 	drmach_common_t	 cm;
234 	const char	*type;
235 	drmach_board_t	*bp;
236 	drmach_node_t	*node;
237 	int		 portid;
238 	int		 unum;
239 	int		 busy;
240 	int		 powered;
241 } drmach_device_t;
242 
243 typedef struct drmach_cpu {
244 	drmach_device_t	 dev;
245 	uint64_t	 scr_pa;
246 	processorid_t	 cpuid;
247 	int		 coreid;
248 } drmach_cpu_t;
249 
250 typedef struct drmach_mem {
251 	drmach_device_t	 dev;
252 	struct drmach_mem *next;
253 	uint64_t	 nbytes;
254 	uint64_t	 madr_pa;
255 } drmach_mem_t;
256 
257 typedef struct drmach_io {
258 	drmach_device_t	 dev;
259 	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
260 } drmach_io_t;
261 
262 struct drmach_board {
263 	drmach_common_t	 cm;
264 	int		 bnum;
265 	int		 assigned;
266 	int		 powered;
267 	int		 connected;
268 	int		 empty;
269 	int		 cond;
270 	uint_t		 cpu_impl;
271 	uint_t		 flags;
272 	drmach_node_t	*tree;
273 	drmach_array_t	*devices;
274 	drmach_mem_t	*mem;
275 	uint64_t	 stardrb_offset;
276 	char		 type[BD_TYPELEN];
277 };
278 
279 typedef struct {
280 	int		 flags;
281 	drmach_device_t	*dp;
282 	sbd_error_t	*err;
283 	dev_info_t	*fdip;
284 } drmach_config_args_t;
285 
286 typedef struct {
287 	drmach_board_t	*obj;
288 	int		 ndevs;
289 	void		*a;
290 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
291 	sbd_error_t	*err;
292 } drmach_board_cb_data_t;
293 
294 typedef struct drmach_casmslot {
295 	int	valid;
296 	int	slice;
297 } drmach_casmslot_t;
298 
299 typedef enum {
300 	DRMACH_CR_OK,
301 	DRMACH_CR_MC_IDLE_ERR,
302 	DRMACH_CR_IOPAUSE_ERR,
303 	DRMACH_CR_ONTRAP_ERR
304 } drmach_cr_err_t;
305 
306 typedef struct {
307 	void		*isa;
308 	caddr_t		 data;
309 	drmach_mem_t	*s_mp;
310 	drmach_mem_t	*t_mp;
311 	struct memlist	*c_ml;
312 	uint64_t	 s_copybasepa;
313 	uint64_t	 t_copybasepa;
314 	drmach_cr_err_t	 ecode;
315 	void		*earg;
316 } drmach_copy_rename_t;
317 
318 /*
319  * The following global is read as a boolean value, non-zero is true.
320  * If zero, DR copy-rename and cpu poweron will not set the processor
321  * LPA settings (CBASE, CBND of Safari config register) to correspond
322  * to the current memory slice map. LPAs of processors present at boot
323  * will remain as programmed by POST. LPAs of processors on boards added
324  * by DR will remain NULL, as programmed by POST. This can be used
325  * to override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
326  * POST in the LDCD (and copied to the GDCD by SMS).
327  *
328  * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
329  * to Schizo device LPAs. These are always set by DR.
330  */
331 static int		 drmach_reprogram_lpa = 1;
332 
333 /*
334  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
335  * can fail to receive an XIR. To workaround this issue until a hardware
336  * fix is implemented, we will exclude the selection of these CPUs.
337  * Setting this to 0 will allow their selection again.
338  */
339 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
340 
341 static int		 drmach_initialized;
342 static drmach_array_t	*drmach_boards;
343 
344 static int		 drmach_cpu_delay = 1000;
345 static int		 drmach_cpu_ntries = 50000;
346 
347 static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
348 static kmutex_t		 drmach_slice_table_lock;
349 
350 tte_t			 drmach_cpu_sram_tte[NCPU];
351 caddr_t			 drmach_cpu_sram_va;
352 
353 /*
354  * Setting to non-zero will enable delay before all disconnect ops.
355  */
356 static int		 drmach_unclaim_delay_all;
357 /*
358  * Default delay is slightly greater than the max processor Safari timeout.
359  * This delay is intended to ensure the outstanding Safari activity has
360  * retired on this board prior to a board disconnect.
361  */
362 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
363 
364 /*
365  * By default, DR of non-Panther procs is not allowed into a Panther
366  * domain with large page sizes enabled.  Setting this to 0 will remove
367  * the restriction.
368  */
369 static int		 drmach_large_page_restriction = 1;
370 
371 /*
372  * Used to pass updated LPA values to procs.
373  * Protocol is to clear the array before use.
374  */
375 volatile uchar_t	*drmach_xt_mb;
376 volatile uint64_t	 drmach_xt_ready;
377 static kmutex_t		 drmach_xt_mb_lock;
378 static int		 drmach_xt_mb_size;
379 
380 uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
381 static kmutex_t		 drmach_bus_sync_lock;
382 
383 static sbd_error_t	*drmach_device_new(drmach_node_t *,
384 				drmach_board_t *, int, drmachid_t *);
385 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
386 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
387 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
388 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
389 
390 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
391 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
392 				char *name, void *buf, int len);
393 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
394 				char *name, int *len);
395 
396 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
397 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
398 				char *name, void *buf, int len);
399 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
400 				char *name, int *len);
401 
402 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
403 				caddr_t obufp, int olen,
404 				caddr_t ibufp, int ilen);
405 
406 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
407 sbd_error_t		*drmach_io_post_release(drmachid_t id);
408 
409 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
410 				drmach_device_t **dpp, cpu_flag_t *oflags);
411 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
412 				cpu_flag_t oflags);
413 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
414 void			drmach_iocage_mem_scrub(uint64_t nbytes);
415 
416 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
417 
418 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
419 
420 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
421 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
422 
423 static void		 drmach_bus_sync_list_update(void);
424 static void		 drmach_slice_table_update(drmach_board_t *, int);
425 static int		 drmach_portid2bnum(int);
426 
427 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
428 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
429 
430 static int		drmach_panther_boards(void);
431 
432 static int		drmach_name2type_idx(char *);
433 
434 #ifdef DEBUG
435 
436 #define	DRMACH_PR		if (drmach_debug) printf
437 #define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
438 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
439 #else
440 
441 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
442 #define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
443 #endif /* DEBUG */
444 
/*
 * Object identification helpers.
 *
 * Every drmach object starts with a drmach_common_t whose "isa" member
 * holds the address of the constructor that created it.  The macros
 * below classify an opaque id by comparing "isa" against the known
 * constructors.  A zero id never matches.
 *
 * The macro argument is parenthesized at every use so that an
 * expression argument (not just a plain identifier) expands correctly.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
475 
476 #define	DRMACH_INTERNAL_ERROR() \
477 	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
478 static char		*drmach_ie_fmt = "drmach.c %d";
479 
480 static struct {
481 	const char	 *name;
482 	const char	 *type;
483 	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
484 } drmach_name2type[] = {
485 	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
486 	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
487 	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
488 	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
489 	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
490 	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
491 	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
492 };
493 
494 /*
495  * drmach autoconfiguration data structures and interfaces
496  */
497 
498 extern struct mod_ops mod_miscops;
499 
500 static struct modlmisc modlmisc = {
501 	&mod_miscops,
502 	"Sun Fire 15000 DR %I%"
503 };
504 
505 static struct modlinkage modlinkage = {
506 	MODREV_1,
507 	(void *)&modlmisc,
508 	NULL
509 };
510 
511 /*
512  * drmach_boards_rwlock is used to synchronize read/write
513  * access to drmach_boards array between status and board lookup
514  * as READERS, and assign, and unassign threads as WRITERS.
515  */
516 static krwlock_t	drmach_boards_rwlock;
517 
518 static kmutex_t		drmach_i_lock;
519 static kmutex_t		drmach_iocage_lock;
520 static kcondvar_t 	drmach_iocage_cv;
521 static int		drmach_iocage_is_busy = 0;
522 uint64_t		drmach_iocage_paddr;
523 static caddr_t		drmach_iocage_vaddr;
524 static int		drmach_iocage_size = 0;
525 static int		drmach_is_cheetah = -1;
526 
527 int
528 _init(void)
529 {
530 	int	err;
531 
532 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
533 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
534 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
535 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
536 	    drmach_xt_mb_size, VM_SLEEP);
537 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
538 	if ((err = mod_install(&modlinkage)) != 0) {
539 		mutex_destroy(&drmach_i_lock);
540 		rw_destroy(&drmach_boards_rwlock);
541 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
542 		    drmach_xt_mb_size);
543 	}
544 
545 	return (err);
546 }
547 
548 int
549 _fini(void)
550 {
551 	static void	drmach_fini(void);
552 	int		err;
553 
554 	if ((err = mod_remove(&modlinkage)) == 0)
555 		drmach_fini();
556 
557 	return (err);
558 }
559 
560 int
561 _info(struct modinfo *modinfop)
562 {
563 	return (mod_info(&modlinkage, modinfop));
564 }
565 
566 /*
567  * drmach_node_* routines serve the purpose of separating the
568  * rest of the code from the device tree and OBP.  This is necessary
569  * because of In-Kernel-Probing.  Devices probed after stod, are probed
570  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
571  * have dnode ids.
572  */
573 
574 static int
575 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
576 {
577 	pnode_t		nodeid;
578 	static char	*fn = "drmach_node_obp_get_parent";
579 
580 	nodeid = np->get_dnode(np);
581 	if (nodeid == OBP_NONODE) {
582 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
583 		return (-1);
584 	}
585 
586 	bcopy(np, pp, sizeof (drmach_node_t));
587 
588 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
589 	if (pp->here == OBP_NONODE) {
590 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
591 		return (-1);
592 	}
593 
594 	return (0);
595 }
596 
597 static pnode_t
598 drmach_node_obp_get_dnode(drmach_node_t *np)
599 {
600 	return ((pnode_t)(uintptr_t)np->here);
601 }
602 
603 typedef struct {
604 	drmach_node_walk_args_t	*nwargs;
605 	int 			(*cb)(drmach_node_walk_args_t *args);
606 	int			err;
607 } drmach_node_ddi_walk_args_t;
608 
609 int
610 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
611 {
612 	drmach_node_ddi_walk_args_t	*nargs;
613 
614 	nargs = (drmach_node_ddi_walk_args_t *)arg;
615 
616 	/*
617 	 * dip doesn't have to be held here as we are called
618 	 * from ddi_walk_devs() which holds the dip.
619 	 */
620 	nargs->nwargs->node->here = (void *)dip;
621 
622 	nargs->err = nargs->cb(nargs->nwargs);
623 
624 	/*
625 	 * Set "here" to NULL so that unheld dip is not accessible
626 	 * outside ddi_walk_devs()
627 	 */
628 	nargs->nwargs->node->here = NULL;
629 
630 	if (nargs->err)
631 		return (DDI_WALK_TERMINATE);
632 	else
633 		return (DDI_WALK_CONTINUE);
634 }
635 
636 static int
637 drmach_node_ddi_walk(drmach_node_t *np, void *data,
638 		int (*cb)(drmach_node_walk_args_t *args))
639 {
640 	drmach_node_walk_args_t		args;
641 	drmach_node_ddi_walk_args_t	nargs;
642 
643 	/* initialized args structure for callback */
644 	args.node = np;
645 	args.data = data;
646 
647 	nargs.nwargs = &args;
648 	nargs.cb = cb;
649 	nargs.err = 0;
650 
651 	/*
652 	 * Root node doesn't have to be held in any way.
653 	 */
654 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
655 		(void *)&nargs);
656 
657 	return (nargs.err);
658 }
659 
660 static int
661 drmach_node_obp_walk(drmach_node_t *np, void *data,
662 		int (*cb)(drmach_node_walk_args_t *args))
663 {
664 	pnode_t			nodeid;
665 	int			rv;
666 	drmach_node_walk_args_t	args;
667 
668 	/* initialized args structure for callback */
669 	args.node = np;
670 	args.data = data;
671 
672 	nodeid = prom_childnode(prom_rootnode());
673 
674 	/* save our new position within the tree */
675 	np->here = (void *)(uintptr_t)nodeid;
676 
677 	rv = 0;
678 	while (nodeid != OBP_NONODE) {
679 
680 		pnode_t child;
681 
682 		rv = (*cb)(&args);
683 		if (rv)
684 			break;
685 
686 		child = prom_childnode(nodeid);
687 		np->here = (void *)(uintptr_t)child;
688 
689 		while (child != OBP_NONODE) {
690 			rv = (*cb)(&args);
691 			if (rv)
692 				break;
693 
694 			child = prom_nextnode(child);
695 			np->here = (void *)(uintptr_t)child;
696 		}
697 
698 		nodeid = prom_nextnode(nodeid);
699 
700 		/* save our new position within the tree */
701 		np->here = (void *)(uintptr_t)nodeid;
702 	}
703 
704 	return (rv);
705 }
706 
707 static int
708 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
709 {
710 	dev_info_t	*ndip;
711 	static char	*fn = "drmach_node_ddi_get_parent";
712 
713 	ndip = np->n_getdip(np);
714 	if (ndip == NULL) {
715 		cmn_err(CE_WARN, "%s: NULL dip", fn);
716 		return (-1);
717 	}
718 
719 	bcopy(np, pp, sizeof (drmach_node_t));
720 
721 	pp->here = (void *)ddi_get_parent(ndip);
722 	if (pp->here == NULL) {
723 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
724 		return (-1);
725 	}
726 
727 	return (0);
728 }
729 
730 /*ARGSUSED*/
731 static pnode_t
732 drmach_node_ddi_get_dnode(drmach_node_t *np)
733 {
734 	return ((pnode_t)NULL);
735 }
736 
737 static drmach_node_t *
738 drmach_node_new(void)
739 {
740 	drmach_node_t *np;
741 
742 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
743 
744 	if (drmach_initialized) {
745 		np->get_dnode = drmach_node_ddi_get_dnode;
746 		np->walk = drmach_node_ddi_walk;
747 		np->n_getdip = drmach_node_ddi_get_dip;
748 		np->n_getproplen = drmach_node_ddi_get_proplen;
749 		np->n_getprop = drmach_node_ddi_get_prop;
750 		np->get_parent = drmach_node_ddi_get_parent;
751 	} else {
752 		np->get_dnode = drmach_node_obp_get_dnode;
753 		np->walk = drmach_node_obp_walk;
754 		np->n_getdip = drmach_node_obp_get_dip;
755 		np->n_getproplen = drmach_node_obp_get_proplen;
756 		np->n_getprop = drmach_node_obp_get_prop;
757 		np->get_parent = drmach_node_obp_get_parent;
758 	}
759 
760 	return (np);
761 }
762 
763 static void
764 drmach_node_dispose(drmach_node_t *np)
765 {
766 	kmem_free(np, sizeof (*np));
767 }
768 
769 /*
770  * Check if a CPU node is part of a CMP.
771  */
772 static int
773 drmach_is_cmp_child(dev_info_t *dip)
774 {
775 	dev_info_t *pdip;
776 
777 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
778 		return (0);
779 	}
780 
781 	pdip = ddi_get_parent(dip);
782 
783 	ASSERT(pdip);
784 
785 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
786 		return (1);
787 	}
788 
789 	return (0);
790 }
791 
792 static dev_info_t *
793 drmach_node_obp_get_dip(drmach_node_t *np)
794 {
795 	pnode_t		nodeid;
796 	dev_info_t	*dip;
797 
798 	nodeid = np->get_dnode(np);
799 	if (nodeid == OBP_NONODE)
800 		return (NULL);
801 
802 	dip = e_ddi_nodeid_to_dip(nodeid);
803 	if (dip) {
804 		/*
805 		 * The branch rooted at dip will have been previously
806 		 * held, or it will be the child of a CMP. In either
807 		 * case, the hold acquired in e_ddi_nodeid_to_dip()
808 		 * is not needed.
809 		 */
810 		ddi_release_devi(dip);
811 		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
812 	}
813 
814 	return (dip);
815 }
816 
817 static dev_info_t *
818 drmach_node_ddi_get_dip(drmach_node_t *np)
819 {
820 	return ((dev_info_t *)np->here);
821 }
822 
823 static int
824 drmach_node_walk(drmach_node_t *np, void *param,
825 		int (*cb)(drmach_node_walk_args_t *args))
826 {
827 	return (np->walk(np, param, cb));
828 }
829 
830 static int
831 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
832 {
833 	int		rv = 0;
834 	dev_info_t	*ndip;
835 	static char	*fn = "drmach_node_ddi_get_prop";
836 
837 	ndip = np->n_getdip(np);
838 	if (ndip == NULL) {
839 		cmn_err(CE_WARN, "%s: NULL dip", fn);
840 		rv = -1;
841 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
842 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
843 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
844 		rv = -1;
845 	}
846 
847 	return (rv);
848 }
849 
850 /* ARGSUSED */
851 static int
852 drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
853 {
854 	int		rv = 0;
855 	pnode_t		nodeid;
856 	static char	*fn = "drmach_node_obp_get_prop";
857 
858 	nodeid = np->get_dnode(np);
859 	if (nodeid == OBP_NONODE) {
860 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
861 		rv = -1;
862 	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
863 		rv = -1;
864 	} else {
865 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
866 	}
867 
868 	return (rv);
869 }
870 
871 static int
872 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
873 {
874 	int		rv = 0;
875 	dev_info_t	*ndip;
876 
877 	ndip = np->n_getdip(np);
878 	if (ndip == NULL) {
879 		rv = -1;
880 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
881 			name, len) != DDI_PROP_SUCCESS) {
882 		rv = -1;
883 	}
884 
885 	return (rv);
886 }
887 
888 static int
889 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
890 {
891 	pnode_t	 nodeid;
892 	int	 rv;
893 
894 	nodeid = np->get_dnode(np);
895 	if (nodeid == OBP_NONODE)
896 		rv = -1;
897 	else {
898 		*len = prom_getproplen(nodeid, (caddr_t)name);
899 		rv = (*len < 0 ? -1 : 0);
900 	}
901 
902 	return (rv);
903 }
904 
905 static drmachid_t
906 drmach_node_dup(drmach_node_t *np)
907 {
908 	drmach_node_t *dup;
909 
910 	dup = drmach_node_new();
911 	dup->here = np->here;
912 	dup->get_dnode = np->get_dnode;
913 	dup->walk = np->walk;
914 	dup->n_getdip = np->n_getdip;
915 	dup->n_getproplen = np->n_getproplen;
916 	dup->n_getprop = np->n_getprop;
917 	dup->get_parent = np->get_parent;
918 
919 	return (dup);
920 }
921 
922 /*
923  * drmach_array provides convenient array construction, access,
924  * bounds checking and array destruction logic.
925  */
926 
927 static drmach_array_t *
928 drmach_array_new(int min_index, int max_index)
929 {
930 	drmach_array_t *arr;
931 
932 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
933 
934 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
935 	if (arr->arr_sz > 0) {
936 		arr->min_index = min_index;
937 		arr->max_index = max_index;
938 
939 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
940 		return (arr);
941 	} else {
942 		kmem_free(arr, sizeof (*arr));
943 		return (0);
944 	}
945 }
946 
947 static int
948 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
949 {
950 	if (idx < arr->min_index || idx > arr->max_index)
951 		return (-1);
952 	else {
953 		arr->arr[idx - arr->min_index] = val;
954 		return (0);
955 	}
956 	/*NOTREACHED*/
957 }
958 
959 static int
960 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
961 {
962 	if (idx < arr->min_index || idx > arr->max_index)
963 		return (-1);
964 	else {
965 		*val = arr->arr[idx - arr->min_index];
966 		return (0);
967 	}
968 	/*NOTREACHED*/
969 }
970 
971 static int
972 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
973 {
974 	int rv;
975 
976 	*idx = arr->min_index;
977 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
978 		*idx += 1;
979 
980 	return (rv);
981 }
982 
983 static int
984 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
985 {
986 	int rv;
987 
988 	*idx += 1;
989 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
990 		*idx += 1;
991 
992 	return (rv);
993 }
994 
995 static void
996 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
997 {
998 	drmachid_t	val;
999 	int		idx;
1000 	int		rv;
1001 
1002 	rv = drmach_array_first(arr, &idx, &val);
1003 	while (rv == 0) {
1004 		(*disposer)(val);
1005 
1006 		/* clear the array entry */
1007 		rv = drmach_array_set(arr, idx, NULL);
1008 		ASSERT(rv == 0);
1009 
1010 		rv = drmach_array_next(arr, &idx, &val);
1011 	}
1012 
1013 	kmem_free(arr->arr, arr->arr_sz);
1014 	kmem_free(arr, sizeof (*arr));
1015 }
1016 
1017 
1018 static gdcd_t *
1019 drmach_gdcd_new()
1020 {
1021 	gdcd_t *gdcd;
1022 
1023 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1024 
1025 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1026 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1027 bail:
1028 		kmem_free(gdcd, sizeof (gdcd_t));
1029 		return (NULL);
1030 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1031 		goto bail;
1032 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1033 		goto bail;
1034 	}
1035 
1036 	return (gdcd);
1037 }
1038 
1039 static void
1040 drmach_gdcd_dispose(gdcd_t *gdcd)
1041 {
1042 	kmem_free(gdcd, sizeof (gdcd_t));
1043 }
1044 
1045 /*ARGSUSED*/
1046 sbd_error_t *
1047 drmach_configure(drmachid_t id, int flags)
1048 {
1049 	drmach_device_t	*dp;
1050 	dev_info_t	*rdip;
1051 	sbd_error_t	*err = NULL;
1052 
1053 	/*
1054 	 * On Starcat, there is no CPU driver, so it is
1055 	 * not necessary to configure any CPU nodes.
1056 	 */
1057 	if (DRMACH_IS_CPU_ID(id)) {
1058 		return (NULL);
1059 	}
1060 
1061 	for (; id; ) {
1062 		dev_info_t	*fdip = NULL;
1063 
1064 		if (!DRMACH_IS_DEVICE_ID(id))
1065 			return (drerr_new(0, ESTC_INAPPROP, NULL));
1066 		dp = id;
1067 
1068 		rdip = dp->node->n_getdip(dp->node);
1069 
1070 		/*
1071 		 * We held this branch earlier, so at a minimum its
1072 		 * root should still be present in the device tree.
1073 		 */
1074 		ASSERT(rdip);
1075 
1076 		DRMACH_PR("drmach_configure: configuring DDI branch");
1077 
1078 		ASSERT(e_ddi_branch_held(rdip));
1079 		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
1080 			if (err == NULL) {
1081 				/*
1082 				 * Record first failure but don't stop
1083 				 */
1084 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1085 				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
1086 
1087 				(void) ddi_pathname(dip, path);
1088 				err = drerr_new(1, ESTC_DRVFAIL, path);
1089 
1090 				kmem_free(path, MAXPATHLEN);
1091 			}
1092 
1093 			/*
1094 			 * If non-NULL, fdip is returned held and must be
1095 			 * released.
1096 			 */
1097 			if (fdip != NULL) {
1098 				ddi_release_devi(fdip);
1099 			}
1100 		}
1101 
1102 		if (DRMACH_IS_MEM_ID(id)) {
1103 			drmach_mem_t	*mp = id;
1104 			id = mp->next;
1105 		} else {
1106 			id = NULL;
1107 		}
1108 	}
1109 
1110 	return (err);
1111 }
1112 
1113 static sbd_error_t *
1114 drmach_device_new(drmach_node_t *node,
1115 	drmach_board_t *bp, int portid, drmachid_t *idp)
1116 {
1117 	int		i, rv, device_id, unum;
1118 	char		name[OBP_MAXDRVNAME];
1119 	drmach_device_t	proto;
1120 
1121 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1122 	if (rv) {
1123 		sbd_error_t *err;
1124 
1125 		/* every node is expected to have a name */
1126 		err = drerr_new(1, ESTC_GETPROP,
1127 			"dip: 0x%p: property %s",
1128 			node->n_getdip(node), OBP_NAME);
1129 
1130 		return (err);
1131 	}
1132 
1133 	i = drmach_name2type_idx(name);
1134 
1135 	if (i < 0 || strcmp(name, "cmp") == 0) {
1136 		/*
1137 		 * Not a node of interest to dr - including "cmp",
1138 		 * but it is in drmach_name2type[], which lets gptwocfg
1139 		 * driver to check if node is OBP created.
1140 		 */
1141 		*idp = (drmachid_t)0;
1142 		return (NULL);
1143 	}
1144 
1145 	/*
1146 	 * Derive a best-guess unit number from the portid value.
1147 	 * Some drmach_*_new constructors (drmach_pci_new, for example)
1148 	 * will overwrite the prototype unum value with one that is more
1149 	 * appropriate for the device.
1150 	 */
1151 	device_id = portid & 0x1f;
1152 	if (device_id < 4)
1153 		unum = device_id;
1154 	else if (device_id == 8) {
1155 		unum = 0;
1156 	} else if (device_id == 9) {
1157 		unum = 1;
1158 	} else if (device_id == 0x1c) {
1159 		unum = 0;
1160 	} else if (device_id == 0x1d) {
1161 		unum = 1;
1162 	} else {
1163 		return (DRMACH_INTERNAL_ERROR());
1164 	}
1165 
1166 	bzero(&proto, sizeof (proto));
1167 	proto.type = drmach_name2type[i].type;
1168 	proto.bp = bp;
1169 	proto.node = node;
1170 	proto.portid = portid;
1171 	proto.unum = unum;
1172 
1173 	return (drmach_name2type[i].new(&proto, idp));
1174 }
1175 
1176 static void
1177 drmach_device_dispose(drmachid_t id)
1178 {
1179 	drmach_device_t *self = id;
1180 
1181 	self->cm.dispose(id);
1182 }
1183 
1184 static drmach_board_t *
1185 drmach_board_new(int bnum)
1186 {
1187 	static sbd_error_t *drmach_board_release(drmachid_t);
1188 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1189 
1190 	drmach_board_t	*bp;
1191 
1192 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1193 
1194 	bp->cm.isa = (void *)drmach_board_new;
1195 	bp->cm.release = drmach_board_release;
1196 	bp->cm.status = drmach_board_status;
1197 
1198 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1199 
1200 	bp->bnum = bnum;
1201 	bp->devices = NULL;
1202 	bp->tree = drmach_node_new();
1203 
1204 	drmach_array_set(drmach_boards, bnum, bp);
1205 	return (bp);
1206 }
1207 
1208 static void
1209 drmach_board_dispose(drmachid_t id)
1210 {
1211 	drmach_board_t *bp;
1212 
1213 	ASSERT(DRMACH_IS_BOARD_ID(id));
1214 	bp = id;
1215 
1216 	if (bp->tree)
1217 		drmach_node_dispose(bp->tree);
1218 
1219 	if (bp->devices)
1220 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1221 
1222 	kmem_free(bp, sizeof (*bp));
1223 }
1224 
1225 static sbd_error_t *
1226 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1227 {
1228 	sbd_error_t	*err = NULL;
1229 	drmach_board_t	*bp;
1230 	caddr_t		obufp;
1231 	dr_showboard_t	shb;
1232 
1233 	if (!DRMACH_IS_BOARD_ID(id))
1234 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1235 
1236 	bp = id;
1237 
1238 	/*
1239 	 * we need to know if the board's connected before
1240 	 * issuing a showboard message.  If it's connected, we just
1241 	 * reply with status composed of cached info
1242 	 */
1243 
1244 	if (!bp->connected) {
1245 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1246 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1247 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1248 			sizeof (dr_showboard_t));
1249 
1250 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1251 		if (err)
1252 			return (err);
1253 
1254 		bp->connected = (shb.bd_assigned && shb.bd_active);
1255 		strncpy(bp->type, shb.board_type, sizeof (bp->type));
1256 		stat->assigned = bp->assigned = shb.bd_assigned;
1257 		stat->powered = bp->powered = shb.power_on;
1258 		stat->empty = bp->empty = shb.slot_empty;
1259 
1260 		switch (shb.test_status) {
1261 			case DR_TEST_STATUS_UNKNOWN:
1262 			case DR_TEST_STATUS_IPOST:
1263 			case DR_TEST_STATUS_ABORTED:
1264 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1265 				break;
1266 			case DR_TEST_STATUS_PASSED:
1267 				stat->cond = bp->cond = SBD_COND_OK;
1268 				break;
1269 			case DR_TEST_STATUS_FAILED:
1270 				stat->cond = bp->cond = SBD_COND_FAILED;
1271 				break;
1272 			default:
1273 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1274 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1275 					shb.test_status);
1276 				break;
1277 
1278 		}
1279 
1280 		strncpy(stat->type, shb.board_type, sizeof (stat->type));
1281 		snprintf(stat->info, sizeof (stat->info), "Test Level=%d",
1282 			shb.test_level);
1283 	} else {
1284 		stat->assigned = bp->assigned;
1285 		stat->powered = bp->powered;
1286 		stat->empty = bp->empty;
1287 		stat->cond = bp->cond;
1288 		strncpy(stat->type, bp->type, sizeof (stat->type));
1289 	}
1290 
1291 	stat->busy = 0;			/* assume not busy */
1292 	stat->configured = 0;		/* assume not configured */
1293 	if (bp->devices) {
1294 		int		 rv;
1295 		int		 d_idx;
1296 		drmachid_t	 d_id;
1297 
1298 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1299 		while (rv == 0) {
1300 			drmach_status_t	d_stat;
1301 
1302 			err = drmach_i_status(d_id, &d_stat);
1303 			if (err)
1304 				break;
1305 
1306 			stat->busy |= d_stat.busy;
1307 			stat->configured |= d_stat.configured;
1308 
1309 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1310 		}
1311 	}
1312 
1313 	return (err);
1314 }
1315 
1316 typedef struct drmach_msglist {
1317 	kcondvar_t		s_cv; 		/* condvar for sending msg */
1318 	kmutex_t		s_lock;		/* mutex for sending */
1319 	kcondvar_t		g_cv;		/* condvar for getting reply */
1320 	kmutex_t		g_lock;		/* mutex for getting reply */
1321 	struct drmach_msglist	*prev;		/* link to previous entry */
1322 	struct drmach_msglist	*next;		/* link to next entry */
1323 	struct drmach_msglist	*link;		/* link to related entry */
1324 	caddr_t			o_buf;		/* address of output buffer */
1325 	caddr_t			i_buf; 		/* address of input buffer */
1326 	uint32_t		o_buflen;	/* output buffer length */
1327 	uint32_t		i_buflen;	/* input buffer length */
1328 	uint32_t		msgid;		/* message identifier */
1329 	int			o_nretry;	/* number of sending retries */
1330 	int			f_error;	/* mailbox framework error */
1331 	uint8_t			e_code;		/* error code returned by SC */
1332 	uint8_t			p_flag	:1,	/* successfully putmsg */
1333 				m_reply	:1,	/* msg reply received */
1334 				unused	:6;
1335 } drmach_msglist_t;
1336 
1337 kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
1338 kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
1339 kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
1340 drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
1341 drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
1342 uint32_t		drmach_msgid;		/* current message id */
1343 kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
1344 volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
1345 kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
1346 kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
1347 kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
1348 volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
1349 int			drmach_mbox_istate;	/* mailbox init state */
1350 int			drmach_mbox_iflag;	/* set if init'd with SC */
1351 int			drmach_mbox_ipending;	/* set if reinit scheduled */
1352 
1353 /*
1354  * Timeout values (in seconds) used when waiting for replies (from the SC) to
1355  * requests that we sent.  Since we only receive boardevent messages, and they
1356  * are events rather than replies, there is no boardevent timeout.
1357  */
1358 int	drmach_to_mbxinit	= 60;		/* 1 minute */
1359 int	drmach_to_assign	= 60;		/* 1 minute */
1360 int	drmach_to_unassign	= 60;		/* 1 minute */
1361 int	drmach_to_claim		= 3600;		/* 1 hour */
1362 int	drmach_to_unclaim	= 3600;		/* 1 hour */
1363 int	drmach_to_poweron	= 480;		/* 8 minutes */
1364 int	drmach_to_poweroff	= 480;		/* 8 minutes */
1365 int	drmach_to_testboard	= 43200;	/* 12 hours */
1366 int	drmach_to_aborttest	= 180;		/* 3 minutes */
1367 int	drmach_to_showboard	= 180;		/* 3 minutes */
1368 int	drmach_to_unconfig	= 180;		/* 3 minutes */
1369 
1370 /*
1371  * Delay (in seconds) used after receiving a non-transient error indication from
1372  * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
1373  */
1374 int	drmach_mbxerr_delay	= 15;		/* 15 seconds */
1375 
1376 /*
1377  * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
1378  */
1379 clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
1380 clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */
1381 
1382 /*
1383  * Normally, drmach_to_putmsg is set dynamically during initialization in
1384  * drmach_mbox_init.  This has the potentially undesirable side effect of
1385  * clobbering any value that might have been set in /etc/system.  To prevent
1386  * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
1387  * /etc/system), set drmach_use_tuned_putmsg_to to 1.
1388  */
1389 int	drmach_use_tuned_putmsg_to	= 0;
1390 
1391 
1392 /* maximum conceivable message size for future mailbox protocol versions */
1393 #define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1394 
1395 /*ARGSUSED*/
1396 void
1397 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1398 {
1399 	int		i, j;
1400 	dr_memregs_t	*memregs;
1401 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1402 	dr_msg_t	*mp = &mbp->msgdata;
1403 
1404 #ifdef DEBUG
1405 	switch (php->command) {
1406 		case DRMSG_BOARDEVENT:
1407 			if (dir) {
1408 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1409 			} else {
1410 				DRMACH_PR("BOARDEVENT received:\n");
1411 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1412 					mp->dm_be.initialized,
1413 					mp->dm_be.board_insertion,
1414 					mp->dm_be.board_removal,
1415 					mp->dm_be.slot_assign);
1416 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1417 					mp->dm_be.slot_unassign,
1418 					mp->dm_be.slot_avail,
1419 					mp->dm_be.slot_unavail);
1420 			}
1421 			break;
1422 		case DRMSG_MBOX_INIT:
1423 			if (dir) {
1424 				DRMACH_PR("MBOX_INIT Request:\n");
1425 			} else {
1426 				DRMACH_PR("MBOX_INIT Reply:\n");
1427 			}
1428 			break;
1429 		case DRMSG_ASSIGN:
1430 			if (dir) {
1431 				DRMACH_PR("ASSIGN Request:\n");
1432 			} else {
1433 				DRMACH_PR("ASSIGN Reply:\n");
1434 			}
1435 			break;
1436 		case DRMSG_UNASSIGN:
1437 			if (dir) {
1438 				DRMACH_PR("UNASSIGN Request:\n");
1439 			} else {
1440 				DRMACH_PR("UNASSIGN Reply:\n");
1441 			}
1442 			break;
1443 		case DRMSG_CLAIM:
1444 			if (!dir) {
1445 				DRMACH_PR("CLAIM Reply:\n");
1446 				break;
1447 			}
1448 
1449 			DRMACH_PR("CLAIM Request:\n");
1450 			for (i = 0; i < 18; ++i) {
1451 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1452 					mp->dm_cr.mem_slice[i].valid,
1453 					mp->dm_cr.mem_slice[i].slice);
1454 				memregs = &(mp->dm_cr.mem_regs[i]);
1455 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1456 					DRMACH_PR("  MC %2d: "
1457 						"MADR[%d] = 0x%lx, "
1458 						"MADR[%d] = 0x%lx\n", j,
1459 						0, DRMACH_MCREG_TO_U64(
1460 						memregs->madr[j][0]),
1461 						1, DRMACH_MCREG_TO_U64(
1462 						memregs->madr[j][1]));
1463 					DRMACH_PR("       : "
1464 						"MADR[%d] = 0x%lx, "
1465 						"MADR[%d] = 0x%lx\n",
1466 						2, DRMACH_MCREG_TO_U64(
1467 						memregs->madr[j][2]),
1468 						3, DRMACH_MCREG_TO_U64(
1469 						memregs->madr[j][3]));
1470 				}
1471 			}
1472 			break;
1473 		case DRMSG_UNCLAIM:
1474 			if (!dir) {
1475 				DRMACH_PR("UNCLAIM Reply:\n");
1476 				break;
1477 			}
1478 
1479 			DRMACH_PR("UNCLAIM Request:\n");
1480 			for (i = 0; i < 18; ++i) {
1481 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1482 					mp->dm_ur.mem_slice[i].valid,
1483 					mp->dm_ur.mem_slice[i].slice);
1484 				memregs = &(mp->dm_ur.mem_regs[i]);
1485 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1486 					DRMACH_PR("  MC %2d: "
1487 						"MADR[%d] = 0x%lx, "
1488 						"MADR[%d] = 0x%lx\n", j,
1489 						0, DRMACH_MCREG_TO_U64(
1490 						memregs->madr[j][0]),
1491 						1, DRMACH_MCREG_TO_U64(
1492 						memregs->madr[j][1]));
1493 					DRMACH_PR("       : "
1494 						"MADR[%d] = 0x%lx, "
1495 						"MADR[%d] = 0x%lx\n",
1496 						2, DRMACH_MCREG_TO_U64(
1497 						memregs->madr[j][2]),
1498 						3, DRMACH_MCREG_TO_U64(
1499 						memregs->madr[j][3]));
1500 				}
1501 			}
1502 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1503 			break;
1504 		case DRMSG_UNCONFIG:
1505 			if (!dir) {
1506 				DRMACH_PR("UNCONFIG Reply:\n");
1507 				break;
1508 			}
1509 
1510 			DRMACH_PR("UNCONFIG Request:\n");
1511 			for (i = 0; i < 18; ++i) {
1512 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1513 					mp->dm_uc.mem_slice[i].valid,
1514 					mp->dm_uc.mem_slice[i].slice);
1515 				memregs = &(mp->dm_uc.mem_regs[i]);
1516 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1517 					DRMACH_PR("  MC %2d: "
1518 						"MADR[%d] = 0x%lx, "
1519 						"MADR[%d] = 0x%lx\n", j,
1520 						0, DRMACH_MCREG_TO_U64(
1521 						memregs->madr[j][0]),
1522 						1, DRMACH_MCREG_TO_U64(
1523 						memregs->madr[j][1]));
1524 					DRMACH_PR("       : "
1525 						"MADR[%d] = 0x%lx, "
1526 						"MADR[%d] = 0x%lx\n",
1527 						2, DRMACH_MCREG_TO_U64(
1528 						memregs->madr[j][2]),
1529 						3, DRMACH_MCREG_TO_U64(
1530 						memregs->madr[j][3]));
1531 				}
1532 			}
1533 			break;
1534 		case DRMSG_POWERON:
1535 			if (dir) {
1536 				DRMACH_PR("POWERON Request:\n");
1537 			} else {
1538 				DRMACH_PR("POWERON Reply:\n");
1539 			}
1540 			break;
1541 		case DRMSG_POWEROFF:
1542 			if (dir) {
1543 				DRMACH_PR("POWEROFF Request:\n");
1544 			} else {
1545 				DRMACH_PR("POWEROFF Reply:\n");
1546 			}
1547 			break;
1548 		case DRMSG_TESTBOARD:
1549 			if (dir) {
1550 				DRMACH_PR("TESTBOARD Request:\n");
1551 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1552 					mp->dm_tb.memaddrhi,
1553 					mp->dm_tb.memaddrlo);
1554 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1555 					mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1556 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1557 					mp->dm_tb.force, mp->dm_tb.immediate);
1558 			} else {
1559 				DRMACH_PR("TESTBOARD Reply:\n");
1560 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1561 					mp->dm_tr.memaddrhi,
1562 					mp->dm_tr.memaddrlo);
1563 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1564 					mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1565 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1566 					mp->dm_tr.cpu_recovered,
1567 					mp->dm_tr.test_status);
1568 
1569 			}
1570 			break;
1571 		case DRMSG_ABORT_TEST:
1572 			if (dir) {
1573 				DRMACH_PR("ABORT_TEST Request:\n");
1574 			} else {
1575 				DRMACH_PR("ABORT_TEST Reply:\n");
1576 			}
1577 
1578 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1579 					mp->dm_ta.memaddrhi,
1580 					mp->dm_ta.memaddrlo);
1581 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1582 					mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1583 			break;
1584 		case DRMSG_SHOWBOARD:
1585 			if (dir) {
1586 				DRMACH_PR("SHOWBOARD Request:\n");
1587 			} else {
1588 				DRMACH_PR("SHOWBOARD Reply:\n");
1589 
1590 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1591 					mp->dm_sb.slot_empty,
1592 					mp->dm_sb.power_on,
1593 					mp->dm_sb.bd_assigned);
1594 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1595 					mp->dm_sb.bd_active,
1596 					mp->dm_sb.test_status,
1597 					mp->dm_sb.test_level);
1598 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1599 			}
1600 			break;
1601 		default:
1602 			DRMACH_PR("Unknown message type\n");
1603 			break;
1604 	}
1605 
1606 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1607 		php->message_id, php->drproto_version, php->command,
1608 		php->expbrd, php->slot);
1609 #endif
1610 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1611 		php->error_code);
1612 
1613 }
1614 
1615 /*
1616  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1617  * handshake needs to be scheduled.  The handshake can't be performed by the
1618  * thread that determines it is needed, in most cases, so this function is
1619  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1620  * otherwise ignored, since any situation that requires a mailbox initialization
1621  * handshake will continue to request the handshake until it succeeds.
1622  */
1623 static void
1624 drmach_mbox_reinit(void *unused)
1625 {
1626 	_NOTE(ARGUNUSED(unused))
1627 
1628 	caddr_t		obufp = NULL;
1629 	sbd_error_t	*serr = NULL;
1630 
1631 	DRMACH_PR("scheduled mailbox reinit running\n");
1632 
1633 	mutex_enter(&drmach_ri_mbox_mutex);
1634 	mutex_enter(&drmach_g_mbox_mutex);
1635 	if (drmach_mbox_iflag == 0) {
1636 		/* need to initialize the mailbox */
1637 		mutex_exit(&drmach_g_mbox_mutex);
1638 
1639 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1640 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1641 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1642 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1643 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1644 
1645 		if (serr) {
1646 			cmn_err(CE_WARN,
1647 				"mbox_init: MBOX_INIT failed ecode=0x%x",
1648 				serr->e_code);
1649 			sbd_err_clear(&serr);
1650 		}
1651 		mutex_enter(&drmach_g_mbox_mutex);
1652 		if (!serr) {
1653 			drmach_mbox_iflag = 1;
1654 		}
1655 	}
1656 	drmach_mbox_ipending = 0;
1657 	mutex_exit(&drmach_g_mbox_mutex);
1658 	mutex_exit(&drmach_ri_mbox_mutex);
1659 }
1660 
1661 /*
1662  * To ensure sufficient compatibility with future versions of the DR mailbox
1663  * protocol, we use a buffer that is large enough to receive the largest message
1664  * that could possibly be sent to us.  However, since that ends up being fairly
1665  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1666  * does not need to be MT-safe since it is only invoked by the mailbox
1667  * framework, which will never invoke it multiple times concurrently.  Since
1668  * that is the case, we can use a static buffer.
1669  */
1670 void
1671 drmach_mbox_event(void)
1672 {
1673 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1674 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1675 	int		err;
1676 	uint32_t	type = MBOXSC_MSG_EVENT;
1677 	uint32_t	command = DRMSG_BOARDEVENT;
1678 	uint64_t	transid = 0;
1679 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1680 	char		*hint = "";
1681 	int		logsys = 0;
1682 
1683 	do {
1684 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1685 			&transid, &length, (void *)msg, 0);
1686 	} while (err == EAGAIN);
1687 
1688 	/* don't try to interpret anything with the wrong version number */
1689 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1690 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1691 			msg->p_hdr.drproto_version, DRMBX_VERSION);
1692 		mutex_enter(&drmach_g_mbox_mutex);
1693 		drmach_mbox_iflag = 0;
1694 		/* schedule a reinit handshake if one isn't pending */
1695 		if (!drmach_mbox_ipending) {
1696 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1697 				NULL, TQ_NOSLEEP) != NULL) {
1698 				drmach_mbox_ipending = 1;
1699 			} else {
1700 				cmn_err(CE_WARN,
1701 					"failed to schedule mailbox reinit");
1702 			}
1703 		}
1704 		mutex_exit(&drmach_g_mbox_mutex);
1705 		return;
1706 	}
1707 
1708 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1709 		cmn_err(CE_WARN,
1710 			"Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1711 				err, msg->p_hdr.error_code);
1712 	} else {
1713 		dr_boardevent_t	*be;
1714 		be = (dr_boardevent_t *)&msg->msgdata;
1715 
1716 		/* check for initialization event */
1717 		if (be->initialized) {
1718 			mutex_enter(&drmach_g_mbox_mutex);
1719 			drmach_mbox_iflag = 0;
1720 			/* schedule a reinit handshake if one isn't pending */
1721 			if (!drmach_mbox_ipending) {
1722 				if (taskq_dispatch(system_taskq,
1723 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1724 					!= NULL) {
1725 					drmach_mbox_ipending = 1;
1726 				} else {
1727 					cmn_err(CE_WARN,
1728 					"failed to schedule mailbox reinit");
1729 				}
1730 			}
1731 			mutex_exit(&drmach_g_mbox_mutex);
1732 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1733 		}
1734 
1735 		/* anything else will be a log_sysevent call */
1736 
1737 		if (be->board_insertion) {
1738 			DRMACH_PR("Board Insertion event received");
1739 			hint = DR_HINT_INSERT;
1740 			logsys++;
1741 	}
1742 		if (be->board_removal) {
1743 			DRMACH_PR("Board Removal event received");
1744 			hint = DR_HINT_REMOVE;
1745 			logsys++;
1746 		}
1747 		if (be->slot_assign) {
1748 			DRMACH_PR("Slot Assign event received");
1749 			logsys++;
1750 		}
1751 		if (be->slot_unassign) {
1752 			DRMACH_PR("Slot Unassign event received");
1753 			logsys++;
1754 		}
1755 		if (be->slot_avail) {
1756 			DRMACH_PR("Slot Available event received");
1757 			logsys++;
1758 		}
1759 		if (be->slot_unavail) {
1760 			DRMACH_PR("Slot Unavailable event received");
1761 			logsys++;
1762 		}
1763 		if (be->power_on) {
1764 			DRMACH_PR("Power ON event received");
1765 			logsys++;
1766 		}
1767 		if (be->power_off) {
1768 			DRMACH_PR("Power OFF event received");
1769 			logsys++;
1770 		}
1771 
1772 		if (logsys)
1773 			drmach_log_sysevent(
1774 				    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1775 							msg->p_hdr.slot),
1776 				    hint, SE_NOSLEEP, 1);
1777 	}
1778 }
1779 
1780 static uint32_t
1781 drmach_get_msgid()
1782 {
1783 	uint32_t	rv;
1784 	mutex_enter(&drmach_msglist_mutex);
1785 	if (!(++drmach_msgid))
1786 		++drmach_msgid;
1787 	rv = drmach_msgid;
1788 	mutex_exit(&drmach_msglist_mutex);
1789 	return (rv);
1790 }
1791 
1792 /*
1793  *	unlink an entry from the message transaction list
1794  *
1795  *	caller must hold drmach_msglist_mutex
1796  */
1797 void
1798 drmach_msglist_unlink(drmach_msglist_t *entry)
1799 {
1800 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1801 	if (entry->prev) {
1802 		entry->prev->next = entry->next;
1803 		if (entry->next)
1804 			entry->next->prev = entry->prev;
1805 	} else {
1806 		drmach_msglist_first = entry->next;
1807 		if (entry->next)
1808 			entry->next->prev = NULL;
1809 	}
1810 	if (entry == drmach_msglist_last) {
1811 		drmach_msglist_last = entry->prev;
1812 	}
1813 }
1814 
1815 void
1816 drmach_msglist_link(drmach_msglist_t *entry)
1817 {
1818 	mutex_enter(&drmach_msglist_mutex);
1819 	if (drmach_msglist_last) {
1820 		entry->prev = drmach_msglist_last;
1821 		drmach_msglist_last->next = entry;
1822 		drmach_msglist_last = entry;
1823 	} else {
1824 		drmach_msglist_last = drmach_msglist_first = entry;
1825 	}
1826 	mutex_exit(&drmach_msglist_mutex);
1827 }
1828 
1829 void
1830 drmach_mbox_getmsg()
1831 {
1832 	int			err;
1833 	register int		msgid;
1834 	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1835 	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1836 	dr_proto_hdr_t		*php;
1837 	drmach_msglist_t	*found, *entry;
1838 	uint32_t		type = MBOXSC_MSG_REPLY;
1839 	uint32_t		command;
1840 	uint64_t		transid;
1841 	uint32_t		length;
1842 
1843 	php = &msg->p_hdr;
1844 
1845 	while (drmach_getmsg_thread_run != 0) {
1846 		/* get a reply message */
1847 		command = 0;
1848 		transid = 0;
1849 		length = DRMACH_MAX_MBOX_MSG_SIZE;
1850 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1851 			&transid, &length, (void *)msg, drmach_to_getmsg);
1852 
1853 		if (err) {
1854 			/*
1855 			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1856 			 * the "error" is really just a normal, transient
1857 			 * condition and we can retry the operation right away.
1858 			 * Any other error suggests a more serious problem,
1859 			 * ranging from a message being too big for our buffer
1860 			 * (EMSGSIZE) to total failure of the mailbox layer.
1861 			 * This second class of errors is much less "transient",
1862 			 * so rather than retrying over and over (and getting
1863 			 * the same error over and over) as fast as we can,
1864 			 * we'll sleep for a while before retrying.
1865 			 */
1866 			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1867 				cmn_err(CE_WARN,
1868 				"mboxsc_getmsg failed, err=0x%x", err);
1869 				delay(drmach_mbxerr_delay * hz);
1870 			}
1871 			continue;
1872 		}
1873 
1874 		drmach_mbox_prmsg(msg, 0);
1875 
1876 		if (php->drproto_version != DRMBX_VERSION) {
1877 			cmn_err(CE_WARN,
1878 				"mailbox version mismatch 0x%x vs 0x%x",
1879 				php->drproto_version, DRMBX_VERSION);
1880 
1881 			mutex_enter(&drmach_g_mbox_mutex);
1882 			drmach_mbox_iflag = 0;
1883 			/* schedule a reinit handshake if one isn't pending */
1884 			if (!drmach_mbox_ipending) {
1885 				if (taskq_dispatch(system_taskq,
1886 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1887 					!= NULL) {
1888 					drmach_mbox_ipending = 1;
1889 				} else {
1890 					cmn_err(CE_WARN,
1891 					"failed to schedule mailbox reinit");
1892 				}
1893 			}
1894 			mutex_exit(&drmach_g_mbox_mutex);
1895 
1896 			continue;
1897 		}
1898 
1899 		msgid = php->message_id;
1900 		found = NULL;
1901 		mutex_enter(&drmach_msglist_mutex);
1902 		entry = drmach_msglist_first;
1903 		while (entry != NULL) {
1904 			if (entry->msgid == msgid) {
1905 				found = entry;
1906 				drmach_msglist_unlink(entry);
1907 				entry = NULL;
1908 			} else
1909 				entry = entry->next;
1910 		}
1911 
1912 		if (found) {
1913 			mutex_enter(&found->g_lock);
1914 
1915 			found->e_code = php->error_code;
1916 			if (found->i_buflen > 0)
1917 				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1918 					found->i_buflen);
1919 			found->m_reply = 1;
1920 
1921 			cv_signal(&found->g_cv);
1922 			mutex_exit(&found->g_lock);
1923 		} else {
1924 			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1925 			    msgid);
1926 			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1927 			    php->command, php->expbrd, php->slot);
1928 		}
1929 
1930 		mutex_exit(&drmach_msglist_mutex);
1931 	}
1932 	cmn_err(CE_WARN, "mbox_getmsg: exiting");
1933 	mutex_enter(&drmach_msglist_mutex);
1934 	entry = drmach_msglist_first;
1935 	while (entry != NULL) {
1936 		if (entry->p_flag == 1) {
1937 			entry->f_error = -1;
1938 			mutex_enter(&entry->g_lock);
1939 			cv_signal(&entry->g_cv);
1940 			mutex_exit(&entry->g_lock);
1941 			drmach_msglist_unlink(entry);
1942 		}
1943 		entry = entry->next;
1944 	}
1945 	mutex_exit(&drmach_msglist_mutex);
1946 	drmach_getmsg_thread_run = -1;
1947 	thread_exit();
1948 }
1949 
1950 void
1951 drmach_mbox_sendmsg()
1952 {
1953 	int		err, retry;
1954 	drmach_msglist_t *entry;
1955 	dr_mbox_msg_t   *mp;
1956 	dr_proto_hdr_t  *php;
1957 
1958 	while (drmach_sendmsg_thread_run != 0) {
1959 		/*
1960 		 * Search through the list to find entries awaiting
1961 		 * transmission to the SC
1962 		 */
1963 		mutex_enter(&drmach_msglist_mutex);
1964 		entry = drmach_msglist_first;
1965 		retry = 0;
1966 		while (entry != NULL) {
1967 			if (entry->p_flag == 1) {
1968 				entry = entry->next;
1969 				continue;
1970 			}
1971 
1972 			mutex_exit(&drmach_msglist_mutex);
1973 
1974 			if (!retry)
1975 				mutex_enter(&entry->s_lock);
1976 			mp = (dr_mbox_msg_t *)entry->o_buf;
1977 			php = &mp->p_hdr;
1978 
1979 			drmach_mbox_prmsg(mp, 1);
1980 
1981 			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
1982 				php->command, NULL, entry->o_buflen, (void *)mp,
1983 				drmach_to_putmsg);
1984 
1985 			if (err) {
1986 				switch (err) {
1987 
1988 				case EAGAIN:
1989 				case EBUSY:
1990 					++retry;
1991 					mutex_enter(&drmach_msglist_mutex);
1992 					continue;
1993 
1994 				case ETIMEDOUT:
1995 					if (--entry->o_nretry <= 0) {
1996 						mutex_enter(
1997 							&drmach_msglist_mutex);
1998 						drmach_msglist_unlink(entry);
1999 						mutex_exit(
2000 							&drmach_msglist_mutex);
2001 						entry->f_error = err;
2002 						entry->p_flag = 1;
2003 						cv_signal(&entry->s_cv);
2004 					} else {
2005 						++retry;
2006 						mutex_enter(
2007 							&drmach_msglist_mutex);
2008 						continue;
2009 					}
2010 					break;
2011 				default:
2012 					mutex_enter(&drmach_msglist_mutex);
2013 					drmach_msglist_unlink(entry);
2014 					mutex_exit(&drmach_msglist_mutex);
2015 					entry->f_error = err;
2016 					entry->p_flag = 1;
2017 					cv_signal(&entry->s_cv);
2018 					break;
2019 				}
2020 			} else {
2021 				entry->p_flag = 1;
2022 				cv_signal(&entry->s_cv);
2023 			}
2024 
2025 			mutex_exit(&entry->s_lock);
2026 			retry = 0;
2027 			mutex_enter(&drmach_msglist_mutex);
2028 			entry = drmach_msglist_first;
2029 		}
2030 		mutex_exit(&drmach_msglist_mutex);
2031 
2032 		mutex_enter(&drmach_sendmsg_mutex);
2033 		(void) cv_timedwait(&drmach_sendmsg_cv,
2034 			&drmach_sendmsg_mutex, ddi_get_lbolt() + (5 * hz));
2035 		mutex_exit(&drmach_sendmsg_mutex);
2036 	}
2037 	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
2038 	mutex_enter(&drmach_msglist_mutex);
2039 	entry = drmach_msglist_first;
2040 	while (entry != NULL) {
2041 		if (entry->p_flag == 0) {
2042 			entry->f_error = -1;
2043 			mutex_enter(&entry->s_lock);
2044 			cv_signal(&entry->s_cv);
2045 			mutex_exit(&entry->s_lock);
2046 			drmach_msglist_unlink(entry);
2047 		}
2048 		entry = entry->next;
2049 	}
2050 	mutex_exit(&drmach_msglist_mutex);
2051 	cv_destroy(&drmach_sendmsg_cv);
2052 	mutex_destroy(&drmach_sendmsg_mutex);
2053 
2054 	drmach_sendmsg_thread_run = -1;
2055 	thread_exit();
2056 
2057 }
2058 
2059 void
2060 drmach_msglist_destroy(drmach_msglist_t *listp)
2061 {
2062 	if (listp != NULL) {
2063 		drmach_msglist_t	*entry;
2064 
2065 		mutex_enter(&drmach_msglist_mutex);
2066 		entry = drmach_msglist_first;
2067 		while (entry) {
2068 			if (listp == entry) {
2069 				drmach_msglist_unlink(listp);
2070 				entry = NULL;
2071 			} else
2072 				entry = entry->next;
2073 		}
2074 
2075 		mutex_destroy(&listp->s_lock);
2076 		cv_destroy(&listp->s_cv);
2077 		mutex_destroy(&listp->g_lock);
2078 		cv_destroy(&listp->g_cv);
2079 		kmem_free(listp, sizeof (drmach_msglist_t));
2080 
2081 		mutex_exit(&drmach_msglist_mutex);
2082 	}
2083 }
2084 
2085 static drmach_msglist_t	*
2086 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2087 	uint32_t olen, int nrtry)
2088 {
2089 	drmach_msglist_t	*listp;
2090 
2091 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2092 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2093 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2094 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2095 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2096 	listp->o_buf = (caddr_t)hdrp;
2097 	listp->o_buflen = olen;
2098 	listp->i_buf = ibufp;
2099 	listp->i_buflen = ilen;
2100 	listp->o_nretry = nrtry;
2101 	listp->msgid = hdrp->message_id;
2102 
2103 	return (listp);
2104 }
2105 
2106 static drmach_msglist_t *
2107 drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
2108 	uint32_t ilen, int timeout, int nrtry, int nosig,
2109 	drmach_msglist_t *link)
2110 {
2111 	int		crv;
2112 	drmach_msglist_t *listp;
2113 	clock_t		to_val;
2114 	dr_proto_hdr_t	*php;
2115 
2116 	/* setup transaction list entry */
2117 	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);
2118 
2119 	/* send mailbox message, await reply */
2120 	mutex_enter(&listp->s_lock);
2121 	mutex_enter(&listp->g_lock);
2122 
2123 	listp->link = link;
2124 	drmach_msglist_link(listp);
2125 
2126 	mutex_enter(&drmach_sendmsg_mutex);
2127 	cv_signal(&drmach_sendmsg_cv);
2128 	mutex_exit(&drmach_sendmsg_mutex);
2129 
2130 	while (listp->p_flag == 0) {
2131 		cv_wait(&listp->s_cv, &listp->s_lock);
2132 	}
2133 
2134 	to_val =  ddi_get_lbolt() + (timeout * hz);
2135 
2136 	if (listp->f_error) {
2137 		listp->p_flag = 0;
2138 		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x",
2139 			listp->f_error);
2140 		php = (dr_proto_hdr_t *)listp->o_buf;
2141 		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
2142 		    php->command, php->expbrd, php->slot);
2143 	} else {
2144 		while (listp->m_reply == 0 && listp->f_error == 0) {
2145 			if (nosig)
2146 				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
2147 					to_val);
2148 			else
2149 				crv = cv_timedwait_sig(&listp->g_cv,
2150 					&listp->g_lock, to_val);
2151 			switch (crv) {
2152 				case -1: /* timed out */
2153 					cmn_err(CE_WARN,
2154 					    "!msgid=0x%x reply timed out",
2155 					    hdrp->message_id);
2156 					php = (dr_proto_hdr_t *)listp->o_buf;
2157 					cmn_err(CE_WARN, "!    cmd = 0x%x, "
2158 					    "exb = %d, slot = %d", php->command,
2159 					    php->expbrd, php->slot);
2160 					listp->f_error = ETIMEDOUT;
2161 					break;
2162 				case 0: /* signal received */
2163 					cmn_err(CE_WARN,
2164 					    "operation interrupted by signal");
2165 					listp->f_error = EINTR;
2166 					break;
2167 				default:
2168 					break;
2169 				}
2170 		}
2171 
2172 		/*
2173 		 * If link is set for this entry, check to see if
2174 		 * the linked entry has been replied to.  If not,
2175 		 * wait for the response.
2176 		 * Currently, this is only used for ABORT_TEST functionality,
2177 		 * wherein a check is made for the TESTBOARD reply when
2178 		 * the ABORT_TEST reply is received.
2179 		 */
2180 
2181 		if (link) {
2182 			mutex_enter(&link->g_lock);
2183 			/*
2184 			 * If the reply to the linked entry hasn't been
2185 			 * received, clear the existing link->f_error,
2186 			 * and await the reply.
2187 			 */
2188 			if (link->m_reply == 0) {
2189 				link->f_error = 0;
2190 			}
2191 			to_val =  ddi_get_lbolt() + (timeout * hz);
2192 			while (link->m_reply == 0 && link->f_error == 0) {
2193 				crv = cv_timedwait(&link->g_cv, &link->g_lock,
2194 					to_val);
2195 				switch (crv) {
2196 				case -1: /* timed out */
2197 					cmn_err(CE_NOTE,
2198 					    "!link msgid=0x%x reply timed out",
2199 					    link->msgid);
2200 					link->f_error = ETIMEDOUT;
2201 					break;
2202 				default:
2203 					break;
2204 				}
2205 			}
2206 			mutex_exit(&link->g_lock);
2207 		}
2208 	}
2209 	mutex_exit(&listp->g_lock);
2210 	mutex_exit(&listp->s_lock);
2211 	return (listp);
2212 }
2213 
2214 static sbd_error_t *
2215 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2216 {
2217 	char		a_pnt[MAXNAMELEN];
2218 	dr_proto_hdr_t	*php;
2219 	int		bnum;
2220 
2221 	if (mlp->f_error) {
2222 		/*
2223 		 * If framework failure is due to signal, return "no error"
2224 		 * error.
2225 		 */
2226 		if (mlp->f_error == EINTR)
2227 			return (drerr_new(0, ESTC_NONE, NULL));
2228 
2229 		mutex_enter(&drmach_g_mbox_mutex);
2230 		drmach_mbox_iflag = 0;
2231 		mutex_exit(&drmach_g_mbox_mutex);
2232 		if (!mlp->p_flag)
2233 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2234 		else
2235 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2236 	}
2237 	php = (dr_proto_hdr_t *)mlp->o_buf;
2238 	bnum = 2 * php->expbrd + php->slot;
2239 	a_pnt[0] = '\0';
2240 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2241 
2242 	switch (mlp->e_code) {
2243 		case 0:
2244 			return (NULL);
2245 		case DRERR_NOACL:
2246 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2247 		case DRERR_NOT_ASSIGNED:
2248 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2249 		case DRERR_NOT_ACTIVE:
2250 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2251 		case DRERR_EMPTY_SLOT:
2252 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2253 		case DRERR_POWER_OFF:
2254 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2255 		case DRERR_TEST_IN_PROGRESS:
2256 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS,
2257 					"%s", a_pnt));
2258 		case DRERR_TESTING_BUSY:
2259 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2260 		case DRERR_TEST_REQUIRED:
2261 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2262 		case DRERR_UNAVAILABLE:
2263 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2264 		case DRERR_RECOVERABLE:
2265 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE,
2266 				"%s", a_pnt));
2267 		case DRERR_UNRECOVERABLE:
2268 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE,
2269 				"%s", a_pnt));
2270 		default:
2271 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2272 	}
2273 }
2274 
2275 static sbd_error_t *
2276 drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
2277 	caddr_t ibufp, int ilen)
2278 {
2279 	int			timeout = 0;
2280 	int			ntries = 0;
2281 	int			nosignals = 0;
2282 	dr_proto_hdr_t 		*hdrp;
2283 	drmach_msglist_t 	*mlp;
2284 	sbd_error_t		*err = NULL;
2285 
2286 	if (msgtype != DRMSG_MBOX_INIT) {
2287 		mutex_enter(&drmach_ri_mbox_mutex);
2288 		mutex_enter(&drmach_g_mbox_mutex);
2289 		if (drmach_mbox_iflag == 0) {
2290 			/* need to initialize the mailbox */
2291 			dr_proto_hdr_t	imsg;
2292 
2293 			mutex_exit(&drmach_g_mbox_mutex);
2294 
2295 			imsg.command = DRMSG_MBOX_INIT;
2296 
2297 			imsg.message_id = drmach_get_msgid();
2298 			imsg.drproto_version = DRMBX_VERSION;
2299 			imsg.expbrd = 0;
2300 			imsg.slot = 0;
2301 
2302 			cmn_err(CE_WARN,
2303 				"!reinitializing DR mailbox");
2304 			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
2305 				10, 5, 0, NULL);
2306 			err = drmach_mbx2sbderr(mlp);
2307 			/*
2308 			 * If framework failure incoming is encountered on
2309 			 * the MBOX_INIT [timeout on SMS reply], the error
2310 			 * type must be changed before returning to caller.
2311 			 * This is to prevent drmach_board_connect() and
2312 			 * drmach_board_disconnect() from marking boards
2313 			 * UNUSABLE based on MBOX_INIT failures.
2314 			 */
2315 			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
2316 				cmn_err(CE_WARN,
2317 				    "!Changed mbox incoming to outgoing"
2318 				    " failure on reinit");
2319 				sbd_err_clear(&err);
2320 				err = drerr_new(0, ESTC_MBXRQST, NULL);
2321 			}
2322 			drmach_msglist_destroy(mlp);
2323 			if (err) {
2324 				mutex_exit(&drmach_ri_mbox_mutex);
2325 				return (err);
2326 			}
2327 			mutex_enter(&drmach_g_mbox_mutex);
2328 			drmach_mbox_iflag = 1;
2329 		}
2330 		mutex_exit(&drmach_g_mbox_mutex);
2331 		mutex_exit(&drmach_ri_mbox_mutex);
2332 	}
2333 
2334 	hdrp = (dr_proto_hdr_t *)obufp;
2335 
2336 	/* setup outgoing mailbox header */
2337 	hdrp->command = msgtype;
2338 	hdrp->message_id = drmach_get_msgid();
2339 	hdrp->drproto_version = DRMBX_VERSION;
2340 	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
2341 	hdrp->slot = DRMACH_BNUM2SLOT(bnum);
2342 
2343 	switch (msgtype) {
2344 
2345 		case DRMSG_MBOX_INIT:
2346 			timeout = drmach_to_mbxinit;
2347 			ntries = 1;
2348 			nosignals = 0;
2349 			break;
2350 
2351 		case DRMSG_ASSIGN:
2352 			timeout = drmach_to_assign;
2353 			ntries = 1;
2354 			nosignals = 0;
2355 			break;
2356 
2357 		case DRMSG_UNASSIGN:
2358 			timeout = drmach_to_unassign;
2359 			ntries = 1;
2360 			nosignals = 0;
2361 			break;
2362 
2363 		case DRMSG_POWERON:
2364 			timeout = drmach_to_poweron;
2365 			ntries = 1;
2366 			nosignals = 0;
2367 			break;
2368 
2369 		case DRMSG_POWEROFF:
2370 			timeout = drmach_to_poweroff;
2371 			ntries = 1;
2372 			nosignals = 0;
2373 			break;
2374 
2375 		case DRMSG_SHOWBOARD:
2376 			timeout = drmach_to_showboard;
2377 			ntries = 1;
2378 			nosignals = 0;
2379 			break;
2380 
2381 		case DRMSG_CLAIM:
2382 			timeout = drmach_to_claim;
2383 			ntries = 1;
2384 			nosignals = 1;
2385 			break;
2386 
2387 		case DRMSG_UNCLAIM:
2388 			timeout = drmach_to_unclaim;
2389 			ntries = 1;
2390 			nosignals = 1;
2391 			break;
2392 
2393 		case DRMSG_UNCONFIG:
2394 			timeout = drmach_to_unconfig;
2395 			ntries = 1;
2396 			nosignals = 0;
2397 			break;
2398 
2399 		case DRMSG_TESTBOARD:
2400 			timeout = drmach_to_testboard;
2401 			ntries = 1;
2402 			nosignals = 0;
2403 			break;
2404 
2405 		default:
2406 			cmn_err(CE_WARN,
2407 				"Unknown outgoing message type 0x%x", msgtype);
2408 			err = DRMACH_INTERNAL_ERROR();
2409 			break;
2410 	}
2411 
2412 	if (err == NULL) {
2413 		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen,
2414 			timeout, ntries, nosignals, NULL);
2415 		err = drmach_mbx2sbderr(mlp);
2416 
2417 		/*
2418 		 * For DRMSG_TESTBOARD attempts which have timed out, or
2419 		 * been aborted due to a signal received after mboxsc_putmsg()
2420 		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
2421 		 * must be sent.
2422 		 */
2423 		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
2424 		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
2425 		    (mlp->p_flag != 0)))) {
2426 			drmach_msglist_t	*abmlp;
2427 			dr_abort_test_t		abibuf;
2428 
2429 			hdrp->command = DRMSG_ABORT_TEST;
2430 			hdrp->message_id = drmach_get_msgid();
2431 			abmlp = drmach_mbox_req_rply(hdrp,
2432 			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
2433 			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
2434 			cmn_err(CE_WARN, "test aborted");
2435 			drmach_msglist_destroy(abmlp);
2436 		}
2437 
2438 		drmach_msglist_destroy(mlp);
2439 	}
2440 
2441 	return (err);
2442 }
2443 
2444 static int
2445 drmach_mbox_init()
2446 {
2447 	int			err;
2448 	caddr_t			obufp;
2449 	sbd_error_t		*serr = NULL;
2450 	mboxsc_timeout_range_t	mbxtoz;
2451 
2452 	drmach_mbox_istate = 0;
2453 	/* register the outgoing mailbox */
2454 	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
2455 		NULL)) != 0) {
2456 		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
2457 		return (-1);
2458 	}
2459 	drmach_mbox_istate = 1;
2460 
2461 	/* setup the mboxsc_putmsg timeout value */
2462 	if (drmach_use_tuned_putmsg_to) {
2463 		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
2464 		    drmach_to_putmsg);
2465 	} else {
2466 		if ((err = mboxsc_ctrl(KEY_DRSC,
2467 		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
2468 			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
2469 			drmach_to_putmsg = 60000;
2470 		} else {
2471 			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
2472 			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
2473 			    " is 0x%lx\n", mbxtoz.min_timeout,
2474 			    mbxtoz.max_timeout, drmach_to_putmsg);
2475 		}
2476 	}
2477 
2478 	/* register the incoming mailbox */
2479 	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
2480 		drmach_mbox_event)) != 0) {
2481 		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
2482 		return (-1);
2483 	}
2484 	drmach_mbox_istate = 2;
2485 
2486 	/* initialize mutex for mailbox globals */
2487 	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2488 
2489 	/* initialize mutex for mailbox re-init */
2490 	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2491 
2492 	/* initialize mailbox message list elements */
2493 	drmach_msglist_first = drmach_msglist_last = NULL;
2494 	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);
2495 
2496 	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
2497 	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);
2498 
2499 	drmach_mbox_istate = 3;
2500 
2501 	/* start mailbox sendmsg thread */
2502 	drmach_sendmsg_thread_run = 1;
2503 	if (drmach_sendmsg_thread == NULL)
2504 		drmach_sendmsg_thread = thread_create(NULL, 0,
2505 		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
2506 		    TS_RUN, minclsyspri);
2507 
2508 	/* start mailbox getmsg thread */
2509 	drmach_getmsg_thread_run = 1;
2510 	if (drmach_getmsg_thread == NULL)
2511 		drmach_getmsg_thread = thread_create(NULL, 0,
2512 		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
2513 		    TS_RUN, minclsyspri);
2514 
2515 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
2516 	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
2517 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
2518 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
2519 	if (serr) {
2520 		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
2521 			serr->e_code);
2522 		sbd_err_clear(&serr);
2523 		return (-1);
2524 	}
2525 	mutex_enter(&drmach_g_mbox_mutex);
2526 	drmach_mbox_iflag = 1;
2527 	drmach_mbox_ipending = 0;
2528 	mutex_exit(&drmach_g_mbox_mutex);
2529 
2530 	return (0);
2531 }
2532 
2533 static int
2534 drmach_mbox_fini()
2535 {
2536 	int err, rv = 0;
2537 
2538 	if (drmach_mbox_istate > 2) {
2539 		drmach_getmsg_thread_run = 0;
2540 		drmach_sendmsg_thread_run = 0;
2541 		cmn_err(CE_WARN,
2542 			"drmach_mbox_fini: waiting for mbox threads...");
2543 		while ((drmach_getmsg_thread_run == 0) ||
2544 			(drmach_sendmsg_thread_run == 0)) {
2545 			continue;
2546 		}
2547 		cmn_err(CE_WARN,
2548 			"drmach_mbox_fini: mbox threads done.");
2549 		mutex_destroy(&drmach_msglist_mutex);
2550 
2551 	}
2552 	if (drmach_mbox_istate) {
2553 		/* de-register the outgoing mailbox */
2554 		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
2555 			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
2556 				err);
2557 			rv = -1;
2558 		}
2559 	}
2560 	if (drmach_mbox_istate > 1) {
2561 		/* de-register the incoming mailbox */
2562 		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
2563 			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
2564 				err);
2565 			rv = -1;
2566 		}
2567 	}
2568 	mutex_destroy(&drmach_g_mbox_mutex);
2569 	mutex_destroy(&drmach_ri_mbox_mutex);
2570 	return (rv);
2571 }
2572 
/*
 * Convert a port id into a board number.
 *
 * The low five bits of the port id select the slot (0 or 1); the bits
 * above them, shifted down so that they land in bits 1-6, are combined
 * with the slot to form the board number.
 *
 * A port id whose low five bits match none of the known agents trips an
 * ASSERT in DEBUG kernels.  In non-DEBUG kernels, where ASSERT compiles
 * away, such a port id is treated as slot 0 so that the result is
 * deterministic rather than computed from an uninitialized variable.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		slot = 0;		/* never read slot uninitialized */
		break;
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2596 
2597 extern int axq_suspend_iopause;
2598 
2599 static int
2600 hold_rele_branch(dev_info_t *rdip, void *arg)
2601 {
2602 	int	i;
2603 	int	*holdp	= (int *)arg;
2604 	char	*name = ddi_node_name(rdip);
2605 
2606 	/*
2607 	 * For Starcat, we must be children of the root devinfo node
2608 	 */
2609 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2610 
2611 	i = drmach_name2type_idx(name);
2612 
2613 	/*
2614 	 * Only children of the root devinfo node need to be
2615 	 * held/released since they are the only valid targets
2616 	 * of tree operations. This corresponds to the node types
2617 	 * listed in the drmach_name2type array.
2618 	 */
2619 	if (i < 0) {
2620 		/* Not of interest to us */
2621 		return (DDI_WALK_PRUNECHILD);
2622 	}
2623 
2624 	if (*holdp) {
2625 		ASSERT(!e_ddi_branch_held(rdip));
2626 		e_ddi_branch_hold(rdip);
2627 	} else {
2628 		ASSERT(e_ddi_branch_held(rdip));
2629 		e_ddi_branch_rele(rdip);
2630 	}
2631 
2632 	return (DDI_WALK_PRUNECHILD);
2633 }
2634 
2635 static int
2636 drmach_init(void)
2637 {
2638 	pnode_t 	nodeid;
2639 	gdcd_t		*gdcd;
2640 	int		bnum;
2641 	dev_info_t	*rdip;
2642 	int		hold, circ;
2643 
2644 	mutex_enter(&drmach_i_lock);
2645 	if (drmach_initialized) {
2646 		mutex_exit(&drmach_i_lock);
2647 		return (0);
2648 	}
2649 
2650 	gdcd = drmach_gdcd_new();
2651 	if (gdcd == NULL) {
2652 		mutex_exit(&drmach_i_lock);
2653 		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
2654 		return (-1);
2655 	}
2656 
2657 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
2658 
2659 	nodeid = prom_childnode(prom_rootnode());
2660 	do {
2661 		int		 len;
2662 		int		 portid;
2663 		drmachid_t	 id;
2664 
2665 		len = prom_getproplen(nodeid, "portid");
2666 		if (len != sizeof (portid))
2667 			continue;
2668 
2669 		portid = -1;
2670 		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
2671 		if (portid == -1)
2672 			continue;
2673 
2674 		bnum = drmach_portid2bnum(portid);
2675 
2676 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
2677 			/* portid translated to an invalid board number */
2678 			cmn_err(CE_WARN, "OBP node 0x%x has"
2679 				" invalid property value, %s=%u",
2680 				nodeid, "portid", portid);
2681 
2682 			/* clean up */
2683 			drmach_array_dispose(drmach_boards,
2684 			    drmach_board_dispose);
2685 			drmach_gdcd_dispose(gdcd);
2686 			mutex_exit(&drmach_i_lock);
2687 			return (-1);
2688 		} else if (id == NULL) {
2689 			drmach_board_t	*bp;
2690 			l1_slot_stat_t	*dcd;
2691 			int		exp, slot;
2692 
2693 			bp = drmach_board_new(bnum);
2694 			bp->assigned = !drmach_initialized;
2695 			bp->powered = !drmach_initialized;
2696 
2697 			exp = DRMACH_BNUM2EXP(bnum);
2698 			slot = DRMACH_BNUM2SLOT(bnum);
2699 			dcd = &gdcd->dcd_slot[exp][slot];
2700 			bp->stardrb_offset =
2701 			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
2702 			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
2703 			    bp->stardrb_offset);
2704 
2705 			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
2706 			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
2707 				bp->flags |= DRMACH_NULL_PROC_LPA;
2708 				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
2709 			}
2710 		}
2711 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
2712 
2713 	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
2714 
2715 	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
2716 		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
2717 				gdcd->dcd_testcage_log2_mbytes_align);
2718 		drmach_iocage_paddr =
2719 			(uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
2720 		drmach_iocage_size =
2721 			1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);
2722 
2723 		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
2724 			drmach_iocage_size, VM_SLEEP);
2725 		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
2726 			mmu_btop(drmach_iocage_paddr),
2727 			PROT_READ | PROT_WRITE,
2728 			HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
2729 
2730 		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
2731 			gdcd->dcd_testcage_log2_mbytes_size,
2732 			gdcd->dcd_testcage_log2_mbytes_align,
2733 			gdcd->dcd_testcage_mbyte_PA);
2734 		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
2735 			drmach_iocage_size, drmach_iocage_paddr,
2736 			drmach_iocage_vaddr);
2737 	}
2738 
2739 	if (drmach_iocage_size == 0) {
2740 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2741 		drmach_boards = NULL;
2742 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2743 		drmach_gdcd_dispose(gdcd);
2744 		mutex_exit(&drmach_i_lock);
2745 		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
2746 		return (-1);
2747 	}
2748 
2749 	drmach_gdcd_dispose(gdcd);
2750 
2751 	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
2752 	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
2753 	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
2754 	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
2755 	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);
2756 
2757 	mutex_enter(&cpu_lock);
2758 	mutex_enter(&drmach_iocage_lock);
2759 	ASSERT(drmach_iocage_is_busy == 0);
2760 	drmach_iocage_is_busy = 1;
2761 	drmach_iocage_mem_scrub(drmach_iocage_size);
2762 	drmach_iocage_is_busy = 0;
2763 	cv_signal(&drmach_iocage_cv);
2764 	mutex_exit(&drmach_iocage_lock);
2765 	mutex_exit(&cpu_lock);
2766 
2767 
2768 	if (drmach_mbox_init() == -1) {
2769 		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
2770 	}
2771 
2772 	/*
2773 	 * Walk immediate children of devinfo root node and hold
2774 	 * all devinfo branches of interest.
2775 	 */
2776 	hold = 1;
2777 	rdip = ddi_root_node();
2778 
2779 	ndi_devi_enter(rdip, &circ);
2780 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2781 	ndi_devi_exit(rdip, circ);
2782 
2783 	drmach_initialized = 1;
2784 
2785 	/*
2786 	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
2787 	 * rev introducing the axq_iopause_*_all interfaces should not regress
2788 	 * when installed without the DR rev using those interfaces. The default
2789 	 * is for iopause to be enabled/disabled during axq suspend/resume. By
2790 	 * setting the following axq flag to zero, axq will not enable iopause
2791 	 * during suspend/resume, instead DR will call the axq_iopause_*_all
2792 	 * interfaces during drmach_copy_rename.
2793 	 */
2794 	axq_suspend_iopause = 0;
2795 
2796 	mutex_exit(&drmach_i_lock);
2797 
2798 	return (0);
2799 }
2800 
2801 static void
2802 drmach_fini(void)
2803 {
2804 	dev_info_t	*rdip;
2805 	int		hold, circ;
2806 
2807 	if (drmach_initialized) {
2808 		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2809 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2810 		drmach_boards = NULL;
2811 		rw_exit(&drmach_boards_rwlock);
2812 
2813 		mutex_destroy(&drmach_slice_table_lock);
2814 		mutex_destroy(&drmach_xt_mb_lock);
2815 		mutex_destroy(&drmach_bus_sync_lock);
2816 		cv_destroy(&drmach_iocage_cv);
2817 		mutex_destroy(&drmach_iocage_lock);
2818 
2819 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2820 
2821 		/*
2822 		 * Walk immediate children of the root devinfo node
2823 		 * releasing holds acquired on branches in drmach_init()
2824 		 */
2825 		hold = 0;
2826 		rdip = ddi_root_node();
2827 
2828 		ndi_devi_enter(rdip, &circ);
2829 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2830 		ndi_devi_exit(rdip, circ);
2831 
2832 		drmach_initialized = 0;
2833 	}
2834 
2835 	drmach_mbox_fini();
2836 	if (drmach_xt_mb != NULL) {
2837 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2838 		    drmach_xt_mb_size);
2839 	}
2840 	rw_destroy(&drmach_boards_rwlock);
2841 	mutex_destroy(&drmach_i_lock);
2842 }
2843 
2844 static struct memlist *
2845 memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len)
2846 {
2847 	struct memlist	*ml, *tl, *nl;
2848 
2849 	if (len == 0ull)
2850 		return (NULL);
2851 
2852 	if (mlist == NULL) {
2853 		mlist = GETSTRUCT(struct memlist, 1);
2854 		mlist->address = base;
2855 		mlist->size = len;
2856 		mlist->next = mlist->prev = NULL;
2857 
2858 		return (mlist);
2859 	}
2860 
2861 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
2862 		if (base < ml->address) {
2863 			if ((base + len) < ml->address) {
2864 				nl = GETSTRUCT(struct memlist, 1);
2865 				nl->address = base;
2866 				nl->size = len;
2867 				nl->next = ml;
2868 				if ((nl->prev = ml->prev) != NULL)
2869 					nl->prev->next = nl;
2870 				ml->prev = nl;
2871 				if (mlist == ml)
2872 					mlist = nl;
2873 			} else {
2874 				ml->size = MAX((base + len),
2875 						(ml->address + ml->size)) -
2876 						base;
2877 				ml->address = base;
2878 			}
2879 			break;
2880 
2881 		} else if (base <= (ml->address + ml->size)) {
2882 			ml->size = MAX((base + len),
2883 					(ml->address + ml->size)) -
2884 					MIN(ml->address, base);
2885 			ml->address = MIN(ml->address, base);
2886 			break;
2887 		}
2888 	}
2889 	if (ml == NULL) {
2890 		nl = GETSTRUCT(struct memlist, 1);
2891 		nl->address = base;
2892 		nl->size = len;
2893 		nl->next = NULL;
2894 		nl->prev = tl;
2895 		tl->next = nl;
2896 	}
2897 
2898 	memlist_coalesce(mlist);
2899 
2900 	return (mlist);
2901 }
2902 
2903 static void
2904 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2905 {
2906 	kpreempt_disable();
2907 
2908 	/* get register address, read madr value */
2909 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2910 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2911 	} else {
2912 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2913 	}
2914 
2915 	kpreempt_enable();
2916 }
2917 
2918 
2919 static uint64_t *
2920 drmach_prep_mc_rename(uint64_t *p, int local,
2921 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2922 {
2923 	int bank;
2924 
2925 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2926 		uint64_t madr, bank_offset;
2927 
2928 		/* fetch mc's bank madr register value */
2929 		drmach_mem_read_madr(mp, bank, &madr);
2930 		if (madr & DRMACH_MC_VALID_MASK) {
2931 			uint64_t bankpa;
2932 
2933 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2934 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2935 			bankpa = new_basepa + bank_offset;
2936 
2937 			/* encode new base pa into madr */
2938 			madr &= ~DRMACH_MC_UM_MASK;
2939 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2940 			madr &= ~DRMACH_MC_LM_MASK;
2941 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2942 
2943 			if (local)
2944 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2945 			else
2946 				*p++ = DRMACH_MC_ADDR(mp, bank);
2947 
2948 			*p++ = madr;
2949 		}
2950 	}
2951 
2952 	return (p);
2953 }
2954 
2955 static uint64_t *
2956 drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2957 {
2958 	drmach_board_t	*bp;
2959 	int		 rv;
2960 	int		 idx;
2961 	drmachid_t	 id;
2962 	uint64_t	 last_scsr_pa = 0;
2963 
2964 	/* memory is always in slot 0 */
2965 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2966 
2967 	/* look up slot 1 board on same expander */
2968 	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2969 	rv = drmach_array_get(drmach_boards, idx, &id);
2970 	bp = id; /* bp will be NULL if board not found */
2971 
2972 	/* look up should never be out of bounds */
2973 	ASSERT(rv == 0);
2974 
2975 	/* nothing to do when board is not found or has no devices */
2976 	if (rv == -1 || bp == NULL || bp->devices == NULL)
2977 		return (p);
2978 
2979 	rv = drmach_array_first(bp->devices, &idx, &id);
2980 	while (rv == 0) {
2981 		if (DRMACH_IS_IO_ID(id)) {
2982 			drmach_io_t *io = id;
2983 
2984 			/*
2985 			 * Skip all non-Schizo IO devices (only IO nodes
2986 			 * that are Schizo devices have non-zero scsr_pa).
2987 			 * Filter out "other" leaf to avoid writing to the
2988 			 * same Schizo Control/Status Register twice.
2989 			 */
2990 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2991 				uint64_t scsr;
2992 
2993 				scsr  = lddphysio(io->scsr_pa);
2994 				scsr &= ~(DRMACH_LPA_BASE_MASK |
2995 						DRMACH_LPA_BND_MASK);
2996 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2997 				scsr |= DRMACH_PA_TO_LPA_BND(
2998 					new_basepa + DRMACH_MEM_SLICE_SIZE);
2999 
3000 				*p++ = io->scsr_pa;
3001 				*p++ = scsr;
3002 
3003 				last_scsr_pa = io->scsr_pa;
3004 			}
3005 		}
3006 		rv = drmach_array_next(bp->devices, &idx, &id);
3007 	}
3008 
3009 	return (p);
3010 }
3011 
3012 /*
3013  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
3014  * The latter is returned when drmach_rename fails to idle a Panther MC and
3015  * is used to identify the MC for error reporting.
3016  */
3017 static uint64_t *
3018 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
3019 {
3020 	/* only slot 0 has memory */
3021 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
3022 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
3023 
3024 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
3025 		ASSERT(DRMACH_IS_MEM_ID(mp));
3026 
3027 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3028 			if (local) {
3029 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
3030 				*p++ = (uintptr_t)mp;
3031 			}
3032 		} else if (!local) {
3033 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
3034 			*p++ = (uintptr_t)mp;
3035 		}
3036 	}
3037 
3038 	return (p);
3039 }
3040 
3041 static sbd_error_t *
3042 drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
3043 	uint64_t t_slice_offset, caddr_t buf, int buflen)
3044 {
3045 	_NOTE(ARGUNUSED(buflen))
3046 
3047 	uint64_t		*p = (uint64_t *)buf, *q;
3048 	sbd_error_t		*err;
3049 	int			 rv;
3050 	drmach_mem_t		*mp, *skip_mp;
3051 	uint64_t		 s_basepa, t_basepa;
3052 	uint64_t		 s_new_basepa, t_new_basepa;
3053 
3054 	/* verify supplied buffer space is adequate */
3055 	ASSERT(buflen >=
3056 		/* addr for all possible MC banks */
3057 		(sizeof (uint64_t) * 4 * 4 * 18) +
3058 		/* list section terminator */
3059 		(sizeof (uint64_t) * 1) +
3060 		/* addr/id tuple for local Panther MC idle reg */
3061 		(sizeof (uint64_t) * 2) +
3062 		/* list section terminator */
3063 		(sizeof (uint64_t) * 1) +
3064 		/* addr/id tuple for 2 boards with 4 Panther MC idle regs */
3065 		(sizeof (uint64_t) * 2 * 2 * 4) +
3066 		/* list section terminator */
3067 		(sizeof (uint64_t) * 1) +
3068 		/* addr/val tuple for 1 proc with 4 MC banks */
3069 		(sizeof (uint64_t) * 2 * 4) +
3070 		/* list section terminator */
3071 		(sizeof (uint64_t) * 1) +
3072 		/* addr/val tuple for 2 boards w/ 2 schizos each */
3073 		(sizeof (uint64_t) * 2 * 2 * 2) +
3074 		/* addr/val tuple for 2 boards w/ 16 MC banks each */
3075 		(sizeof (uint64_t) * 2 * 2 * 16) +
3076 		/* list section terminator */
3077 		(sizeof (uint64_t) * 1) +
3078 		/* addr/val tuple for 18 AXQs w/ two slots each */
3079 		(sizeof (uint64_t) * 2 * 2 * 18) +
3080 		/* list section terminator */
3081 		(sizeof (uint64_t) * 1) +
3082 		/* list terminator */
3083 		(sizeof (uint64_t) * 1));
3084 
3085 	/* copy bank list to rename script */
3086 	mutex_enter(&drmach_bus_sync_lock);
3087 	for (q = drmach_bus_sync_list; *q; q++, p++)
3088 		*p = *q;
3089 	mutex_exit(&drmach_bus_sync_lock);
3090 
3091 	/* list section terminator */
3092 	*p++ = 0;
3093 
3094 	/*
3095 	 * Write idle script for MC on this processor.  A script will be
3096 	 * produced only if this is a Panther processor on the source or
3097 	 * target board.
3098 	 */
3099 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3100 		p = drmach_prep_pn_mc_idle(p, s_mp, 1);
3101 
3102 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3103 		p = drmach_prep_pn_mc_idle(p, t_mp, 1);
3104 
3105 	/* list section terminator */
3106 	*p++ = 0;
3107 
3108 	/*
3109 	 * Write idle script for all other MCs on source and target
3110 	 * Panther boards.
3111 	 */
3112 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3113 		p = drmach_prep_pn_mc_idle(p, s_mp, 0);
3114 
3115 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3116 		p = drmach_prep_pn_mc_idle(p, t_mp, 0);
3117 
3118 	/* list section terminator */
3119 	*p++ = 0;
3120 
3121 	/*
3122 	 * Step 1:	Write source base address to target MC
3123 	 *		with present bit off.
3124 	 * Step 2:	Now rewrite target reg with present bit on.
3125 	 */
3126 	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
3127 	ASSERT(err == NULL);
3128 	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
3129 	ASSERT(err == NULL);
3130 
3131 	/* exchange base pa. include slice offset in new target base pa */
3132 	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
3133 	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
3134 			t_slice_offset;
3135 
3136 	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
3137 	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);
3138 
3139 	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
3140 		CPU->cpu_id);
3141 
3142 	/*
3143 	 * Write rename script for MC on this processor.  A script will
3144 	 * be produced only if this processor is on the source or target
3145 	 * board.
3146 	 */
3147 
3148 	skip_mp = NULL;
3149 	mp = s_mp->dev.bp->mem;
3150 	while (mp != NULL && skip_mp == NULL) {
3151 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3152 			skip_mp = mp;
3153 			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
3154 			    s_new_basepa);
3155 		}
3156 
3157 		mp = mp->next;
3158 	}
3159 
3160 	mp = t_mp->dev.bp->mem;
3161 	while (mp != NULL && skip_mp == NULL) {
3162 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3163 			skip_mp = mp;
3164 			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
3165 			    t_new_basepa);
3166 		}
3167 
3168 		mp = mp->next;
3169 	}
3170 
3171 	/* list section terminator */
3172 	*p++ = 0;
3173 
3174 	/*
3175 	 * Write rename script for all other MCs on source and target
3176 	 * boards.
3177 	 */
3178 
3179 	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
3180 		if (mp == skip_mp)
3181 			continue;
3182 		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
3183 	}
3184 
3185 	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
3186 		if (mp == skip_mp)
3187 			continue;
3188 		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
3189 	}
3190 
3191 	/* Write rename script for Schizo LPA_BASE/LPA_BND */
3192 	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
3193 	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);
3194 
3195 	/* list section terminator */
3196 	*p++ = 0;
3197 
3198 	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
3199 		DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
3200 		DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));
3201 
3202 	rv = axq_do_casm_rename_script(&p,
3203 		DRMACH_PA_TO_SLICE(s_new_basepa),
3204 		DRMACH_PA_TO_SLICE(t_new_basepa));
3205 	if (rv == DDI_FAILURE)
3206 		return (DRMACH_INTERNAL_ERROR());
3207 
3208 	/* list section & final terminator */
3209 	*p++ = 0;
3210 	*p++ = 0;
3211 
3212 #ifdef DEBUG
3213 	{
3214 		uint64_t *q = (uint64_t *)buf;
3215 
3216 		/* paranoia */
3217 		ASSERT((caddr_t)p <= buf + buflen);
3218 
3219 		DRMACH_PR("MC bank base pa list:\n");
3220 		while (*q) {
3221 			uint64_t a = *q++;
3222 
3223 			DRMACH_PR("0x%lx\n", a);
3224 		}
3225 
3226 		/* skip terminator */
3227 		q += 1;
3228 
3229 		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
3230 		while (*q) {
3231 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3232 			q += 2;
3233 		}
3234 
3235 		/* skip terminator */
3236 		q += 1;
3237 
3238 		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
3239 		while (*q) {
3240 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3241 			q += 2;
3242 		}
3243 
3244 		/* skip terminator */
3245 		q += 1;
3246 
3247 		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
3248 		while (*q) {
3249 			uint64_t r = *q++;	/* register address */
3250 			uint64_t v = *q++;	/* new register value */
3251 
3252 			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
3253 				r,
3254 				v,
3255 				DRMACH_MC_UM_TO_PA(v)|DRMACH_MC_LM_TO_PA(v));
3256 		}
3257 
3258 		/* skip terminator */
3259 		q += 1;
3260 
3261 		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
3262 		while (*q) {
3263 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3264 			q += 2;
3265 		}
3266 
3267 		/* skip terminator */
3268 		q += 1;
3269 
3270 		DRMACH_PR("AXQ reprogramming script:\n");
3271 		while (*q) {
3272 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3273 			q += 2;
3274 		}
3275 
3276 		/* verify final terminator is present */
3277 		ASSERT(*(q + 1) == 0);
3278 
3279 		DRMACH_PR("copy-rename script 0x%p, len %d\n",
3280 			buf, (int)((intptr_t)p - (intptr_t)buf));
3281 
3282 		if (drmach_debug)
3283 			DELAY(10000000);
3284 	}
3285 #endif
3286 
3287 	return (NULL);
3288 }
3289 
3290 static void
3291 drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
3292 {
3293 	int		 rv;
3294 
3295 	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));
3296 
3297 	if (bp->devices) {
3298 		int		 d_idx;
3299 		drmachid_t	 d_id;
3300 
3301 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3302 		while (rv == 0) {
3303 			if (DRMACH_IS_CPU_ID(d_id)) {
3304 				drmach_cpu_t	*cp = d_id;
3305 				processorid_t	 cpuid = cp->cpuid;
3306 
3307 				mutex_enter(&cpu_lock);
3308 				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
3309 					drmach_xt_mb[cpuid] = 0x80 | slice;
3310 				mutex_exit(&cpu_lock);
3311 			}
3312 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3313 		}
3314 	}
3315 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
3316 		drmach_board_t	*s1bp = NULL;
3317 
3318 		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
3319 		    (void *) &s1bp);
3320 		if (rv == 0 && s1bp != NULL) {
3321 			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
3322 			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
3323 			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
3324 		}
3325 	}
3326 }
3327 
3328 sbd_error_t *
3329 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
3330 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
3331 {
3332 	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
3333 	extern void drmach_rename_end(void);
3334 
3335 	drmach_mem_t	*s_mp, *t_mp;
3336 	struct memlist	*x_ml;
3337 	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
3338 	int		 len;
3339 	caddr_t		 bp, wp;
3340 	uint_t		*p, *q;
3341 	sbd_error_t	*err;
3342 	tte_t		*tte;
3343 	drmach_copy_rename_t *cr;
3344 
3345 	if (!DRMACH_IS_MEM_ID(s_id))
3346 		return (drerr_new(0, ESTC_INAPPROP, NULL));
3347 	if (!DRMACH_IS_MEM_ID(t_id))
3348 		return (drerr_new(0, ESTC_INAPPROP, NULL));
3349 	s_mp = s_id;
3350 	t_mp = t_id;
3351 
3352 	/* get starting physical address of target memory */
3353 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
3354 	if (err)
3355 		return (err);
3356 
3357 	/* calculate slice offset mask from slice size */
3358 	off_mask = DRMACH_MEM_SLICE_SIZE - 1;
3359 
3360 	/* calculate source and target base pa */
3361 	s_copybasepa = c_ml->address;
3362 	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);
3363 
3364 	/* paranoia */
3365 	ASSERT((c_ml->address & off_mask) >= t_slice_offset);
3366 
3367 	/* adjust copy memlist addresses to be relative to copy base pa */
3368 	x_ml = c_ml;
3369 	while (x_ml != NULL) {
3370 		x_ml->address -= s_copybasepa;
3371 		x_ml = x_ml->next;
3372 	}
3373 
3374 #ifdef DEBUG
3375 	{
3376 	uint64_t s_basepa, s_size, t_size;
3377 
3378 	x_ml = c_ml;
3379 	while (x_ml->next != NULL)
3380 		x_ml = x_ml->next;
3381 
3382 	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
3383 		s_copybasepa,
3384 		s_copybasepa + x_ml->address + x_ml->size);
3385 
3386 	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
3387 		t_copybasepa,
3388 		t_copybasepa + x_ml->address + x_ml->size);
3389 
3390 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
3391 	DRMACH_MEMLIST_DUMP(c_ml);
3392 
3393 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
3394 	ASSERT(err == NULL);
3395 
3396 	err = drmach_mem_get_size(s_id, &s_size);
3397 	ASSERT(err == NULL);
3398 
3399 	err = drmach_mem_get_size(t_id, &t_size);
3400 	ASSERT(err == NULL);
3401 
3402 	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
3403 		s_basepa, s_size);
3404 	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
3405 		t_basepa, t_size);
3406 	}
3407 #endif /* DEBUG */
3408 
3409 	/* Map in appropriate cpu sram page */
3410 	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
3411 	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
3412 	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
3413 	sfmmu_dtlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);
3414 	sfmmu_itlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);
3415 
3416 	bp = wp = drmach_cpu_sram_va;
3417 
3418 	/* Make sure the rename routine will fit */
3419 	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
3420 	ASSERT(wp + len < bp + PAGESIZE);
3421 
3422 	/* copy text. standard bcopy not designed to work in nc space */
3423 	p = (uint_t *)wp;
3424 	q = (uint_t *)drmach_rename;
3425 	while (q < (uint_t *)drmach_rename_end)
3426 		*p++ = *q++;
3427 
3428 	/* zero remainder. standard bzero not designed to work in nc space */
3429 	while (p < (uint_t *)(bp + PAGESIZE))
3430 		*p++ = 0;
3431 
3432 	DRMACH_PR("drmach_rename function 0x%p, len %d\n", wp, len);
3433 	wp += (len + 15) & ~15;
3434 
3435 	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset,
3436 		wp, PAGESIZE - (wp - bp));
3437 	if (err) {
3438 cleanup:
3439 		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3440 			(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
3441 		return (err);
3442 	}
3443 
3444 	/* disable and flush CDC */
3445 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
3446 		axq_cdc_enable_all();	/* paranoia */
3447 		err = DRMACH_INTERNAL_ERROR();
3448 		goto cleanup;
3449 	}
3450 
3451 	/* mark both memory units busy */
3452 	t_mp->dev.busy++;
3453 	s_mp->dev.busy++;
3454 
3455 	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
3456 	    VM_SLEEP);
3457 	cr->isa = (void *)drmach_copy_rename_init;
3458 	cr->data = wp;
3459 	cr->c_ml = c_ml;
3460 	cr->s_mp = s_mp;
3461 	cr->t_mp = t_mp;
3462 	cr->s_copybasepa = s_copybasepa;
3463 	cr->t_copybasepa = t_copybasepa;
3464 	cr->ecode = DRMACH_CR_OK;
3465 
3466 	mutex_enter(&drmach_slice_table_lock);
3467 
3468 	mutex_enter(&drmach_xt_mb_lock);
3469 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
3470 
3471 	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
3472 		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
3473 			DRMACH_PA_TO_SLICE(t_copybasepa));
3474 	}
3475 	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
3476 		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
3477 			DRMACH_PA_TO_SLICE(s_copybasepa));
3478 	}
3479 
3480 	*cr_id = cr;
3481 	return (NULL);
3482 }
3483 
/*
 * Debug aids written by drmach_copy_rename() and printed by
 * drmach_copy_rename_fini():
 *   drmach_rename_count  - number of other cpus expected to check in
 *   drmach_rename_ntries - polling iterations spent waiting for them
 */
3484 int drmach_rename_count;
3485 int drmach_rename_ntries;
3486 
3487 sbd_error_t *
3488 drmach_copy_rename_fini(drmachid_t id)
3489 {
3490 	drmach_copy_rename_t	*cr = id;
3491 	sbd_error_t		*err = NULL;
3492 	dr_mbox_msg_t		*obufp;
3493 
3494 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3495 
3496 	axq_cdc_enable_all();
3497 
3498 	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3499 		(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
3500 
3501 	switch (cr->ecode) {
3502 	case DRMACH_CR_OK:
3503 		break;
3504 	case DRMACH_CR_MC_IDLE_ERR: {
3505 		dev_info_t	*dip = NULL;
3506 		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
3507 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3508 
3509 		ASSERT(DRMACH_IS_MEM_ID(mp));
3510 
3511 		err = drmach_get_dip(mp, &dip);
3512 
3513 		ASSERT(err == NULL);
3514 		ASSERT(dip != NULL);
3515 
3516 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3517 		(void) ddi_pathname(dip, path);
3518 		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
3519 		    "copy-rename aborted", path, mp->dev.bp->cm.name);
3520 		kmem_free(path, MAXPATHLEN);
3521 		break;
3522 	}
3523 	case DRMACH_CR_IOPAUSE_ERR:
3524 		ASSERT((uintptr_t)cr->earg >= 0 &&
3525 		    (uintptr_t)cr->earg < AXQ_MAX_EXP);
3526 
3527 		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
3528 		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
3529 		    " to copy-rename", (uintptr_t)cr->earg);
3530 		break;
3531 	case DRMACH_CR_ONTRAP_ERR:
3532 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3533 		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
3534 		    "memory error");
3535 		break;
3536 	default:
3537 		err = DRMACH_INTERNAL_ERROR();
3538 		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
3539 		    cr->ecode);
3540 		break;
3541 	}
3542 
3543 #ifdef DEBUG
3544 	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
3545 	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
3546 		int	i;
3547 		for (i = 0; i < NCPU; i++) {
3548 			if (drmach_xt_mb[i])
3549 				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
3550 		}
3551 	}
3552 #endif
3553 	mutex_exit(&drmach_xt_mb_lock);
3554 
3555 	if (cr->c_ml != NULL)
3556 		memlist_delete(cr->c_ml);
3557 
3558 	cr->t_mp->dev.busy--;
3559 	cr->s_mp->dev.busy--;
3560 
3561 	if (err) {
3562 		mutex_exit(&drmach_slice_table_lock);
3563 		goto done;
3564 	}
3565 
3566 	/* update casm shadow for target and source board */
3567 	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
3568 	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
3569 	mutex_exit(&drmach_slice_table_lock);
3570 
3571 	mutex_enter(&drmach_bus_sync_lock);
3572 	drmach_bus_sync_list_update();
3573 	mutex_exit(&drmach_bus_sync_lock);
3574 
3575 	/*
3576 	 * Make a good-faith effort to notify the SC about the copy-rename, but
3577 	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
3578 	 * will duplicate the update.
3579 	 */
3580 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
3581 	mutex_enter(&drmach_slice_table_lock);
3582 	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
3583 	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
3584 	mutex_exit(&drmach_slice_table_lock);
3585 	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
3586 		(caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
3587 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
3588 
3589 done:
3590 	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));
3591 
3592 	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
3593 		drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);
3594 
3595 	return (err);
3596 }
3597 
/*
 * Debug knob: when non-zero, drmach_copy_rename() runs a short busy loop
 * after every 32-byte copy.
 */
3598 int drmach_slow_copy = 0;
3599 
/*
 * Perform the copy-rename described by the handle that
 * drmach_copy_rename_init() produced.  cpu_lock must be held and
 * cr->ecode must be DRMACH_CR_OK on entry.
 *
 * Sequence:
 *   1. Pause slot1 IO.  On failure, record DRMACH_CR_IOPAUSE_ERR with
 *      the expander number in cr->earg and return.
 *   2. Cross-trap every other ready cpu into drmach_rename_wait and poll
 *      drmach_xt_ready for up to drmach_cpu_ntries iterations.
 *   3. With CE reporting and interrupts disabled, and under on_trap()
 *      protection, copy each span of cr->c_ml 32 bytes at a time, flush
 *      the E$, call the rename routine located at drmach_cpu_sram_va,
 *      then flush the E$, I$, D$ and P$.
 *   4. Restore pstate and the error enable register, resume slot1 IO and
 *      release the waiting cpus with drmach_rename_done, or with
 *      drmach_rename_abort when cr->ecode is no longer DRMACH_CR_OK.
 *
 * Nothing is returned; the outcome is reported through cr->ecode and
 * cr->earg.
 */
3600 void
3601 drmach_copy_rename(drmachid_t id)
3602 {
3603 	extern uint_t		 getpstate(void);
3604 	extern void		 setpstate(uint_t);
3605 
3606 	extern xcfunc_t		 drmach_rename_wait;
3607 	extern xcfunc_t		 drmach_rename_done;
3608 	extern xcfunc_t		 drmach_rename_abort;
3609 
3610 	drmach_copy_rename_t	*cr = id;
3611 	uint64_t		 neer;
3612 	struct memlist		*ml;
3613 	int			 i, count;
3614 	int			 csize, lnsize;
3615 	uint64_t		 caddr;
3616 	cpuset_t		 cpuset;
3617 	uint_t			 pstate;
3618 	uint32_t		 exp = 0;
3619 	on_trap_data_t		 otd;
3620 	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;
3621 
3622 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3623 	ASSERT(MUTEX_HELD(&cpu_lock));
3624 	ASSERT(cr->ecode == DRMACH_CR_OK);
3625 
3626 	/*
3627 	 * Prevent slot1 IO from accessing Safari memory bus.
3628 	 */
3629 	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
3630 		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
3631 		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
3632 		cr->earg = (void *)(uintptr_t)exp;
3633 		return;
3634 	}
3635 
	/* target every ready cpu except the one running this routine */
3636 	cpuset = cpu_ready_set;
3637 	CPUSET_DEL(cpuset, CPU->cpu_id);
3638 	count = ncpus - 1;
3639 	drmach_rename_count = count;	/* for debug */
3640 
3641 	drmach_xt_ready = 0;
3642 	xt_some(cpuset, drmach_rename_wait, NULL, NULL);
3643 
	/* bounded wait for the other cpus to report in via drmach_xt_ready */
3644 	for (i = 0; i < drmach_cpu_ntries; i++) {
3645 		if (drmach_xt_ready == count)
3646 			break;
3647 		DELAY(drmach_cpu_delay);
3648 	}
3649 
3650 	drmach_rename_ntries = i;	/* for debug */
3651 
3652 	drmach_xt_ready = 0;		/* steal the line back */
3653 	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
3654 		drmach_xt_mb[i] = drmach_xt_mb[i];
3655 
	/* parameters for the inline E$ flushes performed below */
3656 	caddr = drmach_iocage_paddr;
3657 	csize = cpunodes[CPU->cpu_id].ecache_size;
3658 	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;
3659 
3660 	/* disable CE reporting */
3661 	neer = get_error_enable();
3662 	set_error_enable(neer & ~EN_REG_CEEN);
3663 
3664 	/* disable interrupts (paranoia) */
3665 	pstate = getpstate();
3666 	setpstate(pstate & ~PSTATE_IE);
3667 
3668 	/*
3669 	 * Execute copy-rename under on_trap to protect against a panic due
3670 	 * to an uncorrectable error. Instead, DR will abort the copy-rename
3671 	 * operation and rely on the OS to do the error reporting.
3672 	 *
3673 	 * In general, trap handling on any cpu once the copy begins
3674 	 * can result in an inconsistent memory image on the target.
3675 	 */
3676 	if (on_trap(&otd, OT_DATA_EC)) {
3677 		cr->ecode = DRMACH_CR_ONTRAP_ERR;
3678 		goto copy_rename_end;
3679 	}
3680 
3681 	/*
3682 	 * DO COPY.
3683 	 */
3684 	for (ml = cr->c_ml; ml; ml = ml->next) {
3685 		uint64_t	s_pa, t_pa;
3686 		uint64_t	nbytes;
3687 
		/* memlist addresses are relative to the copy base pa */
3688 		s_pa = cr->s_copybasepa + ml->address;
3689 		t_pa = cr->t_copybasepa + ml->address;
3690 		nbytes = ml->size;
3691 
3692 		while (nbytes != 0ull) {
3693 			/* copy 32 bytes at s_pa to t_pa */
3694 			bcopy32_il(s_pa, t_pa);
3695 
3696 			/* increment by 32 bytes */
3697 			s_pa += (4 * sizeof (uint64_t));
3698 			t_pa += (4 * sizeof (uint64_t));
3699 
3700 			/* decrement by 32 bytes */
3701 			nbytes -= (4 * sizeof (uint64_t));
3702 
3703 			if (drmach_slow_copy) {	/* for debug */
3704 				uint64_t i = 13 * 50;
3705 				while (i--);
3706 			}
3707 		}
3708 	}
3709 
3710 	/*
3711 	 * XXX CHEETAH SUPPORT
3712 	 * For cheetah, we need to grab the iocage lock since iocage
3713 	 * memory is used for e$ flush.
3714 	 *
3715 	 * NOTE: This code block is dangerous at this point in the
3716 	 * copy-rename operation. It modifies memory after the copy
3717 	 * has taken place which means that any persistent state will
3718 	 * be abandoned after the rename operation. The code is also
3719 	 * performing thread synchronization at a time when all but
3720 	 * one processors are paused. This is a potential deadlock
3721 	 * situation.
3722 	 *
3723 	 * This code block must be moved to drmach_copy_rename_init.
3724 	 */
3725 	if (drmach_is_cheetah) {
3726 		mutex_enter(&drmach_iocage_lock);
3727 		while (drmach_iocage_is_busy)
3728 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
3729 		drmach_iocage_is_busy = 1;
3730 		drmach_iocage_mem_scrub(ecache_size * 2);
3731 		mutex_exit(&drmach_iocage_lock);
3732 	}
3733 
3734 	/*
3735 	 * bcopy32_il is implemented as a series of ldxa/stxa via
3736 	 * ASI_MEM instructions. Following the copy loop, the E$
3737 	 * of the master (this) processor will have lines in state
3738 	 * O that correspond to lines of home memory in state gI.
3739 	 * An E$ flush is necessary to commit these lines before
3740 	 * proceeding with the rename operation.
3741 	 *
3742 	 * Flushing the E$ will automatically flush the W$, but
3743 	 * the D$ and I$ must be flushed separately and explicitly.
3744 	 */
3745 	flush_ecache_il(caddr, csize, lnsize);	/* inline version */
3746 
3747 	/*
3748 	 * Each line of home memory is now in state gM, except in
3749 	 * the case of a cheetah processor when the E$ flush area
3750 	 * is included within the copied region. In such a case,
3751 	 * the lines of home memory for the upper half of the
3752 	 * flush area are in state gS.
3753 	 *
3754 	 * Each line of target memory is in state gM.
3755 	 *
3756 	 * Each line of this processor's E$ is in state I, except
3757 	 * those of a cheetah processor. All lines of a cheetah
3758 	 * processor's E$ are in state S and correspond to the lines
3759 	 * in upper half of the E$ flush area.
3760 	 *
3761 	 * It is vital at this point that none of the lines in the
3762 	 * home or target memories are in state gI and that none
3763 	 * of the lines in this processor's E$ are in state O or Os.
3764 	 * A single instance of such a condition will cause loss of
3765 	 * coherency following the rename operation.
3766 	 */
3767 
3768 	/*
3769 	 * Rename
	 *
	 * The call goes to the copy of the rename routine that
	 * drmach_copy_rename_init() placed at drmach_cpu_sram_va;
	 * cr->data is the script stored after that text.
3770 	 */
3771 	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);
3772 
3773 	/*
3774 	 * Rename operation complete. The physical address space
3775 	 * of the home and target memories have been swapped, the
3776 	 * routing data in the respective CASM entries have been
3777 	 * swapped, and LPA settings in the processor and schizo
3778 	 * devices have been reprogrammed accordingly.
3779 	 *
3780 	 * In the case of a cheetah processor, the E$ remains
3781 	 * populated with lines in state S that correspond to the
3782 	 * lines in the former home memory. Now that the physical
3783 	 * addresses have been swapped, these E$ lines correspond
3784 	 * to lines in the new home memory which are in state gM.
3785 	 * This combination is invalid. An additional E$ flush is
3786 	 * necessary to restore coherency. The E$ flush will cause
3787 	 * the lines of the new home memory for the flush region
3788 	 * to transition from state gM to gS. The former home memory
3789 	 * remains unmodified. This additional E$ flush has no effect
3790 	 * on a cheetah+ processor.
3791 	 */
3792 	flush_ecache_il(caddr, csize, lnsize);	/* inline version */
3793 
3794 	/*
3795 	 * The D$ and I$ must be flushed to ensure that coherency is
3796 	 * maintained. Any line in a cache that is in the valid
3797 	 * state has its corresponding line of the new home memory
3798 	 * in the gM state. This is an invalid condition. When the
3799 	 * flushes are complete the cache line states will be
3800 	 * resynchronized with those in the new home memory.
3801 	 */
3802 	flush_icache_il();			/* inline version */
3803 	flush_dcache_il();			/* inline version */
3804 	flush_pcache_il();			/* inline version */
3805 
3806 copy_rename_end:
3807 
3808 	no_trap();
3809 
3810 	/* enable interrupts */
3811 	setpstate(pstate);
3812 
3813 	/* enable CE reporting */
3814 	set_error_enable(neer);
3815 
	/* any recorded failure makes the waiting cpus abort instead */
3816 	if (cr->ecode != DRMACH_CR_OK)
3817 		drmach_end_wait_xcall = drmach_rename_abort;
3818 
3819 	/*
3820 	 * XXX CHEETAH SUPPORT
	 *
	 * NOTE(review): when on_trap() fires before the cheetah block
	 * above has run, this epilogue still scrubs the iocage and
	 * clears drmach_iocage_is_busy although this routine never set
	 * it -- confirm that this is intended.
3821 	 */
3822 	if (drmach_is_cheetah) {
3823 		mutex_enter(&drmach_iocage_lock);
3824 		drmach_iocage_mem_scrub(ecache_size * 2);
3825 		drmach_iocage_is_busy = 0;
3826 		cv_signal(&drmach_iocage_cv);
3827 		mutex_exit(&drmach_iocage_lock);
3828 	}
3829 
3830 	axq_iopause_disable_all();
3831 
	/* let the cpus parked in drmach_rename_wait proceed */
3832 	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
3833 }
3834 
/*
 * Forward declarations of the I/O device operations that drmach_io_new()
 * installs in the dispose, release and status slots of a new I/O device.
 */
3835 static void drmach_io_dispose(drmachid_t);
3836 static sbd_error_t *drmach_io_release(drmachid_t);
3837 static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3838 
3839 static sbd_error_t *
3840 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3841 {
3842 	drmach_node_t	*node = proto->node;
3843 	sbd_error_t	*err;
3844 	drmach_reg_t	 regs[3];
3845 	int		 rv;
3846 	int		 len = 0;
3847 
3848 	rv = node->n_getproplen(node, "reg", &len);
3849 	if (rv != 0 || len != sizeof (regs)) {
3850 		sbd_error_t *err;
3851 
3852 		/* pci nodes are expected to have regs */
3853 		err = drerr_new(1, ESTC_GETPROP,
3854 			"Device Node 0x%x: property %s",
3855 			(uint_t)node->get_dnode(node), "reg");
3856 		return (err);
3857 	}
3858 
3859 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3860 	if (rv) {
3861 		sbd_error_t *err;
3862 
3863 		err = drerr_new(1, ESTC_GETPROP,
3864 			"Device Node 0x%x: property %s",
3865 			(uint_t)node->get_dnode(node), "reg");
3866 
3867 		return (err);
3868 	}
3869 
3870 	/*
3871 	 * Fix up unit number so that Leaf A has a lower unit number
3872 	 * than Leaf B.
3873 	 */
3874 	if ((proto->portid % 2) != 0) {
3875 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3876 			proto->unum = 0;
3877 		else
3878 			proto->unum = 1;
3879 	} else {
3880 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3881 			proto->unum = 2;
3882 		else
3883 			proto->unum = 3;
3884 	}
3885 
3886 	err = drmach_io_new(proto, idp);
3887 	if (err == NULL) {
3888 		drmach_io_t *self = *idp;
3889 
3890 		/* reassemble 64-bit base address */
3891 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3892 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3893 	}
3894 
3895 	return (err);
3896 }
3897 
3898 static sbd_error_t *
3899 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3900 {
3901 	drmach_io_t	*ip;
3902 
3903 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3904 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3905 	ip->dev.node = drmach_node_dup(proto->node);
3906 	ip->dev.cm.isa = (void *)drmach_io_new;
3907 	ip->dev.cm.dispose = drmach_io_dispose;
3908 	ip->dev.cm.release = drmach_io_release;
3909 	ip->dev.cm.status = drmach_io_status;
3910 
3911 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3912 		ip->dev.type, ip->dev.unum);
3913 
3914 	*idp = (drmachid_t)ip;
3915 	return (NULL);
3916 }
3917 
3918 static void
3919 drmach_io_dispose(drmachid_t id)
3920 {
3921 	drmach_io_t *self;
3922 
3923 	ASSERT(DRMACH_IS_IO_ID(id));
3924 
3925 	self = id;
3926 	if (self->dev.node)
3927 		drmach_node_dispose(self->dev.node);
3928 
3929 	kmem_free(self, sizeof (*self));
3930 }
3931 
3932 /*ARGSUSED*/
3933 sbd_error_t *
3934 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3935 {
3936 	drmach_board_t	*bp = (drmach_board_t *)id;
3937 	sbd_error_t	*err = NULL;
3938 
3939 	if (id && DRMACH_IS_BOARD_ID(id)) {
3940 		switch (cmd) {
3941 			case SBD_CMD_TEST:
3942 			case SBD_CMD_STATUS:
3943 			case SBD_CMD_GETNCM:
3944 				break;
3945 			case SBD_CMD_CONNECT:
3946 				if (bp->connected)
3947 					err = drerr_new(0, ESBD_STATE, NULL);
3948 
3949 				if (bp->cond == SBD_COND_UNUSABLE)
3950 					err = drerr_new(0,
3951 						ESBD_FATAL_STATE, NULL);
3952 				break;
3953 			case SBD_CMD_DISCONNECT:
3954 				if (!bp->connected)
3955 					err = drerr_new(0, ESBD_STATE, NULL);
3956 
3957 				if (bp->cond == SBD_COND_UNUSABLE)
3958 					err = drerr_new(0,
3959 						ESBD_FATAL_STATE, NULL);
3960 				break;
3961 			default:
3962 				if (bp->cond == SBD_COND_UNUSABLE)
3963 					err = drerr_new(0,
3964 						ESBD_FATAL_STATE, NULL);
3965 				break;
3966 
3967 		}
3968 	}
3969 
3970 	return (err);
3971 }
3972 
/*
 * Post-operation hook.  None of the arguments are examined and no work
 * is done here; NULL is always returned.
 */
3973 /*ARGSUSED*/
3974 sbd_error_t *
3975 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3976 {
3977 	return (NULL);
3978 }
3979 
3980 sbd_error_t *
3981 drmach_board_assign(int bnum, drmachid_t *id)
3982 {
3983 	sbd_error_t	*err = NULL;
3984 	caddr_t		obufp;
3985 
3986 	if (!drmach_initialized && drmach_init() == -1) {
3987 		err = DRMACH_INTERNAL_ERROR();
3988 	}
3989 
3990 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
3991 
3992 	if (!err) {
3993 		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
3994 			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
3995 		} else {
3996 			drmach_board_t	*bp;
3997 
3998 			if (*id)
3999 				rw_downgrade(&drmach_boards_rwlock);
4000 
4001 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4002 			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
4003 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4004 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4005 
4006 			if (!err) {
4007 				bp = *id;
4008 				if (!*id)
4009 					bp = *id  =
4010 					    (drmachid_t)drmach_board_new(bnum);
4011 				bp->assigned = 1;
4012 			}
4013 		}
4014 	}
4015 	rw_exit(&drmach_boards_rwlock);
4016 	return (err);
4017 }
4018 
4019 static uint_t
4020 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
4021 {
4022 	uint_t	port, port_start, port_end;
4023 	uint_t	non_panther_cpus = 0;
4024 	uint_t	impl;
4025 
4026 	ASSERT(gdcd != NULL);
4027 
4028 	/*
4029 	 * Determine PRD port indices based on slot location.
4030 	 */
4031 	switch (slot) {
4032 	case 0:
4033 		port_start = 0;
4034 		port_end = 3;
4035 		break;
4036 	case 1:
4037 		port_start = 4;
4038 		port_end = 5;
4039 		break;
4040 	default:
4041 		ASSERT(0);
4042 		/* check all */
4043 		port_start = 0;
4044 		port_end = 5;
4045 		break;
4046 	}
4047 
4048 	for (port = port_start; port <= port_end; port++) {
4049 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
4050 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
4051 			/*
4052 			 * This Safari port passed POST and represents a
4053 			 * cpu, so check the implementation.
4054 			 */
4055 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
4056 			    & 0xffff;
4057 
4058 			switch (impl) {
4059 			case CHEETAH_IMPL:
4060 			case CHEETAH_PLUS_IMPL:
4061 			case JAGUAR_IMPL:
4062 				non_panther_cpus++;
4063 				break;
4064 			case PANTHER_IMPL:
4065 				break;
4066 			default:
4067 				ASSERT(0);
4068 				non_panther_cpus++;
4069 				break;
4070 			}
4071 		}
4072 	}
4073 
4074 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4075 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4076 
4077 	return (non_panther_cpus);
4078 }
4079 
4080 sbd_error_t *
4081 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
4082 {
4083 	_NOTE(ARGUNUSED(opts))
4084 
4085 	drmach_board_t		*bp = (drmach_board_t *)id;
4086 	sbd_error_t		*err;
4087 	dr_mbox_msg_t		*obufp;
4088 	gdcd_t			*gdcd = NULL;
4089 	uint_t			exp, slot;
4090 	sc_gptwocfg_cookie_t	scc;
4091 	int			panther_pages_enabled;
4092 
4093 	if (!DRMACH_IS_BOARD_ID(id))
4094 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4095 
4096 	/*
4097 	 * Build the casm info portion of the CLAIM message.
4098 	 */
4099 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4100 	mutex_enter(&drmach_slice_table_lock);
4101 	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
4102 	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
4103 	mutex_exit(&drmach_slice_table_lock);
4104 	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
4105 		sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4106 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4107 
4108 	if (err) {
4109 		/*
4110 		 * if mailbox timeout or unrecoverable error from SC,
4111 		 * board cannot be touched.  Mark the status as
4112 		 * unusable.
4113 		 */
4114 		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4115 			(err->e_code == ESTC_MBXRPLY))
4116 				bp->cond = SBD_COND_UNUSABLE;
4117 		return (err);
4118 	}
4119 
4120 	gdcd = drmach_gdcd_new();
4121 	if (gdcd == NULL) {
4122 		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
4123 		    bp->cm.name);
4124 		return (DRMACH_INTERNAL_ERROR());
4125 	}
4126 
4127 	/*
4128 	 * Read CPU SRAM DR buffer offset from GDCD.
4129 	 */
4130 	exp = DRMACH_BNUM2EXP(bp->bnum);
4131 	slot = DRMACH_BNUM2SLOT(bp->bnum);
4132 	bp->stardrb_offset =
4133 	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
4134 	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
4135 	    bp->stardrb_offset);
4136 
4137 	/*
4138 	 * Read board LPA setting from GDCD.
4139 	 */
4140 	bp->flags &= ~DRMACH_NULL_PROC_LPA;
4141 	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
4142 	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
4143 		bp->flags |= DRMACH_NULL_PROC_LPA;
4144 		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
4145 	}
4146 
4147 	/*
4148 	 * XXX Until the Solaris large pages support heterogeneous cpu
4149 	 * domains, DR needs to prevent the addition of non-Panther cpus
4150 	 * to an all-Panther domain with large pages enabled.
4151 	 */
4152 	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
4153 	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
4154 	    panther_pages_enabled && drmach_large_page_restriction) {
4155 		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
4156 		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
4157 		err = drerr_new(0, ESTC_SUPPORT, NULL);
4158 	}
4159 
4160 	if (err == NULL) {
4161 		/* do saf configurator stuff */
4162 		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
4163 		scc = sc_probe_board(bp->bnum);
4164 		if (scc == NULL)
4165 			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
4166 	}
4167 
4168 	if (err) {
4169 		/* flush CDC srams */
4170 		if (axq_cdc_flush_all() != DDI_SUCCESS) {
4171 			goto out;
4172 		}
4173 
4174 		/*
4175 		 * Build the casm info portion of the UNCLAIM message.
4176 		 */
4177 		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4178 		mutex_enter(&drmach_slice_table_lock);
4179 		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4180 		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4181 		mutex_exit(&drmach_slice_table_lock);
4182 		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
4183 			(caddr_t)obufp, sizeof (dr_mbox_msg_t),
4184 			(caddr_t)NULL, 0);
4185 
4186 		kmem_free(obufp, sizeof (dr_mbox_msg_t));
4187 
4188 		/*
4189 		 * we clear the connected flag just in case it would have
4190 		 * been set by a concurrent drmach_board_status() thread
4191 		 * before the UNCLAIM completed.
4192 		 */
4193 		bp->connected = 0;
4194 		goto out;
4195 	}
4196 
4197 	/*
4198 	 * Now that the board has been successfully attached, obtain
4199 	 * platform-specific DIMM serial id information for the board.
4200 	 */
4201 	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4202 	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
4203 		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
4204 	}
4205 
4206 out:
4207 	if (gdcd != NULL)
4208 		drmach_gdcd_dispose(gdcd);
4209 
4210 	return (err);
4211 }
4212 
4213 static void
4214 drmach_slice_table_update(drmach_board_t *bp, int invalidate)
4215 {
4216 	static char		*axq_name = "address-extender-queue";
4217 	static dev_info_t	*axq_dip = NULL;
4218 	static int		 axq_exp = -1;
4219 	static int		 axq_slot;
4220 	int			 e, s, slice;
4221 
4222 	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));
4223 
4224 	e = DRMACH_BNUM2EXP(bp->bnum);
4225 	if (invalidate) {
4226 		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);
4227 
4228 		/* invalidate cached casm value */
4229 		drmach_slice_table[e] = 0;
4230 
4231 		/* invalidate cached axq info if for same exp */
4232 		if (e == axq_exp && axq_dip) {
4233 			ndi_rele_devi(axq_dip);
4234 			axq_dip = NULL;
4235 		}
4236 	}
4237 
4238 	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
4239 		int i, portid;
4240 
4241 		/* search for an attached slot0 axq instance */
4242 		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
4243 			if (axq_dip)
4244 				ndi_rele_devi(axq_dip);
4245 			axq_dip = ddi_find_devinfo(axq_name, i, 0);
4246 			if (axq_dip && DDI_CF2(axq_dip)) {
4247 				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
4248 				    DDI_PROP_DONTPASS, "portid", -1);
4249 				if (portid == -1) {
4250 					DRMACH_PR("cant get portid of axq "
4251 					    "instance %d\n", i);
4252 					continue;
4253 				}
4254 
4255 				axq_exp = (portid >> 5) & 0x1f;
4256 				axq_slot = portid & 1;
4257 
4258 				if (invalidate && axq_exp == e)
4259 					continue;
4260 
4261 				if (axq_slot == 0)
4262 					break;	/* found */
4263 			}
4264 		}
4265 
4266 		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
4267 			if (axq_dip) {
4268 				ndi_rele_devi(axq_dip);
4269 				axq_dip = NULL;
4270 			}
4271 			DRMACH_PR("drmach_slice_table_update: failed to "
4272 			    "update axq dip\n");
4273 			return;
4274 		}
4275 
4276 	}
4277 
4278 	ASSERT(axq_dip);
4279 	ASSERT(axq_slot == 0);
4280 
4281 	if (invalidate)
4282 		return;
4283 
4284 	s = DRMACH_BNUM2SLOT(bp->bnum);
4285 	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n",
4286 		axq_exp, axq_slot, e, s);
4287 
4288 	/* invalidate entry */
4289 	drmach_slice_table[e] &= ~0x20;
4290 
4291 	/*
4292 	 * find a slice that routes to expander e. If no match
4293 	 * is found, drmach_slice_table[e] will remain invalid.
4294 	 *
4295 	 * The CASM is a routing table indexed by slice number.
4296 	 * Each element in the table contains permission bits,
4297 	 * a destination expander number and a valid bit. The
4298 	 * valid bit must true for the element to be meaningful.
4299 	 *
4300 	 * CASM entry structure
4301 	 *   Bits 15..6 ignored
4302 	 *   Bit  5	valid
4303 	 *   Bits 0..4	expander number
4304 	 *
4305 	 * NOTE: the for loop is really enumerating the range of slices,
4306 	 * which is ALWAYS equal to the range of expanders. Hence,
4307 	 * AXQ_MAX_EXP is okay to use in this loop.
4308 	 */
4309 	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
4310 		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);
4311 
4312 		if ((casm & 0x20) && (casm & 0x1f) == e)
4313 			drmach_slice_table[e] = 0x20 | slice;
4314 	}
4315 }
4316 
4317 /*
4318  * Get base and bound PAs for slot 1 board lpa programming
4319  * If a cpu/mem board is present in the same expander, use slice
4320  * information corresponding to the CASM.  Otherwise, set base and
4321  * bound PAs to 0.
4322  */
4323 static void
4324 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4325 {
4326 	drmachid_t s0id;
4327 
4328 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4329 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4330 
4331 	*basep = *boundp = 0;
4332 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4333 		s0id != 0) {
4334 
4335 		uint32_t slice;
4336 		if ((slice =
4337 			drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4338 				& 0x20) {
4339 
4340 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4341 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4342 		}
4343 	}
4344 }
4345 
4346 
4347 /*
4348  * Reprogram slot 1 lpa's as required.
4349  * The purpose of this routine is maintain the LPA settings of the devices
4350  * in slot 1. To date we know Schizo and Cheetah are the only devices that
4351  * require this attention. The LPA setting must match the slice field in the
4352  * CASM element for the local expander. This field is guaranteed to be
4353  * programmed in accordance with the cacheable address space on the slot 0
4354  * board of the local expander. If no memory is present on the slot 0 board,
4355  * there is no cacheable address space and, hence, the CASM slice field will
4356  * be zero or its valid bit will be false (or both).
4357  */
4358 
4359 static void
4360 drmach_slot1_lpa_set(drmach_board_t *bp)
4361 {
4362 	drmachid_t	id;
4363 	drmach_board_t	*s1bp = NULL;
4364 	int		rv, idx, is_maxcat = 1;
4365 	uint64_t	last_scsr_pa = 0;
4366 	uint64_t	new_basepa, new_boundpa;
4367 
4368 	if (DRMACH_BNUM2SLOT(bp->bnum)) {
4369 		s1bp = bp;
4370 		if (s1bp->devices == NULL) {
4371 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4372 				bp->bnum);
4373 			return;
4374 		}
4375 	} else {
4376 		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
4377 		/* nothing to do when board is not found or has no devices */
4378 		s1bp = id;
4379 		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
4380 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4381 				bp->bnum + 1);
4382 			return;
4383 		}
4384 		ASSERT(DRMACH_IS_BOARD_ID(id));
4385 	}
4386 	mutex_enter(&drmach_slice_table_lock);
4387 	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
4388 	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
4389 			s1bp->bnum, new_basepa, new_boundpa);
4390 
4391 	rv = drmach_array_first(s1bp->devices, &idx, &id);
4392 	while (rv == 0) {
4393 		if (DRMACH_IS_IO_ID(id)) {
4394 			drmach_io_t *io = id;
4395 
4396 			is_maxcat = 0;
4397 
4398 			/*
4399 			 * Skip all non-Schizo IO devices (only IO nodes
4400 			 * that are Schizo devices have non-zero scsr_pa).
4401 			 * Filter out "other" leaf to avoid writing to the
4402 			 * same Schizo Control/Status Register twice.
4403 			 */
4404 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
4405 				uint64_t scsr;
4406 
4407 				scsr  = lddphysio(io->scsr_pa);
4408 				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
4409 					scsr);
4410 				scsr &= ~(DRMACH_LPA_BASE_MASK |
4411 						DRMACH_LPA_BND_MASK);
4412 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
4413 				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);
4414 
4415 				stdphysio(io->scsr_pa, scsr);
4416 				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
4417 					scsr);
4418 
4419 				last_scsr_pa = io->scsr_pa;
4420 			}
4421 		}
4422 		rv = drmach_array_next(s1bp->devices, &idx, &id);
4423 	}
4424 
4425 	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
4426 		extern xcfunc_t	drmach_set_lpa;
4427 
4428 		DRMACH_PR("reprogramming maxcat lpa's");
4429 
4430 		mutex_enter(&cpu_lock);
4431 		rv = drmach_array_first(s1bp->devices, &idx, &id);
4432 		while (rv == 0 && id != NULL) {
4433 			if (DRMACH_IS_CPU_ID(id)) {
4434 				int ntries;
4435 				processorid_t cpuid;
4436 
4437 				cpuid = ((drmach_cpu_t *)id)->cpuid;
4438 
4439 				/*
4440 				 * Check for unconfigured or powered-off
4441 				 * MCPUs.  If CPU_READY flag is clear, the
4442 				 * MCPU cannot be xcalled.
4443 				 */
4444 				if ((cpu[cpuid] == NULL) ||
4445 					(cpu[cpuid]->cpu_flags &
4446 					CPU_READY) == 0) {
4447 
4448 					rv = drmach_array_next(s1bp->devices,
4449 						&idx, &id);
4450 					continue;
4451 				}
4452 
4453 				/*
4454 				 * XXX CHEETAH SUPPORT
4455 				 * for cheetah, we need to clear iocage
4456 				 * memory since it will be used for e$ flush
4457 				 * in drmach_set_lpa.
4458 				 */
4459 				if (drmach_is_cheetah) {
4460 					mutex_enter(&drmach_iocage_lock);
4461 					while (drmach_iocage_is_busy)
4462 						cv_wait(&drmach_iocage_cv,
4463 							&drmach_iocage_lock);
4464 					drmach_iocage_is_busy = 1;
4465 					drmach_iocage_mem_scrub(
4466 						ecache_size * 2);
4467 					mutex_exit(&drmach_iocage_lock);
4468 				}
4469 
4470 				/*
4471 				 * drmach_slice_table[*]
4472 				 *	bit 5	valid
4473 				 *	bit 0:4	slice number
4474 				 *
4475 				 * drmach_xt_mb[*] format for drmach_set_lpa
4476 				 *	bit 7	valid
4477 				 *	bit 6	set null LPA
4478 				 *			(overrides bits 0:4)
4479 				 *	bit 0:4	slice number
4480 				 *
4481 				 * drmach_set_lpa derives processor CBASE and
4482 				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
4483 				 * If bit 6 is set, then CBASE = CBND = 0.
4484 				 * Otherwise, CBASE = slice number;
4485 				 * CBND = slice number + 1.
4486 				 * No action is taken if bit 7 is zero.
4487 				 */
4488 
4489 				mutex_enter(&drmach_xt_mb_lock);
4490 				bzero((void *)drmach_xt_mb,
4491 				    drmach_xt_mb_size);
4492 
4493 				if (new_basepa == 0 && new_boundpa == 0)
4494 					drmach_xt_mb[cpuid] = 0x80 | 0x40;
4495 				else
4496 					drmach_xt_mb[cpuid] = 0x80 |
4497 						DRMACH_PA_TO_SLICE(new_basepa);
4498 
4499 				drmach_xt_ready = 0;
4500 
4501 				xt_one(cpuid, drmach_set_lpa, NULL, NULL);
4502 
4503 				ntries = drmach_cpu_ntries;
4504 				while (!drmach_xt_ready && ntries) {
4505 					DELAY(drmach_cpu_delay);
4506 					ntries--;
4507 				}
4508 				mutex_exit(&drmach_xt_mb_lock);
4509 				drmach_xt_ready = 0;
4510 
4511 				/*
4512 				 * XXX CHEETAH SUPPORT
4513 				 * for cheetah, we need to clear iocage
4514 				 * memory since it was used for e$ flush
4515 				 * in performed drmach_set_lpa.
4516 				 */
4517 				if (drmach_is_cheetah) {
4518 					mutex_enter(&drmach_iocage_lock);
4519 					drmach_iocage_mem_scrub(
4520 						ecache_size * 2);
4521 					drmach_iocage_is_busy = 0;
4522 					cv_signal(&drmach_iocage_cv);
4523 					mutex_exit(&drmach_iocage_lock);
4524 				}
4525 			}
4526 			rv = drmach_array_next(s1bp->devices, &idx, &id);
4527 		}
4528 		mutex_exit(&cpu_lock);
4529 	}
4530 	mutex_exit(&drmach_slice_table_lock);
4531 }
4532 
4533 /*
4534  * Return the number of connected Panther boards in the domain.
4535  */
4536 static int
4537 drmach_panther_boards(void)
4538 {
4539 	int		rv;
4540 	int		b_idx;
4541 	drmachid_t	b_id;
4542 	drmach_board_t	*bp;
4543 	int		npanther = 0;
4544 
4545 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4546 	while (rv == 0) {
4547 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4548 		bp = b_id;
4549 
4550 		if (IS_PANTHER(bp->cpu_impl))
4551 			npanther++;
4552 
4553 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4554 	}
4555 
4556 	return (npanther);
4557 }
4558 
4559 /*ARGSUSED*/
4560 sbd_error_t *
4561 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
4562 {
4563 	drmach_board_t	*bp;
4564 	dr_mbox_msg_t	*obufp;
4565 	sbd_error_t	*err = NULL;
4566 
4567 	sc_gptwocfg_cookie_t	scc;
4568 
4569 	if (!DRMACH_IS_BOARD_ID(id))
4570 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4571 	bp = id;
4572 
4573 	/*
4574 	 * Build the casm info portion of the UNCLAIM message.
4575 	 * This must be done prior to calling for saf configurator
4576 	 * deprobe, to ensure that the associated axq instance
4577 	 * is not detached.
4578 	 */
4579 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4580 	mutex_enter(&drmach_slice_table_lock);
4581 	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4582 
4583 	/*
4584 	 * If disconnecting slot 0 board, update the casm slice table
4585 	 * info now, for use by drmach_slot1_lpa_set()
4586 	 */
4587 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
4588 			drmach_slice_table_update(bp, 1);
4589 
4590 	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4591 	mutex_exit(&drmach_slice_table_lock);
4592 
4593 	/*
4594 	 * Update LPA information for slot1 board
4595 	 */
4596 	drmach_slot1_lpa_set(bp);
4597 
4598 	/* disable and flush CDC */
4599 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
4600 		axq_cdc_enable_all();	/* paranoia */
4601 		err = DRMACH_INTERNAL_ERROR();
4602 	}
4603 
4604 	/*
4605 	 * call saf configurator for deprobe
4606 	 * It's done now before sending an UNCLAIM message because
4607 	 * IKP will probe boards it doesn't know about <present at boot>
4608 	 * prior to unprobing them.  If this happens after sending the
4609 	 * UNCLAIM, it will cause a dstop for domain transgression error.
4610 	 */
4611 
4612 	if (!err) {
4613 		scc = sc_unprobe_board(bp->bnum);
4614 		axq_cdc_enable_all();
4615 		if (scc != NULL) {
4616 			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
4617 		}
4618 	}
4619 
4620 	/*
4621 	 * If disconnecting a board from a Panther domain, wait a fixed-
4622 	 * time delay for pending Safari transactions to complete on the
4623 	 * disconnecting board's processors.  The bus sync list read used
4624 	 * in drmach_shutdown_asm to synchronize with outstanding Safari
4625 	 * transactions assumes no read-bypass-write mode for all memory
4626 	 * controllers.  Since Panther supports read-bypass-write, a
4627 	 * delay is used that is slightly larger than the maximum Safari
4628 	 * timeout value in the Safari/Fireplane Config Reg.
4629 	 */
4630 	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
4631 		clock_t	stime = lbolt;
4632 
4633 		delay(drv_usectohz(drmach_unclaim_usec_delay));
4634 
4635 		stime = lbolt - stime;
4636 		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
4637 		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
4638 	}
4639 
4640 	if (!err) {
4641 		obufp->msgdata.dm_ur.mem_clear = 0;
4642 
4643 		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
4644 			sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4645 
4646 		if (err) {
4647 			/*
4648 			 * if mailbox timeout or unrecoverable error from SC,
4649 			 * board cannot be touched.  Mark the status as
4650 			 * unusable.
4651 			 */
4652 			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4653 				(err->e_code == ESTC_MBXRPLY))
4654 					bp->cond = SBD_COND_UNUSABLE;
4655 			else {
4656 				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
4657 					bp->bnum);
4658 				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
4659 					bp->bnum);
4660 				scc = sc_probe_board(bp->bnum);
4661 				if (scc == NULL) {
4662 					cmn_err(CE_WARN,
4663 					"sc_probe_board failed for bnum=%d",
4664 						bp->bnum);
4665 				} else {
4666 					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
4667 						mutex_enter(
4668 						    &drmach_slice_table_lock);
4669 						drmach_slice_table_update(bp,
4670 						    0);
4671 						mutex_exit(
4672 						    &drmach_slice_table_lock);
4673 					}
4674 					drmach_slot1_lpa_set(bp);
4675 				}
4676 			}
4677 		} else {
4678 			bp->connected = 0;
4679 			/*
4680 			 * Now that the board has been successfully detached,
4681 			 * discard platform-specific DIMM serial id information
4682 			 * for the board.
4683 			 */
4684 			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4685 			    plat_ecc_capability_sc_get(
4686 			    PLAT_ECC_DIMM_SID_MESSAGE)) {
4687 				(void) plat_discard_mem_sids(
4688 				    DRMACH_BNUM2EXP(bp->bnum));
4689 			}
4690 		}
4691 	}
4692 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4693 
4694 	return (err);
4695 }
4696 
4697 static int
4698 drmach_get_portid(drmach_node_t *np)
4699 {
4700 	drmach_node_t	pp;
4701 	int		portid;
4702 	char		type[OBP_MAXPROPNAME];
4703 
4704 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4705 		return (portid);
4706 
4707 	/*
4708 	 * Get the device_type property to see if we should
4709 	 * continue processing this node.
4710 	 */
4711 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4712 		return (-1);
4713 
4714 	/*
4715 	 * If the device is a CPU without a 'portid' property,
4716 	 * it is a CMP core. For such cases, the parent node
4717 	 * has the portid.
4718 	 */
4719 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4720 		if (np->get_parent(np, &pp) != 0)
4721 			return (-1);
4722 
4723 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4724 			return (portid);
4725 	}
4726 
4727 	return (-1);
4728 }
4729 
4730 /*
4731  * This is a helper function to determine if a given
4732  * node should be considered for a dr operation according
4733  * to predefined dr type nodes and the node's name.
4734  * Formal Parameter : The name of a device node.
4735  * Return Value: -1, name does not map to a valid dr type.
4736  *		 A value greater or equal to 0, name is a valid dr type.
4737  */
4738 static int
4739 drmach_name2type_idx(char *name)
4740 {
4741 	int 	index, ntypes;
4742 
4743 	if (name == NULL)
4744 		return (-1);
4745 
4746 	/*
4747 	 * Determine how many possible types are currently supported
4748 	 * for dr.
4749 	 */
4750 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4751 
4752 	/* Determine if the node's name correspond to a predefined type. */
4753 	for (index = 0; index < ntypes; index++) {
4754 		if (strcmp(drmach_name2type[index].name, name) == 0)
4755 			/* The node is an allowed type for dr. */
4756 			return (index);
4757 	}
4758 
4759 	/*
4760 	 * If the name of the node does not map to any of the
4761 	 * types in the array drmach_name2type then the node is not of
4762 	 * interest to dr.
4763 	 */
4764 	return (-1);
4765 }
4766 
4767 static int
4768 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4769 {
4770 	drmach_node_t			*node = args->node;
4771 	drmach_board_cb_data_t		*data = args->data;
4772 	drmach_board_t			*obj = data->obj;
4773 
4774 	int		rv, portid;
4775 	drmachid_t	id;
4776 	drmach_device_t	*device;
4777 	char	name[OBP_MAXDRVNAME];
4778 
4779 	portid = drmach_get_portid(node);
4780 	if (portid == -1) {
4781 		/*
4782 		 * if the node does not have a portid property, then
4783 		 * by that information alone it is known that drmach
4784 		 * is not interested in it.
4785 		 */
4786 		return (0);
4787 	}
4788 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4789 
4790 	/* The node must have a name */
4791 	if (rv)
4792 		return (0);
4793 
4794 	/*
4795 	 * Ignore devices whose portid do not map to this board,
4796 	 * or that their name property is not mapped to a valid
4797 	 * dr device name.
4798 	 */
4799 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4800 	    (drmach_name2type_idx(name) < 0))
4801 		return (0);
4802 
4803 	/*
4804 	 * Create a device data structure from this node data.
4805 	 * The call may yield nothing if the node is not of interest
4806 	 * to drmach.
4807 	 */
4808 	data->err = drmach_device_new(node, obj, portid, &id);
4809 	if (data->err)
4810 		return (-1);
4811 	else if (!id) {
4812 		/*
4813 		 * drmach_device_new examined the node we passed in
4814 		 * and determined that it was either one not of
4815 		 * interest to drmach or the PIM dr layer.
4816 		 * So, it is skipped.
4817 		 */
4818 		return (0);
4819 	}
4820 
4821 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4822 	if (rv) {
4823 		data->err = DRMACH_INTERNAL_ERROR();
4824 		return (-1);
4825 	}
4826 
4827 	device = id;
4828 
4829 #ifdef DEBUG
4830 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4831 	if (DRMACH_IS_IO_ID(id))
4832 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4833 #endif
4834 
4835 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4836 	return (data->err == NULL ? 0 : -1);
4837 }
4838 
4839 sbd_error_t *
4840 drmach_board_find_devices(drmachid_t id, void *a,
4841 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4842 {
4843 	drmach_board_t		*bp = (drmach_board_t *)id;
4844 	sbd_error_t		*err;
4845 	int			 max_devices;
4846 	int			 rv;
4847 	drmach_board_cb_data_t	data;
4848 
4849 	if (!DRMACH_IS_BOARD_ID(id))
4850 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4851 
4852 	max_devices  = plat_max_cpu_units_per_board();
4853 	max_devices += plat_max_mem_units_per_board();
4854 	max_devices += plat_max_io_units_per_board();
4855 
4856 	bp->devices = drmach_array_new(0, max_devices);
4857 
4858 	if (bp->tree == NULL)
4859 		bp->tree = drmach_node_new();
4860 
4861 	data.obj = bp;
4862 	data.ndevs = 0;
4863 	data.found = found;
4864 	data.a = a;
4865 	data.err = NULL;
4866 
4867 	mutex_enter(&drmach_slice_table_lock);
4868 	mutex_enter(&drmach_bus_sync_lock);
4869 
4870 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4871 
4872 	drmach_slice_table_update(bp, 0);
4873 	drmach_bus_sync_list_update();
4874 
4875 	mutex_exit(&drmach_bus_sync_lock);
4876 	mutex_exit(&drmach_slice_table_lock);
4877 
4878 	if (rv == 0) {
4879 		err = NULL;
4880 		drmach_slot1_lpa_set(bp);
4881 	} else {
4882 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4883 		bp->devices = NULL;
4884 
4885 		if (data.err)
4886 			err = data.err;
4887 		else
4888 			err = DRMACH_INTERNAL_ERROR();
4889 	}
4890 
4891 	return (err);
4892 }
4893 
4894 int
4895 drmach_board_lookup(int bnum, drmachid_t *id)
4896 {
4897 	int	rv = 0;
4898 
4899 	if (!drmach_initialized && drmach_init() == -1) {
4900 		*id = 0;
4901 		return (-1);
4902 	}
4903 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4904 	if (drmach_array_get(drmach_boards, bnum, id)) {
4905 		*id = 0;
4906 		rv = -1;
4907 	} else {
4908 		caddr_t		obufp;
4909 		dr_showboard_t	shb;
4910 		sbd_error_t	*err = NULL;
4911 		drmach_board_t	*bp;
4912 
4913 		bp = *id;
4914 
4915 		if (bp)
4916 			rw_downgrade(&drmach_boards_rwlock);
4917 
4918 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4919 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4920 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4921 			sizeof (dr_showboard_t));
4922 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4923 
4924 		if (err) {
4925 			if (err->e_code == ESTC_UNAVAILABLE) {
4926 				*id = 0;
4927 				rv = -1;
4928 			}
4929 			sbd_err_clear(&err);
4930 		} else {
4931 			if (!bp)
4932 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4933 			bp->connected = (shb.bd_assigned && shb.bd_active);
4934 			bp->empty = shb.slot_empty;
4935 
4936 			switch (shb.test_status) {
4937 				case DR_TEST_STATUS_UNKNOWN:
4938 				case DR_TEST_STATUS_IPOST:
4939 				case DR_TEST_STATUS_ABORTED:
4940 					bp->cond = SBD_COND_UNKNOWN;
4941 					break;
4942 				case DR_TEST_STATUS_PASSED:
4943 					bp->cond = SBD_COND_OK;
4944 					break;
4945 				case DR_TEST_STATUS_FAILED:
4946 					bp->cond = SBD_COND_FAILED;
4947 					break;
4948 				default:
4949 					bp->cond = SBD_COND_UNKNOWN;
4950 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4951 						shb.test_status);
4952 					break;
4953 			}
4954 			strncpy(bp->type, shb.board_type, sizeof (bp->type));
4955 			bp->assigned = shb.bd_assigned;
4956 			bp->powered = shb.power_on;
4957 		}
4958 	}
4959 	rw_exit(&drmach_boards_rwlock);
4960 	return (rv);
4961 }
4962 
4963 sbd_error_t *
4964 drmach_board_name(int bnum, char *buf, int buflen)
4965 {
4966 	snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4967 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4968 
4969 	return (NULL);
4970 }
4971 
4972 sbd_error_t *
4973 drmach_board_poweroff(drmachid_t id)
4974 {
4975 	drmach_board_t	*bp;
4976 	sbd_error_t	*err;
4977 	drmach_status_t	 stat;
4978 
4979 	if (!DRMACH_IS_BOARD_ID(id))
4980 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4981 	bp = id;
4982 
4983 	err = drmach_board_status(id, &stat);
4984 	if (!err) {
4985 		if (stat.configured || stat.busy)
4986 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4987 		else {
4988 			caddr_t	obufp;
4989 
4990 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4991 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4992 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4993 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4994 			if (!err)
4995 				bp->powered = 0;
4996 		}
4997 	}
4998 	return (err);
4999 }
5000 
5001 sbd_error_t *
5002 drmach_board_poweron(drmachid_t id)
5003 {
5004 	drmach_board_t	*bp;
5005 	caddr_t		obufp;
5006 	sbd_error_t	*err;
5007 
5008 	if (!DRMACH_IS_BOARD_ID(id))
5009 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5010 	bp = id;
5011 
5012 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5013 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
5014 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5015 	if (!err)
5016 		bp->powered = 1;
5017 
5018 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
5019 
5020 	return (err);
5021 }
5022 
5023 static sbd_error_t *
5024 drmach_board_release(drmachid_t id)
5025 {
5026 	if (!DRMACH_IS_BOARD_ID(id))
5027 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5028 	return (NULL);
5029 }
5030 
5031 sbd_error_t *
5032 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
5033 {
5034 	drmach_board_t		*bp;
5035 	drmach_device_t		*dp[SBD_MAX_CORES_PER_CMP];
5036 	dr_mbox_msg_t		*obufp;
5037 	sbd_error_t		*err;
5038 	dr_testboard_reply_t	tbr;
5039 	int			cpylen;
5040 	char			*copts;
5041 	int			is_io;
5042 	cpu_flag_t		oflags[SBD_MAX_CORES_PER_CMP];
5043 
5044 	if (!DRMACH_IS_BOARD_ID(id))
5045 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5046 	bp = id;
5047 
5048 	/*
5049 	 * If the board is an I/O or MAXCAT board, setup I/O cage for
5050 	 * testing. Slot 1 indicates I/O or MAXCAT board.
5051 	 */
5052 
5053 	is_io = DRMACH_BNUM2SLOT(bp->bnum);
5054 
5055 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
5056 
5057 	if (force)
5058 		obufp->msgdata.dm_tb.force = 1;
5059 
5060 	obufp->msgdata.dm_tb.immediate = 1;
5061 
5062 	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
5063 		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
5064 		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
5065 	}
5066 
5067 	if (is_io) {
5068 		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);
5069 
5070 		if (err) {
5071 			kmem_free(obufp, sizeof (dr_mbox_msg_t));
5072 			return (err);
5073 		}
5074 	}
5075 
5076 	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
5077 		sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));
5078 
5079 	if (!err)
5080 		bp->cond = SBD_COND_OK;
5081 	else
5082 		bp->cond = SBD_COND_UNKNOWN;
5083 
5084 	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
5085 		/* examine test status */
5086 		switch (tbr.test_status) {
5087 			case DR_TEST_STATUS_IPOST:
5088 				bp->cond = SBD_COND_UNKNOWN;
5089 				err = drerr_new(0, ESTC_TEST_IN_PROGRESS,
5090 					NULL);
5091 				break;
5092 			case DR_TEST_STATUS_UNKNOWN:
5093 				bp->cond = SBD_COND_UNKNOWN;
5094 				err = drerr_new(1,
5095 					ESTC_TEST_STATUS_UNKNOWN, NULL);
5096 				break;
5097 			case DR_TEST_STATUS_FAILED:
5098 				bp->cond = SBD_COND_FAILED;
5099 				err = drerr_new(1, ESTC_TEST_FAILED,
5100 					NULL);
5101 				break;
5102 			case DR_TEST_STATUS_ABORTED:
5103 				bp->cond = SBD_COND_UNKNOWN;
5104 				err = drerr_new(1, ESTC_TEST_ABORTED,
5105 					NULL);
5106 				break;
5107 			default:
5108 				bp->cond = SBD_COND_UNKNOWN;
5109 				err = drerr_new(1,
5110 					ESTC_TEST_RESULT_UNKNOWN,
5111 					NULL);
5112 				break;
5113 		}
5114 	}
5115 
5116 	/*
5117 	 * If I/O cage test was performed, check for availability of the
5118 	 * cpu used.  If cpu has been returned, it's OK to proceed with
5119 	 * reconfiguring it for use.
5120 	 */
5121 	if (is_io) {
5122 		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
5123 			tbr.cpu_recovered);
5124 		DRMACH_PR("drmach_board_test: port id: %d",
5125 			tbr.cpu_portid);
5126 
5127 		/*
5128 		 * Check the cpu_recovered flag in the testboard reply, or
5129 		 * if the testboard request message was not sent to SMS due
5130 		 * to an mboxsc_putmsg() failure, it's OK to recover the
5131 		 * cpu since hpost hasn't touched it.
5132 		 */
5133 		if ((tbr.cpu_recovered && tbr.cpu_portid ==
5134 		    obufp->msgdata.dm_tb.cpu_portid) ||
5135 		    ((err) && (err->e_code == ESTC_MBXRQST))) {
5136 
5137 			int i;
5138 
5139 			mutex_enter(&cpu_lock);
5140 			for (i = 0; i < SBD_MAX_CORES_PER_CMP; i++) {
5141 				if (dp[i] != NULL) {
5142 					(void) drmach_iocage_cpu_return(dp[i],
5143 					    oflags[i]);
5144 				}
5145 			}
5146 			mutex_exit(&cpu_lock);
5147 		} else {
5148 			cmn_err(CE_WARN, "Unable to recover port id %d "
5149 			    "after I/O cage test: cpu_recovered=%d, "
5150 			    "returned portid=%d",
5151 			    obufp->msgdata.dm_tb.cpu_portid,
5152 			    tbr.cpu_recovered, tbr.cpu_portid);
5153 		}
5154 		drmach_iocage_mem_return(&tbr);
5155 	}
5156 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
5157 
5158 	return (err);
5159 }
5160 
5161 sbd_error_t *
5162 drmach_board_unassign(drmachid_t id)
5163 {
5164 	drmach_board_t	*bp;
5165 	sbd_error_t	*err;
5166 	drmach_status_t	 stat;
5167 	caddr_t		obufp;
5168 
5169 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
5170 
5171 	if (!DRMACH_IS_BOARD_ID(id)) {
5172 		rw_exit(&drmach_boards_rwlock);
5173 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5174 	}
5175 	bp = id;
5176 
5177 	err = drmach_board_status(id, &stat);
5178 	if (err) {
5179 		rw_exit(&drmach_boards_rwlock);
5180 		return (err);
5181 	}
5182 
5183 	if (stat.configured || stat.busy) {
5184 		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
5185 	} else {
5186 
5187 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5188 		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
5189 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5190 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
5191 		if (!err) {
5192 			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
5193 				err = DRMACH_INTERNAL_ERROR();
5194 			else
5195 				drmach_board_dispose(bp);
5196 		}
5197 	}
5198 	rw_exit(&drmach_boards_rwlock);
5199 	return (err);
5200 }
5201 
5202 static sbd_error_t *
5203 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5204 {
5205 	int		len;
5206 	drmach_reg_t	reg;
5207 	drmach_node_t	pp;
5208 	drmach_node_t	*np = dp->node;
5209 
5210 	/*
5211 	 * If the node does not have a portid property,
5212 	 * it represents a CMP device. For a CMP, the reg
5213 	 * property of the parent holds the information of
5214 	 * interest.
5215 	 */
5216 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5217 
5218 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5219 			return (DRMACH_INTERNAL_ERROR());
5220 		}
5221 		np = &pp;
5222 	}
5223 
5224 	if (np->n_getproplen(np, "reg", &len) != 0)
5225 		return (DRMACH_INTERNAL_ERROR());
5226 
5227 	if (len != sizeof (reg))
5228 		return (DRMACH_INTERNAL_ERROR());
5229 
5230 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5231 		return (DRMACH_INTERNAL_ERROR());
5232 
5233 	/* reassemble 64-bit base address */
5234 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5235 
5236 	return (NULL);
5237 }
5238 
5239 static void
5240 drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5241 {
5242 	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5243 	uint_t		*reg_read = (uint_t *)arg2;
5244 
5245 	*saf_config_reg = lddsafconfig();
5246 	*reg_read = 0x1;
5247 }
5248 
5249 /*
5250  * A return value of 1 indicates success and 0 indicates a failure
5251  */
5252 static int
5253 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5254 {
5255 
5256 	int 	rv = 0x0;
5257 
5258 	*scr = 0x0;
5259 
5260 	/*
5261 	 * Confirm cpu was in ready set when xc was issued.
5262 	 * This is done by verifying rv which is
5263 	 * set to 0x1 when xc_one is successful.
5264 	 */
5265 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5266 	    (uint64_t)scr, (uint64_t)&rv);
5267 
5268 	return (rv);
5269 
5270 }
5271 
5272 static sbd_error_t *
5273 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5274 {
5275 	drmach_node_t	*np;
5276 
5277 	np = cp->dev.node;
5278 
5279 	/*
5280 	 * If a CPU does not have a portid property, it must
5281 	 * be a CMP device with a cpuid property.
5282 	 */
5283 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5284 
5285 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5286 			return (DRMACH_INTERNAL_ERROR());
5287 		}
5288 	}
5289 
5290 	return (NULL);
5291 }
5292 
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)

/*
 * Map a cpuid to an index in the range 0..3: the core id (cpuid bit 2)
 * becomes bit 1 of the index and cpuid bit 0 becomes bit 0.  The macro
 * argument is fully parenthesized so that an expression argument is
 * evaluated as a whole; it is evaluated twice, so it must be free of
 * side effects.
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((((id) & DRMACH_COREID_MASK) >> 1) | ((id) & 0x1))
5297 
/*
 * Construct a drmach_cpu_t from the prototype device 'proto'.
 *
 * On success the new object is stored in *idp and NULL is returned.
 * On failure any partially built object is released, *idp is set to
 * zero and the error is returned.
 *
 * Side effects visible in this function: the board's cpu_impl is set
 * if it was still zero, drmach_is_cheetah is set if it was still
 * negative, and the drmach_cpu_sram_tte[] entry indexed by the cpuid is
 * initialized.
 */
static sbd_error_t *
drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
{
	/* forward declarations of the ops installed below */
	static void drmach_cpu_dispose(drmachid_t);
	static sbd_error_t *drmach_cpu_release(drmachid_t);
	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	scr_pa;
	drmach_cpu_t	*cp = NULL;
	pfn_t		pfn;
	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
	int		idx;
	int		impl;
	processorid_t	cpuid;

	/* cp is still NULL here, so the fail path frees nothing */
	err = drmach_read_reg_addr(proto, &scr_pa);
	if (err) {
		goto fail;
	}

	/* start from a copy of the prototype, with a private node copy */
	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
	bcopy(proto, &cp->dev, sizeof (cp->dev));
	cp->dev.node = drmach_node_dup(proto->node);
	cp->dev.cm.isa = (void *)drmach_cpu_new;
	cp->dev.cm.dispose = drmach_cpu_dispose;
	cp->dev.cm.release = drmach_cpu_release;
	cp->dev.cm.status = drmach_cpu_status;
	cp->scr_pa = scr_pa;

	err = drmach_cpu_read_cpuid(cp, &cpuid);
	if (err) {
		goto fail;
	}

	err = drmach_cpu_get_impl(cp, &impl);
	if (err) {
		goto fail;
	}

	cp->cpuid = cpuid;
	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);

	/*
	 * Init the board cpu type.  Assumes all board cpus are the same type.
	 */
	if (cp->dev.bp->cpu_impl == 0) {
		cp->dev.bp->cpu_impl = impl;
	}
	ASSERT(cp->dev.bp->cpu_impl == impl);

	/*
	 * XXX CHEETAH SUPPORT
	 * determine if the domain uses Cheetah procs
	 */
	if (drmach_is_cheetah < 0) {
		drmach_is_cheetah = IS_CHEETAH(impl);
	}

	/*
	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
	 * pair. Each cpu uses 8KB according to the following layout:
	 *
	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
	 * Page 2:	even numbered Panther/Jaguar core 1's
	 * Page 3:	odd numbered Panther/Jaguar core 1's
	 */
	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
	pfn = cpu_sram_pa >> PAGESHIFT;

	/* the slot for this cpuid is expected to be unused (all zero) */
	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
		TTE_VALID_INT | TTE_SZ_INT(TTE8K);
	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
		TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;

	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);

	/* object name is the device type followed by the unit number */
	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
	    cp->dev.type, cp->dev.unum);

	*idp = (drmachid_t)cp;
	return (NULL);

fail:
	/* undo the node dup and the allocation, if they happened */
	if (cp) {
		drmach_node_dispose(cp->dev.node);
		kmem_free(cp, sizeof (*cp));
	}

	*idp = (drmachid_t)0;
	return (err);
}
5400 
5401 static void
5402 drmach_cpu_dispose(drmachid_t id)
5403 {
5404 	drmach_cpu_t	*self;
5405 	processorid_t	cpuid;
5406 
5407 	ASSERT(DRMACH_IS_CPU_ID(id));
5408 
5409 	self = id;
5410 	if (self->dev.node)
5411 		drmach_node_dispose(self->dev.node);
5412 
5413 	cpuid = self->cpuid;
5414 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5415 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5416 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5417 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5418 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5419 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5420 
5421 	kmem_free(self, sizeof (*self));
5422 }
5423 
5424 static int
5425 drmach_cpu_start(struct cpu *cp)
5426 {
5427 	extern xcfunc_t	drmach_set_lpa;
5428 	extern void	restart_other_cpu(int);
5429 	int		cpuid = cp->cpu_id;
5430 	int		rv, bnum;
5431 	drmach_board_t	*bp;
5432 
5433 	ASSERT(MUTEX_HELD(&cpu_lock));
5434 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
5435 
5436 	cp->cpu_flags &= ~CPU_POWEROFF;
5437 
5438 	/*
5439 	 * NOTE: restart_other_cpu pauses cpus during the
5440 	 *	 slave cpu start.  This helps to quiesce the
5441 	 *	 bus traffic a bit which makes the tick sync
5442 	 *	 routine in the prom more robust.
5443 	 */
5444 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
5445 
5446 	if (prom_hotaddcpu(cpuid) != 0) {
5447 		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
5448 			cpuid);
5449 	}
5450 
5451 	restart_other_cpu(cpuid);
5452 
5453 	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
5454 	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
5455 	if (rv == -1 || bp == NULL) {
5456 		DRMACH_PR("drmach_cpu_start: cannot read board info for "
5457 		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, bp);
5458 	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
5459 		int exp;
5460 		int ntries;
5461 
5462 		mutex_enter(&drmach_xt_mb_lock);
5463 		mutex_enter(&drmach_slice_table_lock);
5464 		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
5465 
5466 		/*
5467 		 * drmach_slice_table[*]
5468 		 *	bit 5	valid
5469 		 *	bit 0:4	slice number
5470 		 *
5471 		 * drmach_xt_mb[*] format for drmach_set_lpa
5472 		 *	bit 7	valid
5473 		 *	bit 6	set null LPA (overrides bits 0:4)
5474 		 *	bit 0:4	slice number
5475 		 *
5476 		 * drmach_set_lpa derives processor CBASE and CBND
5477 		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
5478 		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
5479 		 * number; CBND = slice number + 1.
5480 		 * No action is taken if bit 7 is zero.
5481 		 */
5482 		exp = (cpuid >> 5) & 0x1f;
5483 		if (drmach_slice_table[exp] & 0x20) {
5484 			drmach_xt_mb[cpuid] = 0x80 |
5485 				(drmach_slice_table[exp] & 0x1f);
5486 		} else {
5487 			drmach_xt_mb[cpuid] = 0x80 | 0x40;
5488 		}
5489 
5490 		drmach_xt_ready = 0;
5491 
5492 		xt_one(cpuid, drmach_set_lpa, NULL, NULL);
5493 
5494 		ntries = drmach_cpu_ntries;
5495 		while (!drmach_xt_ready && ntries) {
5496 			DELAY(drmach_cpu_delay);
5497 			ntries--;
5498 		}
5499 
5500 		mutex_exit(&drmach_slice_table_lock);
5501 		mutex_exit(&drmach_xt_mb_lock);
5502 
5503 		DRMACH_PR(
5504 			"waited %d out of %d tries for drmach_set_lpa on cpu%d",
5505 			drmach_cpu_ntries - ntries, drmach_cpu_ntries,
5506 			cp->cpu_id);
5507 	}
5508 
5509 	xt_one(cpuid, vtag_flushpage_tl1,
5510 		(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
5511 
5512 	return (0);
5513 }
5514 
5515 /*
5516  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5517  * it has been offlined. The function of this routine is to get the cpu
5518  * spinning in a safe place. The requirement is that the system will not
5519  * reference anything on the detaching board (memory and i/o is detached
5520  * elsewhere) and that the CPU not reference anything on any other board
5521  * in the system.  This isolation is required during and after the writes
5522  * to the domain masks to remove the board from the domain.
5523  *
5524  * To accomplish this isolation the following is done:
5525  *	1) Create a locked mapping to the STARDRB data buffer located
5526  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5527  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5528  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5529  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5530  *	   boards. Each STARDRB buffer is logically divided by DR into one
5531  *	   8KB page per cpu (or Jaguar core).
5532  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5533  *	3) Jump to function now in the cpu sram.
5534  *	   Function will:
5535  *	   3.1) Flush its Ecache (displacement).
5536  *	   3.2) Flush its Dcache with HW mechanism.
5537  *	   3.3) Flush its Icache with HW mechanism.
5538  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5539  *	   3.5) Set LPA to NULL
5540  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5541  *	        recovered by drmach_cpu_poweroff().
5542  *	4) Jump into an infinite loop.
5543  */
5544 
/*
 * Runs on the cpu being stopped.  Maps this cpu's SRAM page at
 * drmach_cpu_sram_va, copies drmach_shutdown_asm into it and calls
 * the copy.
 */
static void
drmach_cpu_stop_self(void)
{
	extern void	drmach_shutdown_asm(
				uint64_t, uint64_t, int, int, uint64_t);
	extern void	drmach_shutdown_asm_end(void);

	tte_t		*tte;
	uint_t		*p, *q;
	uint64_t	 stack_pointer;

	/* the routine to be copied must fit within one page */
	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
		(ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);

	/* load this cpu's TTE into both the data and instruction TLBs */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);
	sfmmu_itlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)drmach_cpu_sram_va;
	q = (uint_t *)drmach_shutdown_asm;
	while (q < (uint_t *)drmach_shutdown_asm_end)
		*p++ = *q++;

	/* zero to assist debug */
	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
	while (p < q)
		*p++ = 0;

	/* a parking spot for the stack pointer */
	/* (q is the address one past the end of the mapped page) */
	stack_pointer = (uint64_t)q;

	/* call copy of drmach_shutdown_asm */
	(*(void (*)())drmach_cpu_sram_va)(
		stack_pointer,
		drmach_iocage_paddr,
		cpunodes[CPU->cpu_id].ecache_size,
		cpunodes[CPU->cpu_id].ecache_linesize,
		va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
}
5587 
5588 static void
5589 drmach_cpu_shutdown_self(void)
5590 {
5591 	cpu_t		*cp = CPU;
5592 	int		cpuid = cp->cpu_id;
5593 	extern void	flush_windows(void);
5594 
5595 	flush_windows();
5596 
5597 	(void) spl8();
5598 
5599 	ASSERT(cp->cpu_intr_actv == 0);
5600 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
5601 	    cp->cpu_thread == cp->cpu_startup_thread);
5602 
5603 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
5604 
5605 	drmach_cpu_stop_self();
5606 
5607 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
5608 }
5609 
5610 static sbd_error_t *
5611 drmach_cpu_release(drmachid_t id)
5612 {
5613 	drmach_cpu_t	*cp;
5614 	struct cpu	*cpu;
5615 	sbd_error_t	*err;
5616 
5617 	if (!DRMACH_IS_CPU_ID(id))
5618 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5619 	cp = id;
5620 
5621 	ASSERT(MUTEX_HELD(&cpu_lock));
5622 
5623 	cpu = cpu_get(cp->cpuid);
5624 	if (cpu == NULL)
5625 		err = DRMACH_INTERNAL_ERROR();
5626 	else
5627 		err = NULL;
5628 
5629 	return (err);
5630 }
5631 
5632 static sbd_error_t *
5633 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5634 {
5635 	drmach_cpu_t	*cp;
5636 	drmach_device_t	*dp;
5637 
5638 	ASSERT(DRMACH_IS_CPU_ID(id));
5639 	cp = id;
5640 	dp = &cp->dev;
5641 
5642 	stat->assigned = dp->bp->assigned;
5643 	stat->powered = dp->bp->powered;
5644 	mutex_enter(&cpu_lock);
5645 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5646 	mutex_exit(&cpu_lock);
5647 	stat->busy = dp->busy;
5648 	strncpy(stat->type, dp->type, sizeof (stat->type));
5649 	stat->info[0] = '\0';
5650 
5651 	return (NULL);
5652 }
5653 
5654 sbd_error_t *
5655 drmach_cpu_disconnect(drmachid_t id)
5656 {
5657 
5658 	if (!DRMACH_IS_CPU_ID(id))
5659 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5660 
5661 	return (NULL);
5662 
5663 }
5664 
5665 sbd_error_t *
5666 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5667 {
5668 	drmach_cpu_t	*cpu;
5669 
5670 	if (!DRMACH_IS_CPU_ID(id))
5671 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5672 	cpu = id;
5673 
5674 	*cpuid = cpu->cpuid;
5675 	return (NULL);
5676 }
5677 
5678 sbd_error_t *
5679 drmach_cpu_get_impl(drmachid_t id, int *ip)
5680 {
5681 	drmach_node_t	*np;
5682 	int		impl;
5683 
5684 	if (!DRMACH_IS_CPU_ID(id))
5685 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5686 
5687 	np = ((drmach_device_t *)id)->node;
5688 
5689 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5690 		return (DRMACH_INTERNAL_ERROR());
5691 	}
5692 
5693 	*ip = impl;
5694 
5695 	return (NULL);
5696 }
5697 
5698 /*
5699  * Flush this cpu's ecache, then ensure all outstanding safari
5700  * transactions have retired.
5701  */
5702 void
5703 drmach_cpu_flush_ecache_sync(void)
5704 {
5705 	uint64_t *p;
5706 
5707 	ASSERT(curthread->t_bound_cpu == CPU);
5708 
5709 	cpu_flush_ecache();
5710 
5711 	mutex_enter(&drmach_bus_sync_lock);
5712 	for (p = drmach_bus_sync_list; *p; p++)
5713 		(void) ldphys(*p);
5714 	mutex_exit(&drmach_bus_sync_lock);
5715 
5716 	cpu_flush_ecache();
5717 }
5718 
5719 sbd_error_t *
5720 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5721 {
5722 	drmach_device_t	*dp;
5723 
5724 	if (!DRMACH_IS_DEVICE_ID(id))
5725 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5726 	dp = id;
5727 
5728 	*dip = dp->node->n_getdip(dp->node);
5729 	return (NULL);
5730 }
5731 
5732 sbd_error_t *
5733 drmach_io_is_attached(drmachid_t id, int *yes)
5734 {
5735 	drmach_device_t *dp;
5736 	dev_info_t	*dip;
5737 	int state;
5738 
5739 	if (!DRMACH_IS_IO_ID(id))
5740 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5741 	dp = id;
5742 
5743 	dip = dp->node->n_getdip(dp->node);
5744 	if (dip == NULL) {
5745 		*yes = 0;
5746 		return (NULL);
5747 	}
5748 
5749 	state = ddi_get_devstate(dip);
5750 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5751 
5752 	return (NULL);
5753 }
5754 
5755 static int
5756 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5757 {
5758 	char			dtype[OBP_MAXPROPNAME];
5759 	int			portid;
5760 	uint_t			pci_csr_base;
5761 	struct pci_phys_spec	*regbuf = NULL;
5762 	int			rv, len;
5763 
5764 	ASSERT(dip != NULL);
5765 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5766 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5767 		return (0);
5768 
5769 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5770 		(caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5771 
5772 		if (strncmp(dtype, "pci", 3) == 0) {
5773 
5774 			/*
5775 			 * Get safari portid. All schizo/xmits 0
5776 			 * safari IDs end in 0x1C.
5777 			 */
5778 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5779 				"portid", &len);
5780 
5781 			if ((rv != DDI_PROP_SUCCESS) ||
5782 				(len > sizeof (portid)))
5783 					return (0);
5784 
5785 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5786 				"portid", (caddr_t)&portid, &len);
5787 
5788 			if (rv != DDI_PROP_SUCCESS)
5789 				return (0);
5790 
5791 			if ((portid & 0x1F) != 0x1C)
5792 				return (0);
5793 
5794 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5795 				DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5796 						&len) == DDI_PROP_SUCCESS) {
5797 
5798 				pci_csr_base = regbuf[0].pci_phys_mid &
5799 							PCI_CONF_ADDR_MASK;
5800 				kmem_free(regbuf, len);
5801 				/*
5802 				 * All PCI B-Leafs are at configspace 0x70.0000.
5803 				 */
5804 				if (pci_csr_base == 0x700000)
5805 					return (1);
5806 			}
5807 		}
5808 	}
5809 	return (0);
5810 }
5811 
5812 #define	SCHIZO_BINDING_NAME		"pci108e,8001"
5813 #define	XMITS_BINDING_NAME		"pci108e,8002"
5814 
5815 /*
5816  * Verify if the dip is an instance of MAN 'eri'.
5817  */
5818 static int
5819 drmach_dip_is_man_eri(dev_info_t *dip)
5820 {
5821 	struct pci_phys_spec	*regbuf = NULL;
5822 	dev_info_t		*parent_dip;
5823 	char			*name;
5824 	uint_t			pci_device;
5825 	uint_t			pci_function;
5826 	int			len;
5827 
5828 	if (dip == NULL)
5829 		return (0);
5830 	/*
5831 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5832 	 */
5833 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5834 		((name = ddi_binding_name(parent_dip)) == NULL))
5835 		return (0);
5836 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5837 		/*
5838 		 * This RIO could be on XMITS, so get the dip to
5839 		 * XMITS PCI Leaf.
5840 		 */
5841 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5842 			return (0);
5843 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5844 			(strcmp(name, XMITS_BINDING_NAME) != 0)) {
5845 			return (0);
5846 		}
5847 	}
5848 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5849 		return (0);
5850 	/*
5851 	 * Finally make sure it is the MAN eri.
5852 	 */
5853 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5854 			"reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5855 
5856 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5857 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5858 		kmem_free(regbuf, len);
5859 
5860 		/*
5861 		 * The network function of the RIO ASIC will always be
5862 		 * device 3 and function 1 ("network@3,1").
5863 		 */
5864 		if ((pci_device == 3) && (pci_function == 1))
5865 			return (1);
5866 	}
5867 	return (0);
5868 }
5869 
/*
 * Search state passed to drmach_board_find_io_insts() as the
 * ddi_walk_devs() callback argument.
 */
typedef struct {
	/* instance number of the "iosram" node found; < 0 while unfound */
	int		iosram_inst;
	/* held dip of the MAN eri found; NULL while unfound */
	dev_info_t	*eri_dip;
	/* board number that candidate nodes must belong to (input) */
	int		bnum;
} drmach_io_inst_t;
5875 
5876 int
5877 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5878 {
5879 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5880 
5881 	int	rv;
5882 	int	len;
5883 	int	portid;
5884 	char	name[OBP_MAXDRVNAME];
5885 
5886 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5887 
5888 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5889 		return (DDI_WALK_CONTINUE);
5890 	}
5891 
5892 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5893 			"portid", (caddr_t)&portid, &len);
5894 	if (rv != DDI_PROP_SUCCESS)
5895 		return (DDI_WALK_CONTINUE);
5896 
5897 	/* ignore devices that are not on this board */
5898 	if (drmach_portid2bnum(portid) != ios->bnum)
5899 		return (DDI_WALK_CONTINUE);
5900 
5901 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5902 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5903 			"name", &len);
5904 		if (rv == DDI_PROP_SUCCESS) {
5905 
5906 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5907 				0, "name",
5908 				(caddr_t)name, &len);
5909 			if (rv != DDI_PROP_SUCCESS)
5910 				return (DDI_WALK_CONTINUE);
5911 
5912 			if (strncmp("iosram", name, 6) == 0) {
5913 				ios->iosram_inst = ddi_get_instance(dip);
5914 				if (ios->eri_dip == NULL)
5915 					return (DDI_WALK_CONTINUE);
5916 				else
5917 					return (DDI_WALK_TERMINATE);
5918 			} else {
5919 				if (drmach_dip_is_man_eri(dip)) {
5920 					ASSERT(ios->eri_dip == NULL);
5921 					ndi_hold_devi(dip);
5922 					ios->eri_dip = dip;
5923 					if (ios->iosram_inst < 0)
5924 						return (DDI_WALK_CONTINUE);
5925 					else
5926 						return (DDI_WALK_TERMINATE);
5927 				}
5928 			}
5929 		}
5930 	}
5931 	return (DDI_WALK_CONTINUE);
5932 }
5933 
5934 sbd_error_t *
5935 drmach_io_pre_release(drmachid_t id)
5936 {
5937 	drmach_io_inst_t	ios;
5938 	drmach_board_t		*bp;
5939 	int			rv = 0;
5940 	sbd_error_t		*err = NULL;
5941 	drmach_device_t		*dp;
5942 	dev_info_t		*rdip;
5943 	int			circ;
5944 
5945 	if (!DRMACH_IS_IO_ID(id))
5946 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5947 	dp = id;
5948 	bp = dp->bp;
5949 
5950 	rdip = dp->node->n_getdip(dp->node);
5951 
5952 	/* walk device tree to find iosram instance for the board */
5953 	ios.iosram_inst = -1;
5954 	ios.eri_dip = NULL;
5955 	ios.bnum = bp->bnum;
5956 
5957 	ndi_devi_enter(rdip, &circ);
5958 	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
5959 				(void *)&ios);
5960 
5961 	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
5962 			ios.bnum, ios.iosram_inst, ios.eri_dip);
5963 	ndi_devi_exit(rdip, circ);
5964 
5965 	if (ios.eri_dip) {
5966 		/*
5967 		 * Release hold acquired in drmach_board_find_io_insts()
5968 		 */
5969 		ndi_rele_devi(ios.eri_dip);
5970 	}
5971 	if (ios.iosram_inst >= 0) {
5972 		/* call for tunnel switch */
5973 		do {
5974 			DRMACH_PR("calling iosram_switchfrom(%d)\n",
5975 				ios.iosram_inst);
5976 			rv = iosram_switchfrom(ios.iosram_inst);
5977 			if (rv)
5978 				DRMACH_PR("iosram_switchfrom returned %d\n",
5979 					rv);
5980 		} while (rv == EAGAIN);
5981 
5982 		if (rv)
5983 			err = drerr_new(0, ESTC_IOSWITCH, NULL);
5984 	}
5985 	return (err);
5986 }
5987 
5988 sbd_error_t *
5989 drmach_io_unrelease(drmachid_t id)
5990 {
5991 	dev_info_t	*dip;
5992 	sbd_error_t	*err = NULL;
5993 	drmach_device_t	*dp;
5994 
5995 	if (!DRMACH_IS_IO_ID(id))
5996 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5997 	dp = id;
5998 
5999 	dip = dp->node->n_getdip(dp->node);
6000 
6001 	if (dip == NULL)
6002 		err = DRMACH_INTERNAL_ERROR();
6003 	else {
6004 		int (*func)(dev_info_t *dip);
6005 
6006 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
6007 			0);
6008 
6009 		if (func) {
6010 			drmach_io_inst_t ios;
6011 			dev_info_t	*pdip;
6012 			int		circ;
6013 
6014 			/*
6015 			 * Walk device tree to find rio dip for the board
6016 			 * Since we are not interested in iosram instance here,
6017 			 * initialize it to 0, so that the walk terminates as
6018 			 * soon as eri dip is found.
6019 			 */
6020 			ios.iosram_inst = 0;
6021 			ios.eri_dip = NULL;
6022 			ios.bnum = dp->bp->bnum;
6023 
6024 			if (pdip = ddi_get_parent(dip)) {
6025 				ndi_hold_devi(pdip);
6026 				ndi_devi_enter(pdip, &circ);
6027 			}
6028 			/*
6029 			 * Root node doesn't have to be held in any way.
6030 			 */
6031 			ddi_walk_devs(dip,
6032 				drmach_board_find_io_insts, (void *)&ios);
6033 
6034 			if (pdip) {
6035 				ndi_devi_exit(pdip, circ);
6036 				ndi_rele_devi(pdip);
6037 			}
6038 
6039 			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
6040 				ios.bnum, ios.eri_dip);
6041 
6042 			if (ios.eri_dip) {
6043 				DRMACH_PR("calling man_dr_attach\n");
6044 				if ((*func)(ios.eri_dip))
6045 					err = drerr_new(0,
6046 						ESTC_NWSWITCH, NULL);
6047 				/*
6048 				 * Release hold acquired in
6049 				 * drmach_board_find_io_insts()
6050 				 */
6051 				ndi_rele_devi(ios.eri_dip);
6052 			}
6053 		} else
6054 			DRMACH_PR("man_dr_attach NOT present\n");
6055 	}
6056 	return (err);
6057 }
6058 
6059 static sbd_error_t *
6060 drmach_io_release(drmachid_t id)
6061 {
6062 	dev_info_t	*dip;
6063 	sbd_error_t	*err = NULL;
6064 	drmach_device_t	*dp;
6065 
6066 	if (!DRMACH_IS_IO_ID(id))
6067 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6068 	dp = id;
6069 
6070 	dip = dp->node->n_getdip(dp->node);
6071 
6072 	if (dip == NULL)
6073 		err = DRMACH_INTERNAL_ERROR();
6074 	else {
6075 		int (*func)(dev_info_t *dip);
6076 
6077 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
6078 			0);
6079 
6080 		if (func) {
6081 			drmach_io_inst_t ios;
6082 			dev_info_t	*pdip;
6083 			int		circ;
6084 
6085 			/*
6086 			 * Walk device tree to find rio dip for the board
6087 			 * Since we are not interested in iosram instance here,
6088 			 * initialize it to 0, so that the walk terminates as
6089 			 * soon as eri dip is found.
6090 			 */
6091 			ios.iosram_inst = 0;
6092 			ios.eri_dip = NULL;
6093 			ios.bnum = dp->bp->bnum;
6094 
6095 			if (pdip = ddi_get_parent(dip)) {
6096 				ndi_hold_devi(pdip);
6097 				ndi_devi_enter(pdip, &circ);
6098 			}
6099 			/*
6100 			 * Root node doesn't have to be held in any way.
6101 			 */
6102 			ddi_walk_devs(dip,
6103 				drmach_board_find_io_insts, (void *)&ios);
6104 
6105 			if (pdip) {
6106 				ndi_devi_exit(pdip, circ);
6107 				ndi_rele_devi(pdip);
6108 			}
6109 
6110 			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
6111 				ios.bnum, ios.eri_dip);
6112 
6113 			if (ios.eri_dip) {
6114 				DRMACH_PR("calling man_dr_detach\n");
6115 				if ((*func)(ios.eri_dip))
6116 					err = drerr_new(0,
6117 						ESTC_NWSWITCH, NULL);
6118 				/*
6119 				 * Release hold acquired in
6120 				 * drmach_board_find_io_insts()
6121 				 */
6122 				ndi_rele_devi(ios.eri_dip);
6123 			}
6124 		} else
6125 			DRMACH_PR("man_dr_detach NOT present\n");
6126 	}
6127 	return (err);
6128 }
6129 
6130 sbd_error_t *
6131 drmach_io_post_release(drmachid_t id)
6132 {
6133 	char 		*path;
6134 	dev_info_t	*rdip;
6135 	drmach_device_t	*dp;
6136 
6137 	if (!DRMACH_IS_DEVICE_ID(id))
6138 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6139 	dp = id;
6140 
6141 	rdip = dp->node->n_getdip(dp->node);
6142 
6143 	/*
6144 	 * Always called after drmach_unconfigure() which on Starcat
6145 	 * unconfigures the branch but doesn't remove it so the
6146 	 * dip must always exist.
6147 	 */
6148 	ASSERT(rdip);
6149 
6150 	ASSERT(e_ddi_branch_held(rdip));
6151 #ifdef DEBUG
6152 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6153 	(void) ddi_pathname(rdip, path);
6154 	DRMACH_PR("post_release dip path is: %s\n", path);
6155 	kmem_free(path, MAXPATHLEN);
6156 #endif
6157 
6158 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6159 		if (schpc_remove_pci(rdip)) {
6160 			DRMACH_PR("schpc_remove_pci failed\n");
6161 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6162 		} else {
6163 			DRMACH_PR("schpc_remove_pci succeeded\n");
6164 		}
6165 	}
6166 
6167 	return (NULL);
6168 }
6169 
6170 sbd_error_t *
6171 drmach_io_post_attach(drmachid_t id)
6172 {
6173 	int		circ;
6174 	dev_info_t	*dip;
6175 	dev_info_t	*pdip;
6176 	drmach_device_t	*dp;
6177 	drmach_io_inst_t ios;
6178 
6179 	if (!DRMACH_IS_DEVICE_ID(id))
6180 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6181 	dp = id;
6182 
6183 	dip = dp->node->n_getdip(dp->node);
6184 
6185 	/*
6186 	 * We held the branch rooted at dip earlier, so at a minimum the
6187 	 * root i.e. dip must be present in the device tree.
6188 	 */
6189 	ASSERT(dip);
6190 
6191 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6192 		if (schpc_add_pci(dip)) {
6193 			DRMACH_PR("schpc_add_pci failed\n");
6194 		} else {
6195 			DRMACH_PR("schpc_add_pci succeeded\n");
6196 		}
6197 	}
6198 
6199 	/*
6200 	 * Walk device tree to find rio dip for the board
6201 	 * Since we are not interested in iosram instance here,
6202 	 * initialize it to 0, so that the walk terminates as
6203 	 * soon as eri dip is found.
6204 	 */
6205 	ios.iosram_inst = 0;
6206 	ios.eri_dip = NULL;
6207 	ios.bnum = dp->bp->bnum;
6208 
6209 	if (pdip = ddi_get_parent(dip)) {
6210 		ndi_hold_devi(pdip);
6211 		ndi_devi_enter(pdip, &circ);
6212 	}
6213 	/*
6214 	 * Root node doesn't have to be held in any way.
6215 	 */
6216 	ddi_walk_devs(dip, drmach_board_find_io_insts,
6217 				(void *)&ios);
6218 	if (pdip) {
6219 		ndi_devi_exit(pdip, circ);
6220 		ndi_rele_devi(pdip);
6221 	}
6222 
6223 	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
6224 		ios.bnum, ios.eri_dip);
6225 
6226 	if (ios.eri_dip) {
6227 		int (*func)(dev_info_t *dip);
6228 
6229 		func =
6230 		(int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);
6231 
6232 		if (func) {
6233 			DRMACH_PR("calling man_dr_attach\n");
6234 			(void) (*func)(ios.eri_dip);
6235 		} else {
6236 			DRMACH_PR("man_dr_attach NOT present\n");
6237 		}
6238 
6239 		/*
6240 		 * Release hold acquired in drmach_board_find_io_insts()
6241 		 */
6242 		ndi_rele_devi(ios.eri_dip);
6243 
6244 	}
6245 
6246 	return (NULL);
6247 }
6248 
6249 static sbd_error_t *
6250 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6251 {
6252 	drmach_device_t *dp;
6253 	sbd_error_t	*err;
6254 	int		 configured;
6255 
6256 	ASSERT(DRMACH_IS_IO_ID(id));
6257 	dp = id;
6258 
6259 	err = drmach_io_is_attached(id, &configured);
6260 	if (err)
6261 		return (err);
6262 
6263 	stat->assigned = dp->bp->assigned;
6264 	stat->powered = dp->bp->powered;
6265 	stat->configured = (configured != 0);
6266 	stat->busy = dp->busy;
6267 	strncpy(stat->type, dp->type, sizeof (stat->type));
6268 	stat->info[0] = '\0';
6269 
6270 	return (NULL);
6271 }
6272 
6273 sbd_error_t *
6274 drmach_mem_init_size(drmachid_t id)
6275 {
6276 	drmach_mem_t	*mp;
6277 	sbd_error_t	*err;
6278 	gdcd_t		*gdcd;
6279 	mem_chunk_t	*chunk;
6280 	uint64_t	 chunks, pa, mask, sz;
6281 
6282 	if (!DRMACH_IS_MEM_ID(id))
6283 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6284 	mp = id;
6285 
6286 	err = drmach_mem_get_base_physaddr(id, &pa);
6287 	if (err)
6288 		return (err);
6289 
6290 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6291 	pa &= mask;
6292 
6293 	gdcd = drmach_gdcd_new();
6294 	if (gdcd == NULL)
6295 		return (DRMACH_INTERNAL_ERROR());
6296 
6297 	sz = 0;
6298 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6299 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6300 	while (chunks-- != 0) {
6301 		if ((chunk->mc_base_pa & mask) == pa) {
6302 			sz += chunk->mc_mbytes * 1048576;
6303 		}
6304 
6305 		++chunk;
6306 	}
6307 	mp->nbytes = sz;
6308 
6309 	drmach_gdcd_dispose(gdcd);
6310 	return (NULL);
6311 }
6312 
6313 /*
6314  * Hardware registers are organized into consecutively
6315  * addressed registers.  The reg property's hi and lo fields
6316  * together describe the base address of the register set for
6317  * this memory-controller.  Register descriptions and offsets
6318  * (from the base address) are as follows:
6319  *
6320  * Description				Offset	Size (bytes)
6321  * Memory Timing Control Register I	0x00	8
6322  * Memory Timing Control Register II	0x08	8
6323  * Memory Address Decoding Register I	0x10	8
6324  * Memory Address Decoding Register II	0x18	8
6325  * Memory Address Decoding Register III	0x20	8
6326  * Memory Address Decoding Register IV	0x28	8
6327  * Memory Address Control Register	0x30	8
6328  * Memory Timing Control Register III	0x38	8
6329  * Memory Timing Control Register IV	0x40	8
6330  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6331  * EMU Activity Status Register		0x50	8 (Panther only)
6332  *
6333  * Only the Memory Address Decoding Register and EMU Activity Status
6334  * Register addresses are needed for DRMACH.
6335  */
/*
 * Construct a drmach_mem_t from the prototype device 'proto'.
 *
 * *idp receives the new object only when it is the first mem unit seen
 * on its board; in every other non-error case handled here *idp is set
 * to zero.  Returns the error from drmach_read_reg_addr() if that
 * fails (in which case *idp is not written), and NULL otherwise.
 */
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
	/* forward declarations of the ops installed below */
	static void drmach_mem_dispose(drmachid_t);
	static sbd_error_t *drmach_mem_release(drmachid_t);
	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	 madr_pa;
	drmach_mem_t	*mp;
	int		 bank, count;

	err = drmach_read_reg_addr(proto, &madr_pa);
	if (err)
		return (err);

	/* start from a copy of the prototype, with a private node copy */
	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
	bcopy(proto, &mp->dev, sizeof (mp->dev));
	mp->dev.node = drmach_node_dup(proto->node);
	mp->dev.cm.isa = (void *)drmach_mem_new;
	mp->dev.cm.dispose = drmach_mem_dispose;
	mp->dev.cm.release = drmach_mem_release;
	mp->dev.cm.status = drmach_mem_status;
	mp->madr_pa = madr_pa;

	snprintf(mp->dev.cm.name,
		sizeof (mp->dev.cm.name), "%s", mp->dev.type);

	/*
	 * Look for a bank with its valid bit set.  The loop stops at
	 * the first one, so count ends up as either 0 or 1.
	 */
	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
		uint64_t madr;

		drmach_mem_read_madr(mp, bank, &madr);
		if (madr & DRMACH_MC_VALID_MASK) {
			count += 1;
			break;
		}
	}

	/*
	 * If none of the banks had their valid bit set, that means
	 * post did not configure this MC to participate in the
	 * domain.  So, pretend this node does not exist by returning
	 * a drmachid of zero.
	 */
	if (count == 0) {
		/* drmach_mem_dispose frees board mem list */
		drmach_node_dispose(mp->dev.node);
		kmem_free(mp, sizeof (*mp));
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Only one mem unit per board is exposed to the
	 * PIM layer.  The first mem unit encountered during
	 * tree walk is used to represent all mem units on
	 * the same board.
	 */
	if (mp->dev.bp->mem == NULL) {
		/* start list of mem units on this board */
		mp->dev.bp->mem = mp;

		/*
		 * force unum to zero since this is the only mem unit
		 * that will be visible to the PIM layer.
		 */
		mp->dev.unum = 0;

		/*
		 * board memory size kept in this mem unit only
		 */
		err = drmach_mem_init_size(mp);
		if (err) {
			/*
			 * NOTE(review): err is neither returned nor
			 * released on this path -- confirm whether it
			 * should be freed or propagated.
			 */
			mp->dev.bp->mem = NULL;
			/* drmach_mem_dispose frees board mem list */
			drmach_node_dispose(mp->dev.node);
			kmem_free(mp, sizeof (*mp));
			*idp = (drmachid_t)0;
			return (NULL);
		}

		/*
		 * allow this instance (the first encountered on this board)
		 * to be visible to the PIM layer.
		 */
		*idp = (drmachid_t)mp;
	} else {
		drmach_mem_t *lp;

		/* hide this mem instance behind the first. */
		/* (walk to the tail of the board's list and append there) */
		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
			;
		lp->next = mp;

		/*
		 * hide this instance from the caller.
		 * See drmach_board_find_devices_cb() for details.
		 */
		*idp = (drmachid_t)0;
	}

	return (NULL);
}
6439 
6440 static void
6441 drmach_mem_dispose(drmachid_t id)
6442 {
6443 	drmach_mem_t *mp, *next;
6444 	drmach_board_t *bp;
6445 
6446 	ASSERT(DRMACH_IS_MEM_ID(id));
6447 
6448 	mutex_enter(&drmach_bus_sync_lock);
6449 
6450 	mp = id;
6451 	bp = mp->dev.bp;
6452 
6453 	do {
6454 		if (mp->dev.node)
6455 			drmach_node_dispose(mp->dev.node);
6456 
6457 		next = mp->next;
6458 		kmem_free(mp, sizeof (*mp));
6459 		mp = next;
6460 	} while (mp);
6461 
6462 	bp->mem = NULL;
6463 
6464 	drmach_bus_sync_list_update();
6465 	mutex_exit(&drmach_bus_sync_lock);
6466 }
6467 
6468 sbd_error_t *
6469 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6470 {
6471 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6472 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6473 	int		rv;
6474 
6475 	ASSERT(size != 0);
6476 
6477 	if (!DRMACH_IS_MEM_ID(id))
6478 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6479 
6480 	kcage_range_lock();
6481 	rv = kcage_range_add(basepfn, npages, 1);
6482 	kcage_range_unlock();
6483 	if (rv == ENOMEM) {
6484 		cmn_err(CE_WARN, "%lu megabytes not available"
6485 			" to kernel cage", size >> 20);
6486 	} else if (rv != 0) {
6487 		/* catch this in debug kernels */
6488 		ASSERT(0);
6489 
6490 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6491 			" return value %d", rv);
6492 	}
6493 
6494 	return (NULL);
6495 }
6496 
6497 sbd_error_t *
6498 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6499 {
6500 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6501 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6502 	int		 rv;
6503 
6504 	if (!DRMACH_IS_MEM_ID(id))
6505 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6506 
6507 	if (size > 0) {
6508 		kcage_range_lock();
6509 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6510 		kcage_range_unlock();
6511 		if (rv != 0) {
6512 			cmn_err(CE_WARN,
6513 			    "unexpected kcage_range_delete_post_mem_del"
6514 			    " return value %d", rv);
6515 			return (DRMACH_INTERNAL_ERROR());
6516 		}
6517 	}
6518 
6519 	return (NULL);
6520 }
6521 
6522 sbd_error_t *
6523 drmach_mem_disable(drmachid_t id)
6524 {
6525 	if (!DRMACH_IS_MEM_ID(id))
6526 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6527 	else
6528 		return (NULL);
6529 }
6530 
6531 sbd_error_t *
6532 drmach_mem_enable(drmachid_t id)
6533 {
6534 	if (!DRMACH_IS_MEM_ID(id))
6535 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6536 	else
6537 		return (NULL);
6538 }
6539 
6540 sbd_error_t *
6541 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
6542 {
6543 #define	MB(mb) ((mb) * 1048576ull)
6544 
6545 	static struct {
6546 		uint_t		uk;
6547 		uint64_t	segsz;
6548 	}  uk2segsz[] = {
6549 		{ 0x003,	MB(256)	  },
6550 		{ 0x007,	MB(512)	  },
6551 		{ 0x00f,	MB(1024)  },
6552 		{ 0x01f,	MB(2048)  },
6553 		{ 0x03f,	MB(4096)  },
6554 		{ 0x07f,	MB(8192)  },
6555 		{ 0x0ff,	MB(16384) },
6556 		{ 0x1ff,	MB(32768) },
6557 		{ 0x3ff,	MB(65536) },
6558 		{ 0x7ff,	MB(131072) }
6559 	};
6560 	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);
6561 
6562 #undef MB
6563 
6564 	uint64_t	 largest_sz = 0;
6565 	drmach_mem_t	*mp;
6566 
6567 	if (!DRMACH_IS_MEM_ID(id))
6568 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6569 
6570 	/* prime the result with a default value */
6571 	*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6572 
6573 	for (mp = id; mp; mp = mp->next) {
6574 		int bank;
6575 
6576 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6577 			int		i;
6578 			uint_t		uk;
6579 			uint64_t	madr;
6580 
6581 			/* get register value, extract uk and normalize */
6582 			drmach_mem_read_madr(mp, bank, &madr);
6583 
6584 			if (!(madr & DRMACH_MC_VALID_MASK))
6585 				continue;
6586 
6587 			uk = DRMACH_MC_UK(madr);
6588 
6589 			/* match uk value */
6590 			for (i = 0; i < len; i++)
6591 				if (uk == uk2segsz[i].uk)
6592 					break;
6593 
6594 			if (i < len) {
6595 				uint64_t sz = uk2segsz[i].segsz;
6596 
6597 				/*
6598 				 * remember largest segment size,
6599 				 * update mask result
6600 				 */
6601 				if (sz > largest_sz) {
6602 					largest_sz = sz;
6603 					*mask = sz - 1;
6604 				}
6605 			} else {
6606 				/*
6607 				 * uk not in table, punt using
6608 				 * entire slice size. no longer any
6609 				 * reason to check other banks.
6610 				 */
6611 				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6612 				return (NULL);
6613 			}
6614 		}
6615 	}
6616 
6617 	return (NULL);
6618 }
6619 
6620 sbd_error_t *
6621 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6622 {
6623 	drmach_mem_t *mp;
6624 
6625 	if (!DRMACH_IS_MEM_ID(id))
6626 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6627 
6628 	*base_addr = (uint64_t)-1;
6629 	for (mp = id; mp; mp = mp->next) {
6630 		int bank;
6631 
6632 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6633 			uint64_t addr, madr;
6634 
6635 			drmach_mem_read_madr(mp, bank, &madr);
6636 			if (madr & DRMACH_MC_VALID_MASK) {
6637 				addr = DRMACH_MC_UM_TO_PA(madr) |
6638 					DRMACH_MC_LM_TO_PA(madr);
6639 
6640 				if (addr < *base_addr)
6641 					*base_addr = addr;
6642 			}
6643 		}
6644 	}
6645 
6646 	/* should not happen, but ... */
6647 	if (*base_addr == (uint64_t)-1)
6648 		return (DRMACH_INTERNAL_ERROR());
6649 
6650 	return (NULL);
6651 }
6652 
6653 void
6654 drmach_bus_sync_list_update(void)
6655 {
6656 	int		rv, idx, cnt = 0;
6657 	drmachid_t	id;
6658 
6659 	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));
6660 
6661 	rv = drmach_array_first(drmach_boards, &idx, &id);
6662 	while (rv == 0) {
6663 		drmach_board_t		*bp = id;
6664 		drmach_mem_t		*mp = bp->mem;
6665 
6666 		while (mp) {
6667 			int bank;
6668 
6669 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6670 				uint64_t madr;
6671 
6672 				drmach_mem_read_madr(mp, bank, &madr);
6673 				if (madr & DRMACH_MC_VALID_MASK) {
6674 					uint64_t pa;
6675 
6676 					pa  = DRMACH_MC_UM_TO_PA(madr);
6677 					pa |= DRMACH_MC_LM_TO_PA(madr);
6678 
6679 					/*
6680 					 * The list is zero terminated.
6681 					 * Offset the pa by a doubleword
6682 					 * to avoid confusing a pa value of
6683 					 * of zero with the terminator.
6684 					 */
6685 					pa += sizeof (uint64_t);
6686 
6687 					drmach_bus_sync_list[cnt++] = pa;
6688 				}
6689 			}
6690 
6691 			mp = mp->next;
6692 		}
6693 
6694 		rv = drmach_array_next(drmach_boards, &idx, &id);
6695 	}
6696 
6697 	drmach_bus_sync_list[cnt] = 0;
6698 }
6699 
6700 sbd_error_t *
6701 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6702 {
6703 	sbd_error_t	*err;
6704 	struct memlist	*mlist;
6705 	gdcd_t		*gdcd;
6706 	mem_chunk_t	*chunk;
6707 	uint64_t	 chunks, pa, mask;
6708 
6709 	err = drmach_mem_get_base_physaddr(id, &pa);
6710 	if (err)
6711 		return (err);
6712 
6713 	gdcd = drmach_gdcd_new();
6714 	if (gdcd == NULL)
6715 		return (DRMACH_INTERNAL_ERROR());
6716 
6717 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6718 	pa &= mask;
6719 
6720 	mlist = NULL;
6721 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6722 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6723 	while (chunks-- != 0) {
6724 		if ((chunk->mc_base_pa & mask) == pa) {
6725 			mlist = memlist_add_span(mlist,
6726 				chunk->mc_base_pa, chunk->mc_mbytes * 1048576);
6727 		}
6728 
6729 		++chunk;
6730 	}
6731 
6732 	drmach_gdcd_dispose(gdcd);
6733 
6734 #ifdef DEBUG
6735 	DRMACH_PR("GDCD derived memlist:");
6736 	memlist_dump(mlist);
6737 #endif
6738 
6739 	*ml = mlist;
6740 	return (NULL);
6741 }
6742 
6743 sbd_error_t *
6744 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6745 {
6746 	drmach_mem_t	*mp;
6747 
6748 	if (!DRMACH_IS_MEM_ID(id))
6749 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6750 	mp = id;
6751 
6752 	ASSERT(mp->nbytes != 0);
6753 	*bytes = mp->nbytes;
6754 
6755 	return (NULL);
6756 }
6757 
6758 sbd_error_t *
6759 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6760 {
6761 	sbd_error_t	*err;
6762 	drmach_device_t	*mp;
6763 
6764 	if (!DRMACH_IS_MEM_ID(id))
6765 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6766 	mp = id;
6767 
6768 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6769 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6770 			err = NULL;
6771 			break;
6772 
6773 		case 1: *bytes = 0;
6774 			err = NULL;
6775 			break;
6776 
6777 		default:
6778 			err = DRMACH_INTERNAL_ERROR();
6779 			break;
6780 	}
6781 
6782 	return (err);
6783 }
6784 
6785 processorid_t drmach_mem_cpu_affinity_nail;
6786 
6787 processorid_t
6788 drmach_mem_cpu_affinity(drmachid_t id)
6789 {
6790 	drmach_device_t	*mp;
6791 	drmach_board_t	*bp;
6792 	processorid_t	 cpuid;
6793 
6794 	if (!DRMACH_IS_MEM_ID(id))
6795 		return (CPU_CURRENT);
6796 
6797 	if (drmach_mem_cpu_affinity_nail) {
6798 		cpuid = drmach_mem_cpu_affinity_nail;
6799 
6800 		if (cpuid < 0 || cpuid > NCPU)
6801 			return (CPU_CURRENT);
6802 
6803 		mutex_enter(&cpu_lock);
6804 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6805 			cpuid = CPU_CURRENT;
6806 		mutex_exit(&cpu_lock);
6807 
6808 		return (cpuid);
6809 	}
6810 
6811 	/* try to choose a proc on the target board */
6812 	mp = id;
6813 	bp = mp->bp;
6814 	if (bp->devices) {
6815 		int		 rv;
6816 		int		 d_idx;
6817 		drmachid_t	 d_id;
6818 
6819 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6820 		while (rv == 0) {
6821 			if (DRMACH_IS_CPU_ID(d_id)) {
6822 				drmach_cpu_t	*cp = d_id;
6823 
6824 				mutex_enter(&cpu_lock);
6825 				cpuid = cp->cpuid;
6826 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6827 					mutex_exit(&cpu_lock);
6828 					return (cpuid);
6829 				} else {
6830 					mutex_exit(&cpu_lock);
6831 				}
6832 			}
6833 
6834 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6835 		}
6836 	}
6837 
6838 	/* otherwise, this proc, wherever it is */
6839 	return (CPU_CURRENT);
6840 }
6841 
6842 static sbd_error_t *
6843 drmach_mem_release(drmachid_t id)
6844 {
6845 	if (!DRMACH_IS_MEM_ID(id))
6846 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6847 	return (NULL);
6848 }
6849 
6850 static sbd_error_t *
6851 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6852 {
6853 	drmach_mem_t	*mp;
6854 	sbd_error_t	*err;
6855 	uint64_t	 pa, slice_size;
6856 	struct memlist	*ml;
6857 
6858 	ASSERT(DRMACH_IS_MEM_ID(id));
6859 	mp = id;
6860 
6861 	/* get starting physical address of target memory */
6862 	err = drmach_mem_get_base_physaddr(id, &pa);
6863 	if (err)
6864 		return (err);
6865 
6866 	/* round down to slice boundary */
6867 	slice_size = DRMACH_MEM_SLICE_SIZE;
6868 	pa &= ~ (slice_size - 1);
6869 
6870 	/* stop at first span that is in slice */
6871 	memlist_read_lock();
6872 	for (ml = phys_install; ml; ml = ml->next)
6873 		if (ml->address >= pa && ml->address < pa + slice_size)
6874 			break;
6875 	memlist_read_unlock();
6876 
6877 	stat->assigned = mp->dev.bp->assigned;
6878 	stat->powered = mp->dev.bp->powered;
6879 	stat->configured = (ml != NULL);
6880 	stat->busy = mp->dev.busy;
6881 	strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6882 	stat->info[0] = '\0';
6883 
6884 	return (NULL);
6885 }
6886 
6887 sbd_error_t *
6888 drmach_board_deprobe(drmachid_t id)
6889 {
6890 	drmach_board_t	*bp;
6891 	sbd_error_t	*err = NULL;
6892 
6893 	if (!DRMACH_IS_BOARD_ID(id))
6894 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6895 	bp = id;
6896 
6897 	if (bp->tree) {
6898 		drmach_node_dispose(bp->tree);
6899 		bp->tree = NULL;
6900 	}
6901 	if (bp->devices) {
6902 		drmach_array_dispose(bp->devices, drmach_device_dispose);
6903 		bp->devices = NULL;
6904 		bp->mem = NULL;  /* TODO: still needed? */
6905 	}
6906 	return (err);
6907 }
6908 
6909 /*ARGSUSED1*/
6910 static sbd_error_t *
6911 drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6912 {
6913 	drmach_device_t	*dp;
6914 	uint64_t	val;
6915 	int		err = 1;
6916 
6917 	if (DRMACH_IS_CPU_ID(id)) {
6918 		drmach_cpu_t *cp = id;
6919 		if (drmach_cpu_read_scr(cp, &val))
6920 			err = 0;
6921 	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6922 		drmach_io_t *io = id;
6923 		val = lddphysio(io->scsr_pa);
6924 		err = 0;
6925 	}
6926 	if (err)
6927 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6928 
6929 	dp = id;
6930 	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6931 		dp->bp->cm.name,
6932 		dp->cm.name,
6933 		dp->portid,
6934 		DRMACH_LPA_BASE_TO_PA(val),
6935 		DRMACH_LPA_BND_TO_PA(val));
6936 
6937 	return (NULL);
6938 }
6939 
6940 /*ARGSUSED*/
6941 static sbd_error_t *
6942 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6943 {
6944 
6945 	drmach_board_t		*bp = (drmach_board_t *)id;
6946 
6947 	sbd_error_t		*err;
6948 	sc_gptwocfg_cookie_t	scc;
6949 
6950 	if (!DRMACH_IS_BOARD_ID(id))
6951 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6952 
6953 	/* do saf configurator stuff */
6954 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6955 	scc = sc_probe_board(bp->bnum);
6956 	if (scc == NULL) {
6957 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6958 		return (err);
6959 	}
6960 
6961 	return (err);
6962 }
6963 
6964 /*ARGSUSED*/
6965 static sbd_error_t *
6966 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6967 {
6968 
6969 	drmach_board_t	*bp;
6970 	sbd_error_t	*err = NULL;
6971 	sc_gptwocfg_cookie_t	scc;
6972 
6973 	if (!DRMACH_IS_BOARD_ID(id))
6974 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6975 	bp = id;
6976 
6977 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6978 	scc = sc_unprobe_board(bp->bnum);
6979 	if (scc != NULL) {
6980 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6981 	}
6982 
6983 	if (err == NULL)
6984 		err = drmach_board_deprobe(id);
6985 
6986 	return (err);
6987 
6988 }
6989 
6990 static sbd_error_t *
6991 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6992 {
6993 	_NOTE(ARGUNUSED(id))
6994 	_NOTE(ARGUNUSED(opts))
6995 
6996 	struct memlist	*ml;
6997 	uint64_t	src_pa;
6998 	uint64_t	dst_pa;
6999 	uint64_t	dst;
7000 
7001 	dst_pa = va_to_pa(&dst);
7002 
7003 	memlist_read_lock();
7004 	for (ml = phys_install; ml; ml = ml->next) {
7005 		uint64_t	nbytes;
7006 
7007 		src_pa = ml->address;
7008 		nbytes = ml->size;
7009 
7010 		while (nbytes != 0ull) {
7011 
7012 			/* copy 32 bytes at src_pa to dst_pa */
7013 			bcopy32_il(src_pa, dst_pa);
7014 
7015 			/* increment by 32 bytes */
7016 			src_pa += (4 * sizeof (uint64_t));
7017 
7018 			/* decrement by 32 bytes */
7019 			nbytes -= (4 * sizeof (uint64_t));
7020 		}
7021 	}
7022 	memlist_read_unlock();
7023 
7024 	return (NULL);
7025 }
7026 
7027 static sbd_error_t *
7028 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
7029 {
7030 	_NOTE(ARGUNUSED(opts))
7031 
7032 	drmach_cpu_t	*cp;
7033 
7034 	if (!DRMACH_IS_CPU_ID(id))
7035 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7036 	cp = id;
7037 
7038 	mutex_enter(&cpu_lock);
7039 	(void) drmach_iocage_cpu_return(&(cp->dev),
7040 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
7041 	mutex_exit(&cpu_lock);
7042 
7043 	return (NULL);
7044 }
7045 
/*
 * Starcat DR passthrus are for debugging purposes only.
 *
 * Each entry pairs a passthru command name with the function that
 * implements it.  drmach_passthru() scans the entries in order and
 * dispatches to the first one whose name matches the leading
 * characters of the option string; the scan stops at the entry whose
 * name is NULL.
 */
static struct {
	const char	*name;		/* command name to match */
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "showlpa",		drmach_pt_showlpa		},
	{ "ikprobe",		drmach_pt_ikprobe		},
	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
	{ "readmem",		drmach_pt_readmem		},
	{ "recovercpu",		drmach_pt_recovercpu		},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
7062 
7063 /*ARGSUSED*/
7064 sbd_error_t *
7065 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
7066 {
7067 	int		i;
7068 	sbd_error_t	*err;
7069 
7070 	i = 0;
7071 	while (drmach_pt_arr[i].name != NULL) {
7072 		int len = strlen(drmach_pt_arr[i].name);
7073 
7074 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
7075 			break;
7076 
7077 		i += 1;
7078 	}
7079 
7080 	if (drmach_pt_arr[i].name == NULL)
7081 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
7082 	else
7083 		err = (*drmach_pt_arr[i].handler)(id, opts);
7084 
7085 	return (err);
7086 }
7087 
7088 sbd_error_t *
7089 drmach_release(drmachid_t id)
7090 {
7091 	drmach_common_t *cp;
7092 
7093 	if (!DRMACH_IS_DEVICE_ID(id))
7094 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7095 	cp = id;
7096 
7097 	return (cp->release(id));
7098 }
7099 
7100 sbd_error_t *
7101 drmach_status(drmachid_t id, drmach_status_t *stat)
7102 {
7103 	drmach_common_t *cp;
7104 	sbd_error_t	*err;
7105 
7106 	rw_enter(&drmach_boards_rwlock, RW_READER);
7107 
7108 	if (!DRMACH_IS_ID(id)) {
7109 		rw_exit(&drmach_boards_rwlock);
7110 		return (drerr_new(0, ESTC_NOTID, NULL));
7111 	}
7112 
7113 	cp = id;
7114 
7115 	err = cp->status(id, stat);
7116 	rw_exit(&drmach_boards_rwlock);
7117 	return (err);
7118 }
7119 
7120 static sbd_error_t *
7121 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7122 {
7123 	drmach_common_t *cp;
7124 
7125 	if (!DRMACH_IS_ID(id))
7126 		return (drerr_new(0, ESTC_NOTID, NULL));
7127 	cp = id;
7128 
7129 	return (cp->status(id, stat));
7130 }
7131 
/*
 * Unconfigure the DDI branch behind a device id.
 *
 * A cpu id returns NULL immediately.  For a memory id the loop walks
 * the chain of memory units through ->next, handling one unit per
 * iteration; for any other device id the loop runs once.
 *
 * Returns:
 *   NULL			on success, and also when a node's "name"
 *				property cannot be read.
 *   ESTC_INAPPROP error	if an id in the walk is not a device id.
 *   ESTC_DRVFAIL error		if e_ddi_branch_unconfigure() fails; the
 *				error carries the pathname of the failing
 *				dip when one is reported, otherwise of the
 *				branch root.
 *
 * The flags argument is not used.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_unconfigure(drmachid_t id, int flags)
{
	drmach_device_t	*dp;
	dev_info_t 	*rdip;

	char	name[OBP_MAXDRVNAME];
	int rv;

	/*
	 * Since CPU nodes are not configured, it is
	 * necessary to skip the unconfigure step as
	 * well.
	 */
	if (DRMACH_IS_CPU_ID(id)) {
		return (NULL);
	}

	for (; id; ) {
		dev_info_t	*fdip = NULL;

		if (!DRMACH_IS_DEVICE_ID(id))
			return (drerr_new(0, ESTC_INAPPROP, NULL));
		dp = id;

		rdip = dp->node->n_getdip(dp->node);

		/*
		 * drmach_unconfigure() is always called on a configured branch.
		 * So the root of the branch was held earlier and must exist.
		 */
		ASSERT(rdip);

		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");

		rv = dp->node->n_getprop(dp->node,
		    "name", name, OBP_MAXDRVNAME);

		/*
		 * The node must have a name.
		 * NOTE(review): a failed property read returns 0 here,
		 * which callers see as "no error" -- confirm intended.
		 */
		if (rv)
			return (0);

		/*
		 * Nodes whose name has no drmach type index are not
		 * unconfigured; just advance to the next unit (memory)
		 * or end the walk (everything else).
		 */
		if (drmach_name2type_idx(name) < 0) {
			if (DRMACH_IS_MEM_ID(id)) {
				drmach_mem_t	*mp = id;
				id = mp->next;
			} else {
				id = NULL;
			}
			continue;
		}

		/*
		 * NOTE: FORCE flag is no longer needed under devfs
		 */
		ASSERT(e_ddi_branch_held(rdip));
		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
			sbd_error_t	*err = NULL;
			char		*path = kmem_alloc(MAXPATHLEN,
					    KM_SLEEP);

			/*
			 * If non-NULL, fdip is returned held and must be
			 * released.
			 */
			if (fdip != NULL) {
				(void) ddi_pathname(fdip, path);
				ddi_release_devi(fdip);
			} else {
				(void) ddi_pathname(rdip, path);
			}

			err = drerr_new(1, ESTC_DRVFAIL, path);

			kmem_free(path, MAXPATHLEN);

			/*
			 * If we were unconfiguring an IO board, a call was
			 * made to man_dr_detach.  We now need to call
			 * man_dr_attach to regain man use of the eri.
			 */
			if (DRMACH_IS_IO_ID(id)) {
				int (*func)(dev_info_t *dip);

				/* looked up at run time; may be absent */
				func = (int (*)(dev_info_t *))kobj_getsymvalue\
					("man_dr_attach", 0);

				if (func) {
					drmach_io_inst_t ios;
					dev_info_t 	*pdip;
					int		circ;

					/*
					 * Walk device tree to find rio dip for
					 * the board
					 * Since we are not interested in iosram
					 * instance here, initialize it to 0, so
					 * that the walk terminates as soon as
					 * eri dip is found.
					 */
					ios.iosram_inst = 0;
					ios.eri_dip = NULL;
					ios.bnum = dp->bp->bnum;

					/* hold and enter the parent, if any */
					if (pdip = ddi_get_parent(rdip)) {
						ndi_hold_devi(pdip);
						ndi_devi_enter(pdip, &circ);
					}
					/*
					 * Root node doesn't have to be held in
					 * any way.
					 */
					ASSERT(e_ddi_branch_held(rdip));
					ddi_walk_devs(rdip,
						drmach_board_find_io_insts,
						(void *)&ios);

					DRMACH_PR("drmach_unconfigure: bnum=%d"
						" eri=0x%p\n",
						ios.bnum, ios.eri_dip);

					if (pdip) {
						ndi_devi_exit(pdip, circ);
						ndi_rele_devi(pdip);
					}

					if (ios.eri_dip) {
						DRMACH_PR("calling"
							" man_dr_attach\n");
						(void) (*func)(ios.eri_dip);
						/*
						 * Release hold acquired in
						 * drmach_board_find_io_insts()
						 */
						ndi_rele_devi(ios.eri_dip);
					}
				}
			}
			return (err);
		}

		/* branch unconfigured; move on to the next unit, if any */
		if (DRMACH_IS_MEM_ID(id)) {
			drmach_mem_t	*mp = id;
			id = mp->next;
		} else {
			id = NULL;
		}
	}

	return (NULL);
}
7284 
7285 /*
7286  * drmach interfaces to legacy Starfire platmod logic
7287  * linkage via runtime symbol look up, called from plat_cpu_power*
7288  */
7289 
/*
 * Start up a cpu.  It is possible that we're attempting to restart
 * the cpu after an UNCONFIGURE, in which case the cpu will be
 * spinning in its cache.  So, all we have to do is wake it up.
 * Under normal circumstances the cpu will be coming from a previous
 * CONNECT and thus will be spinning in OBP.  In both cases, the
 * startup sequence is the same.
 */
7298 int
7299 drmach_cpu_poweron(struct cpu *cp)
7300 {
7301 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7302 
7303 	ASSERT(MUTEX_HELD(&cpu_lock));
7304 
7305 	if (drmach_cpu_start(cp) != 0)
7306 		return (EBUSY);
7307 	else
7308 		return (0);
7309 }
7310 
/*
 * Stop a cpu.  The caller must hold cpu_lock.
 *
 * The target is quiesced and removed from the prom's view while the
 * other cpus are paused, then cross-trapped into
 * drmach_cpu_shutdown_self via idle_stop_xcall.  This function polls
 * drmach_xt_mb[cpuid] for up to drmach_cpu_ntries delays of
 * drmach_cpu_delay, waiting for the mailbox to be cleared.
 *
 * Always returns 0, including when the polling loop runs out of tries.
 */
int
drmach_cpu_poweroff(struct cpu *cp)
{
	int		ntries;
	processorid_t	cpuid;
	void		drmach_cpu_shutdown_self(void);

	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * XXX CHEETAH SUPPORT
	 * for cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		/* wait for the cage to be free, then claim it */
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	cpuid = cp->cpu_id;

	/*
	 * Set affinity to ensure consistent reading and writing of
	 * drmach_xt_mb[cpuid] by one "master" CPU directing
	 * the shutdown of the target CPU.
	 */
	affinity_set(CPU->cpu_id);

	/*
	 * Capture all CPUs (except for detaching proc) to prevent
	 * crosscalls to the detaching proc until it has cleared its
	 * bit in cpu_ready_set.
	 *
	 * The CPUs remain paused and the prom_mutex is known to be free.
	 * This prevents blocking when doing prom IEEE-1275 calls at a
	 * high PIL level.
	 */
	promsafe_pause_cpus();

	/*
	 * Quiesce interrupts on the target CPU. We do this by setting
	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
	 * prevent it from receiving cross calls and cross traps.
	 * This prevents the processor from receiving any new soft interrupts.
	 */
	mp_cpu_quiesce(cp);

	prom_hotremovecpu(cpuid);

	start_cpus();

	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
	drmach_xt_mb[cpuid] = 0x80;

	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
		(uint64_t)drmach_cpu_shutdown_self, NULL);

	/* poll the mailbox until it is cleared or the tries run out */
	ntries = drmach_cpu_ntries;
	while (drmach_xt_mb[cpuid] && ntries) {
		DELAY(drmach_cpu_delay);
		ntries--;
	}

	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */

	membar_sync();			/* make sure copy-back retires */

	affinity_clear();

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		/* release the cage claimed above and wake one waiter */
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	DRMACH_PR("waited %d out of %d tries for "
		"drmach_cpu_shutdown_self on cpu%d",
		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);

	/*
	 * Do this here instead of drmach_cpu_shutdown_self() to
	 * avoid an assertion failure panic in turnstile.c.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);

	return (0);
}
7409 
7410 void
7411 drmach_iocage_mem_scrub(uint64_t nbytes)
7412 {
7413 	extern int drmach_bc_bzero(void*, size_t);
7414 	int	rv;
7415 
7416 	ASSERT(MUTEX_HELD(&cpu_lock));
7417 
7418 	affinity_set(CPU->cpu_id);
7419 
7420 	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
7421 	if (rv != 0) {
7422 		DRMACH_PR(
7423 		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
7424 		rv = drmach_bc_bzero(drmach_iocage_vaddr,
7425 			drmach_iocage_size);
7426 		if (rv != 0)
7427 			cmn_err(CE_PANIC,
7428 			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
7429 			    rv);
7430 	}
7431 
7432 	cpu_flush_ecache();
7433 
7434 	affinity_clear();
7435 }
7436 
/*
 * Round x up to the next multiple of a, where a is expected to be a
 * power of two (the rounding is done with a bit mask).  An alignment
 * of zero leaves x unchanged.  The result has an unsigned integer type
 * of pointer width.
 */
#define	ALIGN(x, a)	\
	((a) != 0 ? \
	(((uintptr_t)(x) + ((uintptr_t)(a) - 1l)) & ~((uintptr_t)(a) - 1l)) : \
	(uintptr_t)(x))
7439 
7440 static sbd_error_t *
7441 drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7442 {
7443 	pfn_t		basepfn;
7444 	pgcnt_t		npages;
7445 	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7446 	uint64_t	drmach_iocage_paddr_mbytes;
7447 
7448 	ASSERT(drmach_iocage_paddr != -1);
7449 
7450 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7451 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7452 
7453 	memscrub_delete_span(basepfn, npages);
7454 
7455 	mutex_enter(&cpu_lock);
7456 	drmach_iocage_mem_scrub(drmach_iocage_size);
7457 	mutex_exit(&cpu_lock);
7458 
7459 	/*
7460 	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7461 	 * and in megabyte units.
7462 	 * The size of the cage is also in megabyte units.
7463 	 */
7464 	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7465 
7466 	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7467 
7468 	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7469 	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7470 	tbrq->memlen = drmach_iocage_size / 0x100000;
7471 
7472 	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7473 	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7474 	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7475 
7476 	return (NULL);
7477 }
7478 
7479 static sbd_error_t *
7480 drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7481 {
7482 	_NOTE(ARGUNUSED(tbr))
7483 
7484 	pfn_t		basepfn;
7485 	pgcnt_t		npages;
7486 	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7487 
7488 	ASSERT(drmach_iocage_paddr != -1);
7489 
7490 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7491 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7492 
7493 	memscrub_add_span(basepfn, npages);
7494 
7495 	mutex_enter(&cpu_lock);
7496 	mutex_enter(&drmach_iocage_lock);
7497 	drmach_iocage_mem_scrub(drmach_iocage_size);
7498 	drmach_iocage_is_busy = 0;
7499 	cv_signal(&drmach_iocage_cv);
7500 	mutex_exit(&drmach_iocage_lock);
7501 	mutex_exit(&cpu_lock);
7502 
7503 	return (NULL);
7504 }
7505 
7506 static int
7507 drmach_cpu_intr_disable(cpu_t *cp)
7508 {
7509 	if (cpu_intr_disable(cp) != 0)
7510 		return (-1);
7511 	return (0);
7512 }
7513 
/*
 * Try to take one cpu out of service for use in an I/O cage test.
 * The caller must hold cpu_lock.
 *
 * The cpu is rejected (-1) if its device is already busy, if it has
 * no cpu structure, if it is not active, if its implementation cannot
 * be determined, or if it is a Jaguar on port 0 while
 * drmach_iocage_exclude_jaguar_port_zero is set.
 *
 * Otherwise the cpu's current flags are saved in *oflags and the cpu
 * is offlined, powered off and unconfigured in that order.  When the
 * poweroff or unconfigure step fails, the earlier steps are undone on
 * a best-effort basis (failures to undo are only warned about) and -1
 * is returned.
 *
 * Returns 0 with dp->busy set on success.  dp->busy is also set, with
 * a -1 return, when the cpu cannot be found again after a failed
 * unconfigure.
 */
static int
drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
{
	struct cpu	*cp;
	processorid_t	cpuid;
	static char	*fn = "drmach_iocage_cpu_acquire";
	sbd_error_t 	*err;
	int 		impl;

	ASSERT(DRMACH_IS_CPU_ID(dp));
	ASSERT(MUTEX_HELD(&cpu_lock));

	cpuid = ((drmach_cpu_t *)dp)->cpuid;

	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);

	if (dp->busy)
		return (-1);

	if ((cp = cpu_get(cpuid)) == NULL) {
		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
		return (-1);
	}

	if (!CPU_ACTIVE(cp)) {
		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
		return (-1);
	}

	/*
	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
	 * can fail to receive an XIR. To workaround this issue until a hardware
	 * fix is implemented, we will exclude the selection of these CPUs.
	 *
	 * Once a fix is implemented in hardware, this code should be updated
	 * to allow Jaguar CPUs that have the fix to be used. However, support
	 * must be retained to skip revisions that do not have this fix.
	 */

	err = drmach_cpu_get_impl(dp, &impl);
	if (err) {
		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
		sbd_err_clear(&err);
		return (-1);
	}

	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
	    drmach_iocage_exclude_jaguar_port_zero) {
		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
		    fn, cpuid);
		return (-1);
	}

	/* remember the flags so a later restore can honor no-intr */
	ASSERT(oflags);
	*oflags = cp->cpu_flags;

	if (cpu_offline(cp, 0)) {
		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
		return (-1);
	}

	if (cpu_poweroff(cp)) {
		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
		/* undo the offline, then put back no-intr if it was set */
		if (cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to online CPU id %d "
			    "during I/O cage test selection", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	if (cpu_unconfigure(cpuid)) {
		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
		    cpuid);
		/* try to bring the cpu all the way back into service */
		(void) cpu_configure(cpuid);
		if ((cp = cpu_get(cpuid)) == NULL) {
			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
			    "during I/O cage test selection", cpuid);
			/* cpu is gone; leave the device marked busy */
			dp->busy = 1;
			return (-1);
		}
		if (cpu_poweron(cp) || cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to %s CPU id %d "
			    "during I/O cage test selection",
			    cpu_is_poweredoff(cp) ?
			    "poweron" : "online", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	dp->busy = 1;

	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);

	return (0);
}
7619 
7620 /*
7621  * Attempt to acquire all the CPU devices passed in. It is
7622  * assumed that all the devices in the list are the cores of
7623  * a single CMP device. Non CMP devices can be handled as a
7624  * single core CMP by passing in a one element list.
7625  *
7626  * Success is only returned if *all* the devices in the list
7627  * can be acquired. In the failure case, none of the devices
7628  * in the list will be held as acquired.
7629  */
7630 static int
7631 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7632 {
7633 	int	curr;
7634 	int	i;
7635 	int	rv = 0;
7636 
7637 	ASSERT((dpp != NULL) && (*dpp != NULL));
7638 
7639 	/*
7640 	 * Walk the list of CPU devices (cores of a CMP)
7641 	 * and attempt to acquire them. Bail out if an
7642 	 * error is encountered.
7643 	 */
7644 	for (curr = 0; curr < SBD_MAX_CORES_PER_CMP; curr++) {
7645 
7646 		/* check for the end of the list */
7647 		if (dpp[curr] == NULL) {
7648 			break;
7649 		}
7650 
7651 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7652 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7653 
7654 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7655 		if (rv != 0) {
7656 			break;
7657 		}
7658 	}
7659 
7660 	/*
7661 	 * Check for an error.
7662 	 */
7663 	if (rv != 0) {
7664 		/*
7665 		 * Make a best effort attempt to return any cores
7666 		 * that were already acquired before the error was
7667 		 * encountered.
7668 		 */
7669 		for (i = 0; i < curr; i++) {
7670 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7671 		}
7672 	}
7673 
7674 	return (rv);
7675 }
7676 
7677 static int
7678 drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7679 {
7680 	processorid_t	cpuid;
7681 	struct cpu	*cp;
7682 	int		rv = 0;
7683 	static char	*fn = "drmach_iocage_cpu_return";
7684 
7685 	ASSERT(DRMACH_IS_CPU_ID(dp));
7686 	ASSERT(MUTEX_HELD(&cpu_lock));
7687 
7688 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7689 
7690 	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7691 
7692 	if (cpu_configure(cpuid)) {
7693 		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7694 		    "after I/O cage test", cpuid);
7695 		/*
7696 		 * The component was never set to unconfigured during the IO
7697 		 * cage test, so we need to leave marked as busy to prevent
7698 		 * further DR operations involving this component.
7699 		 */
7700 		return (-1);
7701 	}
7702 
7703 	if ((cp = cpu_get(cpuid)) == NULL) {
7704 		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7705 		    "I/O cage test", cpuid);
7706 		dp->busy = 0;
7707 		return (-1);
7708 	}
7709 
7710 	if (cpu_poweron(cp) || cpu_online(cp)) {
7711 		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7712 		    "cage test", cpu_is_poweredoff(cp) ?
7713 		    "poweron" : "online", cpuid);
7714 		rv = -1;
7715 	}
7716 
7717 	/*
7718 	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7719 	 * P_NOINTR. Need to return to previous user-visible state.
7720 	 */
7721 	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7722 	    drmach_cpu_intr_disable(cp) != 0) {
7723 		cmn_err(CE_WARN, "failed to restore CPU id %d "
7724 		    "no-intr after I/O cage test", cpuid);
7725 		rv = -1;
7726 	}
7727 
7728 	dp->busy = 0;
7729 
7730 	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7731 
7732 	return (rv);
7733 }
7734 
7735 static sbd_error_t *
7736 drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7737     cpu_flag_t *oflags)
7738 {
7739 	drmach_board_t	*bp;
7740 	int		b_rv;
7741 	int		b_idx;
7742 	drmachid_t	b_id;
7743 	int		found;
7744 
7745 	mutex_enter(&cpu_lock);
7746 
7747 	ASSERT(drmach_boards != NULL);
7748 
7749 	found = 0;
7750 
7751 	/*
7752 	 * Walk the board list.
7753 	 */
7754 	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
7755 
7756 	while (b_rv == 0) {
7757 
7758 		int		d_rv;
7759 		int		d_idx;
7760 		drmachid_t	d_id;
7761 
7762 		bp = b_id;
7763 
7764 		if (bp->connected == 0 || bp->devices == NULL) {
7765 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7766 			continue;
7767 		}
7768 
7769 		/* An AXQ restriction disqualifies MCPU's as candidates. */
7770 		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
7771 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7772 			continue;
7773 		}
7774 
7775 		/*
7776 		 * Walk the device list of this board.
7777 		 */
7778 		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);
7779 
7780 		while (d_rv == 0) {
7781 
7782 			drmach_device_t	*ndp;
7783 
7784 			/* only interested in CPU devices */
7785 			if (!DRMACH_IS_CPU_ID(d_id)) {
7786 				d_rv = drmach_array_next(bp->devices, &d_idx,
7787 				    &d_id);
7788 				continue;
7789 			}
7790 
7791 			/*
7792 			 * The following code assumes two properties
7793 			 * of a CMP device:
7794 			 *
7795 			 *   1. All cores of a CMP are grouped together
7796 			 *	in the device list.
7797 			 *
7798 			 *   2. There will only be a maximum of two cores
7799 			 *	present in the CMP.
7800 			 *
7801 			 * If either of these two properties change,
7802 			 * this code will have to be revisited.
7803 			 */
7804 
7805 			dpp[0] = d_id;
7806 			dpp[1] = NULL;
7807 
7808 			/*
7809 			 * Get the next device. It may or may not be used.
7810 			 */
7811 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7812 			ndp = d_id;
7813 
7814 			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
7815 				/*
7816 				 * The second device is only interesting for
7817 				 * this pass if it has the same portid as the
7818 				 * first device. This implies that both are
7819 				 * cores of the same CMP.
7820 				 */
7821 				if (dpp[0]->portid == ndp->portid) {
7822 					dpp[1] = d_id;
7823 				}
7824 			}
7825 
7826 			/*
7827 			 * Attempt to acquire all cores of the CMP.
7828 			 */
7829 			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
7830 				found = 1;
7831 				break;
7832 			}
7833 
7834 			/*
7835 			 * Check if the search for the second core was
7836 			 * successful. If not, the next iteration should
7837 			 * use that device.
7838 			 */
7839 			if (dpp[1] == NULL) {
7840 				continue;
7841 			}
7842 
7843 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7844 		}
7845 
7846 		if (found)
7847 			break;
7848 
7849 		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7850 	}
7851 
7852 	mutex_exit(&cpu_lock);
7853 
7854 	if (!found) {
7855 		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
7856 	}
7857 
7858 	tbrq->cpu_portid = (*dpp)->portid;
7859 
7860 	return (NULL);
7861 }
7862 
7863 /*
7864  * Setup an iocage by acquiring a cpu and memory.
7865  */
7866 static sbd_error_t *
7867 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7868     cpu_flag_t *oflags)
7869 {
7870 	sbd_error_t *err;
7871 
7872 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7873 	if (!err) {
7874 		mutex_enter(&drmach_iocage_lock);
7875 		while (drmach_iocage_is_busy)
7876 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7877 		drmach_iocage_is_busy = 1;
7878 		mutex_exit(&drmach_iocage_lock);
7879 		err = drmach_iocage_mem_get(tbrq);
7880 		if (err) {
7881 			mutex_enter(&drmach_iocage_lock);
7882 			drmach_iocage_is_busy = 0;
7883 			cv_signal(&drmach_iocage_cv);
7884 			mutex_exit(&drmach_iocage_lock);
7885 		}
7886 	}
7887 	return (err);
7888 }
7889 
/*
 * Sizing constants for the slot 1 pause snapshot structures below.
 * DRMACH_SCHIZO_PCI_LEAF_MAX sizes the per-Schizo array of PCI leaves.
 * DRMACH_S1P_SAMPLE_MAX sizes the per-sample arrays, which are indexed
 * by a drmach_sr_iter_t value.
 */
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
#define	DRMACH_S1P_SAMPLE_MAX		2

/*
 * Identifies which of the two snapshots is being taken or examined.
 * The value is used directly as an index into the regs[] and
 * pic_l2_io_q[] arrays.
 */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/*
 * Saved state of a slot 1 AXQ.
 */
typedef struct {
	dev_info_t	*dip;		/* saved without a hold */
	uint32_t	portid;
	uint32_t	pcr_sel_save;	/* perf cntr select reg to restore */
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX]; /* per sample */
	uint64_t	reg_basepa;	/* 0 means there is nothing to sample */
} drmach_s1p_axq_t;

/*
 * Saved state of one Schizo PCI leaf.
 */
typedef struct {
	dev_info_t		*dip;	/* saved without a hold */
	uint32_t		portid;
	uint64_t		csr_basepa;
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;	/* entries in intr_map_regs */
		uint64_t	*intr_map_regs;	/* kmem allocated, or NULL */
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/*
 * Saved state of one Schizo and its PCI leaves.
 */
typedef struct {
	uint64_t		csr_basepa;	/* 0 means not present */
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/*
 * Everything saved for one slot 1 board: its AXQ and its Schizos.
 */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;
7933 
/*
 * Table of saved state for paused slot1 devices, indexed by expander
 * number. An entry is NULL until drmach_slot1_pause_add_axq() fills it.
 *
 * drmach_slot1_pause_init is non-zero when the table is to be freed on
 * the next suspend call to drmach_verify_sr(); that call clears it and
 * the following resume call sets it again.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
static int drmach_slot1_pause_init = 1;

/*
 * Non-zero enables the slot 1 pause snapshot and verification code.
 * It is on by default only in DEBUG kernels.
 */
#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7945 
7946 static int
7947 drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7948 {
7949 	int		portid, exp, slot, i;
7950 	drmach_reg_t	regs[2];
7951 	int		reglen = sizeof (regs);
7952 
7953 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7954 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7955 		return (0);
7956 	}
7957 
7958 	exp = (portid >> 5) & 0x1f;
7959 	slot = portid & 0x1;
7960 
7961 	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7962 	    strlen(DRMACH_AXQ_NAMEPROP))) {
7963 		return (0);
7964 	}
7965 
7966 	mutex_enter(&cpu_lock);
7967 	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7968 		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7969 			/* maxcat cpu present */
7970 			mutex_exit(&cpu_lock);
7971 			return (0);
7972 		}
7973 	}
7974 	mutex_exit(&cpu_lock);
7975 
7976 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7977 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7978 		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7979 		    "axq dip=%p\n", dip);
7980 		return (0);
7981 	}
7982 
7983 	ASSERT(id && reg);
7984 	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7985 	*reg |= (uint64_t)regs[0].reg_addr_lo;
7986 	*id = portid;
7987 
7988 	return (1);
7989 }
7990 
7991 /*
7992  * Allocate an entry in the slot1_paused state table.
7993  */
7994 static void
7995 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7996     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7997 {
7998 	int	axq_exp;
7999 	drmach_slot1_pause_t *slot1;
8000 
8001 	axq_exp = (axq_portid >> 5) & 0x1f;
8002 
8003 	ASSERT(axq_portid & 0x1);
8004 	ASSERT(slot1_paused[axq_exp] == NULL);
8005 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
8006 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
8007 
8008 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
8009 
8010 	/*
8011 	 * XXX This dip should really be held (via ndi_hold_devi())
8012 	 * before saving it in the axq pause structure. However that
8013 	 * would prevent DR as the pause data structures persist until
8014 	 * the next suspend. drmach code should be modified to free the
8015 	 * the slot 1 pause data structures for a boardset when its
8016 	 * slot 1 board is DRed out. The dip can then be released via
8017 	 * ndi_rele_devi() when the pause data structure is freed
8018 	 * allowing DR to proceed. Until this change is made, drmach
8019 	 * code should be careful about dereferencing the saved dip
8020 	 * as it may no longer exist.
8021 	 */
8022 	slot1->axq.dip = axq_dip;
8023 	slot1->axq.portid = axq_portid;
8024 	slot1->axq.reg_basepa = reg;
8025 	slot1_paused[axq_exp] = slot1;
8026 }
8027 
8028 static void
8029 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
8030 {
8031 	int	i;
8032 
8033 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
8034 		if (pci->regs[i].intr_map_regs != NULL) {
8035 			ASSERT(pci->regs[i].nmap_regs > 0);
8036 			kmem_free(pci->regs[i].intr_map_regs,
8037 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
8038 		}
8039 	}
8040 }
8041 
8042 static void
8043 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
8044 {
8045 	int	i, j, k;
8046 	drmach_slot1_pause_t *slot1;
8047 
8048 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8049 		if ((slot1 = slot1_paused[i]) == NULL)
8050 			continue;
8051 
8052 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8053 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
8054 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
8055 
8056 		kmem_free(slot1, sizeof (*slot1));
8057 		slot1_paused[i] = NULL;
8058 	}
8059 }
8060 
8061 /*
8062  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
8063  * fill in the appropriate info in the slot1_paused state table.
8064  */
8065 static int
8066 drmach_find_slot1_io(dev_info_t *dip, void *arg)
8067 {
8068 	int		portid, exp, ioc_unum, leaf_unum;
8069 	char		buf[OBP_MAXDRVNAME];
8070 	int		buflen = sizeof (buf);
8071 	drmach_reg_t	regs[3];
8072 	int		reglen = sizeof (regs);
8073 	uint32_t	leaf_offset;
8074 	uint64_t	schizo_csr_pa, pci_csr_pa;
8075 	drmach_s1p_pci_t *pci;
8076 	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;
8077 
8078 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8079 	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
8080 	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
8081 		return (DDI_WALK_CONTINUE);
8082 	}
8083 
8084 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
8085 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
8086 		return (DDI_WALK_CONTINUE);
8087 	}
8088 
8089 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8090 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
8091 		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
8092 		    "dip=%p\n", dip);
8093 		return (DDI_WALK_CONTINUE);
8094 	}
8095 
8096 	exp = portid >> 5;
8097 	ioc_unum = portid & 0x1;
8098 	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
8099 	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
8100 	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
8101 	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
8102 	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;
8103 
8104 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8105 	ASSERT(slot1_paused[exp] != NULL);
8106 	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
8107 	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
8108 	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);
8109 
8110 	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
8111 	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
8112 	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];
8113 
8114 	/*
8115 	 * XXX This dip should really be held (via ndi_hold_devi())
8116 	 * before saving it in the pci pause structure. However that
8117 	 * would prevent DR as the pause data structures persist until
8118 	 * the next suspend. drmach code should be modified to free the
8119 	 * the slot 1 pause data structures for a boardset when its
8120 	 * slot 1 board is DRed out. The dip can then be released via
8121 	 * ndi_rele_devi() when the pause data structure is freed
8122 	 * allowing DR to proceed. Until this change is made, drmach
8123 	 * code should be careful about dereferencing the saved dip as
8124 	 * it may no longer exist.
8125 	 */
8126 	pci->dip = dip;
8127 	pci->portid = portid;
8128 	pci->csr_basepa = pci_csr_pa;
8129 
8130 	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
8131 	    buf, portid, dip);
8132 
8133 	return (DDI_WALK_PRUNECHILD);
8134 }
8135 
/*
 * Walk the whole device tree from the root node and let
 * drmach_find_slot1_io() record every Schizo PCI leaf it finds in the
 * slot1_paused state table.
 */
static void
drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
{
	/*
	 * Root node doesn't have to be held
	 */
	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
	    (void *)slot1_paused);
}
8145 
8146 /*
8147  * Save the interrupt mapping registers for each non-idle interrupt
8148  * represented by the bit pairs in the saved interrupt state
8149  * diagnostic registers for this PCI leaf.
8150  */
/*
 * pci  - PCI leaf whose regs[iter] interrupt state diagnostic values
 *	  have already been sampled
 * iter - which sample (DRMACH_POST_SUSPEND or DRMACH_PRE_RESUME)
 *
 * On return pci->regs[iter].nmap_regs holds the number of non-idle
 * interrupts found and pci->regs[iter].intr_map_regs the mapping
 * register read for each of them, in ino order.
 *
 * Note that pci->dip is dereferenced here (ddi_binding_name).
 */
static void
drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
{
	int	 i, cnt, ino;
	uint64_t reg;
	char	 *dname;
	uchar_t	 Xmits;

	/* the ino layout differs when the leaf is bound to Xmits */
	dname = ddi_binding_name(pci->dip);
	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;

	/*
	 * 1st pass allocates, 2nd pass populates.
	 * Both passes scan the same saved diag values, so the 2nd
	 * pass visits exactly the entries counted by the 1st.
	 */
	for (i = 0; i < 2; i++) {
		cnt = ino = 0;

		/*
		 * PCI slot interrupts
		 * Each interrupt takes two bits of the diag value; the
		 * scan stops once no set bits remain.
		 */
		reg = pci->regs[iter].slot_intr_state_diag;
		while (reg) {
			/*
			 * Xmits Interrupt Number Offset(ino) Assignments
			 *   00-17 PCI Slot Interrupts
			 *   18-1f Not Used
			 */
			if ((Xmits) && (ino > 0x17))
				break;
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					/* one 8 byte map reg per ino */
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		/*
		 * Xmits Interrupt Number Offset(ino) Assignments
		 *   20-2f Not Used
		 *   30-37 Internal interrupts
		 *   38-3e Not Used
		 */
		ino = (Xmits)  ?  0x30 : 0x20;

		/*
		 * OBIO and internal schizo interrupts
		 * Each PCI leaf has a set of mapping registers for all
		 * possible interrupt sources except the NewLink interrupts.
		 */
		reg = pci->regs[iter].obio_intr_state_diag;
		while (reg && ino <= 0x38) {
			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
			    COMMON_CLEAR_INTR_REG_IDLE) {
				if (i) {
					pci->regs[iter].intr_map_regs[cnt] =
					    lddphysio(pci->csr_basepa +
					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
					    ino * sizeof (reg));
				}
				++cnt;
			}
			++ino;
			reg >>= 2;
		}

		if (!i) {
			/*
			 * End of the counting pass: size the array.
			 * NOTE(review): cnt may be 0 here; this relies
			 * on kmem_zalloc() coping with a zero size --
			 * confirm.
			 */
			pci->regs[iter].nmap_regs = cnt;
			pci->regs[iter].intr_map_regs =
			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
		}
	}
}
8230 
8231 static void
8232 drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
8233 {
8234 	uint32_t	reg;
8235 
8236 	if (axq->reg_basepa == 0x0UL)
8237 		return;
8238 
8239 	if (iter == DRMACH_POST_SUSPEND) {
8240 		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
8241 		    AXQ_SLOT1_PERFCNT_SEL);
8242 		/*
8243 		 * Select l2_io_queue counter by writing L2_IO_Q mux
8244 		 * input to bits 0-6 of perf cntr select reg.
8245 		 */
8246 		reg = axq->pcr_sel_save;
8247 		reg &= ~AXQ_PIC_CLEAR_MASK;
8248 		reg |= L2_IO_Q;
8249 
8250 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
8251 	}
8252 
8253 	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);
8254 
8255 	if (iter == DRMACH_PRE_RESUME) {
8256 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
8257 		    axq->pcr_sel_save);
8258 	}
8259 
8260 	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
8261 	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
8262 }
8263 
8264 static void
8265 drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
8266 {
8267 	int	i;
8268 	drmach_s1p_pci_t *pci;
8269 
8270 	if (schizo->csr_basepa == 0x0UL)
8271 		return;
8272 
8273 	schizo->regs[iter].csr =
8274 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
8275 	schizo->regs[iter].errctrl =
8276 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
8277 	schizo->regs[iter].errlog =
8278 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);
8279 
8280 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8281 		pci = &schizo->pci[i];
8282 		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
8283 			pci->regs[iter].slot_intr_state_diag =
8284 			    lddphysio(pci->csr_basepa +
8285 			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
8286 
8287 			pci->regs[iter].obio_intr_state_diag =
8288 			    lddphysio(pci->csr_basepa +
8289 			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
8290 
8291 			drmach_s1p_intr_map_reg_save(pci, iter);
8292 		}
8293 	}
8294 }
8295 
8296 /*
8297  * Called post-suspend and pre-resume to snapshot the suspend state
8298  * of slot1 AXQs and Schizos.
8299  */
8300 static void
8301 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8302     drmach_sr_iter_t iter)
8303 {
8304 	int	i, j;
8305 	drmach_slot1_pause_t *slot1;
8306 
8307 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8308 		if ((slot1 = slot1_paused[i]) == NULL)
8309 			continue;
8310 
8311 		drmach_s1p_axq_update(&slot1->axq, iter);
8312 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8313 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8314 	}
8315 }
8316 
8317 /*
8318  * Starcat hPCI Schizo devices.
8319  *
8320  * The name field is overloaded. NULL means the slot (interrupt concentrator
8321  * bus) is not used. intr_mask is a bit mask representing the 4 possible
8322  * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
8323  */
/*
 * Indexed as drmach_schz_slot_intr[slot][unum] by
 * drmach_s1p_decode_slot_intr(): the row is the PCI slot derived from
 * the ino, the column is the Schizo unit number. Bit N of intr_mask is
 * set when interrupt line N is valid for that slot.
 */
static struct {
	char	*name;
	uint8_t	intr_mask;
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8338 
8339 /*
8340  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
8341  * "Interrupt Registers", Table 22-69, page 306.
8342  */
static char *
drmach_schz_internal_ino2str(int ino)
{
	/* descriptions of internal interrupts 0x30 through 0x34 */
	static char *const source[] = {
		"Uncorrectable ECC error",
		"Correctable ECC error",
		"PCI Bus A Error",
		"PCI Bus B Error",
		"Safari Bus Error"
	};
	int	idx;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	/* the low three bits select the internal interrupt source */
	idx = ino & 0x7;
	if (idx < (int)(sizeof (source) / sizeof (source[0])))
		return (source[idx]);

	return ("Reserved");
}
8360 
8361 #define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8362 
8363 static void
8364 drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8365     int ino, drmach_sr_iter_t iter)
8366 {
8367 	uint8_t		intr_mask;
8368 	char		*slot_devname;
8369 	char		namebuf[OBP_MAXDRVNAME];
8370 	int		slot, intr_line, slot_valid, intr_valid;
8371 
8372 	ASSERT(ino >= 0 && ino <= 0x1f);
8373 	ASSERT((pci->regs[iter].slot_intr_state_diag &
8374 	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8375 	    COMMON_CLEAR_INTR_REG_IDLE);
8376 
8377 	slot = (ino >> 2) & 0x7;
8378 	intr_line = ino & 0x3;
8379 
8380 	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8381 	slot_valid = (slot_devname == NULL) ? 0 : 1;
8382 	if (!slot_valid) {
8383 		snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)", slot);
8384 		slot_devname = namebuf;
8385 	}
8386 
8387 	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8388 	intr_valid = (1 << intr_line) & intr_mask;
8389 
8390 	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8391 	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8392 	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8393 }
8394 
8395 /*
8396  * Log interrupt source device info for all valid, pending interrupts
8397  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8398  * error in the error ctrl reg.
8399  */
8400 static void
8401 drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
8402     int unum, drmach_sr_iter_t iter)
8403 {
8404 	uint64_t	reg;
8405 	int		i, n, ino;
8406 	drmach_s1p_pci_t *pci;
8407 
8408 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8409 	ASSERT(unum < STARCAT_SLOT1_IO_MAX);
8410 
8411 	/*
8412 	 * Check the saved interrupt mapping registers. If interrupt is valid,
8413 	 * map the ino to the Schizo source device and check that the pci
8414 	 * slot and interrupt line are valid.
8415 	 */
8416 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8417 		pci = &schizo->pci[i];
8418 		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
8419 			reg = pci->regs[iter].intr_map_regs[n];
8420 			if (reg & COMMON_INTR_MAP_REG_VALID) {
8421 				ino = reg & COMMON_INTR_MAP_REG_INO;
8422 
8423 				if (ino <= 0x1f) {
8424 					/*
8425 					 * PCI slot interrupt
8426 					 */
8427 					drmach_s1p_decode_slot_intr(exp, unum,
8428 					    pci, ino, iter);
8429 				} else if (ino <= 0x2f) {
8430 					/*
8431 					 * OBIO interrupt
8432 					 */
8433 					prom_printf("IO%d/P%d OBIO interrupt: "
8434 					    "ino=0x%x\n", exp, unum, ino);
8435 				} else if (ino <= 0x37) {
8436 					/*
8437 					 * Internal interrupt
8438 					 */
8439 					prom_printf("IO%d/P%d Internal "
8440 					    "interrupt: ino=0x%x (%s)\n",
8441 					    exp, unum, ino,
8442 					    drmach_schz_internal_ino2str(ino));
8443 				} else {
8444 					/*
8445 					 * NewLink interrupt
8446 					 */
8447 					prom_printf("IO%d/P%d NewLink "
8448 					    "interrupt: ino=0x%x\n", exp,
8449 					    unum, ino);
8450 				}
8451 
8452 				DRMACH_PR("drmach_s1p_schizo_log_intr: "
8453 				    "exp=%d, schizo=%d, pci_leaf=%c, "
8454 				    "ino=0x%x, intr_map_reg=0x%lx\n",
8455 				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
8456 			}
8457 		}
8458 	}
8459 }
8460 
8461 /*
8462  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8463  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8464  */
8465 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8466 
8467 /*
8468  * Check for possible error indicators prior to resuming the
8469  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8470  */
8471 static void
8472 drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
8473     drmach_sr_iter_t iter)
8474 {
8475 	int	i, j;
8476 	int 	errflag = 0;
8477 	drmach_slot1_pause_t *slot1;
8478 
8479 	/*
8480 	 * Check for logged schizo bus error and pending interrupts.
8481 	 */
8482 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8483 		if ((slot1 = slot1_paused[i]) == NULL)
8484 			continue;
8485 
8486 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
8487 			if (slot1->schizo[j].csr_basepa == 0x0UL)
8488 				continue;
8489 
8490 			if (slot1->schizo[j].regs[iter].errlog &
8491 			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
8492 				if (!errflag) {
8493 					prom_printf("DR WARNING: interrupt "
8494 					    "attempt detected during "
8495 					    "copy-rename (%s):\n",
8496 					    (iter == DRMACH_POST_SUSPEND) ?
8497 					    "post suspend" : "pre resume");
8498 					++errflag;
8499 				}
8500 				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
8501 				    i, j, iter);
8502 			}
8503 		}
8504 	}
8505 
8506 	/*
8507 	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
8508 	 */
8509 	if (iter == DRMACH_PRE_RESUME) {
8510 		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8511 			if ((slot1 = slot1_paused[i]) == NULL)
8512 				continue;
8513 
8514 			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
8515 			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
8516 				prom_printf("DR WARNING: IO transactions "
8517 				    "detected on IO%d during copy-rename: "
8518 				    "AXQ l2_io_q performance counter "
8519 				    "start=%d, end=%d\n", i,
8520 				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
8521 				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
8522 			}
8523 		}
8524 	}
8525 }
8526 
/*
 * Node of a circular, doubly linked ring of device instances. Each
 * node carries a hold on its dip (taken in drmach_sr_insert(), dropped
 * in drmach_sr_delete()).
 */
struct drmach_sr_list {
	dev_info_t		*dip;
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};

/*
 * Drivers that are suspended last and resumed first, in array order
 * for suspend and reverse array order for resume. name is matched as
 * a prefix of a node's "name" property; ring points at the head of the
 * instance ring for that name, or is NULL when the ring is empty.
 */
static struct drmach_sr_ordered {
	char			*name;
	struct drmach_sr_list	*ring;
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8541 
8542 static void
8543 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8544 {
8545 	struct drmach_sr_list *np;
8546 
8547 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", dip);
8548 
8549 	np = (struct drmach_sr_list *)kmem_alloc(
8550 		sizeof (struct drmach_sr_list), KM_SLEEP);
8551 
8552 	ndi_hold_devi(dip);
8553 	np->dip = dip;
8554 
8555 	if (*lp == NULL) {
8556 		/* establish list */
8557 		*lp = np->next = np->prev = np;
8558 	} else {
8559 		/* place new node behind head node on ring list */
8560 		np->prev = (*lp)->prev;
8561 		np->next = *lp;
8562 		np->prev->next = np;
8563 		np->next->prev = np;
8564 	}
8565 }
8566 
8567 static void
8568 drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
8569 {
8570 	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", dip);
8571 
8572 	if (*lp) {
8573 		struct drmach_sr_list *xp;
8574 
8575 		/* start search with mostly likely node */
8576 		xp = (*lp)->prev;
8577 		do {
8578 			if (xp->dip == dip) {
8579 				xp->prev->next = xp->next;
8580 				xp->next->prev = xp->prev;
8581 
8582 				if (xp == *lp)
8583 					*lp = xp->next;
8584 				if (xp == *lp)
8585 					*lp = NULL;
8586 				xp->dip = NULL;
8587 				ndi_rele_devi(dip);
8588 				kmem_free(xp, sizeof (*xp));
8589 
8590 				DRMACH_PR("drmach_sr_delete:"
8591 					" disposed sr node for dip %p", dip);
8592 				return;
8593 			}
8594 
8595 			DRMACH_PR("drmach_sr_delete: still searching\n");
8596 
8597 			xp = xp->prev;
8598 		} while (xp != (*lp)->prev);
8599 	}
8600 
8601 	/* every dip should be found during resume */
8602 	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", dip);
8603 }
8604 
8605 int
8606 drmach_verify_sr(dev_info_t *dip, int sflag)
8607 {
8608 	int	rv;
8609 	int	len;
8610 	char    name[OBP_MAXDRVNAME];
8611 
8612 	if (drmach_slot1_pause_debug) {
8613 		if (sflag && drmach_slot1_pause_init) {
8614 			drmach_slot1_pause_free(drmach_slot1_paused);
8615 			drmach_slot1_pause_init = 0;
8616 		} else if (!sflag && !drmach_slot1_pause_init) {
8617 			/* schedule init for next suspend */
8618 			drmach_slot1_pause_init = 1;
8619 		}
8620 	}
8621 
8622 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8623 		"name", &len);
8624 	if (rv == DDI_PROP_SUCCESS) {
8625 		int		portid;
8626 		uint64_t	reg;
8627 		struct drmach_sr_ordered *op;
8628 
8629 		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8630 			DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8631 
8632 		if (rv != DDI_PROP_SUCCESS)
8633 			return (0);
8634 
8635 		if (drmach_slot1_pause_debug && sflag &&
8636 		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8637 			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8638 			    drmach_slot1_paused);
8639 		}
8640 
8641 		for (op = drmach_sr_ordered; op->name; op++) {
8642 			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8643 				if (sflag)
8644 					drmach_sr_insert(&op->ring, dip);
8645 				else
8646 					drmach_sr_delete(&op->ring, dip);
8647 				return (1);
8648 			}
8649 		}
8650 	}
8651 
8652 	return (0);
8653 }
8654 
8655 static void
8656 drmach_sr_dip(dev_info_t *dip, int suspend)
8657 {
8658 	int	 rv;
8659 	major_t	 maj;
8660 	char	*name, *name_addr, *aka;
8661 
8662 	if ((name = ddi_get_name(dip)) == NULL)
8663 		name = "<null name>";
8664 	else if ((maj = ddi_name_to_major(name)) != -1)
8665 		aka = ddi_major_to_name(maj);
8666 	else
8667 		aka = "<unknown>";
8668 
8669 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8670 		name_addr = "<null>";
8671 
8672 	prom_printf("\t%s %s@%s (aka %s)\n",
8673 		suspend ? "suspending" : "resuming",
8674 		name, name_addr, aka);
8675 
8676 	if (suspend) {
8677 		rv = devi_detach(dip, DDI_SUSPEND);
8678 	} else {
8679 		rv = devi_attach(dip, DDI_RESUME);
8680 	}
8681 
8682 	if (rv != DDI_SUCCESS) {
8683 		prom_printf("\tFAILED to %s %s@%s\n",
8684 			suspend ? "suspend" : "resume",
8685 			name, name_addr);
8686 	}
8687 }
8688 
8689 void
8690 drmach_suspend_last()
8691 {
8692 	struct drmach_sr_ordered *op;
8693 
8694 	if (drmach_slot1_pause_debug)
8695 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8696 
8697 	/*
8698 	 * The ordering array declares the strict sequence in which
8699 	 * the named drivers are to suspended. Each element in
8700 	 * the array may have a double-linked ring list of driver
8701 	 * instances (dip) in the order in which they were presented
8702 	 * to drmach_verify_sr. If present, walk the list in the
8703 	 * forward direction to suspend each instance.
8704 	 */
8705 	for (op = drmach_sr_ordered; op->name; op++) {
8706 		if (op->ring) {
8707 			struct drmach_sr_list *rp;
8708 
8709 			rp = op->ring;
8710 			do {
8711 				drmach_sr_dip(rp->dip, 1);
8712 				rp = rp->next;
8713 			} while (rp != op->ring);
8714 		}
8715 	}
8716 
8717 	if (drmach_slot1_pause_debug) {
8718 		drmach_slot1_pause_update(drmach_slot1_paused,
8719 		    DRMACH_POST_SUSPEND);
8720 		drmach_slot1_pause_verify(drmach_slot1_paused,
8721 		    DRMACH_POST_SUSPEND);
8722 	}
8723 }
8724 
8725 void
8726 drmach_resume_first()
8727 {
8728 	struct drmach_sr_ordered *op = drmach_sr_ordered +
8729 		(sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8730 
8731 	if (drmach_slot1_pause_debug) {
8732 		drmach_slot1_pause_update(drmach_slot1_paused,
8733 		    DRMACH_PRE_RESUME);
8734 		drmach_slot1_pause_verify(drmach_slot1_paused,
8735 		    DRMACH_PRE_RESUME);
8736 	}
8737 
8738 	op -= 1;	/* point at terminating element */
8739 
8740 	/*
8741 	 * walk ordering array and rings backwards to resume dips
8742 	 * in reverse order in which they were suspended
8743 	 */
8744 	while (--op >= drmach_sr_ordered) {
8745 		if (op->ring) {
8746 			struct drmach_sr_list *rp;
8747 
8748 			rp = op->ring->prev;
8749 			do {
8750 				drmach_sr_dip(rp->dip, 0);
8751 				rp = rp->prev;
8752 			} while (rp != op->ring->prev);
8753 		}
8754 	}
8755 }
8756 
8757 /*
8758  * Log a DR sysevent.
8759  * Return value: 0 success, non-zero failure.
8760  */
8761 int
8762 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8763 {
8764 	sysevent_t			*ev;
8765 	sysevent_id_t			eid;
8766 	int				rv, km_flag;
8767 	sysevent_value_t		evnt_val;
8768 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8769 	char				attach_pnt[MAXNAMELEN];
8770 
8771 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8772 	attach_pnt[0] = '\0';
8773 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8774 		rv = -1;
8775 		goto logexit;
8776 	}
8777 	if (verbose)
8778 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8779 			    attach_pnt, hint, flag, verbose);
8780 
8781 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8782 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8783 		rv = -2;
8784 		goto logexit;
8785 	}
8786 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8787 	evnt_val.value.sv_string = attach_pnt;
8788 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8789 				    &evnt_val, km_flag)) != 0)
8790 		goto logexit;
8791 
8792 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8793 	evnt_val.value.sv_string = hint;
8794 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8795 				    &evnt_val, km_flag)) != 0) {
8796 		sysevent_free_attr(evnt_attr_list);
8797 		goto logexit;
8798 	}
8799 
8800 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8801 
8802 	/*
8803 	 * Log the event but do not sleep waiting for its
8804 	 * delivery. This provides insulation from syseventd.
8805 	 */
8806 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8807 
8808 logexit:
8809 	if (ev)
8810 		sysevent_free(ev);
8811 	if ((rv != 0) && verbose)
8812 		cmn_err(CE_WARN,
8813 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8814 			    rv, attach_pnt, hint);
8815 
8816 	return (rv);
8817 }
8818 
8819 /*
8820  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8821  * Only the valid entries are modified, so the array should be zeroed out
8822  * initially.
8823  */
8824 static void
8825 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8826 	int	i;
8827 	char	c;
8828 
8829 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8830 
8831 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8832 		c = drmach_slice_table[i];
8833 
8834 		if (c & 0x20) {
8835 			slice_arr[i].valid = 1;
8836 			slice_arr[i].slice = c & 0x1f;
8837 		}
8838 	}
8839 }
8840 
8841 /*
8842  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8843  * Only the valid entries are modified, so the array should be zeroed out
8844  * initially.
8845  */
8846 static void
8847 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8848 	int		rv, exp, mcnum, bank;
8849 	uint64_t	madr;
8850 	drmachid_t	id;
8851 	drmach_board_t	*bp;
8852 	drmach_mem_t	*mp;
8853 	dr_memregs_t	*memregs;
8854 
8855 	/* CONSTCOND */
8856 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8857 
8858 	for (exp = 0; exp < 18; exp++) {
8859 		rv = drmach_array_get(drmach_boards,
8860 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8861 		ASSERT(rv == 0);	/* should never be out of bounds */
8862 		if (id == NULL) {
8863 			continue;
8864 		}
8865 
8866 		memregs = &regs_arr[exp];
8867 		bp = (drmach_board_t *)id;
8868 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8869 			mcnum = mp->dev.portid & 0x3;
8870 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8871 				drmach_mem_read_madr(mp, bank, &madr);
8872 				if (madr & DRMACH_MC_VALID_MASK) {
8873 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8874 						exp, mcnum, bank, madr);
8875 					memregs->madr[mcnum][bank].hi =
8876 					    DRMACH_U64_TO_MCREGHI(madr);
8877 					memregs->madr[mcnum][bank].lo =
8878 					    DRMACH_U64_TO_MCREGLO(madr);
8879 				}
8880 			}
8881 		}
8882 	}
8883 }
8884 
8885 /*
8886  * Do not allow physical address range modification if either board on this
8887  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8888  *
8889  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8890  * install the cache line as owned/dirty as a result of the RTSR transaction.
8891  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8892  * list before the rename after flushing local caches.  When copy-rename
8893  * requires changing the physical address ranges (i.e. smaller memory target),
8894  * the bus sync list contains physical addresses that will not exist after the
8895  * rename.  If these cache lines are owned due to a RTSR, a system error can
8896  * occur following the rename when these cache lines are evicted and a writeback
8897  * is attempted.
8898  *
8899  * Incoming parameter represents either the copy-rename source or a candidate
8900  * target memory board.  On Starcat, only slot0 boards may have memory.
8901  */
8902 int
8903 drmach_allow_memrange_modify(drmachid_t s0id)
8904 {
8905 	drmach_board_t	*s0bp, *s1bp;
8906 	drmachid_t	s1id;
8907 	int		rv;
8908 
8909 	s0bp = s0id;
8910 
8911 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8912 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8913 
8914 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8915 		/*
8916 		 * This is reason enough to fail the request, no need
8917 		 * to check the device list for cpus.
8918 		 */
8919 		return (0);
8920 	}
8921 
8922 	/*
8923 	 * Check for MCPU board on the same expander.
8924 	 *
8925 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8926 	 * types, as it is derived at from the POST gdcd board flag
8927 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8928 	 * ignored) for boards with no processors.  Since NULL proc LPA
8929 	 * applies only to processors, we walk the devices array to detect
8930 	 * MCPUs.
8931 	 */
8932 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8933 	s1bp = s1id;
8934 	if (rv == 0 && s1bp != NULL) {
8935 
8936 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8937 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8938 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8939 		    DRMACH_BNUM2EXP(s1bp->bnum));
8940 
8941 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8942 		    s1bp->devices != NULL) {
8943 			int		d_idx;
8944 			drmachid_t	d_id;
8945 
8946 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8947 			while (rv == 0) {
8948 				if (DRMACH_IS_CPU_ID(d_id)) {
8949 					/*
8950 					 * Fail MCPU in NULL LPA mode.
8951 					 */
8952 					return (0);
8953 				}
8954 
8955 				rv = drmach_array_next(s1bp->devices, &d_idx,
8956 				    &d_id);
8957 			}
8958 		}
8959 	}
8960 
8961 	return (1);
8962 }
8963