xref: /titanic_52/usr/src/uts/sun4u/starcat/io/drmach.c (revision f936286c99fb83153e4bfd870eb2830a990a82c1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/note.h>
28 #include <sys/debug.h>
29 #include <sys/types.h>
30 #include <sys/varargs.h>
31 #include <sys/errno.h>
32 #include <sys/cred.h>
33 #include <sys/dditypes.h>
34 #include <sys/devops.h>
35 #include <sys/modctl.h>
36 #include <sys/poll.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ndi_impldefs.h>
42 #include <sys/stat.h>
43 #include <sys/kmem.h>
44 #include <sys/vmem.h>
45 #include <sys/disp.h>
46 #include <sys/processor.h>
47 #include <sys/cheetahregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/mboxsc.h>
70 #include <sys/plat_ecc_dimm.h>
71 
72 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
73 #include <sys/schpc.h>
74 #include <sys/pci.h>
75 
76 #include <sys/starcat.h>
77 #include <sys/cpu_sgnblk_defs.h>
78 #include <sys/drmach.h>
79 #include <sys/dr_util.h>
80 #include <sys/dr_mbx.h>
81 #include <sys/sc_gptwocfg.h>
82 #include <sys/iosramreg.h>
83 #include <sys/iosramio.h>
84 #include <sys/iosramvar.h>
85 #include <sys/axq.h>
86 #include <sys/post/scat_dcd.h>
87 #include <sys/kobj.h>
88 #include <sys/taskq.h>
89 #include <sys/cmp.h>
90 #include <sys/sbd_ioctl.h>
91 
92 #include <sys/sysevent.h>
93 #include <sys/sysevent/dr.h>
94 #include <sys/sysevent/eventdefs.h>
95 
96 #include <sys/pci/pcisch.h>
97 #include <sys/pci/pci_regs.h>
98 
99 #include <sys/ontrap.h>
100 
101 /* defined in ../ml/drmach.il.cpp */
102 extern void		bcopy32_il(uint64_t, uint64_t);
103 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
104 extern void		flush_dcache_il(void);
105 extern void		flush_icache_il(void);
106 extern void		flush_pcache_il(void);
107 
108 /* defined in ../ml/drmach_asm.s */
109 extern uint64_t		lddmcdecode(uint64_t physaddr);
110 extern uint64_t		lddsafconfig(void);
111 
112 /* XXX here until provided by sys/dman.h */
113 extern int man_dr_attach(dev_info_t *);
114 extern int man_dr_detach(dev_info_t *);
115 
116 #define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
117 #define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
118 #define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
119 
120 #define	DRMACH_SLICE_MASK		0x1Full
121 #define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
122 #define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
123 
124 /*
125  * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
126  * available address space and the usable address space for every slice.
127  * There must be a distinction between the available and usable due to a
128  * restriction imposed by CDC memory size.
129  */
130 
131 #define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
132 #define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
133 
134 #define	DRMACH_MC_NBANKS		4
135 
136 #define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
137 #define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
138 
139 #define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
140 #define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
141 	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
142 
143 /*
144  * The Cheetah's Safari Configuration Register and the Schizo's
145  * Safari Control/Status Register place the LPA base and bound fields in
146  * the same bit locations within their register word. This source code takes
147  * advantage of this by defining only one set of LPA encoding/decoding macros
148  * which are shared by various Cheetah and Schizo drmach routines.
149  */
150 #define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
151 #define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
152 
153 #define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
154 #define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
155 #define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
156 #define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
157 
158 #define	DRMACH_L1_SET_LPA(b)		\
159 	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
160 
161 #define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
162 #define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
163 
164 /*
165  * Name properties for frequently accessed device nodes.
166  */
167 #define	DRMACH_CPU_NAMEPROP		"cpu"
168 #define	DRMACH_CMP_NAMEPROP		"cmp"
169 #define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
170 #define	DRMACH_PCI_NAMEPROP		"pci"
171 
/*
 * Maximum value of processor Safari Timeout Log (TOL) field of
 * Safari Config reg (7 secs).
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * in any expression (e.g. as the right operand of '/' or '%').
 */
#define	DRMACH_SAF_TOL_MAX		(7 * 1000000)
177 
178 /*
179  * drmach_board_t flag definitions
180  */
181 #define	DRMACH_NULL_PROC_LPA		0x1
182 
183 typedef struct {
184 	uint32_t	reg_addr_hi;
185 	uint32_t	reg_addr_lo;
186 	uint32_t	reg_size_hi;
187 	uint32_t	reg_size_lo;
188 } drmach_reg_t;
189 
190 typedef struct {
191 	struct drmach_node	*node;
192 	void			*data;
193 } drmach_node_walk_args_t;
194 
195 typedef struct drmach_node {
196 	void		*here;
197 
198 	pnode_t		 (*get_dnode)(struct drmach_node *node);
199 	int		 (*walk)(struct drmach_node *node, void *data,
200 				int (*cb)(drmach_node_walk_args_t *args));
201 	dev_info_t	*(*n_getdip)(struct drmach_node *node);
202 	int		 (*n_getproplen)(struct drmach_node *node, char *name,
203 				int *len);
204 	int		 (*n_getprop)(struct drmach_node *node, char *name,
205 				void *buf, int len);
206 	int		 (*get_parent)(struct drmach_node *node,
207 				struct drmach_node *pnode);
208 } drmach_node_t;
209 
210 typedef struct {
211 	int		 min_index;
212 	int		 max_index;
213 	int		 arr_sz;
214 	drmachid_t	*arr;
215 } drmach_array_t;
216 
217 typedef struct {
218 	void		*isa;
219 
220 	void		 (*dispose)(drmachid_t);
221 	sbd_error_t	*(*release)(drmachid_t);
222 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
223 
224 	char		 name[MAXNAMELEN];
225 } drmach_common_t;
226 
227 struct drmach_board;
228 typedef struct drmach_board drmach_board_t;
229 
230 typedef struct {
231 	drmach_common_t	 cm;
232 	const char	*type;
233 	drmach_board_t	*bp;
234 	drmach_node_t	*node;
235 	int		 portid;
236 	int		 unum;
237 	int		 busy;
238 	int		 powered;
239 } drmach_device_t;
240 
241 typedef struct drmach_cpu {
242 	drmach_device_t	 dev;
243 	uint64_t	 scr_pa;
244 	processorid_t	 cpuid;
245 	int		 coreid;
246 } drmach_cpu_t;
247 
248 typedef struct drmach_mem {
249 	drmach_device_t	 dev;
250 	struct drmach_mem *next;
251 	uint64_t	 nbytes;
252 	uint64_t	 madr_pa;
253 } drmach_mem_t;
254 
255 typedef struct drmach_io {
256 	drmach_device_t	 dev;
257 	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
258 } drmach_io_t;
259 
260 struct drmach_board {
261 	drmach_common_t	 cm;
262 	int		 bnum;
263 	int		 assigned;
264 	int		 powered;
265 	int		 connected;
266 	int		 empty;
267 	int		 cond;
268 	uint_t		 cpu_impl;
269 	uint_t		 flags;
270 	drmach_node_t	*tree;
271 	drmach_array_t	*devices;
272 	drmach_mem_t	*mem;
273 	uint64_t	 stardrb_offset;
274 	char		 type[BD_TYPELEN];
275 };
276 
277 typedef struct {
278 	int		 flags;
279 	drmach_device_t	*dp;
280 	sbd_error_t	*err;
281 	dev_info_t	*fdip;
282 } drmach_config_args_t;
283 
284 typedef struct {
285 	drmach_board_t	*obj;
286 	int		 ndevs;
287 	void		*a;
288 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
289 	sbd_error_t	*err;
290 } drmach_board_cb_data_t;
291 
292 typedef struct drmach_casmslot {
293 	int	valid;
294 	int	slice;
295 } drmach_casmslot_t;
296 
297 typedef enum {
298 	DRMACH_CR_OK,
299 	DRMACH_CR_MC_IDLE_ERR,
300 	DRMACH_CR_IOPAUSE_ERR,
301 	DRMACH_CR_ONTRAP_ERR
302 } drmach_cr_err_t;
303 
304 typedef struct {
305 	void		*isa;
306 	caddr_t		 data;
307 	drmach_mem_t	*s_mp;
308 	drmach_mem_t	*t_mp;
309 	struct memlist	*c_ml;
310 	uint64_t	 s_copybasepa;
311 	uint64_t	 t_copybasepa;
312 	drmach_cr_err_t	 ecode;
313 	void		*earg;
314 } drmach_copy_rename_t;
315 
316 /*
317  * The following global is read as a boolean value, non-zero is true.
318  * If zero, DR copy-rename and cpu poweron will not set the processor
319  * LPA settings (CBASE, CBND of Safari config register) to correspond
320  * to the current memory slice map. LPAs of processors present at boot
321  * will remain as programmed by POST. LPAs of processors on boards added
322  * by DR will remain NULL, as programmed by POST. This can be used to
323  * override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
324  * POST in the LDCD (and copied to the GDCD by SMS).
325  *
326  * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
327  * to Schizo device LPAs. These are always set by DR.
328  */
329 static int		 drmach_reprogram_lpa = 1;
330 
331 /*
332  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
333  * can fail to receive an XIR. To workaround this issue until a hardware
334  * fix is implemented, we will exclude the selection of these CPUs.
335  * Setting this to 0 will allow their selection again.
336  */
337 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
338 
339 static int		 drmach_initialized;
340 static drmach_array_t	*drmach_boards;
341 
342 static int		 drmach_cpu_delay = 1000;
343 static int		 drmach_cpu_ntries = 50000;
344 
345 static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
346 static kmutex_t		 drmach_slice_table_lock;
347 
348 tte_t			 drmach_cpu_sram_tte[NCPU];
349 caddr_t			 drmach_cpu_sram_va;
350 
351 /*
352  * Setting to non-zero will enable delay before all disconnect ops.
353  */
354 static int		 drmach_unclaim_delay_all;
355 /*
356  * Default delay is slightly greater than the max processor Safari timeout.
357  * This delay is intended to ensure the outstanding Safari activity has
358  * retired on this board prior to a board disconnect.
359  */
360 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
361 
362 /*
363  * By default, DR of non-Panther procs is not allowed into a Panther
364  * domain with large page sizes enabled.  Setting this to 0 will remove
365  * the restriction.
366  */
367 static int		 drmach_large_page_restriction = 1;
368 
369 /*
370  * Used to pass updated LPA values to procs.
371  * Protocol is to clear the array before use.
372  */
373 volatile uchar_t	*drmach_xt_mb;
374 volatile uint64_t	 drmach_xt_ready;
375 static kmutex_t		 drmach_xt_mb_lock;
376 static int		 drmach_xt_mb_size;
377 
378 uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
379 static kmutex_t		 drmach_bus_sync_lock;
380 
381 static void		drmach_fini(void);
382 
383 static sbd_error_t	*drmach_device_new(drmach_node_t *,
384 				drmach_board_t *, int, drmachid_t *);
385 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
386 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
387 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
388 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
389 
390 static sbd_error_t 	*drmach_board_release(drmachid_t);
391 static sbd_error_t 	*drmach_board_status(drmachid_t, drmach_status_t *);
392 
393 static void 		drmach_cpu_dispose(drmachid_t);
394 static sbd_error_t 	*drmach_cpu_release(drmachid_t);
395 static sbd_error_t 	*drmach_cpu_status(drmachid_t, drmach_status_t *);
396 
397 static void 		drmach_mem_dispose(drmachid_t);
398 static sbd_error_t 	*drmach_mem_release(drmachid_t);
399 static sbd_error_t 	*drmach_mem_status(drmachid_t, drmach_status_t *);
400 
401 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
402 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
403 				char *name, void *buf, int len);
404 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
405 				char *name, int *len);
406 
407 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
408 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
409 				char *name, void *buf, int len);
410 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
411 				char *name, int *len);
412 
413 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
414 				caddr_t obufp, int olen,
415 				caddr_t ibufp, int ilen);
416 
417 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
418 sbd_error_t		*drmach_io_post_release(drmachid_t id);
419 
420 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
421 				drmach_device_t **dpp, cpu_flag_t *oflags);
422 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
423 				cpu_flag_t oflags);
424 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
425 void			drmach_iocage_mem_scrub(uint64_t nbytes);
426 
427 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
428 
429 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
430 
431 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
432 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
433 
434 static void		 drmach_bus_sync_list_update(void);
435 static void		 drmach_slice_table_update(drmach_board_t *, int);
436 static int		 drmach_portid2bnum(int);
437 
438 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
439 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
440 
441 static int		drmach_panther_boards(void);
442 
443 static int		drmach_name2type_idx(char *);
444 
445 #ifdef DEBUG
446 
447 #define	DRMACH_PR		if (drmach_debug) printf
448 #define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
449 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
450 #else
451 
452 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
453 #define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
454 #endif /* DEBUG */
455 
/*
 * Object identification macros.
 *
 * Every drmach object starts with a drmach_common_t whose "isa" member
 * holds the address of the constructor that created it; the macros below
 * compare that member against the known constructors.  A zero id never
 * matches.  The macro argument is parenthesized at every use so that an
 * expression (not just a plain identifier) can be passed safely.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
486 
487 #define	DRMACH_INTERNAL_ERROR() \
488 	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
489 static char		*drmach_ie_fmt = "drmach.c %d";
490 
491 static struct {
492 	const char	 *name;
493 	const char	 *type;
494 	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
495 } drmach_name2type[] = {
496 	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
497 	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
498 	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
499 	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
500 	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
501 	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
502 	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
503 };
504 
505 /*
506  * drmach autoconfiguration data structures and interfaces
507  */
508 
509 extern struct mod_ops mod_miscops;
510 
511 static struct modlmisc modlmisc = {
512 	&mod_miscops,
513 	"Sun Fire 15000 DR"
514 };
515 
516 static struct modlinkage modlinkage = {
517 	MODREV_1,
518 	(void *)&modlmisc,
519 	NULL
520 };
521 
522 /*
523  * drmach_boards_rwlock is used to synchronize read/write
524  * access to drmach_boards array between status and board lookup
525  * as READERS, and assign, and unassign threads as WRITERS.
526  */
527 static krwlock_t	drmach_boards_rwlock;
528 
529 static kmutex_t		drmach_i_lock;
530 static kmutex_t		drmach_iocage_lock;
531 static kcondvar_t 	drmach_iocage_cv;
532 static int		drmach_iocage_is_busy = 0;
533 uint64_t		drmach_iocage_paddr;
534 static caddr_t		drmach_iocage_vaddr;
535 static int		drmach_iocage_size = 0;
536 static int		drmach_is_cheetah = -1;
537 
538 int
539 _init(void)
540 {
541 	int	err;
542 
543 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
544 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
545 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
546 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
547 	    drmach_xt_mb_size, VM_SLEEP);
548 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
549 	if ((err = mod_install(&modlinkage)) != 0) {
550 		mutex_destroy(&drmach_i_lock);
551 		rw_destroy(&drmach_boards_rwlock);
552 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
553 		    drmach_xt_mb_size);
554 	}
555 
556 	return (err);
557 }
558 
559 int
560 _fini(void)
561 {
562 	int		err;
563 
564 	if ((err = mod_remove(&modlinkage)) == 0)
565 		drmach_fini();
566 
567 	return (err);
568 }
569 
570 int
571 _info(struct modinfo *modinfop)
572 {
573 	return (mod_info(&modlinkage, modinfop));
574 }
575 
576 /*
577  * drmach_node_* routines serve the purpose of separating the
578  * rest of the code from the device tree and OBP.  This is necessary
579  * because of In-Kernel-Probing.  Devices probed after stod, are probed
580  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
581  * have dnode ids.
582  */
583 
584 static int
585 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
586 {
587 	pnode_t		nodeid;
588 	static char	*fn = "drmach_node_obp_get_parent";
589 
590 	nodeid = np->get_dnode(np);
591 	if (nodeid == OBP_NONODE) {
592 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
593 		return (-1);
594 	}
595 
596 	bcopy(np, pp, sizeof (drmach_node_t));
597 
598 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
599 	if (pp->here == OBP_NONODE) {
600 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
601 		return (-1);
602 	}
603 
604 	return (0);
605 }
606 
607 static pnode_t
608 drmach_node_obp_get_dnode(drmach_node_t *np)
609 {
610 	return ((pnode_t)(uintptr_t)np->here);
611 }
612 
613 typedef struct {
614 	drmach_node_walk_args_t	*nwargs;
615 	int 			(*cb)(drmach_node_walk_args_t *args);
616 	int			err;
617 } drmach_node_ddi_walk_args_t;
618 
619 int
620 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
621 {
622 	drmach_node_ddi_walk_args_t	*nargs;
623 
624 	nargs = (drmach_node_ddi_walk_args_t *)arg;
625 
626 	/*
627 	 * dip doesn't have to be held here as we are called
628 	 * from ddi_walk_devs() which holds the dip.
629 	 */
630 	nargs->nwargs->node->here = (void *)dip;
631 
632 	nargs->err = nargs->cb(nargs->nwargs);
633 
634 	/*
635 	 * Set "here" to NULL so that unheld dip is not accessible
636 	 * outside ddi_walk_devs()
637 	 */
638 	nargs->nwargs->node->here = NULL;
639 
640 	if (nargs->err)
641 		return (DDI_WALK_TERMINATE);
642 	else
643 		return (DDI_WALK_CONTINUE);
644 }
645 
646 static int
647 drmach_node_ddi_walk(drmach_node_t *np, void *data,
648 		int (*cb)(drmach_node_walk_args_t *args))
649 {
650 	drmach_node_walk_args_t		args;
651 	drmach_node_ddi_walk_args_t	nargs;
652 
653 	/* initialized args structure for callback */
654 	args.node = np;
655 	args.data = data;
656 
657 	nargs.nwargs = &args;
658 	nargs.cb = cb;
659 	nargs.err = 0;
660 
661 	/*
662 	 * Root node doesn't have to be held in any way.
663 	 */
664 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
665 
666 	return (nargs.err);
667 }
668 
669 static int
670 drmach_node_obp_walk(drmach_node_t *np, void *data,
671 		int (*cb)(drmach_node_walk_args_t *args))
672 {
673 	pnode_t			nodeid;
674 	int			rv;
675 	drmach_node_walk_args_t	args;
676 
677 	/* initialized args structure for callback */
678 	args.node = np;
679 	args.data = data;
680 
681 	nodeid = prom_childnode(prom_rootnode());
682 
683 	/* save our new position within the tree */
684 	np->here = (void *)(uintptr_t)nodeid;
685 
686 	rv = 0;
687 	while (nodeid != OBP_NONODE) {
688 
689 		pnode_t child;
690 
691 		rv = (*cb)(&args);
692 		if (rv)
693 			break;
694 
695 		child = prom_childnode(nodeid);
696 		np->here = (void *)(uintptr_t)child;
697 
698 		while (child != OBP_NONODE) {
699 			rv = (*cb)(&args);
700 			if (rv)
701 				break;
702 
703 			child = prom_nextnode(child);
704 			np->here = (void *)(uintptr_t)child;
705 		}
706 
707 		nodeid = prom_nextnode(nodeid);
708 
709 		/* save our new position within the tree */
710 		np->here = (void *)(uintptr_t)nodeid;
711 	}
712 
713 	return (rv);
714 }
715 
716 static int
717 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
718 {
719 	dev_info_t	*ndip;
720 	static char	*fn = "drmach_node_ddi_get_parent";
721 
722 	ndip = np->n_getdip(np);
723 	if (ndip == NULL) {
724 		cmn_err(CE_WARN, "%s: NULL dip", fn);
725 		return (-1);
726 	}
727 
728 	bcopy(np, pp, sizeof (drmach_node_t));
729 
730 	pp->here = (void *)ddi_get_parent(ndip);
731 	if (pp->here == NULL) {
732 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
733 		return (-1);
734 	}
735 
736 	return (0);
737 }
738 
739 /*ARGSUSED*/
740 static pnode_t
741 drmach_node_ddi_get_dnode(drmach_node_t *np)
742 {
743 	return ((pnode_t)NULL);
744 }
745 
746 static drmach_node_t *
747 drmach_node_new(void)
748 {
749 	drmach_node_t *np;
750 
751 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
752 
753 	if (drmach_initialized) {
754 		np->get_dnode = drmach_node_ddi_get_dnode;
755 		np->walk = drmach_node_ddi_walk;
756 		np->n_getdip = drmach_node_ddi_get_dip;
757 		np->n_getproplen = drmach_node_ddi_get_proplen;
758 		np->n_getprop = drmach_node_ddi_get_prop;
759 		np->get_parent = drmach_node_ddi_get_parent;
760 	} else {
761 		np->get_dnode = drmach_node_obp_get_dnode;
762 		np->walk = drmach_node_obp_walk;
763 		np->n_getdip = drmach_node_obp_get_dip;
764 		np->n_getproplen = drmach_node_obp_get_proplen;
765 		np->n_getprop = drmach_node_obp_get_prop;
766 		np->get_parent = drmach_node_obp_get_parent;
767 	}
768 
769 	return (np);
770 }
771 
772 static void
773 drmach_node_dispose(drmach_node_t *np)
774 {
775 	kmem_free(np, sizeof (*np));
776 }
777 
778 /*
779  * Check if a CPU node is part of a CMP.
780  */
781 static int
782 drmach_is_cmp_child(dev_info_t *dip)
783 {
784 	dev_info_t *pdip;
785 
786 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
787 		return (0);
788 	}
789 
790 	pdip = ddi_get_parent(dip);
791 
792 	ASSERT(pdip);
793 
794 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
795 		return (1);
796 	}
797 
798 	return (0);
799 }
800 
801 static dev_info_t *
802 drmach_node_obp_get_dip(drmach_node_t *np)
803 {
804 	pnode_t		nodeid;
805 	dev_info_t	*dip;
806 
807 	nodeid = np->get_dnode(np);
808 	if (nodeid == OBP_NONODE)
809 		return (NULL);
810 
811 	dip = e_ddi_nodeid_to_dip(nodeid);
812 	if (dip) {
813 		/*
814 		 * The branch rooted at dip will have been previously
815 		 * held, or it will be the child of a CMP. In either
816 		 * case, the hold acquired in e_ddi_nodeid_to_dip()
817 		 * is not needed.
818 		 */
819 		ddi_release_devi(dip);
820 		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
821 	}
822 
823 	return (dip);
824 }
825 
826 static dev_info_t *
827 drmach_node_ddi_get_dip(drmach_node_t *np)
828 {
829 	return ((dev_info_t *)np->here);
830 }
831 
832 static int
833 drmach_node_walk(drmach_node_t *np, void *param,
834 		int (*cb)(drmach_node_walk_args_t *args))
835 {
836 	return (np->walk(np, param, cb));
837 }
838 
839 static int
840 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
841 {
842 	int		rv = 0;
843 	dev_info_t	*ndip;
844 	static char	*fn = "drmach_node_ddi_get_prop";
845 
846 	ndip = np->n_getdip(np);
847 	if (ndip == NULL) {
848 		cmn_err(CE_WARN, "%s: NULL dip", fn);
849 		rv = -1;
850 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
851 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
852 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
853 		rv = -1;
854 	}
855 
856 	return (rv);
857 }
858 
859 /* ARGSUSED */
860 static int
861 drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
862 {
863 	int		rv = 0;
864 	pnode_t		nodeid;
865 	static char	*fn = "drmach_node_obp_get_prop";
866 
867 	nodeid = np->get_dnode(np);
868 	if (nodeid == OBP_NONODE) {
869 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
870 		rv = -1;
871 	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
872 		rv = -1;
873 	} else {
874 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
875 	}
876 
877 	return (rv);
878 }
879 
880 static int
881 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
882 {
883 	int		rv = 0;
884 	dev_info_t	*ndip;
885 
886 	ndip = np->n_getdip(np);
887 	if (ndip == NULL) {
888 		rv = -1;
889 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
890 	    name, len) != DDI_PROP_SUCCESS) {
891 		rv = -1;
892 	}
893 
894 	return (rv);
895 }
896 
897 static int
898 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
899 {
900 	pnode_t	 nodeid;
901 	int	 rv;
902 
903 	nodeid = np->get_dnode(np);
904 	if (nodeid == OBP_NONODE)
905 		rv = -1;
906 	else {
907 		*len = prom_getproplen(nodeid, (caddr_t)name);
908 		rv = (*len < 0 ? -1 : 0);
909 	}
910 
911 	return (rv);
912 }
913 
914 static drmachid_t
915 drmach_node_dup(drmach_node_t *np)
916 {
917 	drmach_node_t *dup;
918 
919 	dup = drmach_node_new();
920 	dup->here = np->here;
921 	dup->get_dnode = np->get_dnode;
922 	dup->walk = np->walk;
923 	dup->n_getdip = np->n_getdip;
924 	dup->n_getproplen = np->n_getproplen;
925 	dup->n_getprop = np->n_getprop;
926 	dup->get_parent = np->get_parent;
927 
928 	return (dup);
929 }
930 
931 /*
932  * drmach_array provides convenient array construction, access,
933  * bounds checking and array destruction logic.
934  */
935 
936 static drmach_array_t *
937 drmach_array_new(int min_index, int max_index)
938 {
939 	drmach_array_t *arr;
940 
941 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
942 
943 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
944 	if (arr->arr_sz > 0) {
945 		arr->min_index = min_index;
946 		arr->max_index = max_index;
947 
948 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
949 		return (arr);
950 	} else {
951 		kmem_free(arr, sizeof (*arr));
952 		return (0);
953 	}
954 }
955 
956 static int
957 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
958 {
959 	if (idx < arr->min_index || idx > arr->max_index)
960 		return (-1);
961 	else {
962 		arr->arr[idx - arr->min_index] = val;
963 		return (0);
964 	}
965 	/*NOTREACHED*/
966 }
967 
968 static int
969 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
970 {
971 	if (idx < arr->min_index || idx > arr->max_index)
972 		return (-1);
973 	else {
974 		*val = arr->arr[idx - arr->min_index];
975 		return (0);
976 	}
977 	/*NOTREACHED*/
978 }
979 
980 static int
981 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
982 {
983 	int rv;
984 
985 	*idx = arr->min_index;
986 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
987 		*idx += 1;
988 
989 	return (rv);
990 }
991 
992 static int
993 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
994 {
995 	int rv;
996 
997 	*idx += 1;
998 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
999 		*idx += 1;
1000 
1001 	return (rv);
1002 }
1003 
1004 static void
1005 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
1006 {
1007 	drmachid_t	val;
1008 	int		idx;
1009 	int		rv;
1010 
1011 	rv = drmach_array_first(arr, &idx, &val);
1012 	while (rv == 0) {
1013 		(*disposer)(val);
1014 
1015 		/* clear the array entry */
1016 		rv = drmach_array_set(arr, idx, NULL);
1017 		ASSERT(rv == 0);
1018 
1019 		rv = drmach_array_next(arr, &idx, &val);
1020 	}
1021 
1022 	kmem_free(arr->arr, arr->arr_sz);
1023 	kmem_free(arr, sizeof (*arr));
1024 }
1025 
1026 
1027 static gdcd_t *
1028 drmach_gdcd_new()
1029 {
1030 	gdcd_t *gdcd;
1031 
1032 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1033 
1034 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1035 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1036 bail:
1037 		kmem_free(gdcd, sizeof (gdcd_t));
1038 		return (NULL);
1039 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1040 		goto bail;
1041 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1042 		goto bail;
1043 	}
1044 
1045 	return (gdcd);
1046 }
1047 
1048 static void
1049 drmach_gdcd_dispose(gdcd_t *gdcd)
1050 {
1051 	kmem_free(gdcd, sizeof (gdcd_t));
1052 }
1053 
1054 /*ARGSUSED*/
1055 sbd_error_t *
1056 drmach_configure(drmachid_t id, int flags)
1057 {
1058 	drmach_device_t	*dp;
1059 	dev_info_t	*rdip;
1060 	sbd_error_t	*err = NULL;
1061 
1062 	/*
1063 	 * On Starcat, there is no CPU driver, so it is
1064 	 * not necessary to configure any CPU nodes.
1065 	 */
1066 	if (DRMACH_IS_CPU_ID(id)) {
1067 		return (NULL);
1068 	}
1069 
1070 	for (; id; ) {
1071 		dev_info_t	*fdip = NULL;
1072 
1073 		if (!DRMACH_IS_DEVICE_ID(id))
1074 			return (drerr_new(0, ESTC_INAPPROP, NULL));
1075 		dp = id;
1076 
1077 		rdip = dp->node->n_getdip(dp->node);
1078 
1079 		/*
1080 		 * We held this branch earlier, so at a minimum its
1081 		 * root should still be present in the device tree.
1082 		 */
1083 		ASSERT(rdip);
1084 
1085 		DRMACH_PR("drmach_configure: configuring DDI branch");
1086 
1087 		ASSERT(e_ddi_branch_held(rdip));
1088 		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
1089 			if (err == NULL) {
1090 				/*
1091 				 * Record first failure but don't stop
1092 				 */
1093 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1094 				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
1095 
1096 				(void) ddi_pathname(dip, path);
1097 				err = drerr_new(1, ESTC_DRVFAIL, path);
1098 
1099 				kmem_free(path, MAXPATHLEN);
1100 			}
1101 
1102 			/*
1103 			 * If non-NULL, fdip is returned held and must be
1104 			 * released.
1105 			 */
1106 			if (fdip != NULL) {
1107 				ddi_release_devi(fdip);
1108 			}
1109 		}
1110 
1111 		if (DRMACH_IS_MEM_ID(id)) {
1112 			drmach_mem_t	*mp = id;
1113 			id = mp->next;
1114 		} else {
1115 			id = NULL;
1116 		}
1117 	}
1118 
1119 	return (err);
1120 }
1121 
1122 static sbd_error_t *
1123 drmach_device_new(drmach_node_t *node,
1124 	drmach_board_t *bp, int portid, drmachid_t *idp)
1125 {
1126 	int		i, rv, device_id, unum;
1127 	char		name[OBP_MAXDRVNAME];
1128 	drmach_device_t	proto;
1129 
1130 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1131 	if (rv) {
1132 		sbd_error_t *err;
1133 
1134 		/* every node is expected to have a name */
1135 		err = drerr_new(1, ESTC_GETPROP,
1136 		    "dip: 0x%p: property %s",
1137 		    node->n_getdip(node), OBP_NAME);
1138 
1139 		return (err);
1140 	}
1141 
1142 	i = drmach_name2type_idx(name);
1143 
1144 	if (i < 0 || strcmp(name, "cmp") == 0) {
1145 		/*
1146 		 * Not a node of interest to dr - including "cmp",
1147 		 * but it is in drmach_name2type[], which lets gptwocfg
1148 		 * driver to check if node is OBP created.
1149 		 */
1150 		*idp = (drmachid_t)0;
1151 		return (NULL);
1152 	}
1153 
1154 	/*
1155 	 * Derive a best-guess unit number from the portid value.
1156 	 * Some drmach_*_new constructors (drmach_pci_new, for example)
1157 	 * will overwrite the prototype unum value with one that is more
1158 	 * appropriate for the device.
1159 	 */
1160 	device_id = portid & 0x1f;
1161 	if (device_id < 4)
1162 		unum = device_id;
1163 	else if (device_id == 8) {
1164 		unum = 0;
1165 	} else if (device_id == 9) {
1166 		unum = 1;
1167 	} else if (device_id == 0x1c) {
1168 		unum = 0;
1169 	} else if (device_id == 0x1d) {
1170 		unum = 1;
1171 	} else {
1172 		return (DRMACH_INTERNAL_ERROR());
1173 	}
1174 
1175 	bzero(&proto, sizeof (proto));
1176 	proto.type = drmach_name2type[i].type;
1177 	proto.bp = bp;
1178 	proto.node = node;
1179 	proto.portid = portid;
1180 	proto.unum = unum;
1181 
1182 	return (drmach_name2type[i].new(&proto, idp));
1183 }
1184 
1185 static void
1186 drmach_device_dispose(drmachid_t id)
1187 {
1188 	drmach_device_t *self = id;
1189 
1190 	self->cm.dispose(id);
1191 }
1192 
1193 static drmach_board_t *
1194 drmach_board_new(int bnum)
1195 {
1196 	drmach_board_t	*bp;
1197 
1198 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1199 
1200 	bp->cm.isa = (void *)drmach_board_new;
1201 	bp->cm.release = drmach_board_release;
1202 	bp->cm.status = drmach_board_status;
1203 
1204 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1205 
1206 	bp->bnum = bnum;
1207 	bp->devices = NULL;
1208 	bp->tree = drmach_node_new();
1209 
1210 	(void) drmach_array_set(drmach_boards, bnum, bp);
1211 	return (bp);
1212 }
1213 
1214 static void
1215 drmach_board_dispose(drmachid_t id)
1216 {
1217 	drmach_board_t *bp;
1218 
1219 	ASSERT(DRMACH_IS_BOARD_ID(id));
1220 	bp = id;
1221 
1222 	if (bp->tree)
1223 		drmach_node_dispose(bp->tree);
1224 
1225 	if (bp->devices)
1226 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1227 
1228 	kmem_free(bp, sizeof (*bp));
1229 }
1230 
1231 static sbd_error_t *
1232 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1233 {
1234 	sbd_error_t	*err = NULL;
1235 	drmach_board_t	*bp;
1236 	caddr_t		obufp;
1237 	dr_showboard_t	shb;
1238 
1239 	if (!DRMACH_IS_BOARD_ID(id))
1240 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1241 
1242 	bp = id;
1243 
1244 	/*
1245 	 * we need to know if the board's connected before
1246 	 * issuing a showboard message.  If it's connected, we just
1247 	 * reply with status composed of cached info
1248 	 */
1249 
1250 	if (!bp->connected) {
1251 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1252 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1253 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1254 		    sizeof (dr_showboard_t));
1255 
1256 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1257 		if (err)
1258 			return (err);
1259 
1260 		bp->connected = (shb.bd_assigned && shb.bd_active);
1261 		(void) strncpy(bp->type, shb.board_type, sizeof (bp->type));
1262 		stat->assigned = bp->assigned = shb.bd_assigned;
1263 		stat->powered = bp->powered = shb.power_on;
1264 		stat->empty = bp->empty = shb.slot_empty;
1265 
1266 		switch (shb.test_status) {
1267 			case DR_TEST_STATUS_UNKNOWN:
1268 			case DR_TEST_STATUS_IPOST:
1269 			case DR_TEST_STATUS_ABORTED:
1270 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1271 				break;
1272 			case DR_TEST_STATUS_PASSED:
1273 				stat->cond = bp->cond = SBD_COND_OK;
1274 				break;
1275 			case DR_TEST_STATUS_FAILED:
1276 				stat->cond = bp->cond = SBD_COND_FAILED;
1277 				break;
1278 			default:
1279 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1280 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1281 				    shb.test_status);
1282 				break;
1283 
1284 		}
1285 
1286 		(void) strncpy(stat->type, shb.board_type, sizeof (stat->type));
1287 		(void) snprintf(stat->info, sizeof (stat->info),
1288 		    "Test Level=%d", shb.test_level);
1289 	} else {
1290 		stat->assigned = bp->assigned;
1291 		stat->powered = bp->powered;
1292 		stat->empty = bp->empty;
1293 		stat->cond = bp->cond;
1294 		(void) strncpy(stat->type, bp->type, sizeof (stat->type));
1295 	}
1296 
1297 	stat->busy = 0;			/* assume not busy */
1298 	stat->configured = 0;		/* assume not configured */
1299 	if (bp->devices) {
1300 		int		 rv;
1301 		int		 d_idx;
1302 		drmachid_t	 d_id;
1303 
1304 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1305 		while (rv == 0) {
1306 			drmach_status_t	d_stat;
1307 
1308 			err = drmach_i_status(d_id, &d_stat);
1309 			if (err)
1310 				break;
1311 
1312 			stat->busy |= d_stat.busy;
1313 			stat->configured |= d_stat.configured;
1314 
1315 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1316 		}
1317 	}
1318 
1319 	return (err);
1320 }
1321 
1322 typedef struct drmach_msglist {
1323 	kcondvar_t		s_cv; 		/* condvar for sending msg */
1324 	kmutex_t		s_lock;		/* mutex for sending */
1325 	kcondvar_t		g_cv;		/* condvar for getting reply */
1326 	kmutex_t		g_lock;		/* mutex for getting reply */
1327 	struct drmach_msglist	*prev;		/* link to previous entry */
1328 	struct drmach_msglist	*next;		/* link to next entry */
1329 	struct drmach_msglist	*link;		/* link to related entry */
1330 	caddr_t			o_buf;		/* address of output buffer */
1331 	caddr_t			i_buf; 		/* address of input buffer */
1332 	uint32_t		o_buflen;	/* output buffer length */
1333 	uint32_t		i_buflen;	/* input buffer length */
1334 	uint32_t		msgid;		/* message identifier */
1335 	int			o_nretry;	/* number of sending retries */
1336 	int			f_error;	/* mailbox framework error */
1337 	uint8_t			e_code;		/* error code returned by SC */
1338 	uint8_t			p_flag	:1,	/* successfully putmsg */
1339 				m_reply	:1,	/* msg reply received */
1340 				unused	:6;
1341 } drmach_msglist_t;
1342 
1343 kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
1344 kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
1345 kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
1346 drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
1347 drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
1348 uint32_t		drmach_msgid;		/* current message id */
1349 kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
1350 volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
1351 kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
1352 kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
1353 kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
1354 volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
1355 int			drmach_mbox_istate;	/* mailbox init state */
1356 int			drmach_mbox_iflag;	/* set if init'd with SC */
1357 int			drmach_mbox_ipending;	/* set if reinit scheduled */
1358 
1359 /*
1360  * Timeout values (in seconds) used when waiting for replies (from the SC) to
1361  * requests that we sent.  Since we only receive boardevent messages, and they
1362  * are events rather than replies, there is no boardevent timeout.
1363  */
1364 int	drmach_to_mbxinit	= 60;		/* 1 minute */
1365 int	drmach_to_assign	= 60;		/* 1 minute */
1366 int	drmach_to_unassign	= 60;		/* 1 minute */
1367 int	drmach_to_claim		= 3600;		/* 1 hour */
1368 int	drmach_to_unclaim	= 3600;		/* 1 hour */
1369 int	drmach_to_poweron	= 480;		/* 8 minutes */
1370 int	drmach_to_poweroff	= 480;		/* 8 minutes */
1371 int	drmach_to_testboard	= 43200;	/* 12 hours */
1372 int	drmach_to_aborttest	= 180;		/* 3 minutes */
1373 int	drmach_to_showboard	= 180;		/* 3 minutes */
1374 int	drmach_to_unconfig	= 180;		/* 3 minutes */
1375 
1376 /*
1377  * Delay (in seconds) used after receiving a non-transient error indication from
1378  * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
1379  */
1380 int	drmach_mbxerr_delay	= 15;		/* 15 seconds */
1381 
1382 /*
1383  * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
1384  */
1385 clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
1386 clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */
1387 
1388 /*
1389  * Normally, drmach_to_putmsg is set dynamically during initialization in
1390  * drmach_mbox_init.  This has the potentially undesirable side effect of
1391  * clobbering any value that might have been set in /etc/system.  To prevent
1392  * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
1393  * /etc/system), set drmach_use_tuned_putmsg_to to 1.
1394  */
1395 int	drmach_use_tuned_putmsg_to	= 0;
1396 
1397 
1398 /* maximum conceivable message size for future mailbox protocol versions */
1399 #define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1400 
1401 /*ARGSUSED*/
1402 void
1403 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1404 {
1405 	int		i, j;
1406 	dr_memregs_t	*memregs;
1407 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1408 	dr_msg_t	*mp = &mbp->msgdata;
1409 
1410 #ifdef DEBUG
1411 	switch (php->command) {
1412 		case DRMSG_BOARDEVENT:
1413 			if (dir) {
1414 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1415 			} else {
1416 				DRMACH_PR("BOARDEVENT received:\n");
1417 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1418 				    mp->dm_be.initialized,
1419 				    mp->dm_be.board_insertion,
1420 				    mp->dm_be.board_removal,
1421 				    mp->dm_be.slot_assign);
1422 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1423 				    mp->dm_be.slot_unassign,
1424 				    mp->dm_be.slot_avail,
1425 				    mp->dm_be.slot_unavail);
1426 			}
1427 			break;
1428 		case DRMSG_MBOX_INIT:
1429 			if (dir) {
1430 				DRMACH_PR("MBOX_INIT Request:\n");
1431 			} else {
1432 				DRMACH_PR("MBOX_INIT Reply:\n");
1433 			}
1434 			break;
1435 		case DRMSG_ASSIGN:
1436 			if (dir) {
1437 				DRMACH_PR("ASSIGN Request:\n");
1438 			} else {
1439 				DRMACH_PR("ASSIGN Reply:\n");
1440 			}
1441 			break;
1442 		case DRMSG_UNASSIGN:
1443 			if (dir) {
1444 				DRMACH_PR("UNASSIGN Request:\n");
1445 			} else {
1446 				DRMACH_PR("UNASSIGN Reply:\n");
1447 			}
1448 			break;
1449 		case DRMSG_CLAIM:
1450 			if (!dir) {
1451 				DRMACH_PR("CLAIM Reply:\n");
1452 				break;
1453 			}
1454 
1455 			DRMACH_PR("CLAIM Request:\n");
1456 			for (i = 0; i < 18; ++i) {
1457 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1458 				    mp->dm_cr.mem_slice[i].valid,
1459 				    mp->dm_cr.mem_slice[i].slice);
1460 				memregs = &(mp->dm_cr.mem_regs[i]);
1461 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1462 					DRMACH_PR("  MC %2d: "
1463 					    "MADR[%d] = 0x%lx, "
1464 					    "MADR[%d] = 0x%lx\n", j,
1465 					    0, DRMACH_MCREG_TO_U64(
1466 					    memregs->madr[j][0]),
1467 					    1, DRMACH_MCREG_TO_U64(
1468 					    memregs->madr[j][1]));
1469 					DRMACH_PR("       : "
1470 					    "MADR[%d] = 0x%lx, "
1471 					    "MADR[%d] = 0x%lx\n",
1472 					    2, DRMACH_MCREG_TO_U64(
1473 					    memregs->madr[j][2]),
1474 					    3, DRMACH_MCREG_TO_U64(
1475 					    memregs->madr[j][3]));
1476 				}
1477 			}
1478 			break;
1479 		case DRMSG_UNCLAIM:
1480 			if (!dir) {
1481 				DRMACH_PR("UNCLAIM Reply:\n");
1482 				break;
1483 			}
1484 
1485 			DRMACH_PR("UNCLAIM Request:\n");
1486 			for (i = 0; i < 18; ++i) {
1487 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1488 				    mp->dm_ur.mem_slice[i].valid,
1489 				    mp->dm_ur.mem_slice[i].slice);
1490 				memregs = &(mp->dm_ur.mem_regs[i]);
1491 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1492 					DRMACH_PR("  MC %2d: "
1493 					    "MADR[%d] = 0x%lx, "
1494 					    "MADR[%d] = 0x%lx\n", j,
1495 					    0, DRMACH_MCREG_TO_U64(
1496 					    memregs->madr[j][0]),
1497 					    1, DRMACH_MCREG_TO_U64(
1498 					    memregs->madr[j][1]));
1499 					DRMACH_PR("       : "
1500 					    "MADR[%d] = 0x%lx, "
1501 					    "MADR[%d] = 0x%lx\n",
1502 					    2, DRMACH_MCREG_TO_U64(
1503 					    memregs->madr[j][2]),
1504 					    3, DRMACH_MCREG_TO_U64(
1505 					    memregs->madr[j][3]));
1506 				}
1507 			}
1508 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1509 			break;
1510 		case DRMSG_UNCONFIG:
1511 			if (!dir) {
1512 				DRMACH_PR("UNCONFIG Reply:\n");
1513 				break;
1514 			}
1515 
1516 			DRMACH_PR("UNCONFIG Request:\n");
1517 			for (i = 0; i < 18; ++i) {
1518 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1519 				    mp->dm_uc.mem_slice[i].valid,
1520 				    mp->dm_uc.mem_slice[i].slice);
1521 				memregs = &(mp->dm_uc.mem_regs[i]);
1522 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1523 					DRMACH_PR("  MC %2d: "
1524 					    "MADR[%d] = 0x%lx, "
1525 					    "MADR[%d] = 0x%lx\n", j,
1526 					    0, DRMACH_MCREG_TO_U64(
1527 					    memregs->madr[j][0]),
1528 					    1, DRMACH_MCREG_TO_U64(
1529 					    memregs->madr[j][1]));
1530 					DRMACH_PR("       : "
1531 					    "MADR[%d] = 0x%lx, "
1532 					    "MADR[%d] = 0x%lx\n",
1533 					    2, DRMACH_MCREG_TO_U64(
1534 					    memregs->madr[j][2]),
1535 					    3, DRMACH_MCREG_TO_U64(
1536 					    memregs->madr[j][3]));
1537 				}
1538 			}
1539 			break;
1540 		case DRMSG_POWERON:
1541 			if (dir) {
1542 				DRMACH_PR("POWERON Request:\n");
1543 			} else {
1544 				DRMACH_PR("POWERON Reply:\n");
1545 			}
1546 			break;
1547 		case DRMSG_POWEROFF:
1548 			if (dir) {
1549 				DRMACH_PR("POWEROFF Request:\n");
1550 			} else {
1551 				DRMACH_PR("POWEROFF Reply:\n");
1552 			}
1553 			break;
1554 		case DRMSG_TESTBOARD:
1555 			if (dir) {
1556 				DRMACH_PR("TESTBOARD Request:\n");
1557 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1558 				    mp->dm_tb.memaddrhi,
1559 				    mp->dm_tb.memaddrlo);
1560 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1561 				    mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1562 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1563 				    mp->dm_tb.force, mp->dm_tb.immediate);
1564 			} else {
1565 				DRMACH_PR("TESTBOARD Reply:\n");
1566 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1567 				    mp->dm_tr.memaddrhi,
1568 				    mp->dm_tr.memaddrlo);
1569 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1570 				    mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1571 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1572 				    mp->dm_tr.cpu_recovered,
1573 				    mp->dm_tr.test_status);
1574 
1575 			}
1576 			break;
1577 		case DRMSG_ABORT_TEST:
1578 			if (dir) {
1579 				DRMACH_PR("ABORT_TEST Request:\n");
1580 			} else {
1581 				DRMACH_PR("ABORT_TEST Reply:\n");
1582 			}
1583 
1584 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1585 			    mp->dm_ta.memaddrhi,
1586 			    mp->dm_ta.memaddrlo);
1587 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1588 			    mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1589 			break;
1590 		case DRMSG_SHOWBOARD:
1591 			if (dir) {
1592 				DRMACH_PR("SHOWBOARD Request:\n");
1593 			} else {
1594 				DRMACH_PR("SHOWBOARD Reply:\n");
1595 
1596 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1597 				    mp->dm_sb.slot_empty,
1598 				    mp->dm_sb.power_on,
1599 				    mp->dm_sb.bd_assigned);
1600 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1601 				    mp->dm_sb.bd_active,
1602 				    mp->dm_sb.test_status,
1603 				    mp->dm_sb.test_level);
1604 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1605 			}
1606 			break;
1607 		default:
1608 			DRMACH_PR("Unknown message type\n");
1609 			break;
1610 	}
1611 
1612 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1613 	    php->message_id, php->drproto_version, php->command,
1614 	    php->expbrd, php->slot);
1615 #endif
1616 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1617 	    php->error_code);
1618 }
1619 
1620 /*
1621  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1622  * handshake needs to be scheduled.  The handshake can't be performed by the
1623  * thread that determines it is needed, in most cases, so this function is
1624  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1625  * otherwise ignored, since any situation that requires a mailbox initialization
1626  * handshake will continue to request the handshake until it succeeds.
1627  */
1628 static void
1629 drmach_mbox_reinit(void *unused)
1630 {
1631 	_NOTE(ARGUNUSED(unused))
1632 
1633 	caddr_t		obufp = NULL;
1634 	sbd_error_t	*serr = NULL;
1635 
1636 	DRMACH_PR("scheduled mailbox reinit running\n");
1637 
1638 	mutex_enter(&drmach_ri_mbox_mutex);
1639 	mutex_enter(&drmach_g_mbox_mutex);
1640 	if (drmach_mbox_iflag == 0) {
1641 		/* need to initialize the mailbox */
1642 		mutex_exit(&drmach_g_mbox_mutex);
1643 
1644 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1645 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1646 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1647 		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1648 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1649 
1650 		if (serr) {
1651 			cmn_err(CE_WARN,
1652 			    "mbox_init: MBOX_INIT failed ecode=0x%x",
1653 			    serr->e_code);
1654 			sbd_err_clear(&serr);
1655 		}
1656 		mutex_enter(&drmach_g_mbox_mutex);
1657 		if (!serr) {
1658 			drmach_mbox_iflag = 1;
1659 		}
1660 	}
1661 	drmach_mbox_ipending = 0;
1662 	mutex_exit(&drmach_g_mbox_mutex);
1663 	mutex_exit(&drmach_ri_mbox_mutex);
1664 }
1665 
1666 /*
1667  * To ensure sufficient compatibility with future versions of the DR mailbox
1668  * protocol, we use a buffer that is large enough to receive the largest message
1669  * that could possibly be sent to us.  However, since that ends up being fairly
1670  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1671  * does not need to be MT-safe since it is only invoked by the mailbox
1672  * framework, which will never invoke it multiple times concurrently.  Since
1673  * that is the case, we can use a static buffer.
1674  */
1675 void
1676 drmach_mbox_event(void)
1677 {
1678 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1679 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1680 	int		err;
1681 	uint32_t	type = MBOXSC_MSG_EVENT;
1682 	uint32_t	command = DRMSG_BOARDEVENT;
1683 	uint64_t	transid = 0;
1684 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1685 	char		*hint = "";
1686 	int		logsys = 0;
1687 
1688 	do {
1689 		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1690 		    &length, (void *)msg, 0);
1691 	} while (err == EAGAIN);
1692 
1693 	/* don't try to interpret anything with the wrong version number */
1694 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1695 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1696 		    msg->p_hdr.drproto_version, DRMBX_VERSION);
1697 		mutex_enter(&drmach_g_mbox_mutex);
1698 		drmach_mbox_iflag = 0;
1699 		/* schedule a reinit handshake if one isn't pending */
1700 		if (!drmach_mbox_ipending) {
1701 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1702 			    NULL, TQ_NOSLEEP) != NULL) {
1703 				drmach_mbox_ipending = 1;
1704 			} else {
1705 				cmn_err(CE_WARN,
1706 				    "failed to schedule mailbox reinit");
1707 			}
1708 		}
1709 		mutex_exit(&drmach_g_mbox_mutex);
1710 		return;
1711 	}
1712 
1713 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1714 		cmn_err(CE_WARN,
1715 		    "Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1716 		    err, msg->p_hdr.error_code);
1717 	} else {
1718 		dr_boardevent_t	*be;
1719 		be = (dr_boardevent_t *)&msg->msgdata;
1720 
1721 		/* check for initialization event */
1722 		if (be->initialized) {
1723 			mutex_enter(&drmach_g_mbox_mutex);
1724 			drmach_mbox_iflag = 0;
1725 			/* schedule a reinit handshake if one isn't pending */
1726 			if (!drmach_mbox_ipending) {
1727 				if (taskq_dispatch(system_taskq,
1728 				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1729 				    != NULL) {
1730 					drmach_mbox_ipending = 1;
1731 				} else {
1732 					cmn_err(CE_WARN, "failed to schedule "
1733 					    "mailbox reinit");
1734 				}
1735 			}
1736 			mutex_exit(&drmach_g_mbox_mutex);
1737 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1738 		}
1739 
1740 		/* anything else will be a log_sysevent call */
1741 
1742 		if (be->board_insertion) {
1743 			DRMACH_PR("Board Insertion event received");
1744 			hint = DR_HINT_INSERT;
1745 			logsys++;
1746 	}
1747 		if (be->board_removal) {
1748 			DRMACH_PR("Board Removal event received");
1749 			hint = DR_HINT_REMOVE;
1750 			logsys++;
1751 		}
1752 		if (be->slot_assign) {
1753 			DRMACH_PR("Slot Assign event received");
1754 			logsys++;
1755 		}
1756 		if (be->slot_unassign) {
1757 			DRMACH_PR("Slot Unassign event received");
1758 			logsys++;
1759 		}
1760 		if (be->slot_avail) {
1761 			DRMACH_PR("Slot Available event received");
1762 			logsys++;
1763 		}
1764 		if (be->slot_unavail) {
1765 			DRMACH_PR("Slot Unavailable event received");
1766 			logsys++;
1767 		}
1768 		if (be->power_on) {
1769 			DRMACH_PR("Power ON event received");
1770 			logsys++;
1771 		}
1772 		if (be->power_off) {
1773 			DRMACH_PR("Power OFF event received");
1774 			logsys++;
1775 		}
1776 
1777 		if (logsys)
1778 			(void) drmach_log_sysevent(
1779 			    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1780 			    msg->p_hdr.slot), hint, SE_NOSLEEP, 1);
1781 	}
1782 }
1783 
1784 static uint32_t
1785 drmach_get_msgid()
1786 {
1787 	uint32_t	rv;
1788 	mutex_enter(&drmach_msglist_mutex);
1789 	if (!(++drmach_msgid))
1790 		++drmach_msgid;
1791 	rv = drmach_msgid;
1792 	mutex_exit(&drmach_msglist_mutex);
1793 	return (rv);
1794 }
1795 
1796 /*
1797  *	unlink an entry from the message transaction list
1798  *
1799  *	caller must hold drmach_msglist_mutex
1800  */
1801 void
1802 drmach_msglist_unlink(drmach_msglist_t *entry)
1803 {
1804 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1805 	if (entry->prev) {
1806 		entry->prev->next = entry->next;
1807 		if (entry->next)
1808 			entry->next->prev = entry->prev;
1809 	} else {
1810 		drmach_msglist_first = entry->next;
1811 		if (entry->next)
1812 			entry->next->prev = NULL;
1813 	}
1814 	if (entry == drmach_msglist_last) {
1815 		drmach_msglist_last = entry->prev;
1816 	}
1817 }
1818 
1819 void
1820 drmach_msglist_link(drmach_msglist_t *entry)
1821 {
1822 	mutex_enter(&drmach_msglist_mutex);
1823 	if (drmach_msglist_last) {
1824 		entry->prev = drmach_msglist_last;
1825 		drmach_msglist_last->next = entry;
1826 		drmach_msglist_last = entry;
1827 	} else {
1828 		drmach_msglist_last = drmach_msglist_first = entry;
1829 	}
1830 	mutex_exit(&drmach_msglist_mutex);
1831 }
1832 
1833 void
1834 drmach_mbox_getmsg()
1835 {
1836 	int			err;
1837 	register int		msgid;
1838 	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1839 	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1840 	dr_proto_hdr_t		*php;
1841 	drmach_msglist_t	*found, *entry;
1842 	uint32_t		type = MBOXSC_MSG_REPLY;
1843 	uint32_t		command;
1844 	uint64_t		transid;
1845 	uint32_t		length;
1846 
1847 	php = &msg->p_hdr;
1848 
1849 	while (drmach_getmsg_thread_run != 0) {
1850 		/* get a reply message */
1851 		command = 0;
1852 		transid = 0;
1853 		length = DRMACH_MAX_MBOX_MSG_SIZE;
1854 		err = mboxsc_getmsg(KEY_SCDR, &type, &command, &transid,
1855 		    &length, (void *)msg, drmach_to_getmsg);
1856 
1857 		if (err) {
1858 			/*
1859 			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1860 			 * the "error" is really just a normal, transient
1861 			 * condition and we can retry the operation right away.
1862 			 * Any other error suggests a more serious problem,
1863 			 * ranging from a message being too big for our buffer
1864 			 * (EMSGSIZE) to total failure of the mailbox layer.
1865 			 * This second class of errors is much less "transient",
1866 			 * so rather than retrying over and over (and getting
1867 			 * the same error over and over) as fast as we can,
1868 			 * we'll sleep for a while before retrying.
1869 			 */
1870 			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1871 				cmn_err(CE_WARN,
1872 				    "mboxsc_getmsg failed, err=0x%x", err);
1873 				delay(drmach_mbxerr_delay * hz);
1874 			}
1875 			continue;
1876 		}
1877 
1878 		drmach_mbox_prmsg(msg, 0);
1879 
1880 		if (php->drproto_version != DRMBX_VERSION) {
1881 			cmn_err(CE_WARN,
1882 			    "mailbox version mismatch 0x%x vs 0x%x",
1883 			    php->drproto_version, DRMBX_VERSION);
1884 
1885 			mutex_enter(&drmach_g_mbox_mutex);
1886 			drmach_mbox_iflag = 0;
1887 			/* schedule a reinit handshake if one isn't pending */
1888 			if (!drmach_mbox_ipending) {
1889 				if (taskq_dispatch(system_taskq,
1890 				    drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1891 				    != NULL) {
1892 					drmach_mbox_ipending = 1;
1893 				} else {
1894 					cmn_err(CE_WARN, "failed to schedule "
1895 					    "mailbox reinit");
1896 				}
1897 			}
1898 			mutex_exit(&drmach_g_mbox_mutex);
1899 
1900 			continue;
1901 		}
1902 
1903 		msgid = php->message_id;
1904 		found = NULL;
1905 		mutex_enter(&drmach_msglist_mutex);
1906 		entry = drmach_msglist_first;
1907 		while (entry != NULL) {
1908 			if (entry->msgid == msgid) {
1909 				found = entry;
1910 				drmach_msglist_unlink(entry);
1911 				entry = NULL;
1912 			} else
1913 				entry = entry->next;
1914 		}
1915 
1916 		if (found) {
1917 			mutex_enter(&found->g_lock);
1918 
1919 			found->e_code = php->error_code;
1920 			if (found->i_buflen > 0)
1921 				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1922 				    found->i_buflen);
1923 			found->m_reply = 1;
1924 
1925 			cv_signal(&found->g_cv);
1926 			mutex_exit(&found->g_lock);
1927 		} else {
1928 			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1929 			    msgid);
1930 			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1931 			    php->command, php->expbrd, php->slot);
1932 		}
1933 
1934 		mutex_exit(&drmach_msglist_mutex);
1935 	}
1936 	cmn_err(CE_WARN, "mbox_getmsg: exiting");
1937 	mutex_enter(&drmach_msglist_mutex);
1938 	entry = drmach_msglist_first;
1939 	while (entry != NULL) {
1940 		if (entry->p_flag == 1) {
1941 			entry->f_error = -1;
1942 			mutex_enter(&entry->g_lock);
1943 			cv_signal(&entry->g_cv);
1944 			mutex_exit(&entry->g_lock);
1945 			drmach_msglist_unlink(entry);
1946 		}
1947 		entry = entry->next;
1948 	}
1949 	mutex_exit(&drmach_msglist_mutex);
1950 	drmach_getmsg_thread_run = -1;
1951 	thread_exit();
1952 }
1953 
1954 void
1955 drmach_mbox_sendmsg()
1956 {
1957 	int		err, retry;
1958 	drmach_msglist_t *entry;
1959 	dr_mbox_msg_t   *mp;
1960 	dr_proto_hdr_t  *php;
1961 
1962 	while (drmach_sendmsg_thread_run != 0) {
1963 		/*
1964 		 * Search through the list to find entries awaiting
1965 		 * transmission to the SC
1966 		 */
1967 		mutex_enter(&drmach_msglist_mutex);
1968 		entry = drmach_msglist_first;
1969 		retry = 0;
1970 		while (entry != NULL) {
1971 			if (entry->p_flag == 1) {
1972 				entry = entry->next;
1973 				continue;
1974 			}
1975 
1976 			mutex_exit(&drmach_msglist_mutex);
1977 
1978 			if (!retry)
1979 				mutex_enter(&entry->s_lock);
1980 			mp = (dr_mbox_msg_t *)entry->o_buf;
1981 			php = &mp->p_hdr;
1982 
1983 			drmach_mbox_prmsg(mp, 1);
1984 
1985 			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
1986 			    php->command, NULL, entry->o_buflen, (void *)mp,
1987 			    drmach_to_putmsg);
1988 
1989 			if (err) {
1990 				switch (err) {
1991 
1992 				case EAGAIN:
1993 				case EBUSY:
1994 					++retry;
1995 					mutex_enter(&drmach_msglist_mutex);
1996 					continue;
1997 
1998 				case ETIMEDOUT:
1999 					if (--entry->o_nretry <= 0) {
2000 						mutex_enter(
2001 						    &drmach_msglist_mutex);
2002 						drmach_msglist_unlink(entry);
2003 						mutex_exit(
2004 						    &drmach_msglist_mutex);
2005 						entry->f_error = err;
2006 						entry->p_flag = 1;
2007 						cv_signal(&entry->s_cv);
2008 					} else {
2009 						++retry;
2010 						mutex_enter(
2011 						    &drmach_msglist_mutex);
2012 						continue;
2013 					}
2014 					break;
2015 				default:
2016 					mutex_enter(&drmach_msglist_mutex);
2017 					drmach_msglist_unlink(entry);
2018 					mutex_exit(&drmach_msglist_mutex);
2019 					entry->f_error = err;
2020 					entry->p_flag = 1;
2021 					cv_signal(&entry->s_cv);
2022 					break;
2023 				}
2024 			} else {
2025 				entry->p_flag = 1;
2026 				cv_signal(&entry->s_cv);
2027 			}
2028 
2029 			mutex_exit(&entry->s_lock);
2030 			retry = 0;
2031 			mutex_enter(&drmach_msglist_mutex);
2032 			entry = drmach_msglist_first;
2033 		}
2034 		mutex_exit(&drmach_msglist_mutex);
2035 
2036 		mutex_enter(&drmach_sendmsg_mutex);
2037 		(void) cv_reltimedwait(&drmach_sendmsg_cv,
2038 		    &drmach_sendmsg_mutex, (5 * hz), TR_CLOCK_TICK);
2039 		mutex_exit(&drmach_sendmsg_mutex);
2040 	}
2041 	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
2042 	mutex_enter(&drmach_msglist_mutex);
2043 	entry = drmach_msglist_first;
2044 	while (entry != NULL) {
2045 		if (entry->p_flag == 0) {
2046 			entry->f_error = -1;
2047 			mutex_enter(&entry->s_lock);
2048 			cv_signal(&entry->s_cv);
2049 			mutex_exit(&entry->s_lock);
2050 			drmach_msglist_unlink(entry);
2051 		}
2052 		entry = entry->next;
2053 	}
2054 	mutex_exit(&drmach_msglist_mutex);
2055 	cv_destroy(&drmach_sendmsg_cv);
2056 	mutex_destroy(&drmach_sendmsg_mutex);
2057 
2058 	drmach_sendmsg_thread_run = -1;
2059 	thread_exit();
2060 }
2061 
2062 void
2063 drmach_msglist_destroy(drmach_msglist_t *listp)
2064 {
2065 	if (listp != NULL) {
2066 		drmach_msglist_t	*entry;
2067 
2068 		mutex_enter(&drmach_msglist_mutex);
2069 		entry = drmach_msglist_first;
2070 		while (entry) {
2071 			if (listp == entry) {
2072 				drmach_msglist_unlink(listp);
2073 				entry = NULL;
2074 			} else
2075 				entry = entry->next;
2076 		}
2077 
2078 		mutex_destroy(&listp->s_lock);
2079 		cv_destroy(&listp->s_cv);
2080 		mutex_destroy(&listp->g_lock);
2081 		cv_destroy(&listp->g_cv);
2082 		kmem_free(listp, sizeof (drmach_msglist_t));
2083 
2084 		mutex_exit(&drmach_msglist_mutex);
2085 	}
2086 }
2087 
2088 static drmach_msglist_t	*
2089 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2090 	uint32_t olen, int nrtry)
2091 {
2092 	drmach_msglist_t	*listp;
2093 
2094 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2095 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2096 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2097 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2098 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2099 	listp->o_buf = (caddr_t)hdrp;
2100 	listp->o_buflen = olen;
2101 	listp->i_buf = ibufp;
2102 	listp->i_buflen = ilen;
2103 	listp->o_nretry = nrtry;
2104 	listp->msgid = hdrp->message_id;
2105 
2106 	return (listp);
2107 }
2108 
2109 static drmach_msglist_t *
2110 drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
2111 	uint32_t ilen, int timeout, int nrtry, int nosig,
2112 	drmach_msglist_t *link)
2113 {
2114 	int		crv;
2115 	drmach_msglist_t *listp;
2116 	clock_t		to_val;
2117 	dr_proto_hdr_t	*php;
2118 
2119 	/* setup transaction list entry */
2120 	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);
2121 
2122 	/* send mailbox message, await reply */
2123 	mutex_enter(&listp->s_lock);
2124 	mutex_enter(&listp->g_lock);
2125 
2126 	listp->link = link;
2127 	drmach_msglist_link(listp);
2128 
2129 	mutex_enter(&drmach_sendmsg_mutex);
2130 	cv_signal(&drmach_sendmsg_cv);
2131 	mutex_exit(&drmach_sendmsg_mutex);
2132 
2133 	while (listp->p_flag == 0) {
2134 		cv_wait(&listp->s_cv, &listp->s_lock);
2135 	}
2136 
2137 	to_val = ddi_get_lbolt() + (timeout * hz);
2138 
2139 	if (listp->f_error) {
2140 		listp->p_flag = 0;
2141 		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x", listp->f_error);
2142 		php = (dr_proto_hdr_t *)listp->o_buf;
2143 		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
2144 		    php->command, php->expbrd, php->slot);
2145 	} else {
2146 		while (listp->m_reply == 0 && listp->f_error == 0) {
2147 			if (nosig)
2148 				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
2149 				    to_val);
2150 			else
2151 				crv = cv_timedwait_sig(&listp->g_cv,
2152 				    &listp->g_lock, to_val);
2153 			switch (crv) {
2154 				case -1: /* timed out */
2155 					cmn_err(CE_WARN,
2156 					    "!msgid=0x%x reply timed out",
2157 					    hdrp->message_id);
2158 					php = (dr_proto_hdr_t *)listp->o_buf;
2159 					cmn_err(CE_WARN, "!    cmd = 0x%x, "
2160 					    "exb = %d, slot = %d", php->command,
2161 					    php->expbrd, php->slot);
2162 					listp->f_error = ETIMEDOUT;
2163 					break;
2164 				case 0: /* signal received */
2165 					cmn_err(CE_WARN,
2166 					    "operation interrupted by signal");
2167 					listp->f_error = EINTR;
2168 					break;
2169 				default:
2170 					break;
2171 				}
2172 		}
2173 
2174 		/*
2175 		 * If link is set for this entry, check to see if
2176 		 * the linked entry has been replied to.  If not,
2177 		 * wait for the response.
2178 		 * Currently, this is only used for ABORT_TEST functionality,
2179 		 * wherein a check is made for the TESTBOARD reply when
2180 		 * the ABORT_TEST reply is received.
2181 		 */
2182 
2183 		if (link) {
2184 			mutex_enter(&link->g_lock);
2185 			/*
2186 			 * If the reply to the linked entry hasn't been
2187 			 * received, clear the existing link->f_error,
2188 			 * and await the reply.
2189 			 */
2190 			if (link->m_reply == 0) {
2191 				link->f_error = 0;
2192 			}
2193 			to_val =  ddi_get_lbolt() + (timeout * hz);
2194 			while (link->m_reply == 0 && link->f_error == 0) {
2195 				crv = cv_timedwait(&link->g_cv, &link->g_lock,
2196 				    to_val);
2197 				switch (crv) {
2198 				case -1: /* timed out */
2199 					cmn_err(CE_NOTE,
2200 					    "!link msgid=0x%x reply timed out",
2201 					    link->msgid);
2202 					link->f_error = ETIMEDOUT;
2203 					break;
2204 				default:
2205 					break;
2206 				}
2207 			}
2208 			mutex_exit(&link->g_lock);
2209 		}
2210 	}
2211 	mutex_exit(&listp->g_lock);
2212 	mutex_exit(&listp->s_lock);
2213 	return (listp);
2214 }
2215 
2216 static sbd_error_t *
2217 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2218 {
2219 	char		a_pnt[MAXNAMELEN];
2220 	dr_proto_hdr_t	*php;
2221 	int		bnum;
2222 
2223 	if (mlp->f_error) {
2224 		/*
2225 		 * If framework failure is due to signal, return "no error"
2226 		 * error.
2227 		 */
2228 		if (mlp->f_error == EINTR)
2229 			return (drerr_new(0, ESTC_NONE, NULL));
2230 
2231 		mutex_enter(&drmach_g_mbox_mutex);
2232 		drmach_mbox_iflag = 0;
2233 		mutex_exit(&drmach_g_mbox_mutex);
2234 		if (!mlp->p_flag)
2235 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2236 		else
2237 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2238 	}
2239 	php = (dr_proto_hdr_t *)mlp->o_buf;
2240 	bnum = 2 * php->expbrd + php->slot;
2241 	a_pnt[0] = '\0';
2242 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2243 
2244 	switch (mlp->e_code) {
2245 		case 0:
2246 			return (NULL);
2247 		case DRERR_NOACL:
2248 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2249 		case DRERR_NOT_ASSIGNED:
2250 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2251 		case DRERR_NOT_ACTIVE:
2252 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2253 		case DRERR_EMPTY_SLOT:
2254 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2255 		case DRERR_POWER_OFF:
2256 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2257 		case DRERR_TEST_IN_PROGRESS:
2258 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS, "%s",
2259 			    a_pnt));
2260 		case DRERR_TESTING_BUSY:
2261 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2262 		case DRERR_TEST_REQUIRED:
2263 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2264 		case DRERR_UNAVAILABLE:
2265 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2266 		case DRERR_RECOVERABLE:
2267 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE, "%s",
2268 			    a_pnt));
2269 		case DRERR_UNRECOVERABLE:
2270 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE, "%s",
2271 			    a_pnt));
2272 		default:
2273 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2274 	}
2275 }
2276 
2277 static sbd_error_t *
2278 drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
2279 	caddr_t ibufp, int ilen)
2280 {
2281 	int			timeout = 0;
2282 	int			ntries = 0;
2283 	int			nosignals = 0;
2284 	dr_proto_hdr_t 		*hdrp;
2285 	drmach_msglist_t 	*mlp;
2286 	sbd_error_t		*err = NULL;
2287 
2288 	if (msgtype != DRMSG_MBOX_INIT) {
2289 		mutex_enter(&drmach_ri_mbox_mutex);
2290 		mutex_enter(&drmach_g_mbox_mutex);
2291 		if (drmach_mbox_iflag == 0) {
2292 			/* need to initialize the mailbox */
2293 			dr_proto_hdr_t	imsg;
2294 
2295 			mutex_exit(&drmach_g_mbox_mutex);
2296 
2297 			imsg.command = DRMSG_MBOX_INIT;
2298 
2299 			imsg.message_id = drmach_get_msgid();
2300 			imsg.drproto_version = DRMBX_VERSION;
2301 			imsg.expbrd = 0;
2302 			imsg.slot = 0;
2303 
2304 			cmn_err(CE_WARN, "!reinitializing DR mailbox");
2305 			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
2306 			    10, 5, 0, NULL);
2307 			err = drmach_mbx2sbderr(mlp);
2308 			/*
2309 			 * If framework failure incoming is encountered on
2310 			 * the MBOX_INIT [timeout on SMS reply], the error
2311 			 * type must be changed before returning to caller.
2312 			 * This is to prevent drmach_board_connect() and
2313 			 * drmach_board_disconnect() from marking boards
2314 			 * UNUSABLE based on MBOX_INIT failures.
2315 			 */
2316 			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
2317 				cmn_err(CE_WARN,
2318 				    "!Changed mbox incoming to outgoing"
2319 				    " failure on reinit");
2320 				sbd_err_clear(&err);
2321 				err = drerr_new(0, ESTC_MBXRQST, NULL);
2322 			}
2323 			drmach_msglist_destroy(mlp);
2324 			if (err) {
2325 				mutex_exit(&drmach_ri_mbox_mutex);
2326 				return (err);
2327 			}
2328 			mutex_enter(&drmach_g_mbox_mutex);
2329 			drmach_mbox_iflag = 1;
2330 		}
2331 		mutex_exit(&drmach_g_mbox_mutex);
2332 		mutex_exit(&drmach_ri_mbox_mutex);
2333 	}
2334 
2335 	hdrp = (dr_proto_hdr_t *)obufp;
2336 
2337 	/* setup outgoing mailbox header */
2338 	hdrp->command = msgtype;
2339 	hdrp->message_id = drmach_get_msgid();
2340 	hdrp->drproto_version = DRMBX_VERSION;
2341 	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
2342 	hdrp->slot = DRMACH_BNUM2SLOT(bnum);
2343 
2344 	switch (msgtype) {
2345 
2346 		case DRMSG_MBOX_INIT:
2347 			timeout = drmach_to_mbxinit;
2348 			ntries = 1;
2349 			nosignals = 0;
2350 			break;
2351 
2352 		case DRMSG_ASSIGN:
2353 			timeout = drmach_to_assign;
2354 			ntries = 1;
2355 			nosignals = 0;
2356 			break;
2357 
2358 		case DRMSG_UNASSIGN:
2359 			timeout = drmach_to_unassign;
2360 			ntries = 1;
2361 			nosignals = 0;
2362 			break;
2363 
2364 		case DRMSG_POWERON:
2365 			timeout = drmach_to_poweron;
2366 			ntries = 1;
2367 			nosignals = 0;
2368 			break;
2369 
2370 		case DRMSG_POWEROFF:
2371 			timeout = drmach_to_poweroff;
2372 			ntries = 1;
2373 			nosignals = 0;
2374 			break;
2375 
2376 		case DRMSG_SHOWBOARD:
2377 			timeout = drmach_to_showboard;
2378 			ntries = 1;
2379 			nosignals = 0;
2380 			break;
2381 
2382 		case DRMSG_CLAIM:
2383 			timeout = drmach_to_claim;
2384 			ntries = 1;
2385 			nosignals = 1;
2386 			break;
2387 
2388 		case DRMSG_UNCLAIM:
2389 			timeout = drmach_to_unclaim;
2390 			ntries = 1;
2391 			nosignals = 1;
2392 			break;
2393 
2394 		case DRMSG_UNCONFIG:
2395 			timeout = drmach_to_unconfig;
2396 			ntries = 1;
2397 			nosignals = 0;
2398 			break;
2399 
2400 		case DRMSG_TESTBOARD:
2401 			timeout = drmach_to_testboard;
2402 			ntries = 1;
2403 			nosignals = 0;
2404 			break;
2405 
2406 		default:
2407 			cmn_err(CE_WARN, "Unknown outgoing message type 0x%x",
2408 			    msgtype);
2409 			err = DRMACH_INTERNAL_ERROR();
2410 			break;
2411 	}
2412 
2413 	if (err == NULL) {
2414 		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen, timeout,
2415 		    ntries, nosignals, NULL);
2416 		err = drmach_mbx2sbderr(mlp);
2417 
2418 		/*
2419 		 * For DRMSG_TESTBOARD attempts which have timed out, or
2420 		 * been aborted due to a signal received after mboxsc_putmsg()
2421 		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
2422 		 * must be sent.
2423 		 */
2424 		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
2425 		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
2426 		    (mlp->p_flag != 0)))) {
2427 			drmach_msglist_t	*abmlp;
2428 			dr_abort_test_t		abibuf;
2429 
2430 			hdrp->command = DRMSG_ABORT_TEST;
2431 			hdrp->message_id = drmach_get_msgid();
2432 			abmlp = drmach_mbox_req_rply(hdrp,
2433 			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
2434 			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
2435 			cmn_err(CE_WARN, "test aborted");
2436 			drmach_msglist_destroy(abmlp);
2437 		}
2438 
2439 		drmach_msglist_destroy(mlp);
2440 	}
2441 
2442 	return (err);
2443 }
2444 
2445 static int
2446 drmach_mbox_init()
2447 {
2448 	int			err;
2449 	caddr_t			obufp;
2450 	sbd_error_t		*serr = NULL;
2451 	mboxsc_timeout_range_t	mbxtoz;
2452 
2453 	drmach_mbox_istate = 0;
2454 	/* register the outgoing mailbox */
2455 	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
2456 	    NULL)) != 0) {
2457 		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
2458 		return (-1);
2459 	}
2460 	drmach_mbox_istate = 1;
2461 
2462 	/* setup the mboxsc_putmsg timeout value */
2463 	if (drmach_use_tuned_putmsg_to) {
2464 		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
2465 		    drmach_to_putmsg);
2466 	} else {
2467 		if ((err = mboxsc_ctrl(KEY_DRSC,
2468 		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
2469 			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
2470 			drmach_to_putmsg = 60000;
2471 		} else {
2472 			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
2473 			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
2474 			    " is 0x%lx\n", mbxtoz.min_timeout,
2475 			    mbxtoz.max_timeout, drmach_to_putmsg);
2476 		}
2477 	}
2478 
2479 	/* register the incoming mailbox */
2480 	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
2481 	    drmach_mbox_event)) != 0) {
2482 		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
2483 		return (-1);
2484 	}
2485 	drmach_mbox_istate = 2;
2486 
2487 	/* initialize mutex for mailbox globals */
2488 	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2489 
2490 	/* initialize mutex for mailbox re-init */
2491 	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2492 
2493 	/* initialize mailbox message list elements */
2494 	drmach_msglist_first = drmach_msglist_last = NULL;
2495 	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);
2496 
2497 	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
2498 	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);
2499 
2500 	drmach_mbox_istate = 3;
2501 
2502 	/* start mailbox sendmsg thread */
2503 	drmach_sendmsg_thread_run = 1;
2504 	if (drmach_sendmsg_thread == NULL)
2505 		drmach_sendmsg_thread = thread_create(NULL, 0,
2506 		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
2507 		    TS_RUN, minclsyspri);
2508 
2509 	/* start mailbox getmsg thread */
2510 	drmach_getmsg_thread_run = 1;
2511 	if (drmach_getmsg_thread == NULL)
2512 		drmach_getmsg_thread = thread_create(NULL, 0,
2513 		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
2514 		    TS_RUN, minclsyspri);
2515 
2516 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
2517 	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
2518 	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
2519 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
2520 	if (serr) {
2521 		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
2522 		    serr->e_code);
2523 		sbd_err_clear(&serr);
2524 		return (-1);
2525 	}
2526 	mutex_enter(&drmach_g_mbox_mutex);
2527 	drmach_mbox_iflag = 1;
2528 	drmach_mbox_ipending = 0;
2529 	mutex_exit(&drmach_g_mbox_mutex);
2530 
2531 	return (0);
2532 }
2533 
2534 static int
2535 drmach_mbox_fini()
2536 {
2537 	int err, rv = 0;
2538 
2539 	if (drmach_mbox_istate > 2) {
2540 		drmach_getmsg_thread_run = 0;
2541 		drmach_sendmsg_thread_run = 0;
2542 		cmn_err(CE_WARN,
2543 		    "drmach_mbox_fini: waiting for mbox threads...");
2544 		while ((drmach_getmsg_thread_run == 0) ||
2545 		    (drmach_sendmsg_thread_run == 0)) {
2546 			continue;
2547 		}
2548 		cmn_err(CE_WARN, "drmach_mbox_fini: mbox threads done.");
2549 		mutex_destroy(&drmach_msglist_mutex);
2550 
2551 	}
2552 	if (drmach_mbox_istate) {
2553 		/* de-register the outgoing mailbox */
2554 		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
2555 			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
2556 			    err);
2557 			rv = -1;
2558 		}
2559 	}
2560 	if (drmach_mbox_istate > 1) {
2561 		/* de-register the incoming mailbox */
2562 		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
2563 			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
2564 			    err);
2565 			rv = -1;
2566 		}
2567 	}
2568 	mutex_destroy(&drmach_g_mbox_mutex);
2569 	mutex_destroy(&drmach_ri_mbox_mutex);
2570 	return (rv);
2571 }
2572 
/*
 * Convert a port id to a board number.
 *
 * The low five bits of the port id select the slot (0 or 1) according
 * to the device type; the remaining bits, shifted and masked with 0x7e,
 * supply the rest of the board number, so the result is an even value
 * OR'ed with the slot.
 *
 * A port id whose low five bits match no known device trips an ASSERT
 * on debug kernels.  Where ASSERT compiles to nothing, slot 0 is used so
 * that the result is deterministic rather than built from an
 * uninitialized variable.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		slot = 0;		/* never leave slot indeterminate */
		break;
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2596 
2597 extern int axq_suspend_iopause;
2598 
2599 static int
2600 hold_rele_branch(dev_info_t *rdip, void *arg)
2601 {
2602 	int	i;
2603 	int	*holdp	= (int *)arg;
2604 	char	*name = ddi_node_name(rdip);
2605 
2606 	/*
2607 	 * For Starcat, we must be children of the root devinfo node
2608 	 */
2609 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2610 
2611 	i = drmach_name2type_idx(name);
2612 
2613 	/*
2614 	 * Only children of the root devinfo node need to be
2615 	 * held/released since they are the only valid targets
2616 	 * of tree operations. This corresponds to the node types
2617 	 * listed in the drmach_name2type array.
2618 	 */
2619 	if (i < 0) {
2620 		/* Not of interest to us */
2621 		return (DDI_WALK_PRUNECHILD);
2622 	}
2623 
2624 	if (*holdp) {
2625 		ASSERT(!e_ddi_branch_held(rdip));
2626 		e_ddi_branch_hold(rdip);
2627 	} else {
2628 		ASSERT(e_ddi_branch_held(rdip));
2629 		e_ddi_branch_rele(rdip);
2630 	}
2631 
2632 	return (DDI_WALK_PRUNECHILD);
2633 }
2634 
2635 static int
2636 drmach_init(void)
2637 {
2638 	pnode_t 	nodeid;
2639 	gdcd_t		*gdcd;
2640 	int		bnum;
2641 	dev_info_t	*rdip;
2642 	int		hold, circ;
2643 
2644 	mutex_enter(&drmach_i_lock);
2645 	if (drmach_initialized) {
2646 		mutex_exit(&drmach_i_lock);
2647 		return (0);
2648 	}
2649 
2650 	gdcd = drmach_gdcd_new();
2651 	if (gdcd == NULL) {
2652 		mutex_exit(&drmach_i_lock);
2653 		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
2654 		return (-1);
2655 	}
2656 
2657 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
2658 
2659 	nodeid = prom_childnode(prom_rootnode());
2660 	do {
2661 		int		 len;
2662 		int		 portid;
2663 		drmachid_t	 id;
2664 
2665 		len = prom_getproplen(nodeid, "portid");
2666 		if (len != sizeof (portid))
2667 			continue;
2668 
2669 		portid = -1;
2670 		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
2671 		if (portid == -1)
2672 			continue;
2673 
2674 		bnum = drmach_portid2bnum(portid);
2675 
2676 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
2677 			/* portid translated to an invalid board number */
2678 			cmn_err(CE_WARN, "OBP node 0x%x has"
2679 			    " invalid property value, %s=%u",
2680 			    nodeid, "portid", portid);
2681 
2682 			/* clean up */
2683 			drmach_array_dispose(drmach_boards,
2684 			    drmach_board_dispose);
2685 			drmach_gdcd_dispose(gdcd);
2686 			mutex_exit(&drmach_i_lock);
2687 			return (-1);
2688 		} else if (id == NULL) {
2689 			drmach_board_t	*bp;
2690 			l1_slot_stat_t	*dcd;
2691 			int		exp, slot;
2692 
2693 			bp = drmach_board_new(bnum);
2694 			bp->assigned = !drmach_initialized;
2695 			bp->powered = !drmach_initialized;
2696 
2697 			exp = DRMACH_BNUM2EXP(bnum);
2698 			slot = DRMACH_BNUM2SLOT(bnum);
2699 			dcd = &gdcd->dcd_slot[exp][slot];
2700 			bp->stardrb_offset =
2701 			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
2702 			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
2703 			    bp->stardrb_offset);
2704 
2705 			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
2706 			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
2707 				bp->flags |= DRMACH_NULL_PROC_LPA;
2708 				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
2709 			}
2710 		}
2711 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
2712 
2713 	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
2714 
2715 	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
2716 		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
2717 		    gdcd->dcd_testcage_log2_mbytes_align);
2718 		drmach_iocage_paddr =
2719 		    (uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
2720 		drmach_iocage_size =
2721 		    1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);
2722 
2723 		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
2724 		    drmach_iocage_size, VM_SLEEP);
2725 		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
2726 		    mmu_btop(drmach_iocage_paddr),
2727 		    PROT_READ | PROT_WRITE,
2728 		    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
2729 
2730 		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
2731 		    gdcd->dcd_testcage_log2_mbytes_size,
2732 		    gdcd->dcd_testcage_log2_mbytes_align,
2733 		    gdcd->dcd_testcage_mbyte_PA);
2734 		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
2735 		    drmach_iocage_size, drmach_iocage_paddr,
2736 		    (void *)drmach_iocage_vaddr);
2737 	}
2738 
2739 	if (drmach_iocage_size == 0) {
2740 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2741 		drmach_boards = NULL;
2742 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2743 		drmach_gdcd_dispose(gdcd);
2744 		mutex_exit(&drmach_i_lock);
2745 		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
2746 		return (-1);
2747 	}
2748 
2749 	drmach_gdcd_dispose(gdcd);
2750 
2751 	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
2752 	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
2753 	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
2754 	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
2755 	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);
2756 
2757 	mutex_enter(&cpu_lock);
2758 	mutex_enter(&drmach_iocage_lock);
2759 	ASSERT(drmach_iocage_is_busy == 0);
2760 	drmach_iocage_is_busy = 1;
2761 	drmach_iocage_mem_scrub(drmach_iocage_size);
2762 	drmach_iocage_is_busy = 0;
2763 	cv_signal(&drmach_iocage_cv);
2764 	mutex_exit(&drmach_iocage_lock);
2765 	mutex_exit(&cpu_lock);
2766 
2767 
2768 	if (drmach_mbox_init() == -1) {
2769 		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
2770 	}
2771 
2772 	/*
2773 	 * Walk immediate children of devinfo root node and hold
2774 	 * all devinfo branches of interest.
2775 	 */
2776 	hold = 1;
2777 	rdip = ddi_root_node();
2778 
2779 	ndi_devi_enter(rdip, &circ);
2780 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2781 	ndi_devi_exit(rdip, circ);
2782 
2783 	drmach_initialized = 1;
2784 
2785 	/*
2786 	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
2787 	 * rev introducing the axq_iopause_*_all interfaces should not regress
2788 	 * when installed without the DR rev using those interfaces. The default
2789 	 * is for iopause to be enabled/disabled during axq suspend/resume. By
2790 	 * setting the following axq flag to zero, axq will not enable iopause
2791 	 * during suspend/resume, instead DR will call the axq_iopause_*_all
2792 	 * interfaces during drmach_copy_rename.
2793 	 */
2794 	axq_suspend_iopause = 0;
2795 
2796 	mutex_exit(&drmach_i_lock);
2797 
2798 	return (0);
2799 }
2800 
2801 static void
2802 drmach_fini(void)
2803 {
2804 	dev_info_t	*rdip;
2805 	int		hold, circ;
2806 
2807 	if (drmach_initialized) {
2808 		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2809 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2810 		drmach_boards = NULL;
2811 		rw_exit(&drmach_boards_rwlock);
2812 
2813 		mutex_destroy(&drmach_slice_table_lock);
2814 		mutex_destroy(&drmach_xt_mb_lock);
2815 		mutex_destroy(&drmach_bus_sync_lock);
2816 		cv_destroy(&drmach_iocage_cv);
2817 		mutex_destroy(&drmach_iocage_lock);
2818 
2819 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2820 
2821 		/*
2822 		 * Walk immediate children of the root devinfo node
2823 		 * releasing holds acquired on branches in drmach_init()
2824 		 */
2825 		hold = 0;
2826 		rdip = ddi_root_node();
2827 
2828 		ndi_devi_enter(rdip, &circ);
2829 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2830 		ndi_devi_exit(rdip, circ);
2831 
2832 		drmach_initialized = 0;
2833 	}
2834 
2835 	(void) drmach_mbox_fini();
2836 	if (drmach_xt_mb != NULL) {
2837 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2838 		    drmach_xt_mb_size);
2839 	}
2840 	rw_destroy(&drmach_boards_rwlock);
2841 	mutex_destroy(&drmach_i_lock);
2842 }
2843 
2844 static void
2845 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2846 {
2847 	kpreempt_disable();
2848 
2849 	/* get register address, read madr value */
2850 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2851 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2852 	} else {
2853 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2854 	}
2855 
2856 	kpreempt_enable();
2857 }
2858 
2859 
2860 static uint64_t *
2861 drmach_prep_mc_rename(uint64_t *p, int local,
2862 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2863 {
2864 	int bank;
2865 
2866 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2867 		uint64_t madr, bank_offset;
2868 
2869 		/* fetch mc's bank madr register value */
2870 		drmach_mem_read_madr(mp, bank, &madr);
2871 		if (madr & DRMACH_MC_VALID_MASK) {
2872 			uint64_t bankpa;
2873 
2874 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2875 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2876 			bankpa = new_basepa + bank_offset;
2877 
2878 			/* encode new base pa into madr */
2879 			madr &= ~DRMACH_MC_UM_MASK;
2880 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2881 			madr &= ~DRMACH_MC_LM_MASK;
2882 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2883 
2884 			if (local)
2885 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2886 			else
2887 				*p++ = DRMACH_MC_ADDR(mp, bank);
2888 
2889 			*p++ = madr;
2890 		}
2891 	}
2892 
2893 	return (p);
2894 }
2895 
2896 static uint64_t *
2897 drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2898 {
2899 	drmach_board_t	*bp;
2900 	int		 rv;
2901 	int		 idx;
2902 	drmachid_t	 id;
2903 	uint64_t	 last_scsr_pa = 0;
2904 
2905 	/* memory is always in slot 0 */
2906 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2907 
2908 	/* look up slot 1 board on same expander */
2909 	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2910 	rv = drmach_array_get(drmach_boards, idx, &id);
2911 	bp = id; /* bp will be NULL if board not found */
2912 
2913 	/* look up should never be out of bounds */
2914 	ASSERT(rv == 0);
2915 
2916 	/* nothing to do when board is not found or has no devices */
2917 	if (rv == -1 || bp == NULL || bp->devices == NULL)
2918 		return (p);
2919 
2920 	rv = drmach_array_first(bp->devices, &idx, &id);
2921 	while (rv == 0) {
2922 		if (DRMACH_IS_IO_ID(id)) {
2923 			drmach_io_t *io = id;
2924 
2925 			/*
2926 			 * Skip all non-Schizo IO devices (only IO nodes
2927 			 * that are Schizo devices have non-zero scsr_pa).
2928 			 * Filter out "other" leaf to avoid writing to the
2929 			 * same Schizo Control/Status Register twice.
2930 			 */
2931 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2932 				uint64_t scsr;
2933 
2934 				scsr  = lddphysio(io->scsr_pa);
2935 				scsr &= ~(DRMACH_LPA_BASE_MASK |
2936 				    DRMACH_LPA_BND_MASK);
2937 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2938 				scsr |= DRMACH_PA_TO_LPA_BND(
2939 				    new_basepa + DRMACH_MEM_SLICE_SIZE);
2940 
2941 				*p++ = io->scsr_pa;
2942 				*p++ = scsr;
2943 
2944 				last_scsr_pa = io->scsr_pa;
2945 			}
2946 		}
2947 		rv = drmach_array_next(bp->devices, &idx, &id);
2948 	}
2949 
2950 	return (p);
2951 }
2952 
2953 /*
2954  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2955  * The latter is returned when drmach_rename fails to idle a Panther MC and
2956  * is used to identify the MC for error reporting.
2957  */
2958 static uint64_t *
2959 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2960 {
2961 	/* only slot 0 has memory */
2962 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2963 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2964 
2965 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2966 		ASSERT(DRMACH_IS_MEM_ID(mp));
2967 
2968 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2969 			if (local) {
2970 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2971 				*p++ = (uintptr_t)mp;
2972 			}
2973 		} else if (!local) {
2974 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2975 			*p++ = (uintptr_t)mp;
2976 		}
2977 	}
2978 
2979 	return (p);
2980 }
2981 
2982 static sbd_error_t *
2983 drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
2984 	uint64_t t_slice_offset, caddr_t buf, int buflen)
2985 {
2986 	_NOTE(ARGUNUSED(buflen))
2987 
2988 	uint64_t		*p = (uint64_t *)buf, *q;
2989 	sbd_error_t		*err;
2990 	int			 rv;
2991 	drmach_mem_t		*mp, *skip_mp;
2992 	uint64_t		 s_basepa, t_basepa;
2993 	uint64_t		 s_new_basepa, t_new_basepa;
2994 
2995 	/* verify supplied buffer space is adequate */
2996 	ASSERT(buflen >=
2997 	    /* addr for all possible MC banks */
2998 	    (sizeof (uint64_t) * 4 * 4 * 18) +
2999 	    /* list section terminator */
3000 	    (sizeof (uint64_t) * 1) +
3001 	    /* addr/id tuple for local Panther MC idle reg */
3002 	    (sizeof (uint64_t) * 2) +
3003 	    /* list section terminator */
3004 	    (sizeof (uint64_t) * 1) +
3005 	    /* addr/id tuple for 2 boards with 4 Panther MC idle regs */
3006 	    (sizeof (uint64_t) * 2 * 2 * 4) +
3007 	    /* list section terminator */
3008 	    (sizeof (uint64_t) * 1) +
3009 	    /* addr/val tuple for 1 proc with 4 MC banks */
3010 	    (sizeof (uint64_t) * 2 * 4) +
3011 	    /* list section terminator */
3012 	    (sizeof (uint64_t) * 1) +
3013 	    /* addr/val tuple for 2 boards w/ 2 schizos each */
3014 	    (sizeof (uint64_t) * 2 * 2 * 2) +
3015 	    /* addr/val tuple for 2 boards w/ 16 MC banks each */
3016 	    (sizeof (uint64_t) * 2 * 2 * 16) +
3017 	    /* list section terminator */
3018 	    (sizeof (uint64_t) * 1) +
3019 	    /* addr/val tuple for 18 AXQs w/ two slots each */
3020 	    (sizeof (uint64_t) * 2 * 2 * 18) +
3021 	    /* list section terminator */
3022 	    (sizeof (uint64_t) * 1) +
3023 	    /* list terminator */
3024 	    (sizeof (uint64_t) * 1));
3025 
3026 	/* copy bank list to rename script */
3027 	mutex_enter(&drmach_bus_sync_lock);
3028 	for (q = drmach_bus_sync_list; *q; q++, p++)
3029 		*p = *q;
3030 	mutex_exit(&drmach_bus_sync_lock);
3031 
3032 	/* list section terminator */
3033 	*p++ = 0;
3034 
3035 	/*
3036 	 * Write idle script for MC on this processor.  A script will be
3037 	 * produced only if this is a Panther processor on the source or
3038 	 * target board.
3039 	 */
3040 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3041 		p = drmach_prep_pn_mc_idle(p, s_mp, 1);
3042 
3043 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3044 		p = drmach_prep_pn_mc_idle(p, t_mp, 1);
3045 
3046 	/* list section terminator */
3047 	*p++ = 0;
3048 
3049 	/*
3050 	 * Write idle script for all other MCs on source and target
3051 	 * Panther boards.
3052 	 */
3053 	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
3054 		p = drmach_prep_pn_mc_idle(p, s_mp, 0);
3055 
3056 	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
3057 		p = drmach_prep_pn_mc_idle(p, t_mp, 0);
3058 
3059 	/* list section terminator */
3060 	*p++ = 0;
3061 
3062 	/*
3063 	 * Step 1:	Write source base address to target MC
3064 	 *		with present bit off.
3065 	 * Step 2:	Now rewrite target reg with present bit on.
3066 	 */
3067 	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
3068 	ASSERT(err == NULL);
3069 	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
3070 	ASSERT(err == NULL);
3071 
3072 	/* exchange base pa. include slice offset in new target base pa */
3073 	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
3074 	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
3075 	    t_slice_offset;
3076 
3077 	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
3078 	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);
3079 
3080 	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
3081 	    CPU->cpu_id);
3082 
3083 	/*
3084 	 * Write rename script for MC on this processor.  A script will
3085 	 * be produced only if this processor is on the source or target
3086 	 * board.
3087 	 */
3088 
3089 	skip_mp = NULL;
3090 	mp = s_mp->dev.bp->mem;
3091 	while (mp != NULL && skip_mp == NULL) {
3092 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3093 			skip_mp = mp;
3094 			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
3095 			    s_new_basepa);
3096 		}
3097 
3098 		mp = mp->next;
3099 	}
3100 
3101 	mp = t_mp->dev.bp->mem;
3102 	while (mp != NULL && skip_mp == NULL) {
3103 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
3104 			skip_mp = mp;
3105 			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
3106 			    t_new_basepa);
3107 		}
3108 
3109 		mp = mp->next;
3110 	}
3111 
3112 	/* list section terminator */
3113 	*p++ = 0;
3114 
3115 	/*
3116 	 * Write rename script for all other MCs on source and target
3117 	 * boards.
3118 	 */
3119 
3120 	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
3121 		if (mp == skip_mp)
3122 			continue;
3123 		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
3124 	}
3125 
3126 	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
3127 		if (mp == skip_mp)
3128 			continue;
3129 		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
3130 	}
3131 
3132 	/* Write rename script for Schizo LPA_BASE/LPA_BND */
3133 	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
3134 	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);
3135 
3136 	/* list section terminator */
3137 	*p++ = 0;
3138 
3139 	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
3140 	    DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
3141 	    DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));
3142 
3143 	rv = axq_do_casm_rename_script(&p,
3144 	    DRMACH_PA_TO_SLICE(s_new_basepa),
3145 	    DRMACH_PA_TO_SLICE(t_new_basepa));
3146 	if (rv == DDI_FAILURE)
3147 		return (DRMACH_INTERNAL_ERROR());
3148 
3149 	/* list section & final terminator */
3150 	*p++ = 0;
3151 	*p++ = 0;
3152 
3153 #ifdef DEBUG
3154 	{
3155 		uint64_t *q = (uint64_t *)buf;
3156 
3157 		/* paranoia */
3158 		ASSERT((caddr_t)p <= buf + buflen);
3159 
3160 		DRMACH_PR("MC bank base pa list:\n");
3161 		while (*q) {
3162 			uint64_t a = *q++;
3163 
3164 			DRMACH_PR("0x%lx\n", a);
3165 		}
3166 
3167 		/* skip terminator */
3168 		q += 1;
3169 
3170 		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
3171 		while (*q) {
3172 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3173 			q += 2;
3174 		}
3175 
3176 		/* skip terminator */
3177 		q += 1;
3178 
3179 		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
3180 		while (*q) {
3181 			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
3182 			q += 2;
3183 		}
3184 
3185 		/* skip terminator */
3186 		q += 1;
3187 
3188 		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
3189 		while (*q) {
3190 			uint64_t r = *q++;	/* register address */
3191 			uint64_t v = *q++;	/* new register value */
3192 
3193 			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
3194 			    r, v, (long)(DRMACH_MC_UM_TO_PA(v)|
3195 			    DRMACH_MC_LM_TO_PA(v)));
3196 		}
3197 
3198 		/* skip terminator */
3199 		q += 1;
3200 
3201 		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
3202 		while (*q) {
3203 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3204 			q += 2;
3205 		}
3206 
3207 		/* skip terminator */
3208 		q += 1;
3209 
3210 		DRMACH_PR("AXQ reprogramming script:\n");
3211 		while (*q) {
3212 			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
3213 			q += 2;
3214 		}
3215 
3216 		/* verify final terminator is present */
3217 		ASSERT(*(q + 1) == 0);
3218 
3219 		DRMACH_PR("copy-rename script 0x%p, len %d\n",
3220 		    (void *)buf, (int)((intptr_t)p - (intptr_t)buf));
3221 
3222 		if (drmach_debug)
3223 			DELAY(10000000);
3224 	}
3225 #endif
3226 
3227 	return (NULL);
3228 }
3229 
3230 static void
3231 drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
3232 {
3233 	int		 rv;
3234 
3235 	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));
3236 
3237 	if (bp->devices) {
3238 		int		 d_idx;
3239 		drmachid_t	 d_id;
3240 
3241 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3242 		while (rv == 0) {
3243 			if (DRMACH_IS_CPU_ID(d_id)) {
3244 				drmach_cpu_t	*cp = d_id;
3245 				processorid_t	 cpuid = cp->cpuid;
3246 
3247 				mutex_enter(&cpu_lock);
3248 				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
3249 					drmach_xt_mb[cpuid] = 0x80 | slice;
3250 				mutex_exit(&cpu_lock);
3251 			}
3252 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3253 		}
3254 	}
3255 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
3256 		drmach_board_t	*s1bp = NULL;
3257 
3258 		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
3259 		    (void *) &s1bp);
3260 		if (rv == 0 && s1bp != NULL) {
3261 			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
3262 			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
3263 			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
3264 		}
3265 	}
3266 }
3267 
3268 sbd_error_t *
3269 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
3270 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
3271 {
3272 	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
3273 	extern void drmach_rename_end(void);
3274 
3275 	drmach_mem_t	*s_mp, *t_mp;
3276 	struct memlist	*x_ml;
3277 	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
3278 	int		 len;
3279 	caddr_t		 bp, wp;
3280 	uint_t		*p, *q;
3281 	sbd_error_t	*err;
3282 	tte_t		*tte;
3283 	drmach_copy_rename_t *cr;
3284 
3285 	if (!DRMACH_IS_MEM_ID(s_id))
3286 		return (drerr_new(0, ESTC_INAPPROP, NULL));
3287 	if (!DRMACH_IS_MEM_ID(t_id))
3288 		return (drerr_new(0, ESTC_INAPPROP, NULL));
3289 	s_mp = s_id;
3290 	t_mp = t_id;
3291 
3292 	/* get starting physical address of target memory */
3293 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
3294 	if (err)
3295 		return (err);
3296 
3297 	/* calculate slice offset mask from slice size */
3298 	off_mask = DRMACH_MEM_SLICE_SIZE - 1;
3299 
3300 	/* calculate source and target base pa */
3301 	s_copybasepa = c_ml->ml_address;
3302 	t_copybasepa =
3303 	    t_basepa + ((c_ml->ml_address & off_mask) - t_slice_offset);
3304 
3305 	/* paranoia */
3306 	ASSERT((c_ml->ml_address & off_mask) >= t_slice_offset);
3307 
3308 	/* adjust copy memlist addresses to be relative to copy base pa */
3309 	x_ml = c_ml;
3310 	while (x_ml != NULL) {
3311 		x_ml->ml_address -= s_copybasepa;
3312 		x_ml = x_ml->ml_next;
3313 	}
3314 
3315 #ifdef DEBUG
3316 	{
3317 	uint64_t s_basepa, s_size, t_size;
3318 
3319 	x_ml = c_ml;
3320 	while (x_ml->ml_next != NULL)
3321 		x_ml = x_ml->ml_next;
3322 
3323 	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
3324 	    s_copybasepa,
3325 	    s_copybasepa + x_ml->ml_address + x_ml->ml_size);
3326 
3327 	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
3328 	    t_copybasepa,
3329 	    t_copybasepa + x_ml->ml_address + x_ml->ml_size);
3330 
3331 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
3332 	DRMACH_MEMLIST_DUMP(c_ml);
3333 
3334 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
3335 	ASSERT(err == NULL);
3336 
3337 	err = drmach_mem_get_size(s_id, &s_size);
3338 	ASSERT(err == NULL);
3339 
3340 	err = drmach_mem_get_size(t_id, &t_size);
3341 	ASSERT(err == NULL);
3342 
3343 	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
3344 	    s_basepa, s_size);
3345 	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
3346 	    t_basepa, t_size);
3347 	}
3348 #endif /* DEBUG */
3349 
3350 	/* Map in appropriate cpu sram page */
3351 	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
3352 	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
3353 	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
3354 	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
3355 	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);
3356 
3357 	bp = wp = drmach_cpu_sram_va;
3358 
3359 	/* Make sure the rename routine will fit */
3360 	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
3361 	ASSERT(wp + len < bp + PAGESIZE);
3362 
3363 	/* copy text. standard bcopy not designed to work in nc space */
3364 	p = (uint_t *)wp;
3365 	q = (uint_t *)drmach_rename;
3366 	while (q < (uint_t *)drmach_rename_end)
3367 		*p++ = *q++;
3368 
3369 	/* zero remainder. standard bzero not designed to work in nc space */
3370 	while (p < (uint_t *)(bp + PAGESIZE))
3371 		*p++ = 0;
3372 
3373 	DRMACH_PR("drmach_rename function 0x%p, len %d\n", (void *)wp, len);
3374 	wp += (len + 15) & ~15;
3375 
3376 	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset, wp,
3377 	    PAGESIZE - (wp - bp));
3378 	if (err) {
3379 cleanup:
3380 		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3381 		    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
3382 		return (err);
3383 	}
3384 
3385 	/* disable and flush CDC */
3386 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
3387 		axq_cdc_enable_all();	/* paranoia */
3388 		err = DRMACH_INTERNAL_ERROR();
3389 		goto cleanup;
3390 	}
3391 
3392 	/* mark both memory units busy */
3393 	t_mp->dev.busy++;
3394 	s_mp->dev.busy++;
3395 
3396 	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
3397 	    VM_SLEEP);
3398 	cr->isa = (void *)drmach_copy_rename_init;
3399 	cr->data = wp;
3400 	cr->c_ml = c_ml;
3401 	cr->s_mp = s_mp;
3402 	cr->t_mp = t_mp;
3403 	cr->s_copybasepa = s_copybasepa;
3404 	cr->t_copybasepa = t_copybasepa;
3405 	cr->ecode = DRMACH_CR_OK;
3406 
3407 	mutex_enter(&drmach_slice_table_lock);
3408 
3409 	mutex_enter(&drmach_xt_mb_lock);
3410 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
3411 
3412 	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
3413 		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
3414 		    DRMACH_PA_TO_SLICE(t_copybasepa));
3415 	}
3416 	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
3417 		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
3418 		    DRMACH_PA_TO_SLICE(s_copybasepa));
3419 	}
3420 
3421 	*cr_id = cr;
3422 	return (NULL);
3423 }
3424 
/*
 * Debug bookkeeping written by drmach_copy_rename() and printed by
 * drmach_copy_rename_fini():
 *   drmach_rename_count  - number of other cpus expected to check in
 *   drmach_rename_ntries - polling iterations spent waiting for them
 */
int drmach_rename_count;
int drmach_rename_ntries;
3427 
3428 sbd_error_t *
3429 drmach_copy_rename_fini(drmachid_t id)
3430 {
3431 	drmach_copy_rename_t	*cr = id;
3432 	sbd_error_t		*err = NULL;
3433 	dr_mbox_msg_t		*obufp;
3434 
3435 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3436 
3437 	axq_cdc_enable_all();
3438 
3439 	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3440 	    (uint64_t)drmach_cpu_sram_va, (uint64_t)ksfmmup);
3441 
3442 	switch (cr->ecode) {
3443 	case DRMACH_CR_OK:
3444 		break;
3445 	case DRMACH_CR_MC_IDLE_ERR: {
3446 		dev_info_t	*dip = NULL;
3447 		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
3448 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3449 
3450 		ASSERT(DRMACH_IS_MEM_ID(mp));
3451 
3452 		err = drmach_get_dip(mp, &dip);
3453 
3454 		ASSERT(err == NULL);
3455 		ASSERT(dip != NULL);
3456 
3457 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3458 		(void) ddi_pathname(dip, path);
3459 		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
3460 		    "copy-rename aborted", path, mp->dev.bp->cm.name);
3461 		kmem_free(path, MAXPATHLEN);
3462 		break;
3463 	}
3464 	case DRMACH_CR_IOPAUSE_ERR:
3465 		ASSERT((uintptr_t)cr->earg >= 0 &&
3466 		    (uintptr_t)cr->earg < AXQ_MAX_EXP);
3467 
3468 		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
3469 		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
3470 		    " to copy-rename", (uintptr_t)cr->earg);
3471 		break;
3472 	case DRMACH_CR_ONTRAP_ERR:
3473 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3474 		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
3475 		    "memory error");
3476 		break;
3477 	default:
3478 		err = DRMACH_INTERNAL_ERROR();
3479 		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
3480 		    cr->ecode);
3481 		break;
3482 	}
3483 
3484 #ifdef DEBUG
3485 	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
3486 	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
3487 		int	i;
3488 		for (i = 0; i < NCPU; i++) {
3489 			if (drmach_xt_mb[i])
3490 				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
3491 		}
3492 	}
3493 #endif
3494 	mutex_exit(&drmach_xt_mb_lock);
3495 
3496 	if (cr->c_ml != NULL)
3497 		memlist_delete(cr->c_ml);
3498 
3499 	cr->t_mp->dev.busy--;
3500 	cr->s_mp->dev.busy--;
3501 
3502 	if (err) {
3503 		mutex_exit(&drmach_slice_table_lock);
3504 		goto done;
3505 	}
3506 
3507 	/* update casm shadow for target and source board */
3508 	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
3509 	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
3510 	mutex_exit(&drmach_slice_table_lock);
3511 
3512 	mutex_enter(&drmach_bus_sync_lock);
3513 	drmach_bus_sync_list_update();
3514 	mutex_exit(&drmach_bus_sync_lock);
3515 
3516 	/*
3517 	 * Make a good-faith effort to notify the SC about the copy-rename, but
3518 	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
3519 	 * will duplicate the update.
3520 	 */
3521 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
3522 	mutex_enter(&drmach_slice_table_lock);
3523 	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
3524 	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
3525 	mutex_exit(&drmach_slice_table_lock);
3526 	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
3527 	    (caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
3528 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
3529 
3530 done:
3531 	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));
3532 
3533 	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
3534 	    drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);
3535 
3536 	return (err);
3537 }
3538 
/*
 * Debug knob: when non-zero, drmach_copy_rename() spins in a short
 * busy loop after every 32-byte chunk it copies.
 */
int drmach_slow_copy = 0;
3540 
/*
 * Execute the copy-rename described by the handle that
 * drmach_copy_rename_init() produced.
 *
 *	id	copy-rename handle; cr->ecode must be DRMACH_CR_OK on entry
 *
 * The caller must hold cpu_lock.  The sequence is:
 *   1. pause slot1 I/O on all expanders,
 *   2. cross-trap every other ready cpu into drmach_rename_wait,
 *   3. with CE reporting and interrupts disabled and under on_trap
 *      protection, copy each span of cr->c_ml 32 bytes at a time,
 *   4. flush the E$, call the rename routine located at
 *      drmach_cpu_sram_va, then flush E$, I$, D$ and P$,
 *   5. restore pstate and error enables, resume slot1 I/O, and release
 *      the waiting cpus with drmach_rename_done (or drmach_rename_abort
 *      when cr->ecode reports a failure).
 *
 * Nothing is returned; failures are reported through cr->ecode and
 * cr->earg for drmach_copy_rename_fini() to interpret.
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;
	uint64_t		 caddr;
	cpuset_t		 cpuset;
	uint_t			 pstate;
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		/* report the expander number through earg */
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* every ready cpu except this one is sent into the wait routine */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/*
	 * Poll at most drmach_cpu_ntries times for drmach_xt_ready to
	 * reach count.  Execution continues below whether or not the
	 * count was reached.
	 */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];

	/* parameters for the inline E$ flushes below */
	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->ml_next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base pa */
		s_pa = cr->s_copybasepa + ml->ml_address;
		t_pa = cr->t_copybasepa + ml->ml_address;
		nbytes = ml->ml_size;

		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				uint64_t i = 13 * 50;
				while (i--)
					;
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename: call the routine at drmach_cpu_sram_va with the script
	 * (cr->data); the addresses of cr->ecode and cr->earg are passed
	 * so that it can report a failure.
	 */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

copy_rename_end:

	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	/* a failure recorded in ecode releases the waiters via abort */
	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 *
	 * NOTE(review): this path is also reached from the on_trap
	 * failure branch, which can be taken before the cheetah block
	 * above has set drmach_iocage_is_busy; the flag is cleared here
	 * regardless.  Confirm that is intended.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the cpus parked in drmach_rename_wait */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3776 
/*
 * Forward declarations of the I/O device operations that
 * drmach_io_new() installs in the dispose, release and status slots
 * of a new I/O device.
 */
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3780 
3781 static sbd_error_t *
3782 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3783 {
3784 	drmach_node_t	*node = proto->node;
3785 	sbd_error_t	*err;
3786 	drmach_reg_t	 regs[3];
3787 	int		 rv;
3788 	int		 len = 0;
3789 
3790 	rv = node->n_getproplen(node, "reg", &len);
3791 	if (rv != 0 || len != sizeof (regs)) {
3792 		sbd_error_t *err;
3793 
3794 		/* pci nodes are expected to have regs */
3795 		err = drerr_new(1, ESTC_GETPROP,
3796 		    "Device Node 0x%x: property %s",
3797 		    (uint_t)node->get_dnode(node), "reg");
3798 		return (err);
3799 	}
3800 
3801 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3802 	if (rv) {
3803 		sbd_error_t *err;
3804 
3805 		err = drerr_new(1, ESTC_GETPROP,
3806 		    "Device Node 0x%x: property %s",
3807 		    (uint_t)node->get_dnode(node), "reg");
3808 
3809 		return (err);
3810 	}
3811 
3812 	/*
3813 	 * Fix up unit number so that Leaf A has a lower unit number
3814 	 * than Leaf B.
3815 	 */
3816 	if ((proto->portid % 2) != 0) {
3817 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3818 			proto->unum = 0;
3819 		else
3820 			proto->unum = 1;
3821 	} else {
3822 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3823 			proto->unum = 2;
3824 		else
3825 			proto->unum = 3;
3826 	}
3827 
3828 	err = drmach_io_new(proto, idp);
3829 	if (err == NULL) {
3830 		drmach_io_t *self = *idp;
3831 
3832 		/* reassemble 64-bit base address */
3833 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3834 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3835 	}
3836 
3837 	return (err);
3838 }
3839 
3840 static sbd_error_t *
3841 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3842 {
3843 	drmach_io_t	*ip;
3844 
3845 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3846 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3847 	ip->dev.node = drmach_node_dup(proto->node);
3848 	ip->dev.cm.isa = (void *)drmach_io_new;
3849 	ip->dev.cm.dispose = drmach_io_dispose;
3850 	ip->dev.cm.release = drmach_io_release;
3851 	ip->dev.cm.status = drmach_io_status;
3852 
3853 	(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3854 	    ip->dev.type, ip->dev.unum);
3855 
3856 	*idp = (drmachid_t)ip;
3857 	return (NULL);
3858 }
3859 
3860 static void
3861 drmach_io_dispose(drmachid_t id)
3862 {
3863 	drmach_io_t *self;
3864 
3865 	ASSERT(DRMACH_IS_IO_ID(id));
3866 
3867 	self = id;
3868 	if (self->dev.node)
3869 		drmach_node_dispose(self->dev.node);
3870 
3871 	kmem_free(self, sizeof (*self));
3872 }
3873 
3874 /*ARGSUSED*/
3875 sbd_error_t *
3876 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3877 {
3878 	drmach_board_t	*bp = (drmach_board_t *)id;
3879 	sbd_error_t	*err = NULL;
3880 
3881 	if (id && DRMACH_IS_BOARD_ID(id)) {
3882 		switch (cmd) {
3883 			case SBD_CMD_TEST:
3884 			case SBD_CMD_STATUS:
3885 			case SBD_CMD_GETNCM:
3886 				break;
3887 			case SBD_CMD_CONNECT:
3888 				if (bp->connected)
3889 					err = drerr_new(0, ESBD_STATE, NULL);
3890 
3891 				if (bp->cond == SBD_COND_UNUSABLE)
3892 					err = drerr_new(0,
3893 					    ESBD_FATAL_STATE, NULL);
3894 				break;
3895 			case SBD_CMD_DISCONNECT:
3896 				if (!bp->connected)
3897 					err = drerr_new(0, ESBD_STATE, NULL);
3898 
3899 				if (bp->cond == SBD_COND_UNUSABLE)
3900 					err = drerr_new(0,
3901 					    ESBD_FATAL_STATE, NULL);
3902 				break;
3903 			default:
3904 				if (bp->cond == SBD_COND_UNUSABLE)
3905 					err = drerr_new(0,
3906 					    ESBD_FATAL_STATE, NULL);
3907 				break;
3908 
3909 		}
3910 	}
3911 
3912 	return (err);
3913 }
3914 
3915 /*ARGSUSED*/
3916 sbd_error_t *
3917 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3918 {
3919 	return (NULL);
3920 }
3921 
3922 sbd_error_t *
3923 drmach_board_assign(int bnum, drmachid_t *id)
3924 {
3925 	sbd_error_t	*err = NULL;
3926 	caddr_t		obufp;
3927 
3928 	if (!drmach_initialized && drmach_init() == -1) {
3929 		err = DRMACH_INTERNAL_ERROR();
3930 	}
3931 
3932 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
3933 
3934 	if (!err) {
3935 		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
3936 			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
3937 		} else {
3938 			drmach_board_t	*bp;
3939 
3940 			if (*id)
3941 				rw_downgrade(&drmach_boards_rwlock);
3942 
3943 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
3944 			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
3945 			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
3946 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
3947 
3948 			if (!err) {
3949 				bp = *id;
3950 				if (!*id)
3951 					bp = *id  =
3952 					    (drmachid_t)drmach_board_new(bnum);
3953 				bp->assigned = 1;
3954 			}
3955 		}
3956 	}
3957 	rw_exit(&drmach_boards_rwlock);
3958 	return (err);
3959 }
3960 
3961 static uint_t
3962 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3963 {
3964 	uint_t	port, port_start, port_end;
3965 	uint_t	non_panther_cpus = 0;
3966 	uint_t	impl;
3967 
3968 	ASSERT(gdcd != NULL);
3969 
3970 	/*
3971 	 * Determine PRD port indices based on slot location.
3972 	 */
3973 	switch (slot) {
3974 	case 0:
3975 		port_start = 0;
3976 		port_end = 3;
3977 		break;
3978 	case 1:
3979 		port_start = 4;
3980 		port_end = 5;
3981 		break;
3982 	default:
3983 		ASSERT(0);
3984 		/* check all */
3985 		port_start = 0;
3986 		port_end = 5;
3987 		break;
3988 	}
3989 
3990 	for (port = port_start; port <= port_end; port++) {
3991 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3992 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3993 			/*
3994 			 * This Safari port passed POST and represents a
3995 			 * cpu, so check the implementation.
3996 			 */
3997 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3998 			    & 0xffff;
3999 
4000 			switch (impl) {
4001 			case CHEETAH_IMPL:
4002 			case CHEETAH_PLUS_IMPL:
4003 			case JAGUAR_IMPL:
4004 				non_panther_cpus++;
4005 				break;
4006 			case PANTHER_IMPL:
4007 				break;
4008 			default:
4009 				ASSERT(0);
4010 				non_panther_cpus++;
4011 				break;
4012 			}
4013 		}
4014 	}
4015 
4016 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4017 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4018 
4019 	return (non_panther_cpus);
4020 }
4021 
4022 sbd_error_t *
4023 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
4024 {
4025 	_NOTE(ARGUNUSED(opts))
4026 
4027 	drmach_board_t		*bp = (drmach_board_t *)id;
4028 	sbd_error_t		*err;
4029 	dr_mbox_msg_t		*obufp;
4030 	gdcd_t			*gdcd = NULL;
4031 	uint_t			exp, slot;
4032 	sc_gptwocfg_cookie_t	scc;
4033 	int			panther_pages_enabled;
4034 
4035 	if (!DRMACH_IS_BOARD_ID(id))
4036 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4037 
4038 	/*
4039 	 * Build the casm info portion of the CLAIM message.
4040 	 */
4041 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4042 	mutex_enter(&drmach_slice_table_lock);
4043 	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
4044 	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
4045 	mutex_exit(&drmach_slice_table_lock);
4046 	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
4047 	    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4048 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4049 
4050 	if (err) {
4051 		/*
4052 		 * if mailbox timeout or unrecoverable error from SC,
4053 		 * board cannot be touched.  Mark the status as
4054 		 * unusable.
4055 		 */
4056 		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4057 		    (err->e_code == ESTC_MBXRPLY))
4058 			bp->cond = SBD_COND_UNUSABLE;
4059 		return (err);
4060 	}
4061 
4062 	gdcd = drmach_gdcd_new();
4063 	if (gdcd == NULL) {
4064 		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
4065 		    bp->cm.name);
4066 		return (DRMACH_INTERNAL_ERROR());
4067 	}
4068 
4069 	/*
4070 	 * Read CPU SRAM DR buffer offset from GDCD.
4071 	 */
4072 	exp = DRMACH_BNUM2EXP(bp->bnum);
4073 	slot = DRMACH_BNUM2SLOT(bp->bnum);
4074 	bp->stardrb_offset =
4075 	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
4076 	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
4077 	    bp->stardrb_offset);
4078 
4079 	/*
4080 	 * Read board LPA setting from GDCD.
4081 	 */
4082 	bp->flags &= ~DRMACH_NULL_PROC_LPA;
4083 	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
4084 	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
4085 		bp->flags |= DRMACH_NULL_PROC_LPA;
4086 		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
4087 	}
4088 
4089 	/*
4090 	 * XXX Until the Solaris large pages support heterogeneous cpu
4091 	 * domains, DR needs to prevent the addition of non-Panther cpus
4092 	 * to an all-Panther domain with large pages enabled.
4093 	 */
4094 	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
4095 	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
4096 	    panther_pages_enabled && drmach_large_page_restriction) {
4097 		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
4098 		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
4099 		err = drerr_new(0, ESTC_SUPPORT, NULL);
4100 	}
4101 
4102 	if (err == NULL) {
4103 		/* do saf configurator stuff */
4104 		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
4105 		scc = sc_probe_board(bp->bnum);
4106 		if (scc == NULL)
4107 			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
4108 	}
4109 
4110 	if (err) {
4111 		/* flush CDC srams */
4112 		if (axq_cdc_flush_all() != DDI_SUCCESS) {
4113 			goto out;
4114 		}
4115 
4116 		/*
4117 		 * Build the casm info portion of the UNCLAIM message.
4118 		 */
4119 		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4120 		mutex_enter(&drmach_slice_table_lock);
4121 		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4122 		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4123 		mutex_exit(&drmach_slice_table_lock);
4124 		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
4125 		    (caddr_t)obufp, sizeof (dr_mbox_msg_t),
4126 		    (caddr_t)NULL, 0);
4127 
4128 		kmem_free(obufp, sizeof (dr_mbox_msg_t));
4129 
4130 		/*
4131 		 * we clear the connected flag just in case it would have
4132 		 * been set by a concurrent drmach_board_status() thread
4133 		 * before the UNCLAIM completed.
4134 		 */
4135 		bp->connected = 0;
4136 		goto out;
4137 	}
4138 
4139 	/*
4140 	 * Now that the board has been successfully attached, obtain
4141 	 * platform-specific DIMM serial id information for the board.
4142 	 */
4143 	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4144 	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
4145 		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
4146 	}
4147 
4148 out:
4149 	if (gdcd != NULL)
4150 		drmach_gdcd_dispose(gdcd);
4151 
4152 	return (err);
4153 }
4154 
4155 static void
4156 drmach_slice_table_update(drmach_board_t *bp, int invalidate)
4157 {
4158 	static char		*axq_name = "address-extender-queue";
4159 	static dev_info_t	*axq_dip = NULL;
4160 	static int		 axq_exp = -1;
4161 	static int		 axq_slot;
4162 	int			 e, s, slice;
4163 
4164 	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));
4165 
4166 	e = DRMACH_BNUM2EXP(bp->bnum);
4167 	if (invalidate) {
4168 		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);
4169 
4170 		/* invalidate cached casm value */
4171 		drmach_slice_table[e] = 0;
4172 
4173 		/* invalidate cached axq info if for same exp */
4174 		if (e == axq_exp && axq_dip) {
4175 			ndi_rele_devi(axq_dip);
4176 			axq_dip = NULL;
4177 		}
4178 	}
4179 
4180 	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
4181 		int i, portid;
4182 
4183 		/* search for an attached slot0 axq instance */
4184 		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
4185 			if (axq_dip)
4186 				ndi_rele_devi(axq_dip);
4187 			axq_dip = ddi_find_devinfo(axq_name, i, 0);
4188 			if (axq_dip && DDI_CF2(axq_dip)) {
4189 				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
4190 				    DDI_PROP_DONTPASS, "portid", -1);
4191 				if (portid == -1) {
4192 					DRMACH_PR("cant get portid of axq "
4193 					    "instance %d\n", i);
4194 					continue;
4195 				}
4196 
4197 				axq_exp = (portid >> 5) & 0x1f;
4198 				axq_slot = portid & 1;
4199 
4200 				if (invalidate && axq_exp == e)
4201 					continue;
4202 
4203 				if (axq_slot == 0)
4204 					break;	/* found */
4205 			}
4206 		}
4207 
4208 		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
4209 			if (axq_dip) {
4210 				ndi_rele_devi(axq_dip);
4211 				axq_dip = NULL;
4212 			}
4213 			DRMACH_PR("drmach_slice_table_update: failed to "
4214 			    "update axq dip\n");
4215 			return;
4216 		}
4217 
4218 	}
4219 
4220 	ASSERT(axq_dip);
4221 	ASSERT(axq_slot == 0);
4222 
4223 	if (invalidate)
4224 		return;
4225 
4226 	s = DRMACH_BNUM2SLOT(bp->bnum);
4227 	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n", axq_exp, axq_slot,
4228 	    e, s);
4229 
4230 	/* invalidate entry */
4231 	drmach_slice_table[e] &= ~0x20;
4232 
4233 	/*
4234 	 * find a slice that routes to expander e. If no match
4235 	 * is found, drmach_slice_table[e] will remain invalid.
4236 	 *
4237 	 * The CASM is a routing table indexed by slice number.
4238 	 * Each element in the table contains permission bits,
4239 	 * a destination expander number and a valid bit. The
4240 	 * valid bit must true for the element to be meaningful.
4241 	 *
4242 	 * CASM entry structure
4243 	 *   Bits 15..6 ignored
4244 	 *   Bit  5	valid
4245 	 *   Bits 0..4	expander number
4246 	 *
4247 	 * NOTE: the for loop is really enumerating the range of slices,
4248 	 * which is ALWAYS equal to the range of expanders. Hence,
4249 	 * AXQ_MAX_EXP is okay to use in this loop.
4250 	 */
4251 	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
4252 		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);
4253 
4254 		if ((casm & 0x20) && (casm & 0x1f) == e)
4255 			drmach_slice_table[e] = 0x20 | slice;
4256 	}
4257 }
4258 
4259 /*
4260  * Get base and bound PAs for slot 1 board lpa programming
4261  * If a cpu/mem board is present in the same expander, use slice
4262  * information corresponding to the CASM.  Otherwise, set base and
4263  * bound PAs to 0.
4264  */
4265 static void
4266 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4267 {
4268 	drmachid_t s0id;
4269 
4270 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4271 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4272 
4273 	*basep = *boundp = 0;
4274 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4275 	    s0id != 0) {
4276 
4277 		uint32_t slice;
4278 		if ((slice = drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4279 		    & 0x20) {
4280 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4281 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4282 		}
4283 	}
4284 }
4285 
4286 
/*
 * Reprogram slot 1 lpa's as required.
 * The purpose of this routine is to maintain the LPA settings of the
 * devices in slot 1. To date we know Schizo and Cheetah are the only
 * devices that require this attention. The LPA setting must match the
 * slice field in the CASM element for the local expander. This field is
 * guaranteed to be programmed in accordance with the cacheable address
 * space on the slot 0 board of the local expander. If no memory is present
 * on the slot 0 board, there is no cacheable address space and, hence, the
 * CASM slice field will be zero or its valid bit will be false (or both).
 *
 * bp may be either board of an expander: when it is the slot 0 board the
 * slot 1 neighbour (bnum + 1) is looked up in drmach_boards.  The routine
 * returns without doing anything when the slot 1 board is missing or has
 * no device array.
 *
 * drmach_slice_table_lock is taken here and held for the whole update;
 * cpu_lock, drmach_iocage_lock and drmach_xt_mb_lock are taken inside it
 * for the CPU pass.
 */

static void
drmach_slot1_lpa_set(drmach_board_t *bp)
{
	drmachid_t	id;
	drmach_board_t	*s1bp = NULL;
	int		rv, idx, is_maxcat = 1;
	uint64_t	last_scsr_pa = 0;
	uint64_t	new_basepa, new_boundpa;

	/* locate the slot 1 board: either bp itself or bp's neighbour */
	if (DRMACH_BNUM2SLOT(bp->bnum)) {
		s1bp = bp;
		if (s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum);
			return;
		}
	} else {
		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
		/* nothing to do when board is not found or has no devices */
		s1bp = id;
		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
			    bp->bnum + 1);
			return;
		}
		ASSERT(DRMACH_IS_BOARD_ID(id));
	}
	mutex_enter(&drmach_slice_table_lock);
	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
	    s1bp->bnum, new_basepa, new_boundpa);

	/*
	 * First pass: rewrite the LPA base/bound fields of every Schizo
	 * found on the board.  Seeing any IO device at all also marks the
	 * board as not being a MAXCAT, which disables the CPU pass below.
	 */
	rv = drmach_array_first(s1bp->devices, &idx, &id);
	while (rv == 0) {
		if (DRMACH_IS_IO_ID(id)) {
			drmach_io_t *io = id;

			is_maxcat = 0;

			/*
			 * Skip all non-Schizo IO devices (only IO nodes
			 * that are Schizo devices have non-zero scsr_pa).
			 * Filter out "other" leaf to avoid writing to the
			 * same Schizo Control/Status Register twice.
			 */
			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
				uint64_t scsr;

				/* read-modify-write of the LPA fields only */
				scsr  = lddphysio(io->scsr_pa);
				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
				    scsr);
				scsr &= ~(DRMACH_LPA_BASE_MASK |
				    DRMACH_LPA_BND_MASK);
				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);

				stdphysio(io->scsr_pa, scsr);
				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
				    scsr);

				last_scsr_pa = io->scsr_pa;
			}
		}
		rv = drmach_array_next(s1bp->devices, &idx, &id);
	}

	/*
	 * Second pass, MAXCAT boards only (no IO device was seen above):
	 * have each ready CPU on the board reprogram its own LPA through
	 * the drmach_set_lpa cross trap.
	 */
	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
		extern xcfunc_t	drmach_set_lpa;

		DRMACH_PR("reprogramming maxcat lpa's");

		mutex_enter(&cpu_lock);
		rv = drmach_array_first(s1bp->devices, &idx, &id);
		while (rv == 0 && id != NULL) {
			if (DRMACH_IS_CPU_ID(id)) {
				int ntries;
				processorid_t cpuid;

				cpuid = ((drmach_cpu_t *)id)->cpuid;

				/*
				 * Check for unconfigured or powered-off
				 * MCPUs.  If CPU_READY flag is clear, the
				 * MCPU cannot be xcalled.
				 */
				if ((cpu[cpuid] == NULL) ||
				    (cpu[cpuid]->cpu_flags &
				    CPU_READY) == 0) {

					/* advance here; continue skips the */
					/* drmach_array_next() at loop end */
					rv = drmach_array_next(s1bp->devices,
					    &idx, &id);
					continue;
				}

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it will be used for e$ flush
				 * in drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					while (drmach_iocage_is_busy)
						cv_wait(&drmach_iocage_cv,
						    &drmach_iocage_lock);
					drmach_iocage_is_busy = 1;
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					mutex_exit(&drmach_iocage_lock);
				}

				/*
				 * drmach_slice_table[*]
				 *	bit 5	valid
				 *	bit 0:4	slice number
				 *
				 * drmach_xt_mb[*] format for drmach_set_lpa
				 *	bit 7	valid
				 *	bit 6	set null LPA
				 *			(overrides bits 0:4)
				 *	bit 0:4	slice number
				 *
				 * drmach_set_lpa derives processor CBASE and
				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
				 * If bit 6 is set, then CBASE = CBND = 0.
				 * Otherwise, CBASE = slice number;
				 * CBND = slice number + 1.
				 * No action is taken if bit 7 is zero.
				 */

				mutex_enter(&drmach_xt_mb_lock);
				bzero((void *)drmach_xt_mb,
				    drmach_xt_mb_size);

				if (new_basepa == 0 && new_boundpa == 0)
					drmach_xt_mb[cpuid] = 0x80 | 0x40;
				else
					drmach_xt_mb[cpuid] = 0x80 |
					    DRMACH_PA_TO_SLICE(new_basepa);

				drmach_xt_ready = 0;

				xt_one(cpuid, drmach_set_lpa, NULL, NULL);

				/*
				 * Poll drmach_xt_ready, giving up after
				 * drmach_cpu_ntries delays.  Running out of
				 * tries is not reported.
				 */
				ntries = drmach_cpu_ntries;
				while (!drmach_xt_ready && ntries) {
					DELAY(drmach_cpu_delay);
					ntries--;
				}
				mutex_exit(&drmach_xt_mb_lock);
				drmach_xt_ready = 0;

				/*
				 * XXX CHEETAH SUPPORT
				 * for cheetah, we need to clear iocage
				 * memory since it was used for the e$ flush
				 * performed in drmach_set_lpa.
				 */
				if (drmach_is_cheetah) {
					mutex_enter(&drmach_iocage_lock);
					drmach_iocage_mem_scrub(ecache_size *
					    2);
					drmach_iocage_is_busy = 0;
					cv_signal(&drmach_iocage_cv);
					mutex_exit(&drmach_iocage_lock);
				}
			}
			rv = drmach_array_next(s1bp->devices, &idx, &id);
		}
		mutex_exit(&cpu_lock);
	}
	mutex_exit(&drmach_slice_table_lock);
}
4472 
4473 /*
4474  * Return the number of connected Panther boards in the domain.
4475  */
4476 static int
4477 drmach_panther_boards(void)
4478 {
4479 	int		rv;
4480 	int		b_idx;
4481 	drmachid_t	b_id;
4482 	drmach_board_t	*bp;
4483 	int		npanther = 0;
4484 
4485 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4486 	while (rv == 0) {
4487 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4488 		bp = b_id;
4489 
4490 		if (IS_PANTHER(bp->cpu_impl))
4491 			npanther++;
4492 
4493 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4494 	}
4495 
4496 	return (npanther);
4497 }
4498 
/*
 * Disconnect a board from the domain.
 *
 * id	board to disconnect; must be a board id
 * opts	unused
 *
 * Returns NULL on success, otherwise an sbd_error_t describing the
 * failure (ESTC_INAPPROP for a non-board id, an internal error when the
 * CDC flush fails, ESTC_DEPROBE when the deprobe fails, or whatever the
 * UNCLAIM mailbox transaction returned).
 *
 * Order of operations: build the UNCLAIM payload, update slot 1 LPAs,
 * disable/flush the CDC, deprobe the board, optionally delay, and only
 * then send UNCLAIM to the SC.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
{
	drmach_board_t	*bp;
	dr_mbox_msg_t	*obufp;
	sbd_error_t	*err = NULL;

	sc_gptwocfg_cookie_t	scc;

	if (!DRMACH_IS_BOARD_ID(id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	bp = id;

	/*
	 * Build the casm info portion of the UNCLAIM message.
	 * This must be done prior to calling for saf configurator
	 * deprobe, to ensure that the associated axq instance
	 * is not detached.
	 */
	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
	mutex_enter(&drmach_slice_table_lock);
	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);

	/*
	 * If disconnecting slot 0 board, update the casm slice table
	 * info now, for use by drmach_slot1_lpa_set()
	 */
	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
		drmach_slice_table_update(bp, 1);

	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
	mutex_exit(&drmach_slice_table_lock);

	/*
	 * Update LPA information for slot1 board
	 */
	drmach_slot1_lpa_set(bp);

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
	}

	/*
	 * call saf configurator for deprobe
	 * It's done now before sending an UNCLAIM message because
	 * IKP will probe boards it doesn't know about <present at boot>
	 * prior to unprobing them.  If this happens after sending the
	 * UNCLAIM, it will cause a dstop for domain transgression error.
	 */

	if (!err) {
		scc = sc_unprobe_board(bp->bnum);
		/* the CDC is re-enabled whether or not the deprobe worked */
		axq_cdc_enable_all();
		if (scc != NULL) {
			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
		}
	}

	/*
	 * If disconnecting a board from a Panther domain, wait a fixed-
	 * time delay for pending Safari transactions to complete on the
	 * disconnecting board's processors.  The bus sync list read used
	 * in drmach_shutdown_asm to synchronize with outstanding Safari
	 * transactions assumes no read-bypass-write mode for all memory
	 * controllers.  Since Panther supports read-bypass-write, a
	 * delay is used that is slightly larger than the maximum Safari
	 * timeout value in the Safari/Fireplane Config Reg.
	 *
	 * Note that this delay is taken even when an error has already
	 * been recorded above.
	 */
	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
		clock_t	stime = ddi_get_lbolt();

		delay(drv_usectohz(drmach_unclaim_usec_delay));

		stime = ddi_get_lbolt() - stime;
		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
	}

	if (!err) {
		obufp->msgdata.dm_ur.mem_clear = 0;

		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
		    sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);

		if (err) {
			/*
			 * if mailbox timeout or unrecoverable error from SC,
			 * board cannot be touched.  Mark the status as
			 * unusable.
			 */
			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
			    (err->e_code == ESTC_MBXRPLY))
				bp->cond = SBD_COND_UNUSABLE;
			else {
				/*
				 * Any other UNCLAIM failure: undo the
				 * deprobe by probing the board again and,
				 * if that works, restore the slice table
				 * entry and the slot 1 LPA settings.
				 */
				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
				    bp->bnum);
				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
				    bp->bnum);
				scc = sc_probe_board(bp->bnum);
				if (scc == NULL) {
					cmn_err(CE_WARN,
					"sc_probe_board failed for bnum=%d",
					    bp->bnum);
				} else {
					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
						mutex_enter(
						    &drmach_slice_table_lock);
						drmach_slice_table_update(bp,
						    0);
						mutex_exit(
						    &drmach_slice_table_lock);
					}
					drmach_slot1_lpa_set(bp);
				}
			}
		} else {
			bp->connected = 0;
			/*
			 * Now that the board has been successfully detached,
			 * discard platform-specific DIMM serial id information
			 * for the board.
			 */
			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
			    plat_ecc_capability_sc_get(
			    PLAT_ECC_DIMM_SID_MESSAGE)) {
				(void) plat_discard_mem_sids(
				    DRMACH_BNUM2EXP(bp->bnum));
			}
		}
	}
	kmem_free(obufp, sizeof (dr_mbox_msg_t));

	return (err);
}
4636 
4637 static int
4638 drmach_get_portid(drmach_node_t *np)
4639 {
4640 	drmach_node_t	pp;
4641 	int		portid;
4642 	char		type[OBP_MAXPROPNAME];
4643 
4644 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4645 		return (portid);
4646 
4647 	/*
4648 	 * Get the device_type property to see if we should
4649 	 * continue processing this node.
4650 	 */
4651 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4652 		return (-1);
4653 
4654 	/*
4655 	 * If the device is a CPU without a 'portid' property,
4656 	 * it is a CMP core. For such cases, the parent node
4657 	 * has the portid.
4658 	 */
4659 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4660 		if (np->get_parent(np, &pp) != 0)
4661 			return (-1);
4662 
4663 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4664 			return (portid);
4665 	}
4666 
4667 	return (-1);
4668 }
4669 
4670 /*
4671  * This is a helper function to determine if a given
4672  * node should be considered for a dr operation according
4673  * to predefined dr type nodes and the node's name.
4674  * Formal Parameter : The name of a device node.
4675  * Return Value: -1, name does not map to a valid dr type.
4676  *		 A value greater or equal to 0, name is a valid dr type.
4677  */
4678 static int
4679 drmach_name2type_idx(char *name)
4680 {
4681 	int 	index, ntypes;
4682 
4683 	if (name == NULL)
4684 		return (-1);
4685 
4686 	/*
4687 	 * Determine how many possible types are currently supported
4688 	 * for dr.
4689 	 */
4690 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4691 
4692 	/* Determine if the node's name correspond to a predefined type. */
4693 	for (index = 0; index < ntypes; index++) {
4694 		if (strcmp(drmach_name2type[index].name, name) == 0)
4695 			/* The node is an allowed type for dr. */
4696 			return (index);
4697 	}
4698 
4699 	/*
4700 	 * If the name of the node does not map to any of the
4701 	 * types in the array drmach_name2type then the node is not of
4702 	 * interest to dr.
4703 	 */
4704 	return (-1);
4705 }
4706 
4707 static int
4708 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4709 {
4710 	drmach_node_t			*node = args->node;
4711 	drmach_board_cb_data_t		*data = args->data;
4712 	drmach_board_t			*obj = data->obj;
4713 
4714 	int		rv, portid;
4715 	drmachid_t	id;
4716 	drmach_device_t	*device;
4717 	char	name[OBP_MAXDRVNAME];
4718 
4719 	portid = drmach_get_portid(node);
4720 	if (portid == -1) {
4721 		/*
4722 		 * if the node does not have a portid property, then
4723 		 * by that information alone it is known that drmach
4724 		 * is not interested in it.
4725 		 */
4726 		return (0);
4727 	}
4728 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4729 
4730 	/* The node must have a name */
4731 	if (rv)
4732 		return (0);
4733 
4734 	/*
4735 	 * Ignore devices whose portid do not map to this board,
4736 	 * or that their name property is not mapped to a valid
4737 	 * dr device name.
4738 	 */
4739 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4740 	    (drmach_name2type_idx(name) < 0))
4741 		return (0);
4742 
4743 	/*
4744 	 * Create a device data structure from this node data.
4745 	 * The call may yield nothing if the node is not of interest
4746 	 * to drmach.
4747 	 */
4748 	data->err = drmach_device_new(node, obj, portid, &id);
4749 	if (data->err)
4750 		return (-1);
4751 	else if (!id) {
4752 		/*
4753 		 * drmach_device_new examined the node we passed in
4754 		 * and determined that it was either one not of
4755 		 * interest to drmach or the PIM dr layer.
4756 		 * So, it is skipped.
4757 		 */
4758 		return (0);
4759 	}
4760 
4761 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4762 	if (rv) {
4763 		data->err = DRMACH_INTERNAL_ERROR();
4764 		return (-1);
4765 	}
4766 
4767 	device = id;
4768 
4769 #ifdef DEBUG
4770 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4771 	if (DRMACH_IS_IO_ID(id))
4772 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4773 #endif
4774 
4775 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4776 	return (data->err == NULL ? 0 : -1);
4777 }
4778 
4779 sbd_error_t *
4780 drmach_board_find_devices(drmachid_t id, void *a,
4781 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4782 {
4783 	drmach_board_t		*bp = (drmach_board_t *)id;
4784 	sbd_error_t		*err;
4785 	int			 max_devices;
4786 	int			 rv;
4787 	drmach_board_cb_data_t	data;
4788 
4789 	if (!DRMACH_IS_BOARD_ID(id))
4790 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4791 
4792 	max_devices  = plat_max_cpu_units_per_board();
4793 	max_devices += plat_max_mem_units_per_board();
4794 	max_devices += plat_max_io_units_per_board();
4795 
4796 	bp->devices = drmach_array_new(0, max_devices);
4797 
4798 	if (bp->tree == NULL)
4799 		bp->tree = drmach_node_new();
4800 
4801 	data.obj = bp;
4802 	data.ndevs = 0;
4803 	data.found = found;
4804 	data.a = a;
4805 	data.err = NULL;
4806 
4807 	mutex_enter(&drmach_slice_table_lock);
4808 	mutex_enter(&drmach_bus_sync_lock);
4809 
4810 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4811 
4812 	drmach_slice_table_update(bp, 0);
4813 	drmach_bus_sync_list_update();
4814 
4815 	mutex_exit(&drmach_bus_sync_lock);
4816 	mutex_exit(&drmach_slice_table_lock);
4817 
4818 	if (rv == 0) {
4819 		err = NULL;
4820 		drmach_slot1_lpa_set(bp);
4821 	} else {
4822 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4823 		bp->devices = NULL;
4824 
4825 		if (data.err)
4826 			err = data.err;
4827 		else
4828 			err = DRMACH_INTERNAL_ERROR();
4829 	}
4830 
4831 	return (err);
4832 }
4833 
4834 int
4835 drmach_board_lookup(int bnum, drmachid_t *id)
4836 {
4837 	int	rv = 0;
4838 
4839 	if (!drmach_initialized && drmach_init() == -1) {
4840 		*id = 0;
4841 		return (-1);
4842 	}
4843 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4844 	if (drmach_array_get(drmach_boards, bnum, id)) {
4845 		*id = 0;
4846 		rv = -1;
4847 	} else {
4848 		caddr_t		obufp;
4849 		dr_showboard_t	shb;
4850 		sbd_error_t	*err = NULL;
4851 		drmach_board_t	*bp;
4852 
4853 		bp = *id;
4854 
4855 		if (bp)
4856 			rw_downgrade(&drmach_boards_rwlock);
4857 
4858 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4859 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4860 		    sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4861 		    sizeof (dr_showboard_t));
4862 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4863 
4864 		if (err) {
4865 			if (err->e_code == ESTC_UNAVAILABLE) {
4866 				*id = 0;
4867 				rv = -1;
4868 			}
4869 			sbd_err_clear(&err);
4870 		} else {
4871 			if (!bp)
4872 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4873 			bp->connected = (shb.bd_assigned && shb.bd_active);
4874 			bp->empty = shb.slot_empty;
4875 
4876 			switch (shb.test_status) {
4877 				case DR_TEST_STATUS_UNKNOWN:
4878 				case DR_TEST_STATUS_IPOST:
4879 				case DR_TEST_STATUS_ABORTED:
4880 					bp->cond = SBD_COND_UNKNOWN;
4881 					break;
4882 				case DR_TEST_STATUS_PASSED:
4883 					bp->cond = SBD_COND_OK;
4884 					break;
4885 				case DR_TEST_STATUS_FAILED:
4886 					bp->cond = SBD_COND_FAILED;
4887 					break;
4888 				default:
4889 					bp->cond = SBD_COND_UNKNOWN;
4890 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4891 				    shb.test_status);
4892 					break;
4893 			}
4894 			(void) strncpy(bp->type, shb.board_type,
4895 			    sizeof (bp->type));
4896 			bp->assigned = shb.bd_assigned;
4897 			bp->powered = shb.power_on;
4898 		}
4899 	}
4900 	rw_exit(&drmach_boards_rwlock);
4901 	return (rv);
4902 }
4903 
4904 sbd_error_t *
4905 drmach_board_name(int bnum, char *buf, int buflen)
4906 {
4907 	(void) snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4908 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4909 
4910 	return (NULL);
4911 }
4912 
4913 sbd_error_t *
4914 drmach_board_poweroff(drmachid_t id)
4915 {
4916 	drmach_board_t	*bp;
4917 	sbd_error_t	*err;
4918 	drmach_status_t	 stat;
4919 
4920 	if (!DRMACH_IS_BOARD_ID(id))
4921 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4922 	bp = id;
4923 
4924 	err = drmach_board_status(id, &stat);
4925 	if (!err) {
4926 		if (stat.configured || stat.busy)
4927 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4928 		else {
4929 			caddr_t	obufp;
4930 
4931 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4932 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4933 			    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4934 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4935 			if (!err)
4936 				bp->powered = 0;
4937 		}
4938 	}
4939 	return (err);
4940 }
4941 
4942 sbd_error_t *
4943 drmach_board_poweron(drmachid_t id)
4944 {
4945 	drmach_board_t	*bp;
4946 	caddr_t		obufp;
4947 	sbd_error_t	*err;
4948 
4949 	if (!DRMACH_IS_BOARD_ID(id))
4950 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4951 	bp = id;
4952 
4953 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4954 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4955 	    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4956 	if (!err)
4957 		bp->powered = 1;
4958 
4959 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4960 
4961 	return (err);
4962 }
4963 
4964 static sbd_error_t *
4965 drmach_board_release(drmachid_t id)
4966 {
4967 	if (!DRMACH_IS_BOARD_ID(id))
4968 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4969 	return (NULL);
4970 }
4971 
4972 sbd_error_t *
4973 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
4974 {
4975 	drmach_board_t		*bp;
4976 	drmach_device_t		*dp[MAX_CORES_PER_CMP];
4977 	dr_mbox_msg_t		*obufp;
4978 	sbd_error_t		*err;
4979 	dr_testboard_reply_t	tbr;
4980 	int			cpylen;
4981 	char			*copts;
4982 	int			is_io;
4983 	cpu_flag_t		oflags[MAX_CORES_PER_CMP];
4984 
4985 	if (!DRMACH_IS_BOARD_ID(id))
4986 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4987 	bp = id;
4988 
4989 	/*
4990 	 * If the board is an I/O or MAXCAT board, setup I/O cage for
4991 	 * testing. Slot 1 indicates I/O or MAXCAT board.
4992 	 */
4993 
4994 	is_io = DRMACH_BNUM2SLOT(bp->bnum);
4995 
4996 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4997 
4998 	if (force)
4999 		obufp->msgdata.dm_tb.force = 1;
5000 
5001 	obufp->msgdata.dm_tb.immediate = 1;
5002 
5003 	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
5004 		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
5005 		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
5006 	}
5007 
5008 	if (is_io) {
5009 		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);
5010 
5011 		if (err) {
5012 			kmem_free(obufp, sizeof (dr_mbox_msg_t));
5013 			return (err);
5014 		}
5015 	}
5016 
5017 	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
5018 	    sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));
5019 
5020 	if (!err)
5021 		bp->cond = SBD_COND_OK;
5022 	else
5023 		bp->cond = SBD_COND_UNKNOWN;
5024 
5025 	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
5026 		/* examine test status */
5027 		switch (tbr.test_status) {
5028 			case DR_TEST_STATUS_IPOST:
5029 				bp->cond = SBD_COND_UNKNOWN;
5030 				err = drerr_new(0, ESTC_TEST_IN_PROGRESS, NULL);
5031 				break;
5032 			case DR_TEST_STATUS_UNKNOWN:
5033 				bp->cond = SBD_COND_UNKNOWN;
5034 				err = drerr_new(1,
5035 				    ESTC_TEST_STATUS_UNKNOWN, NULL);
5036 				break;
5037 			case DR_TEST_STATUS_FAILED:
5038 				bp->cond = SBD_COND_FAILED;
5039 				err = drerr_new(1, ESTC_TEST_FAILED, NULL);
5040 				break;
5041 			case DR_TEST_STATUS_ABORTED:
5042 				bp->cond = SBD_COND_UNKNOWN;
5043 				err = drerr_new(1, ESTC_TEST_ABORTED, NULL);
5044 				break;
5045 			default:
5046 				bp->cond = SBD_COND_UNKNOWN;
5047 				err = drerr_new(1, ESTC_TEST_RESULT_UNKNOWN,
5048 				    NULL);
5049 				break;
5050 		}
5051 	}
5052 
5053 	/*
5054 	 * If I/O cage test was performed, check for availability of the
5055 	 * cpu used.  If cpu has been returned, it's OK to proceed with
5056 	 * reconfiguring it for use.
5057 	 */
5058 	if (is_io) {
5059 		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
5060 		    tbr.cpu_recovered);
5061 		DRMACH_PR("drmach_board_test: port id: %d",
5062 		    tbr.cpu_portid);
5063 
5064 		/*
5065 		 * Check the cpu_recovered flag in the testboard reply, or
5066 		 * if the testboard request message was not sent to SMS due
5067 		 * to an mboxsc_putmsg() failure, it's OK to recover the
5068 		 * cpu since hpost hasn't touched it.
5069 		 */
5070 		if ((tbr.cpu_recovered && tbr.cpu_portid ==
5071 		    obufp->msgdata.dm_tb.cpu_portid) ||
5072 		    ((err) && (err->e_code == ESTC_MBXRQST))) {
5073 
5074 			int i;
5075 
5076 			mutex_enter(&cpu_lock);
5077 			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
5078 				if (dp[i] != NULL) {
5079 					(void) drmach_iocage_cpu_return(dp[i],
5080 					    oflags[i]);
5081 				}
5082 			}
5083 			mutex_exit(&cpu_lock);
5084 		} else {
5085 			cmn_err(CE_WARN, "Unable to recover port id %d "
5086 			    "after I/O cage test: cpu_recovered=%d, "
5087 			    "returned portid=%d",
5088 			    obufp->msgdata.dm_tb.cpu_portid,
5089 			    tbr.cpu_recovered, tbr.cpu_portid);
5090 		}
5091 		(void) drmach_iocage_mem_return(&tbr);
5092 	}
5093 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
5094 
5095 	return (err);
5096 }
5097 
5098 sbd_error_t *
5099 drmach_board_unassign(drmachid_t id)
5100 {
5101 	drmach_board_t	*bp;
5102 	sbd_error_t	*err;
5103 	drmach_status_t	 stat;
5104 	caddr_t		obufp;
5105 
5106 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
5107 
5108 	if (!DRMACH_IS_BOARD_ID(id)) {
5109 		rw_exit(&drmach_boards_rwlock);
5110 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5111 	}
5112 	bp = id;
5113 
5114 	err = drmach_board_status(id, &stat);
5115 	if (err) {
5116 		rw_exit(&drmach_boards_rwlock);
5117 		return (err);
5118 	}
5119 
5120 	if (stat.configured || stat.busy) {
5121 		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
5122 	} else {
5123 
5124 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5125 		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
5126 		    sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5127 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
5128 		if (!err) {
5129 			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
5130 				err = DRMACH_INTERNAL_ERROR();
5131 			else
5132 				drmach_board_dispose(bp);
5133 		}
5134 	}
5135 	rw_exit(&drmach_boards_rwlock);
5136 	return (err);
5137 }
5138 
5139 static sbd_error_t *
5140 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5141 {
5142 	int		len;
5143 	drmach_reg_t	reg;
5144 	drmach_node_t	pp;
5145 	drmach_node_t	*np = dp->node;
5146 
5147 	/*
5148 	 * If the node does not have a portid property,
5149 	 * it represents a CMP device. For a CMP, the reg
5150 	 * property of the parent holds the information of
5151 	 * interest.
5152 	 */
5153 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5154 
5155 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5156 			return (DRMACH_INTERNAL_ERROR());
5157 		}
5158 		np = &pp;
5159 	}
5160 
5161 	if (np->n_getproplen(np, "reg", &len) != 0)
5162 		return (DRMACH_INTERNAL_ERROR());
5163 
5164 	if (len != sizeof (reg))
5165 		return (DRMACH_INTERNAL_ERROR());
5166 
5167 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5168 		return (DRMACH_INTERNAL_ERROR());
5169 
5170 	/* reassemble 64-bit base address */
5171 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5172 
5173 	return (NULL);
5174 }
5175 
5176 static void
5177 drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5178 {
5179 	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5180 	uint_t		*reg_read = (uint_t *)arg2;
5181 
5182 	*saf_config_reg = lddsafconfig();
5183 	*reg_read = 0x1;
5184 }
5185 
5186 /*
5187  * A return value of 1 indicates success and 0 indicates a failure
5188  */
5189 static int
5190 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5191 {
5192 
5193 	int 	rv = 0x0;
5194 
5195 	*scr = 0x0;
5196 
5197 	/*
5198 	 * Confirm cpu was in ready set when xc was issued.
5199 	 * This is done by verifying rv which is
5200 	 * set to 0x1 when xc_one is successful.
5201 	 */
5202 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5203 	    (uint64_t)scr, (uint64_t)&rv);
5204 
5205 	return (rv);
5206 
5207 }
5208 
5209 static sbd_error_t *
5210 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5211 {
5212 	drmach_node_t	*np;
5213 
5214 	np = cp->dev.node;
5215 
5216 	/*
5217 	 * If a CPU does not have a portid property, it must
5218 	 * be a CMP device with a cpuid property.
5219 	 */
5220 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5221 
5222 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5223 			return (DRMACH_INTERNAL_ERROR());
5224 		}
5225 	}
5226 
5227 	return (NULL);
5228 }
5229 
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)

/*
 * Map a cpuid to a two-bit index: bit 1 is the core id (cpuid bit 2) and
 * bit 0 is cpuid bit 0.  The argument and the whole expansion are
 * parenthesized so the macro is safe with expression arguments and
 * inside larger expressions.
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((((id) & DRMACH_COREID_MASK) >> 1) | ((id) & 0x1))
5234 
5235 static sbd_error_t *
5236 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
5237 {
5238 	sbd_error_t	*err;
5239 	uint64_t	scr_pa;
5240 	drmach_cpu_t	*cp = NULL;
5241 	pfn_t		pfn;
5242 	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
5243 	int		idx;
5244 	int		impl;
5245 	processorid_t	cpuid;
5246 
5247 	err = drmach_read_reg_addr(proto, &scr_pa);
5248 	if (err) {
5249 		goto fail;
5250 	}
5251 
5252 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
5253 	bcopy(proto, &cp->dev, sizeof (cp->dev));
5254 	cp->dev.node = drmach_node_dup(proto->node);
5255 	cp->dev.cm.isa = (void *)drmach_cpu_new;
5256 	cp->dev.cm.dispose = drmach_cpu_dispose;
5257 	cp->dev.cm.release = drmach_cpu_release;
5258 	cp->dev.cm.status = drmach_cpu_status;
5259 	cp->scr_pa = scr_pa;
5260 
5261 	err = drmach_cpu_read_cpuid(cp, &cpuid);
5262 	if (err) {
5263 		goto fail;
5264 	}
5265 
5266 	err = drmach_cpu_get_impl(cp, &impl);
5267 	if (err) {
5268 		goto fail;
5269 	}
5270 
5271 	cp->cpuid = cpuid;
5272 	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
5273 	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);
5274 
5275 	/*
5276 	 * Init the board cpu type.  Assumes all board cpus are the same type.
5277 	 */
5278 	if (cp->dev.bp->cpu_impl == 0) {
5279 		cp->dev.bp->cpu_impl = impl;
5280 	}
5281 	ASSERT(cp->dev.bp->cpu_impl == impl);
5282 
5283 	/*
5284 	 * XXX CHEETAH SUPPORT
5285 	 * determine if the domain uses Cheetah procs
5286 	 */
5287 	if (drmach_is_cheetah < 0) {
5288 		drmach_is_cheetah = IS_CHEETAH(impl);
5289 	}
5290 
5291 	/*
5292 	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
5293 	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
5294 	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
5295 	 * pair. Each cpu uses 8KB according to the following layout:
5296 	 *
5297 	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
5298 	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
5299 	 * Page 2:	even numbered Panther/Jaguar core 1's
5300 	 * Page 3:	odd numbered Panther/Jaguar core 1's
5301 	 */
5302 	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
5303 	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
5304 	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
5305 	pfn = cpu_sram_pa >> PAGESHIFT;
5306 
5307 	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
5308 	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
5309 	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
5310 	    TTE_VALID_INT | TTE_SZ_INT(TTE8K);
5311 	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
5312 	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
5313 
5314 	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
5315 	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
5316 	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);
5317 
5318 	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
5319 	    cp->dev.type, cp->dev.unum);
5320 
5321 	*idp = (drmachid_t)cp;
5322 	return (NULL);
5323 
5324 fail:
5325 	if (cp) {
5326 		drmach_node_dispose(cp->dev.node);
5327 		kmem_free(cp, sizeof (*cp));
5328 	}
5329 
5330 	*idp = (drmachid_t)0;
5331 	return (err);
5332 }
5333 
5334 static void
5335 drmach_cpu_dispose(drmachid_t id)
5336 {
5337 	drmach_cpu_t	*self;
5338 	processorid_t	cpuid;
5339 
5340 	ASSERT(DRMACH_IS_CPU_ID(id));
5341 
5342 	self = id;
5343 	if (self->dev.node)
5344 		drmach_node_dispose(self->dev.node);
5345 
5346 	cpuid = self->cpuid;
5347 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5348 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5349 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5350 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5351 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5352 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5353 
5354 	kmem_free(self, sizeof (*self));
5355 }
5356 
5357 static int
5358 drmach_cpu_start(struct cpu *cp)
5359 {
5360 	extern xcfunc_t	drmach_set_lpa;
5361 	extern void	restart_other_cpu(int);
5362 	int		cpuid = cp->cpu_id;
5363 	int		rv, bnum;
5364 	drmach_board_t	*bp;
5365 
5366 	ASSERT(MUTEX_HELD(&cpu_lock));
5367 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
5368 
5369 	cp->cpu_flags &= ~CPU_POWEROFF;
5370 
5371 	/*
5372 	 * NOTE: restart_other_cpu pauses cpus during the
5373 	 *	 slave cpu start.  This helps to quiesce the
5374 	 *	 bus traffic a bit which makes the tick sync
5375 	 *	 routine in the prom more robust.
5376 	 */
5377 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
5378 
5379 	if (prom_hotaddcpu(cpuid) != 0) {
5380 		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
5381 		    cpuid);
5382 	}
5383 
5384 	restart_other_cpu(cpuid);
5385 
5386 	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
5387 	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
5388 	if (rv == -1 || bp == NULL) {
5389 		DRMACH_PR("drmach_cpu_start: cannot read board info for "
5390 		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, (void *)bp);
5391 	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
5392 		int exp;
5393 		int ntries;
5394 
5395 		mutex_enter(&drmach_xt_mb_lock);
5396 		mutex_enter(&drmach_slice_table_lock);
5397 		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
5398 
5399 		/*
5400 		 * drmach_slice_table[*]
5401 		 *	bit 5	valid
5402 		 *	bit 0:4	slice number
5403 		 *
5404 		 * drmach_xt_mb[*] format for drmach_set_lpa
5405 		 *	bit 7	valid
5406 		 *	bit 6	set null LPA (overrides bits 0:4)
5407 		 *	bit 0:4	slice number
5408 		 *
5409 		 * drmach_set_lpa derives processor CBASE and CBND
5410 		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
5411 		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
5412 		 * number; CBND = slice number + 1.
5413 		 * No action is taken if bit 7 is zero.
5414 		 */
5415 		exp = (cpuid >> 5) & 0x1f;
5416 		if (drmach_slice_table[exp] & 0x20) {
5417 			drmach_xt_mb[cpuid] = 0x80 |
5418 			    (drmach_slice_table[exp] & 0x1f);
5419 		} else {
5420 			drmach_xt_mb[cpuid] = 0x80 | 0x40;
5421 		}
5422 
5423 		drmach_xt_ready = 0;
5424 
5425 		xt_one(cpuid, drmach_set_lpa, NULL, NULL);
5426 
5427 		ntries = drmach_cpu_ntries;
5428 		while (!drmach_xt_ready && ntries) {
5429 			DELAY(drmach_cpu_delay);
5430 			ntries--;
5431 		}
5432 
5433 		mutex_exit(&drmach_slice_table_lock);
5434 		mutex_exit(&drmach_xt_mb_lock);
5435 
5436 		DRMACH_PR(
5437 		    "waited %d out of %d tries for drmach_set_lpa on cpu%d",
5438 		    drmach_cpu_ntries - ntries, drmach_cpu_ntries,
5439 		    cp->cpu_id);
5440 	}
5441 
5442 	xt_one(cpuid, vtag_flushpage_tl1, (uint64_t)drmach_cpu_sram_va,
5443 	    (uint64_t)ksfmmup);
5444 
5445 	return (0);
5446 }
5447 
5448 /*
5449  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5450  * it has been offlined. The function of this routine is to get the cpu
5451  * spinning in a safe place. The requirement is that the system will not
5452  * reference anything on the detaching board (memory and i/o is detached
5453  * elsewhere) and that the CPU not reference anything on any other board
5454  * in the system.  This isolation is required during and after the writes
5455  * to the domain masks to remove the board from the domain.
5456  *
5457  * To accomplish this isolation the following is done:
5458  *	1) Create a locked mapping to the STARDRB data buffer located
5459  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5460  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5461  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5462  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5463  *	   boards. Each STARDRB buffer is logically divided by DR into one
5464  *	   8KB page per cpu (or Jaguar core).
5465  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5466  *	3) Jump to function now in the cpu sram.
5467  *	   Function will:
5468  *	   3.1) Flush its Ecache (displacement).
5469  *	   3.2) Flush its Dcache with HW mechanism.
5470  *	   3.3) Flush its Icache with HW mechanism.
5471  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5472  *	   3.5) Set LPA to NULL
5473  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5474  *	        recovered by drmach_cpu_poweroff().
5475  *	4) Jump into an infinite loop.
5476  */
5477 
5478 static void
5479 drmach_cpu_stop_self(void)
5480 {
5481 	extern void drmach_shutdown_asm(uint64_t, uint64_t, int, int, uint64_t);
5482 	extern void drmach_shutdown_asm_end(void);
5483 
5484 	tte_t		*tte;
5485 	uint_t		*p, *q;
5486 	uint64_t	 stack_pointer;
5487 
5488 	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
5489 	    (ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);
5490 
5491 	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
5492 	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) && TTE_IS_PRIVILEGED(tte) &&
5493 	    TTE_IS_LOCKED(tte));
5494 	sfmmu_dtlb_ld_kva(drmach_cpu_sram_va, tte);
5495 	sfmmu_itlb_ld_kva(drmach_cpu_sram_va, tte);
5496 
5497 	/* copy text. standard bcopy not designed to work in nc space */
5498 	p = (uint_t *)drmach_cpu_sram_va;
5499 	q = (uint_t *)drmach_shutdown_asm;
5500 	while (q < (uint_t *)drmach_shutdown_asm_end)
5501 		*p++ = *q++;
5502 
5503 	/* zero to assist debug */
5504 	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
5505 	while (p < q)
5506 		*p++ = 0;
5507 
5508 	/* a parking spot for the stack pointer */
5509 	stack_pointer = (uint64_t)q;
5510 
5511 	/* call copy of drmach_shutdown_asm */
5512 	(*(void (*)())drmach_cpu_sram_va)(
5513 	    stack_pointer,
5514 	    drmach_iocage_paddr,
5515 	    cpunodes[CPU->cpu_id].ecache_size,
5516 	    cpunodes[CPU->cpu_id].ecache_linesize,
5517 	    va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
5518 }
5519 
5520 static void
5521 drmach_cpu_shutdown_self(void)
5522 {
5523 	cpu_t		*cp = CPU;
5524 	int		cpuid = cp->cpu_id;
5525 	extern void	flush_windows(void);
5526 
5527 	flush_windows();
5528 
5529 	(void) spl8();
5530 
5531 	ASSERT(cp->cpu_intr_actv == 0);
5532 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
5533 	    cp->cpu_thread == cp->cpu_startup_thread);
5534 
5535 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
5536 
5537 	drmach_cpu_stop_self();
5538 
5539 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
5540 }
5541 
5542 static sbd_error_t *
5543 drmach_cpu_release(drmachid_t id)
5544 {
5545 	drmach_cpu_t	*cp;
5546 	struct cpu	*cpu;
5547 	sbd_error_t	*err;
5548 
5549 	if (!DRMACH_IS_CPU_ID(id))
5550 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5551 	cp = id;
5552 
5553 	ASSERT(MUTEX_HELD(&cpu_lock));
5554 
5555 	cpu = cpu_get(cp->cpuid);
5556 	if (cpu == NULL)
5557 		err = DRMACH_INTERNAL_ERROR();
5558 	else
5559 		err = NULL;
5560 
5561 	return (err);
5562 }
5563 
5564 static sbd_error_t *
5565 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5566 {
5567 	drmach_cpu_t	*cp;
5568 	drmach_device_t	*dp;
5569 
5570 	ASSERT(DRMACH_IS_CPU_ID(id));
5571 	cp = id;
5572 	dp = &cp->dev;
5573 
5574 	stat->assigned = dp->bp->assigned;
5575 	stat->powered = dp->bp->powered;
5576 	mutex_enter(&cpu_lock);
5577 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5578 	mutex_exit(&cpu_lock);
5579 	stat->busy = dp->busy;
5580 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
5581 	stat->info[0] = '\0';
5582 
5583 	return (NULL);
5584 }
5585 
5586 sbd_error_t *
5587 drmach_cpu_disconnect(drmachid_t id)
5588 {
5589 	if (!DRMACH_IS_CPU_ID(id))
5590 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5591 
5592 	return (NULL);
5593 }
5594 
5595 sbd_error_t *
5596 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5597 {
5598 	drmach_cpu_t	*cpu;
5599 
5600 	if (!DRMACH_IS_CPU_ID(id))
5601 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5602 	cpu = id;
5603 
5604 	*cpuid = cpu->cpuid;
5605 	return (NULL);
5606 }
5607 
5608 sbd_error_t *
5609 drmach_cpu_get_impl(drmachid_t id, int *ip)
5610 {
5611 	drmach_node_t	*np;
5612 	int		impl;
5613 
5614 	if (!DRMACH_IS_CPU_ID(id))
5615 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5616 
5617 	np = ((drmach_device_t *)id)->node;
5618 
5619 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5620 		return (DRMACH_INTERNAL_ERROR());
5621 	}
5622 
5623 	*ip = impl;
5624 
5625 	return (NULL);
5626 }
5627 
5628 /*
5629  * Flush this cpu's ecache, then ensure all outstanding safari
5630  * transactions have retired.
5631  */
5632 void
5633 drmach_cpu_flush_ecache_sync(void)
5634 {
5635 	uint64_t *p;
5636 
5637 	ASSERT(curthread->t_bound_cpu == CPU);
5638 
5639 	cpu_flush_ecache();
5640 
5641 	mutex_enter(&drmach_bus_sync_lock);
5642 	for (p = drmach_bus_sync_list; *p; p++)
5643 		(void) ldphys(*p);
5644 	mutex_exit(&drmach_bus_sync_lock);
5645 
5646 	cpu_flush_ecache();
5647 }
5648 
5649 sbd_error_t *
5650 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5651 {
5652 	drmach_device_t	*dp;
5653 
5654 	if (!DRMACH_IS_DEVICE_ID(id))
5655 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5656 	dp = id;
5657 
5658 	*dip = dp->node->n_getdip(dp->node);
5659 	return (NULL);
5660 }
5661 
5662 sbd_error_t *
5663 drmach_io_is_attached(drmachid_t id, int *yes)
5664 {
5665 	drmach_device_t *dp;
5666 	dev_info_t	*dip;
5667 	int state;
5668 
5669 	if (!DRMACH_IS_IO_ID(id))
5670 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5671 	dp = id;
5672 
5673 	dip = dp->node->n_getdip(dp->node);
5674 	if (dip == NULL) {
5675 		*yes = 0;
5676 		return (NULL);
5677 	}
5678 
5679 	state = ddi_get_devstate(dip);
5680 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5681 
5682 	return (NULL);
5683 }
5684 
5685 static int
5686 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5687 {
5688 	char			dtype[OBP_MAXPROPNAME];
5689 	int			portid;
5690 	uint_t			pci_csr_base;
5691 	struct pci_phys_spec	*regbuf = NULL;
5692 	int			rv, len;
5693 
5694 	ASSERT(dip != NULL);
5695 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5696 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5697 		return (0);
5698 
5699 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5700 	    (caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5701 
5702 		if (strncmp(dtype, "pci", 3) == 0) {
5703 
5704 			/*
5705 			 * Get safari portid. All schizo/xmits 0
5706 			 * safari IDs end in 0x1C.
5707 			 */
5708 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid",
5709 			    &len);
5710 
5711 			if ((rv != DDI_PROP_SUCCESS) ||
5712 			    (len > sizeof (portid)))
5713 				return (0);
5714 
5715 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5716 			    "portid", (caddr_t)&portid, &len);
5717 
5718 			if (rv != DDI_PROP_SUCCESS)
5719 				return (0);
5720 
5721 			if ((portid & 0x1F) != 0x1C)
5722 				return (0);
5723 
5724 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5725 			    DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5726 			    &len) == DDI_PROP_SUCCESS) {
5727 
5728 				pci_csr_base = regbuf[0].pci_phys_mid &
5729 				    PCI_CONF_ADDR_MASK;
5730 				kmem_free(regbuf, len);
5731 				/*
5732 				 * All PCI B-Leafs are at configspace 0x70.0000.
5733 				 */
5734 				if (pci_csr_base == 0x700000)
5735 					return (1);
5736 			}
5737 		}
5738 	}
5739 	return (0);
5740 }
5741 
5742 #define	SCHIZO_BINDING_NAME		"pci108e,8001"
5743 #define	XMITS_BINDING_NAME		"pci108e,8002"
5744 
5745 /*
5746  * Verify if the dip is an instance of MAN 'eri'.
5747  */
5748 static int
5749 drmach_dip_is_man_eri(dev_info_t *dip)
5750 {
5751 	struct pci_phys_spec	*regbuf = NULL;
5752 	dev_info_t		*parent_dip;
5753 	char			*name;
5754 	uint_t			pci_device;
5755 	uint_t			pci_function;
5756 	int			len;
5757 
5758 	if (dip == NULL)
5759 		return (0);
5760 	/*
5761 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5762 	 */
5763 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5764 	    ((name = ddi_binding_name(parent_dip)) == NULL))
5765 		return (0);
5766 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5767 		/*
5768 		 * This RIO could be on XMITS, so get the dip to
5769 		 * XMITS PCI Leaf.
5770 		 */
5771 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5772 			return (0);
5773 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5774 		    (strcmp(name, XMITS_BINDING_NAME) != 0)) {
5775 			return (0);
5776 		}
5777 	}
5778 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5779 		return (0);
5780 	/*
5781 	 * Finally make sure it is the MAN eri.
5782 	 */
5783 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5784 	    "reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5785 
5786 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5787 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5788 		kmem_free(regbuf, len);
5789 
5790 		/*
5791 		 * The network function of the RIO ASIC will always be
5792 		 * device 3 and function 1 ("network@3,1").
5793 		 */
5794 		if ((pci_device == 3) && (pci_function == 1))
5795 			return (1);
5796 	}
5797 	return (0);
5798 }
5799 
5800 typedef struct {
5801 	int		iosram_inst;
5802 	dev_info_t	*eri_dip;
5803 	int		bnum;
5804 } drmach_io_inst_t;
5805 
5806 int
5807 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5808 {
5809 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5810 
5811 	int	rv;
5812 	int	len;
5813 	int	portid;
5814 	char	name[OBP_MAXDRVNAME];
5815 
5816 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5817 
5818 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5819 		return (DDI_WALK_CONTINUE);
5820 	}
5821 
5822 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5823 	    "portid", (caddr_t)&portid, &len);
5824 	if (rv != DDI_PROP_SUCCESS)
5825 		return (DDI_WALK_CONTINUE);
5826 
5827 	/* ignore devices that are not on this board */
5828 	if (drmach_portid2bnum(portid) != ios->bnum)
5829 		return (DDI_WALK_CONTINUE);
5830 
5831 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5832 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "name", &len);
5833 		if (rv == DDI_PROP_SUCCESS) {
5834 
5835 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5836 			    0, "name",
5837 			    (caddr_t)name, &len);
5838 			if (rv != DDI_PROP_SUCCESS)
5839 				return (DDI_WALK_CONTINUE);
5840 
5841 			if (strncmp("iosram", name, 6) == 0) {
5842 				ios->iosram_inst = ddi_get_instance(dip);
5843 				if (ios->eri_dip == NULL)
5844 					return (DDI_WALK_CONTINUE);
5845 				else
5846 					return (DDI_WALK_TERMINATE);
5847 			} else {
5848 				if (drmach_dip_is_man_eri(dip)) {
5849 					ASSERT(ios->eri_dip == NULL);
5850 					ndi_hold_devi(dip);
5851 					ios->eri_dip = dip;
5852 					if (ios->iosram_inst < 0)
5853 						return (DDI_WALK_CONTINUE);
5854 					else
5855 						return (DDI_WALK_TERMINATE);
5856 				}
5857 			}
5858 		}
5859 	}
5860 	return (DDI_WALK_CONTINUE);
5861 }
5862 
5863 sbd_error_t *
5864 drmach_io_pre_release(drmachid_t id)
5865 {
5866 	drmach_io_inst_t	ios;
5867 	drmach_board_t		*bp;
5868 	int			rv = 0;
5869 	sbd_error_t		*err = NULL;
5870 	drmach_device_t		*dp;
5871 	dev_info_t		*rdip;
5872 	int			circ;
5873 
5874 	if (!DRMACH_IS_IO_ID(id))
5875 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5876 	dp = id;
5877 	bp = dp->bp;
5878 
5879 	rdip = dp->node->n_getdip(dp->node);
5880 
5881 	/* walk device tree to find iosram instance for the board */
5882 	ios.iosram_inst = -1;
5883 	ios.eri_dip = NULL;
5884 	ios.bnum = bp->bnum;
5885 
5886 	ndi_devi_enter(rdip, &circ);
5887 	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
5888 	    (void *)&ios);
5889 
5890 	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
5891 	    ios.bnum, ios.iosram_inst, (void *)ios.eri_dip);
5892 	ndi_devi_exit(rdip, circ);
5893 
5894 	if (ios.eri_dip) {
5895 		/*
5896 		 * Release hold acquired in drmach_board_find_io_insts()
5897 		 */
5898 		ndi_rele_devi(ios.eri_dip);
5899 	}
5900 	if (ios.iosram_inst >= 0) {
5901 		/* call for tunnel switch */
5902 		do {
5903 			DRMACH_PR("calling iosram_switchfrom(%d)\n",
5904 			    ios.iosram_inst);
5905 			rv = iosram_switchfrom(ios.iosram_inst);
5906 			if (rv)
5907 				DRMACH_PR("iosram_switchfrom returned %d\n",
5908 				    rv);
5909 		} while (rv == EAGAIN);
5910 
5911 		if (rv)
5912 			err = drerr_new(0, ESTC_IOSWITCH, NULL);
5913 	}
5914 	return (err);
5915 }
5916 
5917 sbd_error_t *
5918 drmach_io_unrelease(drmachid_t id)
5919 {
5920 	dev_info_t	*dip;
5921 	sbd_error_t	*err = NULL;
5922 	drmach_device_t	*dp;
5923 
5924 	if (!DRMACH_IS_IO_ID(id))
5925 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5926 	dp = id;
5927 
5928 	dip = dp->node->n_getdip(dp->node);
5929 
5930 	if (dip == NULL)
5931 		err = DRMACH_INTERNAL_ERROR();
5932 	else {
5933 		int (*func)(dev_info_t *dip);
5934 
5935 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
5936 		    0);
5937 
5938 		if (func) {
5939 			drmach_io_inst_t ios;
5940 			dev_info_t	*pdip;
5941 			int		circ;
5942 
5943 			/*
5944 			 * Walk device tree to find rio dip for the board
5945 			 * Since we are not interested in iosram instance here,
5946 			 * initialize it to 0, so that the walk terminates as
5947 			 * soon as eri dip is found.
5948 			 */
5949 			ios.iosram_inst = 0;
5950 			ios.eri_dip = NULL;
5951 			ios.bnum = dp->bp->bnum;
5952 
5953 			if (pdip = ddi_get_parent(dip)) {
5954 				ndi_hold_devi(pdip);
5955 				ndi_devi_enter(pdip, &circ);
5956 			}
5957 			/*
5958 			 * Root node doesn't have to be held in any way.
5959 			 */
5960 			ddi_walk_devs(dip, drmach_board_find_io_insts,
5961 			    (void *)&ios);
5962 
5963 			if (pdip) {
5964 				ndi_devi_exit(pdip, circ);
5965 				ndi_rele_devi(pdip);
5966 			}
5967 
5968 			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
5969 			    ios.bnum, (void *)ios.eri_dip);
5970 
5971 			if (ios.eri_dip) {
5972 				DRMACH_PR("calling man_dr_attach\n");
5973 				if ((*func)(ios.eri_dip))
5974 					err = drerr_new(0, ESTC_NWSWITCH, NULL);
5975 				/*
5976 				 * Release hold acquired in
5977 				 * drmach_board_find_io_insts()
5978 				 */
5979 				ndi_rele_devi(ios.eri_dip);
5980 			}
5981 		} else
5982 			DRMACH_PR("man_dr_attach NOT present\n");
5983 	}
5984 	return (err);
5985 }
5986 
5987 static sbd_error_t *
5988 drmach_io_release(drmachid_t id)
5989 {
5990 	dev_info_t	*dip;
5991 	sbd_error_t	*err = NULL;
5992 	drmach_device_t	*dp;
5993 
5994 	if (!DRMACH_IS_IO_ID(id))
5995 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5996 	dp = id;
5997 
5998 	dip = dp->node->n_getdip(dp->node);
5999 
6000 	if (dip == NULL)
6001 		err = DRMACH_INTERNAL_ERROR();
6002 	else {
6003 		int (*func)(dev_info_t *dip);
6004 
6005 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
6006 		    0);
6007 
6008 		if (func) {
6009 			drmach_io_inst_t ios;
6010 			dev_info_t	*pdip;
6011 			int		circ;
6012 
6013 			/*
6014 			 * Walk device tree to find rio dip for the board
6015 			 * Since we are not interested in iosram instance here,
6016 			 * initialize it to 0, so that the walk terminates as
6017 			 * soon as eri dip is found.
6018 			 */
6019 			ios.iosram_inst = 0;
6020 			ios.eri_dip = NULL;
6021 			ios.bnum = dp->bp->bnum;
6022 
6023 			if (pdip = ddi_get_parent(dip)) {
6024 				ndi_hold_devi(pdip);
6025 				ndi_devi_enter(pdip, &circ);
6026 			}
6027 			/*
6028 			 * Root node doesn't have to be held in any way.
6029 			 */
6030 			ddi_walk_devs(dip, drmach_board_find_io_insts,
6031 			    (void *)&ios);
6032 
6033 			if (pdip) {
6034 				ndi_devi_exit(pdip, circ);
6035 				ndi_rele_devi(pdip);
6036 			}
6037 
6038 			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
6039 			    ios.bnum, (void *)ios.eri_dip);
6040 
6041 			if (ios.eri_dip) {
6042 				DRMACH_PR("calling man_dr_detach\n");
6043 				if ((*func)(ios.eri_dip))
6044 					err = drerr_new(0, ESTC_NWSWITCH, NULL);
6045 				/*
6046 				 * Release hold acquired in
6047 				 * drmach_board_find_io_insts()
6048 				 */
6049 				ndi_rele_devi(ios.eri_dip);
6050 			}
6051 		} else
6052 			DRMACH_PR("man_dr_detach NOT present\n");
6053 	}
6054 	return (err);
6055 }
6056 
6057 sbd_error_t *
6058 drmach_io_post_release(drmachid_t id)
6059 {
6060 	char 		*path;
6061 	dev_info_t	*rdip;
6062 	drmach_device_t	*dp;
6063 
6064 	if (!DRMACH_IS_DEVICE_ID(id))
6065 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6066 	dp = id;
6067 
6068 	rdip = dp->node->n_getdip(dp->node);
6069 
6070 	/*
6071 	 * Always called after drmach_unconfigure() which on Starcat
6072 	 * unconfigures the branch but doesn't remove it so the
6073 	 * dip must always exist.
6074 	 */
6075 	ASSERT(rdip);
6076 
6077 	ASSERT(e_ddi_branch_held(rdip));
6078 #ifdef DEBUG
6079 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6080 	(void) ddi_pathname(rdip, path);
6081 	DRMACH_PR("post_release dip path is: %s\n", path);
6082 	kmem_free(path, MAXPATHLEN);
6083 #endif
6084 
6085 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6086 		if (schpc_remove_pci(rdip)) {
6087 			DRMACH_PR("schpc_remove_pci failed\n");
6088 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6089 		} else {
6090 			DRMACH_PR("schpc_remove_pci succeeded\n");
6091 		}
6092 	}
6093 
6094 	return (NULL);
6095 }
6096 
6097 sbd_error_t *
6098 drmach_io_post_attach(drmachid_t id)
6099 {
6100 	int		circ;
6101 	dev_info_t	*dip;
6102 	dev_info_t	*pdip;
6103 	drmach_device_t	*dp;
6104 	drmach_io_inst_t ios;
6105 
6106 	if (!DRMACH_IS_DEVICE_ID(id))
6107 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6108 	dp = id;
6109 
6110 	dip = dp->node->n_getdip(dp->node);
6111 
6112 	/*
6113 	 * We held the branch rooted at dip earlier, so at a minimum the
6114 	 * root i.e. dip must be present in the device tree.
6115 	 */
6116 	ASSERT(dip);
6117 
6118 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6119 		if (schpc_add_pci(dip)) {
6120 			DRMACH_PR("schpc_add_pci failed\n");
6121 		} else {
6122 			DRMACH_PR("schpc_add_pci succeeded\n");
6123 		}
6124 	}
6125 
6126 	/*
6127 	 * Walk device tree to find rio dip for the board
6128 	 * Since we are not interested in iosram instance here,
6129 	 * initialize it to 0, so that the walk terminates as
6130 	 * soon as eri dip is found.
6131 	 */
6132 	ios.iosram_inst = 0;
6133 	ios.eri_dip = NULL;
6134 	ios.bnum = dp->bp->bnum;
6135 
6136 	if (pdip = ddi_get_parent(dip)) {
6137 		ndi_hold_devi(pdip);
6138 		ndi_devi_enter(pdip, &circ);
6139 	}
6140 	/*
6141 	 * Root node doesn't have to be held in any way.
6142 	 */
6143 	ddi_walk_devs(dip, drmach_board_find_io_insts, (void *)&ios);
6144 	if (pdip) {
6145 		ndi_devi_exit(pdip, circ);
6146 		ndi_rele_devi(pdip);
6147 	}
6148 
6149 	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
6150 	    ios.bnum, (void *)ios.eri_dip);
6151 
6152 	if (ios.eri_dip) {
6153 		int (*func)(dev_info_t *dip);
6154 
6155 		func =
6156 		    (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);
6157 
6158 		if (func) {
6159 			DRMACH_PR("calling man_dr_attach\n");
6160 			(void) (*func)(ios.eri_dip);
6161 		} else {
6162 			DRMACH_PR("man_dr_attach NOT present\n");
6163 		}
6164 
6165 		/*
6166 		 * Release hold acquired in drmach_board_find_io_insts()
6167 		 */
6168 		ndi_rele_devi(ios.eri_dip);
6169 
6170 	}
6171 
6172 	return (NULL);
6173 }
6174 
6175 static sbd_error_t *
6176 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6177 {
6178 	drmach_device_t *dp;
6179 	sbd_error_t	*err;
6180 	int		 configured;
6181 
6182 	ASSERT(DRMACH_IS_IO_ID(id));
6183 	dp = id;
6184 
6185 	err = drmach_io_is_attached(id, &configured);
6186 	if (err)
6187 		return (err);
6188 
6189 	stat->assigned = dp->bp->assigned;
6190 	stat->powered = dp->bp->powered;
6191 	stat->configured = (configured != 0);
6192 	stat->busy = dp->busy;
6193 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
6194 	stat->info[0] = '\0';
6195 
6196 	return (NULL);
6197 }
6198 
6199 sbd_error_t *
6200 drmach_mem_init_size(drmachid_t id)
6201 {
6202 	drmach_mem_t	*mp;
6203 	sbd_error_t	*err;
6204 	gdcd_t		*gdcd;
6205 	mem_chunk_t	*chunk;
6206 	uint64_t	 chunks, pa, mask, sz;
6207 
6208 	if (!DRMACH_IS_MEM_ID(id))
6209 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6210 	mp = id;
6211 
6212 	err = drmach_mem_get_base_physaddr(id, &pa);
6213 	if (err)
6214 		return (err);
6215 
6216 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6217 	pa &= mask;
6218 
6219 	gdcd = drmach_gdcd_new();
6220 	if (gdcd == NULL)
6221 		return (DRMACH_INTERNAL_ERROR());
6222 
6223 	sz = 0;
6224 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6225 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6226 	while (chunks-- != 0) {
6227 		if ((chunk->mc_base_pa & mask) == pa) {
6228 			sz += chunk->mc_mbytes * 1048576;
6229 		}
6230 
6231 		++chunk;
6232 	}
6233 	mp->nbytes = sz;
6234 
6235 	drmach_gdcd_dispose(gdcd);
6236 	return (NULL);
6237 }
6238 
6239 /*
6240  * Hardware registers are organized into consecutively
6241  * addressed registers.  The reg property's hi and lo fields
6242  * together describe the base address of the register set for
6243  * this memory-controller.  Register descriptions and offsets
6244  * (from the base address) are as follows:
6245  *
6246  * Description				Offset	Size (bytes)
6247  * Memory Timing Control Register I	0x00	8
6248  * Memory Timing Control Register II	0x08	8
6249  * Memory Address Decoding Register I	0x10	8
6250  * Memory Address Decoding Register II	0x18	8
6251  * Memory Address Decoding Register III	0x20	8
6252  * Memory Address Decoding Register IV	0x28	8
6253  * Memory Address Control Register	0x30	8
6254  * Memory Timing Control Register III	0x38	8
6255  * Memory Timing Control Register IV	0x40	8
6256  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6257  * EMU Activity Status Register		0x50	8 (Panther only)
6258  *
6259  * Only the Memory Address Decoding Register and EMU Activity Status
6260  * Register addresses are needed for DRMACH.
6261  */
6262 static sbd_error_t *
6263 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
6264 {
6265 	sbd_error_t	*err;
6266 	uint64_t	 madr_pa;
6267 	drmach_mem_t	*mp;
6268 	int		 bank, count;
6269 
6270 	err = drmach_read_reg_addr(proto, &madr_pa);
6271 	if (err)
6272 		return (err);
6273 
6274 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
6275 	bcopy(proto, &mp->dev, sizeof (mp->dev));
6276 	mp->dev.node = drmach_node_dup(proto->node);
6277 	mp->dev.cm.isa = (void *)drmach_mem_new;
6278 	mp->dev.cm.dispose = drmach_mem_dispose;
6279 	mp->dev.cm.release = drmach_mem_release;
6280 	mp->dev.cm.status = drmach_mem_status;
6281 	mp->madr_pa = madr_pa;
6282 
6283 	(void) snprintf(mp->dev.cm.name,
6284 	    sizeof (mp->dev.cm.name), "%s", mp->dev.type);
6285 
6286 	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6287 		uint64_t madr;
6288 
6289 		drmach_mem_read_madr(mp, bank, &madr);
6290 		if (madr & DRMACH_MC_VALID_MASK) {
6291 			count += 1;
6292 			break;
6293 		}
6294 	}
6295 
6296 	/*
6297 	 * If none of the banks had their valid bit set, that means
6298 	 * post did not configure this MC to participate in the
6299 	 * domain.  So, pretend this node does not exist by returning
6300 	 * a drmachid of zero.
6301 	 */
6302 	if (count == 0) {
6303 		/* drmach_mem_dispose frees board mem list */
6304 		drmach_node_dispose(mp->dev.node);
6305 		kmem_free(mp, sizeof (*mp));
6306 		*idp = (drmachid_t)0;
6307 		return (NULL);
6308 	}
6309 
6310 	/*
6311 	 * Only one mem unit per board is exposed to the
6312 	 * PIM layer.  The first mem unit encountered during
6313 	 * tree walk is used to represent all mem units on
6314 	 * the same board.
6315 	 */
6316 	if (mp->dev.bp->mem == NULL) {
6317 		/* start list of mem units on this board */
6318 		mp->dev.bp->mem = mp;
6319 
6320 		/*
6321 		 * force unum to zero since this is the only mem unit
6322 		 * that will be visible to the PIM layer.
6323 		 */
6324 		mp->dev.unum = 0;
6325 
6326 		/*
6327 		 * board memory size kept in this mem unit only
6328 		 */
6329 		err = drmach_mem_init_size(mp);
6330 		if (err) {
6331 			mp->dev.bp->mem = NULL;
6332 			/* drmach_mem_dispose frees board mem list */
6333 			drmach_node_dispose(mp->dev.node);
6334 			kmem_free(mp, sizeof (*mp));
6335 			*idp = (drmachid_t)0;
6336 			return (NULL);
6337 		}
6338 
6339 		/*
6340 		 * allow this instance (the first encountered on this board)
6341 		 * to be visible to the PIM layer.
6342 		 */
6343 		*idp = (drmachid_t)mp;
6344 	} else {
6345 		drmach_mem_t *lp;
6346 
6347 		/* hide this mem instance behind the first. */
6348 		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
6349 			;
6350 		lp->next = mp;
6351 
6352 		/*
6353 		 * hide this instance from the caller.
6354 		 * See drmach_board_find_devices_cb() for details.
6355 		 */
6356 		*idp = (drmachid_t)0;
6357 	}
6358 
6359 	return (NULL);
6360 }
6361 
6362 static void
6363 drmach_mem_dispose(drmachid_t id)
6364 {
6365 	drmach_mem_t *mp, *next;
6366 	drmach_board_t *bp;
6367 
6368 	ASSERT(DRMACH_IS_MEM_ID(id));
6369 
6370 	mutex_enter(&drmach_bus_sync_lock);
6371 
6372 	mp = id;
6373 	bp = mp->dev.bp;
6374 
6375 	do {
6376 		if (mp->dev.node)
6377 			drmach_node_dispose(mp->dev.node);
6378 
6379 		next = mp->next;
6380 		kmem_free(mp, sizeof (*mp));
6381 		mp = next;
6382 	} while (mp);
6383 
6384 	bp->mem = NULL;
6385 
6386 	drmach_bus_sync_list_update();
6387 	mutex_exit(&drmach_bus_sync_lock);
6388 }
6389 
6390 sbd_error_t *
6391 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6392 {
6393 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6394 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6395 	int		rv;
6396 
6397 	ASSERT(size != 0);
6398 
6399 	if (!DRMACH_IS_MEM_ID(id))
6400 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6401 
6402 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
6403 	if (rv == ENOMEM) {
6404 		cmn_err(CE_WARN, "%lu megabytes not available"
6405 		    " to kernel cage", size >> 20);
6406 	} else if (rv != 0) {
6407 		/* catch this in debug kernels */
6408 		ASSERT(0);
6409 
6410 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6411 		    " return value %d", rv);
6412 	}
6413 
6414 	return (NULL);
6415 }
6416 
6417 sbd_error_t *
6418 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6419 {
6420 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6421 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6422 	int		 rv;
6423 
6424 	if (!DRMACH_IS_MEM_ID(id))
6425 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6426 
6427 	if (size > 0) {
6428 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6429 		if (rv != 0) {
6430 			cmn_err(CE_WARN,
6431 			    "unexpected kcage_range_delete_post_mem_del"
6432 			    " return value %d", rv);
6433 			return (DRMACH_INTERNAL_ERROR());
6434 		}
6435 	}
6436 
6437 	return (NULL);
6438 }
6439 
6440 sbd_error_t *
6441 drmach_mem_disable(drmachid_t id)
6442 {
6443 	if (!DRMACH_IS_MEM_ID(id))
6444 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6445 	else
6446 		return (NULL);
6447 }
6448 
6449 sbd_error_t *
6450 drmach_mem_enable(drmachid_t id)
6451 {
6452 	if (!DRMACH_IS_MEM_ID(id))
6453 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6454 	else
6455 		return (NULL);
6456 }
6457 
6458 sbd_error_t *
6459 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
6460 {
6461 #define	MB(mb) ((mb) * 1048576ull)
6462 
6463 	static struct {
6464 		uint_t		uk;
6465 		uint64_t	segsz;
6466 	}  uk2segsz[] = {
6467 		{ 0x003,	MB(256)	  },
6468 		{ 0x007,	MB(512)	  },
6469 		{ 0x00f,	MB(1024)  },
6470 		{ 0x01f,	MB(2048)  },
6471 		{ 0x03f,	MB(4096)  },
6472 		{ 0x07f,	MB(8192)  },
6473 		{ 0x0ff,	MB(16384) },
6474 		{ 0x1ff,	MB(32768) },
6475 		{ 0x3ff,	MB(65536) },
6476 		{ 0x7ff,	MB(131072) }
6477 	};
6478 	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);
6479 
6480 #undef MB
6481 
6482 	uint64_t	 largest_sz = 0;
6483 	drmach_mem_t	*mp;
6484 
6485 	if (!DRMACH_IS_MEM_ID(id))
6486 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6487 
6488 	/* prime the result with a default value */
6489 	*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6490 
6491 	for (mp = id; mp; mp = mp->next) {
6492 		int bank;
6493 
6494 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6495 			int		i;
6496 			uint_t		uk;
6497 			uint64_t	madr;
6498 
6499 			/* get register value, extract uk and normalize */
6500 			drmach_mem_read_madr(mp, bank, &madr);
6501 
6502 			if (!(madr & DRMACH_MC_VALID_MASK))
6503 				continue;
6504 
6505 			uk = DRMACH_MC_UK(madr);
6506 
6507 			/* match uk value */
6508 			for (i = 0; i < len; i++)
6509 				if (uk == uk2segsz[i].uk)
6510 					break;
6511 
6512 			if (i < len) {
6513 				uint64_t sz = uk2segsz[i].segsz;
6514 
6515 				/*
6516 				 * remember largest segment size,
6517 				 * update mask result
6518 				 */
6519 				if (sz > largest_sz) {
6520 					largest_sz = sz;
6521 					*mask = sz - 1;
6522 				}
6523 			} else {
6524 				/*
6525 				 * uk not in table, punt using
6526 				 * entire slice size. no longer any
6527 				 * reason to check other banks.
6528 				 */
6529 				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6530 				return (NULL);
6531 			}
6532 		}
6533 	}
6534 
6535 	return (NULL);
6536 }
6537 
6538 sbd_error_t *
6539 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6540 {
6541 	drmach_mem_t *mp;
6542 
6543 	if (!DRMACH_IS_MEM_ID(id))
6544 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6545 
6546 	*base_addr = (uint64_t)-1;
6547 	for (mp = id; mp; mp = mp->next) {
6548 		int bank;
6549 
6550 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6551 			uint64_t addr, madr;
6552 
6553 			drmach_mem_read_madr(mp, bank, &madr);
6554 			if (madr & DRMACH_MC_VALID_MASK) {
6555 				addr = DRMACH_MC_UM_TO_PA(madr) |
6556 				    DRMACH_MC_LM_TO_PA(madr);
6557 
6558 				if (addr < *base_addr)
6559 					*base_addr = addr;
6560 			}
6561 		}
6562 	}
6563 
6564 	/* should not happen, but ... */
6565 	if (*base_addr == (uint64_t)-1)
6566 		return (DRMACH_INTERNAL_ERROR());
6567 
6568 	return (NULL);
6569 }
6570 
6571 void
6572 drmach_bus_sync_list_update(void)
6573 {
6574 	int		rv, idx, cnt = 0;
6575 	drmachid_t	id;
6576 
6577 	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));
6578 
6579 	rv = drmach_array_first(drmach_boards, &idx, &id);
6580 	while (rv == 0) {
6581 		drmach_board_t		*bp = id;
6582 		drmach_mem_t		*mp = bp->mem;
6583 
6584 		while (mp) {
6585 			int bank;
6586 
6587 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6588 				uint64_t madr;
6589 
6590 				drmach_mem_read_madr(mp, bank, &madr);
6591 				if (madr & DRMACH_MC_VALID_MASK) {
6592 					uint64_t pa;
6593 
6594 					pa  = DRMACH_MC_UM_TO_PA(madr);
6595 					pa |= DRMACH_MC_LM_TO_PA(madr);
6596 
6597 					/*
6598 					 * The list is zero terminated.
6599 					 * Offset the pa by a doubleword
6600 					 * to avoid confusing a pa value of
6601 					 * of zero with the terminator.
6602 					 */
6603 					pa += sizeof (uint64_t);
6604 
6605 					drmach_bus_sync_list[cnt++] = pa;
6606 				}
6607 			}
6608 
6609 			mp = mp->next;
6610 		}
6611 
6612 		rv = drmach_array_next(drmach_boards, &idx, &id);
6613 	}
6614 
6615 	drmach_bus_sync_list[cnt] = 0;
6616 }
6617 
6618 sbd_error_t *
6619 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6620 {
6621 	sbd_error_t	*err;
6622 	struct memlist	*mlist;
6623 	gdcd_t		*gdcd;
6624 	mem_chunk_t	*chunk;
6625 	uint64_t	 chunks, pa, mask;
6626 
6627 	err = drmach_mem_get_base_physaddr(id, &pa);
6628 	if (err)
6629 		return (err);
6630 
6631 	gdcd = drmach_gdcd_new();
6632 	if (gdcd == NULL)
6633 		return (DRMACH_INTERNAL_ERROR());
6634 
6635 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6636 	pa &= mask;
6637 
6638 	mlist = NULL;
6639 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6640 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6641 	while (chunks-- != 0) {
6642 		if ((chunk->mc_base_pa & mask) == pa) {
6643 			mlist = memlist_add_span(mlist, chunk->mc_base_pa,
6644 			    chunk->mc_mbytes * 1048576);
6645 		}
6646 
6647 		++chunk;
6648 	}
6649 
6650 	drmach_gdcd_dispose(gdcd);
6651 
6652 #ifdef DEBUG
6653 	DRMACH_PR("GDCD derived memlist:");
6654 	memlist_dump(mlist);
6655 #endif
6656 
6657 	*ml = mlist;
6658 	return (NULL);
6659 }
6660 
6661 sbd_error_t *
6662 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6663 {
6664 	drmach_mem_t	*mp;
6665 
6666 	if (!DRMACH_IS_MEM_ID(id))
6667 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6668 	mp = id;
6669 
6670 	ASSERT(mp->nbytes != 0);
6671 	*bytes = mp->nbytes;
6672 
6673 	return (NULL);
6674 }
6675 
6676 sbd_error_t *
6677 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6678 {
6679 	sbd_error_t	*err;
6680 	drmach_device_t	*mp;
6681 
6682 	if (!DRMACH_IS_MEM_ID(id))
6683 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6684 	mp = id;
6685 
6686 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6687 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6688 			err = NULL;
6689 			break;
6690 
6691 		case 1: *bytes = 0;
6692 			err = NULL;
6693 			break;
6694 
6695 		default:
6696 			err = DRMACH_INTERNAL_ERROR();
6697 			break;
6698 	}
6699 
6700 	return (err);
6701 }
6702 
6703 processorid_t drmach_mem_cpu_affinity_nail;
6704 
6705 processorid_t
6706 drmach_mem_cpu_affinity(drmachid_t id)
6707 {
6708 	drmach_device_t	*mp;
6709 	drmach_board_t	*bp;
6710 	processorid_t	 cpuid;
6711 
6712 	if (!DRMACH_IS_MEM_ID(id))
6713 		return (CPU_CURRENT);
6714 
6715 	if (drmach_mem_cpu_affinity_nail) {
6716 		cpuid = drmach_mem_cpu_affinity_nail;
6717 
6718 		if (cpuid < 0 || cpuid > NCPU)
6719 			return (CPU_CURRENT);
6720 
6721 		mutex_enter(&cpu_lock);
6722 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6723 			cpuid = CPU_CURRENT;
6724 		mutex_exit(&cpu_lock);
6725 
6726 		return (cpuid);
6727 	}
6728 
6729 	/* try to choose a proc on the target board */
6730 	mp = id;
6731 	bp = mp->bp;
6732 	if (bp->devices) {
6733 		int		 rv;
6734 		int		 d_idx;
6735 		drmachid_t	 d_id;
6736 
6737 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6738 		while (rv == 0) {
6739 			if (DRMACH_IS_CPU_ID(d_id)) {
6740 				drmach_cpu_t	*cp = d_id;
6741 
6742 				mutex_enter(&cpu_lock);
6743 				cpuid = cp->cpuid;
6744 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6745 					mutex_exit(&cpu_lock);
6746 					return (cpuid);
6747 				} else {
6748 					mutex_exit(&cpu_lock);
6749 				}
6750 			}
6751 
6752 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6753 		}
6754 	}
6755 
6756 	/* otherwise, this proc, wherever it is */
6757 	return (CPU_CURRENT);
6758 }
6759 
6760 static sbd_error_t *
6761 drmach_mem_release(drmachid_t id)
6762 {
6763 	if (!DRMACH_IS_MEM_ID(id))
6764 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6765 	return (NULL);
6766 }
6767 
6768 static sbd_error_t *
6769 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6770 {
6771 	drmach_mem_t	*mp;
6772 	sbd_error_t	*err;
6773 	uint64_t	 pa, slice_size;
6774 	struct memlist	*ml;
6775 
6776 	ASSERT(DRMACH_IS_MEM_ID(id));
6777 	mp = id;
6778 
6779 	/* get starting physical address of target memory */
6780 	err = drmach_mem_get_base_physaddr(id, &pa);
6781 	if (err)
6782 		return (err);
6783 
6784 	/* round down to slice boundary */
6785 	slice_size = DRMACH_MEM_SLICE_SIZE;
6786 	pa &= ~ (slice_size - 1);
6787 
6788 	/* stop at first span that is in slice */
6789 	memlist_read_lock();
6790 	for (ml = phys_install; ml; ml = ml->ml_next)
6791 		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
6792 			break;
6793 	memlist_read_unlock();
6794 
6795 	stat->assigned = mp->dev.bp->assigned;
6796 	stat->powered = mp->dev.bp->powered;
6797 	stat->configured = (ml != NULL);
6798 	stat->busy = mp->dev.busy;
6799 	(void) strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6800 	stat->info[0] = '\0';
6801 
6802 	return (NULL);
6803 }
6804 
6805 sbd_error_t *
6806 drmach_board_deprobe(drmachid_t id)
6807 {
6808 	drmach_board_t	*bp;
6809 	sbd_error_t	*err = NULL;
6810 
6811 	if (!DRMACH_IS_BOARD_ID(id))
6812 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6813 	bp = id;
6814 
6815 	if (bp->tree) {
6816 		drmach_node_dispose(bp->tree);
6817 		bp->tree = NULL;
6818 	}
6819 	if (bp->devices) {
6820 		drmach_array_dispose(bp->devices, drmach_device_dispose);
6821 		bp->devices = NULL;
6822 		bp->mem = NULL;  /* TODO: still needed? */
6823 	}
6824 	return (err);
6825 }
6826 
6827 /*ARGSUSED1*/
6828 static sbd_error_t *
6829 drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6830 {
6831 	drmach_device_t	*dp;
6832 	uint64_t	val;
6833 	int		err = 1;
6834 
6835 	if (DRMACH_IS_CPU_ID(id)) {
6836 		drmach_cpu_t *cp = id;
6837 		if (drmach_cpu_read_scr(cp, &val))
6838 			err = 0;
6839 	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6840 		drmach_io_t *io = id;
6841 		val = lddphysio(io->scsr_pa);
6842 		err = 0;
6843 	}
6844 	if (err)
6845 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6846 
6847 	dp = id;
6848 	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6849 	    dp->bp->cm.name,
6850 	    dp->cm.name,
6851 	    dp->portid,
6852 	    (long)(DRMACH_LPA_BASE_TO_PA(val)),
6853 	    (long)(DRMACH_LPA_BND_TO_PA(val)));
6854 
6855 	return (NULL);
6856 }
6857 
6858 /*ARGSUSED*/
6859 static sbd_error_t *
6860 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6861 {
6862 	drmach_board_t		*bp = (drmach_board_t *)id;
6863 	sbd_error_t		*err;
6864 	sc_gptwocfg_cookie_t	scc;
6865 
6866 	if (!DRMACH_IS_BOARD_ID(id))
6867 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6868 
6869 	/* do saf configurator stuff */
6870 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6871 	scc = sc_probe_board(bp->bnum);
6872 	if (scc == NULL) {
6873 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6874 		return (err);
6875 	}
6876 
6877 	return (err);
6878 }
6879 
6880 /*ARGSUSED*/
6881 static sbd_error_t *
6882 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6883 {
6884 	drmach_board_t	*bp;
6885 	sbd_error_t	*err = NULL;
6886 	sc_gptwocfg_cookie_t	scc;
6887 
6888 	if (!DRMACH_IS_BOARD_ID(id))
6889 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6890 	bp = id;
6891 
6892 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6893 	scc = sc_unprobe_board(bp->bnum);
6894 	if (scc != NULL) {
6895 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6896 	}
6897 
6898 	if (err == NULL)
6899 		err = drmach_board_deprobe(id);
6900 
6901 	return (err);
6902 }
6903 
6904 static sbd_error_t *
6905 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6906 {
6907 	_NOTE(ARGUNUSED(id))
6908 	_NOTE(ARGUNUSED(opts))
6909 
6910 	struct memlist	*ml;
6911 	uint64_t	src_pa;
6912 	uint64_t	dst_pa;
6913 	uint64_t	dst;
6914 
6915 	dst_pa = va_to_pa(&dst);
6916 
6917 	memlist_read_lock();
6918 	for (ml = phys_install; ml; ml = ml->ml_next) {
6919 		uint64_t	nbytes;
6920 
6921 		src_pa = ml->ml_address;
6922 		nbytes = ml->ml_size;
6923 
6924 		while (nbytes != 0ull) {
6925 
6926 			/* copy 32 bytes at src_pa to dst_pa */
6927 			bcopy32_il(src_pa, dst_pa);
6928 
6929 			/* increment by 32 bytes */
6930 			src_pa += (4 * sizeof (uint64_t));
6931 
6932 			/* decrement by 32 bytes */
6933 			nbytes -= (4 * sizeof (uint64_t));
6934 		}
6935 	}
6936 	memlist_read_unlock();
6937 
6938 	return (NULL);
6939 }
6940 
6941 static sbd_error_t *
6942 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6943 {
6944 	_NOTE(ARGUNUSED(opts))
6945 
6946 	drmach_cpu_t	*cp;
6947 
6948 	if (!DRMACH_IS_CPU_ID(id))
6949 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6950 	cp = id;
6951 
6952 	mutex_enter(&cpu_lock);
6953 	(void) drmach_iocage_cpu_return(&(cp->dev),
6954 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6955 	mutex_exit(&cpu_lock);
6956 
6957 	return (NULL);
6958 }
6959 
6960 /*
6961  * Starcat DR passthrus are for debugging purposes only.
6962  */
6963 static struct {
6964 	const char	*name;
6965 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
6966 } drmach_pt_arr[] = {
6967 	{ "showlpa",		drmach_pt_showlpa		},
6968 	{ "ikprobe",		drmach_pt_ikprobe		},
6969 	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
6970 	{ "readmem",		drmach_pt_readmem		},
6971 	{ "recovercpu",		drmach_pt_recovercpu		},
6972 
6973 	/* the following line must always be last */
6974 	{ NULL,			NULL				}
6975 };
6976 
6977 /*ARGSUSED*/
6978 sbd_error_t *
6979 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
6980 {
6981 	int		i;
6982 	sbd_error_t	*err;
6983 
6984 	i = 0;
6985 	while (drmach_pt_arr[i].name != NULL) {
6986 		int len = strlen(drmach_pt_arr[i].name);
6987 
6988 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
6989 			break;
6990 
6991 		i += 1;
6992 	}
6993 
6994 	if (drmach_pt_arr[i].name == NULL)
6995 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
6996 	else
6997 		err = (*drmach_pt_arr[i].handler)(id, opts);
6998 
6999 	return (err);
7000 }
7001 
7002 sbd_error_t *
7003 drmach_release(drmachid_t id)
7004 {
7005 	drmach_common_t *cp;
7006 
7007 	if (!DRMACH_IS_DEVICE_ID(id))
7008 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7009 	cp = id;
7010 
7011 	return (cp->release(id));
7012 }
7013 
7014 sbd_error_t *
7015 drmach_status(drmachid_t id, drmach_status_t *stat)
7016 {
7017 	drmach_common_t *cp;
7018 	sbd_error_t	*err;
7019 
7020 	rw_enter(&drmach_boards_rwlock, RW_READER);
7021 
7022 	if (!DRMACH_IS_ID(id)) {
7023 		rw_exit(&drmach_boards_rwlock);
7024 		return (drerr_new(0, ESTC_NOTID, NULL));
7025 	}
7026 
7027 	cp = id;
7028 
7029 	err = cp->status(id, stat);
7030 	rw_exit(&drmach_boards_rwlock);
7031 	return (err);
7032 }
7033 
7034 static sbd_error_t *
7035 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7036 {
7037 	drmach_common_t *cp;
7038 
7039 	if (!DRMACH_IS_ID(id))
7040 		return (drerr_new(0, ESTC_NOTID, NULL));
7041 	cp = id;
7042 
7043 	return (cp->status(id, stat));
7044 }
7045 
7046 /*ARGSUSED*/
7047 sbd_error_t *
7048 drmach_unconfigure(drmachid_t id, int flags)
7049 {
7050 	drmach_device_t	*dp;
7051 	dev_info_t 	*rdip;
7052 
7053 	char	name[OBP_MAXDRVNAME];
7054 	int rv;
7055 
7056 	/*
7057 	 * Since CPU nodes are not configured, it is
7058 	 * necessary to skip the unconfigure step as
7059 	 * well.
7060 	 */
7061 	if (DRMACH_IS_CPU_ID(id)) {
7062 		return (NULL);
7063 	}
7064 
7065 	for (; id; ) {
7066 		dev_info_t	*fdip = NULL;
7067 
7068 		if (!DRMACH_IS_DEVICE_ID(id))
7069 			return (drerr_new(0, ESTC_INAPPROP, NULL));
7070 		dp = id;
7071 
7072 		rdip = dp->node->n_getdip(dp->node);
7073 
7074 		/*
7075 		 * drmach_unconfigure() is always called on a configured branch.
7076 		 * So the root of the branch was held earlier and must exist.
7077 		 */
7078 		ASSERT(rdip);
7079 
7080 		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");
7081 
7082 		rv = dp->node->n_getprop(dp->node,
7083 		    "name", name, OBP_MAXDRVNAME);
7084 
7085 		/* The node must have a name */
7086 		if (rv)
7087 			return (0);
7088 
7089 		if (drmach_name2type_idx(name) < 0) {
7090 			if (DRMACH_IS_MEM_ID(id)) {
7091 				drmach_mem_t	*mp = id;
7092 				id = mp->next;
7093 			} else {
7094 				id = NULL;
7095 			}
7096 			continue;
7097 		}
7098 
7099 		/*
7100 		 * NOTE: FORCE flag is no longer needed under devfs
7101 		 */
7102 		ASSERT(e_ddi_branch_held(rdip));
7103 		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
7104 			sbd_error_t *err = NULL;
7105 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7106 
7107 			/*
7108 			 * If non-NULL, fdip is returned held and must be
7109 			 * released.
7110 			 */
7111 			if (fdip != NULL) {
7112 				(void) ddi_pathname(fdip, path);
7113 				ddi_release_devi(fdip);
7114 			} else {
7115 				(void) ddi_pathname(rdip, path);
7116 			}
7117 
7118 			err = drerr_new(1, ESTC_DRVFAIL, path);
7119 
7120 			kmem_free(path, MAXPATHLEN);
7121 
7122 			/*
7123 			 * If we were unconfiguring an IO board, a call was
7124 			 * made to man_dr_detach.  We now need to call
7125 			 * man_dr_attach to regain man use of the eri.
7126 			 */
7127 			if (DRMACH_IS_IO_ID(id)) {
7128 				int (*func)(dev_info_t *dip);
7129 
7130 				func = (int (*)(dev_info_t *))kobj_getsymvalue\
7131 				    ("man_dr_attach", 0);
7132 
7133 				if (func) {
7134 					drmach_io_inst_t ios;
7135 					dev_info_t 	*pdip;
7136 					int		circ;
7137 
7138 					/*
7139 					 * Walk device tree to find rio dip for
7140 					 * the board
7141 					 * Since we are not interested in iosram
7142 					 * instance here, initialize it to 0, so
7143 					 * that the walk terminates as soon as
7144 					 * eri dip is found.
7145 					 */
7146 					ios.iosram_inst = 0;
7147 					ios.eri_dip = NULL;
7148 					ios.bnum = dp->bp->bnum;
7149 
7150 					if (pdip = ddi_get_parent(rdip)) {
7151 						ndi_hold_devi(pdip);
7152 						ndi_devi_enter(pdip, &circ);
7153 					}
7154 					/*
7155 					 * Root node doesn't have to be held in
7156 					 * any way.
7157 					 */
7158 					ASSERT(e_ddi_branch_held(rdip));
7159 					ddi_walk_devs(rdip,
7160 					    drmach_board_find_io_insts,
7161 					    (void *)&ios);
7162 
7163 					DRMACH_PR("drmach_unconfigure: bnum=%d"
7164 					    " eri=0x%p\n",
7165 					    ios.bnum, (void *)ios.eri_dip);
7166 
7167 					if (pdip) {
7168 						ndi_devi_exit(pdip, circ);
7169 						ndi_rele_devi(pdip);
7170 					}
7171 
7172 					if (ios.eri_dip) {
7173 						DRMACH_PR("calling"
7174 						    " man_dr_attach\n");
7175 						(void) (*func)(ios.eri_dip);
7176 						/*
7177 						 * Release hold acquired in
7178 						 * drmach_board_find_io_insts()
7179 						 */
7180 						ndi_rele_devi(ios.eri_dip);
7181 					}
7182 				}
7183 			}
7184 			return (err);
7185 		}
7186 
7187 		if (DRMACH_IS_MEM_ID(id)) {
7188 			drmach_mem_t	*mp = id;
7189 			id = mp->next;
7190 		} else {
7191 			id = NULL;
7192 		}
7193 	}
7194 
7195 	return (NULL);
7196 }
7197 
7198 /*
7199  * drmach interfaces to legacy Starfire platmod logic
7200  * linkage via runtime symbol look up, called from plat_cpu_power*
7201  */
7202 
7203 /*
7204  * Start up a cpu.  It is possible that we're attempting to restart
7205  * the cpu after an UNCONFIGURE in which case the cpu will be
7206  * spinning in its cache.  So, all we have to do is wakeup him up.
7207  * Under normal circumstances the cpu will be coming from a previous
7208  * CONNECT and thus will be spinning in OBP.  In both cases, the
7209  * startup sequence is the same.
7210  */
7211 int
7212 drmach_cpu_poweron(struct cpu *cp)
7213 {
7214 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7215 
7216 	ASSERT(MUTEX_HELD(&cpu_lock));
7217 
7218 	if (drmach_cpu_start(cp) != 0)
7219 		return (EBUSY);
7220 	else
7221 		return (0);
7222 }
7223 
7224 int
7225 drmach_cpu_poweroff(struct cpu *cp)
7226 {
7227 	int		ntries;
7228 	processorid_t	cpuid;
7229 	void		drmach_cpu_shutdown_self(void);
7230 
7231 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
7232 
7233 	ASSERT(MUTEX_HELD(&cpu_lock));
7234 
7235 	/*
7236 	 * XXX CHEETAH SUPPORT
7237 	 * for cheetah, we need to grab the iocage lock since iocage
7238 	 * memory is used for e$ flush.
7239 	 */
7240 	if (drmach_is_cheetah) {
7241 		mutex_enter(&drmach_iocage_lock);
7242 		while (drmach_iocage_is_busy)
7243 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7244 		drmach_iocage_is_busy = 1;
7245 		drmach_iocage_mem_scrub(ecache_size * 2);
7246 		mutex_exit(&drmach_iocage_lock);
7247 	}
7248 
7249 	cpuid = cp->cpu_id;
7250 
7251 	/*
7252 	 * Set affinity to ensure consistent reading and writing of
7253 	 * drmach_xt_mb[cpuid] by one "master" CPU directing
7254 	 * the shutdown of the target CPU.
7255 	 */
7256 	affinity_set(CPU->cpu_id);
7257 
7258 	/*
7259 	 * Capture all CPUs (except for detaching proc) to prevent
7260 	 * crosscalls to the detaching proc until it has cleared its
7261 	 * bit in cpu_ready_set.
7262 	 *
7263 	 * The CPUs remain paused and the prom_mutex is known to be free.
7264 	 * This prevents blocking when doing prom IEEE-1275 calls at a
7265 	 * high PIL level.
7266 	 */
7267 	promsafe_pause_cpus();
7268 
7269 	/*
7270 	 * Quiesce interrupts on the target CPU. We do this by setting
7271 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
7272 	 * prevent it from receiving cross calls and cross traps.
7273 	 * This prevents the processor from receiving any new soft interrupts.
7274 	 */
7275 	mp_cpu_quiesce(cp);
7276 
7277 	(void) prom_hotremovecpu(cpuid);
7278 
7279 	start_cpus();
7280 
7281 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
7282 	drmach_xt_mb[cpuid] = 0x80;
7283 
7284 	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
7285 	    (uint64_t)drmach_cpu_shutdown_self, NULL);
7286 
7287 	ntries = drmach_cpu_ntries;
7288 	while (drmach_xt_mb[cpuid] && ntries) {
7289 		DELAY(drmach_cpu_delay);
7290 		ntries--;
7291 	}
7292 
7293 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
7294 
7295 	membar_sync();			/* make sure copy-back retires */
7296 
7297 	affinity_clear();
7298 
7299 	/*
7300 	 * XXX CHEETAH SUPPORT
7301 	 */
7302 	if (drmach_is_cheetah) {
7303 		mutex_enter(&drmach_iocage_lock);
7304 		drmach_iocage_mem_scrub(ecache_size * 2);
7305 		drmach_iocage_is_busy = 0;
7306 		cv_signal(&drmach_iocage_cv);
7307 		mutex_exit(&drmach_iocage_lock);
7308 	}
7309 
7310 	DRMACH_PR("waited %d out of %d tries for "
7311 	    "drmach_cpu_shutdown_self on cpu%d",
7312 	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
7313 
7314 	/*
7315 	 * Do this here instead of drmach_cpu_shutdown_self() to
7316 	 * avoid an assertion failure panic in turnstile.c.
7317 	 */
7318 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
7319 
7320 	return (0);
7321 }
7322 
7323 void
7324 drmach_iocage_mem_scrub(uint64_t nbytes)
7325 {
7326 	extern uint32_t drmach_bc_bzero(void*, size_t);
7327 	uint32_t	rv;
7328 
7329 	ASSERT(MUTEX_HELD(&cpu_lock));
7330 
7331 	affinity_set(CPU->cpu_id);
7332 
7333 	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
7334 	if (rv != 0) {
7335 		DRMACH_PR(
7336 		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
7337 		rv = drmach_bc_bzero(drmach_iocage_vaddr, drmach_iocage_size);
7338 		if (rv != 0)
7339 			cmn_err(CE_PANIC,
7340 			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
7341 			    rv);
7342 	}
7343 
7344 	cpu_flush_ecache();
7345 
7346 	affinity_clear();
7347 }
7348 
/*
 * Round x up to the next multiple of a, yielding a uintptr_t.  a is
 * expected to be a power of two; an a of zero leaves x unchanged.
 */
#define	ALIGN(x, a)	((a) != 0 ? \
	(((uintptr_t)(x) + ((uintptr_t)(a) - 1)) & ~((uintptr_t)(a) - 1)) : \
	(uintptr_t)(x))
7351 
7352 static sbd_error_t *
7353 drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7354 {
7355 	pfn_t		basepfn;
7356 	pgcnt_t		npages;
7357 	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7358 	uint64_t	drmach_iocage_paddr_mbytes;
7359 
7360 	ASSERT(drmach_iocage_paddr != -1);
7361 
7362 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7363 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7364 
7365 	(void) memscrub_delete_span(basepfn, npages);
7366 
7367 	mutex_enter(&cpu_lock);
7368 	drmach_iocage_mem_scrub(drmach_iocage_size);
7369 	mutex_exit(&cpu_lock);
7370 
7371 	/*
7372 	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7373 	 * and in megabyte units.
7374 	 * The size of the cage is also in megabyte units.
7375 	 */
7376 	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7377 
7378 	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7379 
7380 	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7381 	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7382 	tbrq->memlen = drmach_iocage_size / 0x100000;
7383 
7384 	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7385 	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7386 	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7387 
7388 	return (NULL);
7389 }
7390 
7391 static sbd_error_t *
7392 drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7393 {
7394 	_NOTE(ARGUNUSED(tbr))
7395 
7396 	pfn_t		basepfn;
7397 	pgcnt_t		npages;
7398 	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7399 
7400 	ASSERT(drmach_iocage_paddr != -1);
7401 
7402 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7403 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7404 
7405 	(void) memscrub_add_span(basepfn, npages);
7406 
7407 	mutex_enter(&cpu_lock);
7408 	mutex_enter(&drmach_iocage_lock);
7409 	drmach_iocage_mem_scrub(drmach_iocage_size);
7410 	drmach_iocage_is_busy = 0;
7411 	cv_signal(&drmach_iocage_cv);
7412 	mutex_exit(&drmach_iocage_lock);
7413 	mutex_exit(&cpu_lock);
7414 
7415 	return (NULL);
7416 }
7417 
7418 static int
7419 drmach_cpu_intr_disable(cpu_t *cp)
7420 {
7421 	if (cpu_intr_disable(cp) != 0)
7422 		return (-1);
7423 	return (0);
7424 }
7425 
7426 static int
7427 drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
7428 {
7429 	struct cpu	*cp;
7430 	processorid_t	cpuid;
7431 	static char	*fn = "drmach_iocage_cpu_acquire";
7432 	sbd_error_t 	*err;
7433 	int 		impl;
7434 
7435 	ASSERT(DRMACH_IS_CPU_ID(dp));
7436 	ASSERT(MUTEX_HELD(&cpu_lock));
7437 
7438 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7439 
7440 	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);
7441 
7442 	if (dp->busy)
7443 		return (-1);
7444 
7445 	if ((cp = cpu_get(cpuid)) == NULL) {
7446 		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
7447 		return (-1);
7448 	}
7449 
7450 	if (!CPU_ACTIVE(cp)) {
7451 		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
7452 		return (-1);
7453 	}
7454 
7455 	/*
7456 	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
7457 	 * can fail to receive an XIR. To workaround this issue until a hardware
7458 	 * fix is implemented, we will exclude the selection of these CPUs.
7459 	 *
7460 	 * Once a fix is implemented in hardware, this code should be updated
7461 	 * to allow Jaguar CPUs that have the fix to be used. However, support
7462 	 * must be retained to skip revisions that do not have this fix.
7463 	 */
7464 
7465 	err = drmach_cpu_get_impl(dp, &impl);
7466 	if (err) {
7467 		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
7468 		sbd_err_clear(&err);
7469 		return (-1);
7470 	}
7471 
7472 	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
7473 	    drmach_iocage_exclude_jaguar_port_zero) {
7474 		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
7475 		    fn, cpuid);
7476 		return (-1);
7477 	}
7478 
7479 	ASSERT(oflags);
7480 	*oflags = cp->cpu_flags;
7481 
7482 	if (cpu_offline(cp, 0)) {
7483 		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
7484 		return (-1);
7485 	}
7486 
7487 	if (cpu_poweroff(cp)) {
7488 		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
7489 		if (cpu_online(cp)) {
7490 			cmn_err(CE_WARN, "failed to online CPU id %d "
7491 			    "during I/O cage test selection", cpuid);
7492 		}
7493 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7494 		    drmach_cpu_intr_disable(cp) != 0) {
7495 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7496 			    "no-intr during I/O cage test selection", cpuid);
7497 		}
7498 		return (-1);
7499 	}
7500 
7501 	if (cpu_unconfigure(cpuid)) {
7502 		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
7503 		    cpuid);
7504 		(void) cpu_configure(cpuid);
7505 		if ((cp = cpu_get(cpuid)) == NULL) {
7506 			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7507 			    "during I/O cage test selection", cpuid);
7508 			dp->busy = 1;
7509 			return (-1);
7510 		}
7511 		if (cpu_poweron(cp) || cpu_online(cp)) {
7512 			cmn_err(CE_WARN, "failed to %s CPU id %d "
7513 			    "during I/O cage test selection",
7514 			    cpu_is_poweredoff(cp) ?
7515 			    "poweron" : "online", cpuid);
7516 		}
7517 		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
7518 		    drmach_cpu_intr_disable(cp) != 0) {
7519 			cmn_err(CE_WARN, "failed to restore CPU id %d "
7520 			    "no-intr during I/O cage test selection", cpuid);
7521 		}
7522 		return (-1);
7523 	}
7524 
7525 	dp->busy = 1;
7526 
7527 	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);
7528 
7529 	return (0);
7530 }
7531 
7532 /*
7533  * Attempt to acquire all the CPU devices passed in. It is
7534  * assumed that all the devices in the list are the cores of
7535  * a single CMP device. Non CMP devices can be handled as a
7536  * single core CMP by passing in a one element list.
7537  *
7538  * Success is only returned if *all* the devices in the list
7539  * can be acquired. In the failure case, none of the devices
7540  * in the list will be held as acquired.
7541  */
7542 static int
7543 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7544 {
7545 	int	curr;
7546 	int	i;
7547 	int	rv = 0;
7548 
7549 	ASSERT((dpp != NULL) && (*dpp != NULL));
7550 
7551 	/*
7552 	 * Walk the list of CPU devices (cores of a CMP)
7553 	 * and attempt to acquire them. Bail out if an
7554 	 * error is encountered.
7555 	 */
7556 	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7557 
7558 		/* check for the end of the list */
7559 		if (dpp[curr] == NULL) {
7560 			break;
7561 		}
7562 
7563 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7564 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7565 
7566 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7567 		if (rv != 0) {
7568 			break;
7569 		}
7570 	}
7571 
7572 	/*
7573 	 * Check for an error.
7574 	 */
7575 	if (rv != 0) {
7576 		/*
7577 		 * Make a best effort attempt to return any cores
7578 		 * that were already acquired before the error was
7579 		 * encountered.
7580 		 */
7581 		for (i = 0; i < curr; i++) {
7582 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7583 		}
7584 	}
7585 
7586 	return (rv);
7587 }
7588 
7589 static int
7590 drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7591 {
7592 	processorid_t	cpuid;
7593 	struct cpu	*cp;
7594 	int		rv = 0;
7595 	static char	*fn = "drmach_iocage_cpu_return";
7596 
7597 	ASSERT(DRMACH_IS_CPU_ID(dp));
7598 	ASSERT(MUTEX_HELD(&cpu_lock));
7599 
7600 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7601 
7602 	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7603 
7604 	if (cpu_configure(cpuid)) {
7605 		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7606 		    "after I/O cage test", cpuid);
7607 		/*
7608 		 * The component was never set to unconfigured during the IO
7609 		 * cage test, so we need to leave marked as busy to prevent
7610 		 * further DR operations involving this component.
7611 		 */
7612 		return (-1);
7613 	}
7614 
7615 	if ((cp = cpu_get(cpuid)) == NULL) {
7616 		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7617 		    "I/O cage test", cpuid);
7618 		dp->busy = 0;
7619 		return (-1);
7620 	}
7621 
7622 	if (cpu_poweron(cp) || cpu_online(cp)) {
7623 		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7624 		    "cage test", cpu_is_poweredoff(cp) ?
7625 		    "poweron" : "online", cpuid);
7626 		rv = -1;
7627 	}
7628 
7629 	/*
7630 	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7631 	 * P_NOINTR. Need to return to previous user-visible state.
7632 	 */
7633 	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7634 	    drmach_cpu_intr_disable(cp) != 0) {
7635 		cmn_err(CE_WARN, "failed to restore CPU id %d "
7636 		    "no-intr after I/O cage test", cpuid);
7637 		rv = -1;
7638 	}
7639 
7640 	dp->busy = 0;
7641 
7642 	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7643 
7644 	return (rv);
7645 }
7646 
7647 static sbd_error_t *
7648 drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7649     cpu_flag_t *oflags)
7650 {
7651 	drmach_board_t	*bp;
7652 	int		b_rv;
7653 	int		b_idx;
7654 	drmachid_t	b_id;
7655 	int		found;
7656 
7657 	mutex_enter(&cpu_lock);
7658 
7659 	ASSERT(drmach_boards != NULL);
7660 
7661 	found = 0;
7662 
7663 	/*
7664 	 * Walk the board list.
7665 	 */
7666 	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
7667 
7668 	while (b_rv == 0) {
7669 
7670 		int		d_rv;
7671 		int		d_idx;
7672 		drmachid_t	d_id;
7673 
7674 		bp = b_id;
7675 
7676 		if (bp->connected == 0 || bp->devices == NULL) {
7677 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7678 			continue;
7679 		}
7680 
7681 		/* An AXQ restriction disqualifies MCPU's as candidates. */
7682 		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
7683 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7684 			continue;
7685 		}
7686 
7687 		/*
7688 		 * Walk the device list of this board.
7689 		 */
7690 		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);
7691 
7692 		while (d_rv == 0) {
7693 
7694 			drmach_device_t	*ndp;
7695 
7696 			/* only interested in CPU devices */
7697 			if (!DRMACH_IS_CPU_ID(d_id)) {
7698 				d_rv = drmach_array_next(bp->devices, &d_idx,
7699 				    &d_id);
7700 				continue;
7701 			}
7702 
7703 			/*
7704 			 * The following code assumes two properties
7705 			 * of a CMP device:
7706 			 *
7707 			 *   1. All cores of a CMP are grouped together
7708 			 *	in the device list.
7709 			 *
7710 			 *   2. There will only be a maximum of two cores
7711 			 *	present in the CMP.
7712 			 *
7713 			 * If either of these two properties change,
7714 			 * this code will have to be revisited.
7715 			 */
7716 
7717 			dpp[0] = d_id;
7718 			dpp[1] = NULL;
7719 
7720 			/*
7721 			 * Get the next device. It may or may not be used.
7722 			 */
7723 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7724 			ndp = d_id;
7725 
7726 			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
7727 				/*
7728 				 * The second device is only interesting for
7729 				 * this pass if it has the same portid as the
7730 				 * first device. This implies that both are
7731 				 * cores of the same CMP.
7732 				 */
7733 				if (dpp[0]->portid == ndp->portid) {
7734 					dpp[1] = d_id;
7735 				}
7736 			}
7737 
7738 			/*
7739 			 * Attempt to acquire all cores of the CMP.
7740 			 */
7741 			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
7742 				found = 1;
7743 				break;
7744 			}
7745 
7746 			/*
7747 			 * Check if the search for the second core was
7748 			 * successful. If not, the next iteration should
7749 			 * use that device.
7750 			 */
7751 			if (dpp[1] == NULL) {
7752 				continue;
7753 			}
7754 
7755 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7756 		}
7757 
7758 		if (found)
7759 			break;
7760 
7761 		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7762 	}
7763 
7764 	mutex_exit(&cpu_lock);
7765 
7766 	if (!found) {
7767 		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
7768 	}
7769 
7770 	tbrq->cpu_portid = (*dpp)->portid;
7771 
7772 	return (NULL);
7773 }
7774 
7775 /*
7776  * Setup an iocage by acquiring a cpu and memory.
7777  */
7778 static sbd_error_t *
7779 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7780     cpu_flag_t *oflags)
7781 {
7782 	sbd_error_t *err;
7783 
7784 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7785 	if (!err) {
7786 		mutex_enter(&drmach_iocage_lock);
7787 		while (drmach_iocage_is_busy)
7788 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7789 		drmach_iocage_is_busy = 1;
7790 		mutex_exit(&drmach_iocage_lock);
7791 		err = drmach_iocage_mem_get(tbrq);
7792 		if (err) {
7793 			mutex_enter(&drmach_iocage_lock);
7794 			drmach_iocage_is_busy = 0;
7795 			cv_signal(&drmach_iocage_cv);
7796 			mutex_exit(&drmach_iocage_lock);
7797 		}
7798 	}
7799 	return (err);
7800 }
7801 
7802 #define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
7803 #define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
7804 #define	DRMACH_S1P_SAMPLE_MAX		2
7805 
7806 typedef enum {
7807 	DRMACH_POST_SUSPEND = 0,
7808 	DRMACH_PRE_RESUME
7809 } drmach_sr_iter_t;
7810 
7811 typedef struct {
7812 	dev_info_t	*dip;
7813 	uint32_t	portid;
7814 	uint32_t	pcr_sel_save;
7815 	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX];
7816 	uint64_t	reg_basepa;
7817 } drmach_s1p_axq_t;
7818 
7819 typedef struct {
7820 	dev_info_t		*dip;
7821 	uint32_t		portid;
7822 	uint64_t		csr_basepa;
7823 	struct {
7824 		uint64_t 	slot_intr_state_diag;
7825 		uint64_t 	obio_intr_state_diag;
7826 		uint_t		nmap_regs;
7827 		uint64_t	*intr_map_regs;
7828 	} regs[DRMACH_S1P_SAMPLE_MAX];
7829 } drmach_s1p_pci_t;
7830 
7831 typedef struct {
7832 	uint64_t		csr_basepa;
7833 	struct {
7834 		uint64_t	csr;
7835 		uint64_t	errctrl;
7836 		uint64_t	errlog;
7837 	} regs[DRMACH_S1P_SAMPLE_MAX];
7838 	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
7839 } drmach_s1p_schizo_t;
7840 
7841 typedef struct {
7842 	drmach_s1p_axq_t	axq;
7843 	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
7844 } drmach_slot1_pause_t;
7845 
7846 /*
7847  * Table of saved state for paused slot1 devices.
7848  */
7849 static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
7850 static int drmach_slot1_pause_init = 1;
7851 
7852 #ifdef DEBUG
7853 int drmach_slot1_pause_debug = 1;
7854 #else
7855 int drmach_slot1_pause_debug = 0;
7856 #endif /* DEBUG */
7857 
7858 static int
7859 drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7860 {
7861 	int		portid, exp, slot, i;
7862 	drmach_reg_t	regs[2];
7863 	int		reglen = sizeof (regs);
7864 
7865 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7866 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7867 		return (0);
7868 	}
7869 
7870 	exp = (portid >> 5) & 0x1f;
7871 	slot = portid & 0x1;
7872 
7873 	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7874 	    strlen(DRMACH_AXQ_NAMEPROP))) {
7875 		return (0);
7876 	}
7877 
7878 	mutex_enter(&cpu_lock);
7879 	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7880 		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7881 			/* maxcat cpu present */
7882 			mutex_exit(&cpu_lock);
7883 			return (0);
7884 		}
7885 	}
7886 	mutex_exit(&cpu_lock);
7887 
7888 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7889 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7890 		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7891 		    "axq dip=%p\n", (void *)dip);
7892 		return (0);
7893 	}
7894 
7895 	ASSERT(id && reg);
7896 	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7897 	*reg |= (uint64_t)regs[0].reg_addr_lo;
7898 	*id = portid;
7899 
7900 	return (1);
7901 }
7902 
7903 /*
7904  * Allocate an entry in the slot1_paused state table.
7905  */
7906 static void
7907 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7908     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7909 {
7910 	int	axq_exp;
7911 	drmach_slot1_pause_t *slot1;
7912 
7913 	axq_exp = (axq_portid >> 5) & 0x1f;
7914 
7915 	ASSERT(axq_portid & 0x1);
7916 	ASSERT(slot1_paused[axq_exp] == NULL);
7917 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7918 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7919 
7920 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7921 
7922 	/*
7923 	 * XXX This dip should really be held (via ndi_hold_devi())
7924 	 * before saving it in the axq pause structure. However that
7925 	 * would prevent DR as the pause data structures persist until
7926 	 * the next suspend. drmach code should be modified to free the
7927 	 * the slot 1 pause data structures for a boardset when its
7928 	 * slot 1 board is DRed out. The dip can then be released via
7929 	 * ndi_rele_devi() when the pause data structure is freed
7930 	 * allowing DR to proceed. Until this change is made, drmach
7931 	 * code should be careful about dereferencing the saved dip
7932 	 * as it may no longer exist.
7933 	 */
7934 	slot1->axq.dip = axq_dip;
7935 	slot1->axq.portid = axq_portid;
7936 	slot1->axq.reg_basepa = reg;
7937 	slot1_paused[axq_exp] = slot1;
7938 }
7939 
7940 static void
7941 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7942 {
7943 	int	i;
7944 
7945 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7946 		if (pci->regs[i].intr_map_regs != NULL) {
7947 			ASSERT(pci->regs[i].nmap_regs > 0);
7948 			kmem_free(pci->regs[i].intr_map_regs,
7949 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7950 		}
7951 	}
7952 }
7953 
7954 static void
7955 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7956 {
7957 	int	i, j, k;
7958 	drmach_slot1_pause_t *slot1;
7959 
7960 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7961 		if ((slot1 = slot1_paused[i]) == NULL)
7962 			continue;
7963 
7964 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7965 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7966 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7967 
7968 		kmem_free(slot1, sizeof (*slot1));
7969 		slot1_paused[i] = NULL;
7970 	}
7971 }
7972 
7973 /*
7974  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
7975  * fill in the appropriate info in the slot1_paused state table.
7976  */
7977 static int
7978 drmach_find_slot1_io(dev_info_t *dip, void *arg)
7979 {
7980 	int		portid, exp, ioc_unum, leaf_unum;
7981 	char		buf[OBP_MAXDRVNAME];
7982 	int		buflen = sizeof (buf);
7983 	drmach_reg_t	regs[3];
7984 	int		reglen = sizeof (regs);
7985 	uint32_t	leaf_offset;
7986 	uint64_t	schizo_csr_pa, pci_csr_pa;
7987 	drmach_s1p_pci_t *pci;
7988 	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;
7989 
7990 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7991 	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
7992 	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
7993 		return (DDI_WALK_CONTINUE);
7994 	}
7995 
7996 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7997 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7998 		return (DDI_WALK_CONTINUE);
7999 	}
8000 
8001 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8002 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
8003 		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
8004 		    "dip=%p\n", (void *)dip);
8005 		return (DDI_WALK_CONTINUE);
8006 	}
8007 
8008 	exp = portid >> 5;
8009 	ioc_unum = portid & 0x1;
8010 	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
8011 	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
8012 	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
8013 	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
8014 	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;
8015 
8016 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8017 	ASSERT(slot1_paused[exp] != NULL);
8018 	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
8019 	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
8020 	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);
8021 
8022 	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
8023 	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
8024 	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];
8025 
8026 	/*
8027 	 * XXX This dip should really be held (via ndi_hold_devi())
8028 	 * before saving it in the pci pause structure. However that
8029 	 * would prevent DR as the pause data structures persist until
8030 	 * the next suspend. drmach code should be modified to free the
8031 	 * the slot 1 pause data structures for a boardset when its
8032 	 * slot 1 board is DRed out. The dip can then be released via
8033 	 * ndi_rele_devi() when the pause data structure is freed
8034 	 * allowing DR to proceed. Until this change is made, drmach
8035 	 * code should be careful about dereferencing the saved dip as
8036 	 * it may no longer exist.
8037 	 */
8038 	pci->dip = dip;
8039 	pci->portid = portid;
8040 	pci->csr_basepa = pci_csr_pa;
8041 
8042 	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
8043 	    buf, portid, (void *)dip);
8044 
8045 	return (DDI_WALK_PRUNECHILD);
8046 }
8047 
8048 static void
8049 drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
8050 {
8051 	/*
8052 	 * Root node doesn't have to be held
8053 	 */
8054 	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
8055 	    (void *)slot1_paused);
8056 }
8057 
8058 /*
8059  * Save the interrupt mapping registers for each non-idle interrupt
8060  * represented by the bit pairs in the saved interrupt state
8061  * diagnostic registers for this PCI leaf.
8062  */
8063 static void
8064 drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
8065 {
8066 	int	 i, cnt, ino;
8067 	uint64_t reg;
8068 	char	 *dname;
8069 	uchar_t	 Xmits;
8070 
8071 	dname = ddi_binding_name(pci->dip);
8072 	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;
8073 
8074 	/*
8075 	 * 1st pass allocates, 2nd pass populates.
8076 	 */
8077 	for (i = 0; i < 2; i++) {
8078 		cnt = ino = 0;
8079 
8080 		/*
8081 		 * PCI slot interrupts
8082 		 */
8083 		reg = pci->regs[iter].slot_intr_state_diag;
8084 		while (reg) {
8085 			/*
8086 			 * Xmits Interrupt Number Offset(ino) Assignments
8087 			 *   00-17 PCI Slot Interrupts
8088 			 *   18-1f Not Used
8089 			 */
8090 			if ((Xmits) && (ino > 0x17))
8091 				break;
8092 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8093 			    COMMON_CLEAR_INTR_REG_IDLE) {
8094 				if (i) {
8095 					pci->regs[iter].intr_map_regs[cnt] =
8096 					    lddphysio(pci->csr_basepa +
8097 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8098 					    ino * sizeof (reg));
8099 				}
8100 				++cnt;
8101 			}
8102 			++ino;
8103 			reg >>= 2;
8104 		}
8105 
8106 		/*
8107 		 * Xmits Interrupt Number Offset(ino) Assignments
8108 		 *   20-2f Not Used
8109 		 *   30-37 Internal interrupts
8110 		 *   38-3e Not Used
8111 		 */
8112 		ino = (Xmits)  ?  0x30 : 0x20;
8113 
8114 		/*
8115 		 * OBIO and internal schizo interrupts
8116 		 * Each PCI leaf has a set of mapping registers for all
8117 		 * possible interrupt sources except the NewLink interrupts.
8118 		 */
8119 		reg = pci->regs[iter].obio_intr_state_diag;
8120 		while (reg && ino <= 0x38) {
8121 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8122 			    COMMON_CLEAR_INTR_REG_IDLE) {
8123 				if (i) {
8124 					pci->regs[iter].intr_map_regs[cnt] =
8125 					    lddphysio(pci->csr_basepa +
8126 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8127 					    ino * sizeof (reg));
8128 				}
8129 				++cnt;
8130 			}
8131 			++ino;
8132 			reg >>= 2;
8133 		}
8134 
8135 		if (!i) {
8136 			pci->regs[iter].nmap_regs = cnt;
8137 			pci->regs[iter].intr_map_regs =
8138 			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
8139 		}
8140 	}
8141 }
8142 
8143 static void
8144 drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
8145 {
8146 	uint32_t	reg;
8147 
8148 	if (axq->reg_basepa == 0x0UL)
8149 		return;
8150 
8151 	if (iter == DRMACH_POST_SUSPEND) {
8152 		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
8153 		    AXQ_SLOT1_PERFCNT_SEL);
8154 		/*
8155 		 * Select l2_io_queue counter by writing L2_IO_Q mux
8156 		 * input to bits 0-6 of perf cntr select reg.
8157 		 */
8158 		reg = axq->pcr_sel_save;
8159 		reg &= ~AXQ_PIC_CLEAR_MASK;
8160 		reg |= L2_IO_Q;
8161 
8162 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
8163 	}
8164 
8165 	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);
8166 
8167 	if (iter == DRMACH_PRE_RESUME) {
8168 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
8169 		    axq->pcr_sel_save);
8170 	}
8171 
8172 	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
8173 	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
8174 }
8175 
8176 static void
8177 drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
8178 {
8179 	int	i;
8180 	drmach_s1p_pci_t *pci;
8181 
8182 	if (schizo->csr_basepa == 0x0UL)
8183 		return;
8184 
8185 	schizo->regs[iter].csr =
8186 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
8187 	schizo->regs[iter].errctrl =
8188 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
8189 	schizo->regs[iter].errlog =
8190 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);
8191 
8192 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8193 		pci = &schizo->pci[i];
8194 		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
8195 			pci->regs[iter].slot_intr_state_diag =
8196 			    lddphysio(pci->csr_basepa +
8197 			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
8198 
8199 			pci->regs[iter].obio_intr_state_diag =
8200 			    lddphysio(pci->csr_basepa +
8201 			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
8202 
8203 			drmach_s1p_intr_map_reg_save(pci, iter);
8204 		}
8205 	}
8206 }
8207 
8208 /*
8209  * Called post-suspend and pre-resume to snapshot the suspend state
8210  * of slot1 AXQs and Schizos.
8211  */
8212 static void
8213 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8214     drmach_sr_iter_t iter)
8215 {
8216 	int	i, j;
8217 	drmach_slot1_pause_t *slot1;
8218 
8219 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8220 		if ((slot1 = slot1_paused[i]) == NULL)
8221 			continue;
8222 
8223 		drmach_s1p_axq_update(&slot1->axq, iter);
8224 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8225 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8226 	}
8227 }
8228 
8229 /*
8230  * Starcat hPCI Schizo devices.
8231  *
8232  * The name field is overloaded. NULL means the slot (interrupt concentrator
8233  * bus) is not used. intr_mask is a bit mask representing the 4 possible
8234  * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
8235  */
8236 static struct {
8237 	char	*name;
8238 	uint8_t	intr_mask;
8239 } drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
8240 	/* Schizo 0 */		/* Schizo 1 */
8241 	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
8242 	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
8243 	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
8244 	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
8245 	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
8246 	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
8247 	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
8248 	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
8249 };
8250 
/*
 * Return a description of the Schizo internal interrupt source for an
 * ino in the range 0x30-0x37. Only the low three bits of ino select
 * the description; sources without a name are reported as "Reserved".
 *
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
 * "Interrupt Registers", Table 22-69, page 306.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char *const src_names[] = {
		"Uncorrectable ECC error",	/* 0x0 */
		"Correctable ECC error",	/* 0x1 */
		"PCI Bus A Error",		/* 0x2 */
		"PCI Bus B Error",		/* 0x3 */
		"Safari Bus Error"		/* 0x4 */
	};
	int	src;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	src = ino & 0x7;
	if (src < (int)(sizeof (src_names) / sizeof (src_names[0])))
		return (src_names[src]);

	return ("Reserved");
}
8272 
8273 #define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8274 
8275 static void
8276 drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8277     int ino, drmach_sr_iter_t iter)
8278 {
8279 	uint8_t		intr_mask;
8280 	char		*slot_devname;
8281 	char		namebuf[OBP_MAXDRVNAME];
8282 	int		slot, intr_line, slot_valid, intr_valid;
8283 
8284 	ASSERT(ino >= 0 && ino <= 0x1f);
8285 	ASSERT((pci->regs[iter].slot_intr_state_diag &
8286 	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8287 	    COMMON_CLEAR_INTR_REG_IDLE);
8288 
8289 	slot = (ino >> 2) & 0x7;
8290 	intr_line = ino & 0x3;
8291 
8292 	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8293 	slot_valid = (slot_devname == NULL) ? 0 : 1;
8294 	if (!slot_valid) {
8295 		(void) snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)",
8296 		    slot);
8297 		slot_devname = namebuf;
8298 	}
8299 
8300 	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8301 	intr_valid = (1 << intr_line) & intr_mask;
8302 
8303 	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8304 	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8305 	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8306 }
8307 
8308 /*
8309  * Log interrupt source device info for all valid, pending interrupts
8310  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8311  * error in the error ctrl reg.
8312  */
8313 static void
8314 drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
8315     int unum, drmach_sr_iter_t iter)
8316 {
8317 	uint64_t	reg;
8318 	int		i, n, ino;
8319 	drmach_s1p_pci_t *pci;
8320 
8321 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8322 	ASSERT(unum < STARCAT_SLOT1_IO_MAX);
8323 
8324 	/*
8325 	 * Check the saved interrupt mapping registers. If interrupt is valid,
8326 	 * map the ino to the Schizo source device and check that the pci
8327 	 * slot and interrupt line are valid.
8328 	 */
8329 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8330 		pci = &schizo->pci[i];
8331 		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
8332 			reg = pci->regs[iter].intr_map_regs[n];
8333 			if (reg & COMMON_INTR_MAP_REG_VALID) {
8334 				ino = reg & COMMON_INTR_MAP_REG_INO;
8335 
8336 				if (ino <= 0x1f) {
8337 					/*
8338 					 * PCI slot interrupt
8339 					 */
8340 					drmach_s1p_decode_slot_intr(exp, unum,
8341 					    pci, ino, iter);
8342 				} else if (ino <= 0x2f) {
8343 					/*
8344 					 * OBIO interrupt
8345 					 */
8346 					prom_printf("IO%d/P%d OBIO interrupt: "
8347 					    "ino=0x%x\n", exp, unum, ino);
8348 				} else if (ino <= 0x37) {
8349 					/*
8350 					 * Internal interrupt
8351 					 */
8352 					prom_printf("IO%d/P%d Internal "
8353 					    "interrupt: ino=0x%x (%s)\n",
8354 					    exp, unum, ino,
8355 					    drmach_schz_internal_ino2str(ino));
8356 				} else {
8357 					/*
8358 					 * NewLink interrupt
8359 					 */
8360 					prom_printf("IO%d/P%d NewLink "
8361 					    "interrupt: ino=0x%x\n", exp,
8362 					    unum, ino);
8363 				}
8364 
8365 				DRMACH_PR("drmach_s1p_schizo_log_intr: "
8366 				    "exp=%d, schizo=%d, pci_leaf=%c, "
8367 				    "ino=0x%x, intr_map_reg=0x%lx\n",
8368 				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
8369 			}
8370 		}
8371 	}
8372 }
8373 
8374 /*
8375  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8376  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8377  */
8378 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8379 
8380 /*
8381  * Check for possible error indicators prior to resuming the
8382  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8383  */
8384 static void
8385 drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
8386     drmach_sr_iter_t iter)
8387 {
8388 	int	i, j;
8389 	int 	errflag = 0;
8390 	drmach_slot1_pause_t *slot1;
8391 
8392 	/*
8393 	 * Check for logged schizo bus error and pending interrupts.
8394 	 */
8395 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8396 		if ((slot1 = slot1_paused[i]) == NULL)
8397 			continue;
8398 
8399 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
8400 			if (slot1->schizo[j].csr_basepa == 0x0UL)
8401 				continue;
8402 
8403 			if (slot1->schizo[j].regs[iter].errlog &
8404 			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
8405 				if (!errflag) {
8406 					prom_printf("DR WARNING: interrupt "
8407 					    "attempt detected during "
8408 					    "copy-rename (%s):\n",
8409 					    (iter == DRMACH_POST_SUSPEND) ?
8410 					    "post suspend" : "pre resume");
8411 					++errflag;
8412 				}
8413 				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
8414 				    i, j, iter);
8415 			}
8416 		}
8417 	}
8418 
8419 	/*
8420 	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
8421 	 */
8422 	if (iter == DRMACH_PRE_RESUME) {
8423 		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8424 			if ((slot1 = slot1_paused[i]) == NULL)
8425 				continue;
8426 
8427 			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
8428 			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
8429 				prom_printf("DR WARNING: IO transactions "
8430 				    "detected on IO%d during copy-rename: "
8431 				    "AXQ l2_io_q performance counter "
8432 				    "start=%d, end=%d\n", i,
8433 				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
8434 				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
8435 			}
8436 		}
8437 	}
8438 }
8439 
8440 struct drmach_sr_list {
8441 	dev_info_t		*dip;
8442 	struct drmach_sr_list	*next;
8443 	struct drmach_sr_list	*prev;
8444 };
8445 
8446 static struct drmach_sr_ordered {
8447 	char			*name;
8448 	struct drmach_sr_list	*ring;
8449 } drmach_sr_ordered[] = {
8450 	{ "iosram",			NULL },
8451 	{ "address-extender-queue",	NULL },
8452 	{ NULL,				NULL }, /* terminator -- required */
8453 };
8454 
8455 static void
8456 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8457 {
8458 	struct drmach_sr_list *np;
8459 
8460 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", (void *)dip);
8461 
8462 	np = (struct drmach_sr_list *)kmem_alloc(
8463 	    sizeof (struct drmach_sr_list), KM_SLEEP);
8464 
8465 	ndi_hold_devi(dip);
8466 	np->dip = dip;
8467 
8468 	if (*lp == NULL) {
8469 		/* establish list */
8470 		*lp = np->next = np->prev = np;
8471 	} else {
8472 		/* place new node behind head node on ring list */
8473 		np->prev = (*lp)->prev;
8474 		np->next = *lp;
8475 		np->prev->next = np;
8476 		np->next->prev = np;
8477 	}
8478 }
8479 
8480 static void
8481 drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
8482 {
8483 	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", (void *)dip);
8484 
8485 	if (*lp) {
8486 		struct drmach_sr_list *xp;
8487 
8488 		/* start search with mostly likely node */
8489 		xp = (*lp)->prev;
8490 		do {
8491 			if (xp->dip == dip) {
8492 				xp->prev->next = xp->next;
8493 				xp->next->prev = xp->prev;
8494 
8495 				if (xp == *lp)
8496 					*lp = xp->next;
8497 				if (xp == *lp)
8498 					*lp = NULL;
8499 				xp->dip = NULL;
8500 				ndi_rele_devi(dip);
8501 				kmem_free(xp, sizeof (*xp));
8502 
8503 				DRMACH_PR("drmach_sr_delete:"
8504 				    " disposed sr node for dip %p",
8505 				    (void *)dip);
8506 				return;
8507 			}
8508 
8509 			DRMACH_PR("drmach_sr_delete: still searching\n");
8510 
8511 			xp = xp->prev;
8512 		} while (xp != (*lp)->prev);
8513 	}
8514 
8515 	/* every dip should be found during resume */
8516 	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", (void *)dip);
8517 }
8518 
8519 int
8520 drmach_verify_sr(dev_info_t *dip, int sflag)
8521 {
8522 	int	rv;
8523 	int	len;
8524 	char    name[OBP_MAXDRVNAME];
8525 
8526 	if (drmach_slot1_pause_debug) {
8527 		if (sflag && drmach_slot1_pause_init) {
8528 			drmach_slot1_pause_free(drmach_slot1_paused);
8529 			drmach_slot1_pause_init = 0;
8530 		} else if (!sflag && !drmach_slot1_pause_init) {
8531 			/* schedule init for next suspend */
8532 			drmach_slot1_pause_init = 1;
8533 		}
8534 	}
8535 
8536 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8537 	    "name", &len);
8538 	if (rv == DDI_PROP_SUCCESS) {
8539 		int		portid;
8540 		uint64_t	reg;
8541 		struct drmach_sr_ordered *op;
8542 
8543 		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8544 		    DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8545 
8546 		if (rv != DDI_PROP_SUCCESS)
8547 			return (0);
8548 
8549 		if (drmach_slot1_pause_debug && sflag &&
8550 		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8551 			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8552 			    drmach_slot1_paused);
8553 		}
8554 
8555 		for (op = drmach_sr_ordered; op->name; op++) {
8556 			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8557 				if (sflag)
8558 					drmach_sr_insert(&op->ring, dip);
8559 				else
8560 					drmach_sr_delete(&op->ring, dip);
8561 				return (1);
8562 			}
8563 		}
8564 	}
8565 
8566 	return (0);
8567 }
8568 
8569 static void
8570 drmach_sr_dip(dev_info_t *dip, int suspend)
8571 {
8572 	int	 rv;
8573 	major_t	 maj;
8574 	char	*name, *name_addr, *aka;
8575 
8576 	if ((name = ddi_get_name(dip)) == NULL)
8577 		name = "<null name>";
8578 	else if ((maj = ddi_name_to_major(name)) != -1)
8579 		aka = ddi_major_to_name(maj);
8580 	else
8581 		aka = "<unknown>";
8582 
8583 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8584 		name_addr = "<null>";
8585 
8586 	prom_printf("\t%s %s@%s (aka %s)\n",
8587 	    suspend ? "suspending" : "resuming",
8588 	    name, name_addr, aka);
8589 
8590 	if (suspend) {
8591 		rv = devi_detach(dip, DDI_SUSPEND);
8592 	} else {
8593 		rv = devi_attach(dip, DDI_RESUME);
8594 	}
8595 
8596 	if (rv != DDI_SUCCESS) {
8597 		prom_printf("\tFAILED to %s %s@%s\n",
8598 		    suspend ? "suspend" : "resume",
8599 		    name, name_addr);
8600 	}
8601 }
8602 
8603 void
8604 drmach_suspend_last()
8605 {
8606 	struct drmach_sr_ordered *op;
8607 
8608 	if (drmach_slot1_pause_debug)
8609 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8610 
8611 	/*
8612 	 * The ordering array declares the strict sequence in which
8613 	 * the named drivers are to suspended. Each element in
8614 	 * the array may have a double-linked ring list of driver
8615 	 * instances (dip) in the order in which they were presented
8616 	 * to drmach_verify_sr. If present, walk the list in the
8617 	 * forward direction to suspend each instance.
8618 	 */
8619 	for (op = drmach_sr_ordered; op->name; op++) {
8620 		if (op->ring) {
8621 			struct drmach_sr_list *rp;
8622 
8623 			rp = op->ring;
8624 			do {
8625 				drmach_sr_dip(rp->dip, 1);
8626 				rp = rp->next;
8627 			} while (rp != op->ring);
8628 		}
8629 	}
8630 
8631 	if (drmach_slot1_pause_debug) {
8632 		drmach_slot1_pause_update(drmach_slot1_paused,
8633 		    DRMACH_POST_SUSPEND);
8634 		drmach_slot1_pause_verify(drmach_slot1_paused,
8635 		    DRMACH_POST_SUSPEND);
8636 	}
8637 }
8638 
8639 void
8640 drmach_resume_first()
8641 {
8642 	struct drmach_sr_ordered *op = drmach_sr_ordered +
8643 	    (sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8644 
8645 	if (drmach_slot1_pause_debug) {
8646 		drmach_slot1_pause_update(drmach_slot1_paused,
8647 		    DRMACH_PRE_RESUME);
8648 		drmach_slot1_pause_verify(drmach_slot1_paused,
8649 		    DRMACH_PRE_RESUME);
8650 	}
8651 
8652 	op -= 1;	/* point at terminating element */
8653 
8654 	/*
8655 	 * walk ordering array and rings backwards to resume dips
8656 	 * in reverse order in which they were suspended
8657 	 */
8658 	while (--op >= drmach_sr_ordered) {
8659 		if (op->ring) {
8660 			struct drmach_sr_list *rp;
8661 
8662 			rp = op->ring->prev;
8663 			do {
8664 				drmach_sr_dip(rp->dip, 0);
8665 				rp = rp->prev;
8666 			} while (rp != op->ring->prev);
8667 		}
8668 	}
8669 }
8670 
8671 /*
8672  * Log a DR sysevent.
8673  * Return value: 0 success, non-zero failure.
8674  */
8675 int
8676 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8677 {
8678 	sysevent_t			*ev;
8679 	sysevent_id_t			eid;
8680 	int				rv, km_flag;
8681 	sysevent_value_t		evnt_val;
8682 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8683 	char				attach_pnt[MAXNAMELEN];
8684 
8685 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8686 	attach_pnt[0] = '\0';
8687 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8688 		rv = -1;
8689 		goto logexit;
8690 	}
8691 	if (verbose)
8692 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8693 		    attach_pnt, hint, flag, verbose);
8694 
8695 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8696 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8697 		rv = -2;
8698 		goto logexit;
8699 	}
8700 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8701 	evnt_val.value.sv_string = attach_pnt;
8702 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8703 	    &evnt_val, km_flag)) != 0)
8704 		goto logexit;
8705 
8706 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8707 	evnt_val.value.sv_string = hint;
8708 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8709 	    &evnt_val, km_flag)) != 0) {
8710 		sysevent_free_attr(evnt_attr_list);
8711 		goto logexit;
8712 	}
8713 
8714 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8715 
8716 	/*
8717 	 * Log the event but do not sleep waiting for its
8718 	 * delivery. This provides insulation from syseventd.
8719 	 */
8720 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8721 
8722 logexit:
8723 	if (ev)
8724 		sysevent_free(ev);
8725 	if ((rv != 0) && verbose)
8726 		cmn_err(CE_WARN,
8727 		    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8728 		    rv, attach_pnt, hint);
8729 
8730 	return (rv);
8731 }
8732 
8733 /*
8734  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8735  * Only the valid entries are modified, so the array should be zeroed out
8736  * initially.
8737  */
8738 static void
8739 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8740 	int	i;
8741 	char	c;
8742 
8743 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8744 
8745 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8746 		c = drmach_slice_table[i];
8747 
8748 		if (c & 0x20) {
8749 			slice_arr[i].valid = 1;
8750 			slice_arr[i].slice = c & 0x1f;
8751 		}
8752 	}
8753 }
8754 
8755 /*
8756  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8757  * Only the valid entries are modified, so the array should be zeroed out
8758  * initially.
8759  */
8760 static void
8761 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8762 	int		rv, exp, mcnum, bank;
8763 	uint64_t	madr;
8764 	drmachid_t	id;
8765 	drmach_board_t	*bp;
8766 	drmach_mem_t	*mp;
8767 	dr_memregs_t	*memregs;
8768 
8769 	/* CONSTCOND */
8770 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8771 
8772 	for (exp = 0; exp < 18; exp++) {
8773 		rv = drmach_array_get(drmach_boards,
8774 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8775 		ASSERT(rv == 0);	/* should never be out of bounds */
8776 		if (id == NULL) {
8777 			continue;
8778 		}
8779 
8780 		memregs = &regs_arr[exp];
8781 		bp = (drmach_board_t *)id;
8782 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8783 			mcnum = mp->dev.portid & 0x3;
8784 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8785 				drmach_mem_read_madr(mp, bank, &madr);
8786 				if (madr & DRMACH_MC_VALID_MASK) {
8787 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8788 					    exp, mcnum, bank, madr);
8789 					memregs->madr[mcnum][bank].hi =
8790 					    DRMACH_U64_TO_MCREGHI(madr);
8791 					memregs->madr[mcnum][bank].lo =
8792 					    DRMACH_U64_TO_MCREGLO(madr);
8793 				}
8794 			}
8795 		}
8796 	}
8797 }
8798 
8799 /*
8800  * Do not allow physical address range modification if either board on this
8801  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8802  *
8803  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8804  * install the cache line as owned/dirty as a result of the RTSR transaction.
8805  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8806  * list before the rename after flushing local caches.  When copy-rename
8807  * requires changing the physical address ranges (i.e. smaller memory target),
8808  * the bus sync list contains physical addresses that will not exist after the
8809  * rename.  If these cache lines are owned due to a RTSR, a system error can
8810  * occur following the rename when these cache lines are evicted and a writeback
8811  * is attempted.
8812  *
8813  * Incoming parameter represents either the copy-rename source or a candidate
8814  * target memory board.  On Starcat, only slot0 boards may have memory.
8815  */
8816 int
8817 drmach_allow_memrange_modify(drmachid_t s0id)
8818 {
8819 	drmach_board_t	*s0bp, *s1bp;
8820 	drmachid_t	s1id;
8821 	int		rv;
8822 
8823 	s0bp = s0id;
8824 
8825 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8826 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8827 
8828 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8829 		/*
8830 		 * This is reason enough to fail the request, no need
8831 		 * to check the device list for cpus.
8832 		 */
8833 		return (0);
8834 	}
8835 
8836 	/*
8837 	 * Check for MCPU board on the same expander.
8838 	 *
8839 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8840 	 * types, as it is derived at from the POST gdcd board flag
8841 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8842 	 * ignored) for boards with no processors.  Since NULL proc LPA
8843 	 * applies only to processors, we walk the devices array to detect
8844 	 * MCPUs.
8845 	 */
8846 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8847 	s1bp = s1id;
8848 	if (rv == 0 && s1bp != NULL) {
8849 
8850 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8851 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8852 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8853 		    DRMACH_BNUM2EXP(s1bp->bnum));
8854 
8855 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8856 		    s1bp->devices != NULL) {
8857 			int		d_idx;
8858 			drmachid_t	d_id;
8859 
8860 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8861 			while (rv == 0) {
8862 				if (DRMACH_IS_CPU_ID(d_id)) {
8863 					/*
8864 					 * Fail MCPU in NULL LPA mode.
8865 					 */
8866 					return (0);
8867 				}
8868 
8869 				rv = drmach_array_next(s1bp->devices, &d_idx,
8870 				    &d_id);
8871 			}
8872 		}
8873 	}
8874 
8875 	return (1);
8876 }
8877