xref: /titanic_44/usr/src/uts/sun4u/starcat/io/drmach.c (revision ab9a77c71b58e388d02c6199c8bfbcb998aae845)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/note.h>
29 #include <sys/debug.h>
30 #include <sys/types.h>
31 #include <sys/varargs.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/dditypes.h>
35 #include <sys/devops.h>
36 #include <sys/modctl.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/stat.h>
44 #include <sys/kmem.h>
45 #include <sys/vmem.h>
46 #include <sys/disp.h>
47 #include <sys/processor.h>
48 #include <sys/cheetahregs.h>
49 #include <sys/cpuvar.h>
50 #include <sys/mem_config.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/systm.h>
53 #include <sys/machsystm.h>
54 #include <sys/autoconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/sysmacros.h>
57 #include <sys/x_call.h>
58 #include <sys/promif.h>
59 #include <sys/prom_plat.h>
60 #include <sys/membar.h>
61 #include <vm/seg_kmem.h>
62 #include <sys/mem_cage.h>
63 #include <sys/stack.h>
64 #include <sys/archsystm.h>
65 #include <vm/hat_sfmmu.h>
66 #include <sys/pte.h>
67 #include <sys/mmu.h>
68 #include <sys/cpu_module.h>
69 #include <sys/obpdefs.h>
70 #include <sys/mboxsc.h>
71 #include <sys/plat_ecc_dimm.h>
72 
73 #include <sys/hotplug/hpctrl.h>		/* XXX should be included by schpc.h */
74 #include <sys/schpc.h>
75 #include <sys/pci.h>
76 
77 #include <sys/starcat.h>
78 #include <sys/cpu_sgnblk_defs.h>
79 #include <sys/drmach.h>
80 #include <sys/dr_util.h>
81 #include <sys/dr_mbx.h>
82 #include <sys/sc_gptwocfg.h>
83 #include <sys/iosramreg.h>
84 #include <sys/iosramio.h>
85 #include <sys/iosramvar.h>
86 #include <sys/axq.h>
87 #include <sys/post/scat_dcd.h>
88 #include <sys/kobj.h>
89 #include <sys/taskq.h>
90 #include <sys/cmp.h>
91 #include <sys/sbd_ioctl.h>
92 
93 #include <sys/sysevent.h>
94 #include <sys/sysevent/dr.h>
95 #include <sys/sysevent/eventdefs.h>
96 
97 #include <sys/pci/pcisch.h>
98 #include <sys/pci/pci_regs.h>
99 
100 #include <sys/ontrap.h>
101 
102 /* defined in ../ml/drmach.il.cpp */
103 extern void		bcopy32_il(uint64_t, uint64_t);
104 extern void		flush_ecache_il(int64_t physaddr, int size, int linesz);
105 extern void		flush_dcache_il(void);
106 extern void		flush_icache_il(void);
107 extern void		flush_pcache_il(void);
108 
109 /* defined in ../ml/drmach_asm.s */
110 extern uint64_t		lddmcdecode(uint64_t physaddr);
111 extern uint64_t		lddsafconfig(void);
112 
113 /* XXX here until provided by sys/dman.h */
114 extern int man_dr_attach(dev_info_t *);
115 extern int man_dr_detach(dev_info_t *);
116 
117 #define	DRMACH_BNUM2EXP(bnum)		((bnum) >> 1)
118 #define	DRMACH_BNUM2SLOT(bnum)		((bnum) & 1)
119 #define	DRMACH_EXPSLOT2BNUM(exp, slot)	(((exp) << 1) + (slot))
120 
121 #define	DRMACH_SLICE_MASK		0x1Full
122 #define	DRMACH_SLICE_TO_PA(s)		(((s) & DRMACH_SLICE_MASK) << 37)
123 #define	DRMACH_PA_TO_SLICE(a)		(((a) >> 37) & DRMACH_SLICE_MASK)
124 
125 /*
126  * DRMACH_MEM_SLICE_SIZE and DRMACH_MEM_USABLE_SLICE_SIZE define the
127  * available address space and the usable address space for every slice.
128  * There must be a distinction between the available and usable due to a
129  * restriction imposed by CDC memory size.
130  */
131 
132 #define	DRMACH_MEM_SLICE_SIZE		(1ull << 37)	/* 128GB */
133 #define	DRMACH_MEM_USABLE_SLICE_SIZE	(1ull << 36)	/* 64GB */
134 
135 #define	DRMACH_MC_NBANKS		4
136 
137 #define	DRMACH_MC_ADDR(mp, bank)	((mp)->madr_pa + 16 + 8 * (bank))
138 #define	DRMACH_MC_ASI_ADDR(mp, bank)	(DRMACH_MC_ADDR(mp, bank) & 0xFF)
139 
140 #define	DRMACH_EMU_ACT_STATUS_OFFSET	0x50
141 #define	DRMACH_EMU_ACT_STATUS_ADDR(mp)	\
142 	((mp)->madr_pa + DRMACH_EMU_ACT_STATUS_OFFSET)
143 
144 /*
145  * The Cheetah's Safari Configuration Register and the Schizo's
146  * Safari Control/Status Register place the LPA base and bound fields in the
147  * same bit locations within their register word. This source code takes
148  * advantage of this by defining only one set of LPA encoding/decoding macros
149  * which are shared by various Cheetah and Schizo drmach routines.
150  */
151 #define	DRMACH_LPA_BASE_MASK		(0x3Full	<< 3)
152 #define	DRMACH_LPA_BND_MASK		(0x3Full	<< 9)
153 
154 #define	DRMACH_LPA_BASE_TO_PA(scr)	(((scr) & DRMACH_LPA_BASE_MASK) << 34)
155 #define	DRMACH_LPA_BND_TO_PA(scr)	(((scr) & DRMACH_LPA_BND_MASK) << 28)
156 #define	DRMACH_PA_TO_LPA_BASE(pa)	(((pa) >> 34) & DRMACH_LPA_BASE_MASK)
157 #define	DRMACH_PA_TO_LPA_BND(pa)	(((pa) >> 28) & DRMACH_LPA_BND_MASK)
158 
159 #define	DRMACH_L1_SET_LPA(b)		\
160 	(((b)->flags & DRMACH_NULL_PROC_LPA) == 0)
161 
162 #define	DRMACH_CPU_SRAM_ADDR    	0x7fff0900000ull
163 #define	DRMACH_CPU_SRAM_SIZE    	0x20000ull
164 
165 /*
166  * Name properties for frequently accessed device nodes.
167  */
168 #define	DRMACH_CPU_NAMEPROP		"cpu"
169 #define	DRMACH_CMP_NAMEPROP		"cmp"
170 #define	DRMACH_AXQ_NAMEPROP		"address-extender-queue"
171 #define	DRMACH_PCI_NAMEPROP		"pci"
172 
/*
 * Maximum value of processor Safari Timeout Log (TOL) field of
 * Safari Config reg (7 secs), expressed in microseconds.
 *
 * The expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used (e.g. next to '/', '%' or unary '-').
 */
#define	DRMACH_SAF_TOL_MAX		(7 * 1000000)
178 
179 /*
180  * drmach_board_t flag definitions
181  */
182 #define	DRMACH_NULL_PROC_LPA		0x1
183 
184 typedef struct {
185 	uint32_t	reg_addr_hi;
186 	uint32_t	reg_addr_lo;
187 	uint32_t	reg_size_hi;
188 	uint32_t	reg_size_lo;
189 } drmach_reg_t;
190 
191 typedef struct {
192 	struct drmach_node	*node;
193 	void			*data;
194 } drmach_node_walk_args_t;
195 
196 typedef struct drmach_node {
197 	void		*here;
198 
199 	pnode_t		 (*get_dnode)(struct drmach_node *node);
200 	int		 (*walk)(struct drmach_node *node, void *data,
201 				int (*cb)(drmach_node_walk_args_t *args));
202 	dev_info_t	*(*n_getdip)(struct drmach_node *node);
203 	int		 (*n_getproplen)(struct drmach_node *node, char *name,
204 				int *len);
205 	int		 (*n_getprop)(struct drmach_node *node, char *name,
206 				void *buf, int len);
207 	int		 (*get_parent)(struct drmach_node *node,
208 				struct drmach_node *pnode);
209 } drmach_node_t;
210 
211 typedef struct {
212 	int		 min_index;
213 	int		 max_index;
214 	int		 arr_sz;
215 	drmachid_t	*arr;
216 } drmach_array_t;
217 
218 typedef struct {
219 	void		*isa;
220 
221 	void		 (*dispose)(drmachid_t);
222 	sbd_error_t	*(*release)(drmachid_t);
223 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
224 
225 	char		 name[MAXNAMELEN];
226 } drmach_common_t;
227 
228 struct drmach_board;
229 typedef struct drmach_board drmach_board_t;
230 
231 typedef struct {
232 	drmach_common_t	 cm;
233 	const char	*type;
234 	drmach_board_t	*bp;
235 	drmach_node_t	*node;
236 	int		 portid;
237 	int		 unum;
238 	int		 busy;
239 	int		 powered;
240 } drmach_device_t;
241 
242 typedef struct drmach_cpu {
243 	drmach_device_t	 dev;
244 	uint64_t	 scr_pa;
245 	processorid_t	 cpuid;
246 	int		 coreid;
247 } drmach_cpu_t;
248 
249 typedef struct drmach_mem {
250 	drmach_device_t	 dev;
251 	struct drmach_mem *next;
252 	uint64_t	 nbytes;
253 	uint64_t	 madr_pa;
254 } drmach_mem_t;
255 
256 typedef struct drmach_io {
257 	drmach_device_t	 dev;
258 	uint64_t	 scsr_pa; /* PA of Schizo Control/Status Register */
259 } drmach_io_t;
260 
261 struct drmach_board {
262 	drmach_common_t	 cm;
263 	int		 bnum;
264 	int		 assigned;
265 	int		 powered;
266 	int		 connected;
267 	int		 empty;
268 	int		 cond;
269 	uint_t		 cpu_impl;
270 	uint_t		 flags;
271 	drmach_node_t	*tree;
272 	drmach_array_t	*devices;
273 	drmach_mem_t	*mem;
274 	uint64_t	 stardrb_offset;
275 	char		 type[BD_TYPELEN];
276 };
277 
278 typedef struct {
279 	int		 flags;
280 	drmach_device_t	*dp;
281 	sbd_error_t	*err;
282 	dev_info_t	*fdip;
283 } drmach_config_args_t;
284 
285 typedef struct {
286 	drmach_board_t	*obj;
287 	int		 ndevs;
288 	void		*a;
289 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
290 	sbd_error_t	*err;
291 } drmach_board_cb_data_t;
292 
293 typedef struct drmach_casmslot {
294 	int	valid;
295 	int	slice;
296 } drmach_casmslot_t;
297 
298 typedef enum {
299 	DRMACH_CR_OK,
300 	DRMACH_CR_MC_IDLE_ERR,
301 	DRMACH_CR_IOPAUSE_ERR,
302 	DRMACH_CR_ONTRAP_ERR
303 } drmach_cr_err_t;
304 
305 typedef struct {
306 	void		*isa;
307 	caddr_t		 data;
308 	drmach_mem_t	*s_mp;
309 	drmach_mem_t	*t_mp;
310 	struct memlist	*c_ml;
311 	uint64_t	 s_copybasepa;
312 	uint64_t	 t_copybasepa;
313 	drmach_cr_err_t	 ecode;
314 	void		*earg;
315 } drmach_copy_rename_t;
316 
317 /*
318  * The following global is read as a boolean value, non-zero is true.
319  * If zero, DR copy-rename and cpu poweron will not set the processor
320  * LPA settings (CBASE, CBND of Safari config register) to correspond
321  * to the current memory slice map. LPAs of processors present at boot
322  * will remain as programmed by POST. LPAs of processors on boards added
323  * by DR will remain NULL, as programmed by POST. This can be used to
324  * override the per-board L1SSFLG_THIS_L1_NULL_PROC_LPA flag set by
325  * POST in the LDCD (and copied to the GDCD by SMS).
326  *
327  * drmach_reprogram_lpa and L1SSFLG_THIS_L1_NULL_PROC_LPA do not apply
328  * to Schizo device LPAs. These are always set by DR.
329  */
330 static int		 drmach_reprogram_lpa = 1;
331 
332 /*
333  * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
334  * can fail to receive an XIR. To workaround this issue until a hardware
335  * fix is implemented, we will exclude the selection of these CPUs.
336  * Setting this to 0 will allow their selection again.
337  */
338 static int		 drmach_iocage_exclude_jaguar_port_zero = 1;
339 
340 static int		 drmach_initialized;
341 static drmach_array_t	*drmach_boards;
342 
343 static int		 drmach_cpu_delay = 1000;
344 static int		 drmach_cpu_ntries = 50000;
345 
346 static uint32_t		 drmach_slice_table[AXQ_MAX_EXP];
347 static kmutex_t		 drmach_slice_table_lock;
348 
349 tte_t			 drmach_cpu_sram_tte[NCPU];
350 caddr_t			 drmach_cpu_sram_va;
351 
352 /*
353  * Setting to non-zero will enable delay before all disconnect ops.
354  */
355 static int		 drmach_unclaim_delay_all;
356 /*
357  * Default delay is slightly greater than the max processor Safari timeout.
358  * This delay is intended to ensure the outstanding Safari activity has
359  * retired on this board prior to a board disconnect.
360  */
361 static clock_t		 drmach_unclaim_usec_delay = DRMACH_SAF_TOL_MAX + 10;
362 
363 /*
364  * By default, DR of non-Panther procs is not allowed into a Panther
365  * domain with large page sizes enabled.  Setting this to 0 will remove
366  * the restriction.
367  */
368 static int		 drmach_large_page_restriction = 1;
369 
370 /*
371  * Used to pass updated LPA values to procs.
372  * Protocol is to clear the array before use.
373  */
374 volatile uchar_t	*drmach_xt_mb;
375 volatile uint64_t	 drmach_xt_ready;
376 static kmutex_t		 drmach_xt_mb_lock;
377 static int		 drmach_xt_mb_size;
378 
379 uint64_t		 drmach_bus_sync_list[18 * 4 * 4 + 1];
380 static kmutex_t		 drmach_bus_sync_lock;
381 
382 static sbd_error_t	*drmach_device_new(drmach_node_t *,
383 				drmach_board_t *, int, drmachid_t *);
384 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
385 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
386 static sbd_error_t	*drmach_pci_new(drmach_device_t *, drmachid_t *);
387 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
388 
389 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
390 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
391 				char *name, void *buf, int len);
392 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
393 				char *name, int *len);
394 
395 static dev_info_t	*drmach_node_obp_get_dip(drmach_node_t *np);
396 static int		 drmach_node_obp_get_prop(drmach_node_t *np,
397 				char *name, void *buf, int len);
398 static int		 drmach_node_obp_get_proplen(drmach_node_t *np,
399 				char *name, int *len);
400 
401 static sbd_error_t	*drmach_mbox_trans(uint8_t msgtype, int bnum,
402 				caddr_t obufp, int olen,
403 				caddr_t ibufp, int ilen);
404 
405 sbd_error_t		*drmach_io_post_attach(drmachid_t id);
406 sbd_error_t		*drmach_io_post_release(drmachid_t id);
407 
408 static sbd_error_t	*drmach_iocage_setup(dr_testboard_req_t *,
409 				drmach_device_t **dpp, cpu_flag_t *oflags);
410 static int		drmach_iocage_cpu_return(drmach_device_t *dp,
411 				cpu_flag_t oflags);
412 static sbd_error_t	*drmach_iocage_mem_return(dr_testboard_reply_t *tbr);
413 void			drmach_iocage_mem_scrub(uint64_t nbytes);
414 
415 static sbd_error_t 	*drmach_i_status(drmachid_t id, drmach_status_t *stat);
416 
417 static void		drmach_slot1_lpa_set(drmach_board_t *bp);
418 
419 static void		drmach_cpu_read(uint64_t arg1, uint64_t arg2);
420 static int		drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr);
421 
422 static void		 drmach_bus_sync_list_update(void);
423 static void		 drmach_slice_table_update(drmach_board_t *, int);
424 static int		 drmach_portid2bnum(int);
425 
426 static void		drmach_msg_memslice_init(dr_memslice_t slice_arr[]);
427 static void		drmach_msg_memregs_init(dr_memregs_t regs_arr[]);
428 
429 static int		drmach_panther_boards(void);
430 
431 static int		drmach_name2type_idx(char *);
432 
433 #ifdef DEBUG
434 
435 #define	DRMACH_PR		if (drmach_debug) printf
436 #define	DRMACH_MEMLIST_DUMP	if (drmach_debug) MEMLIST_DUMP
437 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
438 #else
439 
440 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
441 #define	DRMACH_MEMLIST_DUMP	_NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
442 #endif /* DEBUG */
443 
/*
 * View a drmach id as the drmach_common_t header that every drmach
 * object (board, cpu, mem, io) starts with.  The argument is
 * parenthesized so that expression arguments are cast as a whole.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

/*
 * Type predicates.  An object's type is identified by the constructor
 * address stored in its "isa" field; a zero id never matches.
 * Note that "id" is evaluated more than once by each predicate.
 */
#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device object: cpu, mem or io */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any drmach object: board, cpu, mem or io */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
474 
475 #define	DRMACH_INTERNAL_ERROR() \
476 	drerr_new(1, ESTC_INTERNAL, drmach_ie_fmt, __LINE__)
477 static char		*drmach_ie_fmt = "drmach.c %d";
478 
479 static struct {
480 	const char	 *name;
481 	const char	 *type;
482 	sbd_error_t	 *(*new)(drmach_device_t *, drmachid_t *);
483 } drmach_name2type[] = {
484 	{"cmp",			    DRMACH_DEVTYPE_CMP,    NULL },
485 	{"cpu",			    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
486 	{"SUNW,UltraSPARC-III",	    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
487 	{"SUNW,UltraSPARC-III+",    DRMACH_DEVTYPE_CPU,    drmach_cpu_new },
488 	{"memory-controller",	    DRMACH_DEVTYPE_MEM,    drmach_mem_new },
489 	{"pci",			    DRMACH_DEVTYPE_PCI,    drmach_pci_new },
490 	{"SUNW,wci",		    DRMACH_DEVTYPE_WCI,    drmach_io_new  },
491 };
492 
493 /*
494  * drmach autoconfiguration data structures and interfaces
495  */
496 
497 extern struct mod_ops mod_miscops;
498 
499 static struct modlmisc modlmisc = {
500 	&mod_miscops,
501 	"Sun Fire 15000 DR %I%"
502 };
503 
504 static struct modlinkage modlinkage = {
505 	MODREV_1,
506 	(void *)&modlmisc,
507 	NULL
508 };
509 
510 /*
511  * drmach_boards_rwlock is used to synchronize read/write
512  * access to drmach_boards array between status and board lookup
513  * as READERS, and assign, and unassign threads as WRITERS.
514  */
515 static krwlock_t	drmach_boards_rwlock;
516 
517 static kmutex_t		drmach_i_lock;
518 static kmutex_t		drmach_iocage_lock;
519 static kcondvar_t 	drmach_iocage_cv;
520 static int		drmach_iocage_is_busy = 0;
521 uint64_t		drmach_iocage_paddr;
522 static caddr_t		drmach_iocage_vaddr;
523 static int		drmach_iocage_size = 0;
524 static int		drmach_is_cheetah = -1;
525 
526 int
527 _init(void)
528 {
529 	int	err;
530 
531 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
532 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
533 	drmach_xt_mb_size = NCPU * sizeof (uchar_t);
534 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
535 	    drmach_xt_mb_size, VM_SLEEP);
536 	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
537 	if ((err = mod_install(&modlinkage)) != 0) {
538 		mutex_destroy(&drmach_i_lock);
539 		rw_destroy(&drmach_boards_rwlock);
540 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
541 		    drmach_xt_mb_size);
542 	}
543 
544 	return (err);
545 }
546 
547 int
548 _fini(void)
549 {
550 	static void	drmach_fini(void);
551 	int		err;
552 
553 	if ((err = mod_remove(&modlinkage)) == 0)
554 		drmach_fini();
555 
556 	return (err);
557 }
558 
559 int
560 _info(struct modinfo *modinfop)
561 {
562 	return (mod_info(&modlinkage, modinfop));
563 }
564 
565 /*
566  * drmach_node_* routines serve the purpose of separating the
567  * rest of the code from the device tree and OBP.  This is necessary
568  * because of In-Kernel-Probing.  Devices probed after stod, are probed
569  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
570  * have dnode ids.
571  */
572 
573 static int
574 drmach_node_obp_get_parent(drmach_node_t *np, drmach_node_t *pp)
575 {
576 	pnode_t		nodeid;
577 	static char	*fn = "drmach_node_obp_get_parent";
578 
579 	nodeid = np->get_dnode(np);
580 	if (nodeid == OBP_NONODE) {
581 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
582 		return (-1);
583 	}
584 
585 	bcopy(np, pp, sizeof (drmach_node_t));
586 
587 	pp->here = (void *)(uintptr_t)prom_parentnode(nodeid);
588 	if (pp->here == OBP_NONODE) {
589 		cmn_err(CE_WARN, "%s: invalid parent dnode", fn);
590 		return (-1);
591 	}
592 
593 	return (0);
594 }
595 
596 static pnode_t
597 drmach_node_obp_get_dnode(drmach_node_t *np)
598 {
599 	return ((pnode_t)(uintptr_t)np->here);
600 }
601 
602 typedef struct {
603 	drmach_node_walk_args_t	*nwargs;
604 	int 			(*cb)(drmach_node_walk_args_t *args);
605 	int			err;
606 } drmach_node_ddi_walk_args_t;
607 
608 int
609 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
610 {
611 	drmach_node_ddi_walk_args_t	*nargs;
612 
613 	nargs = (drmach_node_ddi_walk_args_t *)arg;
614 
615 	/*
616 	 * dip doesn't have to be held here as we are called
617 	 * from ddi_walk_devs() which holds the dip.
618 	 */
619 	nargs->nwargs->node->here = (void *)dip;
620 
621 	nargs->err = nargs->cb(nargs->nwargs);
622 
623 	/*
624 	 * Set "here" to NULL so that unheld dip is not accessible
625 	 * outside ddi_walk_devs()
626 	 */
627 	nargs->nwargs->node->here = NULL;
628 
629 	if (nargs->err)
630 		return (DDI_WALK_TERMINATE);
631 	else
632 		return (DDI_WALK_CONTINUE);
633 }
634 
635 static int
636 drmach_node_ddi_walk(drmach_node_t *np, void *data,
637 		int (*cb)(drmach_node_walk_args_t *args))
638 {
639 	drmach_node_walk_args_t		args;
640 	drmach_node_ddi_walk_args_t	nargs;
641 
642 	/* initialized args structure for callback */
643 	args.node = np;
644 	args.data = data;
645 
646 	nargs.nwargs = &args;
647 	nargs.cb = cb;
648 	nargs.err = 0;
649 
650 	/*
651 	 * Root node doesn't have to be held in any way.
652 	 */
653 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
654 		(void *)&nargs);
655 
656 	return (nargs.err);
657 }
658 
659 static int
660 drmach_node_obp_walk(drmach_node_t *np, void *data,
661 		int (*cb)(drmach_node_walk_args_t *args))
662 {
663 	pnode_t			nodeid;
664 	int			rv;
665 	drmach_node_walk_args_t	args;
666 
667 	/* initialized args structure for callback */
668 	args.node = np;
669 	args.data = data;
670 
671 	nodeid = prom_childnode(prom_rootnode());
672 
673 	/* save our new position within the tree */
674 	np->here = (void *)(uintptr_t)nodeid;
675 
676 	rv = 0;
677 	while (nodeid != OBP_NONODE) {
678 
679 		pnode_t child;
680 
681 		rv = (*cb)(&args);
682 		if (rv)
683 			break;
684 
685 		child = prom_childnode(nodeid);
686 		np->here = (void *)(uintptr_t)child;
687 
688 		while (child != OBP_NONODE) {
689 			rv = (*cb)(&args);
690 			if (rv)
691 				break;
692 
693 			child = prom_nextnode(child);
694 			np->here = (void *)(uintptr_t)child;
695 		}
696 
697 		nodeid = prom_nextnode(nodeid);
698 
699 		/* save our new position within the tree */
700 		np->here = (void *)(uintptr_t)nodeid;
701 	}
702 
703 	return (rv);
704 }
705 
706 static int
707 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
708 {
709 	dev_info_t	*ndip;
710 	static char	*fn = "drmach_node_ddi_get_parent";
711 
712 	ndip = np->n_getdip(np);
713 	if (ndip == NULL) {
714 		cmn_err(CE_WARN, "%s: NULL dip", fn);
715 		return (-1);
716 	}
717 
718 	bcopy(np, pp, sizeof (drmach_node_t));
719 
720 	pp->here = (void *)ddi_get_parent(ndip);
721 	if (pp->here == NULL) {
722 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
723 		return (-1);
724 	}
725 
726 	return (0);
727 }
728 
729 /*ARGSUSED*/
730 static pnode_t
731 drmach_node_ddi_get_dnode(drmach_node_t *np)
732 {
733 	return ((pnode_t)NULL);
734 }
735 
736 static drmach_node_t *
737 drmach_node_new(void)
738 {
739 	drmach_node_t *np;
740 
741 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
742 
743 	if (drmach_initialized) {
744 		np->get_dnode = drmach_node_ddi_get_dnode;
745 		np->walk = drmach_node_ddi_walk;
746 		np->n_getdip = drmach_node_ddi_get_dip;
747 		np->n_getproplen = drmach_node_ddi_get_proplen;
748 		np->n_getprop = drmach_node_ddi_get_prop;
749 		np->get_parent = drmach_node_ddi_get_parent;
750 	} else {
751 		np->get_dnode = drmach_node_obp_get_dnode;
752 		np->walk = drmach_node_obp_walk;
753 		np->n_getdip = drmach_node_obp_get_dip;
754 		np->n_getproplen = drmach_node_obp_get_proplen;
755 		np->n_getprop = drmach_node_obp_get_prop;
756 		np->get_parent = drmach_node_obp_get_parent;
757 	}
758 
759 	return (np);
760 }
761 
762 static void
763 drmach_node_dispose(drmach_node_t *np)
764 {
765 	kmem_free(np, sizeof (*np));
766 }
767 
768 /*
769  * Check if a CPU node is part of a CMP.
770  */
771 static int
772 drmach_is_cmp_child(dev_info_t *dip)
773 {
774 	dev_info_t *pdip;
775 
776 	if (strcmp(ddi_node_name(dip), DRMACH_CPU_NAMEPROP) != 0) {
777 		return (0);
778 	}
779 
780 	pdip = ddi_get_parent(dip);
781 
782 	ASSERT(pdip);
783 
784 	if (strcmp(ddi_node_name(pdip), DRMACH_CMP_NAMEPROP) == 0) {
785 		return (1);
786 	}
787 
788 	return (0);
789 }
790 
791 static dev_info_t *
792 drmach_node_obp_get_dip(drmach_node_t *np)
793 {
794 	pnode_t		nodeid;
795 	dev_info_t	*dip;
796 
797 	nodeid = np->get_dnode(np);
798 	if (nodeid == OBP_NONODE)
799 		return (NULL);
800 
801 	dip = e_ddi_nodeid_to_dip(nodeid);
802 	if (dip) {
803 		/*
804 		 * The branch rooted at dip will have been previously
805 		 * held, or it will be the child of a CMP. In either
806 		 * case, the hold acquired in e_ddi_nodeid_to_dip()
807 		 * is not needed.
808 		 */
809 		ddi_release_devi(dip);
810 		ASSERT(drmach_is_cmp_child(dip) || e_ddi_branch_held(dip));
811 	}
812 
813 	return (dip);
814 }
815 
816 static dev_info_t *
817 drmach_node_ddi_get_dip(drmach_node_t *np)
818 {
819 	return ((dev_info_t *)np->here);
820 }
821 
822 static int
823 drmach_node_walk(drmach_node_t *np, void *param,
824 		int (*cb)(drmach_node_walk_args_t *args))
825 {
826 	return (np->walk(np, param, cb));
827 }
828 
829 static int
830 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
831 {
832 	int		rv = 0;
833 	dev_info_t	*ndip;
834 	static char	*fn = "drmach_node_ddi_get_prop";
835 
836 	ndip = np->n_getdip(np);
837 	if (ndip == NULL) {
838 		cmn_err(CE_WARN, "%s: NULL dip", fn);
839 		rv = -1;
840 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
841 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, name,
842 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
843 		rv = -1;
844 	}
845 
846 	return (rv);
847 }
848 
849 /* ARGSUSED */
850 static int
851 drmach_node_obp_get_prop(drmach_node_t *np, char *name, void *buf, int len)
852 {
853 	int		rv = 0;
854 	pnode_t		nodeid;
855 	static char	*fn = "drmach_node_obp_get_prop";
856 
857 	nodeid = np->get_dnode(np);
858 	if (nodeid == OBP_NONODE) {
859 		cmn_err(CE_WARN, "%s: invalid dnode", fn);
860 		rv = -1;
861 	} else if (prom_getproplen(nodeid, (caddr_t)name) < 0) {
862 		rv = -1;
863 	} else {
864 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
865 	}
866 
867 	return (rv);
868 }
869 
870 static int
871 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
872 {
873 	int		rv = 0;
874 	dev_info_t	*ndip;
875 
876 	ndip = np->n_getdip(np);
877 	if (ndip == NULL) {
878 		rv = -1;
879 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
880 			name, len) != DDI_PROP_SUCCESS) {
881 		rv = -1;
882 	}
883 
884 	return (rv);
885 }
886 
887 static int
888 drmach_node_obp_get_proplen(drmach_node_t *np, char *name, int *len)
889 {
890 	pnode_t	 nodeid;
891 	int	 rv;
892 
893 	nodeid = np->get_dnode(np);
894 	if (nodeid == OBP_NONODE)
895 		rv = -1;
896 	else {
897 		*len = prom_getproplen(nodeid, (caddr_t)name);
898 		rv = (*len < 0 ? -1 : 0);
899 	}
900 
901 	return (rv);
902 }
903 
904 static drmachid_t
905 drmach_node_dup(drmach_node_t *np)
906 {
907 	drmach_node_t *dup;
908 
909 	dup = drmach_node_new();
910 	dup->here = np->here;
911 	dup->get_dnode = np->get_dnode;
912 	dup->walk = np->walk;
913 	dup->n_getdip = np->n_getdip;
914 	dup->n_getproplen = np->n_getproplen;
915 	dup->n_getprop = np->n_getprop;
916 	dup->get_parent = np->get_parent;
917 
918 	return (dup);
919 }
920 
921 /*
922  * drmach_array provides convenient array construction, access,
923  * bounds checking and array destruction logic.
924  */
925 
926 static drmach_array_t *
927 drmach_array_new(int min_index, int max_index)
928 {
929 	drmach_array_t *arr;
930 
931 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
932 
933 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
934 	if (arr->arr_sz > 0) {
935 		arr->min_index = min_index;
936 		arr->max_index = max_index;
937 
938 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
939 		return (arr);
940 	} else {
941 		kmem_free(arr, sizeof (*arr));
942 		return (0);
943 	}
944 }
945 
946 static int
947 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
948 {
949 	if (idx < arr->min_index || idx > arr->max_index)
950 		return (-1);
951 	else {
952 		arr->arr[idx - arr->min_index] = val;
953 		return (0);
954 	}
955 	/*NOTREACHED*/
956 }
957 
958 static int
959 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
960 {
961 	if (idx < arr->min_index || idx > arr->max_index)
962 		return (-1);
963 	else {
964 		*val = arr->arr[idx - arr->min_index];
965 		return (0);
966 	}
967 	/*NOTREACHED*/
968 }
969 
970 static int
971 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
972 {
973 	int rv;
974 
975 	*idx = arr->min_index;
976 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
977 		*idx += 1;
978 
979 	return (rv);
980 }
981 
982 static int
983 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
984 {
985 	int rv;
986 
987 	*idx += 1;
988 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
989 		*idx += 1;
990 
991 	return (rv);
992 }
993 
994 static void
995 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
996 {
997 	drmachid_t	val;
998 	int		idx;
999 	int		rv;
1000 
1001 	rv = drmach_array_first(arr, &idx, &val);
1002 	while (rv == 0) {
1003 		(*disposer)(val);
1004 
1005 		/* clear the array entry */
1006 		rv = drmach_array_set(arr, idx, NULL);
1007 		ASSERT(rv == 0);
1008 
1009 		rv = drmach_array_next(arr, &idx, &val);
1010 	}
1011 
1012 	kmem_free(arr->arr, arr->arr_sz);
1013 	kmem_free(arr, sizeof (*arr));
1014 }
1015 
1016 
1017 static gdcd_t *
1018 drmach_gdcd_new()
1019 {
1020 	gdcd_t *gdcd;
1021 
1022 	gdcd = kmem_zalloc(sizeof (gdcd_t), KM_SLEEP);
1023 
1024 	/* read the gdcd, bail if magic or ver #s are not what is expected */
1025 	if (iosram_rd(GDCD_MAGIC, 0, sizeof (gdcd_t), (caddr_t)gdcd)) {
1026 bail:
1027 		kmem_free(gdcd, sizeof (gdcd_t));
1028 		return (NULL);
1029 	} else if (gdcd->h.dcd_magic != GDCD_MAGIC) {
1030 		goto bail;
1031 	} else if (gdcd->h.dcd_version != DCD_VERSION) {
1032 		goto bail;
1033 	}
1034 
1035 	return (gdcd);
1036 }
1037 
1038 static void
1039 drmach_gdcd_dispose(gdcd_t *gdcd)
1040 {
1041 	kmem_free(gdcd, sizeof (gdcd_t));
1042 }
1043 
1044 /*ARGSUSED*/
1045 sbd_error_t *
1046 drmach_configure(drmachid_t id, int flags)
1047 {
1048 	drmach_device_t	*dp;
1049 	dev_info_t	*rdip;
1050 	sbd_error_t	*err = NULL;
1051 
1052 	/*
1053 	 * On Starcat, there is no CPU driver, so it is
1054 	 * not necessary to configure any CPU nodes.
1055 	 */
1056 	if (DRMACH_IS_CPU_ID(id)) {
1057 		return (NULL);
1058 	}
1059 
1060 	for (; id; ) {
1061 		dev_info_t	*fdip = NULL;
1062 
1063 		if (!DRMACH_IS_DEVICE_ID(id))
1064 			return (drerr_new(0, ESTC_INAPPROP, NULL));
1065 		dp = id;
1066 
1067 		rdip = dp->node->n_getdip(dp->node);
1068 
1069 		/*
1070 		 * We held this branch earlier, so at a minimum its
1071 		 * root should still be present in the device tree.
1072 		 */
1073 		ASSERT(rdip);
1074 
1075 		DRMACH_PR("drmach_configure: configuring DDI branch");
1076 
1077 		ASSERT(e_ddi_branch_held(rdip));
1078 		if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
1079 			if (err == NULL) {
1080 				/*
1081 				 * Record first failure but don't stop
1082 				 */
1083 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1084 				dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
1085 
1086 				(void) ddi_pathname(dip, path);
1087 				err = drerr_new(1, ESTC_DRVFAIL, path);
1088 
1089 				kmem_free(path, MAXPATHLEN);
1090 			}
1091 
1092 			/*
1093 			 * If non-NULL, fdip is returned held and must be
1094 			 * released.
1095 			 */
1096 			if (fdip != NULL) {
1097 				ddi_release_devi(fdip);
1098 			}
1099 		}
1100 
1101 		if (DRMACH_IS_MEM_ID(id)) {
1102 			drmach_mem_t	*mp = id;
1103 			id = mp->next;
1104 		} else {
1105 			id = NULL;
1106 		}
1107 	}
1108 
1109 	return (err);
1110 }
1111 
1112 static sbd_error_t *
1113 drmach_device_new(drmach_node_t *node,
1114 	drmach_board_t *bp, int portid, drmachid_t *idp)
1115 {
1116 	int		i, rv, device_id, unum;
1117 	char		name[OBP_MAXDRVNAME];
1118 	drmach_device_t	proto;
1119 
1120 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1121 	if (rv) {
1122 		sbd_error_t *err;
1123 
1124 		/* every node is expected to have a name */
1125 		err = drerr_new(1, ESTC_GETPROP,
1126 			"dip: 0x%p: property %s",
1127 			node->n_getdip(node), OBP_NAME);
1128 
1129 		return (err);
1130 	}
1131 
1132 	i = drmach_name2type_idx(name);
1133 
1134 	if (i < 0 || strcmp(name, "cmp") == 0) {
1135 		/*
1136 		 * Not a node of interest to dr - including "cmp",
1137 		 * but it is in drmach_name2type[], which lets gptwocfg
1138 		 * driver to check if node is OBP created.
1139 		 */
1140 		*idp = (drmachid_t)0;
1141 		return (NULL);
1142 	}
1143 
1144 	/*
1145 	 * Derive a best-guess unit number from the portid value.
1146 	 * Some drmach_*_new constructors (drmach_pci_new, for example)
1147 	 * will overwrite the prototype unum value with one that is more
1148 	 * appropriate for the device.
1149 	 */
1150 	device_id = portid & 0x1f;
1151 	if (device_id < 4)
1152 		unum = device_id;
1153 	else if (device_id == 8) {
1154 		unum = 0;
1155 	} else if (device_id == 9) {
1156 		unum = 1;
1157 	} else if (device_id == 0x1c) {
1158 		unum = 0;
1159 	} else if (device_id == 0x1d) {
1160 		unum = 1;
1161 	} else {
1162 		return (DRMACH_INTERNAL_ERROR());
1163 	}
1164 
1165 	bzero(&proto, sizeof (proto));
1166 	proto.type = drmach_name2type[i].type;
1167 	proto.bp = bp;
1168 	proto.node = node;
1169 	proto.portid = portid;
1170 	proto.unum = unum;
1171 
1172 	return (drmach_name2type[i].new(&proto, idp));
1173 }
1174 
1175 static void
1176 drmach_device_dispose(drmachid_t id)
1177 {
1178 	drmach_device_t *self = id;
1179 
1180 	self->cm.dispose(id);
1181 }
1182 
1183 static drmach_board_t *
1184 drmach_board_new(int bnum)
1185 {
1186 	static sbd_error_t *drmach_board_release(drmachid_t);
1187 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1188 
1189 	drmach_board_t	*bp;
1190 
1191 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1192 
1193 	bp->cm.isa = (void *)drmach_board_new;
1194 	bp->cm.release = drmach_board_release;
1195 	bp->cm.status = drmach_board_status;
1196 
1197 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1198 
1199 	bp->bnum = bnum;
1200 	bp->devices = NULL;
1201 	bp->tree = drmach_node_new();
1202 
1203 	drmach_array_set(drmach_boards, bnum, bp);
1204 	return (bp);
1205 }
1206 
1207 static void
1208 drmach_board_dispose(drmachid_t id)
1209 {
1210 	drmach_board_t *bp;
1211 
1212 	ASSERT(DRMACH_IS_BOARD_ID(id));
1213 	bp = id;
1214 
1215 	if (bp->tree)
1216 		drmach_node_dispose(bp->tree);
1217 
1218 	if (bp->devices)
1219 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1220 
1221 	kmem_free(bp, sizeof (*bp));
1222 }
1223 
1224 static sbd_error_t *
1225 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1226 {
1227 	sbd_error_t	*err = NULL;
1228 	drmach_board_t	*bp;
1229 	caddr_t		obufp;
1230 	dr_showboard_t	shb;
1231 
1232 	if (!DRMACH_IS_BOARD_ID(id))
1233 		return (drerr_new(0, ESTC_INAPPROP, NULL));
1234 
1235 	bp = id;
1236 
1237 	/*
1238 	 * we need to know if the board's connected before
1239 	 * issuing a showboard message.  If it's connected, we just
1240 	 * reply with status composed of cached info
1241 	 */
1242 
1243 	if (!bp->connected) {
1244 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1245 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bp->bnum, obufp,
1246 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
1247 			sizeof (dr_showboard_t));
1248 
1249 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1250 		if (err)
1251 			return (err);
1252 
1253 		bp->connected = (shb.bd_assigned && shb.bd_active);
1254 		strncpy(bp->type, shb.board_type, sizeof (bp->type));
1255 		stat->assigned = bp->assigned = shb.bd_assigned;
1256 		stat->powered = bp->powered = shb.power_on;
1257 		stat->empty = bp->empty = shb.slot_empty;
1258 
1259 		switch (shb.test_status) {
1260 			case DR_TEST_STATUS_UNKNOWN:
1261 			case DR_TEST_STATUS_IPOST:
1262 			case DR_TEST_STATUS_ABORTED:
1263 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1264 				break;
1265 			case DR_TEST_STATUS_PASSED:
1266 				stat->cond = bp->cond = SBD_COND_OK;
1267 				break;
1268 			case DR_TEST_STATUS_FAILED:
1269 				stat->cond = bp->cond = SBD_COND_FAILED;
1270 				break;
1271 			default:
1272 				stat->cond = bp->cond = SBD_COND_UNKNOWN;
1273 				DRMACH_PR("Unknown test status=0x%x from SC\n",
1274 					shb.test_status);
1275 				break;
1276 
1277 		}
1278 
1279 		strncpy(stat->type, shb.board_type, sizeof (stat->type));
1280 		snprintf(stat->info, sizeof (stat->info), "Test Level=%d",
1281 			shb.test_level);
1282 	} else {
1283 		stat->assigned = bp->assigned;
1284 		stat->powered = bp->powered;
1285 		stat->empty = bp->empty;
1286 		stat->cond = bp->cond;
1287 		strncpy(stat->type, bp->type, sizeof (stat->type));
1288 	}
1289 
1290 	stat->busy = 0;			/* assume not busy */
1291 	stat->configured = 0;		/* assume not configured */
1292 	if (bp->devices) {
1293 		int		 rv;
1294 		int		 d_idx;
1295 		drmachid_t	 d_id;
1296 
1297 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1298 		while (rv == 0) {
1299 			drmach_status_t	d_stat;
1300 
1301 			err = drmach_i_status(d_id, &d_stat);
1302 			if (err)
1303 				break;
1304 
1305 			stat->busy |= d_stat.busy;
1306 			stat->configured |= d_stat.configured;
1307 
1308 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1309 		}
1310 	}
1311 
1312 	return (err);
1313 }
1314 
1315 typedef struct drmach_msglist {
1316 	kcondvar_t		s_cv; 		/* condvar for sending msg */
1317 	kmutex_t		s_lock;		/* mutex for sending */
1318 	kcondvar_t		g_cv;		/* condvar for getting reply */
1319 	kmutex_t		g_lock;		/* mutex for getting reply */
1320 	struct drmach_msglist	*prev;		/* link to previous entry */
1321 	struct drmach_msglist	*next;		/* link to next entry */
1322 	struct drmach_msglist	*link;		/* link to related entry */
1323 	caddr_t			o_buf;		/* address of output buffer */
1324 	caddr_t			i_buf; 		/* address of input buffer */
1325 	uint32_t		o_buflen;	/* output buffer length */
1326 	uint32_t		i_buflen;	/* input buffer length */
1327 	uint32_t		msgid;		/* message identifier */
1328 	int			o_nretry;	/* number of sending retries */
1329 	int			f_error;	/* mailbox framework error */
1330 	uint8_t			e_code;		/* error code returned by SC */
1331 	uint8_t			p_flag	:1,	/* successfully putmsg */
1332 				m_reply	:1,	/* msg reply received */
1333 				unused	:6;
1334 } drmach_msglist_t;
1335 
/*
 * Mailbox global state.  As used by the code in this file:
 *
 *  - drmach_msglist_first/last and drmach_msgid are read and modified with
 *    drmach_msglist_mutex held (drmach_msglist_link, drmach_msglist_unlink,
 *    drmach_get_msgid).  drmach_get_msgid never hands out the value 0.
 *  - drmach_mbox_iflag and drmach_mbox_ipending are read and modified with
 *    drmach_g_mbox_mutex held.  ipending is set when drmach_mbox_reinit has
 *    been dispatched and is cleared by drmach_mbox_reinit itself.
 *  - drmach_ri_mbox_mutex is held for the whole of drmach_mbox_reinit.
 *  - The *_thread_run flags keep the getmsg/sendmsg loops running while they
 *    are nonzero; each loop function stores -1 in its flag just before it
 *    calls thread_exit().
 */
1336 kmutex_t		drmach_g_mbox_mutex;	/* mutex for mailbox globals */
1337 kmutex_t		drmach_ri_mbox_mutex;	/* mutex for mailbox reinit */
1338 kmutex_t		drmach_msglist_mutex;	/* mutex for message list */
1339 drmach_msglist_t	*drmach_msglist_first;	/* first entry in msg list */
1340 drmach_msglist_t	*drmach_msglist_last;	/* last entry in msg list */
1341 uint32_t		drmach_msgid;		/* current message id */
1342 kthread_t		*drmach_getmsg_thread;	/* ptr to getmsg thread */
1343 volatile int		drmach_getmsg_thread_run; /* run flag for getmsg thr */
1344 kmutex_t		drmach_sendmsg_mutex;	/* mutex for sendmsg cv */
1345 kcondvar_t		drmach_sendmsg_cv;	/* signaled to send new msg */
1346 kthread_t		*drmach_sendmsg_thread; /* ptr to sendmsg thread */
1347 volatile int		drmach_sendmsg_thread_run; /* run flag for sendmsg */
1348 int			drmach_mbox_istate;	/* mailbox init state */
1349 int			drmach_mbox_iflag;	/* set if init'd with SC */
1350 int			drmach_mbox_ipending;	/* set if reinit scheduled */
1351 
1352 /*
1353  * Timeout values (in seconds) used when waiting for replies (from the SC) to
1354  * requests that we sent.  Since we only receive boardevent messages, and they
1355  * are events rather than replies, there is no boardevent timeout.
1356  */
1357 int	drmach_to_mbxinit	= 60;		/* 1 minute */
1358 int	drmach_to_assign	= 60;		/* 1 minute */
1359 int	drmach_to_unassign	= 60;		/* 1 minute */
1360 int	drmach_to_claim		= 3600;		/* 1 hour */
1361 int	drmach_to_unclaim	= 3600;		/* 1 hour */
1362 int	drmach_to_poweron	= 480;		/* 8 minutes */
1363 int	drmach_to_poweroff	= 480;		/* 8 minutes */
1364 int	drmach_to_testboard	= 43200;	/* 12 hours */
1365 int	drmach_to_aborttest	= 180;		/* 3 minutes */
1366 int	drmach_to_showboard	= 180;		/* 3 minutes */
1367 int	drmach_to_unconfig	= 180;		/* 3 minutes */
1368 
1369 /*
1370  * Delay (in seconds) used after receiving a non-transient error indication from
1371  * an mboxsc_getmsg call in the thread that loops waiting for incoming messages.
1372  */
/* drmach_mbox_getmsg multiplies this by hz and passes the result to delay() */
1373 int	drmach_mbxerr_delay	= 15;		/* 15 seconds */
1374 
1375 /*
1376  * Timeout values (in milliseconds) for mboxsc_putmsg and mboxsc_getmsg calls.
1377  */
/*
 * drmach_to_putmsg is the timeout argument drmach_mbox_sendmsg passes to
 * mboxsc_putmsg; drmach_to_getmsg is the one drmach_mbox_getmsg passes to
 * mboxsc_getmsg.
 */
1378 clock_t	drmach_to_putmsg;			/* set in drmach_mbox_init */
1379 clock_t	drmach_to_getmsg	= 31000;	/* 31 seconds */
1380 
1381 /*
1382  * Normally, drmach_to_putmsg is set dynamically during initialization in
1383  * drmach_mbox_init.  This has the potentially undesirable side effect of
1384  * clobbering any value that might have been set in /etc/system.  To prevent
1385  * dynamic setting of drmach_to_putmsg (thereby allowing it to be tuned in
1386  * /etc/system), set drmach_use_tuned_putmsg_to to 1.
1387  */
1388 int	drmach_use_tuned_putmsg_to	= 0;
1389 
1390 
1391 /* maximum conceivable message size for future mailbox protocol versions */
/* sizes the static receive buffers in drmach_mbox_event and drmach_mbox_getmsg */
1392 #define	DRMACH_MAX_MBOX_MSG_SIZE	4096
1393 
1394 /*ARGSUSED*/
1395 void
1396 drmach_mbox_prmsg(dr_mbox_msg_t *mbp, int dir)
1397 {
1398 	int		i, j;
1399 	dr_memregs_t	*memregs;
1400 	dr_proto_hdr_t	*php = &mbp->p_hdr;
1401 	dr_msg_t	*mp = &mbp->msgdata;
1402 
1403 #ifdef DEBUG
1404 	switch (php->command) {
1405 		case DRMSG_BOARDEVENT:
1406 			if (dir) {
1407 				DRMACH_PR("ERROR!! outgoing BOARDEVENT\n");
1408 			} else {
1409 				DRMACH_PR("BOARDEVENT received:\n");
1410 				DRMACH_PR("init=%d ins=%d rem=%d asgn=%d\n",
1411 					mp->dm_be.initialized,
1412 					mp->dm_be.board_insertion,
1413 					mp->dm_be.board_removal,
1414 					mp->dm_be.slot_assign);
1415 				DRMACH_PR("unasgn=%d avail=%d unavail=%d\n",
1416 					mp->dm_be.slot_unassign,
1417 					mp->dm_be.slot_avail,
1418 					mp->dm_be.slot_unavail);
1419 			}
1420 			break;
1421 		case DRMSG_MBOX_INIT:
1422 			if (dir) {
1423 				DRMACH_PR("MBOX_INIT Request:\n");
1424 			} else {
1425 				DRMACH_PR("MBOX_INIT Reply:\n");
1426 			}
1427 			break;
1428 		case DRMSG_ASSIGN:
1429 			if (dir) {
1430 				DRMACH_PR("ASSIGN Request:\n");
1431 			} else {
1432 				DRMACH_PR("ASSIGN Reply:\n");
1433 			}
1434 			break;
1435 		case DRMSG_UNASSIGN:
1436 			if (dir) {
1437 				DRMACH_PR("UNASSIGN Request:\n");
1438 			} else {
1439 				DRMACH_PR("UNASSIGN Reply:\n");
1440 			}
1441 			break;
1442 		case DRMSG_CLAIM:
1443 			if (!dir) {
1444 				DRMACH_PR("CLAIM Reply:\n");
1445 				break;
1446 			}
1447 
1448 			DRMACH_PR("CLAIM Request:\n");
1449 			for (i = 0; i < 18; ++i) {
1450 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1451 					mp->dm_cr.mem_slice[i].valid,
1452 					mp->dm_cr.mem_slice[i].slice);
1453 				memregs = &(mp->dm_cr.mem_regs[i]);
1454 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1455 					DRMACH_PR("  MC %2d: "
1456 						"MADR[%d] = 0x%lx, "
1457 						"MADR[%d] = 0x%lx\n", j,
1458 						0, DRMACH_MCREG_TO_U64(
1459 						memregs->madr[j][0]),
1460 						1, DRMACH_MCREG_TO_U64(
1461 						memregs->madr[j][1]));
1462 					DRMACH_PR("       : "
1463 						"MADR[%d] = 0x%lx, "
1464 						"MADR[%d] = 0x%lx\n",
1465 						2, DRMACH_MCREG_TO_U64(
1466 						memregs->madr[j][2]),
1467 						3, DRMACH_MCREG_TO_U64(
1468 						memregs->madr[j][3]));
1469 				}
1470 			}
1471 			break;
1472 		case DRMSG_UNCLAIM:
1473 			if (!dir) {
1474 				DRMACH_PR("UNCLAIM Reply:\n");
1475 				break;
1476 			}
1477 
1478 			DRMACH_PR("UNCLAIM Request:\n");
1479 			for (i = 0; i < 18; ++i) {
1480 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1481 					mp->dm_ur.mem_slice[i].valid,
1482 					mp->dm_ur.mem_slice[i].slice);
1483 				memregs = &(mp->dm_ur.mem_regs[i]);
1484 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1485 					DRMACH_PR("  MC %2d: "
1486 						"MADR[%d] = 0x%lx, "
1487 						"MADR[%d] = 0x%lx\n", j,
1488 						0, DRMACH_MCREG_TO_U64(
1489 						memregs->madr[j][0]),
1490 						1, DRMACH_MCREG_TO_U64(
1491 						memregs->madr[j][1]));
1492 					DRMACH_PR("       : "
1493 						"MADR[%d] = 0x%lx, "
1494 						"MADR[%d] = 0x%lx\n",
1495 						2, DRMACH_MCREG_TO_U64(
1496 						memregs->madr[j][2]),
1497 						3, DRMACH_MCREG_TO_U64(
1498 						memregs->madr[j][3]));
1499 				}
1500 			}
1501 			DRMACH_PR(" mem_clear=%d\n", mp->dm_ur.mem_clear);
1502 			break;
1503 		case DRMSG_UNCONFIG:
1504 			if (!dir) {
1505 				DRMACH_PR("UNCONFIG Reply:\n");
1506 				break;
1507 			}
1508 
1509 			DRMACH_PR("UNCONFIG Request:\n");
1510 			for (i = 0; i < 18; ++i) {
1511 				DRMACH_PR("exp%d: val=%d slice=0x%x\n", i,
1512 					mp->dm_uc.mem_slice[i].valid,
1513 					mp->dm_uc.mem_slice[i].slice);
1514 				memregs = &(mp->dm_uc.mem_regs[i]);
1515 				for (j = 0; j < S0_LPORT_COUNT; j++) {
1516 					DRMACH_PR("  MC %2d: "
1517 						"MADR[%d] = 0x%lx, "
1518 						"MADR[%d] = 0x%lx\n", j,
1519 						0, DRMACH_MCREG_TO_U64(
1520 						memregs->madr[j][0]),
1521 						1, DRMACH_MCREG_TO_U64(
1522 						memregs->madr[j][1]));
1523 					DRMACH_PR("       : "
1524 						"MADR[%d] = 0x%lx, "
1525 						"MADR[%d] = 0x%lx\n",
1526 						2, DRMACH_MCREG_TO_U64(
1527 						memregs->madr[j][2]),
1528 						3, DRMACH_MCREG_TO_U64(
1529 						memregs->madr[j][3]));
1530 				}
1531 			}
1532 			break;
1533 		case DRMSG_POWERON:
1534 			if (dir) {
1535 				DRMACH_PR("POWERON Request:\n");
1536 			} else {
1537 				DRMACH_PR("POWERON Reply:\n");
1538 			}
1539 			break;
1540 		case DRMSG_POWEROFF:
1541 			if (dir) {
1542 				DRMACH_PR("POWEROFF Request:\n");
1543 			} else {
1544 				DRMACH_PR("POWEROFF Reply:\n");
1545 			}
1546 			break;
1547 		case DRMSG_TESTBOARD:
1548 			if (dir) {
1549 				DRMACH_PR("TESTBOARD Request:\n");
1550 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1551 					mp->dm_tb.memaddrhi,
1552 					mp->dm_tb.memaddrlo);
1553 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1554 					mp->dm_tb.memlen, mp->dm_tb.cpu_portid);
1555 				DRMACH_PR("\tforce=0x%x imm=0x%x\n",
1556 					mp->dm_tb.force, mp->dm_tb.immediate);
1557 			} else {
1558 				DRMACH_PR("TESTBOARD Reply:\n");
1559 				DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1560 					mp->dm_tr.memaddrhi,
1561 					mp->dm_tr.memaddrlo);
1562 				DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1563 					mp->dm_tr.memlen, mp->dm_tr.cpu_portid);
1564 				DRMACH_PR("\trecovered=0x%x test status=0x%x\n",
1565 					mp->dm_tr.cpu_recovered,
1566 					mp->dm_tr.test_status);
1567 
1568 			}
1569 			break;
1570 		case DRMSG_ABORT_TEST:
1571 			if (dir) {
1572 				DRMACH_PR("ABORT_TEST Request:\n");
1573 			} else {
1574 				DRMACH_PR("ABORT_TEST Reply:\n");
1575 			}
1576 
1577 			DRMACH_PR("\tmemaddrhi=0x%x memaddrlo=0x%x ",
1578 					mp->dm_ta.memaddrhi,
1579 					mp->dm_ta.memaddrlo);
1580 			DRMACH_PR("memlen=0x%x cpu_portid=0x%x\n",
1581 					mp->dm_ta.memlen, mp->dm_ta.cpu_portid);
1582 			break;
1583 		case DRMSG_SHOWBOARD:
1584 			if (dir) {
1585 				DRMACH_PR("SHOWBOARD Request:\n");
1586 			} else {
1587 				DRMACH_PR("SHOWBOARD Reply:\n");
1588 
1589 				DRMACH_PR(": empty=%d power=%d assigned=%d",
1590 					mp->dm_sb.slot_empty,
1591 					mp->dm_sb.power_on,
1592 					mp->dm_sb.bd_assigned);
1593 				DRMACH_PR(": active=%d t_status=%d t_level=%d ",
1594 					mp->dm_sb.bd_active,
1595 					mp->dm_sb.test_status,
1596 					mp->dm_sb.test_level);
1597 				DRMACH_PR(": type=%s ", mp->dm_sb.board_type);
1598 			}
1599 			break;
1600 		default:
1601 			DRMACH_PR("Unknown message type\n");
1602 			break;
1603 	}
1604 
1605 	DRMACH_PR("dr hdr:\n\tid=0x%x vers=0x%x cmd=0x%x exp=0x%x slot=0x%x\n",
1606 		php->message_id, php->drproto_version, php->command,
1607 		php->expbrd, php->slot);
1608 #endif
1609 	DRMACH_PR("\treply_status=0x%x error_code=0x%x\n", php->reply_status,
1610 		php->error_code);
1611 
1612 }
1613 
1614 /*
1615  * Callback function passed to taskq_dispatch when a mailbox reinitialization
1616  * handshake needs to be scheduled.  The handshake can't be performed by the
1617  * thread that determines it is needed, in most cases, so this function is
1618  * dispatched on the system-wide taskq pool of threads.  Failure is reported but
1619  * otherwise ignored, since any situation that requires a mailbox initialization
1620  * handshake will continue to request the handshake until it succeeds.
1621  */
1622 static void
1623 drmach_mbox_reinit(void *unused)
1624 {
1625 	_NOTE(ARGUNUSED(unused))
1626 
1627 	caddr_t		obufp = NULL;
1628 	sbd_error_t	*serr = NULL;
1629 
1630 	DRMACH_PR("scheduled mailbox reinit running\n");
1631 
1632 	mutex_enter(&drmach_ri_mbox_mutex);
1633 	mutex_enter(&drmach_g_mbox_mutex);
1634 	if (drmach_mbox_iflag == 0) {
1635 		/* need to initialize the mailbox */
1636 		mutex_exit(&drmach_g_mbox_mutex);
1637 
1638 		cmn_err(CE_NOTE, "!reinitializing DR mailbox");
1639 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
1640 		serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
1641 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
1642 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
1643 
1644 		if (serr) {
1645 			cmn_err(CE_WARN,
1646 				"mbox_init: MBOX_INIT failed ecode=0x%x",
1647 				serr->e_code);
1648 			sbd_err_clear(&serr);
1649 		}
1650 		mutex_enter(&drmach_g_mbox_mutex);
1651 		if (!serr) {
1652 			drmach_mbox_iflag = 1;
1653 		}
1654 	}
1655 	drmach_mbox_ipending = 0;
1656 	mutex_exit(&drmach_g_mbox_mutex);
1657 	mutex_exit(&drmach_ri_mbox_mutex);
1658 }
1659 
1660 /*
1661  * To ensure sufficient compatibility with future versions of the DR mailbox
1662  * protocol, we use a buffer that is large enough to receive the largest message
1663  * that could possibly be sent to us.  However, since that ends up being fairly
1664  * large, allocating it on the stack is a bad idea.  Fortunately, this function
1665  * does not need to be MT-safe since it is only invoked by the mailbox
1666  * framework, which will never invoke it multiple times concurrently.  Since
1667  * that is the case, we can use a static buffer.
1668  */
1669 void
1670 drmach_mbox_event(void)
1671 {
1672 	static uint8_t	buf[DRMACH_MAX_MBOX_MSG_SIZE];
1673 	dr_mbox_msg_t	*msg = (dr_mbox_msg_t *)buf;
1674 	int		err;
1675 	uint32_t	type = MBOXSC_MSG_EVENT;
1676 	uint32_t	command = DRMSG_BOARDEVENT;
1677 	uint64_t	transid = 0;
1678 	uint32_t	length = DRMACH_MAX_MBOX_MSG_SIZE;
1679 	char		*hint = "";
1680 	int		logsys = 0;
1681 
1682 	do {
1683 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1684 			&transid, &length, (void *)msg, 0);
1685 	} while (err == EAGAIN);
1686 
1687 	/* don't try to interpret anything with the wrong version number */
1688 	if ((err == 0) && (msg->p_hdr.drproto_version != DRMBX_VERSION)) {
1689 		cmn_err(CE_WARN, "mailbox version mismatch 0x%x vs 0x%x",
1690 			msg->p_hdr.drproto_version, DRMBX_VERSION);
1691 		mutex_enter(&drmach_g_mbox_mutex);
1692 		drmach_mbox_iflag = 0;
1693 		/* schedule a reinit handshake if one isn't pending */
1694 		if (!drmach_mbox_ipending) {
1695 			if (taskq_dispatch(system_taskq, drmach_mbox_reinit,
1696 				NULL, TQ_NOSLEEP) != NULL) {
1697 				drmach_mbox_ipending = 1;
1698 			} else {
1699 				cmn_err(CE_WARN,
1700 					"failed to schedule mailbox reinit");
1701 			}
1702 		}
1703 		mutex_exit(&drmach_g_mbox_mutex);
1704 		return;
1705 	}
1706 
1707 	if ((err != 0) || (msg->p_hdr.reply_status != DRMSG_REPLY_OK)) {
1708 		cmn_err(CE_WARN,
1709 			"Unsolicited mboxsc_getmsg failed: err=0x%x code=0x%x",
1710 				err, msg->p_hdr.error_code);
1711 	} else {
1712 		dr_boardevent_t	*be;
1713 		be = (dr_boardevent_t *)&msg->msgdata;
1714 
1715 		/* check for initialization event */
1716 		if (be->initialized) {
1717 			mutex_enter(&drmach_g_mbox_mutex);
1718 			drmach_mbox_iflag = 0;
1719 			/* schedule a reinit handshake if one isn't pending */
1720 			if (!drmach_mbox_ipending) {
1721 				if (taskq_dispatch(system_taskq,
1722 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1723 					!= NULL) {
1724 					drmach_mbox_ipending = 1;
1725 				} else {
1726 					cmn_err(CE_WARN,
1727 					"failed to schedule mailbox reinit");
1728 				}
1729 			}
1730 			mutex_exit(&drmach_g_mbox_mutex);
1731 			cmn_err(CE_NOTE, "!Mailbox Init event received");
1732 		}
1733 
1734 		/* anything else will be a log_sysevent call */
1735 
1736 		if (be->board_insertion) {
1737 			DRMACH_PR("Board Insertion event received");
1738 			hint = DR_HINT_INSERT;
1739 			logsys++;
1740 	}
1741 		if (be->board_removal) {
1742 			DRMACH_PR("Board Removal event received");
1743 			hint = DR_HINT_REMOVE;
1744 			logsys++;
1745 		}
1746 		if (be->slot_assign) {
1747 			DRMACH_PR("Slot Assign event received");
1748 			logsys++;
1749 		}
1750 		if (be->slot_unassign) {
1751 			DRMACH_PR("Slot Unassign event received");
1752 			logsys++;
1753 		}
1754 		if (be->slot_avail) {
1755 			DRMACH_PR("Slot Available event received");
1756 			logsys++;
1757 		}
1758 		if (be->slot_unavail) {
1759 			DRMACH_PR("Slot Unavailable event received");
1760 			logsys++;
1761 		}
1762 		if (be->power_on) {
1763 			DRMACH_PR("Power ON event received");
1764 			logsys++;
1765 		}
1766 		if (be->power_off) {
1767 			DRMACH_PR("Power OFF event received");
1768 			logsys++;
1769 		}
1770 
1771 		if (logsys)
1772 			drmach_log_sysevent(
1773 				    DRMACH_EXPSLOT2BNUM(msg->p_hdr.expbrd,
1774 							msg->p_hdr.slot),
1775 				    hint, SE_NOSLEEP, 1);
1776 	}
1777 }
1778 
1779 static uint32_t
1780 drmach_get_msgid()
1781 {
1782 	uint32_t	rv;
1783 	mutex_enter(&drmach_msglist_mutex);
1784 	if (!(++drmach_msgid))
1785 		++drmach_msgid;
1786 	rv = drmach_msgid;
1787 	mutex_exit(&drmach_msglist_mutex);
1788 	return (rv);
1789 }
1790 
1791 /*
1792  *	unlink an entry from the message transaction list
1793  *
1794  *	caller must hold drmach_msglist_mutex
1795  */
1796 void
1797 drmach_msglist_unlink(drmach_msglist_t *entry)
1798 {
1799 	ASSERT(mutex_owned(&drmach_msglist_mutex));
1800 	if (entry->prev) {
1801 		entry->prev->next = entry->next;
1802 		if (entry->next)
1803 			entry->next->prev = entry->prev;
1804 	} else {
1805 		drmach_msglist_first = entry->next;
1806 		if (entry->next)
1807 			entry->next->prev = NULL;
1808 	}
1809 	if (entry == drmach_msglist_last) {
1810 		drmach_msglist_last = entry->prev;
1811 	}
1812 }
1813 
1814 void
1815 drmach_msglist_link(drmach_msglist_t *entry)
1816 {
1817 	mutex_enter(&drmach_msglist_mutex);
1818 	if (drmach_msglist_last) {
1819 		entry->prev = drmach_msglist_last;
1820 		drmach_msglist_last->next = entry;
1821 		drmach_msglist_last = entry;
1822 	} else {
1823 		drmach_msglist_last = drmach_msglist_first = entry;
1824 	}
1825 	mutex_exit(&drmach_msglist_mutex);
1826 }
1827 
/*
 * Body of the reply-receiving thread.
 *
 * While drmach_getmsg_thread_run is nonzero, wait (up to drmach_to_getmsg)
 * in mboxsc_getmsg for a reply message.  A reply whose protocol version
 * matches is paired with the message list entry carrying the same message
 * id: the entry is unlinked, the reply's error code and payload are copied
 * into it, m_reply is set and g_cv is signaled.  A reply with the wrong
 * version is dropped after a mailbox reinit has been scheduled; a reply
 * with no matching entry is only logged.
 *
 * Once the run flag drops to zero, every entry still on the list with
 * p_flag set gets f_error = -1, is signaled on g_cv and is unlinked.
 * The run flag is then set to -1 and the thread exits.
 */
1828 void
1829 drmach_mbox_getmsg()
1830 {
1831 	int			err;
1832 	register int		msgid;
1833 	static uint8_t		buf[DRMACH_MAX_MBOX_MSG_SIZE];
1834 	dr_mbox_msg_t		*msg = (dr_mbox_msg_t *)buf;
1835 	dr_proto_hdr_t		*php;
1836 	drmach_msglist_t	*found, *entry;
1837 	uint32_t		type = MBOXSC_MSG_REPLY;
1838 	uint32_t		command;
1839 	uint64_t		transid;
1840 	uint32_t		length;
1841 
1842 	php = &msg->p_hdr;
1843 
1844 	while (drmach_getmsg_thread_run != 0) {
1845 		/* get a reply message */
1846 		command = 0;
1847 		transid = 0;
1848 		length = DRMACH_MAX_MBOX_MSG_SIZE;
1849 		err = mboxsc_getmsg(KEY_SCDR, &type, &command,
1850 			&transid, &length, (void *)msg, drmach_to_getmsg);
1851 
1852 		if (err) {
1853 			/*
1854 			 * If mboxsc_getmsg returns ETIMEDOUT or EAGAIN, then
1855 			 * the "error" is really just a normal, transient
1856 			 * condition and we can retry the operation right away.
1857 			 * Any other error suggests a more serious problem,
1858 			 * ranging from a message being too big for our buffer
1859 			 * (EMSGSIZE) to total failure of the mailbox layer.
1860 			 * This second class of errors is much less "transient",
1861 			 * so rather than retrying over and over (and getting
1862 			 * the same error over and over) as fast as we can,
1863 			 * we'll sleep for a while before retrying.
1864 			 */
1865 			if ((err != ETIMEDOUT) && (err != EAGAIN)) {
1866 				cmn_err(CE_WARN,
1867 				"mboxsc_getmsg failed, err=0x%x", err);
1868 				delay(drmach_mbxerr_delay * hz);
1869 			}
1870 			continue;
1871 		}
1872 
1873 		drmach_mbox_prmsg(msg, 0);
1874 
		/* replies with an unexpected protocol version are dropped */
1875 		if (php->drproto_version != DRMBX_VERSION) {
1876 			cmn_err(CE_WARN,
1877 				"mailbox version mismatch 0x%x vs 0x%x",
1878 				php->drproto_version, DRMBX_VERSION);
1879 
1880 			mutex_enter(&drmach_g_mbox_mutex);
1881 			drmach_mbox_iflag = 0;
1882 			/* schedule a reinit handshake if one isn't pending */
1883 			if (!drmach_mbox_ipending) {
1884 				if (taskq_dispatch(system_taskq,
1885 					drmach_mbox_reinit, NULL, TQ_NOSLEEP)
1886 					!= NULL) {
1887 					drmach_mbox_ipending = 1;
1888 				} else {
1889 					cmn_err(CE_WARN,
1890 					"failed to schedule mailbox reinit");
1891 				}
1892 			}
1893 			mutex_exit(&drmach_g_mbox_mutex);
1894 
1895 			continue;
1896 		}
1897 
		/*
		 * Find the first list entry with this message id and take it
		 * off the list; drmach_msglist_mutex stays held until the
		 * entry has been updated and signaled.
		 */
1898 		msgid = php->message_id;
1899 		found = NULL;
1900 		mutex_enter(&drmach_msglist_mutex);
1901 		entry = drmach_msglist_first;
1902 		while (entry != NULL) {
1903 			if (entry->msgid == msgid) {
1904 				found = entry;
1905 				drmach_msglist_unlink(entry);
1906 				entry = NULL;
1907 			} else
1908 				entry = entry->next;
1909 		}
1910 
1911 		if (found) {
1912 			mutex_enter(&found->g_lock);
1913 
1914 			found->e_code = php->error_code;
			/*
			 * NOTE(review): the copy length is the i_buflen of
			 * the entry; the length reported by mboxsc_getmsg is
			 * not consulted here.
			 */
1915 			if (found->i_buflen > 0)
1916 				bcopy((caddr_t)&msg->msgdata, found->i_buf,
1917 					found->i_buflen);
1918 			found->m_reply = 1;
1919 
1920 			cv_signal(&found->g_cv);
1921 			mutex_exit(&found->g_lock);
1922 		} else {
1923 			cmn_err(CE_WARN, "!mbox_getmsg: no match for id 0x%x",
1924 			    msgid);
1925 			cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
1926 			    php->command, php->expbrd, php->slot);
1927 		}
1928 
1929 		mutex_exit(&drmach_msglist_mutex);
1930 	}
1931 	cmn_err(CE_WARN, "mbox_getmsg: exiting");
	/*
	 * Fail every entry that was sent (p_flag set) and is still on the
	 * list.  The traversal reads entry->next after the entry has been
	 * unlinked; drmach_msglist_unlink does not modify the next pointer
	 * of the entry it removes.
	 */
1932 	mutex_enter(&drmach_msglist_mutex);
1933 	entry = drmach_msglist_first;
1934 	while (entry != NULL) {
1935 		if (entry->p_flag == 1) {
1936 			entry->f_error = -1;
1937 			mutex_enter(&entry->g_lock);
1938 			cv_signal(&entry->g_cv);
1939 			mutex_exit(&entry->g_lock);
1940 			drmach_msglist_unlink(entry);
1941 		}
1942 		entry = entry->next;
1943 	}
1944 	mutex_exit(&drmach_msglist_mutex);
1945 	drmach_getmsg_thread_run = -1;
1946 	thread_exit();
1947 }
1948 
/*
 * Body of the request-sending thread.
 *
 * While drmach_sendmsg_thread_run is nonzero, scan the message list for
 * entries whose p_flag is still clear and hand each one to mboxsc_putmsg
 * with the entry's s_lock held.  After a successful send, p_flag is set and
 * s_cv is signaled.  EAGAIN and EBUSY cause the same entry to be retried;
 * ETIMEDOUT is retried until the entry's o_nretry count is used up.  When
 * the retries are exhausted, or on any other error, the entry is unlinked,
 * the error is stored in f_error, p_flag is set and s_cv is signaled.
 * After each pass the thread waits on drmach_sendmsg_cv for up to five
 * seconds.
 *
 * Once the run flag drops to zero, every entry still on the list with
 * p_flag clear gets f_error = -1, is signaled on s_cv and is unlinked.
 * drmach_sendmsg_cv and drmach_sendmsg_mutex are then destroyed, the run
 * flag is set to -1 and the thread exits.
 */
1949 void
1950 drmach_mbox_sendmsg()
1951 {
1952 	int		err, retry;
1953 	drmach_msglist_t *entry;
1954 	dr_mbox_msg_t   *mp;
1955 	dr_proto_hdr_t  *php;
1956 
1957 	while (drmach_sendmsg_thread_run != 0) {
1958 		/*
1959 		 * Search through the list to find entries awaiting
1960 		 * transmission to the SC
1961 		 */
1962 		mutex_enter(&drmach_msglist_mutex);
1963 		entry = drmach_msglist_first;
1964 		retry = 0;
1965 		while (entry != NULL) {
1966 			if (entry->p_flag == 1) {
1967 				entry = entry->next;
1968 				continue;
1969 			}
1970 
1971 			mutex_exit(&drmach_msglist_mutex);
1972 
			/*
			 * A nonzero retry count means this is another attempt
			 * at the same entry and its s_lock is still held from
			 * the previous attempt.
			 */
1973 			if (!retry)
1974 				mutex_enter(&entry->s_lock);
1975 			mp = (dr_mbox_msg_t *)entry->o_buf;
1976 			php = &mp->p_hdr;
1977 
1978 			drmach_mbox_prmsg(mp, 1);
1979 
1980 			err = mboxsc_putmsg(KEY_DRSC, MBOXSC_MSG_REQUEST,
1981 				php->command, NULL, entry->o_buflen, (void *)mp,
1982 				drmach_to_putmsg);
1983 
1984 			if (err) {
1985 				switch (err) {
1986 
				/*
				 * Retry the same entry right away: entry is
				 * unchanged and its p_flag is still clear, so
				 * the loop selects it again.
				 */
1987 				case EAGAIN:
1988 				case EBUSY:
1989 					++retry;
1990 					mutex_enter(&drmach_msglist_mutex);
1991 					continue;
1992 
1993 				case ETIMEDOUT:
1994 					if (--entry->o_nretry <= 0) {
1995 						mutex_enter(
1996 							&drmach_msglist_mutex);
1997 						drmach_msglist_unlink(entry);
1998 						mutex_exit(
1999 							&drmach_msglist_mutex);
2000 						entry->f_error = err;
2001 						entry->p_flag = 1;
2002 						cv_signal(&entry->s_cv);
2003 					} else {
2004 						++retry;
2005 						mutex_enter(
2006 							&drmach_msglist_mutex);
2007 						continue;
2008 					}
2009 					break;
2010 				default:
2011 					mutex_enter(&drmach_msglist_mutex);
2012 					drmach_msglist_unlink(entry);
2013 					mutex_exit(&drmach_msglist_mutex);
2014 					entry->f_error = err;
2015 					entry->p_flag = 1;
2016 					cv_signal(&entry->s_cv);
2017 					break;
2018 				}
2019 			} else {
2020 				entry->p_flag = 1;
2021 				cv_signal(&entry->s_cv);
2022 			}
2023 
			/*
			 * Done with this entry; the list mutex was dropped
			 * while sending, so rescan from the head.
			 */
2024 			mutex_exit(&entry->s_lock);
2025 			retry = 0;
2026 			mutex_enter(&drmach_msglist_mutex);
2027 			entry = drmach_msglist_first;
2028 		}
2029 		mutex_exit(&drmach_msglist_mutex);
2030 
		/* wait for a new request to be signaled, or for five seconds */
2031 		mutex_enter(&drmach_sendmsg_mutex);
2032 		(void) cv_timedwait(&drmach_sendmsg_cv,
2033 			&drmach_sendmsg_mutex, ddi_get_lbolt() + (5 * hz));
2034 		mutex_exit(&drmach_sendmsg_mutex);
2035 	}
2036 	cmn_err(CE_WARN, "mbox_sendmsg: exiting");
	/*
	 * Fail every entry that has not been sent.  The traversal reads
	 * entry->next after the entry has been unlinked;
	 * drmach_msglist_unlink does not modify the next pointer of the
	 * entry it removes.
	 */
2037 	mutex_enter(&drmach_msglist_mutex);
2038 	entry = drmach_msglist_first;
2039 	while (entry != NULL) {
2040 		if (entry->p_flag == 0) {
2041 			entry->f_error = -1;
2042 			mutex_enter(&entry->s_lock);
2043 			cv_signal(&entry->s_cv);
2044 			mutex_exit(&entry->s_lock);
2045 			drmach_msglist_unlink(entry);
2046 		}
2047 		entry = entry->next;
2048 	}
2049 	mutex_exit(&drmach_msglist_mutex);
2050 	cv_destroy(&drmach_sendmsg_cv);
2051 	mutex_destroy(&drmach_sendmsg_mutex);
2052 
2053 	drmach_sendmsg_thread_run = -1;
2054 	thread_exit();
2055 
2056 }
2057 
2058 void
2059 drmach_msglist_destroy(drmach_msglist_t *listp)
2060 {
2061 	if (listp != NULL) {
2062 		drmach_msglist_t	*entry;
2063 
2064 		mutex_enter(&drmach_msglist_mutex);
2065 		entry = drmach_msglist_first;
2066 		while (entry) {
2067 			if (listp == entry) {
2068 				drmach_msglist_unlink(listp);
2069 				entry = NULL;
2070 			} else
2071 				entry = entry->next;
2072 		}
2073 
2074 		mutex_destroy(&listp->s_lock);
2075 		cv_destroy(&listp->s_cv);
2076 		mutex_destroy(&listp->g_lock);
2077 		cv_destroy(&listp->g_cv);
2078 		kmem_free(listp, sizeof (drmach_msglist_t));
2079 
2080 		mutex_exit(&drmach_msglist_mutex);
2081 	}
2082 }
2083 
2084 static drmach_msglist_t	*
2085 drmach_msglist_new(caddr_t ibufp, uint32_t ilen, dr_proto_hdr_t *hdrp,
2086 	uint32_t olen, int nrtry)
2087 {
2088 	drmach_msglist_t	*listp;
2089 
2090 	listp = kmem_zalloc(sizeof (drmach_msglist_t), KM_SLEEP);
2091 	mutex_init(&listp->s_lock, NULL, MUTEX_DRIVER, NULL);
2092 	cv_init(&listp->s_cv, NULL, CV_DRIVER, NULL);
2093 	mutex_init(&listp->g_lock, NULL, MUTEX_DRIVER, NULL);
2094 	cv_init(&listp->g_cv, NULL, CV_DRIVER, NULL);
2095 	listp->o_buf = (caddr_t)hdrp;
2096 	listp->o_buflen = olen;
2097 	listp->i_buf = ibufp;
2098 	listp->i_buflen = ilen;
2099 	listp->o_nretry = nrtry;
2100 	listp->msgid = hdrp->message_id;
2101 
2102 	return (listp);
2103 }
2104 
2105 static drmach_msglist_t *
2106 drmach_mbox_req_rply(dr_proto_hdr_t *hdrp, uint32_t olen, caddr_t ibufp,
2107 	uint32_t ilen, int timeout, int nrtry, int nosig,
2108 	drmach_msglist_t *link)
2109 {
2110 	int		crv;
2111 	drmach_msglist_t *listp;
2112 	clock_t		to_val;
2113 	dr_proto_hdr_t	*php;
2114 
2115 	/* setup transaction list entry */
2116 	listp = drmach_msglist_new(ibufp, ilen, hdrp, olen, nrtry);
2117 
2118 	/* send mailbox message, await reply */
2119 	mutex_enter(&listp->s_lock);
2120 	mutex_enter(&listp->g_lock);
2121 
2122 	listp->link = link;
2123 	drmach_msglist_link(listp);
2124 
2125 	mutex_enter(&drmach_sendmsg_mutex);
2126 	cv_signal(&drmach_sendmsg_cv);
2127 	mutex_exit(&drmach_sendmsg_mutex);
2128 
2129 	while (listp->p_flag == 0) {
2130 		cv_wait(&listp->s_cv, &listp->s_lock);
2131 	}
2132 
2133 	to_val =  ddi_get_lbolt() + (timeout * hz);
2134 
2135 	if (listp->f_error) {
2136 		listp->p_flag = 0;
2137 		cmn_err(CE_WARN, "!mboxsc_putmsg failed: 0x%x",
2138 			listp->f_error);
2139 		php = (dr_proto_hdr_t *)listp->o_buf;
2140 		cmn_err(CE_WARN, "!    cmd = 0x%x, exb = %d, slot = %d",
2141 		    php->command, php->expbrd, php->slot);
2142 	} else {
2143 		while (listp->m_reply == 0 && listp->f_error == 0) {
2144 			if (nosig)
2145 				crv = cv_timedwait(&listp->g_cv, &listp->g_lock,
2146 					to_val);
2147 			else
2148 				crv = cv_timedwait_sig(&listp->g_cv,
2149 					&listp->g_lock, to_val);
2150 			switch (crv) {
2151 				case -1: /* timed out */
2152 					cmn_err(CE_WARN,
2153 					    "!msgid=0x%x reply timed out",
2154 					    hdrp->message_id);
2155 					php = (dr_proto_hdr_t *)listp->o_buf;
2156 					cmn_err(CE_WARN, "!    cmd = 0x%x, "
2157 					    "exb = %d, slot = %d", php->command,
2158 					    php->expbrd, php->slot);
2159 					listp->f_error = ETIMEDOUT;
2160 					break;
2161 				case 0: /* signal received */
2162 					cmn_err(CE_WARN,
2163 					    "operation interrupted by signal");
2164 					listp->f_error = EINTR;
2165 					break;
2166 				default:
2167 					break;
2168 				}
2169 		}
2170 
2171 		/*
2172 		 * If link is set for this entry, check to see if
2173 		 * the linked entry has been replied to.  If not,
2174 		 * wait for the response.
2175 		 * Currently, this is only used for ABORT_TEST functionality,
2176 		 * wherein a check is made for the TESTBOARD reply when
2177 		 * the ABORT_TEST reply is received.
2178 		 */
2179 
2180 		if (link) {
2181 			mutex_enter(&link->g_lock);
2182 			/*
2183 			 * If the reply to the linked entry hasn't been
2184 			 * received, clear the existing link->f_error,
2185 			 * and await the reply.
2186 			 */
2187 			if (link->m_reply == 0) {
2188 				link->f_error = 0;
2189 			}
2190 			to_val =  ddi_get_lbolt() + (timeout * hz);
2191 			while (link->m_reply == 0 && link->f_error == 0) {
2192 				crv = cv_timedwait(&link->g_cv, &link->g_lock,
2193 					to_val);
2194 				switch (crv) {
2195 				case -1: /* timed out */
2196 					cmn_err(CE_NOTE,
2197 					    "!link msgid=0x%x reply timed out",
2198 					    link->msgid);
2199 					link->f_error = ETIMEDOUT;
2200 					break;
2201 				default:
2202 					break;
2203 				}
2204 			}
2205 			mutex_exit(&link->g_lock);
2206 		}
2207 	}
2208 	mutex_exit(&listp->g_lock);
2209 	mutex_exit(&listp->s_lock);
2210 	return (listp);
2211 }
2212 
2213 static sbd_error_t *
2214 drmach_mbx2sbderr(drmach_msglist_t *mlp)
2215 {
2216 	char		a_pnt[MAXNAMELEN];
2217 	dr_proto_hdr_t	*php;
2218 	int		bnum;
2219 
2220 	if (mlp->f_error) {
2221 		/*
2222 		 * If framework failure is due to signal, return "no error"
2223 		 * error.
2224 		 */
2225 		if (mlp->f_error == EINTR)
2226 			return (drerr_new(0, ESTC_NONE, NULL));
2227 
2228 		mutex_enter(&drmach_g_mbox_mutex);
2229 		drmach_mbox_iflag = 0;
2230 		mutex_exit(&drmach_g_mbox_mutex);
2231 		if (!mlp->p_flag)
2232 			return (drerr_new(1, ESTC_MBXRQST, NULL));
2233 		else
2234 			return (drerr_new(1, ESTC_MBXRPLY, NULL));
2235 	}
2236 	php = (dr_proto_hdr_t *)mlp->o_buf;
2237 	bnum = 2 * php->expbrd + php->slot;
2238 	a_pnt[0] = '\0';
2239 	(void) drmach_board_name(bnum, a_pnt, MAXNAMELEN);
2240 
2241 	switch (mlp->e_code) {
2242 		case 0:
2243 			return (NULL);
2244 		case DRERR_NOACL:
2245 			return (drerr_new(0, ESTC_NOACL, "%s", a_pnt));
2246 		case DRERR_NOT_ASSIGNED:
2247 			return (drerr_new(0, ESTC_NOT_ASSIGNED, "%s", a_pnt));
2248 		case DRERR_NOT_ACTIVE:
2249 			return (drerr_new(0, ESTC_NOT_ACTIVE, "%s", a_pnt));
2250 		case DRERR_EMPTY_SLOT:
2251 			return (drerr_new(0, ESTC_EMPTY_SLOT, "%s", a_pnt));
2252 		case DRERR_POWER_OFF:
2253 			return (drerr_new(0, ESTC_POWER_OFF, "%s", a_pnt));
2254 		case DRERR_TEST_IN_PROGRESS:
2255 			return (drerr_new(0, ESTC_TEST_IN_PROGRESS,
2256 					"%s", a_pnt));
2257 		case DRERR_TESTING_BUSY:
2258 			return (drerr_new(0, ESTC_TESTING_BUSY, "%s", a_pnt));
2259 		case DRERR_TEST_REQUIRED:
2260 			return (drerr_new(0, ESTC_TEST_REQUIRED, "%s", a_pnt));
2261 		case DRERR_UNAVAILABLE:
2262 			return (drerr_new(0, ESTC_UNAVAILABLE, "%s", a_pnt));
2263 		case DRERR_RECOVERABLE:
2264 			return (drerr_new(0, ESTC_SMS_ERR_RECOVERABLE,
2265 				"%s", a_pnt));
2266 		case DRERR_UNRECOVERABLE:
2267 			return (drerr_new(1, ESTC_SMS_ERR_UNRECOVERABLE,
2268 				"%s", a_pnt));
2269 		default:
2270 			return (drerr_new(1, ESTC_MBOX_UNKNOWN, NULL));
2271 	}
2272 }
2273 
2274 static sbd_error_t *
2275 drmach_mbox_trans(uint8_t msgtype, int bnum, caddr_t obufp, int olen,
2276 	caddr_t ibufp, int ilen)
2277 {
2278 	int			timeout = 0;
2279 	int			ntries = 0;
2280 	int			nosignals = 0;
2281 	dr_proto_hdr_t 		*hdrp;
2282 	drmach_msglist_t 	*mlp;
2283 	sbd_error_t		*err = NULL;
2284 
2285 	if (msgtype != DRMSG_MBOX_INIT) {
2286 		mutex_enter(&drmach_ri_mbox_mutex);
2287 		mutex_enter(&drmach_g_mbox_mutex);
2288 		if (drmach_mbox_iflag == 0) {
2289 			/* need to initialize the mailbox */
2290 			dr_proto_hdr_t	imsg;
2291 
2292 			mutex_exit(&drmach_g_mbox_mutex);
2293 
2294 			imsg.command = DRMSG_MBOX_INIT;
2295 
2296 			imsg.message_id = drmach_get_msgid();
2297 			imsg.drproto_version = DRMBX_VERSION;
2298 			imsg.expbrd = 0;
2299 			imsg.slot = 0;
2300 
2301 			cmn_err(CE_WARN,
2302 				"!reinitializing DR mailbox");
2303 			mlp = drmach_mbox_req_rply(&imsg, sizeof (imsg), 0, 0,
2304 				10, 5, 0, NULL);
2305 			err = drmach_mbx2sbderr(mlp);
2306 			/*
2307 			 * If framework failure incoming is encountered on
2308 			 * the MBOX_INIT [timeout on SMS reply], the error
2309 			 * type must be changed before returning to caller.
2310 			 * This is to prevent drmach_board_connect() and
2311 			 * drmach_board_disconnect() from marking boards
2312 			 * UNUSABLE based on MBOX_INIT failures.
2313 			 */
2314 			if ((err != NULL) && (err->e_code == ESTC_MBXRPLY)) {
2315 				cmn_err(CE_WARN,
2316 				    "!Changed mbox incoming to outgoing"
2317 				    " failure on reinit");
2318 				sbd_err_clear(&err);
2319 				err = drerr_new(0, ESTC_MBXRQST, NULL);
2320 			}
2321 			drmach_msglist_destroy(mlp);
2322 			if (err) {
2323 				mutex_exit(&drmach_ri_mbox_mutex);
2324 				return (err);
2325 			}
2326 			mutex_enter(&drmach_g_mbox_mutex);
2327 			drmach_mbox_iflag = 1;
2328 		}
2329 		mutex_exit(&drmach_g_mbox_mutex);
2330 		mutex_exit(&drmach_ri_mbox_mutex);
2331 	}
2332 
2333 	hdrp = (dr_proto_hdr_t *)obufp;
2334 
2335 	/* setup outgoing mailbox header */
2336 	hdrp->command = msgtype;
2337 	hdrp->message_id = drmach_get_msgid();
2338 	hdrp->drproto_version = DRMBX_VERSION;
2339 	hdrp->expbrd = DRMACH_BNUM2EXP(bnum);
2340 	hdrp->slot = DRMACH_BNUM2SLOT(bnum);
2341 
2342 	switch (msgtype) {
2343 
2344 		case DRMSG_MBOX_INIT:
2345 			timeout = drmach_to_mbxinit;
2346 			ntries = 1;
2347 			nosignals = 0;
2348 			break;
2349 
2350 		case DRMSG_ASSIGN:
2351 			timeout = drmach_to_assign;
2352 			ntries = 1;
2353 			nosignals = 0;
2354 			break;
2355 
2356 		case DRMSG_UNASSIGN:
2357 			timeout = drmach_to_unassign;
2358 			ntries = 1;
2359 			nosignals = 0;
2360 			break;
2361 
2362 		case DRMSG_POWERON:
2363 			timeout = drmach_to_poweron;
2364 			ntries = 1;
2365 			nosignals = 0;
2366 			break;
2367 
2368 		case DRMSG_POWEROFF:
2369 			timeout = drmach_to_poweroff;
2370 			ntries = 1;
2371 			nosignals = 0;
2372 			break;
2373 
2374 		case DRMSG_SHOWBOARD:
2375 			timeout = drmach_to_showboard;
2376 			ntries = 1;
2377 			nosignals = 0;
2378 			break;
2379 
2380 		case DRMSG_CLAIM:
2381 			timeout = drmach_to_claim;
2382 			ntries = 1;
2383 			nosignals = 1;
2384 			break;
2385 
2386 		case DRMSG_UNCLAIM:
2387 			timeout = drmach_to_unclaim;
2388 			ntries = 1;
2389 			nosignals = 1;
2390 			break;
2391 
2392 		case DRMSG_UNCONFIG:
2393 			timeout = drmach_to_unconfig;
2394 			ntries = 1;
2395 			nosignals = 0;
2396 			break;
2397 
2398 		case DRMSG_TESTBOARD:
2399 			timeout = drmach_to_testboard;
2400 			ntries = 1;
2401 			nosignals = 0;
2402 			break;
2403 
2404 		default:
2405 			cmn_err(CE_WARN,
2406 				"Unknown outgoing message type 0x%x", msgtype);
2407 			err = DRMACH_INTERNAL_ERROR();
2408 			break;
2409 	}
2410 
2411 	if (err == NULL) {
2412 		mlp = drmach_mbox_req_rply(hdrp, olen, ibufp, ilen,
2413 			timeout, ntries, nosignals, NULL);
2414 		err = drmach_mbx2sbderr(mlp);
2415 
2416 		/*
2417 		 * For DRMSG_TESTBOARD attempts which have timed out, or
2418 		 * been aborted due to a signal received after mboxsc_putmsg()
2419 		 * has succeeded in sending the message, a DRMSG_ABORT_TEST
2420 		 * must be sent.
2421 		 */
2422 		if ((msgtype == DRMSG_TESTBOARD) && (err != NULL) &&
2423 		    ((mlp->f_error == EINTR) || ((mlp->f_error == ETIMEDOUT) &&
2424 		    (mlp->p_flag != 0)))) {
2425 			drmach_msglist_t	*abmlp;
2426 			dr_abort_test_t		abibuf;
2427 
2428 			hdrp->command = DRMSG_ABORT_TEST;
2429 			hdrp->message_id = drmach_get_msgid();
2430 			abmlp = drmach_mbox_req_rply(hdrp,
2431 			    sizeof (dr_abort_test_t), (caddr_t)&abibuf,
2432 			    sizeof (abibuf), drmach_to_aborttest, 5, 1, mlp);
2433 			cmn_err(CE_WARN, "test aborted");
2434 			drmach_msglist_destroy(abmlp);
2435 		}
2436 
2437 		drmach_msglist_destroy(mlp);
2438 	}
2439 
2440 	return (err);
2441 }
2442 
2443 static int
2444 drmach_mbox_init()
2445 {
2446 	int			err;
2447 	caddr_t			obufp;
2448 	sbd_error_t		*serr = NULL;
2449 	mboxsc_timeout_range_t	mbxtoz;
2450 
2451 	drmach_mbox_istate = 0;
2452 	/* register the outgoing mailbox */
2453 	if ((err = mboxsc_init(KEY_DRSC, MBOXSC_MBOX_OUT,
2454 		NULL)) != 0) {
2455 		cmn_err(CE_WARN, "DR - SC mboxsc_init failed: 0x%x", err);
2456 		return (-1);
2457 	}
2458 	drmach_mbox_istate = 1;
2459 
2460 	/* setup the mboxsc_putmsg timeout value */
2461 	if (drmach_use_tuned_putmsg_to) {
2462 		cmn_err(CE_NOTE, "!using tuned drmach_to_putmsg = 0x%lx\n",
2463 		    drmach_to_putmsg);
2464 	} else {
2465 		if ((err = mboxsc_ctrl(KEY_DRSC,
2466 		    MBOXSC_CMD_PUTMSG_TIMEOUT_RANGE, &mbxtoz)) != 0) {
2467 			cmn_err(CE_WARN, "mboxsc_ctrl failed: 0x%x", err);
2468 			drmach_to_putmsg = 60000;
2469 		} else {
2470 			drmach_to_putmsg = mboxsc_putmsg_def_timeout() * 6;
2471 			DRMACH_PR("putmsg range is 0x%lx - 0x%lx value"
2472 			    " is 0x%lx\n", mbxtoz.min_timeout,
2473 			    mbxtoz.max_timeout, drmach_to_putmsg);
2474 		}
2475 	}
2476 
2477 	/* register the incoming mailbox */
2478 	if ((err = mboxsc_init(KEY_SCDR, MBOXSC_MBOX_IN,
2479 		drmach_mbox_event)) != 0) {
2480 		cmn_err(CE_WARN, "SC - DR mboxsc_init failed: 0x%x", err);
2481 		return (-1);
2482 	}
2483 	drmach_mbox_istate = 2;
2484 
2485 	/* initialize mutex for mailbox globals */
2486 	mutex_init(&drmach_g_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2487 
2488 	/* initialize mutex for mailbox re-init */
2489 	mutex_init(&drmach_ri_mbox_mutex, NULL, MUTEX_DRIVER, NULL);
2490 
2491 	/* initialize mailbox message list elements */
2492 	drmach_msglist_first = drmach_msglist_last = NULL;
2493 	mutex_init(&drmach_msglist_mutex, NULL, MUTEX_DRIVER, NULL);
2494 
2495 	mutex_init(&drmach_sendmsg_mutex, NULL, MUTEX_DRIVER, NULL);
2496 	cv_init(&drmach_sendmsg_cv, NULL, CV_DRIVER, NULL);
2497 
2498 	drmach_mbox_istate = 3;
2499 
2500 	/* start mailbox sendmsg thread */
2501 	drmach_sendmsg_thread_run = 1;
2502 	if (drmach_sendmsg_thread == NULL)
2503 		drmach_sendmsg_thread = thread_create(NULL, 0,
2504 		    (void (*)())drmach_mbox_sendmsg, NULL, 0, &p0,
2505 		    TS_RUN, minclsyspri);
2506 
2507 	/* start mailbox getmsg thread */
2508 	drmach_getmsg_thread_run = 1;
2509 	if (drmach_getmsg_thread == NULL)
2510 		drmach_getmsg_thread = thread_create(NULL, 0,
2511 		    (void (*)())drmach_mbox_getmsg, NULL, 0, &p0,
2512 		    TS_RUN, minclsyspri);
2513 
2514 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
2515 	serr = drmach_mbox_trans(DRMSG_MBOX_INIT, 0, obufp,
2516 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
2517 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
2518 	if (serr) {
2519 		cmn_err(CE_WARN, "mbox_init: MBOX_INIT failed ecode=0x%x",
2520 			serr->e_code);
2521 		sbd_err_clear(&serr);
2522 		return (-1);
2523 	}
2524 	mutex_enter(&drmach_g_mbox_mutex);
2525 	drmach_mbox_iflag = 1;
2526 	drmach_mbox_ipending = 0;
2527 	mutex_exit(&drmach_g_mbox_mutex);
2528 
2529 	return (0);
2530 }
2531 
2532 static int
2533 drmach_mbox_fini()
2534 {
2535 	int err, rv = 0;
2536 
2537 	if (drmach_mbox_istate > 2) {
2538 		drmach_getmsg_thread_run = 0;
2539 		drmach_sendmsg_thread_run = 0;
2540 		cmn_err(CE_WARN,
2541 			"drmach_mbox_fini: waiting for mbox threads...");
2542 		while ((drmach_getmsg_thread_run == 0) ||
2543 			(drmach_sendmsg_thread_run == 0)) {
2544 			continue;
2545 		}
2546 		cmn_err(CE_WARN,
2547 			"drmach_mbox_fini: mbox threads done.");
2548 		mutex_destroy(&drmach_msglist_mutex);
2549 
2550 	}
2551 	if (drmach_mbox_istate) {
2552 		/* de-register the outgoing mailbox */
2553 		if ((err = mboxsc_fini(KEY_DRSC)) != 0) {
2554 			cmn_err(CE_WARN, "DR - SC mboxsc_fini failed: 0x%x",
2555 				err);
2556 			rv = -1;
2557 		}
2558 	}
2559 	if (drmach_mbox_istate > 1) {
2560 		/* de-register the incoming mailbox */
2561 		if ((err = mboxsc_fini(KEY_SCDR)) != 0) {
2562 			cmn_err(CE_WARN, "SC - DR mboxsc_fini failed: 0x%x",
2563 				err);
2564 			rv = -1;
2565 		}
2566 	}
2567 	mutex_destroy(&drmach_g_mbox_mutex);
2568 	mutex_destroy(&drmach_ri_mbox_mutex);
2569 	return (rv);
2570 }
2571 
/*
 * Translate a port id into a board number.
 *
 * The low five bits of the port id select the slot (0 or 1); the
 * expander number is taken from the bits above them.  The result is
 * (expander * 2) + slot.
 *
 * Returns -1 when the low five bits do not name a known device
 * position.  DEBUG kernels additionally trip an ASSERT in that case.
 * Without the explicit return, non-DEBUG kernels would compute the
 * result from an uninitialized `slot'.
 */
static int
drmach_portid2bnum(int portid)
{
	int slot;

	switch (portid & 0x1f) {
	case 0: case 1: case 2: case 3:	/* cpu/wci devices */
	case 0x1e:			/* slot 0 axq registers */
		slot = 0;
		break;

	case 8: case 9:			/* cpu devices */
	case 0x1c: case 0x1d:		/* schizo/wci devices */
	case 0x1f:			/* slot 1 axq registers */
		slot = 1;
		break;

	default:
		ASSERT(0);		/* catch in debug kernels */
		return (-1);		/* never a valid board number */
	}

	return (((portid >> 4) & 0x7e) | slot);
}
2595 
2596 extern int axq_suspend_iopause;
2597 
2598 static int
2599 hold_rele_branch(dev_info_t *rdip, void *arg)
2600 {
2601 	int	i;
2602 	int	*holdp	= (int *)arg;
2603 	char	*name = ddi_node_name(rdip);
2604 
2605 	/*
2606 	 * For Starcat, we must be children of the root devinfo node
2607 	 */
2608 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
2609 
2610 	i = drmach_name2type_idx(name);
2611 
2612 	/*
2613 	 * Only children of the root devinfo node need to be
2614 	 * held/released since they are the only valid targets
2615 	 * of tree operations. This corresponds to the node types
2616 	 * listed in the drmach_name2type array.
2617 	 */
2618 	if (i < 0) {
2619 		/* Not of interest to us */
2620 		return (DDI_WALK_PRUNECHILD);
2621 	}
2622 
2623 	if (*holdp) {
2624 		ASSERT(!e_ddi_branch_held(rdip));
2625 		e_ddi_branch_hold(rdip);
2626 	} else {
2627 		ASSERT(e_ddi_branch_held(rdip));
2628 		e_ddi_branch_rele(rdip);
2629 	}
2630 
2631 	return (DDI_WALK_PRUNECHILD);
2632 }
2633 
2634 static int
2635 drmach_init(void)
2636 {
2637 	pnode_t 	nodeid;
2638 	gdcd_t		*gdcd;
2639 	int		bnum;
2640 	dev_info_t	*rdip;
2641 	int		hold, circ;
2642 
2643 	mutex_enter(&drmach_i_lock);
2644 	if (drmach_initialized) {
2645 		mutex_exit(&drmach_i_lock);
2646 		return (0);
2647 	}
2648 
2649 	gdcd = drmach_gdcd_new();
2650 	if (gdcd == NULL) {
2651 		mutex_exit(&drmach_i_lock);
2652 		cmn_err(CE_WARN, "drmach_init: failed to access GDCD\n");
2653 		return (-1);
2654 	}
2655 
2656 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
2657 
2658 	nodeid = prom_childnode(prom_rootnode());
2659 	do {
2660 		int		 len;
2661 		int		 portid;
2662 		drmachid_t	 id;
2663 
2664 		len = prom_getproplen(nodeid, "portid");
2665 		if (len != sizeof (portid))
2666 			continue;
2667 
2668 		portid = -1;
2669 		(void) prom_getprop(nodeid, "portid", (caddr_t)&portid);
2670 		if (portid == -1)
2671 			continue;
2672 
2673 		bnum = drmach_portid2bnum(portid);
2674 
2675 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
2676 			/* portid translated to an invalid board number */
2677 			cmn_err(CE_WARN, "OBP node 0x%x has"
2678 				" invalid property value, %s=%u",
2679 				nodeid, "portid", portid);
2680 
2681 			/* clean up */
2682 			drmach_array_dispose(drmach_boards,
2683 			    drmach_board_dispose);
2684 			drmach_gdcd_dispose(gdcd);
2685 			mutex_exit(&drmach_i_lock);
2686 			return (-1);
2687 		} else if (id == NULL) {
2688 			drmach_board_t	*bp;
2689 			l1_slot_stat_t	*dcd;
2690 			int		exp, slot;
2691 
2692 			bp = drmach_board_new(bnum);
2693 			bp->assigned = !drmach_initialized;
2694 			bp->powered = !drmach_initialized;
2695 
2696 			exp = DRMACH_BNUM2EXP(bnum);
2697 			slot = DRMACH_BNUM2SLOT(bnum);
2698 			dcd = &gdcd->dcd_slot[exp][slot];
2699 			bp->stardrb_offset =
2700 			    dcd->l1ss_cpu_drblock_xwd_offset << 3;
2701 			DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
2702 			    bp->stardrb_offset);
2703 
2704 			if (gdcd->dcd_slot[exp][slot].l1ss_flags &
2705 			    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
2706 				bp->flags |= DRMACH_NULL_PROC_LPA;
2707 				DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
2708 			}
2709 		}
2710 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
2711 
2712 	drmach_cpu_sram_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
2713 
2714 	if (gdcd->dcd_testcage_log2_mbytes_size != DCD_DR_TESTCAGE_DISABLED) {
2715 		ASSERT(gdcd->dcd_testcage_log2_mbytes_size ==
2716 				gdcd->dcd_testcage_log2_mbytes_align);
2717 		drmach_iocage_paddr =
2718 			(uint64_t)gdcd->dcd_testcage_mbyte_PA << 20;
2719 		drmach_iocage_size =
2720 			1 << (gdcd->dcd_testcage_log2_mbytes_size + 20);
2721 
2722 		drmach_iocage_vaddr = (caddr_t)vmem_alloc(heap_arena,
2723 			drmach_iocage_size, VM_SLEEP);
2724 		hat_devload(kas.a_hat, drmach_iocage_vaddr, drmach_iocage_size,
2725 			mmu_btop(drmach_iocage_paddr),
2726 			PROT_READ | PROT_WRITE,
2727 			HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
2728 
2729 		DRMACH_PR("gdcd size=0x%x align=0x%x PA=0x%x\n",
2730 			gdcd->dcd_testcage_log2_mbytes_size,
2731 			gdcd->dcd_testcage_log2_mbytes_align,
2732 			gdcd->dcd_testcage_mbyte_PA);
2733 		DRMACH_PR("drmach size=0x%x PA=0x%lx VA=0x%p\n",
2734 			drmach_iocage_size, drmach_iocage_paddr,
2735 			drmach_iocage_vaddr);
2736 	}
2737 
2738 	if (drmach_iocage_size == 0) {
2739 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2740 		drmach_boards = NULL;
2741 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2742 		drmach_gdcd_dispose(gdcd);
2743 		mutex_exit(&drmach_i_lock);
2744 		cmn_err(CE_WARN, "drmach_init: iocage not available\n");
2745 		return (-1);
2746 	}
2747 
2748 	drmach_gdcd_dispose(gdcd);
2749 
2750 	mutex_init(&drmach_iocage_lock, NULL, MUTEX_DRIVER, NULL);
2751 	cv_init(&drmach_iocage_cv, NULL, CV_DRIVER, NULL);
2752 	mutex_init(&drmach_xt_mb_lock, NULL, MUTEX_DRIVER, NULL);
2753 	mutex_init(&drmach_bus_sync_lock, NULL, MUTEX_DRIVER, NULL);
2754 	mutex_init(&drmach_slice_table_lock, NULL, MUTEX_DRIVER, NULL);
2755 
2756 	mutex_enter(&cpu_lock);
2757 	mutex_enter(&drmach_iocage_lock);
2758 	ASSERT(drmach_iocage_is_busy == 0);
2759 	drmach_iocage_is_busy = 1;
2760 	drmach_iocage_mem_scrub(drmach_iocage_size);
2761 	drmach_iocage_is_busy = 0;
2762 	cv_signal(&drmach_iocage_cv);
2763 	mutex_exit(&drmach_iocage_lock);
2764 	mutex_exit(&cpu_lock);
2765 
2766 
2767 	if (drmach_mbox_init() == -1) {
2768 		cmn_err(CE_WARN, "DR - SC mailbox initialization Failed");
2769 	}
2770 
2771 	/*
2772 	 * Walk immediate children of devinfo root node and hold
2773 	 * all devinfo branches of interest.
2774 	 */
2775 	hold = 1;
2776 	rdip = ddi_root_node();
2777 
2778 	ndi_devi_enter(rdip, &circ);
2779 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2780 	ndi_devi_exit(rdip, circ);
2781 
2782 	drmach_initialized = 1;
2783 
2784 	/*
2785 	 * To avoid a circular patch dependency between DR and AXQ, the AXQ
2786 	 * rev introducing the axq_iopause_*_all interfaces should not regress
2787 	 * when installed without the DR rev using those interfaces. The default
2788 	 * is for iopause to be enabled/disabled during axq suspend/resume. By
2789 	 * setting the following axq flag to zero, axq will not enable iopause
2790 	 * during suspend/resume, instead DR will call the axq_iopause_*_all
2791 	 * interfaces during drmach_copy_rename.
2792 	 */
2793 	axq_suspend_iopause = 0;
2794 
2795 	mutex_exit(&drmach_i_lock);
2796 
2797 	return (0);
2798 }
2799 
2800 static void
2801 drmach_fini(void)
2802 {
2803 	dev_info_t	*rdip;
2804 	int		hold, circ;
2805 
2806 	if (drmach_initialized) {
2807 		rw_enter(&drmach_boards_rwlock, RW_WRITER);
2808 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
2809 		drmach_boards = NULL;
2810 		rw_exit(&drmach_boards_rwlock);
2811 
2812 		mutex_destroy(&drmach_slice_table_lock);
2813 		mutex_destroy(&drmach_xt_mb_lock);
2814 		mutex_destroy(&drmach_bus_sync_lock);
2815 		cv_destroy(&drmach_iocage_cv);
2816 		mutex_destroy(&drmach_iocage_lock);
2817 
2818 		vmem_free(heap_arena, drmach_cpu_sram_va, PAGESIZE);
2819 
2820 		/*
2821 		 * Walk immediate children of the root devinfo node
2822 		 * releasing holds acquired on branches in drmach_init()
2823 		 */
2824 		hold = 0;
2825 		rdip = ddi_root_node();
2826 
2827 		ndi_devi_enter(rdip, &circ);
2828 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
2829 		ndi_devi_exit(rdip, circ);
2830 
2831 		drmach_initialized = 0;
2832 	}
2833 
2834 	drmach_mbox_fini();
2835 	if (drmach_xt_mb != NULL) {
2836 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
2837 		    drmach_xt_mb_size);
2838 	}
2839 	rw_destroy(&drmach_boards_rwlock);
2840 	mutex_destroy(&drmach_i_lock);
2841 }
2842 
2843 static void
2844 drmach_mem_read_madr(drmach_mem_t *mp, int bank, uint64_t *madr)
2845 {
2846 	kpreempt_disable();
2847 
2848 	/* get register address, read madr value */
2849 	if (STARCAT_CPUID_TO_PORTID(CPU->cpu_id) == mp->dev.portid) {
2850 		*madr = lddmcdecode(DRMACH_MC_ASI_ADDR(mp, bank));
2851 	} else {
2852 		*madr = lddphysio(DRMACH_MC_ADDR(mp, bank));
2853 	}
2854 
2855 	kpreempt_enable();
2856 }
2857 
2858 
2859 static uint64_t *
2860 drmach_prep_mc_rename(uint64_t *p, int local,
2861 	drmach_mem_t *mp, uint64_t current_basepa, uint64_t new_basepa)
2862 {
2863 	int bank;
2864 
2865 	for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
2866 		uint64_t madr, bank_offset;
2867 
2868 		/* fetch mc's bank madr register value */
2869 		drmach_mem_read_madr(mp, bank, &madr);
2870 		if (madr & DRMACH_MC_VALID_MASK) {
2871 			uint64_t bankpa;
2872 
2873 			bank_offset = (DRMACH_MC_UM_TO_PA(madr) |
2874 			    DRMACH_MC_LM_TO_PA(madr)) - current_basepa;
2875 			bankpa = new_basepa + bank_offset;
2876 
2877 			/* encode new base pa into madr */
2878 			madr &= ~DRMACH_MC_UM_MASK;
2879 			madr |= DRMACH_MC_PA_TO_UM(bankpa);
2880 			madr &= ~DRMACH_MC_LM_MASK;
2881 			madr |= DRMACH_MC_PA_TO_LM(bankpa);
2882 
2883 			if (local)
2884 				*p++ = DRMACH_MC_ASI_ADDR(mp, bank);
2885 			else
2886 				*p++ = DRMACH_MC_ADDR(mp, bank);
2887 
2888 			*p++ = madr;
2889 		}
2890 	}
2891 
2892 	return (p);
2893 }
2894 
2895 static uint64_t *
2896 drmach_prep_schizo_script(uint64_t *p, drmach_mem_t *mp, uint64_t new_basepa)
2897 {
2898 	drmach_board_t	*bp;
2899 	int		 rv;
2900 	int		 idx;
2901 	drmachid_t	 id;
2902 	uint64_t	 last_scsr_pa = 0;
2903 
2904 	/* memory is always in slot 0 */
2905 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2906 
2907 	/* look up slot 1 board on same expander */
2908 	idx = DRMACH_EXPSLOT2BNUM(DRMACH_BNUM2EXP(mp->dev.bp->bnum), 1);
2909 	rv = drmach_array_get(drmach_boards, idx, &id);
2910 	bp = id; /* bp will be NULL if board not found */
2911 
2912 	/* look up should never be out of bounds */
2913 	ASSERT(rv == 0);
2914 
2915 	/* nothing to do when board is not found or has no devices */
2916 	if (rv == -1 || bp == NULL || bp->devices == NULL)
2917 		return (p);
2918 
2919 	rv = drmach_array_first(bp->devices, &idx, &id);
2920 	while (rv == 0) {
2921 		if (DRMACH_IS_IO_ID(id)) {
2922 			drmach_io_t *io = id;
2923 
2924 			/*
2925 			 * Skip all non-Schizo IO devices (only IO nodes
2926 			 * that are Schizo devices have non-zero scsr_pa).
2927 			 * Filter out "other" leaf to avoid writing to the
2928 			 * same Schizo Control/Status Register twice.
2929 			 */
2930 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
2931 				uint64_t scsr;
2932 
2933 				scsr  = lddphysio(io->scsr_pa);
2934 				scsr &= ~(DRMACH_LPA_BASE_MASK |
2935 						DRMACH_LPA_BND_MASK);
2936 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
2937 				scsr |= DRMACH_PA_TO_LPA_BND(
2938 					new_basepa + DRMACH_MEM_SLICE_SIZE);
2939 
2940 				*p++ = io->scsr_pa;
2941 				*p++ = scsr;
2942 
2943 				last_scsr_pa = io->scsr_pa;
2944 			}
2945 		}
2946 		rv = drmach_array_next(bp->devices, &idx, &id);
2947 	}
2948 
2949 	return (p);
2950 }
2951 
2952 /*
2953  * For Panther MCs, append the MC idle reg address and drmach_mem_t pointer.
2954  * The latter is returned when drmach_rename fails to idle a Panther MC and
2955  * is used to identify the MC for error reporting.
2956  */
2957 static uint64_t *
2958 drmach_prep_pn_mc_idle(uint64_t *p, drmach_mem_t *mp, int local)
2959 {
2960 	/* only slot 0 has memory */
2961 	ASSERT(DRMACH_BNUM2SLOT(mp->dev.bp->bnum) == 0);
2962 	ASSERT(IS_PANTHER(mp->dev.bp->cpu_impl));
2963 
2964 	for (mp = mp->dev.bp->mem; mp != NULL; mp = mp->next) {
2965 		ASSERT(DRMACH_IS_MEM_ID(mp));
2966 
2967 		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
2968 			if (local) {
2969 				*p++ = ASI_EMU_ACT_STATUS_VA;	/* local ASI */
2970 				*p++ = (uintptr_t)mp;
2971 			}
2972 		} else if (!local) {
2973 			*p++ = DRMACH_EMU_ACT_STATUS_ADDR(mp);	/* PIO */
2974 			*p++ = (uintptr_t)mp;
2975 		}
2976 	}
2977 
2978 	return (p);
2979 }
2980 
/*
 * Build the copy-rename script in `buf' for exchanging the base
 * addresses of source memory `s_mp' and target memory `t_mp'.
 *
 * The script is a sequence of uint64_t sections, each ended by a zero
 * word, in this order:
 *
 *   1. a copy of drmach_bus_sync_list;
 *   2. idle-register tuples for the Panther MC local to this CPU;
 *   3. idle-register tuples for the other Panther MCs on the source
 *      and target boards;
 *   4. MADR {address, value} tuples for the MC local to this CPU (at
 *      most one MC, and only if this CPU is on the source or target
 *      board);
 *   5. MADR tuples for all other MCs on both boards, followed by the
 *      Schizo LPA tuples;
 *   6. the AXQ CASM tuples produced by axq_do_casm_rename_script().
 *
 * One further zero word terminates the whole script.
 *
 * `t_slice_offset' is added to the target's new base address.
 * `buflen' is only consulted by ASSERTs (DEBUG kernels).
 *
 * Returns NULL on success, or an internal error if the AXQ script
 * could not be produced.
 */
static sbd_error_t *
drmach_prep_rename_script(drmach_mem_t *s_mp, drmach_mem_t *t_mp,
	uint64_t t_slice_offset, caddr_t buf, int buflen)
{
	_NOTE(ARGUNUSED(buflen))

	uint64_t		*p = (uint64_t *)buf, *q;
	sbd_error_t		*err;
	int			 rv;
	drmach_mem_t		*mp, *skip_mp;
	uint64_t		 s_basepa, t_basepa;
	uint64_t		 s_new_basepa, t_new_basepa;

	/* verify supplied buffer space is adequate */
	ASSERT(buflen >=
		/* addr for all possible MC banks */
		(sizeof (uint64_t) * 4 * 4 * 18) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* addr/id tuple for local Panther MC idle reg */
		(sizeof (uint64_t) * 2) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* addr/id tuple for 2 boards with 4 Panther MC idle regs */
		(sizeof (uint64_t) * 2 * 2 * 4) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* addr/val tuple for 1 proc with 4 MC banks */
		(sizeof (uint64_t) * 2 * 4) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* addr/val tuple for 2 boards w/ 2 schizos each */
		(sizeof (uint64_t) * 2 * 2 * 2) +
		/* addr/val tuple for 2 boards w/ 16 MC banks each */
		(sizeof (uint64_t) * 2 * 2 * 16) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* addr/val tuple for 18 AXQs w/ two slots each */
		(sizeof (uint64_t) * 2 * 2 * 18) +
		/* list section terminator */
		(sizeof (uint64_t) * 1) +
		/* list terminator */
		(sizeof (uint64_t) * 1));

	/* copy bank list to rename script */
	mutex_enter(&drmach_bus_sync_lock);
	for (q = drmach_bus_sync_list; *q; q++, p++)
		*p = *q;
	mutex_exit(&drmach_bus_sync_lock);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for MC on this processor.  A script will be
	 * produced only if this is a Panther processor on the source or
	 * target board.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 1);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 1);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write idle script for all other MCs on source and target
	 * Panther boards.
	 */
	if (IS_PANTHER(s_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, s_mp, 0);

	if (IS_PANTHER(t_mp->dev.bp->cpu_impl))
		p = drmach_prep_pn_mc_idle(p, t_mp, 0);

	/* list section terminator */
	*p++ = 0;

	/*
	 * Step 1:	Write source base address to target MC
	 *		with present bit off.
	 * Step 2:	Now rewrite target reg with present bit on.
	 */
	err = drmach_mem_get_base_physaddr(s_mp, &s_basepa);
	ASSERT(err == NULL);
	err = drmach_mem_get_base_physaddr(t_mp, &t_basepa);
	ASSERT(err == NULL);

	/* exchange base pa. include slice offset in new target base pa */
	s_new_basepa = t_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1);
	t_new_basepa = (s_basepa & ~ (DRMACH_MEM_SLICE_SIZE - 1)) +
			t_slice_offset;

	DRMACH_PR("s_new_basepa 0x%lx\n", s_new_basepa);
	DRMACH_PR("t_new_basepa 0x%lx\n", t_new_basepa);

	DRMACH_PR("preparing MC MADR rename script (master is CPU%d):\n",
		CPU->cpu_id);

	/*
	 * Write rename script for MC on this processor.  A script will
	 * be produced only if this processor is on the source or target
	 * board.
	 */

	/*
	 * skip_mp remembers the MC handled here (if any) so that the
	 * "all other MCs" loops below do not emit it a second time.
	 * Both searches stop as soon as skip_mp is set.
	 */
	skip_mp = NULL;
	mp = s_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, s_basepa,
			    s_new_basepa);
		}

		mp = mp->next;
	}

	mp = t_mp->dev.bp->mem;
	while (mp != NULL && skip_mp == NULL) {
		if (mp->dev.portid == STARCAT_CPUID_TO_PORTID(CPU->cpu_id)) {
			skip_mp = mp;
			p = drmach_prep_mc_rename(p, 1, mp, t_basepa,
			    t_new_basepa);
		}

		mp = mp->next;
	}

	/* list section terminator */
	*p++ = 0;

	/*
	 * Write rename script for all other MCs on source and target
	 * boards.
	 */

	for (mp = s_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, s_basepa, s_new_basepa);
	}

	for (mp = t_mp->dev.bp->mem; mp; mp = mp->next) {
		if (mp == skip_mp)
			continue;
		p = drmach_prep_mc_rename(p, 0, mp, t_basepa, t_new_basepa);
	}

	/* Write rename script for Schizo LPA_BASE/LPA_BND */
	p = drmach_prep_schizo_script(p, s_mp, s_new_basepa);
	p = drmach_prep_schizo_script(p, t_mp, t_new_basepa);

	/* list section terminator */
	*p++ = 0;

	DRMACH_PR("preparing AXQ CASM rename script (EXP%d <> EXP%d):\n",
		DRMACH_BNUM2EXP(s_mp->dev.bp->bnum),
		DRMACH_BNUM2EXP(t_mp->dev.bp->bnum));

	/* axq_do_casm_rename_script() advances p itself (passed by address) */
	rv = axq_do_casm_rename_script(&p,
		DRMACH_PA_TO_SLICE(s_new_basepa),
		DRMACH_PA_TO_SLICE(t_new_basepa));
	if (rv == DDI_FAILURE)
		return (DRMACH_INTERNAL_ERROR());

	/* list section & final terminator */
	*p++ = 0;
	*p++ = 0;

#ifdef DEBUG
	/*
	 * Walk the finished script section by section, in the same order
	 * it was written above, and print it.
	 */
	{
		uint64_t *q = (uint64_t *)buf;

		/* paranoia */
		ASSERT((caddr_t)p <= buf + buflen);

		DRMACH_PR("MC bank base pa list:\n");
		while (*q) {
			uint64_t a = *q++;

			DRMACH_PR("0x%lx\n", a);
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("local Panther MC idle reg (via ASI 0x4a):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("non-local Panther MC idle reg (via ASI 0x15):\n");
		while (*q) {
			DRMACH_PR("addr=0x%lx, mp=0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC reprogramming script (via ASI 0x72):\n");
		while (*q) {
			uint64_t r = *q++;	/* register address */
			uint64_t v = *q++;	/* new register value */

			DRMACH_PR("0x%lx = 0x%lx, basepa 0x%lx\n",
				r,
				v,
				DRMACH_MC_UM_TO_PA(v)|DRMACH_MC_LM_TO_PA(v));
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("MC/SCHIZO reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/* skip terminator */
		q += 1;

		DRMACH_PR("AXQ reprogramming script:\n");
		while (*q) {
			DRMACH_PR("0x%lx = 0x%lx\n", *q, *(q + 1));
			q += 2;
		}

		/*
		 * q now rests on the AXQ section terminator; the word
		 * after it must be the final list terminator.
		 */
		/* verify final terminator is present */
		ASSERT(*(q + 1) == 0);

		DRMACH_PR("copy-rename script 0x%p, len %d\n",
			buf, (int)((intptr_t)p - (intptr_t)buf));

		if (drmach_debug)
			DELAY(10000000);
	}
#endif

	return (NULL);
}
3229 
3230 static void
3231 drmach_prep_xt_mb_for_slice_update(drmach_board_t *bp, uchar_t slice)
3232 {
3233 	int		 rv;
3234 
3235 	ASSERT(MUTEX_HELD(&drmach_xt_mb_lock));
3236 
3237 	if (bp->devices) {
3238 		int		 d_idx;
3239 		drmachid_t	 d_id;
3240 
3241 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3242 		while (rv == 0) {
3243 			if (DRMACH_IS_CPU_ID(d_id)) {
3244 				drmach_cpu_t	*cp = d_id;
3245 				processorid_t	 cpuid = cp->cpuid;
3246 
3247 				mutex_enter(&cpu_lock);
3248 				if (cpu[cpuid] && cpu[cpuid]->cpu_flags)
3249 					drmach_xt_mb[cpuid] = 0x80 | slice;
3250 				mutex_exit(&cpu_lock);
3251 			}
3252 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3253 		}
3254 	}
3255 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
3256 		drmach_board_t	*s1bp = NULL;
3257 
3258 		rv = drmach_array_get(drmach_boards, bp->bnum + 1,
3259 		    (void *) &s1bp);
3260 		if (rv == 0 && s1bp != NULL) {
3261 			ASSERT(DRMACH_IS_BOARD_ID(s1bp));
3262 			ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
3263 			drmach_prep_xt_mb_for_slice_update(s1bp, slice);
3264 		}
3265 	}
3266 }
3267 
/*
 * Prepare a copy-rename of source memory unit s_id onto target memory
 * unit t_id.
 *
 *	t_id		target memory unit (must be a memory id)
 *	t_slice_offset	offset of the target within its slice
 *	s_id		source memory unit (must be a memory id)
 *	c_ml		memlist describing the spans to copy; its addresses
 *			are rewritten IN PLACE to be relative to the source
 *			copy base pa
 *	cr_id		on success, receives the copy-rename handle that is
 *			later consumed by drmach_copy_rename() and
 *			drmach_copy_rename_fini()
 *
 * Returns NULL on success, otherwise an sbd_error_t.
 *
 * Steps performed:
 *   - map the current cpu's SRAM page at drmach_cpu_sram_va (D and I TLB),
 *   - copy the text of drmach_rename into that page and zero the rest,
 *   - build the rename script in the page after the (16-byte rounded) text,
 *   - disable and flush the CDC,
 *   - mark both memory units busy and allocate the handle,
 *   - clear drmach_xt_mb and post LPA slice updates where required.
 *
 * Locking: on SUCCESS this routine returns with BOTH
 * drmach_slice_table_lock and drmach_xt_mb_lock held; they are dropped
 * by drmach_copy_rename_fini(). On failure neither lock has been taken
 * and the SRAM mapping is flushed from the TLB.
 *
 * NOTE(review): on the failure paths the c_ml addresses have already been
 * made relative and are not restored -- confirm callers discard c_ml.
 */
sbd_error_t *
drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
	drmachid_t s_id, struct memlist *c_ml, drmachid_t *cr_id)
{
	extern void drmach_rename(uint64_t *, uint_t *, uint64_t *);
	extern void drmach_rename_end(void);

	drmach_mem_t	*s_mp, *t_mp;
	struct memlist	*x_ml;
	uint64_t	 off_mask, s_copybasepa, t_copybasepa, t_basepa;
	int		 len;
	caddr_t		 bp, wp;		/* SRAM page base, write cursor */
	uint_t		*p, *q;
	sbd_error_t	*err;
	tte_t		*tte;
	drmach_copy_rename_t *cr;

	if (!DRMACH_IS_MEM_ID(s_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	if (!DRMACH_IS_MEM_ID(t_id))
		return (drerr_new(0, ESTC_INAPPROP, NULL));
	s_mp = s_id;
	t_mp = t_id;

	/* get starting physical address of target memory */
	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
	if (err)
		return (err);

	/* calculate slice offset mask from slice size */
	off_mask = DRMACH_MEM_SLICE_SIZE - 1;

	/* calculate source and target base pa */
	s_copybasepa = c_ml->address;
	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);

	/* paranoia */
	ASSERT((c_ml->address & off_mask) >= t_slice_offset);

	/* adjust copy memlist addresses to be relative to copy base pa */
	x_ml = c_ml;
	while (x_ml != NULL) {
		x_ml->address -= s_copybasepa;
		x_ml = x_ml->next;
	}

#ifdef DEBUG
	{
	uint64_t s_basepa, s_size, t_size;

	/* walk to the last memlist element to report the end of the span */
	x_ml = c_ml;
	while (x_ml->next != NULL)
		x_ml = x_ml->next;

	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
		s_copybasepa,
		s_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
		t_copybasepa,
		t_copybasepa + x_ml->address + x_ml->size);

	DRMACH_PR("copy memlist (relative to copy base pa):\n");
	DRMACH_MEMLIST_DUMP(c_ml);

	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(s_id, &s_size);
	ASSERT(err == NULL);

	err = drmach_mem_get_size(t_id, &t_size);
	ASSERT(err == NULL);

	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
		s_basepa, s_size);
	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
		t_basepa, t_size);
	}
#endif /* DEBUG */

	/* Map in appropriate cpu sram page */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);
	sfmmu_itlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);

	bp = wp = drmach_cpu_sram_va;

	/* Make sure the rename routine will fit */
	len = (ptrdiff_t)drmach_rename_end - (ptrdiff_t)drmach_rename;
	ASSERT(wp + len < bp + PAGESIZE);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)wp;
	q = (uint_t *)drmach_rename;
	while (q < (uint_t *)drmach_rename_end)
		*p++ = *q++;

	/* zero remainder. standard bzero not designed to work in nc space */
	while (p < (uint_t *)(bp + PAGESIZE))
		*p++ = 0;

	DRMACH_PR("drmach_rename function 0x%p, len %d\n", wp, len);
	/* the script area starts at the next 16-byte boundary after the text */
	wp += (len + 15) & ~15;

	err = drmach_prep_rename_script(s_mp, t_mp, t_slice_offset,
		wp, PAGESIZE - (wp - bp));
	if (err) {
		/*
		 * Shared failure exit (also reached by goto from below):
		 * drop the SRAM mapping from this cpu's TLB and return err.
		 */
cleanup:
		xt_one(CPU->cpu_id, vtag_flushpage_tl1,
			(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
		return (err);
	}

	/* disable and flush CDC */
	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
		axq_cdc_enable_all();	/* paranoia */
		err = DRMACH_INTERNAL_ERROR();
		goto cleanup;
	}

	/* mark both memory units busy */
	t_mp->dev.busy++;
	s_mp->dev.busy++;

	cr = vmem_alloc(static_alloc_arena, sizeof (drmach_copy_rename_t),
	    VM_SLEEP);
	/* isa tags the handle so later stages can assert its origin */
	cr->isa = (void *)drmach_copy_rename_init;
	cr->data = wp;
	cr->c_ml = c_ml;
	cr->s_mp = s_mp;
	cr->t_mp = t_mp;
	cr->s_copybasepa = s_copybasepa;
	cr->t_copybasepa = t_copybasepa;
	cr->ecode = DRMACH_CR_OK;

	/* both locks below stay held until drmach_copy_rename_fini() */
	mutex_enter(&drmach_slice_table_lock);

	mutex_enter(&drmach_xt_mb_lock);
	bzero((void *)drmach_xt_mb, drmach_xt_mb_size);

	/* cpus on the source board are told the target's slice, and vice versa */
	if (DRMACH_L1_SET_LPA(s_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(s_mp->dev.bp,
			DRMACH_PA_TO_SLICE(t_copybasepa));
	}
	if (DRMACH_L1_SET_LPA(t_mp->dev.bp) && drmach_reprogram_lpa) {
		drmach_prep_xt_mb_for_slice_update(t_mp->dev.bp,
			DRMACH_PA_TO_SLICE(s_copybasepa));
	}

	*cr_id = cr;
	return (NULL);
}
3423 
/*
 * Debug statistics recorded by drmach_copy_rename() and reported by
 * drmach_copy_rename_fini():
 *   drmach_rename_count   number of other cpus expected to check in
 *   drmach_rename_ntries  polling iterations spent waiting for them
 */
int drmach_rename_count;
int drmach_rename_ntries;
3426 
3427 sbd_error_t *
3428 drmach_copy_rename_fini(drmachid_t id)
3429 {
3430 	drmach_copy_rename_t	*cr = id;
3431 	sbd_error_t		*err = NULL;
3432 	dr_mbox_msg_t		*obufp;
3433 
3434 	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
3435 
3436 	axq_cdc_enable_all();
3437 
3438 	xt_one(CPU->cpu_id, vtag_flushpage_tl1,
3439 		(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
3440 
3441 	switch (cr->ecode) {
3442 	case DRMACH_CR_OK:
3443 		break;
3444 	case DRMACH_CR_MC_IDLE_ERR: {
3445 		dev_info_t	*dip = NULL;
3446 		drmach_mem_t	*mp = (drmach_mem_t *)cr->earg;
3447 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3448 
3449 		ASSERT(DRMACH_IS_MEM_ID(mp));
3450 
3451 		err = drmach_get_dip(mp, &dip);
3452 
3453 		ASSERT(err == NULL);
3454 		ASSERT(dip != NULL);
3455 
3456 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3457 		(void) ddi_pathname(dip, path);
3458 		cmn_err(CE_WARN, "failed to idle memory controller %s on %s: "
3459 		    "copy-rename aborted", path, mp->dev.bp->cm.name);
3460 		kmem_free(path, MAXPATHLEN);
3461 		break;
3462 	}
3463 	case DRMACH_CR_IOPAUSE_ERR:
3464 		ASSERT((uintptr_t)cr->earg >= 0 &&
3465 		    (uintptr_t)cr->earg < AXQ_MAX_EXP);
3466 
3467 		err = drerr_new(0,  ESBD_SUSPEND, "EX%d", (uintptr_t)cr->earg);
3468 		cmn_err(CE_WARN, "failed to idle EX%ld AXQ slot1 activity prior"
3469 		    " to copy-rename", (uintptr_t)cr->earg);
3470 		break;
3471 	case DRMACH_CR_ONTRAP_ERR:
3472 		err = drerr_new(0, ESBD_MEMFAIL, NULL);
3473 		cmn_err(CE_WARN, "copy-rename aborted due to uncorrectable "
3474 		    "memory error");
3475 		break;
3476 	default:
3477 		err = DRMACH_INTERNAL_ERROR();
3478 		cmn_err(CE_WARN, "unknown copy-rename error code (%d)\n",
3479 		    cr->ecode);
3480 		break;
3481 	}
3482 
3483 #ifdef DEBUG
3484 	if ((DRMACH_L1_SET_LPA(cr->s_mp->dev.bp) ||
3485 	    DRMACH_L1_SET_LPA(cr->t_mp->dev.bp)) && drmach_reprogram_lpa) {
3486 		int	i;
3487 		for (i = 0; i < NCPU; i++) {
3488 			if (drmach_xt_mb[i])
3489 				DRMACH_PR("cpu%d ignored drmach_xt_mb", i);
3490 		}
3491 	}
3492 #endif
3493 	mutex_exit(&drmach_xt_mb_lock);
3494 
3495 	if (cr->c_ml != NULL)
3496 		memlist_delete(cr->c_ml);
3497 
3498 	cr->t_mp->dev.busy--;
3499 	cr->s_mp->dev.busy--;
3500 
3501 	if (err) {
3502 		mutex_exit(&drmach_slice_table_lock);
3503 		goto done;
3504 	}
3505 
3506 	/* update casm shadow for target and source board */
3507 	drmach_slice_table_update(cr->t_mp->dev.bp, 0);
3508 	drmach_slice_table_update(cr->s_mp->dev.bp, 0);
3509 	mutex_exit(&drmach_slice_table_lock);
3510 
3511 	mutex_enter(&drmach_bus_sync_lock);
3512 	drmach_bus_sync_list_update();
3513 	mutex_exit(&drmach_bus_sync_lock);
3514 
3515 	/*
3516 	 * Make a good-faith effort to notify the SC about the copy-rename, but
3517 	 * don't worry if it fails, since a subsequent claim/unconfig/unclaim
3518 	 * will duplicate the update.
3519 	 */
3520 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
3521 	mutex_enter(&drmach_slice_table_lock);
3522 	drmach_msg_memslice_init(obufp->msgdata.dm_uc.mem_slice);
3523 	drmach_msg_memregs_init(obufp->msgdata.dm_uc.mem_regs);
3524 	mutex_exit(&drmach_slice_table_lock);
3525 	(void) drmach_mbox_trans(DRMSG_UNCONFIG, cr->s_mp->dev.bp->bnum,
3526 		(caddr_t)obufp, sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
3527 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
3528 
3529 done:
3530 	vmem_free(static_alloc_arena, cr, sizeof (drmach_copy_rename_t));
3531 
3532 	DRMACH_PR("waited %d out of %d tries for drmach_rename_wait on %d cpus",
3533 		drmach_rename_ntries, drmach_cpu_ntries, drmach_rename_count);
3534 
3535 	return (err);
3536 }
3537 
/*
 * Debug knob: when non-zero, drmach_copy_rename() spins in a short
 * busy loop after each 32-byte copy to slow the copy down.
 */
int drmach_slow_copy = 0;
3539 
/*
 * Execute the copy-rename prepared by drmach_copy_rename_init().
 *
 *	id	handle produced by drmach_copy_rename_init(); its ecode
 *		must be DRMACH_CR_OK on entry
 *
 * There is no return value. Failures are reported through the handle:
 * cr->ecode (and, where applicable, cr->earg) are set and are later
 * interpreted by drmach_copy_rename_fini().
 *
 * The caller must hold cpu_lock.
 *
 * Sequence:
 *   1. pause slot1 IO access to the memory bus,
 *   2. cross-trap every other ready cpu into drmach_rename_wait and
 *      poll (up to drmach_cpu_ntries times) for them to check in,
 *   3. disable CE reporting and interrupts on this cpu,
 *   4. copy the memlist 32 bytes at a time under on_trap protection,
 *   5. flush the E$, call the rename routine in cpu SRAM, flush caches,
 *   6. restore interrupt/error state, resume IO and release the other
 *      cpus with drmach_rename_done (or drmach_rename_abort on error).
 */
void
drmach_copy_rename(drmachid_t id)
{
	extern uint_t		 getpstate(void);
	extern void		 setpstate(uint_t);

	extern xcfunc_t		 drmach_rename_wait;
	extern xcfunc_t		 drmach_rename_done;
	extern xcfunc_t		 drmach_rename_abort;

	drmach_copy_rename_t	*cr = id;
	uint64_t		 neer;		/* saved error enable reg */
	struct memlist		*ml;
	int			 i, count;
	int			 csize, lnsize;	/* E$ size and line size */
	uint64_t		 caddr;		/* E$ flush area address */
	cpuset_t		 cpuset;
	uint_t			 pstate;	/* saved processor state */
	uint32_t		 exp = 0;
	on_trap_data_t		 otd;
	xcfunc_t		*drmach_end_wait_xcall = drmach_rename_done;

	ASSERT(cr->isa == (void *)drmach_copy_rename_init);
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cr->ecode == DRMACH_CR_OK);

	/*
	 * Prevent slot1 IO from accessing Safari memory bus.
	 */
	if (axq_iopause_enable_all(&exp) != DDI_SUCCESS) {
		ASSERT(exp >= 0 && exp < AXQ_MAX_EXP);
		cr->ecode = DRMACH_CR_IOPAUSE_ERR;
		cr->earg = (void *)(uintptr_t)exp;
		return;
	}

	/* every ready cpu except this one is sent to the wait routine */
	cpuset = cpu_ready_set;
	CPUSET_DEL(cpuset, CPU->cpu_id);
	count = ncpus - 1;
	drmach_rename_count = count;	/* for debug */

	drmach_xt_ready = 0;
	xt_some(cpuset, drmach_rename_wait, NULL, NULL);

	/* bounded poll for the other cpus to check in */
	for (i = 0; i < drmach_cpu_ntries; i++) {
		if (drmach_xt_ready == count)
			break;
		DELAY(drmach_cpu_delay);
	}

	drmach_rename_ntries = i;	/* for debug */

	drmach_xt_ready = 0;		/* steal the line back */
	for (i = 0; i < NCPU; i++)	/* steal the line back, preserve data */
		drmach_xt_mb[i] = drmach_xt_mb[i];

	caddr = drmach_iocage_paddr;
	csize = cpunodes[CPU->cpu_id].ecache_size;
	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;

	/* disable CE reporting */
	neer = get_error_enable();
	set_error_enable(neer & ~EN_REG_CEEN);

	/* disable interrupts (paranoia) */
	pstate = getpstate();
	setpstate(pstate & ~PSTATE_IE);

	/*
	 * Execute copy-rename under on_trap to protect against a panic due
	 * to an uncorrectable error. Instead, DR will abort the copy-rename
	 * operation and rely on the OS to do the error reporting.
	 *
	 * In general, trap handling on any cpu once the copy begins
	 * can result in an inconsistent memory image on the target.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		cr->ecode = DRMACH_CR_ONTRAP_ERR;
		goto copy_rename_end;
	}

	/*
	 * DO COPY.
	 */
	for (ml = cr->c_ml; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are relative to the copy base pa */
		s_pa = cr->s_copybasepa + ml->address;
		t_pa = cr->t_copybasepa + ml->address;
		nbytes = ml->size;

		/*
		 * NOTE(review): the loop ends only when nbytes reaches
		 * exactly zero, so ml->size is presumably always a
		 * multiple of 32 -- confirm against the memlist producer.
		 */
		while (nbytes != 0ull) {
			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));

			if (drmach_slow_copy) {	/* for debug */
				/* this i shadows the function-level int i */
				uint64_t i = 13 * 50;
				while (i--);
			}
		}
	}

	/*
	 * XXX CHEETAH SUPPORT
	 * For cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 *
	 * NOTE: This code block is dangerous at this point in the
	 * copy-rename operation. It modifies memory after the copy
	 * has taken place which means that any persistent state will
	 * be abandoned after the rename operation. The code is also
	 * performing thread synchronization at a time when all but
	 * one processors are paused. This is a potential deadlock
	 * situation.
	 *
	 * This code block must be moved to drmach_copy_rename_init.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	/*
	 * bcopy32_il is implemented as a series of ldxa/stxa via
	 * ASI_MEM instructions. Following the copy loop, the E$
	 * of the master (this) processor will have lines in state
	 * O that correspond to lines of home memory in state gI.
	 * An E$ flush is necessary to commit these lines before
	 * proceeding with the rename operation.
	 *
	 * Flushing the E$ will automatically flush the W$, but
	 * the D$ and I$ must be flushed separately and explicitly.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * Each line of home memory is now in state gM, except in
	 * the case of a cheetah processor when the E$ flush area
	 * is included within the copied region. In such a case,
	 * the lines of home memory for the upper half of the
	 * flush area are in state gS.
	 *
	 * Each line of target memory is in state gM.
	 *
	 * Each line of this processor's E$ is in state I, except
	 * those of a cheetah processor. All lines of a cheetah
	 * processor's E$ are in state S and correspond to the lines
	 * in upper half of the E$ flush area.
	 *
	 * It is vital at this point that none of the lines in the
	 * home or target memories are in state gI and that none
	 * of the lines in this processor's E$ are in state O or Os.
	 * A single instance of such a condition will cause loss of
	 * coherency following the rename operation.
	 */

	/*
	 * Rename
	 */
	/* call the copy of drmach_rename placed in cpu SRAM by the init stage */
	(*(void(*)())drmach_cpu_sram_va)(cr->data, &cr->ecode, &cr->earg);

	/*
	 * Rename operation complete. The physical address space
	 * of the home and target memories have been swapped, the
	 * routing data in the respective CASM entries have been
	 * swapped, and LPA settings in the processor and schizo
	 * devices have been reprogrammed accordingly.
	 *
	 * In the case of a cheetah processor, the E$ remains
	 * populated with lines in state S that correspond to the
	 * lines in the former home memory. Now that the physical
	 * addresses have been swapped, these E$ lines correspond
	 * to lines in the new home memory which are in state gM.
	 * This combination is invalid. An additional E$ flush is
	 * necessary to restore coherency. The E$ flush will cause
	 * the lines of the new home memory for the flush region
	 * to transition from state gM to gS. The former home memory
	 * remains unmodified. This additional E$ flush has no effect
	 * on a cheetah+ processor.
	 */
	flush_ecache_il(caddr, csize, lnsize);	/* inline version */

	/*
	 * The D$ and I$ must be flushed to ensure that coherency is
	 * maintained. Any line in a cache that is in the valid
	 * state has its corresponding line of the new home memory
	 * in the gM state. This is an invalid condition. When the
	 * flushes are complete the cache line states will be
	 * resynchronized with those in the new home memory.
	 */
	flush_icache_il();			/* inline version */
	flush_dcache_il();			/* inline version */
	flush_pcache_il();			/* inline version */

copy_rename_end:

	/* reached on success and after a trapped uncorrectable error */
	no_trap();

	/* enable interrupts */
	setpstate(pstate);

	/* enable CE reporting */
	set_error_enable(neer);

	/* any recorded failure makes the waiting cpus abort instead */
	if (cr->ecode != DRMACH_CR_OK)
		drmach_end_wait_xcall = drmach_rename_abort;

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	axq_iopause_disable_all();

	/* release the cpus parked in drmach_rename_wait */
	xt_some(cpuset, drmach_end_wait_xcall, NULL, NULL);
}
3774 
/*
 * Forward declarations of the io device methods that drmach_io_new()
 * installs in each new io device (dispose, release, status).
 */
static void drmach_io_dispose(drmachid_t);
static sbd_error_t *drmach_io_release(drmachid_t);
static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
3778 
3779 static sbd_error_t *
3780 drmach_pci_new(drmach_device_t *proto, drmachid_t *idp)
3781 {
3782 	drmach_node_t	*node = proto->node;
3783 	sbd_error_t	*err;
3784 	drmach_reg_t	 regs[3];
3785 	int		 rv;
3786 	int		 len = 0;
3787 
3788 	rv = node->n_getproplen(node, "reg", &len);
3789 	if (rv != 0 || len != sizeof (regs)) {
3790 		sbd_error_t *err;
3791 
3792 		/* pci nodes are expected to have regs */
3793 		err = drerr_new(1, ESTC_GETPROP,
3794 			"Device Node 0x%x: property %s",
3795 			(uint_t)node->get_dnode(node), "reg");
3796 		return (err);
3797 	}
3798 
3799 	rv = node->n_getprop(node, "reg", (void *)regs, sizeof (regs));
3800 	if (rv) {
3801 		sbd_error_t *err;
3802 
3803 		err = drerr_new(1, ESTC_GETPROP,
3804 			"Device Node 0x%x: property %s",
3805 			(uint_t)node->get_dnode(node), "reg");
3806 
3807 		return (err);
3808 	}
3809 
3810 	/*
3811 	 * Fix up unit number so that Leaf A has a lower unit number
3812 	 * than Leaf B.
3813 	 */
3814 	if ((proto->portid % 2) != 0) {
3815 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3816 			proto->unum = 0;
3817 		else
3818 			proto->unum = 1;
3819 	} else {
3820 		if ((regs[0].reg_addr_lo & 0x700000) == 0x700000)
3821 			proto->unum = 2;
3822 		else
3823 			proto->unum = 3;
3824 	}
3825 
3826 	err = drmach_io_new(proto, idp);
3827 	if (err == NULL) {
3828 		drmach_io_t *self = *idp;
3829 
3830 		/* reassemble 64-bit base address */
3831 		self->scsr_pa  = (uint64_t)regs[1].reg_addr_hi << 32;
3832 		self->scsr_pa |= (uint64_t)regs[1].reg_addr_lo;
3833 	}
3834 
3835 	return (err);
3836 }
3837 
3838 static sbd_error_t *
3839 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
3840 {
3841 	drmach_io_t	*ip;
3842 
3843 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
3844 	bcopy(proto, &ip->dev, sizeof (ip->dev));
3845 	ip->dev.node = drmach_node_dup(proto->node);
3846 	ip->dev.cm.isa = (void *)drmach_io_new;
3847 	ip->dev.cm.dispose = drmach_io_dispose;
3848 	ip->dev.cm.release = drmach_io_release;
3849 	ip->dev.cm.status = drmach_io_status;
3850 
3851 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
3852 		ip->dev.type, ip->dev.unum);
3853 
3854 	*idp = (drmachid_t)ip;
3855 	return (NULL);
3856 }
3857 
3858 static void
3859 drmach_io_dispose(drmachid_t id)
3860 {
3861 	drmach_io_t *self;
3862 
3863 	ASSERT(DRMACH_IS_IO_ID(id));
3864 
3865 	self = id;
3866 	if (self->dev.node)
3867 		drmach_node_dispose(self->dev.node);
3868 
3869 	kmem_free(self, sizeof (*self));
3870 }
3871 
3872 /*ARGSUSED*/
3873 sbd_error_t *
3874 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
3875 {
3876 	drmach_board_t	*bp = (drmach_board_t *)id;
3877 	sbd_error_t	*err = NULL;
3878 
3879 	if (id && DRMACH_IS_BOARD_ID(id)) {
3880 		switch (cmd) {
3881 			case SBD_CMD_TEST:
3882 			case SBD_CMD_STATUS:
3883 			case SBD_CMD_GETNCM:
3884 				break;
3885 			case SBD_CMD_CONNECT:
3886 				if (bp->connected)
3887 					err = drerr_new(0, ESBD_STATE, NULL);
3888 
3889 				if (bp->cond == SBD_COND_UNUSABLE)
3890 					err = drerr_new(0,
3891 						ESBD_FATAL_STATE, NULL);
3892 				break;
3893 			case SBD_CMD_DISCONNECT:
3894 				if (!bp->connected)
3895 					err = drerr_new(0, ESBD_STATE, NULL);
3896 
3897 				if (bp->cond == SBD_COND_UNUSABLE)
3898 					err = drerr_new(0,
3899 						ESBD_FATAL_STATE, NULL);
3900 				break;
3901 			default:
3902 				if (bp->cond == SBD_COND_UNUSABLE)
3903 					err = drerr_new(0,
3904 						ESBD_FATAL_STATE, NULL);
3905 				break;
3906 
3907 		}
3908 	}
3909 
3910 	return (err);
3911 }
3912 
/*
 * Post-operation hook, the counterpart of drmach_pre_op(). No
 * post-processing is performed: all arguments are ignored and the
 * routine always returns NULL (no error).
 */
/*ARGSUSED*/
sbd_error_t *
drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
{
	return (NULL);
}
3919 
3920 sbd_error_t *
3921 drmach_board_assign(int bnum, drmachid_t *id)
3922 {
3923 	sbd_error_t	*err = NULL;
3924 	caddr_t		obufp;
3925 
3926 	if (!drmach_initialized && drmach_init() == -1) {
3927 		err = DRMACH_INTERNAL_ERROR();
3928 	}
3929 
3930 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
3931 
3932 	if (!err) {
3933 		if (drmach_array_get(drmach_boards, bnum, id) == -1) {
3934 			err = drerr_new(0, ESTC_BNUM, "%d", bnum);
3935 		} else {
3936 			drmach_board_t	*bp;
3937 
3938 			if (*id)
3939 				rw_downgrade(&drmach_boards_rwlock);
3940 
3941 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
3942 			err = drmach_mbox_trans(DRMSG_ASSIGN, bnum, obufp,
3943 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
3944 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
3945 
3946 			if (!err) {
3947 				bp = *id;
3948 				if (!*id)
3949 					bp = *id  =
3950 					    (drmachid_t)drmach_board_new(bnum);
3951 				bp->assigned = 1;
3952 			}
3953 		}
3954 	}
3955 	rw_exit(&drmach_boards_rwlock);
3956 	return (err);
3957 }
3958 
3959 static uint_t
3960 drmach_board_non_panther_cpus(gdcd_t *gdcd, uint_t exp, uint_t slot)
3961 {
3962 	uint_t	port, port_start, port_end;
3963 	uint_t	non_panther_cpus = 0;
3964 	uint_t	impl;
3965 
3966 	ASSERT(gdcd != NULL);
3967 
3968 	/*
3969 	 * Determine PRD port indices based on slot location.
3970 	 */
3971 	switch (slot) {
3972 	case 0:
3973 		port_start = 0;
3974 		port_end = 3;
3975 		break;
3976 	case 1:
3977 		port_start = 4;
3978 		port_end = 5;
3979 		break;
3980 	default:
3981 		ASSERT(0);
3982 		/* check all */
3983 		port_start = 0;
3984 		port_end = 5;
3985 		break;
3986 	}
3987 
3988 	for (port = port_start; port <= port_end; port++) {
3989 		if (gdcd->dcd_prd[exp][port].prd_ptype == SAFPTYPE_CPU &&
3990 		    RSV_GOOD(gdcd->dcd_prd[exp][port].prd_prsv)) {
3991 			/*
3992 			 * This Safari port passed POST and represents a
3993 			 * cpu, so check the implementation.
3994 			 */
3995 			impl = (gdcd->dcd_prd[exp][port].prd_ver_reg >> 32)
3996 			    & 0xffff;
3997 
3998 			switch (impl) {
3999 			case CHEETAH_IMPL:
4000 			case CHEETAH_PLUS_IMPL:
4001 			case JAGUAR_IMPL:
4002 				non_panther_cpus++;
4003 				break;
4004 			case PANTHER_IMPL:
4005 				break;
4006 			default:
4007 				ASSERT(0);
4008 				non_panther_cpus++;
4009 				break;
4010 			}
4011 		}
4012 	}
4013 
4014 	DRMACH_PR("drmach_board_non_panther_cpus: exp=%d, slot=%d, "
4015 	    "non_panther_cpus=%d", exp, slot, non_panther_cpus);
4016 
4017 	return (non_panther_cpus);
4018 }
4019 
4020 sbd_error_t *
4021 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
4022 {
4023 	_NOTE(ARGUNUSED(opts))
4024 
4025 	drmach_board_t		*bp = (drmach_board_t *)id;
4026 	sbd_error_t		*err;
4027 	dr_mbox_msg_t		*obufp;
4028 	gdcd_t			*gdcd = NULL;
4029 	uint_t			exp, slot;
4030 	sc_gptwocfg_cookie_t	scc;
4031 	int			panther_pages_enabled;
4032 
4033 	if (!DRMACH_IS_BOARD_ID(id))
4034 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4035 
4036 	/*
4037 	 * Build the casm info portion of the CLAIM message.
4038 	 */
4039 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4040 	mutex_enter(&drmach_slice_table_lock);
4041 	drmach_msg_memslice_init(obufp->msgdata.dm_cr.mem_slice);
4042 	drmach_msg_memregs_init(obufp->msgdata.dm_cr.mem_regs);
4043 	mutex_exit(&drmach_slice_table_lock);
4044 	err = drmach_mbox_trans(DRMSG_CLAIM, bp->bnum, (caddr_t)obufp,
4045 		sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4046 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4047 
4048 	if (err) {
4049 		/*
4050 		 * if mailbox timeout or unrecoverable error from SC,
4051 		 * board cannot be touched.  Mark the status as
4052 		 * unusable.
4053 		 */
4054 		if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4055 			(err->e_code == ESTC_MBXRPLY))
4056 				bp->cond = SBD_COND_UNUSABLE;
4057 		return (err);
4058 	}
4059 
4060 	gdcd = drmach_gdcd_new();
4061 	if (gdcd == NULL) {
4062 		cmn_err(CE_WARN, "failed to read GDCD info for %s\n",
4063 		    bp->cm.name);
4064 		return (DRMACH_INTERNAL_ERROR());
4065 	}
4066 
4067 	/*
4068 	 * Read CPU SRAM DR buffer offset from GDCD.
4069 	 */
4070 	exp = DRMACH_BNUM2EXP(bp->bnum);
4071 	slot = DRMACH_BNUM2SLOT(bp->bnum);
4072 	bp->stardrb_offset =
4073 	    gdcd->dcd_slot[exp][slot].l1ss_cpu_drblock_xwd_offset << 3;
4074 	DRMACH_PR("%s: stardrb_offset=0x%lx\n", bp->cm.name,
4075 	    bp->stardrb_offset);
4076 
4077 	/*
4078 	 * Read board LPA setting from GDCD.
4079 	 */
4080 	bp->flags &= ~DRMACH_NULL_PROC_LPA;
4081 	if (gdcd->dcd_slot[exp][slot].l1ss_flags &
4082 	    L1SSFLG_THIS_L1_NULL_PROC_LPA) {
4083 		bp->flags |= DRMACH_NULL_PROC_LPA;
4084 		DRMACH_PR("%s: NULL proc LPA\n", bp->cm.name);
4085 	}
4086 
4087 	/*
4088 	 * XXX Until the Solaris large pages support heterogeneous cpu
4089 	 * domains, DR needs to prevent the addition of non-Panther cpus
4090 	 * to an all-Panther domain with large pages enabled.
4091 	 */
4092 	panther_pages_enabled = (page_num_pagesizes() > DEFAULT_MMU_PAGE_SIZES);
4093 	if (drmach_board_non_panther_cpus(gdcd, exp, slot) > 0 &&
4094 	    panther_pages_enabled && drmach_large_page_restriction) {
4095 		cmn_err(CE_WARN, "Domain shutdown is required to add a non-"
4096 		    "UltraSPARC-IV+ board into an all UltraSPARC-IV+ domain");
4097 		err = drerr_new(0, ESTC_SUPPORT, NULL);
4098 	}
4099 
4100 	if (err == NULL) {
4101 		/* do saf configurator stuff */
4102 		DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
4103 		scc = sc_probe_board(bp->bnum);
4104 		if (scc == NULL)
4105 			err = drerr_new(0, ESTC_PROBE, bp->cm.name);
4106 	}
4107 
4108 	if (err) {
4109 		/* flush CDC srams */
4110 		if (axq_cdc_flush_all() != DDI_SUCCESS) {
4111 			goto out;
4112 		}
4113 
4114 		/*
4115 		 * Build the casm info portion of the UNCLAIM message.
4116 		 */
4117 		obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4118 		mutex_enter(&drmach_slice_table_lock);
4119 		drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4120 		drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4121 		mutex_exit(&drmach_slice_table_lock);
4122 		(void) drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum,
4123 			(caddr_t)obufp, sizeof (dr_mbox_msg_t),
4124 			(caddr_t)NULL, 0);
4125 
4126 		kmem_free(obufp, sizeof (dr_mbox_msg_t));
4127 
4128 		/*
4129 		 * we clear the connected flag just in case it would have
4130 		 * been set by a concurrent drmach_board_status() thread
4131 		 * before the UNCLAIM completed.
4132 		 */
4133 		bp->connected = 0;
4134 		goto out;
4135 	}
4136 
4137 	/*
4138 	 * Now that the board has been successfully attached, obtain
4139 	 * platform-specific DIMM serial id information for the board.
4140 	 */
4141 	if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4142 	    plat_ecc_capability_sc_get(PLAT_ECC_DIMM_SID_MESSAGE)) {
4143 		(void) plat_request_mem_sids(DRMACH_BNUM2EXP(bp->bnum));
4144 	}
4145 
4146 out:
4147 	if (gdcd != NULL)
4148 		drmach_gdcd_dispose(gdcd);
4149 
4150 	return (err);
4151 }
4152 
4153 static void
4154 drmach_slice_table_update(drmach_board_t *bp, int invalidate)
4155 {
4156 	static char		*axq_name = "address-extender-queue";
4157 	static dev_info_t	*axq_dip = NULL;
4158 	static int		 axq_exp = -1;
4159 	static int		 axq_slot;
4160 	int			 e, s, slice;
4161 
4162 	ASSERT(MUTEX_HELD(&drmach_slice_table_lock));
4163 
4164 	e = DRMACH_BNUM2EXP(bp->bnum);
4165 	if (invalidate) {
4166 		ASSERT(DRMACH_BNUM2SLOT(bp->bnum) == 0);
4167 
4168 		/* invalidate cached casm value */
4169 		drmach_slice_table[e] = 0;
4170 
4171 		/* invalidate cached axq info if for same exp */
4172 		if (e == axq_exp && axq_dip) {
4173 			ndi_rele_devi(axq_dip);
4174 			axq_dip = NULL;
4175 		}
4176 	}
4177 
4178 	if (axq_dip == NULL || !i_ddi_devi_attached(axq_dip)) {
4179 		int i, portid;
4180 
4181 		/* search for an attached slot0 axq instance */
4182 		for (i = 0; i < AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP; i++) {
4183 			if (axq_dip)
4184 				ndi_rele_devi(axq_dip);
4185 			axq_dip = ddi_find_devinfo(axq_name, i, 0);
4186 			if (axq_dip && DDI_CF2(axq_dip)) {
4187 				portid = ddi_getprop(DDI_DEV_T_ANY, axq_dip,
4188 				    DDI_PROP_DONTPASS, "portid", -1);
4189 				if (portid == -1) {
4190 					DRMACH_PR("cant get portid of axq "
4191 					    "instance %d\n", i);
4192 					continue;
4193 				}
4194 
4195 				axq_exp = (portid >> 5) & 0x1f;
4196 				axq_slot = portid & 1;
4197 
4198 				if (invalidate && axq_exp == e)
4199 					continue;
4200 
4201 				if (axq_slot == 0)
4202 					break;	/* found */
4203 			}
4204 		}
4205 
4206 		if (i == AXQ_MAX_EXP * AXQ_MAX_SLOT_PER_EXP) {
4207 			if (axq_dip) {
4208 				ndi_rele_devi(axq_dip);
4209 				axq_dip = NULL;
4210 			}
4211 			DRMACH_PR("drmach_slice_table_update: failed to "
4212 			    "update axq dip\n");
4213 			return;
4214 		}
4215 
4216 	}
4217 
4218 	ASSERT(axq_dip);
4219 	ASSERT(axq_slot == 0);
4220 
4221 	if (invalidate)
4222 		return;
4223 
4224 	s = DRMACH_BNUM2SLOT(bp->bnum);
4225 	DRMACH_PR("using AXQ casm %d.%d for slot%d.%d\n",
4226 		axq_exp, axq_slot, e, s);
4227 
4228 	/* invalidate entry */
4229 	drmach_slice_table[e] &= ~0x20;
4230 
4231 	/*
4232 	 * find a slice that routes to expander e. If no match
4233 	 * is found, drmach_slice_table[e] will remain invalid.
4234 	 *
4235 	 * The CASM is a routing table indexed by slice number.
4236 	 * Each element in the table contains permission bits,
4237 	 * a destination expander number and a valid bit. The
4238 	 * valid bit must true for the element to be meaningful.
4239 	 *
4240 	 * CASM entry structure
4241 	 *   Bits 15..6 ignored
4242 	 *   Bit  5	valid
4243 	 *   Bits 0..4	expander number
4244 	 *
4245 	 * NOTE: the for loop is really enumerating the range of slices,
4246 	 * which is ALWAYS equal to the range of expanders. Hence,
4247 	 * AXQ_MAX_EXP is okay to use in this loop.
4248 	 */
4249 	for (slice = 0; slice < AXQ_MAX_EXP; slice++) {
4250 		uint32_t casm = axq_casm_read(axq_exp, axq_slot, slice);
4251 
4252 		if ((casm & 0x20) && (casm & 0x1f) == e)
4253 			drmach_slice_table[e] = 0x20 | slice;
4254 	}
4255 }
4256 
4257 /*
4258  * Get base and bound PAs for slot 1 board lpa programming
4259  * If a cpu/mem board is present in the same expander, use slice
4260  * information corresponding to the CASM.  Otherwise, set base and
4261  * bound PAs to 0.
4262  */
4263 static void
4264 drmach_lpa_bb_get(drmach_board_t *s1bp, uint64_t *basep, uint64_t *boundp)
4265 {
4266 	drmachid_t s0id;
4267 
4268 	ASSERT(mutex_owned(&drmach_slice_table_lock));
4269 	ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
4270 
4271 	*basep = *boundp = 0;
4272 	if (drmach_array_get(drmach_boards, s1bp->bnum - 1, &s0id) == 0 &&
4273 		s0id != 0) {
4274 
4275 		uint32_t slice;
4276 		if ((slice =
4277 			drmach_slice_table[DRMACH_BNUM2EXP(s1bp->bnum)])
4278 				& 0x20) {
4279 
4280 			*basep = DRMACH_SLICE_TO_PA(slice & DRMACH_SLICE_MASK);
4281 			*boundp = *basep + DRMACH_MEM_SLICE_SIZE;
4282 		}
4283 	}
4284 }
4285 
4286 
4287 /*
4288  * Reprogram slot 1 lpa's as required.
4289  * The purpose of this routine is maintain the LPA settings of the devices
4290  * in slot 1. To date we know Schizo and Cheetah are the only devices that
4291  * require this attention. The LPA setting must match the slice field in the
4292  * CASM element for the local expander. This field is guaranteed to be
4293  * programmed in accordance with the cacheable address space on the slot 0
4294  * board of the local expander. If no memory is present on the slot 0 board,
4295  * there is no cacheable address space and, hence, the CASM slice field will
4296  * be zero or its valid bit will be false (or both).
4297  */
4298 
4299 static void
4300 drmach_slot1_lpa_set(drmach_board_t *bp)
4301 {
4302 	drmachid_t	id;
4303 	drmach_board_t	*s1bp = NULL;
4304 	int		rv, idx, is_maxcat = 1;
4305 	uint64_t	last_scsr_pa = 0;
4306 	uint64_t	new_basepa, new_boundpa;
4307 
4308 	if (DRMACH_BNUM2SLOT(bp->bnum)) {
4309 		s1bp = bp;
4310 		if (s1bp->devices == NULL) {
4311 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4312 				bp->bnum);
4313 			return;
4314 		}
4315 	} else {
4316 		rv = drmach_array_get(drmach_boards, bp->bnum + 1, &id);
4317 		/* nothing to do when board is not found or has no devices */
4318 		s1bp = id;
4319 		if (rv == -1 || s1bp == NULL || s1bp->devices == NULL) {
4320 			DRMACH_PR("drmach...lpa_set: slot1=%d not present",
4321 				bp->bnum + 1);
4322 			return;
4323 		}
4324 		ASSERT(DRMACH_IS_BOARD_ID(id));
4325 	}
4326 	mutex_enter(&drmach_slice_table_lock);
4327 	drmach_lpa_bb_get(s1bp, &new_basepa, &new_boundpa);
4328 	DRMACH_PR("drmach_...lpa_set: bnum=%d base=0x%lx bound=0x%lx\n",
4329 			s1bp->bnum, new_basepa, new_boundpa);
4330 
4331 	rv = drmach_array_first(s1bp->devices, &idx, &id);
4332 	while (rv == 0) {
4333 		if (DRMACH_IS_IO_ID(id)) {
4334 			drmach_io_t *io = id;
4335 
4336 			is_maxcat = 0;
4337 
4338 			/*
4339 			 * Skip all non-Schizo IO devices (only IO nodes
4340 			 * that are Schizo devices have non-zero scsr_pa).
4341 			 * Filter out "other" leaf to avoid writing to the
4342 			 * same Schizo Control/Status Register twice.
4343 			 */
4344 			if (io->scsr_pa && io->scsr_pa != last_scsr_pa) {
4345 				uint64_t scsr;
4346 
4347 				scsr  = lddphysio(io->scsr_pa);
4348 				DRMACH_PR("drmach...lpa_set: old scsr=0x%lx\n",
4349 					scsr);
4350 				scsr &= ~(DRMACH_LPA_BASE_MASK |
4351 						DRMACH_LPA_BND_MASK);
4352 				scsr |= DRMACH_PA_TO_LPA_BASE(new_basepa);
4353 				scsr |= DRMACH_PA_TO_LPA_BND(new_boundpa);
4354 
4355 				stdphysio(io->scsr_pa, scsr);
4356 				DRMACH_PR("drmach...lpa_set: new scsr=0x%lx\n",
4357 					scsr);
4358 
4359 				last_scsr_pa = io->scsr_pa;
4360 			}
4361 		}
4362 		rv = drmach_array_next(s1bp->devices, &idx, &id);
4363 	}
4364 
4365 	if (is_maxcat && DRMACH_L1_SET_LPA(s1bp) && drmach_reprogram_lpa) {
4366 		extern xcfunc_t	drmach_set_lpa;
4367 
4368 		DRMACH_PR("reprogramming maxcat lpa's");
4369 
4370 		mutex_enter(&cpu_lock);
4371 		rv = drmach_array_first(s1bp->devices, &idx, &id);
4372 		while (rv == 0 && id != NULL) {
4373 			if (DRMACH_IS_CPU_ID(id)) {
4374 				int ntries;
4375 				processorid_t cpuid;
4376 
4377 				cpuid = ((drmach_cpu_t *)id)->cpuid;
4378 
4379 				/*
4380 				 * Check for unconfigured or powered-off
4381 				 * MCPUs.  If CPU_READY flag is clear, the
4382 				 * MCPU cannot be xcalled.
4383 				 */
4384 				if ((cpu[cpuid] == NULL) ||
4385 					(cpu[cpuid]->cpu_flags &
4386 					CPU_READY) == 0) {
4387 
4388 					rv = drmach_array_next(s1bp->devices,
4389 						&idx, &id);
4390 					continue;
4391 				}
4392 
4393 				/*
4394 				 * XXX CHEETAH SUPPORT
4395 				 * for cheetah, we need to clear iocage
4396 				 * memory since it will be used for e$ flush
4397 				 * in drmach_set_lpa.
4398 				 */
4399 				if (drmach_is_cheetah) {
4400 					mutex_enter(&drmach_iocage_lock);
4401 					while (drmach_iocage_is_busy)
4402 						cv_wait(&drmach_iocage_cv,
4403 							&drmach_iocage_lock);
4404 					drmach_iocage_is_busy = 1;
4405 					drmach_iocage_mem_scrub(
4406 						ecache_size * 2);
4407 					mutex_exit(&drmach_iocage_lock);
4408 				}
4409 
4410 				/*
4411 				 * drmach_slice_table[*]
4412 				 *	bit 5	valid
4413 				 *	bit 0:4	slice number
4414 				 *
4415 				 * drmach_xt_mb[*] format for drmach_set_lpa
4416 				 *	bit 7	valid
4417 				 *	bit 6	set null LPA
4418 				 *			(overrides bits 0:4)
4419 				 *	bit 0:4	slice number
4420 				 *
4421 				 * drmach_set_lpa derives processor CBASE and
4422 				 * CBND from bits 6 and 0:4 of drmach_xt_mb.
4423 				 * If bit 6 is set, then CBASE = CBND = 0.
4424 				 * Otherwise, CBASE = slice number;
4425 				 * CBND = slice number + 1.
4426 				 * No action is taken if bit 7 is zero.
4427 				 */
4428 
4429 				mutex_enter(&drmach_xt_mb_lock);
4430 				bzero((void *)drmach_xt_mb,
4431 				    drmach_xt_mb_size);
4432 
4433 				if (new_basepa == 0 && new_boundpa == 0)
4434 					drmach_xt_mb[cpuid] = 0x80 | 0x40;
4435 				else
4436 					drmach_xt_mb[cpuid] = 0x80 |
4437 						DRMACH_PA_TO_SLICE(new_basepa);
4438 
4439 				drmach_xt_ready = 0;
4440 
4441 				xt_one(cpuid, drmach_set_lpa, NULL, NULL);
4442 
4443 				ntries = drmach_cpu_ntries;
4444 				while (!drmach_xt_ready && ntries) {
4445 					DELAY(drmach_cpu_delay);
4446 					ntries--;
4447 				}
4448 				mutex_exit(&drmach_xt_mb_lock);
4449 				drmach_xt_ready = 0;
4450 
4451 				/*
4452 				 * XXX CHEETAH SUPPORT
4453 				 * for cheetah, we need to clear iocage
4454 				 * memory since it was used for e$ flush
4455 				 * in performed drmach_set_lpa.
4456 				 */
4457 				if (drmach_is_cheetah) {
4458 					mutex_enter(&drmach_iocage_lock);
4459 					drmach_iocage_mem_scrub(
4460 						ecache_size * 2);
4461 					drmach_iocage_is_busy = 0;
4462 					cv_signal(&drmach_iocage_cv);
4463 					mutex_exit(&drmach_iocage_lock);
4464 				}
4465 			}
4466 			rv = drmach_array_next(s1bp->devices, &idx, &id);
4467 		}
4468 		mutex_exit(&cpu_lock);
4469 	}
4470 	mutex_exit(&drmach_slice_table_lock);
4471 }
4472 
4473 /*
4474  * Return the number of connected Panther boards in the domain.
4475  */
4476 static int
4477 drmach_panther_boards(void)
4478 {
4479 	int		rv;
4480 	int		b_idx;
4481 	drmachid_t	b_id;
4482 	drmach_board_t	*bp;
4483 	int		npanther = 0;
4484 
4485 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
4486 	while (rv == 0) {
4487 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
4488 		bp = b_id;
4489 
4490 		if (IS_PANTHER(bp->cpu_impl))
4491 			npanther++;
4492 
4493 		rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
4494 	}
4495 
4496 	return (npanther);
4497 }
4498 
4499 /*ARGSUSED*/
4500 sbd_error_t *
4501 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
4502 {
4503 	drmach_board_t	*bp;
4504 	dr_mbox_msg_t	*obufp;
4505 	sbd_error_t	*err = NULL;
4506 
4507 	sc_gptwocfg_cookie_t	scc;
4508 
4509 	if (!DRMACH_IS_BOARD_ID(id))
4510 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4511 	bp = id;
4512 
4513 	/*
4514 	 * Build the casm info portion of the UNCLAIM message.
4515 	 * This must be done prior to calling for saf configurator
4516 	 * deprobe, to ensure that the associated axq instance
4517 	 * is not detached.
4518 	 */
4519 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4520 	mutex_enter(&drmach_slice_table_lock);
4521 	drmach_msg_memslice_init(obufp->msgdata.dm_ur.mem_slice);
4522 
4523 	/*
4524 	 * If disconnecting slot 0 board, update the casm slice table
4525 	 * info now, for use by drmach_slot1_lpa_set()
4526 	 */
4527 	if (DRMACH_BNUM2SLOT(bp->bnum) == 0)
4528 			drmach_slice_table_update(bp, 1);
4529 
4530 	drmach_msg_memregs_init(obufp->msgdata.dm_ur.mem_regs);
4531 	mutex_exit(&drmach_slice_table_lock);
4532 
4533 	/*
4534 	 * Update LPA information for slot1 board
4535 	 */
4536 	drmach_slot1_lpa_set(bp);
4537 
4538 	/* disable and flush CDC */
4539 	if (axq_cdc_disable_flush_all() != DDI_SUCCESS) {
4540 		axq_cdc_enable_all();	/* paranoia */
4541 		err = DRMACH_INTERNAL_ERROR();
4542 	}
4543 
4544 	/*
4545 	 * call saf configurator for deprobe
4546 	 * It's done now before sending an UNCLAIM message because
4547 	 * IKP will probe boards it doesn't know about <present at boot>
4548 	 * prior to unprobing them.  If this happens after sending the
4549 	 * UNCLAIM, it will cause a dstop for domain transgression error.
4550 	 */
4551 
4552 	if (!err) {
4553 		scc = sc_unprobe_board(bp->bnum);
4554 		axq_cdc_enable_all();
4555 		if (scc != NULL) {
4556 			err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
4557 		}
4558 	}
4559 
4560 	/*
4561 	 * If disconnecting a board from a Panther domain, wait a fixed-
4562 	 * time delay for pending Safari transactions to complete on the
4563 	 * disconnecting board's processors.  The bus sync list read used
4564 	 * in drmach_shutdown_asm to synchronize with outstanding Safari
4565 	 * transactions assumes no read-bypass-write mode for all memory
4566 	 * controllers.  Since Panther supports read-bypass-write, a
4567 	 * delay is used that is slightly larger than the maximum Safari
4568 	 * timeout value in the Safari/Fireplane Config Reg.
4569 	 */
4570 	if (drmach_panther_boards() > 0 || drmach_unclaim_delay_all) {
4571 		clock_t	stime = lbolt;
4572 
4573 		delay(drv_usectohz(drmach_unclaim_usec_delay));
4574 
4575 		stime = lbolt - stime;
4576 		DRMACH_PR("delayed %ld ticks (%ld secs) before disconnecting "
4577 		    "board %s from domain\n", stime, stime / hz, bp->cm.name);
4578 	}
4579 
4580 	if (!err) {
4581 		obufp->msgdata.dm_ur.mem_clear = 0;
4582 
4583 		err = drmach_mbox_trans(DRMSG_UNCLAIM, bp->bnum, (caddr_t)obufp,
4584 			sizeof (dr_mbox_msg_t), (caddr_t)NULL, 0);
4585 
4586 		if (err) {
4587 			/*
4588 			 * if mailbox timeout or unrecoverable error from SC,
4589 			 * board cannot be touched.  Mark the status as
4590 			 * unusable.
4591 			 */
4592 			if ((err->e_code == ESTC_SMS_ERR_UNRECOVERABLE) ||
4593 				(err->e_code == ESTC_MBXRPLY))
4594 					bp->cond = SBD_COND_UNUSABLE;
4595 			else {
4596 				DRMACH_PR("UNCLAIM failed for bnum=%d\n",
4597 					bp->bnum);
4598 				DRMACH_PR("calling sc_probe_board: bnum=%d\n",
4599 					bp->bnum);
4600 				scc = sc_probe_board(bp->bnum);
4601 				if (scc == NULL) {
4602 					cmn_err(CE_WARN,
4603 					"sc_probe_board failed for bnum=%d",
4604 						bp->bnum);
4605 				} else {
4606 					if (DRMACH_BNUM2SLOT(bp->bnum) == 0) {
4607 						mutex_enter(
4608 						    &drmach_slice_table_lock);
4609 						drmach_slice_table_update(bp,
4610 						    0);
4611 						mutex_exit(
4612 						    &drmach_slice_table_lock);
4613 					}
4614 					drmach_slot1_lpa_set(bp);
4615 				}
4616 			}
4617 		} else {
4618 			bp->connected = 0;
4619 			/*
4620 			 * Now that the board has been successfully detached,
4621 			 * discard platform-specific DIMM serial id information
4622 			 * for the board.
4623 			 */
4624 			if ((DRMACH_BNUM2SLOT(bp->bnum) == 0) &&
4625 			    plat_ecc_capability_sc_get(
4626 			    PLAT_ECC_DIMM_SID_MESSAGE)) {
4627 				(void) plat_discard_mem_sids(
4628 				    DRMACH_BNUM2EXP(bp->bnum));
4629 			}
4630 		}
4631 	}
4632 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
4633 
4634 	return (err);
4635 }
4636 
4637 static int
4638 drmach_get_portid(drmach_node_t *np)
4639 {
4640 	drmach_node_t	pp;
4641 	int		portid;
4642 	char		type[OBP_MAXPROPNAME];
4643 
4644 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
4645 		return (portid);
4646 
4647 	/*
4648 	 * Get the device_type property to see if we should
4649 	 * continue processing this node.
4650 	 */
4651 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
4652 		return (-1);
4653 
4654 	/*
4655 	 * If the device is a CPU without a 'portid' property,
4656 	 * it is a CMP core. For such cases, the parent node
4657 	 * has the portid.
4658 	 */
4659 	if (strcmp(type, DRMACH_CPU_NAMEPROP) == 0) {
4660 		if (np->get_parent(np, &pp) != 0)
4661 			return (-1);
4662 
4663 		if (pp.n_getprop(&pp, "portid", &portid, sizeof (portid)) == 0)
4664 			return (portid);
4665 	}
4666 
4667 	return (-1);
4668 }
4669 
4670 /*
4671  * This is a helper function to determine if a given
4672  * node should be considered for a dr operation according
4673  * to predefined dr type nodes and the node's name.
4674  * Formal Parameter : The name of a device node.
4675  * Return Value: -1, name does not map to a valid dr type.
4676  *		 A value greater or equal to 0, name is a valid dr type.
4677  */
4678 static int
4679 drmach_name2type_idx(char *name)
4680 {
4681 	int 	index, ntypes;
4682 
4683 	if (name == NULL)
4684 		return (-1);
4685 
4686 	/*
4687 	 * Determine how many possible types are currently supported
4688 	 * for dr.
4689 	 */
4690 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
4691 
4692 	/* Determine if the node's name correspond to a predefined type. */
4693 	for (index = 0; index < ntypes; index++) {
4694 		if (strcmp(drmach_name2type[index].name, name) == 0)
4695 			/* The node is an allowed type for dr. */
4696 			return (index);
4697 	}
4698 
4699 	/*
4700 	 * If the name of the node does not map to any of the
4701 	 * types in the array drmach_name2type then the node is not of
4702 	 * interest to dr.
4703 	 */
4704 	return (-1);
4705 }
4706 
4707 static int
4708 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
4709 {
4710 	drmach_node_t			*node = args->node;
4711 	drmach_board_cb_data_t		*data = args->data;
4712 	drmach_board_t			*obj = data->obj;
4713 
4714 	int		rv, portid;
4715 	drmachid_t	id;
4716 	drmach_device_t	*device;
4717 	char	name[OBP_MAXDRVNAME];
4718 
4719 	portid = drmach_get_portid(node);
4720 	if (portid == -1) {
4721 		/*
4722 		 * if the node does not have a portid property, then
4723 		 * by that information alone it is known that drmach
4724 		 * is not interested in it.
4725 		 */
4726 		return (0);
4727 	}
4728 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
4729 
4730 	/* The node must have a name */
4731 	if (rv)
4732 		return (0);
4733 
4734 	/*
4735 	 * Ignore devices whose portid do not map to this board,
4736 	 * or that their name property is not mapped to a valid
4737 	 * dr device name.
4738 	 */
4739 	if ((drmach_portid2bnum(portid) != obj->bnum) ||
4740 	    (drmach_name2type_idx(name) < 0))
4741 		return (0);
4742 
4743 	/*
4744 	 * Create a device data structure from this node data.
4745 	 * The call may yield nothing if the node is not of interest
4746 	 * to drmach.
4747 	 */
4748 	data->err = drmach_device_new(node, obj, portid, &id);
4749 	if (data->err)
4750 		return (-1);
4751 	else if (!id) {
4752 		/*
4753 		 * drmach_device_new examined the node we passed in
4754 		 * and determined that it was either one not of
4755 		 * interest to drmach or the PIM dr layer.
4756 		 * So, it is skipped.
4757 		 */
4758 		return (0);
4759 	}
4760 
4761 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
4762 	if (rv) {
4763 		data->err = DRMACH_INTERNAL_ERROR();
4764 		return (-1);
4765 	}
4766 
4767 	device = id;
4768 
4769 #ifdef DEBUG
4770 	DRMACH_PR("%d %s %d %p\n", portid, device->type, device->unum, id);
4771 	if (DRMACH_IS_IO_ID(id))
4772 		DRMACH_PR("ndevs = %d dip/node = %p", data->ndevs, node->here);
4773 #endif
4774 
4775 	data->err = (*data->found)(data->a, device->type, device->unum, id);
4776 	return (data->err == NULL ? 0 : -1);
4777 }
4778 
4779 sbd_error_t *
4780 drmach_board_find_devices(drmachid_t id, void *a,
4781 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
4782 {
4783 	drmach_board_t		*bp = (drmach_board_t *)id;
4784 	sbd_error_t		*err;
4785 	int			 max_devices;
4786 	int			 rv;
4787 	drmach_board_cb_data_t	data;
4788 
4789 	if (!DRMACH_IS_BOARD_ID(id))
4790 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4791 
4792 	max_devices  = plat_max_cpu_units_per_board();
4793 	max_devices += plat_max_mem_units_per_board();
4794 	max_devices += plat_max_io_units_per_board();
4795 
4796 	bp->devices = drmach_array_new(0, max_devices);
4797 
4798 	if (bp->tree == NULL)
4799 		bp->tree = drmach_node_new();
4800 
4801 	data.obj = bp;
4802 	data.ndevs = 0;
4803 	data.found = found;
4804 	data.a = a;
4805 	data.err = NULL;
4806 
4807 	mutex_enter(&drmach_slice_table_lock);
4808 	mutex_enter(&drmach_bus_sync_lock);
4809 
4810 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
4811 
4812 	drmach_slice_table_update(bp, 0);
4813 	drmach_bus_sync_list_update();
4814 
4815 	mutex_exit(&drmach_bus_sync_lock);
4816 	mutex_exit(&drmach_slice_table_lock);
4817 
4818 	if (rv == 0) {
4819 		err = NULL;
4820 		drmach_slot1_lpa_set(bp);
4821 	} else {
4822 		drmach_array_dispose(bp->devices, drmach_device_dispose);
4823 		bp->devices = NULL;
4824 
4825 		if (data.err)
4826 			err = data.err;
4827 		else
4828 			err = DRMACH_INTERNAL_ERROR();
4829 	}
4830 
4831 	return (err);
4832 }
4833 
4834 int
4835 drmach_board_lookup(int bnum, drmachid_t *id)
4836 {
4837 	int	rv = 0;
4838 
4839 	if (!drmach_initialized && drmach_init() == -1) {
4840 		*id = 0;
4841 		return (-1);
4842 	}
4843 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
4844 	if (drmach_array_get(drmach_boards, bnum, id)) {
4845 		*id = 0;
4846 		rv = -1;
4847 	} else {
4848 		caddr_t		obufp;
4849 		dr_showboard_t	shb;
4850 		sbd_error_t	*err = NULL;
4851 		drmach_board_t	*bp;
4852 
4853 		bp = *id;
4854 
4855 		if (bp)
4856 			rw_downgrade(&drmach_boards_rwlock);
4857 
4858 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4859 		err = drmach_mbox_trans(DRMSG_SHOWBOARD, bnum, obufp,
4860 			sizeof (dr_proto_hdr_t), (caddr_t)&shb,
4861 			sizeof (dr_showboard_t));
4862 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
4863 
4864 		if (err) {
4865 			if (err->e_code == ESTC_UNAVAILABLE) {
4866 				*id = 0;
4867 				rv = -1;
4868 			}
4869 			sbd_err_clear(&err);
4870 		} else {
4871 			if (!bp)
4872 				bp = *id  = (drmachid_t)drmach_board_new(bnum);
4873 			bp->connected = (shb.bd_assigned && shb.bd_active);
4874 			bp->empty = shb.slot_empty;
4875 
4876 			switch (shb.test_status) {
4877 				case DR_TEST_STATUS_UNKNOWN:
4878 				case DR_TEST_STATUS_IPOST:
4879 				case DR_TEST_STATUS_ABORTED:
4880 					bp->cond = SBD_COND_UNKNOWN;
4881 					break;
4882 				case DR_TEST_STATUS_PASSED:
4883 					bp->cond = SBD_COND_OK;
4884 					break;
4885 				case DR_TEST_STATUS_FAILED:
4886 					bp->cond = SBD_COND_FAILED;
4887 					break;
4888 				default:
4889 					bp->cond = SBD_COND_UNKNOWN;
4890 				DRMACH_PR("Unknown test status=0x%x from SC\n",
4891 						shb.test_status);
4892 					break;
4893 			}
4894 			strncpy(bp->type, shb.board_type, sizeof (bp->type));
4895 			bp->assigned = shb.bd_assigned;
4896 			bp->powered = shb.power_on;
4897 		}
4898 	}
4899 	rw_exit(&drmach_boards_rwlock);
4900 	return (rv);
4901 }
4902 
4903 sbd_error_t *
4904 drmach_board_name(int bnum, char *buf, int buflen)
4905 {
4906 	snprintf(buf, buflen, "%s%d", DRMACH_BNUM2SLOT(bnum) ?
4907 	    "IO" : "SB", DRMACH_BNUM2EXP(bnum));
4908 
4909 	return (NULL);
4910 }
4911 
4912 sbd_error_t *
4913 drmach_board_poweroff(drmachid_t id)
4914 {
4915 	drmach_board_t	*bp;
4916 	sbd_error_t	*err;
4917 	drmach_status_t	 stat;
4918 
4919 	if (!DRMACH_IS_BOARD_ID(id))
4920 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4921 	bp = id;
4922 
4923 	err = drmach_board_status(id, &stat);
4924 	if (!err) {
4925 		if (stat.configured || stat.busy)
4926 			err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
4927 		else {
4928 			caddr_t	obufp;
4929 
4930 			obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4931 			err = drmach_mbox_trans(DRMSG_POWEROFF, bp->bnum, obufp,
4932 				sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4933 			kmem_free(obufp, sizeof (dr_proto_hdr_t));
4934 			if (!err)
4935 				bp->powered = 0;
4936 		}
4937 	}
4938 	return (err);
4939 }
4940 
4941 sbd_error_t *
4942 drmach_board_poweron(drmachid_t id)
4943 {
4944 	drmach_board_t	*bp;
4945 	caddr_t		obufp;
4946 	sbd_error_t	*err;
4947 
4948 	if (!DRMACH_IS_BOARD_ID(id))
4949 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4950 	bp = id;
4951 
4952 	obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
4953 	err = drmach_mbox_trans(DRMSG_POWERON, bp->bnum, obufp,
4954 		sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
4955 	if (!err)
4956 		bp->powered = 1;
4957 
4958 	kmem_free(obufp, sizeof (dr_proto_hdr_t));
4959 
4960 	return (err);
4961 }
4962 
4963 static sbd_error_t *
4964 drmach_board_release(drmachid_t id)
4965 {
4966 	if (!DRMACH_IS_BOARD_ID(id))
4967 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4968 	return (NULL);
4969 }
4970 
4971 sbd_error_t *
4972 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
4973 {
4974 	drmach_board_t		*bp;
4975 	drmach_device_t		*dp[MAX_CORES_PER_CMP];
4976 	dr_mbox_msg_t		*obufp;
4977 	sbd_error_t		*err;
4978 	dr_testboard_reply_t	tbr;
4979 	int			cpylen;
4980 	char			*copts;
4981 	int			is_io;
4982 	cpu_flag_t		oflags[MAX_CORES_PER_CMP];
4983 
4984 	if (!DRMACH_IS_BOARD_ID(id))
4985 		return (drerr_new(0, ESTC_INAPPROP, NULL));
4986 	bp = id;
4987 
4988 	/*
4989 	 * If the board is an I/O or MAXCAT board, setup I/O cage for
4990 	 * testing. Slot 1 indicates I/O or MAXCAT board.
4991 	 */
4992 
4993 	is_io = DRMACH_BNUM2SLOT(bp->bnum);
4994 
4995 	obufp = kmem_zalloc(sizeof (dr_mbox_msg_t), KM_SLEEP);
4996 
4997 	if (force)
4998 		obufp->msgdata.dm_tb.force = 1;
4999 
5000 	obufp->msgdata.dm_tb.immediate = 1;
5001 
5002 	if ((opts->size > 0) && ((copts = opts->copts) != NULL)) {
5003 		cpylen = (opts->size > DR_HPOPTLEN ? DR_HPOPTLEN : opts->size);
5004 		bcopy(copts, obufp->msgdata.dm_tb.hpost_opts, cpylen);
5005 	}
5006 
5007 	if (is_io) {
5008 		err = drmach_iocage_setup(&obufp->msgdata.dm_tb, dp, oflags);
5009 
5010 		if (err) {
5011 			kmem_free(obufp, sizeof (dr_mbox_msg_t));
5012 			return (err);
5013 		}
5014 	}
5015 
5016 	err = drmach_mbox_trans(DRMSG_TESTBOARD, bp->bnum, (caddr_t)obufp,
5017 		sizeof (dr_mbox_msg_t), (caddr_t)&tbr, sizeof (tbr));
5018 
5019 	if (!err)
5020 		bp->cond = SBD_COND_OK;
5021 	else
5022 		bp->cond = SBD_COND_UNKNOWN;
5023 
5024 	if ((!err) && (tbr.test_status != DR_TEST_STATUS_PASSED)) {
5025 		/* examine test status */
5026 		switch (tbr.test_status) {
5027 			case DR_TEST_STATUS_IPOST:
5028 				bp->cond = SBD_COND_UNKNOWN;
5029 				err = drerr_new(0, ESTC_TEST_IN_PROGRESS,
5030 					NULL);
5031 				break;
5032 			case DR_TEST_STATUS_UNKNOWN:
5033 				bp->cond = SBD_COND_UNKNOWN;
5034 				err = drerr_new(1,
5035 					ESTC_TEST_STATUS_UNKNOWN, NULL);
5036 				break;
5037 			case DR_TEST_STATUS_FAILED:
5038 				bp->cond = SBD_COND_FAILED;
5039 				err = drerr_new(1, ESTC_TEST_FAILED,
5040 					NULL);
5041 				break;
5042 			case DR_TEST_STATUS_ABORTED:
5043 				bp->cond = SBD_COND_UNKNOWN;
5044 				err = drerr_new(1, ESTC_TEST_ABORTED,
5045 					NULL);
5046 				break;
5047 			default:
5048 				bp->cond = SBD_COND_UNKNOWN;
5049 				err = drerr_new(1,
5050 					ESTC_TEST_RESULT_UNKNOWN,
5051 					NULL);
5052 				break;
5053 		}
5054 	}
5055 
5056 	/*
5057 	 * If I/O cage test was performed, check for availability of the
5058 	 * cpu used.  If cpu has been returned, it's OK to proceed with
5059 	 * reconfiguring it for use.
5060 	 */
5061 	if (is_io) {
5062 		DRMACH_PR("drmach_board_test: tbr.cpu_recovered: %d",
5063 			tbr.cpu_recovered);
5064 		DRMACH_PR("drmach_board_test: port id: %d",
5065 			tbr.cpu_portid);
5066 
5067 		/*
5068 		 * Check the cpu_recovered flag in the testboard reply, or
5069 		 * if the testboard request message was not sent to SMS due
5070 		 * to an mboxsc_putmsg() failure, it's OK to recover the
5071 		 * cpu since hpost hasn't touched it.
5072 		 */
5073 		if ((tbr.cpu_recovered && tbr.cpu_portid ==
5074 		    obufp->msgdata.dm_tb.cpu_portid) ||
5075 		    ((err) && (err->e_code == ESTC_MBXRQST))) {
5076 
5077 			int i;
5078 
5079 			mutex_enter(&cpu_lock);
5080 			for (i = 0; i < MAX_CORES_PER_CMP; i++) {
5081 				if (dp[i] != NULL) {
5082 					(void) drmach_iocage_cpu_return(dp[i],
5083 					    oflags[i]);
5084 				}
5085 			}
5086 			mutex_exit(&cpu_lock);
5087 		} else {
5088 			cmn_err(CE_WARN, "Unable to recover port id %d "
5089 			    "after I/O cage test: cpu_recovered=%d, "
5090 			    "returned portid=%d",
5091 			    obufp->msgdata.dm_tb.cpu_portid,
5092 			    tbr.cpu_recovered, tbr.cpu_portid);
5093 		}
5094 		drmach_iocage_mem_return(&tbr);
5095 	}
5096 	kmem_free(obufp, sizeof (dr_mbox_msg_t));
5097 
5098 	return (err);
5099 }
5100 
5101 sbd_error_t *
5102 drmach_board_unassign(drmachid_t id)
5103 {
5104 	drmach_board_t	*bp;
5105 	sbd_error_t	*err;
5106 	drmach_status_t	 stat;
5107 	caddr_t		obufp;
5108 
5109 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
5110 
5111 	if (!DRMACH_IS_BOARD_ID(id)) {
5112 		rw_exit(&drmach_boards_rwlock);
5113 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5114 	}
5115 	bp = id;
5116 
5117 	err = drmach_board_status(id, &stat);
5118 	if (err) {
5119 		rw_exit(&drmach_boards_rwlock);
5120 		return (err);
5121 	}
5122 
5123 	if (stat.configured || stat.busy) {
5124 		err = drerr_new(0, ESTC_CONFIGBUSY, bp->cm.name);
5125 	} else {
5126 
5127 		obufp = kmem_zalloc(sizeof (dr_proto_hdr_t), KM_SLEEP);
5128 		err = drmach_mbox_trans(DRMSG_UNASSIGN, bp->bnum, obufp,
5129 			sizeof (dr_proto_hdr_t), (caddr_t)NULL, 0);
5130 		kmem_free(obufp, sizeof (dr_proto_hdr_t));
5131 		if (!err) {
5132 			if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
5133 				err = DRMACH_INTERNAL_ERROR();
5134 			else
5135 				drmach_board_dispose(bp);
5136 		}
5137 	}
5138 	rw_exit(&drmach_boards_rwlock);
5139 	return (err);
5140 }
5141 
5142 static sbd_error_t *
5143 drmach_read_reg_addr(drmach_device_t *dp, uint64_t *p)
5144 {
5145 	int		len;
5146 	drmach_reg_t	reg;
5147 	drmach_node_t	pp;
5148 	drmach_node_t	*np = dp->node;
5149 
5150 	/*
5151 	 * If the node does not have a portid property,
5152 	 * it represents a CMP device. For a CMP, the reg
5153 	 * property of the parent holds the information of
5154 	 * interest.
5155 	 */
5156 	if (dp->node->n_getproplen(dp->node, "portid", &len) != 0) {
5157 
5158 		if (dp->node->get_parent(dp->node, &pp) != 0) {
5159 			return (DRMACH_INTERNAL_ERROR());
5160 		}
5161 		np = &pp;
5162 	}
5163 
5164 	if (np->n_getproplen(np, "reg", &len) != 0)
5165 		return (DRMACH_INTERNAL_ERROR());
5166 
5167 	if (len != sizeof (reg))
5168 		return (DRMACH_INTERNAL_ERROR());
5169 
5170 	if (np->n_getprop(np, "reg", &reg, sizeof (reg)) != 0)
5171 		return (DRMACH_INTERNAL_ERROR());
5172 
5173 	/* reassemble 64-bit base address */
5174 	*p = ((uint64_t)reg.reg_addr_hi << 32) | reg.reg_addr_lo;
5175 
5176 	return (NULL);
5177 }
5178 
5179 static void
5180 drmach_cpu_read(uint64_t arg1, uint64_t arg2)
5181 {
5182 	uint64_t	*saf_config_reg = (uint64_t *)arg1;
5183 	uint_t		*reg_read = (uint_t *)arg2;
5184 
5185 	*saf_config_reg = lddsafconfig();
5186 	*reg_read = 0x1;
5187 }
5188 
5189 /*
5190  * A return value of 1 indicates success and 0 indicates a failure
5191  */
5192 static int
5193 drmach_cpu_read_scr(drmach_cpu_t *cp, uint64_t *scr)
5194 {
5195 
5196 	int 	rv = 0x0;
5197 
5198 	*scr = 0x0;
5199 
5200 	/*
5201 	 * Confirm cpu was in ready set when xc was issued.
5202 	 * This is done by verifying rv which is
5203 	 * set to 0x1 when xc_one is successful.
5204 	 */
5205 	xc_one(cp->dev.portid, (xcfunc_t *)drmach_cpu_read,
5206 	    (uint64_t)scr, (uint64_t)&rv);
5207 
5208 	return (rv);
5209 
5210 }
5211 
5212 static sbd_error_t *
5213 drmach_cpu_read_cpuid(drmach_cpu_t *cp, processorid_t *cpuid)
5214 {
5215 	drmach_node_t	*np;
5216 
5217 	np = cp->dev.node;
5218 
5219 	/*
5220 	 * If a CPU does not have a portid property, it must
5221 	 * be a CMP device with a cpuid property.
5222 	 */
5223 	if (np->n_getprop(np, "portid", cpuid, sizeof (*cpuid)) != 0) {
5224 
5225 		if (np->n_getprop(np, "cpuid", cpuid, sizeof (*cpuid)) != 0) {
5226 			return (DRMACH_INTERNAL_ERROR());
5227 		}
5228 	}
5229 
5230 	return (NULL);
5231 }
5232 
/* Starcat CMP core id is bit 2 of the cpuid */
#define	DRMACH_COREID_MASK	(1u << 2)
/*
 * Map a cpuid to its page index (0..3) within the shared STARDRB
 * buffer: the core id bit becomes bit 1 of the index and bit 0 of the
 * cpuid is kept as bit 0.  The argument is parenthesized so that an
 * expression can be passed safely.
 */
#define	DRMACH_CPUID2SRAM_IDX(id) \
		((((id) & DRMACH_COREID_MASK) >> 1) | ((id) & 0x1))
5237 
5238 static sbd_error_t *
5239 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
5240 {
5241 	static void drmach_cpu_dispose(drmachid_t);
5242 	static sbd_error_t *drmach_cpu_release(drmachid_t);
5243 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
5244 
5245 	sbd_error_t	*err;
5246 	uint64_t	scr_pa;
5247 	drmach_cpu_t	*cp = NULL;
5248 	pfn_t		pfn;
5249 	uint64_t	cpu_stardrb_offset, cpu_sram_pa;
5250 	int		idx;
5251 	int		impl;
5252 	processorid_t	cpuid;
5253 
5254 	err = drmach_read_reg_addr(proto, &scr_pa);
5255 	if (err) {
5256 		goto fail;
5257 	}
5258 
5259 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
5260 	bcopy(proto, &cp->dev, sizeof (cp->dev));
5261 	cp->dev.node = drmach_node_dup(proto->node);
5262 	cp->dev.cm.isa = (void *)drmach_cpu_new;
5263 	cp->dev.cm.dispose = drmach_cpu_dispose;
5264 	cp->dev.cm.release = drmach_cpu_release;
5265 	cp->dev.cm.status = drmach_cpu_status;
5266 	cp->scr_pa = scr_pa;
5267 
5268 	err = drmach_cpu_read_cpuid(cp, &cpuid);
5269 	if (err) {
5270 		goto fail;
5271 	}
5272 
5273 	err = drmach_cpu_get_impl(cp, &impl);
5274 	if (err) {
5275 		goto fail;
5276 	}
5277 
5278 	cp->cpuid = cpuid;
5279 	cp->coreid = STARCAT_CPUID_TO_COREID(cp->cpuid);
5280 	cp->dev.unum = STARCAT_CPUID_TO_AGENT(cp->cpuid);
5281 
5282 	/*
5283 	 * Init the board cpu type.  Assumes all board cpus are the same type.
5284 	 */
5285 	if (cp->dev.bp->cpu_impl == 0) {
5286 		cp->dev.bp->cpu_impl = impl;
5287 	}
5288 	ASSERT(cp->dev.bp->cpu_impl == impl);
5289 
5290 	/*
5291 	 * XXX CHEETAH SUPPORT
5292 	 * determine if the domain uses Cheetah procs
5293 	 */
5294 	if (drmach_is_cheetah < 0) {
5295 		drmach_is_cheetah = IS_CHEETAH(impl);
5296 	}
5297 
5298 	/*
5299 	 * Initialize TTE for mapping CPU SRAM STARDRB buffer.
5300 	 * The STARDRB buffer (16KB on Cheetah+ boards, 32KB on
5301 	 * Jaguar/Panther boards) is shared by all cpus in a Safari port
5302 	 * pair. Each cpu uses 8KB according to the following layout:
5303 	 *
5304 	 * Page 0:	even numbered Cheetah+'s and Panther/Jaguar core 0's
5305 	 * Page 1:	odd numbered Cheetah+'s and Panther/Jaguar core 0's
5306 	 * Page 2:	even numbered Panther/Jaguar core 1's
5307 	 * Page 3:	odd numbered Panther/Jaguar core 1's
5308 	 */
5309 	idx = DRMACH_CPUID2SRAM_IDX(cp->cpuid);
5310 	cpu_stardrb_offset = cp->dev.bp->stardrb_offset + (PAGESIZE * idx);
5311 	cpu_sram_pa = DRMACH_CPU_SRAM_ADDR + cpu_stardrb_offset;
5312 	pfn = cpu_sram_pa >> PAGESHIFT;
5313 
5314 	ASSERT(drmach_cpu_sram_tte[cp->cpuid].tte_inthi == 0 &&
5315 	    drmach_cpu_sram_tte[cp->cpuid].tte_intlo == 0);
5316 	drmach_cpu_sram_tte[cp->cpuid].tte_inthi = TTE_PFN_INTHI(pfn) |
5317 		TTE_VALID_INT | TTE_SZ_INT(TTE8K);
5318 	drmach_cpu_sram_tte[cp->cpuid].tte_intlo = TTE_PFN_INTLO(pfn) |
5319 		TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
5320 
5321 	DRMACH_PR("drmach_cpu_new: cpuid=%d, coreid=%d, stardrb_offset=0x%lx, "
5322 	    "cpu_sram_offset=0x%lx, idx=%d\n", cp->cpuid, cp->coreid,
5323 	    cp->dev.bp->stardrb_offset, cpu_stardrb_offset, idx);
5324 
5325 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
5326 	    cp->dev.type, cp->dev.unum);
5327 
5328 	*idp = (drmachid_t)cp;
5329 	return (NULL);
5330 
5331 fail:
5332 	if (cp) {
5333 		drmach_node_dispose(cp->dev.node);
5334 		kmem_free(cp, sizeof (*cp));
5335 	}
5336 
5337 	*idp = (drmachid_t)0;
5338 	return (err);
5339 }
5340 
5341 static void
5342 drmach_cpu_dispose(drmachid_t id)
5343 {
5344 	drmach_cpu_t	*self;
5345 	processorid_t	cpuid;
5346 
5347 	ASSERT(DRMACH_IS_CPU_ID(id));
5348 
5349 	self = id;
5350 	if (self->dev.node)
5351 		drmach_node_dispose(self->dev.node);
5352 
5353 	cpuid = self->cpuid;
5354 	ASSERT(TTE_IS_VALID(&drmach_cpu_sram_tte[cpuid]) &&
5355 	    TTE_IS_8K(&drmach_cpu_sram_tte[cpuid]) &&
5356 	    TTE_IS_PRIVILEGED(&drmach_cpu_sram_tte[cpuid]) &&
5357 	    TTE_IS_LOCKED(&drmach_cpu_sram_tte[cpuid]));
5358 	drmach_cpu_sram_tte[cpuid].tte_inthi = 0;
5359 	drmach_cpu_sram_tte[cpuid].tte_intlo = 0;
5360 
5361 	kmem_free(self, sizeof (*self));
5362 }
5363 
5364 static int
5365 drmach_cpu_start(struct cpu *cp)
5366 {
5367 	extern xcfunc_t	drmach_set_lpa;
5368 	extern void	restart_other_cpu(int);
5369 	int		cpuid = cp->cpu_id;
5370 	int		rv, bnum;
5371 	drmach_board_t	*bp;
5372 
5373 	ASSERT(MUTEX_HELD(&cpu_lock));
5374 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
5375 
5376 	cp->cpu_flags &= ~CPU_POWEROFF;
5377 
5378 	/*
5379 	 * NOTE: restart_other_cpu pauses cpus during the
5380 	 *	 slave cpu start.  This helps to quiesce the
5381 	 *	 bus traffic a bit which makes the tick sync
5382 	 *	 routine in the prom more robust.
5383 	 */
5384 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
5385 
5386 	if (prom_hotaddcpu(cpuid) != 0) {
5387 		cmn_err(CE_PANIC, "prom_hotaddcpu() for cpuid=%d failed.",
5388 			cpuid);
5389 	}
5390 
5391 	restart_other_cpu(cpuid);
5392 
5393 	bnum = drmach_portid2bnum(cpunodes[cpuid].portid);
5394 	rv = drmach_array_get(drmach_boards, bnum, (drmachid_t)&bp);
5395 	if (rv == -1 || bp == NULL) {
5396 		DRMACH_PR("drmach_cpu_start: cannot read board info for "
5397 		    "cpuid=%d: rv=%d, bp=%p\n", cpuid, rv, bp);
5398 	} else if (DRMACH_L1_SET_LPA(bp) && drmach_reprogram_lpa) {
5399 		int exp;
5400 		int ntries;
5401 
5402 		mutex_enter(&drmach_xt_mb_lock);
5403 		mutex_enter(&drmach_slice_table_lock);
5404 		bzero((void *)drmach_xt_mb, drmach_xt_mb_size);
5405 
5406 		/*
5407 		 * drmach_slice_table[*]
5408 		 *	bit 5	valid
5409 		 *	bit 0:4	slice number
5410 		 *
5411 		 * drmach_xt_mb[*] format for drmach_set_lpa
5412 		 *	bit 7	valid
5413 		 *	bit 6	set null LPA (overrides bits 0:4)
5414 		 *	bit 0:4	slice number
5415 		 *
5416 		 * drmach_set_lpa derives processor CBASE and CBND
5417 		 * from bits 6 and 0:4 of drmach_xt_mb.  If bit 6 is
5418 		 * set, then CBASE = CBND = 0. Otherwise, CBASE = slice
5419 		 * number; CBND = slice number + 1.
5420 		 * No action is taken if bit 7 is zero.
5421 		 */
5422 		exp = (cpuid >> 5) & 0x1f;
5423 		if (drmach_slice_table[exp] & 0x20) {
5424 			drmach_xt_mb[cpuid] = 0x80 |
5425 				(drmach_slice_table[exp] & 0x1f);
5426 		} else {
5427 			drmach_xt_mb[cpuid] = 0x80 | 0x40;
5428 		}
5429 
5430 		drmach_xt_ready = 0;
5431 
5432 		xt_one(cpuid, drmach_set_lpa, NULL, NULL);
5433 
5434 		ntries = drmach_cpu_ntries;
5435 		while (!drmach_xt_ready && ntries) {
5436 			DELAY(drmach_cpu_delay);
5437 			ntries--;
5438 		}
5439 
5440 		mutex_exit(&drmach_slice_table_lock);
5441 		mutex_exit(&drmach_xt_mb_lock);
5442 
5443 		DRMACH_PR(
5444 			"waited %d out of %d tries for drmach_set_lpa on cpu%d",
5445 			drmach_cpu_ntries - ntries, drmach_cpu_ntries,
5446 			cp->cpu_id);
5447 	}
5448 
5449 	xt_one(cpuid, vtag_flushpage_tl1,
5450 		(uint64_t)drmach_cpu_sram_va, (uint64_t)KCONTEXT);
5451 
5452 	return (0);
5453 }
5454 
5455 /*
5456  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
5457  * it has been offlined. The function of this routine is to get the cpu
5458  * spinning in a safe place. The requirement is that the system will not
5459  * reference anything on the detaching board (memory and i/o is detached
5460  * elsewhere) and that the CPU not reference anything on any other board
5461  * in the system.  This isolation is required during and after the writes
5462  * to the domain masks to remove the board from the domain.
5463  *
5464  * To accomplish this isolation the following is done:
5465  *	1) Create a locked mapping to the STARDRB data buffer located
5466  *	   in this cpu's sram. There is one TTE per cpu, initialized in
5467  *	   drmach_cpu_new(). The cpuid is used to select which TTE to use.
5468  *	   Each Safari port pair shares the CPU SRAM on a Serengeti CPU/MEM
5469  *	   board. The STARDRB buffer is 16KB on Cheetah+ boards, 32KB on Jaguar
5470  *	   boards. Each STARDRB buffer is logically divided by DR into one
5471  *	   8KB page per cpu (or Jaguar core).
5472  *	2) Copy the target function (drmach_shutdown_asm) into buffer.
5473  *	3) Jump to function now in the cpu sram.
5474  *	   Function will:
5475  *	   3.1) Flush its Ecache (displacement).
5476  *	   3.2) Flush its Dcache with HW mechanism.
5477  *	   3.3) Flush its Icache with HW mechanism.
5478  *	   3.4) Flush all valid and _unlocked_ D-TLB and I-TLB entries.
5479  *	   3.5) Set LPA to NULL
5480  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
5481  *	        recovered by drmach_cpu_poweroff().
5482  *	4) Jump into an infinite loop.
5483  */
5484 
static void
drmach_cpu_stop_self(void)
{
	extern void	drmach_shutdown_asm(
				uint64_t, uint64_t, int, int, uint64_t);
	extern void	drmach_shutdown_asm_end(void);

	tte_t		*tte;
	uint_t		*p, *q;
	uint64_t	 stack_pointer;

	/* the routine being copied must fit within a single page */
	ASSERT(((ptrdiff_t)drmach_shutdown_asm_end -
		(ptrdiff_t)drmach_shutdown_asm) < PAGESIZE);

	/*
	 * Load this cpu's SRAM TTE into both the D-TLB and the I-TLB at
	 * drmach_cpu_sram_va: the page is written as data below and then
	 * executed as text.
	 */
	tte = &drmach_cpu_sram_tte[CPU->cpu_id];
	ASSERT(TTE_IS_VALID(tte) && TTE_IS_8K(tte) &&
	    TTE_IS_PRIVILEGED(tte) && TTE_IS_LOCKED(tte));
	sfmmu_dtlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);
	sfmmu_itlb_ld(drmach_cpu_sram_va, KCONTEXT, tte);

	/* copy text. standard bcopy not designed to work in nc space */
	p = (uint_t *)drmach_cpu_sram_va;
	q = (uint_t *)drmach_shutdown_asm;
	while (q < (uint_t *)drmach_shutdown_asm_end)
		*p++ = *q++;

	/* zero to assist debug */
	q = (uint_t *)(drmach_cpu_sram_va + PAGESIZE);
	while (p < q)
		*p++ = 0;

	/* a parking spot for the stack pointer */
	/* (q is the address one page past drmach_cpu_sram_va) */
	stack_pointer = (uint64_t)q;

	/* call copy of drmach_shutdown_asm */
	(*(void (*)())drmach_cpu_sram_va)(
		stack_pointer,
		drmach_iocage_paddr,
		cpunodes[CPU->cpu_id].ecache_size,
		cpunodes[CPU->cpu_id].ecache_linesize,
		va_to_pa((void *)&drmach_xt_mb[CPU->cpu_id]));
}
5527 
5528 static void
5529 drmach_cpu_shutdown_self(void)
5530 {
5531 	cpu_t		*cp = CPU;
5532 	int		cpuid = cp->cpu_id;
5533 	extern void	flush_windows(void);
5534 
5535 	flush_windows();
5536 
5537 	(void) spl8();
5538 
5539 	ASSERT(cp->cpu_intr_actv == 0);
5540 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
5541 	    cp->cpu_thread == cp->cpu_startup_thread);
5542 
5543 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
5544 
5545 	drmach_cpu_stop_self();
5546 
5547 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
5548 }
5549 
5550 static sbd_error_t *
5551 drmach_cpu_release(drmachid_t id)
5552 {
5553 	drmach_cpu_t	*cp;
5554 	struct cpu	*cpu;
5555 	sbd_error_t	*err;
5556 
5557 	if (!DRMACH_IS_CPU_ID(id))
5558 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5559 	cp = id;
5560 
5561 	ASSERT(MUTEX_HELD(&cpu_lock));
5562 
5563 	cpu = cpu_get(cp->cpuid);
5564 	if (cpu == NULL)
5565 		err = DRMACH_INTERNAL_ERROR();
5566 	else
5567 		err = NULL;
5568 
5569 	return (err);
5570 }
5571 
5572 static sbd_error_t *
5573 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
5574 {
5575 	drmach_cpu_t	*cp;
5576 	drmach_device_t	*dp;
5577 
5578 	ASSERT(DRMACH_IS_CPU_ID(id));
5579 	cp = id;
5580 	dp = &cp->dev;
5581 
5582 	stat->assigned = dp->bp->assigned;
5583 	stat->powered = dp->bp->powered;
5584 	mutex_enter(&cpu_lock);
5585 	stat->configured = (cpu_get(cp->cpuid) != NULL);
5586 	mutex_exit(&cpu_lock);
5587 	stat->busy = dp->busy;
5588 	strncpy(stat->type, dp->type, sizeof (stat->type));
5589 	stat->info[0] = '\0';
5590 
5591 	return (NULL);
5592 }
5593 
5594 sbd_error_t *
5595 drmach_cpu_disconnect(drmachid_t id)
5596 {
5597 
5598 	if (!DRMACH_IS_CPU_ID(id))
5599 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5600 
5601 	return (NULL);
5602 
5603 }
5604 
5605 sbd_error_t *
5606 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
5607 {
5608 	drmach_cpu_t	*cpu;
5609 
5610 	if (!DRMACH_IS_CPU_ID(id))
5611 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5612 	cpu = id;
5613 
5614 	*cpuid = cpu->cpuid;
5615 	return (NULL);
5616 }
5617 
5618 sbd_error_t *
5619 drmach_cpu_get_impl(drmachid_t id, int *ip)
5620 {
5621 	drmach_node_t	*np;
5622 	int		impl;
5623 
5624 	if (!DRMACH_IS_CPU_ID(id))
5625 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5626 
5627 	np = ((drmach_device_t *)id)->node;
5628 
5629 	if (np->n_getprop(np, "implementation#", &impl, sizeof (impl)) == -1) {
5630 		return (DRMACH_INTERNAL_ERROR());
5631 	}
5632 
5633 	*ip = impl;
5634 
5635 	return (NULL);
5636 }
5637 
5638 /*
5639  * Flush this cpu's ecache, then ensure all outstanding safari
5640  * transactions have retired.
5641  */
5642 void
5643 drmach_cpu_flush_ecache_sync(void)
5644 {
5645 	uint64_t *p;
5646 
5647 	ASSERT(curthread->t_bound_cpu == CPU);
5648 
5649 	cpu_flush_ecache();
5650 
5651 	mutex_enter(&drmach_bus_sync_lock);
5652 	for (p = drmach_bus_sync_list; *p; p++)
5653 		(void) ldphys(*p);
5654 	mutex_exit(&drmach_bus_sync_lock);
5655 
5656 	cpu_flush_ecache();
5657 }
5658 
5659 sbd_error_t *
5660 drmach_get_dip(drmachid_t id, dev_info_t **dip)
5661 {
5662 	drmach_device_t	*dp;
5663 
5664 	if (!DRMACH_IS_DEVICE_ID(id))
5665 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5666 	dp = id;
5667 
5668 	*dip = dp->node->n_getdip(dp->node);
5669 	return (NULL);
5670 }
5671 
5672 sbd_error_t *
5673 drmach_io_is_attached(drmachid_t id, int *yes)
5674 {
5675 	drmach_device_t *dp;
5676 	dev_info_t	*dip;
5677 	int state;
5678 
5679 	if (!DRMACH_IS_IO_ID(id))
5680 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5681 	dp = id;
5682 
5683 	dip = dp->node->n_getdip(dp->node);
5684 	if (dip == NULL) {
5685 		*yes = 0;
5686 		return (NULL);
5687 	}
5688 
5689 	state = ddi_get_devstate(dip);
5690 	*yes = i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP);
5691 
5692 	return (NULL);
5693 }
5694 
5695 static int
5696 drmach_dip_is_schizo_xmits_0_pci_b(dev_info_t *dip)
5697 {
5698 	char			dtype[OBP_MAXPROPNAME];
5699 	int			portid;
5700 	uint_t			pci_csr_base;
5701 	struct pci_phys_spec	*regbuf = NULL;
5702 	int			rv, len;
5703 
5704 	ASSERT(dip != NULL);
5705 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "device_type", &len);
5706 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (dtype)))
5707 		return (0);
5708 
5709 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0, "device_type",
5710 		(caddr_t)dtype, &len) == DDI_PROP_SUCCESS) {
5711 
5712 		if (strncmp(dtype, "pci", 3) == 0) {
5713 
5714 			/*
5715 			 * Get safari portid. All schizo/xmits 0
5716 			 * safari IDs end in 0x1C.
5717 			 */
5718 			rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5719 				"portid", &len);
5720 
5721 			if ((rv != DDI_PROP_SUCCESS) ||
5722 				(len > sizeof (portid)))
5723 					return (0);
5724 
5725 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5726 				"portid", (caddr_t)&portid, &len);
5727 
5728 			if (rv != DDI_PROP_SUCCESS)
5729 				return (0);
5730 
5731 			if ((portid & 0x1F) != 0x1C)
5732 				return (0);
5733 
5734 			if (ddi_getlongprop(DDI_DEV_T_ANY, dip,
5735 				DDI_PROP_DONTPASS, "reg", (caddr_t)&regbuf,
5736 						&len) == DDI_PROP_SUCCESS) {
5737 
5738 				pci_csr_base = regbuf[0].pci_phys_mid &
5739 							PCI_CONF_ADDR_MASK;
5740 				kmem_free(regbuf, len);
5741 				/*
5742 				 * All PCI B-Leafs are at configspace 0x70.0000.
5743 				 */
5744 				if (pci_csr_base == 0x700000)
5745 					return (1);
5746 			}
5747 		}
5748 	}
5749 	return (0);
5750 }
5751 
5752 #define	SCHIZO_BINDING_NAME		"pci108e,8001"
5753 #define	XMITS_BINDING_NAME		"pci108e,8002"
5754 
5755 /*
5756  * Verify if the dip is an instance of MAN 'eri'.
5757  */
5758 static int
5759 drmach_dip_is_man_eri(dev_info_t *dip)
5760 {
5761 	struct pci_phys_spec	*regbuf = NULL;
5762 	dev_info_t		*parent_dip;
5763 	char			*name;
5764 	uint_t			pci_device;
5765 	uint_t			pci_function;
5766 	int			len;
5767 
5768 	if (dip == NULL)
5769 		return (0);
5770 	/*
5771 	 * Verify if the parent is schizo(xmits)0 and pci B leaf.
5772 	 */
5773 	if (((parent_dip = ddi_get_parent(dip)) == NULL) ||
5774 		((name = ddi_binding_name(parent_dip)) == NULL))
5775 		return (0);
5776 	if (strcmp(name, SCHIZO_BINDING_NAME) != 0) {
5777 		/*
5778 		 * This RIO could be on XMITS, so get the dip to
5779 		 * XMITS PCI Leaf.
5780 		 */
5781 		if ((parent_dip = ddi_get_parent(parent_dip)) == NULL)
5782 			return (0);
5783 		if (((name = ddi_binding_name(parent_dip)) == NULL) ||
5784 			(strcmp(name, XMITS_BINDING_NAME) != 0)) {
5785 			return (0);
5786 		}
5787 	}
5788 	if (!drmach_dip_is_schizo_xmits_0_pci_b(parent_dip))
5789 		return (0);
5790 	/*
5791 	 * Finally make sure it is the MAN eri.
5792 	 */
5793 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
5794 			"reg", (caddr_t)&regbuf, &len) == DDI_PROP_SUCCESS) {
5795 
5796 		pci_device = PCI_REG_DEV_G(regbuf->pci_phys_hi);
5797 		pci_function = PCI_REG_FUNC_G(regbuf->pci_phys_hi);
5798 		kmem_free(regbuf, len);
5799 
5800 		/*
5801 		 * The network function of the RIO ASIC will always be
5802 		 * device 3 and function 1 ("network@3,1").
5803 		 */
5804 		if ((pci_device == 3) && (pci_function == 1))
5805 			return (1);
5806 	}
5807 	return (0);
5808 }
5809 
/*
 * Search state passed to drmach_board_find_io_insts() through the
 * ddi_walk_devs() argument pointer.  Callers set bnum and seed the
 * other two fields; the walk callback fills them in as matching
 * nodes are found.
 */
typedef struct {
	/* instance number of the board's "iosram" node; < 0 while not found */
	int		iosram_inst;
	/* MAN eri dip, held via ndi_hold_devi() by the callback; NULL if none */
	dev_info_t	*eri_dip;
	/* board number; nodes whose portid maps elsewhere are ignored */
	int		bnum;
} drmach_io_inst_t;
5815 
5816 int
5817 drmach_board_find_io_insts(dev_info_t *dip, void *args)
5818 {
5819 	drmach_io_inst_t	*ios = (drmach_io_inst_t *)args;
5820 
5821 	int	rv;
5822 	int	len;
5823 	int	portid;
5824 	char	name[OBP_MAXDRVNAME];
5825 
5826 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0, "portid", &len);
5827 
5828 	if ((rv != DDI_PROP_SUCCESS) || (len > sizeof (portid))) {
5829 		return (DDI_WALK_CONTINUE);
5830 	}
5831 
5832 	rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, 0,
5833 			"portid", (caddr_t)&portid, &len);
5834 	if (rv != DDI_PROP_SUCCESS)
5835 		return (DDI_WALK_CONTINUE);
5836 
5837 	/* ignore devices that are not on this board */
5838 	if (drmach_portid2bnum(portid) != ios->bnum)
5839 		return (DDI_WALK_CONTINUE);
5840 
5841 	if ((ios->iosram_inst < 0) || (ios->eri_dip == NULL)) {
5842 		rv = ddi_getproplen(DDI_DEV_T_ANY, dip, 0,
5843 			"name", &len);
5844 		if (rv == DDI_PROP_SUCCESS) {
5845 
5846 			rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
5847 				0, "name",
5848 				(caddr_t)name, &len);
5849 			if (rv != DDI_PROP_SUCCESS)
5850 				return (DDI_WALK_CONTINUE);
5851 
5852 			if (strncmp("iosram", name, 6) == 0) {
5853 				ios->iosram_inst = ddi_get_instance(dip);
5854 				if (ios->eri_dip == NULL)
5855 					return (DDI_WALK_CONTINUE);
5856 				else
5857 					return (DDI_WALK_TERMINATE);
5858 			} else {
5859 				if (drmach_dip_is_man_eri(dip)) {
5860 					ASSERT(ios->eri_dip == NULL);
5861 					ndi_hold_devi(dip);
5862 					ios->eri_dip = dip;
5863 					if (ios->iosram_inst < 0)
5864 						return (DDI_WALK_CONTINUE);
5865 					else
5866 						return (DDI_WALK_TERMINATE);
5867 				}
5868 			}
5869 		}
5870 	}
5871 	return (DDI_WALK_CONTINUE);
5872 }
5873 
5874 sbd_error_t *
5875 drmach_io_pre_release(drmachid_t id)
5876 {
5877 	drmach_io_inst_t	ios;
5878 	drmach_board_t		*bp;
5879 	int			rv = 0;
5880 	sbd_error_t		*err = NULL;
5881 	drmach_device_t		*dp;
5882 	dev_info_t		*rdip;
5883 	int			circ;
5884 
5885 	if (!DRMACH_IS_IO_ID(id))
5886 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5887 	dp = id;
5888 	bp = dp->bp;
5889 
5890 	rdip = dp->node->n_getdip(dp->node);
5891 
5892 	/* walk device tree to find iosram instance for the board */
5893 	ios.iosram_inst = -1;
5894 	ios.eri_dip = NULL;
5895 	ios.bnum = bp->bnum;
5896 
5897 	ndi_devi_enter(rdip, &circ);
5898 	ddi_walk_devs(ddi_get_child(rdip), drmach_board_find_io_insts,
5899 				(void *)&ios);
5900 
5901 	DRMACH_PR("drmach_io_pre_release: bnum=%d iosram=%d eri=0x%p\n",
5902 			ios.bnum, ios.iosram_inst, ios.eri_dip);
5903 	ndi_devi_exit(rdip, circ);
5904 
5905 	if (ios.eri_dip) {
5906 		/*
5907 		 * Release hold acquired in drmach_board_find_io_insts()
5908 		 */
5909 		ndi_rele_devi(ios.eri_dip);
5910 	}
5911 	if (ios.iosram_inst >= 0) {
5912 		/* call for tunnel switch */
5913 		do {
5914 			DRMACH_PR("calling iosram_switchfrom(%d)\n",
5915 				ios.iosram_inst);
5916 			rv = iosram_switchfrom(ios.iosram_inst);
5917 			if (rv)
5918 				DRMACH_PR("iosram_switchfrom returned %d\n",
5919 					rv);
5920 		} while (rv == EAGAIN);
5921 
5922 		if (rv)
5923 			err = drerr_new(0, ESTC_IOSWITCH, NULL);
5924 	}
5925 	return (err);
5926 }
5927 
5928 sbd_error_t *
5929 drmach_io_unrelease(drmachid_t id)
5930 {
5931 	dev_info_t	*dip;
5932 	sbd_error_t	*err = NULL;
5933 	drmach_device_t	*dp;
5934 
5935 	if (!DRMACH_IS_IO_ID(id))
5936 		return (drerr_new(0, ESTC_INAPPROP, NULL));
5937 	dp = id;
5938 
5939 	dip = dp->node->n_getdip(dp->node);
5940 
5941 	if (dip == NULL)
5942 		err = DRMACH_INTERNAL_ERROR();
5943 	else {
5944 		int (*func)(dev_info_t *dip);
5945 
5946 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach",
5947 			0);
5948 
5949 		if (func) {
5950 			drmach_io_inst_t ios;
5951 			dev_info_t	*pdip;
5952 			int		circ;
5953 
5954 			/*
5955 			 * Walk device tree to find rio dip for the board
5956 			 * Since we are not interested in iosram instance here,
5957 			 * initialize it to 0, so that the walk terminates as
5958 			 * soon as eri dip is found.
5959 			 */
5960 			ios.iosram_inst = 0;
5961 			ios.eri_dip = NULL;
5962 			ios.bnum = dp->bp->bnum;
5963 
5964 			if (pdip = ddi_get_parent(dip)) {
5965 				ndi_hold_devi(pdip);
5966 				ndi_devi_enter(pdip, &circ);
5967 			}
5968 			/*
5969 			 * Root node doesn't have to be held in any way.
5970 			 */
5971 			ddi_walk_devs(dip,
5972 				drmach_board_find_io_insts, (void *)&ios);
5973 
5974 			if (pdip) {
5975 				ndi_devi_exit(pdip, circ);
5976 				ndi_rele_devi(pdip);
5977 			}
5978 
5979 			DRMACH_PR("drmach_io_unrelease: bnum=%d eri=0x%p\n",
5980 				ios.bnum, ios.eri_dip);
5981 
5982 			if (ios.eri_dip) {
5983 				DRMACH_PR("calling man_dr_attach\n");
5984 				if ((*func)(ios.eri_dip))
5985 					err = drerr_new(0,
5986 						ESTC_NWSWITCH, NULL);
5987 				/*
5988 				 * Release hold acquired in
5989 				 * drmach_board_find_io_insts()
5990 				 */
5991 				ndi_rele_devi(ios.eri_dip);
5992 			}
5993 		} else
5994 			DRMACH_PR("man_dr_attach NOT present\n");
5995 	}
5996 	return (err);
5997 }
5998 
5999 static sbd_error_t *
6000 drmach_io_release(drmachid_t id)
6001 {
6002 	dev_info_t	*dip;
6003 	sbd_error_t	*err = NULL;
6004 	drmach_device_t	*dp;
6005 
6006 	if (!DRMACH_IS_IO_ID(id))
6007 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6008 	dp = id;
6009 
6010 	dip = dp->node->n_getdip(dp->node);
6011 
6012 	if (dip == NULL)
6013 		err = DRMACH_INTERNAL_ERROR();
6014 	else {
6015 		int (*func)(dev_info_t *dip);
6016 
6017 		func = (int (*)(dev_info_t *))kobj_getsymvalue("man_dr_detach",
6018 			0);
6019 
6020 		if (func) {
6021 			drmach_io_inst_t ios;
6022 			dev_info_t	*pdip;
6023 			int		circ;
6024 
6025 			/*
6026 			 * Walk device tree to find rio dip for the board
6027 			 * Since we are not interested in iosram instance here,
6028 			 * initialize it to 0, so that the walk terminates as
6029 			 * soon as eri dip is found.
6030 			 */
6031 			ios.iosram_inst = 0;
6032 			ios.eri_dip = NULL;
6033 			ios.bnum = dp->bp->bnum;
6034 
6035 			if (pdip = ddi_get_parent(dip)) {
6036 				ndi_hold_devi(pdip);
6037 				ndi_devi_enter(pdip, &circ);
6038 			}
6039 			/*
6040 			 * Root node doesn't have to be held in any way.
6041 			 */
6042 			ddi_walk_devs(dip,
6043 				drmach_board_find_io_insts, (void *)&ios);
6044 
6045 			if (pdip) {
6046 				ndi_devi_exit(pdip, circ);
6047 				ndi_rele_devi(pdip);
6048 			}
6049 
6050 			DRMACH_PR("drmach_io_release: bnum=%d eri=0x%p\n",
6051 				ios.bnum, ios.eri_dip);
6052 
6053 			if (ios.eri_dip) {
6054 				DRMACH_PR("calling man_dr_detach\n");
6055 				if ((*func)(ios.eri_dip))
6056 					err = drerr_new(0,
6057 						ESTC_NWSWITCH, NULL);
6058 				/*
6059 				 * Release hold acquired in
6060 				 * drmach_board_find_io_insts()
6061 				 */
6062 				ndi_rele_devi(ios.eri_dip);
6063 			}
6064 		} else
6065 			DRMACH_PR("man_dr_detach NOT present\n");
6066 	}
6067 	return (err);
6068 }
6069 
6070 sbd_error_t *
6071 drmach_io_post_release(drmachid_t id)
6072 {
6073 	char 		*path;
6074 	dev_info_t	*rdip;
6075 	drmach_device_t	*dp;
6076 
6077 	if (!DRMACH_IS_DEVICE_ID(id))
6078 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6079 	dp = id;
6080 
6081 	rdip = dp->node->n_getdip(dp->node);
6082 
6083 	/*
6084 	 * Always called after drmach_unconfigure() which on Starcat
6085 	 * unconfigures the branch but doesn't remove it so the
6086 	 * dip must always exist.
6087 	 */
6088 	ASSERT(rdip);
6089 
6090 	ASSERT(e_ddi_branch_held(rdip));
6091 #ifdef DEBUG
6092 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6093 	(void) ddi_pathname(rdip, path);
6094 	DRMACH_PR("post_release dip path is: %s\n", path);
6095 	kmem_free(path, MAXPATHLEN);
6096 #endif
6097 
6098 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6099 		if (schpc_remove_pci(rdip)) {
6100 			DRMACH_PR("schpc_remove_pci failed\n");
6101 			return (drerr_new(0, ESBD_OFFLINE, NULL));
6102 		} else {
6103 			DRMACH_PR("schpc_remove_pci succeeded\n");
6104 		}
6105 	}
6106 
6107 	return (NULL);
6108 }
6109 
6110 sbd_error_t *
6111 drmach_io_post_attach(drmachid_t id)
6112 {
6113 	int		circ;
6114 	dev_info_t	*dip;
6115 	dev_info_t	*pdip;
6116 	drmach_device_t	*dp;
6117 	drmach_io_inst_t ios;
6118 
6119 	if (!DRMACH_IS_DEVICE_ID(id))
6120 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6121 	dp = id;
6122 
6123 	dip = dp->node->n_getdip(dp->node);
6124 
6125 	/*
6126 	 * We held the branch rooted at dip earlier, so at a minimum the
6127 	 * root i.e. dip must be present in the device tree.
6128 	 */
6129 	ASSERT(dip);
6130 
6131 	if (strcmp(dp->type, DRMACH_DEVTYPE_PCI) == 0) {
6132 		if (schpc_add_pci(dip)) {
6133 			DRMACH_PR("schpc_add_pci failed\n");
6134 		} else {
6135 			DRMACH_PR("schpc_add_pci succeeded\n");
6136 		}
6137 	}
6138 
6139 	/*
6140 	 * Walk device tree to find rio dip for the board
6141 	 * Since we are not interested in iosram instance here,
6142 	 * initialize it to 0, so that the walk terminates as
6143 	 * soon as eri dip is found.
6144 	 */
6145 	ios.iosram_inst = 0;
6146 	ios.eri_dip = NULL;
6147 	ios.bnum = dp->bp->bnum;
6148 
6149 	if (pdip = ddi_get_parent(dip)) {
6150 		ndi_hold_devi(pdip);
6151 		ndi_devi_enter(pdip, &circ);
6152 	}
6153 	/*
6154 	 * Root node doesn't have to be held in any way.
6155 	 */
6156 	ddi_walk_devs(dip, drmach_board_find_io_insts,
6157 				(void *)&ios);
6158 	if (pdip) {
6159 		ndi_devi_exit(pdip, circ);
6160 		ndi_rele_devi(pdip);
6161 	}
6162 
6163 	DRMACH_PR("drmach_io_post_attach: bnum=%d eri=0x%p\n",
6164 		ios.bnum, ios.eri_dip);
6165 
6166 	if (ios.eri_dip) {
6167 		int (*func)(dev_info_t *dip);
6168 
6169 		func =
6170 		(int (*)(dev_info_t *))kobj_getsymvalue("man_dr_attach", 0);
6171 
6172 		if (func) {
6173 			DRMACH_PR("calling man_dr_attach\n");
6174 			(void) (*func)(ios.eri_dip);
6175 		} else {
6176 			DRMACH_PR("man_dr_attach NOT present\n");
6177 		}
6178 
6179 		/*
6180 		 * Release hold acquired in drmach_board_find_io_insts()
6181 		 */
6182 		ndi_rele_devi(ios.eri_dip);
6183 
6184 	}
6185 
6186 	return (NULL);
6187 }
6188 
6189 static sbd_error_t *
6190 drmach_io_status(drmachid_t id, drmach_status_t *stat)
6191 {
6192 	drmach_device_t *dp;
6193 	sbd_error_t	*err;
6194 	int		 configured;
6195 
6196 	ASSERT(DRMACH_IS_IO_ID(id));
6197 	dp = id;
6198 
6199 	err = drmach_io_is_attached(id, &configured);
6200 	if (err)
6201 		return (err);
6202 
6203 	stat->assigned = dp->bp->assigned;
6204 	stat->powered = dp->bp->powered;
6205 	stat->configured = (configured != 0);
6206 	stat->busy = dp->busy;
6207 	strncpy(stat->type, dp->type, sizeof (stat->type));
6208 	stat->info[0] = '\0';
6209 
6210 	return (NULL);
6211 }
6212 
6213 sbd_error_t *
6214 drmach_mem_init_size(drmachid_t id)
6215 {
6216 	drmach_mem_t	*mp;
6217 	sbd_error_t	*err;
6218 	gdcd_t		*gdcd;
6219 	mem_chunk_t	*chunk;
6220 	uint64_t	 chunks, pa, mask, sz;
6221 
6222 	if (!DRMACH_IS_MEM_ID(id))
6223 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6224 	mp = id;
6225 
6226 	err = drmach_mem_get_base_physaddr(id, &pa);
6227 	if (err)
6228 		return (err);
6229 
6230 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6231 	pa &= mask;
6232 
6233 	gdcd = drmach_gdcd_new();
6234 	if (gdcd == NULL)
6235 		return (DRMACH_INTERNAL_ERROR());
6236 
6237 	sz = 0;
6238 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6239 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6240 	while (chunks-- != 0) {
6241 		if ((chunk->mc_base_pa & mask) == pa) {
6242 			sz += chunk->mc_mbytes * 1048576;
6243 		}
6244 
6245 		++chunk;
6246 	}
6247 	mp->nbytes = sz;
6248 
6249 	drmach_gdcd_dispose(gdcd);
6250 	return (NULL);
6251 }
6252 
6253 /*
6254  * Hardware registers are organized into consecutively
6255  * addressed registers.  The reg property's hi and lo fields
6256  * together describe the base address of the register set for
6257  * this memory-controller.  Register descriptions and offsets
6258  * (from the base address) are as follows:
6259  *
6260  * Description				Offset	Size (bytes)
6261  * Memory Timing Control Register I	0x00	8
6262  * Memory Timing Control Register II	0x08	8
6263  * Memory Address Decoding Register I	0x10	8
6264  * Memory Address Decoding Register II	0x18	8
6265  * Memory Address Decoding Register III	0x20	8
6266  * Memory Address Decoding Register IV	0x28	8
6267  * Memory Address Control Register	0x30	8
6268  * Memory Timing Control Register III	0x38	8
6269  * Memory Timing Control Register IV	0x40	8
6270  * Memory Timing Control Register V  	0x48	8 (Jaguar, Panther only)
6271  * EMU Activity Status Register		0x50	8 (Panther only)
6272  *
6273  * Only the Memory Address Decoding Register and EMU Activity Status
6274  * Register addresses are needed for DRMACH.
6275  */
/*
 * Create a mem device object from the prototype device.
 *
 * On return *idp is the new object only for the first mem unit seen
 * on a board; it is zero when the unit has no valid bank, when it is
 * chained behind the board's first unit, or when sizing fails.
 * Returns an error only if the register address cannot be read.
 */
static sbd_error_t *
drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
{
	static void drmach_mem_dispose(drmachid_t);
	static sbd_error_t *drmach_mem_release(drmachid_t);
	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);

	sbd_error_t	*err;
	uint64_t	 madr_pa;
	drmach_mem_t	*mp;
	int		 bank, count;

	err = drmach_read_reg_addr(proto, &madr_pa);
	if (err)
		return (err);

	/* start from a copy of the prototype, then specialize it */
	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
	bcopy(proto, &mp->dev, sizeof (mp->dev));
	mp->dev.node = drmach_node_dup(proto->node);
	mp->dev.cm.isa = (void *)drmach_mem_new;
	mp->dev.cm.dispose = drmach_mem_dispose;
	mp->dev.cm.release = drmach_mem_release;
	mp->dev.cm.status = drmach_mem_status;
	mp->madr_pa = madr_pa;

	snprintf(mp->dev.cm.name,
		sizeof (mp->dev.cm.name), "%s", mp->dev.type);

	/* stop at the first bank whose valid bit is set; count ends 0 or 1 */
	for (count = bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
		uint64_t madr;

		drmach_mem_read_madr(mp, bank, &madr);
		if (madr & DRMACH_MC_VALID_MASK) {
			count += 1;
			break;
		}
	}

	/*
	 * If none of the banks had their valid bit set, that means
	 * post did not configure this MC to participate in the
	 * domain.  So, pretend this node does not exist by returning
	 * a drmachid of zero.
	 */
	if (count == 0) {
		/* drmach_mem_dispose frees board mem list */
		drmach_node_dispose(mp->dev.node);
		kmem_free(mp, sizeof (*mp));
		*idp = (drmachid_t)0;
		return (NULL);
	}

	/*
	 * Only one mem unit per board is exposed to the
	 * PIM layer.  The first mem unit encountered during
	 * tree walk is used to represent all mem units on
	 * the same board.
	 */
	if (mp->dev.bp->mem == NULL) {
		/* start list of mem units on this board */
		mp->dev.bp->mem = mp;

		/*
		 * force unum to zero since this is the only mem unit
		 * that will be visible to the PIM layer.
		 */
		mp->dev.unum = 0;

		/*
		 * board memory size kept in this mem unit only
		 */
		err = drmach_mem_init_size(mp);
		if (err) {
			/*
			 * NOTE(review): err is neither returned nor
			 * released on this path, so the error object
			 * appears to be leaked -- confirm and free it
			 * with the appropriate sbd error routine.
			 */
			mp->dev.bp->mem = NULL;
			/* drmach_mem_dispose frees board mem list */
			drmach_node_dispose(mp->dev.node);
			kmem_free(mp, sizeof (*mp));
			*idp = (drmachid_t)0;
			return (NULL);
		}

		/*
		 * allow this instance (the first encountered on this board)
		 * to be visible to the PIM layer.
		 */
		*idp = (drmachid_t)mp;
	} else {
		drmach_mem_t *lp;

		/* hide this mem instance behind the first. */
		/* (walk to the tail of the board's list and append) */
		for (lp = mp->dev.bp->mem; lp->next; lp = lp->next)
			;
		lp->next = mp;

		/*
		 * hide this instance from the caller.
		 * See drmach_board_find_devices_cb() for details.
		 */
		*idp = (drmachid_t)0;
	}

	return (NULL);
}
6379 
6380 static void
6381 drmach_mem_dispose(drmachid_t id)
6382 {
6383 	drmach_mem_t *mp, *next;
6384 	drmach_board_t *bp;
6385 
6386 	ASSERT(DRMACH_IS_MEM_ID(id));
6387 
6388 	mutex_enter(&drmach_bus_sync_lock);
6389 
6390 	mp = id;
6391 	bp = mp->dev.bp;
6392 
6393 	do {
6394 		if (mp->dev.node)
6395 			drmach_node_dispose(mp->dev.node);
6396 
6397 		next = mp->next;
6398 		kmem_free(mp, sizeof (*mp));
6399 		mp = next;
6400 	} while (mp);
6401 
6402 	bp->mem = NULL;
6403 
6404 	drmach_bus_sync_list_update();
6405 	mutex_exit(&drmach_bus_sync_lock);
6406 }
6407 
6408 sbd_error_t *
6409 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
6410 {
6411 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
6412 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
6413 	int		rv;
6414 
6415 	ASSERT(size != 0);
6416 
6417 	if (!DRMACH_IS_MEM_ID(id))
6418 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6419 
6420 	kcage_range_lock();
6421 	rv = kcage_range_add(basepfn, npages, 1);
6422 	kcage_range_unlock();
6423 	if (rv == ENOMEM) {
6424 		cmn_err(CE_WARN, "%lu megabytes not available"
6425 			" to kernel cage", size >> 20);
6426 	} else if (rv != 0) {
6427 		/* catch this in debug kernels */
6428 		ASSERT(0);
6429 
6430 		cmn_err(CE_WARN, "unexpected kcage_range_add"
6431 			" return value %d", rv);
6432 	}
6433 
6434 	return (NULL);
6435 }
6436 
6437 sbd_error_t *
6438 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
6439 {
6440 	pfn_t		 basepfn = (pfn_t)(basepa >> PAGESHIFT);
6441 	pgcnt_t		 npages = (pgcnt_t)(size >> PAGESHIFT);
6442 	int		 rv;
6443 
6444 	if (!DRMACH_IS_MEM_ID(id))
6445 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6446 
6447 	if (size > 0) {
6448 		kcage_range_lock();
6449 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
6450 		kcage_range_unlock();
6451 		if (rv != 0) {
6452 			cmn_err(CE_WARN,
6453 			    "unexpected kcage_range_delete_post_mem_del"
6454 			    " return value %d", rv);
6455 			return (DRMACH_INTERNAL_ERROR());
6456 		}
6457 	}
6458 
6459 	return (NULL);
6460 }
6461 
6462 sbd_error_t *
6463 drmach_mem_disable(drmachid_t id)
6464 {
6465 	if (!DRMACH_IS_MEM_ID(id))
6466 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6467 	else
6468 		return (NULL);
6469 }
6470 
6471 sbd_error_t *
6472 drmach_mem_enable(drmachid_t id)
6473 {
6474 	if (!DRMACH_IS_MEM_ID(id))
6475 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6476 	else
6477 		return (NULL);
6478 }
6479 
6480 sbd_error_t *
6481 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
6482 {
6483 #define	MB(mb) ((mb) * 1048576ull)
6484 
6485 	static struct {
6486 		uint_t		uk;
6487 		uint64_t	segsz;
6488 	}  uk2segsz[] = {
6489 		{ 0x003,	MB(256)	  },
6490 		{ 0x007,	MB(512)	  },
6491 		{ 0x00f,	MB(1024)  },
6492 		{ 0x01f,	MB(2048)  },
6493 		{ 0x03f,	MB(4096)  },
6494 		{ 0x07f,	MB(8192)  },
6495 		{ 0x0ff,	MB(16384) },
6496 		{ 0x1ff,	MB(32768) },
6497 		{ 0x3ff,	MB(65536) },
6498 		{ 0x7ff,	MB(131072) }
6499 	};
6500 	static int len = sizeof (uk2segsz) / sizeof (uk2segsz[0]);
6501 
6502 #undef MB
6503 
6504 	uint64_t	 largest_sz = 0;
6505 	drmach_mem_t	*mp;
6506 
6507 	if (!DRMACH_IS_MEM_ID(id))
6508 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6509 
6510 	/* prime the result with a default value */
6511 	*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6512 
6513 	for (mp = id; mp; mp = mp->next) {
6514 		int bank;
6515 
6516 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6517 			int		i;
6518 			uint_t		uk;
6519 			uint64_t	madr;
6520 
6521 			/* get register value, extract uk and normalize */
6522 			drmach_mem_read_madr(mp, bank, &madr);
6523 
6524 			if (!(madr & DRMACH_MC_VALID_MASK))
6525 				continue;
6526 
6527 			uk = DRMACH_MC_UK(madr);
6528 
6529 			/* match uk value */
6530 			for (i = 0; i < len; i++)
6531 				if (uk == uk2segsz[i].uk)
6532 					break;
6533 
6534 			if (i < len) {
6535 				uint64_t sz = uk2segsz[i].segsz;
6536 
6537 				/*
6538 				 * remember largest segment size,
6539 				 * update mask result
6540 				 */
6541 				if (sz > largest_sz) {
6542 					largest_sz = sz;
6543 					*mask = sz - 1;
6544 				}
6545 			} else {
6546 				/*
6547 				 * uk not in table, punt using
6548 				 * entire slice size. no longer any
6549 				 * reason to check other banks.
6550 				 */
6551 				*mask = (DRMACH_MEM_SLICE_SIZE - 1);
6552 				return (NULL);
6553 			}
6554 		}
6555 	}
6556 
6557 	return (NULL);
6558 }
6559 
6560 sbd_error_t *
6561 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *base_addr)
6562 {
6563 	drmach_mem_t *mp;
6564 
6565 	if (!DRMACH_IS_MEM_ID(id))
6566 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6567 
6568 	*base_addr = (uint64_t)-1;
6569 	for (mp = id; mp; mp = mp->next) {
6570 		int bank;
6571 
6572 		for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6573 			uint64_t addr, madr;
6574 
6575 			drmach_mem_read_madr(mp, bank, &madr);
6576 			if (madr & DRMACH_MC_VALID_MASK) {
6577 				addr = DRMACH_MC_UM_TO_PA(madr) |
6578 					DRMACH_MC_LM_TO_PA(madr);
6579 
6580 				if (addr < *base_addr)
6581 					*base_addr = addr;
6582 			}
6583 		}
6584 	}
6585 
6586 	/* should not happen, but ... */
6587 	if (*base_addr == (uint64_t)-1)
6588 		return (DRMACH_INTERNAL_ERROR());
6589 
6590 	return (NULL);
6591 }
6592 
6593 void
6594 drmach_bus_sync_list_update(void)
6595 {
6596 	int		rv, idx, cnt = 0;
6597 	drmachid_t	id;
6598 
6599 	ASSERT(MUTEX_HELD(&drmach_bus_sync_lock));
6600 
6601 	rv = drmach_array_first(drmach_boards, &idx, &id);
6602 	while (rv == 0) {
6603 		drmach_board_t		*bp = id;
6604 		drmach_mem_t		*mp = bp->mem;
6605 
6606 		while (mp) {
6607 			int bank;
6608 
6609 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
6610 				uint64_t madr;
6611 
6612 				drmach_mem_read_madr(mp, bank, &madr);
6613 				if (madr & DRMACH_MC_VALID_MASK) {
6614 					uint64_t pa;
6615 
6616 					pa  = DRMACH_MC_UM_TO_PA(madr);
6617 					pa |= DRMACH_MC_LM_TO_PA(madr);
6618 
6619 					/*
6620 					 * The list is zero terminated.
6621 					 * Offset the pa by a doubleword
6622 					 * to avoid confusing a pa value of
6623 					 * of zero with the terminator.
6624 					 */
6625 					pa += sizeof (uint64_t);
6626 
6627 					drmach_bus_sync_list[cnt++] = pa;
6628 				}
6629 			}
6630 
6631 			mp = mp->next;
6632 		}
6633 
6634 		rv = drmach_array_next(drmach_boards, &idx, &id);
6635 	}
6636 
6637 	drmach_bus_sync_list[cnt] = 0;
6638 }
6639 
6640 sbd_error_t *
6641 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
6642 {
6643 	sbd_error_t	*err;
6644 	struct memlist	*mlist;
6645 	gdcd_t		*gdcd;
6646 	mem_chunk_t	*chunk;
6647 	uint64_t	 chunks, pa, mask;
6648 
6649 	err = drmach_mem_get_base_physaddr(id, &pa);
6650 	if (err)
6651 		return (err);
6652 
6653 	gdcd = drmach_gdcd_new();
6654 	if (gdcd == NULL)
6655 		return (DRMACH_INTERNAL_ERROR());
6656 
6657 	mask = ~ (DRMACH_MEM_SLICE_SIZE - 1);
6658 	pa &= mask;
6659 
6660 	mlist = NULL;
6661 	chunk = gdcd->dcd_chunk_list.dcl_chunk;
6662 	chunks = gdcd->dcd_chunk_list.dcl_chunks;
6663 	while (chunks-- != 0) {
6664 		if ((chunk->mc_base_pa & mask) == pa) {
6665 			mlist = memlist_add_span(mlist,
6666 				chunk->mc_base_pa, chunk->mc_mbytes * 1048576);
6667 		}
6668 
6669 		++chunk;
6670 	}
6671 
6672 	drmach_gdcd_dispose(gdcd);
6673 
6674 #ifdef DEBUG
6675 	DRMACH_PR("GDCD derived memlist:");
6676 	memlist_dump(mlist);
6677 #endif
6678 
6679 	*ml = mlist;
6680 	return (NULL);
6681 }
6682 
6683 sbd_error_t *
6684 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
6685 {
6686 	drmach_mem_t	*mp;
6687 
6688 	if (!DRMACH_IS_MEM_ID(id))
6689 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6690 	mp = id;
6691 
6692 	ASSERT(mp->nbytes != 0);
6693 	*bytes = mp->nbytes;
6694 
6695 	return (NULL);
6696 }
6697 
6698 sbd_error_t *
6699 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
6700 {
6701 	sbd_error_t	*err;
6702 	drmach_device_t	*mp;
6703 
6704 	if (!DRMACH_IS_MEM_ID(id))
6705 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6706 	mp = id;
6707 
6708 	switch (DRMACH_BNUM2SLOT(mp->bp->bnum)) {
6709 		case 0:	*bytes = DRMACH_MEM_USABLE_SLICE_SIZE;
6710 			err = NULL;
6711 			break;
6712 
6713 		case 1: *bytes = 0;
6714 			err = NULL;
6715 			break;
6716 
6717 		default:
6718 			err = DRMACH_INTERNAL_ERROR();
6719 			break;
6720 	}
6721 
6722 	return (err);
6723 }
6724 
6725 processorid_t drmach_mem_cpu_affinity_nail;
6726 
6727 processorid_t
6728 drmach_mem_cpu_affinity(drmachid_t id)
6729 {
6730 	drmach_device_t	*mp;
6731 	drmach_board_t	*bp;
6732 	processorid_t	 cpuid;
6733 
6734 	if (!DRMACH_IS_MEM_ID(id))
6735 		return (CPU_CURRENT);
6736 
6737 	if (drmach_mem_cpu_affinity_nail) {
6738 		cpuid = drmach_mem_cpu_affinity_nail;
6739 
6740 		if (cpuid < 0 || cpuid > NCPU)
6741 			return (CPU_CURRENT);
6742 
6743 		mutex_enter(&cpu_lock);
6744 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
6745 			cpuid = CPU_CURRENT;
6746 		mutex_exit(&cpu_lock);
6747 
6748 		return (cpuid);
6749 	}
6750 
6751 	/* try to choose a proc on the target board */
6752 	mp = id;
6753 	bp = mp->bp;
6754 	if (bp->devices) {
6755 		int		 rv;
6756 		int		 d_idx;
6757 		drmachid_t	 d_id;
6758 
6759 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
6760 		while (rv == 0) {
6761 			if (DRMACH_IS_CPU_ID(d_id)) {
6762 				drmach_cpu_t	*cp = d_id;
6763 
6764 				mutex_enter(&cpu_lock);
6765 				cpuid = cp->cpuid;
6766 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
6767 					mutex_exit(&cpu_lock);
6768 					return (cpuid);
6769 				} else {
6770 					mutex_exit(&cpu_lock);
6771 				}
6772 			}
6773 
6774 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
6775 		}
6776 	}
6777 
6778 	/* otherwise, this proc, wherever it is */
6779 	return (CPU_CURRENT);
6780 }
6781 
6782 static sbd_error_t *
6783 drmach_mem_release(drmachid_t id)
6784 {
6785 	if (!DRMACH_IS_MEM_ID(id))
6786 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6787 	return (NULL);
6788 }
6789 
6790 static sbd_error_t *
6791 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
6792 {
6793 	drmach_mem_t	*mp;
6794 	sbd_error_t	*err;
6795 	uint64_t	 pa, slice_size;
6796 	struct memlist	*ml;
6797 
6798 	ASSERT(DRMACH_IS_MEM_ID(id));
6799 	mp = id;
6800 
6801 	/* get starting physical address of target memory */
6802 	err = drmach_mem_get_base_physaddr(id, &pa);
6803 	if (err)
6804 		return (err);
6805 
6806 	/* round down to slice boundary */
6807 	slice_size = DRMACH_MEM_SLICE_SIZE;
6808 	pa &= ~ (slice_size - 1);
6809 
6810 	/* stop at first span that is in slice */
6811 	memlist_read_lock();
6812 	for (ml = phys_install; ml; ml = ml->next)
6813 		if (ml->address >= pa && ml->address < pa + slice_size)
6814 			break;
6815 	memlist_read_unlock();
6816 
6817 	stat->assigned = mp->dev.bp->assigned;
6818 	stat->powered = mp->dev.bp->powered;
6819 	stat->configured = (ml != NULL);
6820 	stat->busy = mp->dev.busy;
6821 	strncpy(stat->type, mp->dev.type, sizeof (stat->type));
6822 	stat->info[0] = '\0';
6823 
6824 	return (NULL);
6825 }
6826 
6827 sbd_error_t *
6828 drmach_board_deprobe(drmachid_t id)
6829 {
6830 	drmach_board_t	*bp;
6831 	sbd_error_t	*err = NULL;
6832 
6833 	if (!DRMACH_IS_BOARD_ID(id))
6834 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6835 	bp = id;
6836 
6837 	if (bp->tree) {
6838 		drmach_node_dispose(bp->tree);
6839 		bp->tree = NULL;
6840 	}
6841 	if (bp->devices) {
6842 		drmach_array_dispose(bp->devices, drmach_device_dispose);
6843 		bp->devices = NULL;
6844 		bp->mem = NULL;  /* TODO: still needed? */
6845 	}
6846 	return (err);
6847 }
6848 
6849 /*ARGSUSED1*/
6850 static sbd_error_t *
6851 drmach_pt_showlpa(drmachid_t id, drmach_opts_t *opts)
6852 {
6853 	drmach_device_t	*dp;
6854 	uint64_t	val;
6855 	int		err = 1;
6856 
6857 	if (DRMACH_IS_CPU_ID(id)) {
6858 		drmach_cpu_t *cp = id;
6859 		if (drmach_cpu_read_scr(cp, &val))
6860 			err = 0;
6861 	} else if (DRMACH_IS_IO_ID(id) && ((drmach_io_t *)id)->scsr_pa != 0) {
6862 		drmach_io_t *io = id;
6863 		val = lddphysio(io->scsr_pa);
6864 		err = 0;
6865 	}
6866 	if (err)
6867 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6868 
6869 	dp = id;
6870 	uprintf("showlpa %s::%s portid %d, base pa %lx, bound pa %lx\n",
6871 		dp->bp->cm.name,
6872 		dp->cm.name,
6873 		dp->portid,
6874 		DRMACH_LPA_BASE_TO_PA(val),
6875 		DRMACH_LPA_BND_TO_PA(val));
6876 
6877 	return (NULL);
6878 }
6879 
6880 /*ARGSUSED*/
6881 static sbd_error_t *
6882 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
6883 {
6884 
6885 	drmach_board_t		*bp = (drmach_board_t *)id;
6886 
6887 	sbd_error_t		*err;
6888 	sc_gptwocfg_cookie_t	scc;
6889 
6890 	if (!DRMACH_IS_BOARD_ID(id))
6891 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6892 
6893 	/* do saf configurator stuff */
6894 	DRMACH_PR("calling sc_probe_board for bnum=%d\n", bp->bnum);
6895 	scc = sc_probe_board(bp->bnum);
6896 	if (scc == NULL) {
6897 		err = drerr_new(0, ESTC_PROBE, bp->cm.name);
6898 		return (err);
6899 	}
6900 
6901 	return (err);
6902 }
6903 
6904 /*ARGSUSED*/
6905 static sbd_error_t *
6906 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
6907 {
6908 
6909 	drmach_board_t	*bp;
6910 	sbd_error_t	*err = NULL;
6911 	sc_gptwocfg_cookie_t	scc;
6912 
6913 	if (!DRMACH_IS_BOARD_ID(id))
6914 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6915 	bp = id;
6916 
6917 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
6918 	scc = sc_unprobe_board(bp->bnum);
6919 	if (scc != NULL) {
6920 		err = drerr_new(0, ESTC_DEPROBE, bp->cm.name);
6921 	}
6922 
6923 	if (err == NULL)
6924 		err = drmach_board_deprobe(id);
6925 
6926 	return (err);
6927 
6928 }
6929 
6930 static sbd_error_t *
6931 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
6932 {
6933 	_NOTE(ARGUNUSED(id))
6934 	_NOTE(ARGUNUSED(opts))
6935 
6936 	struct memlist	*ml;
6937 	uint64_t	src_pa;
6938 	uint64_t	dst_pa;
6939 	uint64_t	dst;
6940 
6941 	dst_pa = va_to_pa(&dst);
6942 
6943 	memlist_read_lock();
6944 	for (ml = phys_install; ml; ml = ml->next) {
6945 		uint64_t	nbytes;
6946 
6947 		src_pa = ml->address;
6948 		nbytes = ml->size;
6949 
6950 		while (nbytes != 0ull) {
6951 
6952 			/* copy 32 bytes at src_pa to dst_pa */
6953 			bcopy32_il(src_pa, dst_pa);
6954 
6955 			/* increment by 32 bytes */
6956 			src_pa += (4 * sizeof (uint64_t));
6957 
6958 			/* decrement by 32 bytes */
6959 			nbytes -= (4 * sizeof (uint64_t));
6960 		}
6961 	}
6962 	memlist_read_unlock();
6963 
6964 	return (NULL);
6965 }
6966 
6967 static sbd_error_t *
6968 drmach_pt_recovercpu(drmachid_t id, drmach_opts_t *opts)
6969 {
6970 	_NOTE(ARGUNUSED(opts))
6971 
6972 	drmach_cpu_t	*cp;
6973 
6974 	if (!DRMACH_IS_CPU_ID(id))
6975 		return (drerr_new(0, ESTC_INAPPROP, NULL));
6976 	cp = id;
6977 
6978 	mutex_enter(&cpu_lock);
6979 	(void) drmach_iocage_cpu_return(&(cp->dev),
6980 	    CPU_ENABLE | CPU_EXISTS | CPU_READY | CPU_RUNNING);
6981 	mutex_exit(&cpu_lock);
6982 
6983 	return (NULL);
6984 }
6985 
6986 /*
6987  * Starcat DR passthrus are for debugging purposes only.
6988  */
/*
 * Table of passthru commands.  drmach_passthru() walks it in order and
 * calls the handler of the first entry whose name is a prefix of the
 * option string.
 */
static struct {
	const char	*name;		/* command name */
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "showlpa",		drmach_pt_showlpa		},
	{ "ikprobe",		drmach_pt_ikprobe		},
	{ "ikdeprobe",		drmach_pt_ikdeprobe		},
	{ "readmem",		drmach_pt_readmem		},
	{ "recovercpu",		drmach_pt_recovercpu		},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
7002 
7003 /*ARGSUSED*/
7004 sbd_error_t *
7005 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
7006 {
7007 	int		i;
7008 	sbd_error_t	*err;
7009 
7010 	i = 0;
7011 	while (drmach_pt_arr[i].name != NULL) {
7012 		int len = strlen(drmach_pt_arr[i].name);
7013 
7014 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
7015 			break;
7016 
7017 		i += 1;
7018 	}
7019 
7020 	if (drmach_pt_arr[i].name == NULL)
7021 		err = drerr_new(0, ESTC_UNKPTCMD, opts->copts);
7022 	else
7023 		err = (*drmach_pt_arr[i].handler)(id, opts);
7024 
7025 	return (err);
7026 }
7027 
7028 sbd_error_t *
7029 drmach_release(drmachid_t id)
7030 {
7031 	drmach_common_t *cp;
7032 
7033 	if (!DRMACH_IS_DEVICE_ID(id))
7034 		return (drerr_new(0, ESTC_INAPPROP, NULL));
7035 	cp = id;
7036 
7037 	return (cp->release(id));
7038 }
7039 
7040 sbd_error_t *
7041 drmach_status(drmachid_t id, drmach_status_t *stat)
7042 {
7043 	drmach_common_t *cp;
7044 	sbd_error_t	*err;
7045 
7046 	rw_enter(&drmach_boards_rwlock, RW_READER);
7047 
7048 	if (!DRMACH_IS_ID(id)) {
7049 		rw_exit(&drmach_boards_rwlock);
7050 		return (drerr_new(0, ESTC_NOTID, NULL));
7051 	}
7052 
7053 	cp = id;
7054 
7055 	err = cp->status(id, stat);
7056 	rw_exit(&drmach_boards_rwlock);
7057 	return (err);
7058 }
7059 
7060 static sbd_error_t *
7061 drmach_i_status(drmachid_t id, drmach_status_t *stat)
7062 {
7063 	drmach_common_t *cp;
7064 
7065 	if (!DRMACH_IS_ID(id))
7066 		return (drerr_new(0, ESTC_NOTID, NULL));
7067 	cp = id;
7068 
7069 	return (cp->status(id, stat));
7070 }
7071 
7072 /*ARGSUSED*/
7073 sbd_error_t *
7074 drmach_unconfigure(drmachid_t id, int flags)
7075 {
7076 	drmach_device_t	*dp;
7077 	dev_info_t 	*rdip;
7078 
7079 	char	name[OBP_MAXDRVNAME];
7080 	int rv;
7081 
7082 	/*
7083 	 * Since CPU nodes are not configured, it is
7084 	 * necessary to skip the unconfigure step as
7085 	 * well.
7086 	 */
7087 	if (DRMACH_IS_CPU_ID(id)) {
7088 		return (NULL);
7089 	}
7090 
7091 	for (; id; ) {
7092 		dev_info_t	*fdip = NULL;
7093 
7094 		if (!DRMACH_IS_DEVICE_ID(id))
7095 			return (drerr_new(0, ESTC_INAPPROP, NULL));
7096 		dp = id;
7097 
7098 		rdip = dp->node->n_getdip(dp->node);
7099 
7100 		/*
7101 		 * drmach_unconfigure() is always called on a configured branch.
7102 		 * So the root of the branch was held earlier and must exist.
7103 		 */
7104 		ASSERT(rdip);
7105 
7106 		DRMACH_PR("drmach_unconfigure: unconfiguring DDI branch");
7107 
7108 		rv = dp->node->n_getprop(dp->node,
7109 		    "name", name, OBP_MAXDRVNAME);
7110 
7111 		/* The node must have a name */
7112 		if (rv)
7113 			return (0);
7114 
7115 		if (drmach_name2type_idx(name) < 0) {
7116 			if (DRMACH_IS_MEM_ID(id)) {
7117 				drmach_mem_t	*mp = id;
7118 				id = mp->next;
7119 			} else {
7120 				id = NULL;
7121 			}
7122 			continue;
7123 		}
7124 
7125 		/*
7126 		 * NOTE: FORCE flag is no longer needed under devfs
7127 		 */
7128 		ASSERT(e_ddi_branch_held(rdip));
7129 		if (e_ddi_branch_unconfigure(rdip, &fdip, 0) != 0) {
7130 			sbd_error_t	*err = NULL;
7131 			char		*path = kmem_alloc(MAXPATHLEN,
7132 					    KM_SLEEP);
7133 
7134 			/*
7135 			 * If non-NULL, fdip is returned held and must be
7136 			 * released.
7137 			 */
7138 			if (fdip != NULL) {
7139 				(void) ddi_pathname(fdip, path);
7140 				ddi_release_devi(fdip);
7141 			} else {
7142 				(void) ddi_pathname(rdip, path);
7143 			}
7144 
7145 			err = drerr_new(1, ESTC_DRVFAIL, path);
7146 
7147 			kmem_free(path, MAXPATHLEN);
7148 
7149 			/*
7150 			 * If we were unconfiguring an IO board, a call was
7151 			 * made to man_dr_detach.  We now need to call
7152 			 * man_dr_attach to regain man use of the eri.
7153 			 */
7154 			if (DRMACH_IS_IO_ID(id)) {
7155 				int (*func)(dev_info_t *dip);
7156 
7157 				func = (int (*)(dev_info_t *))kobj_getsymvalue\
7158 					("man_dr_attach", 0);
7159 
7160 				if (func) {
7161 					drmach_io_inst_t ios;
7162 					dev_info_t 	*pdip;
7163 					int		circ;
7164 
7165 					/*
7166 					 * Walk device tree to find rio dip for
7167 					 * the board
7168 					 * Since we are not interested in iosram
7169 					 * instance here, initialize it to 0, so
7170 					 * that the walk terminates as soon as
7171 					 * eri dip is found.
7172 					 */
7173 					ios.iosram_inst = 0;
7174 					ios.eri_dip = NULL;
7175 					ios.bnum = dp->bp->bnum;
7176 
7177 					if (pdip = ddi_get_parent(rdip)) {
7178 						ndi_hold_devi(pdip);
7179 						ndi_devi_enter(pdip, &circ);
7180 					}
7181 					/*
7182 					 * Root node doesn't have to be held in
7183 					 * any way.
7184 					 */
7185 					ASSERT(e_ddi_branch_held(rdip));
7186 					ddi_walk_devs(rdip,
7187 						drmach_board_find_io_insts,
7188 						(void *)&ios);
7189 
7190 					DRMACH_PR("drmach_unconfigure: bnum=%d"
7191 						" eri=0x%p\n",
7192 						ios.bnum, ios.eri_dip);
7193 
7194 					if (pdip) {
7195 						ndi_devi_exit(pdip, circ);
7196 						ndi_rele_devi(pdip);
7197 					}
7198 
7199 					if (ios.eri_dip) {
7200 						DRMACH_PR("calling"
7201 							" man_dr_attach\n");
7202 						(void) (*func)(ios.eri_dip);
7203 						/*
7204 						 * Release hold acquired in
7205 						 * drmach_board_find_io_insts()
7206 						 */
7207 						ndi_rele_devi(ios.eri_dip);
7208 					}
7209 				}
7210 			}
7211 			return (err);
7212 		}
7213 
7214 		if (DRMACH_IS_MEM_ID(id)) {
7215 			drmach_mem_t	*mp = id;
7216 			id = mp->next;
7217 		} else {
7218 			id = NULL;
7219 		}
7220 	}
7221 
7222 	return (NULL);
7223 }
7224 
7225 /*
7226  * drmach interfaces to legacy Starfire platmod logic
7227  * linkage via runtime symbol look up, called from plat_cpu_power*
7228  */
7229 
7230 /*
7231  * Start up a cpu.  It is possible that we're attempting to restart
7232  * the cpu after an UNCONFIGURE in which case the cpu will be
 * spinning in its cache.  So, all we have to do is wake it up.
7234  * Under normal circumstances the cpu will be coming from a previous
7235  * CONNECT and thus will be spinning in OBP.  In both cases, the
7236  * startup sequence is the same.
7237  */
7238 int
7239 drmach_cpu_poweron(struct cpu *cp)
7240 {
7241 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
7242 
7243 	ASSERT(MUTEX_HELD(&cpu_lock));
7244 
7245 	if (drmach_cpu_start(cp) != 0)
7246 		return (EBUSY);
7247 	else
7248 		return (0);
7249 }
7250 
/*
 * Shut down the cpu described by cp.  The caller must hold cpu_lock.
 *
 * The other cpus are paused while the target is quiesced and removed
 * via prom_hotremovecpu(); after they are restarted the target is
 * cross-trapped into drmach_cpu_shutdown_self and this thread polls
 * drmach_xt_mb[cpuid] until it is cleared or drmach_cpu_ntries polls
 * (drmach_cpu_delay apart) have elapsed.  On cheetah the iocage is
 * claimed and scrubbed around the sequence.
 *
 * Always returns 0; running out of polls is not reported to the caller.
 */
int
drmach_cpu_poweroff(struct cpu *cp)
{
	int		ntries;
	processorid_t	cpuid;
	void		drmach_cpu_shutdown_self(void);

	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * XXX CHEETAH SUPPORT
	 * for cheetah, we need to grab the iocage lock since iocage
	 * memory is used for e$ flush.
	 */
	if (drmach_is_cheetah) {
		mutex_enter(&drmach_iocage_lock);
		/* wait for the current iocage user, then claim the cage */
		while (drmach_iocage_is_busy)
			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
		drmach_iocage_is_busy = 1;
		drmach_iocage_mem_scrub(ecache_size * 2);
		mutex_exit(&drmach_iocage_lock);
	}

	cpuid = cp->cpu_id;

	/*
	 * Set affinity to ensure consistent reading and writing of
	 * drmach_xt_mb[cpuid] by one "master" CPU directing
	 * the shutdown of the target CPU.
	 */
	affinity_set(CPU->cpu_id);

	/*
	 * Capture all CPUs (except for detaching proc) to prevent
	 * crosscalls to the detaching proc until it has cleared its
	 * bit in cpu_ready_set.
	 *
	 * The CPUs remain paused and the prom_mutex is known to be free.
	 * This prevents blocking when doing prom IEEE-1275 calls at a
	 * high PIL level.
	 */
	promsafe_pause_cpus();

	/*
	 * Quiesce interrupts on the target CPU. We do this by setting
	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
	 * prevent it from receiving cross calls and cross traps.
	 * This prevents the processor from receiving any new soft interrupts.
	 */
	mp_cpu_quiesce(cp);

	prom_hotremovecpu(cpuid);

	start_cpus();

	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
	drmach_xt_mb[cpuid] = 0x80;

	xt_one_unchecked(cp->cpu_id, (xcfunc_t *)idle_stop_xcall,
		(uint64_t)drmach_cpu_shutdown_self, NULL);

	/* bounded poll for the target to clear its mailbox byte */
	ntries = drmach_cpu_ntries;
	while (drmach_xt_mb[cpuid] && ntries) {
		DELAY(drmach_cpu_delay);
		ntries--;
	}

	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */

	membar_sync();			/* make sure copy-back retires */

	affinity_clear();

	/*
	 * XXX CHEETAH SUPPORT
	 */
	if (drmach_is_cheetah) {
		/* scrub again, then hand the cage back and wake a waiter */
		mutex_enter(&drmach_iocage_lock);
		drmach_iocage_mem_scrub(ecache_size * 2);
		drmach_iocage_is_busy = 0;
		cv_signal(&drmach_iocage_cv);
		mutex_exit(&drmach_iocage_lock);
	}

	DRMACH_PR("waited %d out of %d tries for "
		"drmach_cpu_shutdown_self on cpu%d",
		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);

	/*
	 * Do this here instead of drmach_cpu_shutdown_self() to
	 * avoid an assertion failure panic in turnstile.c.
	 */
	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);

	return (0);
}
7349 
7350 void
7351 drmach_iocage_mem_scrub(uint64_t nbytes)
7352 {
7353 	extern int drmach_bc_bzero(void*, size_t);
7354 	int	rv;
7355 
7356 	ASSERT(MUTEX_HELD(&cpu_lock));
7357 
7358 	affinity_set(CPU->cpu_id);
7359 
7360 	rv = drmach_bc_bzero(drmach_iocage_vaddr, nbytes);
7361 	if (rv != 0) {
7362 		DRMACH_PR(
7363 		"iocage scrub failed, drmach_bc_bzero returned %d\n", rv);
7364 		rv = drmach_bc_bzero(drmach_iocage_vaddr,
7365 			drmach_iocage_size);
7366 		if (rv != 0)
7367 			cmn_err(CE_PANIC,
7368 			    "iocage scrub failed, drmach_bc_bzero rv=%d\n",
7369 			    rv);
7370 	}
7371 
7372 	cpu_flush_ecache();
7373 
7374 	affinity_clear();
7375 }
7376 
/*
 * Round x up to the next multiple of a and yield the result as a
 * uintptr_t.  An alignment of 0 yields x unchanged.  The mask
 * arithmetic only rounds correctly when a is a power of two.  Both
 * arguments are evaluated more than once.
 */
#define	ALIGN(x, a)	((a) == 0 ? (uintptr_t)(x) : \
	(((uintptr_t)(x) + (uintptr_t)(a) - 1l) & ~((uintptr_t)(a) - 1l)))
7379 
7380 static sbd_error_t *
7381 drmach_iocage_mem_get(dr_testboard_req_t *tbrq)
7382 {
7383 	pfn_t		basepfn;
7384 	pgcnt_t		npages;
7385 	extern int	memscrub_delete_span(pfn_t, pgcnt_t);
7386 	uint64_t	drmach_iocage_paddr_mbytes;
7387 
7388 	ASSERT(drmach_iocage_paddr != -1);
7389 
7390 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7391 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7392 
7393 	memscrub_delete_span(basepfn, npages);
7394 
7395 	mutex_enter(&cpu_lock);
7396 	drmach_iocage_mem_scrub(drmach_iocage_size);
7397 	mutex_exit(&cpu_lock);
7398 
7399 	/*
7400 	 * HPOST wants the address of the cage to be 64 megabyte-aligned
7401 	 * and in megabyte units.
7402 	 * The size of the cage is also in megabyte units.
7403 	 */
7404 	ASSERT(drmach_iocage_paddr == ALIGN(drmach_iocage_paddr, 0x4000000));
7405 
7406 	drmach_iocage_paddr_mbytes = drmach_iocage_paddr / 0x100000;
7407 
7408 	tbrq->memaddrhi = (uint32_t)(drmach_iocage_paddr_mbytes >> 32);
7409 	tbrq->memaddrlo = (uint32_t)drmach_iocage_paddr_mbytes;
7410 	tbrq->memlen = drmach_iocage_size / 0x100000;
7411 
7412 	DRMACH_PR("drmach_iocage_mem_get: hi: 0x%x", tbrq->memaddrhi);
7413 	DRMACH_PR("drmach_iocage_mem_get: lo: 0x%x", tbrq->memaddrlo);
7414 	DRMACH_PR("drmach_iocage_mem_get: size: 0x%x", tbrq->memlen);
7415 
7416 	return (NULL);
7417 }
7418 
7419 static sbd_error_t *
7420 drmach_iocage_mem_return(dr_testboard_reply_t *tbr)
7421 {
7422 	_NOTE(ARGUNUSED(tbr))
7423 
7424 	pfn_t		basepfn;
7425 	pgcnt_t		npages;
7426 	extern int	memscrub_add_span(pfn_t, pgcnt_t);
7427 
7428 	ASSERT(drmach_iocage_paddr != -1);
7429 
7430 	basepfn = (pfn_t)(drmach_iocage_paddr >> PAGESHIFT);
7431 	npages = (pgcnt_t)(drmach_iocage_size >> PAGESHIFT);
7432 
7433 	memscrub_add_span(basepfn, npages);
7434 
7435 	mutex_enter(&cpu_lock);
7436 	mutex_enter(&drmach_iocage_lock);
7437 	drmach_iocage_mem_scrub(drmach_iocage_size);
7438 	drmach_iocage_is_busy = 0;
7439 	cv_signal(&drmach_iocage_cv);
7440 	mutex_exit(&drmach_iocage_lock);
7441 	mutex_exit(&cpu_lock);
7442 
7443 	return (NULL);
7444 }
7445 
7446 static int
7447 drmach_cpu_intr_disable(cpu_t *cp)
7448 {
7449 	if (cpu_intr_disable(cp) != 0)
7450 		return (-1);
7451 	return (0);
7452 }
7453 
/*
 * Try to take a cpu away from the system for use by the I/O cage test.
 * The caller must hold cpu_lock.
 *
 * The cpu is rejected (-1) when its device is busy, when it cannot be
 * looked up, when it is not active, when its implementation cannot be
 * determined, or when it is a Jaguar in port 0 and
 * drmach_iocage_exclude_jaguar_port_zero is set.  Otherwise the cpu's
 * flags are saved in *oflags and the cpu is offlined, powered off and
 * unconfigured.  A failure in the latter two steps triggers a best
 * effort attempt to bring the cpu back to its previous state before -1
 * is returned.
 *
 * On success the device is marked busy and 0 is returned.
 */
static int
drmach_iocage_cpu_acquire(drmach_device_t *dp, cpu_flag_t *oflags)
{
	struct cpu	*cp;
	processorid_t	cpuid;
	static char	*fn = "drmach_iocage_cpu_acquire";
	sbd_error_t 	*err;
	int 		impl;

	ASSERT(DRMACH_IS_CPU_ID(dp));
	ASSERT(MUTEX_HELD(&cpu_lock));

	cpuid = ((drmach_cpu_t *)dp)->cpuid;

	DRMACH_PR("%s: attempting to acquire CPU id %d", fn, cpuid);

	if (dp->busy)
		return (-1);

	if ((cp = cpu_get(cpuid)) == NULL) {
		DRMACH_PR("%s: cpu_get(%d) returned NULL", fn, cpuid);
		return (-1);
	}

	if (!CPU_ACTIVE(cp)) {
		DRMACH_PR("%s: skipping offlined CPU id %d", fn, cpuid);
		return (-1);
	}

	/*
	 * There is a known HW bug where a Jaguar CPU in Safari port 0 (SBX/P0)
	 * can fail to receive an XIR. To workaround this issue until a hardware
	 * fix is implemented, we will exclude the selection of these CPUs.
	 *
	 * Once a fix is implemented in hardware, this code should be updated
	 * to allow Jaguar CPUs that have the fix to be used. However, support
	 * must be retained to skip revisions that do not have this fix.
	 */

	err = drmach_cpu_get_impl(dp, &impl);
	if (err) {
		DRMACH_PR("%s: error getting impl. of CPU id %d", fn, cpuid);
		sbd_err_clear(&err);
		return (-1);
	}

	if (IS_JAGUAR(impl) && (STARCAT_CPUID_TO_LPORT(cpuid) == 0) &&
	    drmach_iocage_exclude_jaguar_port_zero) {
		DRMACH_PR("%s: excluding CPU id %d: port 0 on jaguar",
		    fn, cpuid);
		return (-1);
	}

	/* remember the flags so the previous state can be restored later */
	ASSERT(oflags);
	*oflags = cp->cpu_flags;

	if (cpu_offline(cp, 0)) {
		DRMACH_PR("%s: cpu_offline failed for CPU id %d", fn, cpuid);
		return (-1);
	}

	if (cpu_poweroff(cp)) {
		DRMACH_PR("%s: cpu_poweroff failed for CPU id %d", fn, cpuid);
		/* undo the offline and, if needed, the no-intr state */
		if (cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to online CPU id %d "
			    "during I/O cage test selection", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	if (cpu_unconfigure(cpuid)) {
		DRMACH_PR("%s: cpu_unconfigure failed for CPU id %d", fn,
		    cpuid);
		/* try to walk the cpu all the way back to its old state */
		(void) cpu_configure(cpuid);
		if ((cp = cpu_get(cpuid)) == NULL) {
			cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
			    "during I/O cage test selection", cpuid);
			/* the device stays marked busy on this path */
			dp->busy = 1;
			return (-1);
		}
		if (cpu_poweron(cp) || cpu_online(cp)) {
			cmn_err(CE_WARN, "failed to %s CPU id %d "
			    "during I/O cage test selection",
			    cpu_is_poweredoff(cp) ?
			    "poweron" : "online", cpuid);
		}
		if (CPU_ACTIVE(cp) && cpu_flagged_nointr(*oflags) &&
		    drmach_cpu_intr_disable(cp) != 0) {
			cmn_err(CE_WARN, "failed to restore CPU id %d "
			    "no-intr during I/O cage test selection", cpuid);
		}
		return (-1);
	}

	dp->busy = 1;

	DRMACH_PR("%s: acquired CPU id %d", fn, cpuid);

	return (0);
}
7559 
7560 /*
7561  * Attempt to acquire all the CPU devices passed in. It is
7562  * assumed that all the devices in the list are the cores of
7563  * a single CMP device. Non CMP devices can be handled as a
7564  * single core CMP by passing in a one element list.
7565  *
7566  * Success is only returned if *all* the devices in the list
7567  * can be acquired. In the failure case, none of the devices
7568  * in the list will be held as acquired.
7569  */
7570 static int
7571 drmach_iocage_cmp_acquire(drmach_device_t **dpp, cpu_flag_t *oflags)
7572 {
7573 	int	curr;
7574 	int	i;
7575 	int	rv = 0;
7576 
7577 	ASSERT((dpp != NULL) && (*dpp != NULL));
7578 
7579 	/*
7580 	 * Walk the list of CPU devices (cores of a CMP)
7581 	 * and attempt to acquire them. Bail out if an
7582 	 * error is encountered.
7583 	 */
7584 	for (curr = 0; curr < MAX_CORES_PER_CMP; curr++) {
7585 
7586 		/* check for the end of the list */
7587 		if (dpp[curr] == NULL) {
7588 			break;
7589 		}
7590 
7591 		ASSERT(DRMACH_IS_CPU_ID(dpp[curr]));
7592 		ASSERT(dpp[curr]->portid == (*dpp)->portid);
7593 
7594 		rv = drmach_iocage_cpu_acquire(dpp[curr], &oflags[curr]);
7595 		if (rv != 0) {
7596 			break;
7597 		}
7598 	}
7599 
7600 	/*
7601 	 * Check for an error.
7602 	 */
7603 	if (rv != 0) {
7604 		/*
7605 		 * Make a best effort attempt to return any cores
7606 		 * that were already acquired before the error was
7607 		 * encountered.
7608 		 */
7609 		for (i = 0; i < curr; i++) {
7610 			(void) drmach_iocage_cpu_return(dpp[i], oflags[i]);
7611 		}
7612 	}
7613 
7614 	return (rv);
7615 }
7616 
7617 static int
7618 drmach_iocage_cpu_return(drmach_device_t *dp, cpu_flag_t oflags)
7619 {
7620 	processorid_t	cpuid;
7621 	struct cpu	*cp;
7622 	int		rv = 0;
7623 	static char	*fn = "drmach_iocage_cpu_return";
7624 
7625 	ASSERT(DRMACH_IS_CPU_ID(dp));
7626 	ASSERT(MUTEX_HELD(&cpu_lock));
7627 
7628 	cpuid = ((drmach_cpu_t *)dp)->cpuid;
7629 
7630 	DRMACH_PR("%s: attempting to return CPU id: %d", fn, cpuid);
7631 
7632 	if (cpu_configure(cpuid)) {
7633 		cmn_err(CE_WARN, "failed to reconfigure CPU id %d "
7634 		    "after I/O cage test", cpuid);
7635 		/*
7636 		 * The component was never set to unconfigured during the IO
7637 		 * cage test, so we need to leave marked as busy to prevent
7638 		 * further DR operations involving this component.
7639 		 */
7640 		return (-1);
7641 	}
7642 
7643 	if ((cp = cpu_get(cpuid)) == NULL) {
7644 		cmn_err(CE_WARN, "cpu_get failed on CPU id %d after "
7645 		    "I/O cage test", cpuid);
7646 		dp->busy = 0;
7647 		return (-1);
7648 	}
7649 
7650 	if (cpu_poweron(cp) || cpu_online(cp)) {
7651 		cmn_err(CE_WARN, "failed to %s CPU id %d after I/O "
7652 		    "cage test", cpu_is_poweredoff(cp) ?
7653 		    "poweron" : "online", cpuid);
7654 		rv = -1;
7655 	}
7656 
7657 	/*
7658 	 * drmach_iocage_cpu_acquire will accept cpus in state P_ONLINE or
7659 	 * P_NOINTR. Need to return to previous user-visible state.
7660 	 */
7661 	if (CPU_ACTIVE(cp) && cpu_flagged_nointr(oflags) &&
7662 	    drmach_cpu_intr_disable(cp) != 0) {
7663 		cmn_err(CE_WARN, "failed to restore CPU id %d "
7664 		    "no-intr after I/O cage test", cpuid);
7665 		rv = -1;
7666 	}
7667 
7668 	dp->busy = 0;
7669 
7670 	DRMACH_PR("%s: returned CPU id: %d", fn, cpuid);
7671 
7672 	return (rv);
7673 }
7674 
7675 static sbd_error_t *
7676 drmach_iocage_cpu_get(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7677     cpu_flag_t *oflags)
7678 {
7679 	drmach_board_t	*bp;
7680 	int		b_rv;
7681 	int		b_idx;
7682 	drmachid_t	b_id;
7683 	int		found;
7684 
7685 	mutex_enter(&cpu_lock);
7686 
7687 	ASSERT(drmach_boards != NULL);
7688 
7689 	found = 0;
7690 
7691 	/*
7692 	 * Walk the board list.
7693 	 */
7694 	b_rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
7695 
7696 	while (b_rv == 0) {
7697 
7698 		int		d_rv;
7699 		int		d_idx;
7700 		drmachid_t	d_id;
7701 
7702 		bp = b_id;
7703 
7704 		if (bp->connected == 0 || bp->devices == NULL) {
7705 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7706 			continue;
7707 		}
7708 
7709 		/* An AXQ restriction disqualifies MCPU's as candidates. */
7710 		if (DRMACH_BNUM2SLOT(bp->bnum) == 1) {
7711 			b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7712 			continue;
7713 		}
7714 
7715 		/*
7716 		 * Walk the device list of this board.
7717 		 */
7718 		d_rv = drmach_array_first(bp->devices, &d_idx, &d_id);
7719 
7720 		while (d_rv == 0) {
7721 
7722 			drmach_device_t	*ndp;
7723 
7724 			/* only interested in CPU devices */
7725 			if (!DRMACH_IS_CPU_ID(d_id)) {
7726 				d_rv = drmach_array_next(bp->devices, &d_idx,
7727 				    &d_id);
7728 				continue;
7729 			}
7730 
7731 			/*
7732 			 * The following code assumes two properties
7733 			 * of a CMP device:
7734 			 *
7735 			 *   1. All cores of a CMP are grouped together
7736 			 *	in the device list.
7737 			 *
7738 			 *   2. There will only be a maximum of two cores
7739 			 *	present in the CMP.
7740 			 *
7741 			 * If either of these two properties change,
7742 			 * this code will have to be revisited.
7743 			 */
7744 
7745 			dpp[0] = d_id;
7746 			dpp[1] = NULL;
7747 
7748 			/*
7749 			 * Get the next device. It may or may not be used.
7750 			 */
7751 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7752 			ndp = d_id;
7753 
7754 			if ((d_rv == 0) && DRMACH_IS_CPU_ID(d_id)) {
7755 				/*
7756 				 * The second device is only interesting for
7757 				 * this pass if it has the same portid as the
7758 				 * first device. This implies that both are
7759 				 * cores of the same CMP.
7760 				 */
7761 				if (dpp[0]->portid == ndp->portid) {
7762 					dpp[1] = d_id;
7763 				}
7764 			}
7765 
7766 			/*
7767 			 * Attempt to acquire all cores of the CMP.
7768 			 */
7769 			if (drmach_iocage_cmp_acquire(dpp, oflags) == 0) {
7770 				found = 1;
7771 				break;
7772 			}
7773 
7774 			/*
7775 			 * Check if the search for the second core was
7776 			 * successful. If not, the next iteration should
7777 			 * use that device.
7778 			 */
7779 			if (dpp[1] == NULL) {
7780 				continue;
7781 			}
7782 
7783 			d_rv = drmach_array_next(bp->devices, &d_idx, &d_id);
7784 		}
7785 
7786 		if (found)
7787 			break;
7788 
7789 		b_rv = drmach_array_next(drmach_boards, &b_idx, &b_id);
7790 	}
7791 
7792 	mutex_exit(&cpu_lock);
7793 
7794 	if (!found) {
7795 		return (drerr_new(1, ESTC_IOCAGE_NO_CPU_AVAIL, NULL));
7796 	}
7797 
7798 	tbrq->cpu_portid = (*dpp)->portid;
7799 
7800 	return (NULL);
7801 }
7802 
7803 /*
7804  * Setup an iocage by acquiring a cpu and memory.
7805  */
7806 static sbd_error_t *
7807 drmach_iocage_setup(dr_testboard_req_t *tbrq, drmach_device_t **dpp,
7808     cpu_flag_t *oflags)
7809 {
7810 	sbd_error_t *err;
7811 
7812 	err = drmach_iocage_cpu_get(tbrq, dpp, oflags);
7813 	if (!err) {
7814 		mutex_enter(&drmach_iocage_lock);
7815 		while (drmach_iocage_is_busy)
7816 			cv_wait(&drmach_iocage_cv, &drmach_iocage_lock);
7817 		drmach_iocage_is_busy = 1;
7818 		mutex_exit(&drmach_iocage_lock);
7819 		err = drmach_iocage_mem_get(tbrq);
7820 		if (err) {
7821 			mutex_enter(&drmach_iocage_lock);
7822 			drmach_iocage_is_busy = 0;
7823 			cv_signal(&drmach_iocage_cv);
7824 			mutex_exit(&drmach_iocage_lock);
7825 		}
7826 	}
7827 	return (err);
7828 }
7829 
/* number of PCI leaf entries kept per Schizo (see drmach_s1p_schizo_t) */
#define	DRMACH_SCHIZO_PCI_LEAF_MAX	2
#define	DRMACH_SCHIZO_PCI_SLOT_MAX	8
/* number of register snapshots kept; one per drmach_sr_iter_t value */
#define	DRMACH_S1P_SAMPLE_MAX		2

/*
 * Identifies which snapshot is being taken or examined. The value is
 * used directly as an index into the per-sample arrays below.
 */
typedef enum {
	DRMACH_POST_SUSPEND = 0,
	DRMACH_PRE_RESUME
} drmach_sr_iter_t;

/*
 * Saved state for one slot 1 AXQ.
 */
typedef struct {
	dev_info_t	*dip;		/* saved without a hold */
	uint32_t	portid;
	uint32_t	pcr_sel_save;	/* perf counter select reg to restore */
	uint32_t	pic_l2_io_q[DRMACH_S1P_SAMPLE_MAX]; /* counter samples */
	uint64_t	reg_basepa;	/* 0 means no AXQ recorded */
} drmach_s1p_axq_t;

/*
 * Saved state for one Schizo PCI leaf.
 */
typedef struct {
	dev_info_t		*dip;		/* saved without a hold */
	uint32_t		portid;
	uint64_t		csr_basepa;
	struct {
		uint64_t 	slot_intr_state_diag;
		uint64_t 	obio_intr_state_diag;
		uint_t		nmap_regs;	/* entries in intr_map_regs */
		uint64_t	*intr_map_regs;	/* kmem allocated, may be NULL */
	} regs[DRMACH_S1P_SAMPLE_MAX];
} drmach_s1p_pci_t;

/*
 * Saved state for one Schizo and its PCI leaves.
 */
typedef struct {
	uint64_t		csr_basepa;	/* 0 means no Schizo recorded */
	struct {
		uint64_t	csr;
		uint64_t	errctrl;
		uint64_t	errlog;
	} regs[DRMACH_S1P_SAMPLE_MAX];
	drmach_s1p_pci_t	pci[DRMACH_SCHIZO_PCI_LEAF_MAX];
} drmach_s1p_schizo_t;

/*
 * Everything saved for the slot 1 board of one boardset.
 */
typedef struct {
	drmach_s1p_axq_t	axq;
	drmach_s1p_schizo_t	schizo[STARCAT_SLOT1_IO_MAX];
} drmach_slot1_pause_t;
7873 
/*
 * Table of saved state for paused slot1 devices, indexed by expander
 * number. A NULL entry means nothing has been recorded for that
 * boardset.
 */
static drmach_slot1_pause_t *drmach_slot1_paused[STARCAT_BDSET_MAX];
/* non-zero: the table is to be freed on the next suspend pass */
static int drmach_slot1_pause_init = 1;

/*
 * Enables the slot 1 pause bookkeeping and checks; on by default only
 * in DEBUG kernels.
 */
#ifdef DEBUG
int drmach_slot1_pause_debug = 1;
#else
int drmach_slot1_pause_debug = 0;
#endif /* DEBUG */
7885 
7886 static int
7887 drmach_is_slot1_pause_axq(dev_info_t *dip, char *name, int *id, uint64_t *reg)
7888 {
7889 	int		portid, exp, slot, i;
7890 	drmach_reg_t	regs[2];
7891 	int		reglen = sizeof (regs);
7892 
7893 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
7894 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
7895 		return (0);
7896 	}
7897 
7898 	exp = (portid >> 5) & 0x1f;
7899 	slot = portid & 0x1;
7900 
7901 	if (slot == 0 || strncmp(name, DRMACH_AXQ_NAMEPROP,
7902 	    strlen(DRMACH_AXQ_NAMEPROP))) {
7903 		return (0);
7904 	}
7905 
7906 	mutex_enter(&cpu_lock);
7907 	for (i = 0; i < STARCAT_SLOT1_CPU_MAX; i++) {
7908 		if (cpu[MAKE_CPUID(exp, slot, i)]) {
7909 			/* maxcat cpu present */
7910 			mutex_exit(&cpu_lock);
7911 			return (0);
7912 		}
7913 	}
7914 	mutex_exit(&cpu_lock);
7915 
7916 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
7917 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
7918 		DRMACH_PR("drmach_is_slot1_pause_axq: no reg prop for "
7919 		    "axq dip=%p\n", dip);
7920 		return (0);
7921 	}
7922 
7923 	ASSERT(id && reg);
7924 	*reg = (uint64_t)regs[0].reg_addr_hi << 32;
7925 	*reg |= (uint64_t)regs[0].reg_addr_lo;
7926 	*id = portid;
7927 
7928 	return (1);
7929 }
7930 
7931 /*
7932  * Allocate an entry in the slot1_paused state table.
7933  */
7934 static void
7935 drmach_slot1_pause_add_axq(dev_info_t *axq_dip, char *axq_name, int axq_portid,
7936     uint64_t reg, drmach_slot1_pause_t **slot1_paused)
7937 {
7938 	int	axq_exp;
7939 	drmach_slot1_pause_t *slot1;
7940 
7941 	axq_exp = (axq_portid >> 5) & 0x1f;
7942 
7943 	ASSERT(axq_portid & 0x1);
7944 	ASSERT(slot1_paused[axq_exp] == NULL);
7945 	ASSERT(strncmp(axq_name, DRMACH_AXQ_NAMEPROP,
7946 	    strlen(DRMACH_AXQ_NAMEPROP)) == 0);
7947 
7948 	slot1 = kmem_zalloc(sizeof (*slot1), KM_SLEEP);
7949 
7950 	/*
7951 	 * XXX This dip should really be held (via ndi_hold_devi())
7952 	 * before saving it in the axq pause structure. However that
7953 	 * would prevent DR as the pause data structures persist until
7954 	 * the next suspend. drmach code should be modified to free the
7955 	 * the slot 1 pause data structures for a boardset when its
7956 	 * slot 1 board is DRed out. The dip can then be released via
7957 	 * ndi_rele_devi() when the pause data structure is freed
7958 	 * allowing DR to proceed. Until this change is made, drmach
7959 	 * code should be careful about dereferencing the saved dip
7960 	 * as it may no longer exist.
7961 	 */
7962 	slot1->axq.dip = axq_dip;
7963 	slot1->axq.portid = axq_portid;
7964 	slot1->axq.reg_basepa = reg;
7965 	slot1_paused[axq_exp] = slot1;
7966 }
7967 
7968 static void
7969 drmach_s1p_pci_free(drmach_s1p_pci_t *pci)
7970 {
7971 	int	i;
7972 
7973 	for (i = 0; i < DRMACH_S1P_SAMPLE_MAX; i++) {
7974 		if (pci->regs[i].intr_map_regs != NULL) {
7975 			ASSERT(pci->regs[i].nmap_regs > 0);
7976 			kmem_free(pci->regs[i].intr_map_regs,
7977 			    pci->regs[i].nmap_regs * sizeof (uint64_t));
7978 		}
7979 	}
7980 }
7981 
7982 static void
7983 drmach_slot1_pause_free(drmach_slot1_pause_t **slot1_paused)
7984 {
7985 	int	i, j, k;
7986 	drmach_slot1_pause_t *slot1;
7987 
7988 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
7989 		if ((slot1 = slot1_paused[i]) == NULL)
7990 			continue;
7991 
7992 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
7993 			for (k = 0; k < DRMACH_SCHIZO_PCI_LEAF_MAX; k++)
7994 				drmach_s1p_pci_free(&slot1->schizo[j].pci[k]);
7995 
7996 		kmem_free(slot1, sizeof (*slot1));
7997 		slot1_paused[i] = NULL;
7998 	}
7999 }
8000 
8001 /*
8002  * Tree walk callback routine. If dip represents a Schizo PCI leaf,
8003  * fill in the appropriate info in the slot1_paused state table.
8004  */
8005 static int
8006 drmach_find_slot1_io(dev_info_t *dip, void *arg)
8007 {
8008 	int		portid, exp, ioc_unum, leaf_unum;
8009 	char		buf[OBP_MAXDRVNAME];
8010 	int		buflen = sizeof (buf);
8011 	drmach_reg_t	regs[3];
8012 	int		reglen = sizeof (regs);
8013 	uint32_t	leaf_offset;
8014 	uint64_t	schizo_csr_pa, pci_csr_pa;
8015 	drmach_s1p_pci_t *pci;
8016 	drmach_slot1_pause_t **slot1_paused = (drmach_slot1_pause_t **)arg;
8017 
8018 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8019 	    "name", (caddr_t)buf, &buflen) != DDI_PROP_SUCCESS ||
8020 	    strncmp(buf, DRMACH_PCI_NAMEPROP, strlen(DRMACH_PCI_NAMEPROP))) {
8021 		return (DDI_WALK_CONTINUE);
8022 	}
8023 
8024 	if ((portid = ddi_getprop(DDI_DEV_T_ANY, dip,
8025 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
8026 		return (DDI_WALK_CONTINUE);
8027 	}
8028 
8029 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8030 	    "reg", (caddr_t)regs, &reglen) != DDI_PROP_SUCCESS) {
8031 		DRMACH_PR("drmach_find_slot1_io: no reg prop for pci "
8032 		    "dip=%p\n", dip);
8033 		return (DDI_WALK_CONTINUE);
8034 	}
8035 
8036 	exp = portid >> 5;
8037 	ioc_unum = portid & 0x1;
8038 	leaf_offset = regs[0].reg_addr_lo & 0x7fffff;
8039 	pci_csr_pa = (uint64_t)regs[0].reg_addr_hi << 32;
8040 	pci_csr_pa |= (uint64_t)regs[0].reg_addr_lo;
8041 	schizo_csr_pa = (uint64_t)regs[1].reg_addr_hi << 32;
8042 	schizo_csr_pa |= (uint64_t)regs[1].reg_addr_lo;
8043 
8044 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8045 	ASSERT(slot1_paused[exp] != NULL);
8046 	ASSERT(leaf_offset == 0x600000 || leaf_offset == 0x700000);
8047 	ASSERT(slot1_paused[exp]->schizo[ioc_unum].csr_basepa == 0x0UL ||
8048 	    slot1_paused[exp]->schizo[ioc_unum].csr_basepa == schizo_csr_pa);
8049 
8050 	leaf_unum = (leaf_offset == 0x600000) ? 0 : 1;
8051 	slot1_paused[exp]->schizo[ioc_unum].csr_basepa = schizo_csr_pa;
8052 	pci = &slot1_paused[exp]->schizo[ioc_unum].pci[leaf_unum];
8053 
8054 	/*
8055 	 * XXX This dip should really be held (via ndi_hold_devi())
8056 	 * before saving it in the pci pause structure. However that
8057 	 * would prevent DR as the pause data structures persist until
8058 	 * the next suspend. drmach code should be modified to free the
8059 	 * the slot 1 pause data structures for a boardset when its
8060 	 * slot 1 board is DRed out. The dip can then be released via
8061 	 * ndi_rele_devi() when the pause data structure is freed
8062 	 * allowing DR to proceed. Until this change is made, drmach
8063 	 * code should be careful about dereferencing the saved dip as
8064 	 * it may no longer exist.
8065 	 */
8066 	pci->dip = dip;
8067 	pci->portid = portid;
8068 	pci->csr_basepa = pci_csr_pa;
8069 
8070 	DRMACH_PR("drmach_find_slot1_io: name=%s, portid=0x%x, dip=%p\n",
8071 	    buf, portid, dip);
8072 
8073 	return (DDI_WALK_PRUNECHILD);
8074 }
8075 
8076 static void
8077 drmach_slot1_pause_add_io(drmach_slot1_pause_t **slot1_paused)
8078 {
8079 	/*
8080 	 * Root node doesn't have to be held
8081 	 */
8082 	ddi_walk_devs(ddi_root_node(), drmach_find_slot1_io,
8083 	    (void *)slot1_paused);
8084 }
8085 
8086 /*
8087  * Save the interrupt mapping registers for each non-idle interrupt
8088  * represented by the bit pairs in the saved interrupt state
8089  * diagnostic registers for this PCI leaf.
8090  */
8091 static void
8092 drmach_s1p_intr_map_reg_save(drmach_s1p_pci_t *pci, drmach_sr_iter_t iter)
8093 {
8094 	int	 i, cnt, ino;
8095 	uint64_t reg;
8096 	char	 *dname;
8097 	uchar_t	 Xmits;
8098 
8099 	dname = ddi_binding_name(pci->dip);
8100 	Xmits = (strcmp(dname, XMITS_BINDING_NAME) == 0)  ?  1 : 0;
8101 
8102 	/*
8103 	 * 1st pass allocates, 2nd pass populates.
8104 	 */
8105 	for (i = 0; i < 2; i++) {
8106 		cnt = ino = 0;
8107 
8108 		/*
8109 		 * PCI slot interrupts
8110 		 */
8111 		reg = pci->regs[iter].slot_intr_state_diag;
8112 		while (reg) {
8113 			/*
8114 			 * Xmits Interrupt Number Offset(ino) Assignments
8115 			 *   00-17 PCI Slot Interrupts
8116 			 *   18-1f Not Used
8117 			 */
8118 			if ((Xmits) && (ino > 0x17))
8119 				break;
8120 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8121 			    COMMON_CLEAR_INTR_REG_IDLE) {
8122 				if (i) {
8123 					pci->regs[iter].intr_map_regs[cnt] =
8124 					    lddphysio(pci->csr_basepa +
8125 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8126 					    ino * sizeof (reg));
8127 				}
8128 				++cnt;
8129 			}
8130 			++ino;
8131 			reg >>= 2;
8132 		}
8133 
8134 		/*
8135 		 * Xmits Interrupt Number Offset(ino) Assignments
8136 		 *   20-2f Not Used
8137 		 *   30-37 Internal interrupts
8138 		 *   38-3e Not Used
8139 		 */
8140 		ino = (Xmits)  ?  0x30 : 0x20;
8141 
8142 		/*
8143 		 * OBIO and internal schizo interrupts
8144 		 * Each PCI leaf has a set of mapping registers for all
8145 		 * possible interrupt sources except the NewLink interrupts.
8146 		 */
8147 		reg = pci->regs[iter].obio_intr_state_diag;
8148 		while (reg && ino <= 0x38) {
8149 			if ((reg & COMMON_CLEAR_INTR_REG_MASK) !=
8150 			    COMMON_CLEAR_INTR_REG_IDLE) {
8151 				if (i) {
8152 					pci->regs[iter].intr_map_regs[cnt] =
8153 					    lddphysio(pci->csr_basepa +
8154 					    SCHIZO_IB_INTR_MAP_REG_OFFSET +
8155 					    ino * sizeof (reg));
8156 				}
8157 				++cnt;
8158 			}
8159 			++ino;
8160 			reg >>= 2;
8161 		}
8162 
8163 		if (!i) {
8164 			pci->regs[iter].nmap_regs = cnt;
8165 			pci->regs[iter].intr_map_regs =
8166 			    kmem_zalloc(cnt * sizeof (reg), KM_SLEEP);
8167 		}
8168 	}
8169 }
8170 
8171 static void
8172 drmach_s1p_axq_update(drmach_s1p_axq_t *axq, drmach_sr_iter_t iter)
8173 {
8174 	uint32_t	reg;
8175 
8176 	if (axq->reg_basepa == 0x0UL)
8177 		return;
8178 
8179 	if (iter == DRMACH_POST_SUSPEND) {
8180 		axq->pcr_sel_save = ldphysio(axq->reg_basepa +
8181 		    AXQ_SLOT1_PERFCNT_SEL);
8182 		/*
8183 		 * Select l2_io_queue counter by writing L2_IO_Q mux
8184 		 * input to bits 0-6 of perf cntr select reg.
8185 		 */
8186 		reg = axq->pcr_sel_save;
8187 		reg &= ~AXQ_PIC_CLEAR_MASK;
8188 		reg |= L2_IO_Q;
8189 
8190 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL, reg);
8191 	}
8192 
8193 	axq->pic_l2_io_q[iter] = ldphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT0);
8194 
8195 	if (iter == DRMACH_PRE_RESUME) {
8196 		stphysio(axq->reg_basepa + AXQ_SLOT1_PERFCNT_SEL,
8197 		    axq->pcr_sel_save);
8198 	}
8199 
8200 	DRMACH_PR("drmach_s1p_axq_update: axq #%d pic_l2_io_q[%d]=%d\n",
8201 	    ddi_get_instance(axq->dip), iter, axq->pic_l2_io_q[iter]);
8202 }
8203 
8204 static void
8205 drmach_s1p_schizo_update(drmach_s1p_schizo_t *schizo, drmach_sr_iter_t iter)
8206 {
8207 	int	i;
8208 	drmach_s1p_pci_t *pci;
8209 
8210 	if (schizo->csr_basepa == 0x0UL)
8211 		return;
8212 
8213 	schizo->regs[iter].csr =
8214 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_CSR_OFFSET);
8215 	schizo->regs[iter].errctrl =
8216 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRCTRL_OFFSET);
8217 	schizo->regs[iter].errlog =
8218 	    lddphysio(schizo->csr_basepa + SCHIZO_CB_ERRLOG_OFFSET);
8219 
8220 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8221 		pci = &schizo->pci[i];
8222 		if (pci->dip != NULL && pci->csr_basepa != 0x0UL) {
8223 			pci->regs[iter].slot_intr_state_diag =
8224 			    lddphysio(pci->csr_basepa +
8225 			    COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
8226 
8227 			pci->regs[iter].obio_intr_state_diag =
8228 			    lddphysio(pci->csr_basepa +
8229 			    COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
8230 
8231 			drmach_s1p_intr_map_reg_save(pci, iter);
8232 		}
8233 	}
8234 }
8235 
8236 /*
8237  * Called post-suspend and pre-resume to snapshot the suspend state
8238  * of slot1 AXQs and Schizos.
8239  */
8240 static void
8241 drmach_slot1_pause_update(drmach_slot1_pause_t **slot1_paused,
8242     drmach_sr_iter_t iter)
8243 {
8244 	int	i, j;
8245 	drmach_slot1_pause_t *slot1;
8246 
8247 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8248 		if ((slot1 = slot1_paused[i]) == NULL)
8249 			continue;
8250 
8251 		drmach_s1p_axq_update(&slot1->axq, iter);
8252 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++)
8253 			drmach_s1p_schizo_update(&slot1->schizo[j], iter);
8254 	}
8255 }
8256 
/*
 * Starcat hPCI Schizo devices.
 *
 * The name field is overloaded. NULL means the slot (interrupt concentrator
 * bus) is not used. intr_mask is a bit mask representing the 4 possible
 * interrupts per slot, on if valid (rio does not use interrupt lines 0, 1).
 *
 * The table is indexed first by slot number and then by Schizo number;
 * drmach_s1p_decode_slot_intr derives the slot from bits 2-4 of the ino.
 */
static struct {
	char	*name;		/* source device name, NULL if slot unused */
	uint8_t	intr_mask;	/* bit n set: interrupt line n is valid */
} drmach_schz_slot_intr[][DRMACH_SCHIZO_PCI_LEAF_MAX] = {
	/* Schizo 0 */		/* Schizo 1 */
	{{"C3V0", 0xf},		{"C3V1", 0xf}},		/* slot 0 */
	{{"C5V0", 0xf},		{"C5V1", 0xf}},		/* slot 1 */
	{{"rio", 0xc},		{NULL, 0x0}},		/* slot 2 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 3 */
	{{"sbbc", 0xf},		{NULL, 0x0}},		/* slot 4 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 5 */
	{{NULL, 0x0},		{NULL, 0x0}},		/* slot 6 */
	{{NULL, 0x0},		{NULL, 0x0}}		/* slot 7 */
};
8278 
/*
 * Translate a Schizo internal interrupt ino (0x30-0x37) into a short
 * description. Only the low three bits select the text; values with
 * no entry of their own yield "Reserved".
 *
 * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.4.4
 * "Interrupt Registers", Table 22-69, page 306.
 */
static char *
drmach_schz_internal_ino2str(int ino)
{
	static char	*desc[] = {
		"Uncorrectable ECC error",	/* 0x0 */
		"Correctable ECC error",	/* 0x1 */
		"PCI Bus A Error",		/* 0x2 */
		"PCI Bus B Error",		/* 0x3 */
		"Safari Bus Error"		/* 0x4 */
	};
	int	which;

	ASSERT(ino >= 0x30 && ino <= 0x37);

	which = ino & 0x7;
	if (which < (int)(sizeof (desc) / sizeof (desc[0])))
		return (desc[which]);

	return ("Reserved");
}
8300 
8301 #define	DRMACH_INTR_MASK_SHIFT(ino)	((ino) << 1)
8302 
8303 static void
8304 drmach_s1p_decode_slot_intr(int exp, int unum, drmach_s1p_pci_t *pci,
8305     int ino, drmach_sr_iter_t iter)
8306 {
8307 	uint8_t		intr_mask;
8308 	char		*slot_devname;
8309 	char		namebuf[OBP_MAXDRVNAME];
8310 	int		slot, intr_line, slot_valid, intr_valid;
8311 
8312 	ASSERT(ino >= 0 && ino <= 0x1f);
8313 	ASSERT((pci->regs[iter].slot_intr_state_diag &
8314 	    (COMMON_CLEAR_INTR_REG_MASK << DRMACH_INTR_MASK_SHIFT(ino))) !=
8315 	    COMMON_CLEAR_INTR_REG_IDLE);
8316 
8317 	slot = (ino >> 2) & 0x7;
8318 	intr_line = ino & 0x3;
8319 
8320 	slot_devname = drmach_schz_slot_intr[slot][unum].name;
8321 	slot_valid = (slot_devname == NULL) ? 0 : 1;
8322 	if (!slot_valid) {
8323 		snprintf(namebuf, sizeof (namebuf), "slot %d (INVALID)", slot);
8324 		slot_devname = namebuf;
8325 	}
8326 
8327 	intr_mask = drmach_schz_slot_intr[slot][unum].intr_mask;
8328 	intr_valid = (1 << intr_line) & intr_mask;
8329 
8330 	prom_printf("IO%d/P%d PCI slot interrupt: ino=0x%x, source device=%s, "
8331 	    "interrupt line=%d%s\n", exp, unum, ino, slot_devname, intr_line,
8332 	    (slot_valid && !intr_valid) ? " (INVALID)" : "");
8333 }
8334 
8335 /*
8336  * Log interrupt source device info for all valid, pending interrupts
8337  * on each Schizo PCI leaf. Called if Schizo has logged a Safari bus
8338  * error in the error ctrl reg.
8339  */
8340 static void
8341 drmach_s1p_schizo_log_intr(drmach_s1p_schizo_t *schizo, int exp,
8342     int unum, drmach_sr_iter_t iter)
8343 {
8344 	uint64_t	reg;
8345 	int		i, n, ino;
8346 	drmach_s1p_pci_t *pci;
8347 
8348 	ASSERT(exp >= 0 && exp < STARCAT_BDSET_MAX);
8349 	ASSERT(unum < STARCAT_SLOT1_IO_MAX);
8350 
8351 	/*
8352 	 * Check the saved interrupt mapping registers. If interrupt is valid,
8353 	 * map the ino to the Schizo source device and check that the pci
8354 	 * slot and interrupt line are valid.
8355 	 */
8356 	for (i = 0; i < DRMACH_SCHIZO_PCI_LEAF_MAX; i++) {
8357 		pci = &schizo->pci[i];
8358 		for (n = 0; n < pci->regs[iter].nmap_regs; n++) {
8359 			reg = pci->regs[iter].intr_map_regs[n];
8360 			if (reg & COMMON_INTR_MAP_REG_VALID) {
8361 				ino = reg & COMMON_INTR_MAP_REG_INO;
8362 
8363 				if (ino <= 0x1f) {
8364 					/*
8365 					 * PCI slot interrupt
8366 					 */
8367 					drmach_s1p_decode_slot_intr(exp, unum,
8368 					    pci, ino, iter);
8369 				} else if (ino <= 0x2f) {
8370 					/*
8371 					 * OBIO interrupt
8372 					 */
8373 					prom_printf("IO%d/P%d OBIO interrupt: "
8374 					    "ino=0x%x\n", exp, unum, ino);
8375 				} else if (ino <= 0x37) {
8376 					/*
8377 					 * Internal interrupt
8378 					 */
8379 					prom_printf("IO%d/P%d Internal "
8380 					    "interrupt: ino=0x%x (%s)\n",
8381 					    exp, unum, ino,
8382 					    drmach_schz_internal_ino2str(ino));
8383 				} else {
8384 					/*
8385 					 * NewLink interrupt
8386 					 */
8387 					prom_printf("IO%d/P%d NewLink "
8388 					    "interrupt: ino=0x%x\n", exp,
8389 					    unum, ino);
8390 				}
8391 
8392 				DRMACH_PR("drmach_s1p_schizo_log_intr: "
8393 				    "exp=%d, schizo=%d, pci_leaf=%c, "
8394 				    "ino=0x%x, intr_map_reg=0x%lx\n",
8395 				    exp, unum, (i == 0) ? 'A' : 'B', ino, reg);
8396 			}
8397 		}
8398 	}
8399 }
8400 
8401 /*
8402  * See Schizo Specification, Revision 51 (May 23, 2001), Section 22.2.4
8403  * "Safari Error Control/Log Registers", Table 22-11, page 248.
8404  */
8405 #define	DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR	(0x1ull << 4)
8406 
8407 /*
8408  * Check for possible error indicators prior to resuming the
8409  * AXQ driver, which will de-assert slot1 AXQ_DOMCTRL_PAUSE.
8410  */
8411 static void
8412 drmach_slot1_pause_verify(drmach_slot1_pause_t **slot1_paused,
8413     drmach_sr_iter_t iter)
8414 {
8415 	int	i, j;
8416 	int 	errflag = 0;
8417 	drmach_slot1_pause_t *slot1;
8418 
8419 	/*
8420 	 * Check for logged schizo bus error and pending interrupts.
8421 	 */
8422 	for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8423 		if ((slot1 = slot1_paused[i]) == NULL)
8424 			continue;
8425 
8426 		for (j = 0; j < STARCAT_SLOT1_IO_MAX; j++) {
8427 			if (slot1->schizo[j].csr_basepa == 0x0UL)
8428 				continue;
8429 
8430 			if (slot1->schizo[j].regs[iter].errlog &
8431 			    DRMACH_SCHIZO_SAFARI_UNMAPPED_ERR) {
8432 				if (!errflag) {
8433 					prom_printf("DR WARNING: interrupt "
8434 					    "attempt detected during "
8435 					    "copy-rename (%s):\n",
8436 					    (iter == DRMACH_POST_SUSPEND) ?
8437 					    "post suspend" : "pre resume");
8438 					++errflag;
8439 				}
8440 				drmach_s1p_schizo_log_intr(&slot1->schizo[j],
8441 				    i, j, iter);
8442 			}
8443 		}
8444 	}
8445 
8446 	/*
8447 	 * Check for changes in axq l2_io_q performance counters (2nd pass only)
8448 	 */
8449 	if (iter == DRMACH_PRE_RESUME) {
8450 		for (i = 0; i < STARCAT_BDSET_MAX; i++) {
8451 			if ((slot1 = slot1_paused[i]) == NULL)
8452 				continue;
8453 
8454 			if (slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND] !=
8455 			    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]) {
8456 				prom_printf("DR WARNING: IO transactions "
8457 				    "detected on IO%d during copy-rename: "
8458 				    "AXQ l2_io_q performance counter "
8459 				    "start=%d, end=%d\n", i,
8460 				    slot1->axq.pic_l2_io_q[DRMACH_POST_SUSPEND],
8461 				    slot1->axq.pic_l2_io_q[DRMACH_PRE_RESUME]);
8462 			}
8463 		}
8464 	}
8465 }
8466 
/*
 * Node of a circular, doubly-linked list of device instances. A
 * single-node ring points at itself in both directions.
 */
struct drmach_sr_list {
	dev_info_t		*dip;	/* held while the node exists */
	struct drmach_sr_list	*next;
	struct drmach_sr_list	*prev;
};

/*
 * Drivers that are suspended last and resumed first, in the order
 * given here. Each entry carries the ring of instances collected by
 * drmach_verify_sr; a NULL ring means no instance has been seen.
 */
static struct drmach_sr_ordered {
	char			*name;	/* matched as a prefix of "name" */
	struct drmach_sr_list	*ring;
} drmach_sr_ordered[] = {
	{ "iosram",			NULL },
	{ "address-extender-queue",	NULL },
	{ NULL,				NULL }, /* terminator -- required */
};
8481 
8482 static void
8483 drmach_sr_insert(struct drmach_sr_list **lp, dev_info_t *dip)
8484 {
8485 	struct drmach_sr_list *np;
8486 
8487 	DRMACH_PR("drmach_sr_insert: adding dip %p\n", dip);
8488 
8489 	np = (struct drmach_sr_list *)kmem_alloc(
8490 		sizeof (struct drmach_sr_list), KM_SLEEP);
8491 
8492 	ndi_hold_devi(dip);
8493 	np->dip = dip;
8494 
8495 	if (*lp == NULL) {
8496 		/* establish list */
8497 		*lp = np->next = np->prev = np;
8498 	} else {
8499 		/* place new node behind head node on ring list */
8500 		np->prev = (*lp)->prev;
8501 		np->next = *lp;
8502 		np->prev->next = np;
8503 		np->next->prev = np;
8504 	}
8505 }
8506 
8507 static void
8508 drmach_sr_delete(struct drmach_sr_list **lp, dev_info_t *dip)
8509 {
8510 	DRMACH_PR("drmach_sr_delete: searching for dip %p\n", dip);
8511 
8512 	if (*lp) {
8513 		struct drmach_sr_list *xp;
8514 
8515 		/* start search with mostly likely node */
8516 		xp = (*lp)->prev;
8517 		do {
8518 			if (xp->dip == dip) {
8519 				xp->prev->next = xp->next;
8520 				xp->next->prev = xp->prev;
8521 
8522 				if (xp == *lp)
8523 					*lp = xp->next;
8524 				if (xp == *lp)
8525 					*lp = NULL;
8526 				xp->dip = NULL;
8527 				ndi_rele_devi(dip);
8528 				kmem_free(xp, sizeof (*xp));
8529 
8530 				DRMACH_PR("drmach_sr_delete:"
8531 					" disposed sr node for dip %p", dip);
8532 				return;
8533 			}
8534 
8535 			DRMACH_PR("drmach_sr_delete: still searching\n");
8536 
8537 			xp = xp->prev;
8538 		} while (xp != (*lp)->prev);
8539 	}
8540 
8541 	/* every dip should be found during resume */
8542 	DRMACH_PR("ERROR: drmach_sr_delete: can't find dip %p", dip);
8543 }
8544 
8545 int
8546 drmach_verify_sr(dev_info_t *dip, int sflag)
8547 {
8548 	int	rv;
8549 	int	len;
8550 	char    name[OBP_MAXDRVNAME];
8551 
8552 	if (drmach_slot1_pause_debug) {
8553 		if (sflag && drmach_slot1_pause_init) {
8554 			drmach_slot1_pause_free(drmach_slot1_paused);
8555 			drmach_slot1_pause_init = 0;
8556 		} else if (!sflag && !drmach_slot1_pause_init) {
8557 			/* schedule init for next suspend */
8558 			drmach_slot1_pause_init = 1;
8559 		}
8560 	}
8561 
8562 	rv = ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
8563 		"name", &len);
8564 	if (rv == DDI_PROP_SUCCESS) {
8565 		int		portid;
8566 		uint64_t	reg;
8567 		struct drmach_sr_ordered *op;
8568 
8569 		rv = ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
8570 			DDI_PROP_DONTPASS, "name", (caddr_t)name, &len);
8571 
8572 		if (rv != DDI_PROP_SUCCESS)
8573 			return (0);
8574 
8575 		if (drmach_slot1_pause_debug && sflag &&
8576 		    drmach_is_slot1_pause_axq(dip, name, &portid, &reg)) {
8577 			drmach_slot1_pause_add_axq(dip, name, portid, reg,
8578 			    drmach_slot1_paused);
8579 		}
8580 
8581 		for (op = drmach_sr_ordered; op->name; op++) {
8582 			if (strncmp(op->name, name, strlen(op->name)) == 0) {
8583 				if (sflag)
8584 					drmach_sr_insert(&op->ring, dip);
8585 				else
8586 					drmach_sr_delete(&op->ring, dip);
8587 				return (1);
8588 			}
8589 		}
8590 	}
8591 
8592 	return (0);
8593 }
8594 
8595 static void
8596 drmach_sr_dip(dev_info_t *dip, int suspend)
8597 {
8598 	int	 rv;
8599 	major_t	 maj;
8600 	char	*name, *name_addr, *aka;
8601 
8602 	if ((name = ddi_get_name(dip)) == NULL)
8603 		name = "<null name>";
8604 	else if ((maj = ddi_name_to_major(name)) != -1)
8605 		aka = ddi_major_to_name(maj);
8606 	else
8607 		aka = "<unknown>";
8608 
8609 	if ((name_addr = ddi_get_name_addr(dip)) == NULL)
8610 		name_addr = "<null>";
8611 
8612 	prom_printf("\t%s %s@%s (aka %s)\n",
8613 		suspend ? "suspending" : "resuming",
8614 		name, name_addr, aka);
8615 
8616 	if (suspend) {
8617 		rv = devi_detach(dip, DDI_SUSPEND);
8618 	} else {
8619 		rv = devi_attach(dip, DDI_RESUME);
8620 	}
8621 
8622 	if (rv != DDI_SUCCESS) {
8623 		prom_printf("\tFAILED to %s %s@%s\n",
8624 			suspend ? "suspend" : "resume",
8625 			name, name_addr);
8626 	}
8627 }
8628 
8629 void
8630 drmach_suspend_last()
8631 {
8632 	struct drmach_sr_ordered *op;
8633 
8634 	if (drmach_slot1_pause_debug)
8635 		drmach_slot1_pause_add_io(drmach_slot1_paused);
8636 
8637 	/*
8638 	 * The ordering array declares the strict sequence in which
8639 	 * the named drivers are to suspended. Each element in
8640 	 * the array may have a double-linked ring list of driver
8641 	 * instances (dip) in the order in which they were presented
8642 	 * to drmach_verify_sr. If present, walk the list in the
8643 	 * forward direction to suspend each instance.
8644 	 */
8645 	for (op = drmach_sr_ordered; op->name; op++) {
8646 		if (op->ring) {
8647 			struct drmach_sr_list *rp;
8648 
8649 			rp = op->ring;
8650 			do {
8651 				drmach_sr_dip(rp->dip, 1);
8652 				rp = rp->next;
8653 			} while (rp != op->ring);
8654 		}
8655 	}
8656 
8657 	if (drmach_slot1_pause_debug) {
8658 		drmach_slot1_pause_update(drmach_slot1_paused,
8659 		    DRMACH_POST_SUSPEND);
8660 		drmach_slot1_pause_verify(drmach_slot1_paused,
8661 		    DRMACH_POST_SUSPEND);
8662 	}
8663 }
8664 
8665 void
8666 drmach_resume_first()
8667 {
8668 	struct drmach_sr_ordered *op = drmach_sr_ordered +
8669 		(sizeof (drmach_sr_ordered) / sizeof (drmach_sr_ordered[0]));
8670 
8671 	if (drmach_slot1_pause_debug) {
8672 		drmach_slot1_pause_update(drmach_slot1_paused,
8673 		    DRMACH_PRE_RESUME);
8674 		drmach_slot1_pause_verify(drmach_slot1_paused,
8675 		    DRMACH_PRE_RESUME);
8676 	}
8677 
8678 	op -= 1;	/* point at terminating element */
8679 
8680 	/*
8681 	 * walk ordering array and rings backwards to resume dips
8682 	 * in reverse order in which they were suspended
8683 	 */
8684 	while (--op >= drmach_sr_ordered) {
8685 		if (op->ring) {
8686 			struct drmach_sr_list *rp;
8687 
8688 			rp = op->ring->prev;
8689 			do {
8690 				drmach_sr_dip(rp->dip, 0);
8691 				rp = rp->prev;
8692 			} while (rp != op->ring->prev);
8693 		}
8694 	}
8695 }
8696 
8697 /*
8698  * Log a DR sysevent.
8699  * Return value: 0 success, non-zero failure.
8700  */
8701 int
8702 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
8703 {
8704 	sysevent_t			*ev;
8705 	sysevent_id_t			eid;
8706 	int				rv, km_flag;
8707 	sysevent_value_t		evnt_val;
8708 	sysevent_attr_list_t		*evnt_attr_list = NULL;
8709 	char				attach_pnt[MAXNAMELEN];
8710 
8711 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
8712 	attach_pnt[0] = '\0';
8713 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
8714 		rv = -1;
8715 		goto logexit;
8716 	}
8717 	if (verbose)
8718 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
8719 			    attach_pnt, hint, flag, verbose);
8720 
8721 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
8722 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
8723 		rv = -2;
8724 		goto logexit;
8725 	}
8726 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8727 	evnt_val.value.sv_string = attach_pnt;
8728 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
8729 				    &evnt_val, km_flag)) != 0)
8730 		goto logexit;
8731 
8732 	evnt_val.value_type = SE_DATA_TYPE_STRING;
8733 	evnt_val.value.sv_string = hint;
8734 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
8735 				    &evnt_val, km_flag)) != 0) {
8736 		sysevent_free_attr(evnt_attr_list);
8737 		goto logexit;
8738 	}
8739 
8740 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
8741 
8742 	/*
8743 	 * Log the event but do not sleep waiting for its
8744 	 * delivery. This provides insulation from syseventd.
8745 	 */
8746 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
8747 
8748 logexit:
8749 	if (ev)
8750 		sysevent_free(ev);
8751 	if ((rv != 0) && verbose)
8752 		cmn_err(CE_WARN,
8753 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
8754 			    rv, attach_pnt, hint);
8755 
8756 	return (rv);
8757 }
8758 
8759 /*
8760  * Initialize the mem_slice portion of a claim/unconfig/unclaim mailbox message.
8761  * Only the valid entries are modified, so the array should be zeroed out
8762  * initially.
8763  */
8764 static void
8765 drmach_msg_memslice_init(dr_memslice_t slice_arr[]) {
8766 	int	i;
8767 	char	c;
8768 
8769 	ASSERT(mutex_owned(&drmach_slice_table_lock));
8770 
8771 	for (i = 0; i < AXQ_MAX_EXP; i++) {
8772 		c = drmach_slice_table[i];
8773 
8774 		if (c & 0x20) {
8775 			slice_arr[i].valid = 1;
8776 			slice_arr[i].slice = c & 0x1f;
8777 		}
8778 	}
8779 }
8780 
8781 /*
8782  * Initialize the mem_regs portion of a claim/unconfig/unclaim mailbox message.
8783  * Only the valid entries are modified, so the array should be zeroed out
8784  * initially.
8785  */
8786 static void
8787 drmach_msg_memregs_init(dr_memregs_t regs_arr[]) {
8788 	int		rv, exp, mcnum, bank;
8789 	uint64_t	madr;
8790 	drmachid_t	id;
8791 	drmach_board_t	*bp;
8792 	drmach_mem_t	*mp;
8793 	dr_memregs_t	*memregs;
8794 
8795 	/* CONSTCOND */
8796 	ASSERT(DRMACH_MC_NBANKS == (PMBANKS_PER_PORT * LMBANKS_PER_PMBANK));
8797 
8798 	for (exp = 0; exp < 18; exp++) {
8799 		rv = drmach_array_get(drmach_boards,
8800 		    DRMACH_EXPSLOT2BNUM(exp, 0), &id);
8801 		ASSERT(rv == 0);	/* should never be out of bounds */
8802 		if (id == NULL) {
8803 			continue;
8804 		}
8805 
8806 		memregs = &regs_arr[exp];
8807 		bp = (drmach_board_t *)id;
8808 		for (mp = bp->mem; mp != NULL; mp = mp->next) {
8809 			mcnum = mp->dev.portid & 0x3;
8810 			for (bank = 0; bank < DRMACH_MC_NBANKS; bank++) {
8811 				drmach_mem_read_madr(mp, bank, &madr);
8812 				if (madr & DRMACH_MC_VALID_MASK) {
8813 					DRMACH_PR("%d.%d.%d.madr = 0x%lx\n",
8814 						exp, mcnum, bank, madr);
8815 					memregs->madr[mcnum][bank].hi =
8816 					    DRMACH_U64_TO_MCREGHI(madr);
8817 					memregs->madr[mcnum][bank].lo =
8818 					    DRMACH_U64_TO_MCREGLO(madr);
8819 				}
8820 			}
8821 		}
8822 	}
8823 }
8824 
8825 /*
8826  * Do not allow physical address range modification if either board on this
8827  * expander has processors in NULL LPA mode (CBASE=CBND=NULL).
8828  *
8829  * A side effect of NULL proc LPA mode in Starcat SSM is that local reads will
8830  * install the cache line as owned/dirty as a result of the RTSR transaction.
8831  * See section 5.2.3 of the Safari spec.  All processors will read the bus sync
8832  * list before the rename after flushing local caches.  When copy-rename
8833  * requires changing the physical address ranges (i.e. smaller memory target),
8834  * the bus sync list contains physical addresses that will not exist after the
8835  * rename.  If these cache lines are owned due to a RTSR, a system error can
8836  * occur following the rename when these cache lines are evicted and a writeback
8837  * is attempted.
8838  *
8839  * Incoming parameter represents either the copy-rename source or a candidate
8840  * target memory board.  On Starcat, only slot0 boards may have memory.
8841  */
8842 int
8843 drmach_allow_memrange_modify(drmachid_t s0id)
8844 {
8845 	drmach_board_t	*s0bp, *s1bp;
8846 	drmachid_t	s1id;
8847 	int		rv;
8848 
8849 	s0bp = s0id;
8850 
8851 	ASSERT(DRMACH_IS_BOARD_ID(s0id));
8852 	ASSERT(DRMACH_BNUM2SLOT(s0bp->bnum) == 0);
8853 
8854 	if (s0bp->flags & DRMACH_NULL_PROC_LPA) {
8855 		/*
8856 		 * This is reason enough to fail the request, no need
8857 		 * to check the device list for cpus.
8858 		 */
8859 		return (0);
8860 	}
8861 
8862 	/*
8863 	 * Check for MCPU board on the same expander.
8864 	 *
8865 	 * The board flag DRMACH_NULL_PROC_LPA can be set for all board
8866 	 * types, as it is derived at from the POST gdcd board flag
8867 	 * L1SSFLG_THIS_L1_NULL_PROC_LPA, which can be set (and should be
8868 	 * ignored) for boards with no processors.  Since NULL proc LPA
8869 	 * applies only to processors, we walk the devices array to detect
8870 	 * MCPUs.
8871 	 */
8872 	rv = drmach_array_get(drmach_boards, s0bp->bnum + 1, &s1id);
8873 	s1bp = s1id;
8874 	if (rv == 0 && s1bp != NULL) {
8875 
8876 		ASSERT(DRMACH_IS_BOARD_ID(s1id));
8877 		ASSERT(DRMACH_BNUM2SLOT(s1bp->bnum) == 1);
8878 		ASSERT(DRMACH_BNUM2EXP(s0bp->bnum) ==
8879 		    DRMACH_BNUM2EXP(s1bp->bnum));
8880 
8881 		if ((s1bp->flags & DRMACH_NULL_PROC_LPA) &&
8882 		    s1bp->devices != NULL) {
8883 			int		d_idx;
8884 			drmachid_t	d_id;
8885 
8886 			rv = drmach_array_first(s1bp->devices, &d_idx, &d_id);
8887 			while (rv == 0) {
8888 				if (DRMACH_IS_CPU_ID(d_id)) {
8889 					/*
8890 					 * Fail MCPU in NULL LPA mode.
8891 					 */
8892 					return (0);
8893 				}
8894 
8895 				rv = drmach_array_next(s1bp->devices, &d_idx,
8896 				    &d_id);
8897 			}
8898 		}
8899 	}
8900 
8901 	return (1);
8902 }
8903