xref: /titanic_52/usr/src/uts/sun4u/starfire/io/drmach.c (revision 3b133bec939f5230f040960ee1503dadd3dff343)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/debug.h>
28 #include <sys/types.h>
29 #include <sys/varargs.h>
30 #include <sys/errno.h>
31 #include <sys/cred.h>
32 #include <sys/dditypes.h>
33 #include <sys/devops.h>
34 #include <sys/modctl.h>
35 #include <sys/poll.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/ndi_impldefs.h>
41 #include <sys/stat.h>
42 #include <sys/kmem.h>
43 #include <sys/vmem.h>
44 #include <sys/processor.h>
45 #include <sys/spitregs.h>
46 #include <sys/cpuvar.h>
47 #include <sys/cpupart.h>
48 #include <sys/mem_config.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/systm.h>
51 #include <sys/machsystm.h>
52 #include <sys/autoconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sysmacros.h>
55 #include <sys/x_call.h>
56 #include <sys/promif.h>
57 #include <sys/prom_plat.h>
58 #include <sys/membar.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/mem_cage.h>
61 #include <sys/stack.h>
62 #include <sys/archsystm.h>
63 #include <vm/hat_sfmmu.h>
64 #include <sys/pte.h>
65 #include <sys/mmu.h>
66 #include <sys/cpu_module.h>
67 #include <sys/obpdefs.h>
68 #include <sys/note.h>
69 
70 #include <sys/starfire.h>	/* plat_max_... decls */
71 #include <sys/cvc.h>
72 #include <sys/cpu_sgnblk_defs.h>
73 #include <sys/drmach.h>
74 #include <sys/dr_util.h>
75 #include <sys/pda.h>
76 
77 #include <sys/sysevent.h>
78 #include <sys/sysevent/dr.h>
79 #include <sys/sysevent/eventdefs.h>
80 
81 
82 extern void		bcopy32_il(uint64_t, uint64_t);
83 extern void		flush_ecache_il(
84 				uint64_t physaddr, int size, int linesz);
85 extern uint_t		ldphysio_il(uint64_t physaddr);
86 extern void		stphysio_il(uint64_t physaddr, uint_t value);
87 
88 extern uint64_t		mc_get_mem_alignment(void);
89 extern uint64_t		mc_get_asr_addr(pnode_t);
90 extern uint64_t		mc_get_idle_addr(pnode_t);
91 extern uint64_t		mc_get_alignment_mask(pnode_t);
92 extern int		mc_read_asr(pnode_t, uint_t *);
93 extern int		mc_write_asr(pnode_t, uint_t);
94 extern uint64_t		mc_asr_to_pa(uint_t);
95 extern uint_t		mc_pa_to_asr(uint_t, uint64_t);
96 
97 extern int		pc_madr_add(int, int, int, int);
98 
/*
 * Argument bundle handed to a node-walk callback: the node being
 * walked and the opaque data pointer supplied by the walk's caller.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;

/*
 * A cursor into the device tree.  `here' records the current position;
 * get_dnode and walk are the methods used to read and to advance it.
 * drmach_node_new() installs the OBP-backed implementations.
 */
typedef struct drmach_node {
	void		*here;

	pnode_t		 (*get_dnode)(struct drmach_node *node);
	int		 (*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
} drmach_node_t;
111 
/*
 * Bounds-checked array of drmachid_t values indexed from min_index to
 * max_index inclusive.  arr_sz is the size of arr in bytes (it is the
 * length passed to kmem_zalloc/kmem_free).
 */
typedef struct {
	int		 min_index;
	int		 max_index;
	int		 arr_sz;
	drmachid_t	*arr;
} drmach_array_t;
118 
/*
 * Header shared by every drmach object.  It must be the first member
 * of each object type because DRMACH_OBJ() casts an id directly to
 * drmach_common_t *.  `isa' is a type tag: it is compared against the
 * address of the constructor (drmach_board_new, drmach_cpu_new, ...)
 * by the DRMACH_IS_*_ID() macros.
 */
typedef struct {
	void		*isa;

	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;

/*
 * A system board.  drmach_board_new() sets bnum to the board number,
 * connect_cpuid to -1 and devices to NULL; `devices', when present,
 * holds the board's drmach_device_t objects.
 */
typedef struct {
	drmach_common_t	 cm;
	int		 bnum;
	int		 assigned;
	int		 powered;
	int		 connect_cpuid;
	int		 cond;
	drmach_node_t	*tree;
	drmach_array_t	*devices;
} drmach_board_t;

/*
 * A device on a board.  drmach_device_new() fills in bp (owning
 * board), unum (initially -1), type (from the name2type[] table) and
 * node (a private duplicate of the tree cursor).
 */
typedef struct {
	drmach_common_t	 cm;
	drmach_board_t	*bp;
	int		 unum;
	int		 busy;
	int		 powered;
	const char	*type;
	drmach_node_t	*node;
} drmach_device_t;
148 
/*
 * State shared between drmach_configure() and the branch-create
 * callbacks (drmach_prom_select, drmach_branch_callback).
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;
	sbd_error_t	*err;
	dev_info_t	*dip;
} drmach_config_args_t;

/*
 * One entry of the MC idle-register array built by
 * drmach_prep_rename_script().  An entry with idle_addr == 0 and
 * mem == NULL terminates the array.
 */
typedef struct {
	uint64_t	 idle_addr;
	drmach_device_t	*mem;
} drmach_mc_idle_script_t;

/*
 * One step of the rename script: the address of an MC address space
 * register (masr_addr) and the value to be stored there (masr).
 */
typedef struct {
	uint64_t	masr_addr;
	uint_t		masr;
	uint_t		_filler;
} drmach_rename_script_t;

/*
 * NOTE(review): presumably the state block for the copy-rename
 * operation; its users are outside the portion of the file reviewed
 * here -- confirm field semantics against them.
 */
typedef struct {
	void		(*run)(void *arg);
	caddr_t		data;
	pda_handle_t	*ph;
	struct memlist	*c_ml;
	uint64_t	s_copybasepa;
	uint64_t	t_copybasepa;
	drmach_device_t	*restless_mc;	/* diagnostic output */
} drmach_copy_rename_program_t;

/* operation selector; NOTE(review): users are outside this view */
typedef enum {
	DO_IDLE,
	DO_UNIDLE,
	DO_PAUSE,
	DO_UNPAUSE
} drmach_iopc_op_t;

/*
 * Callback data carrying a board, a device count, a caller-supplied
 * `found' function with its opaque argument, and an error slot.
 * NOTE(review): users are outside this view.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
191 
/* one page of heap_arena address space, allocated by drmach_init() */
static caddr_t		 drmach_shutdown_va;

/* non-zero once drmach_init() has completed; cleared by drmach_fini() */
static int		 drmach_initialized;
/* board objects indexed by board number; built by drmach_init() */
static drmach_array_t	*drmach_boards;

/*
 * NOTE(review): presumably a polling delay and retry limit for CPU
 * operations; the users are outside the portion of the file reviewed.
 */
static int		 drmach_cpu_delay = 100;
static int		 drmach_cpu_ntries = 50000;

/* NCPU-byte array allocated from static_alloc_arena in _init() */
volatile uchar_t	*drmach_xt_mb;

/*
 * Do not change the drmach_shutdown_mbox structure without
 * considering the drmach_shutdown_asm assembly language code.
 */
struct drmach_shutdown_mbox {
	uint64_t	estack;
	uint64_t	flushaddr;
	int		size;
	int		linesize;
	uint64_t	physaddr;
};
/* allocated from static_alloc_arena in _init() */
struct drmach_shutdown_mbox	*drmach_shutdown_asm_mbox;
/*
 * Forward declarations for the object constructors; their addresses
 * are used as type tags by the DRMACH_IS_*_ID() macros below.
 */
static sbd_error_t	*drmach_device_new(drmach_node_t *,
				drmach_board_t *, drmach_device_t **);
static sbd_error_t	*drmach_cpu_new(drmach_device_t *);
static sbd_error_t	*drmach_mem_new(drmach_device_t *);
static sbd_error_t	*drmach_io_new(drmach_device_t *);

extern struct cpu	*SIGBCPU;
221 
/*
 * DRMACH_PR(...) is a printf that is active only in DEBUG kernels
 * with drmach_debug set.  The macro expands to a bare `if', so it
 * must not be used as the body of an if that has an else clause.
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
230 
/*
 * Object identification macros.
 *
 * Every drmach object begins with a drmach_common_t whose `isa' member
 * holds the address of the constructor that created it.  The macros
 * below test a drmachid_t against those tags.  The argument is fully
 * parenthesized so that any expression may be passed; note that it is
 * evaluated more than once.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* board number of the board that carries the given cpu id */
#define	DRMACH_CPUID2BNUM(cpuid) \
	((cpuid) / MAX_CPU_UNITS_PER_BOARD)

/*
 * Build an ESTF_INTERNAL error whose text records the source line of
 * the macro invocation.
 */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, ESTF_INTERNAL, drmach_ie_fmt, __LINE__)
static char		*drmach_ie_fmt = "drmach.c %d";
268 
/*
 * Table of the device tree node names drmach manages.  For each name
 * it gives the drmach device type string and the constructor that
 * drmach_device_new() invokes to finish building the device object.
 * Nodes whose name is not listed here are ignored.
 */
static struct {
	const char	 *name;
	const char	 *type;
	sbd_error_t	 *(*new)(drmach_device_t *);
} name2type[] = {
	{ "SUNW,UltraSPARC",	DRMACH_DEVTYPE_CPU,  drmach_cpu_new },
	{ "mem-unit",		DRMACH_DEVTYPE_MEM,  drmach_mem_new },
	{ "pci",		DRMACH_DEVTYPE_PCI,  drmach_io_new  },
	{ "sbus",		DRMACH_DEVTYPE_SBUS, drmach_io_new  },
};
279 
/* names of the node types to clean up when a board is unconfigured */
#define	MISC_COUNTER_TIMER_DEVNAME	"counter-timer"
#define	MISC_PERF_COUNTER_DEVNAME	"perf-counter"

/* utility: number of bytes in one megabyte */
#define	MBYTE	(1048576ull)

/*
 * This is necessary because the CPU support needs
 * to call cvc_assign_iocpu.
 */
#ifndef lint
char _depends_on[] = "drv/cvc";
#endif  /* lint */
294 
295 /*
296  * drmach autoconfiguration data structures and interfaces
297  */
298 
extern struct mod_ops mod_miscops;

/* drmach is a "misc" module: it exports functions but no driver entry */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Sun Enterprise 10000 DR"
};

/* linkage handed to mod_install/mod_remove/mod_info */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

/* serializes drmach_init(); set up in _init() */
static kmutex_t drmach_i_lock;
313 
314 int
315 _init(void)
316 {
317 	int err;
318 
319 	/* check that we have the correct version of obp */
320 	if (prom_test("SUNW,UE10000,add-brd") != 0) {
321 
322 		cmn_err(CE_WARN, "!OBP/SSP upgrade is required to enable "
323 		    "DR Functionality");
324 
325 		return (-1);
326 	}
327 
328 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
329 
330 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
331 	    NCPU * sizeof (uchar_t), VM_SLEEP);
332 	drmach_shutdown_asm_mbox = (struct drmach_shutdown_mbox *)
333 	    vmem_alloc(static_alloc_arena, sizeof (struct drmach_shutdown_mbox),
334 	    VM_SLEEP);
335 
336 	if ((err = mod_install(&modlinkage)) != 0) {
337 		mutex_destroy(&drmach_i_lock);
338 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
339 		    NCPU * sizeof (uchar_t));
340 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
341 		    sizeof (struct drmach_shutdown_mbox));
342 	}
343 
344 	return (err);
345 }
346 
347 int
348 _fini(void)
349 {
350 	static int drmach_fini(void);
351 
352 	if (drmach_fini())
353 		return (DDI_FAILURE);
354 	else
355 		return (mod_remove(&modlinkage));
356 }
357 
358 int
359 _info(struct modinfo *modinfop)
360 {
361 	return (mod_info(&modlinkage, modinfop));
362 }
363 
364 static pnode_t
365 drmach_node_obp_get_dnode(drmach_node_t *np)
366 {
367 	return ((pnode_t)(uintptr_t)np->here);
368 }
369 
370 static int
371 drmach_node_obp_walk(drmach_node_t *np, void *data,
372 		int (*cb)(drmach_node_walk_args_t *args))
373 {
374 	pnode_t			nodeid;
375 	int			rv;
376 	drmach_node_walk_args_t	args;
377 
378 	/* initialized args structure for callback */
379 	args.node = np;
380 	args.data = data;
381 
382 	nodeid = prom_childnode(prom_rootnode());
383 
384 	/* save our new position with in the tree */
385 	np->here = (void *)(uintptr_t)nodeid;
386 
387 	rv = 0;
388 	while (nodeid != OBP_NONODE) {
389 		rv = (*cb)(&args);
390 		if (rv)
391 			break;
392 
393 		nodeid = prom_nextnode(nodeid);
394 
395 		/* save our new position with in the tree */
396 		np->here = (void *)(uintptr_t)nodeid;
397 	}
398 
399 	return (rv);
400 }
401 
402 static drmach_node_t *
403 drmach_node_new(void)
404 {
405 	drmach_node_t *np;
406 
407 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
408 
409 	np->get_dnode = drmach_node_obp_get_dnode;
410 	np->walk = drmach_node_obp_walk;
411 
412 	return (np);
413 }
414 
415 static void
416 drmach_node_dispose(drmach_node_t *np)
417 {
418 	kmem_free(np, sizeof (*np));
419 }
420 
421 static dev_info_t *
422 drmach_node_get_dip(drmach_node_t *np)
423 {
424 	pnode_t nodeid;
425 
426 	nodeid = np->get_dnode(np);
427 	if (nodeid == OBP_NONODE)
428 		return (NULL);
429 	else {
430 		dev_info_t *dip;
431 
432 		/* The root node doesn't have to be held */
433 		dip = e_ddi_nodeid_to_dip(nodeid);
434 		if (dip) {
435 			/*
436 			 * Branch rooted at dip is already held, so release
437 			 * hold acquired in e_ddi_nodeid_to_dip()
438 			 */
439 			ddi_release_devi(dip);
440 			ASSERT(e_ddi_branch_held(dip));
441 		}
442 
443 		return (dip);
444 	}
445 	/*NOTREACHED*/
446 }
447 
448 static pnode_t
449 drmach_node_get_dnode(drmach_node_t *np)
450 {
451 	return (np->get_dnode(np));
452 }
453 
454 static int
455 drmach_node_walk(drmach_node_t *np, void *param,
456 		int (*cb)(drmach_node_walk_args_t *args))
457 {
458 	return (np->walk(np, param, cb));
459 }
460 
461 static int
462 drmach_node_get_prop(drmach_node_t *np, char *name, void *buf)
463 {
464 	pnode_t	nodeid;
465 	int	rv;
466 
467 	nodeid = np->get_dnode(np);
468 	if (nodeid == OBP_NONODE)
469 		rv = -1;
470 	else if (prom_getproplen(nodeid, (caddr_t)name) < 0)
471 		rv = -1;
472 	else {
473 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
474 		rv = 0;
475 	}
476 
477 	return (rv);
478 }
479 
480 static int
481 drmach_node_get_proplen(drmach_node_t *np, char *name, int *len)
482 {
483 	pnode_t	 nodeid;
484 	int	 rv;
485 
486 	nodeid = np->get_dnode(np);
487 	if (nodeid == OBP_NONODE)
488 		rv = -1;
489 	else {
490 		*len = prom_getproplen(nodeid, (caddr_t)name);
491 		rv = (*len < 0 ? -1 : 0);
492 	}
493 
494 	return (rv);
495 }
496 
497 static drmachid_t
498 drmach_node_dup(drmach_node_t *np)
499 {
500 	drmach_node_t *dup;
501 
502 	dup = drmach_node_new();
503 	dup->here = np->here;
504 
505 	return (dup);
506 }
507 
508 /*
509  * drmach_array provides convenient array construction, access,
510  * bounds checking and array destruction logic.
511  */
512 
513 static drmach_array_t *
514 drmach_array_new(int min_index, int max_index)
515 {
516 	drmach_array_t *arr;
517 
518 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
519 
520 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
521 	if (arr->arr_sz > 0) {
522 		arr->min_index = min_index;
523 		arr->max_index = max_index;
524 
525 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
526 		return (arr);
527 	} else {
528 		kmem_free(arr, sizeof (*arr));
529 		return (0);
530 	}
531 }
532 
533 static int
534 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
535 {
536 	if (idx < arr->min_index || idx > arr->max_index)
537 		return (-1);
538 	else {
539 		arr->arr[idx - arr->min_index] = val;
540 		return (0);
541 	}
542 	/*NOTREACHED*/
543 }
544 
545 static int
546 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
547 {
548 	if (idx < arr->min_index || idx > arr->max_index)
549 		return (-1);
550 	else {
551 		*val = arr->arr[idx - arr->min_index];
552 		return (0);
553 	}
554 	/*NOTREACHED*/
555 }
556 
557 static int
558 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
559 {
560 	int rv;
561 
562 	*idx = arr->min_index;
563 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
564 		*idx += 1;
565 
566 	return (rv);
567 }
568 
569 static int
570 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
571 {
572 	int rv;
573 
574 	*idx += 1;
575 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
576 		*idx += 1;
577 
578 	return (rv);
579 }
580 
581 static void
582 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
583 {
584 	drmachid_t	val;
585 	int		idx;
586 	int		rv;
587 
588 	rv = drmach_array_first(arr, &idx, &val);
589 	while (rv == 0) {
590 		(*disposer)(val);
591 		rv = drmach_array_next(arr, &idx, &val);
592 	}
593 
594 	kmem_free(arr->arr, arr->arr_sz);
595 	kmem_free(arr, sizeof (*arr));
596 }
597 
598 /*ARGSUSED*/
599 static int
600 drmach_prom_select(pnode_t nodeid, void *arg, uint_t flags)
601 {
602 	int			rprop[64];
603 	pnode_t			saved;
604 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
605 	drmach_device_t		*dp = ap->dp;
606 	sbd_error_t		*err;
607 
608 	saved = drmach_node_get_dnode(dp->node);
609 
610 	if (nodeid != saved)
611 		return (DDI_FAILURE);
612 
613 	if (saved == OBP_NONODE) {
614 		err = DRMACH_INTERNAL_ERROR();
615 		DRERR_SET_C(&ap->err, &err);
616 		return (DDI_FAILURE);
617 	}
618 
619 	if (prom_getprop(nodeid, OBP_REG, (caddr_t)rprop) <= 0) {
620 		return (DDI_FAILURE);
621 	}
622 
623 	return (DDI_SUCCESS);
624 }
625 
626 /*ARGSUSED*/
627 static void
628 drmach_branch_callback(dev_info_t *rdip, void *arg, uint_t flags)
629 {
630 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
631 
632 	ASSERT(ap->dip == NULL);
633 
634 	ap->dip = rdip;
635 }
636 
637 sbd_error_t *
638 drmach_configure(drmachid_t id, int flags)
639 {
640 	drmach_device_t		*dp;
641 	sbd_error_t		*err;
642 	drmach_config_args_t	ca;
643 	devi_branch_t		b = {0};
644 	dev_info_t		*fdip = NULL;
645 
646 	if (!DRMACH_IS_DEVICE_ID(id))
647 		return (drerr_new(0, ESTF_INAPPROP, NULL));
648 	dp = id;
649 
650 	ca.dp = dp;
651 	ca.flags = flags;
652 	ca.err = NULL;		/* will be set if error detected */
653 	ca.dip = NULL;
654 
655 	b.arg = &ca;
656 	b.type = DEVI_BRANCH_PROM;
657 	b.create.prom_branch_select = drmach_prom_select;
658 	b.devi_branch_callback = drmach_branch_callback;
659 
660 	if (e_ddi_branch_create(ddi_root_node(), &b, &fdip,
661 	    DEVI_BRANCH_CHILD | DEVI_BRANCH_CONFIGURE) != 0) {
662 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
663 
664 		/*
665 		 * If non-NULL, fdip is returned held and must be released.
666 		 */
667 		if (fdip != NULL) {
668 			(void) ddi_pathname(fdip, path);
669 			ddi_release_devi(fdip);
670 		} else if (ca.dip != NULL) {
671 			/* safe to call ddi_pathname as dip already held */
672 			(void) ddi_pathname(ca.dip, path);
673 		} else {
674 			(void) strcpy(path, "<none>");
675 		}
676 
677 		err = drerr_new(1, ESTF_DRVFAIL, path);
678 		DRERR_SET_C(&ca.err, &err);
679 		kmem_free(path, MAXPATHLEN);
680 	}
681 
682 	return (ca.err);
683 }
684 
685 static sbd_error_t *
686 drmach_device_new(drmach_node_t *node,
687 	drmach_board_t *bp, drmach_device_t **dpp)
688 {
689 	int		 i;
690 	int		 rv;
691 	drmach_device_t	*dp;
692 	sbd_error_t	*err;
693 	char		 name[OBP_MAXDRVNAME];
694 
695 	rv = drmach_node_get_prop(node, OBP_NAME, name);
696 	if (rv) {
697 		/* every node is expected to have a name */
698 		err = drerr_new(1, ESTF_GETPROP,
699 		    "PROM Node 0x%x: property %s",
700 		    (uint_t)node->get_dnode(node), OBP_NAME);
701 
702 		return (err);
703 	}
704 
705 	/*
706 	 * The node currently being examined is not listed in the name2type[]
707 	 * array.  In this case, the node is no interest to drmach.  Both
708 	 * dp and err are initialized here to yield nothing (no device or
709 	 * error structure) for this case.
710 	 */
711 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
712 		if (strcmp(name2type[i].name, name) == 0)
713 			break;
714 
715 	if (i < sizeof (name2type) / sizeof (name2type[0])) {
716 		dp = kmem_zalloc(sizeof (drmach_device_t), KM_SLEEP);
717 
718 		dp->bp = bp;
719 		dp->unum = -1;
720 		dp->node = drmach_node_dup(node);
721 		dp->type = name2type[i].type;
722 
723 		err = (name2type[i].new)(dp);
724 		if (err) {
725 			drmach_node_dispose(node);
726 			kmem_free(dp, sizeof (*dp));
727 			dp = NULL;
728 		}
729 
730 		*dpp = dp;
731 		return (err);
732 	}
733 
734 	/*
735 	 * The node currently being examined is not listed in the name2type[]
736 	 * array.  In this case, the node is no interest to drmach.  Both
737 	 * dp and err are initialized here to yield nothing (no device or
738 	 * error structure) for this case.
739 	 */
740 	*dpp = NULL;
741 	return (NULL);
742 }
743 
744 static void
745 drmach_device_dispose(drmachid_t id)
746 {
747 	drmach_device_t *self = id;
748 
749 	if (self->node)
750 		drmach_node_dispose(self->node);
751 
752 	kmem_free(self, sizeof (*self));
753 }
754 
755 static sbd_error_t *
756 drmach_device_get_prop(drmach_device_t *dp, char *name, void *buf)
757 {
758 	sbd_error_t	*err = NULL;
759 	int		 rv;
760 
761 	rv = drmach_node_get_prop(dp->node, name, buf);
762 	if (rv) {
763 		err = drerr_new(1, ESTF_GETPROP,
764 		    "%s::%s: property %s",
765 		    dp->bp->cm.name, dp->cm.name, name);
766 	}
767 
768 	return (err);
769 }
770 
771 static sbd_error_t *
772 drmach_device_get_proplen(drmach_device_t *dp, char *name, int *len)
773 {
774 	sbd_error_t	*err = NULL;
775 	int		 rv;
776 
777 	rv = drmach_node_get_proplen(dp->node, name, len);
778 	if (rv) {
779 		err = drerr_new(1, ESTF_GETPROPLEN,
780 		    "%s::%s: property %s",
781 		    dp->bp->cm.name, dp->cm.name, name);
782 	}
783 
784 	return (err);
785 }
786 
787 static drmach_board_t *
788 drmach_board_new(int bnum)
789 {
790 	static sbd_error_t *drmach_board_release(drmachid_t);
791 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
792 
793 	drmach_board_t	*bp;
794 
795 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
796 
797 	bp->cm.isa = (void *)drmach_board_new;
798 	bp->cm.release = drmach_board_release;
799 	bp->cm.status = drmach_board_status;
800 
801 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
802 
803 	bp->bnum = bnum;
804 	bp->devices = NULL;
805 	bp->connect_cpuid = -1;
806 	bp->tree = drmach_node_new();
807 	bp->assigned = !drmach_initialized;
808 	bp->powered = !drmach_initialized;
809 
810 	(void) drmach_array_set(drmach_boards, bnum, bp);
811 	return (bp);
812 }
813 
814 static void
815 drmach_board_dispose(drmachid_t id)
816 {
817 	drmach_board_t *bp;
818 
819 	ASSERT(DRMACH_IS_BOARD_ID(id));
820 	bp = id;
821 
822 	if (bp->tree)
823 		drmach_node_dispose(bp->tree);
824 
825 	if (bp->devices)
826 		drmach_array_dispose(bp->devices, drmach_device_dispose);
827 
828 	kmem_free(bp, sizeof (*bp));
829 }
830 
831 static sbd_error_t *
832 drmach_board_status(drmachid_t id, drmach_status_t *stat)
833 {
834 	sbd_error_t	*err = NULL;
835 	drmach_board_t	*bp;
836 
837 	if (!DRMACH_IS_BOARD_ID(id))
838 		return (drerr_new(0, ESTF_INAPPROP, NULL));
839 	bp = id;
840 
841 	stat->assigned = bp->assigned;
842 	stat->powered = bp->powered;
843 	stat->busy = 0;			/* assume not busy */
844 	stat->configured = 0;		/* assume not configured */
845 	stat->empty = 0;
846 	stat->cond = bp->cond = SBD_COND_OK;
847 	(void) strncpy(stat->type, "System Brd", sizeof (stat->type));
848 	stat->info[0] = '\0';
849 
850 	if (bp->devices) {
851 		int		 rv;
852 		int		 d_idx;
853 		drmachid_t	 d_id;
854 
855 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
856 		while (rv == 0) {
857 			drmach_status_t	d_stat;
858 
859 			err = drmach_status(d_id, &d_stat);
860 			if (err)
861 				break;
862 
863 			stat->busy |= d_stat.busy;
864 			stat->configured |= d_stat.configured;
865 
866 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
867 		}
868 	}
869 
870 	return (err);
871 }
872 
873 /* a simple routine to reduce redundancy of this common logic */
874 static pda_handle_t
875 drmach_pda_open(void)
876 {
877 	pda_handle_t ph;
878 
879 	ph = pda_open();
880 	if (ph == NULL) {
881 		/* catch in debug kernels */
882 		ASSERT(0);
883 		cmn_err(CE_WARN, "pda_open failed");
884 	}
885 
886 	return (ph);
887 }
888 
#ifdef DEBUG
/* when non-zero, drmach_init() calls debug_enter() on entry */
int drmach_init_break = 0;
#endif
892 
893 static int
894 hold_rele_branch(dev_info_t *rdip, void *arg)
895 {
896 	int	i;
897 	int	*holdp = (int *)arg;
898 	char	*name = ddi_node_name(rdip);
899 
900 	/*
901 	 * For Starfire, we must be children of the root devinfo node
902 	 */
903 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
904 
905 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
906 		if (strcmp(name2type[i].name, name) == 0)
907 			break;
908 
909 	if (i == sizeof (name2type) / sizeof (name2type[0])) {
910 		/* Not of interest to us */
911 		return (DDI_WALK_PRUNECHILD);
912 	}
913 
914 	if (*holdp) {
915 		ASSERT(!e_ddi_branch_held(rdip));
916 		e_ddi_branch_hold(rdip);
917 	} else {
918 		ASSERT(e_ddi_branch_held(rdip));
919 		e_ddi_branch_rele(rdip);
920 	}
921 
922 	return (DDI_WALK_PRUNECHILD);
923 }
924 
925 static int
926 drmach_init(void)
927 {
928 	pnode_t		nodeid;
929 	dev_info_t	*rdip;
930 	int		hold, circ;
931 
932 #ifdef DEBUG
933 	if (drmach_init_break)
934 		debug_enter("drmach_init: drmach_init_break set\n");
935 #endif
936 	mutex_enter(&drmach_i_lock);
937 	if (drmach_initialized) {
938 		mutex_exit(&drmach_i_lock);
939 		return (0);
940 	}
941 
942 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
943 
944 	nodeid = prom_childnode(prom_rootnode());
945 	do {
946 		int		 bnum;
947 		drmachid_t	 id;
948 
949 		bnum = -1;
950 		(void) prom_getprop(nodeid, OBP_BOARDNUM, (caddr_t)&bnum);
951 		if (bnum == -1)
952 			continue;
953 
954 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
955 			cmn_err(CE_WARN, "OBP node 0x%x has"
956 			    " invalid property value, %s=%d",
957 			    nodeid, OBP_BOARDNUM, bnum);
958 
959 			/* clean up */
960 			drmach_array_dispose(
961 			    drmach_boards, drmach_board_dispose);
962 
963 			mutex_exit(&drmach_i_lock);
964 			return (-1);
965 		} else if (id == NULL)
966 			(void) drmach_board_new(bnum);
967 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
968 
969 	drmach_shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
970 
971 	/*
972 	 * Walk immediate children of devinfo root node and hold
973 	 * all devinfo branches of interest.
974 	 */
975 	hold = 1;
976 	rdip = ddi_root_node();
977 
978 	ndi_devi_enter(rdip, &circ);
979 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
980 	ndi_devi_exit(rdip, circ);
981 
982 	drmach_initialized = 1;
983 
984 	mutex_exit(&drmach_i_lock);
985 
986 	return (0);
987 }
988 
989 static int
990 drmach_fini(void)
991 {
992 	dev_info_t	*rdip;
993 	int		hold, circ;
994 
995 	if (drmach_initialized) {
996 		int		busy = 0;
997 		int		rv;
998 		int		idx;
999 		drmachid_t	id;
1000 
1001 		ASSERT(drmach_boards != NULL);
1002 
1003 		rv = drmach_array_first(drmach_boards, &idx, &id);
1004 		while (rv == 0) {
1005 			sbd_error_t	*err;
1006 			drmach_status_t stat;
1007 
1008 			err = drmach_board_status(id, &stat);
1009 			if (err) {
1010 				/* catch in debug kernels */
1011 				ASSERT(0);
1012 				sbd_err_clear(&err);
1013 				busy = 1;
1014 			} else
1015 				busy |= stat.busy;
1016 
1017 			rv = drmach_array_next(drmach_boards, &idx, &id);
1018 		}
1019 
1020 		if (busy)
1021 			return (-1);
1022 
1023 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
1024 		drmach_boards = NULL;
1025 
1026 		vmem_free(heap_arena, drmach_shutdown_va, PAGESIZE);
1027 
1028 		/*
1029 		 * Walk immediate children of the root devinfo node
1030 		 * releasing holds acquired on branches in drmach_init()
1031 		 */
1032 		hold = 0;
1033 		rdip = ddi_root_node();
1034 
1035 		ndi_devi_enter(rdip, &circ);
1036 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
1037 		ndi_devi_exit(rdip, circ);
1038 
1039 		mutex_destroy(&drmach_i_lock);
1040 
1041 		drmach_initialized = 0;
1042 	}
1043 	if (drmach_xt_mb != NULL) {
1044 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
1045 		    NCPU * sizeof (uchar_t));
1046 	}
1047 	if (drmach_shutdown_asm_mbox != NULL) {
1048 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
1049 		    sizeof (struct drmach_shutdown_mbox));
1050 	}
1051 	return (0);
1052 }
1053 
1054 static sbd_error_t *
1055 drmach_get_mc_asr_addr(drmachid_t id, uint64_t *pa)
1056 {
1057 	drmach_device_t	*dp;
1058 	pnode_t		nodeid;
1059 	uint64_t	addr;
1060 
1061 	if (!DRMACH_IS_MEM_ID(id))
1062 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1063 	dp = id;
1064 
1065 	nodeid = drmach_node_get_dnode(dp->node);
1066 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1067 		return (DRMACH_INTERNAL_ERROR());
1068 
1069 	addr = mc_get_asr_addr(nodeid);
1070 	if (addr == (uint64_t)-1)
1071 		return (DRMACH_INTERNAL_ERROR());
1072 
1073 	*pa = addr;
1074 	return (NULL);
1075 }
1076 
1077 static sbd_error_t *
1078 drmach_get_mc_idle_addr(drmachid_t id, uint64_t *pa)
1079 {
1080 	drmach_device_t	*dp;
1081 	pnode_t		nodeid;
1082 	uint64_t	addr;
1083 
1084 	if (!DRMACH_IS_MEM_ID(id))
1085 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1086 	dp = id;
1087 
1088 	nodeid = drmach_node_get_dnode(dp->node);
1089 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1090 		return (DRMACH_INTERNAL_ERROR());
1091 
1092 	addr = mc_get_idle_addr(nodeid);
1093 	if (addr == (uint64_t)-1)
1094 		return (DRMACH_INTERNAL_ERROR());
1095 
1096 	*pa = addr;
1097 	return (NULL);
1098 }
1099 
1100 static sbd_error_t *
1101 drmach_read_mc_asr(drmachid_t id, uint_t *mcregp)
1102 {
1103 	drmach_device_t	*dp;
1104 	pnode_t		 nodeid;
1105 	sbd_error_t	*err;
1106 
1107 	if (!DRMACH_IS_MEM_ID(id))
1108 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1109 	dp = id;
1110 
1111 	nodeid = drmach_node_get_dnode(dp->node);
1112 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1113 		err = DRMACH_INTERNAL_ERROR();
1114 	else if (mc_read_asr(nodeid, mcregp) == -1)
1115 		err = DRMACH_INTERNAL_ERROR();
1116 	else
1117 		err = NULL;
1118 
1119 	return (err);
1120 }
1121 
1122 static sbd_error_t *
1123 drmach_write_mc_asr(drmachid_t id, uint_t mcreg)
1124 {
1125 	drmach_device_t	*dp;
1126 	pnode_t		 nodeid;
1127 	sbd_error_t	*err;
1128 
1129 	if (!DRMACH_IS_MEM_ID(id))
1130 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1131 	dp = id;
1132 
1133 	nodeid = drmach_node_get_dnode(dp->node);
1134 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1135 		err = DRMACH_INTERNAL_ERROR();
1136 	else if (mc_write_asr(nodeid, mcreg) == -1)
1137 		err = DRMACH_INTERNAL_ERROR();
1138 	else
1139 		err = NULL;
1140 
1141 	return (err);
1142 }
1143 
1144 static sbd_error_t *
1145 drmach_prep_rename_script(drmach_device_t *s_mem, drmach_device_t *t_mem,
1146 	uint64_t t_slice_offset, caddr_t buf, int buflen)
1147 {
1148 	int			i, b, m;
1149 	drmach_mc_idle_script_t	*isp;
1150 	drmach_rename_script_t	*rsp;
1151 	int			s_bd, t_bd;
1152 	uint_t			s_masr, t_masr;
1153 	uint64_t		s_new_basepa, t_new_basepa;
1154 	int			b_idx, rv;
1155 	sbd_error_t		*err;
1156 	drmachid_t		 b_id;
1157 	drmach_board_t		*brd;
1158 
1159 #ifdef DEBUG
1160 	/*
1161 	 * Starfire CPU/MEM/IO boards have only one MC per board.
1162 	 * This function has been coded with that fact in mind.
1163 	 */
1164 	ASSERT(MAX_MEM_UNITS_PER_BOARD == 1);
1165 
1166 	/*
1167 	 * calculate the maximum space that could be consumed,
1168 	 * then verify the available buffer space is adequate.
1169 	 */
1170 	m  = sizeof (drmach_mc_idle_script_t *) * 2; /* two MCs */
1171 	b  = sizeof (drmach_rename_script_t *) * 3 * MAX_CPU_UNITS_PER_BOARD;
1172 	b += sizeof (drmach_rename_script_t *) * 3 * MAX_IO_UNITS_PER_BOARD;
1173 	b *= MAX_BOARDS;
1174 	b += sizeof (drmach_rename_script_t *) * 3;
1175 	b += sizeof (drmach_rename_script_t *) * 1;
1176 	ASSERT(m + b < buflen);
1177 #endif
1178 
1179 	/*
1180 	 * construct an array of MC idle register addresses of
1181 	 * both MCs.  The array is zero terminated -- as expected
1182 	 * by drmach_copy_rename_prog__relocatable().
1183 	 */
1184 	isp = (drmach_mc_idle_script_t *)buf;
1185 
1186 	/* source mc */
1187 	err = drmach_get_mc_idle_addr(s_mem, &isp->idle_addr);
1188 	if (err)
1189 		return (err);
1190 	isp->mem = s_mem;
1191 	isp += 1;
1192 
1193 	/* target mc */
1194 	err = drmach_get_mc_idle_addr(t_mem, &isp->idle_addr);
1195 	if (err)
1196 		return (err);
1197 	isp->mem = t_mem;
1198 	isp += 1;
1199 
1200 	/* terminator */
1201 	isp->idle_addr = 0;
1202 	isp->mem = NULL;
1203 	isp += 1;
1204 
1205 	/* fetch source mc asr register value */
1206 	err = drmach_read_mc_asr(s_mem, &s_masr);
1207 	if (err)
1208 		return (err);
1209 	else if (s_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1210 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1211 		    s_mem->bp->cm.name, s_mem->cm.name));
1212 	}
1213 
1214 	/* fetch target mc asr register value */
1215 	err = drmach_read_mc_asr(t_mem, &t_masr);
1216 	if (err)
1217 		return (err);
1218 	else if (t_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1219 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1220 		    t_mem->bp->cm.name, t_mem->cm.name));
1221 	}
1222 
1223 	/* get new source base pa from target's masr */
1224 	s_new_basepa = mc_asr_to_pa(t_masr);
1225 
1226 	/*
1227 	 * remove any existing slice offset to realign
1228 	 * memory with board's slice boundary
1229 	 */
1230 	s_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1231 
1232 	/* get new target base pa from source's masr */
1233 	t_new_basepa  = mc_asr_to_pa(s_masr);
1234 
1235 	/* remove any existing slice offset, then apply new offset */
1236 	t_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1237 	t_new_basepa += t_slice_offset;
1238 
1239 	/* encode new base pa into s_masr.  turn off mem present bit */
1240 	s_masr  = mc_pa_to_asr(s_masr, s_new_basepa);
1241 	s_masr &= ~STARFIRE_MC_MEM_PRESENT_MASK;
1242 
1243 	/* encode new base pa into t_masr.  turn on mem present bit */
1244 	t_masr  = mc_pa_to_asr(t_masr, t_new_basepa);
1245 	t_masr |= STARFIRE_MC_MEM_PRESENT_MASK;
1246 
1247 	/*
1248 	 * Step 0:	Mark source memory as not present.
1249 	 */
1250 	m = 0;
1251 	rsp = (drmach_rename_script_t *)isp;
1252 	err = drmach_get_mc_asr_addr(s_mem, &rsp[m].masr_addr);
1253 	if (err)
1254 		return (err);
1255 	rsp[m].masr = s_masr;
1256 	m++;
1257 
1258 	/*
1259 	 * Step 1:	Write source base address to target MC
1260 	 *		with present bit off.
1261 	 */
1262 	err = drmach_get_mc_asr_addr(t_mem, &rsp[m].masr_addr);
1263 	if (err)
1264 		return (err);
1265 	rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1266 	m++;
1267 
1268 	/*
1269 	 * Step 2:	Now rewrite target reg with present bit on.
1270 	 */
1271 	rsp[m].masr_addr = rsp[m-1].masr_addr;
1272 	rsp[m].masr = t_masr;
1273 	m++;
1274 
1275 	s_bd = s_mem->bp->bnum;
1276 	t_bd = t_mem->bp->bnum;
1277 
1278 	DRMACH_PR("preparing script for CPU and IO units:\n");
1279 
1280 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
1281 	if (rv) {
1282 		/* catch this in debug kernels */
1283 		ASSERT(0);
1284 		return (DRMACH_INTERNAL_ERROR());
1285 	}
1286 
1287 	do {
1288 		int			 d_idx;
1289 		drmachid_t		 d_id;
1290 		drmach_device_t		*device;
1291 
1292 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
1293 		brd = b_id;
1294 		b = brd->bnum;
1295 
1296 		/*
1297 		 * Step 3:	Update PC MADR tables for CPUs.
1298 		 */
1299 		if (brd->devices == NULL) {
1300 			/* devices not initialized */
1301 			continue;
1302 		}
1303 
1304 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1305 		if (rv) {
1306 			/* must mean no devices on this board */
1307 			break;
1308 		}
1309 
1310 		DRMACH_PR("\t%s\n", brd->cm.name);
1311 
1312 		do {
1313 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1314 
1315 			if (!DRMACH_IS_CPU_ID(d_id))
1316 				continue;
1317 
1318 			device = d_id;
1319 			i = device->unum;
1320 
1321 			DRMACH_PR("\t\t%s\n", device->cm.name);
1322 
1323 			/*
1324 			 * Disabled detaching mem node.
1325 			 */
1326 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i);
1327 			rsp[m].masr = s_masr;
1328 			m++;
1329 			/*
1330 			 * Always write masr with present bit
1331 			 * off and then again with it on.
1332 			 */
1333 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i);
1334 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1335 			m++;
1336 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1337 			rsp[m].masr = t_masr;
1338 			m++;
1339 
1340 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1341 
1342 		/*
1343 		 * Step 4:	Update PC MADR tables for IOs.
1344 		 */
1345 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1346 		/* this worked for previous loop, must work here too */
1347 		ASSERT(rv == 0);
1348 
1349 		do {
1350 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1351 
1352 			if (!DRMACH_IS_IO_ID(d_id))
1353 				continue;
1354 
1355 			device = d_id;
1356 			i = device->unum;
1357 
1358 			DRMACH_PR("\t\t%s\n", device->cm.name);
1359 
1360 			/*
1361 			 * Disabled detaching mem node.
1362 			 */
1363 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i+4);
1364 			rsp[m].masr = s_masr;
1365 			m++;
1366 			/*
1367 			 * Always write masr with present bit
1368 			 * off and then again with it on.
1369 			 */
1370 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i+4);
1371 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1372 			m++;
1373 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1374 			rsp[m].masr = t_masr;
1375 			m++;
1376 
1377 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1378 	} while (drmach_array_next(drmach_boards, &b_idx, &b_id) == 0);
1379 
1380 	/*
1381 	 * Zero masr_addr value indicates the END.
1382 	 */
1383 	rsp[m].masr_addr = 0ull;
1384 	rsp[m].masr = 0;
1385 	DRMACH_PR("number of steps in rename script = %d\n", m);
1386 	m++;
1387 
1388 	/* paranoia */
1389 	ASSERT((caddr_t)&rsp[m] <= buf + buflen);
1390 
1391 #ifdef DEBUG
1392 	{
1393 		int	j;
1394 
1395 		DRMACH_PR("mc idle register address list:");
1396 		isp = (drmach_mc_idle_script_t *)buf;
1397 		DRMACH_PR("source mc idle addr 0x%lx, mem id %p",
1398 		    isp[0].idle_addr, (void *)isp[0].mem);
1399 		DRMACH_PR("target mc idle addr 0x%lx, mem id %p",
1400 		    isp[1].idle_addr, (void *)isp[1].mem);
1401 		ASSERT(isp[2].idle_addr == 0);
1402 
1403 		DRMACH_PR("copy-rename script:");
1404 		for (j = 0; j < m; j++) {
1405 			DRMACH_PR("0x%lx = 0x%08x",
1406 			    rsp[j].masr_addr, rsp[j].masr);
1407 		}
1408 
1409 		DELAY(1000000);
1410 	}
1411 #endif
1412 
1413 	/* return number of bytes consumed */
1414 	b = (caddr_t)&rsp[m] - buf;
1415 	DRMACH_PR("total number of bytes consumed is %d\n", b);
1416 	ASSERT(b <= buflen);
1417 
1418 #ifdef lint
1419 	buflen = buflen;
1420 #endif
1421 
1422 	return (NULL);
1423 }
1424 
1425 /*
1426  * The routine performs the necessary memory COPY and MC adr SWITCH.
1427  * Both operations MUST be at the same "level" so that the stack is
1428  * maintained correctly between the copy and switch.  The switch
1429  * portion implements a caching mechanism to guarantee the code text
1430  * is cached prior to execution.  This is to guard against possible
1431  * memory access while the MC adr's are being modified.
1432  *
1433  * IMPORTANT: The _drmach_copy_rename_end() function must immediately
1434  * follow drmach_copy_rename_prog__relocatable() so that the correct
1435  * "length" of the drmach_copy_rename_prog__relocatable can be
1436  * calculated.  This routine MUST be a LEAF function, i.e. it can
1437  * make NO function calls, primarily for two reasons:
1438  *
1439  *	1. We must keep the stack consistent across the "switch".
1440  *	2. Function calls are compiled to relative offsets, and
1441  *	   we execute this function we'll be executing it from
1442  *	   a copied version in a different area of memory, thus
1443  *	   the relative offsets will be bogus.
1444  *
1445  * Moreover, it must have the "__relocatable" suffix to inform DTrace
1446  * providers (and anything else, for that matter) that this
1447  * function's text is manually relocated elsewhere before it is
1448  * executed.  That is, it cannot be safely instrumented with any
1449  * methodology that is PC-relative.
1450  */
1451 static void
1452 drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog)
1453 {
1454 	extern void drmach_exec_script_il(drmach_rename_script_t *rsp);
1455 
1456 	drmach_mc_idle_script_t		*isp;
1457 	struct memlist			*ml;
1458 	int				csize;
1459 	int				lnsize;
1460 	uint64_t			caddr;
1461 
1462 	isp = (drmach_mc_idle_script_t *)prog->data;
1463 
1464 	caddr = ecache_flushaddr;
1465 	csize = (cpunodes[CPU->cpu_id].ecache_size << 1);
1466 	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;
1467 
1468 	/*
1469 	 * DO COPY.
1470 	 */
1471 	for (ml = prog->c_ml; ml; ml = ml->ml_next) {
1472 		uint64_t	s_pa, t_pa;
1473 		uint64_t	nbytes;
1474 
1475 		s_pa = prog->s_copybasepa + ml->ml_address;
1476 		t_pa = prog->t_copybasepa + ml->ml_address;
1477 		nbytes = ml->ml_size;
1478 
1479 		while (nbytes != 0ull) {
1480 			/*
1481 			 * This copy does NOT use an ASI
1482 			 * that avoids the Ecache, therefore
1483 			 * the dst_pa addresses may remain
1484 			 * in our Ecache after the dst_pa
1485 			 * has been removed from the system.
1486 			 * A subsequent write-back to memory
1487 			 * will cause an ARB-stop because the
1488 			 * physical address no longer exists
1489 			 * in the system. Therefore we must
1490 			 * flush out local Ecache after we
1491 			 * finish the copy.
1492 			 */
1493 
1494 			/* copy 32 bytes at src_pa to dst_pa */
1495 			bcopy32_il(s_pa, t_pa);
1496 
1497 			/* increment by 32 bytes */
1498 			s_pa += (4 * sizeof (uint64_t));
1499 			t_pa += (4 * sizeof (uint64_t));
1500 
1501 			/* decrement by 32 bytes */
1502 			nbytes -= (4 * sizeof (uint64_t));
1503 		}
1504 	}
1505 
1506 	/*
1507 	 * Since bcopy32_il() does NOT use an ASI to bypass
1508 	 * the Ecache, we need to flush our Ecache after
1509 	 * the copy is complete.
1510 	 */
1511 	flush_ecache_il(caddr, csize, lnsize);		/* inline version */
1512 
1513 	/*
1514 	 * Wait for MCs to go idle.
1515 	 */
1516 	do {
1517 		register int	t = 10;
1518 		register uint_t	v;
1519 
1520 		/* loop t cycles waiting for each mc to indicate it's idle */
1521 		do {
1522 			v = ldphysio_il(isp->idle_addr)
1523 			    & STARFIRE_MC_IDLE_MASK;
1524 
1525 		} while (v != STARFIRE_MC_IDLE_MASK && t-- > 0);
1526 
1527 		/* bailout if timedout */
1528 		if (t <= 0) {
1529 			prog->restless_mc = isp->mem;
1530 			return;
1531 		}
1532 
1533 		isp += 1;
1534 
1535 		/* stop if terminating zero has been reached */
1536 	} while (isp->idle_addr != 0);
1537 
1538 	/* advance passed terminating zero */
1539 	isp += 1;
1540 
1541 	/*
1542 	 * The following inline assembly routine caches
1543 	 * the rename script and then caches the code that
1544 	 * will do the rename.  This is necessary
1545 	 * so that we don't have any memory references during
1546 	 * the reprogramming.  We accomplish this by first
1547 	 * jumping through the code to guarantee it's cached
1548 	 * before we actually execute it.
1549 	 */
1550 	drmach_exec_script_il((drmach_rename_script_t *)isp);
1551 }
1552 
/*
 * Empty marker function.  Its address is subtracted from the address of
 * drmach_copy_rename_prog__relocatable() to obtain the number of bytes
 * of text to copy, so it has no body of its own.
 */
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}
1564 
1565 sbd_error_t *
1566 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
1567 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *pgm_id)
1568 {
1569 	drmach_device_t	*s_mem;
1570 	drmach_device_t	*t_mem;
1571 	struct memlist	*x_ml;
1572 	uint64_t	off_mask, s_copybasepa, t_copybasepa, t_basepa;
1573 	int		len;
1574 	caddr_t		bp, wp;
1575 	pda_handle_t	ph;
1576 	sbd_error_t	*err;
1577 	drmach_copy_rename_program_t *prog;
1578 
1579 	if (!DRMACH_IS_MEM_ID(s_id))
1580 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1581 	if (!DRMACH_IS_MEM_ID(t_id))
1582 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1583 	s_mem = s_id;
1584 	t_mem = t_id;
1585 
1586 	/* get starting physical address of target memory */
1587 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
1588 	if (err)
1589 		return (err);
1590 
1591 	/* calculate slice offset mask from slice size */
1592 	off_mask = mc_get_mem_alignment() - 1;
1593 
1594 	/* calculate source and target base pa */
1595 	s_copybasepa = c_ml->ml_address;
1596 	t_copybasepa =
1597 	    t_basepa + ((c_ml->ml_address & off_mask) - t_slice_offset);
1598 
1599 	/* paranoia */
1600 	ASSERT((c_ml->ml_address & off_mask) >= t_slice_offset);
1601 
1602 	/* adjust copy memlist addresses to be relative to copy base pa */
1603 	x_ml = c_ml;
1604 	while (x_ml != NULL) {
1605 		x_ml->ml_address -= s_copybasepa;
1606 		x_ml = x_ml->ml_next;
1607 	}
1608 
1609 #ifdef DEBUG
1610 	{
1611 	uint64_t s_basepa, s_size, t_size;
1612 
1613 	x_ml = c_ml;
1614 	while (x_ml->ml_next != NULL)
1615 		x_ml = x_ml->ml_next;
1616 
1617 	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
1618 	    s_copybasepa,
1619 	    s_copybasepa + x_ml->ml_address + x_ml->ml_size);
1620 
1621 	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
1622 	    t_copybasepa,
1623 	    t_copybasepa + x_ml->ml_address + x_ml->ml_size);
1624 
1625 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
1626 	MEMLIST_DUMP(c_ml);
1627 
1628 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
1629 	ASSERT(err == NULL);
1630 
1631 	err = drmach_mem_get_size(s_id, &s_size);
1632 	ASSERT(err == NULL);
1633 
1634 	err = drmach_mem_get_size(t_id, &t_size);
1635 	ASSERT(err == NULL);
1636 
1637 	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
1638 	    s_basepa, s_size);
1639 	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
1640 	    t_basepa, t_size);
1641 
1642 	ASSERT(s_copybasepa + x_ml->ml_address + x_ml->ml_size <=
1643 	    s_basepa + s_size);
1644 	ASSERT(t_copybasepa + x_ml->ml_address + x_ml->ml_size <=
1645 	    t_basepa + t_size);
1646 	}
1647 #endif
1648 
1649 	ph = drmach_pda_open();
1650 	if (ph == NULL)
1651 		return (DRMACH_INTERNAL_ERROR());
1652 
1653 	/*
1654 	 * bp will be page aligned, since we're calling
1655 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
1656 	 */
1657 	wp = bp = kmem_zalloc(PAGESIZE, KM_SLEEP);
1658 
1659 	/* allocate space for copy rename struct */
1660 	len = sizeof (drmach_copy_rename_program_t);
1661 	DRMACH_PR("prog = 0x%p, header len %d\n", (void *)wp, len);
1662 	prog = (drmach_copy_rename_program_t *)wp;
1663 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1664 
1665 	/*
1666 	 * Copy the code for the copy-rename routine into
1667 	 * a page aligned piece of memory.  We do this to guarantee
1668 	 * that we're executing within the same page and thus reduce
1669 	 * the possibility of cache collisions between different
1670 	 * pages.
1671 	 */
1672 	len = (int)((ulong_t)drmach_copy_rename_end -
1673 	    (ulong_t)drmach_copy_rename_prog__relocatable);
1674 	ASSERT(wp + len < bp + PAGESIZE);
1675 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
1676 
1677 	DRMACH_PR("copy-rename function 0x%p, len %d\n", (void *)wp, len);
1678 	prog->run = (void (*)())wp;
1679 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1680 
1681 	/*
1682 	 * Prepare data page that will contain script of
1683 	 * operations to perform during copy-rename.
1684 	 * Allocate temporary buffer to hold script.
1685 	 */
1686 	err = drmach_prep_rename_script(s_mem, t_mem, t_slice_offset,
1687 	    wp, PAGESIZE - (wp - bp));
1688 	if (err) {
1689 		(void) drmach_copy_rename_fini(prog);
1690 		return (err);
1691 	}
1692 
1693 	DRMACH_PR("copy-rename script 0x%p, len %d\n", (void *)wp, len);
1694 	prog->data = wp;
1695 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1696 
1697 	prog->ph = ph;
1698 	prog->s_copybasepa = s_copybasepa;
1699 	prog->t_copybasepa = t_copybasepa;
1700 	prog->c_ml = c_ml;
1701 	*pgm_id = prog;
1702 
1703 	return (NULL);
1704 }
1705 
1706 sbd_error_t *
1707 drmach_copy_rename_fini(drmachid_t id)
1708 {
1709 	drmach_copy_rename_program_t	*prog = id;
1710 	sbd_error_t			*err = NULL;
1711 
1712 	if (prog->c_ml != NULL)
1713 		memlist_delete(prog->c_ml);
1714 
1715 	if (prog->ph != NULL)
1716 		pda_close(prog->ph);
1717 
1718 	if (prog->restless_mc != 0) {
1719 		cmn_err(CE_WARN, "MC did not idle; OBP Node 0x%x",
1720 		    (uint_t)drmach_node_get_dnode(prog->restless_mc->node));
1721 
1722 		err = DRMACH_INTERNAL_ERROR();
1723 	}
1724 
1725 	kmem_free(prog, PAGESIZE);
1726 
1727 	return (err);
1728 }
1729 
1730 static sbd_error_t *
1731 drmach_io_new(drmach_device_t *dp)
1732 {
1733 	static sbd_error_t *drmach_io_release(drmachid_t);
1734 	static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
1735 
1736 	sbd_error_t	*err;
1737 	int		 portid;
1738 
1739 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
1740 	if (err == NULL) {
1741 		ASSERT(portid & 0x40);
1742 		dp->unum = portid & 1;
1743 	}
1744 
1745 	dp->cm.isa = (void *)drmach_io_new;
1746 	dp->cm.release = drmach_io_release;
1747 	dp->cm.status = drmach_io_status;
1748 
1749 	(void) snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type,
1750 	    dp->unum);
1751 
1752 	return (err);
1753 }
1754 
1755 static void
1756 drmach_iopc_op(pda_handle_t ph, drmach_iopc_op_t op)
1757 {
1758 	register int b;
1759 
1760 	for (b = 0; b < MAX_BOARDS; b++) {
1761 		int		p;
1762 		ushort_t	bda_ioc;
1763 		board_desc_t	*bdesc;
1764 
1765 		if (pda_board_present(ph, b) == 0)
1766 			continue;
1767 
1768 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
1769 		/*
1770 		 * Update PCs for IOCs.
1771 		 */
1772 		bda_ioc = bdesc->bda_ioc;
1773 		for (p = 0; p < MAX_IOCS; p++) {
1774 			u_longlong_t	idle_addr;
1775 			uchar_t		value;
1776 
1777 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
1778 				continue;
1779 
1780 			idle_addr = STARFIRE_BB_PC_ADDR(b, p, 1);
1781 
1782 			switch (op) {
1783 			case DO_PAUSE:
1784 				value = STARFIRE_BB_PC_PAUSE(p);
1785 				break;
1786 
1787 			case DO_IDLE:
1788 				value = STARFIRE_BB_PC_IDLE(p);
1789 				break;
1790 
1791 			case DO_UNPAUSE:
1792 				value = ldbphysio(idle_addr);
1793 				value &= ~STARFIRE_BB_PC_PAUSE(p);
1794 				break;
1795 
1796 			case DO_UNIDLE:
1797 				value = ldbphysio(idle_addr);
1798 				value &= ~STARFIRE_BB_PC_IDLE(p);
1799 				break;
1800 
1801 			default:
1802 				cmn_err(CE_PANIC,
1803 				    "drmach_iopc_op: unknown op (%d)",
1804 				    (int)op);
1805 				/*NOTREACHED*/
1806 			}
1807 			stbphysio(idle_addr, value);
1808 		}
1809 	}
1810 }
1811 
1812 void
1813 drmach_copy_rename(drmachid_t id)
1814 {
1815 	drmach_copy_rename_program_t	*prog = id;
1816 	uint64_t			neer;
1817 
1818 	/*
1819 	 * UPA IDLE
1820 	 * Protocol = PAUSE -> IDLE -> UNPAUSE
1821 	 * In reality since we only "idle" the IOPCs it's sufficient
1822 	 * to just issue the IDLE operation since (in theory) all IOPCs
1823 	 * in the field are PC6.  However, we'll be robust and do the
1824 	 * proper workaround protocol so that we never have to worry!
1825 	 */
1826 	drmach_iopc_op(prog->ph, DO_PAUSE);
1827 	drmach_iopc_op(prog->ph, DO_IDLE);
1828 	DELAY(100);
1829 	drmach_iopc_op(prog->ph, DO_UNPAUSE);
1830 	DELAY(100);
1831 
1832 	/* disable CE reporting */
1833 	neer = get_error_enable();
1834 	set_error_enable(neer & ~EER_CEEN);
1835 
1836 	/* run the copy/rename program */
1837 	prog->run(prog);
1838 
1839 	/* enable CE reporting */
1840 	set_error_enable(neer);
1841 
1842 	/*
1843 	 * UPA UNIDLE
1844 	 * Protocol = UNIDLE
1845 	 */
1846 	drmach_iopc_op(prog->ph, DO_UNIDLE);
1847 	DELAY(100);
1848 }
1849 
1850 /*
1851  * The counter-timer and perf-counter nodes are not being cleaned
1852  * up after a board that was present at start of day is detached.
1853  * If the board has become unconfigured with this operation, walk
1854  * the prom tree and find all counter-timer and perf-counter nodes
1855  * that have the same board number as the board that was just
1856  * unconfigured and remove them.
1857  */
1858 static sbd_error_t *
1859 drmach_remove_counter_nodes(drmachid_t id)
1860 {
1861 	int		num;
1862 	char		name[OBP_MAXDRVNAME];
1863 	pnode_t		child;
1864 	dev_info_t	*dip;
1865 	sbd_error_t	*err;
1866 	drmach_status_t	stat;
1867 	drmach_board_t	*bp;
1868 
1869 	if (!DRMACH_IS_BOARD_ID(id)) {
1870 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1871 	}
1872 
1873 	if ((err = drmach_board_status(id, &stat)) != NULL) {
1874 		return (err);
1875 	}
1876 
1877 	/*
1878 	 * Only clean up the counter-timer and perf-counter
1879 	 * nodes when the entire board is unconfigured.
1880 	 */
1881 	if (stat.configured) {
1882 		return (NULL);
1883 	}
1884 
1885 	bp = (drmach_board_t *)id;
1886 
1887 	err = NULL;
1888 
1889 	for (child = prom_childnode(prom_rootnode()); child != OBP_NONODE;
1890 	    child = prom_nextnode(child)) {
1891 
1892 		if (prom_getprop(child, OBP_BOARDNUM, (caddr_t)&num) == -1) {
1893 			continue;
1894 		}
1895 
1896 		if (bp->bnum != num) {
1897 			continue;
1898 		}
1899 
1900 		if (prom_getprop(child, OBP_NAME, (caddr_t)name) == -1) {
1901 			continue;
1902 		}
1903 
1904 		if (strncmp(name, MISC_COUNTER_TIMER_DEVNAME, OBP_MAXDRVNAME) &&
1905 		    strncmp(name, MISC_PERF_COUNTER_DEVNAME, OBP_MAXDRVNAME)) {
1906 				continue;
1907 		}
1908 
1909 		/* Root node doesn't have to be held */
1910 		dip = e_ddi_nodeid_to_dip(child);
1911 
1912 		/*
1913 		 * If the node is only in the OBP tree, then
1914 		 * we don't have to remove it.
1915 		 */
1916 		if (dip) {
1917 			dev_info_t *fdip = NULL;
1918 
1919 			DRMACH_PR("removing %s devinfo node\n", name);
1920 
1921 			e_ddi_branch_hold(dip);
1922 			ddi_release_devi(dip); /* held in e_ddi_nodeid_to_dip */
1923 
1924 			if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1925 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1926 
1927 				/*
1928 				 * If non-NULL, fdip is held and must be
1929 				 * released.
1930 				 */
1931 				if (fdip != NULL) {
1932 					(void) ddi_pathname(fdip, path);
1933 					ddi_release_devi(fdip);
1934 				} else {
1935 					(void) ddi_pathname(dip, path);
1936 				}
1937 
1938 				err = drerr_new(1, ESTF_DRVFAIL, path);
1939 				kmem_free(path, MAXPATHLEN);
1940 				e_ddi_branch_rele(dip);
1941 				break;
1942 			}
1943 		}
1944 	}
1945 
1946 	return (err);
1947 }
1948 
1949 /*ARGSUSED*/
1950 sbd_error_t *
1951 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1952 {
1953 	/* allow status and ncm operations to always succeed */
1954 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1955 		return (NULL);
1956 	}
1957 
1958 	/* check all other commands for the required option string */
1959 	if ((opts->size > 0) && (opts->copts != NULL)) {
1960 
1961 		DRMACH_PR("platform options: %s\n", opts->copts);
1962 
1963 		if (strstr(opts->copts, "xfdr") != NULL) {
1964 			return (NULL);
1965 		}
1966 	}
1967 
1968 	return (drerr_new(0, ESTF_SUPPORT, NULL));
1969 }
1970 
1971 /*ARGSUSED*/
1972 sbd_error_t *
1973 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1974 {
1975 	sbd_error_t	*err = NULL;
1976 
1977 	switch (cmd) {
1978 	case SBD_CMD_UNCONFIGURE:
1979 
1980 		err = drmach_remove_counter_nodes(id);
1981 		break;
1982 
1983 	case SBD_CMD_CONFIGURE:
1984 	case SBD_CMD_DISCONNECT:
1985 	case SBD_CMD_CONNECT:
1986 	case SBD_CMD_GETNCM:
1987 	case SBD_CMD_STATUS:
1988 		break;
1989 
1990 	default:
1991 		break;
1992 	}
1993 
1994 	return (err);
1995 }
1996 
1997 sbd_error_t *
1998 drmach_board_assign(int bnum, drmachid_t *id)
1999 {
2000 	sbd_error_t	*err;
2001 
2002 	if (!drmach_initialized && drmach_init() == -1) {
2003 		err = DRMACH_INTERNAL_ERROR();
2004 	} else if (drmach_array_get(drmach_boards, bnum, id) == -1) {
2005 		err = drerr_new(1, ESTF_BNUM, "%d", bnum);
2006 	} else if (*id != NULL) {
2007 		err = NULL;
2008 	} else {
2009 		drmach_board_t	*bp;
2010 
2011 		*id  = (drmachid_t)drmach_board_new(bnum);
2012 		bp = *id;
2013 		bp->assigned = 1;
2014 		err = NULL;
2015 	}
2016 
2017 	return (err);
2018 }
2019 
2020 static int
2021 drmach_attach_board(void *arg)
2022 {
2023 	drmach_board_t	*obj = (drmach_board_t *)arg;
2024 	cpuset_t	cset;
2025 	int		retval;
2026 
2027 	/*
2028 	 * OBP disables traps during the board probe.
2029 	 * So, in order to prevent cross-call/cross-trap timeouts,
2030 	 * and thus panics, we effectively block anybody from
2031 	 * issuing xc's/xt's by doing a promsafe_xc_attention.
2032 	 * In the previous version of Starfire DR (2.6), a timeout
2033 	 * suspension mechanism was implemented in the send-mondo
2034 	 * assembly.  That mechanism is unnecessary with the
2035 	 * existence of xc_attention/xc_dismissed.
2036 	 */
2037 	cset = cpu_ready_set;
2038 	promsafe_xc_attention(cset);
2039 
2040 	retval = prom_starfire_add_brd(obj->connect_cpuid);
2041 
2042 	xc_dismissed(cset);
2043 
2044 	return (retval);
2045 }
2046 
2047 sbd_error_t *
2048 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
2049 {
2050 	drmach_board_t	*obj = (drmach_board_t *)id;
2051 	int		retval;
2052 	sbd_error_t	*err;
2053 	char		*cptr, *copts;
2054 
2055 	if (!DRMACH_IS_BOARD_ID(id))
2056 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2057 
2058 	if (opts->size > 0)
2059 		copts = opts->copts;
2060 
2061 	if ((cptr = strstr(copts, "cpuid=")) != NULL) {
2062 		int cpuid;
2063 
2064 		cptr += strlen("cpuid=");
2065 		cpuid = stoi(&cptr);
2066 
2067 		if (DRMACH_CPUID2BNUM(cpuid) == obj->bnum) {
2068 			obj->connect_cpuid = cpuid;
2069 			obj->assigned = 1;
2070 		} else
2071 			return (drerr_new(1, ESTF_SETCPUVAL, "%d", cpuid));
2072 	} else {
2073 		/* cpuid was not specified */
2074 		obj->connect_cpuid = -1;
2075 	}
2076 
2077 	if (obj->connect_cpuid == -1) {
2078 		err =  drerr_new(1, ESTF_NOCPUID, obj->cm.name);
2079 		return (err);
2080 	}
2081 
2082 	cmn_err(CE_CONT, "DRMACH: PROM attach %s CPU %d\n",
2083 	    obj->cm.name, obj->connect_cpuid);
2084 
2085 	retval = prom_tree_update(drmach_attach_board, obj);
2086 
2087 	if (retval == 0)
2088 		err = NULL;
2089 	else {
2090 		cmn_err(CE_WARN, "prom error: prom_starfire_add_brd(%d) "
2091 		    "returned %d", obj->connect_cpuid, retval);
2092 
2093 		err = drerr_new(1, ESTF_PROBE, obj->cm.name);
2094 	}
2095 
2096 	obj->connect_cpuid = -1;
2097 
2098 	return (err);
2099 }
2100 
2101 /*ARGSUSED*/
2102 sbd_error_t *
2103 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
2104 {
2105 	drmach_board_t		*bp;
2106 	int			rv;
2107 	int			d_idx;	/* device index */
2108 	drmachid_t		d_id;	/* device ID */
2109 	sbd_error_t		*err;
2110 
2111 	if (!DRMACH_IS_BOARD_ID(id))
2112 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2113 
2114 	bp = id;
2115 
2116 	/*
2117 	 * We need to make sure all of the board's device nodes
2118 	 * have been removed from the Solaris device tree before
2119 	 * continuing with the disconnect. Otherwise, we could
2120 	 * disconnect the board and remove the OBP device tree
2121 	 * nodes with Solaris device tree nodes remaining.
2122 	 *
2123 	 * On Starfire, Solaris device tree nodes are deleted
2124 	 * during unconfigure by drmach_unconfigure(). It's
2125 	 * necessary to do this here because drmach_unconfigure()
2126 	 * failures are not handled during unconfigure.
2127 	 */
2128 	if (bp->devices) {
2129 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
2130 		while (rv == 0) {
2131 			err = drmach_unconfigure(d_id, DRMACH_DEVI_REMOVE);
2132 			if (err)
2133 				return (err);
2134 
2135 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
2136 		}
2137 	}
2138 
2139 	/*
2140 	 * Starfire board Solaris device tree counter nodes,
2141 	 * which are only present on start-of-day boards, are
2142 	 * removed in the dr_post_op() code flow after the
2143 	 * board is unconfigured. We call the counter node
2144 	 * removal function here because unconfigure errors
2145 	 * can cause the dr_post_op() function to be skipped
2146 	 * after an unconfigure operation even though all of
2147 	 * the board's devices have been transitioned to the
2148 	 * unconfigured state.
2149 	 */
2150 	err = drmach_remove_counter_nodes(id);
2151 	if (err)
2152 		return (err);
2153 
2154 	return (NULL);
2155 }
2156 
2157 static int
2158 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
2159 {
2160 	drmach_node_t			*node = args->node;
2161 	drmach_board_cb_data_t		*data = args->data;
2162 	drmach_board_t			*obj = data->obj;
2163 
2164 	int		 rv;
2165 	int		 bnum;
2166 	drmach_device_t	*device;
2167 
2168 	rv = drmach_node_get_prop(node, OBP_BOARDNUM, &bnum);
2169 	if (rv) {
2170 		/*
2171 		 * if the node does not have a board# property, then
2172 		 * by that information alone it is known that drmach
2173 		 * is not interested in it.
2174 		 */
2175 		return (0);
2176 	} else if (bnum != obj->bnum)
2177 		return (0);
2178 
2179 	/*
2180 	 * Create a device data structure from this node data.
2181 	 * The call may yield nothing if the node is not of interest
2182 	 * to drmach.
2183 	 */
2184 	data->err = drmach_device_new(node, obj, &device);
2185 	if (data->err)
2186 		return (-1);
2187 	else if (device == NULL) {
2188 		/*
2189 		 * drmach_device_new examined the node we passed in
2190 		 * and determined that it was one not of interest to
2191 		 * drmach.  So, it is skipped.
2192 		 */
2193 		return (0);
2194 	}
2195 
2196 	rv = drmach_array_set(obj->devices, data->ndevs++, device);
2197 	if (rv) {
2198 		drmach_device_dispose(device);
2199 		data->err = DRMACH_INTERNAL_ERROR();
2200 		return (-1);
2201 	}
2202 
2203 	data->err = (*data->found)(data->a, device->type, device->unum, device);
2204 	return (data->err == NULL ? 0 : -1);
2205 }
2206 
2207 sbd_error_t *
2208 drmach_board_find_devices(drmachid_t id, void *a,
2209 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
2210 {
2211 	extern int		 plat_max_cpu_units_per_board();
2212 	extern int		 plat_max_mem_units_per_board();
2213 	extern int		 plat_max_io_units_per_board();
2214 
2215 	drmach_board_t		*obj = (drmach_board_t *)id;
2216 	sbd_error_t		*err;
2217 	int			 max_devices;
2218 	int			 rv;
2219 	drmach_board_cb_data_t	data;
2220 
2221 	max_devices  = plat_max_cpu_units_per_board();
2222 	max_devices += plat_max_mem_units_per_board();
2223 	max_devices += plat_max_io_units_per_board();
2224 
2225 	obj->devices = drmach_array_new(0, max_devices);
2226 
2227 	data.obj = obj;
2228 	data.ndevs = 0;
2229 	data.found = found;
2230 	data.a = a;
2231 	data.err = NULL;
2232 
2233 	rv = drmach_node_walk(obj->tree, &data, drmach_board_find_devices_cb);
2234 	if (rv == 0)
2235 		err = NULL;
2236 	else {
2237 		drmach_array_dispose(obj->devices, drmach_device_dispose);
2238 		obj->devices = NULL;
2239 
2240 		if (data.err)
2241 			err = data.err;
2242 		else
2243 			err = DRMACH_INTERNAL_ERROR();
2244 	}
2245 
2246 	return (err);
2247 }
2248 
2249 int
2250 drmach_board_lookup(int bnum, drmachid_t *id)
2251 {
2252 	int	rv = 0;
2253 
2254 	if (!drmach_initialized && drmach_init() == -1) {
2255 		*id = 0;
2256 		rv = -1;
2257 	} else if (drmach_array_get(drmach_boards, bnum, id)) {
2258 		*id = 0;
2259 		rv = -1;
2260 	}
2261 	return (rv);
2262 }
2263 
2264 sbd_error_t *
2265 drmach_board_name(int bnum, char *buf, int buflen)
2266 {
2267 	(void) snprintf(buf, buflen, "SB%d", bnum);
2268 	return (NULL);
2269 }
2270 
2271 sbd_error_t *
2272 drmach_board_poweroff(drmachid_t id)
2273 {
2274 	drmach_board_t	*bp;
2275 	sbd_error_t	*err;
2276 	drmach_status_t	 stat;
2277 
2278 	if (!DRMACH_IS_BOARD_ID(id))
2279 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2280 	bp = id;
2281 
2282 	err = drmach_board_status(id, &stat);
2283 	if (err)
2284 		return (err);
2285 	else if (stat.configured || stat.busy)
2286 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2287 	else {
2288 		/* board power off is essentially a noop for Starfire */
2289 		bp->powered = 0;
2290 		return (NULL);
2291 	}
2292 	/*NOTREACHED*/
2293 }
2294 
2295 sbd_error_t *
2296 drmach_board_poweron(drmachid_t id)
2297 {
2298 	drmach_board_t	*bp;
2299 
2300 	if (!DRMACH_IS_BOARD_ID(id))
2301 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2302 	bp = id;
2303 
2304 	/* board power on is essentially a noop for Starfire */
2305 	bp->powered = 1;
2306 
2307 	return (NULL);
2308 }
2309 
2310 static sbd_error_t *
2311 drmach_board_release(drmachid_t id)
2312 {
2313 	if (!DRMACH_IS_BOARD_ID(id))
2314 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2315 	return (NULL);
2316 }
2317 
/*
 * Board test entry point.  No testing is performed here; all
 * arguments are ignored and NULL (no error) is always returned.
 */
/*ARGSUSED*/
sbd_error_t *
drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
{
	return (NULL);
}
2324 
2325 sbd_error_t *
2326 drmach_board_unassign(drmachid_t id)
2327 {
2328 	drmach_board_t	*bp;
2329 	sbd_error_t	*err;
2330 	drmach_status_t	 stat;
2331 
2332 	if (!DRMACH_IS_BOARD_ID(id))
2333 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2334 	bp = id;
2335 
2336 	err = drmach_board_status(id, &stat);
2337 	if (err)
2338 		return (err);
2339 	else if (stat.configured || stat.busy)
2340 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2341 	else if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
2342 		return (DRMACH_INTERNAL_ERROR());
2343 	else {
2344 		drmach_board_dispose(bp);
2345 		return (NULL);
2346 	}
2347 	/*NOTREACHED*/
2348 }
2349 
2350 static sbd_error_t *
2351 drmach_cpu_new(drmach_device_t *dp)
2352 {
2353 	static sbd_error_t *drmach_cpu_release(drmachid_t);
2354 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
2355 
2356 	sbd_error_t	*err;
2357 	int		 portid;
2358 
2359 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
2360 	if (err == NULL)
2361 		dp->unum = portid & 3;
2362 
2363 	dp->cm.isa = (void *)drmach_cpu_new;
2364 	dp->cm.release = drmach_cpu_release;
2365 	dp->cm.status = drmach_cpu_status;
2366 
2367 	(void) snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type,
2368 	    dp->unum);
2369 
2370 	return (err);
2371 }
2372 
2373 /*
2374  * drmach_cpu_obp_detach()
2375  *  This requires two steps, first, we must put the cpuid into the OBP
2376  *  idle loop (Idle in Program) state.  Then we call OBP to place the CPU
2377  *  into the "Detached" state, which does any special processing to
2378  *  actually detach the cpu, such as flushing ecache, and also ensures
2379  *  that a subsequent breakpoint won't restart the cpu (if it was just in
2380  *  Idle in Program state).
2381  */
2382 static void
2383 drmach_cpu_obp_detach(int cpuid)
2384 {
2385 	/*
2386 	 * Cpu may not be under OBP's control. Eg, if cpu exited to download
2387 	 * helper on a prior attach.
2388 	 */
2389 	if (CPU_SGN_EXISTS(cpuid) &&
2390 	    !SGN_CPU_IS_OS(cpuid) &&
2391 	    !SGN_CPU_IS_OBP(cpuid)) {
2392 		cmn_err(CE_WARN,
2393 		    "unexpected signature (0x%x) for cpu %d",
2394 		    get_cpu_sgn(cpuid), cpuid);
2395 	}
2396 
2397 	/*
2398 	 * Now we place the CPU into the "Detached" idle loop in OBP.
2399 	 * This is so that the CPU won't be restarted if we break into
2400 	 * OBP with a breakpoint or BREAK key from the console, and also
2401 	 * if we need to do any special processing, such as flushing the
2402 	 * cpu's ecache, disabling interrupts (by turning of the ET bit in
2403 	 * the PSR) and/or spinning in BBSRAM rather than global memory.
2404 	 */
2405 	DRMACH_PR("prom_starfire_rm_cpu(%d)\n", cpuid);
2406 	prom_starfire_rm_cpu(cpuid);
2407 }
2408 
/*
 * drmach_cpu_obp_is_detached() returns 1 (TRUE) when the cpu either has
 * no signature block at all, or has an OS signature whose state is
 * detached (SIGBST_DETACHED); otherwise it returns 0 (FALSE).  This
 * routine should only be called after we have asked OBP to detach the
 * CPU.  It should NOT be called as a check during any other flow.
 */
static int
drmach_cpu_obp_is_detached(int cpuid)
{
	/* no signature block is treated the same as detached */
	if (!CPU_SGN_EXISTS(cpuid))
		return (1);

	if (SGN_CPU_IS_OS(cpuid) && SGN_CPU_STATE_IS_DETACHED(cpuid))
		return (1);

	return (0);
}
2424 
2425 static int
2426 drmach_cpu_start(struct cpu *cp)
2427 {
2428 	int		cpuid = cp->cpu_id;
2429 	int		ntries = drmach_cpu_ntries;
2430 	extern void	restart_other_cpu(int);
2431 
2432 	ASSERT(MUTEX_HELD(&cpu_lock));
2433 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
2434 
2435 	cp->cpu_flags &= ~CPU_POWEROFF;
2436 
2437 	/*
2438 	 * NOTE: restart_other_cpu pauses cpus during the
2439 	 *	 slave cpu start.  This helps to quiesce the
2440 	 *	 bus traffic a bit which makes the tick sync
2441 	 *	 routine in the prom more robust.
2442 	 */
2443 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
2444 
2445 	prom_starfire_add_cpu(cpuid);
2446 
2447 	restart_other_cpu(cpuid);
2448 
2449 	/*
2450 	 * Wait for the cpu to reach its idle thread before
2451 	 * we zap him with a request to blow away the mappings
2452 	 * he (might) have for the drmach_shutdown_asm code
2453 	 * he may have executed on unconfigure.
2454 	 */
2455 	while ((cp->cpu_thread != cp->cpu_idle_thread) && (ntries > 0)) {
2456 		DELAY(drmach_cpu_delay);
2457 		ntries--;
2458 	}
2459 
2460 	DRMACH_PR("waited %d out of %d loops for cpu %d\n",
2461 	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cpuid);
2462 
2463 	xt_one(cpuid, vtag_flushpage_tl1,
2464 	    (uint64_t)drmach_shutdown_va, (uint64_t)ksfmmup);
2465 
2466 	return (0);
2467 }
2468 
2469 /*
2470  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
2471  * it has been offlined. The function of this routine is to get the cpu
2472  * spinning in a safe place. The requirement is that the system will not
2473  * reference anything on the detaching board (memory and i/o is detached
2474  * elsewhere) and that the CPU not reference anything on any other board
2475  * in the system.  This isolation is required during and after the writes
2476  * to the domain masks to remove the board from the domain.
2477  *
2478  * To accomplish this isolation the following is done:
2479  *	1) Create a locked mapping to a location in BBSRAM where
2480  *	   the cpu will execute.
2481  *	2) Copy the target function (drmach_shutdown_asm) in which
2482  *	   the cpu will execute into BBSRAM.
2483  *	3) Jump into function with BBSRAM.
2484  *	   Function will:
2485  *	   3.1) Flush its Ecache (displacement).
2486  *	   3.2) Flush its Dcache with HW mechanism.
2487  *	   3.3) Flush its Icache with HW mechanism.
2488  *	   3.4) Flush all valid and _unlocked_ D-TLB entries.
2489  *	   3.5) Flush all valid and _unlocked_ I-TLB entries.
2490  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
2491  *		recovered by drmach_cpu_poweroff().
2492  *	4) Jump into a tight loop.
2493  */
2494 #define	DRMACH_BBSRAM_OFFSET	0x1000
2495 
2496 static void
2497 drmach_cpu_stop_self(void)
2498 {
2499 	int		cpuid = (int)CPU->cpu_id;
2500 	tte_t		tte;
2501 	volatile uint_t	*src, *dst;
2502 	uint_t		funclen;
2503 	uint64_t	bbsram_pa, bbsram_offset;
2504 	uint_t		bbsram_pfn;
2505 	uint64_t	bbsram_addr;
2506 	void		(*bbsram_func)(uint64_t);
2507 	extern void	drmach_shutdown_asm(uint64_t);
2508 	extern void	drmach_shutdown_asm_end(void);
2509 
2510 	funclen = (uint_t)drmach_shutdown_asm_end - (uint_t)drmach_shutdown_asm;
2511 	ASSERT(funclen <= MMU_PAGESIZE);
2512 	/*
2513 	 * We'll start from the 0th's base.
2514 	 */
2515 	bbsram_pa = STARFIRE_UPAID2UPS(cpuid) | STARFIRE_PSI_BASE;
2516 	bbsram_offset = bbsram_pa | 0xfe0ULL;
2517 	bbsram_pa += ldphysio(bbsram_offset) + DRMACH_BBSRAM_OFFSET;
2518 
2519 	bbsram_pfn = (uint_t)(bbsram_pa >> MMU_PAGESHIFT);
2520 
2521 	bbsram_addr = (uint64_t)drmach_shutdown_va;
2522 	drmach_shutdown_asm_mbox->estack = bbsram_addr + (uint64_t)funclen;
2523 
2524 	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
2525 	    TTE_PFN_INTHI(bbsram_pfn);
2526 	tte.tte_intlo = TTE_PFN_INTLO(bbsram_pfn) |
2527 	    TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
2528 	sfmmu_dtlb_ld_kva(drmach_shutdown_va, &tte);	/* load dtlb */
2529 	sfmmu_itlb_ld_kva(drmach_shutdown_va, &tte);	/* load itlb */
2530 
2531 	for (src = (uint_t *)drmach_shutdown_asm, dst = (uint_t *)bbsram_addr;
2532 	    src < (uint_t *)drmach_shutdown_asm_end; src++, dst++)
2533 		*dst = *src;
2534 
2535 	bbsram_func = (void (*)())bbsram_addr;
2536 	drmach_shutdown_asm_mbox->flushaddr = ecache_flushaddr;
2537 	drmach_shutdown_asm_mbox->size = (cpunodes[cpuid].ecache_size << 1);
2538 	drmach_shutdown_asm_mbox->linesize = cpunodes[cpuid].ecache_linesize;
2539 	drmach_shutdown_asm_mbox->physaddr =
2540 	    va_to_pa((void *)&drmach_xt_mb[cpuid]);
2541 
2542 	/*
2543 	 * Signal to drmach_cpu_poweroff() is via drmach_xt_mb cleared
2544 	 * by asm code
2545 	 */
2546 
2547 	(*bbsram_func)(va_to_pa((void *)drmach_shutdown_asm_mbox));
2548 }
2549 
2550 static void
2551 drmach_cpu_shutdown_self(void)
2552 {
2553 	cpu_t		*cp = CPU;
2554 	int		cpuid = cp->cpu_id;
2555 	extern void	flush_windows(void);
2556 
2557 	flush_windows();
2558 
2559 	(void) spl8();
2560 
2561 	ASSERT(cp->cpu_intr_actv == 0);
2562 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
2563 	    cp->cpu_thread == cp->cpu_startup_thread);
2564 
2565 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2566 
2567 	drmach_cpu_stop_self();
2568 
2569 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
2570 }
2571 
2572 /* a helper routine to keep the math in one place */
2573 static processorid_t
2574 drmach_cpu_calc_id(drmach_device_t *dp)
2575 {
2576 	return (dp->bp->bnum * MAX_CPU_UNITS_PER_BOARD + dp->unum);
2577 }
2578 
2579 /*
2580  * Move bootproc (SIGBCPU) to another cpu.  If dst_cpu is NULL, a
2581  * destination cpu is chosen from the set of cpus not located on the
2582  * same board as the current bootproc cpu.
2583  */
2584 static sbd_error_t *
2585 drmach_cpu_juggle_bootproc(drmach_device_t *dst_cpu)
2586 {
2587 	processorid_t	 cpuid;
2588 	struct cpu	*cp;
2589 	sbd_error_t	*err;
2590 	int		 rv;
2591 
2592 	ASSERT(MUTEX_HELD(&cpu_lock));
2593 
2594 	/* dst_cpu is NULL when target cpu is unspecified. So, pick one. */
2595 	if (dst_cpu == NULL) {
2596 		int avoid_board = DRMACH_CPUID2BNUM(SIGBCPU->cpu_id);
2597 		int max_cpuid = MAX_BOARDS * MAX_CPU_UNITS_PER_BOARD;
2598 
2599 		for (cpuid = 0; cpuid < max_cpuid; cpuid++)
2600 			if (DRMACH_CPUID2BNUM(cpuid) != avoid_board) {
2601 				cp = cpu_get(cpuid);
2602 				if (cp != NULL && cpu_is_online(cp))
2603 					break;
2604 			}
2605 
2606 		if (cpuid == max_cpuid) {
2607 			err = drerr_new(1, ESTF_JUGGLE, NULL);
2608 			return (err);
2609 		}
2610 
2611 		/* else, cp points to the selected target cpu */
2612 	} else {
2613 		cpuid = drmach_cpu_calc_id(dst_cpu);
2614 
2615 		if ((cp = cpu_get(cpuid)) == NULL) {
2616 			err = drerr_new(1, ESTF_NODEV, "%s::%s",
2617 			    dst_cpu->bp->cm.name, dst_cpu->cm.name);
2618 			return (err);
2619 		}
2620 
2621 		if (cpuid == SIGBCPU->cpu_id) {
2622 			cmn_err(CE_WARN,
2623 			    "SIGBCPU(%d) same as new selection(%d)",
2624 			    SIGBCPU->cpu_id, cpuid);
2625 
2626 			/* technically not an error, but a no-op */
2627 			return (NULL);
2628 		}
2629 	}
2630 
2631 	cmn_err(CE_NOTE, "?relocating SIGBCPU from %d to %d",
2632 	    SIGBCPU->cpu_id, cpuid);
2633 
2634 	DRMACH_PR("moving SIGBCPU to CPU %d\n", cpuid);
2635 
2636 	/*
2637 	 * Tell OBP to initialize cvc-offset field of new CPU0
2638 	 * so that it's in sync with OBP and cvc_server
2639 	 */
2640 	prom_starfire_init_console(cpuid);
2641 
2642 	/*
2643 	 * Assign cvc to new cpu0's bbsram for I/O.  This has to be
2644 	 * done BEFORE cpu0 is moved via obp, since this logic
2645 	 * will cause obp_helper to switch to a different bbsram for
2646 	 * cvc I/O.  We don't want cvc writing to a buffer from which
2647 	 * nobody will pick up the data!
2648 	 */
2649 	cvc_assign_iocpu(cpuid);
2650 
2651 	rv = prom_starfire_move_cpu0(cpuid);
2652 
2653 	if (rv == 0) {
2654 		SIGBCPU = cp;
2655 
2656 		DRMACH_PR("successfully juggled to CPU %d\n", cpuid);
2657 		return (NULL);
2658 	} else {
2659 		DRMACH_PR("prom error: prom_starfire_move_cpu0(%d) "
2660 		    "returned %d\n", cpuid, rv);
2661 
2662 		/*
2663 		 * The move failed, hopefully obp_helper is still back
2664 		 * at the old bootproc.  Move cvc back there.
2665 		 */
2666 		cvc_assign_iocpu(SIGBCPU->cpu_id);
2667 
2668 
2669 		err = drerr_new(1, ESTF_MOVESIGB, "CPU %d", cpuid);
2670 		return (err);
2671 	}
2672 	/*NOTREACHED*/
2673 }
2674 
2675 static sbd_error_t *
2676 drmach_cpu_release(drmachid_t id)
2677 {
2678 	drmach_device_t	*dp;
2679 	processorid_t	 cpuid;
2680 	struct cpu	*cp;
2681 	sbd_error_t	*err;
2682 
2683 	if (!DRMACH_IS_CPU_ID(id))
2684 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2685 	dp = id;
2686 	cpuid = drmach_cpu_calc_id(dp);
2687 
2688 	ASSERT(MUTEX_HELD(&cpu_lock));
2689 
2690 	cp = cpu_get(cpuid);
2691 	if (cp == NULL)
2692 		err = DRMACH_INTERNAL_ERROR();
2693 	else if (SIGBCPU->cpu_id == cp->cpu_id)
2694 		err = drmach_cpu_juggle_bootproc(NULL);
2695 	else
2696 		err = NULL;
2697 
2698 	return (err);
2699 }
2700 
2701 static sbd_error_t *
2702 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
2703 {
2704 	drmach_device_t *dp;
2705 
2706 	ASSERT(DRMACH_IS_CPU_ID(id));
2707 	dp = id;
2708 
2709 	stat->assigned = dp->bp->assigned;
2710 	stat->powered = dp->bp->powered;
2711 	mutex_enter(&cpu_lock);
2712 	stat->configured = (cpu_get(drmach_cpu_calc_id(dp)) != NULL);
2713 	mutex_exit(&cpu_lock);
2714 	stat->busy = dp->busy;
2715 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
2716 	stat->info[0] = '\0';
2717 
2718 	return (NULL);
2719 }
2720 
2721 sbd_error_t *
2722 drmach_cpu_disconnect(drmachid_t id)
2723 {
2724 	drmach_device_t	*cpu;
2725 	int		 cpuid;
2726 	int		 ntries;
2727 	int		 p;
2728 	u_longlong_t	 pc_addr;
2729 	uchar_t		 rvalue;
2730 
2731 	if (!DRMACH_IS_CPU_ID(id))
2732 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2733 	cpu = id;
2734 
2735 	cpuid = drmach_cpu_calc_id(cpu);
2736 	if (SIGBCPU->cpu_id == cpuid) {
2737 		/* this cpu is SIGBCPU, can't disconnect */
2738 		return (drerr_new(1, ESTF_HASSIGB, "%s::%s",
2739 		    cpu->bp->cm.name, cpu->cm.name));
2740 	}
2741 
2742 	/*
2743 	 * Make sure SIGBST_DETACHED is set before
2744 	 * mapping out the sig block.
2745 	 */
2746 	ntries = drmach_cpu_ntries;
2747 	while (!drmach_cpu_obp_is_detached(cpuid) && ntries) {
2748 		DELAY(drmach_cpu_delay);
2749 		ntries--;
2750 	}
2751 	if (!drmach_cpu_obp_is_detached(cpuid)) {
2752 		cmn_err(CE_WARN, "failed to mark cpu %d detached in sigblock",
2753 		    cpuid);
2754 	}
2755 
2756 	/* map out signature block */
2757 	if (CPU_SGN_EXISTS(cpuid)) {
2758 		CPU_SGN_MAPOUT(cpuid);
2759 	}
2760 
2761 	/*
2762 	 * We now PC IDLE the processor to guarantee we
2763 	 * stop any transactions from coming from it.
2764 	 */
2765 	p = cpu->unum & 1;
2766 	pc_addr = STARFIRE_BB_PC_ADDR(cpu->bp->bnum, cpu->unum, 0);
2767 
2768 	DRMACH_PR("PC idle cpu %d (addr = 0x%llx, port = %d, p = %d)",
2769 	    drmach_cpu_calc_id(cpu), pc_addr, cpu->unum, p);
2770 
2771 	rvalue = ldbphysio(pc_addr);
2772 	rvalue |= STARFIRE_BB_PC_IDLE(p);
2773 	stbphysio(pc_addr, rvalue);
2774 	DELAY(50000);
2775 
2776 	return (NULL);
2777 }
2778 
2779 sbd_error_t *
2780 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
2781 {
2782 	drmach_device_t *cpu;
2783 
2784 	if (!DRMACH_IS_CPU_ID(id))
2785 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2786 	cpu = id;
2787 
2788 	*cpuid = drmach_cpu_calc_id(cpu);
2789 	return (NULL);
2790 }
2791 
2792 sbd_error_t *
2793 drmach_cpu_get_impl(drmachid_t id, int *ip)
2794 {
2795 	drmach_device_t *cpu;
2796 	int		impl;
2797 
2798 	if (!DRMACH_IS_CPU_ID(id))
2799 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2800 
2801 	cpu = id;
2802 
2803 	if (drmach_node_get_prop(cpu->node, "implementation#", &impl) == -1) {
2804 		return (DRMACH_INTERNAL_ERROR());
2805 	}
2806 
2807 	*ip = impl;
2808 
2809 	return (NULL);
2810 }
2811 
2812 void
2813 drmach_cpu_flush_ecache_sync(void)
2814 {
2815 	ASSERT(curthread->t_bound_cpu == CPU);
2816 
2817 	/*
2818 	 * Now let's flush our ecache thereby removing all references
2819 	 * to the target (detaching) memory from all ecache's in
2820 	 * system.
2821 	 */
2822 	cpu_flush_ecache();
2823 
2824 	/*
2825 	 * Delay 100 usec out of paranoia to insure everything
2826 	 * (hardware queues) has drained before we start reprogramming
2827 	 * the hardware.
2828 	 */
2829 	DELAY(100);
2830 }
2831 
2832 sbd_error_t *
2833 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2834 {
2835 	drmach_device_t	*dp;
2836 
2837 	if (!DRMACH_IS_DEVICE_ID(id))
2838 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2839 	dp = id;
2840 
2841 	*dip = drmach_node_get_dip(dp->node);
2842 	return (NULL);
2843 }
2844 
2845 sbd_error_t *
2846 drmach_io_is_attached(drmachid_t id, int *yes)
2847 {
2848 	drmach_device_t *dp;
2849 	dev_info_t	*dip;
2850 	int		state;
2851 
2852 	if (!DRMACH_IS_IO_ID(id))
2853 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2854 	dp = id;
2855 
2856 	dip = drmach_node_get_dip(dp->node);
2857 	if (dip == NULL) {
2858 		*yes = 0;
2859 		return (NULL);
2860 	}
2861 
2862 	state = ddi_get_devstate(dip);
2863 	*yes = (i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP));
2864 
2865 	return (NULL);
2866 }
2867 
2868 sbd_error_t *
2869 drmach_io_pre_release(drmachid_t id)
2870 {
2871 	if (!DRMACH_IS_IO_ID(id))
2872 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2873 	return (NULL);
2874 }
2875 
2876 static sbd_error_t *
2877 drmach_io_release(drmachid_t id)
2878 {
2879 	if (!DRMACH_IS_IO_ID(id))
2880 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2881 	return (NULL);
2882 }
2883 
2884 sbd_error_t *
2885 drmach_io_unrelease(drmachid_t id)
2886 {
2887 	if (!DRMACH_IS_IO_ID(id))
2888 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2889 	return (NULL);
2890 }
2891 
2892 /*ARGSUSED*/
2893 sbd_error_t *
2894 drmach_io_post_release(drmachid_t id)
2895 {
2896 	return (NULL);
2897 }
2898 
2899 /*ARGSUSED*/
2900 sbd_error_t *
2901 drmach_io_post_attach(drmachid_t id)
2902 {
2903 	return (NULL);
2904 }
2905 
2906 static sbd_error_t *
2907 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2908 {
2909 	drmach_device_t *dp;
2910 	sbd_error_t	*err;
2911 	int		 configured;
2912 
2913 	ASSERT(DRMACH_IS_IO_ID(id));
2914 	dp = id;
2915 
2916 	err = drmach_io_is_attached(id, &configured);
2917 	if (err)
2918 		return (err);
2919 
2920 	stat->assigned = dp->bp->assigned;
2921 	stat->powered = dp->bp->powered;
2922 	stat->configured = (configured != 0);
2923 	stat->busy = dp->busy;
2924 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
2925 	stat->info[0] = '\0';
2926 
2927 	return (NULL);
2928 }
2929 
2930 static sbd_error_t *
2931 drmach_mem_new(drmach_device_t *dp)
2932 {
2933 	static sbd_error_t *drmach_mem_release(drmachid_t);
2934 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2935 
2936 	dp->unum = 0;
2937 	dp->cm.isa = (void *)drmach_mem_new;
2938 	dp->cm.release = drmach_mem_release;
2939 	dp->cm.status = drmach_mem_status;
2940 
2941 	(void) snprintf(dp->cm.name, sizeof (dp->cm.name), "%s", dp->type);
2942 
2943 	return (NULL);
2944 }
2945 
2946 sbd_error_t *
2947 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2948 {
2949 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2950 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2951 	pda_handle_t	ph;
2952 	int		rv;
2953 
2954 	ASSERT(size != 0);
2955 
2956 	if (!DRMACH_IS_MEM_ID(id))
2957 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2958 
2959 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
2960 	if (rv == ENOMEM) {
2961 		cmn_err(CE_WARN, "%lu megabytes not available to kernel cage",
2962 		    (ulong_t)(size == 0 ? 0 : size / MBYTE));
2963 	} else if (rv != 0) {
2964 		/* catch this in debug kernels */
2965 		ASSERT(0);
2966 
2967 		cmn_err(CE_WARN, "unexpected kcage_range_add"
2968 		    " return value %d", rv);
2969 	}
2970 
2971 	/*
2972 	 * Update the PDA (post2obp) structure with the
2973 	 * range of the newly added memory.
2974 	 */
2975 	ph = drmach_pda_open();
2976 	if (ph != NULL) {
2977 		pda_mem_add_span(ph, basepa, size);
2978 		pda_close(ph);
2979 	}
2980 
2981 	return (NULL);
2982 }
2983 
2984 sbd_error_t *
2985 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2986 {
2987 	drmach_device_t	*mem = id;
2988 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2989 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2990 	uint_t		mcreg;
2991 	sbd_error_t	*err;
2992 	pda_handle_t	ph;
2993 	int		rv;
2994 
2995 	err = drmach_read_mc_asr(id, &mcreg);
2996 	if (err)
2997 		return (err);
2998 	else if (mcreg & STARFIRE_MC_INTERLEAVE_MASK) {
2999 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
3000 		    mem->bp->cm.name, mem->cm.name));
3001 	}
3002 
3003 	if (size > 0) {
3004 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
3005 		if (rv != 0) {
3006 			cmn_err(CE_WARN,
3007 			    "unexpected kcage_range_delete_post_mem_del"
3008 			    " return value %d", rv);
3009 			return (DRMACH_INTERNAL_ERROR());
3010 		}
3011 	}
3012 
3013 	/*
3014 	 * Update the PDA (post2obp) structure with the
3015 	 * range of removed memory.
3016 	 */
3017 	ph = drmach_pda_open();
3018 	if (ph != NULL) {
3019 		if (size > 0)
3020 			pda_mem_del_span(ph, basepa, size);
3021 
3022 		/* update PDA to board's new mc register settings */
3023 		pda_mem_sync(ph, mem->bp->bnum, 0);
3024 
3025 		pda_close(ph);
3026 	}
3027 
3028 	return (NULL);
3029 }
3030 
3031 /* support routine for enable and disable */
3032 static sbd_error_t *
3033 drmach_mem_update_interconnect(drmachid_t id, uint_t mcreg)
3034 {
3035 	drmach_device_t	*dp;
3036 	pda_handle_t	 ph;
3037 	int		 b;
3038 
3039 	if (!DRMACH_IS_MEM_ID(id))
3040 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3041 	dp = id;
3042 
3043 	ph = drmach_pda_open();
3044 	if (ph == NULL)
3045 		return (DRMACH_INTERNAL_ERROR());
3046 
3047 	for (b = 0; b < MAX_BOARDS; b++) {
3048 		int		p;
3049 		int		rv;
3050 		ushort_t	bda_proc, bda_ioc;
3051 		board_desc_t	*bdesc;
3052 
3053 		if (pda_board_present(ph, b) == 0)
3054 			continue;
3055 
3056 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
3057 
3058 		/*
3059 		 * Update PCs for CPUs.
3060 		 */
3061 
3062 		/* make sure definition in platmod is in sync with pda */
3063 		ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3064 
3065 		bda_proc = bdesc->bda_proc;
3066 		for (p = 0; p < MAX_PROCMODS; p++) {
3067 			if (BDA_NBL(bda_proc, p) != BDAN_GOOD)
3068 				continue;
3069 
3070 			rv = pc_madr_add(b, dp->bp->bnum, p, mcreg);
3071 			if (rv) {
3072 				pda_close(ph);
3073 				return (DRMACH_INTERNAL_ERROR());
3074 			}
3075 		}
3076 
3077 		/*
3078 		 * Update PCs for IOCs.
3079 		 */
3080 
3081 		/* make sure definition in platmod is in sync with pda */
3082 		ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3083 
3084 		bda_ioc = bdesc->bda_ioc;
3085 		for (p = 0; p < MAX_IOCS; p++) {
3086 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
3087 				continue;
3088 
3089 			rv = pc_madr_add(b, dp->bp->bnum, p + 4, mcreg);
3090 			if (rv) {
3091 				pda_close(ph);
3092 				return (DRMACH_INTERNAL_ERROR());
3093 			}
3094 		}
3095 	}
3096 
3097 	pda_close(ph);
3098 	return (NULL);
3099 }
3100 
3101 sbd_error_t *
3102 drmach_mem_disable(drmachid_t id)
3103 {
3104 	sbd_error_t	*err;
3105 	uint_t		 mcreg;
3106 
3107 	err = drmach_read_mc_asr(id, &mcreg);
3108 	if (err == NULL) {
3109 		ASSERT(mcreg & STARFIRE_MC_MEM_PRESENT_MASK);
3110 
3111 		/* Turn off presence bit. */
3112 		mcreg &= ~STARFIRE_MC_MEM_PRESENT_MASK;
3113 
3114 		err = drmach_mem_update_interconnect(id, mcreg);
3115 		if (err == NULL)
3116 			err = drmach_write_mc_asr(id, mcreg);
3117 	}
3118 
3119 	return (err);
3120 }
3121 
3122 sbd_error_t *
3123 drmach_mem_enable(drmachid_t id)
3124 {
3125 	sbd_error_t	*err;
3126 	uint_t		 mcreg;
3127 
3128 	err = drmach_read_mc_asr(id, &mcreg);
3129 	if (err == NULL) {
3130 		mcreg |= STARFIRE_MC_MEM_PRESENT_MASK;
3131 
3132 		err = drmach_write_mc_asr(id, mcreg);
3133 		if (err == NULL)
3134 			err = drmach_mem_update_interconnect(id, mcreg);
3135 	}
3136 
3137 	return (err);
3138 }
3139 
3140 sbd_error_t *
3141 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
3142 {
3143 	drmach_device_t	*mem;
3144 	sbd_error_t	*err;
3145 	pnode_t		 nodeid;
3146 
3147 	if (!DRMACH_IS_MEM_ID(id))
3148 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3149 	mem = id;
3150 
3151 	nodeid = drmach_node_get_dnode(mem->node);
3152 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
3153 		err = DRMACH_INTERNAL_ERROR();
3154 	else {
3155 		uint64_t size;
3156 
3157 		size = mc_get_alignment_mask(nodeid);
3158 		if (size == (uint64_t)-1)
3159 			err = DRMACH_INTERNAL_ERROR();
3160 		else {
3161 			*mask = size - 1;
3162 			err = NULL;
3163 		}
3164 	}
3165 
3166 	return (err);
3167 }
3168 
3169 sbd_error_t *
3170 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
3171 {
3172 	sbd_error_t	*err;
3173 	uint_t		 mcreg;
3174 
3175 	err = drmach_read_mc_asr(id, &mcreg);
3176 	if (err == NULL)
3177 		*pa = mc_asr_to_pa(mcreg);
3178 
3179 	return (err);
3180 }
3181 
3182 /*
3183  * Use of this routine after copy/rename will yield incorrect results,
3184  * because the OBP MEMAVAIL property will not correctly reflect the
3185  * programming of the MCs.
3186  */
3187 sbd_error_t *
3188 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
3189 {
3190 	drmach_device_t	*mem;
3191 	int		rv, i, rlen, rblks;
3192 	sbd_error_t	*err;
3193 	struct memlist	*mlist;
3194 	struct sf_memunit_regspec *rlist;
3195 
3196 	if (!DRMACH_IS_MEM_ID(id))
3197 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3198 	mem = id;
3199 
3200 	err = drmach_device_get_proplen(mem, "dr-available", &rlen);
3201 	if (err)
3202 		return (err);
3203 
3204 	rlist = kmem_zalloc(rlen, KM_SLEEP);
3205 
3206 	err = drmach_device_get_prop(mem, "dr-available", rlist);
3207 	if (err) {
3208 		kmem_free(rlist, rlen);
3209 		return (err);
3210 	}
3211 
3212 	mlist = NULL;
3213 	rblks = rlen / sizeof (struct sf_memunit_regspec);
3214 	for (i = 0; i < rblks; i++) {
3215 		uint64_t	addr, size;
3216 
3217 		addr  = (uint64_t)rlist[i].regspec_addr_hi << 32;
3218 		addr |= (uint64_t)rlist[i].regspec_addr_lo;
3219 		size  = (uint64_t)rlist[i].regspec_size_hi << 32;
3220 		size |= (uint64_t)rlist[i].regspec_size_lo;
3221 
3222 		mlist = memlist_add_span(mlist, addr, size);
3223 	}
3224 
3225 	kmem_free(rlist, rlen);
3226 
3227 	/*
3228 	 * Make sure the incoming memlist doesn't already
3229 	 * intersect with what's present in the system (phys_install).
3230 	 */
3231 	memlist_read_lock();
3232 	rv = memlist_intersect(phys_install, mlist);
3233 	memlist_read_unlock();
3234 	if (rv) {
3235 #ifdef DEBUG
3236 		DRMACH_PR("OBP derived memlist intersects"
3237 		    " with phys_install\n");
3238 		memlist_dump(mlist);
3239 
3240 		DRMACH_PR("phys_install memlist:\n");
3241 		memlist_dump(phys_install);
3242 #endif
3243 
3244 		memlist_delete(mlist);
3245 		return (DRMACH_INTERNAL_ERROR());
3246 	}
3247 
3248 #ifdef DEBUG
3249 	DRMACH_PR("OBP derived memlist:");
3250 	memlist_dump(mlist);
3251 #endif
3252 
3253 	*ml = mlist;
3254 	return (NULL);
3255 }
3256 
3257 sbd_error_t *
3258 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
3259 {
3260 	drmach_device_t	*mem;
3261 	pda_handle_t	ph;
3262 	pgcnt_t		npages;
3263 
3264 	if (!DRMACH_IS_MEM_ID(id))
3265 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3266 	mem = id;
3267 
3268 	ph = drmach_pda_open();
3269 	if (ph == NULL)
3270 		return (DRMACH_INTERNAL_ERROR());
3271 
3272 	npages = pda_get_mem_size(ph, mem->bp->bnum);
3273 	*bytes = (uint64_t)npages << PAGESHIFT;
3274 
3275 	pda_close(ph);
3276 	return (NULL);
3277 }
3278 
3279 sbd_error_t *
3280 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
3281 {
3282 	if (!DRMACH_IS_MEM_ID(id))
3283 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3284 
3285 	*bytes = mc_get_mem_alignment();
3286 	return (NULL);
3287 }
3288 
3289 /* field debugging tool */
3290 processorid_t drmach_mem_cpu_affinity_nail = 0;
3291 
3292 processorid_t
3293 drmach_mem_cpu_affinity(drmachid_t id)
3294 {
3295 	drmach_device_t	*mp;
3296 	drmach_board_t	*bp;
3297 	processorid_t	 cpuid;
3298 
3299 	if (!DRMACH_IS_MEM_ID(id))
3300 		return (CPU_CURRENT);
3301 
3302 	if (drmach_mem_cpu_affinity_nail) {
3303 		cpuid = drmach_mem_cpu_affinity_nail;
3304 
3305 		if (cpuid < 0 || cpuid > NCPU)
3306 			return (CPU_CURRENT);
3307 
3308 		mutex_enter(&cpu_lock);
3309 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
3310 			cpuid = CPU_CURRENT;
3311 		mutex_exit(&cpu_lock);
3312 
3313 		return (cpuid);
3314 	}
3315 
3316 	/* try to choose a proc on the target board */
3317 	mp = id;
3318 	bp = mp->bp;
3319 	if (bp->devices) {
3320 		int		rv;
3321 		int		d_idx;
3322 		drmachid_t	d_id;
3323 
3324 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3325 		while (rv == 0) {
3326 			if (DRMACH_IS_CPU_ID(d_id)) {
3327 				cpuid = drmach_cpu_calc_id(d_id);
3328 
3329 				mutex_enter(&cpu_lock);
3330 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
3331 					mutex_exit(&cpu_lock);
3332 					DRMACH_PR("drmach_mem_cpu_affinity: "
3333 					    "selected cpuid=%d\n", cpuid);
3334 					return (cpuid);
3335 				} else {
3336 					mutex_exit(&cpu_lock);
3337 				}
3338 			}
3339 
3340 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3341 		}
3342 	}
3343 
3344 	/* otherwise, this proc, wherever it is */
3345 	DRMACH_PR("drmach_mem_cpu_affinity: using default CPU_CURRENT\n");
3346 
3347 	return (CPU_CURRENT);
3348 }
3349 
3350 static sbd_error_t *
3351 drmach_mem_release(drmachid_t id)
3352 {
3353 	if (!DRMACH_IS_MEM_ID(id))
3354 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3355 	return (NULL);
3356 }
3357 
3358 static sbd_error_t *
3359 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
3360 {
3361 	drmach_device_t *dp;
3362 	sbd_error_t	*err;
3363 	uint64_t	 pa, slice_size;
3364 	struct memlist	*ml;
3365 
3366 	ASSERT(DRMACH_IS_MEM_ID(id));
3367 	dp = id;
3368 
3369 	/* get starting physical address of target memory */
3370 	err = drmach_mem_get_base_physaddr(id, &pa);
3371 	if (err)
3372 		return (err);
3373 
3374 	/* round down to slice boundary */
3375 	slice_size = mc_get_mem_alignment();
3376 	pa &= ~ (slice_size - 1);
3377 
3378 	/* stop at first span that is in slice */
3379 	memlist_read_lock();
3380 	for (ml = phys_install; ml; ml = ml->ml_next)
3381 		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
3382 			break;
3383 	memlist_read_unlock();
3384 
3385 	stat->assigned = dp->bp->assigned;
3386 	stat->powered = dp->bp->powered;
3387 	stat->configured = (ml != NULL);
3388 	stat->busy = dp->busy;
3389 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
3390 	stat->info[0] = '\0';
3391 
3392 	return (NULL);
3393 }
3394 
3395 static int
3396 drmach_detach_board(void *arg)
3397 {
3398 	cpuset_t	cset;
3399 	int		retval;
3400 	drmach_board_t	*bp = (drmach_board_t *)arg;
3401 
3402 	cset = cpu_ready_set;
3403 	promsafe_xc_attention(cset);
3404 
3405 	retval = prom_starfire_rm_brd(bp->bnum);
3406 
3407 	xc_dismissed(cset);
3408 
3409 	return (retval);
3410 }
3411 
3412 sbd_error_t *
3413 drmach_board_deprobe(drmachid_t id)
3414 {
3415 	drmach_board_t	*bp;
3416 	int		 retval;
3417 
3418 	if (!DRMACH_IS_BOARD_ID(id))
3419 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3420 	bp = id;
3421 
3422 	cmn_err(CE_CONT, "DR: PROM detach board %d\n", bp->bnum);
3423 
3424 	retval = prom_tree_update(drmach_detach_board, bp);
3425 
3426 	if (retval == 0)
3427 		return (NULL);
3428 	else {
3429 		cmn_err(CE_WARN, "prom error: prom_starfire_rm_brd(%d) "
3430 		    "returned %d", bp->bnum, retval);
3431 		return (drerr_new(1, ESTF_DEPROBE, "%s", bp->cm.name));
3432 	}
3433 }
3434 
3435 /*ARGSUSED*/
3436 static sbd_error_t *
3437 drmach_pt_juggle_bootproc(drmachid_t id, drmach_opts_t *opts)
3438 {
3439 	drmach_device_t	*cpu;
3440 	sbd_error_t	*err;
3441 
3442 	if (!DRMACH_IS_CPU_ID(id))
3443 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3444 	cpu = id;
3445 
3446 	mutex_enter(&cpu_lock);
3447 
3448 	err = drmach_cpu_juggle_bootproc(cpu);
3449 
3450 	mutex_exit(&cpu_lock);
3451 
3452 	return (err);
3453 }
3454 
3455 /*ARGSUSED*/
3456 static sbd_error_t *
3457 drmach_pt_dump_pdainfo(drmachid_t id, drmach_opts_t *opts)
3458 {
3459 	drmach_board_t	*bp;
3460 	int		board;
3461 	int		i;
3462 	pda_handle_t	ph;
3463 	board_desc_t	*bdesc;
3464 
3465 	if (!DRMACH_IS_BOARD_ID(id))
3466 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3467 	bp = id;
3468 	board = bp->bnum;
3469 
3470 	ph = drmach_pda_open();
3471 	if (ph == NULL)
3472 		return (DRMACH_INTERNAL_ERROR());
3473 
3474 	if (pda_board_present(ph, board) == 0) {
3475 		cmn_err(CE_CONT, "board %d is MISSING\n", board);
3476 		pda_close(ph);
3477 		return (DRMACH_INTERNAL_ERROR());
3478 	}
3479 
3480 	cmn_err(CE_CONT, "board %d is PRESENT\n", board);
3481 
3482 	bdesc = (board_desc_t *)pda_get_board_info(ph, board);
3483 	if (bdesc == NULL) {
3484 		cmn_err(CE_CONT,
3485 		    "no board descriptor found for board %d\n",
3486 		    board);
3487 		pda_close(ph);
3488 		return (DRMACH_INTERNAL_ERROR());
3489 	}
3490 
3491 	/* make sure definition in platmod is in sync with pda */
3492 	ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3493 
3494 	for (i = 0; i < MAX_PROCMODS; i++) {
3495 		if (BDA_NBL(bdesc->bda_proc, i) == BDAN_GOOD)
3496 			cmn_err(CE_CONT,
3497 			    "proc %d.%d PRESENT\n", board, i);
3498 		else
3499 			cmn_err(CE_CONT,
3500 			    "proc %d.%d MISSING\n", board, i);
3501 	}
3502 
3503 	for (i = 0; i < MAX_MGROUPS; i++) {
3504 		if (BDA_NBL(bdesc->bda_mgroup, i) == BDAN_GOOD)
3505 			cmn_err(CE_CONT,
3506 			    "mgroup %d.%d PRESENT\n", board, i);
3507 		else
3508 			cmn_err(CE_CONT,
3509 			    "mgroup %d.%d MISSING\n", board, i);
3510 	}
3511 
3512 	/* make sure definition in platmod is in sync with pda */
3513 	ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3514 
3515 	for (i = 0; i < MAX_IOCS; i++) {
3516 		int	s;
3517 
3518 		if (BDA_NBL(bdesc->bda_ioc, i) == BDAN_GOOD) {
3519 			cmn_err(CE_CONT,
3520 			    "ioc %d.%d PRESENT\n", board, i);
3521 			for (s = 0; s < MAX_SLOTS_PER_IOC; s++) {
3522 				if (BDA_NBL(bdesc->bda_ios[i], s) != BDAN_GOOD)
3523 					continue;
3524 				cmn_err(CE_CONT,
3525 				    "..scard %d.%d.%d PRESENT\n",
3526 				    board, i, s);
3527 			}
3528 		} else {
3529 			cmn_err(CE_CONT,
3530 			    "ioc %d.%d MISSING\n",
3531 			    board, i);
3532 		}
3533 	}
3534 
3535 	cmn_err(CE_CONT,
3536 	    "board %d memsize = %d pages\n",
3537 	    board, pda_get_mem_size(ph, board));
3538 
3539 	pda_close(ph);
3540 
3541 	return (NULL);
3542 }
3543 
3544 /*ARGSUSED*/
3545 sbd_error_t *
3546 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
3547 {
3548 	struct memlist	*ml;
3549 	uint64_t	src_pa;
3550 	uint64_t	dst_pa;
3551 	uint64_t	dst;
3552 
3553 	dst_pa = va_to_pa(&dst);
3554 
3555 	memlist_read_lock();
3556 	for (ml = phys_install; ml; ml = ml->ml_next) {
3557 		uint64_t	nbytes;
3558 
3559 		src_pa = ml->ml_address;
3560 		nbytes = ml->ml_size;
3561 
3562 		while (nbytes != 0ull) {
3563 
3564 			/* copy 32 bytes at arc_pa to dst_pa */
3565 			bcopy32_il(src_pa, dst_pa);
3566 
3567 			/* increment by 32 bytes */
3568 			src_pa += (4 * sizeof (uint64_t));
3569 
3570 			/* decrement by 32 bytes */
3571 			nbytes -= (4 * sizeof (uint64_t));
3572 		}
3573 	}
3574 	memlist_read_unlock();
3575 
3576 	return (NULL);
3577 }
3578 
3579 static struct {
3580 	const char	*name;
3581 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
3582 } drmach_pt_arr[] = {
3583 	{ "juggle",		drmach_pt_juggle_bootproc	},
3584 	{ "pda",		drmach_pt_dump_pdainfo		},
3585 	{ "readmem",		drmach_pt_readmem		},
3586 
3587 	/* the following line must always be last */
3588 	{ NULL,			NULL				}
3589 };
3590 
3591 /*ARGSUSED*/
3592 sbd_error_t *
3593 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
3594 {
3595 	int		i;
3596 	sbd_error_t	*err;
3597 
3598 	i = 0;
3599 	while (drmach_pt_arr[i].name != NULL) {
3600 		int len = strlen(drmach_pt_arr[i].name);
3601 
3602 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
3603 			break;
3604 
3605 		i += 1;
3606 	}
3607 
3608 	if (drmach_pt_arr[i].name == NULL)
3609 		err = drerr_new(0, ESTF_UNKPTCMD, opts->copts);
3610 	else
3611 		err = (*drmach_pt_arr[i].handler)(id, opts);
3612 
3613 	return (err);
3614 }
3615 
3616 sbd_error_t *
3617 drmach_release(drmachid_t id)
3618 {
3619 	drmach_common_t *cp;
3620 	if (!DRMACH_IS_DEVICE_ID(id))
3621 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3622 	cp = id;
3623 
3624 	return (cp->release(id));
3625 }
3626 
3627 sbd_error_t *
3628 drmach_status(drmachid_t id, drmach_status_t *stat)
3629 {
3630 	drmach_common_t *cp;
3631 
3632 	if (!DRMACH_IS_ID(id))
3633 		return (drerr_new(0, ESTF_NOTID, NULL));
3634 	cp = id;
3635 
3636 	return (cp->status(id, stat));
3637 }
3638 
3639 sbd_error_t *
3640 drmach_unconfigure(drmachid_t id, int flags)
3641 {
3642 	drmach_device_t	*dp;
3643 	pnode_t		 nodeid;
3644 	dev_info_t	*dip, *fdip = NULL;
3645 
3646 	if (!DRMACH_IS_DEVICE_ID(id))
3647 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3648 
3649 	dp = id;
3650 
3651 	nodeid = drmach_node_get_dnode(dp->node);
3652 	if (nodeid == OBP_NONODE)
3653 		return (DRMACH_INTERNAL_ERROR());
3654 
3655 	dip = e_ddi_nodeid_to_dip(nodeid);
3656 	if (dip == NULL)
3657 		return (NULL);
3658 
3659 	/*
3660 	 * Branch already held, so hold acquired in
3661 	 * e_ddi_nodeid_to_dip() can be released
3662 	 */
3663 	ddi_release_devi(dip);
3664 
3665 	if (flags & DEVI_BRANCH_DESTROY)
3666 		flags |= DEVI_BRANCH_EVENT;
3667 
3668 	/*
3669 	 * Force flag is no longer necessary. See starcat/io/drmach.c
3670 	 * for details.
3671 	 */
3672 	ASSERT(e_ddi_branch_held(dip));
3673 	if (e_ddi_branch_unconfigure(dip, &fdip, flags)) {
3674 		sbd_error_t	*err;
3675 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3676 
3677 		/*
3678 		 * If non-NULL, fdip is returned held and must be released.
3679 		 */
3680 		if (fdip != NULL) {
3681 			(void) ddi_pathname(fdip, path);
3682 			ndi_rele_devi(fdip);
3683 		} else {
3684 			(void) ddi_pathname(dip, path);
3685 		}
3686 
3687 		err = drerr_new(1, ESTF_DRVFAIL, path);
3688 
3689 		kmem_free(path, MAXPATHLEN);
3690 
3691 		return (err);
3692 	}
3693 
3694 	return (NULL);
3695 }
3696 
3697 /*
3698  * drmach interfaces to legacy Starfire platmod logic
3699  * linkage via runtime symbol look up, called from plat_cpu_power*
3700  */
3701 
3702 /*
3703  * Start up a cpu.  It is possible that we're attempting to restart
3704  * the cpu after an UNCONFIGURE in which case the cpu will be
3705  * spinning in its cache.  So, all we have to do is wakeup him up.
3706  * Under normal circumstances the cpu will be coming from a previous
3707  * CONNECT and thus will be spinning in OBP.  In both cases, the
3708  * startup sequence is the same.
3709  */
3710 int
3711 drmach_cpu_poweron(struct cpu *cp)
3712 {
3713 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
3714 
3715 	ASSERT(MUTEX_HELD(&cpu_lock));
3716 
3717 	if (drmach_cpu_start(cp) != 0)
3718 		return (EBUSY);
3719 	else
3720 		return (0);
3721 }
3722 
3723 int
3724 drmach_cpu_poweroff(struct cpu *cp)
3725 {
3726 	int		ntries, cnt;
3727 	processorid_t	cpuid = cp->cpu_id;
3728 	void		drmach_cpu_shutdown_self(void);
3729 
3730 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
3731 
3732 	ASSERT(MUTEX_HELD(&cpu_lock));
3733 
3734 	/*
3735 	 * Capture all CPUs (except for detaching proc) to prevent
3736 	 * crosscalls to the detaching proc until it has cleared its
3737 	 * bit in cpu_ready_set.
3738 	 *
3739 	 * The CPU's remain paused and the prom_mutex is known to be free.
3740 	 * This prevents the x-trap victim from blocking when doing prom
3741 	 * IEEE-1275 calls at a high PIL level.
3742 	 */
3743 	promsafe_pause_cpus();
3744 
3745 	/*
3746 	 * Quiesce interrupts on the target CPU. We do this by setting
3747 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3748 	 * prevent it from receiving cross calls and cross traps.
3749 	 * This prevents the processor from receiving any new soft interrupts.
3750 	 */
3751 	mp_cpu_quiesce(cp);
3752 
3753 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
3754 	drmach_xt_mb[cpuid] = 0x80;
3755 
3756 	xt_one_unchecked(cpuid, (xcfunc_t *)idle_stop_xcall,
3757 	    (uint64_t)drmach_cpu_shutdown_self, NULL);
3758 
3759 	ntries = drmach_cpu_ntries;
3760 	cnt = 0;
3761 	while (drmach_xt_mb[cpuid] && ntries) {
3762 		DELAY(drmach_cpu_delay);
3763 		ntries--;
3764 		cnt++;
3765 	}
3766 
3767 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
3768 
3769 	start_cpus();
3770 
3771 	DRMACH_PR("waited %d out of %d tries for "
3772 	    "drmach_cpu_shutdown_self on cpu%d",
3773 	    drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
3774 
3775 	drmach_cpu_obp_detach(cpuid);
3776 
3777 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
3778 
3779 	return (0);
3780 }
3781 
3782 /*ARGSUSED*/
3783 int
3784 drmach_verify_sr(dev_info_t *dip, int sflag)
3785 {
3786 	return (0);
3787 }
3788 
/*
 * No-op: this implementation performs no action.
 * Presumably called as the final step of a suspend -- confirm against
 * the caller.
 */
void
drmach_suspend_last(void)
{
	/* nothing to do */
}
3793 
/*
 * No-op: this implementation performs no action.
 * Presumably called as the first step of a resume -- confirm against
 * the caller.
 */
void
drmach_resume_first(void)
{
	/* nothing to do */
}
3798 
3799 /*
3800  * Log a DR sysevent.
3801  * Return value: 0 success, non-zero failure.
3802  */
3803 int
3804 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
3805 {
3806 	sysevent_t			*ev;
3807 	sysevent_id_t			eid;
3808 	int				rv, km_flag;
3809 	sysevent_value_t		evnt_val;
3810 	sysevent_attr_list_t		*evnt_attr_list = NULL;
3811 	char				attach_pnt[MAXNAMELEN];
3812 
3813 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3814 	attach_pnt[0] = '\0';
3815 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
3816 		rv = -1;
3817 		goto logexit;
3818 	}
3819 	if (verbose)
3820 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
3821 		    attach_pnt, hint, flag, verbose);
3822 
3823 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
3824 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
3825 		rv = -2;
3826 		goto logexit;
3827 	}
3828 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3829 	evnt_val.value.sv_string = attach_pnt;
3830 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
3831 	    &evnt_val, km_flag)) != 0)
3832 		goto logexit;
3833 
3834 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3835 	evnt_val.value.sv_string = hint;
3836 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
3837 	    &evnt_val, km_flag)) != 0) {
3838 		sysevent_free_attr(evnt_attr_list);
3839 		goto logexit;
3840 	}
3841 
3842 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
3843 
3844 	/*
3845 	 * Log the event but do not sleep waiting for its
3846 	 * delivery. This provides insulation from syseventd.
3847 	 */
3848 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
3849 
3850 logexit:
3851 	if (ev)
3852 		sysevent_free(ev);
3853 	if ((rv != 0) && verbose)
3854 		cmn_err(CE_WARN,
3855 		    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
3856 		    rv, attach_pnt, hint);
3857 
3858 	return (rv);
3859 }
3860 
3861 /*ARGSUSED*/
3862 int
3863 drmach_allow_memrange_modify(drmachid_t id)
3864 {
3865 	return (1);	/* TRUE */
3866 }
3867