xref: /titanic_50/usr/src/uts/sun4u/starfire/io/drmach.c (revision 2e107de79998f3036decec2454002940afb9a6ff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/debug.h>
27 #include <sys/types.h>
28 #include <sys/varargs.h>
29 #include <sys/errno.h>
30 #include <sys/cred.h>
31 #include <sys/dditypes.h>
32 #include <sys/devops.h>
33 #include <sys/modctl.h>
34 #include <sys/poll.h>
35 #include <sys/conf.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/ndi_impldefs.h>
40 #include <sys/stat.h>
41 #include <sys/kmem.h>
42 #include <sys/vmem.h>
43 #include <sys/processor.h>
44 #include <sys/spitregs.h>
45 #include <sys/cpuvar.h>
46 #include <sys/cpupart.h>
47 #include <sys/mem_config.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/systm.h>
50 #include <sys/machsystm.h>
51 #include <sys/autoconf.h>
52 #include <sys/cmn_err.h>
53 #include <sys/sysmacros.h>
54 #include <sys/x_call.h>
55 #include <sys/promif.h>
56 #include <sys/prom_plat.h>
57 #include <sys/membar.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/mem_cage.h>
60 #include <sys/stack.h>
61 #include <sys/archsystm.h>
62 #include <vm/hat_sfmmu.h>
63 #include <sys/pte.h>
64 #include <sys/mmu.h>
65 #include <sys/cpu_module.h>
66 #include <sys/obpdefs.h>
67 #include <sys/note.h>
68 
69 #include <sys/starfire.h>	/* plat_max_... decls */
70 #include <sys/cvc.h>
71 #include <sys/cpu_sgnblk_defs.h>
72 #include <sys/drmach.h>
73 #include <sys/dr_util.h>
74 #include <sys/pda.h>
75 
76 #include <sys/sysevent.h>
77 #include <sys/sysevent/dr.h>
78 #include <sys/sysevent/eventdefs.h>
79 
80 
/*
 * Routines implemented outside this file.  Only the prototypes are
 * visible here; the "_il" suffix presumably marks inline assembly
 * templates -- TODO confirm.
 */
extern void		bcopy32_il(uint64_t, uint64_t);
extern void		flush_ecache_il(
				uint64_t physaddr, int size, int linesz);
extern uint_t		ldphysio_il(uint64_t physaddr);
extern void		stphysio_il(uint64_t physaddr, uint_t value);

/* memory controller (mc) helpers; most are keyed by a PROM node id */
extern uint64_t		mc_get_mem_alignment(void);
extern uint64_t		mc_get_asr_addr(pnode_t);
extern uint64_t		mc_get_idle_addr(pnode_t);
extern uint64_t		mc_get_alignment_mask(pnode_t);
extern int		mc_read_asr(pnode_t, uint_t *);
extern int		mc_write_asr(pnode_t, uint_t);
extern uint64_t		mc_asr_to_pa(uint_t);
extern uint_t		mc_pa_to_asr(uint_t, uint64_t);

extern int		pc_madr_add(int, int, int, int);
97 
/*
 * Argument bundle handed to a node-walk callback: the node being walked
 * and the opaque data pointer supplied by the caller of the walk.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;
102 
/*
 * A cursor into the device tree plus the operations used to read it.
 * The OBP-backed implementation in this file stores a PROM node id,
 * cast to a pointer, in 'here'.
 */
typedef struct drmach_node {
	void		*here;		/* current position in the tree */

	/* return the PROM node id for the current position */
	pnode_t		 (*get_dnode)(struct drmach_node *node);
	/* invoke cb for each node visited; non-zero from cb stops the walk */
	int		 (*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
} drmach_node_t;
110 
/*
 * Bounds-checked array of drmachid_t values indexed from min_index to
 * max_index inclusive.
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr in bytes */
	drmachid_t	*arr;		/* slot storage; NULL means empty slot */
} drmach_array_t;
117 
/*
 * Header shared by board and device objects.  It is the first member of
 * both, which is what lets the DRMACH_IS_*_ID macros inspect any id.
 */
typedef struct {
	/* type tag: address of the constructor that created the object */
	void		*isa;

	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;
126 
/*
 * Per-board state.  Instances are created by drmach_board_new() and
 * registered in the drmach_boards array under their board number.
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: common header */
	int		 bnum;		/* board number */
	int		 assigned;
	int		 powered;
	int		 connect_cpuid;	/* -1 when freshly created */
	int		 cond;
	drmach_node_t	*tree;		/* device tree cursor for this board */
	drmach_array_t	*devices;	/* NULL until devices are attached */
} drmach_board_t;
137 
/*
 * Per-device (CPU, memory or I/O unit) state, created by
 * drmach_device_new().
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: common header */
	drmach_board_t	*bp;		/* owning board */
	int		 unum;		/* unit number; -1 until assigned */
	int		 busy;
	int		 powered;
	const char	*type;		/* DRMACH_DEVTYPE_* string */
	drmach_node_t	*node;		/* private copy of the tree cursor */
} drmach_device_t;
147 
/*
 * Context passed through e_ddi_branch_create() by drmach_configure() to
 * the branch select and branch callback routines.
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;		/* device being configured */
	sbd_error_t	*err;		/* set if an error is detected */
	dev_info_t	*dip;		/* branch root reported by callback */
} drmach_config_args_t;
154 
/*
 * One entry of the MC idle script built by drmach_prep_rename_script():
 * an idle register address and the memory device it belongs to.  The
 * script is terminated by an entry with idle_addr == 0 and mem == NULL.
 */
typedef struct {
	uint64_t	 idle_addr;
	drmach_device_t	*mem;
} drmach_mc_idle_script_t;

/*
 * One entry of the rename script: an ASR register address and the
 * value to be written to it.
 */
typedef struct {
	uint64_t	masr_addr;
	uint_t		masr;
	uint_t		_filler;
} drmach_rename_script_t;
165 
/*
 * State for a copy-rename operation.  NOTE(review): the users of this
 * structure are outside the visible part of the file; field meanings
 * below are inferred from names only -- confirm against the callers.
 */
typedef struct {
	void		(*run)(void *arg);
	caddr_t		data;
	pda_handle_t	*ph;
	struct memlist	*c_ml;
	uint64_t	s_copybasepa;
	uint64_t	t_copybasepa;
	drmach_device_t	*restless_mc;	/* diagnostic output */
} drmach_copy_rename_program_t;
175 
/*
 * Operation selector.  NOTE(review): the consumer is outside the
 * visible part of the file; presumably these select an I/O port
 * controller action -- confirm.
 */
typedef enum {
	DO_IDLE,
	DO_UNIDLE,
	DO_PAUSE,
	DO_UNPAUSE
} drmach_iopc_op_t;
182 
/*
 * Callback context carrying a board, a device count, a caller-supplied
 * 'found' routine with its opaque argument, and an error slot.
 * NOTE(review): the users are outside the visible part of the file.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
190 
/* one page of heap_arena virtual address space, set up by drmach_init() */
static caddr_t		 drmach_shutdown_va;

/* non-zero once drmach_init() has completed; see drmach_i_lock */
static int		 drmach_initialized;
/* all known boards, indexed by board number (0 .. MAX_BOARDS - 1) */
static drmach_array_t	*drmach_boards;

static int		 drmach_cpu_delay = 100;
static int		 drmach_cpu_ntries = 50000;

/* NCPU bytes allocated from static_alloc_arena in _init() */
volatile uchar_t	*drmach_xt_mb;

/*
 * Do not change the drmach_shutdown_mbox structure without
 * considering the drmach_shutdown_asm assembly language code.
 */
struct drmach_shutdown_mbox {
	uint64_t	estack;
	uint64_t	flushaddr;
	int		size;
	int		linesize;
	uint64_t	physaddr;
};
/* allocated from static_alloc_arena in _init() */
struct drmach_shutdown_mbox	*drmach_shutdown_asm_mbox;

/* forward declarations of the object constructors */
static sbd_error_t	*drmach_device_new(drmach_node_t *,
				drmach_board_t *, drmach_device_t **);
static sbd_error_t	*drmach_cpu_new(drmach_device_t *);
static sbd_error_t	*drmach_mem_new(drmach_device_t *);
static sbd_error_t	*drmach_io_new(drmach_device_t *);

extern struct cpu	*SIGBCPU;
220 
/*
 * DRMACH_PR(fmt, ...) is a printf-style debug trace.  In DEBUG builds it
 * prints only while drmach_debug is non-zero; otherwise the call is
 * guarded by a constant-false condition.  The macro expands to a bare
 * "if (...) printf", so take care when using it directly before an
 * "else".
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
229 
/*
 * Object identification macros.
 *
 * Every drmach object starts with a drmach_common_t whose 'isa' member
 * holds the address of the constructor that created it.  These macros
 * test an id (which may be NULL) against those constructor addresses.
 *
 * The argument is fully parenthesized in every expansion so that an
 * expression argument (e.g. pointer arithmetic or a conditional) is
 * evaluated as a unit before the cast and the comparison are applied.
 * Note that the argument is evaluated more than once.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
260 
/* board number owning a cpuid (integer division by CPUs per board) */
#define	DRMACH_CPUID2BNUM(cpuid) \
	((cpuid) / MAX_CPU_UNITS_PER_BOARD)

/*
 * Build an ESTF_INTERNAL error whose resource string records the
 * source line at which the macro was expanded.
 */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, ESTF_INTERNAL, drmach_ie_fmt, __LINE__)
static char		*drmach_ie_fmt = "drmach.c %d";
267 
/*
 * Maps an OBP node name to the drmach device type string and to the
 * constructor used to finish initializing a device of that type.
 * Nodes whose name is not listed here are ignored by drmach.
 */
static struct {
	const char	 *name;
	const char	 *type;
	sbd_error_t	 *(*new)(drmach_device_t *);
} name2type[] = {
	{ "SUNW,UltraSPARC",	DRMACH_DEVTYPE_CPU,  drmach_cpu_new },
	{ "mem-unit",		DRMACH_DEVTYPE_MEM,  drmach_mem_new },
	{ "pci",		DRMACH_DEVTYPE_PCI,  drmach_io_new  },
	{ "sbus",		DRMACH_DEVTYPE_SBUS, drmach_io_new  },
};
278 
/* node types to cleanup when a board is unconfigured */
#define	MISC_COUNTER_TIMER_DEVNAME	"counter-timer"
#define	MISC_PERF_COUNTER_DEVNAME	"perf-counter"

/* utility: one megabyte, in bytes, as an unsigned long long */
#define	MBYTE	(1048576ull)
285 
/*
 * Declare a module dependency on drv/cvc.  This is necessary
 * because the CPU support needs to call cvc_assign_iocpu.
 * The definition is hidden from lint.
 */
#ifndef lint
char _depends_on[] = "drv/cvc";
#endif  /* lint */
293 
294 /*
295  * drmach autoconfiguration data structures and interfaces
296  */
297 
extern struct mod_ops mod_miscops;

/* drmach is a "misc" module */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Sun Enterprise 10000 DR"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

/*
 * Serializes the one-time initialization done in drmach_init().
 * Initialized in _init(); destroyed in drmach_fini().
 */
static kmutex_t drmach_i_lock;
312 
313 int
314 _init(void)
315 {
316 	int err;
317 
318 	/* check that we have the correct version of obp */
319 	if (prom_test("SUNW,UE10000,add-brd") != 0) {
320 
321 		cmn_err(CE_WARN, "!OBP/SSP upgrade is required to enable "
322 		    "DR Functionality");
323 
324 		return (-1);
325 	}
326 
327 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
328 
329 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
330 	    NCPU * sizeof (uchar_t), VM_SLEEP);
331 	drmach_shutdown_asm_mbox = (struct drmach_shutdown_mbox *)
332 	    vmem_alloc(static_alloc_arena, sizeof (struct drmach_shutdown_mbox),
333 	    VM_SLEEP);
334 
335 	if ((err = mod_install(&modlinkage)) != 0) {
336 		mutex_destroy(&drmach_i_lock);
337 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
338 		    NCPU * sizeof (uchar_t));
339 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
340 		    sizeof (struct drmach_shutdown_mbox));
341 	}
342 
343 	return (err);
344 }
345 
346 int
347 _fini(void)
348 {
349 	static int drmach_fini(void);
350 
351 	if (drmach_fini())
352 		return (DDI_FAILURE);
353 	else
354 		return (mod_remove(&modlinkage));
355 }
356 
357 int
358 _info(struct modinfo *modinfop)
359 {
360 	return (mod_info(&modlinkage, modinfop));
361 }
362 
363 static pnode_t
364 drmach_node_obp_get_dnode(drmach_node_t *np)
365 {
366 	return ((pnode_t)(uintptr_t)np->here);
367 }
368 
369 static int
370 drmach_node_obp_walk(drmach_node_t *np, void *data,
371 		int (*cb)(drmach_node_walk_args_t *args))
372 {
373 	pnode_t			nodeid;
374 	int			rv;
375 	drmach_node_walk_args_t	args;
376 
377 	/* initialized args structure for callback */
378 	args.node = np;
379 	args.data = data;
380 
381 	nodeid = prom_childnode(prom_rootnode());
382 
383 	/* save our new position with in the tree */
384 	np->here = (void *)(uintptr_t)nodeid;
385 
386 	rv = 0;
387 	while (nodeid != OBP_NONODE) {
388 		rv = (*cb)(&args);
389 		if (rv)
390 			break;
391 
392 		nodeid = prom_nextnode(nodeid);
393 
394 		/* save our new position with in the tree */
395 		np->here = (void *)(uintptr_t)nodeid;
396 	}
397 
398 	return (rv);
399 }
400 
401 static drmach_node_t *
402 drmach_node_new(void)
403 {
404 	drmach_node_t *np;
405 
406 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
407 
408 	np->get_dnode = drmach_node_obp_get_dnode;
409 	np->walk = drmach_node_obp_walk;
410 
411 	return (np);
412 }
413 
414 static void
415 drmach_node_dispose(drmach_node_t *np)
416 {
417 	kmem_free(np, sizeof (*np));
418 }
419 
420 static dev_info_t *
421 drmach_node_get_dip(drmach_node_t *np)
422 {
423 	pnode_t nodeid;
424 
425 	nodeid = np->get_dnode(np);
426 	if (nodeid == OBP_NONODE)
427 		return (NULL);
428 	else {
429 		dev_info_t *dip;
430 
431 		/* The root node doesn't have to be held */
432 		dip = e_ddi_nodeid_to_dip(nodeid);
433 		if (dip) {
434 			/*
435 			 * Branch rooted at dip is already held, so release
436 			 * hold acquired in e_ddi_nodeid_to_dip()
437 			 */
438 			ddi_release_devi(dip);
439 			ASSERT(e_ddi_branch_held(dip));
440 		}
441 
442 		return (dip);
443 	}
444 	/*NOTREACHED*/
445 }
446 
447 static pnode_t
448 drmach_node_get_dnode(drmach_node_t *np)
449 {
450 	return (np->get_dnode(np));
451 }
452 
453 static int
454 drmach_node_walk(drmach_node_t *np, void *param,
455 		int (*cb)(drmach_node_walk_args_t *args))
456 {
457 	return (np->walk(np, param, cb));
458 }
459 
460 static int
461 drmach_node_get_prop(drmach_node_t *np, char *name, void *buf)
462 {
463 	pnode_t	nodeid;
464 	int	rv;
465 
466 	nodeid = np->get_dnode(np);
467 	if (nodeid == OBP_NONODE)
468 		rv = -1;
469 	else if (prom_getproplen(nodeid, (caddr_t)name) < 0)
470 		rv = -1;
471 	else {
472 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
473 		rv = 0;
474 	}
475 
476 	return (rv);
477 }
478 
479 static int
480 drmach_node_get_proplen(drmach_node_t *np, char *name, int *len)
481 {
482 	pnode_t	 nodeid;
483 	int	 rv;
484 
485 	nodeid = np->get_dnode(np);
486 	if (nodeid == OBP_NONODE)
487 		rv = -1;
488 	else {
489 		*len = prom_getproplen(nodeid, (caddr_t)name);
490 		rv = (*len < 0 ? -1 : 0);
491 	}
492 
493 	return (rv);
494 }
495 
496 static drmachid_t
497 drmach_node_dup(drmach_node_t *np)
498 {
499 	drmach_node_t *dup;
500 
501 	dup = drmach_node_new();
502 	dup->here = np->here;
503 
504 	return (dup);
505 }
506 
507 /*
508  * drmach_array provides convenient array construction, access,
509  * bounds checking and array destruction logic.
510  */
511 
512 static drmach_array_t *
513 drmach_array_new(int min_index, int max_index)
514 {
515 	drmach_array_t *arr;
516 
517 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
518 
519 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
520 	if (arr->arr_sz > 0) {
521 		arr->min_index = min_index;
522 		arr->max_index = max_index;
523 
524 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
525 		return (arr);
526 	} else {
527 		kmem_free(arr, sizeof (*arr));
528 		return (0);
529 	}
530 }
531 
532 static int
533 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
534 {
535 	if (idx < arr->min_index || idx > arr->max_index)
536 		return (-1);
537 	else {
538 		arr->arr[idx - arr->min_index] = val;
539 		return (0);
540 	}
541 	/*NOTREACHED*/
542 }
543 
544 static int
545 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
546 {
547 	if (idx < arr->min_index || idx > arr->max_index)
548 		return (-1);
549 	else {
550 		*val = arr->arr[idx - arr->min_index];
551 		return (0);
552 	}
553 	/*NOTREACHED*/
554 }
555 
556 static int
557 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
558 {
559 	int rv;
560 
561 	*idx = arr->min_index;
562 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
563 		*idx += 1;
564 
565 	return (rv);
566 }
567 
568 static int
569 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
570 {
571 	int rv;
572 
573 	*idx += 1;
574 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
575 		*idx += 1;
576 
577 	return (rv);
578 }
579 
580 static void
581 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
582 {
583 	drmachid_t	val;
584 	int		idx;
585 	int		rv;
586 
587 	rv = drmach_array_first(arr, &idx, &val);
588 	while (rv == 0) {
589 		(*disposer)(val);
590 		rv = drmach_array_next(arr, &idx, &val);
591 	}
592 
593 	kmem_free(arr->arr, arr->arr_sz);
594 	kmem_free(arr, sizeof (*arr));
595 }
596 
597 /*ARGSUSED*/
598 static int
599 drmach_prom_select(pnode_t nodeid, void *arg, uint_t flags)
600 {
601 	int			rprop[64];
602 	pnode_t			saved;
603 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
604 	drmach_device_t		*dp = ap->dp;
605 	sbd_error_t		*err;
606 
607 	saved = drmach_node_get_dnode(dp->node);
608 
609 	if (nodeid != saved)
610 		return (DDI_FAILURE);
611 
612 	if (saved == OBP_NONODE) {
613 		err = DRMACH_INTERNAL_ERROR();
614 		DRERR_SET_C(&ap->err, &err);
615 		return (DDI_FAILURE);
616 	}
617 
618 	if (prom_getprop(nodeid, OBP_REG, (caddr_t)rprop) <= 0) {
619 		return (DDI_FAILURE);
620 	}
621 
622 	return (DDI_SUCCESS);
623 }
624 
625 /*ARGSUSED*/
626 static void
627 drmach_branch_callback(dev_info_t *rdip, void *arg, uint_t flags)
628 {
629 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
630 
631 	ASSERT(ap->dip == NULL);
632 
633 	ap->dip = rdip;
634 }
635 
636 sbd_error_t *
637 drmach_configure(drmachid_t id, int flags)
638 {
639 	drmach_device_t		*dp;
640 	sbd_error_t		*err;
641 	drmach_config_args_t	ca;
642 	devi_branch_t		b = {0};
643 	dev_info_t		*fdip = NULL;
644 
645 	if (!DRMACH_IS_DEVICE_ID(id))
646 		return (drerr_new(0, ESTF_INAPPROP, NULL));
647 	dp = id;
648 
649 	ca.dp = dp;
650 	ca.flags = flags;
651 	ca.err = NULL;		/* will be set if error detected */
652 	ca.dip = NULL;
653 
654 	b.arg = &ca;
655 	b.type = DEVI_BRANCH_PROM;
656 	b.create.prom_branch_select = drmach_prom_select;
657 	b.devi_branch_callback = drmach_branch_callback;
658 
659 	if (e_ddi_branch_create(ddi_root_node(), &b, &fdip,
660 	    DEVI_BRANCH_CHILD | DEVI_BRANCH_CONFIGURE) != 0) {
661 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
662 
663 		/*
664 		 * If non-NULL, fdip is returned held and must be released.
665 		 */
666 		if (fdip != NULL) {
667 			(void) ddi_pathname(fdip, path);
668 			ddi_release_devi(fdip);
669 		} else if (ca.dip != NULL) {
670 			/* safe to call ddi_pathname as dip already held */
671 			(void) ddi_pathname(ca.dip, path);
672 		} else {
673 			(void) strcpy(path, "<none>");
674 		}
675 
676 		err = drerr_new(1, ESTF_DRVFAIL, path);
677 		DRERR_SET_C(&ca.err, &err);
678 		kmem_free(path, MAXPATHLEN);
679 	}
680 
681 	return (ca.err);
682 }
683 
684 static sbd_error_t *
685 drmach_device_new(drmach_node_t *node,
686 	drmach_board_t *bp, drmach_device_t **dpp)
687 {
688 	int		 i;
689 	int		 rv;
690 	drmach_device_t	*dp;
691 	sbd_error_t	*err;
692 	char		 name[OBP_MAXDRVNAME];
693 
694 	rv = drmach_node_get_prop(node, OBP_NAME, name);
695 	if (rv) {
696 		/* every node is expected to have a name */
697 		err = drerr_new(1, ESTF_GETPROP,
698 			"PROM Node 0x%x: property %s",
699 			(uint_t)node->get_dnode(node), OBP_NAME);
700 
701 		return (err);
702 	}
703 
704 	/*
705 	 * The node currently being examined is not listed in the name2type[]
706 	 * array.  In this case, the node is no interest to drmach.  Both
707 	 * dp and err are initialized here to yield nothing (no device or
708 	 * error structure) for this case.
709 	 */
710 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
711 		if (strcmp(name2type[i].name, name) == 0)
712 			break;
713 
714 	if (i < sizeof (name2type) / sizeof (name2type[0])) {
715 		dp = kmem_zalloc(sizeof (drmach_device_t), KM_SLEEP);
716 
717 		dp->bp = bp;
718 		dp->unum = -1;
719 		dp->node = drmach_node_dup(node);
720 		dp->type = name2type[i].type;
721 
722 		err = (name2type[i].new)(dp);
723 		if (err) {
724 			drmach_node_dispose(node);
725 			kmem_free(dp, sizeof (*dp));
726 			dp = NULL;
727 		}
728 
729 		*dpp = dp;
730 		return (err);
731 	}
732 
733 	/*
734 	 * The node currently being examined is not listed in the name2type[]
735 	 * array.  In this case, the node is no interest to drmach.  Both
736 	 * dp and err are initialized here to yield nothing (no device or
737 	 * error structure) for this case.
738 	 */
739 	*dpp = NULL;
740 	return (NULL);
741 }
742 
743 static void
744 drmach_device_dispose(drmachid_t id)
745 {
746 	drmach_device_t *self = id;
747 
748 	if (self->node)
749 		drmach_node_dispose(self->node);
750 
751 	kmem_free(self, sizeof (*self));
752 }
753 
754 static sbd_error_t *
755 drmach_device_get_prop(drmach_device_t *dp, char *name, void *buf)
756 {
757 	sbd_error_t	*err = NULL;
758 	int		 rv;
759 
760 	rv = drmach_node_get_prop(dp->node, name, buf);
761 	if (rv) {
762 		err = drerr_new(1, ESTF_GETPROP,
763 			"%s::%s: property %s",
764 			dp->bp->cm.name, dp->cm.name, name);
765 	}
766 
767 	return (err);
768 }
769 
770 static sbd_error_t *
771 drmach_device_get_proplen(drmach_device_t *dp, char *name, int *len)
772 {
773 	sbd_error_t	*err = NULL;
774 	int		 rv;
775 
776 	rv = drmach_node_get_proplen(dp->node, name, len);
777 	if (rv) {
778 		err = drerr_new(1, ESTF_GETPROPLEN,
779 			"%s::%s: property %s",
780 			dp->bp->cm.name, dp->cm.name, name);
781 	}
782 
783 	return (err);
784 }
785 
786 static drmach_board_t *
787 drmach_board_new(int bnum)
788 {
789 	static sbd_error_t *drmach_board_release(drmachid_t);
790 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
791 
792 	drmach_board_t	*bp;
793 
794 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
795 
796 	bp->cm.isa = (void *)drmach_board_new;
797 	bp->cm.release = drmach_board_release;
798 	bp->cm.status = drmach_board_status;
799 
800 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
801 
802 	bp->bnum = bnum;
803 	bp->devices = NULL;
804 	bp->connect_cpuid = -1;
805 	bp->tree = drmach_node_new();
806 	bp->assigned = !drmach_initialized;
807 	bp->powered = !drmach_initialized;
808 
809 	drmach_array_set(drmach_boards, bnum, bp);
810 	return (bp);
811 }
812 
813 static void
814 drmach_board_dispose(drmachid_t id)
815 {
816 	drmach_board_t *bp;
817 
818 	ASSERT(DRMACH_IS_BOARD_ID(id));
819 	bp = id;
820 
821 	if (bp->tree)
822 		drmach_node_dispose(bp->tree);
823 
824 	if (bp->devices)
825 		drmach_array_dispose(bp->devices, drmach_device_dispose);
826 
827 	kmem_free(bp, sizeof (*bp));
828 }
829 
830 static sbd_error_t *
831 drmach_board_status(drmachid_t id, drmach_status_t *stat)
832 {
833 	sbd_error_t	*err = NULL;
834 	drmach_board_t	*bp;
835 
836 	if (!DRMACH_IS_BOARD_ID(id))
837 		return (drerr_new(0, ESTF_INAPPROP, NULL));
838 	bp = id;
839 
840 	stat->assigned = bp->assigned;
841 	stat->powered = bp->powered;
842 	stat->busy = 0;			/* assume not busy */
843 	stat->configured = 0;		/* assume not configured */
844 	stat->empty = 0;
845 	stat->cond = bp->cond = SBD_COND_OK;
846 	strncpy(stat->type, "System Brd", sizeof (stat->type));
847 	stat->info[0] = '\0';
848 
849 	if (bp->devices) {
850 		int		 rv;
851 		int		 d_idx;
852 		drmachid_t	 d_id;
853 
854 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
855 		while (rv == 0) {
856 			drmach_status_t	d_stat;
857 
858 			err = drmach_status(d_id, &d_stat);
859 			if (err)
860 				break;
861 
862 			stat->busy |= d_stat.busy;
863 			stat->configured |= d_stat.configured;
864 
865 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
866 		}
867 	}
868 
869 	return (err);
870 }
871 
872 /* a simple routine to reduce redundancy of this common logic */
873 static pda_handle_t
874 drmach_pda_open(void)
875 {
876 	pda_handle_t ph;
877 
878 	ph = pda_open();
879 	if (ph == NULL) {
880 		/* catch in debug kernels */
881 		ASSERT(0);
882 		cmn_err(CE_WARN, "pda_open failed");
883 	}
884 
885 	return (ph);
886 }
887 
888 #ifdef DEBUG
889 int drmach_init_break = 0;
890 #endif
891 
892 static int
893 hold_rele_branch(dev_info_t *rdip, void *arg)
894 {
895 	int	i;
896 	int	*holdp = (int *)arg;
897 	char	*name = ddi_node_name(rdip);
898 
899 	/*
900 	 * For Starfire, we must be children of the root devinfo node
901 	 */
902 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
903 
904 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
905 		if (strcmp(name2type[i].name, name) == 0)
906 			break;
907 
908 	if (i == sizeof (name2type) / sizeof (name2type[0])) {
909 		/* Not of interest to us */
910 		return (DDI_WALK_PRUNECHILD);
911 	}
912 
913 	if (*holdp) {
914 		ASSERT(!e_ddi_branch_held(rdip));
915 		e_ddi_branch_hold(rdip);
916 	} else {
917 		ASSERT(e_ddi_branch_held(rdip));
918 		e_ddi_branch_rele(rdip);
919 	}
920 
921 	return (DDI_WALK_PRUNECHILD);
922 }
923 
924 static int
925 drmach_init(void)
926 {
927 	pnode_t		nodeid;
928 	dev_info_t	*rdip;
929 	int		hold, circ;
930 
931 #ifdef DEBUG
932 	if (drmach_init_break)
933 		debug_enter("drmach_init: drmach_init_break set\n");
934 #endif
935 	mutex_enter(&drmach_i_lock);
936 	if (drmach_initialized) {
937 		mutex_exit(&drmach_i_lock);
938 		return (0);
939 	}
940 
941 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
942 
943 	nodeid = prom_childnode(prom_rootnode());
944 	do {
945 		int		 bnum;
946 		drmachid_t	 id;
947 
948 		bnum = -1;
949 		(void) prom_getprop(nodeid, OBP_BOARDNUM, (caddr_t)&bnum);
950 		if (bnum == -1)
951 			continue;
952 
953 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
954 			cmn_err(CE_WARN, "OBP node 0x%x has"
955 				" invalid property value, %s=%d",
956 				nodeid, OBP_BOARDNUM, bnum);
957 
958 			/* clean up */
959 			drmach_array_dispose(
960 				drmach_boards, drmach_board_dispose);
961 
962 			mutex_exit(&drmach_i_lock);
963 			return (-1);
964 		} else if (id == NULL)
965 			(void) drmach_board_new(bnum);
966 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
967 
968 	drmach_shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
969 
970 	/*
971 	 * Walk immediate children of devinfo root node and hold
972 	 * all devinfo branches of interest.
973 	 */
974 	hold = 1;
975 	rdip = ddi_root_node();
976 
977 	ndi_devi_enter(rdip, &circ);
978 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
979 	ndi_devi_exit(rdip, circ);
980 
981 	drmach_initialized = 1;
982 
983 	mutex_exit(&drmach_i_lock);
984 
985 	return (0);
986 }
987 
988 static int
989 drmach_fini(void)
990 {
991 	dev_info_t	*rdip;
992 	int		hold, circ;
993 
994 	if (drmach_initialized) {
995 		int		busy = 0;
996 		int		rv;
997 		int		idx;
998 		drmachid_t	id;
999 
1000 		ASSERT(drmach_boards != NULL);
1001 
1002 		rv = drmach_array_first(drmach_boards, &idx, &id);
1003 		while (rv == 0) {
1004 			sbd_error_t	*err;
1005 			drmach_status_t stat;
1006 
1007 			err = drmach_board_status(id, &stat);
1008 			if (err) {
1009 				/* catch in debug kernels */
1010 				ASSERT(0);
1011 				sbd_err_clear(&err);
1012 				busy = 1;
1013 			} else
1014 				busy |= stat.busy;
1015 
1016 			rv = drmach_array_next(drmach_boards, &idx, &id);
1017 		}
1018 
1019 		if (busy)
1020 			return (-1);
1021 
1022 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
1023 		drmach_boards = NULL;
1024 
1025 		vmem_free(heap_arena, drmach_shutdown_va, PAGESIZE);
1026 
1027 		/*
1028 		 * Walk immediate children of the root devinfo node
1029 		 * releasing holds acquired on branches in drmach_init()
1030 		 */
1031 		hold = 0;
1032 		rdip = ddi_root_node();
1033 
1034 		ndi_devi_enter(rdip, &circ);
1035 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
1036 		ndi_devi_exit(rdip, circ);
1037 
1038 		mutex_destroy(&drmach_i_lock);
1039 
1040 		drmach_initialized = 0;
1041 	}
1042 	if (drmach_xt_mb != NULL) {
1043 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
1044 		    NCPU * sizeof (uchar_t));
1045 	}
1046 	if (drmach_shutdown_asm_mbox != NULL) {
1047 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
1048 		    sizeof (struct drmach_shutdown_mbox));
1049 	}
1050 	return (0);
1051 }
1052 
1053 static sbd_error_t *
1054 drmach_get_mc_asr_addr(drmachid_t id, uint64_t *pa)
1055 {
1056 	drmach_device_t	*dp;
1057 	pnode_t		nodeid;
1058 	uint64_t	addr;
1059 
1060 	if (!DRMACH_IS_MEM_ID(id))
1061 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1062 	dp = id;
1063 
1064 	nodeid = drmach_node_get_dnode(dp->node);
1065 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1066 		return (DRMACH_INTERNAL_ERROR());
1067 
1068 	addr = mc_get_asr_addr(nodeid);
1069 	if (addr == (uint64_t)-1)
1070 		return (DRMACH_INTERNAL_ERROR());
1071 
1072 	*pa = addr;
1073 	return (NULL);
1074 }
1075 
1076 static sbd_error_t *
1077 drmach_get_mc_idle_addr(drmachid_t id, uint64_t *pa)
1078 {
1079 	drmach_device_t	*dp;
1080 	pnode_t		nodeid;
1081 	uint64_t	addr;
1082 
1083 	if (!DRMACH_IS_MEM_ID(id))
1084 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1085 	dp = id;
1086 
1087 	nodeid = drmach_node_get_dnode(dp->node);
1088 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1089 		return (DRMACH_INTERNAL_ERROR());
1090 
1091 	addr = mc_get_idle_addr(nodeid);
1092 	if (addr == (uint64_t)-1)
1093 		return (DRMACH_INTERNAL_ERROR());
1094 
1095 	*pa = addr;
1096 	return (NULL);
1097 }
1098 
1099 static sbd_error_t *
1100 drmach_read_mc_asr(drmachid_t id, uint_t *mcregp)
1101 {
1102 	drmach_device_t	*dp;
1103 	pnode_t		 nodeid;
1104 	sbd_error_t	*err;
1105 
1106 	if (!DRMACH_IS_MEM_ID(id))
1107 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1108 	dp = id;
1109 
1110 	nodeid = drmach_node_get_dnode(dp->node);
1111 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1112 		err = DRMACH_INTERNAL_ERROR();
1113 	else if (mc_read_asr(nodeid, mcregp) == -1)
1114 		err = DRMACH_INTERNAL_ERROR();
1115 	else
1116 		err = NULL;
1117 
1118 	return (err);
1119 }
1120 
1121 static sbd_error_t *
1122 drmach_write_mc_asr(drmachid_t id, uint_t mcreg)
1123 {
1124 	drmach_device_t	*dp;
1125 	pnode_t		 nodeid;
1126 	sbd_error_t	*err;
1127 
1128 	if (!DRMACH_IS_MEM_ID(id))
1129 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1130 	dp = id;
1131 
1132 	nodeid = drmach_node_get_dnode(dp->node);
1133 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1134 		err = DRMACH_INTERNAL_ERROR();
1135 	else if (mc_write_asr(nodeid, mcreg) == -1)
1136 		err = DRMACH_INTERNAL_ERROR();
1137 	else
1138 		err = NULL;
1139 
1140 	return (err);
1141 }
1142 
1143 static sbd_error_t *
1144 drmach_prep_rename_script(drmach_device_t *s_mem, drmach_device_t *t_mem,
1145 	uint64_t t_slice_offset, caddr_t buf, int buflen)
1146 {
1147 	int			i, b, m;
1148 	drmach_mc_idle_script_t	*isp;
1149 	drmach_rename_script_t	*rsp;
1150 	int			s_bd, t_bd;
1151 	uint_t			s_masr, t_masr;
1152 	uint64_t		s_new_basepa, t_new_basepa;
1153 	int			b_idx, rv;
1154 	sbd_error_t		*err;
1155 	drmachid_t		 b_id;
1156 	drmach_board_t		*brd;
1157 
1158 #ifdef DEBUG
1159 	/*
1160 	 * Starfire CPU/MEM/IO boards have only one MC per board.
1161 	 * This function has been coded with that fact in mind.
1162 	 */
1163 	ASSERT(MAX_MEM_UNITS_PER_BOARD == 1);
1164 
1165 	/*
1166 	 * calculate the maximum space that could be consumed,
1167 	 * then verify the available buffer space is adequate.
1168 	 */
1169 	m  = sizeof (drmach_mc_idle_script_t *) * 2; /* two MCs */
1170 	b  = sizeof (drmach_rename_script_t *) * 3 * MAX_CPU_UNITS_PER_BOARD;
1171 	b += sizeof (drmach_rename_script_t *) * 3 * MAX_IO_UNITS_PER_BOARD;
1172 	b *= MAX_BOARDS;
1173 	b += sizeof (drmach_rename_script_t *) * 3;
1174 	b += sizeof (drmach_rename_script_t *) * 1;
1175 	ASSERT(m + b < buflen);
1176 #endif
1177 
1178 	/*
1179 	 * construct an array of MC idle register addresses of
1180 	 * both MCs.  The array is zero terminated -- as expected
1181 	 * by drmach_copy_rename_prog__relocatable().
1182 	 */
1183 	isp = (drmach_mc_idle_script_t *)buf;
1184 
1185 	/* source mc */
1186 	err = drmach_get_mc_idle_addr(s_mem, &isp->idle_addr);
1187 	if (err)
1188 		return (err);
1189 	isp->mem = s_mem;
1190 	isp += 1;
1191 
1192 	/* target mc */
1193 	err = drmach_get_mc_idle_addr(t_mem, &isp->idle_addr);
1194 	if (err)
1195 		return (err);
1196 	isp->mem = t_mem;
1197 	isp += 1;
1198 
1199 	/* terminator */
1200 	isp->idle_addr = 0;
1201 	isp->mem = NULL;
1202 	isp += 1;
1203 
1204 	/* fetch source mc asr register value */
1205 	err = drmach_read_mc_asr(s_mem, &s_masr);
1206 	if (err)
1207 		return (err);
1208 	else if (s_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1209 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1210 				s_mem->bp->cm.name, s_mem->cm.name));
1211 	}
1212 
1213 	/* fetch target mc asr register value */
1214 	err = drmach_read_mc_asr(t_mem, &t_masr);
1215 	if (err)
1216 		return (err);
1217 	else if (t_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1218 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1219 				t_mem->bp->cm.name, t_mem->cm.name));
1220 	}
1221 
1222 	/* get new source base pa from target's masr */
1223 	s_new_basepa = mc_asr_to_pa(t_masr);
1224 
1225 	/*
1226 	 * remove any existing slice offset to realign
1227 	 * memory with board's slice boundary
1228 	 */
1229 	s_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1230 
1231 	/* get new target base pa from source's masr */
1232 	t_new_basepa  = mc_asr_to_pa(s_masr);
1233 
1234 	/* remove any existing slice offset, then apply new offset */
1235 	t_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1236 	t_new_basepa += t_slice_offset;
1237 
1238 	/* encode new base pa into s_masr.  turn off mem present bit */
1239 	s_masr  = mc_pa_to_asr(s_masr, s_new_basepa);
1240 	s_masr &= ~STARFIRE_MC_MEM_PRESENT_MASK;
1241 
1242 	/* encode new base pa into t_masr.  turn on mem present bit */
1243 	t_masr  = mc_pa_to_asr(t_masr, t_new_basepa);
1244 	t_masr |= STARFIRE_MC_MEM_PRESENT_MASK;
1245 
1246 	/*
1247 	 * Step 0:	Mark source memory as not present.
1248 	 */
1249 	m = 0;
1250 	rsp = (drmach_rename_script_t *)isp;
1251 	err = drmach_get_mc_asr_addr(s_mem, &rsp[m].masr_addr);
1252 	if (err)
1253 		return (err);
1254 	rsp[m].masr = s_masr;
1255 	m++;
1256 
1257 	/*
1258 	 * Step 1:	Write source base address to target MC
1259 	 *		with present bit off.
1260 	 */
1261 	err = drmach_get_mc_asr_addr(t_mem, &rsp[m].masr_addr);
1262 	if (err)
1263 		return (err);
1264 	rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1265 	m++;
1266 
1267 	/*
1268 	 * Step 2:	Now rewrite target reg with present bit on.
1269 	 */
1270 	rsp[m].masr_addr = rsp[m-1].masr_addr;
1271 	rsp[m].masr = t_masr;
1272 	m++;
1273 
1274 	s_bd = s_mem->bp->bnum;
1275 	t_bd = t_mem->bp->bnum;
1276 
1277 	DRMACH_PR("preparing script for CPU and IO units:\n");
1278 
1279 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
1280 	if (rv) {
1281 		/* catch this in debug kernels */
1282 		ASSERT(0);
1283 		return (DRMACH_INTERNAL_ERROR());
1284 	}
1285 
1286 	do {
1287 		int			 d_idx;
1288 		drmachid_t		 d_id;
1289 		drmach_device_t		*device;
1290 
1291 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
1292 		brd = b_id;
1293 		b = brd->bnum;
1294 
1295 		/*
1296 		 * Step 3:	Update PC MADR tables for CPUs.
1297 		 */
1298 		if (brd->devices == NULL) {
1299 			/* devices not initialized */
1300 			continue;
1301 		}
1302 
1303 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1304 		if (rv) {
1305 			/* must mean no devices on this board */
1306 			break;
1307 		}
1308 
1309 		DRMACH_PR("\t%s\n", brd->cm.name);
1310 
1311 		do {
1312 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1313 
1314 			if (!DRMACH_IS_CPU_ID(d_id))
1315 				continue;
1316 
1317 			device = d_id;
1318 			i = device->unum;
1319 
1320 			DRMACH_PR("\t\t%s\n", device->cm.name);
1321 
1322 			/*
1323 			 * Disabled detaching mem node.
1324 			 */
1325 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i);
1326 			rsp[m].masr = s_masr;
1327 			m++;
1328 			/*
1329 			 * Always write masr with present bit
1330 			 * off and then again with it on.
1331 			 */
1332 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i);
1333 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1334 			m++;
1335 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1336 			rsp[m].masr = t_masr;
1337 			m++;
1338 
1339 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1340 
1341 		/*
1342 		 * Step 4:	Update PC MADR tables for IOs.
1343 		 */
1344 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1345 		/* this worked for previous loop, must work here too */
1346 		ASSERT(rv == 0);
1347 
1348 		do {
1349 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1350 
1351 			if (!DRMACH_IS_IO_ID(d_id))
1352 				continue;
1353 
1354 			device = d_id;
1355 			i = device->unum;
1356 
1357 			DRMACH_PR("\t\t%s\n", device->cm.name);
1358 
1359 			/*
1360 			 * Disabled detaching mem node.
1361 			 */
1362 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i+4);
1363 			rsp[m].masr = s_masr;
1364 			m++;
1365 			/*
1366 			 * Always write masr with present bit
1367 			 * off and then again with it on.
1368 			 */
1369 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i+4);
1370 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1371 			m++;
1372 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1373 			rsp[m].masr = t_masr;
1374 			m++;
1375 
1376 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1377 	} while (drmach_array_next(drmach_boards, &b_idx, &b_id) == 0);
1378 
1379 	/*
1380 	 * Zero masr_addr value indicates the END.
1381 	 */
1382 	rsp[m].masr_addr = 0ull;
1383 	rsp[m].masr = 0;
1384 	DRMACH_PR("number of steps in rename script = %d\n", m);
1385 	m++;
1386 
1387 	/* paranoia */
1388 	ASSERT((caddr_t)&rsp[m] <= buf + buflen);
1389 
1390 #ifdef DEBUG
1391 	{
1392 		int	j;
1393 
1394 		DRMACH_PR("mc idle register address list:");
1395 		isp = (drmach_mc_idle_script_t *)buf;
1396 		DRMACH_PR("source mc idle addr 0x%lx, mem id %p",
1397 			isp[0].idle_addr, isp[0].mem);
1398 		DRMACH_PR("target mc idle addr 0x%lx, mem id %p",
1399 			isp[1].idle_addr, isp[1].mem);
1400 		ASSERT(isp[2].idle_addr == 0);
1401 
1402 		DRMACH_PR("copy-rename script:");
1403 		for (j = 0; j < m; j++) {
1404 			DRMACH_PR("0x%lx = 0x%08x",
1405 				rsp[j].masr_addr, rsp[j].masr);
1406 		}
1407 
1408 		DELAY(1000000);
1409 	}
1410 #endif
1411 
1412 	/* return number of bytes consumed */
1413 	b = (caddr_t)&rsp[m] - buf;
1414 	DRMACH_PR("total number of bytes consumed is %d\n", b);
1415 	ASSERT(b <= buflen);
1416 
1417 #ifdef lint
1418 	buflen = buflen;
1419 #endif
1420 
1421 	return (NULL);
1422 }
1423 
1424 /*
1425  * The routine performs the necessary memory COPY and MC adr SWITCH.
1426  * Both operations MUST be at the same "level" so that the stack is
1427  * maintained correctly between the copy and switch.  The switch
1428  * portion implements a caching mechanism to guarantee the code text
1429  * is cached prior to execution.  This is to guard against possible
1430  * memory access while the MC adr's are being modified.
1431  *
1432  * IMPORTANT: The drmach_copy_rename_end() function must immediately
1433  * follow drmach_copy_rename_prog__relocatable() so that the correct
1434  * "length" of the drmach_copy_rename_prog__relocatable can be
1435  * calculated.  This routine MUST be a LEAF function, i.e. it can
1436  * make NO function calls, primarily for two reasons:
1437  *
1438  *	1. We must keep the stack consistent across the "switch".
1439  *	2. Function calls are compiled to relative offsets, and
1440  *	   when we execute this function we'll be executing it from
1441  *	   a copied version in a different area of memory, thus
1442  *	   the relative offsets will be bogus.
1443  *
1444  * Moreover, it must have the "__relocatable" suffix to inform DTrace
1445  * providers (and anything else, for that matter) that this
1446  * function's text is manually relocated elsewhere before it is
1447  * executed.  That is, it cannot be safely instrumented with any
1448  * methodology that is PC-relative.
1449  */
1450 static void
1451 drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog)
1452 {
1453 	extern void drmach_exec_script_il(drmach_rename_script_t *rsp);
1454 
1455 	drmach_mc_idle_script_t		*isp;
1456 	struct memlist			*ml;
1457 	int				csize;
1458 	int				lnsize;
1459 	uint64_t			caddr;
1460 
1461 	isp = (drmach_mc_idle_script_t *)prog->data;
1462 
1463 	caddr = ecache_flushaddr;
1464 	csize = (cpunodes[CPU->cpu_id].ecache_size << 1);
1465 	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;
1466 
1467 	/*
1468 	 * DO COPY.
1469 	 */
1470 	for (ml = prog->c_ml; ml; ml = ml->next) {
1471 		uint64_t	s_pa, t_pa;
1472 		uint64_t	nbytes;
1473 
1474 		s_pa = prog->s_copybasepa + ml->address;
1475 		t_pa = prog->t_copybasepa + ml->address;
1476 		nbytes = ml->size;
1477 
1478 		while (nbytes != 0ull) {
1479 			/*
1480 			 * This copy does NOT use an ASI
1481 			 * that avoids the Ecache, therefore
1482 			 * the dst_pa addresses may remain
1483 			 * in our Ecache after the dst_pa
1484 			 * has been removed from the system.
1485 			 * A subsequent write-back to memory
1486 			 * will cause an ARB-stop because the
1487 			 * physical address no longer exists
1488 			 * in the system. Therefore we must
1489 			 * flush out local Ecache after we
1490 			 * finish the copy.
1491 			 */
1492 
1493 			/* copy 32 bytes at src_pa to dst_pa */
1494 			bcopy32_il(s_pa, t_pa);
1495 
1496 			/* increment by 32 bytes */
1497 			s_pa += (4 * sizeof (uint64_t));
1498 			t_pa += (4 * sizeof (uint64_t));
1499 
1500 			/* decrement by 32 bytes */
1501 			nbytes -= (4 * sizeof (uint64_t));
1502 		}
1503 	}
1504 
1505 	/*
1506 	 * Since bcopy32_il() does NOT use an ASI to bypass
1507 	 * the Ecache, we need to flush our Ecache after
1508 	 * the copy is complete.
1509 	 */
1510 	flush_ecache_il(caddr, csize, lnsize);		/* inline version */
1511 
1512 	/*
1513 	 * Wait for MCs to go idle.
1514 	 */
1515 	do {
1516 		register int	t = 10;
1517 		register uint_t	v;
1518 
1519 		/* loop t cycles waiting for each mc to indicate it's idle */
1520 		do {
1521 			v = ldphysio_il(isp->idle_addr)
1522 				& STARFIRE_MC_IDLE_MASK;
1523 
1524 		} while (v != STARFIRE_MC_IDLE_MASK && t-- > 0);
1525 
1526 		/* bailout if timedout */
1527 		if (t <= 0) {
1528 			prog->restless_mc = isp->mem;
1529 			return;
1530 		}
1531 
1532 		isp += 1;
1533 
1534 		/* stop if terminating zero has been reached */
1535 	} while (isp->idle_addr != 0);
1536 
1537 	/* advance passed terminating zero */
1538 	isp += 1;
1539 
1540 	/*
1541 	 * The following inline assembly routine caches
1542 	 * the rename script and then caches the code that
1543 	 * will do the rename.  This is necessary
1544 	 * so that we don't have any memory references during
1545 	 * the reprogramming.  We accomplish this by first
1546 	 * jumping through the code to guarantee it's cached
1547 	 * before we actually execute it.
1548 	 */
1549 	drmach_exec_script_il((drmach_rename_script_t *)isp);
1550 }
1551 
/*
 * End-of-text marker for drmach_copy_rename_prog__relocatable().
 *
 * IMPORTANT:	This function MUST be located immediately after
 *		drmach_copy_rename_prog__relocatable() in this file.
 *		The difference between the two function addresses is
 *		used as the size of the relocatable routine, which
 *		assumes the compiler keeps these functions in the
 *		order in which they appear :-o
 */
static void
drmach_copy_rename_end(void)
{
	/* intentionally empty; only the address of this function matters */
}
1563 
1564 sbd_error_t *
1565 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
1566 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *pgm_id)
1567 {
1568 	drmach_device_t	*s_mem;
1569 	drmach_device_t	*t_mem;
1570 	struct memlist	*x_ml;
1571 	uint64_t	off_mask, s_copybasepa, t_copybasepa, t_basepa;
1572 	int		len;
1573 	caddr_t		bp, wp;
1574 	pda_handle_t	ph;
1575 	sbd_error_t	*err;
1576 	drmach_copy_rename_program_t *prog;
1577 
1578 	if (!DRMACH_IS_MEM_ID(s_id))
1579 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1580 	if (!DRMACH_IS_MEM_ID(t_id))
1581 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1582 	s_mem = s_id;
1583 	t_mem = t_id;
1584 
1585 	/* get starting physical address of target memory */
1586 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
1587 	if (err)
1588 		return (err);
1589 
1590 	/* calculate slice offset mask from slice size */
1591 	off_mask = mc_get_mem_alignment() - 1;
1592 
1593 	/* calculate source and target base pa */
1594 	s_copybasepa = c_ml->address;
1595 	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);
1596 
1597 	/* paranoia */
1598 	ASSERT((c_ml->address & off_mask) >= t_slice_offset);
1599 
1600 	/* adjust copy memlist addresses to be relative to copy base pa */
1601 	x_ml = c_ml;
1602 	while (x_ml != NULL) {
1603 		x_ml->address -= s_copybasepa;
1604 		x_ml = x_ml->next;
1605 	}
1606 
1607 #ifdef DEBUG
1608 	{
1609 	uint64_t s_basepa, s_size, t_size;
1610 
1611 	x_ml = c_ml;
1612 	while (x_ml->next != NULL)
1613 		x_ml = x_ml->next;
1614 
1615 	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
1616 		s_copybasepa,
1617 		s_copybasepa + x_ml->address + x_ml->size);
1618 
1619 	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
1620 		t_copybasepa,
1621 		t_copybasepa + x_ml->address + x_ml->size);
1622 
1623 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
1624 	MEMLIST_DUMP(c_ml);
1625 
1626 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
1627 	ASSERT(err == NULL);
1628 
1629 	err = drmach_mem_get_size(s_id, &s_size);
1630 	ASSERT(err == NULL);
1631 
1632 	err = drmach_mem_get_size(t_id, &t_size);
1633 	ASSERT(err == NULL);
1634 
1635 	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
1636 		s_basepa, s_size);
1637 	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
1638 		t_basepa, t_size);
1639 
1640 	ASSERT(s_copybasepa + x_ml->address + x_ml->size <= s_basepa + s_size);
1641 	ASSERT(t_copybasepa + x_ml->address + x_ml->size <= t_basepa + t_size);
1642 	}
1643 #endif
1644 
1645 	ph = drmach_pda_open();
1646 	if (ph == NULL)
1647 		return (DRMACH_INTERNAL_ERROR());
1648 
1649 	/*
1650 	 * bp will be page aligned, since we're calling
1651 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
1652 	 */
1653 	wp = bp = kmem_zalloc(PAGESIZE, KM_SLEEP);
1654 
1655 	/* allocate space for copy rename struct */
1656 	len = sizeof (drmach_copy_rename_program_t);
1657 	DRMACH_PR("prog = 0x%p, header len %d\n", wp, len);
1658 	prog = (drmach_copy_rename_program_t *)wp;
1659 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1660 
1661 	/*
1662 	 * Copy the code for the copy-rename routine into
1663 	 * a page aligned piece of memory.  We do this to guarantee
1664 	 * that we're executing within the same page and thus reduce
1665 	 * the possibility of cache collisions between different
1666 	 * pages.
1667 	 */
1668 	len = (int)((ulong_t)drmach_copy_rename_end -
1669 		    (ulong_t)drmach_copy_rename_prog__relocatable);
1670 	ASSERT(wp + len < bp + PAGESIZE);
1671 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
1672 
1673 	DRMACH_PR("copy-rename function 0x%p, len %d\n", wp, len);
1674 	prog->run = (void (*)())wp;
1675 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1676 
1677 	/*
1678 	 * Prepare data page that will contain script of
1679 	 * operations to perform during copy-rename.
1680 	 * Allocate temporary buffer to hold script.
1681 	 */
1682 	err = drmach_prep_rename_script(s_mem, t_mem, t_slice_offset,
1683 		wp, PAGESIZE - (wp - bp));
1684 	if (err) {
1685 		(void) drmach_copy_rename_fini(prog);
1686 		return (err);
1687 	}
1688 
1689 	DRMACH_PR("copy-rename script 0x%p, len %d\n", wp, len);
1690 	prog->data = wp;
1691 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1692 
1693 	prog->ph = ph;
1694 	prog->s_copybasepa = s_copybasepa;
1695 	prog->t_copybasepa = t_copybasepa;
1696 	prog->c_ml = c_ml;
1697 	*pgm_id = prog;
1698 
1699 	return (NULL);
1700 }
1701 
1702 sbd_error_t *
1703 drmach_copy_rename_fini(drmachid_t id)
1704 {
1705 	drmach_copy_rename_program_t	*prog = id;
1706 	sbd_error_t			*err = NULL;
1707 
1708 	if (prog->c_ml != NULL)
1709 		memlist_delete(prog->c_ml);
1710 
1711 	if (prog->ph != NULL)
1712 		pda_close(prog->ph);
1713 
1714 	if (prog->restless_mc != 0) {
1715 		cmn_err(CE_WARN, "MC did not idle; OBP Node 0x%x",
1716 			(uint_t)drmach_node_get_dnode(prog->restless_mc->node));
1717 
1718 		err = DRMACH_INTERNAL_ERROR();
1719 	}
1720 
1721 	kmem_free(prog, PAGESIZE);
1722 
1723 	return (err);
1724 }
1725 
1726 static sbd_error_t *
1727 drmach_io_new(drmach_device_t *dp)
1728 {
1729 	static sbd_error_t *drmach_io_release(drmachid_t);
1730 	static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
1731 
1732 	sbd_error_t	*err;
1733 	int		 portid;
1734 
1735 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
1736 	if (err == NULL) {
1737 		ASSERT(portid & 0x40);
1738 		dp->unum = portid & 1;
1739 	}
1740 
1741 	dp->cm.isa = (void *)drmach_io_new;
1742 	dp->cm.release = drmach_io_release;
1743 	dp->cm.status = drmach_io_status;
1744 
1745 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
1746 
1747 	return (err);
1748 }
1749 
1750 static void
1751 drmach_iopc_op(pda_handle_t ph, drmach_iopc_op_t op)
1752 {
1753 	register int b;
1754 
1755 	for (b = 0; b < MAX_BOARDS; b++) {
1756 		int		p;
1757 		ushort_t	bda_ioc;
1758 		board_desc_t	*bdesc;
1759 
1760 		if (pda_board_present(ph, b) == 0)
1761 			continue;
1762 
1763 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
1764 		/*
1765 		 * Update PCs for IOCs.
1766 		 */
1767 		bda_ioc = bdesc->bda_ioc;
1768 		for (p = 0; p < MAX_IOCS; p++) {
1769 			u_longlong_t	idle_addr;
1770 			uchar_t		value;
1771 
1772 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
1773 				continue;
1774 
1775 			idle_addr = STARFIRE_BB_PC_ADDR(b, p, 1);
1776 
1777 			switch (op) {
1778 			case DO_PAUSE:
1779 				value = STARFIRE_BB_PC_PAUSE(p);
1780 				break;
1781 
1782 			case DO_IDLE:
1783 				value = STARFIRE_BB_PC_IDLE(p);
1784 				break;
1785 
1786 			case DO_UNPAUSE:
1787 				value = ldbphysio(idle_addr);
1788 				value &= ~STARFIRE_BB_PC_PAUSE(p);
1789 				break;
1790 
1791 			case DO_UNIDLE:
1792 				value = ldbphysio(idle_addr);
1793 				value &= ~STARFIRE_BB_PC_IDLE(p);
1794 				break;
1795 
1796 			default:
1797 				cmn_err(CE_PANIC,
1798 					"drmach_iopc_op: unknown op (%d)",
1799 					(int)op);
1800 				/*NOTREACHED*/
1801 			}
1802 			stbphysio(idle_addr, value);
1803 		}
1804 	}
1805 }
1806 
1807 void
1808 drmach_copy_rename(drmachid_t id)
1809 {
1810 	drmach_copy_rename_program_t	*prog = id;
1811 	uint64_t			neer;
1812 
1813 	/*
1814 	 * UPA IDLE
1815 	 * Protocol = PAUSE -> IDLE -> UNPAUSE
1816 	 * In reality since we only "idle" the IOPCs it's sufficient
1817 	 * to just issue the IDLE operation since (in theory) all IOPCs
1818 	 * in the field are PC6.  However, we'll be robust and do the
1819 	 * proper workaround protocol so that we never have to worry!
1820 	 */
1821 	drmach_iopc_op(prog->ph, DO_PAUSE);
1822 	drmach_iopc_op(prog->ph, DO_IDLE);
1823 	DELAY(100);
1824 	drmach_iopc_op(prog->ph, DO_UNPAUSE);
1825 	DELAY(100);
1826 
1827 	/* disable CE reporting */
1828 	neer = get_error_enable();
1829 	set_error_enable(neer & ~EER_CEEN);
1830 
1831 	/* run the copy/rename program */
1832 	prog->run(prog);
1833 
1834 	/* enable CE reporting */
1835 	set_error_enable(neer);
1836 
1837 	/*
1838 	 * UPA UNIDLE
1839 	 * Protocol = UNIDLE
1840 	 */
1841 	drmach_iopc_op(prog->ph, DO_UNIDLE);
1842 	DELAY(100);
1843 }
1844 
1845 /*
1846  * The counter-timer and perf-counter nodes are not being cleaned
1847  * up after a board that was present at start of day is detached.
1848  * If the board has become unconfigured with this operation, walk
1849  * the prom tree and find all counter-timer and perf-counter nodes
1850  * that have the same board number as the board that was just
1851  * unconfigured and remove them.
1852  */
1853 static sbd_error_t *
1854 drmach_remove_counter_nodes(drmachid_t id)
1855 {
1856 	int		num;
1857 	char		name[OBP_MAXDRVNAME];
1858 	pnode_t		child;
1859 	dev_info_t	*dip;
1860 	sbd_error_t	*err;
1861 	drmach_status_t	stat;
1862 	drmach_board_t	*bp;
1863 
1864 	if (!DRMACH_IS_BOARD_ID(id)) {
1865 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1866 	}
1867 
1868 	if ((err = drmach_board_status(id, &stat)) != NULL) {
1869 		return (err);
1870 	}
1871 
1872 	/*
1873 	 * Only clean up the counter-timer and perf-counter
1874 	 * nodes when the entire board is unconfigured.
1875 	 */
1876 	if (stat.configured) {
1877 		return (NULL);
1878 	}
1879 
1880 	bp = (drmach_board_t *)id;
1881 
1882 	err = NULL;
1883 
1884 	for (child = prom_childnode(prom_rootnode()); child != OBP_NONODE;
1885 	    child = prom_nextnode(child)) {
1886 
1887 		if (prom_getprop(child, OBP_BOARDNUM, (caddr_t)&num) == -1) {
1888 			continue;
1889 		}
1890 
1891 		if (bp->bnum != num) {
1892 			continue;
1893 		}
1894 
1895 		if (prom_getprop(child, OBP_NAME, (caddr_t)name) == -1) {
1896 			continue;
1897 		}
1898 
1899 		if (strncmp(name, MISC_COUNTER_TIMER_DEVNAME, OBP_MAXDRVNAME) &&
1900 		    strncmp(name, MISC_PERF_COUNTER_DEVNAME, OBP_MAXDRVNAME)) {
1901 				continue;
1902 		}
1903 
1904 		/* Root node doesn't have to be held */
1905 		dip = e_ddi_nodeid_to_dip(child);
1906 
1907 		/*
1908 		 * If the node is only in the OBP tree, then
1909 		 * we don't have to remove it.
1910 		 */
1911 		if (dip) {
1912 			dev_info_t *fdip = NULL;
1913 
1914 			DRMACH_PR("removing %s devinfo node\n", name);
1915 
1916 			e_ddi_branch_hold(dip);
1917 			ddi_release_devi(dip); /* held in e_ddi_nodeid_to_dip */
1918 
1919 			if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1920 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1921 
1922 				/*
1923 				 * If non-NULL, fdip is held and must be
1924 				 * released.
1925 				 */
1926 				if (fdip != NULL) {
1927 					(void) ddi_pathname(fdip, path);
1928 					ddi_release_devi(fdip);
1929 				} else {
1930 					(void) ddi_pathname(dip, path);
1931 				}
1932 
1933 				err = drerr_new(1, ESTF_DRVFAIL, path);
1934 				kmem_free(path, MAXPATHLEN);
1935 				e_ddi_branch_rele(dip);
1936 				break;
1937 			}
1938 		}
1939 	}
1940 
1941 	return (err);
1942 }
1943 
1944 /*ARGSUSED*/
1945 sbd_error_t *
1946 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1947 {
1948 	/* allow status and ncm operations to always succeed */
1949 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1950 		return (NULL);
1951 	}
1952 
1953 	/* check all other commands for the required option string */
1954 	if ((opts->size > 0) && (opts->copts != NULL)) {
1955 
1956 		DRMACH_PR("platform options: %s\n", opts->copts);
1957 
1958 		if (strstr(opts->copts, "xfdr") != NULL) {
1959 			return (NULL);
1960 		}
1961 	}
1962 
1963 	return (drerr_new(0, ESTF_SUPPORT, NULL));
1964 }
1965 
1966 /*ARGSUSED*/
1967 sbd_error_t *
1968 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1969 {
1970 	sbd_error_t	*err = NULL;
1971 
1972 	switch (cmd) {
1973 	case SBD_CMD_UNCONFIGURE:
1974 
1975 		err = drmach_remove_counter_nodes(id);
1976 		break;
1977 
1978 	case SBD_CMD_CONFIGURE:
1979 	case SBD_CMD_DISCONNECT:
1980 	case SBD_CMD_CONNECT:
1981 	case SBD_CMD_GETNCM:
1982 	case SBD_CMD_STATUS:
1983 		break;
1984 
1985 	default:
1986 		break;
1987 	}
1988 
1989 	return (err);
1990 }
1991 
1992 sbd_error_t *
1993 drmach_board_assign(int bnum, drmachid_t *id)
1994 {
1995 	sbd_error_t	*err;
1996 
1997 	if (!drmach_initialized && drmach_init() == -1) {
1998 		err = DRMACH_INTERNAL_ERROR();
1999 	} else if (drmach_array_get(drmach_boards, bnum, id) == -1) {
2000 		err = drerr_new(1, ESTF_BNUM, "%d", bnum);
2001 	} else if (*id != NULL) {
2002 		err = NULL;
2003 	} else {
2004 		drmach_board_t	*bp;
2005 
2006 		*id  = (drmachid_t)drmach_board_new(bnum);
2007 		bp = *id;
2008 		bp->assigned = 1;
2009 		err = NULL;
2010 	}
2011 
2012 	return (err);
2013 }
2014 
2015 static int
2016 drmach_attach_board(void *arg)
2017 {
2018 	drmach_board_t	*obj = (drmach_board_t *)arg;
2019 	cpuset_t	cset;
2020 	int		retval;
2021 
2022 	/*
2023 	 * OBP disables traps during the board probe.
2024 	 * So, in order to prevent cross-call/cross-trap timeouts,
2025 	 * and thus panics, we effectively block anybody from
2026 	 * issuing xc's/xt's by doing a promsafe_xc_attention.
2027 	 * In the previous version of Starfire DR (2.6), a timeout
2028 	 * suspension mechanism was implemented in the send-mondo
2029 	 * assembly.  That mechanism is unnecessary with the
2030 	 * existence of xc_attention/xc_dismissed.
2031 	 */
2032 	cset = cpu_ready_set;
2033 	promsafe_xc_attention(cset);
2034 
2035 	retval = prom_starfire_add_brd(obj->connect_cpuid);
2036 
2037 	xc_dismissed(cset);
2038 
2039 	return (retval);
2040 }
2041 
2042 sbd_error_t *
2043 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
2044 {
2045 	drmach_board_t	*obj = (drmach_board_t *)id;
2046 	int		retval;
2047 	sbd_error_t	*err;
2048 	char		*cptr, *copts;
2049 
2050 	if (!DRMACH_IS_BOARD_ID(id))
2051 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2052 
2053 	if (opts->size > 0)
2054 		copts = opts->copts;
2055 
2056 	if ((cptr = strstr(copts, "cpuid=")) != NULL) {
2057 		int cpuid;
2058 
2059 		cptr += strlen("cpuid=");
2060 		cpuid = stoi(&cptr);
2061 
2062 		if (DRMACH_CPUID2BNUM(cpuid) == obj->bnum) {
2063 			obj->connect_cpuid = cpuid;
2064 			obj->assigned = 1;
2065 		} else
2066 			return (drerr_new(1, ESTF_SETCPUVAL, "%d", cpuid));
2067 	} else {
2068 		/* cpuid was not specified */
2069 		obj->connect_cpuid = -1;
2070 	}
2071 
2072 	if (obj->connect_cpuid == -1) {
2073 		err =  drerr_new(1, ESTF_NOCPUID, obj->cm.name);
2074 		return (err);
2075 	}
2076 
2077 	cmn_err(CE_CONT, "DRMACH: PROM attach %s CPU %d\n",
2078 		obj->cm.name, obj->connect_cpuid);
2079 
2080 	retval = prom_tree_update(drmach_attach_board, obj);
2081 
2082 	if (retval == 0)
2083 		err = NULL;
2084 	else {
2085 		cmn_err(CE_WARN, "prom error: prom_starfire_add_brd(%d) "
2086 			"returned %d", obj->connect_cpuid, retval);
2087 
2088 		err = drerr_new(1, ESTF_PROBE, obj->cm.name);
2089 	}
2090 
2091 	obj->connect_cpuid = -1;
2092 
2093 	return (err);
2094 }
2095 
2096 /*ARGSUSED*/
2097 sbd_error_t *
2098 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
2099 {
2100 	drmach_board_t		*bp;
2101 	int			rv;
2102 	int			d_idx;	/* device index */
2103 	drmachid_t		d_id;	/* device ID */
2104 	sbd_error_t		*err;
2105 
2106 	if (!DRMACH_IS_BOARD_ID(id))
2107 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2108 
2109 	bp = id;
2110 
2111 	/*
2112 	 * We need to make sure all of the board's device nodes
2113 	 * have been removed from the Solaris device tree before
2114 	 * continuing with the disconnect. Otherwise, we could
2115 	 * disconnect the board and remove the OBP device tree
2116 	 * nodes with Solaris device tree nodes remaining.
2117 	 *
2118 	 * On Starfire, Solaris device tree nodes are deleted
2119 	 * during unconfigure by drmach_unconfigure(). It's
2120 	 * necessary to do this here because drmach_unconfigure()
2121 	 * failures are not handled during unconfigure.
2122 	 */
2123 	if (bp->devices) {
2124 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
2125 		while (rv == 0) {
2126 			err = drmach_unconfigure(d_id, DRMACH_DEVI_REMOVE);
2127 			if (err)
2128 				return (err);
2129 
2130 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
2131 		}
2132 	}
2133 
2134 	/*
2135 	 * Starfire board Solaris device tree counter nodes,
2136 	 * which are only present on start-of-day boards, are
2137 	 * removed in the dr_post_op() code flow after the
2138 	 * board is unconfigured. We call the counter node
2139 	 * removal function here because unconfigure errors
2140 	 * can cause the dr_post_op() function to be skipped
2141 	 * after an unconfigure operation even though all of
2142 	 * the board's devices have been transitioned to the
2143 	 * unconfigured state.
2144 	 */
2145 	err = drmach_remove_counter_nodes(id);
2146 	if (err)
2147 		return (err);
2148 
2149 	return (NULL);
2150 }
2151 
2152 static int
2153 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
2154 {
2155 	drmach_node_t			*node = args->node;
2156 	drmach_board_cb_data_t		*data = args->data;
2157 	drmach_board_t			*obj = data->obj;
2158 
2159 	int		 rv;
2160 	int		 bnum;
2161 	drmach_device_t	*device;
2162 
2163 	rv = drmach_node_get_prop(node, OBP_BOARDNUM, &bnum);
2164 	if (rv) {
2165 		/*
2166 		 * if the node does not have a board# property, then
2167 		 * by that information alone it is known that drmach
2168 		 * is not interested in it.
2169 		 */
2170 		return (0);
2171 	} else if (bnum != obj->bnum)
2172 		return (0);
2173 
2174 	/*
2175 	 * Create a device data structure from this node data.
2176 	 * The call may yield nothing if the node is not of interest
2177 	 * to drmach.
2178 	 */
2179 	data->err = drmach_device_new(node, obj, &device);
2180 	if (data->err)
2181 		return (-1);
2182 	else if (device == NULL) {
2183 		/*
2184 		 * drmach_device_new examined the node we passed in
2185 		 * and determined that it was one not of interest to
2186 		 * drmach.  So, it is skipped.
2187 		 */
2188 		return (0);
2189 	}
2190 
2191 	rv = drmach_array_set(obj->devices, data->ndevs++, device);
2192 	if (rv) {
2193 		drmach_device_dispose(device);
2194 		data->err = DRMACH_INTERNAL_ERROR();
2195 		return (-1);
2196 	}
2197 
2198 	data->err = (*data->found)(data->a, device->type, device->unum, device);
2199 	return (data->err == NULL ? 0 : -1);
2200 }
2201 
2202 sbd_error_t *
2203 drmach_board_find_devices(drmachid_t id, void *a,
2204 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
2205 {
2206 	extern int		 plat_max_cpu_units_per_board();
2207 	extern int		 plat_max_mem_units_per_board();
2208 	extern int		 plat_max_io_units_per_board();
2209 
2210 	drmach_board_t		*obj = (drmach_board_t *)id;
2211 	sbd_error_t		*err;
2212 	int			 max_devices;
2213 	int			 rv;
2214 	drmach_board_cb_data_t	data;
2215 
2216 	max_devices  = plat_max_cpu_units_per_board();
2217 	max_devices += plat_max_mem_units_per_board();
2218 	max_devices += plat_max_io_units_per_board();
2219 
2220 	obj->devices = drmach_array_new(0, max_devices);
2221 
2222 	data.obj = obj;
2223 	data.ndevs = 0;
2224 	data.found = found;
2225 	data.a = a;
2226 	data.err = NULL;
2227 
2228 	rv = drmach_node_walk(obj->tree, &data, drmach_board_find_devices_cb);
2229 	if (rv == 0)
2230 		err = NULL;
2231 	else {
2232 		drmach_array_dispose(obj->devices, drmach_device_dispose);
2233 		obj->devices = NULL;
2234 
2235 		if (data.err)
2236 			err = data.err;
2237 		else
2238 			err = DRMACH_INTERNAL_ERROR();
2239 	}
2240 
2241 	return (err);
2242 }
2243 
2244 int
2245 drmach_board_lookup(int bnum, drmachid_t *id)
2246 {
2247 	int	rv = 0;
2248 
2249 	if (!drmach_initialized && drmach_init() == -1) {
2250 		*id = 0;
2251 		rv = -1;
2252 	} else if (drmach_array_get(drmach_boards, bnum, id)) {
2253 		*id = 0;
2254 		rv = -1;
2255 	}
2256 	return (rv);
2257 }
2258 
2259 sbd_error_t *
2260 drmach_board_name(int bnum, char *buf, int buflen)
2261 {
2262 	snprintf(buf, buflen, "SB%d", bnum);
2263 	return (NULL);
2264 }
2265 
2266 sbd_error_t *
2267 drmach_board_poweroff(drmachid_t id)
2268 {
2269 	drmach_board_t	*bp;
2270 	sbd_error_t	*err;
2271 	drmach_status_t	 stat;
2272 
2273 	if (!DRMACH_IS_BOARD_ID(id))
2274 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2275 	bp = id;
2276 
2277 	err = drmach_board_status(id, &stat);
2278 	if (err)
2279 		return (err);
2280 	else if (stat.configured || stat.busy)
2281 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2282 	else {
2283 		/* board power off is essentially a noop for Starfire */
2284 		bp->powered = 0;
2285 		return (NULL);
2286 	}
2287 	/*NOTREACHED*/
2288 }
2289 
2290 sbd_error_t *
2291 drmach_board_poweron(drmachid_t id)
2292 {
2293 	drmach_board_t	*bp;
2294 
2295 	if (!DRMACH_IS_BOARD_ID(id))
2296 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2297 	bp = id;
2298 
2299 	/* board power on is essentially a noop for Starfire */
2300 	bp->powered = 1;
2301 
2302 	return (NULL);
2303 }
2304 
2305 static sbd_error_t *
2306 drmach_board_release(drmachid_t id)
2307 {
2308 	if (!DRMACH_IS_BOARD_ID(id))
2309 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2310 	return (NULL);
2311 }
2312 
2313 /*ARGSUSED*/
2314 sbd_error_t *
2315 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
2316 {
2317 	return (NULL);
2318 }
2319 
2320 sbd_error_t *
2321 drmach_board_unassign(drmachid_t id)
2322 {
2323 	drmach_board_t	*bp;
2324 	sbd_error_t	*err;
2325 	drmach_status_t	 stat;
2326 
2327 	if (!DRMACH_IS_BOARD_ID(id))
2328 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2329 	bp = id;
2330 
2331 	err = drmach_board_status(id, &stat);
2332 	if (err)
2333 		return (err);
2334 	else if (stat.configured || stat.busy)
2335 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2336 	else if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
2337 		return (DRMACH_INTERNAL_ERROR());
2338 	else {
2339 		drmach_board_dispose(bp);
2340 		return (NULL);
2341 	}
2342 	/*NOTREACHED*/
2343 }
2344 
2345 static sbd_error_t *
2346 drmach_cpu_new(drmach_device_t *dp)
2347 {
2348 	static sbd_error_t *drmach_cpu_release(drmachid_t);
2349 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
2350 
2351 	sbd_error_t	*err;
2352 	int		 portid;
2353 
2354 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
2355 	if (err == NULL)
2356 		dp->unum = portid & 3;
2357 
2358 	dp->cm.isa = (void *)drmach_cpu_new;
2359 	dp->cm.release = drmach_cpu_release;
2360 	dp->cm.status = drmach_cpu_status;
2361 
2362 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
2363 
2364 	return (err);
2365 }
2366 
2367 /*
2368  * drmach_cpu_obp_detach()
2369  *  This requires two steps, first, we must put the cpuid into the OBP
2370  *  idle loop (Idle in Program) state.  Then we call OBP to place the CPU
2371  *  into the "Detached" state, which does any special processing to
2372  *  actually detach the cpu, such as flushing ecache, and also ensures
2373  *  that a subsequent breakpoint won't restart the cpu (if it was just in
2374  *  Idle in Program state).
2375  */
2376 static void
2377 drmach_cpu_obp_detach(int cpuid)
2378 {
2379 	/*
2380 	 * Cpu may not be under OBP's control. Eg, if cpu exited to download
2381 	 * helper on a prior attach.
2382 	 */
2383 	if (CPU_SGN_EXISTS(cpuid) &&
2384 			!SGN_CPU_IS_OS(cpuid) &&
2385 			!SGN_CPU_IS_OBP(cpuid)) {
2386 		cmn_err(CE_WARN,
2387 			"unexpected signature (0x%x) for cpu %d",
2388 			get_cpu_sgn(cpuid), cpuid);
2389 	}
2390 
2391 	/*
2392 	 * Now we place the CPU into the "Detached" idle loop in OBP.
2393 	 * This is so that the CPU won't be restarted if we break into
2394 	 * OBP with a breakpoint or BREAK key from the console, and also
2395 	 * if we need to do any special processing, such as flushing the
2396 	 * cpu's ecache, disabling interrupts (by turning of the ET bit in
2397 	 * the PSR) and/or spinning in BBSRAM rather than global memory.
2398 	 */
2399 	DRMACH_PR("prom_starfire_rm_cpu(%d)\n", cpuid);
2400 	prom_starfire_rm_cpu(cpuid);
2401 }
2402 
/*
 * drmach_cpu_obp_is_detached() reports whether the cpu's sigblock
 * signature shows the detached state.  It returns 1 when the cpu has no
 * signature block at all, or when the signature is an OS signature whose
 * state is detached; otherwise it returns 0.  Call this only after OBP
 * has been asked to detach the cpu.  It must NOT be used as a check in
 * any other flow.
 */
static int
drmach_cpu_obp_is_detached(int cpuid)
{
	/* a cpu without a signature block is treated as detached */
	if (!CPU_SGN_EXISTS(cpuid))
		return (1);

	if (SGN_CPU_IS_OS(cpuid) && SGN_CPU_STATE_IS_DETACHED(cpuid))
		return (1);

	return (0);
}
2418 
2419 static int
2420 drmach_cpu_start(struct cpu *cp)
2421 {
2422 	int		cpuid = cp->cpu_id;
2423 	int		ntries = drmach_cpu_ntries;
2424 	extern void	restart_other_cpu(int);
2425 
2426 	ASSERT(MUTEX_HELD(&cpu_lock));
2427 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
2428 
2429 	cp->cpu_flags &= ~CPU_POWEROFF;
2430 
2431 	/*
2432 	 * NOTE: restart_other_cpu pauses cpus during the
2433 	 *	 slave cpu start.  This helps to quiesce the
2434 	 *	 bus traffic a bit which makes the tick sync
2435 	 *	 routine in the prom more robust.
2436 	 */
2437 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
2438 
2439 	prom_starfire_add_cpu(cpuid);
2440 
2441 	restart_other_cpu(cpuid);
2442 
2443 	/*
2444 	 * Wait for the cpu to reach its idle thread before
2445 	 * we zap him with a request to blow away the mappings
2446 	 * he (might) have for the drmach_shutdown_asm code
2447 	 * he may have executed on unconfigure.
2448 	 */
2449 	while ((cp->cpu_thread != cp->cpu_idle_thread) && (ntries > 0)) {
2450 		DELAY(drmach_cpu_delay);
2451 		ntries--;
2452 	}
2453 
2454 	DRMACH_PR("waited %d out of %d loops for cpu %d\n",
2455 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cpuid);
2456 
2457 	xt_one(cpuid, vtag_flushpage_tl1,
2458 		(uint64_t)drmach_shutdown_va, (uint64_t)ksfmmup);
2459 
2460 	return (0);
2461 }
2462 
2463 /*
2464  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
2465  * it has been offlined. The function of this routine is to get the cpu
2466  * spinning in a safe place. The requirement is that the system will not
2467  * reference anything on the detaching board (memory and i/o is detached
2468  * elsewhere) and that the CPU not reference anything on any other board
2469  * in the system.  This isolation is required during and after the writes
2470  * to the domain masks to remove the board from the domain.
2471  *
2472  * To accomplish this isolation the following is done:
2473  *	1) Create a locked mapping to a location in BBSRAM where
2474  *	   the cpu will execute.
2475  *	2) Copy the target function (drmach_shutdown_asm) in which
2476  *	   the cpu will execute into BBSRAM.
2477  *	3) Jump into function with BBSRAM.
2478  *	   Function will:
2479  *	   3.1) Flush its Ecache (displacement).
2480  *	   3.2) Flush its Dcache with HW mechanism.
2481  *	   3.3) Flush its Icache with HW mechanism.
2482  *	   3.4) Flush all valid and _unlocked_ D-TLB entries.
2483  *	   3.5) Flush all valid and _unlocked_ I-TLB entries.
2484  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
2485  *		recovered by drmach_cpu_poweroff().
2486  *	4) Jump into a tight loop.
2487  */
2488 #define	DRMACH_BBSRAM_OFFSET	0x1000
2489 
2490 static void
2491 drmach_cpu_stop_self(void)
2492 {
2493 	int		cpuid = (int)CPU->cpu_id;
2494 	tte_t		tte;
2495 	volatile uint_t	*src, *dst;
2496 	uint_t		funclen;
2497 	uint64_t	bbsram_pa, bbsram_offset;
2498 	uint_t		bbsram_pfn;
2499 	uint64_t	bbsram_addr;
2500 	void		(*bbsram_func)(uint64_t);
2501 	extern void	drmach_shutdown_asm(uint64_t);
2502 	extern void	drmach_shutdown_asm_end(void);
2503 
2504 	funclen = (uint_t)drmach_shutdown_asm_end - (uint_t)drmach_shutdown_asm;
2505 	ASSERT(funclen <= MMU_PAGESIZE);
2506 	/*
2507 	 * We'll start from the 0th's base.
2508 	 */
2509 	bbsram_pa = STARFIRE_UPAID2UPS(cpuid) | STARFIRE_PSI_BASE;
2510 	bbsram_offset = bbsram_pa | 0xfe0ULL;
2511 	bbsram_pa += ldphysio(bbsram_offset) + DRMACH_BBSRAM_OFFSET;
2512 
2513 	bbsram_pfn = (uint_t)(bbsram_pa >> MMU_PAGESHIFT);
2514 
2515 	bbsram_addr = (uint64_t)drmach_shutdown_va;
2516 	drmach_shutdown_asm_mbox->estack = bbsram_addr + (uint64_t)funclen;
2517 
2518 	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
2519 			TTE_PFN_INTHI(bbsram_pfn);
2520 	tte.tte_intlo = TTE_PFN_INTLO(bbsram_pfn) |
2521 			TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
2522 	sfmmu_dtlb_ld_kva(drmach_shutdown_va, &tte);	/* load dtlb */
2523 	sfmmu_itlb_ld_kva(drmach_shutdown_va, &tte);	/* load itlb */
2524 
2525 	for (src = (uint_t *)drmach_shutdown_asm, dst = (uint_t *)bbsram_addr;
2526 		src < (uint_t *)drmach_shutdown_asm_end; src++, dst++)
2527 		*dst = *src;
2528 
2529 	bbsram_func = (void (*)())bbsram_addr;
2530 	drmach_shutdown_asm_mbox->flushaddr = ecache_flushaddr;
2531 	drmach_shutdown_asm_mbox->size = (cpunodes[cpuid].ecache_size << 1);
2532 	drmach_shutdown_asm_mbox->linesize = cpunodes[cpuid].ecache_linesize;
2533 	drmach_shutdown_asm_mbox->physaddr
2534 				    = va_to_pa((void *)&drmach_xt_mb[cpuid]);
2535 
2536 	/*
2537 	 * Signal to drmach_cpu_poweroff() is via drmach_xt_mb cleared
2538 	 * by asm code
2539 	 */
2540 
2541 	(*bbsram_func)(va_to_pa((void *)drmach_shutdown_asm_mbox));
2542 }
2543 
2544 static void
2545 drmach_cpu_shutdown_self(void)
2546 {
2547 	cpu_t		*cp = CPU;
2548 	int		cpuid = cp->cpu_id;
2549 	extern void	flush_windows(void);
2550 
2551 	flush_windows();
2552 
2553 	(void) spl8();
2554 
2555 	ASSERT(cp->cpu_intr_actv == 0);
2556 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
2557 	    cp->cpu_thread == cp->cpu_startup_thread);
2558 
2559 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2560 
2561 	drmach_cpu_stop_self();
2562 
2563 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
2564 }
2565 
2566 /* a helper routine to keep the math in one place */
2567 static processorid_t
2568 drmach_cpu_calc_id(drmach_device_t *dp)
2569 {
2570 	return (dp->bp->bnum * MAX_CPU_UNITS_PER_BOARD + dp->unum);
2571 }
2572 
2573 /*
2574  * Move bootproc (SIGBCPU) to another cpu.  If dst_cpu is NULL, a
2575  * destination cpu is chosen from the set of cpus not located on the
2576  * same board as the current bootproc cpu.
2577  */
2578 static sbd_error_t *
2579 drmach_cpu_juggle_bootproc(drmach_device_t *dst_cpu)
2580 {
2581 	processorid_t	 cpuid;
2582 	struct cpu	*cp;
2583 	sbd_error_t	*err;
2584 	int		 rv;
2585 
2586 	ASSERT(MUTEX_HELD(&cpu_lock));
2587 
2588 	/* dst_cpu is NULL when target cpu is unspecified. So, pick one. */
2589 	if (dst_cpu == NULL) {
2590 		int avoid_board = DRMACH_CPUID2BNUM(SIGBCPU->cpu_id);
2591 		int max_cpuid = MAX_BOARDS * MAX_CPU_UNITS_PER_BOARD;
2592 
2593 		for (cpuid = 0; cpuid < max_cpuid; cpuid++)
2594 			if (DRMACH_CPUID2BNUM(cpuid) != avoid_board) {
2595 				cp = cpu_get(cpuid);
2596 				if (cp != NULL && cpu_is_online(cp))
2597 					break;
2598 			}
2599 
2600 		if (cpuid == max_cpuid) {
2601 			err = drerr_new(1, ESTF_JUGGLE, NULL);
2602 			return (err);
2603 		}
2604 
2605 		/* else, cp points to the selected target cpu */
2606 	} else {
2607 		cpuid = drmach_cpu_calc_id(dst_cpu);
2608 
2609 		if ((cp = cpu_get(cpuid)) == NULL) {
2610 			err = drerr_new(1, ESTF_NODEV, "%s::%s",
2611 				dst_cpu->bp->cm.name, dst_cpu->cm.name);
2612 			return (err);
2613 		}
2614 
2615 		if (cpuid == SIGBCPU->cpu_id) {
2616 			cmn_err(CE_WARN,
2617 				"SIGBCPU(%d) same as new selection(%d)",
2618 				SIGBCPU->cpu_id, cpuid);
2619 
2620 			/* technically not an error, but a no-op */
2621 			return (NULL);
2622 		}
2623 	}
2624 
2625 	cmn_err(CE_NOTE, "?relocating SIGBCPU from %d to %d",
2626 		SIGBCPU->cpu_id, cpuid);
2627 
2628 	DRMACH_PR("moving SIGBCPU to CPU %d\n", cpuid);
2629 
2630 	/*
2631 	 * Tell OBP to initialize cvc-offset field of new CPU0
2632 	 * so that it's in sync with OBP and cvc_server
2633 	 */
2634 	prom_starfire_init_console(cpuid);
2635 
2636 	/*
2637 	 * Assign cvc to new cpu0's bbsram for I/O.  This has to be
2638 	 * done BEFORE cpu0 is moved via obp, since this logic
2639 	 * will cause obp_helper to switch to a different bbsram for
2640 	 * cvc I/O.  We don't want cvc writing to a buffer from which
2641 	 * nobody will pick up the data!
2642 	 */
2643 	cvc_assign_iocpu(cpuid);
2644 
2645 	rv = prom_starfire_move_cpu0(cpuid);
2646 
2647 	if (rv == 0) {
2648 		SIGBCPU = cp;
2649 
2650 		DRMACH_PR("successfully juggled to CPU %d\n", cpuid);
2651 		return (NULL);
2652 	} else {
2653 		DRMACH_PR("prom error: prom_starfire_move_cpu0(%d) "
2654 			"returned %d\n", cpuid, rv);
2655 
2656 		/*
2657 		 * The move failed, hopefully obp_helper is still back
2658 		 * at the old bootproc.  Move cvc back there.
2659 		 */
2660 		cvc_assign_iocpu(SIGBCPU->cpu_id);
2661 
2662 
2663 		err = drerr_new(1, ESTF_MOVESIGB, "CPU %d", cpuid);
2664 		return (err);
2665 	}
2666 	/*NOTREACHED*/
2667 }
2668 
2669 static sbd_error_t *
2670 drmach_cpu_release(drmachid_t id)
2671 {
2672 	drmach_device_t	*dp;
2673 	processorid_t	 cpuid;
2674 	struct cpu	*cp;
2675 	sbd_error_t	*err;
2676 
2677 	if (!DRMACH_IS_CPU_ID(id))
2678 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2679 	dp = id;
2680 	cpuid = drmach_cpu_calc_id(dp);
2681 
2682 	ASSERT(MUTEX_HELD(&cpu_lock));
2683 
2684 	cp = cpu_get(cpuid);
2685 	if (cp == NULL)
2686 		err = DRMACH_INTERNAL_ERROR();
2687 	else if (SIGBCPU->cpu_id == cp->cpu_id)
2688 		err = drmach_cpu_juggle_bootproc(NULL);
2689 	else
2690 		err = NULL;
2691 
2692 	return (err);
2693 }
2694 
2695 static sbd_error_t *
2696 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
2697 {
2698 	drmach_device_t *dp;
2699 
2700 	ASSERT(DRMACH_IS_CPU_ID(id));
2701 	dp = id;
2702 
2703 	stat->assigned = dp->bp->assigned;
2704 	stat->powered = dp->bp->powered;
2705 	mutex_enter(&cpu_lock);
2706 	stat->configured = (cpu_get(drmach_cpu_calc_id(dp)) != NULL);
2707 	mutex_exit(&cpu_lock);
2708 	stat->busy = dp->busy;
2709 	strncpy(stat->type, dp->type, sizeof (stat->type));
2710 	stat->info[0] = '\0';
2711 
2712 	return (NULL);
2713 }
2714 
2715 sbd_error_t *
2716 drmach_cpu_disconnect(drmachid_t id)
2717 {
2718 	drmach_device_t	*cpu;
2719 	int		 cpuid;
2720 	int		 ntries;
2721 	int		 p;
2722 	u_longlong_t	 pc_addr;
2723 	uchar_t		 rvalue;
2724 
2725 	if (!DRMACH_IS_CPU_ID(id))
2726 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2727 	cpu = id;
2728 
2729 	cpuid = drmach_cpu_calc_id(cpu);
2730 	if (SIGBCPU->cpu_id == cpuid) {
2731 		/* this cpu is SIGBCPU, can't disconnect */
2732 		return (drerr_new(1, ESTF_HASSIGB, "%s::%s",
2733 				cpu->bp->cm.name, cpu->cm.name));
2734 	}
2735 
2736 	/*
2737 	 * Make sure SIGBST_DETACHED is set before
2738 	 * mapping out the sig block.
2739 	 */
2740 	ntries = drmach_cpu_ntries;
2741 	while (!drmach_cpu_obp_is_detached(cpuid) && ntries) {
2742 		DELAY(drmach_cpu_delay);
2743 		ntries--;
2744 	}
2745 	if (!drmach_cpu_obp_is_detached(cpuid)) {
2746 		cmn_err(CE_WARN, "failed to mark cpu %d detached in sigblock",
2747 			cpuid);
2748 	}
2749 
2750 	/* map out signature block */
2751 	if (CPU_SGN_EXISTS(cpuid)) {
2752 		CPU_SGN_MAPOUT(cpuid);
2753 	}
2754 
2755 	/*
2756 	 * We now PC IDLE the processor to guarantee we
2757 	 * stop any transactions from coming from it.
2758 	 */
2759 	p = cpu->unum & 1;
2760 	pc_addr = STARFIRE_BB_PC_ADDR(cpu->bp->bnum, cpu->unum, 0);
2761 
2762 	DRMACH_PR("PC idle cpu %d (addr = 0x%llx, port = %d, p = %d)",
2763 		drmach_cpu_calc_id(cpu), pc_addr, cpu->unum, p);
2764 
2765 	rvalue = ldbphysio(pc_addr);
2766 	rvalue |= STARFIRE_BB_PC_IDLE(p);
2767 	stbphysio(pc_addr, rvalue);
2768 	DELAY(50000);
2769 
2770 	return (NULL);
2771 }
2772 
2773 sbd_error_t *
2774 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
2775 {
2776 	drmach_device_t *cpu;
2777 
2778 	if (!DRMACH_IS_CPU_ID(id))
2779 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2780 	cpu = id;
2781 
2782 	*cpuid = drmach_cpu_calc_id(cpu);
2783 	return (NULL);
2784 }
2785 
2786 sbd_error_t *
2787 drmach_cpu_get_impl(drmachid_t id, int *ip)
2788 {
2789 	drmach_device_t *cpu;
2790 	int		impl;
2791 
2792 	if (!DRMACH_IS_CPU_ID(id))
2793 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2794 
2795 	cpu = id;
2796 
2797 	if (drmach_node_get_prop(cpu->node, "implementation#", &impl) == -1) {
2798 		return (DRMACH_INTERNAL_ERROR());
2799 	}
2800 
2801 	*ip = impl;
2802 
2803 	return (NULL);
2804 }
2805 
2806 void
2807 drmach_cpu_flush_ecache_sync(void)
2808 {
2809 	ASSERT(curthread->t_bound_cpu == CPU);
2810 
2811 	/*
2812 	 * Now let's flush our ecache thereby removing all references
2813 	 * to the target (detaching) memory from all ecache's in
2814 	 * system.
2815 	 */
2816 	cpu_flush_ecache();
2817 
2818 	/*
2819 	 * Delay 100 usec out of paranoia to insure everything
2820 	 * (hardware queues) has drained before we start reprogramming
2821 	 * the hardware.
2822 	 */
2823 	DELAY(100);
2824 }
2825 
2826 sbd_error_t *
2827 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2828 {
2829 	drmach_device_t	*dp;
2830 
2831 	if (!DRMACH_IS_DEVICE_ID(id))
2832 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2833 	dp = id;
2834 
2835 	*dip = drmach_node_get_dip(dp->node);
2836 	return (NULL);
2837 }
2838 
2839 sbd_error_t *
2840 drmach_io_is_attached(drmachid_t id, int *yes)
2841 {
2842 	drmach_device_t *dp;
2843 	dev_info_t	*dip;
2844 	int		state;
2845 
2846 	if (!DRMACH_IS_IO_ID(id))
2847 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2848 	dp = id;
2849 
2850 	dip = drmach_node_get_dip(dp->node);
2851 	if (dip == NULL) {
2852 		*yes = 0;
2853 		return (NULL);
2854 	}
2855 
2856 	state = ddi_get_devstate(dip);
2857 	*yes = (i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP));
2858 
2859 	return (NULL);
2860 }
2861 
2862 sbd_error_t *
2863 drmach_io_pre_release(drmachid_t id)
2864 {
2865 	if (!DRMACH_IS_IO_ID(id))
2866 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2867 	return (NULL);
2868 }
2869 
2870 static sbd_error_t *
2871 drmach_io_release(drmachid_t id)
2872 {
2873 	if (!DRMACH_IS_IO_ID(id))
2874 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2875 	return (NULL);
2876 }
2877 
2878 sbd_error_t *
2879 drmach_io_unrelease(drmachid_t id)
2880 {
2881 	if (!DRMACH_IS_IO_ID(id))
2882 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2883 	return (NULL);
2884 }
2885 
2886 /*ARGSUSED*/
2887 sbd_error_t *
2888 drmach_io_post_release(drmachid_t id)
2889 {
2890 	return (NULL);
2891 }
2892 
2893 /*ARGSUSED*/
2894 sbd_error_t *
2895 drmach_io_post_attach(drmachid_t id)
2896 {
2897 	return (NULL);
2898 }
2899 
2900 static sbd_error_t *
2901 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2902 {
2903 	drmach_device_t *dp;
2904 	sbd_error_t	*err;
2905 	int		 configured;
2906 
2907 	ASSERT(DRMACH_IS_IO_ID(id));
2908 	dp = id;
2909 
2910 	err = drmach_io_is_attached(id, &configured);
2911 	if (err)
2912 		return (err);
2913 
2914 	stat->assigned = dp->bp->assigned;
2915 	stat->powered = dp->bp->powered;
2916 	stat->configured = (configured != 0);
2917 	stat->busy = dp->busy;
2918 	strncpy(stat->type, dp->type, sizeof (stat->type));
2919 	stat->info[0] = '\0';
2920 
2921 	return (NULL);
2922 }
2923 
2924 static sbd_error_t *
2925 drmach_mem_new(drmach_device_t *dp)
2926 {
2927 	static sbd_error_t *drmach_mem_release(drmachid_t);
2928 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2929 
2930 	dp->unum = 0;
2931 	dp->cm.isa = (void *)drmach_mem_new;
2932 	dp->cm.release = drmach_mem_release;
2933 	dp->cm.status = drmach_mem_status;
2934 
2935 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s", dp->type);
2936 
2937 	return (NULL);
2938 }
2939 
2940 sbd_error_t *
2941 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2942 {
2943 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2944 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2945 	pda_handle_t	ph;
2946 	int		rv;
2947 
2948 	ASSERT(size != 0);
2949 
2950 	if (!DRMACH_IS_MEM_ID(id))
2951 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2952 
2953 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
2954 	if (rv == ENOMEM) {
2955 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
2956 			(size == 0 ? 0 : size / MBYTE));
2957 	} else if (rv != 0) {
2958 		/* catch this in debug kernels */
2959 		ASSERT(0);
2960 
2961 		cmn_err(CE_WARN, "unexpected kcage_range_add"
2962 			" return value %d", rv);
2963 	}
2964 
2965 	/*
2966 	 * Update the PDA (post2obp) structure with the
2967 	 * range of the newly added memory.
2968 	 */
2969 	ph = drmach_pda_open();
2970 	if (ph != NULL) {
2971 		pda_mem_add_span(ph, basepa, size);
2972 		pda_close(ph);
2973 	}
2974 
2975 	return (NULL);
2976 }
2977 
2978 sbd_error_t *
2979 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2980 {
2981 	drmach_device_t	*mem = id;
2982 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2983 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2984 	uint_t		mcreg;
2985 	sbd_error_t	*err;
2986 	pda_handle_t	ph;
2987 	int		rv;
2988 
2989 	err = drmach_read_mc_asr(id, &mcreg);
2990 	if (err)
2991 		return (err);
2992 	else if (mcreg & STARFIRE_MC_INTERLEAVE_MASK) {
2993 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
2994 				mem->bp->cm.name, mem->cm.name));
2995 	}
2996 
2997 	if (size > 0) {
2998 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2999 		if (rv != 0) {
3000 			cmn_err(CE_WARN,
3001 			    "unexpected kcage_range_delete_post_mem_del"
3002 			    " return value %d", rv);
3003 			return (DRMACH_INTERNAL_ERROR());
3004 		}
3005 	}
3006 
3007 	/*
3008 	 * Update the PDA (post2obp) structure with the
3009 	 * range of removed memory.
3010 	 */
3011 	ph = drmach_pda_open();
3012 	if (ph != NULL) {
3013 		if (size > 0)
3014 			pda_mem_del_span(ph, basepa, size);
3015 
3016 		/* update PDA to board's new mc register settings */
3017 		pda_mem_sync(ph, mem->bp->bnum, 0);
3018 
3019 		pda_close(ph);
3020 	}
3021 
3022 	return (NULL);
3023 }
3024 
3025 /* support routine for enable and disable */
3026 static sbd_error_t *
3027 drmach_mem_update_interconnect(drmachid_t id, uint_t mcreg)
3028 {
3029 	drmach_device_t	*dp;
3030 	pda_handle_t	 ph;
3031 	int		 b;
3032 
3033 	if (!DRMACH_IS_MEM_ID(id))
3034 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3035 	dp = id;
3036 
3037 	ph = drmach_pda_open();
3038 	if (ph == NULL)
3039 		return (DRMACH_INTERNAL_ERROR());
3040 
3041 	for (b = 0; b < MAX_BOARDS; b++) {
3042 		int		p;
3043 		int		rv;
3044 		ushort_t	bda_proc, bda_ioc;
3045 		board_desc_t	*bdesc;
3046 
3047 		if (pda_board_present(ph, b) == 0)
3048 			continue;
3049 
3050 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
3051 
3052 		/*
3053 		 * Update PCs for CPUs.
3054 		 */
3055 
3056 		/* make sure definition in platmod is in sync with pda */
3057 		ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3058 
3059 		bda_proc = bdesc->bda_proc;
3060 		for (p = 0; p < MAX_PROCMODS; p++) {
3061 			if (BDA_NBL(bda_proc, p) != BDAN_GOOD)
3062 				continue;
3063 
3064 			rv = pc_madr_add(b, dp->bp->bnum, p, mcreg);
3065 			if (rv) {
3066 				pda_close(ph);
3067 				return (DRMACH_INTERNAL_ERROR());
3068 			}
3069 		}
3070 
3071 		/*
3072 		 * Update PCs for IOCs.
3073 		 */
3074 
3075 		/* make sure definition in platmod is in sync with pda */
3076 		ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3077 
3078 		bda_ioc = bdesc->bda_ioc;
3079 		for (p = 0; p < MAX_IOCS; p++) {
3080 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
3081 				continue;
3082 
3083 			rv = pc_madr_add(b, dp->bp->bnum, p + 4, mcreg);
3084 			if (rv) {
3085 				pda_close(ph);
3086 				return (DRMACH_INTERNAL_ERROR());
3087 			}
3088 		}
3089 	}
3090 
3091 	pda_close(ph);
3092 	return (NULL);
3093 }
3094 
3095 sbd_error_t *
3096 drmach_mem_disable(drmachid_t id)
3097 {
3098 	sbd_error_t	*err;
3099 	uint_t		 mcreg;
3100 
3101 	err = drmach_read_mc_asr(id, &mcreg);
3102 	if (err == NULL) {
3103 		ASSERT(mcreg & STARFIRE_MC_MEM_PRESENT_MASK);
3104 
3105 		/* Turn off presence bit. */
3106 		mcreg &= ~STARFIRE_MC_MEM_PRESENT_MASK;
3107 
3108 		err = drmach_mem_update_interconnect(id, mcreg);
3109 		if (err == NULL)
3110 			err = drmach_write_mc_asr(id, mcreg);
3111 	}
3112 
3113 	return (err);
3114 }
3115 
3116 sbd_error_t *
3117 drmach_mem_enable(drmachid_t id)
3118 {
3119 	sbd_error_t	*err;
3120 	uint_t		 mcreg;
3121 
3122 	err = drmach_read_mc_asr(id, &mcreg);
3123 	if (err == NULL) {
3124 		mcreg |= STARFIRE_MC_MEM_PRESENT_MASK;
3125 
3126 		err = drmach_write_mc_asr(id, mcreg);
3127 		if (err == NULL)
3128 			err = drmach_mem_update_interconnect(id, mcreg);
3129 	}
3130 
3131 	return (err);
3132 }
3133 
3134 sbd_error_t *
3135 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
3136 {
3137 	drmach_device_t	*mem;
3138 	sbd_error_t	*err;
3139 	pnode_t		 nodeid;
3140 
3141 	if (!DRMACH_IS_MEM_ID(id))
3142 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3143 	mem = id;
3144 
3145 	nodeid = drmach_node_get_dnode(mem->node);
3146 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
3147 		err = DRMACH_INTERNAL_ERROR();
3148 	else {
3149 		uint64_t size;
3150 
3151 		size = mc_get_alignment_mask(nodeid);
3152 		if (size == (uint64_t)-1)
3153 			err = DRMACH_INTERNAL_ERROR();
3154 		else {
3155 			*mask = size - 1;
3156 			err = NULL;
3157 		}
3158 	}
3159 
3160 	return (err);
3161 }
3162 
3163 sbd_error_t *
3164 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
3165 {
3166 	sbd_error_t	*err;
3167 	uint_t		 mcreg;
3168 
3169 	err = drmach_read_mc_asr(id, &mcreg);
3170 	if (err == NULL)
3171 		*pa = mc_asr_to_pa(mcreg);
3172 
3173 	return (err);
3174 }
3175 
3176 /*
3177  * Use of this routine after copy/rename will yield incorrect results,
3178  * because the OBP MEMAVAIL property will not correctly reflect the
3179  * programming of the MCs.
3180  */
3181 sbd_error_t *
3182 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
3183 {
3184 	drmach_device_t	*mem;
3185 	int		rv, i, rlen, rblks;
3186 	sbd_error_t	*err;
3187 	struct memlist	*mlist;
3188 	struct sf_memunit_regspec *rlist;
3189 
3190 	if (!DRMACH_IS_MEM_ID(id))
3191 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3192 	mem = id;
3193 
3194 	err = drmach_device_get_proplen(mem, "dr-available", &rlen);
3195 	if (err)
3196 		return (err);
3197 
3198 	rlist = kmem_zalloc(rlen, KM_SLEEP);
3199 
3200 	err = drmach_device_get_prop(mem, "dr-available", rlist);
3201 	if (err) {
3202 		kmem_free(rlist, rlen);
3203 		return (err);
3204 	}
3205 
3206 	mlist = NULL;
3207 	rblks = rlen / sizeof (struct sf_memunit_regspec);
3208 	for (i = 0; i < rblks; i++) {
3209 		uint64_t	addr, size;
3210 
3211 		addr  = (uint64_t)rlist[i].regspec_addr_hi << 32;
3212 		addr |= (uint64_t)rlist[i].regspec_addr_lo;
3213 		size  = (uint64_t)rlist[i].regspec_size_hi << 32;
3214 		size |= (uint64_t)rlist[i].regspec_size_lo;
3215 
3216 		mlist = memlist_add_span(mlist, addr, size);
3217 	}
3218 
3219 	kmem_free(rlist, rlen);
3220 
3221 	/*
3222 	 * Make sure the incoming memlist doesn't already
3223 	 * intersect with what's present in the system (phys_install).
3224 	 */
3225 	memlist_read_lock();
3226 	rv = memlist_intersect(phys_install, mlist);
3227 	memlist_read_unlock();
3228 	if (rv) {
3229 #ifdef DEBUG
3230 		DRMACH_PR("OBP derived memlist intersects"
3231 			" with phys_install\n");
3232 		memlist_dump(mlist);
3233 
3234 		DRMACH_PR("phys_install memlist:\n");
3235 		memlist_dump(phys_install);
3236 #endif
3237 
3238 		memlist_delete(mlist);
3239 		return (DRMACH_INTERNAL_ERROR());
3240 	}
3241 
3242 #ifdef DEBUG
3243 	DRMACH_PR("OBP derived memlist:");
3244 	memlist_dump(mlist);
3245 #endif
3246 
3247 	*ml = mlist;
3248 	return (NULL);
3249 }
3250 
3251 sbd_error_t *
3252 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
3253 {
3254 	drmach_device_t	*mem;
3255 	pda_handle_t	ph;
3256 	pgcnt_t		npages;
3257 
3258 	if (!DRMACH_IS_MEM_ID(id))
3259 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3260 	mem = id;
3261 
3262 	ph = drmach_pda_open();
3263 	if (ph == NULL)
3264 		return (DRMACH_INTERNAL_ERROR());
3265 
3266 	npages = pda_get_mem_size(ph, mem->bp->bnum);
3267 	*bytes = (uint64_t)npages << PAGESHIFT;
3268 
3269 	pda_close(ph);
3270 	return (NULL);
3271 }
3272 
3273 sbd_error_t *
3274 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
3275 {
3276 	if (!DRMACH_IS_MEM_ID(id))
3277 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3278 
3279 	*bytes = mc_get_mem_alignment();
3280 	return (NULL);
3281 }
3282 
3283 /* field debugging tool */
3284 processorid_t drmach_mem_cpu_affinity_nail = 0;
3285 
3286 processorid_t
3287 drmach_mem_cpu_affinity(drmachid_t id)
3288 {
3289 	drmach_device_t	*mp;
3290 	drmach_board_t	*bp;
3291 	processorid_t	 cpuid;
3292 
3293 	if (!DRMACH_IS_MEM_ID(id))
3294 		return (CPU_CURRENT);
3295 
3296 	if (drmach_mem_cpu_affinity_nail) {
3297 		cpuid = drmach_mem_cpu_affinity_nail;
3298 
3299 		if (cpuid < 0 || cpuid > NCPU)
3300 			return (CPU_CURRENT);
3301 
3302 		mutex_enter(&cpu_lock);
3303 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
3304 			cpuid = CPU_CURRENT;
3305 		mutex_exit(&cpu_lock);
3306 
3307 		return (cpuid);
3308 	}
3309 
3310 	/* try to choose a proc on the target board */
3311 	mp = id;
3312 	bp = mp->bp;
3313 	if (bp->devices) {
3314 		int		rv;
3315 		int		d_idx;
3316 		drmachid_t	d_id;
3317 
3318 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3319 		while (rv == 0) {
3320 			if (DRMACH_IS_CPU_ID(d_id)) {
3321 				cpuid = drmach_cpu_calc_id(d_id);
3322 
3323 				mutex_enter(&cpu_lock);
3324 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
3325 					mutex_exit(&cpu_lock);
3326 					DRMACH_PR("drmach_mem_cpu_affinity: "
3327 					    "selected cpuid=%d\n", cpuid);
3328 					return (cpuid);
3329 				} else {
3330 					mutex_exit(&cpu_lock);
3331 				}
3332 			}
3333 
3334 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3335 		}
3336 	}
3337 
3338 	/* otherwise, this proc, wherever it is */
3339 	DRMACH_PR("drmach_mem_cpu_affinity: using default CPU_CURRENT\n");
3340 
3341 	return (CPU_CURRENT);
3342 }
3343 
3344 static sbd_error_t *
3345 drmach_mem_release(drmachid_t id)
3346 {
3347 	if (!DRMACH_IS_MEM_ID(id))
3348 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3349 	return (NULL);
3350 }
3351 
3352 static sbd_error_t *
3353 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
3354 {
3355 	drmach_device_t *dp;
3356 	sbd_error_t	*err;
3357 	uint64_t	 pa, slice_size;
3358 	struct memlist	*ml;
3359 
3360 	ASSERT(DRMACH_IS_MEM_ID(id));
3361 	dp = id;
3362 
3363 	/* get starting physical address of target memory */
3364 	err = drmach_mem_get_base_physaddr(id, &pa);
3365 	if (err)
3366 		return (err);
3367 
3368 	/* round down to slice boundary */
3369 	slice_size = mc_get_mem_alignment();
3370 	pa &= ~ (slice_size - 1);
3371 
3372 	/* stop at first span that is in slice */
3373 	memlist_read_lock();
3374 	for (ml = phys_install; ml; ml = ml->next)
3375 		if (ml->address >= pa && ml->address < pa + slice_size)
3376 			break;
3377 	memlist_read_unlock();
3378 
3379 	stat->assigned = dp->bp->assigned;
3380 	stat->powered = dp->bp->powered;
3381 	stat->configured = (ml != NULL);
3382 	stat->busy = dp->busy;
3383 	strncpy(stat->type, dp->type, sizeof (stat->type));
3384 	stat->info[0] = '\0';
3385 
3386 	return (NULL);
3387 }
3388 
3389 static int
3390 drmach_detach_board(void *arg)
3391 {
3392 	cpuset_t	cset;
3393 	int		retval;
3394 	drmach_board_t	*bp = (drmach_board_t *)arg;
3395 
3396 	cset = cpu_ready_set;
3397 	promsafe_xc_attention(cset);
3398 
3399 	retval = prom_starfire_rm_brd(bp->bnum);
3400 
3401 	xc_dismissed(cset);
3402 
3403 	return (retval);
3404 }
3405 
3406 sbd_error_t *
3407 drmach_board_deprobe(drmachid_t id)
3408 {
3409 	drmach_board_t	*bp;
3410 	int		 retval;
3411 
3412 	if (!DRMACH_IS_BOARD_ID(id))
3413 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3414 	bp = id;
3415 
3416 	cmn_err(CE_CONT, "DR: PROM detach board %d\n", bp->bnum);
3417 
3418 	retval = prom_tree_update(drmach_detach_board, bp);
3419 
3420 	if (retval == 0)
3421 		return (NULL);
3422 	else {
3423 		cmn_err(CE_WARN, "prom error: prom_starfire_rm_brd(%d) "
3424 			"returned %d", bp->bnum, retval);
3425 		return (drerr_new(1, ESTF_DEPROBE, "%s", bp->cm.name));
3426 	}
3427 }
3428 
3429 /*ARGSUSED*/
3430 static sbd_error_t *
3431 drmach_pt_juggle_bootproc(drmachid_t id, drmach_opts_t *opts)
3432 {
3433 	drmach_device_t	*cpu;
3434 	sbd_error_t	*err;
3435 
3436 	if (!DRMACH_IS_CPU_ID(id))
3437 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3438 	cpu = id;
3439 
3440 	mutex_enter(&cpu_lock);
3441 
3442 	err = drmach_cpu_juggle_bootproc(cpu);
3443 
3444 	mutex_exit(&cpu_lock);
3445 
3446 	return (err);
3447 }
3448 
3449 /*ARGSUSED*/
3450 static sbd_error_t *
3451 drmach_pt_dump_pdainfo(drmachid_t id, drmach_opts_t *opts)
3452 {
3453 	drmach_board_t	*bp;
3454 	int		board;
3455 	int		i;
3456 	pda_handle_t	ph;
3457 	board_desc_t	*bdesc;
3458 
3459 	if (!DRMACH_IS_BOARD_ID(id))
3460 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3461 	bp = id;
3462 	board = bp->bnum;
3463 
3464 	ph = drmach_pda_open();
3465 	if (ph == NULL)
3466 		return (DRMACH_INTERNAL_ERROR());
3467 
3468 	if (pda_board_present(ph, board) == 0) {
3469 		cmn_err(CE_CONT, "board %d is MISSING\n", board);
3470 		pda_close(ph);
3471 		return (DRMACH_INTERNAL_ERROR());
3472 	}
3473 
3474 	cmn_err(CE_CONT, "board %d is PRESENT\n", board);
3475 
3476 	bdesc = (board_desc_t *)pda_get_board_info(ph, board);
3477 	if (bdesc == NULL) {
3478 		cmn_err(CE_CONT,
3479 			"no board descriptor found for board %d\n",
3480 			board);
3481 		pda_close(ph);
3482 		return (DRMACH_INTERNAL_ERROR());
3483 	}
3484 
3485 	/* make sure definition in platmod is in sync with pda */
3486 	ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3487 
3488 	for (i = 0; i < MAX_PROCMODS; i++) {
3489 		if (BDA_NBL(bdesc->bda_proc, i) == BDAN_GOOD)
3490 			cmn_err(CE_CONT,
3491 				"proc %d.%d PRESENT\n", board, i);
3492 		else
3493 			cmn_err(CE_CONT,
3494 				"proc %d.%d MISSING\n", board, i);
3495 	}
3496 
3497 	for (i = 0; i < MAX_MGROUPS; i++) {
3498 		if (BDA_NBL(bdesc->bda_mgroup, i) == BDAN_GOOD)
3499 			cmn_err(CE_CONT,
3500 				"mgroup %d.%d PRESENT\n", board, i);
3501 		else
3502 			cmn_err(CE_CONT,
3503 				"mgroup %d.%d MISSING\n", board, i);
3504 	}
3505 
3506 	/* make sure definition in platmod is in sync with pda */
3507 	ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3508 
3509 	for (i = 0; i < MAX_IOCS; i++) {
3510 		int	s;
3511 
3512 		if (BDA_NBL(bdesc->bda_ioc, i) == BDAN_GOOD) {
3513 			cmn_err(CE_CONT,
3514 				"ioc %d.%d PRESENT\n", board, i);
3515 			for (s = 0; s < MAX_SLOTS_PER_IOC; s++) {
3516 				if (BDA_NBL(bdesc->bda_ios[i], s) != BDAN_GOOD)
3517 					continue;
3518 				cmn_err(CE_CONT,
3519 					"..scard %d.%d.%d PRESENT\n",
3520 					board, i, s);
3521 			}
3522 		} else {
3523 			cmn_err(CE_CONT,
3524 				"ioc %d.%d MISSING\n",
3525 				board, i);
3526 		}
3527 	}
3528 
3529 	cmn_err(CE_CONT,
3530 		"board %d memsize = %d pages\n",
3531 		board, pda_get_mem_size(ph, board));
3532 
3533 	pda_close(ph);
3534 
3535 	return (NULL);
3536 }
3537 
3538 /*ARGSUSED*/
3539 sbd_error_t *
3540 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
3541 {
3542 	struct memlist	*ml;
3543 	uint64_t	src_pa;
3544 	uint64_t	dst_pa;
3545 	uint64_t	dst;
3546 
3547 	dst_pa = va_to_pa(&dst);
3548 
3549 	memlist_read_lock();
3550 	for (ml = phys_install; ml; ml = ml->next) {
3551 		uint64_t	nbytes;
3552 
3553 		src_pa = ml->address;
3554 		nbytes = ml->size;
3555 
3556 		while (nbytes != 0ull) {
3557 
3558 			/* copy 32 bytes at arc_pa to dst_pa */
3559 			bcopy32_il(src_pa, dst_pa);
3560 
3561 			/* increment by 32 bytes */
3562 			src_pa += (4 * sizeof (uint64_t));
3563 
3564 			/* decrement by 32 bytes */
3565 			nbytes -= (4 * sizeof (uint64_t));
3566 		}
3567 	}
3568 	memlist_read_unlock();
3569 
3570 	return (NULL);
3571 }
3572 
3573 static struct {
3574 	const char	*name;
3575 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
3576 } drmach_pt_arr[] = {
3577 	{ "juggle",		drmach_pt_juggle_bootproc	},
3578 	{ "pda",		drmach_pt_dump_pdainfo		},
3579 	{ "readmem",		drmach_pt_readmem		},
3580 
3581 	/* the following line must always be last */
3582 	{ NULL,			NULL				}
3583 };
3584 
3585 /*ARGSUSED*/
3586 sbd_error_t *
3587 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
3588 {
3589 	int		i;
3590 	sbd_error_t	*err;
3591 
3592 	i = 0;
3593 	while (drmach_pt_arr[i].name != NULL) {
3594 		int len = strlen(drmach_pt_arr[i].name);
3595 
3596 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
3597 			break;
3598 
3599 		i += 1;
3600 	}
3601 
3602 	if (drmach_pt_arr[i].name == NULL)
3603 		err = drerr_new(0, ESTF_UNKPTCMD, opts->copts);
3604 	else
3605 		err = (*drmach_pt_arr[i].handler)(id, opts);
3606 
3607 	return (err);
3608 }
3609 
3610 sbd_error_t *
3611 drmach_release(drmachid_t id)
3612 {
3613 	drmach_common_t *cp;
3614 	if (!DRMACH_IS_DEVICE_ID(id))
3615 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3616 	cp = id;
3617 
3618 	return (cp->release(id));
3619 }
3620 
3621 sbd_error_t *
3622 drmach_status(drmachid_t id, drmach_status_t *stat)
3623 {
3624 	drmach_common_t *cp;
3625 
3626 	if (!DRMACH_IS_ID(id))
3627 		return (drerr_new(0, ESTF_NOTID, NULL));
3628 	cp = id;
3629 
3630 	return (cp->status(id, stat));
3631 }
3632 
3633 sbd_error_t *
3634 drmach_unconfigure(drmachid_t id, int flags)
3635 {
3636 	drmach_device_t	*dp;
3637 	pnode_t		 nodeid;
3638 	dev_info_t	*dip, *fdip = NULL;
3639 
3640 	if (!DRMACH_IS_DEVICE_ID(id))
3641 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3642 
3643 	dp = id;
3644 
3645 	nodeid = drmach_node_get_dnode(dp->node);
3646 	if (nodeid == OBP_NONODE)
3647 		return (DRMACH_INTERNAL_ERROR());
3648 
3649 	dip = e_ddi_nodeid_to_dip(nodeid);
3650 	if (dip == NULL)
3651 		return (NULL);
3652 
3653 	/*
3654 	 * Branch already held, so hold acquired in
3655 	 * e_ddi_nodeid_to_dip() can be released
3656 	 */
3657 	ddi_release_devi(dip);
3658 
3659 	if (flags & DEVI_BRANCH_DESTROY)
3660 		flags |= DEVI_BRANCH_EVENT;
3661 
3662 	/*
3663 	 * Force flag is no longer necessary. See starcat/io/drmach.c
3664 	 * for details.
3665 	 */
3666 	ASSERT(e_ddi_branch_held(dip));
3667 	if (e_ddi_branch_unconfigure(dip, &fdip, flags)) {
3668 		sbd_error_t	*err;
3669 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3670 
3671 		/*
3672 		 * If non-NULL, fdip is returned held and must be released.
3673 		 */
3674 		if (fdip != NULL) {
3675 			(void) ddi_pathname(fdip, path);
3676 			ndi_rele_devi(fdip);
3677 		} else {
3678 			(void) ddi_pathname(dip, path);
3679 		}
3680 
3681 		err = drerr_new(1, ESTF_DRVFAIL, path);
3682 
3683 		kmem_free(path, MAXPATHLEN);
3684 
3685 		return (err);
3686 	}
3687 
3688 	return (NULL);
3689 }
3690 
3691 /*
3692  * drmach interfaces to legacy Starfire platmod logic
3693  * linkage via runtime symbol look up, called from plat_cpu_power*
3694  */
3695 
3696 /*
3697  * Start up a cpu.  It is possible that we're attempting to restart
3698  * the cpu after an UNCONFIGURE in which case the cpu will be
3699  * spinning in its cache.  So, all we have to do is wakeup him up.
3700  * Under normal circumstances the cpu will be coming from a previous
3701  * CONNECT and thus will be spinning in OBP.  In both cases, the
3702  * startup sequence is the same.
3703  */
3704 int
3705 drmach_cpu_poweron(struct cpu *cp)
3706 {
3707 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
3708 
3709 	ASSERT(MUTEX_HELD(&cpu_lock));
3710 
3711 	if (drmach_cpu_start(cp) != 0)
3712 		return (EBUSY);
3713 	else
3714 		return (0);
3715 }
3716 
3717 int
3718 drmach_cpu_poweroff(struct cpu *cp)
3719 {
3720 	int		ntries, cnt;
3721 	processorid_t	cpuid = cp->cpu_id;
3722 	void		drmach_cpu_shutdown_self(void);
3723 
3724 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
3725 
3726 	ASSERT(MUTEX_HELD(&cpu_lock));
3727 
3728 	/*
3729 	 * Capture all CPUs (except for detaching proc) to prevent
3730 	 * crosscalls to the detaching proc until it has cleared its
3731 	 * bit in cpu_ready_set.
3732 	 *
3733 	 * The CPU's remain paused and the prom_mutex is known to be free.
3734 	 * This prevents the x-trap victim from blocking when doing prom
3735 	 * IEEE-1275 calls at a high PIL level.
3736 	 */
3737 	promsafe_pause_cpus();
3738 
3739 	/*
3740 	 * Quiesce interrupts on the target CPU. We do this by setting
3741 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3742 	 * prevent it from receiving cross calls and cross traps.
3743 	 * This prevents the processor from receiving any new soft interrupts.
3744 	 */
3745 	mp_cpu_quiesce(cp);
3746 
3747 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
3748 	drmach_xt_mb[cpuid] = 0x80;
3749 
3750 	xt_one_unchecked(cpuid, (xcfunc_t *)idle_stop_xcall,
3751 		(uint64_t)drmach_cpu_shutdown_self, NULL);
3752 
3753 	ntries = drmach_cpu_ntries;
3754 	cnt = 0;
3755 	while (drmach_xt_mb[cpuid] && ntries) {
3756 		DELAY(drmach_cpu_delay);
3757 		ntries--;
3758 		cnt++;
3759 	}
3760 
3761 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
3762 
3763 	start_cpus();
3764 
3765 	DRMACH_PR("waited %d out of %d tries for "
3766 		"drmach_cpu_shutdown_self on cpu%d",
3767 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
3768 
3769 	drmach_cpu_obp_detach(cpuid);
3770 
3771 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
3772 
3773 	return (0);
3774 }
3775 
3776 /*ARGSUSED*/
3777 int
3778 drmach_verify_sr(dev_info_t *dip, int sflag)
3779 {
3780 	return (0);
3781 }
3782 
/*
 * Suspend hook.  This implementation has nothing to do.
 */
void
drmach_suspend_last(void)
{
	/* intentionally empty */
}
3787 
/*
 * Resume hook.  This implementation has nothing to do.
 */
void
drmach_resume_first(void)
{
	/* intentionally empty */
}
3792 
3793 /*
3794  * Log a DR sysevent.
3795  * Return value: 0 success, non-zero failure.
3796  */
3797 int
3798 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
3799 {
3800 	sysevent_t			*ev;
3801 	sysevent_id_t			eid;
3802 	int				rv, km_flag;
3803 	sysevent_value_t		evnt_val;
3804 	sysevent_attr_list_t		*evnt_attr_list = NULL;
3805 	char				attach_pnt[MAXNAMELEN];
3806 
3807 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3808 	attach_pnt[0] = '\0';
3809 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
3810 		rv = -1;
3811 		goto logexit;
3812 	}
3813 	if (verbose)
3814 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
3815 			    attach_pnt, hint, flag, verbose);
3816 
3817 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
3818 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
3819 		rv = -2;
3820 		goto logexit;
3821 	}
3822 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3823 	evnt_val.value.sv_string = attach_pnt;
3824 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
3825 				    &evnt_val, km_flag)) != 0)
3826 		goto logexit;
3827 
3828 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3829 	evnt_val.value.sv_string = hint;
3830 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
3831 				    &evnt_val, km_flag)) != 0) {
3832 		sysevent_free_attr(evnt_attr_list);
3833 		goto logexit;
3834 	}
3835 
3836 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
3837 
3838 	/*
3839 	 * Log the event but do not sleep waiting for its
3840 	 * delivery. This provides insulation from syseventd.
3841 	 */
3842 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
3843 
3844 logexit:
3845 	if (ev)
3846 		sysevent_free(ev);
3847 	if ((rv != 0) && verbose)
3848 		cmn_err(CE_WARN,
3849 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
3850 			    rv, attach_pnt, hint);
3851 
3852 	return (rv);
3853 }
3854 
3855 /*ARGSUSED*/
3856 int
3857 drmach_allow_memrange_modify(drmachid_t id)
3858 {
3859 	return (1);	/* TRUE */
3860 }
3861