xref: /titanic_41/usr/src/uts/sun4u/starfire/io/drmach.c (revision 0adc16190e36914964740716575460dda750de39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/debug.h>
29 #include <sys/types.h>
30 #include <sys/varargs.h>
31 #include <sys/errno.h>
32 #include <sys/cred.h>
33 #include <sys/dditypes.h>
34 #include <sys/devops.h>
35 #include <sys/modctl.h>
36 #include <sys/poll.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ndi_impldefs.h>
42 #include <sys/stat.h>
43 #include <sys/kmem.h>
44 #include <sys/vmem.h>
45 #include <sys/processor.h>
46 #include <sys/spitregs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/cpupart.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/note.h>
70 
71 #include <sys/starfire.h>	/* plat_max_... decls */
72 #include <sys/cvc.h>
73 #include <sys/cpu_sgnblk_defs.h>
74 #include <sys/drmach.h>
75 #include <sys/dr_util.h>
76 #include <sys/pda.h>
77 
78 #include <sys/sysevent.h>
79 #include <sys/sysevent/dr.h>
80 #include <sys/sysevent/eventdefs.h>
81 
82 
/*
 * Low-level primitives implemented outside this file.
 * NOTE(review): the "_il" suffix suggests inline templates -- confirm.
 */
extern void		bcopy32_il(uint64_t, uint64_t);
extern void		flush_ecache_il(
				uint64_t physaddr, int size, int linesz);
extern uint_t		ldphysio_il(uint64_t physaddr);
extern void		stphysio_il(uint64_t physaddr, uint_t value);

/*
 * Memory controller (mc) helpers implemented outside this file.  Callers
 * in this file pass the PROM node id of a memory device where a pnode_t
 * is expected.
 */
extern uint64_t		mc_get_mem_alignment(void);
extern uint64_t		mc_get_asr_addr(pnode_t);
extern uint64_t		mc_get_idle_addr(pnode_t);
extern uint64_t		mc_get_alignment_mask(pnode_t);
extern int		mc_read_asr(pnode_t, uint_t *);
extern int		mc_write_asr(pnode_t, uint_t);
extern uint64_t		mc_asr_to_pa(uint_t);
extern uint_t		mc_pa_to_asr(uint_t, uint64_t);

/* Implemented outside this file. */
extern int		pc_madr_add(int, int, int, int);
99 
/*
 * Argument block handed to a node-walk callback: the node being walked
 * and the opaque data pointer supplied by whoever started the walk.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;

/*
 * A cursor into the device tree.  All access goes through the two
 * function pointers, so callers do not depend on how the position is
 * stored in "here".
 */
typedef struct drmach_node {
	/* current position; the OBP implementation stores a pnode_t here */
	void		*here;

	/* return the PROM node id for the current position */
	pnode_t		 (*get_dnode)(struct drmach_node *node);
	/* call cb for each node visited; a non-zero cb return ends the walk */
	int		 (*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
} drmach_node_t;
112 
/*
 * Bounds-checked array of drmachid_t values addressed by an index in the
 * inclusive range [min_index, max_index].
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr in bytes (used for kmem_free) */
	drmachid_t	*arr;		/* storage; slot 0 corresponds to min_index */
} drmach_array_t;
119 
/*
 * Header shared by every drmach object (boards and devices).  It must be
 * the first member of the containing structure because the DRMACH_OBJ()
 * macro casts an id directly to a drmach_common_t pointer.
 */
typedef struct {
	/*
	 * Type tag: holds the address of the constructor that built the
	 * object and is compared against by the DRMACH_IS_*_ID() macros.
	 */
	void		*isa;

	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;

/* Per-board state. */
typedef struct {
	drmach_common_t	 cm;		/* must be first, see drmach_common_t */
	int		 bnum;		/* board number; index into drmach_boards */
	int		 assigned;	/* reported through drmach_status_t */
	int		 powered;	/* reported through drmach_status_t */
	int		 connect_cpuid;	/* -1 when the board is created */
	int		 cond;		/* condition reported by the status op */
	drmach_node_t	*tree;		/* node cursor owned by this board */
	drmach_array_t	*devices;	/* devices on this board, or NULL */
} drmach_board_t;

/* Per-device (cpu, mem or io unit) state. */
typedef struct {
	drmach_common_t	 cm;		/* must be first, see drmach_common_t */
	drmach_board_t	*bp;		/* board this device belongs to */
	int		 unum;		/* unit number; -1 until assigned */
	int		 busy;
	int		 powered;
	const char	*type;		/* DRMACH_DEVTYPE_* string from name2type[] */
	drmach_node_t	*node;		/* private node cursor owned by this device */
} drmach_device_t;
149 
/*
 * State shared between drmach_configure() and the branch-create
 * callbacks it registers.
 */
typedef struct {
	int		 flags;		/* flags passed to drmach_configure() */
	drmach_device_t	*dp;		/* device being configured */
	sbd_error_t	*err;		/* set if an error is detected */
	dev_info_t	*dip;		/* branch root reported by the callback */
} drmach_config_args_t;

/*
 * One entry of the MC idle script built by drmach_prep_rename_script();
 * the script is terminated by an entry whose idle_addr is 0.
 */
typedef struct {
	uint64_t	 idle_addr;	/* from drmach_get_mc_idle_addr() */
	drmach_device_t	*mem;		/* memory device the address belongs to */
} drmach_mc_idle_script_t;

/*
 * One entry of the rename script.
 * NOTE(review): presumably an (address, value) pair for a register
 * write, with _filler padding the entry to 16 bytes -- confirm.
 */
typedef struct {
	uint64_t	masr_addr;
	uint_t		masr;
	uint_t		_filler;
} drmach_rename_script_t;

/*
 * Description of a copy-rename operation.
 * NOTE(review): field usage is not visible in this part of the file;
 * verify against the copy-rename code before relying on these names.
 */
typedef struct {
	void		(*run)(void *arg);
	caddr_t		data;
	pda_handle_t	*ph;
	struct memlist	*c_ml;
	uint64_t	s_copybasepa;
	uint64_t	t_copybasepa;
	drmach_device_t	*restless_mc;	/* diagnostic output */
} drmach_copy_rename_program_t;

/* Operation selector; NOTE(review): consumers are outside this view. */
typedef enum {
	DO_IDLE,
	DO_UNIDLE,
	DO_PAUSE,
	DO_UNPAUSE
} drmach_iopc_op_t;

/*
 * Callback context for locating the devices of a board.
 * NOTE(review): usage is outside this view -- confirm field semantics.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
192 
/* one page of heap_arena address space; allocated in drmach_init() */
static caddr_t		 drmach_shutdown_va;

/* non-zero once drmach_init() has completed; cleared by drmach_fini() */
static int		 drmach_initialized;
/* all known boards, indexed by board number (0 .. MAX_BOARDS - 1) */
static drmach_array_t	*drmach_boards;

/* NOTE(review): presumably retry tunables for cpu operations -- confirm */
static int		 drmach_cpu_delay = 100;
static int		 drmach_cpu_ntries = 50000;

/* NCPU bytes taken from static_alloc_arena in _init() */
volatile uchar_t	*drmach_xt_mb;

/*
 * Do not change the drmach_shutdown_mbox structure without
 * considering the drmach_shutdown_asm assembly language code.
 */
struct drmach_shutdown_mbox {
	uint64_t	estack;
	uint64_t	flushaddr;
	int		size;
	int		linesize;
	uint64_t	physaddr;
};
/* allocated from static_alloc_arena in _init() */
struct drmach_shutdown_mbox	*drmach_shutdown_asm_mbox;

/*
 * Constructors.  Their addresses double as the type tags stored in
 * drmach_common_t.isa, so they must be declared before the
 * DRMACH_IS_*_ID() macros are used.
 */
static sbd_error_t	*drmach_device_new(drmach_node_t *,
				drmach_board_t *, drmach_device_t **);
static sbd_error_t	*drmach_cpu_new(drmach_device_t *);
static sbd_error_t	*drmach_mem_new(drmach_device_t *);
static sbd_error_t	*drmach_io_new(drmach_device_t *);

/* defined outside this file */
extern struct cpu	*SIGBCPU;
222 
/*
 * DRMACH_PR(fmt, ...) -- debug printf.
 *
 * The macro expands to a bare "if (...) printf", so the argument list of
 * the invocation becomes the printf argument list.  Because of that
 * expansion it must not be used as the unbraced body of an if/else.
 * In non-DEBUG builds the condition is the constant 0.
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
231 
/*
 * Object identification macros.
 *
 * Every drmach object starts with a drmach_common_t whose "isa" member
 * holds the address of the constructor that created it.  The macros
 * below compare that tag against the known constructors.  A zero id is
 * never a valid object.
 *
 * The macro argument is parenthesized at every use so that expression
 * arguments (pointer arithmetic, conditional expressions, ...) are cast
 * and compared as a whole.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device (cpu, mem or io) object */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any drmach object, board or device */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* board number that a cpu id belongs to */
#define	DRMACH_CPUID2BNUM(cpuid) \
	((cpuid) / MAX_CPU_UNITS_PER_BOARD)

/*
 * Build (and log) an ESTF_INTERNAL error whose text records the source
 * line at which the macro was expanded.
 */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, ESTF_INTERNAL, drmach_ie_fmt, __LINE__)
static char		*drmach_ie_fmt = "drmach.c %d";
269 
/*
 * Table of the PROM node names drmach cares about.  For each name it
 * gives the device type string recorded in drmach_device_t.type and the
 * constructor used to finish initializing a new device of that kind.
 * Nodes whose name is not listed here are ignored.
 */
static struct {
	const char	 *name;
	const char	 *type;
	sbd_error_t	 *(*new)(drmach_device_t *);
} name2type[] = {
	{ "SUNW,UltraSPARC",	DRMACH_DEVTYPE_CPU,  drmach_cpu_new },
	{ "mem-unit",		DRMACH_DEVTYPE_MEM,  drmach_mem_new },
	{ "pci",		DRMACH_DEVTYPE_PCI,  drmach_io_new  },
	{ "sbus",		DRMACH_DEVTYPE_SBUS, drmach_io_new  },
};
280 
/* names of the node types to clean up when a board is unconfigured */
#define	MISC_COUNTER_TIMER_DEVNAME	"counter-timer"
#define	MISC_PERF_COUNTER_DEVNAME	"perf-counter"

/* utility: number of bytes in one megabyte */
#define	MBYTE	(1048576ull)

/*
 * This is necessary because the CPU support needs
 * to call cvc_assign_iocpu.
 */
#ifndef lint
char _depends_on[] = "drv/cvc";
#endif  /* lint */
295 
/*
 * drmach autoconfiguration data structures and interfaces
 */

extern struct mod_ops mod_miscops;

/* module description: drmach is a "misc" module */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"Sun Enterprise 10000 DR %I%"
};

/* linkage handed to mod_install(), mod_remove() and mod_info() */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

/* serializes drmach_init(); created in _init() */
static kmutex_t drmach_i_lock;
314 
315 int
316 _init(void)
317 {
318 	int err;
319 
320 	/* check that we have the correct version of obp */
321 	if (prom_test("SUNW,UE10000,add-brd") != 0) {
322 
323 		cmn_err(CE_WARN, "!OBP/SSP upgrade is required to enable "
324 		    "DR Functionality");
325 
326 		return (-1);
327 	}
328 
329 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
330 
331 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
332 	    NCPU * sizeof (uchar_t), VM_SLEEP);
333 	drmach_shutdown_asm_mbox = (struct drmach_shutdown_mbox *)
334 	    vmem_alloc(static_alloc_arena, sizeof (struct drmach_shutdown_mbox),
335 	    VM_SLEEP);
336 
337 	if ((err = mod_install(&modlinkage)) != 0) {
338 		mutex_destroy(&drmach_i_lock);
339 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
340 		    NCPU * sizeof (uchar_t));
341 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
342 		    sizeof (struct drmach_shutdown_mbox));
343 	}
344 
345 	return (err);
346 }
347 
348 int
349 _fini(void)
350 {
351 	static int drmach_fini(void);
352 
353 	if (drmach_fini())
354 		return (DDI_FAILURE);
355 	else
356 		return (mod_remove(&modlinkage));
357 }
358 
359 int
360 _info(struct modinfo *modinfop)
361 {
362 	return (mod_info(&modlinkage, modinfop));
363 }
364 
365 static pnode_t
366 drmach_node_obp_get_dnode(drmach_node_t *np)
367 {
368 	return ((pnode_t)(uintptr_t)np->here);
369 }
370 
371 static int
372 drmach_node_obp_walk(drmach_node_t *np, void *data,
373 		int (*cb)(drmach_node_walk_args_t *args))
374 {
375 	pnode_t			nodeid;
376 	int			rv;
377 	drmach_node_walk_args_t	args;
378 
379 	/* initialized args structure for callback */
380 	args.node = np;
381 	args.data = data;
382 
383 	nodeid = prom_childnode(prom_rootnode());
384 
385 	/* save our new position with in the tree */
386 	np->here = (void *)(uintptr_t)nodeid;
387 
388 	rv = 0;
389 	while (nodeid != OBP_NONODE) {
390 		rv = (*cb)(&args);
391 		if (rv)
392 			break;
393 
394 		nodeid = prom_nextnode(nodeid);
395 
396 		/* save our new position with in the tree */
397 		np->here = (void *)(uintptr_t)nodeid;
398 	}
399 
400 	return (rv);
401 }
402 
403 static drmach_node_t *
404 drmach_node_new(void)
405 {
406 	drmach_node_t *np;
407 
408 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
409 
410 	np->get_dnode = drmach_node_obp_get_dnode;
411 	np->walk = drmach_node_obp_walk;
412 
413 	return (np);
414 }
415 
416 static void
417 drmach_node_dispose(drmach_node_t *np)
418 {
419 	kmem_free(np, sizeof (*np));
420 }
421 
422 static dev_info_t *
423 drmach_node_get_dip(drmach_node_t *np)
424 {
425 	pnode_t nodeid;
426 
427 	nodeid = np->get_dnode(np);
428 	if (nodeid == OBP_NONODE)
429 		return (NULL);
430 	else {
431 		dev_info_t *dip;
432 
433 		/* The root node doesn't have to be held */
434 		dip = e_ddi_nodeid_to_dip(nodeid);
435 		if (dip) {
436 			/*
437 			 * Branch rooted at dip is already held, so release
438 			 * hold acquired in e_ddi_nodeid_to_dip()
439 			 */
440 			ddi_release_devi(dip);
441 			ASSERT(e_ddi_branch_held(dip));
442 		}
443 
444 		return (dip);
445 	}
446 	/*NOTREACHED*/
447 }
448 
449 static pnode_t
450 drmach_node_get_dnode(drmach_node_t *np)
451 {
452 	return (np->get_dnode(np));
453 }
454 
455 static int
456 drmach_node_walk(drmach_node_t *np, void *param,
457 		int (*cb)(drmach_node_walk_args_t *args))
458 {
459 	return (np->walk(np, param, cb));
460 }
461 
462 static int
463 drmach_node_get_prop(drmach_node_t *np, char *name, void *buf)
464 {
465 	pnode_t	nodeid;
466 	int	rv;
467 
468 	nodeid = np->get_dnode(np);
469 	if (nodeid == OBP_NONODE)
470 		rv = -1;
471 	else if (prom_getproplen(nodeid, (caddr_t)name) < 0)
472 		rv = -1;
473 	else {
474 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
475 		rv = 0;
476 	}
477 
478 	return (rv);
479 }
480 
481 static int
482 drmach_node_get_proplen(drmach_node_t *np, char *name, int *len)
483 {
484 	pnode_t	 nodeid;
485 	int	 rv;
486 
487 	nodeid = np->get_dnode(np);
488 	if (nodeid == OBP_NONODE)
489 		rv = -1;
490 	else {
491 		*len = prom_getproplen(nodeid, (caddr_t)name);
492 		rv = (*len < 0 ? -1 : 0);
493 	}
494 
495 	return (rv);
496 }
497 
498 static drmachid_t
499 drmach_node_dup(drmach_node_t *np)
500 {
501 	drmach_node_t *dup;
502 
503 	dup = drmach_node_new();
504 	dup->here = np->here;
505 
506 	return (dup);
507 }
508 
509 /*
510  * drmach_array provides convenient array construction, access,
511  * bounds checking and array destruction logic.
512  */
513 
514 static drmach_array_t *
515 drmach_array_new(int min_index, int max_index)
516 {
517 	drmach_array_t *arr;
518 
519 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
520 
521 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
522 	if (arr->arr_sz > 0) {
523 		arr->min_index = min_index;
524 		arr->max_index = max_index;
525 
526 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
527 		return (arr);
528 	} else {
529 		kmem_free(arr, sizeof (*arr));
530 		return (0);
531 	}
532 }
533 
534 static int
535 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
536 {
537 	if (idx < arr->min_index || idx > arr->max_index)
538 		return (-1);
539 	else {
540 		arr->arr[idx - arr->min_index] = val;
541 		return (0);
542 	}
543 	/*NOTREACHED*/
544 }
545 
546 static int
547 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
548 {
549 	if (idx < arr->min_index || idx > arr->max_index)
550 		return (-1);
551 	else {
552 		*val = arr->arr[idx - arr->min_index];
553 		return (0);
554 	}
555 	/*NOTREACHED*/
556 }
557 
558 static int
559 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
560 {
561 	int rv;
562 
563 	*idx = arr->min_index;
564 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
565 		*idx += 1;
566 
567 	return (rv);
568 }
569 
570 static int
571 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
572 {
573 	int rv;
574 
575 	*idx += 1;
576 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
577 		*idx += 1;
578 
579 	return (rv);
580 }
581 
582 static void
583 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
584 {
585 	drmachid_t	val;
586 	int		idx;
587 	int		rv;
588 
589 	rv = drmach_array_first(arr, &idx, &val);
590 	while (rv == 0) {
591 		(*disposer)(val);
592 		rv = drmach_array_next(arr, &idx, &val);
593 	}
594 
595 	kmem_free(arr->arr, arr->arr_sz);
596 	kmem_free(arr, sizeof (*arr));
597 }
598 
599 /*ARGSUSED*/
600 static int
601 drmach_prom_select(pnode_t nodeid, void *arg, uint_t flags)
602 {
603 	int			rprop[64];
604 	pnode_t			saved;
605 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
606 	drmach_device_t		*dp = ap->dp;
607 	sbd_error_t		*err;
608 
609 	saved = drmach_node_get_dnode(dp->node);
610 
611 	if (nodeid != saved)
612 		return (DDI_FAILURE);
613 
614 	if (saved == OBP_NONODE) {
615 		err = DRMACH_INTERNAL_ERROR();
616 		DRERR_SET_C(&ap->err, &err);
617 		return (DDI_FAILURE);
618 	}
619 
620 	if (prom_getprop(nodeid, OBP_REG, (caddr_t)rprop) <= 0) {
621 		return (DDI_FAILURE);
622 	}
623 
624 	return (DDI_SUCCESS);
625 }
626 
627 /*ARGSUSED*/
628 static void
629 drmach_branch_callback(dev_info_t *rdip, void *arg, uint_t flags)
630 {
631 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
632 
633 	ASSERT(ap->dip == NULL);
634 
635 	ap->dip = rdip;
636 }
637 
638 sbd_error_t *
639 drmach_configure(drmachid_t id, int flags)
640 {
641 	drmach_device_t		*dp;
642 	sbd_error_t		*err;
643 	drmach_config_args_t	ca;
644 	devi_branch_t		b = {0};
645 	dev_info_t		*fdip = NULL;
646 
647 	if (!DRMACH_IS_DEVICE_ID(id))
648 		return (drerr_new(0, ESTF_INAPPROP, NULL));
649 	dp = id;
650 
651 	ca.dp = dp;
652 	ca.flags = flags;
653 	ca.err = NULL;		/* will be set if error detected */
654 	ca.dip = NULL;
655 
656 	b.arg = &ca;
657 	b.type = DEVI_BRANCH_PROM;
658 	b.create.prom_branch_select = drmach_prom_select;
659 	b.devi_branch_callback = drmach_branch_callback;
660 
661 	if (e_ddi_branch_create(ddi_root_node(), &b, &fdip,
662 	    DEVI_BRANCH_CHILD | DEVI_BRANCH_CONFIGURE) != 0) {
663 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
664 
665 		/*
666 		 * If non-NULL, fdip is returned held and must be released.
667 		 */
668 		if (fdip != NULL) {
669 			(void) ddi_pathname(fdip, path);
670 			ddi_release_devi(fdip);
671 		} else if (ca.dip != NULL) {
672 			/* safe to call ddi_pathname as dip already held */
673 			(void) ddi_pathname(ca.dip, path);
674 		} else {
675 			(void) strcpy(path, "<none>");
676 		}
677 
678 		err = drerr_new(1, ESTF_DRVFAIL, path);
679 		DRERR_SET_C(&ca.err, &err);
680 		kmem_free(path, MAXPATHLEN);
681 	}
682 
683 	return (ca.err);
684 }
685 
686 static sbd_error_t *
687 drmach_device_new(drmach_node_t *node,
688 	drmach_board_t *bp, drmach_device_t **dpp)
689 {
690 	int		 i;
691 	int		 rv;
692 	drmach_device_t	*dp;
693 	sbd_error_t	*err;
694 	char		 name[OBP_MAXDRVNAME];
695 
696 	rv = drmach_node_get_prop(node, OBP_NAME, name);
697 	if (rv) {
698 		/* every node is expected to have a name */
699 		err = drerr_new(1, ESTF_GETPROP,
700 			"PROM Node 0x%x: property %s",
701 			(uint_t)node->get_dnode(node), OBP_NAME);
702 
703 		return (err);
704 	}
705 
706 	/*
707 	 * The node currently being examined is not listed in the name2type[]
708 	 * array.  In this case, the node is no interest to drmach.  Both
709 	 * dp and err are initialized here to yield nothing (no device or
710 	 * error structure) for this case.
711 	 */
712 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
713 		if (strcmp(name2type[i].name, name) == 0)
714 			break;
715 
716 	if (i < sizeof (name2type) / sizeof (name2type[0])) {
717 		dp = kmem_zalloc(sizeof (drmach_device_t), KM_SLEEP);
718 
719 		dp->bp = bp;
720 		dp->unum = -1;
721 		dp->node = drmach_node_dup(node);
722 		dp->type = name2type[i].type;
723 
724 		err = (name2type[i].new)(dp);
725 		if (err) {
726 			drmach_node_dispose(node);
727 			kmem_free(dp, sizeof (*dp));
728 			dp = NULL;
729 		}
730 
731 		*dpp = dp;
732 		return (err);
733 	}
734 
735 	/*
736 	 * The node currently being examined is not listed in the name2type[]
737 	 * array.  In this case, the node is no interest to drmach.  Both
738 	 * dp and err are initialized here to yield nothing (no device or
739 	 * error structure) for this case.
740 	 */
741 	*dpp = NULL;
742 	return (NULL);
743 }
744 
745 static void
746 drmach_device_dispose(drmachid_t id)
747 {
748 	drmach_device_t *self = id;
749 
750 	if (self->node)
751 		drmach_node_dispose(self->node);
752 
753 	kmem_free(self, sizeof (*self));
754 }
755 
756 static sbd_error_t *
757 drmach_device_get_prop(drmach_device_t *dp, char *name, void *buf)
758 {
759 	sbd_error_t	*err = NULL;
760 	int		 rv;
761 
762 	rv = drmach_node_get_prop(dp->node, name, buf);
763 	if (rv) {
764 		err = drerr_new(1, ESTF_GETPROP,
765 			"%s::%s: property %s",
766 			dp->bp->cm.name, dp->cm.name, name);
767 	}
768 
769 	return (err);
770 }
771 
772 static sbd_error_t *
773 drmach_device_get_proplen(drmach_device_t *dp, char *name, int *len)
774 {
775 	sbd_error_t	*err = NULL;
776 	int		 rv;
777 
778 	rv = drmach_node_get_proplen(dp->node, name, len);
779 	if (rv) {
780 		err = drerr_new(1, ESTF_GETPROPLEN,
781 			"%s::%s: property %s",
782 			dp->bp->cm.name, dp->cm.name, name);
783 	}
784 
785 	return (err);
786 }
787 
788 static drmach_board_t *
789 drmach_board_new(int bnum)
790 {
791 	static sbd_error_t *drmach_board_release(drmachid_t);
792 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
793 
794 	drmach_board_t	*bp;
795 
796 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
797 
798 	bp->cm.isa = (void *)drmach_board_new;
799 	bp->cm.release = drmach_board_release;
800 	bp->cm.status = drmach_board_status;
801 
802 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
803 
804 	bp->bnum = bnum;
805 	bp->devices = NULL;
806 	bp->connect_cpuid = -1;
807 	bp->tree = drmach_node_new();
808 	bp->assigned = !drmach_initialized;
809 	bp->powered = !drmach_initialized;
810 
811 	drmach_array_set(drmach_boards, bnum, bp);
812 	return (bp);
813 }
814 
815 static void
816 drmach_board_dispose(drmachid_t id)
817 {
818 	drmach_board_t *bp;
819 
820 	ASSERT(DRMACH_IS_BOARD_ID(id));
821 	bp = id;
822 
823 	if (bp->tree)
824 		drmach_node_dispose(bp->tree);
825 
826 	if (bp->devices)
827 		drmach_array_dispose(bp->devices, drmach_device_dispose);
828 
829 	kmem_free(bp, sizeof (*bp));
830 }
831 
832 static sbd_error_t *
833 drmach_board_status(drmachid_t id, drmach_status_t *stat)
834 {
835 	sbd_error_t	*err = NULL;
836 	drmach_board_t	*bp;
837 
838 	if (!DRMACH_IS_BOARD_ID(id))
839 		return (drerr_new(0, ESTF_INAPPROP, NULL));
840 	bp = id;
841 
842 	stat->assigned = bp->assigned;
843 	stat->powered = bp->powered;
844 	stat->busy = 0;			/* assume not busy */
845 	stat->configured = 0;		/* assume not configured */
846 	stat->empty = 0;
847 	stat->cond = bp->cond = SBD_COND_OK;
848 	strncpy(stat->type, "System Brd", sizeof (stat->type));
849 	stat->info[0] = '\0';
850 
851 	if (bp->devices) {
852 		int		 rv;
853 		int		 d_idx;
854 		drmachid_t	 d_id;
855 
856 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
857 		while (rv == 0) {
858 			drmach_status_t	d_stat;
859 
860 			err = drmach_status(d_id, &d_stat);
861 			if (err)
862 				break;
863 
864 			stat->busy |= d_stat.busy;
865 			stat->configured |= d_stat.configured;
866 
867 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
868 		}
869 	}
870 
871 	return (err);
872 }
873 
874 /* a simple routine to reduce redundancy of this common logic */
875 static pda_handle_t
876 drmach_pda_open(void)
877 {
878 	pda_handle_t ph;
879 
880 	ph = pda_open();
881 	if (ph == NULL) {
882 		/* catch in debug kernels */
883 		ASSERT(0);
884 		cmn_err(CE_WARN, "pda_open failed");
885 	}
886 
887 	return (ph);
888 }
889 
#ifdef DEBUG
/* debug aid: when non-zero, drmach_init() enters the debugger on entry */
int drmach_init_break = 0;
#endif
893 
894 static int
895 hold_rele_branch(dev_info_t *rdip, void *arg)
896 {
897 	int	i;
898 	int	*holdp = (int *)arg;
899 	char	*name = ddi_node_name(rdip);
900 
901 	/*
902 	 * For Starfire, we must be children of the root devinfo node
903 	 */
904 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
905 
906 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
907 		if (strcmp(name2type[i].name, name) == 0)
908 			break;
909 
910 	if (i == sizeof (name2type) / sizeof (name2type[0])) {
911 		/* Not of interest to us */
912 		return (DDI_WALK_PRUNECHILD);
913 	}
914 
915 	if (*holdp) {
916 		ASSERT(!e_ddi_branch_held(rdip));
917 		e_ddi_branch_hold(rdip);
918 	} else {
919 		ASSERT(e_ddi_branch_held(rdip));
920 		e_ddi_branch_rele(rdip);
921 	}
922 
923 	return (DDI_WALK_PRUNECHILD);
924 }
925 
926 static int
927 drmach_init(void)
928 {
929 	pnode_t		nodeid;
930 	dev_info_t	*rdip;
931 	int		hold, circ;
932 
933 #ifdef DEBUG
934 	if (drmach_init_break)
935 		debug_enter("drmach_init: drmach_init_break set\n");
936 #endif
937 	mutex_enter(&drmach_i_lock);
938 	if (drmach_initialized) {
939 		mutex_exit(&drmach_i_lock);
940 		return (0);
941 	}
942 
943 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
944 
945 	nodeid = prom_childnode(prom_rootnode());
946 	do {
947 		int		 bnum;
948 		drmachid_t	 id;
949 
950 		bnum = -1;
951 		(void) prom_getprop(nodeid, OBP_BOARDNUM, (caddr_t)&bnum);
952 		if (bnum == -1)
953 			continue;
954 
955 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
956 			cmn_err(CE_WARN, "OBP node 0x%x has"
957 				" invalid property value, %s=%d",
958 				nodeid, OBP_BOARDNUM, bnum);
959 
960 			/* clean up */
961 			drmach_array_dispose(
962 				drmach_boards, drmach_board_dispose);
963 
964 			mutex_exit(&drmach_i_lock);
965 			return (-1);
966 		} else if (id == NULL)
967 			(void) drmach_board_new(bnum);
968 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
969 
970 	drmach_shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
971 
972 	/*
973 	 * Walk immediate children of devinfo root node and hold
974 	 * all devinfo branches of interest.
975 	 */
976 	hold = 1;
977 	rdip = ddi_root_node();
978 
979 	ndi_devi_enter(rdip, &circ);
980 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
981 	ndi_devi_exit(rdip, circ);
982 
983 	drmach_initialized = 1;
984 
985 	mutex_exit(&drmach_i_lock);
986 
987 	return (0);
988 }
989 
990 static int
991 drmach_fini(void)
992 {
993 	dev_info_t	*rdip;
994 	int		hold, circ;
995 
996 	if (drmach_initialized) {
997 		int		busy = 0;
998 		int		rv;
999 		int		idx;
1000 		drmachid_t	id;
1001 
1002 		ASSERT(drmach_boards != NULL);
1003 
1004 		rv = drmach_array_first(drmach_boards, &idx, &id);
1005 		while (rv == 0) {
1006 			sbd_error_t	*err;
1007 			drmach_status_t stat;
1008 
1009 			err = drmach_board_status(id, &stat);
1010 			if (err) {
1011 				/* catch in debug kernels */
1012 				ASSERT(0);
1013 				sbd_err_clear(&err);
1014 				busy = 1;
1015 			} else
1016 				busy |= stat.busy;
1017 
1018 			rv = drmach_array_next(drmach_boards, &idx, &id);
1019 		}
1020 
1021 		if (busy)
1022 			return (-1);
1023 
1024 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
1025 		drmach_boards = NULL;
1026 
1027 		vmem_free(heap_arena, drmach_shutdown_va, PAGESIZE);
1028 
1029 		/*
1030 		 * Walk immediate children of the root devinfo node
1031 		 * releasing holds acquired on branches in drmach_init()
1032 		 */
1033 		hold = 0;
1034 		rdip = ddi_root_node();
1035 
1036 		ndi_devi_enter(rdip, &circ);
1037 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
1038 		ndi_devi_exit(rdip, circ);
1039 
1040 		mutex_destroy(&drmach_i_lock);
1041 
1042 		drmach_initialized = 0;
1043 	}
1044 	if (drmach_xt_mb != NULL) {
1045 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
1046 		    NCPU * sizeof (uchar_t));
1047 	}
1048 	if (drmach_shutdown_asm_mbox != NULL) {
1049 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
1050 		    sizeof (struct drmach_shutdown_mbox));
1051 	}
1052 	return (0);
1053 }
1054 
1055 static sbd_error_t *
1056 drmach_get_mc_asr_addr(drmachid_t id, uint64_t *pa)
1057 {
1058 	drmach_device_t	*dp;
1059 	pnode_t		nodeid;
1060 	uint64_t	addr;
1061 
1062 	if (!DRMACH_IS_MEM_ID(id))
1063 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1064 	dp = id;
1065 
1066 	nodeid = drmach_node_get_dnode(dp->node);
1067 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1068 		return (DRMACH_INTERNAL_ERROR());
1069 
1070 	addr = mc_get_asr_addr(nodeid);
1071 	if (addr == (uint64_t)-1)
1072 		return (DRMACH_INTERNAL_ERROR());
1073 
1074 	*pa = addr;
1075 	return (NULL);
1076 }
1077 
1078 static sbd_error_t *
1079 drmach_get_mc_idle_addr(drmachid_t id, uint64_t *pa)
1080 {
1081 	drmach_device_t	*dp;
1082 	pnode_t		nodeid;
1083 	uint64_t	addr;
1084 
1085 	if (!DRMACH_IS_MEM_ID(id))
1086 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1087 	dp = id;
1088 
1089 	nodeid = drmach_node_get_dnode(dp->node);
1090 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1091 		return (DRMACH_INTERNAL_ERROR());
1092 
1093 	addr = mc_get_idle_addr(nodeid);
1094 	if (addr == (uint64_t)-1)
1095 		return (DRMACH_INTERNAL_ERROR());
1096 
1097 	*pa = addr;
1098 	return (NULL);
1099 }
1100 
1101 static sbd_error_t *
1102 drmach_read_mc_asr(drmachid_t id, uint_t *mcregp)
1103 {
1104 	drmach_device_t	*dp;
1105 	pnode_t		 nodeid;
1106 	sbd_error_t	*err;
1107 
1108 	if (!DRMACH_IS_MEM_ID(id))
1109 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1110 	dp = id;
1111 
1112 	nodeid = drmach_node_get_dnode(dp->node);
1113 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1114 		err = DRMACH_INTERNAL_ERROR();
1115 	else if (mc_read_asr(nodeid, mcregp) == -1)
1116 		err = DRMACH_INTERNAL_ERROR();
1117 	else
1118 		err = NULL;
1119 
1120 	return (err);
1121 }
1122 
1123 static sbd_error_t *
1124 drmach_write_mc_asr(drmachid_t id, uint_t mcreg)
1125 {
1126 	drmach_device_t	*dp;
1127 	pnode_t		 nodeid;
1128 	sbd_error_t	*err;
1129 
1130 	if (!DRMACH_IS_MEM_ID(id))
1131 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1132 	dp = id;
1133 
1134 	nodeid = drmach_node_get_dnode(dp->node);
1135 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1136 		err = DRMACH_INTERNAL_ERROR();
1137 	else if (mc_write_asr(nodeid, mcreg) == -1)
1138 		err = DRMACH_INTERNAL_ERROR();
1139 	else
1140 		err = NULL;
1141 
1142 	return (err);
1143 }
1144 
1145 static struct memlist *
1146 memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len)
1147 {
1148 	struct memlist	*ml, *tl, *nl;
1149 
1150 	if (len == 0ull)
1151 		return (NULL);
1152 
1153 	if (mlist == NULL) {
1154 		mlist = GETSTRUCT(struct memlist, 1);
1155 		mlist->address = base;
1156 		mlist->size = len;
1157 		mlist->next = mlist->prev = NULL;
1158 
1159 		return (mlist);
1160 	}
1161 
1162 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
1163 		if (base < ml->address) {
1164 			if ((base + len) < ml->address) {
1165 				nl = GETSTRUCT(struct memlist, 1);
1166 				nl->address = base;
1167 				nl->size = len;
1168 				nl->next = ml;
1169 				if ((nl->prev = ml->prev) != NULL)
1170 					nl->prev->next = nl;
1171 				ml->prev = nl;
1172 				if (mlist == ml)
1173 					mlist = nl;
1174 			} else {
1175 				ml->size = MAX((base + len),
1176 						(ml->address + ml->size)) -
1177 						base;
1178 				ml->address = base;
1179 			}
1180 			break;
1181 
1182 		} else if (base <= (ml->address + ml->size)) {
1183 			ml->size = MAX((base + len),
1184 					(ml->address + ml->size)) -
1185 					MIN(ml->address, base);
1186 			ml->address = MIN(ml->address, base);
1187 			break;
1188 		}
1189 	}
1190 	if (ml == NULL) {
1191 		nl = GETSTRUCT(struct memlist, 1);
1192 		nl->address = base;
1193 		nl->size = len;
1194 		nl->next = NULL;
1195 		nl->prev = tl;
1196 		tl->next = nl;
1197 	}
1198 
1199 	memlist_coalesce(mlist);
1200 
1201 	return (mlist);
1202 }
1203 
1204 static sbd_error_t *
1205 drmach_prep_rename_script(drmach_device_t *s_mem, drmach_device_t *t_mem,
1206 	uint64_t t_slice_offset, caddr_t buf, int buflen)
1207 {
1208 	int			i, b, m;
1209 	drmach_mc_idle_script_t	*isp;
1210 	drmach_rename_script_t	*rsp;
1211 	int			s_bd, t_bd;
1212 	uint_t			s_masr, t_masr;
1213 	uint64_t		s_new_basepa, t_new_basepa;
1214 	int			b_idx, rv;
1215 	sbd_error_t		*err;
1216 	drmachid_t		 b_id;
1217 	drmach_board_t		*brd;
1218 
1219 #ifdef DEBUG
1220 	/*
1221 	 * Starfire CPU/MEM/IO boards have only one MC per board.
1222 	 * This function has been coded with that fact in mind.
1223 	 */
1224 	ASSERT(MAX_MEM_UNITS_PER_BOARD == 1);
1225 
1226 	/*
1227 	 * calculate the maximum space that could be consumed,
1228 	 * then verify the available buffer space is adequate.
1229 	 */
1230 	m  = sizeof (drmach_mc_idle_script_t *) * 2; /* two MCs */
1231 	b  = sizeof (drmach_rename_script_t *) * 3 * MAX_CPU_UNITS_PER_BOARD;
1232 	b += sizeof (drmach_rename_script_t *) * 3 * MAX_IO_UNITS_PER_BOARD;
1233 	b *= MAX_BOARDS;
1234 	b += sizeof (drmach_rename_script_t *) * 3;
1235 	b += sizeof (drmach_rename_script_t *) * 1;
1236 	ASSERT(m + b < buflen);
1237 #endif
1238 
1239 	/*
1240 	 * construct an array of MC idle register addresses of
1241 	 * both MCs.  The array is zero terminated -- as expected
1242 	 * by drmach_copy_rename_prog__relocatable().
1243 	 */
1244 	isp = (drmach_mc_idle_script_t *)buf;
1245 
1246 	/* source mc */
1247 	err = drmach_get_mc_idle_addr(s_mem, &isp->idle_addr);
1248 	if (err)
1249 		return (err);
1250 	isp->mem = s_mem;
1251 	isp += 1;
1252 
1253 	/* target mc */
1254 	err = drmach_get_mc_idle_addr(t_mem, &isp->idle_addr);
1255 	if (err)
1256 		return (err);
1257 	isp->mem = t_mem;
1258 	isp += 1;
1259 
1260 	/* terminator */
1261 	isp->idle_addr = 0;
1262 	isp->mem = NULL;
1263 	isp += 1;
1264 
1265 	/* fetch source mc asr register value */
1266 	err = drmach_read_mc_asr(s_mem, &s_masr);
1267 	if (err)
1268 		return (err);
1269 	else if (s_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1270 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1271 				s_mem->bp->cm.name, s_mem->cm.name));
1272 	}
1273 
1274 	/* fetch target mc asr register value */
1275 	err = drmach_read_mc_asr(t_mem, &t_masr);
1276 	if (err)
1277 		return (err);
1278 	else if (t_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1279 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1280 				t_mem->bp->cm.name, t_mem->cm.name));
1281 	}
1282 
1283 	/* get new source base pa from target's masr */
1284 	s_new_basepa = mc_asr_to_pa(t_masr);
1285 
1286 	/*
1287 	 * remove any existing slice offset to realign
1288 	 * memory with board's slice boundary
1289 	 */
1290 	s_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1291 
1292 	/* get new target base pa from source's masr */
1293 	t_new_basepa  = mc_asr_to_pa(s_masr);
1294 
1295 	/* remove any existing slice offset, then apply new offset */
1296 	t_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1297 	t_new_basepa += t_slice_offset;
1298 
1299 	/* encode new base pa into s_masr.  turn off mem present bit */
1300 	s_masr  = mc_pa_to_asr(s_masr, s_new_basepa);
1301 	s_masr &= ~STARFIRE_MC_MEM_PRESENT_MASK;
1302 
1303 	/* encode new base pa into t_masr.  turn on mem present bit */
1304 	t_masr  = mc_pa_to_asr(t_masr, t_new_basepa);
1305 	t_masr |= STARFIRE_MC_MEM_PRESENT_MASK;
1306 
1307 	/*
1308 	 * Step 0:	Mark source memory as not present.
1309 	 */
1310 	m = 0;
1311 	rsp = (drmach_rename_script_t *)isp;
1312 	err = drmach_get_mc_asr_addr(s_mem, &rsp[m].masr_addr);
1313 	if (err)
1314 		return (err);
1315 	rsp[m].masr = s_masr;
1316 	m++;
1317 
1318 	/*
1319 	 * Step 1:	Write source base address to target MC
1320 	 *		with present bit off.
1321 	 */
1322 	err = drmach_get_mc_asr_addr(t_mem, &rsp[m].masr_addr);
1323 	if (err)
1324 		return (err);
1325 	rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1326 	m++;
1327 
1328 	/*
1329 	 * Step 2:	Now rewrite target reg with present bit on.
1330 	 */
1331 	rsp[m].masr_addr = rsp[m-1].masr_addr;
1332 	rsp[m].masr = t_masr;
1333 	m++;
1334 
1335 	s_bd = s_mem->bp->bnum;
1336 	t_bd = t_mem->bp->bnum;
1337 
1338 	DRMACH_PR("preparing script for CPU and IO units:\n");
1339 
1340 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
1341 	if (rv) {
1342 		/* catch this in debug kernels */
1343 		ASSERT(0);
1344 		return (DRMACH_INTERNAL_ERROR());
1345 	}
1346 
1347 	do {
1348 		int			 d_idx;
1349 		drmachid_t		 d_id;
1350 		drmach_device_t		*device;
1351 
1352 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
1353 		brd = b_id;
1354 		b = brd->bnum;
1355 
1356 		/*
1357 		 * Step 3:	Update PC MADR tables for CPUs.
1358 		 */
1359 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1360 		if (rv) {
1361 			/* must mean no devices on this board */
1362 			break;
1363 		}
1364 
1365 		DRMACH_PR("\t%s\n", brd->cm.name);
1366 
1367 		do {
1368 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1369 
1370 			if (!DRMACH_IS_CPU_ID(d_id))
1371 				continue;
1372 
1373 			device = d_id;
1374 			i = device->unum;
1375 
1376 			DRMACH_PR("\t\t%s\n", device->cm.name);
1377 
1378 			/*
1379 			 * Disabled detaching mem node.
1380 			 */
1381 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i);
1382 			rsp[m].masr = s_masr;
1383 			m++;
1384 			/*
1385 			 * Always write masr with present bit
1386 			 * off and then again with it on.
1387 			 */
1388 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i);
1389 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1390 			m++;
1391 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1392 			rsp[m].masr = t_masr;
1393 			m++;
1394 
1395 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1396 
1397 		/*
1398 		 * Step 4:	Update PC MADR tables for IOs.
1399 		 */
1400 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1401 		/* this worked for previous loop, must work here too */
1402 		ASSERT(rv == 0);
1403 
1404 		do {
1405 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1406 
1407 			if (!DRMACH_IS_IO_ID(d_id))
1408 				continue;
1409 
1410 			device = d_id;
1411 			i = device->unum;
1412 
1413 			DRMACH_PR("\t\t%s\n", device->cm.name);
1414 
1415 			/*
1416 			 * Disabled detaching mem node.
1417 			 */
1418 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i+4);
1419 			rsp[m].masr = s_masr;
1420 			m++;
1421 			/*
1422 			 * Always write masr with present bit
1423 			 * off and then again with it on.
1424 			 */
1425 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i+4);
1426 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1427 			m++;
1428 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1429 			rsp[m].masr = t_masr;
1430 			m++;
1431 
1432 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1433 	} while (drmach_array_next(drmach_boards, &b_idx, &b_id) == 0);
1434 
1435 	/*
1436 	 * Zero masr_addr value indicates the END.
1437 	 */
1438 	rsp[m].masr_addr = 0ull;
1439 	rsp[m].masr = 0;
1440 	DRMACH_PR("number of steps in rename script = %d\n", m);
1441 	m++;
1442 
1443 	/* paranoia */
1444 	ASSERT((caddr_t)&rsp[m] <= buf + buflen);
1445 
1446 #ifdef DEBUG
1447 	{
1448 		int	j;
1449 
1450 		DRMACH_PR("mc idle register address list:");
1451 		isp = (drmach_mc_idle_script_t *)buf;
1452 		DRMACH_PR("source mc idle addr 0x%lx, mem id %p",
1453 			isp[0].idle_addr, isp[0].mem);
1454 		DRMACH_PR("target mc idle addr 0x%lx, mem id %p",
1455 			isp[1].idle_addr, isp[1].mem);
1456 		ASSERT(isp[2].idle_addr == 0);
1457 
1458 		DRMACH_PR("copy-rename script:");
1459 		for (j = 0; j < m; j++) {
1460 			DRMACH_PR("0x%lx = 0x%08x",
1461 				rsp[j].masr_addr, rsp[j].masr);
1462 		}
1463 
1464 		DELAY(1000000);
1465 	}
1466 #endif
1467 
1468 	/* return number of bytes consumed */
1469 	b = (caddr_t)&rsp[m] - buf;
1470 	DRMACH_PR("total number of bytes consumed is %d\n", b);
1471 	ASSERT(b <= buflen);
1472 
1473 #ifdef lint
1474 	buflen = buflen;
1475 #endif
1476 
1477 	return (NULL);
1478 }
1479 
1480 /*
1481  * The routine performs the necessary memory COPY and MC adr SWITCH.
1482  * Both operations MUST be at the same "level" so that the stack is
1483  * maintained correctly between the copy and switch.  The switch
1484  * portion implements a caching mechanism to guarantee the code text
1485  * is cached prior to execution.  This is to guard against possible
1486  * memory access while the MC adr's are being modified.
1487  *
1488  * IMPORTANT: The _drmach_copy_rename_end() function must immediately
1489  * follow drmach_copy_rename_prog__relocatable() so that the correct
1490  * "length" of the drmach_copy_rename_prog__relocatable can be
1491  * calculated.  This routine MUST be a LEAF function, i.e. it can
1492  * make NO function calls, primarily for two reasons:
1493  *
1494  *	1. We must keep the stack consistent across the "switch".
1495  *	2. Function calls are compiled to relative offsets, and
1496  *	   we execute this function we'll be executing it from
1497  *	   a copied version in a different area of memory, thus
1498  *	   the relative offsets will be bogus.
1499  *
1500  * Moreover, it must have the "__relocatable" suffix to inform DTrace
1501  * providers (and anything else, for that matter) that this
1502  * function's text is manually relocated elsewhere before it is
1503  * executed.  That is, it cannot be safely instrumented with any
1504  * methodology that is PC-relative.
1505  */
1506 static void
1507 drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog)
1508 {
1509 	extern void drmach_exec_script_il(drmach_rename_script_t *rsp);
1510 
1511 	drmach_mc_idle_script_t		*isp;
1512 	struct memlist			*ml;
1513 	int				csize;
1514 	int				lnsize;
1515 	uint64_t			caddr;
1516 
1517 	isp = (drmach_mc_idle_script_t *)prog->data;
1518 
1519 	caddr = ecache_flushaddr;
1520 	csize = (cpunodes[CPU->cpu_id].ecache_size << 1);
1521 	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;
1522 
1523 	/*
1524 	 * DO COPY.
1525 	 */
1526 	for (ml = prog->c_ml; ml; ml = ml->next) {
1527 		uint64_t	s_pa, t_pa;
1528 		uint64_t	nbytes;
1529 
1530 		s_pa = prog->s_copybasepa + ml->address;
1531 		t_pa = prog->t_copybasepa + ml->address;
1532 		nbytes = ml->size;
1533 
1534 		while (nbytes != 0ull) {
1535 			/*
1536 			 * This copy does NOT use an ASI
1537 			 * that avoids the Ecache, therefore
1538 			 * the dst_pa addresses may remain
1539 			 * in our Ecache after the dst_pa
1540 			 * has been removed from the system.
1541 			 * A subsequent write-back to memory
1542 			 * will cause an ARB-stop because the
1543 			 * physical address no longer exists
1544 			 * in the system. Therefore we must
1545 			 * flush out local Ecache after we
1546 			 * finish the copy.
1547 			 */
1548 
1549 			/* copy 32 bytes at src_pa to dst_pa */
1550 			bcopy32_il(s_pa, t_pa);
1551 
1552 			/* increment by 32 bytes */
1553 			s_pa += (4 * sizeof (uint64_t));
1554 			t_pa += (4 * sizeof (uint64_t));
1555 
1556 			/* decrement by 32 bytes */
1557 			nbytes -= (4 * sizeof (uint64_t));
1558 		}
1559 	}
1560 
1561 	/*
1562 	 * Since bcopy32_il() does NOT use an ASI to bypass
1563 	 * the Ecache, we need to flush our Ecache after
1564 	 * the copy is complete.
1565 	 */
1566 	flush_ecache_il(caddr, csize, lnsize);		/* inline version */
1567 
1568 	/*
1569 	 * Wait for MCs to go idle.
1570 	 */
1571 	do {
1572 		register int	t = 10;
1573 		register uint_t	v;
1574 
1575 		/* loop t cycles waiting for each mc to indicate it's idle */
1576 		do {
1577 			v = ldphysio_il(isp->idle_addr)
1578 				& STARFIRE_MC_IDLE_MASK;
1579 
1580 		} while (v != STARFIRE_MC_IDLE_MASK && t-- > 0);
1581 
1582 		/* bailout if timedout */
1583 		if (t <= 0) {
1584 			prog->restless_mc = isp->mem;
1585 			return;
1586 		}
1587 
1588 		isp += 1;
1589 
1590 		/* stop if terminating zero has been reached */
1591 	} while (isp->idle_addr != 0);
1592 
1593 	/* advance passed terminating zero */
1594 	isp += 1;
1595 
1596 	/*
1597 	 * The following inline assembly routine caches
1598 	 * the rename script and then caches the code that
1599 	 * will do the rename.  This is necessary
1600 	 * so that we don't have any memory references during
1601 	 * the reprogramming.  We accomplish this by first
1602 	 * jumping through the code to guarantee it's cached
1603 	 * before we actually execute it.
1604 	 */
1605 	drmach_exec_script_il((drmach_rename_script_t *)isp);
1606 }
1607 
/*
 * End-of-text marker for drmach_copy_rename_prog__relocatable().
 *
 * IMPORTANT:	This function MUST be located immediately after
 *		drmach_copy_rename_prog__relocatable so that the
 *		difference between the two addresses accurately
 *		estimates that routine's size.  Note that this
 *		assumes the compiler keeps these functions in the
 *		order in which they appear :-o
 */
static void
drmach_copy_rename_end(void)
{
	/* intentionally empty */
}
1619 
1620 sbd_error_t *
1621 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
1622 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *pgm_id)
1623 {
1624 	drmach_device_t	*s_mem;
1625 	drmach_device_t	*t_mem;
1626 	struct memlist	*x_ml;
1627 	uint64_t	off_mask, s_copybasepa, t_copybasepa, t_basepa;
1628 	int		len;
1629 	caddr_t		bp, wp;
1630 	pda_handle_t	ph;
1631 	sbd_error_t	*err;
1632 	drmach_copy_rename_program_t *prog;
1633 
1634 	if (!DRMACH_IS_MEM_ID(s_id))
1635 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1636 	if (!DRMACH_IS_MEM_ID(t_id))
1637 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1638 	s_mem = s_id;
1639 	t_mem = t_id;
1640 
1641 	/* get starting physical address of target memory */
1642 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
1643 	if (err)
1644 		return (err);
1645 
1646 	/* calculate slice offset mask from slice size */
1647 	off_mask = mc_get_mem_alignment() - 1;
1648 
1649 	/* calculate source and target base pa */
1650 	s_copybasepa = c_ml->address;
1651 	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);
1652 
1653 	/* paranoia */
1654 	ASSERT((c_ml->address & off_mask) >= t_slice_offset);
1655 
1656 	/* adjust copy memlist addresses to be relative to copy base pa */
1657 	x_ml = c_ml;
1658 	while (x_ml != NULL) {
1659 		x_ml->address -= s_copybasepa;
1660 		x_ml = x_ml->next;
1661 	}
1662 
1663 #ifdef DEBUG
1664 	{
1665 	uint64_t s_basepa, s_size, t_size;
1666 
1667 	x_ml = c_ml;
1668 	while (x_ml->next != NULL)
1669 		x_ml = x_ml->next;
1670 
1671 	DRMACH_PR("source copy span: base pa 0x%lx, end pa 0x%lx\n",
1672 		s_copybasepa,
1673 		s_copybasepa + x_ml->address + x_ml->size);
1674 
1675 	DRMACH_PR("target copy span: base pa 0x%lx, end pa 0x%lx\n",
1676 		t_copybasepa,
1677 		t_copybasepa + x_ml->address + x_ml->size);
1678 
1679 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
1680 	MEMLIST_DUMP(c_ml);
1681 
1682 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
1683 	ASSERT(err == NULL);
1684 
1685 	err = drmach_mem_get_size(s_id, &s_size);
1686 	ASSERT(err == NULL);
1687 
1688 	err = drmach_mem_get_size(t_id, &t_size);
1689 	ASSERT(err == NULL);
1690 
1691 	DRMACH_PR("current source base pa 0x%lx, size 0x%lx\n",
1692 		s_basepa, s_size);
1693 	DRMACH_PR("current target base pa 0x%lx, size 0x%lx\n",
1694 		t_basepa, t_size);
1695 
1696 	ASSERT(s_copybasepa + x_ml->address + x_ml->size <= s_basepa + s_size);
1697 	ASSERT(t_copybasepa + x_ml->address + x_ml->size <= t_basepa + t_size);
1698 	}
1699 #endif
1700 
1701 	ph = drmach_pda_open();
1702 	if (ph == NULL)
1703 		return (DRMACH_INTERNAL_ERROR());
1704 
1705 	/*
1706 	 * bp will be page aligned, since we're calling
1707 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
1708 	 */
1709 	wp = bp = kmem_zalloc(PAGESIZE, KM_SLEEP);
1710 
1711 	/* allocate space for copy rename struct */
1712 	len = sizeof (drmach_copy_rename_program_t);
1713 	DRMACH_PR("prog = 0x%p, header len %d\n", wp, len);
1714 	prog = (drmach_copy_rename_program_t *)wp;
1715 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1716 
1717 	/*
1718 	 * Copy the code for the copy-rename routine into
1719 	 * a page aligned piece of memory.  We do this to guarantee
1720 	 * that we're executing within the same page and thus reduce
1721 	 * the possibility of cache collisions between different
1722 	 * pages.
1723 	 */
1724 	len = (int)((ulong_t)drmach_copy_rename_end -
1725 		    (ulong_t)drmach_copy_rename_prog__relocatable);
1726 	ASSERT(wp + len < bp + PAGESIZE);
1727 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
1728 
1729 	DRMACH_PR("copy-rename function 0x%p, len %d\n", wp, len);
1730 	prog->run = (void (*)())wp;
1731 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1732 
1733 	/*
1734 	 * Prepare data page that will contain script of
1735 	 * operations to perform during copy-rename.
1736 	 * Allocate temporary buffer to hold script.
1737 	 */
1738 	err = drmach_prep_rename_script(s_mem, t_mem, t_slice_offset,
1739 		wp, PAGESIZE - (wp - bp));
1740 	if (err) {
1741 		(void) drmach_copy_rename_fini(prog);
1742 		return (err);
1743 	}
1744 
1745 	DRMACH_PR("copy-rename script 0x%p, len %d\n", wp, len);
1746 	prog->data = wp;
1747 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1748 
1749 	prog->ph = ph;
1750 	prog->s_copybasepa = s_copybasepa;
1751 	prog->t_copybasepa = t_copybasepa;
1752 	prog->c_ml = c_ml;
1753 	*pgm_id = prog;
1754 
1755 	return (NULL);
1756 }
1757 
1758 sbd_error_t *
1759 drmach_copy_rename_fini(drmachid_t id)
1760 {
1761 	drmach_copy_rename_program_t	*prog = id;
1762 	sbd_error_t			*err = NULL;
1763 
1764 	if (prog->c_ml != NULL)
1765 		memlist_delete(prog->c_ml);
1766 
1767 	if (prog->ph != NULL)
1768 		pda_close(prog->ph);
1769 
1770 	if (prog->restless_mc != 0) {
1771 		cmn_err(CE_WARN, "MC did not idle; OBP Node 0x%x",
1772 			(uint_t)drmach_node_get_dnode(prog->restless_mc->node));
1773 
1774 		err = DRMACH_INTERNAL_ERROR();
1775 	}
1776 
1777 	kmem_free(prog, PAGESIZE);
1778 
1779 	return (err);
1780 }
1781 
1782 static sbd_error_t *
1783 drmach_io_new(drmach_device_t *dp)
1784 {
1785 	static sbd_error_t *drmach_io_release(drmachid_t);
1786 	static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
1787 
1788 	sbd_error_t	*err;
1789 	int		 portid;
1790 
1791 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
1792 	if (err == NULL) {
1793 		ASSERT(portid & 0x40);
1794 		dp->unum = portid & 1;
1795 	}
1796 
1797 	dp->cm.isa = (void *)drmach_io_new;
1798 	dp->cm.release = drmach_io_release;
1799 	dp->cm.status = drmach_io_status;
1800 
1801 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
1802 
1803 	return (err);
1804 }
1805 
1806 static void
1807 drmach_iopc_op(pda_handle_t ph, drmach_iopc_op_t op)
1808 {
1809 	register int b;
1810 
1811 	for (b = 0; b < MAX_BOARDS; b++) {
1812 		int		p;
1813 		ushort_t	bda_ioc;
1814 		board_desc_t	*bdesc;
1815 
1816 		if (pda_board_present(ph, b) == 0)
1817 			continue;
1818 
1819 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
1820 		/*
1821 		 * Update PCs for IOCs.
1822 		 */
1823 		bda_ioc = bdesc->bda_ioc;
1824 		for (p = 0; p < MAX_IOCS; p++) {
1825 			u_longlong_t	idle_addr;
1826 			uchar_t		value;
1827 
1828 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
1829 				continue;
1830 
1831 			idle_addr = STARFIRE_BB_PC_ADDR(b, p, 1);
1832 
1833 			switch (op) {
1834 			case DO_PAUSE:
1835 				value = STARFIRE_BB_PC_PAUSE(p);
1836 				break;
1837 
1838 			case DO_IDLE:
1839 				value = STARFIRE_BB_PC_IDLE(p);
1840 				break;
1841 
1842 			case DO_UNPAUSE:
1843 				value = ldbphysio(idle_addr);
1844 				value &= ~STARFIRE_BB_PC_PAUSE(p);
1845 				break;
1846 
1847 			case DO_UNIDLE:
1848 				value = ldbphysio(idle_addr);
1849 				value &= ~STARFIRE_BB_PC_IDLE(p);
1850 				break;
1851 
1852 			default:
1853 				cmn_err(CE_PANIC,
1854 					"drmach_iopc_op: unknown op (%d)",
1855 					(int)op);
1856 				/*NOTREACHED*/
1857 			}
1858 			stbphysio(idle_addr, value);
1859 		}
1860 	}
1861 }
1862 
1863 void
1864 drmach_copy_rename(drmachid_t id)
1865 {
1866 	drmach_copy_rename_program_t	*prog = id;
1867 	uint64_t			neer;
1868 
1869 	/*
1870 	 * UPA IDLE
1871 	 * Protocol = PAUSE -> IDLE -> UNPAUSE
1872 	 * In reality since we only "idle" the IOPCs it's sufficient
1873 	 * to just issue the IDLE operation since (in theory) all IOPCs
1874 	 * in the field are PC6.  However, we'll be robust and do the
1875 	 * proper workaround protocol so that we never have to worry!
1876 	 */
1877 	drmach_iopc_op(prog->ph, DO_PAUSE);
1878 	drmach_iopc_op(prog->ph, DO_IDLE);
1879 	DELAY(100);
1880 	drmach_iopc_op(prog->ph, DO_UNPAUSE);
1881 	DELAY(100);
1882 
1883 	/* disable CE reporting */
1884 	neer = get_error_enable();
1885 	set_error_enable(neer & ~EER_CEEN);
1886 
1887 	/* run the copy/rename program */
1888 	prog->run(prog);
1889 
1890 	/* enable CE reporting */
1891 	set_error_enable(neer);
1892 
1893 	/*
1894 	 * UPA UNIDLE
1895 	 * Protocol = UNIDLE
1896 	 */
1897 	drmach_iopc_op(prog->ph, DO_UNIDLE);
1898 	DELAY(100);
1899 }
1900 
1901 /*
1902  * The counter-timer and perf-counter nodes are not being cleaned
1903  * up after a board that was present at start of day is detached.
1904  * If the board has become unconfigured with this operation, walk
1905  * the prom tree and find all counter-timer and perf-counter nodes
1906  * that have the same board number as the board that was just
1907  * unconfigured and remove them.
1908  */
1909 static sbd_error_t *
1910 drmach_remove_counter_nodes(drmachid_t id)
1911 {
1912 	int		num;
1913 	char		name[OBP_MAXDRVNAME];
1914 	pnode_t		child;
1915 	dev_info_t	*dip;
1916 	sbd_error_t	*err;
1917 	drmach_status_t	stat;
1918 	drmach_board_t	*bp;
1919 
1920 	if (!DRMACH_IS_BOARD_ID(id)) {
1921 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1922 	}
1923 
1924 	if ((err = drmach_board_status(id, &stat)) != NULL) {
1925 		return (err);
1926 	}
1927 
1928 	/*
1929 	 * Only clean up the counter-timer and perf-counter
1930 	 * nodes when the entire board is unconfigured.
1931 	 */
1932 	if (stat.configured) {
1933 		return (NULL);
1934 	}
1935 
1936 	bp = (drmach_board_t *)id;
1937 
1938 	err = NULL;
1939 
1940 	for (child = prom_childnode(prom_rootnode()); child != OBP_NONODE;
1941 	    child = prom_nextnode(child)) {
1942 
1943 		if (prom_getprop(child, OBP_BOARDNUM, (caddr_t)&num) == -1) {
1944 			continue;
1945 		}
1946 
1947 		if (bp->bnum != num) {
1948 			continue;
1949 		}
1950 
1951 		if (prom_getprop(child, OBP_NAME, (caddr_t)name) == -1) {
1952 			continue;
1953 		}
1954 
1955 		if (strncmp(name, MISC_COUNTER_TIMER_DEVNAME, OBP_MAXDRVNAME) &&
1956 		    strncmp(name, MISC_PERF_COUNTER_DEVNAME, OBP_MAXDRVNAME)) {
1957 				continue;
1958 		}
1959 
1960 		/* Root node doesn't have to be held */
1961 		dip = e_ddi_nodeid_to_dip(child);
1962 
1963 		/*
1964 		 * If the node is only in the OBP tree, then
1965 		 * we don't have to remove it.
1966 		 */
1967 		if (dip) {
1968 			dev_info_t *fdip = NULL;
1969 
1970 			DRMACH_PR("removing %s devinfo node\n", name);
1971 
1972 			e_ddi_branch_hold(dip);
1973 			ddi_release_devi(dip); /* held in e_ddi_nodeid_to_dip */
1974 
1975 			if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1976 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1977 
1978 				/*
1979 				 * If non-NULL, fdip is held and must be
1980 				 * released.
1981 				 */
1982 				if (fdip != NULL) {
1983 					(void) ddi_pathname(fdip, path);
1984 					ddi_release_devi(fdip);
1985 				} else {
1986 					(void) ddi_pathname(dip, path);
1987 				}
1988 
1989 				err = drerr_new(1, ESTF_DRVFAIL, path);
1990 				kmem_free(path, MAXPATHLEN);
1991 				e_ddi_branch_rele(dip);
1992 				break;
1993 			}
1994 		}
1995 	}
1996 
1997 	return (err);
1998 }
1999 
2000 /*ARGSUSED*/
2001 sbd_error_t *
2002 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
2003 {
2004 	/* allow status and ncm operations to always succeed */
2005 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
2006 		return (NULL);
2007 	}
2008 
2009 	/* check all other commands for the required option string */
2010 	if ((opts->size > 0) && (opts->copts != NULL)) {
2011 
2012 		DRMACH_PR("platform options: %s\n", opts->copts);
2013 
2014 		if (strstr(opts->copts, "xfdr") != NULL) {
2015 			return (NULL);
2016 		}
2017 	}
2018 
2019 	return (drerr_new(0, ESTF_SUPPORT, NULL));
2020 }
2021 
2022 /*ARGSUSED*/
2023 sbd_error_t *
2024 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
2025 {
2026 	sbd_error_t	*err = NULL;
2027 
2028 	switch (cmd) {
2029 	case SBD_CMD_UNCONFIGURE:
2030 
2031 		err = drmach_remove_counter_nodes(id);
2032 		break;
2033 
2034 	case SBD_CMD_CONFIGURE:
2035 	case SBD_CMD_DISCONNECT:
2036 	case SBD_CMD_CONNECT:
2037 	case SBD_CMD_GETNCM:
2038 	case SBD_CMD_STATUS:
2039 		break;
2040 
2041 	default:
2042 		break;
2043 	}
2044 
2045 	return (err);
2046 }
2047 
2048 sbd_error_t *
2049 drmach_board_assign(int bnum, drmachid_t *id)
2050 {
2051 	sbd_error_t	*err;
2052 
2053 	if (!drmach_initialized && drmach_init() == -1) {
2054 		err = DRMACH_INTERNAL_ERROR();
2055 	} else if (drmach_array_get(drmach_boards, bnum, id) == -1) {
2056 		err = drerr_new(1, ESTF_BNUM, "%d", bnum);
2057 	} else if (*id != NULL) {
2058 		err = NULL;
2059 	} else {
2060 		drmach_board_t	*bp;
2061 
2062 		*id  = (drmachid_t)drmach_board_new(bnum);
2063 		bp = *id;
2064 		bp->assigned = 1;
2065 		err = NULL;
2066 	}
2067 
2068 	return (err);
2069 }
2070 
2071 static int
2072 drmach_attach_board(void *arg)
2073 {
2074 	drmach_board_t	*obj = (drmach_board_t *)arg;
2075 	cpuset_t	cset;
2076 	int		retval;
2077 
2078 	/*
2079 	 * OBP disables traps during the board probe.
2080 	 * So, in order to prevent cross-call/cross-trap timeouts,
2081 	 * and thus panics, we effectively block anybody from
2082 	 * issuing xc's/xt's by doing a promsafe_xc_attention.
2083 	 * In the previous version of Starfire DR (2.6), a timeout
2084 	 * suspension mechanism was implemented in the send-mondo
2085 	 * assembly.  That mechanism is unnecessary with the
2086 	 * existence of xc_attention/xc_dismissed.
2087 	 */
2088 	cset = cpu_ready_set;
2089 	promsafe_xc_attention(cset);
2090 
2091 	retval = prom_starfire_add_brd(obj->connect_cpuid);
2092 
2093 	xc_dismissed(cset);
2094 
2095 	return (retval);
2096 }
2097 
2098 sbd_error_t *
2099 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
2100 {
2101 	drmach_board_t	*obj = (drmach_board_t *)id;
2102 	int		retval;
2103 	sbd_error_t	*err;
2104 	char		*cptr, *copts;
2105 
2106 	if (!DRMACH_IS_BOARD_ID(id))
2107 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2108 
2109 	if (opts->size > 0)
2110 		copts = opts->copts;
2111 
2112 	if ((cptr = strstr(copts, "cpuid=")) != NULL) {
2113 		int cpuid;
2114 
2115 		cptr += strlen("cpuid=");
2116 		cpuid = stoi(&cptr);
2117 
2118 		if (DRMACH_CPUID2BNUM(cpuid) == obj->bnum) {
2119 			obj->connect_cpuid = cpuid;
2120 			obj->assigned = 1;
2121 		} else
2122 			return (drerr_new(1, ESTF_SETCPUVAL, "%d", cpuid));
2123 	} else {
2124 		/* cpuid was not specified */
2125 		obj->connect_cpuid = -1;
2126 	}
2127 
2128 	if (obj->connect_cpuid == -1) {
2129 		err =  drerr_new(1, ESTF_NOCPUID, obj->cm.name);
2130 		return (err);
2131 	}
2132 
2133 	cmn_err(CE_CONT, "DRMACH: PROM attach %s CPU %d\n",
2134 		obj->cm.name, obj->connect_cpuid);
2135 
2136 	retval = prom_tree_update(drmach_attach_board, obj);
2137 
2138 	if (retval == 0)
2139 		err = NULL;
2140 	else {
2141 		cmn_err(CE_WARN, "prom error: prom_starfire_add_brd(%d) "
2142 			"returned %d", obj->connect_cpuid, retval);
2143 
2144 		err = drerr_new(1, ESTF_PROBE, obj->cm.name);
2145 	}
2146 
2147 	obj->connect_cpuid = -1;
2148 
2149 	return (err);
2150 }
2151 
2152 /*ARGSUSED*/
2153 sbd_error_t *
2154 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
2155 {
2156 	drmach_board_t		*bp;
2157 	int			rv;
2158 	int			d_idx;	/* device index */
2159 	drmachid_t		d_id;	/* device ID */
2160 	sbd_error_t		*err;
2161 
2162 	if (!DRMACH_IS_BOARD_ID(id))
2163 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2164 
2165 	bp = id;
2166 
2167 	/*
2168 	 * We need to make sure all of the board's device nodes
2169 	 * have been removed from the Solaris device tree before
2170 	 * continuing with the disconnect. Otherwise, we could
2171 	 * disconnect the board and remove the OBP device tree
2172 	 * nodes with Solaris device tree nodes remaining.
2173 	 *
2174 	 * On Starfire, Solaris device tree nodes are deleted
2175 	 * during unconfigure by drmach_unconfigure(). It's
2176 	 * necessary to do this here because drmach_unconfigure()
2177 	 * failures are not handled during unconfigure.
2178 	 */
2179 	if (bp->devices) {
2180 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
2181 		while (rv == 0) {
2182 			err = drmach_unconfigure(d_id, DRMACH_DEVI_REMOVE);
2183 			if (err)
2184 				return (err);
2185 
2186 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
2187 		}
2188 	}
2189 
2190 	/*
2191 	 * Starfire board Solaris device tree counter nodes,
2192 	 * which are only present on start-of-day boards, are
2193 	 * removed in the dr_post_op() code flow after the
2194 	 * board is unconfigured. We call the counter node
2195 	 * removal function here because unconfigure errors
2196 	 * can cause the dr_post_op() function to be skipped
2197 	 * after an unconfigure operation even though all of
2198 	 * the board's devices have been transitioned to the
2199 	 * unconfigured state.
2200 	 */
2201 	err = drmach_remove_counter_nodes(id);
2202 	if (err)
2203 		return (err);
2204 
2205 	return (NULL);
2206 }
2207 
2208 static int
2209 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
2210 {
2211 	drmach_node_t			*node = args->node;
2212 	drmach_board_cb_data_t		*data = args->data;
2213 	drmach_board_t			*obj = data->obj;
2214 
2215 	int		 rv;
2216 	int		 bnum;
2217 	drmach_device_t	*device;
2218 
2219 	rv = drmach_node_get_prop(node, OBP_BOARDNUM, &bnum);
2220 	if (rv) {
2221 		/*
2222 		 * if the node does not have a board# property, then
2223 		 * by that information alone it is known that drmach
2224 		 * is not interested in it.
2225 		 */
2226 		return (0);
2227 	} else if (bnum != obj->bnum)
2228 		return (0);
2229 
2230 	/*
2231 	 * Create a device data structure from this node data.
2232 	 * The call may yield nothing if the node is not of interest
2233 	 * to drmach.
2234 	 */
2235 	data->err = drmach_device_new(node, obj, &device);
2236 	if (data->err)
2237 		return (-1);
2238 	else if (device == NULL) {
2239 		/*
2240 		 * drmach_device_new examined the node we passed in
2241 		 * and determined that it was one not of interest to
2242 		 * drmach.  So, it is skipped.
2243 		 */
2244 		return (0);
2245 	}
2246 
2247 	rv = drmach_array_set(obj->devices, data->ndevs++, device);
2248 	if (rv) {
2249 		drmach_device_dispose(device);
2250 		data->err = DRMACH_INTERNAL_ERROR();
2251 		return (-1);
2252 	}
2253 
2254 	data->err = (*data->found)(data->a, device->type, device->unum, device);
2255 	return (data->err == NULL ? 0 : -1);
2256 }
2257 
2258 sbd_error_t *
2259 drmach_board_find_devices(drmachid_t id, void *a,
2260 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
2261 {
2262 	extern int		 plat_max_cpu_units_per_board();
2263 	extern int		 plat_max_mem_units_per_board();
2264 	extern int		 plat_max_io_units_per_board();
2265 
2266 	drmach_board_t		*obj = (drmach_board_t *)id;
2267 	sbd_error_t		*err;
2268 	int			 max_devices;
2269 	int			 rv;
2270 	drmach_board_cb_data_t	data;
2271 
2272 	max_devices  = plat_max_cpu_units_per_board();
2273 	max_devices += plat_max_mem_units_per_board();
2274 	max_devices += plat_max_io_units_per_board();
2275 
2276 	obj->devices = drmach_array_new(0, max_devices);
2277 
2278 	data.obj = obj;
2279 	data.ndevs = 0;
2280 	data.found = found;
2281 	data.a = a;
2282 	data.err = NULL;
2283 
2284 	rv = drmach_node_walk(obj->tree, &data, drmach_board_find_devices_cb);
2285 	if (rv == 0)
2286 		err = NULL;
2287 	else {
2288 		drmach_array_dispose(obj->devices, drmach_device_dispose);
2289 		obj->devices = NULL;
2290 
2291 		if (data.err)
2292 			err = data.err;
2293 		else
2294 			err = DRMACH_INTERNAL_ERROR();
2295 	}
2296 
2297 	return (err);
2298 }
2299 
2300 int
2301 drmach_board_lookup(int bnum, drmachid_t *id)
2302 {
2303 	int	rv = 0;
2304 
2305 	if (!drmach_initialized && drmach_init() == -1) {
2306 		*id = 0;
2307 		rv = -1;
2308 	} else if (drmach_array_get(drmach_boards, bnum, id)) {
2309 		*id = 0;
2310 		rv = -1;
2311 	}
2312 	return (rv);
2313 }
2314 
2315 sbd_error_t *
2316 drmach_board_name(int bnum, char *buf, int buflen)
2317 {
2318 	snprintf(buf, buflen, "SB%d", bnum);
2319 	return (NULL);
2320 }
2321 
2322 sbd_error_t *
2323 drmach_board_poweroff(drmachid_t id)
2324 {
2325 	drmach_board_t	*bp;
2326 	sbd_error_t	*err;
2327 	drmach_status_t	 stat;
2328 
2329 	if (!DRMACH_IS_BOARD_ID(id))
2330 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2331 	bp = id;
2332 
2333 	err = drmach_board_status(id, &stat);
2334 	if (err)
2335 		return (err);
2336 	else if (stat.configured || stat.busy)
2337 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2338 	else {
2339 		/* board power off is essentially a noop for Starfire */
2340 		bp->powered = 0;
2341 		return (NULL);
2342 	}
2343 	/*NOTREACHED*/
2344 }
2345 
2346 sbd_error_t *
2347 drmach_board_poweron(drmachid_t id)
2348 {
2349 	drmach_board_t	*bp;
2350 
2351 	if (!DRMACH_IS_BOARD_ID(id))
2352 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2353 	bp = id;
2354 
2355 	/* board power on is essentially a noop for Starfire */
2356 	bp->powered = 1;
2357 
2358 	return (NULL);
2359 }
2360 
2361 static sbd_error_t *
2362 drmach_board_release(drmachid_t id)
2363 {
2364 	if (!DRMACH_IS_BOARD_ID(id))
2365 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2366 	return (NULL);
2367 }
2368 
2369 /*ARGSUSED*/
2370 sbd_error_t *
2371 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
2372 {
2373 	return (NULL);
2374 }
2375 
2376 sbd_error_t *
2377 drmach_board_unassign(drmachid_t id)
2378 {
2379 	drmach_board_t	*bp;
2380 	sbd_error_t	*err;
2381 	drmach_status_t	 stat;
2382 
2383 	if (!DRMACH_IS_BOARD_ID(id))
2384 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2385 	bp = id;
2386 
2387 	err = drmach_board_status(id, &stat);
2388 	if (err)
2389 		return (err);
2390 	else if (stat.configured || stat.busy)
2391 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2392 	else if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
2393 		return (DRMACH_INTERNAL_ERROR());
2394 	else {
2395 		drmach_board_dispose(bp);
2396 		return (NULL);
2397 	}
2398 	/*NOTREACHED*/
2399 }
2400 
2401 static sbd_error_t *
2402 drmach_cpu_new(drmach_device_t *dp)
2403 {
2404 	static sbd_error_t *drmach_cpu_release(drmachid_t);
2405 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
2406 
2407 	sbd_error_t	*err;
2408 	int		 portid;
2409 
2410 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
2411 	if (err == NULL)
2412 		dp->unum = portid & 3;
2413 
2414 	dp->cm.isa = (void *)drmach_cpu_new;
2415 	dp->cm.release = drmach_cpu_release;
2416 	dp->cm.status = drmach_cpu_status;
2417 
2418 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
2419 
2420 	return (err);
2421 }
2422 
2423 /*
2424  * drmach_cpu_obp_detach()
2425  *  This requires two steps, first, we must put the cpuid into the OBP
2426  *  idle loop (Idle in Program) state.  Then we call OBP to place the CPU
2427  *  into the "Detached" state, which does any special processing to
2428  *  actually detach the cpu, such as flushing ecache, and also ensures
2429  *  that a subsequent breakpoint won't restart the cpu (if it was just in
2430  *  Idle in Program state).
2431  */
2432 static void
2433 drmach_cpu_obp_detach(int cpuid)
2434 {
2435 	/*
2436 	 * Cpu may not be under OBP's control. Eg, if cpu exited to download
2437 	 * helper on a prior attach.
2438 	 */
2439 	if (CPU_SGN_EXISTS(cpuid) &&
2440 			!SGN_CPU_IS_OS(cpuid) &&
2441 			!SGN_CPU_IS_OBP(cpuid)) {
2442 		cmn_err(CE_WARN,
2443 			"unexpected signature (0x%x) for cpu %d",
2444 			get_cpu_sgn(cpuid), cpuid);
2445 	}
2446 
2447 	/*
2448 	 * Now we place the CPU into the "Detached" idle loop in OBP.
2449 	 * This is so that the CPU won't be restarted if we break into
2450 	 * OBP with a breakpoint or BREAK key from the console, and also
2451 	 * if we need to do any special processing, such as flushing the
2452 	 * cpu's ecache, disabling interrupts (by turning of the ET bit in
2453 	 * the PSR) and/or spinning in BBSRAM rather than global memory.
2454 	 */
2455 	DRMACH_PR("prom_starfire_rm_cpu(%d)\n", cpuid);
2456 	prom_starfire_rm_cpu(cpuid);
2457 }
2458 
/*
 * drmach_cpu_obp_is_detached() reports whether the cpu's sigblock
 * signature state is SIGBST_DETACHED.  It returns 1 (TRUE) when the cpu
 * has no signature block at all, or when its signature is an OS signature
 * in the detached state; otherwise it returns 0 (FALSE).
 *
 * Only call this after OBP has been asked to detach the CPU.  It must NOT
 * be used as a check during any other flow.
 */
static int
drmach_cpu_obp_is_detached(int cpuid)
{
	if (!CPU_SGN_EXISTS(cpuid))
		return (1);

	if (SGN_CPU_IS_OS(cpuid) && SGN_CPU_STATE_IS_DETACHED(cpuid))
		return (1);

	return (0);
}
2474 
2475 static int
2476 drmach_cpu_start(struct cpu *cp)
2477 {
2478 	int		cpuid = cp->cpu_id;
2479 	int		ntries = drmach_cpu_ntries;
2480 	extern void	restart_other_cpu(int);
2481 
2482 	ASSERT(MUTEX_HELD(&cpu_lock));
2483 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
2484 
2485 	cp->cpu_flags &= ~CPU_POWEROFF;
2486 
2487 	/*
2488 	 * NOTE: restart_other_cpu pauses cpus during the
2489 	 *	 slave cpu start.  This helps to quiesce the
2490 	 *	 bus traffic a bit which makes the tick sync
2491 	 *	 routine in the prom more robust.
2492 	 */
2493 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
2494 
2495 	prom_starfire_add_cpu(cpuid);
2496 
2497 	restart_other_cpu(cpuid);
2498 
2499 	/*
2500 	 * Wait for the cpu to reach its idle thread before
2501 	 * we zap him with a request to blow away the mappings
2502 	 * he (might) have for the drmach_shutdown_asm code
2503 	 * he may have executed on unconfigure.
2504 	 */
2505 	while ((cp->cpu_thread != cp->cpu_idle_thread) && (ntries > 0)) {
2506 		DELAY(drmach_cpu_delay);
2507 		ntries--;
2508 	}
2509 
2510 	DRMACH_PR("waited %d out of %d loops for cpu %d\n",
2511 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cpuid);
2512 
2513 	xt_one(cpuid, vtag_flushpage_tl1,
2514 		(uint64_t)drmach_shutdown_va, (uint64_t)KCONTEXT);
2515 
2516 	return (0);
2517 }
2518 
2519 /*
2520  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
2521  * it has been offlined. The function of this routine is to get the cpu
2522  * spinning in a safe place. The requirement is that the system will not
2523  * reference anything on the detaching board (memory and i/o is detached
2524  * elsewhere) and that the CPU not reference anything on any other board
2525  * in the system.  This isolation is required during and after the writes
2526  * to the domain masks to remove the board from the domain.
2527  *
2528  * To accomplish this isolation the following is done:
2529  *	1) Create a locked mapping to a location in BBSRAM where
2530  *	   the cpu will execute.
2531  *	2) Copy the target function (drmach_shutdown_asm) in which
2532  *	   the cpu will execute into BBSRAM.
2533  *	3) Jump into function within BBSRAM.
2534  *	   Function will:
2535  *	   3.1) Flush its Ecache (displacement).
2536  *	   3.2) Flush its Dcache with HW mechanism.
2537  *	   3.3) Flush its Icache with HW mechanism.
2538  *	   3.4) Flush all valid and _unlocked_ D-TLB entries.
2539  *	   3.5) Flush all valid and _unlocked_ I-TLB entries.
2540  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
2541  *		recovered by drmach_cpu_poweroff().
2542  *	4) Jump into a tight loop.
2543  */
2544 #define	DRMACH_BBSRAM_OFFSET	0x1000
2545 
2546 static void
2547 drmach_cpu_stop_self(void)
2548 {
2549 	int		cpuid = (int)CPU->cpu_id;
2550 	tte_t		tte;
2551 	volatile uint_t	*src, *dst;
2552 	uint_t		funclen;
2553 	uint64_t	bbsram_pa, bbsram_offset;
2554 	uint_t		bbsram_pfn;
2555 	uint64_t	bbsram_addr;
2556 	void		(*bbsram_func)(uint64_t);
2557 	extern void	drmach_shutdown_asm(uint64_t);
2558 	extern void	drmach_shutdown_asm_end(void);
2559 
2560 	funclen = (uint_t)drmach_shutdown_asm_end - (uint_t)drmach_shutdown_asm;
2561 	ASSERT(funclen <= MMU_PAGESIZE);
2562 	/*
2563 	 * We'll start from the 0th's base.
2564 	 */
2565 	bbsram_pa = STARFIRE_UPAID2UPS(cpuid) | STARFIRE_PSI_BASE;
2566 	bbsram_offset = bbsram_pa | 0xfe0ULL;
2567 	bbsram_pa += ldphysio(bbsram_offset) + DRMACH_BBSRAM_OFFSET;
2568 
2569 	bbsram_pfn = (uint_t)(bbsram_pa >> MMU_PAGESHIFT);
2570 
2571 	bbsram_addr = (uint64_t)drmach_shutdown_va;
2572 	drmach_shutdown_asm_mbox->estack = bbsram_addr + (uint64_t)funclen;
2573 
2574 	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
2575 			TTE_PFN_INTHI(bbsram_pfn);
2576 	tte.tte_intlo = TTE_PFN_INTLO(bbsram_pfn) |
2577 			TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
2578 	sfmmu_dtlb_ld(drmach_shutdown_va, KCONTEXT, &tte);	/* load dtlb */
2579 	sfmmu_itlb_ld(drmach_shutdown_va, KCONTEXT, &tte);	/* load itlb */
2580 
2581 	for (src = (uint_t *)drmach_shutdown_asm, dst = (uint_t *)bbsram_addr;
2582 		src < (uint_t *)drmach_shutdown_asm_end; src++, dst++)
2583 		*dst = *src;
2584 
2585 	bbsram_func = (void (*)())bbsram_addr;
2586 	drmach_shutdown_asm_mbox->flushaddr = ecache_flushaddr;
2587 	drmach_shutdown_asm_mbox->size = (cpunodes[cpuid].ecache_size << 1);
2588 	drmach_shutdown_asm_mbox->linesize = cpunodes[cpuid].ecache_linesize;
2589 	drmach_shutdown_asm_mbox->physaddr
2590 				    = va_to_pa((void *)&drmach_xt_mb[cpuid]);
2591 
2592 	/*
2593 	 * Signal to drmach_cpu_poweroff() is via drmach_xt_mb cleared
2594 	 * by asm code
2595 	 */
2596 
2597 	(*bbsram_func)(va_to_pa((void *)drmach_shutdown_asm_mbox));
2598 }
2599 
2600 static void
2601 drmach_cpu_shutdown_self(void)
2602 {
2603 	cpu_t		*cp = CPU;
2604 	int		cpuid = cp->cpu_id;
2605 	extern void	flush_windows(void);
2606 
2607 	flush_windows();
2608 
2609 	(void) spl8();
2610 
2611 	ASSERT(cp->cpu_intr_actv == 0);
2612 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread ||
2613 	    cp->cpu_thread == cp->cpu_startup_thread);
2614 
2615 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2616 
2617 	drmach_cpu_stop_self();
2618 
2619 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
2620 }
2621 
2622 /* a helper routine to keep the math in one place */
2623 static processorid_t
2624 drmach_cpu_calc_id(drmach_device_t *dp)
2625 {
2626 	return (dp->bp->bnum * MAX_CPU_UNITS_PER_BOARD + dp->unum);
2627 }
2628 
2629 /*
2630  * Move bootproc (SIGBCPU) to another cpu.  If dst_cpu is NULL, a
2631  * destination cpu is chosen from the set of cpus not located on the
2632  * same board as the current bootproc cpu.
2633  */
2634 static sbd_error_t *
2635 drmach_cpu_juggle_bootproc(drmach_device_t *dst_cpu)
2636 {
2637 	processorid_t	 cpuid;
2638 	struct cpu	*cp;
2639 	sbd_error_t	*err;
2640 	int		 rv;
2641 
2642 	ASSERT(MUTEX_HELD(&cpu_lock));
2643 
2644 	/* dst_cpu is NULL when target cpu is unspecified. So, pick one. */
2645 	if (dst_cpu == NULL) {
2646 		int avoid_board = DRMACH_CPUID2BNUM(SIGBCPU->cpu_id);
2647 		int max_cpuid = MAX_BOARDS * MAX_CPU_UNITS_PER_BOARD;
2648 
2649 		for (cpuid = 0; cpuid < max_cpuid; cpuid++)
2650 			if (DRMACH_CPUID2BNUM(cpuid) != avoid_board) {
2651 				cp = cpu_get(cpuid);
2652 				if (cp != NULL && cpu_is_online(cp))
2653 					break;
2654 			}
2655 
2656 		if (cpuid == max_cpuid) {
2657 			err = drerr_new(1, ESTF_JUGGLE, NULL);
2658 			return (err);
2659 		}
2660 
2661 		/* else, cp points to the selected target cpu */
2662 	} else {
2663 		cpuid = drmach_cpu_calc_id(dst_cpu);
2664 
2665 		if ((cp = cpu_get(cpuid)) == NULL) {
2666 			err = drerr_new(1, ESTF_NODEV, "%s::%s",
2667 				dst_cpu->bp->cm.name, dst_cpu->cm.name);
2668 			return (err);
2669 		}
2670 
2671 		if (cpuid == SIGBCPU->cpu_id) {
2672 			cmn_err(CE_WARN,
2673 				"SIGBCPU(%d) same as new selection(%d)",
2674 				SIGBCPU->cpu_id, cpuid);
2675 
2676 			/* technically not an error, but a no-op */
2677 			return (NULL);
2678 		}
2679 	}
2680 
2681 	cmn_err(CE_NOTE, "?relocating SIGBCPU from %d to %d",
2682 		SIGBCPU->cpu_id, cpuid);
2683 
2684 	DRMACH_PR("moving SIGBCPU to CPU %d\n", cpuid);
2685 
2686 	/*
2687 	 * Tell OBP to initialize cvc-offset field of new CPU0
2688 	 * so that it's in sync with OBP and cvc_server
2689 	 */
2690 	prom_starfire_init_console(cpuid);
2691 
2692 	/*
2693 	 * Assign cvc to new cpu0's bbsram for I/O.  This has to be
2694 	 * done BEFORE cpu0 is moved via obp, since this logic
2695 	 * will cause obp_helper to switch to a different bbsram for
2696 	 * cvc I/O.  We don't want cvc writing to a buffer from which
2697 	 * nobody will pick up the data!
2698 	 */
2699 	cvc_assign_iocpu(cpuid);
2700 
2701 	rv = prom_starfire_move_cpu0(cpuid);
2702 
2703 	if (rv == 0) {
2704 		SIGBCPU = cp;
2705 
2706 		DRMACH_PR("successfully juggled to CPU %d\n", cpuid);
2707 		return (NULL);
2708 	} else {
2709 		DRMACH_PR("prom error: prom_starfire_move_cpu0(%d) "
2710 			"returned %d\n", cpuid, rv);
2711 
2712 		/*
2713 		 * The move failed, hopefully obp_helper is still back
2714 		 * at the old bootproc.  Move cvc back there.
2715 		 */
2716 		cvc_assign_iocpu(SIGBCPU->cpu_id);
2717 
2718 
2719 		err = drerr_new(1, ESTF_MOVESIGB, "CPU %d", cpuid);
2720 		return (err);
2721 	}
2722 	/*NOTREACHED*/
2723 }
2724 
2725 static sbd_error_t *
2726 drmach_cpu_release(drmachid_t id)
2727 {
2728 	drmach_device_t	*dp;
2729 	processorid_t	 cpuid;
2730 	struct cpu	*cp;
2731 	sbd_error_t	*err;
2732 
2733 	if (!DRMACH_IS_CPU_ID(id))
2734 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2735 	dp = id;
2736 	cpuid = drmach_cpu_calc_id(dp);
2737 
2738 	ASSERT(MUTEX_HELD(&cpu_lock));
2739 
2740 	cp = cpu_get(cpuid);
2741 	if (cp == NULL)
2742 		err = DRMACH_INTERNAL_ERROR();
2743 	else if (SIGBCPU->cpu_id == cp->cpu_id)
2744 		err = drmach_cpu_juggle_bootproc(NULL);
2745 	else
2746 		err = NULL;
2747 
2748 	return (err);
2749 }
2750 
2751 static sbd_error_t *
2752 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
2753 {
2754 	drmach_device_t *dp;
2755 
2756 	ASSERT(DRMACH_IS_CPU_ID(id));
2757 	dp = id;
2758 
2759 	stat->assigned = dp->bp->assigned;
2760 	stat->powered = dp->bp->powered;
2761 	mutex_enter(&cpu_lock);
2762 	stat->configured = (cpu_get(drmach_cpu_calc_id(dp)) != NULL);
2763 	mutex_exit(&cpu_lock);
2764 	stat->busy = dp->busy;
2765 	strncpy(stat->type, dp->type, sizeof (stat->type));
2766 	stat->info[0] = '\0';
2767 
2768 	return (NULL);
2769 }
2770 
2771 sbd_error_t *
2772 drmach_cpu_disconnect(drmachid_t id)
2773 {
2774 	drmach_device_t	*cpu;
2775 	int		 cpuid;
2776 	int		 ntries;
2777 	int		 p;
2778 	u_longlong_t	 pc_addr;
2779 	uchar_t		 rvalue;
2780 
2781 	if (!DRMACH_IS_CPU_ID(id))
2782 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2783 	cpu = id;
2784 
2785 	cpuid = drmach_cpu_calc_id(cpu);
2786 	if (SIGBCPU->cpu_id == cpuid) {
2787 		/* this cpu is SIGBCPU, can't disconnect */
2788 		return (drerr_new(1, ESTF_HASSIGB, "%s::%s",
2789 				cpu->bp->cm.name, cpu->cm.name));
2790 	}
2791 
2792 	/*
2793 	 * Make sure SIGBST_DETACHED is set before
2794 	 * mapping out the sig block.
2795 	 */
2796 	ntries = drmach_cpu_ntries;
2797 	while (!drmach_cpu_obp_is_detached(cpuid) && ntries) {
2798 		DELAY(drmach_cpu_delay);
2799 		ntries--;
2800 	}
2801 	if (!drmach_cpu_obp_is_detached(cpuid)) {
2802 		cmn_err(CE_WARN, "failed to mark cpu %d detached in sigblock",
2803 			cpuid);
2804 	}
2805 
2806 	/* map out signature block */
2807 	if (CPU_SGN_EXISTS(cpuid)) {
2808 		CPU_SGN_MAPOUT(cpuid);
2809 	}
2810 
2811 	/*
2812 	 * We now PC IDLE the processor to guarantee we
2813 	 * stop any transactions from coming from it.
2814 	 */
2815 	p = cpu->unum & 1;
2816 	pc_addr = STARFIRE_BB_PC_ADDR(cpu->bp->bnum, cpu->unum, 0);
2817 
2818 	DRMACH_PR("PC idle cpu %d (addr = 0x%llx, port = %d, p = %d)",
2819 		drmach_cpu_calc_id(cpu), pc_addr, cpu->unum, p);
2820 
2821 	rvalue = ldbphysio(pc_addr);
2822 	rvalue |= STARFIRE_BB_PC_IDLE(p);
2823 	stbphysio(pc_addr, rvalue);
2824 	DELAY(50000);
2825 
2826 	return (NULL);
2827 }
2828 
2829 sbd_error_t *
2830 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
2831 {
2832 	drmach_device_t *cpu;
2833 
2834 	if (!DRMACH_IS_CPU_ID(id))
2835 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2836 	cpu = id;
2837 
2838 	*cpuid = drmach_cpu_calc_id(cpu);
2839 	return (NULL);
2840 }
2841 
2842 sbd_error_t *
2843 drmach_cpu_get_impl(drmachid_t id, int *ip)
2844 {
2845 	drmach_device_t *cpu;
2846 	int		impl;
2847 
2848 	if (!DRMACH_IS_CPU_ID(id))
2849 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2850 
2851 	cpu = id;
2852 
2853 	if (drmach_node_get_prop(cpu->node, "implementation#", &impl) == -1) {
2854 		return (DRMACH_INTERNAL_ERROR());
2855 	}
2856 
2857 	*ip = impl;
2858 
2859 	return (NULL);
2860 }
2861 
2862 void
2863 drmach_cpu_flush_ecache_sync(void)
2864 {
2865 	ASSERT(curthread->t_bound_cpu == CPU);
2866 
2867 	/*
2868 	 * Now let's flush our ecache thereby removing all references
2869 	 * to the target (detaching) memory from all ecache's in
2870 	 * system.
2871 	 */
2872 	cpu_flush_ecache();
2873 
2874 	/*
2875 	 * Delay 100 usec out of paranoia to insure everything
2876 	 * (hardware queues) has drained before we start reprogramming
2877 	 * the hardware.
2878 	 */
2879 	DELAY(100);
2880 }
2881 
2882 sbd_error_t *
2883 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2884 {
2885 	drmach_device_t	*dp;
2886 
2887 	if (!DRMACH_IS_DEVICE_ID(id))
2888 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2889 	dp = id;
2890 
2891 	*dip = drmach_node_get_dip(dp->node);
2892 	return (NULL);
2893 }
2894 
2895 sbd_error_t *
2896 drmach_io_is_attached(drmachid_t id, int *yes)
2897 {
2898 	drmach_device_t *dp;
2899 	dev_info_t	*dip;
2900 	int		state;
2901 
2902 	if (!DRMACH_IS_IO_ID(id))
2903 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2904 	dp = id;
2905 
2906 	dip = drmach_node_get_dip(dp->node);
2907 	if (dip == NULL) {
2908 		*yes = 0;
2909 		return (NULL);
2910 	}
2911 
2912 	state = ddi_get_devstate(dip);
2913 	*yes = (i_ddi_devi_attached(dip) || (state == DDI_DEVSTATE_UP));
2914 
2915 	return (NULL);
2916 }
2917 
2918 sbd_error_t *
2919 drmach_io_pre_release(drmachid_t id)
2920 {
2921 	if (!DRMACH_IS_IO_ID(id))
2922 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2923 	return (NULL);
2924 }
2925 
2926 static sbd_error_t *
2927 drmach_io_release(drmachid_t id)
2928 {
2929 	if (!DRMACH_IS_IO_ID(id))
2930 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2931 	return (NULL);
2932 }
2933 
2934 sbd_error_t *
2935 drmach_io_unrelease(drmachid_t id)
2936 {
2937 	if (!DRMACH_IS_IO_ID(id))
2938 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2939 	return (NULL);
2940 }
2941 
2942 /*ARGSUSED*/
2943 sbd_error_t *
2944 drmach_io_post_release(drmachid_t id)
2945 {
2946 	return (NULL);
2947 }
2948 
2949 /*ARGSUSED*/
2950 sbd_error_t *
2951 drmach_io_post_attach(drmachid_t id)
2952 {
2953 	return (NULL);
2954 }
2955 
2956 static sbd_error_t *
2957 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2958 {
2959 	drmach_device_t *dp;
2960 	sbd_error_t	*err;
2961 	int		 configured;
2962 
2963 	ASSERT(DRMACH_IS_IO_ID(id));
2964 	dp = id;
2965 
2966 	err = drmach_io_is_attached(id, &configured);
2967 	if (err)
2968 		return (err);
2969 
2970 	stat->assigned = dp->bp->assigned;
2971 	stat->powered = dp->bp->powered;
2972 	stat->configured = (configured != 0);
2973 	stat->busy = dp->busy;
2974 	strncpy(stat->type, dp->type, sizeof (stat->type));
2975 	stat->info[0] = '\0';
2976 
2977 	return (NULL);
2978 }
2979 
2980 static sbd_error_t *
2981 drmach_mem_new(drmach_device_t *dp)
2982 {
2983 	static sbd_error_t *drmach_mem_release(drmachid_t);
2984 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2985 
2986 	dp->unum = 0;
2987 	dp->cm.isa = (void *)drmach_mem_new;
2988 	dp->cm.release = drmach_mem_release;
2989 	dp->cm.status = drmach_mem_status;
2990 
2991 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s", dp->type);
2992 
2993 	return (NULL);
2994 }
2995 
2996 sbd_error_t *
2997 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2998 {
2999 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
3000 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
3001 	pda_handle_t	ph;
3002 	int		rv;
3003 
3004 	ASSERT(size != 0);
3005 
3006 	if (!DRMACH_IS_MEM_ID(id))
3007 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3008 
3009 	kcage_range_lock();
3010 	rv = kcage_range_add(basepfn, npages, 1);
3011 	kcage_range_unlock();
3012 	if (rv == ENOMEM) {
3013 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
3014 			(size == 0 ? 0 : size / MBYTE));
3015 	} else if (rv != 0) {
3016 		/* catch this in debug kernels */
3017 		ASSERT(0);
3018 
3019 		cmn_err(CE_WARN, "unexpected kcage_range_add"
3020 			" return value %d", rv);
3021 	}
3022 
3023 	/*
3024 	 * Update the PDA (post2obp) structure with the
3025 	 * range of the newly added memory.
3026 	 */
3027 	ph = drmach_pda_open();
3028 	if (ph != NULL) {
3029 		pda_mem_add_span(ph, basepa, size);
3030 		pda_close(ph);
3031 	}
3032 
3033 	return (NULL);
3034 }
3035 
3036 sbd_error_t *
3037 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
3038 {
3039 	drmach_device_t	*mem = id;
3040 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
3041 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
3042 	uint_t		mcreg;
3043 	sbd_error_t	*err;
3044 	pda_handle_t	ph;
3045 	int		rv;
3046 
3047 	err = drmach_read_mc_asr(id, &mcreg);
3048 	if (err)
3049 		return (err);
3050 	else if (mcreg & STARFIRE_MC_INTERLEAVE_MASK) {
3051 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
3052 				mem->bp->cm.name, mem->cm.name));
3053 	}
3054 
3055 	if (size > 0) {
3056 		kcage_range_lock();
3057 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
3058 		kcage_range_unlock();
3059 		if (rv != 0) {
3060 			cmn_err(CE_WARN,
3061 			    "unexpected kcage_range_delete_post_mem_del"
3062 			    " return value %d", rv);
3063 			return (DRMACH_INTERNAL_ERROR());
3064 		}
3065 	}
3066 
3067 	/*
3068 	 * Update the PDA (post2obp) structure with the
3069 	 * range of removed memory.
3070 	 */
3071 	ph = drmach_pda_open();
3072 	if (ph != NULL) {
3073 		if (size > 0)
3074 			pda_mem_del_span(ph, basepa, size);
3075 
3076 		/* update PDA to board's new mc register settings */
3077 		pda_mem_sync(ph, mem->bp->bnum, 0);
3078 
3079 		pda_close(ph);
3080 	}
3081 
3082 	return (NULL);
3083 }
3084 
3085 /* support routine for enable and disable */
3086 static sbd_error_t *
3087 drmach_mem_update_interconnect(drmachid_t id, uint_t mcreg)
3088 {
3089 	drmach_device_t	*dp;
3090 	pda_handle_t	 ph;
3091 	int		 b;
3092 
3093 	if (!DRMACH_IS_MEM_ID(id))
3094 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3095 	dp = id;
3096 
3097 	ph = drmach_pda_open();
3098 	if (ph == NULL)
3099 		return (DRMACH_INTERNAL_ERROR());
3100 
3101 	for (b = 0; b < MAX_BOARDS; b++) {
3102 		int		p;
3103 		int		rv;
3104 		ushort_t	bda_proc, bda_ioc;
3105 		board_desc_t	*bdesc;
3106 
3107 		if (pda_board_present(ph, b) == 0)
3108 			continue;
3109 
3110 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
3111 
3112 		/*
3113 		 * Update PCs for CPUs.
3114 		 */
3115 
3116 		/* make sure definition in platmod is in sync with pda */
3117 		ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3118 
3119 		bda_proc = bdesc->bda_proc;
3120 		for (p = 0; p < MAX_PROCMODS; p++) {
3121 			if (BDA_NBL(bda_proc, p) != BDAN_GOOD)
3122 				continue;
3123 
3124 			rv = pc_madr_add(b, dp->bp->bnum, p, mcreg);
3125 			if (rv) {
3126 				pda_close(ph);
3127 				return (DRMACH_INTERNAL_ERROR());
3128 			}
3129 		}
3130 
3131 		/*
3132 		 * Update PCs for IOCs.
3133 		 */
3134 
3135 		/* make sure definition in platmod is in sync with pda */
3136 		ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3137 
3138 		bda_ioc = bdesc->bda_ioc;
3139 		for (p = 0; p < MAX_IOCS; p++) {
3140 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
3141 				continue;
3142 
3143 			rv = pc_madr_add(b, dp->bp->bnum, p + 4, mcreg);
3144 			if (rv) {
3145 				pda_close(ph);
3146 				return (DRMACH_INTERNAL_ERROR());
3147 			}
3148 		}
3149 	}
3150 
3151 	pda_close(ph);
3152 	return (NULL);
3153 }
3154 
3155 sbd_error_t *
3156 drmach_mem_disable(drmachid_t id)
3157 {
3158 	sbd_error_t	*err;
3159 	uint_t		 mcreg;
3160 
3161 	err = drmach_read_mc_asr(id, &mcreg);
3162 	if (err == NULL) {
3163 		ASSERT(mcreg & STARFIRE_MC_MEM_PRESENT_MASK);
3164 
3165 		/* Turn off presence bit. */
3166 		mcreg &= ~STARFIRE_MC_MEM_PRESENT_MASK;
3167 
3168 		err = drmach_mem_update_interconnect(id, mcreg);
3169 		if (err == NULL)
3170 			err = drmach_write_mc_asr(id, mcreg);
3171 	}
3172 
3173 	return (err);
3174 }
3175 
3176 sbd_error_t *
3177 drmach_mem_enable(drmachid_t id)
3178 {
3179 	sbd_error_t	*err;
3180 	uint_t		 mcreg;
3181 
3182 	err = drmach_read_mc_asr(id, &mcreg);
3183 	if (err == NULL) {
3184 		mcreg |= STARFIRE_MC_MEM_PRESENT_MASK;
3185 
3186 		err = drmach_write_mc_asr(id, mcreg);
3187 		if (err == NULL)
3188 			err = drmach_mem_update_interconnect(id, mcreg);
3189 	}
3190 
3191 	return (err);
3192 }
3193 
3194 sbd_error_t *
3195 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
3196 {
3197 	drmach_device_t	*mem;
3198 	sbd_error_t	*err;
3199 	pnode_t		 nodeid;
3200 
3201 	if (!DRMACH_IS_MEM_ID(id))
3202 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3203 	mem = id;
3204 
3205 	nodeid = drmach_node_get_dnode(mem->node);
3206 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
3207 		err = DRMACH_INTERNAL_ERROR();
3208 	else {
3209 		uint64_t size;
3210 
3211 		size = mc_get_alignment_mask(nodeid);
3212 		if (size == (uint64_t)-1)
3213 			err = DRMACH_INTERNAL_ERROR();
3214 		else {
3215 			*mask = size - 1;
3216 			err = NULL;
3217 		}
3218 	}
3219 
3220 	return (err);
3221 }
3222 
3223 sbd_error_t *
3224 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
3225 {
3226 	sbd_error_t	*err;
3227 	uint_t		 mcreg;
3228 
3229 	err = drmach_read_mc_asr(id, &mcreg);
3230 	if (err == NULL)
3231 		*pa = mc_asr_to_pa(mcreg);
3232 
3233 	return (err);
3234 }
3235 
3236 /*
3237  * Use of this routine after copy/rename will yield incorrect results,
3238  * because the OBP MEMAVAIL property will not correctly reflect the
3239  * programming of the MCs.
3240  */
3241 sbd_error_t *
3242 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
3243 {
3244 	drmach_device_t	*mem;
3245 	int		rv, i, rlen, rblks;
3246 	sbd_error_t	*err;
3247 	struct memlist	*mlist;
3248 	struct sf_memunit_regspec *rlist;
3249 
3250 	if (!DRMACH_IS_MEM_ID(id))
3251 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3252 	mem = id;
3253 
3254 	err = drmach_device_get_proplen(mem, "dr-available", &rlen);
3255 	if (err)
3256 		return (err);
3257 
3258 	rlist = kmem_zalloc(rlen, KM_SLEEP);
3259 
3260 	err = drmach_device_get_prop(mem, "dr-available", rlist);
3261 	if (err) {
3262 		kmem_free(rlist, rlen);
3263 		return (err);
3264 	}
3265 
3266 	mlist = NULL;
3267 	rblks = rlen / sizeof (struct sf_memunit_regspec);
3268 	for (i = 0; i < rblks; i++) {
3269 		uint64_t	addr, size;
3270 
3271 		addr  = (uint64_t)rlist[i].regspec_addr_hi << 32;
3272 		addr |= (uint64_t)rlist[i].regspec_addr_lo;
3273 		size  = (uint64_t)rlist[i].regspec_size_hi << 32;
3274 		size |= (uint64_t)rlist[i].regspec_size_lo;
3275 
3276 		mlist = memlist_add_span(mlist, addr, size);
3277 	}
3278 
3279 	kmem_free(rlist, rlen);
3280 
3281 	/*
3282 	 * Make sure the incoming memlist doesn't already
3283 	 * intersect with what's present in the system (phys_install).
3284 	 */
3285 	memlist_read_lock();
3286 	rv = memlist_intersect(phys_install, mlist);
3287 	memlist_read_unlock();
3288 	if (rv) {
3289 #ifdef DEBUG
3290 		DRMACH_PR("OBP derived memlist intersects"
3291 			" with phys_install\n");
3292 		memlist_dump(mlist);
3293 
3294 		DRMACH_PR("phys_install memlist:\n");
3295 		memlist_dump(phys_install);
3296 #endif
3297 
3298 		memlist_delete(mlist);
3299 		return (DRMACH_INTERNAL_ERROR());
3300 	}
3301 
3302 #ifdef DEBUG
3303 	DRMACH_PR("OBP derived memlist:");
3304 	memlist_dump(mlist);
3305 #endif
3306 
3307 	*ml = mlist;
3308 	return (NULL);
3309 }
3310 
3311 sbd_error_t *
3312 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
3313 {
3314 	drmach_device_t	*mem;
3315 	pda_handle_t	ph;
3316 	pgcnt_t		npages;
3317 
3318 	if (!DRMACH_IS_MEM_ID(id))
3319 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3320 	mem = id;
3321 
3322 	ph = drmach_pda_open();
3323 	if (ph == NULL)
3324 		return (DRMACH_INTERNAL_ERROR());
3325 
3326 	npages = pda_get_mem_size(ph, mem->bp->bnum);
3327 	*bytes = (uint64_t)npages << PAGESHIFT;
3328 
3329 	pda_close(ph);
3330 	return (NULL);
3331 }
3332 
3333 sbd_error_t *
3334 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
3335 {
3336 	if (!DRMACH_IS_MEM_ID(id))
3337 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3338 
3339 	*bytes = mc_get_mem_alignment();
3340 	return (NULL);
3341 }
3342 
3343 /* field debugging tool */
3344 processorid_t drmach_mem_cpu_affinity_nail = 0;
3345 
3346 processorid_t
3347 drmach_mem_cpu_affinity(drmachid_t id)
3348 {
3349 	drmach_device_t	*mp;
3350 	drmach_board_t	*bp;
3351 	processorid_t	 cpuid;
3352 
3353 	if (!DRMACH_IS_MEM_ID(id))
3354 		return (CPU_CURRENT);
3355 
3356 	if (drmach_mem_cpu_affinity_nail) {
3357 		cpuid = drmach_mem_cpu_affinity_nail;
3358 
3359 		if (cpuid < 0 || cpuid > NCPU)
3360 			return (CPU_CURRENT);
3361 
3362 		mutex_enter(&cpu_lock);
3363 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
3364 			cpuid = CPU_CURRENT;
3365 		mutex_exit(&cpu_lock);
3366 
3367 		return (cpuid);
3368 	}
3369 
3370 	/* try to choose a proc on the target board */
3371 	mp = id;
3372 	bp = mp->bp;
3373 	if (bp->devices) {
3374 		int		rv;
3375 		int		d_idx;
3376 		drmachid_t	d_id;
3377 
3378 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3379 		while (rv == 0) {
3380 			if (DRMACH_IS_CPU_ID(d_id)) {
3381 				cpuid = drmach_cpu_calc_id(d_id);
3382 
3383 				mutex_enter(&cpu_lock);
3384 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
3385 					mutex_exit(&cpu_lock);
3386 					DRMACH_PR("drmach_mem_cpu_affinity: "
3387 					    "selected cpuid=%d\n", cpuid);
3388 					return (cpuid);
3389 				} else {
3390 					mutex_exit(&cpu_lock);
3391 				}
3392 			}
3393 
3394 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3395 		}
3396 	}
3397 
3398 	/* otherwise, this proc, wherever it is */
3399 	DRMACH_PR("drmach_mem_cpu_affinity: using default CPU_CURRENT\n");
3400 
3401 	return (CPU_CURRENT);
3402 }
3403 
3404 static sbd_error_t *
3405 drmach_mem_release(drmachid_t id)
3406 {
3407 	if (!DRMACH_IS_MEM_ID(id))
3408 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3409 	return (NULL);
3410 }
3411 
3412 static sbd_error_t *
3413 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
3414 {
3415 	drmach_device_t *dp;
3416 	sbd_error_t	*err;
3417 	uint64_t	 pa, slice_size;
3418 	struct memlist	*ml;
3419 
3420 	ASSERT(DRMACH_IS_MEM_ID(id));
3421 	dp = id;
3422 
3423 	/* get starting physical address of target memory */
3424 	err = drmach_mem_get_base_physaddr(id, &pa);
3425 	if (err)
3426 		return (err);
3427 
3428 	/* round down to slice boundary */
3429 	slice_size = mc_get_mem_alignment();
3430 	pa &= ~ (slice_size - 1);
3431 
3432 	/* stop at first span that is in slice */
3433 	memlist_read_lock();
3434 	for (ml = phys_install; ml; ml = ml->next)
3435 		if (ml->address >= pa && ml->address < pa + slice_size)
3436 			break;
3437 	memlist_read_unlock();
3438 
3439 	stat->assigned = dp->bp->assigned;
3440 	stat->powered = dp->bp->powered;
3441 	stat->configured = (ml != NULL);
3442 	stat->busy = dp->busy;
3443 	strncpy(stat->type, dp->type, sizeof (stat->type));
3444 	stat->info[0] = '\0';
3445 
3446 	return (NULL);
3447 }
3448 
3449 static int
3450 drmach_detach_board(void *arg)
3451 {
3452 	cpuset_t	cset;
3453 	int		retval;
3454 	drmach_board_t	*bp = (drmach_board_t *)arg;
3455 
3456 	cset = cpu_ready_set;
3457 	promsafe_xc_attention(cset);
3458 
3459 	retval = prom_starfire_rm_brd(bp->bnum);
3460 
3461 	xc_dismissed(cset);
3462 
3463 	return (retval);
3464 }
3465 
3466 sbd_error_t *
3467 drmach_board_deprobe(drmachid_t id)
3468 {
3469 	drmach_board_t	*bp;
3470 	int		 retval;
3471 
3472 	if (!DRMACH_IS_BOARD_ID(id))
3473 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3474 	bp = id;
3475 
3476 	cmn_err(CE_CONT, "DR: PROM detach board %d\n", bp->bnum);
3477 
3478 	retval = prom_tree_update(drmach_detach_board, bp);
3479 
3480 	if (retval == 0)
3481 		return (NULL);
3482 	else {
3483 		cmn_err(CE_WARN, "prom error: prom_starfire_rm_brd(%d) "
3484 			"returned %d", bp->bnum, retval);
3485 		return (drerr_new(1, ESTF_DEPROBE, "%s", bp->cm.name));
3486 	}
3487 }
3488 
3489 /*ARGSUSED*/
3490 static sbd_error_t *
3491 drmach_pt_juggle_bootproc(drmachid_t id, drmach_opts_t *opts)
3492 {
3493 	drmach_device_t	*cpu;
3494 	sbd_error_t	*err;
3495 
3496 	if (!DRMACH_IS_CPU_ID(id))
3497 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3498 	cpu = id;
3499 
3500 	mutex_enter(&cpu_lock);
3501 
3502 	err = drmach_cpu_juggle_bootproc(cpu);
3503 
3504 	mutex_exit(&cpu_lock);
3505 
3506 	return (err);
3507 }
3508 
3509 /*ARGSUSED*/
3510 static sbd_error_t *
3511 drmach_pt_dump_pdainfo(drmachid_t id, drmach_opts_t *opts)
3512 {
3513 	drmach_board_t	*bp;
3514 	int		board;
3515 	int		i;
3516 	pda_handle_t	ph;
3517 	board_desc_t	*bdesc;
3518 
3519 	if (!DRMACH_IS_BOARD_ID(id))
3520 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3521 	bp = id;
3522 	board = bp->bnum;
3523 
3524 	ph = drmach_pda_open();
3525 	if (ph == NULL)
3526 		return (DRMACH_INTERNAL_ERROR());
3527 
3528 	if (pda_board_present(ph, board) == 0) {
3529 		cmn_err(CE_CONT, "board %d is MISSING\n", board);
3530 		pda_close(ph);
3531 		return (DRMACH_INTERNAL_ERROR());
3532 	}
3533 
3534 	cmn_err(CE_CONT, "board %d is PRESENT\n", board);
3535 
3536 	bdesc = (board_desc_t *)pda_get_board_info(ph, board);
3537 	if (bdesc == NULL) {
3538 		cmn_err(CE_CONT,
3539 			"no board descriptor found for board %d\n",
3540 			board);
3541 		pda_close(ph);
3542 		return (DRMACH_INTERNAL_ERROR());
3543 	}
3544 
3545 	/* make sure definition in platmod is in sync with pda */
3546 	ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3547 
3548 	for (i = 0; i < MAX_PROCMODS; i++) {
3549 		if (BDA_NBL(bdesc->bda_proc, i) == BDAN_GOOD)
3550 			cmn_err(CE_CONT,
3551 				"proc %d.%d PRESENT\n", board, i);
3552 		else
3553 			cmn_err(CE_CONT,
3554 				"proc %d.%d MISSING\n", board, i);
3555 	}
3556 
3557 	for (i = 0; i < MAX_MGROUPS; i++) {
3558 		if (BDA_NBL(bdesc->bda_mgroup, i) == BDAN_GOOD)
3559 			cmn_err(CE_CONT,
3560 				"mgroup %d.%d PRESENT\n", board, i);
3561 		else
3562 			cmn_err(CE_CONT,
3563 				"mgroup %d.%d MISSING\n", board, i);
3564 	}
3565 
3566 	/* make sure definition in platmod is in sync with pda */
3567 	ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3568 
3569 	for (i = 0; i < MAX_IOCS; i++) {
3570 		int	s;
3571 
3572 		if (BDA_NBL(bdesc->bda_ioc, i) == BDAN_GOOD) {
3573 			cmn_err(CE_CONT,
3574 				"ioc %d.%d PRESENT\n", board, i);
3575 			for (s = 0; s < MAX_SLOTS_PER_IOC; s++) {
3576 				if (BDA_NBL(bdesc->bda_ios[i], s) != BDAN_GOOD)
3577 					continue;
3578 				cmn_err(CE_CONT,
3579 					"..scard %d.%d.%d PRESENT\n",
3580 					board, i, s);
3581 			}
3582 		} else {
3583 			cmn_err(CE_CONT,
3584 				"ioc %d.%d MISSING\n",
3585 				board, i);
3586 		}
3587 	}
3588 
3589 	cmn_err(CE_CONT,
3590 		"board %d memsize = %d pages\n",
3591 		board, pda_get_mem_size(ph, board));
3592 
3593 	pda_close(ph);
3594 
3595 	return (NULL);
3596 }
3597 
3598 /*ARGSUSED*/
3599 sbd_error_t *
3600 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
3601 {
3602 	struct memlist	*ml;
3603 	uint64_t	src_pa;
3604 	uint64_t	dst_pa;
3605 	uint64_t	dst;
3606 
3607 	dst_pa = va_to_pa(&dst);
3608 
3609 	memlist_read_lock();
3610 	for (ml = phys_install; ml; ml = ml->next) {
3611 		uint64_t	nbytes;
3612 
3613 		src_pa = ml->address;
3614 		nbytes = ml->size;
3615 
3616 		while (nbytes != 0ull) {
3617 
3618 			/* copy 32 bytes at arc_pa to dst_pa */
3619 			bcopy32_il(src_pa, dst_pa);
3620 
3621 			/* increment by 32 bytes */
3622 			src_pa += (4 * sizeof (uint64_t));
3623 
3624 			/* decrement by 32 bytes */
3625 			nbytes -= (4 * sizeof (uint64_t));
3626 		}
3627 	}
3628 	memlist_read_unlock();
3629 
3630 	return (NULL);
3631 }
3632 
3633 static struct {
3634 	const char	*name;
3635 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
3636 } drmach_pt_arr[] = {
3637 	{ "juggle",		drmach_pt_juggle_bootproc	},
3638 	{ "pda",		drmach_pt_dump_pdainfo		},
3639 	{ "readmem",		drmach_pt_readmem		},
3640 
3641 	/* the following line must always be last */
3642 	{ NULL,			NULL				}
3643 };
3644 
3645 /*ARGSUSED*/
3646 sbd_error_t *
3647 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
3648 {
3649 	int		i;
3650 	sbd_error_t	*err;
3651 
3652 	i = 0;
3653 	while (drmach_pt_arr[i].name != NULL) {
3654 		int len = strlen(drmach_pt_arr[i].name);
3655 
3656 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
3657 			break;
3658 
3659 		i += 1;
3660 	}
3661 
3662 	if (drmach_pt_arr[i].name == NULL)
3663 		err = drerr_new(0, ESTF_UNKPTCMD, opts->copts);
3664 	else
3665 		err = (*drmach_pt_arr[i].handler)(id, opts);
3666 
3667 	return (err);
3668 }
3669 
3670 sbd_error_t *
3671 drmach_release(drmachid_t id)
3672 {
3673 	drmach_common_t *cp;
3674 	if (!DRMACH_IS_DEVICE_ID(id))
3675 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3676 	cp = id;
3677 
3678 	return (cp->release(id));
3679 }
3680 
3681 sbd_error_t *
3682 drmach_status(drmachid_t id, drmach_status_t *stat)
3683 {
3684 	drmach_common_t *cp;
3685 
3686 	if (!DRMACH_IS_ID(id))
3687 		return (drerr_new(0, ESTF_NOTID, NULL));
3688 	cp = id;
3689 
3690 	return (cp->status(id, stat));
3691 }
3692 
3693 sbd_error_t *
3694 drmach_unconfigure(drmachid_t id, int flags)
3695 {
3696 	drmach_device_t	*dp;
3697 	pnode_t		 nodeid;
3698 	dev_info_t	*dip, *fdip = NULL;
3699 	uint_t 		ddi_flags;
3700 
3701 	if (!DRMACH_IS_DEVICE_ID(id))
3702 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3703 
3704 	dp = id;
3705 
3706 	nodeid = drmach_node_get_dnode(dp->node);
3707 	if (nodeid == OBP_NONODE)
3708 		return (DRMACH_INTERNAL_ERROR());
3709 
3710 	dip = e_ddi_nodeid_to_dip(nodeid);
3711 	if (dip == NULL)
3712 		return (NULL);
3713 
3714 	/*
3715 	 * Branch already held, so hold acquired in
3716 	 * e_ddi_nodeid_to_dip() can be released
3717 	 */
3718 	ddi_release_devi(dip);
3719 
3720 	ddi_flags = 0;
3721 
3722 	if (flags & DRMACH_DEVI_REMOVE)
3723 		ddi_flags |= DEVI_BRANCH_DESTROY | DEVI_BRANCH_EVENT;
3724 
3725 	/*
3726 	 * Force flag is no longer necessary. See starcat/io/drmach.c
3727 	 * for details.
3728 	 */
3729 	ASSERT(e_ddi_branch_held(dip));
3730 	if (e_ddi_branch_unconfigure(dip, &fdip, ddi_flags)) {
3731 		sbd_error_t	*err;
3732 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3733 
3734 		/*
3735 		 * If non-NULL, fdip is returned held and must be released.
3736 		 */
3737 		if (fdip != NULL) {
3738 			(void) ddi_pathname(fdip, path);
3739 			ndi_rele_devi(fdip);
3740 		} else {
3741 			(void) ddi_pathname(dip, path);
3742 		}
3743 
3744 		err = drerr_new(1, ESTF_DRVFAIL, path);
3745 
3746 		kmem_free(path, MAXPATHLEN);
3747 
3748 		return (err);
3749 	}
3750 
3751 	return (NULL);
3752 }
3753 
3754 /*
3755  * drmach interfaces to legacy Starfire platmod logic
3756  * linkage via runtime symbol look up, called from plat_cpu_power*
3757  */
3758 
3759 /*
3760  * Start up a cpu.  It is possible that we're attempting to restart
3761  * the cpu after an UNCONFIGURE in which case the cpu will be
3762  * spinning in its cache.  So, all we have to do is wakeup him up.
3763  * Under normal circumstances the cpu will be coming from a previous
3764  * CONNECT and thus will be spinning in OBP.  In both cases, the
3765  * startup sequence is the same.
3766  */
3767 int
3768 drmach_cpu_poweron(struct cpu *cp)
3769 {
3770 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
3771 
3772 	ASSERT(MUTEX_HELD(&cpu_lock));
3773 
3774 	if (drmach_cpu_start(cp) != 0)
3775 		return (EBUSY);
3776 	else
3777 		return (0);
3778 }
3779 
3780 int
3781 drmach_cpu_poweroff(struct cpu *cp)
3782 {
3783 	int		ntries, cnt;
3784 	processorid_t	cpuid = cp->cpu_id;
3785 	void		drmach_cpu_shutdown_self(void);
3786 
3787 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
3788 
3789 	ASSERT(MUTEX_HELD(&cpu_lock));
3790 
3791 	/*
3792 	 * Capture all CPUs (except for detaching proc) to prevent
3793 	 * crosscalls to the detaching proc until it has cleared its
3794 	 * bit in cpu_ready_set.
3795 	 *
3796 	 * The CPU's remain paused and the prom_mutex is known to be free.
3797 	 * This prevents the x-trap victim from blocking when doing prom
3798 	 * IEEE-1275 calls at a high PIL level.
3799 	 */
3800 	promsafe_pause_cpus();
3801 
3802 	/*
3803 	 * Quiesce interrupts on the target CPU. We do this by setting
3804 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3805 	 * prevent it from receiving cross calls and cross traps.
3806 	 * This prevents the processor from receiving any new soft interrupts.
3807 	 */
3808 	mp_cpu_quiesce(cp);
3809 
3810 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
3811 	drmach_xt_mb[cpuid] = 0x80;
3812 
3813 	xt_one_unchecked(cpuid, (xcfunc_t *)idle_stop_xcall,
3814 		(uint64_t)drmach_cpu_shutdown_self, NULL);
3815 
3816 	ntries = drmach_cpu_ntries;
3817 	cnt = 0;
3818 	while (drmach_xt_mb[cpuid] && ntries) {
3819 		DELAY(drmach_cpu_delay);
3820 		ntries--;
3821 		cnt++;
3822 	}
3823 
3824 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
3825 
3826 	start_cpus();
3827 
3828 	DRMACH_PR("waited %d out of %d tries for "
3829 		"drmach_cpu_shutdown_self on cpu%d",
3830 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
3831 
3832 	drmach_cpu_obp_detach(cpuid);
3833 
3834 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
3835 
3836 	return (0);
3837 }
3838 
3839 /*ARGSUSED*/
3840 int
3841 drmach_verify_sr(dev_info_t *dip, int sflag)
3842 {
3843 	return (0);
3844 }
3845 
/*
 * Suspend hook.  Intentionally empty in this implementation.
 */
void
drmach_suspend_last(void)
{
	/* nothing to do */
}
3850 
/*
 * Resume hook.  Intentionally empty in this implementation.
 */
void
drmach_resume_first(void)
{
	/* nothing to do */
}
3855 
3856 /*
3857  * Log a DR sysevent.
3858  * Return value: 0 success, non-zero failure.
3859  */
3860 int
3861 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
3862 {
3863 	sysevent_t			*ev;
3864 	sysevent_id_t			eid;
3865 	int				rv, km_flag;
3866 	sysevent_value_t		evnt_val;
3867 	sysevent_attr_list_t		*evnt_attr_list = NULL;
3868 	char				attach_pnt[MAXNAMELEN];
3869 
3870 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3871 	attach_pnt[0] = '\0';
3872 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
3873 		rv = -1;
3874 		goto logexit;
3875 	}
3876 	if (verbose)
3877 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
3878 			    attach_pnt, hint, flag, verbose);
3879 
3880 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
3881 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
3882 		rv = -2;
3883 		goto logexit;
3884 	}
3885 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3886 	evnt_val.value.sv_string = attach_pnt;
3887 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
3888 				    &evnt_val, km_flag)) != 0)
3889 		goto logexit;
3890 
3891 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3892 	evnt_val.value.sv_string = hint;
3893 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
3894 				    &evnt_val, km_flag)) != 0) {
3895 		sysevent_free_attr(evnt_attr_list);
3896 		goto logexit;
3897 	}
3898 
3899 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
3900 
3901 	/*
3902 	 * Log the event but do not sleep waiting for its
3903 	 * delivery. This provides insulation from syseventd.
3904 	 */
3905 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
3906 
3907 logexit:
3908 	if (ev)
3909 		sysevent_free(ev);
3910 	if ((rv != 0) && verbose)
3911 		cmn_err(CE_WARN,
3912 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
3913 			    rv, attach_pnt, hint);
3914 
3915 	return (rv);
3916 }
3917 
3918 /*ARGSUSED*/
3919 int
3920 drmach_allow_memrange_modify(drmachid_t id)
3921 {
3922 	return (1);	/* TRUE */
3923 }
3924