xref: /titanic_51/usr/src/uts/sun4u/starfire/io/drmach.c (revision 1a887b2e15e4d9b63b5add57f3334b5b31960018)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/debug.h>
30 #include <sys/types.h>
31 #include <sys/varargs.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/dditypes.h>
35 #include <sys/devops.h>
36 #include <sys/modctl.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/stat.h>
44 #include <sys/kmem.h>
45 #include <sys/vmem.h>
46 #include <sys/processor.h>
47 #include <sys/spitregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/cpupart.h>
50 #include <sys/mem_config.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/systm.h>
53 #include <sys/machsystm.h>
54 #include <sys/autoconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/sysmacros.h>
57 #include <sys/x_call.h>
58 #include <sys/promif.h>
59 #include <sys/prom_plat.h>
60 #include <sys/membar.h>
61 #include <vm/seg_kmem.h>
62 #include <sys/mem_cage.h>
63 #include <sys/stack.h>
64 #include <sys/archsystm.h>
65 #include <vm/hat_sfmmu.h>
66 #include <sys/pte.h>
67 #include <sys/mmu.h>
68 #include <sys/cpu_module.h>
69 #include <sys/obpdefs.h>
70 #include <sys/note.h>
71 
72 #include <sys/starfire.h>	/* plat_max_... decls */
73 #include <sys/cvc.h>
74 #include <sys/cpu_sgnblk_defs.h>
75 #include <sys/drmach.h>
76 #include <sys/dr_util.h>
77 #include <sys/pda.h>
78 
79 #include <sys/sysevent.h>
80 #include <sys/sysevent/dr.h>
81 #include <sys/sysevent/eventdefs.h>
82 
83 
/*
 * Routines used by this file but defined elsewhere.
 * NOTE(review): the "_il" suffix suggests inline assembly templates and
 * the "mc_"/"pc_" prefixes suggest memory-controller and port-controller
 * support code -- confirm against their definitions.
 */
84 extern void		bcopy32_il(uint64_t, uint64_t);
85 extern void		flush_ecache_il(
86 				uint64_t physaddr, int size, int linesz);
87 extern uint_t		ldphysio_il(uint64_t physaddr);
88 extern void		stphysio_il(uint64_t physaddr, uint_t value);
89 
90 extern uint64_t		mc_get_mem_alignment(void);
91 extern uint64_t		mc_get_asr_addr(dnode_t);
92 extern uint64_t		mc_get_idle_addr(dnode_t);
93 extern uint64_t		mc_get_alignment_mask(dnode_t);
94 extern int		mc_read_asr(dnode_t, uint_t *);
95 extern int		mc_write_asr(dnode_t, uint_t);
96 extern uint64_t		mc_asr_to_pa(uint_t);
97 extern uint_t		mc_pa_to_asr(uint_t, uint64_t);
98 
99 extern int		pc_madr_add(int, int, int, int);
100 
/*
 * Argument block handed to a tree-walk callback: the node object being
 * walked and the opaque data pointer supplied by the caller of the walk.
 */
101 typedef struct {
102 	struct drmach_node	*node;
103 	void			*data;
104 } drmach_node_walk_args_t;
105 
/*
 * A position in the PROM device tree plus the methods used to access it.
 * 'here' stores the current PROM node id cast to a pointer; get_dnode
 * returns that id and walk iterates over the tree, invoking cb for each
 * node visited.  drmach_node_new() installs the OBP-backed methods.
 */
106 typedef struct drmach_node {
107 	void		*here;
108 
109 	dnode_t		 (*get_dnode)(struct drmach_node *node);
110 	int		 (*walk)(struct drmach_node *node, void *data,
111 				int (*cb)(drmach_node_walk_args_t *args));
112 } drmach_node_t;
113 
/*
 * Bounds-checked array of drmachid_t values.  Valid indices run from
 * min_index to max_index inclusive; arr_sz is the size of the arr
 * allocation in bytes.  Empty slots hold NULL.
 */
114 typedef struct {
115 	int		 min_index;
116 	int		 max_index;
117 	int		 arr_sz;
118 	drmachid_t	*arr;
119 } drmach_array_t;
120 
/*
 * Header shared by every drmach object (it is the first member of the
 * board and device structures).  'isa' is a type tag: it holds the
 * address of the function that constructed the object and is what the
 * DRMACH_IS_*_ID() macros compare against.  release and status are
 * per-type operations; name is the object's printable name.
 */
121 typedef struct {
122 	void		*isa;
123 
124 	sbd_error_t	*(*release)(drmachid_t);
125 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
126 
127 	char		 name[MAXNAMELEN];
128 } drmach_common_t;
129 
/*
 * Per-board state, created by drmach_board_new() and stored in the
 * drmach_boards array under its board number.
 */
130 typedef struct {
131 	drmach_common_t	 cm;		/* must be first: type tag and ops */
132 	int		 bnum;		/* board number */
133 	int		 assigned;	/* reported as stat->assigned */
134 	int		 powered;	/* reported as stat->powered */
135 	int		 connect_cpuid;	/* -1 when freshly created */
136 	int		 cond;		/* set by drmach_board_status() */
137 	drmach_node_t	*tree;		/* PROM tree handle for this board */
138 	drmach_array_t	*devices;	/* device objects, or NULL */
139 } drmach_board_t;
140 
/*
 * Per-device state, created by drmach_device_new() for each PROM node
 * whose name appears in the name2type[] table.
 */
141 typedef struct {
142 	drmach_common_t	 cm;		/* must be first: type tag and ops */
143 	drmach_board_t	*bp;		/* board this device belongs to */
144 	int		 unum;		/* starts at -1 in drmach_device_new */
145 	int		 busy;
146 	int		 powered;
147 	const char	*type;		/* DRMACH_DEVTYPE_* from name2type[] */
148 	drmach_node_t	*node;		/* private copy of the PROM position */
149 } drmach_device_t;
150 
/*
 * State shared between drmach_configure() and the callbacks it hands to
 * e_ddi_branch_create(): the device being configured, the caller's
 * flags, any error raised by the select callback, and the branch root
 * dip reported by drmach_branch_callback().
 */
151 typedef struct {
152 	int		 flags;
153 	drmach_device_t	*dp;
154 	sbd_error_t	*err;
155 	dev_info_t	*dip;
156 } drmach_config_args_t;
157 
/*
 * One entry of the MC idle script built by drmach_prep_rename_script():
 * the address obtained from drmach_get_mc_idle_addr() and the memory
 * device it belongs to.  The script is terminated by an entry whose
 * idle_addr is 0 and whose mem is NULL.
 */
158 typedef struct {
159 	uint64_t	 idle_addr;
160 	drmach_device_t	*mem;
161 } drmach_mc_idle_script_t;
162 
/*
 * One entry of the rename script.
 * NOTE(review): the field names suggest a memory address select
 * register address/value pair, with _filler padding the entry to 16
 * bytes -- the consumer is not visible here, confirm before relying
 * on the layout.
 */
163 typedef struct {
164 	uint64_t	masr_addr;
165 	uint_t		masr;
166 	uint_t		_filler;
167 } drmach_rename_script_t;
168 
/*
 * Descriptor for a copy-rename operation.
 * NOTE(review): presumably 'run' is the entry point invoked with an
 * opaque argument, 'c_ml' the list of memory to copy and the two
 * *_copybasepa fields the source/target base physical addresses --
 * the code that fills this in is not visible here, confirm.
 */
169 typedef struct {
170 	void		(*run)(void *arg);
171 	caddr_t		data;
172 	pda_handle_t	*ph;
173 	struct memlist	*c_ml;
174 	uint64_t	s_copybasepa;
175 	uint64_t	t_copybasepa;
176 	drmach_device_t	*restless_mc;	/* diagnostic output */
177 } drmach_copy_rename_program_t;
178 
/*
 * Operation selector.
 * NOTE(review): the type name suggests these are operations applied to
 * an I/O port controller; the user of this enum is not visible here.
 */
179 typedef enum {
180 	DO_IDLE,
181 	DO_UNIDLE,
182 	DO_PAUSE,
183 	DO_UNPAUSE
184 } drmach_iopc_op_t;
185 
/*
 * Callback context for a per-board device search.
 * NOTE(review): presumably 'found' is invoked with 'a' for each device
 * discovered on 'obj', 'ndevs' counts them and 'err' carries the first
 * failure -- the walker using this is not visible here, confirm.
 */
186 typedef struct {
187 	drmach_board_t	*obj;
188 	int		 ndevs;
189 	void		*a;
190 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
191 	sbd_error_t	*err;
192 } drmach_board_cb_data_t;
193 
/* one page of heap_arena address space, allocated in drmach_init() */
194 static caddr_t		 drmach_shutdown_va;
195 
/*
 * drmach_initialized is set once drmach_init() has completed;
 * drmach_boards maps board numbers 0 .. MAX_BOARDS - 1 to board objects.
 */
196 static int		 drmach_initialized;
197 static drmach_array_t	*drmach_boards;
198 
/*
 * NOTE(review): presumably a per-try delay and retry limit used while
 * waiting on a CPU; the code using them is not visible here.
 */
199 static int		 drmach_cpu_delay = 100;
200 static int		 drmach_cpu_ntries = 50000;
201 
/* array of NCPU bytes, allocated from static_alloc_arena in _init() */
202 volatile uchar_t	*drmach_xt_mb;
203 
204 /*
205  * Do not change the drmach_shutdown_mbox structure without
206  * considering the drmach_shutdown_asm assembly language code.
207  */
208 struct drmach_shutdown_mbox {
209 	uint64_t	estack;
210 	uint64_t	flushaddr;
211 	int		size;
212 	int		linesize;
213 	uint64_t	physaddr;
214 };
/* the single mailbox instance; allocated in _init(), freed on unload */
215 struct drmach_shutdown_mbox	*drmach_shutdown_asm_mbox;
/*
 * Forward declarations.  The three *_new constructors must be visible
 * here because their addresses are used as type tags by the
 * DRMACH_IS_*_ID() macros and as entries in the name2type[] table.
 */
216 static sbd_error_t	*drmach_device_new(drmach_node_t *,
217 				drmach_board_t *, drmach_device_t **);
218 static sbd_error_t	*drmach_cpu_new(drmach_device_t *);
219 static sbd_error_t	*drmach_mem_new(drmach_device_t *);
220 static sbd_error_t	*drmach_io_new(drmach_device_t *);
221 
222 extern struct cpu	*SIGBCPU;
223 
/*
 * DRMACH_PR(fmt, ...) is a printf that only produces output on DEBUG
 * kernels with drmach_debug set.  On non-DEBUG kernels it expands to a
 * printf call guarded by a constant-false condition.  Because the macro
 * expands to an unbraced "if", do not use it as the body of an if that
 * has an else clause.
 */
224 #ifdef DEBUG
225 
226 #define	DRMACH_PR		if (drmach_debug) printf
227 int drmach_debug = 0;		 /* set to non-zero to enable debug messages */
228 #else
229 
230 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
231 #endif /* DEBUG */
232 
/*
 * Object type checks.
 *
 * Every drmach object starts with a drmach_common_t whose 'isa' member
 * holds the address of the constructor that created it.  The macros
 * below view an id as that common header and compare the tag against
 * the relevant constructor(s); a NULL id never matches.
 *
 * The id argument is fully parenthesized so that expressions such as
 * "ids + 1" can be passed safely.  Note that id is evaluated more than
 * once, so it must not have side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device object (cpu, mem or io), false for boards */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any drmach object, board or device */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
263 
/* board number for a cpu id: cpu ids are grouped per board */
264 #define	DRMACH_CPUID2BNUM(cpuid) \
265 	((cpuid) / MAX_CPU_UNITS_PER_BOARD)
266 
/*
 * Build an ESTF_INTERNAL error whose text records the source line of
 * the macro invocation, formatted with drmach_ie_fmt.
 */
267 #define	DRMACH_INTERNAL_ERROR() \
268 	drerr_new(1, ESTF_INTERNAL, drmach_ie_fmt, __LINE__)
269 static char		*drmach_ie_fmt = "drmach.c %d";
270 
/*
 * Table of PROM node names this module manages.  For each name it gives
 * the device type string stored in drmach_device_t.type and the
 * constructor invoked by drmach_device_new().  Nodes whose name is not
 * listed here are ignored.
 */
271 static struct {
272 	const char	 *name;
273 	const char	 *type;
274 	sbd_error_t	 *(*new)(drmach_device_t *);
275 } name2type[] = {
276 	{ "SUNW,UltraSPARC",	DRMACH_DEVTYPE_CPU,  drmach_cpu_new },
277 	{ "mem-unit",		DRMACH_DEVTYPE_MEM,  drmach_mem_new },
278 	{ "pci",		DRMACH_DEVTYPE_PCI,  drmach_io_new  },
279 	{ "sbus",		DRMACH_DEVTYPE_SBUS, drmach_io_new  },
280 };
281 
282 /* names of node types to clean up when a board is unconfigured */
283 #define	MISC_COUNTER_TIMER_DEVNAME	"counter-timer"
284 #define	MISC_PERF_COUNTER_DEVNAME	"perf-counter"
285 
286 /* utility: number of bytes in one megabyte */
287 #define	MBYTE	(1048576ull)
288 
289 /*
290  * Declare a dependency on the cvc driver module.  This is necessary
291  * because the CPU support needs to call cvc_assign_iocpu.  The
292  * definition is hidden from lint, which would flag it as unused.
293  */
293 #ifndef lint
294 static char _depends_on[] = "drv/cvc";
295 #endif  /* lint */
296 
297 /*
298  * drmach autoconfiguration data structures and interfaces
299  */
300 
301 extern struct mod_ops mod_miscops;
302 
/* this is a "misc" module: it exports routines but no device entry points */
303 static struct modlmisc modlmisc = {
304 	&mod_miscops,
305 	"Sun Enterprise 10000 DR %I%"
306 };
307 
308 static struct modlinkage modlinkage = {
309 	MODREV_1,
310 	(void *)&modlmisc,
311 	NULL
312 };
313 
/* serializes drmach_init(); created in _init() */
314 static kmutex_t drmach_i_lock;
315 
316 int
317 _init(void)
318 {
319 	int err;
320 
321 	/* check that we have the correct version of obp */
322 	if (prom_test("SUNW,UE10000,add-brd") != 0) {
323 
324 		cmn_err(CE_WARN, "!OBP/SSP upgrade is required to enable "
325 		    "DR Functionality");
326 
327 		return (-1);
328 	}
329 
330 	mutex_init(&drmach_i_lock, NULL, MUTEX_DRIVER, NULL);
331 
332 	drmach_xt_mb = (uchar_t *)vmem_alloc(static_alloc_arena,
333 	    NCPU * sizeof (uchar_t), VM_SLEEP);
334 	drmach_shutdown_asm_mbox = (struct drmach_shutdown_mbox *)
335 	    vmem_alloc(static_alloc_arena, sizeof (struct drmach_shutdown_mbox),
336 	    VM_SLEEP);
337 
338 	if ((err = mod_install(&modlinkage)) != 0) {
339 		mutex_destroy(&drmach_i_lock);
340 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
341 		    NCPU * sizeof (uchar_t));
342 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
343 		    sizeof (struct drmach_shutdown_mbox));
344 	}
345 
346 	return (err);
347 }
348 
349 int
350 _fini(void)
351 {
352 	static int drmach_fini(void);
353 
354 	if (drmach_fini())
355 		return (DDI_FAILURE);
356 	else
357 		return (mod_remove(&modlinkage));
358 }
359 
360 int
361 _info(struct modinfo *modinfop)
362 {
363 	return (mod_info(&modlinkage, modinfop));
364 }
365 
366 static dnode_t
367 drmach_node_obp_get_dnode(drmach_node_t *np)
368 {
369 	return ((dnode_t)np->here);
370 }
371 
372 static int
373 drmach_node_obp_walk(drmach_node_t *np, void *data,
374 		int (*cb)(drmach_node_walk_args_t *args))
375 {
376 	dnode_t			nodeid;
377 	int			rv;
378 	drmach_node_walk_args_t	args;
379 
380 	/* initialized args structure for callback */
381 	args.node = np;
382 	args.data = data;
383 
384 	nodeid = prom_childnode(prom_rootnode());
385 
386 	/* save our new position with in the tree */
387 	np->here = (void *)nodeid;
388 
389 	rv = 0;
390 	while (nodeid != OBP_NONODE) {
391 		rv = (*cb)(&args);
392 		if (rv)
393 			break;
394 
395 		nodeid = prom_nextnode(nodeid);
396 
397 		/* save our new position with in the tree */
398 		np->here = (void *)nodeid;
399 	}
400 
401 	return (rv);
402 }
403 
404 static drmach_node_t *
405 drmach_node_new(void)
406 {
407 	drmach_node_t *np;
408 
409 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
410 
411 	np->get_dnode = drmach_node_obp_get_dnode;
412 	np->walk = drmach_node_obp_walk;
413 
414 	return (np);
415 }
416 
417 static void
418 drmach_node_dispose(drmach_node_t *np)
419 {
420 	kmem_free(np, sizeof (*np));
421 }
422 
423 static dev_info_t *
424 drmach_node_get_dip(drmach_node_t *np)
425 {
426 	dnode_t nodeid;
427 
428 	nodeid = np->get_dnode(np);
429 	if (nodeid == OBP_NONODE)
430 		return (NULL);
431 	else {
432 		dev_info_t *dip;
433 
434 		/* The root node doesn't have to be held */
435 		dip = e_ddi_nodeid_to_dip(nodeid);
436 		if (dip) {
437 			/*
438 			 * Branch rooted at dip is already held, so release
439 			 * hold acquired in e_ddi_nodeid_to_dip()
440 			 */
441 			ddi_release_devi(dip);
442 			ASSERT(e_ddi_branch_held(dip));
443 		}
444 
445 		return (dip);
446 	}
447 	/*NOTREACHED*/
448 }
449 
450 static dnode_t
451 drmach_node_get_dnode(drmach_node_t *np)
452 {
453 	return (np->get_dnode(np));
454 }
455 
456 static int
457 drmach_node_walk(drmach_node_t *np, void *param,
458 		int (*cb)(drmach_node_walk_args_t *args))
459 {
460 	return (np->walk(np, param, cb));
461 }
462 
463 static int
464 drmach_node_get_prop(drmach_node_t *np, char *name, void *buf)
465 {
466 	dnode_t	nodeid;
467 	int	rv;
468 
469 	nodeid = np->get_dnode(np);
470 	if (nodeid == OBP_NONODE)
471 		rv = -1;
472 	else if (prom_getproplen(nodeid, (caddr_t)name) < 0)
473 		rv = -1;
474 	else {
475 		(void) prom_getprop(nodeid, (caddr_t)name, (caddr_t)buf);
476 		rv = 0;
477 	}
478 
479 	return (rv);
480 }
481 
482 static int
483 drmach_node_get_proplen(drmach_node_t *np, char *name, int *len)
484 {
485 	dnode_t	 nodeid;
486 	int	 rv;
487 
488 	nodeid = np->get_dnode(np);
489 	if (nodeid == OBP_NONODE)
490 		rv = -1;
491 	else {
492 		*len = prom_getproplen(nodeid, (caddr_t)name);
493 		rv = (*len < 0 ? -1 : 0);
494 	}
495 
496 	return (rv);
497 }
498 
499 static drmachid_t
500 drmach_node_dup(drmach_node_t *np)
501 {
502 	drmach_node_t *dup;
503 
504 	dup = drmach_node_new();
505 	dup->here = np->here;
506 
507 	return (dup);
508 }
509 
510 /*
511  * drmach_array provides convenient array construction, access,
512  * bounds checking and array destruction logic.
513  */
514 
515 static drmach_array_t *
516 drmach_array_new(int min_index, int max_index)
517 {
518 	drmach_array_t *arr;
519 
520 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
521 
522 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
523 	if (arr->arr_sz > 0) {
524 		arr->min_index = min_index;
525 		arr->max_index = max_index;
526 
527 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
528 		return (arr);
529 	} else {
530 		kmem_free(arr, sizeof (*arr));
531 		return (0);
532 	}
533 }
534 
535 static int
536 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
537 {
538 	if (idx < arr->min_index || idx > arr->max_index)
539 		return (-1);
540 	else {
541 		arr->arr[idx - arr->min_index] = val;
542 		return (0);
543 	}
544 	/*NOTREACHED*/
545 }
546 
547 static int
548 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
549 {
550 	if (idx < arr->min_index || idx > arr->max_index)
551 		return (-1);
552 	else {
553 		*val = arr->arr[idx - arr->min_index];
554 		return (0);
555 	}
556 	/*NOTREACHED*/
557 }
558 
559 static int
560 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
561 {
562 	int rv;
563 
564 	*idx = arr->min_index;
565 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
566 		*idx += 1;
567 
568 	return (rv);
569 }
570 
571 static int
572 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
573 {
574 	int rv;
575 
576 	*idx += 1;
577 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
578 		*idx += 1;
579 
580 	return (rv);
581 }
582 
583 static void
584 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
585 {
586 	drmachid_t	val;
587 	int		idx;
588 	int		rv;
589 
590 	rv = drmach_array_first(arr, &idx, &val);
591 	while (rv == 0) {
592 		(*disposer)(val);
593 		rv = drmach_array_next(arr, &idx, &val);
594 	}
595 
596 	kmem_free(arr->arr, arr->arr_sz);
597 	kmem_free(arr, sizeof (*arr));
598 }
599 
600 /*ARGSUSED*/
601 static int
602 drmach_prom_select(dnode_t nodeid, void *arg, uint_t flags)
603 {
604 	int			rprop[64];
605 	dnode_t			saved;
606 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
607 	drmach_device_t		*dp = ap->dp;
608 	sbd_error_t		*err;
609 
610 	saved = drmach_node_get_dnode(dp->node);
611 
612 	if (nodeid != saved)
613 		return (DDI_FAILURE);
614 
615 	if (saved == OBP_NONODE) {
616 		err = DRMACH_INTERNAL_ERROR();
617 		DRERR_SET_C(&ap->err, &err);
618 		return (DDI_FAILURE);
619 	}
620 
621 	if (prom_getprop(nodeid, OBP_REG, (caddr_t)rprop) <= 0) {
622 		return (DDI_FAILURE);
623 	}
624 
625 	return (DDI_SUCCESS);
626 }
627 
628 /*ARGSUSED*/
629 static void
630 drmach_branch_callback(dev_info_t *rdip, void *arg, uint_t flags)
631 {
632 	drmach_config_args_t	*ap = (drmach_config_args_t *)arg;
633 
634 	ASSERT(ap->dip == NULL);
635 
636 	ap->dip = rdip;
637 }
638 
639 sbd_error_t *
640 drmach_configure(drmachid_t id, int flags)
641 {
642 	drmach_device_t		*dp;
643 	sbd_error_t		*err;
644 	drmach_config_args_t	ca;
645 	devi_branch_t		b = {0};
646 	dev_info_t		*fdip = NULL;
647 
648 	if (!DRMACH_IS_DEVICE_ID(id))
649 		return (drerr_new(0, ESTF_INAPPROP, NULL));
650 	dp = id;
651 
652 	ca.dp = dp;
653 	ca.flags = flags;
654 	ca.err = NULL;		/* will be set if error detected */
655 	ca.dip = NULL;
656 
657 	b.arg = &ca;
658 	b.type = DEVI_BRANCH_PROM;
659 	b.create.prom_branch_select = drmach_prom_select;
660 	b.devi_branch_callback = drmach_branch_callback;
661 
662 	if (e_ddi_branch_create(ddi_root_node(), &b, &fdip,
663 	    DEVI_BRANCH_CHILD | DEVI_BRANCH_CONFIGURE) != 0) {
664 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
665 
666 		/*
667 		 * If non-NULL, fdip is returned held and must be released.
668 		 */
669 		if (fdip != NULL) {
670 			(void) ddi_pathname(fdip, path);
671 			ddi_release_devi(fdip);
672 		} else if (ca.dip != NULL) {
673 			/* safe to call ddi_pathname as dip already held */
674 			(void) ddi_pathname(ca.dip, path);
675 		} else {
676 			(void) strcpy(path, "<none>");
677 		}
678 
679 		err = drerr_new(1, ESTF_DRVFAIL, path);
680 		DRERR_SET_C(&ca.err, &err);
681 		kmem_free(path, MAXPATHLEN);
682 	}
683 
684 	return (ca.err);
685 }
686 
687 static sbd_error_t *
688 drmach_device_new(drmach_node_t *node,
689 	drmach_board_t *bp, drmach_device_t **dpp)
690 {
691 	int		 i;
692 	int		 rv;
693 	drmach_device_t	*dp;
694 	sbd_error_t	*err;
695 	char		 name[OBP_MAXDRVNAME];
696 
697 	rv = drmach_node_get_prop(node, OBP_NAME, name);
698 	if (rv) {
699 		/* every node is expected to have a name */
700 		err = drerr_new(1, ESTF_GETPROP,
701 			"PROM Node 0x%x: property %s",
702 			(uint_t)node->get_dnode(node), OBP_NAME);
703 
704 		return (err);
705 	}
706 
707 	/*
708 	 * The node currently being examined is not listed in the name2type[]
709 	 * array.  In this case, the node is no interest to drmach.  Both
710 	 * dp and err are initialized here to yield nothing (no device or
711 	 * error structure) for this case.
712 	 */
713 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
714 		if (strcmp(name2type[i].name, name) == 0)
715 			break;
716 
717 	if (i < sizeof (name2type) / sizeof (name2type[0])) {
718 		dp = kmem_zalloc(sizeof (drmach_device_t), KM_SLEEP);
719 
720 		dp->bp = bp;
721 		dp->unum = -1;
722 		dp->node = drmach_node_dup(node);
723 		dp->type = name2type[i].type;
724 
725 		err = (name2type[i].new)(dp);
726 		if (err) {
727 			drmach_node_dispose(node);
728 			kmem_free(dp, sizeof (*dp));
729 			dp = NULL;
730 		}
731 
732 		*dpp = dp;
733 		return (err);
734 	}
735 
736 	/*
737 	 * The node currently being examined is not listed in the name2type[]
738 	 * array.  In this case, the node is no interest to drmach.  Both
739 	 * dp and err are initialized here to yield nothing (no device or
740 	 * error structure) for this case.
741 	 */
742 	*dpp = NULL;
743 	return (NULL);
744 }
745 
746 static void
747 drmach_device_dispose(drmachid_t id)
748 {
749 	drmach_device_t *self = id;
750 
751 	if (self->node)
752 		drmach_node_dispose(self->node);
753 
754 	kmem_free(self, sizeof (*self));
755 }
756 
757 static sbd_error_t *
758 drmach_device_get_prop(drmach_device_t *dp, char *name, void *buf)
759 {
760 	sbd_error_t	*err = NULL;
761 	int		 rv;
762 
763 	rv = drmach_node_get_prop(dp->node, name, buf);
764 	if (rv) {
765 		err = drerr_new(1, ESTF_GETPROP,
766 			"%s::%s: property %s",
767 			dp->bp->cm.name, dp->cm.name, name);
768 	}
769 
770 	return (err);
771 }
772 
773 static sbd_error_t *
774 drmach_device_get_proplen(drmach_device_t *dp, char *name, int *len)
775 {
776 	sbd_error_t	*err = NULL;
777 	int		 rv;
778 
779 	rv = drmach_node_get_proplen(dp->node, name, len);
780 	if (rv) {
781 		err = drerr_new(1, ESTF_GETPROPLEN,
782 			"%s::%s: property %s",
783 			dp->bp->cm.name, dp->cm.name, name);
784 	}
785 
786 	return (err);
787 }
788 
789 static drmach_board_t *
790 drmach_board_new(int bnum)
791 {
792 	static sbd_error_t *drmach_board_release(drmachid_t);
793 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
794 
795 	drmach_board_t	*bp;
796 
797 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
798 
799 	bp->cm.isa = (void *)drmach_board_new;
800 	bp->cm.release = drmach_board_release;
801 	bp->cm.status = drmach_board_status;
802 
803 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
804 
805 	bp->bnum = bnum;
806 	bp->devices = NULL;
807 	bp->connect_cpuid = -1;
808 	bp->tree = drmach_node_new();
809 	bp->assigned = !drmach_initialized;
810 	bp->powered = !drmach_initialized;
811 
812 	drmach_array_set(drmach_boards, bnum, bp);
813 	return (bp);
814 }
815 
816 static void
817 drmach_board_dispose(drmachid_t id)
818 {
819 	drmach_board_t *bp;
820 
821 	ASSERT(DRMACH_IS_BOARD_ID(id));
822 	bp = id;
823 
824 	if (bp->tree)
825 		drmach_node_dispose(bp->tree);
826 
827 	if (bp->devices)
828 		drmach_array_dispose(bp->devices, drmach_device_dispose);
829 
830 	kmem_free(bp, sizeof (*bp));
831 }
832 
833 static sbd_error_t *
834 drmach_board_status(drmachid_t id, drmach_status_t *stat)
835 {
836 	sbd_error_t	*err = NULL;
837 	drmach_board_t	*bp;
838 
839 	if (!DRMACH_IS_BOARD_ID(id))
840 		return (drerr_new(0, ESTF_INAPPROP, NULL));
841 	bp = id;
842 
843 	stat->assigned = bp->assigned;
844 	stat->powered = bp->powered;
845 	stat->busy = 0;			/* assume not busy */
846 	stat->configured = 0;		/* assume not configured */
847 	stat->empty = 0;
848 	stat->cond = bp->cond = SBD_COND_OK;
849 	strncpy(stat->type, "System Brd", sizeof (stat->type));
850 	stat->info[0] = '\0';
851 
852 	if (bp->devices) {
853 		int		 rv;
854 		int		 d_idx;
855 		drmachid_t	 d_id;
856 
857 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
858 		while (rv == 0) {
859 			drmach_status_t	d_stat;
860 
861 			err = drmach_status(d_id, &d_stat);
862 			if (err)
863 				break;
864 
865 			stat->busy |= d_stat.busy;
866 			stat->configured |= d_stat.configured;
867 
868 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
869 		}
870 	}
871 
872 	return (err);
873 }
874 
875 /* a simple routine to reduce redundancy of this common logic */
876 static pda_handle_t
877 drmach_pda_open(void)
878 {
879 	pda_handle_t ph;
880 
881 	ph = pda_open();
882 	if (ph == NULL) {
883 		/* catch in debug kernels */
884 		ASSERT(0);
885 		cmn_err(CE_WARN, "pda_open failed");
886 	}
887 
888 	return (ph);
889 }
890 
/*
 * DEBUG kernels only: when set to non-zero, drmach_init() enters the
 * kernel debugger on entry.
 */
891 #ifdef DEBUG
892 int drmach_init_break = 0;
893 #endif
894 
895 static int
896 hold_rele_branch(dev_info_t *rdip, void *arg)
897 {
898 	int	i;
899 	int	*holdp = (int *)arg;
900 	char	*name = ddi_node_name(rdip);
901 
902 	/*
903 	 * For Starfire, we must be children of the root devinfo node
904 	 */
905 	ASSERT(ddi_get_parent(rdip) == ddi_root_node());
906 
907 	for (i = 0; i < sizeof (name2type) / sizeof (name2type[0]); i++)
908 		if (strcmp(name2type[i].name, name) == 0)
909 			break;
910 
911 	if (i == sizeof (name2type) / sizeof (name2type[0])) {
912 		/* Not of interest to us */
913 		return (DDI_WALK_PRUNECHILD);
914 	}
915 
916 	if (*holdp) {
917 		ASSERT(!e_ddi_branch_held(rdip));
918 		e_ddi_branch_hold(rdip);
919 	} else {
920 		ASSERT(e_ddi_branch_held(rdip));
921 		e_ddi_branch_rele(rdip);
922 	}
923 
924 	return (DDI_WALK_PRUNECHILD);
925 }
926 
927 static int
928 drmach_init(void)
929 {
930 	dnode_t		nodeid;
931 	dev_info_t	*rdip;
932 	int		hold, circ;
933 
934 #ifdef DEBUG
935 	if (drmach_init_break)
936 		debug_enter("drmach_init: drmach_init_break set\n");
937 #endif
938 	mutex_enter(&drmach_i_lock);
939 	if (drmach_initialized) {
940 		mutex_exit(&drmach_i_lock);
941 		return (0);
942 	}
943 
944 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
945 
946 	nodeid = prom_childnode(prom_rootnode());
947 	do {
948 		int		 bnum;
949 		drmachid_t	 id;
950 
951 		bnum = -1;
952 		(void) prom_getprop(nodeid, OBP_BOARDNUM, (caddr_t)&bnum);
953 		if (bnum == -1)
954 			continue;
955 
956 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
957 			cmn_err(CE_WARN, "OBP node 0x%x has"
958 				" invalid property value, %s=%d",
959 				nodeid, OBP_BOARDNUM, bnum);
960 
961 			/* clean up */
962 			drmach_array_dispose(
963 				drmach_boards, drmach_board_dispose);
964 
965 			mutex_exit(&drmach_i_lock);
966 			return (-1);
967 		} else if (id == NULL)
968 			(void) drmach_board_new(bnum);
969 	} while ((nodeid = prom_nextnode(nodeid)) != OBP_NONODE);
970 
971 	drmach_shutdown_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
972 
973 	/*
974 	 * Walk immediate children of devinfo root node and hold
975 	 * all devinfo branches of interest.
976 	 */
977 	hold = 1;
978 	rdip = ddi_root_node();
979 
980 	ndi_devi_enter(rdip, &circ);
981 	ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
982 	ndi_devi_exit(rdip, circ);
983 
984 	drmach_initialized = 1;
985 
986 	mutex_exit(&drmach_i_lock);
987 
988 	return (0);
989 }
990 
991 static int
992 drmach_fini(void)
993 {
994 	dev_info_t	*rdip;
995 	int		hold, circ;
996 
997 	if (drmach_initialized) {
998 		int		busy = 0;
999 		int		rv;
1000 		int		idx;
1001 		drmachid_t	id;
1002 
1003 		ASSERT(drmach_boards != NULL);
1004 
1005 		rv = drmach_array_first(drmach_boards, &idx, &id);
1006 		while (rv == 0) {
1007 			sbd_error_t	*err;
1008 			drmach_status_t stat;
1009 
1010 			err = drmach_board_status(id, &stat);
1011 			if (err) {
1012 				/* catch in debug kernels */
1013 				ASSERT(0);
1014 				sbd_err_clear(&err);
1015 				busy = 1;
1016 			} else
1017 				busy |= stat.busy;
1018 
1019 			rv = drmach_array_next(drmach_boards, &idx, &id);
1020 		}
1021 
1022 		if (busy)
1023 			return (-1);
1024 
1025 		drmach_array_dispose(drmach_boards, drmach_board_dispose);
1026 		drmach_boards = NULL;
1027 
1028 		vmem_free(heap_arena, drmach_shutdown_va, PAGESIZE);
1029 
1030 		/*
1031 		 * Walk immediate children of the root devinfo node
1032 		 * releasing holds acquired on branches in drmach_init()
1033 		 */
1034 		hold = 0;
1035 		rdip = ddi_root_node();
1036 
1037 		ndi_devi_enter(rdip, &circ);
1038 		ddi_walk_devs(ddi_get_child(rdip), hold_rele_branch, &hold);
1039 		ndi_devi_exit(rdip, circ);
1040 
1041 		mutex_destroy(&drmach_i_lock);
1042 
1043 		drmach_initialized = 0;
1044 	}
1045 	if (drmach_xt_mb != NULL) {
1046 		vmem_free(static_alloc_arena, (void *)drmach_xt_mb,
1047 		    NCPU * sizeof (uchar_t));
1048 	}
1049 	if (drmach_shutdown_asm_mbox != NULL) {
1050 		vmem_free(static_alloc_arena, (void *)drmach_shutdown_asm_mbox,
1051 		    sizeof (struct drmach_shutdown_mbox));
1052 	}
1053 	return (0);
1054 }
1055 
1056 static sbd_error_t *
1057 drmach_get_mc_asr_addr(drmachid_t id, uint64_t *pa)
1058 {
1059 	drmach_device_t	*dp;
1060 	dnode_t		nodeid;
1061 	uint64_t	addr;
1062 
1063 	if (!DRMACH_IS_MEM_ID(id))
1064 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1065 	dp = id;
1066 
1067 	nodeid = drmach_node_get_dnode(dp->node);
1068 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1069 		return (DRMACH_INTERNAL_ERROR());
1070 
1071 	addr = mc_get_asr_addr(nodeid);
1072 	if (addr == (uint64_t)-1)
1073 		return (DRMACH_INTERNAL_ERROR());
1074 
1075 	*pa = addr;
1076 	return (NULL);
1077 }
1078 
1079 static sbd_error_t *
1080 drmach_get_mc_idle_addr(drmachid_t id, uint64_t *pa)
1081 {
1082 	drmach_device_t	*dp;
1083 	dnode_t		nodeid;
1084 	uint64_t	addr;
1085 
1086 	if (!DRMACH_IS_MEM_ID(id))
1087 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1088 	dp = id;
1089 
1090 	nodeid = drmach_node_get_dnode(dp->node);
1091 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1092 		return (DRMACH_INTERNAL_ERROR());
1093 
1094 	addr = mc_get_idle_addr(nodeid);
1095 	if (addr == (uint64_t)-1)
1096 		return (DRMACH_INTERNAL_ERROR());
1097 
1098 	*pa = addr;
1099 	return (NULL);
1100 }
1101 
1102 static sbd_error_t *
1103 drmach_read_mc_asr(drmachid_t id, uint_t *mcregp)
1104 {
1105 	drmach_device_t	*dp;
1106 	dnode_t		 nodeid;
1107 	sbd_error_t	*err;
1108 
1109 	if (!DRMACH_IS_MEM_ID(id))
1110 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1111 	dp = id;
1112 
1113 	nodeid = drmach_node_get_dnode(dp->node);
1114 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1115 		err = DRMACH_INTERNAL_ERROR();
1116 	else if (mc_read_asr(nodeid, mcregp) == -1)
1117 		err = DRMACH_INTERNAL_ERROR();
1118 	else
1119 		err = NULL;
1120 
1121 	return (err);
1122 }
1123 
1124 static sbd_error_t *
1125 drmach_write_mc_asr(drmachid_t id, uint_t mcreg)
1126 {
1127 	drmach_device_t	*dp;
1128 	dnode_t		 nodeid;
1129 	sbd_error_t	*err;
1130 
1131 	if (!DRMACH_IS_MEM_ID(id))
1132 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1133 	dp = id;
1134 
1135 	nodeid = drmach_node_get_dnode(dp->node);
1136 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
1137 		err = DRMACH_INTERNAL_ERROR();
1138 	else if (mc_write_asr(nodeid, mcreg) == -1)
1139 		err = DRMACH_INTERNAL_ERROR();
1140 	else
1141 		err = NULL;
1142 
1143 	return (err);
1144 }
1145 
1146 static struct memlist *
1147 memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len)
1148 {
1149 	struct memlist	*ml, *tl, *nl;
1150 
1151 	if (len == 0ull)
1152 		return (NULL);
1153 
1154 	if (mlist == NULL) {
1155 		mlist = GETSTRUCT(struct memlist, 1);
1156 		mlist->address = base;
1157 		mlist->size = len;
1158 		mlist->next = mlist->prev = NULL;
1159 
1160 		return (mlist);
1161 	}
1162 
1163 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
1164 		if (base < ml->address) {
1165 			if ((base + len) < ml->address) {
1166 				nl = GETSTRUCT(struct memlist, 1);
1167 				nl->address = base;
1168 				nl->size = len;
1169 				nl->next = ml;
1170 				if ((nl->prev = ml->prev) != NULL)
1171 					nl->prev->next = nl;
1172 				ml->prev = nl;
1173 				if (mlist == ml)
1174 					mlist = nl;
1175 			} else {
1176 				ml->size = MAX((base + len),
1177 						(ml->address + ml->size)) -
1178 						base;
1179 				ml->address = base;
1180 			}
1181 			break;
1182 
1183 		} else if (base <= (ml->address + ml->size)) {
1184 			ml->size = MAX((base + len),
1185 					(ml->address + ml->size)) -
1186 					MIN(ml->address, base);
1187 			ml->address = MIN(ml->address, base);
1188 			break;
1189 		}
1190 	}
1191 	if (ml == NULL) {
1192 		nl = GETSTRUCT(struct memlist, 1);
1193 		nl->address = base;
1194 		nl->size = len;
1195 		nl->next = NULL;
1196 		nl->prev = tl;
1197 		tl->next = nl;
1198 	}
1199 
1200 	memlist_coalesce(mlist);
1201 
1202 	return (mlist);
1203 }
1204 
1205 static sbd_error_t *
1206 drmach_prep_rename_script(drmach_device_t *s_mem, drmach_device_t *t_mem,
1207 	uint64_t t_slice_offset, caddr_t buf, int buflen)
1208 {
1209 	int			i, b, m;
1210 	drmach_mc_idle_script_t	*isp;
1211 	drmach_rename_script_t	*rsp;
1212 	int			s_bd, t_bd;
1213 	uint_t			s_masr, t_masr;
1214 	uint64_t		s_new_basepa, t_new_basepa;
1215 	int			b_idx, rv;
1216 	sbd_error_t		*err;
1217 	drmachid_t		 b_id;
1218 	drmach_board_t		*brd;
1219 
1220 #ifdef DEBUG
1221 	/*
1222 	 * Starfire CPU/MEM/IO boards have only one MC per board.
1223 	 * This function has been coded with that fact in mind.
1224 	 */
1225 	ASSERT(MAX_MEM_UNITS_PER_BOARD == 1);
1226 
1227 	/*
1228 	 * calculate the maximum space that could be consumed,
1229 	 * then verify the available buffer space is adequate.
1230 	 */
1231 	m  = sizeof (drmach_mc_idle_script_t *) * 2; /* two MCs */
1232 	b  = sizeof (drmach_rename_script_t *) * 3 * MAX_CPU_UNITS_PER_BOARD;
1233 	b += sizeof (drmach_rename_script_t *) * 3 * MAX_IO_UNITS_PER_BOARD;
1234 	b *= MAX_BOARDS;
1235 	b += sizeof (drmach_rename_script_t *) * 3;
1236 	b += sizeof (drmach_rename_script_t *) * 1;
1237 	ASSERT(m + b < buflen);
1238 #endif
1239 
1240 	/*
1241 	 * construct an array of MC idle register addresses of
1242 	 * both MCs.  The array is zero terminated -- as expected
1243 	 * by drmach_copy_rename_prog__relocatable().
1244 	 */
1245 	isp = (drmach_mc_idle_script_t *)buf;
1246 
1247 	/* source mc */
1248 	err = drmach_get_mc_idle_addr(s_mem, &isp->idle_addr);
1249 	if (err)
1250 		return (err);
1251 	isp->mem = s_mem;
1252 	isp += 1;
1253 
1254 	/* target mc */
1255 	err = drmach_get_mc_idle_addr(t_mem, &isp->idle_addr);
1256 	if (err)
1257 		return (err);
1258 	isp->mem = t_mem;
1259 	isp += 1;
1260 
1261 	/* terminator */
1262 	isp->idle_addr = 0;
1263 	isp->mem = NULL;
1264 	isp += 1;
1265 
1266 	/* fetch source mc asr register value */
1267 	err = drmach_read_mc_asr(s_mem, &s_masr);
1268 	if (err)
1269 		return (err);
1270 	else if (s_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1271 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1272 				s_mem->bp->cm.name, s_mem->cm.name));
1273 	}
1274 
1275 	/* fetch target mc asr register value */
1276 	err = drmach_read_mc_asr(t_mem, &t_masr);
1277 	if (err)
1278 		return (err);
1279 	else if (t_masr & STARFIRE_MC_INTERLEAVE_MASK) {
1280 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
1281 				t_mem->bp->cm.name, t_mem->cm.name));
1282 	}
1283 
1284 	/* get new source base pa from target's masr */
1285 	s_new_basepa = mc_asr_to_pa(t_masr);
1286 
1287 	/*
1288 	 * remove any existing slice offset to realign
1289 	 * memory with board's slice boundary
1290 	 */
1291 	s_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1292 
1293 	/* get new target base pa from source's masr */
1294 	t_new_basepa  = mc_asr_to_pa(s_masr);
1295 
1296 	/* remove any existing slice offset, then apply new offset */
1297 	t_new_basepa &= ~ (mc_get_mem_alignment() - 1);
1298 	t_new_basepa += t_slice_offset;
1299 
1300 	/* encode new base pa into s_masr.  turn off mem present bit */
1301 	s_masr  = mc_pa_to_asr(s_masr, s_new_basepa);
1302 	s_masr &= ~STARFIRE_MC_MEM_PRESENT_MASK;
1303 
1304 	/* encode new base pa into t_masr.  turn on mem present bit */
1305 	t_masr  = mc_pa_to_asr(t_masr, t_new_basepa);
1306 	t_masr |= STARFIRE_MC_MEM_PRESENT_MASK;
1307 
1308 	/*
1309 	 * Step 0:	Mark source memory as not present.
1310 	 */
1311 	m = 0;
1312 	rsp = (drmach_rename_script_t *)isp;
1313 	err = drmach_get_mc_asr_addr(s_mem, &rsp[m].masr_addr);
1314 	if (err)
1315 		return (err);
1316 	rsp[m].masr = s_masr;
1317 	m++;
1318 
1319 	/*
1320 	 * Step 1:	Write source base address to target MC
1321 	 *		with present bit off.
1322 	 */
1323 	err = drmach_get_mc_asr_addr(t_mem, &rsp[m].masr_addr);
1324 	if (err)
1325 		return (err);
1326 	rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1327 	m++;
1328 
1329 	/*
1330 	 * Step 2:	Now rewrite target reg with present bit on.
1331 	 */
1332 	rsp[m].masr_addr = rsp[m-1].masr_addr;
1333 	rsp[m].masr = t_masr;
1334 	m++;
1335 
1336 	s_bd = s_mem->bp->bnum;
1337 	t_bd = t_mem->bp->bnum;
1338 
1339 	DRMACH_PR("preparing script for CPU and IO units:\n");
1340 
1341 	rv = drmach_array_first(drmach_boards, &b_idx, &b_id);
1342 	if (rv) {
1343 		/* catch this in debug kernels */
1344 		ASSERT(0);
1345 		return (DRMACH_INTERNAL_ERROR());
1346 	}
1347 
1348 	do {
1349 		int			 d_idx;
1350 		drmachid_t		 d_id;
1351 		drmach_device_t		*device;
1352 
1353 		ASSERT(DRMACH_IS_BOARD_ID(b_id));
1354 		brd = b_id;
1355 		b = brd->bnum;
1356 
1357 		/*
1358 		 * Step 3:	Update PC MADR tables for CPUs.
1359 		 */
1360 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1361 		if (rv) {
1362 			/* must mean no devices on this board */
1363 			break;
1364 		}
1365 
1366 		DRMACH_PR("\t%s\n", brd->cm.name);
1367 
1368 		do {
1369 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1370 
1371 			if (!DRMACH_IS_CPU_ID(d_id))
1372 				continue;
1373 
1374 			device = d_id;
1375 			i = device->unum;
1376 
1377 			DRMACH_PR("\t\t%s\n", device->cm.name);
1378 
1379 			/*
1380 			 * Disabled detaching mem node.
1381 			 */
1382 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i);
1383 			rsp[m].masr = s_masr;
1384 			m++;
1385 			/*
1386 			 * Always write masr with present bit
1387 			 * off and then again with it on.
1388 			 */
1389 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i);
1390 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1391 			m++;
1392 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1393 			rsp[m].masr = t_masr;
1394 			m++;
1395 
1396 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1397 
1398 		/*
1399 		 * Step 4:	Update PC MADR tables for IOs.
1400 		 */
1401 		rv = drmach_array_first(brd->devices, &d_idx, &d_id);
1402 		/* this worked for previous loop, must work here too */
1403 		ASSERT(rv == 0);
1404 
1405 		do {
1406 			ASSERT(DRMACH_IS_DEVICE_ID(d_id));
1407 
1408 			if (!DRMACH_IS_IO_ID(d_id))
1409 				continue;
1410 
1411 			device = d_id;
1412 			i = device->unum;
1413 
1414 			DRMACH_PR("\t\t%s\n", device->cm.name);
1415 
1416 			/*
1417 			 * Disabled detaching mem node.
1418 			 */
1419 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, s_bd, i+4);
1420 			rsp[m].masr = s_masr;
1421 			m++;
1422 			/*
1423 			 * Always write masr with present bit
1424 			 * off and then again with it on.
1425 			 */
1426 			rsp[m].masr_addr = STARFIRE_PC_MADR_ADDR(b, t_bd, i+4);
1427 			rsp[m].masr = t_masr & ~STARFIRE_MC_MEM_PRESENT_MASK;
1428 			m++;
1429 			rsp[m].masr_addr = rsp[m-1].masr_addr;
1430 			rsp[m].masr = t_masr;
1431 			m++;
1432 
1433 		} while (drmach_array_next(brd->devices, &d_idx, &d_id) == 0);
1434 	} while (drmach_array_next(drmach_boards, &b_idx, &b_id) == 0);
1435 
1436 	/*
1437 	 * Zero masr_addr value indicates the END.
1438 	 */
1439 	rsp[m].masr_addr = 0ull;
1440 	rsp[m].masr = 0;
1441 	DRMACH_PR("number of steps in rename script = %d\n", m);
1442 	m++;
1443 
1444 	/* paranoia */
1445 	ASSERT((caddr_t)&rsp[m] <= buf + buflen);
1446 
1447 #ifdef DEBUG
1448 	{
1449 		int	j;
1450 
1451 		DRMACH_PR("mc idle register address list:");
1452 		isp = (drmach_mc_idle_script_t *)buf;
1453 		DRMACH_PR("source mc idle addr 0x%llx, mem id %p",
1454 			isp[0].idle_addr, isp[0].mem);
1455 		DRMACH_PR("target mc idle addr 0x%llx, mem id %p",
1456 			isp[1].idle_addr, isp[1].mem);
1457 		ASSERT(isp[2].idle_addr == 0);
1458 
1459 		DRMACH_PR("copy-rename script:");
1460 		for (j = 0; j < m; j++) {
1461 			DRMACH_PR("0x%llx = 0x%08x",
1462 				rsp[j].masr_addr, rsp[j].masr);
1463 		}
1464 
1465 		DELAY(1000000);
1466 	}
1467 #endif
1468 
1469 	/* return number of bytes consumed */
1470 	b = (caddr_t)&rsp[m] - buf;
1471 	DRMACH_PR("total number of bytes consumed is %d\n", b);
1472 	ASSERT(b <= buflen);
1473 
1474 #ifdef lint
1475 	buflen = buflen;
1476 #endif
1477 
1478 	return (NULL);
1479 }
1480 
1481 /*
1482  * The routine performs the necessary memory COPY and MC adr SWITCH.
1483  * Both operations MUST be at the same "level" so that the stack is
1484  * maintained correctly between the copy and switch.  The switch
1485  * portion implements a caching mechanism to guarantee the code text
1486  * is cached prior to execution.  This is to guard against possible
1487  * memory access while the MC adr's are being modified.
1488  *
1489  * IMPORTANT: The _drmach_copy_rename_end() function must immediately
1490  * follow drmach_copy_rename_prog__relocatable() so that the correct
1491  * "length" of the drmach_copy_rename_prog__relocatable can be
1492  * calculated.  This routine MUST be a LEAF function, i.e. it can
1493  * make NO function calls, primarily for two reasons:
1494  *
1495  *	1. We must keep the stack consistent across the "switch".
1496  *	2. Function calls are compiled to relative offsets, and
1497  *	   we execute this function we'll be executing it from
1498  *	   a copied version in a different area of memory, thus
1499  *	   the relative offsets will be bogus.
1500  *
1501  * Moreover, it must have the "__relocatable" suffix to inform DTrace
1502  * providers (and anything else, for that matter) that this
1503  * function's text is manually relocated elsewhere before it is
1504  * executed.  That is, it cannot be safely instrumented with any
1505  * methodology that is PC-relative.
1506  */
1507 static void
1508 drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog)
1509 {
1510 	extern void drmach_exec_script_il(drmach_rename_script_t *rsp);
1511 
1512 	drmach_mc_idle_script_t		*isp;
1513 	struct memlist			*ml;
1514 	int				csize;
1515 	int				lnsize;
1516 	uint64_t			caddr;
1517 
1518 	isp = (drmach_mc_idle_script_t *)prog->data;
1519 
1520 	caddr = ecache_flushaddr;
1521 	csize = (cpunodes[CPU->cpu_id].ecache_size << 1);
1522 	lnsize = cpunodes[CPU->cpu_id].ecache_linesize;
1523 
1524 	/*
1525 	 * DO COPY.
1526 	 */
1527 	for (ml = prog->c_ml; ml; ml = ml->next) {
1528 		uint64_t	s_pa, t_pa;
1529 		uint64_t	nbytes;
1530 
1531 		s_pa = prog->s_copybasepa + ml->address;
1532 		t_pa = prog->t_copybasepa + ml->address;
1533 		nbytes = ml->size;
1534 
1535 		while (nbytes != 0ull) {
1536 			/*
1537 			 * This copy does NOT use an ASI
1538 			 * that avoids the Ecache, therefore
1539 			 * the dst_pa addresses may remain
1540 			 * in our Ecache after the dst_pa
1541 			 * has been removed from the system.
1542 			 * A subsequent write-back to memory
1543 			 * will cause an ARB-stop because the
1544 			 * physical address no longer exists
1545 			 * in the system. Therefore we must
1546 			 * flush out local Ecache after we
1547 			 * finish the copy.
1548 			 */
1549 
1550 			/* copy 32 bytes at src_pa to dst_pa */
1551 			bcopy32_il(s_pa, t_pa);
1552 
1553 			/* increment by 32 bytes */
1554 			s_pa += (4 * sizeof (uint64_t));
1555 			t_pa += (4 * sizeof (uint64_t));
1556 
1557 			/* decrement by 32 bytes */
1558 			nbytes -= (4 * sizeof (uint64_t));
1559 		}
1560 	}
1561 
1562 	/*
1563 	 * Since bcopy32_il() does NOT use an ASI to bypass
1564 	 * the Ecache, we need to flush our Ecache after
1565 	 * the copy is complete.
1566 	 */
1567 	flush_ecache_il(caddr, csize, lnsize);		/* inline version */
1568 
1569 	/*
1570 	 * Wait for MCs to go idle.
1571 	 */
1572 	do {
1573 		register int	t = 10;
1574 		register uint_t	v;
1575 
1576 		/* loop t cycles waiting for each mc to indicate it's idle */
1577 		do {
1578 			v = ldphysio_il(isp->idle_addr)
1579 				& STARFIRE_MC_IDLE_MASK;
1580 
1581 		} while (v != STARFIRE_MC_IDLE_MASK && t-- > 0);
1582 
1583 		/* bailout if timedout */
1584 		if (t <= 0) {
1585 			prog->restless_mc = isp->mem;
1586 			return;
1587 		}
1588 
1589 		isp += 1;
1590 
1591 		/* stop if terminating zero has been reached */
1592 	} while (isp->idle_addr != 0);
1593 
1594 	/* advance passed terminating zero */
1595 	isp += 1;
1596 
1597 	/*
1598 	 * The following inline assembly routine caches
1599 	 * the rename script and then caches the code that
1600 	 * will do the rename.  This is necessary
1601 	 * so that we don't have any memory references during
1602 	 * the reprogramming.  We accomplish this by first
1603 	 * jumping through the code to guarantee it's cached
1604 	 * before we actually execute it.
1605 	 */
1606 	drmach_exec_script_il((drmach_rename_script_t *)isp);
1607 }
1608 
/*
 * Marker function with an empty body.
 *
 * IMPORTANT:	It MUST be located immediately after
 *		drmach_copy_rename_prog__relocatable(); the distance
 *		between the two function addresses is used as the size
 *		of the relocatable routine.  Note that this assumes the
 *		compiler keeps these functions in the order in which
 *		they appear :-o
 */
static void
drmach_copy_rename_end(void)
{
}
1620 
1621 sbd_error_t *
1622 drmach_copy_rename_init(drmachid_t t_id, uint64_t t_slice_offset,
1623 	drmachid_t s_id, struct memlist *c_ml, drmachid_t *pgm_id)
1624 {
1625 	drmach_device_t	*s_mem;
1626 	drmach_device_t	*t_mem;
1627 	struct memlist	*x_ml;
1628 	uint64_t	off_mask, s_copybasepa, t_copybasepa, t_basepa;
1629 	int		len;
1630 	caddr_t		bp, wp;
1631 	pda_handle_t	ph;
1632 	sbd_error_t	*err;
1633 	drmach_copy_rename_program_t *prog;
1634 
1635 	if (!DRMACH_IS_MEM_ID(s_id))
1636 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1637 	if (!DRMACH_IS_MEM_ID(t_id))
1638 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1639 	s_mem = s_id;
1640 	t_mem = t_id;
1641 
1642 	/* get starting physical address of target memory */
1643 	err = drmach_mem_get_base_physaddr(t_id, &t_basepa);
1644 	if (err)
1645 		return (err);
1646 
1647 	/* calculate slice offset mask from slice size */
1648 	off_mask = mc_get_mem_alignment() - 1;
1649 
1650 	/* calculate source and target base pa */
1651 	s_copybasepa = c_ml->address;
1652 	t_copybasepa = t_basepa + ((c_ml->address & off_mask) - t_slice_offset);
1653 
1654 	/* paranoia */
1655 	ASSERT((c_ml->address & off_mask) >= t_slice_offset);
1656 
1657 	/* adjust copy memlist addresses to be relative to copy base pa */
1658 	x_ml = c_ml;
1659 	while (x_ml != NULL) {
1660 		x_ml->address -= s_copybasepa;
1661 		x_ml = x_ml->next;
1662 	}
1663 
1664 #ifdef DEBUG
1665 	{
1666 	uint64_t s_basepa, s_size, t_size;
1667 
1668 	x_ml = c_ml;
1669 	while (x_ml->next != NULL)
1670 		x_ml = x_ml->next;
1671 
1672 	DRMACH_PR("source copy span: base pa 0x%llx, end pa 0x%llx\n",
1673 		s_copybasepa,
1674 		s_copybasepa + x_ml->address + x_ml->size);
1675 
1676 	DRMACH_PR("target copy span: base pa 0x%llx, end pa 0x%llx\n",
1677 		t_copybasepa,
1678 		t_copybasepa + x_ml->address + x_ml->size);
1679 
1680 	DRMACH_PR("copy memlist (relative to copy base pa):\n");
1681 	MEMLIST_DUMP(c_ml);
1682 
1683 	err = drmach_mem_get_base_physaddr(s_id, &s_basepa);
1684 	ASSERT(err == NULL);
1685 
1686 	err = drmach_mem_get_size(s_id, &s_size);
1687 	ASSERT(err == NULL);
1688 
1689 	err = drmach_mem_get_size(t_id, &t_size);
1690 	ASSERT(err == NULL);
1691 
1692 	DRMACH_PR("current source base pa 0x%llx, size 0x%llx\n",
1693 		s_basepa, s_size);
1694 	DRMACH_PR("current target base pa 0x%llx, size 0x%llx\n",
1695 		t_basepa, t_size);
1696 
1697 	ASSERT(s_copybasepa + x_ml->address + x_ml->size <= s_basepa + s_size);
1698 	ASSERT(t_copybasepa + x_ml->address + x_ml->size <= t_basepa + t_size);
1699 	}
1700 #endif
1701 
1702 	ph = drmach_pda_open();
1703 	if (ph == NULL)
1704 		return (DRMACH_INTERNAL_ERROR());
1705 
1706 	/*
1707 	 * bp will be page aligned, since we're calling
1708 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
1709 	 */
1710 	wp = bp = kmem_zalloc(PAGESIZE, KM_SLEEP);
1711 
1712 	/* allocate space for copy rename struct */
1713 	len = sizeof (drmach_copy_rename_program_t);
1714 	DRMACH_PR("prog = 0x%p, header len %d\n", wp, len);
1715 	prog = (drmach_copy_rename_program_t *)wp;
1716 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1717 
1718 	/*
1719 	 * Copy the code for the copy-rename routine into
1720 	 * a page aligned piece of memory.  We do this to guarantee
1721 	 * that we're executing within the same page and thus reduce
1722 	 * the possibility of cache collisions between different
1723 	 * pages.
1724 	 */
1725 	len = (int)((ulong_t)drmach_copy_rename_end -
1726 		    (ulong_t)drmach_copy_rename_prog__relocatable);
1727 	ASSERT(wp + len < bp + PAGESIZE);
1728 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
1729 
1730 	DRMACH_PR("copy-rename function 0x%p, len %d\n", wp, len);
1731 	prog->run = (void (*)())wp;
1732 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1733 
1734 	/*
1735 	 * Prepare data page that will contain script of
1736 	 * operations to perform during copy-rename.
1737 	 * Allocate temporary buffer to hold script.
1738 	 */
1739 	err = drmach_prep_rename_script(s_mem, t_mem, t_slice_offset,
1740 		wp, PAGESIZE - (wp - bp));
1741 	if (err) {
1742 		(void) drmach_copy_rename_fini(prog);
1743 		return (err);
1744 	}
1745 
1746 	DRMACH_PR("copy-rename script 0x%p, len %d\n", wp, len);
1747 	prog->data = wp;
1748 	wp += (len + ecache_alignsize - 1) & ~ (ecache_alignsize - 1);
1749 
1750 	prog->ph = ph;
1751 	prog->s_copybasepa = s_copybasepa;
1752 	prog->t_copybasepa = t_copybasepa;
1753 	prog->c_ml = c_ml;
1754 	*pgm_id = prog;
1755 
1756 	return (NULL);
1757 }
1758 
1759 sbd_error_t *
1760 drmach_copy_rename_fini(drmachid_t id)
1761 {
1762 	drmach_copy_rename_program_t	*prog = id;
1763 	sbd_error_t			*err = NULL;
1764 
1765 	if (prog->c_ml != NULL)
1766 		memlist_delete(prog->c_ml);
1767 
1768 	if (prog->ph != NULL)
1769 		pda_close(prog->ph);
1770 
1771 	if (prog->restless_mc != 0) {
1772 		cmn_err(CE_WARN, "MC did not idle; OBP Node 0x%x",
1773 			(uint_t)drmach_node_get_dnode(prog->restless_mc->node));
1774 
1775 		err = DRMACH_INTERNAL_ERROR();
1776 	}
1777 
1778 	kmem_free(prog, PAGESIZE);
1779 
1780 	return (err);
1781 }
1782 
1783 static sbd_error_t *
1784 drmach_io_new(drmach_device_t *dp)
1785 {
1786 	static sbd_error_t *drmach_io_release(drmachid_t);
1787 	static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
1788 
1789 	sbd_error_t	*err;
1790 	int		 portid;
1791 
1792 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
1793 	if (err == NULL) {
1794 		ASSERT(portid & 0x40);
1795 		dp->unum = portid & 1;
1796 	}
1797 
1798 	dp->cm.isa = (void *)drmach_io_new;
1799 	dp->cm.release = drmach_io_release;
1800 	dp->cm.status = drmach_io_status;
1801 
1802 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
1803 
1804 	return (err);
1805 }
1806 
1807 static void
1808 drmach_iopc_op(pda_handle_t ph, drmach_iopc_op_t op)
1809 {
1810 	register int b;
1811 
1812 	for (b = 0; b < MAX_BOARDS; b++) {
1813 		int		p;
1814 		ushort_t	bda_ioc;
1815 		board_desc_t	*bdesc;
1816 
1817 		if (pda_board_present(ph, b) == 0)
1818 			continue;
1819 
1820 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
1821 		/*
1822 		 * Update PCs for IOCs.
1823 		 */
1824 		bda_ioc = bdesc->bda_ioc;
1825 		for (p = 0; p < MAX_IOCS; p++) {
1826 			u_longlong_t	idle_addr;
1827 			uchar_t		value;
1828 
1829 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
1830 				continue;
1831 
1832 			idle_addr = STARFIRE_BB_PC_ADDR(b, p, 1);
1833 
1834 			switch (op) {
1835 			case DO_PAUSE:
1836 				value = STARFIRE_BB_PC_PAUSE(p);
1837 				break;
1838 
1839 			case DO_IDLE:
1840 				value = STARFIRE_BB_PC_IDLE(p);
1841 				break;
1842 
1843 			case DO_UNPAUSE:
1844 				value = ldbphysio(idle_addr);
1845 				value &= ~STARFIRE_BB_PC_PAUSE(p);
1846 				break;
1847 
1848 			case DO_UNIDLE:
1849 				value = ldbphysio(idle_addr);
1850 				value &= ~STARFIRE_BB_PC_IDLE(p);
1851 				break;
1852 
1853 			default:
1854 				cmn_err(CE_PANIC,
1855 					"drmach_iopc_op: unknown op (%d)",
1856 					(int)op);
1857 				/*NOTREACHED*/
1858 			}
1859 			stbphysio(idle_addr, value);
1860 		}
1861 	}
1862 }
1863 
1864 void
1865 drmach_copy_rename(drmachid_t id)
1866 {
1867 	drmach_copy_rename_program_t	*prog = id;
1868 	uint64_t			neer;
1869 
1870 	/*
1871 	 * UPA IDLE
1872 	 * Protocol = PAUSE -> IDLE -> UNPAUSE
1873 	 * In reality since we only "idle" the IOPCs it's sufficient
1874 	 * to just issue the IDLE operation since (in theory) all IOPCs
1875 	 * in the field are PC6.  However, we'll be robust and do the
1876 	 * proper workaround protocol so that we never have to worry!
1877 	 */
1878 	drmach_iopc_op(prog->ph, DO_PAUSE);
1879 	drmach_iopc_op(prog->ph, DO_IDLE);
1880 	DELAY(100);
1881 	drmach_iopc_op(prog->ph, DO_UNPAUSE);
1882 	DELAY(100);
1883 
1884 	/* disable CE reporting */
1885 	neer = get_error_enable();
1886 	set_error_enable(neer & ~EER_CEEN);
1887 
1888 	/* run the copy/rename program */
1889 	prog->run(prog);
1890 
1891 	/* enable CE reporting */
1892 	set_error_enable(neer);
1893 
1894 	/*
1895 	 * UPA UNIDLE
1896 	 * Protocol = UNIDLE
1897 	 */
1898 	drmach_iopc_op(prog->ph, DO_UNIDLE);
1899 	DELAY(100);
1900 }
1901 
1902 /*
1903  * The counter-timer and perf-counter nodes are not being cleaned
1904  * up after a board that was present at start of day is detached.
1905  * If the board has become unconfigured with this operation, walk
1906  * the prom tree and find all counter-timer and perf-counter nodes
1907  * that have the same board number as the board that was just
1908  * unconfigured and remove them.
1909  */
1910 static sbd_error_t *
1911 drmach_remove_counter_nodes(drmachid_t id)
1912 {
1913 	int		num;
1914 	char		name[OBP_MAXDRVNAME];
1915 	dnode_t		child;
1916 	dev_info_t	*dip;
1917 	sbd_error_t	*err;
1918 	drmach_status_t	stat;
1919 	drmach_board_t	*bp;
1920 
1921 	if (!DRMACH_IS_BOARD_ID(id)) {
1922 		return (drerr_new(0, ESTF_INAPPROP, NULL));
1923 	}
1924 
1925 	if ((err = drmach_board_status(id, &stat)) != NULL) {
1926 		return (err);
1927 	}
1928 
1929 	/*
1930 	 * Only clean up the counter-timer and perf-counter
1931 	 * nodes when the entire board is unconfigured.
1932 	 */
1933 	if (stat.configured) {
1934 		return (NULL);
1935 	}
1936 
1937 	bp = (drmach_board_t *)id;
1938 
1939 	err = NULL;
1940 
1941 	for (child = prom_childnode(prom_rootnode()); child != OBP_NONODE;
1942 	    child = prom_nextnode(child)) {
1943 
1944 		if (prom_getprop(child, OBP_BOARDNUM, (caddr_t)&num) == -1) {
1945 			continue;
1946 		}
1947 
1948 		if (bp->bnum != num) {
1949 			continue;
1950 		}
1951 
1952 		if (prom_getprop(child, OBP_NAME, (caddr_t)name) == -1) {
1953 			continue;
1954 		}
1955 
1956 		if (strncmp(name, MISC_COUNTER_TIMER_DEVNAME, OBP_MAXDRVNAME) &&
1957 		    strncmp(name, MISC_PERF_COUNTER_DEVNAME, OBP_MAXDRVNAME)) {
1958 				continue;
1959 		}
1960 
1961 		/* Root node doesn't have to be held */
1962 		dip = e_ddi_nodeid_to_dip(child);
1963 
1964 		/*
1965 		 * If the node is only in the OBP tree, then
1966 		 * we don't have to remove it.
1967 		 */
1968 		if (dip) {
1969 			dev_info_t *fdip = NULL;
1970 
1971 			DRMACH_PR("removing %s devinfo node\n", name);
1972 
1973 			e_ddi_branch_hold(dip);
1974 			ddi_release_devi(dip); /* held in e_ddi_nodeid_to_dip */
1975 
1976 			if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1977 				char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1978 
1979 				/*
1980 				 * If non-NULL, fdip is held and must be
1981 				 * released.
1982 				 */
1983 				if (fdip != NULL) {
1984 					(void) ddi_pathname(fdip, path);
1985 					ddi_release_devi(fdip);
1986 				} else {
1987 					(void) ddi_pathname(dip, path);
1988 				}
1989 
1990 				err = drerr_new(1, ESTF_DRVFAIL, path);
1991 				kmem_free(path, MAXPATHLEN);
1992 				e_ddi_branch_rele(dip);
1993 				break;
1994 			}
1995 		}
1996 	}
1997 
1998 	return (err);
1999 }
2000 
2001 /*ARGSUSED*/
2002 sbd_error_t *
2003 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
2004 {
2005 	/* allow status and ncm operations to always succeed */
2006 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
2007 		return (NULL);
2008 	}
2009 
2010 	/* check all other commands for the required option string */
2011 	if ((opts->size > 0) && (opts->copts != NULL)) {
2012 
2013 		DRMACH_PR("platform options: %s\n", opts->copts);
2014 
2015 		if (strstr(opts->copts, "xfdr") != NULL) {
2016 			return (NULL);
2017 		}
2018 	}
2019 
2020 	return (drerr_new(0, ESTF_SUPPORT, NULL));
2021 }
2022 
2023 /*ARGSUSED*/
2024 sbd_error_t *
2025 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
2026 {
2027 	sbd_error_t	*err = NULL;
2028 
2029 	switch (cmd) {
2030 	case SBD_CMD_UNCONFIGURE:
2031 
2032 		err = drmach_remove_counter_nodes(id);
2033 		break;
2034 
2035 	case SBD_CMD_CONFIGURE:
2036 	case SBD_CMD_DISCONNECT:
2037 	case SBD_CMD_CONNECT:
2038 	case SBD_CMD_GETNCM:
2039 	case SBD_CMD_STATUS:
2040 		break;
2041 
2042 	default:
2043 		break;
2044 	}
2045 
2046 	return (err);
2047 }
2048 
2049 sbd_error_t *
2050 drmach_board_assign(int bnum, drmachid_t *id)
2051 {
2052 	sbd_error_t	*err;
2053 
2054 	if (!drmach_initialized && drmach_init() == -1) {
2055 		err = DRMACH_INTERNAL_ERROR();
2056 	} else if (drmach_array_get(drmach_boards, bnum, id) == -1) {
2057 		err = drerr_new(1, ESTF_BNUM, "%d", bnum);
2058 	} else if (*id != NULL) {
2059 		err = NULL;
2060 	} else {
2061 		drmach_board_t	*bp;
2062 
2063 		*id  = (drmachid_t)drmach_board_new(bnum);
2064 		bp = *id;
2065 		bp->assigned = 1;
2066 		err = NULL;
2067 	}
2068 
2069 	return (err);
2070 }
2071 
2072 static int
2073 drmach_attach_board(void *arg)
2074 {
2075 	drmach_board_t	*obj = (drmach_board_t *)arg;
2076 	cpuset_t	cset;
2077 	int		retval;
2078 
2079 	/*
2080 	 * OBP disables traps during the board probe.
2081 	 * So, in order to prevent cross-call/cross-trap timeouts,
2082 	 * and thus panics, we effectively block anybody from
2083 	 * issuing xc's/xt's by doing a promsafe_xc_attention.
2084 	 * In the previous version of Starfire DR (2.6), a timeout
2085 	 * suspension mechanism was implemented in the send-mondo
2086 	 * assembly.  That mechanism is unnecessary with the
2087 	 * existence of xc_attention/xc_dismissed.
2088 	 */
2089 	cset = cpu_ready_set;
2090 	promsafe_xc_attention(cset);
2091 
2092 	retval = prom_starfire_add_brd(obj->connect_cpuid);
2093 
2094 	xc_dismissed(cset);
2095 
2096 	return (retval);
2097 }
2098 
2099 sbd_error_t *
2100 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
2101 {
2102 	drmach_board_t	*obj = (drmach_board_t *)id;
2103 	int		retval;
2104 	sbd_error_t	*err;
2105 	char		*cptr, *copts;
2106 
2107 	if (!DRMACH_IS_BOARD_ID(id))
2108 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2109 
2110 	if (opts->size > 0)
2111 		copts = opts->copts;
2112 
2113 	if ((cptr = strstr(copts, "cpuid=")) != NULL) {
2114 		int cpuid;
2115 
2116 		cptr += strlen("cpuid=");
2117 		cpuid = stoi(&cptr);
2118 
2119 		if (DRMACH_CPUID2BNUM(cpuid) == obj->bnum) {
2120 			obj->connect_cpuid = cpuid;
2121 			obj->assigned = 1;
2122 		} else
2123 			return (drerr_new(1, ESTF_SETCPUVAL, "%d", cpuid));
2124 	} else {
2125 		/* cpuid was not specified */
2126 		obj->connect_cpuid = -1;
2127 	}
2128 
2129 	if (obj->connect_cpuid == -1) {
2130 		err =  drerr_new(1, ESTF_NOCPUID, obj->cm.name);
2131 		return (err);
2132 	}
2133 
2134 	cmn_err(CE_CONT, "DRMACH: PROM attach %s CPU %d\n",
2135 		obj->cm.name, obj->connect_cpuid);
2136 
2137 	retval = prom_tree_update(drmach_attach_board, obj);
2138 
2139 	if (retval == 0)
2140 		err = NULL;
2141 	else {
2142 		cmn_err(CE_WARN, "prom error: prom_starfire_add_brd(%d) "
2143 			"returned %d", obj->connect_cpuid, retval);
2144 
2145 		err = drerr_new(1, ESTF_PROBE, obj->cm.name);
2146 	}
2147 
2148 	obj->connect_cpuid = -1;
2149 
2150 	return (err);
2151 }
2152 
2153 /*ARGSUSED*/
2154 sbd_error_t *
2155 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
2156 {
2157 	drmach_board_t		*bp;
2158 	int			rv;
2159 	int			d_idx;	/* device index */
2160 	drmachid_t		d_id;	/* device ID */
2161 	sbd_error_t		*err;
2162 
2163 	if (!DRMACH_IS_BOARD_ID(id))
2164 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2165 
2166 	bp = id;
2167 
2168 	/*
2169 	 * We need to make sure all of the board's device nodes
2170 	 * have been removed from the Solaris device tree before
2171 	 * continuing with the disconnect. Otherwise, we could
2172 	 * disconnect the board and remove the OBP device tree
2173 	 * nodes with Solaris device tree nodes remaining.
2174 	 *
2175 	 * On Starfire, Solaris device tree nodes are deleted
2176 	 * during unconfigure by drmach_unconfigure(). It's
2177 	 * necessary to do this here because drmach_unconfigure()
2178 	 * failures are not handled during unconfigure.
2179 	 */
2180 	if (bp->devices) {
2181 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
2182 		while (rv == 0) {
2183 			err = drmach_unconfigure(d_id, DRMACH_DEVI_REMOVE);
2184 			if (err)
2185 				return (err);
2186 
2187 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
2188 		}
2189 	}
2190 
2191 	/*
2192 	 * Starfire board Solaris device tree counter nodes,
2193 	 * which are only present on start-of-day boards, are
2194 	 * removed in the dr_post_op() code flow after the
2195 	 * board is unconfigured. We call the counter node
2196 	 * removal function here because unconfigure errors
2197 	 * can cause the dr_post_op() function to be skipped
2198 	 * after an unconfigure operation even though all of
2199 	 * the board's devices have been transitioned to the
2200 	 * unconfigured state.
2201 	 */
2202 	err = drmach_remove_counter_nodes(id);
2203 	if (err)
2204 		return (err);
2205 
2206 	return (NULL);
2207 }
2208 
2209 static int
2210 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
2211 {
2212 	drmach_node_t			*node = args->node;
2213 	drmach_board_cb_data_t		*data = args->data;
2214 	drmach_board_t			*obj = data->obj;
2215 
2216 	int		 rv;
2217 	int		 bnum;
2218 	drmach_device_t	*device;
2219 
2220 	rv = drmach_node_get_prop(node, OBP_BOARDNUM, &bnum);
2221 	if (rv) {
2222 		/*
2223 		 * if the node does not have a board# property, then
2224 		 * by that information alone it is known that drmach
2225 		 * is not interested in it.
2226 		 */
2227 		return (0);
2228 	} else if (bnum != obj->bnum)
2229 		return (0);
2230 
2231 	/*
2232 	 * Create a device data structure from this node data.
2233 	 * The call may yield nothing if the node is not of interest
2234 	 * to drmach.
2235 	 */
2236 	data->err = drmach_device_new(node, obj, &device);
2237 	if (data->err)
2238 		return (-1);
2239 	else if (device == NULL) {
2240 		/*
2241 		 * drmach_device_new examined the node we passed in
2242 		 * and determined that it was one not of interest to
2243 		 * drmach.  So, it is skipped.
2244 		 */
2245 		return (0);
2246 	}
2247 
2248 	rv = drmach_array_set(obj->devices, data->ndevs++, device);
2249 	if (rv) {
2250 		drmach_device_dispose(device);
2251 		data->err = DRMACH_INTERNAL_ERROR();
2252 		return (-1);
2253 	}
2254 
2255 	data->err = (*data->found)(data->a, device->type, device->unum, device);
2256 	return (data->err == NULL ? 0 : -1);
2257 }
2258 
2259 sbd_error_t *
2260 drmach_board_find_devices(drmachid_t id, void *a,
2261 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
2262 {
2263 	extern int		 plat_max_cpu_units_per_board();
2264 	extern int		 plat_max_mem_units_per_board();
2265 	extern int		 plat_max_io_units_per_board();
2266 
2267 	drmach_board_t		*obj = (drmach_board_t *)id;
2268 	sbd_error_t		*err;
2269 	int			 max_devices;
2270 	int			 rv;
2271 	drmach_board_cb_data_t	data;
2272 
2273 	max_devices  = plat_max_cpu_units_per_board();
2274 	max_devices += plat_max_mem_units_per_board();
2275 	max_devices += plat_max_io_units_per_board();
2276 
2277 	obj->devices = drmach_array_new(0, max_devices);
2278 
2279 	data.obj = obj;
2280 	data.ndevs = 0;
2281 	data.found = found;
2282 	data.a = a;
2283 	data.err = NULL;
2284 
2285 	rv = drmach_node_walk(obj->tree, &data, drmach_board_find_devices_cb);
2286 	if (rv == 0)
2287 		err = NULL;
2288 	else {
2289 		drmach_array_dispose(obj->devices, drmach_device_dispose);
2290 		obj->devices = NULL;
2291 
2292 		if (data.err)
2293 			err = data.err;
2294 		else
2295 			err = DRMACH_INTERNAL_ERROR();
2296 	}
2297 
2298 	return (err);
2299 }
2300 
2301 int
2302 drmach_board_lookup(int bnum, drmachid_t *id)
2303 {
2304 	int	rv = 0;
2305 
2306 	if (!drmach_initialized && drmach_init() == -1) {
2307 		*id = 0;
2308 		rv = -1;
2309 	} else if (drmach_array_get(drmach_boards, bnum, id)) {
2310 		*id = 0;
2311 		rv = -1;
2312 	}
2313 	return (rv);
2314 }
2315 
2316 sbd_error_t *
2317 drmach_board_name(int bnum, char *buf, int buflen)
2318 {
2319 	snprintf(buf, buflen, "SB%d", bnum);
2320 	return (NULL);
2321 }
2322 
2323 sbd_error_t *
2324 drmach_board_poweroff(drmachid_t id)
2325 {
2326 	drmach_board_t	*bp;
2327 	sbd_error_t	*err;
2328 	drmach_status_t	 stat;
2329 
2330 	if (!DRMACH_IS_BOARD_ID(id))
2331 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2332 	bp = id;
2333 
2334 	err = drmach_board_status(id, &stat);
2335 	if (err)
2336 		return (err);
2337 	else if (stat.configured || stat.busy)
2338 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2339 	else {
2340 		/* board power off is essentially a noop for Starfire */
2341 		bp->powered = 0;
2342 		return (NULL);
2343 	}
2344 	/*NOTREACHED*/
2345 }
2346 
2347 sbd_error_t *
2348 drmach_board_poweron(drmachid_t id)
2349 {
2350 	drmach_board_t	*bp;
2351 
2352 	if (!DRMACH_IS_BOARD_ID(id))
2353 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2354 	bp = id;
2355 
2356 	/* board power on is essentially a noop for Starfire */
2357 	bp->powered = 1;
2358 
2359 	return (NULL);
2360 }
2361 
2362 static sbd_error_t *
2363 drmach_board_release(drmachid_t id)
2364 {
2365 	if (!DRMACH_IS_BOARD_ID(id))
2366 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2367 	return (NULL);
2368 }
2369 
2370 /*ARGSUSED*/
2371 sbd_error_t *
2372 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
2373 {
2374 	return (NULL);
2375 }
2376 
2377 sbd_error_t *
2378 drmach_board_unassign(drmachid_t id)
2379 {
2380 	drmach_board_t	*bp;
2381 	sbd_error_t	*err;
2382 	drmach_status_t	 stat;
2383 
2384 	if (!DRMACH_IS_BOARD_ID(id))
2385 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2386 	bp = id;
2387 
2388 	err = drmach_board_status(id, &stat);
2389 	if (err)
2390 		return (err);
2391 	else if (stat.configured || stat.busy)
2392 		return (drerr_new(0, ESTF_CONFIGBUSY, bp->cm.name));
2393 	else if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
2394 		return (DRMACH_INTERNAL_ERROR());
2395 	else {
2396 		drmach_board_dispose(bp);
2397 		return (NULL);
2398 	}
2399 	/*NOTREACHED*/
2400 }
2401 
2402 static sbd_error_t *
2403 drmach_cpu_new(drmach_device_t *dp)
2404 {
2405 	static sbd_error_t *drmach_cpu_release(drmachid_t);
2406 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
2407 
2408 	sbd_error_t	*err;
2409 	int		 portid;
2410 
2411 	err = drmach_device_get_prop(dp, "upa-portid", &portid);
2412 	if (err == NULL)
2413 		dp->unum = portid & 3;
2414 
2415 	dp->cm.isa = (void *)drmach_cpu_new;
2416 	dp->cm.release = drmach_cpu_release;
2417 	dp->cm.status = drmach_cpu_status;
2418 
2419 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s%d", dp->type, dp->unum);
2420 
2421 	return (err);
2422 }
2423 
2424 /*
2425  * drmach_cpu_obp_detach()
2426  *  This requires two steps, first, we must put the cpuid into the OBP
2427  *  idle loop (Idle in Program) state.  Then we call OBP to place the CPU
2428  *  into the "Detached" state, which does any special processing to
2429  *  actually detach the cpu, such as flushing ecache, and also ensures
2430  *  that a subsequent breakpoint won't restart the cpu (if it was just in
2431  *  Idle in Program state).
2432  */
2433 static void
2434 drmach_cpu_obp_detach(int cpuid)
2435 {
2436 	/*
2437 	 * Cpu may not be under OBP's control. Eg, if cpu exited to download
2438 	 * helper on a prior attach.
2439 	 */
2440 	if (CPU_SGN_EXISTS(cpuid) &&
2441 			!SGN_CPU_IS_OS(cpuid) &&
2442 			!SGN_CPU_IS_OBP(cpuid)) {
2443 		cmn_err(CE_WARN,
2444 			"unexpected signature (0x%x) for cpu %d",
2445 			get_cpu_sgn(cpuid), cpuid);
2446 	}
2447 
2448 	/*
2449 	 * Now we place the CPU into the "Detached" idle loop in OBP.
2450 	 * This is so that the CPU won't be restarted if we break into
2451 	 * OBP with a breakpoint or BREAK key from the console, and also
2452 	 * if we need to do any special processing, such as flushing the
2453 	 * cpu's ecache, disabling interrupts (by turning of the ET bit in
2454 	 * the PSR) and/or spinning in BBSRAM rather than global memory.
2455 	 */
2456 	DRMACH_PR("prom_starfire_rm_cpu(%d)\n", cpuid);
2457 	prom_starfire_rm_cpu(cpuid);
2458 }
2459 
/*
 * drmach_cpu_obp_is_detached() returns 1 if the cpu has no signature
 * block, or if its signature is an OS signature whose state is
 * detached (SIGBST_DETACHED); otherwise it returns 0.  This routine
 * should only be called after we have asked OBP to detach the CPU.
 * It should NOT be called as a check during any other flow.
 */
static int
drmach_cpu_obp_is_detached(int cpuid)
{
	return (!CPU_SGN_EXISTS(cpuid) ||
	    (SGN_CPU_IS_OS(cpuid) && SGN_CPU_STATE_IS_DETACHED(cpuid)));
}
2475 
2476 static int
2477 drmach_cpu_start(struct cpu *cp)
2478 {
2479 	int		cpuid = cp->cpu_id;
2480 	int		ntries = drmach_cpu_ntries;
2481 	extern void	restart_other_cpu(int);
2482 
2483 	ASSERT(MUTEX_HELD(&cpu_lock));
2484 	ASSERT(cpunodes[cpuid].nodeid != (dnode_t)0);
2485 
2486 	cp->cpu_flags &= ~CPU_POWEROFF;
2487 
2488 	/*
2489 	 * NOTE: restart_other_cpu pauses cpus during the
2490 	 *	 slave cpu start.  This helps to quiesce the
2491 	 *	 bus traffic a bit which makes the tick sync
2492 	 *	 routine in the prom more robust.
2493 	 */
2494 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
2495 
2496 	prom_starfire_add_cpu(cpuid);
2497 
2498 	restart_other_cpu(cpuid);
2499 
2500 	/*
2501 	 * Wait for the cpu to reach its idle thread before
2502 	 * we zap him with a request to blow away the mappings
2503 	 * he (might) have for the drmach_shutdown_asm code
2504 	 * he may have executed on unconfigure.
2505 	 */
2506 	while ((cp->cpu_thread != cp->cpu_idle_thread) && (ntries > 0)) {
2507 		DELAY(drmach_cpu_delay);
2508 		ntries--;
2509 	}
2510 
2511 	DRMACH_PR("waited %d out of %d loops for cpu %d\n",
2512 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cpuid);
2513 
2514 	xt_one(cpuid, vtag_flushpage_tl1,
2515 		(uint64_t)drmach_shutdown_va, (uint64_t)KCONTEXT);
2516 
2517 	return (0);
2518 }
2519 
2520 /*
2521  * A detaching CPU is xcalled with an xtrap to drmach_cpu_stop_self() after
2522  * it has been offlined. The function of this routine is to get the cpu
2523  * spinning in a safe place. The requirement is that the system will not
2524  * reference anything on the detaching board (memory and i/o is detached
2525  * elsewhere) and that the CPU not reference anything on any other board
2526  * in the system.  This isolation is required during and after the writes
2527  * to the domain masks to remove the board from the domain.
2528  *
2529  * To accomplish this isolation the following is done:
2530  *	1) Create a locked mapping to a location in BBSRAM where
2531  *	   the cpu will execute.
2532  *	2) Copy the target function (drmach_shutdown_asm) in which
2533  *	   the cpu will execute into BBSRAM.
2534  *	3) Jump into function with BBSRAM.
2535  *	   Function will:
2536  *	   3.1) Flush its Ecache (displacement).
2537  *	   3.2) Flush its Dcache with HW mechanism.
2538  *	   3.3) Flush its Icache with HW mechanism.
2539  *	   3.4) Flush all valid and _unlocked_ D-TLB entries.
2540  *	   3.5) Flush all valid and _unlocked_ I-TLB entries.
2541  *	   3.6) Clear xt_mb to signal completion. Note: cache line is
2542  *		recovered by drmach_cpu_poweroff().
2543  *	4) Jump into a tight loop.
2544  */
2545 #define	DRMACH_BBSRAM_OFFSET	0x1000
2546 
2547 static void
2548 drmach_cpu_stop_self(void)
2549 {
2550 	int		cpuid = (int)CPU->cpu_id;
2551 	tte_t		tte;
2552 	volatile uint_t	*src, *dst;
2553 	uint_t		funclen;
2554 	uint64_t	bbsram_pa, bbsram_offset;
2555 	uint_t		bbsram_pfn;
2556 	uint64_t	bbsram_addr;
2557 	void		(*bbsram_func)(uint64_t);
2558 	extern void	drmach_shutdown_asm(uint64_t);
2559 	extern void	drmach_shutdown_asm_end(void);
2560 
2561 	funclen = (uint_t)drmach_shutdown_asm_end - (uint_t)drmach_shutdown_asm;
2562 	ASSERT(funclen <= MMU_PAGESIZE);
2563 	/*
2564 	 * We'll start from the 0th's base.
2565 	 */
2566 	bbsram_pa = STARFIRE_UPAID2UPS(cpuid) | STARFIRE_PSI_BASE;
2567 	bbsram_offset = bbsram_pa | 0xfe0ULL;
2568 	bbsram_pa += ldphysio(bbsram_offset) + DRMACH_BBSRAM_OFFSET;
2569 
2570 	bbsram_pfn = (uint_t)(bbsram_pa >> MMU_PAGESHIFT);
2571 
2572 	bbsram_addr = (uint64_t)drmach_shutdown_va;
2573 	drmach_shutdown_asm_mbox->estack = bbsram_addr + (uint64_t)funclen;
2574 
2575 	tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
2576 			TTE_PFN_INTHI(bbsram_pfn);
2577 	tte.tte_intlo = TTE_PFN_INTLO(bbsram_pfn) |
2578 			TTE_HWWR_INT | TTE_PRIV_INT | TTE_LCK_INT;
2579 	sfmmu_dtlb_ld(drmach_shutdown_va, KCONTEXT, &tte);	/* load dtlb */
2580 	sfmmu_itlb_ld(drmach_shutdown_va, KCONTEXT, &tte);	/* load itlb */
2581 
2582 	for (src = (uint_t *)drmach_shutdown_asm, dst = (uint_t *)bbsram_addr;
2583 		src < (uint_t *)drmach_shutdown_asm_end; src++, dst++)
2584 		*dst = *src;
2585 
2586 	bbsram_func = (void (*)())bbsram_addr;
2587 	drmach_shutdown_asm_mbox->flushaddr = ecache_flushaddr;
2588 	drmach_shutdown_asm_mbox->size = (cpunodes[cpuid].ecache_size << 1);
2589 	drmach_shutdown_asm_mbox->linesize = cpunodes[cpuid].ecache_linesize;
2590 	drmach_shutdown_asm_mbox->physaddr
2591 				    = va_to_pa((void *)&drmach_xt_mb[cpuid]);
2592 
2593 	/*
2594 	 * Signal to drmach_cpu_poweroff() is via drmach_xt_mb cleared
2595 	 * by asm code
2596 	 */
2597 
2598 	(*bbsram_func)(va_to_pa((void *)drmach_shutdown_asm_mbox));
2599 }
2600 
2601 static void
2602 drmach_cpu_shutdown_self(void)
2603 {
2604 	cpu_t		*cp = CPU;
2605 	int		cpuid = cp->cpu_id;
2606 	extern void	flush_windows(void);
2607 
2608 	flush_windows();
2609 
2610 	(void) spl8();
2611 
2612 	ASSERT(cp->cpu_intr_actv == 0);
2613 	ASSERT(cp->cpu_thread == cp->cpu_idle_thread);
2614 
2615 	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2616 
2617 	drmach_cpu_stop_self();
2618 
2619 	cmn_err(CE_PANIC, "CPU %d FAILED TO SHUTDOWN", cpuid);
2620 }
2621 
2622 /* a helper routine to keep the math in one place */
2623 static processorid_t
2624 drmach_cpu_calc_id(drmach_device_t *dp)
2625 {
2626 	return (dp->bp->bnum * MAX_CPU_UNITS_PER_BOARD + dp->unum);
2627 }
2628 
2629 /*
2630  * Move bootproc (SIGBCPU) to another cpu.  If dst_cpu is NULL, a
2631  * destination cpu is chosen from the set of cpus not located on the
2632  * same board as the current bootproc cpu.
2633  */
2634 static sbd_error_t *
2635 drmach_cpu_juggle_bootproc(drmach_device_t *dst_cpu)
2636 {
2637 	processorid_t	 cpuid;
2638 	struct cpu	*cp;
2639 	sbd_error_t	*err;
2640 	int		 rv;
2641 
2642 	ASSERT(MUTEX_HELD(&cpu_lock));
2643 
2644 	/* dst_cpu is NULL when target cpu is unspecified. So, pick one. */
2645 	if (dst_cpu == NULL) {
2646 		int avoid_board = DRMACH_CPUID2BNUM(SIGBCPU->cpu_id);
2647 		int max_cpuid = MAX_BOARDS * MAX_CPU_UNITS_PER_BOARD;
2648 
2649 		for (cpuid = 0; cpuid < max_cpuid; cpuid++)
2650 			if (DRMACH_CPUID2BNUM(cpuid) != avoid_board) {
2651 				cp = cpu_get(cpuid);
2652 				if (cp != NULL && cpu_is_online(cp))
2653 					break;
2654 			}
2655 
2656 		if (cpuid == max_cpuid) {
2657 			err = drerr_new(1, ESTF_JUGGLE, NULL);
2658 			return (err);
2659 		}
2660 
2661 		/* else, cp points to the selected target cpu */
2662 	} else {
2663 		cpuid = drmach_cpu_calc_id(dst_cpu);
2664 
2665 		if ((cp = cpu_get(cpuid)) == NULL) {
2666 			err = drerr_new(1, ESTF_NODEV, "%s::%s",
2667 				dst_cpu->bp->cm.name, dst_cpu->cm.name);
2668 			return (err);
2669 		}
2670 
2671 		if (cpuid == SIGBCPU->cpu_id) {
2672 			cmn_err(CE_WARN,
2673 				"SIGBCPU(%d) same as new selection(%d)",
2674 				SIGBCPU->cpu_id, cpuid);
2675 
2676 			/* technically not an error, but a no-op */
2677 			return (NULL);
2678 		}
2679 	}
2680 
2681 	cmn_err(CE_NOTE, "?relocating SIGBCPU from %d to %d",
2682 		SIGBCPU->cpu_id, cpuid);
2683 
2684 	DRMACH_PR("moving SIGBCPU to CPU %d\n", cpuid);
2685 
2686 	/*
2687 	 * Tell OBP to initialize cvc-offset field of new CPU0
2688 	 * so that it's in sync with OBP and cvc_server
2689 	 */
2690 	prom_starfire_init_console(cpuid);
2691 
2692 	/*
2693 	 * Assign cvc to new cpu0's bbsram for I/O.  This has to be
2694 	 * done BEFORE cpu0 is moved via obp, since this logic
2695 	 * will cause obp_helper to switch to a different bbsram for
2696 	 * cvc I/O.  We don't want cvc writing to a buffer from which
2697 	 * nobody will pick up the data!
2698 	 */
2699 	cvc_assign_iocpu(cpuid);
2700 
2701 	rv = prom_starfire_move_cpu0(cpuid);
2702 
2703 	if (rv == 0) {
2704 		SIGBCPU = cp;
2705 
2706 		DRMACH_PR("successfully juggled to CPU %d\n", cpuid);
2707 		return (NULL);
2708 	} else {
2709 		DRMACH_PR("prom error: prom_starfire_move_cpu0(%d) "
2710 			"returned %d\n", cpuid, rv);
2711 
2712 		/*
2713 		 * The move failed, hopefully obp_helper is still back
2714 		 * at the old bootproc.  Move cvc back there.
2715 		 */
2716 		cvc_assign_iocpu(SIGBCPU->cpu_id);
2717 
2718 
2719 		err = drerr_new(1, ESTF_MOVESIGB, "CPU %d", cpuid);
2720 		return (err);
2721 	}
2722 	/*NOTREACHED*/
2723 }
2724 
2725 static sbd_error_t *
2726 drmach_cpu_release(drmachid_t id)
2727 {
2728 	drmach_device_t	*dp;
2729 	processorid_t	 cpuid;
2730 	struct cpu	*cp;
2731 	sbd_error_t	*err;
2732 
2733 	if (!DRMACH_IS_CPU_ID(id))
2734 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2735 	dp = id;
2736 	cpuid = drmach_cpu_calc_id(dp);
2737 
2738 	ASSERT(MUTEX_HELD(&cpu_lock));
2739 
2740 	cp = cpu_get(cpuid);
2741 	if (cp == NULL)
2742 		err = DRMACH_INTERNAL_ERROR();
2743 	else if (SIGBCPU->cpu_id == cp->cpu_id)
2744 		err = drmach_cpu_juggle_bootproc(NULL);
2745 	else
2746 		err = NULL;
2747 
2748 	return (err);
2749 }
2750 
2751 static sbd_error_t *
2752 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
2753 {
2754 	drmach_device_t *dp;
2755 
2756 	ASSERT(DRMACH_IS_CPU_ID(id));
2757 	dp = id;
2758 
2759 	stat->assigned = dp->bp->assigned;
2760 	stat->powered = dp->bp->powered;
2761 	mutex_enter(&cpu_lock);
2762 	stat->configured = (cpu_get(drmach_cpu_calc_id(dp)) != NULL);
2763 	mutex_exit(&cpu_lock);
2764 	stat->busy = dp->busy;
2765 	strncpy(stat->type, dp->type, sizeof (stat->type));
2766 	stat->info[0] = '\0';
2767 
2768 	return (NULL);
2769 }
2770 
2771 sbd_error_t *
2772 drmach_cpu_disconnect(drmachid_t id)
2773 {
2774 	drmach_device_t	*cpu;
2775 	int		 cpuid;
2776 	int		 ntries;
2777 	int		 p;
2778 	u_longlong_t	 pc_addr;
2779 	uchar_t		 rvalue;
2780 
2781 	if (!DRMACH_IS_CPU_ID(id))
2782 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2783 	cpu = id;
2784 
2785 	cpuid = drmach_cpu_calc_id(cpu);
2786 	if (SIGBCPU->cpu_id == cpuid) {
2787 		/* this cpu is SIGBCPU, can't disconnect */
2788 		return (drerr_new(1, ESTF_HASSIGB, "%s::%s",
2789 				cpu->bp->cm.name, cpu->cm.name));
2790 	}
2791 
2792 	/*
2793 	 * Make sure SIGBST_DETACHED is set before
2794 	 * mapping out the sig block.
2795 	 */
2796 	ntries = drmach_cpu_ntries;
2797 	while (!drmach_cpu_obp_is_detached(cpuid) && ntries) {
2798 		DELAY(drmach_cpu_delay);
2799 		ntries--;
2800 	}
2801 	if (!drmach_cpu_obp_is_detached(cpuid)) {
2802 		cmn_err(CE_WARN, "failed to mark cpu %d detached in sigblock",
2803 			cpuid);
2804 	}
2805 
2806 	/* map out signature block */
2807 	if (CPU_SGN_EXISTS(cpuid)) {
2808 		CPU_SGN_MAPOUT(cpuid);
2809 	}
2810 
2811 	/*
2812 	 * We now PC IDLE the processor to guarantee we
2813 	 * stop any transactions from coming from it.
2814 	 */
2815 	p = cpu->unum & 1;
2816 	pc_addr = STARFIRE_BB_PC_ADDR(cpu->bp->bnum, cpu->unum, 0);
2817 
2818 	DRMACH_PR("PC idle cpu %d (addr = 0x%llx, port = %d, p = %d)",
2819 		drmach_cpu_calc_id(cpu), pc_addr, cpu->unum, p);
2820 
2821 	rvalue = ldbphysio(pc_addr);
2822 	rvalue |= STARFIRE_BB_PC_IDLE(p);
2823 	stbphysio(pc_addr, rvalue);
2824 	DELAY(50000);
2825 
2826 	return (NULL);
2827 }
2828 
2829 sbd_error_t *
2830 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
2831 {
2832 	drmach_device_t *cpu;
2833 
2834 	if (!DRMACH_IS_CPU_ID(id))
2835 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2836 	cpu = id;
2837 
2838 	*cpuid = drmach_cpu_calc_id(cpu);
2839 	return (NULL);
2840 }
2841 
2842 sbd_error_t *
2843 drmach_cpu_get_impl(drmachid_t id, int *ip)
2844 {
2845 	drmach_device_t *cpu;
2846 	int		impl;
2847 
2848 	if (!DRMACH_IS_CPU_ID(id))
2849 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2850 
2851 	cpu = id;
2852 
2853 	if (drmach_node_get_prop(cpu->node, "implementation#", &impl) == -1) {
2854 		return (DRMACH_INTERNAL_ERROR());
2855 	}
2856 
2857 	*ip = impl;
2858 
2859 	return (NULL);
2860 }
2861 
2862 void
2863 drmach_cpu_flush_ecache_sync(void)
2864 {
2865 	ASSERT(curthread->t_bound_cpu == CPU);
2866 
2867 	/*
2868 	 * Now let's flush our ecache thereby removing all references
2869 	 * to the target (detaching) memory from all ecache's in
2870 	 * system.
2871 	 */
2872 	cpu_flush_ecache();
2873 
2874 	/*
2875 	 * Delay 100 usec out of paranoia to insure everything
2876 	 * (hardware queues) has drained before we start reprogramming
2877 	 * the hardware.
2878 	 */
2879 	DELAY(100);
2880 }
2881 
2882 sbd_error_t *
2883 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2884 {
2885 	drmach_device_t	*dp;
2886 
2887 	if (!DRMACH_IS_DEVICE_ID(id))
2888 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2889 	dp = id;
2890 
2891 	*dip = drmach_node_get_dip(dp->node);
2892 	return (NULL);
2893 }
2894 
2895 sbd_error_t *
2896 drmach_io_is_attached(drmachid_t id, int *yes)
2897 {
2898 	drmach_device_t *dp;
2899 	dev_info_t	*dip;
2900 	int		state;
2901 
2902 	if (!DRMACH_IS_IO_ID(id))
2903 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2904 	dp = id;
2905 
2906 	dip = drmach_node_get_dip(dp->node);
2907 	if (dip == NULL) {
2908 		*yes = 0;
2909 		return (NULL);
2910 	}
2911 
2912 	state = ddi_get_devstate(dip);
2913 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2914 	    (state == DDI_DEVSTATE_UP));
2915 
2916 	return (NULL);
2917 }
2918 
2919 sbd_error_t *
2920 drmach_io_pre_release(drmachid_t id)
2921 {
2922 	if (!DRMACH_IS_IO_ID(id))
2923 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2924 	return (NULL);
2925 }
2926 
2927 static sbd_error_t *
2928 drmach_io_release(drmachid_t id)
2929 {
2930 	if (!DRMACH_IS_IO_ID(id))
2931 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2932 	return (NULL);
2933 }
2934 
2935 sbd_error_t *
2936 drmach_io_unrelease(drmachid_t id)
2937 {
2938 	if (!DRMACH_IS_IO_ID(id))
2939 		return (drerr_new(0, ESTF_INAPPROP, NULL));
2940 	return (NULL);
2941 }
2942 
2943 /*ARGSUSED*/
2944 sbd_error_t *
2945 drmach_io_post_release(drmachid_t id)
2946 {
2947 	return (NULL);
2948 }
2949 
2950 /*ARGSUSED*/
2951 sbd_error_t *
2952 drmach_io_post_attach(drmachid_t id)
2953 {
2954 	return (NULL);
2955 }
2956 
2957 static sbd_error_t *
2958 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2959 {
2960 	drmach_device_t *dp;
2961 	sbd_error_t	*err;
2962 	int		 configured;
2963 
2964 	ASSERT(DRMACH_IS_IO_ID(id));
2965 	dp = id;
2966 
2967 	err = drmach_io_is_attached(id, &configured);
2968 	if (err)
2969 		return (err);
2970 
2971 	stat->assigned = dp->bp->assigned;
2972 	stat->powered = dp->bp->powered;
2973 	stat->configured = (configured != 0);
2974 	stat->busy = dp->busy;
2975 	strncpy(stat->type, dp->type, sizeof (stat->type));
2976 	stat->info[0] = '\0';
2977 
2978 	return (NULL);
2979 }
2980 
2981 static sbd_error_t *
2982 drmach_mem_new(drmach_device_t *dp)
2983 {
2984 	static sbd_error_t *drmach_mem_release(drmachid_t);
2985 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2986 
2987 	dp->unum = 0;
2988 	dp->cm.isa = (void *)drmach_mem_new;
2989 	dp->cm.release = drmach_mem_release;
2990 	dp->cm.status = drmach_mem_status;
2991 
2992 	snprintf(dp->cm.name, sizeof (dp->cm.name), "%s", dp->type);
2993 
2994 	return (NULL);
2995 }
2996 
2997 sbd_error_t *
2998 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2999 {
3000 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
3001 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
3002 	pda_handle_t	ph;
3003 	int		rv;
3004 
3005 	ASSERT(size != 0);
3006 
3007 	if (!DRMACH_IS_MEM_ID(id))
3008 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3009 
3010 	kcage_range_lock();
3011 	rv = kcage_range_add(basepfn, npages, 1);
3012 	kcage_range_unlock();
3013 	if (rv == ENOMEM) {
3014 		cmn_err(CE_WARN, "%lld megabytes not available to kernel cage",
3015 			(size == 0 ? 0 : size / MBYTE));
3016 	} else if (rv != 0) {
3017 		/* catch this in debug kernels */
3018 		ASSERT(0);
3019 
3020 		cmn_err(CE_WARN, "unexpected kcage_range_add"
3021 			" return value %d", rv);
3022 	}
3023 
3024 	/*
3025 	 * Update the PDA (post2obp) structure with the
3026 	 * range of the newly added memory.
3027 	 */
3028 	ph = drmach_pda_open();
3029 	if (ph != NULL) {
3030 		pda_mem_add_span(ph, basepa, size);
3031 		pda_close(ph);
3032 	}
3033 
3034 	return (NULL);
3035 }
3036 
3037 sbd_error_t *
3038 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
3039 {
3040 	drmach_device_t	*mem = id;
3041 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
3042 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
3043 	uint_t		mcreg;
3044 	sbd_error_t	*err;
3045 	pda_handle_t	ph;
3046 	int		rv;
3047 
3048 	err = drmach_read_mc_asr(id, &mcreg);
3049 	if (err)
3050 		return (err);
3051 	else if (mcreg & STARFIRE_MC_INTERLEAVE_MASK) {
3052 		return (drerr_new(1, ESTF_INTERBOARD, "%s::%s",
3053 				mem->bp->cm.name, mem->cm.name));
3054 	}
3055 
3056 	if (size > 0) {
3057 		kcage_range_lock();
3058 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
3059 		kcage_range_unlock();
3060 		if (rv != 0) {
3061 			cmn_err(CE_WARN,
3062 			    "unexpected kcage_range_delete_post_mem_del"
3063 			    " return value %d", rv);
3064 			return (DRMACH_INTERNAL_ERROR());
3065 		}
3066 	}
3067 
3068 	/*
3069 	 * Update the PDA (post2obp) structure with the
3070 	 * range of removed memory.
3071 	 */
3072 	ph = drmach_pda_open();
3073 	if (ph != NULL) {
3074 		if (size > 0)
3075 			pda_mem_del_span(ph, basepa, size);
3076 
3077 		/* update PDA to board's new mc register settings */
3078 		pda_mem_sync(ph, mem->bp->bnum, 0);
3079 
3080 		pda_close(ph);
3081 	}
3082 
3083 	return (NULL);
3084 }
3085 
3086 /* support routine for enable and disable */
3087 static sbd_error_t *
3088 drmach_mem_update_interconnect(drmachid_t id, uint_t mcreg)
3089 {
3090 	drmach_device_t	*dp;
3091 	pda_handle_t	 ph;
3092 	int		 b;
3093 
3094 	if (!DRMACH_IS_MEM_ID(id))
3095 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3096 	dp = id;
3097 
3098 	ph = drmach_pda_open();
3099 	if (ph == NULL)
3100 		return (DRMACH_INTERNAL_ERROR());
3101 
3102 	for (b = 0; b < MAX_BOARDS; b++) {
3103 		int		p;
3104 		int		rv;
3105 		ushort_t	bda_proc, bda_ioc;
3106 		board_desc_t	*bdesc;
3107 
3108 		if (pda_board_present(ph, b) == 0)
3109 			continue;
3110 
3111 		bdesc = (board_desc_t *)pda_get_board_info(ph, b);
3112 
3113 		/*
3114 		 * Update PCs for CPUs.
3115 		 */
3116 
3117 		/* make sure definition in platmod is in sync with pda */
3118 		ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3119 
3120 		bda_proc = bdesc->bda_proc;
3121 		for (p = 0; p < MAX_PROCMODS; p++) {
3122 			if (BDA_NBL(bda_proc, p) != BDAN_GOOD)
3123 				continue;
3124 
3125 			rv = pc_madr_add(b, dp->bp->bnum, p, mcreg);
3126 			if (rv) {
3127 				pda_close(ph);
3128 				return (DRMACH_INTERNAL_ERROR());
3129 			}
3130 		}
3131 
3132 		/*
3133 		 * Update PCs for IOCs.
3134 		 */
3135 
3136 		/* make sure definition in platmod is in sync with pda */
3137 		ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3138 
3139 		bda_ioc = bdesc->bda_ioc;
3140 		for (p = 0; p < MAX_IOCS; p++) {
3141 			if (BDA_NBL(bda_ioc, p) != BDAN_GOOD)
3142 				continue;
3143 
3144 			rv = pc_madr_add(b, dp->bp->bnum, p + 4, mcreg);
3145 			if (rv) {
3146 				pda_close(ph);
3147 				return (DRMACH_INTERNAL_ERROR());
3148 			}
3149 		}
3150 	}
3151 
3152 	pda_close(ph);
3153 	return (NULL);
3154 }
3155 
3156 sbd_error_t *
3157 drmach_mem_disable(drmachid_t id)
3158 {
3159 	sbd_error_t	*err;
3160 	uint_t		 mcreg;
3161 
3162 	err = drmach_read_mc_asr(id, &mcreg);
3163 	if (err == NULL) {
3164 		ASSERT(mcreg & STARFIRE_MC_MEM_PRESENT_MASK);
3165 
3166 		/* Turn off presence bit. */
3167 		mcreg &= ~STARFIRE_MC_MEM_PRESENT_MASK;
3168 
3169 		err = drmach_mem_update_interconnect(id, mcreg);
3170 		if (err == NULL)
3171 			err = drmach_write_mc_asr(id, mcreg);
3172 	}
3173 
3174 	return (err);
3175 }
3176 
3177 sbd_error_t *
3178 drmach_mem_enable(drmachid_t id)
3179 {
3180 	sbd_error_t	*err;
3181 	uint_t		 mcreg;
3182 
3183 	err = drmach_read_mc_asr(id, &mcreg);
3184 	if (err == NULL) {
3185 		mcreg |= STARFIRE_MC_MEM_PRESENT_MASK;
3186 
3187 		err = drmach_write_mc_asr(id, mcreg);
3188 		if (err == NULL)
3189 			err = drmach_mem_update_interconnect(id, mcreg);
3190 	}
3191 
3192 	return (err);
3193 }
3194 
3195 sbd_error_t *
3196 drmach_mem_get_alignment(drmachid_t id, uint64_t *mask)
3197 {
3198 	drmach_device_t	*mem;
3199 	sbd_error_t	*err;
3200 	dnode_t		 nodeid;
3201 
3202 	if (!DRMACH_IS_MEM_ID(id))
3203 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3204 	mem = id;
3205 
3206 	nodeid = drmach_node_get_dnode(mem->node);
3207 	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE)
3208 		err = DRMACH_INTERNAL_ERROR();
3209 	else {
3210 		uint64_t size;
3211 
3212 		size = mc_get_alignment_mask(nodeid);
3213 		if (size == (uint64_t)-1)
3214 			err = DRMACH_INTERNAL_ERROR();
3215 		else {
3216 			*mask = size - 1;
3217 			err = NULL;
3218 		}
3219 	}
3220 
3221 	return (err);
3222 }
3223 
3224 sbd_error_t *
3225 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
3226 {
3227 	sbd_error_t	*err;
3228 	uint_t		 mcreg;
3229 
3230 	err = drmach_read_mc_asr(id, &mcreg);
3231 	if (err == NULL)
3232 		*pa = mc_asr_to_pa(mcreg);
3233 
3234 	return (err);
3235 }
3236 
3237 /*
3238  * Use of this routine after copy/rename will yield incorrect results,
3239  * because the OBP MEMAVAIL property will not correctly reflect the
3240  * programming of the MCs.
3241  */
3242 sbd_error_t *
3243 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
3244 {
3245 	drmach_device_t	*mem;
3246 	int		rv, i, rlen, rblks;
3247 	sbd_error_t	*err;
3248 	struct memlist	*mlist;
3249 	struct sf_memunit_regspec *rlist;
3250 
3251 	if (!DRMACH_IS_MEM_ID(id))
3252 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3253 	mem = id;
3254 
3255 	err = drmach_device_get_proplen(mem, "dr-available", &rlen);
3256 	if (err)
3257 		return (err);
3258 
3259 	rlist = kmem_zalloc(rlen, KM_SLEEP);
3260 
3261 	err = drmach_device_get_prop(mem, "dr-available", rlist);
3262 	if (err) {
3263 		kmem_free(rlist, rlen);
3264 		return (err);
3265 	}
3266 
3267 	mlist = NULL;
3268 	rblks = rlen / sizeof (struct sf_memunit_regspec);
3269 	for (i = 0; i < rblks; i++) {
3270 		uint64_t	addr, size;
3271 
3272 		addr  = (uint64_t)rlist[i].regspec_addr_hi << 32;
3273 		addr |= (uint64_t)rlist[i].regspec_addr_lo;
3274 		size  = (uint64_t)rlist[i].regspec_size_hi << 32;
3275 		size |= (uint64_t)rlist[i].regspec_size_lo;
3276 
3277 		mlist = memlist_add_span(mlist, addr, size);
3278 	}
3279 
3280 	kmem_free(rlist, rlen);
3281 
3282 	/*
3283 	 * Make sure the incoming memlist doesn't already
3284 	 * intersect with what's present in the system (phys_install).
3285 	 */
3286 	memlist_read_lock();
3287 	rv = memlist_intersect(phys_install, mlist);
3288 	memlist_read_unlock();
3289 	if (rv) {
3290 #ifdef DEBUG
3291 		DRMACH_PR("OBP derived memlist intersects"
3292 			" with phys_install\n");
3293 		memlist_dump(mlist);
3294 
3295 		DRMACH_PR("phys_install memlist:\n");
3296 		memlist_dump(phys_install);
3297 #endif
3298 
3299 		memlist_delete(mlist);
3300 		return (DRMACH_INTERNAL_ERROR());
3301 	}
3302 
3303 #ifdef DEBUG
3304 	DRMACH_PR("OBP derived memlist:");
3305 	memlist_dump(mlist);
3306 #endif
3307 
3308 	*ml = mlist;
3309 	return (NULL);
3310 }
3311 
3312 sbd_error_t *
3313 drmach_mem_get_size(drmachid_t id, uint64_t *bytes)
3314 {
3315 	drmach_device_t	*mem;
3316 	pda_handle_t	ph;
3317 	pgcnt_t		npages;
3318 
3319 	if (!DRMACH_IS_MEM_ID(id))
3320 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3321 	mem = id;
3322 
3323 	ph = drmach_pda_open();
3324 	if (ph == NULL)
3325 		return (DRMACH_INTERNAL_ERROR());
3326 
3327 	npages = pda_get_mem_size(ph, mem->bp->bnum);
3328 	*bytes = (uint64_t)npages << PAGESHIFT;
3329 
3330 	pda_close(ph);
3331 	return (NULL);
3332 }
3333 
3334 sbd_error_t *
3335 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
3336 {
3337 	if (!DRMACH_IS_MEM_ID(id))
3338 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3339 
3340 	*bytes = mc_get_mem_alignment();
3341 	return (NULL);
3342 }
3343 
3344 /* field debugging tool */
3345 processorid_t drmach_mem_cpu_affinity_nail = 0;
3346 
3347 processorid_t
3348 drmach_mem_cpu_affinity(drmachid_t id)
3349 {
3350 	drmach_device_t	*mp;
3351 	drmach_board_t	*bp;
3352 	processorid_t	 cpuid;
3353 
3354 	if (!DRMACH_IS_MEM_ID(id))
3355 		return (CPU_CURRENT);
3356 
3357 	if (drmach_mem_cpu_affinity_nail) {
3358 		cpuid = drmach_mem_cpu_affinity_nail;
3359 
3360 		if (cpuid < 0 || cpuid > NCPU)
3361 			return (CPU_CURRENT);
3362 
3363 		mutex_enter(&cpu_lock);
3364 		if (cpu[cpuid] == NULL || !CPU_ACTIVE(cpu[cpuid]))
3365 			cpuid = CPU_CURRENT;
3366 		mutex_exit(&cpu_lock);
3367 
3368 		return (cpuid);
3369 	}
3370 
3371 	/* try to choose a proc on the target board */
3372 	mp = id;
3373 	bp = mp->bp;
3374 	if (bp->devices) {
3375 		int		rv;
3376 		int		d_idx;
3377 		drmachid_t	d_id;
3378 
3379 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
3380 		while (rv == 0) {
3381 			if (DRMACH_IS_CPU_ID(d_id)) {
3382 				cpuid = drmach_cpu_calc_id(d_id);
3383 
3384 				mutex_enter(&cpu_lock);
3385 				if (cpu[cpuid] && CPU_ACTIVE(cpu[cpuid])) {
3386 					mutex_exit(&cpu_lock);
3387 					DRMACH_PR("drmach_mem_cpu_affinity: "
3388 					    "selected cpuid=%d\n", cpuid);
3389 					return (cpuid);
3390 				} else {
3391 					mutex_exit(&cpu_lock);
3392 				}
3393 			}
3394 
3395 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
3396 		}
3397 	}
3398 
3399 	/* otherwise, this proc, wherever it is */
3400 	DRMACH_PR("drmach_mem_cpu_affinity: using default CPU_CURRENT\n");
3401 
3402 	return (CPU_CURRENT);
3403 }
3404 
3405 static sbd_error_t *
3406 drmach_mem_release(drmachid_t id)
3407 {
3408 	if (!DRMACH_IS_MEM_ID(id))
3409 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3410 	return (NULL);
3411 }
3412 
3413 static sbd_error_t *
3414 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
3415 {
3416 	drmach_device_t *dp;
3417 	sbd_error_t	*err;
3418 	uint64_t	 pa, slice_size;
3419 	struct memlist	*ml;
3420 
3421 	ASSERT(DRMACH_IS_MEM_ID(id));
3422 	dp = id;
3423 
3424 	/* get starting physical address of target memory */
3425 	err = drmach_mem_get_base_physaddr(id, &pa);
3426 	if (err)
3427 		return (err);
3428 
3429 	/* round down to slice boundary */
3430 	slice_size = mc_get_mem_alignment();
3431 	pa &= ~ (slice_size - 1);
3432 
3433 	/* stop at first span that is in slice */
3434 	memlist_read_lock();
3435 	for (ml = phys_install; ml; ml = ml->next)
3436 		if (ml->address >= pa && ml->address < pa + slice_size)
3437 			break;
3438 	memlist_read_unlock();
3439 
3440 	stat->assigned = dp->bp->assigned;
3441 	stat->powered = dp->bp->powered;
3442 	stat->configured = (ml != NULL);
3443 	stat->busy = dp->busy;
3444 	strncpy(stat->type, dp->type, sizeof (stat->type));
3445 	stat->info[0] = '\0';
3446 
3447 	return (NULL);
3448 }
3449 
3450 static int
3451 drmach_detach_board(void *arg)
3452 {
3453 	cpuset_t	cset;
3454 	int		retval;
3455 	drmach_board_t	*bp = (drmach_board_t *)arg;
3456 
3457 	cset = cpu_ready_set;
3458 	promsafe_xc_attention(cset);
3459 
3460 	retval = prom_starfire_rm_brd(bp->bnum);
3461 
3462 	xc_dismissed(cset);
3463 
3464 	return (retval);
3465 }
3466 
3467 sbd_error_t *
3468 drmach_board_deprobe(drmachid_t id)
3469 {
3470 	drmach_board_t	*bp;
3471 	int		 retval;
3472 
3473 	if (!DRMACH_IS_BOARD_ID(id))
3474 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3475 	bp = id;
3476 
3477 	cmn_err(CE_CONT, "DR: PROM detach board %d\n", bp->bnum);
3478 
3479 	retval = prom_tree_update(drmach_detach_board, bp);
3480 
3481 	if (retval == 0)
3482 		return (NULL);
3483 	else {
3484 		cmn_err(CE_WARN, "prom error: prom_starfire_rm_brd(%d) "
3485 			"returned %d", bp->bnum, retval);
3486 		return (drerr_new(1, ESTF_DEPROBE, "%s", bp->cm.name));
3487 	}
3488 }
3489 
3490 /*ARGSUSED*/
3491 static sbd_error_t *
3492 drmach_pt_juggle_bootproc(drmachid_t id, drmach_opts_t *opts)
3493 {
3494 	drmach_device_t	*cpu;
3495 	sbd_error_t	*err;
3496 
3497 	if (!DRMACH_IS_CPU_ID(id))
3498 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3499 	cpu = id;
3500 
3501 	mutex_enter(&cpu_lock);
3502 
3503 	err = drmach_cpu_juggle_bootproc(cpu);
3504 
3505 	mutex_exit(&cpu_lock);
3506 
3507 	return (err);
3508 }
3509 
3510 /*ARGSUSED*/
3511 static sbd_error_t *
3512 drmach_pt_dump_pdainfo(drmachid_t id, drmach_opts_t *opts)
3513 {
3514 	drmach_board_t	*bp;
3515 	int		board;
3516 	int		i;
3517 	pda_handle_t	ph;
3518 	board_desc_t	*bdesc;
3519 
3520 	if (!DRMACH_IS_BOARD_ID(id))
3521 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3522 	bp = id;
3523 	board = bp->bnum;
3524 
3525 	ph = drmach_pda_open();
3526 	if (ph == NULL)
3527 		return (DRMACH_INTERNAL_ERROR());
3528 
3529 	if (pda_board_present(ph, board) == 0) {
3530 		cmn_err(CE_CONT, "board %d is MISSING\n", board);
3531 		pda_close(ph);
3532 		return (DRMACH_INTERNAL_ERROR());
3533 	}
3534 
3535 	cmn_err(CE_CONT, "board %d is PRESENT\n", board);
3536 
3537 	bdesc = (board_desc_t *)pda_get_board_info(ph, board);
3538 	if (bdesc == NULL) {
3539 		cmn_err(CE_CONT,
3540 			"no board descriptor found for board %d\n",
3541 			board);
3542 		pda_close(ph);
3543 		return (DRMACH_INTERNAL_ERROR());
3544 	}
3545 
3546 	/* make sure definition in platmod is in sync with pda */
3547 	ASSERT(MAX_PROCMODS == MAX_CPU_UNITS_PER_BOARD);
3548 
3549 	for (i = 0; i < MAX_PROCMODS; i++) {
3550 		if (BDA_NBL(bdesc->bda_proc, i) == BDAN_GOOD)
3551 			cmn_err(CE_CONT,
3552 				"proc %d.%d PRESENT\n", board, i);
3553 		else
3554 			cmn_err(CE_CONT,
3555 				"proc %d.%d MISSING\n", board, i);
3556 	}
3557 
3558 	for (i = 0; i < MAX_MGROUPS; i++) {
3559 		if (BDA_NBL(bdesc->bda_mgroup, i) == BDAN_GOOD)
3560 			cmn_err(CE_CONT,
3561 				"mgroup %d.%d PRESENT\n", board, i);
3562 		else
3563 			cmn_err(CE_CONT,
3564 				"mgroup %d.%d MISSING\n", board, i);
3565 	}
3566 
3567 	/* make sure definition in platmod is in sync with pda */
3568 	ASSERT(MAX_IOCS == MAX_IO_UNITS_PER_BOARD);
3569 
3570 	for (i = 0; i < MAX_IOCS; i++) {
3571 		int	s;
3572 
3573 		if (BDA_NBL(bdesc->bda_ioc, i) == BDAN_GOOD) {
3574 			cmn_err(CE_CONT,
3575 				"ioc %d.%d PRESENT\n", board, i);
3576 			for (s = 0; s < MAX_SLOTS_PER_IOC; s++) {
3577 				if (BDA_NBL(bdesc->bda_ios[i], s) != BDAN_GOOD)
3578 					continue;
3579 				cmn_err(CE_CONT,
3580 					"..scard %d.%d.%d PRESENT\n",
3581 					board, i, s);
3582 			}
3583 		} else {
3584 			cmn_err(CE_CONT,
3585 				"ioc %d.%d MISSING\n",
3586 				board, i);
3587 		}
3588 	}
3589 
3590 	cmn_err(CE_CONT,
3591 		"board %d memsize = %d pages\n",
3592 		board, pda_get_mem_size(ph, board));
3593 
3594 	pda_close(ph);
3595 
3596 	return (NULL);
3597 }
3598 
3599 /*ARGSUSED*/
3600 sbd_error_t *
3601 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
3602 {
3603 	struct memlist	*ml;
3604 	uint64_t	src_pa;
3605 	uint64_t	dst_pa;
3606 	uint64_t	dst;
3607 
3608 	dst_pa = va_to_pa(&dst);
3609 
3610 	memlist_read_lock();
3611 	for (ml = phys_install; ml; ml = ml->next) {
3612 		uint64_t	nbytes;
3613 
3614 		src_pa = ml->address;
3615 		nbytes = ml->size;
3616 
3617 		while (nbytes != 0ull) {
3618 
3619 			/* copy 32 bytes at arc_pa to dst_pa */
3620 			bcopy32_il(src_pa, dst_pa);
3621 
3622 			/* increment by 32 bytes */
3623 			src_pa += (4 * sizeof (uint64_t));
3624 
3625 			/* decrement by 32 bytes */
3626 			nbytes -= (4 * sizeof (uint64_t));
3627 		}
3628 	}
3629 	memlist_read_unlock();
3630 
3631 	return (NULL);
3632 }
3633 
/*
 * Passthru command table searched by drmach_passthru().
 *
 * name is the command keyword; handler is the routine invoked for it.
 * drmach_passthru() compares only the first strlen(name) characters of
 * the requested command and takes the first entry that matches, so entry
 * order matters if one keyword is a prefix of another.
 */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "juggle",		drmach_pt_juggle_bootproc	},
	{ "pda",		drmach_pt_dump_pdainfo		},
	{ "readmem",		drmach_pt_readmem		},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
3645 
3646 /*ARGSUSED*/
3647 sbd_error_t *
3648 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
3649 {
3650 	int		i;
3651 	sbd_error_t	*err;
3652 
3653 	i = 0;
3654 	while (drmach_pt_arr[i].name != NULL) {
3655 		int len = strlen(drmach_pt_arr[i].name);
3656 
3657 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
3658 			break;
3659 
3660 		i += 1;
3661 	}
3662 
3663 	if (drmach_pt_arr[i].name == NULL)
3664 		err = drerr_new(0, ESTF_UNKPTCMD, opts->copts);
3665 	else
3666 		err = (*drmach_pt_arr[i].handler)(id, opts);
3667 
3668 	return (err);
3669 }
3670 
3671 sbd_error_t *
3672 drmach_release(drmachid_t id)
3673 {
3674 	drmach_common_t *cp;
3675 	if (!DRMACH_IS_DEVICE_ID(id))
3676 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3677 	cp = id;
3678 
3679 	return (cp->release(id));
3680 }
3681 
3682 sbd_error_t *
3683 drmach_status(drmachid_t id, drmach_status_t *stat)
3684 {
3685 	drmach_common_t *cp;
3686 
3687 	if (!DRMACH_IS_ID(id))
3688 		return (drerr_new(0, ESTF_NOTID, NULL));
3689 	cp = id;
3690 
3691 	return (cp->status(id, stat));
3692 }
3693 
3694 sbd_error_t *
3695 drmach_unconfigure(drmachid_t id, int flags)
3696 {
3697 	drmach_device_t	*dp;
3698 	dnode_t		 nodeid;
3699 	dev_info_t	*dip, *fdip = NULL;
3700 	uint_t 		ddi_flags;
3701 
3702 	if (!DRMACH_IS_DEVICE_ID(id))
3703 		return (drerr_new(0, ESTF_INAPPROP, NULL));
3704 
3705 	dp = id;
3706 
3707 	nodeid = drmach_node_get_dnode(dp->node);
3708 	if (nodeid == OBP_NONODE)
3709 		return (DRMACH_INTERNAL_ERROR());
3710 
3711 	dip = e_ddi_nodeid_to_dip(nodeid);
3712 	if (dip == NULL)
3713 		return (NULL);
3714 
3715 	/*
3716 	 * Branch already held, so hold acquired in
3717 	 * e_ddi_nodeid_to_dip() can be released
3718 	 */
3719 	ddi_release_devi(dip);
3720 
3721 	ddi_flags = 0;
3722 
3723 	if (flags & DRMACH_DEVI_REMOVE)
3724 		ddi_flags |= DEVI_BRANCH_DESTROY | DEVI_BRANCH_EVENT;
3725 
3726 	/*
3727 	 * Force flag is no longer necessary. See starcat/io/drmach.c
3728 	 * for details.
3729 	 */
3730 	ASSERT(e_ddi_branch_held(dip));
3731 	if (e_ddi_branch_unconfigure(dip, &fdip, ddi_flags)) {
3732 		sbd_error_t	*err;
3733 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3734 
3735 		/*
3736 		 * If non-NULL, fdip is returned held and must be released.
3737 		 */
3738 		if (fdip != NULL) {
3739 			(void) ddi_pathname(fdip, path);
3740 			ndi_rele_devi(fdip);
3741 		} else {
3742 			(void) ddi_pathname(dip, path);
3743 		}
3744 
3745 		err = drerr_new(1, ESTF_DRVFAIL, path);
3746 
3747 		kmem_free(path, MAXPATHLEN);
3748 
3749 		return (err);
3750 	}
3751 
3752 	return (NULL);
3753 }
3754 
3755 /*
3756  * drmach interfaces to legacy Starfire platmod logic
3757  * linkage via runtime symbol look up, called from plat_cpu_power*
3758  */
3759 
/*
 * Start up a cpu.  It is possible that we're attempting to restart
 * the cpu after an UNCONFIGURE, in which case the cpu will be
 * spinning in its cache.  So, all we have to do is wake it up.
 * Under normal circumstances the cpu will be coming from a previous
 * CONNECT and thus will be spinning in OBP.  In both cases, the
 * startup sequence is the same.
 */
3768 int
3769 drmach_cpu_poweron(struct cpu *cp)
3770 {
3771 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
3772 
3773 	ASSERT(MUTEX_HELD(&cpu_lock));
3774 
3775 	if (drmach_cpu_start(cp) != 0)
3776 		return (EBUSY);
3777 	else
3778 		return (0);
3779 }
3780 
3781 int
3782 drmach_cpu_poweroff(struct cpu *cp)
3783 {
3784 	int		ntries, cnt;
3785 	processorid_t	cpuid = cp->cpu_id;
3786 	void		drmach_cpu_shutdown_self(void);
3787 
3788 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
3789 
3790 	ASSERT(MUTEX_HELD(&cpu_lock));
3791 
3792 	/*
3793 	 * Capture all CPUs (except for detaching proc) to prevent
3794 	 * crosscalls to the detaching proc until it has cleared its
3795 	 * bit in cpu_ready_set.
3796 	 *
3797 	 * The CPU's remain paused and the prom_mutex is known to be free.
3798 	 * This prevents the x-trap victim from blocking when doing prom
3799 	 * IEEE-1275 calls at a high PIL level.
3800 	 */
3801 	promsafe_pause_cpus();
3802 
3803 	/*
3804 	 * Quiesce interrupts on the target CPU. We do this by setting
3805 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
3806 	 * prevent it from receiving cross calls and cross traps.
3807 	 * This prevents the processor from receiving any new soft interrupts.
3808 	 */
3809 	mp_cpu_quiesce(cp);
3810 
3811 	/* setup xt_mb, will be cleared by drmach_shutdown_asm when ready */
3812 	drmach_xt_mb[cpuid] = 0x80;
3813 
3814 	xt_one_unchecked(cpuid, (xcfunc_t *)idle_stop_xcall,
3815 		(uint64_t)drmach_cpu_shutdown_self, NULL);
3816 
3817 	ntries = drmach_cpu_ntries;
3818 	cnt = 0;
3819 	while (drmach_xt_mb[cpuid] && ntries) {
3820 		DELAY(drmach_cpu_delay);
3821 		ntries--;
3822 		cnt++;
3823 	}
3824 
3825 	drmach_xt_mb[cpuid] = 0;	/* steal the cache line back */
3826 
3827 	start_cpus();
3828 
3829 	DRMACH_PR("waited %d out of %d tries for "
3830 		"drmach_cpu_shutdown_self on cpu%d",
3831 		drmach_cpu_ntries - ntries, drmach_cpu_ntries, cp->cpu_id);
3832 
3833 	drmach_cpu_obp_detach(cpuid);
3834 
3835 	CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
3836 
3837 	return (0);
3838 }
3839 
/*
 * Stub: performs no checks and always returns 0.
 * dip and sflag are ignored.
 */
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	return (0);
}
3846 
/*
 * Empty in this implementation: performs no work.
 * NOTE(review): presumably a hook invoked at the end of a suspend
 * sequence -- confirm against the caller.
 */
void
drmach_suspend_last(void)
{
}
3851 
/*
 * Empty in this implementation: performs no work.
 * NOTE(review): presumably a hook invoked at the start of a resume
 * sequence -- confirm against the caller.
 */
void
drmach_resume_first(void)
{
}
3856 
3857 /*
3858  * Log a DR sysevent.
3859  * Return value: 0 success, non-zero failure.
3860  */
3861 int
3862 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
3863 {
3864 	sysevent_t			*ev;
3865 	sysevent_id_t			eid;
3866 	int				rv, km_flag;
3867 	sysevent_value_t		evnt_val;
3868 	sysevent_attr_list_t		*evnt_attr_list = NULL;
3869 	char				attach_pnt[MAXNAMELEN];
3870 
3871 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
3872 	attach_pnt[0] = '\0';
3873 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
3874 		rv = -1;
3875 		goto logexit;
3876 	}
3877 	if (verbose)
3878 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
3879 			    attach_pnt, hint, flag, verbose);
3880 
3881 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
3882 				    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
3883 		rv = -2;
3884 		goto logexit;
3885 	}
3886 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3887 	evnt_val.value.sv_string = attach_pnt;
3888 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
3889 				    &evnt_val, km_flag)) != 0)
3890 		goto logexit;
3891 
3892 	evnt_val.value_type = SE_DATA_TYPE_STRING;
3893 	evnt_val.value.sv_string = hint;
3894 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
3895 				    &evnt_val, km_flag)) != 0) {
3896 		sysevent_free_attr(evnt_attr_list);
3897 		goto logexit;
3898 	}
3899 
3900 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
3901 
3902 	/*
3903 	 * Log the event but do not sleep waiting for its
3904 	 * delivery. This provides insulation from syseventd.
3905 	 */
3906 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
3907 
3908 logexit:
3909 	if (ev)
3910 		sysevent_free(ev);
3911 	if ((rv != 0) && verbose)
3912 		cmn_err(CE_WARN,
3913 			    "drmach_log_sysevent failed (rv %d) for %s  %s\n",
3914 			    rv, attach_pnt, hint);
3915 
3916 	return (rv);
3917 }
3918 
/*
 * Always returns 1 (TRUE); id is ignored.
 */
/*ARGSUSED*/
int
drmach_allow_memrange_modify(drmachid_t id)
{
	return (1);	/* TRUE */
}
3925