xref: /titanic_44/usr/src/uts/sun4u/opl/io/drmach.c (revision ee5416c9d7e449233197d5d20bc6b81e4ff091b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/debug.h>
30 #include <sys/types.h>
31 #include <sys/varargs.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/dditypes.h>
35 #include <sys/devops.h>
36 #include <sys/modctl.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/stat.h>
44 #include <sys/kmem.h>
45 #include <sys/vmem.h>
46 #include <sys/opl_olympus_regs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/cpupart.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/note.h>
70 #include <sys/ontrap.h>
71 #include <sys/cpu_sgnblk_defs.h>
72 #include <sys/opl.h>
73 #include <sys/cpu_impl.h>
74 
75 
76 #include <sys/promimpl.h>
77 #include <sys/prom_plat.h>
78 #include <sys/kobj.h>
79 
80 #include <sys/sysevent.h>
81 #include <sys/sysevent/dr.h>
82 #include <sys/sysevent/eventdefs.h>
83 
84 #include <sys/drmach.h>
85 #include <sys/dr_util.h>
86 
87 #include <sys/fcode.h>
88 #include <sys/opl_cfg.h>
89 
90 extern void		bcopy32_il(uint64_t, uint64_t);
91 extern void		flush_cache_il(void);
92 extern void		drmach_sleep_il(void);
93 
/*
 * Argument bundle passed to a node-walk callback: the node object the
 * walk is running on and the caller-supplied opaque data pointer.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;
98 
/*
 * Device-tree node abstraction.  "here" carries the dev_info node the
 * object currently refers to (it is what the DDI n_getdip method
 * returns); the remaining members are the method table installed by
 * drmach_node_new().
 */
typedef struct drmach_node {
	void		*here;

	pnode_t		(*get_dnode)(struct drmach_node *node);
	int		(*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	int		(*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	int		(*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	int		(*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
113 
/*
 * Bounds-checked array of drmachid_t values.  Valid indices run from
 * min_index to max_index inclusive; arr_sz is the size of arr in bytes.
 */
typedef struct {
	int		 min_index;
	int		 max_index;
	int		 arr_sz;
	drmachid_t	*arr;
} drmach_array_t;
120 
/*
 * Header shared by every drmach object.  "isa" is set to the address of
 * the constructor that built the object and is what the DRMACH_IS_*_ID()
 * macros compare against; dispose/release/status are per-type handlers.
 */
typedef struct {
	void		*isa;

	void		(*dispose)(drmachid_t);
	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;
130 
/*
 * Per-core strand bitmasks (bit n corresponds to strand id n):
 * strands found in the device tree, strands hot-added through the PROM,
 * and strands recorded as started.
 */
typedef	struct {
	uint32_t	core_present;
	uint32_t	core_hotadded;
	uint32_t	core_started;
} drmach_cmp_t;
136 
/*
 * Board object.  drmach_board_new() seeds assigned, powered and
 * connected from its boot_board argument; "devices" stays NULL until a
 * device array is attached elsewhere.
 */
typedef struct {
	drmach_common_t	 cm;
	int		 bnum;
	int		 assigned;
	int		 powered;
	int		 connected;
	int		 cond;
	drmach_node_t	*tree;
	drmach_array_t	*devices;
	int		boot_board;	/* if board exists on bootup */
	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD];
} drmach_board_t;
149 
/*
 * State common to every device object.  drmach_device_new() fills in
 * type, bp, node and portid on a prototype; the device-specific
 * constructor is responsible for unum.
 */
typedef struct {
	drmach_common_t	 cm;
	drmach_board_t	*bp;
	int		 unum;
	int		portid;
	int		 busy;
	int		 powered;
	const char	*type;
	drmach_node_t	*node;
} drmach_device_t;
160 
/*
 * CPU device object: the common device state followed by CPU
 * identification fields.
 * NOTE(review): sb/chipid/coreid/strandid are presumably the components
 * of cpuid -- confirm against drmach_cpu_new().
 */
typedef struct drmach_cpu {
	drmach_device_t  dev;
	processorid_t    cpuid;
	int		sb;
	int		chipid;
	int		coreid;
	int		strandid;
	int		status;
#define	OPL_CPU_HOTADDED	1
} drmach_cpu_t;
171 
/*
 * Memory device object.  slice_base/slice_size come from the first two
 * words of the "sb-mem-ranges" property; base_pa, nbytes and memlist
 * are derived by drmach_setup_mc_info().
 */
typedef struct drmach_mem {
	drmach_device_t  dev;
	uint64_t	slice_base;
	uint64_t	slice_size;
	uint64_t	base_pa;	/* lowest installed memory base */
	uint64_t	nbytes;		/* size of installed memory */
	struct memlist *memlist;
} drmach_mem_t;
180 
/*
 * I/O device object.  drmach_io_new() decodes channel from bits 3:1 of
 * the portid and leaf from bit 0.
 */
typedef struct drmach_io {
	drmach_device_t  dev;
	int	channel;
	int	leaf;
} drmach_io_t;
186 
/*
 * Domain-wide DR state.  "floating" is a bitmask indexed by board
 * number, built from the root node's "floating-boards" property;
 * allow_dr holds the result of opl_check_dr_status().
 */
typedef struct drmach_domain_info {
	uint32_t	floating;
	int		allow_dr;
} drmach_domain_info_t;

drmach_domain_info_t drmach_domain;
193 
/*
 * Argument bundle for a configure-style operation.
 * NOTE(review): no user of this type is visible in this part of the
 * file -- confirm field roles against its callers.
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;
	sbd_error_t	*err;
	dev_info_t	*dip;
} drmach_config_args_t;
200 
/*
 * State carried through a per-board device search: the board, a device
 * count, the "found" callback with its opaque argument "a", and an
 * error slot.
 * NOTE(review): usage is outside this part of the file -- confirm.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
208 
209 static drmach_array_t	*drmach_boards;
210 
211 static sbd_error_t	*drmach_device_new(drmach_node_t *,
212 				drmach_board_t *, int, drmachid_t *);
213 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
214 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
215 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
216 
217 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
218 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
219 				char *name, void *buf, int len);
220 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
221 				char *name, int *len);
222 
223 static int 		drmach_get_portid(drmach_node_t *);
224 static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
225 static int		opl_check_dr_status();
226 static void		drmach_io_dispose(drmachid_t);
227 static sbd_error_t	*drmach_io_release(drmachid_t);
228 static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
229 static int 		drmach_init(void);
230 static void 		drmach_fini(void);
231 static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
232 static drmach_board_t	*drmach_get_board_by_bnum(int);
233 
234 /* options for the second argument in drmach_add_remove_cpu() */
235 #define	HOTADD_CPU	1
236 #define	HOTREMOVE_CPU	2
237 
238 #define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
239 	(OPL_MAX_COREID_PER_BOARD - 1))
240 
241 extern struct cpu	*SIGBCPU;
242 
243 static int		drmach_name2type_idx(char *);
244 static drmach_board_t	*drmach_board_new(int, int);
245 
246 #ifdef DEBUG
247 
248 #define	DRMACH_PR		if (drmach_debug) printf
249 int drmach_debug = 1;		 /* set to non-zero to enable debug messages */
250 #else
251 
252 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
253 #endif /* DEBUG */
254 
255 
/*
 * Object identification helpers.
 *
 * Every drmach object starts with a drmach_common_t whose "isa" member
 * holds the address of the constructor that created it, so the type of
 * an opaque drmachid_t can be recovered by comparing "isa" against the
 * known constructors.
 *
 * The macro argument is fully parenthesised so that expressions (not
 * just plain identifiers) can be passed safely.  Note that the argument
 * is still evaluated more than once; do not pass expressions with side
 * effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_NULL_ID(id)	((id) == 0)

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
288 
289 #define	DRMACH_INTERNAL_ERROR() \
290 	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)
291 
292 static char		*drmach_ie_fmt = "drmach.c %d";
293 
/*
 * Maps a device node's "name" property to the drmach device type string
 * and to the constructor that builds the matching device object.
 * drmach_device_new() indexes this table with the value returned by
 * drmach_name2type_idx().
 */
static struct {
	const char	*name;
	const char	*type;
	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *);
} drmach_name2type[] = {
	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
};
303 
304 /* utility */
305 #define	MBYTE	(1048576ull)
306 
307 /*
308  * drmach autoconfiguration data structures and interfaces
309  */
310 
311 extern struct mod_ops mod_miscops;
312 
/* Module description: a miscellaneous module named "OPL DR 1.1". */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"OPL DR 1.1"
};

/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
323 
324 static krwlock_t drmach_boards_rwlock;
325 
326 typedef const char	*fn_t;
327 
328 int
329 _init(void)
330 {
331 	int err;
332 
333 	if ((err = drmach_init()) != 0) {
334 		return (err);
335 	}
336 
337 	if ((err = mod_install(&modlinkage)) != 0) {
338 		drmach_fini();
339 	}
340 
341 	return (err);
342 }
343 
344 int
345 _fini(void)
346 {
347 	int	err;
348 
349 	if ((err = mod_remove(&modlinkage)) == 0)
350 		drmach_fini();
351 
352 	return (err);
353 }
354 
355 int
356 _info(struct modinfo *modinfop)
357 {
358 	return (mod_info(&modlinkage, modinfop));
359 }
360 
/*
 * Search state for locating a board's memory-controller node.
 * NOTE(review): the walker that uses this is outside this part of the
 * file -- confirm which fields are inputs.
 */
struct drmach_mc_lookup {
	int	bnum;
	drmach_board_t	*bp;
	dev_info_t *dip;	/* rv - set if found */
};
366 
367 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
368 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
369 
370 static int
371 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
372 {
373 	uint64_t	memory_ranges[128];
374 	int len;
375 	struct memlist	*ml;
376 	int rv;
377 	hwd_sb_t *hwd;
378 	hwd_memory_t *pm;
379 
380 	len = sizeof (memory_ranges);
381 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
382 	    "sb-mem-ranges", (caddr_t)&memory_ranges[0], &len) !=
383 	    DDI_PROP_SUCCESS) {
384 		mp->slice_base = 0;
385 		mp->slice_size = 0;
386 		return (-1);
387 	}
388 	mp->slice_base = memory_ranges[0];
389 	mp->slice_size = memory_ranges[1];
390 
391 	if (!mp->dev.bp->boot_board) {
392 		int i;
393 
394 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
395 
396 		if (rv != 0) {
397 			return (-1);
398 		}
399 
400 		ml = NULL;
401 		pm = &hwd->sb_cmu.cmu_memory;
402 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
403 			if (pm->mem_chunks[i].chnk_size > 0) {
404 				ml = memlist_add_span(ml,
405 				    pm->mem_chunks[i].chnk_start_address,
406 				    pm->mem_chunks[i].chnk_size);
407 			}
408 		}
409 	} else {
410 		/*
411 		 * we intersect phys_install to get base_pa.
412 		 * This only works at bootup time.
413 		 */
414 
415 		memlist_read_lock();
416 		ml = memlist_dup(phys_install);
417 		memlist_read_unlock();
418 
419 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
420 		if (ml) {
421 			uint64_t basepa, endpa;
422 			endpa = _ptob64(physmax + 1);
423 
424 			basepa = mp->slice_base + mp->slice_size;
425 
426 			ml = memlist_del_span(ml, basepa, endpa - basepa);
427 		}
428 	}
429 
430 	if (ml) {
431 		uint64_t nbytes = 0;
432 		struct memlist *p;
433 		for (p = ml; p; p = p->next) {
434 			nbytes += p->size;
435 		}
436 		if ((mp->nbytes = nbytes) > 0)
437 			mp->base_pa = ml->address;
438 		else
439 			mp->base_pa = 0;
440 		mp->memlist = ml;
441 	} else {
442 		mp->base_pa = 0;
443 		mp->nbytes = 0;
444 	}
445 	return (0);
446 }
447 
448 
/*
 * Request block for the hot-add/hot-remove CPU tree walk: target board
 * (object and number), target on-board core id, the operation
 * (HOTADD_CPU or HOTREMOVE_CPU) and the result, which the walk callback
 * sets to -1 on failure.
 */
struct drmach_hotcpu {
	drmach_board_t *bp;
	int	bnum;
	int	core_id;
	int 	rv;
	int	option;
};
456 
457 static int
458 drmach_cpu_cb(dev_info_t *dip, void *arg)
459 {
460 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
461 	char name[OBP_MAXDRVNAME];
462 	int len = OBP_MAXDRVNAME;
463 	int bnum, core_id, strand_id;
464 	drmach_board_t *bp;
465 
466 	if (dip == ddi_root_node()) {
467 		return (DDI_WALK_CONTINUE);
468 	}
469 
470 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
471 	    DDI_PROP_DONTPASS, "name",
472 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
473 		return (DDI_WALK_PRUNECHILD);
474 	}
475 
476 	/* only cmp has board number */
477 	bnum = -1;
478 	len = sizeof (bnum);
479 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
480 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
481 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
482 		bnum = -1;
483 	}
484 
485 	if (strcmp(name, "cmp") == 0) {
486 		if (bnum != p->bnum)
487 			return (DDI_WALK_PRUNECHILD);
488 		return (DDI_WALK_CONTINUE);
489 	}
490 	/* we have already pruned all unwanted cores and cpu's above */
491 	if (strcmp(name, "core") == 0) {
492 		return (DDI_WALK_CONTINUE);
493 	}
494 	if (strcmp(name, "cpu") == 0) {
495 		processorid_t cpuid;
496 		len = sizeof (cpuid);
497 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
498 		    DDI_PROP_DONTPASS, "cpuid",
499 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
500 			p->rv = -1;
501 			return (DDI_WALK_TERMINATE);
502 		}
503 
504 		core_id = p->core_id;
505 
506 		bnum = LSB_ID(cpuid);
507 
508 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
509 			return (DDI_WALK_CONTINUE);
510 
511 		bp = p->bp;
512 		ASSERT(bnum == bp->bnum);
513 
514 		if (p->option == HOTADD_CPU) {
515 			if (prom_hotaddcpu(cpuid) != 0) {
516 				p->rv = -1;
517 				return (DDI_WALK_TERMINATE);
518 			}
519 			strand_id = STRAND_ID(cpuid);
520 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
521 		} else if (p->option == HOTREMOVE_CPU) {
522 			if (prom_hotremovecpu(cpuid) != 0) {
523 				p->rv = -1;
524 				return (DDI_WALK_TERMINATE);
525 			}
526 			strand_id = STRAND_ID(cpuid);
527 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
528 		}
529 		return (DDI_WALK_CONTINUE);
530 	}
531 
532 	return (DDI_WALK_PRUNECHILD);
533 }
534 
535 
536 static int
537 drmach_add_remove_cpu(int bnum, int core_id, int option)
538 {
539 	struct drmach_hotcpu arg;
540 	drmach_board_t *bp;
541 
542 	bp = drmach_get_board_by_bnum(bnum);
543 	ASSERT(bp);
544 
545 	arg.bp = bp;
546 	arg.bnum = bnum;
547 	arg.core_id = core_id;
548 	arg.rv = 0;
549 	arg.option = option;
550 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
551 	return (arg.rv);
552 }
553 
/* Walk argument for drmach_setup_core_cb(): the board being scanned. */
struct drmach_setup_core_arg {
	drmach_board_t *bp;
};
557 
558 static int
559 drmach_setup_core_cb(dev_info_t *dip, void *arg)
560 {
561 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
562 	char name[OBP_MAXDRVNAME];
563 	int len = OBP_MAXDRVNAME;
564 	int bnum;
565 	int core_id, strand_id;
566 
567 	if (dip == ddi_root_node()) {
568 		return (DDI_WALK_CONTINUE);
569 	}
570 
571 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
572 	    DDI_PROP_DONTPASS, "name",
573 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
574 		return (DDI_WALK_PRUNECHILD);
575 	}
576 
577 	/* only cmp has board number */
578 	bnum = -1;
579 	len = sizeof (bnum);
580 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
581 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
582 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
583 		bnum = -1;
584 	}
585 
586 	if (strcmp(name, "cmp") == 0) {
587 		if (bnum != p->bp->bnum)
588 			return (DDI_WALK_PRUNECHILD);
589 		return (DDI_WALK_CONTINUE);
590 	}
591 	/* we have already pruned all unwanted cores and cpu's above */
592 	if (strcmp(name, "core") == 0) {
593 		return (DDI_WALK_CONTINUE);
594 	}
595 	if (strcmp(name, "cpu") == 0) {
596 		processorid_t cpuid;
597 		len = sizeof (cpuid);
598 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
599 		    DDI_PROP_DONTPASS, "cpuid",
600 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
601 			return (DDI_WALK_TERMINATE);
602 		}
603 		bnum = LSB_ID(cpuid);
604 		ASSERT(bnum == p->bp->bnum);
605 		core_id = ON_BOARD_CORE_NUM(cpuid);
606 		strand_id = STRAND_ID(cpuid);
607 		p->bp->cores[core_id].core_present |= (1 << strand_id);
608 		return (DDI_WALK_CONTINUE);
609 	}
610 
611 	return (DDI_WALK_PRUNECHILD);
612 }
613 
614 
615 static void
616 drmach_setup_core_info(drmach_board_t *obj)
617 {
618 	struct drmach_setup_core_arg arg;
619 	int i;
620 
621 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
622 		obj->cores[i].core_present = 0;
623 		obj->cores[i].core_hotadded = 0;
624 		obj->cores[i].core_started = 0;
625 	}
626 	arg.bp = obj;
627 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
628 
629 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
630 		if (obj->boot_board) {
631 			obj->cores[i].core_hotadded =
632 			    obj->cores[i].core_started =
633 			    obj->cores[i].core_present;
634 		}
635 	}
636 }
637 
638 /*
639  * drmach_node_* routines serve the purpose of separating the
640  * rest of the code from the device tree and OBP.  This is necessary
641  * because of In-Kernel-Probing.  Devices probed after stod, are probed
642  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
643  * have dnode ids.
644  */
645 
/*
 * State threaded through ddi_walk_devs() by drmach_node_ddi_walk():
 * the caller-visible walk arguments, the caller's callback, and the
 * last value that callback returned.
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;
	int 			(*cb)(drmach_node_walk_args_t *args);
	int			err;
} drmach_node_ddi_walk_args_t;
651 
652 static int
653 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
654 {
655 	drmach_node_ddi_walk_args_t	*nargs;
656 
657 	nargs = (drmach_node_ddi_walk_args_t *)arg;
658 
659 	/*
660 	 * dip doesn't have to be held here as we are called
661 	 * from ddi_walk_devs() which holds the dip.
662 	 */
663 	nargs->nwargs->node->here = (void *)dip;
664 
665 	nargs->err = nargs->cb(nargs->nwargs);
666 
667 
668 	/*
669 	 * Set "here" to NULL so that unheld dip is not accessible
670 	 * outside ddi_walk_devs()
671 	 */
672 	nargs->nwargs->node->here = NULL;
673 
674 	if (nargs->err)
675 		return (DDI_WALK_TERMINATE);
676 	else
677 		return (DDI_WALK_CONTINUE);
678 }
679 
680 static int
681 drmach_node_ddi_walk(drmach_node_t *np, void *data,
682 		int (*cb)(drmach_node_walk_args_t *args))
683 {
684 	drmach_node_walk_args_t		args;
685 	drmach_node_ddi_walk_args_t	nargs;
686 
687 
688 	/* initialized args structure for callback */
689 	args.node = np;
690 	args.data = data;
691 
692 	nargs.nwargs = &args;
693 	nargs.cb = cb;
694 	nargs.err = 0;
695 
696 	/*
697 	 * Root node doesn't have to be held in any way.
698 	 */
699 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
700 
701 	return (nargs.err);
702 }
703 
704 static int
705 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
706 {
707 	dev_info_t	*ndip;
708 	static char	*fn = "drmach_node_ddi_get_parent";
709 
710 	ndip = np->n_getdip(np);
711 	if (ndip == NULL) {
712 		cmn_err(CE_WARN, "%s: NULL dip", fn);
713 		return (-1);
714 	}
715 
716 	bcopy(np, pp, sizeof (drmach_node_t));
717 
718 	pp->here = (void *)ddi_get_parent(ndip);
719 	if (pp->here == NULL) {
720 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
721 		return (-1);
722 	}
723 
724 	return (0);
725 }
726 
727 /*ARGSUSED*/
728 static pnode_t
729 drmach_node_ddi_get_dnode(drmach_node_t *np)
730 {
731 	return ((pnode_t)NULL);
732 }
733 
734 static drmach_node_t *
735 drmach_node_new(void)
736 {
737 	drmach_node_t *np;
738 
739 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
740 
741 	np->get_dnode = drmach_node_ddi_get_dnode;
742 	np->walk = drmach_node_ddi_walk;
743 	np->n_getdip = drmach_node_ddi_get_dip;
744 	np->n_getproplen = drmach_node_ddi_get_proplen;
745 	np->n_getprop = drmach_node_ddi_get_prop;
746 	np->get_parent = drmach_node_ddi_get_parent;
747 
748 	return (np);
749 }
750 
751 static void
752 drmach_node_dispose(drmach_node_t *np)
753 {
754 	kmem_free(np, sizeof (*np));
755 }
756 
757 static dev_info_t *
758 drmach_node_ddi_get_dip(drmach_node_t *np)
759 {
760 	return ((dev_info_t *)np->here);
761 }
762 
763 static int
764 drmach_node_walk(drmach_node_t *np, void *param,
765 		int (*cb)(drmach_node_walk_args_t *args))
766 {
767 	return (np->walk(np, param, cb));
768 }
769 
770 static int
771 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
772 {
773 	int		rv = 0;
774 	dev_info_t	*ndip;
775 	static char	*fn = "drmach_node_ddi_get_prop";
776 
777 
778 	ndip = np->n_getdip(np);
779 	if (ndip == NULL) {
780 		cmn_err(CE_WARN, "%s: NULL dip", fn);
781 		rv = -1;
782 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
783 	    DDI_PROP_DONTPASS, name,
784 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
785 		rv = -1;
786 	}
787 
788 	return (rv);
789 }
790 
791 static int
792 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
793 {
794 	int		rv = 0;
795 	dev_info_t	*ndip;
796 
797 	ndip = np->n_getdip(np);
798 	if (ndip == NULL) {
799 		rv = -1;
800 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name,
801 	    len) != DDI_PROP_SUCCESS) {
802 		rv = -1;
803 	}
804 
805 	return (rv);
806 }
807 
808 static drmachid_t
809 drmach_node_dup(drmach_node_t *np)
810 {
811 	drmach_node_t *dup;
812 
813 	dup = drmach_node_new();
814 	dup->here = np->here;
815 	dup->get_dnode = np->get_dnode;
816 	dup->walk = np->walk;
817 	dup->n_getdip = np->n_getdip;
818 	dup->n_getproplen = np->n_getproplen;
819 	dup->n_getprop = np->n_getprop;
820 	dup->get_parent = np->get_parent;
821 
822 	return (dup);
823 }
824 
825 /*
826  * drmach_array provides convenient array construction, access,
827  * bounds checking and array destruction logic.
828  */
829 
830 static drmach_array_t *
831 drmach_array_new(int min_index, int max_index)
832 {
833 	drmach_array_t *arr;
834 
835 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
836 
837 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
838 	if (arr->arr_sz > 0) {
839 		arr->min_index = min_index;
840 		arr->max_index = max_index;
841 
842 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
843 		return (arr);
844 	} else {
845 		kmem_free(arr, sizeof (*arr));
846 		return (0);
847 	}
848 }
849 
850 static int
851 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
852 {
853 	if (idx < arr->min_index || idx > arr->max_index)
854 		return (-1);
855 	else {
856 		arr->arr[idx - arr->min_index] = val;
857 		return (0);
858 	}
859 	/*NOTREACHED*/
860 }
861 
862 static int
863 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
864 {
865 	if (idx < arr->min_index || idx > arr->max_index)
866 		return (-1);
867 	else {
868 		*val = arr->arr[idx - arr->min_index];
869 		return (0);
870 	}
871 	/*NOTREACHED*/
872 }
873 
874 static int
875 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
876 {
877 	int rv;
878 
879 	*idx = arr->min_index;
880 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
881 		*idx += 1;
882 
883 	return (rv);
884 }
885 
886 static int
887 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
888 {
889 	int rv;
890 
891 	*idx += 1;
892 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
893 		*idx += 1;
894 
895 	return (rv);
896 }
897 
898 static void
899 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
900 {
901 	drmachid_t	val;
902 	int		idx;
903 	int		rv;
904 
905 	rv = drmach_array_first(arr, &idx, &val);
906 	while (rv == 0) {
907 		(*disposer)(val);
908 		rv = drmach_array_next(arr, &idx, &val);
909 	}
910 
911 	kmem_free(arr->arr, arr->arr_sz);
912 	kmem_free(arr, sizeof (*arr));
913 }
914 
915 static drmach_board_t *
916 drmach_get_board_by_bnum(int bnum)
917 {
918 	drmachid_t id;
919 
920 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
921 		return ((drmach_board_t *)id);
922 	else
923 		return (NULL);
924 }
925 
926 static pnode_t
927 drmach_node_get_dnode(drmach_node_t *np)
928 {
929 	return (np->get_dnode(np));
930 }
931 
932 /*ARGSUSED*/
933 sbd_error_t *
934 drmach_configure(drmachid_t id, int flags)
935 {
936 	drmach_device_t		*dp;
937 	sbd_error_t		*err = NULL;
938 	dev_info_t		*rdip;
939 	dev_info_t		*fdip = NULL;
940 
941 	if (DRMACH_IS_CPU_ID(id)) {
942 		return (NULL);
943 	}
944 	if (!DRMACH_IS_DEVICE_ID(id))
945 		return (drerr_new(0, EOPL_INAPPROP, NULL));
946 	dp = id;
947 	rdip = dp->node->n_getdip(dp->node);
948 
949 	ASSERT(rdip);
950 
951 	ASSERT(e_ddi_branch_held(rdip));
952 
953 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
954 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
955 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
956 
957 		(void) ddi_pathname(dip, path);
958 		err = drerr_new(1,  EOPL_DRVFAIL, path);
959 
960 		kmem_free(path, MAXPATHLEN);
961 
962 		/* If non-NULL, fdip is returned held and must be released */
963 		if (fdip != NULL)
964 			ddi_release_devi(fdip);
965 	}
966 
967 	return (err);
968 }
969 
970 
971 static sbd_error_t *
972 drmach_device_new(drmach_node_t *node,
973 	drmach_board_t *bp, int portid, drmachid_t *idp)
974 {
975 	int		 i;
976 	int		 rv;
977 	drmach_device_t	proto;
978 	sbd_error_t	*err;
979 	char		 name[OBP_MAXDRVNAME];
980 
981 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
982 	if (rv) {
983 		/* every node is expected to have a name */
984 		err = drerr_new(1, EOPL_GETPROP, "device node %s: property %s",
985 		    ddi_node_name(node->n_getdip(node)), "name");
986 		return (err);
987 	}
988 
989 	/*
990 	 * The node currently being examined is not listed in the name2type[]
991 	 * array.  In this case, the node is no interest to drmach.  Both
992 	 * dp and err are initialized here to yield nothing (no device or
993 	 * error structure) for this case.
994 	 */
995 	i = drmach_name2type_idx(name);
996 
997 
998 	if (i < 0) {
999 		*idp = (drmachid_t)0;
1000 		return (NULL);
1001 	}
1002 
1003 	/* device specific new function will set unum */
1004 
1005 	bzero(&proto, sizeof (proto));
1006 	proto.type = drmach_name2type[i].type;
1007 	proto.bp = bp;
1008 	proto.node = node;
1009 	proto.portid = portid;
1010 
1011 	return (drmach_name2type[i].new(&proto, idp));
1012 }
1013 
1014 static void
1015 drmach_device_dispose(drmachid_t id)
1016 {
1017 	drmach_device_t *self = id;
1018 
1019 	self->cm.dispose(id);
1020 }
1021 
1022 
1023 static drmach_board_t *
1024 drmach_board_new(int bnum, int boot_board)
1025 {
1026 	static sbd_error_t *drmach_board_release(drmachid_t);
1027 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1028 
1029 	drmach_board_t	*bp;
1030 
1031 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1032 
1033 	bp->cm.isa = (void *)drmach_board_new;
1034 	bp->cm.release = drmach_board_release;
1035 	bp->cm.status = drmach_board_status;
1036 
1037 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1038 
1039 	bp->bnum = bnum;
1040 	bp->devices = NULL;
1041 	bp->connected = boot_board;
1042 	bp->tree = drmach_node_new();
1043 	bp->assigned = boot_board;
1044 	bp->powered = boot_board;
1045 	bp->boot_board = boot_board;
1046 
1047 	/*
1048 	 * If this is not bootup initialization, we have to wait till
1049 	 * IKP sets up the device nodes in drmach_board_connect().
1050 	 */
1051 	if (boot_board)
1052 		drmach_setup_core_info(bp);
1053 
1054 	drmach_array_set(drmach_boards, bnum, bp);
1055 	return (bp);
1056 }
1057 
1058 static void
1059 drmach_board_dispose(drmachid_t id)
1060 {
1061 	drmach_board_t *bp;
1062 
1063 	ASSERT(DRMACH_IS_BOARD_ID(id));
1064 	bp = id;
1065 
1066 	if (bp->tree)
1067 		drmach_node_dispose(bp->tree);
1068 
1069 	if (bp->devices)
1070 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1071 
1072 	kmem_free(bp, sizeof (*bp));
1073 }
1074 
1075 static sbd_error_t *
1076 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1077 {
1078 	sbd_error_t	*err = NULL;
1079 	drmach_board_t	*bp;
1080 
1081 	if (!DRMACH_IS_BOARD_ID(id))
1082 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1083 	bp = id;
1084 
1085 	stat->assigned = bp->assigned;
1086 	stat->powered = bp->powered;
1087 	stat->busy = 0;			/* assume not busy */
1088 	stat->configured = 0;		/* assume not configured */
1089 	stat->empty = 0;
1090 	stat->cond = bp->cond = SBD_COND_OK;
1091 	strncpy(stat->type, "System Brd", sizeof (stat->type));
1092 	stat->info[0] = '\0';
1093 
1094 	if (bp->devices) {
1095 		int		 rv;
1096 		int		 d_idx;
1097 		drmachid_t	 d_id;
1098 
1099 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1100 		while (rv == 0) {
1101 			drmach_status_t	d_stat;
1102 
1103 			err = drmach_i_status(d_id, &d_stat);
1104 			if (err)
1105 				break;
1106 
1107 			stat->busy |= d_stat.busy;
1108 			stat->configured |= d_stat.configured;
1109 
1110 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1111 		}
1112 	}
1113 
1114 	return (err);
1115 }
1116 
1117 int
1118 drmach_board_is_floating(drmachid_t id)
1119 {
1120 	drmach_board_t *bp;
1121 
1122 	if (!DRMACH_IS_BOARD_ID(id))
1123 		return (0);
1124 
1125 	bp = (drmach_board_t *)id;
1126 
1127 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1128 }
1129 
1130 static int
1131 drmach_init(void)
1132 {
1133 	dev_info_t	*rdip;
1134 	int		i, rv, len;
1135 	int		*floating;
1136 
1137 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1138 
1139 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1140 
1141 	rdip = ddi_root_node();
1142 
1143 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1144 	    "floating-boards", &len) != DDI_PROP_SUCCESS) {
1145 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1146 	} else {
1147 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1148 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip, PROP_LEN_AND_VAL_BUF,
1149 		    DDI_PROP_DONTPASS, "floating-boards", (caddr_t)floating,
1150 		    &len);
1151 		if (rv != DDI_PROP_SUCCESS) {
1152 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1153 		} else {
1154 			drmach_domain.floating = 0;
1155 			for (i = 0; i < len / sizeof (int); i++) {
1156 				drmach_domain.floating |= (1 << floating[i]);
1157 			}
1158 		}
1159 		kmem_free(floating, len);
1160 	}
1161 	drmach_domain.allow_dr = opl_check_dr_status();
1162 
1163 	rdip = ddi_get_child(ddi_root_node());
1164 	do {
1165 		int		 bnum;
1166 		drmachid_t	 id;
1167 
1168 		bnum = -1;
1169 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1170 		    OBP_BOARDNUM, -1);
1171 		if (bnum == -1)
1172 			continue;
1173 
1174 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1175 			cmn_err(CE_WARN, "Device node 0x%p has invalid "
1176 			    "property value, %s=%d", rdip, OBP_BOARDNUM, bnum);
1177 			goto error;
1178 		} else if (id == NULL) {
1179 			(void) drmach_board_new(bnum, 1);
1180 		}
1181 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1182 
1183 	opl_hold_devtree();
1184 
1185 	/*
1186 	 * Initialize the IKP feature.
1187 	 *
1188 	 * This can be done only after DR has acquired a hold on all the
1189 	 * device nodes that are interesting to IKP.
1190 	 */
1191 	if (opl_init_cfg() != 0) {
1192 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1193 
1194 		opl_release_devtree();
1195 
1196 		goto error;
1197 	}
1198 
1199 	return (0);
1200 error:
1201 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1202 	rw_destroy(&drmach_boards_rwlock);
1203 	return (ENXIO);
1204 }
1205 
1206 static void
1207 drmach_fini(void)
1208 {
1209 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1210 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1211 	drmach_boards = NULL;
1212 	rw_exit(&drmach_boards_rwlock);
1213 
1214 	/*
1215 	 * Walk immediate children of the root devinfo node
1216 	 * releasing holds acquired on branches in drmach_init()
1217 	 */
1218 
1219 	opl_release_devtree();
1220 
1221 	rw_destroy(&drmach_boards_rwlock);
1222 }
1223 
1224 /*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
1230  *
1231  *
1232  *	Device Path:
1233  *	/pci@<portid>,reg
1234  *
1235  *	where
1236  *	portid[10] = 0
1237  *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
1238  *
1239  *	LLEAF_ID[9:8] = 0
1240  *	LLEAF_ID[8:4] = LSB_ID[4:0]
1241  *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
1242  *			channel 4 is pcicmu
1243  *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
1244  *
1245  *	Properties:
1246  *	name = pci
1247  *	device_type = "pciex"
1248  *	board# = LSBID
1249  *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
1250  *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
1251  */
1252 
1253 static sbd_error_t *
1254 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1255 {
1256 	drmach_io_t	*ip;
1257 
1258 	int		 portid;
1259 
1260 	portid = proto->portid;
1261 	ASSERT(portid != -1);
1262 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1263 
1264 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1265 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1266 	ip->dev.node = drmach_node_dup(proto->node);
1267 	ip->dev.cm.isa = (void *)drmach_io_new;
1268 	ip->dev.cm.dispose = drmach_io_dispose;
1269 	ip->dev.cm.release = drmach_io_release;
1270 	ip->dev.cm.status = drmach_io_status;
1271 	ip->channel = (portid >> 1) & 0x7;
1272 	ip->leaf = (portid & 0x1);
1273 
1274 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1275 	    ip->dev.type, ip->dev.unum);
1276 
1277 	*idp = (drmachid_t)ip;
1278 	return (NULL);
1279 }
1280 
1281 
1282 static void
1283 drmach_io_dispose(drmachid_t id)
1284 {
1285 	drmach_io_t *self;
1286 
1287 	ASSERT(DRMACH_IS_IO_ID(id));
1288 
1289 	self = id;
1290 	if (self->dev.node)
1291 		drmach_node_dispose(self->dev.node);
1292 
1293 	kmem_free(self, sizeof (*self));
1294 }
1295 
1296 /*ARGSUSED*/
1297 sbd_error_t *
1298 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1299 {
1300 	drmach_board_t	*bp = (drmach_board_t *)id;
1301 	sbd_error_t	*err = NULL;
1302 
1303 	/* allow status and ncm operations to always succeed */
1304 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1305 		return (NULL);
1306 	}
1307 
1308 	/* check all other commands for the required option string */
1309 
1310 	if ((opts->size > 0) && (opts->copts != NULL)) {
1311 
1312 		DRMACH_PR("platform options: %s\n", opts->copts);
1313 
1314 		if (strstr(opts->copts, "opldr") == NULL) {
1315 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1316 		}
1317 	} else {
1318 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1319 	}
1320 
1321 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1322 		switch (cmd) {
1323 			case SBD_CMD_TEST:
1324 			case SBD_CMD_STATUS:
1325 			case SBD_CMD_GETNCM:
1326 				break;
1327 			case SBD_CMD_CONNECT:
1328 				if (bp->connected)
1329 					err = drerr_new(0, ESBD_STATE, NULL);
1330 				else if (!drmach_domain.allow_dr)
1331 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1332 				break;
1333 			case SBD_CMD_DISCONNECT:
1334 				if (!bp->connected)
1335 					err = drerr_new(0, ESBD_STATE, NULL);
1336 				else if (!drmach_domain.allow_dr)
1337 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1338 				break;
1339 			default:
1340 				if (!drmach_domain.allow_dr)
1341 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1342 				break;
1343 
1344 		}
1345 	}
1346 
1347 	return (err);
1348 }
1349 
1350 /*ARGSUSED*/
1351 sbd_error_t *
1352 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1353 {
1354 	return (NULL);
1355 }
1356 
1357 sbd_error_t *
1358 drmach_board_assign(int bnum, drmachid_t *id)
1359 {
1360 	sbd_error_t	*err = NULL;
1361 
1362 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1363 
1364 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1365 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1366 	} else {
1367 		drmach_board_t	*bp;
1368 
1369 		if (*id)
1370 			rw_downgrade(&drmach_boards_rwlock);
1371 
1372 		bp = *id;
1373 		if (!(*id))
1374 			bp = *id  =
1375 			    (drmachid_t)drmach_board_new(bnum, 0);
1376 		bp->assigned = 1;
1377 	}
1378 
1379 	rw_exit(&drmach_boards_rwlock);
1380 
1381 	return (err);
1382 }
1383 
1384 /*ARGSUSED*/
1385 sbd_error_t *
1386 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1387 {
1388 	extern int	cpu_alljupiter;
1389 	drmach_board_t	*obj = (drmach_board_t *)id;
1390 	unsigned	cpu_impl;
1391 
1392 	if (!DRMACH_IS_BOARD_ID(id))
1393 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1394 
1395 	if (opl_probe_sb(obj->bnum, &cpu_impl) != 0)
1396 		return (drerr_new(1, EOPL_PROBE, NULL));
1397 
1398 	if (cpu_alljupiter) {
1399 		if (cpu_impl & (1 << OLYMPUS_C_IMPL))
1400 			return (drerr_new(1, EOPL_MIXED_CPU, NULL));
1401 	}
1402 
1403 	(void) prom_attach_notice(obj->bnum);
1404 
1405 	drmach_setup_core_info(obj);
1406 
1407 	obj->connected = 1;
1408 
1409 	return (NULL);
1410 }
1411 
1412 static int drmach_cache_flush_flag[NCPU];
1413 
1414 /*ARGSUSED*/
1415 static void
1416 drmach_flush_cache(uint64_t id, uint64_t dummy)
1417 {
1418 	extern void cpu_flush_ecache(void);
1419 
1420 	cpu_flush_ecache();
1421 	drmach_cache_flush_flag[id] = 0;
1422 }
1423 
1424 static void
1425 drmach_flush_all()
1426 {
1427 	cpuset_t	xc_cpuset;
1428 	int		i;
1429 
1430 	xc_cpuset = cpu_ready_set;
1431 	for (i = 0; i < NCPU; i++) {
1432 		if (CPU_IN_SET(xc_cpuset, i)) {
1433 			drmach_cache_flush_flag[i] = 1;
1434 			xc_one(i, drmach_flush_cache, i, 0);
1435 			while (drmach_cache_flush_flag[i]) {
1436 				DELAY(1000);
1437 			}
1438 		}
1439 	}
1440 }
1441 
1442 static int
1443 drmach_disconnect_cpus(drmach_board_t *bp)
1444 {
1445 	int i, bnum;
1446 
1447 	bnum = bp->bnum;
1448 
1449 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1450 		if (bp->cores[i].core_present) {
1451 			if (bp->cores[i].core_started)
1452 				return (-1);
1453 			if (bp->cores[i].core_hotadded) {
1454 				if (drmach_add_remove_cpu(bnum, i,
1455 				    HOTREMOVE_CPU)) {
1456 					cmn_err(CE_WARN, "Failed to remove "
1457 					    "CMP %d on board %d\n", i, bnum);
1458 					return (-1);
1459 				}
1460 			}
1461 		}
1462 	}
1463 	return (0);
1464 }
1465 
1466 /*ARGSUSED*/
1467 sbd_error_t *
1468 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1469 {
1470 	drmach_board_t *obj;
1471 	int rv = 0;
1472 	sbd_error_t		*err = NULL;
1473 
1474 	if (DRMACH_NULL_ID(id))
1475 		return (NULL);
1476 
1477 	if (!DRMACH_IS_BOARD_ID(id))
1478 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1479 
1480 	obj = (drmach_board_t *)id;
1481 
1482 	if (drmach_disconnect_cpus(obj)) {
1483 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1484 		return (err);
1485 	}
1486 
1487 	rv = opl_unprobe_sb(obj->bnum);
1488 
1489 	if (rv == 0) {
1490 		prom_detach_notice(obj->bnum);
1491 		obj->connected = 0;
1492 
1493 	} else
1494 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1495 
1496 	return (err);
1497 }
1498 
1499 static int
1500 drmach_get_portid(drmach_node_t *np)
1501 {
1502 	int		portid;
1503 	char		type[OBP_MAXPROPNAME];
1504 
1505 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1506 		return (portid);
1507 
1508 	/*
1509 	 * Get the device_type property to see if we should
1510 	 * continue processing this node.
1511 	 */
1512 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1513 		return (-1);
1514 
1515 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1516 		/*
1517 		 * We return cpuid because it has no portid
1518 		 */
1519 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1520 			return (portid);
1521 	}
1522 
1523 	return (-1);
1524 }
1525 
1526 /*
1527  * This is a helper function to determine if a given
1528  * node should be considered for a dr operation according
1529  * to predefined dr type nodes and the node's name.
1530  * Formal Parameter : The name of a device node.
1531  * Return Value: -1, name does not map to a valid dr type.
1532  *		 A value greater or equal to 0, name is a valid dr type.
1533  */
1534 static int
1535 drmach_name2type_idx(char *name)
1536 {
1537 	int 	index, ntypes;
1538 
1539 	if (name == NULL)
1540 		return (-1);
1541 
1542 	/*
1543 	 * Determine how many possible types are currently supported
1544 	 * for dr.
1545 	 */
1546 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1547 
1548 	/* Determine if the node's name correspond to a predefined type. */
1549 	for (index = 0; index < ntypes; index++) {
1550 		if (strcmp(drmach_name2type[index].name, name) == 0)
1551 			/* The node is an allowed type for dr. */
1552 			return (index);
1553 	}
1554 
1555 	/*
1556 	 * If the name of the node does not map to any of the
1557 	 * types in the array drmach_name2type then the node is not of
1558 	 * interest to dr.
1559 	 */
1560 	return (-1);
1561 }
1562 
1563 /*
1564  * there is some complication on OPL:
1565  * - pseudo-mc nodes do not have portid property
1566  * - portid[9:5] of cmp node is LSB #, portid[7:3] of pci is LSB#
1567  * - cmp has board#
1568  * - core and cpu nodes do not have portid and board# properties
1569  * starcat uses portid to derive the board# but that does not work
1570  * for us.  starfire reads board# property to filter the devices.
1571  * That does not work either.  So for these specific device,
1572  * we use specific hard coded methods to get the board# -
1573  * cpu: LSB# = CPUID[9:5]
1574  */
1575 
1576 static int
1577 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1578 {
1579 	drmach_node_t			*node = args->node;
1580 	drmach_board_cb_data_t		*data = args->data;
1581 	drmach_board_t			*obj = data->obj;
1582 
1583 	int		rv, portid;
1584 	int		bnum;
1585 	drmachid_t	id;
1586 	drmach_device_t	*device;
1587 	char name[OBP_MAXDRVNAME];
1588 
1589 	portid = drmach_get_portid(node);
1590 	/*
1591 	 * core, cpu and pseudo-mc do not have portid
1592 	 * we use cpuid as the portid of the cpu node
1593 	 * for pseudo-mc, we do not use portid info.
1594 	 */
1595 
1596 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1597 	if (rv)
1598 		return (0);
1599 
1600 
1601 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1602 
1603 	if (rv) {
1604 		/*
1605 		 * cpu does not have board# property.  We use
1606 		 * CPUID[9:5]
1607 		 */
1608 		if (strcmp("cpu", name) == 0) {
1609 			bnum = (portid >> 5) & 0x1f;
1610 		} else
1611 			return (0);
1612 	}
1613 
1614 
1615 	if (bnum != obj->bnum)
1616 		return (0);
1617 
1618 	if (drmach_name2type_idx(name) < 0) {
1619 		return (0);
1620 	}
1621 
1622 	/*
1623 	 * Create a device data structure from this node data.
1624 	 * The call may yield nothing if the node is not of interest
1625 	 * to drmach.
1626 	 */
1627 	data->err = drmach_device_new(node, obj, portid, &id);
1628 	if (data->err)
1629 		return (-1);
1630 	else if (!id) {
1631 		/*
1632 		 * drmach_device_new examined the node we passed in
1633 		 * and determined that it was one not of interest to
1634 		 * drmach.  So, it is skipped.
1635 		 */
1636 		return (0);
1637 	}
1638 
1639 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1640 	if (rv) {
1641 		data->err = DRMACH_INTERNAL_ERROR();
1642 		return (-1);
1643 	}
1644 	device = id;
1645 
1646 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1647 	return (data->err == NULL ? 0 : -1);
1648 }
1649 
1650 sbd_error_t *
1651 drmach_board_find_devices(drmachid_t id, void *a,
1652 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1653 {
1654 	drmach_board_t		*bp = (drmach_board_t *)id;
1655 	sbd_error_t		*err;
1656 	int			 max_devices;
1657 	int			 rv;
1658 	drmach_board_cb_data_t	data;
1659 
1660 
1661 	if (!DRMACH_IS_BOARD_ID(id))
1662 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1663 
1664 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1665 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1666 	max_devices += MAX_IO_UNITS_PER_BOARD;
1667 
1668 	bp->devices = drmach_array_new(0, max_devices);
1669 
1670 	if (bp->tree == NULL)
1671 		bp->tree = drmach_node_new();
1672 
1673 	data.obj = bp;
1674 	data.ndevs = 0;
1675 	data.found = found;
1676 	data.a = a;
1677 	data.err = NULL;
1678 
1679 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1680 	if (rv == 0)
1681 		err = NULL;
1682 	else {
1683 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1684 		bp->devices = NULL;
1685 
1686 		if (data.err)
1687 			err = data.err;
1688 		else
1689 			err = DRMACH_INTERNAL_ERROR();
1690 	}
1691 
1692 	return (err);
1693 }
1694 
1695 int
1696 drmach_board_lookup(int bnum, drmachid_t *id)
1697 {
1698 	int	rv = 0;
1699 
1700 	rw_enter(&drmach_boards_rwlock, RW_READER);
1701 	if (drmach_array_get(drmach_boards, bnum, id)) {
1702 		*id = 0;
1703 		rv = -1;
1704 	}
1705 	rw_exit(&drmach_boards_rwlock);
1706 	return (rv);
1707 }
1708 
1709 sbd_error_t *
1710 drmach_board_name(int bnum, char *buf, int buflen)
1711 {
1712 	snprintf(buf, buflen, "SB%d", bnum);
1713 	return (NULL);
1714 }
1715 
1716 sbd_error_t *
1717 drmach_board_poweroff(drmachid_t id)
1718 {
1719 	drmach_board_t	*bp;
1720 	sbd_error_t	*err;
1721 	drmach_status_t	 stat;
1722 
1723 	if (DRMACH_NULL_ID(id))
1724 		return (NULL);
1725 
1726 	if (!DRMACH_IS_BOARD_ID(id))
1727 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1728 	bp = id;
1729 
1730 	err = drmach_board_status(id, &stat);
1731 
1732 	if (!err) {
1733 		if (stat.configured || stat.busy)
1734 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1735 		else {
1736 			bp->powered = 0;
1737 		}
1738 	}
1739 	return (err);
1740 }
1741 
1742 sbd_error_t *
1743 drmach_board_poweron(drmachid_t id)
1744 {
1745 	drmach_board_t	*bp;
1746 
1747 	if (!DRMACH_IS_BOARD_ID(id))
1748 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1749 	bp = id;
1750 
1751 	bp->powered = 1;
1752 
1753 	return (NULL);
1754 }
1755 
1756 static sbd_error_t *
1757 drmach_board_release(drmachid_t id)
1758 {
1759 	if (!DRMACH_IS_BOARD_ID(id))
1760 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1761 	return (NULL);
1762 }
1763 
1764 /*ARGSUSED*/
1765 sbd_error_t *
1766 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1767 {
1768 	return (NULL);
1769 }
1770 
1771 sbd_error_t *
1772 drmach_board_unassign(drmachid_t id)
1773 {
1774 	drmach_board_t	*bp;
1775 	sbd_error_t	*err;
1776 	drmach_status_t	 stat;
1777 
1778 	if (DRMACH_NULL_ID(id))
1779 		return (NULL);
1780 
1781 	if (!DRMACH_IS_BOARD_ID(id)) {
1782 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1783 	}
1784 	bp = id;
1785 
1786 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1787 
1788 	err = drmach_board_status(id, &stat);
1789 	if (err) {
1790 		rw_exit(&drmach_boards_rwlock);
1791 		return (err);
1792 	}
1793 	if (stat.configured || stat.busy) {
1794 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1795 	} else {
1796 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1797 			err = DRMACH_INTERNAL_ERROR();
1798 		else
1799 			drmach_board_dispose(bp);
1800 	}
1801 	rw_exit(&drmach_boards_rwlock);
1802 	return (err);
1803 }
1804 
1805 /*
1806  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1807  * implementation #, etc
1808  */
1809 
1810 static sbd_error_t *
1811 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1812 {
1813 	static void drmach_cpu_dispose(drmachid_t);
1814 	static sbd_error_t *drmach_cpu_release(drmachid_t);
1815 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
1816 
1817 	int		 portid;
1818 	drmach_cpu_t	*cp = NULL;
1819 
1820 	/* portid is CPUID of the node */
1821 	portid = proto->portid;
1822 	ASSERT(portid != -1);
1823 
1824 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1825 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1826 	    (OPL_MAX_CMPID_PER_BOARD - 1)) +
1827 	    ((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1828 	    (OPL_MAX_CMPID_PER_BOARD));
1829 
1830 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1831 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1832 	cp->dev.node = drmach_node_dup(proto->node);
1833 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1834 	cp->dev.cm.dispose = drmach_cpu_dispose;
1835 	cp->dev.cm.release = drmach_cpu_release;
1836 	cp->dev.cm.status = drmach_cpu_status;
1837 
1838 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1839 	    cp->dev.type, cp->dev.unum);
1840 
1841 /*
1842  *	CPU ID representation
1843  *	CPUID[9:5] = SB#
1844  *	CPUID[4:3] = Chip#
1845  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1846  *	CPUID[0:0] = Strand#
1847  */
1848 
1849 /*
1850  *	reg property of the strand contains strand ID
1851  *	reg property of the parent node contains core ID
1852  *	We should use them.
1853  */
1854 	cp->cpuid = portid;
1855 	cp->sb = (portid >> 5) & 0x1f;
1856 	cp->chipid = (portid >> 3) & 0x3;
1857 	cp->coreid = (portid >> 1) & 0x3;
1858 	cp->strandid = portid & 0x1;
1859 
1860 	*idp = (drmachid_t)cp;
1861 	return (NULL);
1862 }
1863 
1864 
1865 static void
1866 drmach_cpu_dispose(drmachid_t id)
1867 {
1868 	drmach_cpu_t	*self;
1869 
1870 	ASSERT(DRMACH_IS_CPU_ID(id));
1871 
1872 	self = id;
1873 	if (self->dev.node)
1874 		drmach_node_dispose(self->dev.node);
1875 
1876 	kmem_free(self, sizeof (*self));
1877 }
1878 
1879 static int
1880 drmach_cpu_start(struct cpu *cp)
1881 {
1882 	int		cpuid = cp->cpu_id;
1883 	extern int	restart_other_cpu(int);
1884 
1885 	ASSERT(MUTEX_HELD(&cpu_lock));
1886 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1887 
1888 	cp->cpu_flags &= ~CPU_POWEROFF;
1889 
1890 	/*
1891 	 * NOTE: restart_other_cpu pauses cpus during the
1892 	 *	 slave cpu start.  This helps to quiesce the
1893 	 *	 bus traffic a bit which makes the tick sync
1894 	 *	 routine in the prom more robust.
1895 	 */
1896 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1897 
1898 	restart_other_cpu(cpuid);
1899 
1900 	return (0);
1901 }
1902 
1903 static sbd_error_t *
1904 drmach_cpu_release(drmachid_t id)
1905 {
1906 	if (!DRMACH_IS_CPU_ID(id))
1907 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1908 
1909 	return (NULL);
1910 }
1911 
1912 static sbd_error_t *
1913 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1914 {
1915 	drmach_cpu_t *cp;
1916 	drmach_device_t *dp;
1917 
1918 	ASSERT(DRMACH_IS_CPU_ID(id));
1919 	cp = (drmach_cpu_t *)id;
1920 	dp = &cp->dev;
1921 
1922 	stat->assigned = dp->bp->assigned;
1923 	stat->powered = dp->bp->powered;
1924 	mutex_enter(&cpu_lock);
1925 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1926 	mutex_exit(&cpu_lock);
1927 	stat->busy = dp->busy;
1928 	strncpy(stat->type, dp->type, sizeof (stat->type));
1929 	stat->info[0] = '\0';
1930 
1931 	return (NULL);
1932 }
1933 
1934 sbd_error_t *
1935 drmach_cpu_disconnect(drmachid_t id)
1936 {
1937 
1938 	if (!DRMACH_IS_CPU_ID(id))
1939 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1940 
1941 	return (NULL);
1942 }
1943 
1944 sbd_error_t *
1945 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1946 {
1947 	drmach_cpu_t *cpu;
1948 
1949 	if (!DRMACH_IS_CPU_ID(id))
1950 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1951 	cpu = (drmach_cpu_t *)id;
1952 
1953 	/* get from cpu directly on OPL */
1954 	*cpuid = cpu->cpuid;
1955 	return (NULL);
1956 }
1957 
1958 sbd_error_t *
1959 drmach_cpu_get_impl(drmachid_t id, int *ip)
1960 {
1961 	drmach_device_t *cpu;
1962 	drmach_node_t	*np;
1963 	drmach_node_t	pp;
1964 	int		impl;
1965 	char		type[OBP_MAXPROPNAME];
1966 
1967 	if (!DRMACH_IS_CPU_ID(id))
1968 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1969 
1970 	cpu = id;
1971 	np = cpu->node;
1972 
1973 	if (np->get_parent(np, &pp) != 0) {
1974 		return (DRMACH_INTERNAL_ERROR());
1975 	}
1976 
1977 	/* the parent should be core */
1978 
1979 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1980 		return (drerr_new(0, EOPL_GETPROP, NULL));
1981 	}
1982 
1983 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1984 		if (pp.n_getprop(&pp, "implementation#", &impl,
1985 		    sizeof (impl)) != 0) {
1986 			return (drerr_new(0, EOPL_GETPROP, NULL));
1987 		}
1988 	} else {
1989 		return (DRMACH_INTERNAL_ERROR());
1990 	}
1991 
1992 	*ip = impl;
1993 
1994 	return (NULL);
1995 }
1996 
1997 sbd_error_t *
1998 drmach_get_dip(drmachid_t id, dev_info_t **dip)
1999 {
2000 	drmach_device_t	*dp;
2001 
2002 	if (!DRMACH_IS_DEVICE_ID(id))
2003 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2004 	dp = id;
2005 
2006 	*dip = dp->node->n_getdip(dp->node);
2007 	return (NULL);
2008 }
2009 
2010 sbd_error_t *
2011 drmach_io_is_attached(drmachid_t id, int *yes)
2012 {
2013 	drmach_device_t *dp;
2014 	dev_info_t	*dip;
2015 	int		state;
2016 
2017 	if (!DRMACH_IS_IO_ID(id))
2018 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2019 	dp = id;
2020 
2021 	dip = dp->node->n_getdip(dp->node);
2022 	if (dip == NULL) {
2023 		*yes = 0;
2024 		return (NULL);
2025 	}
2026 
2027 	state = ddi_get_devstate(dip);
2028 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2029 	    (state == DDI_DEVSTATE_UP));
2030 
2031 	return (NULL);
2032 }
2033 
2034 struct drmach_io_cb {
2035 	char	*name;	/* name of the node */
2036 	int	(*func)(dev_info_t *);
2037 	int	rv;
2038 	dev_info_t *dip;
2039 };
2040 
2041 #define	DRMACH_IO_POST_ATTACH	0
2042 #define	DRMACH_IO_PRE_RELEASE	1
2043 
2044 static int
2045 drmach_io_cb_check(dev_info_t *dip, void *arg)
2046 {
2047 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2048 	char name[OBP_MAXDRVNAME];
2049 	int len = OBP_MAXDRVNAME;
2050 
2051 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name",
2052 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2053 		return (DDI_WALK_PRUNECHILD);
2054 	}
2055 
2056 	if (strcmp(name, p->name) == 0) {
2057 		ndi_hold_devi(dip);
2058 		p->dip = dip;
2059 		return (DDI_WALK_TERMINATE);
2060 	}
2061 
2062 	return (DDI_WALK_CONTINUE);
2063 }
2064 
2065 
2066 static int
2067 drmach_console_ops(drmachid_t *id, int state)
2068 {
2069 	drmach_io_t *obj = (drmach_io_t *)id;
2070 	struct drmach_io_cb arg;
2071 	int (*msudetp)(dev_info_t *);
2072 	int (*msuattp)(dev_info_t *);
2073 	dev_info_t *dip, *pdip;
2074 	int circ;
2075 
2076 	/* 4 is pcicmu channel */
2077 	if (obj->channel != 4)
2078 		return (0);
2079 
2080 	arg.name = "serial";
2081 	arg.func = NULL;
2082 	if (state == DRMACH_IO_PRE_RELEASE) {
2083 		msudetp = (int (*)(dev_info_t *))
2084 		    modgetsymvalue("oplmsu_dr_detach", 0);
2085 		if (msudetp != NULL)
2086 			arg.func = msudetp;
2087 	} else if (state == DRMACH_IO_POST_ATTACH) {
2088 		msuattp = (int (*)(dev_info_t *))
2089 		    modgetsymvalue("oplmsu_dr_attach", 0);
2090 		if (msuattp != NULL)
2091 			arg.func = msuattp;
2092 	} else {
2093 		return (0);
2094 	}
2095 
2096 	if (arg.func == NULL) {
2097 		return (0);
2098 	}
2099 
2100 	arg.rv = 0;
2101 	arg.dip = NULL;
2102 
2103 	dip = obj->dev.node->n_getdip(obj->dev.node);
2104 	if (pdip = ddi_get_parent(dip)) {
2105 		ndi_hold_devi(pdip);
2106 		ndi_devi_enter(pdip, &circ);
2107 	} else {
2108 		/* this cannot happen unless something bad happens */
2109 		return (-1);
2110 	}
2111 
2112 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2113 
2114 	ndi_devi_exit(pdip, circ);
2115 	ndi_rele_devi(pdip);
2116 
2117 	if (arg.dip) {
2118 		arg.rv = (*arg.func)(arg.dip);
2119 		ndi_rele_devi(arg.dip);
2120 	} else {
2121 		arg.rv = -1;
2122 	}
2123 
2124 	return (arg.rv);
2125 }
2126 
2127 sbd_error_t *
2128 drmach_io_pre_release(drmachid_t id)
2129 {
2130 	int rv;
2131 
2132 	if (!DRMACH_IS_IO_ID(id))
2133 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2134 
2135 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2136 
2137 	if (rv != 0)
2138 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2139 
2140 	return (NULL);
2141 }
2142 
2143 static sbd_error_t *
2144 drmach_io_release(drmachid_t id)
2145 {
2146 	if (!DRMACH_IS_IO_ID(id))
2147 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2148 	return (NULL);
2149 }
2150 
2151 sbd_error_t *
2152 drmach_io_unrelease(drmachid_t id)
2153 {
2154 	if (!DRMACH_IS_IO_ID(id))
2155 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2156 	return (NULL);
2157 }
2158 
2159 /*ARGSUSED*/
2160 sbd_error_t *
2161 drmach_io_post_release(drmachid_t id)
2162 {
2163 	return (NULL);
2164 }
2165 
2166 /*ARGSUSED*/
2167 sbd_error_t *
2168 drmach_io_post_attach(drmachid_t id)
2169 {
2170 	int rv;
2171 
2172 	if (!DRMACH_IS_IO_ID(id))
2173 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2174 
2175 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2176 
2177 	if (rv != 0)
2178 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2179 
2180 	return (0);
2181 }
2182 
2183 static sbd_error_t *
2184 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2185 {
2186 	drmach_device_t *dp;
2187 	sbd_error_t	*err;
2188 	int		 configured;
2189 
2190 	ASSERT(DRMACH_IS_IO_ID(id));
2191 	dp = id;
2192 
2193 	err = drmach_io_is_attached(id, &configured);
2194 	if (err)
2195 		return (err);
2196 
2197 	stat->assigned = dp->bp->assigned;
2198 	stat->powered = dp->bp->powered;
2199 	stat->configured = (configured != 0);
2200 	stat->busy = dp->busy;
2201 	strncpy(stat->type, dp->type, sizeof (stat->type));
2202 	stat->info[0] = '\0';
2203 
2204 	return (NULL);
2205 }
2206 
2207 static sbd_error_t *
2208 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2209 {
2210 	static void drmach_mem_dispose(drmachid_t);
2211 	static sbd_error_t *drmach_mem_release(drmachid_t);
2212 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2213 	dev_info_t *dip;
2214 	int rv;
2215 
2216 	drmach_mem_t	*mp;
2217 
2218 	rv = 0;
2219 
2220 	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
2221 	    (rv <= 0)) {
2222 		*idp = (drmachid_t)0;
2223 		return (NULL);
2224 	}
2225 
2226 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2227 	proto->unum = 0;
2228 
2229 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2230 	mp->dev.node = drmach_node_dup(proto->node);
2231 	mp->dev.cm.isa = (void *)drmach_mem_new;
2232 	mp->dev.cm.dispose = drmach_mem_dispose;
2233 	mp->dev.cm.release = drmach_mem_release;
2234 	mp->dev.cm.status = drmach_mem_status;
2235 
2236 	snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s", mp->dev.type);
2237 
2238 	dip = mp->dev.node->n_getdip(mp->dev.node);
2239 	if (drmach_setup_mc_info(dip, mp) != 0) {
2240 		return (drerr_new(1, EOPL_MC_SETUP, NULL));
2241 	}
2242 
2243 	/* make sure we do not create memoryless nodes */
2244 	if (mp->nbytes == 0) {
2245 		*idp = (drmachid_t)NULL;
2246 		kmem_free(mp, sizeof (drmach_mem_t));
2247 	} else
2248 		*idp = (drmachid_t)mp;
2249 
2250 	return (NULL);
2251 }
2252 
2253 static void
2254 drmach_mem_dispose(drmachid_t id)
2255 {
2256 	drmach_mem_t *mp;
2257 
2258 	ASSERT(DRMACH_IS_MEM_ID(id));
2259 
2260 
2261 	mp = id;
2262 
2263 	if (mp->dev.node)
2264 		drmach_node_dispose(mp->dev.node);
2265 
2266 	if (mp->memlist) {
2267 		memlist_delete(mp->memlist);
2268 		mp->memlist = NULL;
2269 	}
2270 
2271 	kmem_free(mp, sizeof (*mp));
2272 }
2273 
2274 sbd_error_t *
2275 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2276 {
2277 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2278 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2279 	int		rv;
2280 
2281 	ASSERT(size != 0);
2282 
2283 	if (!DRMACH_IS_MEM_ID(id))
2284 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2285 
2286 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
2287 	if (rv == ENOMEM) {
2288 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
2289 		    (size == 0 ? 0 : size / MBYTE));
2290 	} else if (rv != 0) {
2291 		/* catch this in debug kernels */
2292 		ASSERT(0);
2293 
2294 		cmn_err(CE_WARN, "unexpected kcage_range_add return value %d",
2295 		    rv);
2296 	}
2297 
2298 	if (rv) {
2299 		return (DRMACH_INTERNAL_ERROR());
2300 	}
2301 	else
2302 		return (NULL);
2303 }
2304 
2305 sbd_error_t *
2306 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2307 {
2308 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2309 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2310 	int		rv;
2311 
2312 	if (!DRMACH_IS_MEM_ID(id))
2313 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2314 
2315 	if (size > 0) {
2316 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2317 		if (rv != 0) {
2318 			cmn_err(CE_WARN,
2319 			    "unexpected kcage_range_delete_post_mem_del"
2320 			    " return value %d", rv);
2321 			return (DRMACH_INTERNAL_ERROR());
2322 		}
2323 	}
2324 
2325 	return (NULL);
2326 }
2327 
2328 sbd_error_t *
2329 drmach_mem_disable(drmachid_t id)
2330 {
2331 	if (!DRMACH_IS_MEM_ID(id))
2332 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2333 	else {
2334 		drmach_flush_all();
2335 		return (NULL);
2336 	}
2337 }
2338 
2339 sbd_error_t *
2340 drmach_mem_enable(drmachid_t id)
2341 {
2342 	if (!DRMACH_IS_MEM_ID(id))
2343 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2344 	else
2345 		return (NULL);
2346 }
2347 
2348 sbd_error_t *
2349 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2350 {
2351 	drmach_mem_t *mp;
2352 
2353 	if (!DRMACH_IS_MEM_ID(id))
2354 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2355 
2356 	mp = (drmach_mem_t *)id;
2357 
2358 	/*
2359 	 * This is only used by dr to round up/down the memory
2360 	 * for copying. Our unit of memory isolation is 64 MB.
2361 	 */
2362 
2363 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2364 	mem->mi_basepa = mp->base_pa;
2365 	mem->mi_size = mp->nbytes;
2366 	mem->mi_slice_size = mp->slice_size;
2367 
2368 	return (NULL);
2369 }
2370 
2371 sbd_error_t *
2372 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2373 {
2374 	drmach_mem_t *mp;
2375 
2376 	if (!DRMACH_IS_MEM_ID(id))
2377 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2378 
2379 	mp = (drmach_mem_t *)id;
2380 
2381 	*pa = mp->base_pa;
2382 	return (NULL);
2383 }
2384 
2385 sbd_error_t *
2386 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2387 {
2388 	drmach_mem_t	*mem;
2389 #ifdef	DEBUG
2390 	int		rv;
2391 #endif
2392 	struct memlist	*mlist;
2393 
2394 	if (!DRMACH_IS_MEM_ID(id))
2395 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2396 
2397 	mem = (drmach_mem_t *)id;
2398 	mlist = memlist_dup(mem->memlist);
2399 
2400 #ifdef DEBUG
2401 	/*
2402 	 * Make sure the incoming memlist doesn't already
2403 	 * intersect with what's present in the system (phys_install).
2404 	 */
2405 	memlist_read_lock();
2406 	rv = memlist_intersect(phys_install, mlist);
2407 	memlist_read_unlock();
2408 	if (rv) {
2409 		DRMACH_PR("Derived memlist intersects with phys_install\n");
2410 		memlist_dump(mlist);
2411 
2412 		DRMACH_PR("phys_install memlist:\n");
2413 		memlist_dump(phys_install);
2414 
2415 		memlist_delete(mlist);
2416 		return (DRMACH_INTERNAL_ERROR());
2417 	}
2418 
2419 	DRMACH_PR("Derived memlist:");
2420 	memlist_dump(mlist);
2421 #endif
2422 	*ml = mlist;
2423 
2424 	return (NULL);
2425 }
2426 
2427 sbd_error_t *
2428 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2429 {
2430 	drmach_mem_t	*mem;
2431 
2432 	if (!DRMACH_IS_MEM_ID(id))
2433 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2434 
2435 	mem = (drmach_mem_t *)id;
2436 
2437 	*bytes = mem->slice_size;
2438 
2439 	return (NULL);
2440 }
2441 
2442 
2443 /* ARGSUSED */
2444 processorid_t
2445 drmach_mem_cpu_affinity(drmachid_t id)
2446 {
2447 	return (CPU_CURRENT);
2448 }
2449 
2450 static sbd_error_t *
2451 drmach_mem_release(drmachid_t id)
2452 {
2453 	if (!DRMACH_IS_MEM_ID(id))
2454 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2455 	return (NULL);
2456 }
2457 
2458 static sbd_error_t *
2459 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2460 {
2461 	drmach_mem_t *dp;
2462 	uint64_t	 pa, slice_size;
2463 	struct memlist	*ml;
2464 
2465 	ASSERT(DRMACH_IS_MEM_ID(id));
2466 	dp = id;
2467 
2468 	/* get starting physical address of target memory */
2469 	pa = dp->base_pa;
2470 
2471 	/* round down to slice boundary */
2472 	slice_size = dp->slice_size;
2473 	pa &= ~(slice_size - 1);
2474 
2475 	/* stop at first span that is in slice */
2476 	memlist_read_lock();
2477 	for (ml = phys_install; ml; ml = ml->next)
2478 		if (ml->address >= pa && ml->address < pa + slice_size)
2479 			break;
2480 	memlist_read_unlock();
2481 
2482 	stat->assigned = dp->dev.bp->assigned;
2483 	stat->powered = dp->dev.bp->powered;
2484 	stat->configured = (ml != NULL);
2485 	stat->busy = dp->dev.busy;
2486 	strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2487 	stat->info[0] = '\0';
2488 
2489 	return (NULL);
2490 }
2491 
2492 
2493 sbd_error_t *
2494 drmach_board_deprobe(drmachid_t id)
2495 {
2496 	drmach_board_t	*bp;
2497 
2498 	if (!DRMACH_IS_BOARD_ID(id))
2499 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2500 
2501 	bp = id;
2502 
2503 	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
2504 
2505 	if (bp->tree) {
2506 		drmach_node_dispose(bp->tree);
2507 		bp->tree = NULL;
2508 	}
2509 	if (bp->devices) {
2510 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2511 		bp->devices = NULL;
2512 	}
2513 
2514 	bp->boot_board = 0;
2515 
2516 	return (NULL);
2517 }
2518 
2519 /*ARGSUSED*/
2520 static sbd_error_t *
2521 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2522 {
2523 	drmach_board_t		*bp = (drmach_board_t *)id;
2524 	sbd_error_t		*err = NULL;
2525 	int	rv;
2526 	unsigned cpu_impl;
2527 
2528 	if (!DRMACH_IS_BOARD_ID(id))
2529 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2530 
2531 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2532 	rv = opl_probe_sb(bp->bnum, &cpu_impl);
2533 	if (rv != 0) {
2534 		err = drerr_new(1, EOPL_PROBE, bp->cm.name);
2535 		return (err);
2536 	}
2537 	return (err);
2538 }
2539 
2540 /*ARGSUSED*/
2541 static sbd_error_t *
2542 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2543 {
2544 	drmach_board_t	*bp;
2545 	sbd_error_t	*err = NULL;
2546 	int	rv;
2547 
2548 	if (!DRMACH_IS_BOARD_ID(id))
2549 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2550 	bp = (drmach_board_t *)id;
2551 
2552 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2553 
2554 	rv = opl_unprobe_sb(bp->bnum);
2555 	if (rv != 0) {
2556 		err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
2557 	}
2558 
2559 	return (err);
2560 }
2561 
2562 
2563 /*ARGSUSED*/
2564 sbd_error_t *
2565 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2566 {
2567 	struct memlist	*ml;
2568 	uint64_t	src_pa;
2569 	uint64_t	dst_pa;
2570 	uint64_t	dst;
2571 
2572 	dst_pa = va_to_pa(&dst);
2573 
2574 	memlist_read_lock();
2575 	for (ml = phys_install; ml; ml = ml->next) {
2576 		uint64_t	nbytes;
2577 
2578 		src_pa = ml->address;
2579 		nbytes = ml->size;
2580 
2581 		while (nbytes != 0ull) {
2582 
2583 			/* copy 32 bytes at arc_pa to dst_pa */
2584 			bcopy32_il(src_pa, dst_pa);
2585 
2586 			/* increment by 32 bytes */
2587 			src_pa += (4 * sizeof (uint64_t));
2588 
2589 			/* decrement by 32 bytes */
2590 			nbytes -= (4 * sizeof (uint64_t));
2591 		}
2592 	}
2593 	memlist_read_unlock();
2594 
2595 	return (NULL);
2596 }
2597 
/*
 * Passthru command table consulted by drmach_passthru().
 *
 * name    - command string; drmach_passthru() compares it against the
 *           leading strlen(name) characters of opts->copts, so it acts
 *           as a prefix match.
 * handler - function invoked with the caller's id and opts when the
 *           name matches.
 *
 * The table is scanned in order and ends at the entry whose name is NULL.
 */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "readmem",		drmach_pt_readmem		},
	{ "ikprobe",	drmach_pt_ikprobe	},
	{ "ikdeprobe",	drmach_pt_ikdeprobe	},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
2609 
2610 /*ARGSUSED*/
2611 sbd_error_t *
2612 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2613 {
2614 	int		i;
2615 	sbd_error_t	*err;
2616 
2617 	i = 0;
2618 	while (drmach_pt_arr[i].name != NULL) {
2619 		int len = strlen(drmach_pt_arr[i].name);
2620 
2621 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2622 			break;
2623 
2624 		i += 1;
2625 	}
2626 
2627 	if (drmach_pt_arr[i].name == NULL)
2628 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2629 	else
2630 		err = (*drmach_pt_arr[i].handler)(id, opts);
2631 
2632 	return (err);
2633 }
2634 
2635 sbd_error_t *
2636 drmach_release(drmachid_t id)
2637 {
2638 	drmach_common_t *cp;
2639 
2640 	if (!DRMACH_IS_DEVICE_ID(id))
2641 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2642 	cp = id;
2643 
2644 	return (cp->release(id));
2645 }
2646 
2647 sbd_error_t *
2648 drmach_status(drmachid_t id, drmach_status_t *stat)
2649 {
2650 	drmach_common_t *cp;
2651 	sbd_error_t	*err;
2652 
2653 	rw_enter(&drmach_boards_rwlock, RW_READER);
2654 
2655 	if (!DRMACH_IS_ID(id)) {
2656 		rw_exit(&drmach_boards_rwlock);
2657 		return (drerr_new(0, EOPL_NOTID, NULL));
2658 	}
2659 	cp = (drmach_common_t *)id;
2660 	err = cp->status(id, stat);
2661 
2662 	rw_exit(&drmach_boards_rwlock);
2663 
2664 	return (err);
2665 }
2666 
2667 static sbd_error_t *
2668 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2669 {
2670 	drmach_common_t *cp;
2671 
2672 	if (!DRMACH_IS_ID(id))
2673 		return (drerr_new(0, EOPL_NOTID, NULL));
2674 	cp = id;
2675 
2676 	return (cp->status(id, stat));
2677 }
2678 
2679 /*ARGSUSED*/
2680 sbd_error_t *
2681 drmach_unconfigure(drmachid_t id, int flags)
2682 {
2683 	drmach_device_t *dp;
2684 	dev_info_t	*rdip, *fdip = NULL;
2685 	char name[OBP_MAXDRVNAME];
2686 	int rv;
2687 
2688 	if (DRMACH_IS_CPU_ID(id))
2689 		return (NULL);
2690 
2691 	if (!DRMACH_IS_DEVICE_ID(id))
2692 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2693 
2694 	dp = id;
2695 
2696 	rdip = dp->node->n_getdip(dp->node);
2697 
2698 	ASSERT(rdip);
2699 
2700 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2701 
2702 	if (rv)
2703 		return (NULL);
2704 
2705 	/*
2706 	 * Note: FORCE flag is no longer necessary under devfs
2707 	 */
2708 
2709 	ASSERT(e_ddi_branch_held(rdip));
2710 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2711 		sbd_error_t	*err;
2712 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2713 
2714 		/*
2715 		 * If non-NULL, fdip is returned held and must be released.
2716 		 */
2717 		if (fdip != NULL) {
2718 			(void) ddi_pathname(fdip, path);
2719 			ndi_rele_devi(fdip);
2720 		} else {
2721 			(void) ddi_pathname(rdip, path);
2722 		}
2723 
2724 		err = drerr_new(1, EOPL_DRVFAIL, path);
2725 
2726 		kmem_free(path, MAXPATHLEN);
2727 
2728 		return (err);
2729 	}
2730 
2731 	return (NULL);
2732 }
2733 
2734 
2735 int
2736 drmach_cpu_poweron(struct cpu *cp)
2737 {
2738 	int bnum, cpuid, onb_core_num, strand_id;
2739 	drmach_board_t *bp;
2740 
2741 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2742 
2743 	cpuid = cp->cpu_id;
2744 	bnum = LSB_ID(cpuid);
2745 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2746 	strand_id = STRAND_ID(cpuid);
2747 	bp = drmach_get_board_by_bnum(bnum);
2748 
2749 	ASSERT(bp);
2750 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2751 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2752 		    HOTADD_CPU) != 0) {
2753 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2754 			    onb_core_num, bnum);
2755 			return (EIO);
2756 		}
2757 	}
2758 
2759 	ASSERT(MUTEX_HELD(&cpu_lock));
2760 
2761 	if (drmach_cpu_start(cp) != 0) {
2762 		if (bp->cores[onb_core_num].core_started == 0) {
2763 			/*
2764 			 * we must undo the hotadd or no one will do that
2765 			 * If this fails, we will do this again in
2766 			 * drmach_board_disconnect.
2767 			 */
2768 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2769 			    HOTREMOVE_CPU) != 0) {
2770 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2771 				    "on board %d\n", onb_core_num, bnum);
2772 			}
2773 		}
2774 		return (EBUSY);
2775 	} else {
2776 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2777 		return (0);
2778 	}
2779 }
2780 
2781 int
2782 drmach_cpu_poweroff(struct cpu *cp)
2783 {
2784 	int 		rv = 0;
2785 	processorid_t	cpuid = cp->cpu_id;
2786 
2787 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2788 
2789 	ASSERT(MUTEX_HELD(&cpu_lock));
2790 
2791 	/*
2792 	 * Capture all CPUs (except for detaching proc) to prevent
2793 	 * crosscalls to the detaching proc until it has cleared its
2794 	 * bit in cpu_ready_set.
2795 	 *
2796 	 * The CPU's remain paused and the prom_mutex is known to be free.
2797 	 * This prevents the x-trap victim from blocking when doing prom
2798 	 * IEEE-1275 calls at a high PIL level.
2799 	 */
2800 
2801 	promsafe_pause_cpus();
2802 
2803 	/*
2804 	 * Quiesce interrupts on the target CPU. We do this by setting
2805 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2806 	 * prevent it from receiving cross calls and cross traps.
2807 	 * This prevents the processor from receiving any new soft interrupts.
2808 	 */
2809 	mp_cpu_quiesce(cp);
2810 
2811 	rv = prom_stopcpu_bycpuid(cpuid);
2812 	if (rv == 0)
2813 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2814 
2815 	start_cpus();
2816 
2817 	if (rv == 0) {
2818 		int bnum, onb_core_num, strand_id;
2819 		drmach_board_t *bp;
2820 
2821 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2822 
2823 		bnum = LSB_ID(cpuid);
2824 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2825 		strand_id = STRAND_ID(cpuid);
2826 		bp = drmach_get_board_by_bnum(bnum);
2827 		ASSERT(bp);
2828 
2829 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2830 		if (bp->cores[onb_core_num].core_started == 0) {
2831 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2832 			    HOTREMOVE_CPU) != 0) {
2833 				cmn_err(CE_WARN, "Failed to remove CMP %d LSB "
2834 				    "%d\n", onb_core_num, bnum);
2835 				return (EIO);
2836 			}
2837 		}
2838 	}
2839 
2840 	return (rv);
2841 }
2842 
/*
 * Suspend/resume verification hook.  This implementation performs no
 * checks: both arguments are ignored and 0 is always returned.
 */
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	return (0);
}
2849 
/*
 * Suspend hook; intentionally empty in this implementation.
 */
void
drmach_suspend_last(void)
{
}
2854 
/*
 * Resume hook; intentionally empty in this implementation.
 */
void
drmach_resume_first(void)
{
}
2859 
2860 /*
2861  * Log a DR sysevent.
2862  * Return value: 0 success, non-zero failure.
2863  */
2864 int
2865 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2866 {
2867 	sysevent_t			*ev;
2868 	sysevent_id_t			eid;
2869 	int				rv, km_flag;
2870 	sysevent_value_t		evnt_val;
2871 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2872 	char				attach_pnt[MAXNAMELEN];
2873 
2874 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2875 	attach_pnt[0] = '\0';
2876 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2877 		rv = -1;
2878 		goto logexit;
2879 	}
2880 	if (verbose) {
2881 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2882 		    attach_pnt, hint, flag, verbose);
2883 	}
2884 
2885 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2886 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2887 		rv = -2;
2888 		goto logexit;
2889 	}
2890 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2891 	evnt_val.value.sv_string = attach_pnt;
2892 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
2893 	    km_flag)) != 0)
2894 		goto logexit;
2895 
2896 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2897 	evnt_val.value.sv_string = hint;
2898 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
2899 	    km_flag)) != 0) {
2900 		sysevent_free_attr(evnt_attr_list);
2901 		goto logexit;
2902 	}
2903 
2904 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2905 
2906 	/*
2907 	 * Log the event but do not sleep waiting for its
2908 	 * delivery. This provides insulation from syseventd.
2909 	 */
2910 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2911 
2912 logexit:
2913 	if (ev)
2914 		sysevent_free(ev);
2915 	if ((rv != 0) && verbose)
2916 		cmn_err(CE_WARN, "drmach_log_sysevent failed (rv %d) for %s "
2917 		    " %s\n", rv, attach_pnt, hint);
2918 
2919 	return (rv);
2920 }
2921 
2922 #define	OPL_DR_STATUS_PROP "dr-status"
2923 
2924 static int
2925 opl_check_dr_status()
2926 {
2927 	pnode_t	node;
2928 	int	rtn, len;
2929 	char	*str;
2930 
2931 	node = prom_rootnode();
2932 	if (node == OBP_BADNODE) {
2933 		return (1);
2934 	}
2935 
2936 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2937 	if (len == -1) {
2938 		/*
2939 		 * dr-status doesn't exist when DR is activated and
2940 		 * any warning messages aren't needed.
2941 		 */
2942 		return (1);
2943 	}
2944 
2945 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2946 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2947 	kmem_free(str, len + 1);
2948 	if (rtn == -1) {
2949 		return (1);
2950 	} else {
2951 		return (0);
2952 	}
2953 }
2954 
2955 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2956 
2957 static struct memlist *
2958 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2959 	struct memlist *mlist, uint64_t base, uint64_t len)
2960 {
2961 	struct memlist	*ml, *tl, *nl;
2962 
2963 	if (len == 0ull)
2964 		return (NULL);
2965 
2966 	if (mlist == NULL) {
2967 		mlist = p->free_mlist;
2968 		if (mlist == NULL)
2969 			return (NULL);
2970 		p->free_mlist = mlist->next;
2971 		mlist->address = base;
2972 		mlist->size = len;
2973 		mlist->next = mlist->prev = NULL;
2974 
2975 		return (mlist);
2976 	}
2977 
2978 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
2979 		if (base < ml->address) {
2980 			if ((base + len) < ml->address) {
2981 				nl = p->free_mlist;
2982 				if (nl == NULL)
2983 					return (NULL);
2984 				p->free_mlist = nl->next;
2985 				nl->address = base;
2986 				nl->size = len;
2987 				nl->next = ml;
2988 				if ((nl->prev = ml->prev) != NULL)
2989 					nl->prev->next = nl;
2990 				ml->prev = nl;
2991 				if (mlist == ml)
2992 					mlist = nl;
2993 			} else {
2994 				ml->size = MAX((base + len), (ml->address +
2995 				    ml->size)) - base;
2996 				ml->address = base;
2997 			}
2998 			break;
2999 
3000 		} else if (base <= (ml->address + ml->size)) {
3001 			ml->size = MAX((base + len), (ml->address + ml->size)) -
3002 			    MIN(ml->address, base);
3003 			ml->address = MIN(ml->address, base);
3004 			break;
3005 		}
3006 	}
3007 	if (ml == NULL) {
3008 		nl = p->free_mlist;
3009 		if (nl == NULL)
3010 			return (NULL);
3011 		p->free_mlist = nl->next;
3012 		nl->address = base;
3013 		nl->size = len;
3014 		nl->next = NULL;
3015 		nl->prev = tl;
3016 		tl->next = nl;
3017 	}
3018 
3019 	return (mlist);
3020 }
3021 
3022 /*
3023  * The routine performs the necessary memory COPY and MC adr SWITCH.
3024  * Both operations MUST be at the same "level" so that the stack is
3025  * maintained correctly between the copy and switch.  The switch
3026  * portion implements a caching mechanism to guarantee the code text
3027  * is cached prior to execution.  This is to guard against possible
3028  * memory access while the MC adr's are being modified.
3029  *
 * IMPORTANT: The drmach_copy_rename_end() function must immediately
3031  * follow drmach_copy_rename_prog__relocatable() so that the correct
3032  * "length" of the drmach_copy_rename_prog__relocatable can be
3033  * calculated.  This routine MUST be a LEAF function, i.e. it can
3034  * make NO function calls, primarily for two reasons:
3035  *
3036  *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
3041  *
3042  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3043  * providers (and anything else, for that matter) that this
3044  * function's text is manually relocated elsewhere before it is
3045  * executed.  That is, it cannot be safely instrumented with any
3046  * methodology that is PC-relative.
3047  */
3048 
/*
 * fmem_timeout is multiplied by system_clock_frequency, giving the
 * rename command a delay of fmem_timeout seconds.
 *
 * The FMEM command itself should complete within 15 sec.
 * We add 2 more sec to be conservative.
 *
 * Note that there is also a SCF BUSY bit check in drmach_asm.s
 * right before the FMEM command is issued.  XSCF sets the SCF BUSY
 * bit when the other domain on the same PSB reboots, in which case
 * it will not be able to service the FMEM command within 15 sec.
 * After setting the SCF BUSY bit, XSCF waits a while before
 * servicing another reboot command, so there is no race condition.
 */

static int	fmem_timeout = 17;

/*
 * Assumed minimum copy rate, in bytes per second.
 *
 * Empirical data on some OPL systems shows that we can copy
 * 250 MB per second.  We use 80 MB to be conservative.  In the
 * normal case this timeout does not affect anything.
 */

static int	min_copy_size_per_sec = 80 * 1024 * 1024;

/*
 * Timeout, in seconds, for the xcall synchronization that gets all
 * the CPUs ready to do the parallel copy.  Even on a fully loaded
 * system, 10 sec. should be long enough.  The copy-rename program
 * multiplies this by system_clock_freq to form its stick limit.
 */

static int	cpu_xcall_delay = 10;
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * non-zero value disables the multi-CPU copy -- confirm at its users.
 */
int drmach_disable_mcopy = 0;
3088 
/*
 * Delay for roughly `ms' milliseconds, given a stick rate of `freq'
 * ticks per second.
 *
 * The loop executes the sleep instruction to yield the CPU to other
 * strands.  If this is not done, some strand will tie up the CPU in
 * busy loops while the other strand cannot do useful work, and the copy
 * procedure takes much longer.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. `a + b') are scaled as a whole.  The macro expands to
 * a brace-enclosed block and calls only the inline helpers
 * drmach_get_stick_il() and drmach_sleep_il().
 */
#define	DR_DELAY_IL(ms, freq)					\
	{							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;	\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	}
3108 
/*
 * Per-CPU body of the copy-rename operation.
 *
 *	prog	- the copy-rename program shared by all participating CPUs
 *	cpuid	- id of the CPU executing this instance
 *
 * The CPU whose id equals prog->data->cpuid acts as the master; every
 * other caller is a slave.  The phases are:
 *
 *	1. Rendezvous: each CPU posts FMEM_LOOP_COPY_READY in
 *	   prog->critical->stat[]; the master waits for all CPUs in
 *	   cpu_slave_set and then publishes FMEM_LOOP_COPY_READY in
 *	   prog->data->fmem_status.stat, which releases the slaves.
 *	2. Copy: CPUs in cpu_copy_set copy their own memlist
 *	   (prog->data->cpu_ml[cpuid]) 32 bytes at a time.
 *	3. Cache flush.
 *	4. The master waits for the slaves to reach FMEM_LOOP_FMEM_READY
 *	   and then calls prog->critical->fmem(); slaves call
 *	   prog->critical->loop().
 *
 * Return value: the master returns an EOPL_FMEM_* code on a rendezvous
 * or copy failure, otherwise the value returned by the fmem script.  A
 * slave returns EOPL_FMEM_TERMINATE if it sees the master's error flag,
 * otherwise 0 (the loop script's result is stored in
 * prog->data->error[cpuid]).
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
	int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	extern void		flush_windows_il(void);
	/* written and read only on the master path */
	uint64_t		copy_start;

	/*
	 * flush_windows is moved here to make sure all
	 * registers used in the callers are flushed to
	 * memory before the copy.
	 *
	 * If flush_windows() is called too early in the
	 * calling function, the compiler might put some
	 * data in the local registers after flush_windows().
	 * After FMA, if there is any fill trap, the registers
	 * will contain stale data.
	 */

	flush_windows_il();

	/* announce that this CPU has arrived */
	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: wait for every slave, bounded by cpu_xcall_delay */
		limit = drmach_get_stick_il();
		limit += cpu_xcall_delay * system_clock_freq;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
				/* wait for all CPU's to be ready */
				for (;;) {
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_COPY_READY) {
						break;
					}
					DR_DELAY_IL(1, prog->data->stick_freq);
				}
				/*
				 * NOTE(review): the limit is only checked
				 * once slave i has reported ready; the
				 * inner loop above has no timeout of its
				 * own.
				 */
				curr = drmach_get_stick_il();
				if (curr > limit) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_XC_TIMEOUT;
					return (EOPL_FMEM_XC_TIMEOUT);
				}
			}
		}
		/* release the slaves and start the copy clock */
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's go-ahead or its error flag */
		for (;;) {
			if (prog->data->fmem_status.stat ==
			    FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = EOPL_FMEM_TERMINATE;
				return (EOPL_FMEM_TERMINATE);
			}
			DR_DELAY_IL(1, prog->data->stick_freq);
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
		for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
			uint64_t	s_pa, t_pa;
			uint64_t	nbytes;

			/* memlist addresses are offsets from the copy bases */
			s_pa = prog->data->s_copybasepa + ml->address;
			t_pa = prog->data->t_copybasepa + ml->address;
			nbytes = ml->size;

			while (nbytes != 0ull) {
				/*
				 * If the master has detected error, we just
				 * bail out
				 */
				if (prog->data->fmem_status.error !=
				    ESBD_NOERROR) {
					prog->data->error[cpuid] =
					    EOPL_FMEM_TERMINATE;
					return (EOPL_FMEM_TERMINATE);
				}
				/*
				 * This copy does NOT use an ASI
				 * that avoids the Ecache, therefore
				 * the dst_pa addresses may remain
				 * in our Ecache after the dst_pa
				 * has been removed from the system.
				 * A subsequent write-back to memory
				 * will cause an ARB-stop because the
				 * physical address no longer exists
				 * in the system. Therefore we must
				 * flush out local Ecache after we
				 * finish the copy.
				 */

				/* copy 32 bytes at s_pa to t_pa */
				bcopy32_il(s_pa, t_pa);

				/*
				 * increment the counter to signal that we are
				 * alive
				 */
				prog->stat->nbytes[cpuid] += 32;

				/* increment by 32 bytes */
				s_pa += (4 * sizeof (uint64_t));
				t_pa += (4 * sizeof (uint64_t));

				/* decrement by 32 bytes */
				nbytes -= (4 * sizeof (uint64_t));
			}
		}
		prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
		membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.
	 */
	flush_cache_il();

	/*
	 * drmach_fmem_exec_script()
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* slaves get copy_delay ticks from copy_start to finish */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (!CPU_IN_SET(prog->data->cpu_slave_set, i))
				continue;

			for (;;) {
				/*
				 * we get FMEM_LOOP_FMEM_READY in
				 * normal case
				 */
				if (prog->critical->stat[i] ==
				    FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->data->error[i] ==
				    EOPL_FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_COPY_ERROR;
					return (EOPL_FMEM_COPY_ERROR);
				}
				/*
				 * if we have not reached limit, wait
				 * more
				 */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				/* past the limit: record who we wait for */
				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time = curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/*
					 * no progress, perhaps just
					 * finished
					 */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->data->error[i] ==
					    EOPL_FMEM_COPY_ERROR) {
						prog->data-> fmem_status.error =
						    EOPL_FMEM_COPY_ERROR;
						return (EOPL_FMEM_COPY_ERROR);
					}
					/* slave made no progress: give up */
					prog->data->fmem_status.error =
					    EOPL_FMEM_COPY_TIMEOUT;
					return (EOPL_FMEM_COPY_TIMEOUT);
				}
			}
		}

		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 */
		rtn = prog->critical->loop((void *)(prog->critical), PAGESIZE,
		    (void *)&(prog->critical->stat[cpuid]));
		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}
3332 
/*
 * End marker for drmach_copy_rename_prog__relocatable().  The function
 * has no body; only its address is used, to compute the length of the
 * code that gets copied.
 */
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function's location MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}
3344 
3345 
3346 static void
3347 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3348 {
3349 	struct memlist *ml;
3350 	caddr_t buf;
3351 	int nbytes, s;
3352 
3353 	nbytes = PAGESIZE;
3354 	s = roundup(sizeof (struct memlist), sizeof (void *));
3355 	p->free_mlist = NULL;
3356 	buf = p->memlist_buffer;
3357 	while (nbytes >= sizeof (struct memlist)) {
3358 		ml = (struct memlist *)buf;
3359 		ml->next = p->free_mlist;
3360 		p->free_mlist = ml;
3361 		buf += s;
3362 		nbytes -= s;
3363 	}
3364 }
3365 
/*
 * Map the DRMACH_FMEM_LOCKED_PAGES pages backing va at the alternate
 * virtual range starting at new_va, using locked TLB entries.
 *
 *	va	- existing kernel virtual address of the first page
 *	new_va	- alternate virtual address for the first page
 *
 * Preemption is disabled on entry and is NOT re-enabled here; the
 * matching kpreempt_enable() is in drmach_unlock_critical().
 */
static void
drmach_lock_critical(caddr_t va, caddr_t new_va)
{
	tte_t tte;
	int i;

	kpreempt_disable();

	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
		/* drop any existing translation for the alternate address */
		vtag_flushpage(new_va, (uint64_t)ksfmmup);
		/* build an 8K TTE for the page behind va and mark it locked */
		sfmmu_memtte(&tte, va_to_pfn(va), PROC_DATA|HAT_NOSYNC, TTE8K);
		tte.tte_intlo |= TTE_LCK_INT;
		/* load it into both the data and the instruction TLB */
		sfmmu_dtlb_ld_kva(new_va, &tte);
		sfmmu_itlb_ld_kva(new_va, &tte);
		va += PAGESIZE;
		new_va += PAGESIZE;
	}
}
3384 
3385 static void
3386 drmach_unlock_critical(caddr_t va)
3387 {
3388 	int i;
3389 
3390 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3391 		vtag_flushpage(va, (uint64_t)ksfmmup);
3392 		va += PAGESIZE;
3393 	}
3394 
3395 	kpreempt_enable();
3396 }
3397 
3398 sbd_error_t *
3399 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3400 	struct memlist *c_ml, drmachid_t *pgm_id)
3401 {
3402 	drmach_mem_t	*s_mem;
3403 	drmach_mem_t	*t_mem;
3404 	struct memlist	*x_ml;
3405 	uint64_t	s_copybasepa, t_copybasepa;
3406 	uint_t		len;
3407 	caddr_t		bp, wp;
3408 	int			s_bd, t_bd, cpuid, active_cpus, i;
3409 	uint64_t		c_addr;
3410 	size_t			c_size, copy_sz, sz;
3411 	extern void		drmach_fmem_loop_script();
3412 	extern void		drmach_fmem_loop_script_rtn();
3413 	extern int		drmach_fmem_exec_script();
3414 	extern void		drmach_fmem_exec_script_end();
3415 	sbd_error_t	*err;
3416 	drmach_copy_rename_program_t *prog = NULL;
3417 	drmach_copy_rename_program_t *prog_kmem = NULL;
3418 	void		(*mc_suspend)(void);
3419 	void		(*mc_resume)(void);
3420 	int		(*scf_fmem_start)(int, int);
3421 	int		(*scf_fmem_end)(void);
3422 	int		(*scf_fmem_cancel)(void);
3423 	uint64_t	(*scf_get_base_addr)(void);
3424 
3425 	if (!DRMACH_IS_MEM_ID(s_id))
3426 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3427 	if (!DRMACH_IS_MEM_ID(t_id))
3428 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3429 
3430 	for (i = 0; i < NCPU; i++) {
3431 		int lsb_id, onb_core_num, strand_id;
3432 		drmach_board_t *bp;
3433 
3434 		/*
3435 		 * this kind of CPU will spin in cache
3436 		 */
3437 		if (CPU_IN_SET(cpu_ready_set, i))
3438 			continue;
3439 
3440 		/*
3441 		 * Now check for any inactive CPU's that
3442 		 * have been hotadded.  This can only occur in
3443 		 * error condition in drmach_cpu_poweron().
3444 		 */
3445 		lsb_id = LSB_ID(i);
3446 		onb_core_num = ON_BOARD_CORE_NUM(i);
3447 		strand_id = STRAND_ID(i);
3448 		bp = drmach_get_board_by_bnum(lsb_id);
3449 		if (bp == NULL)
3450 			continue;
3451 		if (bp->cores[onb_core_num].core_hotadded &
3452 		    (1 << strand_id)) {
3453 			if (!(bp->cores[onb_core_num].core_started &
3454 			    (1 << strand_id))) {
3455 				return (drerr_new(1, EOPL_CPU_STATE, NULL));
3456 			}
3457 		}
3458 	}
3459 
3460 	mc_suspend = (void (*)(void))
3461 	    modgetsymvalue("opl_mc_suspend", 0);
3462 	mc_resume = (void (*)(void))
3463 	    modgetsymvalue("opl_mc_resume", 0);
3464 
3465 	if (mc_suspend == NULL || mc_resume == NULL) {
3466 		return (drerr_new(1, EOPL_MC_OPL, NULL));
3467 	}
3468 
3469 	scf_fmem_start = (int (*)(int, int))
3470 	    modgetsymvalue("scf_fmem_start", 0);
3471 	if (scf_fmem_start == NULL) {
3472 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3473 	}
3474 	scf_fmem_end = (int (*)(void))
3475 	    modgetsymvalue("scf_fmem_end", 0);
3476 	if (scf_fmem_end == NULL) {
3477 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3478 	}
3479 	scf_fmem_cancel = (int (*)(void))
3480 	    modgetsymvalue("scf_fmem_cancel", 0);
3481 	if (scf_fmem_cancel == NULL) {
3482 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3483 	}
3484 	scf_get_base_addr = (uint64_t (*)(void))
3485 	    modgetsymvalue("scf_get_base_addr", 0);
3486 	if (scf_get_base_addr == NULL) {
3487 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3488 	}
3489 	s_mem = s_id;
3490 	t_mem = t_id;
3491 
3492 	s_bd = s_mem->dev.bp->bnum;
3493 	t_bd = t_mem->dev.bp->bnum;
3494 
3495 	/* calculate source and target base pa */
3496 
3497 	s_copybasepa = s_mem->slice_base;
3498 	t_copybasepa = t_mem->slice_base;
3499 
3500 	/* adjust copy memlist addresses to be relative to copy base pa */
3501 	x_ml = c_ml;
3502 	while (x_ml != NULL) {
3503 		x_ml->address -= s_copybasepa;
3504 		x_ml = x_ml->next;
3505 	}
3506 
3507 	/*
3508 	 * bp will be page aligned, since we're calling
3509 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3510 	 */
3511 
3512 	prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
3513 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
3514 
3515 	prog_kmem->prog = prog_kmem;
3516 
3517 	/*
3518 	 * To avoid MTLB hit, we allocate a new VM space and remap
3519 	 * the kmem_alloc buffer to that address.  This solves
3520 	 * 2 problems we found:
3521 	 * - the kmem_alloc buffer can be just a chunk inside
3522 	 *   a much larger, e.g. 4MB buffer and MTLB will occur
3523 	 *   if there are both a 4MB and a 8K TLB mapping to
3524 	 *   the same VA range.
3525 	 * - the kmem mapping got dropped into the TLB by other
3526 	 *   strands, unintentionally.
3527 	 * Note that the pointers like data, critical, memlist_buffer,
3528 	 * and stat inside the copy rename structure are mapped to this
3529 	 * alternate VM space so we must make sure we lock the TLB mapping
3530 	 * whenever we access data pointed to by these pointers.
3531 	 */
3532 
3533 	prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
3534 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
3535 	wp = bp = (caddr_t)prog;
3536 
3537 	/* Now remap prog_kmem to prog */
3538 	drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3539 
3540 	/* All pointers in prog are based on the alternate mapping */
3541 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3542 	    sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3543 
3544 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3545 	    <= ((uint64_t)prog + PAGESIZE));
3546 
3547 	prog->critical = (drmach_copy_rename_critical_t *)
3548 	    (wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3549 
3550 	prog->memlist_buffer = (caddr_t)(wp + DRMACH_FMEM_MLIST_PAGE *
3551 	    PAGESIZE);
3552 
3553 	prog->stat = (drmach_cr_stat_t *)(wp + DRMACH_FMEM_STAT_PAGE *
3554 	    PAGESIZE);
3555 
3556 	/* LINTED */
3557 	ASSERT(sizeof (drmach_cr_stat_t) <= ((DRMACH_FMEM_LOCKED_PAGES -
3558 	    DRMACH_FMEM_STAT_PAGE) * PAGESIZE));
3559 
3560 	prog->critical->scf_reg_base = (uint64_t)-1;
3561 	prog->critical->scf_td[0] = (s_bd & 0xff);
3562 	prog->critical->scf_td[1] = (t_bd & 0xff);
3563 	for (i = 2; i < 15; i++) {
3564 		prog->critical->scf_td[i]   = 0;
3565 	}
3566 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3567 
3568 	bp = (caddr_t)prog->critical;
3569 	len = sizeof (drmach_copy_rename_critical_t);
3570 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3571 
3572 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3573 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3574 
3575 	/*
3576 	 * We always leave 1K nop's to prevent the processor from
3577 	 * speculative execution that causes memory access
3578 	 */
3579 	wp = wp + len + 1024;
3580 
3581 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3582 	    (ulong_t)drmach_fmem_exec_script);
3583 	/* this is the entry point of the loop script */
3584 	wp = wp + len + 1024;
3585 
3586 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3587 	    (ulong_t)drmach_fmem_loop_script);
3588 	wp = wp + len + 1024;
3589 
3590 	/* now we make sure there is 1K extra */
3591 
3592 	if ((wp - bp) > PAGESIZE) {
3593 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3594 		goto out;
3595 	}
3596 
3597 	bp = (caddr_t)prog->critical;
3598 	len = sizeof (drmach_copy_rename_critical_t);
3599 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3600 
3601 	prog->critical->run = (int (*)())(wp);
3602 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3603 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3604 
3605 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3606 
3607 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3608 
3609 	prog->critical->fmem = (int (*)())(wp);
3610 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3611 	    (ulong_t)drmach_fmem_exec_script);
3612 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3613 
3614 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3615 	    (ulong_t)drmach_fmem_exec_script);
3616 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3617 
3618 	prog->critical->loop = (int (*)())(wp);
3619 	len = (int)((ulong_t)drmach_fmem_exec_script -
3620 	    (ulong_t)drmach_fmem_loop_script);
3621 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3622 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3623 	    (ulong_t)drmach_fmem_loop_script);
3624 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3625 
3626 	prog->data->fmem_status.error = ESBD_NOERROR;
3627 
3628 	/* now we are committed, call SCF, soft suspend mac patrol */
3629 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3630 		err = drerr_new(1, EOPL_SCF_FMEM_START, NULL);
3631 		goto out;
3632 	}
3633 	prog->data->scf_fmem_end = scf_fmem_end;
3634 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3635 	prog->data->scf_get_base_addr = scf_get_base_addr;
3636 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3637 
3638 	/* soft suspend mac patrol */
3639 	(*mc_suspend)();
3640 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3641 	prog->data->mc_resume = mc_resume;
3642 
3643 	prog->critical->inst_loop_ret  =
3644 	    *(uint64_t *)(prog->critical->loop_rtn);
3645 
3646 	/*
3647 	 * 0x30800000 is op code "ba,a	+0"
3648 	 */
3649 
3650 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3651 
3652 	/*
3653 	 * set the value of SCF FMEM TIMEOUT
3654 	 */
3655 	prog->critical->delay = fmem_timeout * system_clock_freq;
3656 
3657 	prog->data->s_mem = (drmachid_t)s_mem;
3658 	prog->data->t_mem = (drmachid_t)t_mem;
3659 
3660 	cpuid = CPU->cpu_id;
3661 	prog->data->cpuid = cpuid;
3662 	prog->data->cpu_ready_set = cpu_ready_set;
3663 	prog->data->cpu_slave_set = cpu_ready_set;
3664 	prog->data->slowest_cpuid = (processorid_t)-1;
3665 	prog->data->copy_wait_time = 0;
3666 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3667 
3668 	for (i = 0; i < NCPU; i++) {
3669 		prog->data->cpu_ml[i] = NULL;
3670 	}
3671 
3672 	active_cpus = 0;
3673 	if (drmach_disable_mcopy) {
3674 		active_cpus = 1;
3675 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3676 	} else {
3677 		for (i = 0; i < NCPU; i++) {
3678 			if (CPU_IN_SET(cpu_ready_set, i) &&
3679 			    CPU_ACTIVE(cpu[i])) {
3680 				CPUSET_ADD(prog->data->cpu_copy_set, i);
3681 				active_cpus++;
3682 			}
3683 		}
3684 	}
3685 
3686 	drmach_setup_memlist(prog);
3687 
3688 	x_ml = c_ml;
3689 	sz = 0;
3690 	while (x_ml != NULL) {
3691 		sz += x_ml->size;
3692 		x_ml = x_ml->next;
3693 	}
3694 
3695 	copy_sz = sz/active_cpus;
3696 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3697 
3698 	while (sz > copy_sz*active_cpus) {
3699 		copy_sz += MMU_PAGESIZE4M;
3700 	}
3701 
3702 	prog->data->stick_freq = system_clock_freq;
3703 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3704 	    system_clock_freq;
3705 
3706 	x_ml = c_ml;
3707 	c_addr = x_ml->address;
3708 	c_size = x_ml->size;
3709 
3710 	for (i = 0; i < NCPU; i++) {
3711 		prog->stat->nbytes[i] = 0;
3712 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3713 			continue;
3714 		}
3715 		sz = copy_sz;
3716 
3717 		while (sz) {
3718 			if (c_size > sz) {
3719 				prog->data->cpu_ml[i] =
3720 				    drmach_memlist_add_span(prog,
3721 				    prog->data->cpu_ml[i], c_addr, sz);
3722 				c_addr += sz;
3723 				c_size -= sz;
3724 				break;
3725 			} else {
3726 				sz -= c_size;
3727 				prog->data->cpu_ml[i] =
3728 				    drmach_memlist_add_span(prog,
3729 				    prog->data->cpu_ml[i], c_addr, c_size);
3730 				x_ml = x_ml->next;
3731 				if (x_ml != NULL) {
3732 					c_addr = x_ml->address;
3733 					c_size = x_ml->size;
3734 				} else {
3735 					goto end;
3736 				}
3737 			}
3738 		}
3739 	}
3740 end:
3741 	prog->data->s_copybasepa = s_copybasepa;
3742 	prog->data->t_copybasepa = t_copybasepa;
3743 	prog->data->c_ml = c_ml;
3744 	*pgm_id = prog_kmem;
3745 
3746 	/* Unmap the alternate space.  It will have to be remapped again */
3747 	drmach_unlock_critical((caddr_t)prog);
3748 	return (NULL);
3749 out:
3750 	if (prog != NULL) {
3751 		drmach_unlock_critical((caddr_t)prog);
3752 		vmem_free(heap_arena, prog, DRMACH_FMEM_LOCKED_PAGES *
3753 		    PAGESIZE);
3754 	}
3755 	if (prog_kmem != NULL) {
3756 		kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3757 	}
3758 	return (err);
3759 }
3760 
3761 sbd_error_t *
3762 drmach_copy_rename_fini(drmachid_t id)
3763 {
3764 	drmach_copy_rename_program_t	*prog = id;
3765 	sbd_error_t			*err = NULL;
3766 	int				rv;
3767 	uint_t				fmem_error;
3768 
3769 	/*
3770 	 * Note that we have to delay calling SCF to find out the
3771 	 * status of the FMEM operation here because SCF cannot
3772 	 * respond while it is suspended.
3773 	 * This create a small window when we are sure about the
3774 	 * base address of the system board.
3775 	 * If there is any call to mc-opl to get memory unum,
3776 	 * mc-opl will return UNKNOWN as the unum.
3777 	 */
3778 
3779 	/*
3780 	 * we have to remap again because all the pointer like data,
3781 	 * critical in prog are based on the alternate vmem space.
3782 	 */
3783 	(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
3784 
3785 	if (prog->data->c_ml != NULL)
3786 		memlist_delete(prog->data->c_ml);
3787 
3788 	if ((prog->data->fmem_status.op &
3789 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) !=
3790 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3791 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: invalid op "
3792 		    "code %x\n", prog->data->fmem_status.op);
3793 	}
3794 
3795 	fmem_error = prog->data->fmem_status.error;
3796 	if (fmem_error != ESBD_NOERROR) {
3797 		err = drerr_new(1, fmem_error, NULL);
3798 	}
3799 
3800 	/* possible ops are SCF_START, MC_SUSPEND */
3801 	if (prog->critical->fmem_issued) {
3802 		if (fmem_error != ESBD_NOERROR) {
3803 			cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n",
3804 			    fmem_error);
3805 		}
3806 		rv = (*prog->data->scf_fmem_end)();
3807 		if (rv) {
3808 			cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
3809 		}
3810 		/*
3811 		 * If we get here, rename is successful.
3812 		 * Do all the copy rename post processing.
3813 		 */
3814 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3815 		    (drmach_mem_t *)prog->data->t_mem);
3816 	} else {
3817 		rv = (*prog->data->scf_fmem_cancel)();
3818 		if (rv) {
3819 			cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x",
3820 			    rv);
3821 			if (!err) {
3822 				err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
3823 				    "scf_fmem_cancel() failed. rv = 0x%x", rv);
3824 			}
3825 		}
3826 	}
3827 	/* soft resume mac patrol */
3828 	(*prog->data->mc_resume)();
3829 
3830 	drmach_unlock_critical((caddr_t)prog->locked_prog);
3831 
3832 	vmem_free(heap_arena, prog->locked_prog,
3833 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3834 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3835 	return (err);
3836 }
3837 
/*
 * Slave-side entry point of the copy-rename operation.
 *
 * drmach_copy_rename() dispatches this routine to the other CPUs through
 * xt_some()/drmach_sys_trap, passing the alternate-mapped program as id.
 * rp is not used.
 *
 * The routine runs the relocated program through prog->critical->run and,
 * whatever the outcome, marks this CPU's slot in prog->critical->stat as
 * FMEM_LOOP_EXIT, which is the value the master polls for.  If the
 * on_trap() protection fires, EOPL_FMEM_COPY_ERROR is stored in this
 * CPU's slot of prog->data->error before the exit status is set.
 */
/*ARGSUSED*/
static void
drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
{
	drmach_copy_rename_program_t	*prog =
	    (drmach_copy_rename_program_t *)id;
	register int			cpuid;
	extern void			drmach_flush();
	extern void			membar_sync_il();
	extern void			drmach_flush_icache();
	on_trap_data_t			otd;

	/* Assigned before on_trap() so it is valid on the trap path too. */
	cpuid = CPU->cpu_id;

	/*
	 * Trap path: on_trap() returned non-zero.  Drop the protection,
	 * record the copy error for this CPU, publish the exit status and
	 * leave.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		no_trap();
		prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR;
		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
		drmach_flush_icache();
		membar_sync_il();
		return;
	}


	/*
	 * jmp drmach_copy_rename_prog().
	 *
	 * drmach_flush() is applied to the critical page before jumping into
	 * the code stored there.  The return value of run() is discarded.
	 */

	drmach_flush(prog->critical, PAGESIZE);
	(void) prog->critical->run(prog, cpuid);
	drmach_flush_icache();

	no_trap();

	/* Tell the master that this CPU has left the program. */
	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;

	membar_sync_il();
}
3876 
3877 static void
3878 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3879 {
3880 	uint64_t s_base, t_base;
3881 	drmach_board_t *s_board, *t_board;
3882 	struct memlist *ml;
3883 
3884 	s_board = s_mem->dev.bp;
3885 	t_board = t_mem->dev.bp;
3886 	if (s_board == NULL || t_board == NULL) {
3887 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3888 		return;
3889 	}
3890 	s_base = s_mem->slice_base;
3891 	t_base = t_mem->slice_base;
3892 
3893 	s_mem->slice_base = t_base;
3894 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3895 
3896 	for (ml = s_mem->memlist; ml; ml = ml->next) {
3897 		ml->address = ml->address - s_base + t_base;
3898 	}
3899 
3900 	t_mem->slice_base = s_base;
3901 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3902 
3903 	for (ml = t_mem->memlist; ml; ml = ml->next) {
3904 		ml->address = ml->address - t_base + s_base;
3905 	}
3906 
3907 	/*
3908 	 * IKP has to update the sb-mem-ranges for mac patrol driver
3909 	 * when it resumes, it will re-read the sb-mem-range property
3910 	 * to get the new base address
3911 	 */
3912 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
3913 		cmn_err(CE_PANIC, "Could not update device nodes\n");
3914 }
3915 
3916 void
3917 drmach_copy_rename(drmachid_t id)
3918 {
3919 	drmach_copy_rename_program_t	*prog_kmem = id;
3920 	drmach_copy_rename_program_t	*prog;
3921 	cpuset_t	cpuset;
3922 	int		cpuid;
3923 	uint64_t	inst;
3924 	register int	rtn;
3925 	extern int	in_sync;
3926 	int		old_in_sync;
3927 	extern void	drmach_sys_trap();
3928 	extern void	drmach_flush();
3929 	extern void	drmach_flush_icache();
3930 	extern uint64_t	patch_inst(uint64_t *, uint64_t);
3931 	on_trap_data_t	otd;
3932 
3933 
3934 	prog = prog_kmem->locked_prog;
3935 
3936 
3937 	/*
3938 	 * We must immediately drop in the TLB because all pointers
3939 	 * are based on the alternate vmem space.
3940 	 */
3941 
3942 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3943 
3944 	/*
3945 	 * we call scf to get the base address here becuase if scf
3946 	 * has not been suspended yet, the active path can be changing and
3947 	 * sometimes it is not even mapped.  We call the interface when
3948 	 * the OS has been quiesced.
3949 	 */
3950 	prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();
3951 
3952 	if (prog->critical->scf_reg_base == (uint64_t)-1 ||
3953 	    prog->critical->scf_reg_base == NULL) {
3954 		prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR;
3955 		drmach_unlock_critical((caddr_t)prog);
3956 		return;
3957 	}
3958 
3959 	cpuset = prog->data->cpu_ready_set;
3960 
3961 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3962 		if (CPU_IN_SET(cpuset, cpuid)) {
3963 			prog->critical->stat[cpuid] = FMEM_LOOP_START;
3964 			prog->data->error[cpuid] = ESBD_NOERROR;
3965 		}
3966 	}
3967 
3968 	old_in_sync = in_sync;
3969 	in_sync = 1;
3970 	cpuid = CPU->cpu_id;
3971 
3972 	CPUSET_DEL(cpuset, cpuid);
3973 
3974 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3975 		if (CPU_IN_SET(cpuset, cpuid)) {
3976 			xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
3977 			    (uint64_t)prog_kmem, (uint64_t)prog);
3978 		}
3979 	}
3980 
3981 	cpuid = CPU->cpu_id;
3982 
3983 	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
3984 	    (uint64_t)drmach_copy_rename_slave, (uint64_t)prog);
3985 	xt_sync(cpuset);
3986 
3987 	if (on_trap(&otd, OT_DATA_EC)) {
3988 		rtn = EOPL_FMEM_COPY_ERROR;
3989 		drmach_flush_icache();
3990 		goto done;
3991 	}
3992 
3993 	/*
3994 	 * jmp drmach_copy_rename_prog().
3995 	 */
3996 
3997 	drmach_flush(prog->critical, PAGESIZE);
3998 	rtn = prog->critical->run(prog, cpuid);
3999 
4000 	drmach_flush_icache();
4001 
4002 
4003 done:
4004 	no_trap();
4005 	if (rtn == EOPL_FMEM_HW_ERROR) {
4006 		kpreempt_enable();
4007 		prom_panic("URGENT_ERROR_TRAP is detected during FMEM.\n");
4008 	}
4009 
4010 	/*
4011 	 * In normal case, all slave CPU's are still spinning in
4012 	 * the assembly code.  The master has to patch the instruction
4013 	 * to get them out.
4014 	 * In error case, e.g. COPY_ERROR, some slave CPU's might
4015 	 * have aborted and already returned and sset LOOP_EXIT status.
4016 	 * Some CPU might still be copying.
4017 	 * In any case, some delay is necessary to give them
4018 	 * enough time to set the LOOP_EXIT status.
4019 	 */
4020 
4021 	for (;;) {
4022 		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
4023 		    prog->critical->inst_loop_ret);
4024 		if (prog->critical->inst_loop_ret == inst) {
4025 			break;
4026 		}
4027 	}
4028 
4029 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4030 		uint64_t	last, now;
4031 		if (!CPU_IN_SET(cpuset, cpuid)) {
4032 			continue;
4033 		}
4034 		last = prog->stat->nbytes[cpuid];
4035 		/*
4036 		 * Wait for all CPU to exit.
4037 		 * However we do not want an infinite loop
4038 		 * so we detect hangup situation here.
4039 		 * If the slave CPU is still copying data,
4040 		 * we will continue to wait.
4041 		 * In error cases, the master has already set
4042 		 * fmem_status.error to abort the copying.
4043 		 * 1 m.s delay for them to abort copying and
4044 		 * return to drmach_copy_rename_slave to set
4045 		 * FMEM_LOOP_EXIT status should be enough.
4046 		 */
4047 		for (;;) {
4048 			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
4049 				break;
4050 			drmach_sleep_il();
4051 			drv_usecwait(1000);
4052 			now = prog->stat->nbytes[cpuid];
4053 			if (now <= last) {
4054 				drv_usecwait(1000);
4055 				if (prog->critical->stat[cpuid] ==
4056 				    FMEM_LOOP_EXIT)
4057 					break;
4058 				cmn_err(CE_PANIC, "CPU %d hang during Copy "
4059 				    "Rename", cpuid);
4060 			}
4061 			last = now;
4062 		}
4063 		if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) {
4064 			prom_panic("URGENT_ERROR_TRAP is detected during "
4065 			    "FMEM.\n");
4066 		}
4067 	}
4068 
4069 	/*
4070 	 * This must be done after all strands have exit.
4071 	 * Removing the TLB entry will affect both strands
4072 	 * in the same core.
4073 	 */
4074 
4075 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4076 		if (CPU_IN_SET(cpuset, cpuid)) {
4077 			xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
4078 			    (uint64_t)prog, 0);
4079 		}
4080 	}
4081 
4082 	in_sync = old_in_sync;
4083 
4084 	/*
4085 	 * we should unlock before the following lock to keep the kpreempt
4086 	 * count correct.
4087 	 */
4088 	(void) drmach_unlock_critical((caddr_t)prog);
4089 
4090 	/*
4091 	 * we must remap again.  TLB might have been removed in above xcall.
4092 	 */
4093 
4094 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4095 
4096 	if (prog->data->fmem_status.error == ESBD_NOERROR)
4097 		prog->data->fmem_status.error = rtn;
4098 
4099 	if (prog->data->copy_wait_time > 0) {
4100 		DRMACH_PR("Unexpected long wait time %ld seconds "
4101 		    "during copy rename on CPU %d\n",
4102 		    prog->data->copy_wait_time/prog->data->stick_freq,
4103 		    prog->data->slowest_cpuid);
4104 	}
4105 	drmach_unlock_critical((caddr_t)prog);
4106 }
4107