xref: /titanic_51/usr/src/uts/sun4u/opl/io/drmach.c (revision bfe60e20c2f727eab7a71b13a2183a856ae0c22f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/debug.h>
30 #include <sys/types.h>
31 #include <sys/varargs.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/dditypes.h>
35 #include <sys/devops.h>
36 #include <sys/modctl.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/stat.h>
44 #include <sys/kmem.h>
45 #include <sys/vmem.h>
46 #include <sys/opl_olympus_regs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/cpupart.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/note.h>
70 #include <sys/ontrap.h>
71 #include <sys/cpu_sgnblk_defs.h>
72 #include <sys/opl.h>
73 
74 
75 #include <sys/promimpl.h>
76 #include <sys/prom_plat.h>
77 #include <sys/kobj.h>
78 
79 #include <sys/sysevent.h>
80 #include <sys/sysevent/dr.h>
81 #include <sys/sysevent/eventdefs.h>
82 
83 #include <sys/drmach.h>
84 #include <sys/dr_util.h>
85 
86 #include <sys/fcode.h>
87 #include <sys/opl_cfg.h>
88 
/* Helper routines whose definitions live outside this file. */
void	bcopy32_il(uint64_t, uint64_t);
void	flush_cache_il(void);
void	drmach_sleep_il(void);
92 
/* Argument bundle handed to each node-walk callback. */
typedef struct {
	struct drmach_node	*node;	/* node handle the walk runs on */
	void			*data;	/* opaque value from the walker */
} drmach_node_walk_args_t;
97 
98 typedef struct drmach_node {
99 	void		*here;
100 
101 	pnode_t		(*get_dnode)(struct drmach_node *node);
102 	int		(*walk)(struct drmach_node *node, void *data,
103 				int (*cb)(drmach_node_walk_args_t *args));
104 	dev_info_t	*(*n_getdip)(struct drmach_node *node);
105 	int		(*n_getproplen)(struct drmach_node *node, char *name,
106 				int *len);
107 	int		(*n_getprop)(struct drmach_node *node, char *name,
108 				void *buf, int len);
109 	int		(*get_parent)(struct drmach_node *node,
110 				struct drmach_node *pnode);
111 } drmach_node_t;
112 
113 typedef struct {
114 	int		 min_index;
115 	int		 max_index;
116 	int		 arr_sz;
117 	drmachid_t	*arr;
118 } drmach_array_t;
119 
120 typedef struct {
121 	void		*isa;
122 
123 	void		(*dispose)(drmachid_t);
124 	sbd_error_t	*(*release)(drmachid_t);
125 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
126 
127 	char		 name[MAXNAMELEN];
128 } drmach_common_t;
129 
/*
 * Per-core strand state.  Each field is a bitmask in which bit N
 * stands for strand N of the core.
 */
typedef struct {
	uint32_t	core_present;	/* strands found in the device tree */
	uint32_t	core_hotadded;	/* strands hot-added via the PROM */
	uint32_t	core_started;	/* copied from core_present on boot boards */
} drmach_cmp_t;
135 
136 typedef struct {
137 	drmach_common_t	 cm;
138 	int		 bnum;
139 	int		 assigned;
140 	int		 powered;
141 	int		 connected;
142 	int		 cond;
143 	drmach_node_t	*tree;
144 	drmach_array_t	*devices;
145 	int		boot_board;	/* if board exists on bootup */
146 	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD];
147 } drmach_board_t;
148 
149 typedef struct {
150 	drmach_common_t	 cm;
151 	drmach_board_t	*bp;
152 	int		 unum;
153 	int		portid;
154 	int		 busy;
155 	int		 powered;
156 	const char	*type;
157 	drmach_node_t	*node;
158 } drmach_device_t;
159 
160 typedef struct drmach_cpu {
161 	drmach_device_t  dev;
162 	processorid_t    cpuid;
163 	int		sb;
164 	int		chipid;
165 	int		coreid;
166 	int		strandid;
167 	int		status;
168 #define	OPL_CPU_HOTADDED	1
169 } drmach_cpu_t;
170 
171 typedef struct drmach_mem {
172 	drmach_device_t  dev;
173 	uint64_t	slice_base;
174 	uint64_t	slice_size;
175 	uint64_t	base_pa;	/* lowest installed memory base */
176 	uint64_t	nbytes;		/* size of installed memory */
177 	struct memlist *memlist;
178 } drmach_mem_t;
179 
180 typedef struct drmach_io {
181 	drmach_device_t  dev;
182 	int	channel;
183 	int	leaf;
184 } drmach_io_t;
185 
/* Domain wide DR settings collected by drmach_init(). */
typedef struct drmach_domain_info {
	uint32_t	floating;	/* bit N set: board N is floating */
	int		allow_dr;	/* result of opl_check_dr_status() */
} drmach_domain_info_t;

drmach_domain_info_t drmach_domain;
192 
193 typedef struct {
194 	int		 flags;
195 	drmach_device_t	*dp;
196 	sbd_error_t	*err;
197 	dev_info_t	*dip;
198 } drmach_config_args_t;
199 
200 typedef struct {
201 	drmach_board_t	*obj;
202 	int		 ndevs;
203 	void		*a;
204 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
205 	sbd_error_t	*err;
206 } drmach_board_cb_data_t;
207 
208 static drmach_array_t	*drmach_boards;
209 
210 static sbd_error_t	*drmach_device_new(drmach_node_t *,
211 				drmach_board_t *, int, drmachid_t *);
212 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
213 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
214 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
215 
216 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
217 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
218 				char *name, void *buf, int len);
219 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
220 				char *name, int *len);
221 
222 static int 		drmach_get_portid(drmach_node_t *);
223 static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
224 static int		opl_check_dr_status();
225 static void		drmach_io_dispose(drmachid_t);
226 static sbd_error_t	*drmach_io_release(drmachid_t);
227 static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
228 static int 		drmach_init(void);
229 static void 		drmach_fini(void);
230 static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
231 static drmach_board_t	*drmach_get_board_by_bnum(int);
232 
/* Values accepted as the "option" argument of drmach_add_remove_cpu(). */
#define	HOTADD_CPU	1
#define	HOTREMOVE_CPU	2

/* Core number, relative to its board, of the core that owns cpuid x. */
#define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
	(OPL_MAX_COREID_PER_BOARD - 1))
239 
240 extern struct cpu	*SIGBCPU;
241 
242 static int		drmach_name2type_idx(char *);
243 static drmach_board_t	*drmach_board_new(int, int);
244 
/*
 * DRMACH_PR(fmt, ...) prints a debug message.  It expands to a bare
 * "if (...) printf", so it must not be used as the unbraced body of
 * an if that has an else branch.
 */
#ifdef DEBUG

int drmach_debug = 1;		 /* set to non-zero to enable debug messages */
#define	DRMACH_PR		if (drmach_debug) printf

#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf

#endif /* DEBUG */
253 
254 
/*
 * Object identification macros.
 *
 * Every drmach object starts with a drmach_common_t whose isa member
 * holds the address of the constructor that built it.  The macros
 * below test an id against those constructor addresses.  A zero id
 * never matches.
 *
 * The id argument is parenthesized on every use so that an arbitrary
 * expression may be passed.  It is still evaluated more than once, so
 * it must not have side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* Build an EOPL_INTERNAL error that records the current source line. */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)
288 
/* Format used by DRMACH_INTERNAL_ERROR(); %d receives __LINE__. */
static char	*drmach_ie_fmt = "drmach.c %d";
290 
291 static struct {
292 	const char	*name;
293 	const char	*type;
294 	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *);
295 } drmach_name2type[] = {
296 	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
297 	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
298 	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
299 };
300 
/* utility: number of bytes in one megabyte */
#define	MBYTE	(1048576ull)
303 
304 /*
305  * drmach autoconfiguration data structures and interfaces
306  */
307 
308 extern struct mod_ops mod_miscops;
309 
310 static struct modlmisc modlmisc = {
311 	&mod_miscops,
312 	"OPL DR 1.1"
313 };
314 
315 static struct modlinkage modlinkage = {
316 	MODREV_1,
317 	(void *)&modlmisc,
318 	NULL
319 };
320 
321 static krwlock_t drmach_boards_rwlock;
322 
323 typedef const char	*fn_t;
324 
325 int
326 _init(void)
327 {
328 	int err;
329 
330 	if ((err = drmach_init()) != 0) {
331 		return (err);
332 	}
333 
334 	if ((err = mod_install(&modlinkage)) != 0) {
335 		drmach_fini();
336 	}
337 
338 	return (err);
339 }
340 
341 int
342 _fini(void)
343 {
344 	int	err;
345 
346 	if ((err = mod_remove(&modlinkage)) == 0)
347 		drmach_fini();
348 
349 	return (err);
350 }
351 
352 int
353 _info(struct modinfo *modinfop)
354 {
355 	return (mod_info(&modlinkage, modinfop));
356 }
357 
358 struct drmach_mc_lookup {
359 	int	bnum;
360 	drmach_board_t	*bp;
361 	dev_info_t *dip;	/* rv - set if found */
362 };
363 
/* Page count to byte count, computed in 64 bits. */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
/* Byte count to page count (rounds down). */
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
366 
367 static int
368 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
369 {
370 	uint64_t	memory_ranges[128];
371 	int len;
372 	struct memlist	*ml;
373 	int rv;
374 	hwd_sb_t *hwd;
375 	hwd_memory_t *pm;
376 
377 	len = sizeof (memory_ranges);
378 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
379 		DDI_PROP_DONTPASS, "sb-mem-ranges",
380 	    (caddr_t)&memory_ranges[0], &len) != DDI_PROP_SUCCESS) {
381 		mp->slice_base = 0;
382 		mp->slice_size = 0;
383 		return (-1);
384 	}
385 	mp->slice_base = memory_ranges[0];
386 	mp->slice_size = memory_ranges[1];
387 
388 	if (!mp->dev.bp->boot_board) {
389 		int i;
390 
391 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
392 
393 		if (rv != 0) {
394 			return (-1);
395 		}
396 
397 		ml = NULL;
398 		pm = &hwd->sb_cmu.cmu_memory;
399 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
400 			if (pm->mem_chunks[i].chnk_size > 0) {
401 				ml = memlist_add_span(ml,
402 					pm->mem_chunks[i].chnk_start_address,
403 					pm->mem_chunks[i].chnk_size);
404 			}
405 		}
406 	} else {
407 		/*
408 		 * we intersect phys_install to get base_pa.
409 		 * This only works at bootup time.
410 		 */
411 
412 		memlist_read_lock();
413 		ml = memlist_dup(phys_install);
414 		memlist_read_unlock();
415 
416 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
417 		if (ml) {
418 			uint64_t basepa, endpa;
419 			endpa = _ptob64(physmax + 1);
420 
421 			basepa = mp->slice_base + mp->slice_size;
422 
423 			ml = memlist_del_span(ml, basepa, endpa - basepa);
424 		}
425 	}
426 
427 	if (ml) {
428 		uint64_t nbytes = 0;
429 		struct memlist *p;
430 		for (p = ml; p; p = p->next) {
431 			nbytes += p->size;
432 		}
433 		if ((mp->nbytes = nbytes) > 0)
434 			mp->base_pa = ml->address;
435 		else
436 			mp->base_pa = 0;
437 		mp->memlist = ml;
438 	} else {
439 		mp->base_pa = 0;
440 		mp->nbytes = 0;
441 	}
442 	return (0);
443 }
444 
445 
446 struct drmach_hotcpu {
447 	drmach_board_t *bp;
448 	int	bnum;
449 	int	core_id;
450 	int 	rv;
451 	int	option;
452 };
453 
454 static int
455 drmach_cpu_cb(dev_info_t *dip, void *arg)
456 {
457 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
458 	char name[OBP_MAXDRVNAME];
459 	int len = OBP_MAXDRVNAME;
460 	int bnum, core_id, strand_id;
461 	drmach_board_t *bp;
462 
463 	if (dip == ddi_root_node()) {
464 		return (DDI_WALK_CONTINUE);
465 	}
466 
467 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
468 	    DDI_PROP_DONTPASS, "name",
469 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
470 		return (DDI_WALK_PRUNECHILD);
471 	}
472 
473 	/* only cmp has board number */
474 	bnum = -1;
475 	len = sizeof (bnum);
476 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
477 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
478 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
479 		bnum = -1;
480 	}
481 
482 	if (strcmp(name, "cmp") == 0) {
483 		if (bnum != p->bnum)
484 			return (DDI_WALK_PRUNECHILD);
485 		return (DDI_WALK_CONTINUE);
486 	}
487 	/* we have already pruned all unwanted cores and cpu's above */
488 	if (strcmp(name, "core") == 0) {
489 		return (DDI_WALK_CONTINUE);
490 	}
491 	if (strcmp(name, "cpu") == 0) {
492 		processorid_t cpuid;
493 		len = sizeof (cpuid);
494 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
495 		    DDI_PROP_DONTPASS, "cpuid",
496 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
497 			p->rv = -1;
498 			return (DDI_WALK_TERMINATE);
499 		}
500 
501 		core_id = p->core_id;
502 
503 		bnum = LSB_ID(cpuid);
504 
505 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
506 			return (DDI_WALK_CONTINUE);
507 
508 		bp = p->bp;
509 		ASSERT(bnum == bp->bnum);
510 
511 		if (p->option == HOTADD_CPU) {
512 			if (prom_hotaddcpu(cpuid) != 0) {
513 				p->rv = -1;
514 				return (DDI_WALK_TERMINATE);
515 			}
516 			strand_id = STRAND_ID(cpuid);
517 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
518 		} else if (p->option == HOTREMOVE_CPU) {
519 			if (prom_hotremovecpu(cpuid) != 0) {
520 				p->rv = -1;
521 				return (DDI_WALK_TERMINATE);
522 			}
523 			strand_id = STRAND_ID(cpuid);
524 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
525 		}
526 		return (DDI_WALK_CONTINUE);
527 	}
528 
529 	return (DDI_WALK_PRUNECHILD);
530 }
531 
532 
533 static int
534 drmach_add_remove_cpu(int bnum, int core_id, int option)
535 {
536 	struct drmach_hotcpu arg;
537 	drmach_board_t *bp;
538 
539 	bp = drmach_get_board_by_bnum(bnum);
540 	ASSERT(bp);
541 
542 	arg.bp = bp;
543 	arg.bnum = bnum;
544 	arg.core_id = core_id;
545 	arg.rv = 0;
546 	arg.option = option;
547 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
548 	return (arg.rv);
549 }
550 
551 struct drmach_setup_core_arg {
552 	drmach_board_t *bp;
553 };
554 
555 static int
556 drmach_setup_core_cb(dev_info_t *dip, void *arg)
557 {
558 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
559 	char name[OBP_MAXDRVNAME];
560 	int len = OBP_MAXDRVNAME;
561 	int bnum;
562 	int core_id, strand_id;
563 
564 	if (dip == ddi_root_node()) {
565 		return (DDI_WALK_CONTINUE);
566 	}
567 
568 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
569 	    DDI_PROP_DONTPASS, "name",
570 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
571 		return (DDI_WALK_PRUNECHILD);
572 	}
573 
574 	/* only cmp has board number */
575 	bnum = -1;
576 	len = sizeof (bnum);
577 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
578 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
579 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
580 		bnum = -1;
581 	}
582 
583 	if (strcmp(name, "cmp") == 0) {
584 		if (bnum != p->bp->bnum)
585 			return (DDI_WALK_PRUNECHILD);
586 		return (DDI_WALK_CONTINUE);
587 	}
588 	/* we have already pruned all unwanted cores and cpu's above */
589 	if (strcmp(name, "core") == 0) {
590 		return (DDI_WALK_CONTINUE);
591 	}
592 	if (strcmp(name, "cpu") == 0) {
593 		processorid_t cpuid;
594 		len = sizeof (cpuid);
595 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
596 		    DDI_PROP_DONTPASS, "cpuid",
597 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
598 			return (DDI_WALK_TERMINATE);
599 		}
600 		bnum = LSB_ID(cpuid);
601 		ASSERT(bnum == p->bp->bnum);
602 		core_id = ON_BOARD_CORE_NUM(cpuid);
603 		strand_id = STRAND_ID(cpuid);
604 		p->bp->cores[core_id].core_present |= (1 << strand_id);
605 		return (DDI_WALK_CONTINUE);
606 	}
607 
608 	return (DDI_WALK_PRUNECHILD);
609 }
610 
611 
612 static void
613 drmach_setup_core_info(drmach_board_t *obj)
614 {
615 	struct drmach_setup_core_arg arg;
616 	int i;
617 
618 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
619 		obj->cores[i].core_present = 0;
620 		obj->cores[i].core_hotadded = 0;
621 		obj->cores[i].core_started = 0;
622 	}
623 	arg.bp = obj;
624 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
625 
626 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
627 		if (obj->boot_board) {
628 			obj->cores[i].core_hotadded =
629 				obj->cores[i].core_started =
630 				obj->cores[i].core_present;
631 		}
632 	}
633 }
634 
635 /*
636  * drmach_node_* routines serve the purpose of separating the
637  * rest of the code from the device tree and OBP.  This is necessary
638  * because of In-Kernel-Probing.  Devices probed after stod, are probed
639  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
640  * have dnode ids.
641  */
642 
643 typedef struct {
644 	drmach_node_walk_args_t	*nwargs;
645 	int 			(*cb)(drmach_node_walk_args_t *args);
646 	int			err;
647 } drmach_node_ddi_walk_args_t;
648 
649 static int
650 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
651 {
652 	drmach_node_ddi_walk_args_t	*nargs;
653 
654 	nargs = (drmach_node_ddi_walk_args_t *)arg;
655 
656 	/*
657 	 * dip doesn't have to be held here as we are called
658 	 * from ddi_walk_devs() which holds the dip.
659 	 */
660 	nargs->nwargs->node->here = (void *)dip;
661 
662 	nargs->err = nargs->cb(nargs->nwargs);
663 
664 
665 	/*
666 	 * Set "here" to NULL so that unheld dip is not accessible
667 	 * outside ddi_walk_devs()
668 	 */
669 	nargs->nwargs->node->here = NULL;
670 
671 	if (nargs->err)
672 		return (DDI_WALK_TERMINATE);
673 	else
674 		return (DDI_WALK_CONTINUE);
675 }
676 
677 static int
678 drmach_node_ddi_walk(drmach_node_t *np, void *data,
679 		int (*cb)(drmach_node_walk_args_t *args))
680 {
681 	drmach_node_walk_args_t		args;
682 	drmach_node_ddi_walk_args_t	nargs;
683 
684 
685 	/* initialized args structure for callback */
686 	args.node = np;
687 	args.data = data;
688 
689 	nargs.nwargs = &args;
690 	nargs.cb = cb;
691 	nargs.err = 0;
692 
693 	/*
694 	 * Root node doesn't have to be held in any way.
695 	 */
696 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
697 		(void *)&nargs);
698 
699 	return (nargs.err);
700 }
701 
702 static int
703 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
704 {
705 	dev_info_t	*ndip;
706 	static char	*fn = "drmach_node_ddi_get_parent";
707 
708 	ndip = np->n_getdip(np);
709 	if (ndip == NULL) {
710 		cmn_err(CE_WARN, "%s: NULL dip", fn);
711 		return (-1);
712 	}
713 
714 	bcopy(np, pp, sizeof (drmach_node_t));
715 
716 	pp->here = (void *)ddi_get_parent(ndip);
717 	if (pp->here == NULL) {
718 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
719 		return (-1);
720 	}
721 
722 	return (0);
723 }
724 
725 /*ARGSUSED*/
726 static pnode_t
727 drmach_node_ddi_get_dnode(drmach_node_t *np)
728 {
729 	return ((pnode_t)NULL);
730 }
731 
732 static drmach_node_t *
733 drmach_node_new(void)
734 {
735 	drmach_node_t *np;
736 
737 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
738 
739 	np->get_dnode = drmach_node_ddi_get_dnode;
740 	np->walk = drmach_node_ddi_walk;
741 	np->n_getdip = drmach_node_ddi_get_dip;
742 	np->n_getproplen = drmach_node_ddi_get_proplen;
743 	np->n_getprop = drmach_node_ddi_get_prop;
744 	np->get_parent = drmach_node_ddi_get_parent;
745 
746 	return (np);
747 }
748 
749 static void
750 drmach_node_dispose(drmach_node_t *np)
751 {
752 	kmem_free(np, sizeof (*np));
753 }
754 
755 static dev_info_t *
756 drmach_node_ddi_get_dip(drmach_node_t *np)
757 {
758 	return ((dev_info_t *)np->here);
759 }
760 
761 static int
762 drmach_node_walk(drmach_node_t *np, void *param,
763 		int (*cb)(drmach_node_walk_args_t *args))
764 {
765 	return (np->walk(np, param, cb));
766 }
767 
768 static int
769 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
770 {
771 	int		rv = 0;
772 	dev_info_t	*ndip;
773 	static char	*fn = "drmach_node_ddi_get_prop";
774 
775 
776 	ndip = np->n_getdip(np);
777 	if (ndip == NULL) {
778 		cmn_err(CE_WARN, "%s: NULL dip", fn);
779 		rv = -1;
780 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
781 	    DDI_PROP_DONTPASS, name,
782 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
783 		rv = -1;
784 	}
785 
786 	return (rv);
787 }
788 
789 static int
790 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
791 {
792 	int		rv = 0;
793 	dev_info_t	*ndip;
794 
795 	ndip = np->n_getdip(np);
796 	if (ndip == NULL) {
797 		rv = -1;
798 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
799 		name, len) != DDI_PROP_SUCCESS) {
800 		rv = -1;
801 	}
802 
803 	return (rv);
804 }
805 
806 static drmachid_t
807 drmach_node_dup(drmach_node_t *np)
808 {
809 	drmach_node_t *dup;
810 
811 	dup = drmach_node_new();
812 	dup->here = np->here;
813 	dup->get_dnode = np->get_dnode;
814 	dup->walk = np->walk;
815 	dup->n_getdip = np->n_getdip;
816 	dup->n_getproplen = np->n_getproplen;
817 	dup->n_getprop = np->n_getprop;
818 	dup->get_parent = np->get_parent;
819 
820 	return (dup);
821 }
822 
823 /*
824  * drmach_array provides convenient array construction, access,
825  * bounds checking and array destruction logic.
826  */
827 
828 static drmach_array_t *
829 drmach_array_new(int min_index, int max_index)
830 {
831 	drmach_array_t *arr;
832 
833 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
834 
835 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
836 	if (arr->arr_sz > 0) {
837 		arr->min_index = min_index;
838 		arr->max_index = max_index;
839 
840 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
841 		return (arr);
842 	} else {
843 		kmem_free(arr, sizeof (*arr));
844 		return (0);
845 	}
846 }
847 
848 static int
849 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
850 {
851 	if (idx < arr->min_index || idx > arr->max_index)
852 		return (-1);
853 	else {
854 		arr->arr[idx - arr->min_index] = val;
855 		return (0);
856 	}
857 	/*NOTREACHED*/
858 }
859 
860 static int
861 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
862 {
863 	if (idx < arr->min_index || idx > arr->max_index)
864 		return (-1);
865 	else {
866 		*val = arr->arr[idx - arr->min_index];
867 		return (0);
868 	}
869 	/*NOTREACHED*/
870 }
871 
872 static int
873 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
874 {
875 	int rv;
876 
877 	*idx = arr->min_index;
878 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
879 		*idx += 1;
880 
881 	return (rv);
882 }
883 
884 static int
885 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
886 {
887 	int rv;
888 
889 	*idx += 1;
890 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
891 		*idx += 1;
892 
893 	return (rv);
894 }
895 
896 static void
897 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
898 {
899 	drmachid_t	val;
900 	int		idx;
901 	int		rv;
902 
903 	rv = drmach_array_first(arr, &idx, &val);
904 	while (rv == 0) {
905 		(*disposer)(val);
906 		rv = drmach_array_next(arr, &idx, &val);
907 	}
908 
909 	kmem_free(arr->arr, arr->arr_sz);
910 	kmem_free(arr, sizeof (*arr));
911 }
912 
913 static drmach_board_t *
914 drmach_get_board_by_bnum(int bnum)
915 {
916 	drmachid_t id;
917 
918 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
919 		return ((drmach_board_t *)id);
920 	else
921 		return (NULL);
922 }
923 
924 static pnode_t
925 drmach_node_get_dnode(drmach_node_t *np)
926 {
927 	return (np->get_dnode(np));
928 }
929 
930 /*ARGSUSED*/
931 sbd_error_t *
932 drmach_configure(drmachid_t id, int flags)
933 {
934 	drmach_device_t		*dp;
935 	sbd_error_t		*err = NULL;
936 	dev_info_t		*rdip;
937 	dev_info_t		*fdip = NULL;
938 
939 	if (DRMACH_IS_CPU_ID(id)) {
940 		return (NULL);
941 	}
942 	if (!DRMACH_IS_DEVICE_ID(id))
943 		return (drerr_new(0, EOPL_INAPPROP, NULL));
944 	dp = id;
945 	rdip = dp->node->n_getdip(dp->node);
946 
947 	ASSERT(rdip);
948 
949 	ASSERT(e_ddi_branch_held(rdip));
950 
951 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
952 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
953 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
954 
955 		(void) ddi_pathname(dip, path);
956 		err = drerr_new(1,  EOPL_DRVFAIL, path);
957 
958 		kmem_free(path, MAXPATHLEN);
959 
960 		/* If non-NULL, fdip is returned held and must be released */
961 		if (fdip != NULL)
962 			ddi_release_devi(fdip);
963 	}
964 
965 	return (err);
966 }
967 
968 
969 static sbd_error_t *
970 drmach_device_new(drmach_node_t *node,
971 	drmach_board_t *bp, int portid, drmachid_t *idp)
972 {
973 	int		 i;
974 	int		 rv;
975 	drmach_device_t	proto;
976 	sbd_error_t	*err;
977 	char		 name[OBP_MAXDRVNAME];
978 
979 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
980 	if (rv) {
981 		/* every node is expected to have a name */
982 		err = drerr_new(1, EOPL_GETPROP,
983 			"device node %s: property %s",
984 			ddi_node_name(node->n_getdip(node)), "name");
985 		return (err);
986 	}
987 
988 	/*
989 	 * The node currently being examined is not listed in the name2type[]
990 	 * array.  In this case, the node is no interest to drmach.  Both
991 	 * dp and err are initialized here to yield nothing (no device or
992 	 * error structure) for this case.
993 	 */
994 	i = drmach_name2type_idx(name);
995 
996 
997 	if (i < 0) {
998 		*idp = (drmachid_t)0;
999 		return (NULL);
1000 	}
1001 
1002 	/* device specific new function will set unum */
1003 
1004 	bzero(&proto, sizeof (proto));
1005 	proto.type = drmach_name2type[i].type;
1006 	proto.bp = bp;
1007 	proto.node = node;
1008 	proto.portid = portid;
1009 
1010 	return (drmach_name2type[i].new(&proto, idp));
1011 }
1012 
1013 static void
1014 drmach_device_dispose(drmachid_t id)
1015 {
1016 	drmach_device_t *self = id;
1017 
1018 	self->cm.dispose(id);
1019 }
1020 
1021 
1022 static drmach_board_t *
1023 drmach_board_new(int bnum, int boot_board)
1024 {
1025 	static sbd_error_t *drmach_board_release(drmachid_t);
1026 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1027 
1028 	drmach_board_t	*bp;
1029 
1030 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1031 
1032 	bp->cm.isa = (void *)drmach_board_new;
1033 	bp->cm.release = drmach_board_release;
1034 	bp->cm.status = drmach_board_status;
1035 
1036 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1037 
1038 	bp->bnum = bnum;
1039 	bp->devices = NULL;
1040 	bp->connected = boot_board;
1041 	bp->tree = drmach_node_new();
1042 	bp->assigned = boot_board;
1043 	bp->powered = boot_board;
1044 	bp->boot_board = boot_board;
1045 
1046 	/*
1047 	 * If this is not bootup initialization, we have to wait till
1048 	 * IKP sets up the device nodes in drmach_board_connect().
1049 	 */
1050 	if (boot_board)
1051 		drmach_setup_core_info(bp);
1052 
1053 	drmach_array_set(drmach_boards, bnum, bp);
1054 	return (bp);
1055 }
1056 
1057 static void
1058 drmach_board_dispose(drmachid_t id)
1059 {
1060 	drmach_board_t *bp;
1061 
1062 	ASSERT(DRMACH_IS_BOARD_ID(id));
1063 	bp = id;
1064 
1065 	if (bp->tree)
1066 		drmach_node_dispose(bp->tree);
1067 
1068 	if (bp->devices)
1069 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1070 
1071 	kmem_free(bp, sizeof (*bp));
1072 }
1073 
1074 static sbd_error_t *
1075 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1076 {
1077 	sbd_error_t	*err = NULL;
1078 	drmach_board_t	*bp;
1079 
1080 	if (!DRMACH_IS_BOARD_ID(id))
1081 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1082 	bp = id;
1083 
1084 	stat->assigned = bp->assigned;
1085 	stat->powered = bp->powered;
1086 	stat->busy = 0;			/* assume not busy */
1087 	stat->configured = 0;		/* assume not configured */
1088 	stat->empty = 0;
1089 	stat->cond = bp->cond = SBD_COND_OK;
1090 	strncpy(stat->type, "System Brd", sizeof (stat->type));
1091 	stat->info[0] = '\0';
1092 
1093 	if (bp->devices) {
1094 		int		 rv;
1095 		int		 d_idx;
1096 		drmachid_t	 d_id;
1097 
1098 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1099 		while (rv == 0) {
1100 			drmach_status_t	d_stat;
1101 
1102 			err = drmach_i_status(d_id, &d_stat);
1103 			if (err)
1104 				break;
1105 
1106 			stat->busy |= d_stat.busy;
1107 			stat->configured |= d_stat.configured;
1108 
1109 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1110 		}
1111 	}
1112 
1113 	return (err);
1114 }
1115 
1116 int
1117 drmach_board_is_floating(drmachid_t id)
1118 {
1119 	drmach_board_t *bp;
1120 
1121 	if (!DRMACH_IS_BOARD_ID(id))
1122 		return (0);
1123 
1124 	bp = (drmach_board_t *)id;
1125 
1126 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1127 }
1128 
1129 static int
1130 drmach_init(void)
1131 {
1132 	dev_info_t	*rdip;
1133 	int		i, rv, len;
1134 	int		*floating;
1135 
1136 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1137 
1138 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1139 
1140 	rdip = ddi_root_node();
1141 
1142 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1143 		"floating-boards", &len) != DDI_PROP_SUCCESS) {
1144 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1145 	} else {
1146 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1147 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip,
1148 			PROP_LEN_AND_VAL_BUF, DDI_PROP_DONTPASS,
1149 			"floating-boards", (caddr_t)floating, &len);
1150 		if (rv != DDI_PROP_SUCCESS) {
1151 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1152 		} else {
1153 			drmach_domain.floating = 0;
1154 			for (i = 0; i < len / sizeof (int); i++) {
1155 				drmach_domain.floating |= (1 << floating[i]);
1156 			}
1157 		}
1158 		kmem_free(floating, len);
1159 	}
1160 	drmach_domain.allow_dr = opl_check_dr_status();
1161 
1162 	rdip = ddi_get_child(ddi_root_node());
1163 	do {
1164 		int		 bnum;
1165 		drmachid_t	 id;
1166 
1167 		bnum = -1;
1168 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip,
1169 			DDI_PROP_DONTPASS, OBP_BOARDNUM, -1);
1170 		if (bnum == -1)
1171 			continue;
1172 
1173 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1174 			cmn_err(CE_WARN, "Device node 0x%p has"
1175 				" invalid property value, %s=%d",
1176 					rdip, OBP_BOARDNUM, bnum);
1177 			goto error;
1178 		} else if (id == NULL) {
1179 			(void) drmach_board_new(bnum, 1);
1180 		}
1181 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1182 
1183 	opl_hold_devtree();
1184 
1185 	/*
1186 	 * Initialize the IKP feature.
1187 	 *
1188 	 * This can be done only after DR has acquired a hold on all the
1189 	 * device nodes that are interesting to IKP.
1190 	 */
1191 	if (opl_init_cfg() != 0) {
1192 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1193 
1194 		opl_release_devtree();
1195 
1196 		goto error;
1197 	}
1198 
1199 	return (0);
1200 error:
1201 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1202 	rw_destroy(&drmach_boards_rwlock);
1203 	return (ENXIO);
1204 }
1205 
1206 static void
1207 drmach_fini(void)
1208 {
1209 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1210 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1211 	drmach_boards = NULL;
1212 	rw_exit(&drmach_boards_rwlock);
1213 
1214 	/*
1215 	 * Walk immediate children of the root devinfo node
1216 	 * releasing holds acquired on branches in drmach_init()
1217 	 */
1218 
1219 	opl_release_devtree();
1220 
1221 	rw_destroy(&drmach_boards_rwlock);
1222 }
1223 
/*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
 *
 *
 *	Device Path:
 *	/pci@<portid>,reg
 *
 *	where
 *	portid[10] = 0
 *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
 *
 *	LLEAF_ID[9:8] = 0
 *	LLEAF_ID[8:4] = LSB_ID[4:0]
 *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
 *			channel 4 is pcicmu
 *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
 *
 *	Properties:
 *	name = pci
 *	device_type = "pciex"
 *	board# = LSBID
 *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
 *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
 */
1252 
1253 static sbd_error_t *
1254 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1255 {
1256 	drmach_io_t	*ip;
1257 
1258 	int		 portid;
1259 
1260 	portid = proto->portid;
1261 	ASSERT(portid != -1);
1262 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1263 
1264 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1265 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1266 	ip->dev.node = drmach_node_dup(proto->node);
1267 	ip->dev.cm.isa = (void *)drmach_io_new;
1268 	ip->dev.cm.dispose = drmach_io_dispose;
1269 	ip->dev.cm.release = drmach_io_release;
1270 	ip->dev.cm.status = drmach_io_status;
1271 	ip->channel = (portid >> 1) & 0x7;
1272 	ip->leaf = (portid & 0x1);
1273 
1274 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1275 		ip->dev.type, ip->dev.unum);
1276 
1277 	*idp = (drmachid_t)ip;
1278 	return (NULL);
1279 }
1280 
1281 
1282 static void
1283 drmach_io_dispose(drmachid_t id)
1284 {
1285 	drmach_io_t *self;
1286 
1287 	ASSERT(DRMACH_IS_IO_ID(id));
1288 
1289 	self = id;
1290 	if (self->dev.node)
1291 		drmach_node_dispose(self->dev.node);
1292 
1293 	kmem_free(self, sizeof (*self));
1294 }
1295 
1296 /*ARGSUSED*/
1297 sbd_error_t *
1298 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1299 {
1300 	drmach_board_t	*bp = (drmach_board_t *)id;
1301 	sbd_error_t	*err = NULL;
1302 
1303 	/* allow status and ncm operations to always succeed */
1304 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1305 		return (NULL);
1306 	}
1307 
1308 	/* check all other commands for the required option string */
1309 
1310 	if ((opts->size > 0) && (opts->copts != NULL)) {
1311 
1312 		DRMACH_PR("platform options: %s\n", opts->copts);
1313 
1314 		if (strstr(opts->copts, "opldr") == NULL) {
1315 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1316 		}
1317 	} else {
1318 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1319 	}
1320 
1321 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1322 		switch (cmd) {
1323 			case SBD_CMD_TEST:
1324 			case SBD_CMD_STATUS:
1325 			case SBD_CMD_GETNCM:
1326 				break;
1327 			case SBD_CMD_CONNECT:
1328 				if (bp->connected)
1329 					err = drerr_new(0, ESBD_STATE, NULL);
1330 				else if (!drmach_domain.allow_dr)
1331 					err = drerr_new(1, EOPL_SUPPORT,
1332 						NULL);
1333 				break;
1334 			case SBD_CMD_DISCONNECT:
1335 				if (!bp->connected)
1336 					err = drerr_new(0, ESBD_STATE, NULL);
1337 				else if (!drmach_domain.allow_dr)
1338 					err = drerr_new(1, EOPL_SUPPORT,
1339 						NULL);
1340 				break;
1341 			default:
1342 				if (!drmach_domain.allow_dr)
1343 					err = drerr_new(1, EOPL_SUPPORT,
1344 						NULL);
1345 				break;
1346 
1347 		}
1348 	}
1349 
1350 	return (err);
1351 }
1352 
1353 /*ARGSUSED*/
1354 sbd_error_t *
1355 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1356 {
1357 	return (NULL);
1358 }
1359 
1360 sbd_error_t *
1361 drmach_board_assign(int bnum, drmachid_t *id)
1362 {
1363 	sbd_error_t	*err = NULL;
1364 
1365 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1366 
1367 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1368 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1369 	} else {
1370 		drmach_board_t	*bp;
1371 
1372 		if (*id)
1373 			rw_downgrade(&drmach_boards_rwlock);
1374 
1375 		bp = *id;
1376 		if (!(*id))
1377 			bp = *id  =
1378 				(drmachid_t)drmach_board_new(bnum, 0);
1379 		bp->assigned = 1;
1380 	}
1381 
1382 	rw_exit(&drmach_boards_rwlock);
1383 
1384 	return (err);
1385 }
1386 
1387 /*ARGSUSED*/
1388 sbd_error_t *
1389 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1390 {
1391 	drmach_board_t	*obj = (drmach_board_t *)id;
1392 
1393 	if (!DRMACH_IS_BOARD_ID(id))
1394 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1395 
1396 	if (opl_probe_sb(obj->bnum) != 0)
1397 		return (drerr_new(0, EOPL_PROBE, NULL));
1398 
1399 	(void) prom_attach_notice(obj->bnum);
1400 
1401 	drmach_setup_core_info(obj);
1402 
1403 	obj->connected = 1;
1404 
1405 	return (NULL);
1406 }
1407 
1408 static int drmach_cache_flush_flag[NCPU];
1409 
1410 /*ARGSUSED*/
1411 static void
1412 drmach_flush_cache(uint64_t id, uint64_t dummy)
1413 {
1414 	extern void cpu_flush_ecache(void);
1415 
1416 	cpu_flush_ecache();
1417 	drmach_cache_flush_flag[id] = 0;
1418 }
1419 
1420 static void
1421 drmach_flush_all()
1422 {
1423 	cpuset_t	xc_cpuset;
1424 	int		i;
1425 
1426 	xc_cpuset = cpu_ready_set;
1427 	for (i = 0; i < NCPU; i++) {
1428 		if (CPU_IN_SET(xc_cpuset, i)) {
1429 			drmach_cache_flush_flag[i] = 1;
1430 			xc_one(i, drmach_flush_cache, i, 0);
1431 			while (drmach_cache_flush_flag[i]) {
1432 				DELAY(1000);
1433 			}
1434 		}
1435 	}
1436 }
1437 
1438 static int
1439 drmach_disconnect_cpus(drmach_board_t *bp)
1440 {
1441 	int i, bnum;
1442 
1443 	bnum = bp->bnum;
1444 
1445 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1446 	    if (bp->cores[i].core_present) {
1447 		if (bp->cores[i].core_started)
1448 		    return (-1);
1449 		if (bp->cores[i].core_hotadded) {
1450 		    if (drmach_add_remove_cpu(bnum, i, HOTREMOVE_CPU)) {
1451 			cmn_err(CE_WARN,
1452 			    "Failed to remove CMP %d on board %d\n",
1453 			    i, bnum);
1454 			return (-1);
1455 		    }
1456 		}
1457 	    }
1458 	}
1459 	return (0);
1460 }
1461 
1462 /*ARGSUSED*/
1463 sbd_error_t *
1464 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1465 {
1466 	drmach_board_t *obj;
1467 	int rv = 0;
1468 	sbd_error_t		*err = NULL;
1469 
1470 
1471 	if (!DRMACH_IS_BOARD_ID(id))
1472 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1473 
1474 
1475 
1476 	obj = (drmach_board_t *)id;
1477 
1478 	if (drmach_disconnect_cpus(obj)) {
1479 		err = drerr_new(0, EOPL_DEPROBE, obj->cm.name);
1480 		return (err);
1481 	}
1482 
1483 	rv = opl_unprobe_sb(obj->bnum);
1484 
1485 	if (rv == 0) {
1486 		prom_detach_notice(obj->bnum);
1487 		obj->connected = 0;
1488 
1489 	} else
1490 		err = drerr_new(0, EOPL_DEPROBE, obj->cm.name);
1491 
1492 	return (err);
1493 }
1494 
1495 static int
1496 drmach_get_portid(drmach_node_t *np)
1497 {
1498 	int		portid;
1499 	char		type[OBP_MAXPROPNAME];
1500 
1501 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1502 		return (portid);
1503 
1504 	/*
1505 	 * Get the device_type property to see if we should
1506 	 * continue processing this node.
1507 	 */
1508 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1509 		return (-1);
1510 
1511 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1512 		/*
1513 		 * We return cpuid because it has no portid
1514 		 */
1515 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1516 			return (portid);
1517 	}
1518 
1519 	return (-1);
1520 }
1521 
1522 /*
1523  * This is a helper function to determine if a given
1524  * node should be considered for a dr operation according
1525  * to predefined dr type nodes and the node's name.
1526  * Formal Parameter : The name of a device node.
1527  * Return Value: -1, name does not map to a valid dr type.
1528  *		 A value greater or equal to 0, name is a valid dr type.
1529  */
1530 static int
1531 drmach_name2type_idx(char *name)
1532 {
1533 	int 	index, ntypes;
1534 
1535 	if (name == NULL)
1536 		return (-1);
1537 
1538 	/*
1539 	 * Determine how many possible types are currently supported
1540 	 * for dr.
1541 	 */
1542 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1543 
1544 	/* Determine if the node's name correspond to a predefined type. */
1545 	for (index = 0; index < ntypes; index++) {
1546 		if (strcmp(drmach_name2type[index].name, name) == 0)
1547 			/* The node is an allowed type for dr. */
1548 			return (index);
1549 	}
1550 
1551 	/*
1552 	 * If the name of the node does not map to any of the
1553 	 * types in the array drmach_name2type then the node is not of
1554 	 * interest to dr.
1555 	 */
1556 	return (-1);
1557 }
1558 
/*
 * There are some complications on OPL:
 * - pseudo-mc nodes do not have a portid property
 * - portid[9:5] of a cmp node is the LSB#, portid[7:3] of pci is the LSB#
 * - cmp has board#
 * - core and cpu nodes do not have portid and board# properties
 * Starcat uses portid to derive the board# but that does not work
 * for us.  Starfire reads the board# property to filter the devices.
 * That does not work either.  So for these specific devices,
 * we use specific hard-coded methods to get the board# -
 * cpu: LSB# = CPUID[9:5]
 */
1571 
1572 static int
1573 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1574 {
1575 	drmach_node_t			*node = args->node;
1576 	drmach_board_cb_data_t		*data = args->data;
1577 	drmach_board_t			*obj = data->obj;
1578 
1579 	int		rv, portid;
1580 	int		bnum;
1581 	drmachid_t	id;
1582 	drmach_device_t	*device;
1583 	char name[OBP_MAXDRVNAME];
1584 
1585 	portid = drmach_get_portid(node);
1586 	/*
1587 	 * core, cpu and pseudo-mc do not have portid
1588 	 * we use cpuid as the portid of the cpu node
1589 	 * for pseudo-mc, we do not use portid info.
1590 	 */
1591 
1592 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1593 	if (rv)
1594 		return (0);
1595 
1596 
1597 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1598 
1599 	if (rv) {
1600 		/*
1601 		 * cpu does not have board# property.  We use
1602 		 * CPUID[9:5]
1603 		 */
1604 		if (strcmp("cpu", name) == 0) {
1605 			bnum = (portid >> 5) & 0x1f;
1606 		} else
1607 			return (0);
1608 	}
1609 
1610 
1611 	if (bnum != obj->bnum)
1612 		return (0);
1613 
1614 	if (drmach_name2type_idx(name) < 0) {
1615 		return (0);
1616 	}
1617 
1618 	/*
1619 	 * Create a device data structure from this node data.
1620 	 * The call may yield nothing if the node is not of interest
1621 	 * to drmach.
1622 	 */
1623 	data->err = drmach_device_new(node, obj, portid, &id);
1624 	if (data->err)
1625 		return (-1);
1626 	else if (!id) {
1627 		/*
1628 		 * drmach_device_new examined the node we passed in
1629 		 * and determined that it was one not of interest to
1630 		 * drmach.  So, it is skipped.
1631 		 */
1632 		return (0);
1633 	}
1634 
1635 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1636 	if (rv) {
1637 		data->err = DRMACH_INTERNAL_ERROR();
1638 		return (-1);
1639 	}
1640 	device = id;
1641 
1642 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1643 	return (data->err == NULL ? 0 : -1);
1644 }
1645 
1646 sbd_error_t *
1647 drmach_board_find_devices(drmachid_t id, void *a,
1648 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1649 {
1650 	drmach_board_t		*bp = (drmach_board_t *)id;
1651 	sbd_error_t		*err;
1652 	int			 max_devices;
1653 	int			 rv;
1654 	drmach_board_cb_data_t	data;
1655 
1656 
1657 	if (!DRMACH_IS_BOARD_ID(id))
1658 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1659 
1660 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1661 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1662 	max_devices += MAX_IO_UNITS_PER_BOARD;
1663 
1664 	bp->devices = drmach_array_new(0, max_devices);
1665 
1666 	if (bp->tree == NULL)
1667 		bp->tree = drmach_node_new();
1668 
1669 	data.obj = bp;
1670 	data.ndevs = 0;
1671 	data.found = found;
1672 	data.a = a;
1673 	data.err = NULL;
1674 
1675 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1676 	if (rv == 0)
1677 		err = NULL;
1678 	else {
1679 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1680 		bp->devices = NULL;
1681 
1682 		if (data.err)
1683 			err = data.err;
1684 		else
1685 			err = DRMACH_INTERNAL_ERROR();
1686 	}
1687 
1688 	return (err);
1689 }
1690 
1691 int
1692 drmach_board_lookup(int bnum, drmachid_t *id)
1693 {
1694 	int	rv = 0;
1695 
1696 	rw_enter(&drmach_boards_rwlock, RW_READER);
1697 	if (drmach_array_get(drmach_boards, bnum, id)) {
1698 		*id = 0;
1699 		rv = -1;
1700 	}
1701 	rw_exit(&drmach_boards_rwlock);
1702 	return (rv);
1703 }
1704 
1705 sbd_error_t *
1706 drmach_board_name(int bnum, char *buf, int buflen)
1707 {
1708 	snprintf(buf, buflen, "SB%d", bnum);
1709 	return (NULL);
1710 }
1711 
1712 sbd_error_t *
1713 drmach_board_poweroff(drmachid_t id)
1714 {
1715 	drmach_board_t	*bp;
1716 	sbd_error_t	*err;
1717 	drmach_status_t	 stat;
1718 
1719 	if (!DRMACH_IS_BOARD_ID(id))
1720 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1721 	bp = id;
1722 
1723 	err = drmach_board_status(id, &stat);
1724 
1725 	if (!err) {
1726 		if (stat.configured || stat.busy)
1727 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1728 		else {
1729 			bp->powered = 0;
1730 		}
1731 	}
1732 	return (err);
1733 }
1734 
1735 sbd_error_t *
1736 drmach_board_poweron(drmachid_t id)
1737 {
1738 	drmach_board_t	*bp;
1739 
1740 	if (!DRMACH_IS_BOARD_ID(id))
1741 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1742 	bp = id;
1743 
1744 	bp->powered = 1;
1745 
1746 	return (NULL);
1747 }
1748 
1749 static sbd_error_t *
1750 drmach_board_release(drmachid_t id)
1751 {
1752 	if (!DRMACH_IS_BOARD_ID(id))
1753 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1754 	return (NULL);
1755 }
1756 
1757 /*ARGSUSED*/
1758 sbd_error_t *
1759 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1760 {
1761 	return (NULL);
1762 }
1763 
1764 sbd_error_t *
1765 drmach_board_unassign(drmachid_t id)
1766 {
1767 	drmach_board_t	*bp;
1768 	sbd_error_t	*err;
1769 	drmach_status_t	 stat;
1770 
1771 
1772 	if (!DRMACH_IS_BOARD_ID(id)) {
1773 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1774 	}
1775 	bp = id;
1776 
1777 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1778 
1779 	err = drmach_board_status(id, &stat);
1780 	if (err) {
1781 		rw_exit(&drmach_boards_rwlock);
1782 		return (err);
1783 	}
1784 	if (stat.configured || stat.busy) {
1785 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1786 	} else {
1787 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1788 			err = DRMACH_INTERNAL_ERROR();
1789 		else
1790 			drmach_board_dispose(bp);
1791 	}
1792 	rw_exit(&drmach_boards_rwlock);
1793 	return (err);
1794 }
1795 
1796 /*
1797  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1798  * implementation #, etc
1799  */
1800 
1801 static sbd_error_t *
1802 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1803 {
1804 	static void drmach_cpu_dispose(drmachid_t);
1805 	static sbd_error_t *drmach_cpu_release(drmachid_t);
1806 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
1807 
1808 	int		 portid;
1809 	drmach_cpu_t	*cp = NULL;
1810 
1811 	/* portid is CPUID of the node */
1812 	portid = proto->portid;
1813 	ASSERT(portid != -1);
1814 
1815 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1816 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1817 		(OPL_MAX_CMPID_PER_BOARD - 1)) +
1818 		((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1819 		(OPL_MAX_CMPID_PER_BOARD));
1820 
1821 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1822 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1823 	cp->dev.node = drmach_node_dup(proto->node);
1824 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1825 	cp->dev.cm.dispose = drmach_cpu_dispose;
1826 	cp->dev.cm.release = drmach_cpu_release;
1827 	cp->dev.cm.status = drmach_cpu_status;
1828 
1829 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1830 		cp->dev.type, cp->dev.unum);
1831 
1832 /*
1833  *	CPU ID representation
1834  *	CPUID[9:5] = SB#
1835  *	CPUID[4:3] = Chip#
1836  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1837  *	CPUID[0:0] = Strand#
1838  */
1839 
1840 /*
1841  *	reg property of the strand contains strand ID
1842  *	reg property of the parent node contains core ID
1843  *	We should use them.
1844  */
1845 	cp->cpuid = portid;
1846 	cp->sb = (portid >> 5) & 0x1f;
1847 	cp->chipid = (portid >> 3) & 0x3;
1848 	cp->coreid = (portid >> 1) & 0x3;
1849 	cp->strandid = portid & 0x1;
1850 
1851 	*idp = (drmachid_t)cp;
1852 	return (NULL);
1853 }
1854 
1855 
1856 static void
1857 drmach_cpu_dispose(drmachid_t id)
1858 {
1859 	drmach_cpu_t	*self;
1860 
1861 	ASSERT(DRMACH_IS_CPU_ID(id));
1862 
1863 	self = id;
1864 	if (self->dev.node)
1865 		drmach_node_dispose(self->dev.node);
1866 
1867 	kmem_free(self, sizeof (*self));
1868 }
1869 
1870 static int
1871 drmach_cpu_start(struct cpu *cp)
1872 {
1873 	int		cpuid = cp->cpu_id;
1874 	extern int	restart_other_cpu(int);
1875 
1876 	ASSERT(MUTEX_HELD(&cpu_lock));
1877 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1878 
1879 	cp->cpu_flags &= ~CPU_POWEROFF;
1880 
1881 	/*
1882 	 * NOTE: restart_other_cpu pauses cpus during the
1883 	 *	 slave cpu start.  This helps to quiesce the
1884 	 *	 bus traffic a bit which makes the tick sync
1885 	 *	 routine in the prom more robust.
1886 	 */
1887 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1888 
1889 	restart_other_cpu(cpuid);
1890 
1891 	return (0);
1892 }
1893 
1894 static sbd_error_t *
1895 drmach_cpu_release(drmachid_t id)
1896 {
1897 	if (!DRMACH_IS_CPU_ID(id))
1898 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1899 
1900 	return (NULL);
1901 }
1902 
1903 static sbd_error_t *
1904 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1905 {
1906 	drmach_cpu_t *cp;
1907 	drmach_device_t *dp;
1908 
1909 	ASSERT(DRMACH_IS_CPU_ID(id));
1910 	cp = (drmach_cpu_t *)id;
1911 	dp = &cp->dev;
1912 
1913 	stat->assigned = dp->bp->assigned;
1914 	stat->powered = dp->bp->powered;
1915 	mutex_enter(&cpu_lock);
1916 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1917 	mutex_exit(&cpu_lock);
1918 	stat->busy = dp->busy;
1919 	strncpy(stat->type, dp->type, sizeof (stat->type));
1920 	stat->info[0] = '\0';
1921 
1922 	return (NULL);
1923 }
1924 
1925 sbd_error_t *
1926 drmach_cpu_disconnect(drmachid_t id)
1927 {
1928 
1929 	if (!DRMACH_IS_CPU_ID(id))
1930 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1931 
1932 	return (NULL);
1933 }
1934 
1935 sbd_error_t *
1936 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1937 {
1938 	drmach_cpu_t *cpu;
1939 
1940 	if (!DRMACH_IS_CPU_ID(id))
1941 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1942 	cpu = (drmach_cpu_t *)id;
1943 
1944 	/* get from cpu directly on OPL */
1945 	*cpuid = cpu->cpuid;
1946 	return (NULL);
1947 }
1948 
1949 sbd_error_t *
1950 drmach_cpu_get_impl(drmachid_t id, int *ip)
1951 {
1952 	drmach_device_t *cpu;
1953 	drmach_node_t	*np;
1954 	drmach_node_t	pp;
1955 	int		impl;
1956 	char		type[OBP_MAXPROPNAME];
1957 
1958 	if (!DRMACH_IS_CPU_ID(id))
1959 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1960 
1961 	cpu = id;
1962 	np = cpu->node;
1963 
1964 	if (np->get_parent(np, &pp) != 0) {
1965 		return (DRMACH_INTERNAL_ERROR());
1966 	}
1967 
1968 	/* the parent should be core */
1969 
1970 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1971 		return (drerr_new(0, EOPL_GETPROP, NULL));
1972 	}
1973 
1974 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1975 		if (pp.n_getprop(&pp, "implementation#",
1976 			&impl, sizeof (impl)) != 0) {
1977 			return (drerr_new(0, EOPL_GETPROP, NULL));
1978 		}
1979 	} else {
1980 		return (DRMACH_INTERNAL_ERROR());
1981 	}
1982 
1983 	*ip = impl;
1984 
1985 	return (NULL);
1986 }
1987 
1988 sbd_error_t *
1989 drmach_get_dip(drmachid_t id, dev_info_t **dip)
1990 {
1991 	drmach_device_t	*dp;
1992 
1993 	if (!DRMACH_IS_DEVICE_ID(id))
1994 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1995 	dp = id;
1996 
1997 	*dip = dp->node->n_getdip(dp->node);
1998 	return (NULL);
1999 }
2000 
2001 sbd_error_t *
2002 drmach_io_is_attached(drmachid_t id, int *yes)
2003 {
2004 	drmach_device_t *dp;
2005 	dev_info_t	*dip;
2006 	int		state;
2007 
2008 	if (!DRMACH_IS_IO_ID(id))
2009 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2010 	dp = id;
2011 
2012 	dip = dp->node->n_getdip(dp->node);
2013 	if (dip == NULL) {
2014 		*yes = 0;
2015 		return (NULL);
2016 	}
2017 
2018 	state = ddi_get_devstate(dip);
2019 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2020 	    (state == DDI_DEVSTATE_UP));
2021 
2022 	return (NULL);
2023 }
2024 
2025 struct drmach_io_cb {
2026 	char	*name;	/* name of the node */
2027 	int	(*func)(dev_info_t *);
2028 	int	rv;
2029 	dev_info_t *dip;
2030 };
2031 
2032 #define	DRMACH_IO_POST_ATTACH	0
2033 #define	DRMACH_IO_PRE_RELEASE	1
2034 
2035 static int
2036 drmach_io_cb_check(dev_info_t *dip, void *arg)
2037 {
2038 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2039 	char name[OBP_MAXDRVNAME];
2040 	int len = OBP_MAXDRVNAME;
2041 
2042 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2043 		DDI_PROP_DONTPASS, "name",
2044 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2045 		return (DDI_WALK_PRUNECHILD);
2046 	}
2047 
2048 	if (strcmp(name, p->name) == 0) {
2049 		ndi_hold_devi(dip);
2050 		p->dip = dip;
2051 		return (DDI_WALK_TERMINATE);
2052 	}
2053 
2054 	return (DDI_WALK_CONTINUE);
2055 }
2056 
2057 
2058 static int
2059 drmach_console_ops(drmachid_t *id, int state)
2060 {
2061 	drmach_io_t *obj = (drmach_io_t *)id;
2062 	struct drmach_io_cb arg;
2063 	int (*msudetp)(dev_info_t *);
2064 	int (*msuattp)(dev_info_t *);
2065 	dev_info_t *dip, *pdip;
2066 	int circ;
2067 
2068 	/* 4 is pcicmu channel */
2069 	if (obj->channel != 4)
2070 		return (0);
2071 
2072 	arg.name = "serial";
2073 	arg.func = NULL;
2074 	if (state == DRMACH_IO_PRE_RELEASE) {
2075 		msudetp = (int (*)(dev_info_t *))
2076 		    modgetsymvalue("oplmsu_dr_detach", 0);
2077 		if (msudetp != NULL)
2078 			arg.func = msudetp;
2079 	} else if (state == DRMACH_IO_POST_ATTACH) {
2080 		msuattp = (int (*)(dev_info_t *))
2081 		    modgetsymvalue("oplmsu_dr_attach", 0);
2082 		if (msuattp != NULL)
2083 			arg.func = msuattp;
2084 	} else {
2085 		return (0);
2086 	}
2087 
2088 	if (arg.func == NULL) {
2089 		return (0);
2090 	}
2091 
2092 	arg.rv = 0;
2093 	arg.dip = NULL;
2094 
2095 	dip = obj->dev.node->n_getdip(obj->dev.node);
2096 	if (pdip = ddi_get_parent(dip)) {
2097 		ndi_hold_devi(pdip);
2098 		ndi_devi_enter(pdip, &circ);
2099 	} else {
2100 		/* this cannot happen unless something bad happens */
2101 		return (-1);
2102 	}
2103 
2104 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2105 
2106 	ndi_devi_exit(pdip, circ);
2107 	ndi_rele_devi(pdip);
2108 
2109 	if (arg.dip) {
2110 		arg.rv = (*arg.func)(arg.dip);
2111 		ndi_rele_devi(arg.dip);
2112 	} else {
2113 		arg.rv = -1;
2114 	}
2115 
2116 	return (arg.rv);
2117 }
2118 
2119 sbd_error_t *
2120 drmach_io_pre_release(drmachid_t id)
2121 {
2122 	int rv;
2123 
2124 	if (!DRMACH_IS_IO_ID(id))
2125 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2126 
2127 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2128 
2129 	if (rv != 0)
2130 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2131 
2132 	return (NULL);
2133 }
2134 
2135 static sbd_error_t *
2136 drmach_io_release(drmachid_t id)
2137 {
2138 	if (!DRMACH_IS_IO_ID(id))
2139 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2140 	return (NULL);
2141 }
2142 
2143 sbd_error_t *
2144 drmach_io_unrelease(drmachid_t id)
2145 {
2146 	if (!DRMACH_IS_IO_ID(id))
2147 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2148 	return (NULL);
2149 }
2150 
2151 /*ARGSUSED*/
2152 sbd_error_t *
2153 drmach_io_post_release(drmachid_t id)
2154 {
2155 	return (NULL);
2156 }
2157 
2158 /*ARGSUSED*/
2159 sbd_error_t *
2160 drmach_io_post_attach(drmachid_t id)
2161 {
2162 	int rv;
2163 
2164 	if (!DRMACH_IS_IO_ID(id))
2165 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2166 
2167 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2168 
2169 	if (rv != 0)
2170 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2171 
2172 	return (0);
2173 }
2174 
2175 static sbd_error_t *
2176 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2177 {
2178 	drmach_device_t *dp;
2179 	sbd_error_t	*err;
2180 	int		 configured;
2181 
2182 	ASSERT(DRMACH_IS_IO_ID(id));
2183 	dp = id;
2184 
2185 	err = drmach_io_is_attached(id, &configured);
2186 	if (err)
2187 		return (err);
2188 
2189 	stat->assigned = dp->bp->assigned;
2190 	stat->powered = dp->bp->powered;
2191 	stat->configured = (configured != 0);
2192 	stat->busy = dp->busy;
2193 	strncpy(stat->type, dp->type, sizeof (stat->type));
2194 	stat->info[0] = '\0';
2195 
2196 	return (NULL);
2197 }
2198 
2199 static sbd_error_t *
2200 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2201 {
2202 	static void drmach_mem_dispose(drmachid_t);
2203 	static sbd_error_t *drmach_mem_release(drmachid_t);
2204 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2205 	dev_info_t *dip;
2206 	int rv;
2207 
2208 	drmach_mem_t	*mp;
2209 
2210 	rv = 0;
2211 
2212 	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
2213 		(rv <= 0)) {
2214 		*idp = (drmachid_t)0;
2215 		return (NULL);
2216 	}
2217 
2218 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2219 	proto->unum = 0;
2220 
2221 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2222 	mp->dev.node = drmach_node_dup(proto->node);
2223 	mp->dev.cm.isa = (void *)drmach_mem_new;
2224 	mp->dev.cm.dispose = drmach_mem_dispose;
2225 	mp->dev.cm.release = drmach_mem_release;
2226 	mp->dev.cm.status = drmach_mem_status;
2227 
2228 	snprintf(mp->dev.cm.name,
2229 		sizeof (mp->dev.cm.name), "%s", mp->dev.type);
2230 
2231 	dip = mp->dev.node->n_getdip(mp->dev.node);
2232 	if (drmach_setup_mc_info(dip, mp) != 0) {
2233 		return (drerr_new(0, EOPL_MC_SETUP, NULL));
2234 	}
2235 
2236 	/* make sure we do not create memoryless nodes */
2237 	if (mp->nbytes == 0) {
2238 		*idp = (drmachid_t)NULL;
2239 		kmem_free(mp, sizeof (drmach_mem_t));
2240 	} else
2241 		*idp = (drmachid_t)mp;
2242 
2243 	return (NULL);
2244 }
2245 
2246 static void
2247 drmach_mem_dispose(drmachid_t id)
2248 {
2249 	drmach_mem_t *mp;
2250 
2251 	ASSERT(DRMACH_IS_MEM_ID(id));
2252 
2253 
2254 	mp = id;
2255 
2256 	if (mp->dev.node)
2257 		drmach_node_dispose(mp->dev.node);
2258 
2259 	if (mp->memlist) {
2260 		memlist_delete(mp->memlist);
2261 		mp->memlist = NULL;
2262 	}
2263 
2264 	kmem_free(mp, sizeof (*mp));
2265 }
2266 
2267 sbd_error_t *
2268 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2269 {
2270 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2271 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2272 	int		rv;
2273 
2274 	ASSERT(size != 0);
2275 
2276 	if (!DRMACH_IS_MEM_ID(id))
2277 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2278 
2279 	kcage_range_lock();
2280 	rv = kcage_range_add(basepfn, npages, 1);
2281 	kcage_range_unlock();
2282 	if (rv == ENOMEM) {
2283 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
2284 			(size == 0 ? 0 : size / MBYTE));
2285 	} else if (rv != 0) {
2286 		/* catch this in debug kernels */
2287 		ASSERT(0);
2288 
2289 		cmn_err(CE_WARN, "unexpected kcage_range_add"
2290 			" return value %d", rv);
2291 	}
2292 
2293 	if (rv) {
2294 		return (DRMACH_INTERNAL_ERROR());
2295 	}
2296 	else
2297 		return (NULL);
2298 }
2299 
2300 sbd_error_t *
2301 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2302 {
2303 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2304 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2305 	int		rv;
2306 
2307 	if (!DRMACH_IS_MEM_ID(id))
2308 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2309 
2310 	if (size > 0) {
2311 		kcage_range_lock();
2312 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2313 		kcage_range_unlock();
2314 		if (rv != 0) {
2315 			cmn_err(CE_WARN,
2316 			    "unexpected kcage_range_delete_post_mem_del"
2317 			    " return value %d", rv);
2318 			return (DRMACH_INTERNAL_ERROR());
2319 		}
2320 	}
2321 
2322 	return (NULL);
2323 }
2324 
2325 sbd_error_t *
2326 drmach_mem_disable(drmachid_t id)
2327 {
2328 	if (!DRMACH_IS_MEM_ID(id))
2329 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2330 	else {
2331 		drmach_flush_all();
2332 		return (NULL);
2333 	}
2334 }
2335 
2336 sbd_error_t *
2337 drmach_mem_enable(drmachid_t id)
2338 {
2339 	if (!DRMACH_IS_MEM_ID(id))
2340 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2341 	else
2342 		return (NULL);
2343 }
2344 
2345 sbd_error_t *
2346 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2347 {
2348 	drmach_mem_t *mp;
2349 
2350 	if (!DRMACH_IS_MEM_ID(id))
2351 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2352 
2353 	mp = (drmach_mem_t *)id;
2354 
2355 	/*
2356 	 * This is only used by dr to round up/down the memory
2357 	 * for copying. Our unit of memory isolation is 64 MB.
2358 	 */
2359 
2360 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2361 	mem->mi_basepa = mp->base_pa;
2362 	mem->mi_size = mp->nbytes;
2363 	mem->mi_slice_size = mp->slice_size;
2364 
2365 	return (NULL);
2366 }
2367 
2368 sbd_error_t *
2369 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2370 {
2371 	drmach_mem_t *mp;
2372 
2373 	if (!DRMACH_IS_MEM_ID(id))
2374 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2375 
2376 	mp = (drmach_mem_t *)id;
2377 
2378 	*pa = mp->base_pa;
2379 	return (NULL);
2380 }
2381 
2382 sbd_error_t *
2383 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2384 {
2385 	drmach_mem_t	*mem;
2386 	int		rv;
2387 	struct memlist	*mlist;
2388 
2389 	if (!DRMACH_IS_MEM_ID(id))
2390 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2391 
2392 	mem = (drmach_mem_t *)id;
2393 	mlist = memlist_dup(mem->memlist);
2394 
2395 #ifdef DEBUG
2396 	/*
2397 	 * Make sure the incoming memlist doesn't already
2398 	 * intersect with what's present in the system (phys_install).
2399 	 */
2400 	memlist_read_lock();
2401 	rv = memlist_intersect(phys_install, mlist);
2402 	memlist_read_unlock();
2403 	if (rv) {
2404 		DRMACH_PR("Derived memlist intersects"
2405 			" with phys_install\n");
2406 		memlist_dump(mlist);
2407 
2408 		DRMACH_PR("phys_install memlist:\n");
2409 		memlist_dump(phys_install);
2410 
2411 		memlist_delete(mlist);
2412 		return (DRMACH_INTERNAL_ERROR());
2413 	}
2414 
2415 	DRMACH_PR("Derived memlist:");
2416 	memlist_dump(mlist);
2417 #endif
2418 	*ml = mlist;
2419 
2420 	return (NULL);
2421 }
2422 
2423 sbd_error_t *
2424 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2425 {
2426 	drmach_mem_t	*mem;
2427 
2428 	if (!DRMACH_IS_MEM_ID(id))
2429 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2430 
2431 	mem = (drmach_mem_t *)id;
2432 
2433 	*bytes = mem->slice_size;
2434 
2435 	return (NULL);
2436 }
2437 
2438 
2439 /* ARGSUSED */
2440 processorid_t
2441 drmach_mem_cpu_affinity(drmachid_t id)
2442 {
2443 	return (CPU_CURRENT);
2444 }
2445 
2446 static sbd_error_t *
2447 drmach_mem_release(drmachid_t id)
2448 {
2449 	if (!DRMACH_IS_MEM_ID(id))
2450 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2451 	return (NULL);
2452 }
2453 
2454 static sbd_error_t *
2455 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2456 {
2457 	drmach_mem_t *dp;
2458 	uint64_t	 pa, slice_size;
2459 	struct memlist	*ml;
2460 
2461 	ASSERT(DRMACH_IS_MEM_ID(id));
2462 	dp = id;
2463 
2464 	/* get starting physical address of target memory */
2465 	pa = dp->base_pa;
2466 
2467 	/* round down to slice boundary */
2468 	slice_size = dp->slice_size;
2469 	pa &= ~(slice_size - 1);
2470 
2471 	/* stop at first span that is in slice */
2472 	memlist_read_lock();
2473 	for (ml = phys_install; ml; ml = ml->next)
2474 		if (ml->address >= pa && ml->address < pa + slice_size)
2475 			break;
2476 	memlist_read_unlock();
2477 
2478 	stat->assigned = dp->dev.bp->assigned;
2479 	stat->powered = dp->dev.bp->powered;
2480 	stat->configured = (ml != NULL);
2481 	stat->busy = dp->dev.busy;
2482 	strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2483 	stat->info[0] = '\0';
2484 
2485 	return (NULL);
2486 }
2487 
2488 
2489 sbd_error_t *
2490 drmach_board_deprobe(drmachid_t id)
2491 {
2492 	drmach_board_t	*bp;
2493 
2494 	if (!DRMACH_IS_BOARD_ID(id))
2495 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2496 
2497 	bp = id;
2498 
2499 	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
2500 
2501 	if (bp->tree) {
2502 		drmach_node_dispose(bp->tree);
2503 		bp->tree = NULL;
2504 	}
2505 	if (bp->devices) {
2506 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2507 		bp->devices = NULL;
2508 	}
2509 
2510 	bp->boot_board = 0;
2511 
2512 	return (NULL);
2513 }
2514 
2515 /*ARGSUSED*/
2516 static sbd_error_t *
2517 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2518 {
2519 	drmach_board_t		*bp = (drmach_board_t *)id;
2520 	sbd_error_t		*err = NULL;
2521 	int	rv;
2522 
2523 	if (!DRMACH_IS_BOARD_ID(id))
2524 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2525 
2526 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2527 	rv = opl_probe_sb(bp->bnum);
2528 	if (rv != 0) {
2529 		err = drerr_new(0, EOPL_PROBE, bp->cm.name);
2530 		return (err);
2531 	}
2532 	return (err);
2533 }
2534 
2535 /*ARGSUSED*/
2536 static sbd_error_t *
2537 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2538 {
2539 	drmach_board_t	*bp;
2540 	sbd_error_t	*err = NULL;
2541 	int	rv;
2542 
2543 	if (!DRMACH_IS_BOARD_ID(id))
2544 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2545 	bp = (drmach_board_t *)id;
2546 
2547 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2548 
2549 	rv = opl_unprobe_sb(bp->bnum);
2550 	if (rv != 0) {
2551 		err = drerr_new(0, EOPL_DEPROBE, bp->cm.name);
2552 	}
2553 
2554 	return (err);
2555 }
2556 
2557 
2558 /*ARGSUSED*/
2559 sbd_error_t *
2560 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2561 {
2562 	struct memlist	*ml;
2563 	uint64_t	src_pa;
2564 	uint64_t	dst_pa;
2565 	uint64_t	dst;
2566 
2567 	dst_pa = va_to_pa(&dst);
2568 
2569 	memlist_read_lock();
2570 	for (ml = phys_install; ml; ml = ml->next) {
2571 		uint64_t	nbytes;
2572 
2573 		src_pa = ml->address;
2574 		nbytes = ml->size;
2575 
2576 		while (nbytes != 0ull) {
2577 
2578 			/* copy 32 bytes at arc_pa to dst_pa */
2579 			bcopy32_il(src_pa, dst_pa);
2580 
2581 			/* increment by 32 bytes */
2582 			src_pa += (4 * sizeof (uint64_t));
2583 
2584 			/* decrement by 32 bytes */
2585 			nbytes -= (4 * sizeof (uint64_t));
2586 		}
2587 	}
2588 	memlist_read_unlock();
2589 
2590 	return (NULL);
2591 }
2592 
2593 static struct {
2594 	const char	*name;
2595 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
2596 } drmach_pt_arr[] = {
2597 	{ "readmem",		drmach_pt_readmem		},
2598 	{ "ikprobe",	drmach_pt_ikprobe	},
2599 	{ "ikdeprobe",	drmach_pt_ikdeprobe	},
2600 
2601 	/* the following line must always be last */
2602 	{ NULL,			NULL				}
2603 };
2604 
2605 /*ARGSUSED*/
2606 sbd_error_t *
2607 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2608 {
2609 	int		i;
2610 	sbd_error_t	*err;
2611 
2612 	i = 0;
2613 	while (drmach_pt_arr[i].name != NULL) {
2614 		int len = strlen(drmach_pt_arr[i].name);
2615 
2616 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2617 			break;
2618 
2619 		i += 1;
2620 	}
2621 
2622 	if (drmach_pt_arr[i].name == NULL)
2623 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2624 	else
2625 		err = (*drmach_pt_arr[i].handler)(id, opts);
2626 
2627 	return (err);
2628 }
2629 
2630 sbd_error_t *
2631 drmach_release(drmachid_t id)
2632 {
2633 	drmach_common_t *cp;
2634 
2635 	if (!DRMACH_IS_DEVICE_ID(id))
2636 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2637 	cp = id;
2638 
2639 	return (cp->release(id));
2640 }
2641 
2642 sbd_error_t *
2643 drmach_status(drmachid_t id, drmach_status_t *stat)
2644 {
2645 	drmach_common_t *cp;
2646 	sbd_error_t	*err;
2647 
2648 	rw_enter(&drmach_boards_rwlock, RW_READER);
2649 
2650 	if (!DRMACH_IS_ID(id)) {
2651 		rw_exit(&drmach_boards_rwlock);
2652 		return (drerr_new(0, EOPL_NOTID, NULL));
2653 	}
2654 	cp = (drmach_common_t *)id;
2655 	err = cp->status(id, stat);
2656 
2657 	rw_exit(&drmach_boards_rwlock);
2658 
2659 	return (err);
2660 }
2661 
2662 static sbd_error_t *
2663 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2664 {
2665 	drmach_common_t *cp;
2666 
2667 	if (!DRMACH_IS_ID(id))
2668 		return (drerr_new(0, EOPL_NOTID, NULL));
2669 	cp = id;
2670 
2671 	return (cp->status(id, stat));
2672 }
2673 
2674 /*ARGSUSED*/
2675 sbd_error_t *
2676 drmach_unconfigure(drmachid_t id, int flags)
2677 {
2678 	drmach_device_t *dp;
2679 	dev_info_t	*rdip, *fdip = NULL;
2680 	char name[OBP_MAXDRVNAME];
2681 	int rv;
2682 
2683 	if (DRMACH_IS_CPU_ID(id))
2684 		return (NULL);
2685 
2686 	if (!DRMACH_IS_DEVICE_ID(id))
2687 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2688 
2689 	dp = id;
2690 
2691 	rdip = dp->node->n_getdip(dp->node);
2692 
2693 	ASSERT(rdip);
2694 
2695 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2696 
2697 	if (rv)
2698 		return (NULL);
2699 
2700 	/*
2701 	 * Note: FORCE flag is no longer necessary under devfs
2702 	 */
2703 
2704 	ASSERT(e_ddi_branch_held(rdip));
2705 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2706 		sbd_error_t	*err;
2707 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2708 
2709 		/*
2710 		 * If non-NULL, fdip is returned held and must be released.
2711 		 */
2712 		if (fdip != NULL) {
2713 			(void) ddi_pathname(fdip, path);
2714 			ndi_rele_devi(fdip);
2715 		} else {
2716 			(void) ddi_pathname(rdip, path);
2717 		}
2718 
2719 		err = drerr_new(1, EOPL_DRVFAIL, path);
2720 
2721 		kmem_free(path, MAXPATHLEN);
2722 
2723 		return (err);
2724 	}
2725 
2726 	return (NULL);
2727 }
2728 
2729 
2730 int
2731 drmach_cpu_poweron(struct cpu *cp)
2732 {
2733 	int bnum, cpuid, onb_core_num, strand_id;
2734 	drmach_board_t *bp;
2735 
2736 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2737 
2738 	cpuid = cp->cpu_id;
2739 	bnum = LSB_ID(cpuid);
2740 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2741 	strand_id = STRAND_ID(cpuid);
2742 	bp = drmach_get_board_by_bnum(bnum);
2743 
2744 	ASSERT(bp);
2745 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2746 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2747 			HOTADD_CPU) != 0) {
2748 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2749 				onb_core_num, bnum);
2750 			return (EIO);
2751 		}
2752 	}
2753 
2754 	ASSERT(MUTEX_HELD(&cpu_lock));
2755 
2756 	if (drmach_cpu_start(cp) != 0) {
2757 		if (bp->cores[onb_core_num].core_started == 0) {
2758 			/*
2759 			 * we must undo the hotadd or no one will do that
2760 			 * If this fails, we will do this again in
2761 			 * drmach_board_disconnect.
2762 			 */
2763 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2764 				HOTREMOVE_CPU) != 0) {
2765 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2766 					"on board %d\n",
2767 					onb_core_num, bnum);
2768 			}
2769 		}
2770 		return (EBUSY);
2771 	} else {
2772 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2773 		return (0);
2774 	}
2775 }
2776 
2777 int
2778 drmach_cpu_poweroff(struct cpu *cp)
2779 {
2780 	int 		rv = 0;
2781 	processorid_t	cpuid = cp->cpu_id;
2782 
2783 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2784 
2785 	ASSERT(MUTEX_HELD(&cpu_lock));
2786 
2787 	/*
2788 	 * Capture all CPUs (except for detaching proc) to prevent
2789 	 * crosscalls to the detaching proc until it has cleared its
2790 	 * bit in cpu_ready_set.
2791 	 *
2792 	 * The CPU's remain paused and the prom_mutex is known to be free.
2793 	 * This prevents the x-trap victim from blocking when doing prom
2794 	 * IEEE-1275 calls at a high PIL level.
2795 	 */
2796 
2797 	promsafe_pause_cpus();
2798 
2799 	/*
2800 	 * Quiesce interrupts on the target CPU. We do this by setting
2801 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2802 	 * prevent it from receiving cross calls and cross traps.
2803 	 * This prevents the processor from receiving any new soft interrupts.
2804 	 */
2805 	mp_cpu_quiesce(cp);
2806 
2807 	rv = prom_stopcpu_bycpuid(cpuid);
2808 	if (rv == 0)
2809 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2810 
2811 	start_cpus();
2812 
2813 	if (rv == 0) {
2814 		int bnum, onb_core_num, strand_id;
2815 		drmach_board_t *bp;
2816 
2817 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2818 
2819 		bnum = LSB_ID(cpuid);
2820 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2821 		strand_id = STRAND_ID(cpuid);
2822 		bp = drmach_get_board_by_bnum(bnum);
2823 		ASSERT(bp);
2824 
2825 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2826 		if (bp->cores[onb_core_num].core_started == 0) {
2827 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2828 				HOTREMOVE_CPU) != 0) {
2829 				cmn_err(CE_WARN,
2830 					"Failed to remove CMP %d LSB %d\n",
2831 					onb_core_num, bnum);
2832 				return (EIO);
2833 			}
2834 		}
2835 	}
2836 
2837 	return (rv);
2838 }
2839 
2840 /*ARGSUSED*/
2841 int
2842 drmach_verify_sr(dev_info_t *dip, int sflag)
2843 {
2844 	return (0);
2845 }
2846 
/*
 * Suspend hook; intentionally a no-op in this implementation.
 */
void
drmach_suspend_last(void)
{
	/* nothing to do */
}
2851 
/*
 * Resume hook; intentionally a no-op in this implementation.
 */
void
drmach_resume_first(void)
{
	/* nothing to do */
}
2856 
2857 /*
2858  * Log a DR sysevent.
2859  * Return value: 0 success, non-zero failure.
2860  */
2861 int
2862 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2863 {
2864 	sysevent_t			*ev;
2865 	sysevent_id_t			eid;
2866 	int				rv, km_flag;
2867 	sysevent_value_t		evnt_val;
2868 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2869 	char				attach_pnt[MAXNAMELEN];
2870 
2871 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2872 	attach_pnt[0] = '\0';
2873 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2874 		rv = -1;
2875 		goto logexit;
2876 	}
2877 	if (verbose)
2878 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2879 			attach_pnt, hint, flag, verbose);
2880 
2881 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2882 		SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2883 		rv = -2;
2884 		goto logexit;
2885 	}
2886 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2887 	evnt_val.value.sv_string = attach_pnt;
2888 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
2889 		&evnt_val, km_flag)) != 0)
2890 		goto logexit;
2891 
2892 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2893 	evnt_val.value.sv_string = hint;
2894 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
2895 		&evnt_val, km_flag)) != 0) {
2896 		sysevent_free_attr(evnt_attr_list);
2897 		goto logexit;
2898 	}
2899 
2900 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2901 
2902 	/*
2903 	 * Log the event but do not sleep waiting for its
2904 	 * delivery. This provides insulation from syseventd.
2905 	 */
2906 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2907 
2908 logexit:
2909 	if (ev)
2910 		sysevent_free(ev);
2911 	if ((rv != 0) && verbose)
2912 		cmn_err(CE_WARN,
2913 			"drmach_log_sysevent failed (rv %d) for %s  %s\n",
2914 			rv, attach_pnt, hint);
2915 
2916 	return (rv);
2917 }
2918 
2919 #define	OPL_DR_STATUS_PROP "dr-status"
2920 
2921 static int
2922 opl_check_dr_status()
2923 {
2924 	pnode_t	node;
2925 	int	rtn, len;
2926 	char	*str;
2927 
2928 	node = prom_rootnode();
2929 	if (node == OBP_BADNODE) {
2930 		return (1);
2931 	}
2932 
2933 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2934 	if (len == -1) {
2935 		/*
2936 		 * dr-status doesn't exist when DR is activated and
2937 		 * any warning messages aren't needed.
2938 		 */
2939 		return (1);
2940 	}
2941 
2942 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2943 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2944 	kmem_free(str, len + 1);
2945 	if (rtn == -1) {
2946 		return (1);
2947 	} else {
2948 		return (0);
2949 	}
2950 }
2951 
2952 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2953 
2954 static struct memlist *
2955 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2956 	struct memlist *mlist, uint64_t base, uint64_t len)
2957 {
2958 	struct memlist	*ml, *tl, *nl;
2959 
2960 	if (len == 0ull)
2961 		return (NULL);
2962 
2963 	if (mlist == NULL) {
2964 		mlist = p->free_mlist;
2965 		if (mlist == NULL)
2966 			return (NULL);
2967 		p->free_mlist = mlist->next;
2968 		mlist->address = base;
2969 		mlist->size = len;
2970 		mlist->next = mlist->prev = NULL;
2971 
2972 		return (mlist);
2973 	}
2974 
2975 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
2976 		if (base < ml->address) {
2977 			if ((base + len) < ml->address) {
2978 				nl = p->free_mlist;
2979 				if (nl == NULL)
2980 					return (NULL);
2981 				p->free_mlist = nl->next;
2982 				nl->address = base;
2983 				nl->size = len;
2984 				nl->next = ml;
2985 				if ((nl->prev = ml->prev) != NULL)
2986 					nl->prev->next = nl;
2987 				ml->prev = nl;
2988 				if (mlist == ml)
2989 					mlist = nl;
2990 			} else {
2991 				ml->size = MAX((base + len),
2992 					(ml->address + ml->size)) -
2993 					base;
2994 				ml->address = base;
2995 			}
2996 			break;
2997 
2998 		} else if (base <= (ml->address + ml->size)) {
2999 			ml->size = MAX((base + len),
3000 				(ml->address + ml->size)) -
3001 				MIN(ml->address, base);
3002 			ml->address = MIN(ml->address, base);
3003 			break;
3004 		}
3005 	}
3006 	if (ml == NULL) {
3007 		nl = p->free_mlist;
3008 		if (nl == NULL)
3009 			return (NULL);
3010 		p->free_mlist = nl->next;
3011 		nl->address = base;
3012 		nl->size = len;
3013 		nl->next = NULL;
3014 		nl->prev = tl;
3015 		tl->next = nl;
3016 	}
3017 
3018 	return (mlist);
3019 }
3020 
3021 /*
3022  * The routine performs the necessary memory COPY and MC adr SWITCH.
3023  * Both operations MUST be at the same "level" so that the stack is
3024  * maintained correctly between the copy and switch.  The switch
3025  * portion implements a caching mechanism to guarantee the code text
3026  * is cached prior to execution.  This is to guard against possible
3027  * memory access while the MC adr's are being modified.
3028  *
 * IMPORTANT: The drmach_copy_rename_end() function must immediately
 * follow drmach_copy_rename_prog__relocatable() so that the correct
 * "length" of drmach_copy_rename_prog__relocatable() can be
 * calculated.  This routine MUST be a LEAF function, i.e. it can
 * make NO function calls, primarily for two reasons:
 *
 *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
3040  *
3041  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3042  * providers (and anything else, for that matter) that this
3043  * function's text is manually relocated elsewhere before it is
3044  * executed.  That is, it cannot be safely instrumented with any
3045  * methodology that is PC-relative.
3046  */
3047 
/*
 * fmem_timeout is multiplied by system_clock_freq to give the delay, in
 * ticks, allowed for the rename command; it is therefore a number of
 * seconds.  The spec says 15 seconds is enough but the Fujitsu HW team
 * suggested 17 seconds.
 */
static int	fmem_timeout = 17;

/* 20MB; presumably the slowest acceptable copy rate -- confirm at its use */
static int	min_copy_size_per_sec = 20 * 1024 * 1024;

/* when non-zero, only the CPU running the setup is put in the copy set */
int		drmach_disable_mcopy = 0;
3057 
/*
 * The following delay loop executes sleep instruction to yield the
 * CPU to other strands.  If this is not done, some strand will tie
 * up the CPU in busy loops while the other strand cannot do useful
 * work.  The copy procedure will take a much longer time without this.
 *
 * ms   - delay; converted to stick ticks as ms * freq / 1000.
 * freq - stick ticks per second.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe in an unbraced if/else), and both arguments are
 * parenthesized so that expression arguments expand correctly.
 */
#define	DR_DELAY_IL(ms, freq)						\
	do {								\
		uint64_t start;						\
		uint64_t nstick;					\
		volatile uint64_t now;					\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;		\
		start = drmach_get_stick_il();				\
		now = start;						\
		while ((now - start) <= nstick) {			\
			drmach_sleep_il();				\
			now = drmach_get_stick_il();			\
		}							\
	} while (0)
3077 
/*
 * Copy-rename worker executed by every participating CPU.
 *
 * prog  - copy-rename program; the shared data, critical and stat areas
 *	   are reached through it.
 * cpuid - id of the CPU running this instance.
 *
 * The CPU whose id equals prog->data->cpuid acts as master, all others as
 * slaves.  The routine runs in three phases:
 *
 *   1. Rendezvous: each CPU posts FMEM_LOOP_COPY_READY in
 *	prog->critical->stat[]; the master waits for every CPU in
 *	cpu_slave_set and then releases them through fmem_status.stat.
 *   2. Copy: CPUs in cpu_copy_set copy their own memlist
 *	(prog->data->cpu_ml[cpuid]) 32 bytes at a time; every CPU then
 *	flushes its cache.
 *   3. Switch: the master waits for the slaves to report
 *	FMEM_LOOP_FMEM_READY and calls prog->critical->fmem(); slaves call
 *	prog->critical->loop().
 *
 * Returns, on the master: FMEM_XC_TIMEOUT, FMEM_COPY_ERROR,
 * FMEM_COPY_TIMEOUT, or the value returned by prog->critical->fmem().
 * On a slave: FMEM_TERMINATE when the master has flagged an error,
 * otherwise 0 (the loop() result is stored in prog->data->error[cpuid]).
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
	int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	extern void		flush_windows_il(void);
	/* written and read on the master path only */
	uint64_t		copy_start;

	/*
	 * flush_windows is moved here to make sure all
	 * registers used in the callers are flushed to
	 * memory before the copy.
	 *
	 * If flush_windows() is called too early in the
	 * calling function, the compiler might put some
	 * data in the local registers after flush_windows().
	 * After FMA, if there is any fill trap, the registers
	 * will contain stale data.
	 */

	flush_windows_il();

	/* announce that this CPU has arrived */
	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: deadline is now + prog->critical->delay ticks */
		limit = drmach_get_stick_il();
		limit += prog->critical->delay;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			/* wait for all CPU's to be ready */
			    for (;;) {
				if (prog->critical->stat[i] ==
					FMEM_LOOP_COPY_READY) {
					break;
				}
				DR_DELAY_IL(1, prog->data->stick_freq);
			    }
			    /*
			     * NOTE(review): the deadline is examined only
			     * after slave i has reported ready; the wait
			     * loop above has no timeout of its own.
			     */
			    curr = drmach_get_stick_il();
			    if (curr > limit) {
				prog->data->fmem_status.error =
					FMEM_XC_TIMEOUT;
				return (FMEM_XC_TIMEOUT);
			    }
			}
		}
		/* release the slaves and note when copying began */
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's go-ahead or for an error */
		for (;;) {
			if (prog->data->fmem_status.stat ==
				FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = FMEM_TERMINATE;
				return (FMEM_TERMINATE);
			}
			DR_DELAY_IL(1, prog->data->stick_freq);
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
	    for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* ml->address is used as an offset from both copy bases */
		s_pa = prog->data->s_copybasepa + ml->address;
		t_pa = prog->data->t_copybasepa + ml->address;
		nbytes = ml->size;

		while (nbytes != 0ull) {
			/* If the master has detected error, we just bail out */
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = FMEM_TERMINATE;
				return (FMEM_TERMINATE);
			}
			/*
			 * This copy does NOT use an ASI
			 * that avoids the Ecache, therefore
			 * the dst_pa addresses may remain
			 * in our Ecache after the dst_pa
			 * has been removed from the system.
			 * A subsequent write-back to memory
			 * will cause an ARB-stop because the
			 * physical address no longer exists
			 * in the system. Therefore we must
			 * flush out local Ecache after we
			 * finish the copy.
			 */

			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment the counter to signal that we are alive */
			prog->stat->nbytes[cpuid] += 32;

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));
		}
	    }
	    prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
	    membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.  This is done by every CPU,
	 * whether or not it took part in the copy.
	 */
	flush_cache_il();

	/*
	 * drmach_fmem_exec_script()
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* slaves get copy_delay ticks from the start of the copy */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			    for (;;) {
				/* we get FMEM_LOOP_FMEM_READY in normal case */
				if (prog->critical->stat[i] ==
					FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->critical->stat[i] ==
					FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
						FMEM_COPY_ERROR;
					return (FMEM_COPY_ERROR);
				}
				/* if we have not reached limit, wait more */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				/* past the deadline: record who is slow */
				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time =
					curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/* no progress, perhaps just finished */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
						FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->critical->stat[i] ==
						FMEM_COPY_ERROR) {
						prog->data->fmem_status.error =
							FMEM_COPY_ERROR;
						return (FMEM_COPY_ERROR);
					}
					prog->data->fmem_status.error =
					    FMEM_COPY_TIMEOUT;
					return (FMEM_COPY_TIMEOUT);
				}
				/* byte counter still advancing: keep waiting */
			    }
			}
		}
		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 *
		 * The script is handed the address of this CPU's stat slot;
		 * presumably that is where FMEM_LOOP_FMEM_READY gets posted
		 * for the master to see -- confirm against the script.
		 */
		rtn = prog->critical->loop((void *)(prog->critical),
			PAGESIZE, (void *)&(prog->critical->stat[cpuid]));
		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}
3283 
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This empty function is an end marker only.  It MUST
	 *		stay immediately after
	 *		drmach_copy_rename_prog__relocatable, because the
	 *		difference between the two addresses is used as the
	 *		size of that routine.  This relies on the compiler
	 *		emitting functions in the order in which they
	 *		appear :-o
	 */
}
3295 
3296 
3297 static void
3298 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3299 {
3300 	struct memlist *ml;
3301 	caddr_t buf;
3302 	int nbytes, s;
3303 
3304 	nbytes = PAGESIZE;
3305 	s = roundup(sizeof (struct memlist), sizeof (void *));
3306 	p->free_mlist = NULL;
3307 	buf = p->memlist_buffer;
3308 	while (nbytes >= sizeof (struct memlist)) {
3309 		ml = (struct memlist *)buf;
3310 		ml->next = p->free_mlist;
3311 		p->free_mlist = ml;
3312 		buf += s;
3313 		nbytes -= s;
3314 	}
3315 }
3316 
3317 static void
3318 drmach_lock_critical(caddr_t va, caddr_t new_va)
3319 {
3320 	tte_t tte;
3321 	int i;
3322 
3323 	kpreempt_disable();
3324 
3325 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3326 		vtag_flushpage(new_va, (uint64_t)ksfmmup);
3327 		sfmmu_memtte(&tte, va_to_pfn(va),
3328 			PROC_DATA|HAT_NOSYNC, TTE8K);
3329 		tte.tte_intlo |= TTE_LCK_INT;
3330 		sfmmu_dtlb_ld_kva(new_va, &tte);
3331 		sfmmu_itlb_ld_kva(new_va, &tte);
3332 		va += PAGESIZE;
3333 		new_va += PAGESIZE;
3334 	}
3335 }
3336 
3337 static void
3338 drmach_unlock_critical(caddr_t va)
3339 {
3340 	int i;
3341 
3342 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3343 		vtag_flushpage(va, (uint64_t)ksfmmup);
3344 		va += PAGESIZE;
3345 	}
3346 
3347 	kpreempt_enable();
3348 }
3349 
3350 sbd_error_t *
3351 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3352 	struct memlist *c_ml, drmachid_t *pgm_id)
3353 {
3354 	drmach_mem_t	*s_mem;
3355 	drmach_mem_t	*t_mem;
3356 	struct memlist	*x_ml;
3357 	uint64_t	s_copybasepa, t_copybasepa;
3358 	uint_t		len;
3359 	caddr_t		bp, wp;
3360 	int			s_bd, t_bd, cpuid, active_cpus, i;
3361 	uint64_t		c_addr;
3362 	size_t			c_size, copy_sz, sz;
3363 	extern void		drmach_fmem_loop_script();
3364 	extern void		drmach_fmem_loop_script_rtn();
3365 	extern int		drmach_fmem_exec_script();
3366 	extern void		drmach_fmem_exec_script_end();
3367 	sbd_error_t	*err;
3368 	drmach_copy_rename_program_t *prog = NULL;
3369 	drmach_copy_rename_program_t *prog_kmem = NULL;
3370 	void		(*mc_suspend)(void);
3371 	void		(*mc_resume)(void);
3372 	int		(*scf_fmem_start)(int, int);
3373 	int		(*scf_fmem_end)(void);
3374 	int		(*scf_fmem_cancel)(void);
3375 	uint64_t	(*scf_get_base_addr)(void);
3376 
3377 	if (!DRMACH_IS_MEM_ID(s_id))
3378 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3379 	if (!DRMACH_IS_MEM_ID(t_id))
3380 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3381 
3382 	for (i = 0; i < NCPU; i++) {
3383 		int lsb_id, onb_core_num, strand_id;
3384 		drmach_board_t *bp;
3385 
3386 		/*
3387 		 * this kind of CPU will spin in cache
3388 		 */
3389 		if (CPU_IN_SET(cpu_ready_set, i))
3390 			continue;
3391 
3392 		/*
3393 		 * Now check for any inactive CPU's that
3394 		 * have been hotadded.  This can only occur in
3395 		 * error condition in drmach_cpu_poweron().
3396 		 */
3397 		lsb_id = LSB_ID(i);
3398 		onb_core_num = ON_BOARD_CORE_NUM(i);
3399 		strand_id = STRAND_ID(i);
3400 		bp = drmach_get_board_by_bnum(lsb_id);
3401 		if (bp == NULL)
3402 			continue;
3403 		if (bp->cores[onb_core_num].core_hotadded &
3404 		    (1 << strand_id)) {
3405 		    if (!(bp->cores[onb_core_num].core_started &
3406 			(1 << strand_id))) {
3407 			return (drerr_new(0, EOPL_CPU_STATE, NULL));
3408 		    }
3409 		}
3410 	}
3411 
3412 	mc_suspend = (void (*)(void))
3413 	    modgetsymvalue("opl_mc_suspend", 0);
3414 	mc_resume = (void (*)(void))
3415 	    modgetsymvalue("opl_mc_resume", 0);
3416 
3417 	if (mc_suspend == NULL || mc_resume == NULL) {
3418 		return (drerr_new(0, EOPL_MC_OPL, NULL));
3419 	}
3420 
3421 	scf_fmem_start = (int (*)(int, int))
3422 	    modgetsymvalue("scf_fmem_start", 0);
3423 	if (scf_fmem_start == NULL) {
3424 		return (drerr_new(0, EOPL_SCF_FMEM, NULL));
3425 	}
3426 	scf_fmem_end = (int (*)(void))
3427 	    modgetsymvalue("scf_fmem_end", 0);
3428 	if (scf_fmem_end == NULL) {
3429 		return (drerr_new(0, EOPL_SCF_FMEM, NULL));
3430 	}
3431 	scf_fmem_cancel = (int (*)(void))
3432 	    modgetsymvalue("scf_fmem_cancel", 0);
3433 	if (scf_fmem_cancel == NULL) {
3434 		return (drerr_new(0, EOPL_SCF_FMEM, NULL));
3435 	}
3436 	scf_get_base_addr = (uint64_t (*)(void))
3437 	    modgetsymvalue("scf_get_base_addr", 0);
3438 	if (scf_get_base_addr == NULL) {
3439 		return (drerr_new(0, EOPL_SCF_FMEM, NULL));
3440 	}
3441 	s_mem = s_id;
3442 	t_mem = t_id;
3443 
3444 	s_bd = s_mem->dev.bp->bnum;
3445 	t_bd = t_mem->dev.bp->bnum;
3446 
3447 	/* calculate source and target base pa */
3448 
3449 	s_copybasepa = s_mem->slice_base;
3450 	t_copybasepa = t_mem->slice_base;
3451 
3452 	/* adjust copy memlist addresses to be relative to copy base pa */
3453 	x_ml = c_ml;
3454 	while (x_ml != NULL) {
3455 		x_ml->address -= s_copybasepa;
3456 		x_ml = x_ml->next;
3457 	}
3458 
3459 	/*
3460 	 * bp will be page aligned, since we're calling
3461 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3462 	 */
3463 
3464 	prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
3465 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
3466 
3467 	prog_kmem->prog = prog_kmem;
3468 
3469 	/*
3470 	 * To avoid MTLB hit, we allocate a new VM space and remap
3471 	 * the kmem_alloc buffer to that address.  This solves
3472 	 * 2 problems we found:
3473 	 * - the kmem_alloc buffer can be just a chunk inside
3474 	 *   a much larger, e.g. 4MB buffer and MTLB will occur
3475 	 *   if there are both a 4MB and a 8K TLB mapping to
3476 	 *   the same VA range.
3477 	 * - the kmem mapping got dropped into the TLB by other
3478 	 *   strands, unintentionally.
3479 	 * Note that the pointers like data, critical, memlist_buffer,
3480 	 * and stat inside the copy rename structure are mapped to this
3481 	 * alternate VM space so we must make sure we lock the TLB mapping
3482 	 * whenever we access data pointed to by these pointers.
3483 	 */
3484 
3485 	prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
3486 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
3487 	wp = bp = (caddr_t)prog;
3488 
3489 	/* Now remap prog_kmem to prog */
3490 	drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3491 
3492 	/* All pointers in prog are based on the alternate mapping */
3493 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3494 		sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3495 
3496 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3497 		<= ((uint64_t)prog + PAGESIZE));
3498 
3499 	prog->critical = (drmach_copy_rename_critical_t *)
3500 		(wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3501 
3502 	prog->memlist_buffer = (caddr_t)(wp +
3503 		DRMACH_FMEM_MLIST_PAGE * PAGESIZE);
3504 
3505 	prog->stat = (drmach_cr_stat_t *)(wp +
3506 		DRMACH_FMEM_STAT_PAGE * PAGESIZE);
3507 
3508 	/* LINTED */
3509 	ASSERT(sizeof (drmach_cr_stat_t)
3510 		<= ((DRMACH_FMEM_LOCKED_PAGES - DRMACH_FMEM_STAT_PAGE)
3511 		* PAGESIZE));
3512 
3513 	prog->critical->scf_reg_base = (uint64_t)-1;
3514 	prog->critical->scf_td[0] = (s_bd & 0xff);
3515 	prog->critical->scf_td[1] = (t_bd & 0xff);
3516 	for (i = 2; i < 15; i++) {
3517 		prog->critical->scf_td[i]   = 0;
3518 	}
3519 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3520 
3521 	bp = (caddr_t)prog->critical;
3522 	len = sizeof (drmach_copy_rename_critical_t);
3523 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3524 
3525 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3526 		(ulong_t)drmach_copy_rename_prog__relocatable);
3527 
3528 	/*
3529 	 * We always leave 1K nop's to prevent the processor from
3530 	 * speculative execution that causes memory access
3531 	 */
3532 	wp = wp + len + 1024;
3533 
3534 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3535 		(ulong_t)drmach_fmem_exec_script);
3536 	/* this is the entry point of the loop script */
3537 	wp = wp + len + 1024;
3538 
3539 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3540 		(ulong_t)drmach_fmem_loop_script);
3541 	wp = wp + len + 1024;
3542 
3543 	/* now we make sure there is 1K extra */
3544 
3545 	if ((wp - bp) > PAGESIZE) {
3546 		err = drerr_new(0, EOPL_FMEM_SETUP, NULL);
3547 		goto out;
3548 	}
3549 
3550 	bp = (caddr_t)prog->critical;
3551 	len = sizeof (drmach_copy_rename_critical_t);
3552 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3553 
3554 	prog->critical->run = (int (*)())(wp);
3555 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3556 		(ulong_t)drmach_copy_rename_prog__relocatable);
3557 
3558 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3559 
3560 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3561 
3562 	prog->critical->fmem = (int (*)())(wp);
3563 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3564 		(ulong_t)drmach_fmem_exec_script);
3565 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3566 
3567 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3568 		(ulong_t)drmach_fmem_exec_script);
3569 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3570 
3571 	prog->critical->loop = (int (*)())(wp);
3572 	len = (int)((ulong_t)drmach_fmem_exec_script -
3573 		(ulong_t)drmach_fmem_loop_script);
3574 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3575 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3576 		(ulong_t)drmach_fmem_loop_script);
3577 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3578 
3579 	/* now we are committed, call SCF, soft suspend mac patrol */
3580 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3581 		err = drerr_new(0, EOPL_SCF_FMEM_START, NULL);
3582 		goto out;
3583 	}
3584 	prog->data->scf_fmem_end = scf_fmem_end;
3585 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3586 	prog->data->scf_get_base_addr = scf_get_base_addr;
3587 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3588 	/* soft suspend mac patrol */
3589 	(*mc_suspend)();
3590 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3591 	prog->data->mc_resume = mc_resume;
3592 
3593 	prog->critical->inst_loop_ret  =
3594 		*(uint64_t *)(prog->critical->loop_rtn);
3595 
3596 	/*
3597 	 * 0x30800000 is op code "ba,a	+0"
3598 	 */
3599 
3600 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3601 
3602 	/*
3603 	 * set the value of SCF FMEM TIMEOUT
3604 	 */
3605 	prog->critical->delay = fmem_timeout * system_clock_freq;
3606 
3607 	prog->data->s_mem = (drmachid_t)s_mem;
3608 	prog->data->t_mem = (drmachid_t)t_mem;
3609 
3610 	cpuid = CPU->cpu_id;
3611 	prog->data->cpuid = cpuid;
3612 	prog->data->cpu_ready_set = cpu_ready_set;
3613 	prog->data->cpu_slave_set = cpu_ready_set;
3614 	prog->data->slowest_cpuid = (processorid_t)-1;
3615 	prog->data->copy_wait_time = 0;
3616 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3617 
3618 	for (i = 0; i < NCPU; i++) {
3619 		prog->data->cpu_ml[i] = NULL;
3620 	}
3621 
3622 	active_cpus = 0;
3623 	if (drmach_disable_mcopy) {
3624 		active_cpus = 1;
3625 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3626 	} else {
3627 		for (i = 0; i < NCPU; i++) {
3628 			if (CPU_IN_SET(cpu_ready_set, i) &&
3629 				CPU_ACTIVE(cpu[i])) {
3630 				CPUSET_ADD(prog->data->cpu_copy_set, i);
3631 				active_cpus++;
3632 			}
3633 		}
3634 	}
3635 
3636 	drmach_setup_memlist(prog);
3637 
3638 	x_ml = c_ml;
3639 	sz = 0;
3640 	while (x_ml != NULL) {
3641 		sz += x_ml->size;
3642 		x_ml = x_ml->next;
3643 	}
3644 
3645 	copy_sz = sz/active_cpus;
3646 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3647 
3648 	while (sz > copy_sz*active_cpus) {
3649 		copy_sz += MMU_PAGESIZE4M;
3650 	}
3651 
3652 	prog->data->stick_freq = system_clock_freq;
3653 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3654 		system_clock_freq;
3655 
3656 	x_ml = c_ml;
3657 	c_addr = x_ml->address;
3658 	c_size = x_ml->size;
3659 
3660 	for (i = 0; i < NCPU; i++) {
3661 		prog->stat->nbytes[i] = 0;
3662 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3663 			continue;
3664 		}
3665 		sz = copy_sz;
3666 
3667 		while (sz) {
3668 			if (c_size > sz) {
3669 				prog->data->cpu_ml[i] =
3670 					drmach_memlist_add_span(prog,
3671 					prog->data->cpu_ml[i],
3672 					c_addr, sz);
3673 				c_addr += sz;
3674 				c_size -= sz;
3675 				break;
3676 			} else {
3677 				sz -= c_size;
3678 				prog->data->cpu_ml[i] = drmach_memlist_add_span(
3679 					prog, prog->data->cpu_ml[i],
3680 						c_addr, c_size);
3681 				x_ml = x_ml->next;
3682 				if (x_ml != NULL) {
3683 					c_addr = x_ml->address;
3684 					c_size = x_ml->size;
3685 				} else {
3686 					goto end;
3687 				}
3688 			}
3689 		}
3690 	}
3691 end:
3692 	prog->data->s_copybasepa = s_copybasepa;
3693 	prog->data->t_copybasepa = t_copybasepa;
3694 	prog->data->c_ml = c_ml;
3695 	*pgm_id = prog_kmem;
3696 
3697 	/* Unmap the alternate space.  It will have to be remapped again */
3698 	drmach_unlock_critical((caddr_t)prog);
3699 	return (NULL);
3700 out:
3701 	if (prog != NULL) {
3702 		drmach_unlock_critical((caddr_t)prog);
3703 		vmem_free(heap_arena, prog,
3704 			DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3705 	}
3706 	if (prog_kmem != NULL) {
3707 		kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3708 	}
3709 	return (err);
3710 }
3711 
3712 sbd_error_t *
3713 drmach_copy_rename_fini(drmachid_t id)
3714 {
3715 	drmach_copy_rename_program_t	*prog = id;
3716 	sbd_error_t			*err = NULL;
3717 	int				rv;
3718 
3719 	/*
3720 	 * Note that we have to delay calling SCF to find out the
3721 	 * status of the FMEM operation here because SCF cannot
3722 	 * respond while it is suspended.
3723 	 * This create a small window when we are sure about the
3724 	 * base address of the system board.
3725 	 * If there is any call to mc-opl to get memory unum,
3726 	 * mc-opl will return UNKNOWN as the unum.
3727 	 */
3728 
3729 	/*
3730 	 * we have to remap again because all the pointer like data,
3731 	 * critical in prog are based on the alternate vmem space.
3732 	 */
3733 	(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
3734 
3735 	if (prog->data->c_ml != NULL)
3736 		memlist_delete(prog->data->c_ml);
3737 
3738 	if ((prog->data->fmem_status.op &
3739 		(OPL_FMEM_SCF_START| OPL_FMEM_MC_SUSPEND)) !=
3740 		(OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3741 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: "
3742 			"invalid op code %x\n",
3743 				prog->data->fmem_status.op);
3744 	}
3745 
3746 	/* possible ops are SCF_START, MC_SUSPEND */
3747 	if (prog->critical->fmem_issued) {
3748 		if (prog->data->fmem_status.error != FMEM_NO_ERROR)
3749 			cmn_err(CE_PANIC, "scf fmem request failed");
3750 		rv = (*prog->data->scf_fmem_end)();
3751 		if (rv) {
3752 			cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
3753 		}
3754 		/*
3755 		 * If we get here, rename is successful.
3756 		 * Do all the copy rename post processing.
3757 		 */
3758 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3759 			(drmach_mem_t *)prog->data->t_mem);
3760 	} else {
3761 		if (prog->data->fmem_status.error != 0) {
3762 			cmn_err(CE_WARN, "Kernel Migration fails. 0x%x",
3763 				prog->data->fmem_status.error);
3764 			err = drerr_new(1, EOPL_FMEM_ERROR, "FMEM error = 0x%x",
3765 				prog->data->fmem_status.error);
3766 		}
3767 		rv = (*prog->data->scf_fmem_cancel)();
3768 		if (rv) {
3769 		    cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv);
3770 		    if (!err)
3771 			err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
3772 			    "rv = 0x%x", rv);
3773 		}
3774 	}
3775 	/* soft resume mac patrol */
3776 	(*prog->data->mc_resume)();
3777 
3778 	drmach_unlock_critical((caddr_t)prog->locked_prog);
3779 
3780 	vmem_free(heap_arena, prog->locked_prog,
3781 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3782 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3783 	return (err);
3784 }
3785 
3786 /*ARGSUSED*/
3787 static void
3788 drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
3789 {
3790 	drmach_copy_rename_program_t	*prog =
3791 		(drmach_copy_rename_program_t *)id;
3792 	register int			cpuid;
3793 	extern void			drmach_flush();
3794 	extern void			membar_sync_il();
3795 	extern void			drmach_flush_icache();
3796 	on_trap_data_t			otd;
3797 
3798 	cpuid = CPU->cpu_id;
3799 
3800 	if (on_trap(&otd, OT_DATA_EC)) {
3801 		no_trap();
3802 		prog->data->error[cpuid] = FMEM_COPY_ERROR;
3803 		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3804 		drmach_flush_icache();
3805 		membar_sync_il();
3806 		return;
3807 	}
3808 
3809 
3810 	/*
3811 	 * jmp drmach_copy_rename_prog().
3812 	 */
3813 
3814 	drmach_flush(prog->critical, PAGESIZE);
3815 	(void) prog->critical->run(prog, cpuid);
3816 	drmach_flush_icache();
3817 
3818 	no_trap();
3819 
3820 	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3821 
3822 	membar_sync_il();
3823 }
3824 
3825 static void
3826 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3827 {
3828 	uint64_t s_base, t_base;
3829 	drmach_board_t *s_board, *t_board;
3830 	struct memlist *ml;
3831 
3832 	s_board = s_mem->dev.bp;
3833 	t_board = t_mem->dev.bp;
3834 	if (s_board == NULL || t_board == NULL) {
3835 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3836 		return;
3837 	}
3838 	s_base = s_mem->slice_base;
3839 	t_base = t_mem->slice_base;
3840 
3841 	s_mem->slice_base = t_base;
3842 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3843 
3844 	for (ml = s_mem->memlist; ml; ml = ml->next) {
3845 		ml->address = ml->address - s_base + t_base;
3846 	}
3847 
3848 	t_mem->slice_base = s_base;
3849 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3850 
3851 	for (ml = t_mem->memlist; ml; ml = ml->next) {
3852 		ml->address = ml->address - t_base + s_base;
3853 	}
3854 
3855 	/*
3856 	 * IKP has to update the sb-mem-ranges for mac patrol driver
3857 	 * when it resumes, it will re-read the sb-mem-range property
3858 	 * to get the new base address
3859 	 */
3860 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
3861 		cmn_err(CE_PANIC, "Could not update device nodes\n");
3862 }
3863 
3864 void
3865 drmach_copy_rename(drmachid_t id)
3866 {
3867 	drmach_copy_rename_program_t	*prog_kmem = id;
3868 	drmach_copy_rename_program_t	*prog;
3869 	cpuset_t	cpuset;
3870 	int		cpuid;
3871 	uint64_t	inst;
3872 	register int	rtn;
3873 	extern int	in_sync;
3874 	int		old_in_sync;
3875 	extern void	drmach_sys_trap();
3876 	extern void	drmach_flush();
3877 	extern void	drmach_flush_icache();
3878 	extern uint64_t	patch_inst(uint64_t *, uint64_t);
3879 	on_trap_data_t	otd;
3880 
3881 	prog = prog_kmem->locked_prog;
3882 
3883 	/*
3884 	 * We must immediately drop in the TLB because all pointers
3885 	 * are based on the alternate vmem space.
3886 	 */
3887 
3888 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3889 
3890 	/*
3891 	 * we call scf to get the base address here becuase if scf
3892 	 * has not been suspended yet, the active path can be changing and
3893 	 * sometimes it is not even mapped.  We call the interface when
3894 	 * the OS has been quiesced.
3895 	 */
3896 	prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();
3897 
3898 	if (prog->critical->scf_reg_base == (uint64_t)-1 ||
3899 		prog->critical->scf_reg_base == NULL) {
3900 		prog->data->fmem_status.error = FMEM_SCF_ERR;
3901 		drmach_unlock_critical((caddr_t)prog);
3902 		return;
3903 	}
3904 
3905 	cpuset = prog->data->cpu_ready_set;
3906 
3907 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3908 		if (CPU_IN_SET(cpuset, cpuid)) {
3909 			prog->critical->stat[cpuid] = FMEM_LOOP_START;
3910 			prog->data->error[cpuid] = FMEM_NO_ERROR;
3911 		}
3912 	}
3913 
3914 	old_in_sync = in_sync;
3915 	in_sync = 1;
3916 	cpuid = CPU->cpu_id;
3917 
3918 	CPUSET_DEL(cpuset, cpuid);
3919 
3920 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3921 		if (CPU_IN_SET(cpuset, cpuid)) {
3922 			xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
3923 				(uint64_t)prog_kmem, (uint64_t)prog);
3924 		}
3925 	}
3926 
3927 	cpuid = CPU->cpu_id;
3928 
3929 	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
3930 				(uint64_t)drmach_copy_rename_slave,
3931 				(uint64_t)prog);
3932 	xt_sync(cpuset);
3933 
3934 	if (on_trap(&otd, OT_DATA_EC)) {
3935 		rtn = FMEM_COPY_ERROR;
3936 		drmach_flush_icache();
3937 		goto done;
3938 	}
3939 
3940 	/*
3941 	 * jmp drmach_copy_rename_prog().
3942 	 */
3943 	drmach_flush(prog->critical, PAGESIZE);
3944 	rtn = prog->critical->run(prog, cpuid);
3945 	drmach_flush_icache();
3946 
3947 
3948 done:
3949 	no_trap();
3950 	if (rtn == FMEM_HW_ERROR) {
3951 		kpreempt_enable();
3952 		prom_panic("URGENT_ERROR_TRAP is "
3953 			"detected during FMEM.\n");
3954 	}
3955 
3956 	/*
3957 	 * In normal case, all slave CPU's are still spinning in
3958 	 * the assembly code.  The master has to patch the instruction
3959 	 * to get them out.
3960 	 * In error case, e.g. COPY_ERROR, some slave CPU's might
3961 	 * have aborted and already returned and sset LOOP_EXIT status.
3962 	 * Some CPU might still be copying.
3963 	 * In any case, some delay is necessary to give them
3964 	 * enough time to set the LOOP_EXIT status.
3965 	 */
3966 
3967 	for (;;) {
3968 		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
3969 			prog->critical->inst_loop_ret);
3970 		if (prog->critical->inst_loop_ret == inst) {
3971 			break;
3972 		}
3973 	}
3974 
3975 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3976 		uint64_t	last, now;
3977 		if (!CPU_IN_SET(cpuset, cpuid)) {
3978 			continue;
3979 		}
3980 		last = prog->stat->nbytes[cpuid];
3981 		/*
3982 		 * Wait for all CPU to exit.
3983 		 * However we do not want an infinite loop
3984 		 * so we detect hangup situation here.
3985 		 * If the slave CPU is still copying data,
3986 		 * we will continue to wait.
3987 		 * In error cases, the master has already set
3988 		 * fmem_status.error to abort the copying.
3989 		 * 1 m.s delay for them to abort copying and
3990 		 * return to drmach_copy_rename_slave to set
3991 		 * FMEM_LOOP_EXIT status should be enough.
3992 		 */
3993 		for (;;) {
3994 			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
3995 				break;
3996 			drmach_sleep_il();
3997 			drv_usecwait(1000);
3998 			now = prog->stat->nbytes[cpuid];
3999 			if (now <= last) {
4000 			    drv_usecwait(1000);
4001 			    if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
4002 				break;
4003 			    cmn_err(CE_PANIC,
4004 				"CPU %d hang during Copy Rename", cpuid);
4005 			}
4006 			last = now;
4007 		}
4008 		if (prog->data->error[cpuid] == FMEM_HW_ERROR) {
4009 			prom_panic("URGENT_ERROR_TRAP is "
4010 				"detected during FMEM.\n");
4011 		}
4012 	}
4013 
4014 	/*
4015 	 * This must be done after all strands have exit.
4016 	 * Removing the TLB entry will affect both strands
4017 	 * in the same core.
4018 	 */
4019 
4020 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4021 		if (CPU_IN_SET(cpuset, cpuid)) {
4022 			xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
4023 				(uint64_t)prog, 0);
4024 		}
4025 	}
4026 
4027 	in_sync = old_in_sync;
4028 
4029 	/*
4030 	 * we should unlock before the following lock to keep the kpreempt
4031 	 * count correct.
4032 	 */
4033 	(void) drmach_unlock_critical((caddr_t)prog);
4034 
4035 	/*
4036 	 * we must remap again.  TLB might have been removed in above xcall.
4037 	 */
4038 
4039 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4040 
4041 	if (prog->data->fmem_status.error == 0)
4042 		prog->data->fmem_status.error = rtn;
4043 
4044 	if (prog->data->copy_wait_time > 0) {
4045 		DRMACH_PR("Unexpected long wait time %ld seconds "
4046 			"during copy rename on CPU %d\n",
4047 			prog->data->copy_wait_time/prog->data->stick_freq,
4048 			prog->data->slowest_cpuid);
4049 	}
4050 	drmach_unlock_critical((caddr_t)prog);
4051 }
4052