xref: /titanic_52/usr/src/uts/sun4u/opl/io/drmach.c (revision 6b990117eca1cdf7ad1f4424209791ad38be9791)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/debug.h>
30 #include <sys/types.h>
31 #include <sys/varargs.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/dditypes.h>
35 #include <sys/devops.h>
36 #include <sys/modctl.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/stat.h>
44 #include <sys/kmem.h>
45 #include <sys/vmem.h>
46 #include <sys/opl_olympus_regs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/cpupart.h>
49 #include <sys/mem_config.h>
50 #include <sys/ddi_impldefs.h>
51 #include <sys/systm.h>
52 #include <sys/machsystm.h>
53 #include <sys/autoconf.h>
54 #include <sys/cmn_err.h>
55 #include <sys/sysmacros.h>
56 #include <sys/x_call.h>
57 #include <sys/promif.h>
58 #include <sys/prom_plat.h>
59 #include <sys/membar.h>
60 #include <vm/seg_kmem.h>
61 #include <sys/mem_cage.h>
62 #include <sys/stack.h>
63 #include <sys/archsystm.h>
64 #include <vm/hat_sfmmu.h>
65 #include <sys/pte.h>
66 #include <sys/mmu.h>
67 #include <sys/cpu_module.h>
68 #include <sys/obpdefs.h>
69 #include <sys/note.h>
70 #include <sys/ontrap.h>
71 #include <sys/cpu_sgnblk_defs.h>
72 #include <sys/opl.h>
73 
74 
75 #include <sys/promimpl.h>
76 #include <sys/prom_plat.h>
77 #include <sys/kobj.h>
78 
79 #include <sys/sysevent.h>
80 #include <sys/sysevent/dr.h>
81 #include <sys/sysevent/eventdefs.h>
82 
83 #include <sys/drmach.h>
84 #include <sys/dr_util.h>
85 
86 #include <sys/fcode.h>
87 #include <sys/opl_cfg.h>
88 
89 extern void		bcopy32_il(uint64_t, uint64_t);
90 extern void		flush_cache_il(void);
91 extern void		drmach_sleep_il(void);
92 
/*
 * Argument bundle handed to a node-walk callback.
 */
typedef struct {
	struct drmach_node	*node;	/* node object the walk was started on */
	void			*data;	/* caller-supplied cookie, passed through */
} drmach_node_walk_args_t;
97 
/*
 * Device-tree node abstraction.  All operations go through the function
 * pointers below; drmach_node_new() installs the DDI-based
 * implementations (drmach_node_ddi_*).
 */
typedef struct drmach_node {
	/*
	 * Current position; the DDI implementation stores a dev_info_t *
	 * here (see drmach_node_ddi_get_dip()).
	 */
	void		*here;

	pnode_t		(*get_dnode)(struct drmach_node *node);
	/* invoke cb for visited nodes; returns the callback's error value */
	int		(*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	/* property accessors: return 0 on success, -1 on failure */
	int		(*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	int		(*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	/* fill in pnode so that it refers to the parent of node */
	int		(*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
112 
/*
 * Bounds-checked array of drmachid_t, addressed by indices in the
 * inclusive range [min_index, max_index].
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr[] in bytes */
	drmachid_t	*arr;		/* slot for idx is arr[idx - min_index] */
} drmach_array_t;
119 
/*
 * Header shared by every drmach object (board, cpu, mem, io).
 */
typedef struct {
	/*
	 * Type tag: holds the address of the constructor that built the
	 * object; the DRMACH_IS_*_ID() macros compare against it.
	 */
	void		*isa;

	void		(*dispose)(drmachid_t);
	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];	/* printable object name */
} drmach_common_t;
129 
/*
 * Per-core strand state.  Each field is a bitmask in which bit
 * (1 << strand id) stands for one strand of the core.
 */
typedef	struct {
	uint32_t	core_present;	/* strands found in the device tree */
	uint32_t	core_hotadded;	/* strands added via prom_hotaddcpu() */
	uint32_t	core_started;	/* set to core_present on boot boards */
} drmach_cmp_t;
135 
/*
 * System board object.  assigned, powered and connected are initialised
 * from the boot_board flag by drmach_board_new().
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: see DRMACH_OBJ() */
	int		 bnum;		/* board number; index in drmach_boards */
	int		 assigned;
	int		 powered;
	int		 connected;
	int		 cond;		/* reported by drmach_board_status() */
	drmach_node_t	*tree;		/* node object used for tree walks */
	drmach_array_t	*devices;	/* devices on this board, or NULL */
	int		boot_board;	/* if board exists on bootup */
	/* indexed by on-board core number, see ON_BOARD_CORE_NUM() */
	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD];
} drmach_board_t;
148 
/*
 * Common part of every device object (cpu, mem, io).  A prototype is
 * filled in by drmach_device_new() and handed to the type-specific
 * constructor.
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: see DRMACH_OBJ() */
	drmach_board_t	*bp;		/* board the device belongs to */
	int		 unum;		/* unit number, set by the constructor */
	int		portid;
	int		 busy;
	int		 powered;
	const char	*type;		/* type string from drmach_name2type[] */
	drmach_node_t	*node;		/* device tree node of the device */
} drmach_device_t;
159 
/*
 * CPU device object.
 * NOTE(review): the sb/chipid/coreid/strandid fields are not used in the
 * part of the file visible here; presumably they are the decoded
 * components of cpuid -- confirm against drmach_cpu_new().
 */
typedef struct drmach_cpu {
	drmach_device_t  dev;		/* common device part, must be first */
	processorid_t    cpuid;
	int		sb;
	int		chipid;
	int		coreid;
	int		strandid;
	int		status;		/* presumably OPL_CPU_HOTADDED or 0 */
#define	OPL_CPU_HOTADDED	1
} drmach_cpu_t;
170 
/*
 * Memory device object; the slice and memlist fields are filled in by
 * drmach_setup_mc_info().
 */
typedef struct drmach_mem {
	drmach_device_t  dev;		/* common device part, must be first */
	uint64_t	slice_base;	/* "sb-mem-ranges" element 0 */
	uint64_t	slice_size;	/* "sb-mem-ranges" element 1 */
	uint64_t	base_pa;	/* lowest installed memory base */
	uint64_t	nbytes;		/* size of installed memory */
	struct memlist *memlist;	/* spans that make up nbytes */
} drmach_mem_t;
179 
/*
 * IO (pci) device object; channel and leaf are decoded from the portid
 * by drmach_io_new().
 */
typedef struct drmach_io {
	drmach_device_t  dev;		/* common device part, must be first */
	int	channel;		/* (portid >> 1) & 0x7 */
	int	leaf;			/* portid & 0x1 */
} drmach_io_t;
185 
/*
 * Domain-wide DR state, set up by drmach_init().
 */
typedef struct drmach_domain_info {
	/* bit (1 << bnum) set for each board in "floating-boards" */
	uint32_t	floating;
	int		allow_dr;	/* result of opl_check_dr_status() */
} drmach_domain_info_t;
190 
191 drmach_domain_info_t drmach_domain;
192 
193 typedef struct {
194 	int		 flags;
195 	drmach_device_t	*dp;
196 	sbd_error_t	*err;
197 	dev_info_t	*dip;
198 } drmach_config_args_t;
199 
200 typedef struct {
201 	drmach_board_t	*obj;
202 	int		 ndevs;
203 	void		*a;
204 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
205 	sbd_error_t	*err;
206 } drmach_board_cb_data_t;
207 
208 static drmach_array_t	*drmach_boards;
209 
210 static sbd_error_t	*drmach_device_new(drmach_node_t *,
211 				drmach_board_t *, int, drmachid_t *);
212 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
213 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
214 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
215 
216 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
217 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
218 				char *name, void *buf, int len);
219 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
220 				char *name, int *len);
221 
222 static int 		drmach_get_portid(drmach_node_t *);
223 static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
224 static int		opl_check_dr_status();
225 static void		drmach_io_dispose(drmachid_t);
226 static sbd_error_t	*drmach_io_release(drmachid_t);
227 static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
228 static int 		drmach_init(void);
229 static void 		drmach_fini(void);
230 static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
231 static drmach_board_t	*drmach_get_board_by_bnum(int);
232 
233 /* options for the second argument in drmach_add_remove_cpu() */
234 #define	HOTADD_CPU	1
235 #define	HOTREMOVE_CPU	2
236 
237 #define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
238 	(OPL_MAX_COREID_PER_BOARD - 1))
239 
240 extern struct cpu	*SIGBCPU;
241 
242 static int		drmach_name2type_idx(char *);
243 static drmach_board_t	*drmach_board_new(int, int);
244 
245 #ifdef DEBUG
246 
247 #define	DRMACH_PR		if (drmach_debug) printf
248 int drmach_debug = 1;		 /* set to non-zero to enable debug messages */
249 #else
250 
251 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
252 #endif /* DEBUG */
253 
254 
/*
 * Object identification helpers.
 *
 * A drmachid_t is an opaque handle.  DRMACH_OBJ() views it as the
 * drmach_common_t that every drmach object starts with, and the
 * DRMACH_IS_*_ID() macros compare its isa tag with the address of the
 * constructor for the object type in question.
 *
 * The argument is fully parenthesized so that an arbitrary expression
 * can be passed.  Note that it is still evaluated more than once, so it
 * must be free of side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_NULL_ID(id)	((id) == 0)

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
287 
288 #define	DRMACH_INTERNAL_ERROR() \
289 	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)
290 
291 static char		*drmach_ie_fmt = "drmach.c %d";
292 
293 static struct {
294 	const char	*name;
295 	const char	*type;
296 	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *);
297 } drmach_name2type[] = {
298 	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
299 	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
300 	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
301 };
302 
303 /* utility */
304 #define	MBYTE	(1048576ull)
305 
306 /*
307  * drmach autoconfiguration data structures and interfaces
308  */
309 
310 extern struct mod_ops mod_miscops;
311 
312 static struct modlmisc modlmisc = {
313 	&mod_miscops,
314 	"OPL DR 1.1"
315 };
316 
317 static struct modlinkage modlinkage = {
318 	MODREV_1,
319 	(void *)&modlmisc,
320 	NULL
321 };
322 
323 static krwlock_t drmach_boards_rwlock;
324 
325 typedef const char	*fn_t;
326 
327 int
328 _init(void)
329 {
330 	int err;
331 
332 	if ((err = drmach_init()) != 0) {
333 		return (err);
334 	}
335 
336 	if ((err = mod_install(&modlinkage)) != 0) {
337 		drmach_fini();
338 	}
339 
340 	return (err);
341 }
342 
343 int
344 _fini(void)
345 {
346 	int	err;
347 
348 	if ((err = mod_remove(&modlinkage)) == 0)
349 		drmach_fini();
350 
351 	return (err);
352 }
353 
354 int
355 _info(struct modinfo *modinfop)
356 {
357 	return (mod_info(&modlinkage, modinfop));
358 }
359 
360 struct drmach_mc_lookup {
361 	int	bnum;
362 	drmach_board_t	*bp;
363 	dev_info_t *dip;	/* rv - set if found */
364 };
365 
366 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
367 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
368 
369 static int
370 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
371 {
372 	uint64_t	memory_ranges[128];
373 	int len;
374 	struct memlist	*ml;
375 	int rv;
376 	hwd_sb_t *hwd;
377 	hwd_memory_t *pm;
378 
379 	len = sizeof (memory_ranges);
380 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
381 		DDI_PROP_DONTPASS, "sb-mem-ranges",
382 	    (caddr_t)&memory_ranges[0], &len) != DDI_PROP_SUCCESS) {
383 		mp->slice_base = 0;
384 		mp->slice_size = 0;
385 		return (-1);
386 	}
387 	mp->slice_base = memory_ranges[0];
388 	mp->slice_size = memory_ranges[1];
389 
390 	if (!mp->dev.bp->boot_board) {
391 		int i;
392 
393 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
394 
395 		if (rv != 0) {
396 			return (-1);
397 		}
398 
399 		ml = NULL;
400 		pm = &hwd->sb_cmu.cmu_memory;
401 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
402 			if (pm->mem_chunks[i].chnk_size > 0) {
403 				ml = memlist_add_span(ml,
404 					pm->mem_chunks[i].chnk_start_address,
405 					pm->mem_chunks[i].chnk_size);
406 			}
407 		}
408 	} else {
409 		/*
410 		 * we intersect phys_install to get base_pa.
411 		 * This only works at bootup time.
412 		 */
413 
414 		memlist_read_lock();
415 		ml = memlist_dup(phys_install);
416 		memlist_read_unlock();
417 
418 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
419 		if (ml) {
420 			uint64_t basepa, endpa;
421 			endpa = _ptob64(physmax + 1);
422 
423 			basepa = mp->slice_base + mp->slice_size;
424 
425 			ml = memlist_del_span(ml, basepa, endpa - basepa);
426 		}
427 	}
428 
429 	if (ml) {
430 		uint64_t nbytes = 0;
431 		struct memlist *p;
432 		for (p = ml; p; p = p->next) {
433 			nbytes += p->size;
434 		}
435 		if ((mp->nbytes = nbytes) > 0)
436 			mp->base_pa = ml->address;
437 		else
438 			mp->base_pa = 0;
439 		mp->memlist = ml;
440 	} else {
441 		mp->base_pa = 0;
442 		mp->nbytes = 0;
443 	}
444 	return (0);
445 }
446 
447 
/*
 * Request block passed from drmach_add_remove_cpu() to the
 * drmach_cpu_cb() tree-walk callback.
 */
struct drmach_hotcpu {
	drmach_board_t *bp;	/* board object for bnum */
	int	bnum;		/* board whose "cmp" nodes are descended */
	int	core_id;	/* on-board core number to act on */
	int 	rv;		/* result: 0, or -1 if the callback failed */
	int	option;		/* HOTADD_CPU or HOTREMOVE_CPU */
};
455 
456 static int
457 drmach_cpu_cb(dev_info_t *dip, void *arg)
458 {
459 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
460 	char name[OBP_MAXDRVNAME];
461 	int len = OBP_MAXDRVNAME;
462 	int bnum, core_id, strand_id;
463 	drmach_board_t *bp;
464 
465 	if (dip == ddi_root_node()) {
466 		return (DDI_WALK_CONTINUE);
467 	}
468 
469 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
470 	    DDI_PROP_DONTPASS, "name",
471 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
472 		return (DDI_WALK_PRUNECHILD);
473 	}
474 
475 	/* only cmp has board number */
476 	bnum = -1;
477 	len = sizeof (bnum);
478 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
479 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
480 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
481 		bnum = -1;
482 	}
483 
484 	if (strcmp(name, "cmp") == 0) {
485 		if (bnum != p->bnum)
486 			return (DDI_WALK_PRUNECHILD);
487 		return (DDI_WALK_CONTINUE);
488 	}
489 	/* we have already pruned all unwanted cores and cpu's above */
490 	if (strcmp(name, "core") == 0) {
491 		return (DDI_WALK_CONTINUE);
492 	}
493 	if (strcmp(name, "cpu") == 0) {
494 		processorid_t cpuid;
495 		len = sizeof (cpuid);
496 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
497 		    DDI_PROP_DONTPASS, "cpuid",
498 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
499 			p->rv = -1;
500 			return (DDI_WALK_TERMINATE);
501 		}
502 
503 		core_id = p->core_id;
504 
505 		bnum = LSB_ID(cpuid);
506 
507 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
508 			return (DDI_WALK_CONTINUE);
509 
510 		bp = p->bp;
511 		ASSERT(bnum == bp->bnum);
512 
513 		if (p->option == HOTADD_CPU) {
514 			if (prom_hotaddcpu(cpuid) != 0) {
515 				p->rv = -1;
516 				return (DDI_WALK_TERMINATE);
517 			}
518 			strand_id = STRAND_ID(cpuid);
519 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
520 		} else if (p->option == HOTREMOVE_CPU) {
521 			if (prom_hotremovecpu(cpuid) != 0) {
522 				p->rv = -1;
523 				return (DDI_WALK_TERMINATE);
524 			}
525 			strand_id = STRAND_ID(cpuid);
526 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
527 		}
528 		return (DDI_WALK_CONTINUE);
529 	}
530 
531 	return (DDI_WALK_PRUNECHILD);
532 }
533 
534 
535 static int
536 drmach_add_remove_cpu(int bnum, int core_id, int option)
537 {
538 	struct drmach_hotcpu arg;
539 	drmach_board_t *bp;
540 
541 	bp = drmach_get_board_by_bnum(bnum);
542 	ASSERT(bp);
543 
544 	arg.bp = bp;
545 	arg.bnum = bnum;
546 	arg.core_id = core_id;
547 	arg.rv = 0;
548 	arg.option = option;
549 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
550 	return (arg.rv);
551 }
552 
/*
 * Argument passed from drmach_setup_core_info() to the
 * drmach_setup_core_cb() tree-walk callback.
 */
struct drmach_setup_core_arg {
	drmach_board_t *bp;	/* board whose cores[] table is filled in */
};
556 
557 static int
558 drmach_setup_core_cb(dev_info_t *dip, void *arg)
559 {
560 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
561 	char name[OBP_MAXDRVNAME];
562 	int len = OBP_MAXDRVNAME;
563 	int bnum;
564 	int core_id, strand_id;
565 
566 	if (dip == ddi_root_node()) {
567 		return (DDI_WALK_CONTINUE);
568 	}
569 
570 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
571 	    DDI_PROP_DONTPASS, "name",
572 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
573 		return (DDI_WALK_PRUNECHILD);
574 	}
575 
576 	/* only cmp has board number */
577 	bnum = -1;
578 	len = sizeof (bnum);
579 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
580 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
581 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
582 		bnum = -1;
583 	}
584 
585 	if (strcmp(name, "cmp") == 0) {
586 		if (bnum != p->bp->bnum)
587 			return (DDI_WALK_PRUNECHILD);
588 		return (DDI_WALK_CONTINUE);
589 	}
590 	/* we have already pruned all unwanted cores and cpu's above */
591 	if (strcmp(name, "core") == 0) {
592 		return (DDI_WALK_CONTINUE);
593 	}
594 	if (strcmp(name, "cpu") == 0) {
595 		processorid_t cpuid;
596 		len = sizeof (cpuid);
597 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
598 		    DDI_PROP_DONTPASS, "cpuid",
599 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
600 			return (DDI_WALK_TERMINATE);
601 		}
602 		bnum = LSB_ID(cpuid);
603 		ASSERT(bnum == p->bp->bnum);
604 		core_id = ON_BOARD_CORE_NUM(cpuid);
605 		strand_id = STRAND_ID(cpuid);
606 		p->bp->cores[core_id].core_present |= (1 << strand_id);
607 		return (DDI_WALK_CONTINUE);
608 	}
609 
610 	return (DDI_WALK_PRUNECHILD);
611 }
612 
613 
614 static void
615 drmach_setup_core_info(drmach_board_t *obj)
616 {
617 	struct drmach_setup_core_arg arg;
618 	int i;
619 
620 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
621 		obj->cores[i].core_present = 0;
622 		obj->cores[i].core_hotadded = 0;
623 		obj->cores[i].core_started = 0;
624 	}
625 	arg.bp = obj;
626 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
627 
628 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
629 		if (obj->boot_board) {
630 			obj->cores[i].core_hotadded =
631 				obj->cores[i].core_started =
632 				obj->cores[i].core_present;
633 		}
634 	}
635 }
636 
637 /*
638  * drmach_node_* routines serve the purpose of separating the
639  * rest of the code from the device tree and OBP.  This is necessary
640  * because of In-Kernel-Probing.  Devices probed after stod, are probed
641  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
642  * have dnode ids.
643  */
644 
/*
 * State shared between drmach_node_ddi_walk() and its ddi_walk_devs()
 * callback, drmach_node_ddi_walk_cb().
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;	/* arguments passed on to cb */
	int 			(*cb)(drmach_node_walk_args_t *args);
	int			err;	/* last value returned by cb */
} drmach_node_ddi_walk_args_t;
650 
651 static int
652 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
653 {
654 	drmach_node_ddi_walk_args_t	*nargs;
655 
656 	nargs = (drmach_node_ddi_walk_args_t *)arg;
657 
658 	/*
659 	 * dip doesn't have to be held here as we are called
660 	 * from ddi_walk_devs() which holds the dip.
661 	 */
662 	nargs->nwargs->node->here = (void *)dip;
663 
664 	nargs->err = nargs->cb(nargs->nwargs);
665 
666 
667 	/*
668 	 * Set "here" to NULL so that unheld dip is not accessible
669 	 * outside ddi_walk_devs()
670 	 */
671 	nargs->nwargs->node->here = NULL;
672 
673 	if (nargs->err)
674 		return (DDI_WALK_TERMINATE);
675 	else
676 		return (DDI_WALK_CONTINUE);
677 }
678 
679 static int
680 drmach_node_ddi_walk(drmach_node_t *np, void *data,
681 		int (*cb)(drmach_node_walk_args_t *args))
682 {
683 	drmach_node_walk_args_t		args;
684 	drmach_node_ddi_walk_args_t	nargs;
685 
686 
687 	/* initialized args structure for callback */
688 	args.node = np;
689 	args.data = data;
690 
691 	nargs.nwargs = &args;
692 	nargs.cb = cb;
693 	nargs.err = 0;
694 
695 	/*
696 	 * Root node doesn't have to be held in any way.
697 	 */
698 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
699 		(void *)&nargs);
700 
701 	return (nargs.err);
702 }
703 
704 static int
705 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
706 {
707 	dev_info_t	*ndip;
708 	static char	*fn = "drmach_node_ddi_get_parent";
709 
710 	ndip = np->n_getdip(np);
711 	if (ndip == NULL) {
712 		cmn_err(CE_WARN, "%s: NULL dip", fn);
713 		return (-1);
714 	}
715 
716 	bcopy(np, pp, sizeof (drmach_node_t));
717 
718 	pp->here = (void *)ddi_get_parent(ndip);
719 	if (pp->here == NULL) {
720 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
721 		return (-1);
722 	}
723 
724 	return (0);
725 }
726 
727 /*ARGSUSED*/
728 static pnode_t
729 drmach_node_ddi_get_dnode(drmach_node_t *np)
730 {
731 	return ((pnode_t)NULL);
732 }
733 
734 static drmach_node_t *
735 drmach_node_new(void)
736 {
737 	drmach_node_t *np;
738 
739 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
740 
741 	np->get_dnode = drmach_node_ddi_get_dnode;
742 	np->walk = drmach_node_ddi_walk;
743 	np->n_getdip = drmach_node_ddi_get_dip;
744 	np->n_getproplen = drmach_node_ddi_get_proplen;
745 	np->n_getprop = drmach_node_ddi_get_prop;
746 	np->get_parent = drmach_node_ddi_get_parent;
747 
748 	return (np);
749 }
750 
751 static void
752 drmach_node_dispose(drmach_node_t *np)
753 {
754 	kmem_free(np, sizeof (*np));
755 }
756 
757 static dev_info_t *
758 drmach_node_ddi_get_dip(drmach_node_t *np)
759 {
760 	return ((dev_info_t *)np->here);
761 }
762 
763 static int
764 drmach_node_walk(drmach_node_t *np, void *param,
765 		int (*cb)(drmach_node_walk_args_t *args))
766 {
767 	return (np->walk(np, param, cb));
768 }
769 
770 static int
771 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
772 {
773 	int		rv = 0;
774 	dev_info_t	*ndip;
775 	static char	*fn = "drmach_node_ddi_get_prop";
776 
777 
778 	ndip = np->n_getdip(np);
779 	if (ndip == NULL) {
780 		cmn_err(CE_WARN, "%s: NULL dip", fn);
781 		rv = -1;
782 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
783 	    DDI_PROP_DONTPASS, name,
784 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
785 		rv = -1;
786 	}
787 
788 	return (rv);
789 }
790 
791 static int
792 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
793 {
794 	int		rv = 0;
795 	dev_info_t	*ndip;
796 
797 	ndip = np->n_getdip(np);
798 	if (ndip == NULL) {
799 		rv = -1;
800 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
801 		name, len) != DDI_PROP_SUCCESS) {
802 		rv = -1;
803 	}
804 
805 	return (rv);
806 }
807 
808 static drmachid_t
809 drmach_node_dup(drmach_node_t *np)
810 {
811 	drmach_node_t *dup;
812 
813 	dup = drmach_node_new();
814 	dup->here = np->here;
815 	dup->get_dnode = np->get_dnode;
816 	dup->walk = np->walk;
817 	dup->n_getdip = np->n_getdip;
818 	dup->n_getproplen = np->n_getproplen;
819 	dup->n_getprop = np->n_getprop;
820 	dup->get_parent = np->get_parent;
821 
822 	return (dup);
823 }
824 
825 /*
826  * drmach_array provides convenient array construction, access,
827  * bounds checking and array destruction logic.
828  */
829 
830 static drmach_array_t *
831 drmach_array_new(int min_index, int max_index)
832 {
833 	drmach_array_t *arr;
834 
835 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
836 
837 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
838 	if (arr->arr_sz > 0) {
839 		arr->min_index = min_index;
840 		arr->max_index = max_index;
841 
842 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
843 		return (arr);
844 	} else {
845 		kmem_free(arr, sizeof (*arr));
846 		return (0);
847 	}
848 }
849 
850 static int
851 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
852 {
853 	if (idx < arr->min_index || idx > arr->max_index)
854 		return (-1);
855 	else {
856 		arr->arr[idx - arr->min_index] = val;
857 		return (0);
858 	}
859 	/*NOTREACHED*/
860 }
861 
862 static int
863 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
864 {
865 	if (idx < arr->min_index || idx > arr->max_index)
866 		return (-1);
867 	else {
868 		*val = arr->arr[idx - arr->min_index];
869 		return (0);
870 	}
871 	/*NOTREACHED*/
872 }
873 
874 static int
875 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
876 {
877 	int rv;
878 
879 	*idx = arr->min_index;
880 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
881 		*idx += 1;
882 
883 	return (rv);
884 }
885 
886 static int
887 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
888 {
889 	int rv;
890 
891 	*idx += 1;
892 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
893 		*idx += 1;
894 
895 	return (rv);
896 }
897 
898 static void
899 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
900 {
901 	drmachid_t	val;
902 	int		idx;
903 	int		rv;
904 
905 	rv = drmach_array_first(arr, &idx, &val);
906 	while (rv == 0) {
907 		(*disposer)(val);
908 		rv = drmach_array_next(arr, &idx, &val);
909 	}
910 
911 	kmem_free(arr->arr, arr->arr_sz);
912 	kmem_free(arr, sizeof (*arr));
913 }
914 
915 static drmach_board_t *
916 drmach_get_board_by_bnum(int bnum)
917 {
918 	drmachid_t id;
919 
920 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
921 		return ((drmach_board_t *)id);
922 	else
923 		return (NULL);
924 }
925 
926 static pnode_t
927 drmach_node_get_dnode(drmach_node_t *np)
928 {
929 	return (np->get_dnode(np));
930 }
931 
932 /*ARGSUSED*/
933 sbd_error_t *
934 drmach_configure(drmachid_t id, int flags)
935 {
936 	drmach_device_t		*dp;
937 	sbd_error_t		*err = NULL;
938 	dev_info_t		*rdip;
939 	dev_info_t		*fdip = NULL;
940 
941 	if (DRMACH_IS_CPU_ID(id)) {
942 		return (NULL);
943 	}
944 	if (!DRMACH_IS_DEVICE_ID(id))
945 		return (drerr_new(0, EOPL_INAPPROP, NULL));
946 	dp = id;
947 	rdip = dp->node->n_getdip(dp->node);
948 
949 	ASSERT(rdip);
950 
951 	ASSERT(e_ddi_branch_held(rdip));
952 
953 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
954 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
955 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
956 
957 		(void) ddi_pathname(dip, path);
958 		err = drerr_new(1,  EOPL_DRVFAIL, path);
959 
960 		kmem_free(path, MAXPATHLEN);
961 
962 		/* If non-NULL, fdip is returned held and must be released */
963 		if (fdip != NULL)
964 			ddi_release_devi(fdip);
965 	}
966 
967 	return (err);
968 }
969 
970 
971 static sbd_error_t *
972 drmach_device_new(drmach_node_t *node,
973 	drmach_board_t *bp, int portid, drmachid_t *idp)
974 {
975 	int		 i;
976 	int		 rv;
977 	drmach_device_t	proto;
978 	sbd_error_t	*err;
979 	char		 name[OBP_MAXDRVNAME];
980 
981 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
982 	if (rv) {
983 		/* every node is expected to have a name */
984 		err = drerr_new(1, EOPL_GETPROP,
985 			"device node %s: property %s",
986 			ddi_node_name(node->n_getdip(node)), "name");
987 		return (err);
988 	}
989 
990 	/*
991 	 * The node currently being examined is not listed in the name2type[]
992 	 * array.  In this case, the node is no interest to drmach.  Both
993 	 * dp and err are initialized here to yield nothing (no device or
994 	 * error structure) for this case.
995 	 */
996 	i = drmach_name2type_idx(name);
997 
998 
999 	if (i < 0) {
1000 		*idp = (drmachid_t)0;
1001 		return (NULL);
1002 	}
1003 
1004 	/* device specific new function will set unum */
1005 
1006 	bzero(&proto, sizeof (proto));
1007 	proto.type = drmach_name2type[i].type;
1008 	proto.bp = bp;
1009 	proto.node = node;
1010 	proto.portid = portid;
1011 
1012 	return (drmach_name2type[i].new(&proto, idp));
1013 }
1014 
1015 static void
1016 drmach_device_dispose(drmachid_t id)
1017 {
1018 	drmach_device_t *self = id;
1019 
1020 	self->cm.dispose(id);
1021 }
1022 
1023 
1024 static drmach_board_t *
1025 drmach_board_new(int bnum, int boot_board)
1026 {
1027 	static sbd_error_t *drmach_board_release(drmachid_t);
1028 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1029 
1030 	drmach_board_t	*bp;
1031 
1032 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1033 
1034 	bp->cm.isa = (void *)drmach_board_new;
1035 	bp->cm.release = drmach_board_release;
1036 	bp->cm.status = drmach_board_status;
1037 
1038 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1039 
1040 	bp->bnum = bnum;
1041 	bp->devices = NULL;
1042 	bp->connected = boot_board;
1043 	bp->tree = drmach_node_new();
1044 	bp->assigned = boot_board;
1045 	bp->powered = boot_board;
1046 	bp->boot_board = boot_board;
1047 
1048 	/*
1049 	 * If this is not bootup initialization, we have to wait till
1050 	 * IKP sets up the device nodes in drmach_board_connect().
1051 	 */
1052 	if (boot_board)
1053 		drmach_setup_core_info(bp);
1054 
1055 	drmach_array_set(drmach_boards, bnum, bp);
1056 	return (bp);
1057 }
1058 
1059 static void
1060 drmach_board_dispose(drmachid_t id)
1061 {
1062 	drmach_board_t *bp;
1063 
1064 	ASSERT(DRMACH_IS_BOARD_ID(id));
1065 	bp = id;
1066 
1067 	if (bp->tree)
1068 		drmach_node_dispose(bp->tree);
1069 
1070 	if (bp->devices)
1071 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1072 
1073 	kmem_free(bp, sizeof (*bp));
1074 }
1075 
1076 static sbd_error_t *
1077 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1078 {
1079 	sbd_error_t	*err = NULL;
1080 	drmach_board_t	*bp;
1081 
1082 	if (!DRMACH_IS_BOARD_ID(id))
1083 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1084 	bp = id;
1085 
1086 	stat->assigned = bp->assigned;
1087 	stat->powered = bp->powered;
1088 	stat->busy = 0;			/* assume not busy */
1089 	stat->configured = 0;		/* assume not configured */
1090 	stat->empty = 0;
1091 	stat->cond = bp->cond = SBD_COND_OK;
1092 	strncpy(stat->type, "System Brd", sizeof (stat->type));
1093 	stat->info[0] = '\0';
1094 
1095 	if (bp->devices) {
1096 		int		 rv;
1097 		int		 d_idx;
1098 		drmachid_t	 d_id;
1099 
1100 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1101 		while (rv == 0) {
1102 			drmach_status_t	d_stat;
1103 
1104 			err = drmach_i_status(d_id, &d_stat);
1105 			if (err)
1106 				break;
1107 
1108 			stat->busy |= d_stat.busy;
1109 			stat->configured |= d_stat.configured;
1110 
1111 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1112 		}
1113 	}
1114 
1115 	return (err);
1116 }
1117 
1118 int
1119 drmach_board_is_floating(drmachid_t id)
1120 {
1121 	drmach_board_t *bp;
1122 
1123 	if (!DRMACH_IS_BOARD_ID(id))
1124 		return (0);
1125 
1126 	bp = (drmach_board_t *)id;
1127 
1128 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1129 }
1130 
1131 static int
1132 drmach_init(void)
1133 {
1134 	dev_info_t	*rdip;
1135 	int		i, rv, len;
1136 	int		*floating;
1137 
1138 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1139 
1140 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1141 
1142 	rdip = ddi_root_node();
1143 
1144 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1145 		"floating-boards", &len) != DDI_PROP_SUCCESS) {
1146 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1147 	} else {
1148 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1149 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip,
1150 			PROP_LEN_AND_VAL_BUF, DDI_PROP_DONTPASS,
1151 			"floating-boards", (caddr_t)floating, &len);
1152 		if (rv != DDI_PROP_SUCCESS) {
1153 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1154 		} else {
1155 			drmach_domain.floating = 0;
1156 			for (i = 0; i < len / sizeof (int); i++) {
1157 				drmach_domain.floating |= (1 << floating[i]);
1158 			}
1159 		}
1160 		kmem_free(floating, len);
1161 	}
1162 	drmach_domain.allow_dr = opl_check_dr_status();
1163 
1164 	rdip = ddi_get_child(ddi_root_node());
1165 	do {
1166 		int		 bnum;
1167 		drmachid_t	 id;
1168 
1169 		bnum = -1;
1170 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip,
1171 			DDI_PROP_DONTPASS, OBP_BOARDNUM, -1);
1172 		if (bnum == -1)
1173 			continue;
1174 
1175 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1176 			cmn_err(CE_WARN, "Device node 0x%p has"
1177 				" invalid property value, %s=%d",
1178 					rdip, OBP_BOARDNUM, bnum);
1179 			goto error;
1180 		} else if (id == NULL) {
1181 			(void) drmach_board_new(bnum, 1);
1182 		}
1183 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1184 
1185 	opl_hold_devtree();
1186 
1187 	/*
1188 	 * Initialize the IKP feature.
1189 	 *
1190 	 * This can be done only after DR has acquired a hold on all the
1191 	 * device nodes that are interesting to IKP.
1192 	 */
1193 	if (opl_init_cfg() != 0) {
1194 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1195 
1196 		opl_release_devtree();
1197 
1198 		goto error;
1199 	}
1200 
1201 	return (0);
1202 error:
1203 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1204 	rw_destroy(&drmach_boards_rwlock);
1205 	return (ENXIO);
1206 }
1207 
1208 static void
1209 drmach_fini(void)
1210 {
1211 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1212 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1213 	drmach_boards = NULL;
1214 	rw_exit(&drmach_boards_rwlock);
1215 
1216 	/*
1217 	 * Walk immediate children of the root devinfo node
1218 	 * releasing holds acquired on branches in drmach_init()
1219 	 */
1220 
1221 	opl_release_devtree();
1222 
1223 	rw_destroy(&drmach_boards_rwlock);
1224 }
1225 
/*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
1232  *
1233  *
1234  *	Device Path:
1235  *	/pci@<portid>,reg
1236  *
1237  *	where
1238  *	portid[10] = 0
1239  *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
1240  *
1241  *	LLEAF_ID[9:8] = 0
1242  *	LLEAF_ID[8:4] = LSB_ID[4:0]
1243  *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
1244  *			channel 4 is pcicmu
1245  *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
1246  *
1247  *	Properties:
1248  *	name = pci
1249  *	device_type = "pciex"
1250  *	board# = LSBID
1251  *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
1252  *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
1253  */
1254 
1255 static sbd_error_t *
1256 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1257 {
1258 	drmach_io_t	*ip;
1259 
1260 	int		 portid;
1261 
1262 	portid = proto->portid;
1263 	ASSERT(portid != -1);
1264 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1265 
1266 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1267 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1268 	ip->dev.node = drmach_node_dup(proto->node);
1269 	ip->dev.cm.isa = (void *)drmach_io_new;
1270 	ip->dev.cm.dispose = drmach_io_dispose;
1271 	ip->dev.cm.release = drmach_io_release;
1272 	ip->dev.cm.status = drmach_io_status;
1273 	ip->channel = (portid >> 1) & 0x7;
1274 	ip->leaf = (portid & 0x1);
1275 
1276 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1277 		ip->dev.type, ip->dev.unum);
1278 
1279 	*idp = (drmachid_t)ip;
1280 	return (NULL);
1281 }
1282 
1283 
1284 static void
1285 drmach_io_dispose(drmachid_t id)
1286 {
1287 	drmach_io_t *self;
1288 
1289 	ASSERT(DRMACH_IS_IO_ID(id));
1290 
1291 	self = id;
1292 	if (self->dev.node)
1293 		drmach_node_dispose(self->dev.node);
1294 
1295 	kmem_free(self, sizeof (*self));
1296 }
1297 
1298 /*ARGSUSED*/
1299 sbd_error_t *
1300 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1301 {
1302 	drmach_board_t	*bp = (drmach_board_t *)id;
1303 	sbd_error_t	*err = NULL;
1304 
1305 	/* allow status and ncm operations to always succeed */
1306 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1307 		return (NULL);
1308 	}
1309 
1310 	/* check all other commands for the required option string */
1311 
1312 	if ((opts->size > 0) && (opts->copts != NULL)) {
1313 
1314 		DRMACH_PR("platform options: %s\n", opts->copts);
1315 
1316 		if (strstr(opts->copts, "opldr") == NULL) {
1317 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1318 		}
1319 	} else {
1320 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1321 	}
1322 
1323 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1324 		switch (cmd) {
1325 			case SBD_CMD_TEST:
1326 			case SBD_CMD_STATUS:
1327 			case SBD_CMD_GETNCM:
1328 				break;
1329 			case SBD_CMD_CONNECT:
1330 				if (bp->connected)
1331 					err = drerr_new(0, ESBD_STATE, NULL);
1332 				else if (!drmach_domain.allow_dr)
1333 					err = drerr_new(1, EOPL_SUPPORT,
1334 						NULL);
1335 				break;
1336 			case SBD_CMD_DISCONNECT:
1337 				if (!bp->connected)
1338 					err = drerr_new(0, ESBD_STATE, NULL);
1339 				else if (!drmach_domain.allow_dr)
1340 					err = drerr_new(1, EOPL_SUPPORT,
1341 						NULL);
1342 				break;
1343 			default:
1344 				if (!drmach_domain.allow_dr)
1345 					err = drerr_new(1, EOPL_SUPPORT,
1346 						NULL);
1347 				break;
1348 
1349 		}
1350 	}
1351 
1352 	return (err);
1353 }
1354 
1355 /*ARGSUSED*/
1356 sbd_error_t *
1357 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1358 {
1359 	return (NULL);
1360 }
1361 
1362 sbd_error_t *
1363 drmach_board_assign(int bnum, drmachid_t *id)
1364 {
1365 	sbd_error_t	*err = NULL;
1366 
1367 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1368 
1369 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1370 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1371 	} else {
1372 		drmach_board_t	*bp;
1373 
1374 		if (*id)
1375 			rw_downgrade(&drmach_boards_rwlock);
1376 
1377 		bp = *id;
1378 		if (!(*id))
1379 			bp = *id  =
1380 				(drmachid_t)drmach_board_new(bnum, 0);
1381 		bp->assigned = 1;
1382 	}
1383 
1384 	rw_exit(&drmach_boards_rwlock);
1385 
1386 	return (err);
1387 }
1388 
1389 /*ARGSUSED*/
1390 sbd_error_t *
1391 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1392 {
1393 	drmach_board_t	*obj = (drmach_board_t *)id;
1394 
1395 	if (!DRMACH_IS_BOARD_ID(id))
1396 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1397 
1398 	if (opl_probe_sb(obj->bnum) != 0)
1399 		return (drerr_new(1, EOPL_PROBE, NULL));
1400 
1401 	(void) prom_attach_notice(obj->bnum);
1402 
1403 	drmach_setup_core_info(obj);
1404 
1405 	obj->connected = 1;
1406 
1407 	return (NULL);
1408 }
1409 
1410 static int drmach_cache_flush_flag[NCPU];
1411 
1412 /*ARGSUSED*/
1413 static void
1414 drmach_flush_cache(uint64_t id, uint64_t dummy)
1415 {
1416 	extern void cpu_flush_ecache(void);
1417 
1418 	cpu_flush_ecache();
1419 	drmach_cache_flush_flag[id] = 0;
1420 }
1421 
1422 static void
1423 drmach_flush_all()
1424 {
1425 	cpuset_t	xc_cpuset;
1426 	int		i;
1427 
1428 	xc_cpuset = cpu_ready_set;
1429 	for (i = 0; i < NCPU; i++) {
1430 		if (CPU_IN_SET(xc_cpuset, i)) {
1431 			drmach_cache_flush_flag[i] = 1;
1432 			xc_one(i, drmach_flush_cache, i, 0);
1433 			while (drmach_cache_flush_flag[i]) {
1434 				DELAY(1000);
1435 			}
1436 		}
1437 	}
1438 }
1439 
1440 static int
1441 drmach_disconnect_cpus(drmach_board_t *bp)
1442 {
1443 	int i, bnum;
1444 
1445 	bnum = bp->bnum;
1446 
1447 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1448 	    if (bp->cores[i].core_present) {
1449 		if (bp->cores[i].core_started)
1450 		    return (-1);
1451 		if (bp->cores[i].core_hotadded) {
1452 		    if (drmach_add_remove_cpu(bnum, i, HOTREMOVE_CPU)) {
1453 			cmn_err(CE_WARN,
1454 			    "Failed to remove CMP %d on board %d\n",
1455 			    i, bnum);
1456 			return (-1);
1457 		    }
1458 		}
1459 	    }
1460 	}
1461 	return (0);
1462 }
1463 
1464 /*ARGSUSED*/
1465 sbd_error_t *
1466 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1467 {
1468 	drmach_board_t *obj;
1469 	int rv = 0;
1470 	sbd_error_t		*err = NULL;
1471 
1472 	if (DRMACH_NULL_ID(id))
1473 		return (NULL);
1474 
1475 	if (!DRMACH_IS_BOARD_ID(id))
1476 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1477 
1478 	obj = (drmach_board_t *)id;
1479 
1480 	if (drmach_disconnect_cpus(obj)) {
1481 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1482 		return (err);
1483 	}
1484 
1485 	rv = opl_unprobe_sb(obj->bnum);
1486 
1487 	if (rv == 0) {
1488 		prom_detach_notice(obj->bnum);
1489 		obj->connected = 0;
1490 
1491 	} else
1492 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1493 
1494 	return (err);
1495 }
1496 
1497 static int
1498 drmach_get_portid(drmach_node_t *np)
1499 {
1500 	int		portid;
1501 	char		type[OBP_MAXPROPNAME];
1502 
1503 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1504 		return (portid);
1505 
1506 	/*
1507 	 * Get the device_type property to see if we should
1508 	 * continue processing this node.
1509 	 */
1510 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1511 		return (-1);
1512 
1513 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1514 		/*
1515 		 * We return cpuid because it has no portid
1516 		 */
1517 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1518 			return (portid);
1519 	}
1520 
1521 	return (-1);
1522 }
1523 
1524 /*
1525  * This is a helper function to determine if a given
1526  * node should be considered for a dr operation according
1527  * to predefined dr type nodes and the node's name.
1528  * Formal Parameter : The name of a device node.
1529  * Return Value: -1, name does not map to a valid dr type.
1530  *		 A value greater or equal to 0, name is a valid dr type.
1531  */
1532 static int
1533 drmach_name2type_idx(char *name)
1534 {
1535 	int 	index, ntypes;
1536 
1537 	if (name == NULL)
1538 		return (-1);
1539 
1540 	/*
1541 	 * Determine how many possible types are currently supported
1542 	 * for dr.
1543 	 */
1544 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1545 
1546 	/* Determine if the node's name correspond to a predefined type. */
1547 	for (index = 0; index < ntypes; index++) {
1548 		if (strcmp(drmach_name2type[index].name, name) == 0)
1549 			/* The node is an allowed type for dr. */
1550 			return (index);
1551 	}
1552 
1553 	/*
1554 	 * If the name of the node does not map to any of the
1555 	 * types in the array drmach_name2type then the node is not of
1556 	 * interest to dr.
1557 	 */
1558 	return (-1);
1559 }
1560 
/*
 * There are some complications on OPL:
 * - pseudo-mc nodes do not have a portid property
 * - portid[9:5] of a cmp node is the LSB#, portid[7:3] of pci is the LSB#
 * - cmp has board#
 * - core and cpu nodes do not have portid and board# properties
 * starcat uses portid to derive the board# but that does not work
 * for us.  starfire reads the board# property to filter the devices.
 * That does not work either.  So for these specific devices,
 * we use specific hard coded methods to get the board# -
 * cpu: LSB# = CPUID[9:5]
 */
1573 
1574 static int
1575 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1576 {
1577 	drmach_node_t			*node = args->node;
1578 	drmach_board_cb_data_t		*data = args->data;
1579 	drmach_board_t			*obj = data->obj;
1580 
1581 	int		rv, portid;
1582 	int		bnum;
1583 	drmachid_t	id;
1584 	drmach_device_t	*device;
1585 	char name[OBP_MAXDRVNAME];
1586 
1587 	portid = drmach_get_portid(node);
1588 	/*
1589 	 * core, cpu and pseudo-mc do not have portid
1590 	 * we use cpuid as the portid of the cpu node
1591 	 * for pseudo-mc, we do not use portid info.
1592 	 */
1593 
1594 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1595 	if (rv)
1596 		return (0);
1597 
1598 
1599 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1600 
1601 	if (rv) {
1602 		/*
1603 		 * cpu does not have board# property.  We use
1604 		 * CPUID[9:5]
1605 		 */
1606 		if (strcmp("cpu", name) == 0) {
1607 			bnum = (portid >> 5) & 0x1f;
1608 		} else
1609 			return (0);
1610 	}
1611 
1612 
1613 	if (bnum != obj->bnum)
1614 		return (0);
1615 
1616 	if (drmach_name2type_idx(name) < 0) {
1617 		return (0);
1618 	}
1619 
1620 	/*
1621 	 * Create a device data structure from this node data.
1622 	 * The call may yield nothing if the node is not of interest
1623 	 * to drmach.
1624 	 */
1625 	data->err = drmach_device_new(node, obj, portid, &id);
1626 	if (data->err)
1627 		return (-1);
1628 	else if (!id) {
1629 		/*
1630 		 * drmach_device_new examined the node we passed in
1631 		 * and determined that it was one not of interest to
1632 		 * drmach.  So, it is skipped.
1633 		 */
1634 		return (0);
1635 	}
1636 
1637 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1638 	if (rv) {
1639 		data->err = DRMACH_INTERNAL_ERROR();
1640 		return (-1);
1641 	}
1642 	device = id;
1643 
1644 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1645 	return (data->err == NULL ? 0 : -1);
1646 }
1647 
1648 sbd_error_t *
1649 drmach_board_find_devices(drmachid_t id, void *a,
1650 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1651 {
1652 	drmach_board_t		*bp = (drmach_board_t *)id;
1653 	sbd_error_t		*err;
1654 	int			 max_devices;
1655 	int			 rv;
1656 	drmach_board_cb_data_t	data;
1657 
1658 
1659 	if (!DRMACH_IS_BOARD_ID(id))
1660 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1661 
1662 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1663 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1664 	max_devices += MAX_IO_UNITS_PER_BOARD;
1665 
1666 	bp->devices = drmach_array_new(0, max_devices);
1667 
1668 	if (bp->tree == NULL)
1669 		bp->tree = drmach_node_new();
1670 
1671 	data.obj = bp;
1672 	data.ndevs = 0;
1673 	data.found = found;
1674 	data.a = a;
1675 	data.err = NULL;
1676 
1677 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1678 	if (rv == 0)
1679 		err = NULL;
1680 	else {
1681 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1682 		bp->devices = NULL;
1683 
1684 		if (data.err)
1685 			err = data.err;
1686 		else
1687 			err = DRMACH_INTERNAL_ERROR();
1688 	}
1689 
1690 	return (err);
1691 }
1692 
1693 int
1694 drmach_board_lookup(int bnum, drmachid_t *id)
1695 {
1696 	int	rv = 0;
1697 
1698 	rw_enter(&drmach_boards_rwlock, RW_READER);
1699 	if (drmach_array_get(drmach_boards, bnum, id)) {
1700 		*id = 0;
1701 		rv = -1;
1702 	}
1703 	rw_exit(&drmach_boards_rwlock);
1704 	return (rv);
1705 }
1706 
1707 sbd_error_t *
1708 drmach_board_name(int bnum, char *buf, int buflen)
1709 {
1710 	snprintf(buf, buflen, "SB%d", bnum);
1711 	return (NULL);
1712 }
1713 
1714 sbd_error_t *
1715 drmach_board_poweroff(drmachid_t id)
1716 {
1717 	drmach_board_t	*bp;
1718 	sbd_error_t	*err;
1719 	drmach_status_t	 stat;
1720 
1721 	if (DRMACH_NULL_ID(id))
1722 		return (NULL);
1723 
1724 	if (!DRMACH_IS_BOARD_ID(id))
1725 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1726 	bp = id;
1727 
1728 	err = drmach_board_status(id, &stat);
1729 
1730 	if (!err) {
1731 		if (stat.configured || stat.busy)
1732 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1733 		else {
1734 			bp->powered = 0;
1735 		}
1736 	}
1737 	return (err);
1738 }
1739 
1740 sbd_error_t *
1741 drmach_board_poweron(drmachid_t id)
1742 {
1743 	drmach_board_t	*bp;
1744 
1745 	if (!DRMACH_IS_BOARD_ID(id))
1746 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1747 	bp = id;
1748 
1749 	bp->powered = 1;
1750 
1751 	return (NULL);
1752 }
1753 
1754 static sbd_error_t *
1755 drmach_board_release(drmachid_t id)
1756 {
1757 	if (!DRMACH_IS_BOARD_ID(id))
1758 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1759 	return (NULL);
1760 }
1761 
1762 /*ARGSUSED*/
1763 sbd_error_t *
1764 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1765 {
1766 	return (NULL);
1767 }
1768 
1769 sbd_error_t *
1770 drmach_board_unassign(drmachid_t id)
1771 {
1772 	drmach_board_t	*bp;
1773 	sbd_error_t	*err;
1774 	drmach_status_t	 stat;
1775 
1776 	if (DRMACH_NULL_ID(id))
1777 		return (NULL);
1778 
1779 	if (!DRMACH_IS_BOARD_ID(id)) {
1780 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1781 	}
1782 	bp = id;
1783 
1784 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1785 
1786 	err = drmach_board_status(id, &stat);
1787 	if (err) {
1788 		rw_exit(&drmach_boards_rwlock);
1789 		return (err);
1790 	}
1791 	if (stat.configured || stat.busy) {
1792 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1793 	} else {
1794 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1795 			err = DRMACH_INTERNAL_ERROR();
1796 		else
1797 			drmach_board_dispose(bp);
1798 	}
1799 	rw_exit(&drmach_boards_rwlock);
1800 	return (err);
1801 }
1802 
1803 /*
1804  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1805  * implementation #, etc
1806  */
1807 
1808 static sbd_error_t *
1809 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1810 {
1811 	static void drmach_cpu_dispose(drmachid_t);
1812 	static sbd_error_t *drmach_cpu_release(drmachid_t);
1813 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
1814 
1815 	int		 portid;
1816 	drmach_cpu_t	*cp = NULL;
1817 
1818 	/* portid is CPUID of the node */
1819 	portid = proto->portid;
1820 	ASSERT(portid != -1);
1821 
1822 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1823 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1824 		(OPL_MAX_CMPID_PER_BOARD - 1)) +
1825 		((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1826 		(OPL_MAX_CMPID_PER_BOARD));
1827 
1828 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1829 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1830 	cp->dev.node = drmach_node_dup(proto->node);
1831 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1832 	cp->dev.cm.dispose = drmach_cpu_dispose;
1833 	cp->dev.cm.release = drmach_cpu_release;
1834 	cp->dev.cm.status = drmach_cpu_status;
1835 
1836 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1837 		cp->dev.type, cp->dev.unum);
1838 
1839 /*
1840  *	CPU ID representation
1841  *	CPUID[9:5] = SB#
1842  *	CPUID[4:3] = Chip#
1843  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1844  *	CPUID[0:0] = Strand#
1845  */
1846 
1847 /*
1848  *	reg property of the strand contains strand ID
1849  *	reg property of the parent node contains core ID
1850  *	We should use them.
1851  */
1852 	cp->cpuid = portid;
1853 	cp->sb = (portid >> 5) & 0x1f;
1854 	cp->chipid = (portid >> 3) & 0x3;
1855 	cp->coreid = (portid >> 1) & 0x3;
1856 	cp->strandid = portid & 0x1;
1857 
1858 	*idp = (drmachid_t)cp;
1859 	return (NULL);
1860 }
1861 
1862 
1863 static void
1864 drmach_cpu_dispose(drmachid_t id)
1865 {
1866 	drmach_cpu_t	*self;
1867 
1868 	ASSERT(DRMACH_IS_CPU_ID(id));
1869 
1870 	self = id;
1871 	if (self->dev.node)
1872 		drmach_node_dispose(self->dev.node);
1873 
1874 	kmem_free(self, sizeof (*self));
1875 }
1876 
1877 static int
1878 drmach_cpu_start(struct cpu *cp)
1879 {
1880 	int		cpuid = cp->cpu_id;
1881 	extern int	restart_other_cpu(int);
1882 
1883 	ASSERT(MUTEX_HELD(&cpu_lock));
1884 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1885 
1886 	cp->cpu_flags &= ~CPU_POWEROFF;
1887 
1888 	/*
1889 	 * NOTE: restart_other_cpu pauses cpus during the
1890 	 *	 slave cpu start.  This helps to quiesce the
1891 	 *	 bus traffic a bit which makes the tick sync
1892 	 *	 routine in the prom more robust.
1893 	 */
1894 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1895 
1896 	restart_other_cpu(cpuid);
1897 
1898 	return (0);
1899 }
1900 
1901 static sbd_error_t *
1902 drmach_cpu_release(drmachid_t id)
1903 {
1904 	if (!DRMACH_IS_CPU_ID(id))
1905 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1906 
1907 	return (NULL);
1908 }
1909 
1910 static sbd_error_t *
1911 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1912 {
1913 	drmach_cpu_t *cp;
1914 	drmach_device_t *dp;
1915 
1916 	ASSERT(DRMACH_IS_CPU_ID(id));
1917 	cp = (drmach_cpu_t *)id;
1918 	dp = &cp->dev;
1919 
1920 	stat->assigned = dp->bp->assigned;
1921 	stat->powered = dp->bp->powered;
1922 	mutex_enter(&cpu_lock);
1923 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1924 	mutex_exit(&cpu_lock);
1925 	stat->busy = dp->busy;
1926 	strncpy(stat->type, dp->type, sizeof (stat->type));
1927 	stat->info[0] = '\0';
1928 
1929 	return (NULL);
1930 }
1931 
1932 sbd_error_t *
1933 drmach_cpu_disconnect(drmachid_t id)
1934 {
1935 
1936 	if (!DRMACH_IS_CPU_ID(id))
1937 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1938 
1939 	return (NULL);
1940 }
1941 
1942 sbd_error_t *
1943 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1944 {
1945 	drmach_cpu_t *cpu;
1946 
1947 	if (!DRMACH_IS_CPU_ID(id))
1948 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1949 	cpu = (drmach_cpu_t *)id;
1950 
1951 	/* get from cpu directly on OPL */
1952 	*cpuid = cpu->cpuid;
1953 	return (NULL);
1954 }
1955 
1956 sbd_error_t *
1957 drmach_cpu_get_impl(drmachid_t id, int *ip)
1958 {
1959 	drmach_device_t *cpu;
1960 	drmach_node_t	*np;
1961 	drmach_node_t	pp;
1962 	int		impl;
1963 	char		type[OBP_MAXPROPNAME];
1964 
1965 	if (!DRMACH_IS_CPU_ID(id))
1966 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1967 
1968 	cpu = id;
1969 	np = cpu->node;
1970 
1971 	if (np->get_parent(np, &pp) != 0) {
1972 		return (DRMACH_INTERNAL_ERROR());
1973 	}
1974 
1975 	/* the parent should be core */
1976 
1977 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1978 		return (drerr_new(0, EOPL_GETPROP, NULL));
1979 	}
1980 
1981 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1982 		if (pp.n_getprop(&pp, "implementation#",
1983 			&impl, sizeof (impl)) != 0) {
1984 			return (drerr_new(0, EOPL_GETPROP, NULL));
1985 		}
1986 	} else {
1987 		return (DRMACH_INTERNAL_ERROR());
1988 	}
1989 
1990 	*ip = impl;
1991 
1992 	return (NULL);
1993 }
1994 
1995 sbd_error_t *
1996 drmach_get_dip(drmachid_t id, dev_info_t **dip)
1997 {
1998 	drmach_device_t	*dp;
1999 
2000 	if (!DRMACH_IS_DEVICE_ID(id))
2001 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2002 	dp = id;
2003 
2004 	*dip = dp->node->n_getdip(dp->node);
2005 	return (NULL);
2006 }
2007 
2008 sbd_error_t *
2009 drmach_io_is_attached(drmachid_t id, int *yes)
2010 {
2011 	drmach_device_t *dp;
2012 	dev_info_t	*dip;
2013 	int		state;
2014 
2015 	if (!DRMACH_IS_IO_ID(id))
2016 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2017 	dp = id;
2018 
2019 	dip = dp->node->n_getdip(dp->node);
2020 	if (dip == NULL) {
2021 		*yes = 0;
2022 		return (NULL);
2023 	}
2024 
2025 	state = ddi_get_devstate(dip);
2026 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2027 	    (state == DDI_DEVSTATE_UP));
2028 
2029 	return (NULL);
2030 }
2031 
2032 struct drmach_io_cb {
2033 	char	*name;	/* name of the node */
2034 	int	(*func)(dev_info_t *);
2035 	int	rv;
2036 	dev_info_t *dip;
2037 };
2038 
2039 #define	DRMACH_IO_POST_ATTACH	0
2040 #define	DRMACH_IO_PRE_RELEASE	1
2041 
2042 static int
2043 drmach_io_cb_check(dev_info_t *dip, void *arg)
2044 {
2045 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2046 	char name[OBP_MAXDRVNAME];
2047 	int len = OBP_MAXDRVNAME;
2048 
2049 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2050 		DDI_PROP_DONTPASS, "name",
2051 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2052 		return (DDI_WALK_PRUNECHILD);
2053 	}
2054 
2055 	if (strcmp(name, p->name) == 0) {
2056 		ndi_hold_devi(dip);
2057 		p->dip = dip;
2058 		return (DDI_WALK_TERMINATE);
2059 	}
2060 
2061 	return (DDI_WALK_CONTINUE);
2062 }
2063 
2064 
2065 static int
2066 drmach_console_ops(drmachid_t *id, int state)
2067 {
2068 	drmach_io_t *obj = (drmach_io_t *)id;
2069 	struct drmach_io_cb arg;
2070 	int (*msudetp)(dev_info_t *);
2071 	int (*msuattp)(dev_info_t *);
2072 	dev_info_t *dip, *pdip;
2073 	int circ;
2074 
2075 	/* 4 is pcicmu channel */
2076 	if (obj->channel != 4)
2077 		return (0);
2078 
2079 	arg.name = "serial";
2080 	arg.func = NULL;
2081 	if (state == DRMACH_IO_PRE_RELEASE) {
2082 		msudetp = (int (*)(dev_info_t *))
2083 		    modgetsymvalue("oplmsu_dr_detach", 0);
2084 		if (msudetp != NULL)
2085 			arg.func = msudetp;
2086 	} else if (state == DRMACH_IO_POST_ATTACH) {
2087 		msuattp = (int (*)(dev_info_t *))
2088 		    modgetsymvalue("oplmsu_dr_attach", 0);
2089 		if (msuattp != NULL)
2090 			arg.func = msuattp;
2091 	} else {
2092 		return (0);
2093 	}
2094 
2095 	if (arg.func == NULL) {
2096 		return (0);
2097 	}
2098 
2099 	arg.rv = 0;
2100 	arg.dip = NULL;
2101 
2102 	dip = obj->dev.node->n_getdip(obj->dev.node);
2103 	if (pdip = ddi_get_parent(dip)) {
2104 		ndi_hold_devi(pdip);
2105 		ndi_devi_enter(pdip, &circ);
2106 	} else {
2107 		/* this cannot happen unless something bad happens */
2108 		return (-1);
2109 	}
2110 
2111 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2112 
2113 	ndi_devi_exit(pdip, circ);
2114 	ndi_rele_devi(pdip);
2115 
2116 	if (arg.dip) {
2117 		arg.rv = (*arg.func)(arg.dip);
2118 		ndi_rele_devi(arg.dip);
2119 	} else {
2120 		arg.rv = -1;
2121 	}
2122 
2123 	return (arg.rv);
2124 }
2125 
2126 sbd_error_t *
2127 drmach_io_pre_release(drmachid_t id)
2128 {
2129 	int rv;
2130 
2131 	if (!DRMACH_IS_IO_ID(id))
2132 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2133 
2134 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2135 
2136 	if (rv != 0)
2137 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2138 
2139 	return (NULL);
2140 }
2141 
2142 static sbd_error_t *
2143 drmach_io_release(drmachid_t id)
2144 {
2145 	if (!DRMACH_IS_IO_ID(id))
2146 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2147 	return (NULL);
2148 }
2149 
2150 sbd_error_t *
2151 drmach_io_unrelease(drmachid_t id)
2152 {
2153 	if (!DRMACH_IS_IO_ID(id))
2154 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2155 	return (NULL);
2156 }
2157 
2158 /*ARGSUSED*/
2159 sbd_error_t *
2160 drmach_io_post_release(drmachid_t id)
2161 {
2162 	return (NULL);
2163 }
2164 
2165 /*ARGSUSED*/
2166 sbd_error_t *
2167 drmach_io_post_attach(drmachid_t id)
2168 {
2169 	int rv;
2170 
2171 	if (!DRMACH_IS_IO_ID(id))
2172 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2173 
2174 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2175 
2176 	if (rv != 0)
2177 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2178 
2179 	return (0);
2180 }
2181 
2182 static sbd_error_t *
2183 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2184 {
2185 	drmach_device_t *dp;
2186 	sbd_error_t	*err;
2187 	int		 configured;
2188 
2189 	ASSERT(DRMACH_IS_IO_ID(id));
2190 	dp = id;
2191 
2192 	err = drmach_io_is_attached(id, &configured);
2193 	if (err)
2194 		return (err);
2195 
2196 	stat->assigned = dp->bp->assigned;
2197 	stat->powered = dp->bp->powered;
2198 	stat->configured = (configured != 0);
2199 	stat->busy = dp->busy;
2200 	strncpy(stat->type, dp->type, sizeof (stat->type));
2201 	stat->info[0] = '\0';
2202 
2203 	return (NULL);
2204 }
2205 
2206 static sbd_error_t *
2207 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2208 {
2209 	static void drmach_mem_dispose(drmachid_t);
2210 	static sbd_error_t *drmach_mem_release(drmachid_t);
2211 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2212 	dev_info_t *dip;
2213 	int rv;
2214 
2215 	drmach_mem_t	*mp;
2216 
2217 	rv = 0;
2218 
2219 	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
2220 		(rv <= 0)) {
2221 		*idp = (drmachid_t)0;
2222 		return (NULL);
2223 	}
2224 
2225 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2226 	proto->unum = 0;
2227 
2228 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2229 	mp->dev.node = drmach_node_dup(proto->node);
2230 	mp->dev.cm.isa = (void *)drmach_mem_new;
2231 	mp->dev.cm.dispose = drmach_mem_dispose;
2232 	mp->dev.cm.release = drmach_mem_release;
2233 	mp->dev.cm.status = drmach_mem_status;
2234 
2235 	snprintf(mp->dev.cm.name,
2236 		sizeof (mp->dev.cm.name), "%s", mp->dev.type);
2237 
2238 	dip = mp->dev.node->n_getdip(mp->dev.node);
2239 	if (drmach_setup_mc_info(dip, mp) != 0) {
2240 		return (drerr_new(1, EOPL_MC_SETUP, NULL));
2241 	}
2242 
2243 	/* make sure we do not create memoryless nodes */
2244 	if (mp->nbytes == 0) {
2245 		*idp = (drmachid_t)NULL;
2246 		kmem_free(mp, sizeof (drmach_mem_t));
2247 	} else
2248 		*idp = (drmachid_t)mp;
2249 
2250 	return (NULL);
2251 }
2252 
2253 static void
2254 drmach_mem_dispose(drmachid_t id)
2255 {
2256 	drmach_mem_t *mp;
2257 
2258 	ASSERT(DRMACH_IS_MEM_ID(id));
2259 
2260 
2261 	mp = id;
2262 
2263 	if (mp->dev.node)
2264 		drmach_node_dispose(mp->dev.node);
2265 
2266 	if (mp->memlist) {
2267 		memlist_delete(mp->memlist);
2268 		mp->memlist = NULL;
2269 	}
2270 
2271 	kmem_free(mp, sizeof (*mp));
2272 }
2273 
2274 sbd_error_t *
2275 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2276 {
2277 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2278 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2279 	int		rv;
2280 
2281 	ASSERT(size != 0);
2282 
2283 	if (!DRMACH_IS_MEM_ID(id))
2284 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2285 
2286 	kcage_range_lock();
2287 	rv = kcage_range_add(basepfn, npages, 1);
2288 	kcage_range_unlock();
2289 	if (rv == ENOMEM) {
2290 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
2291 			(size == 0 ? 0 : size / MBYTE));
2292 	} else if (rv != 0) {
2293 		/* catch this in debug kernels */
2294 		ASSERT(0);
2295 
2296 		cmn_err(CE_WARN, "unexpected kcage_range_add"
2297 			" return value %d", rv);
2298 	}
2299 
2300 	if (rv) {
2301 		return (DRMACH_INTERNAL_ERROR());
2302 	}
2303 	else
2304 		return (NULL);
2305 }
2306 
2307 sbd_error_t *
2308 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2309 {
2310 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2311 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2312 	int		rv;
2313 
2314 	if (!DRMACH_IS_MEM_ID(id))
2315 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2316 
2317 	if (size > 0) {
2318 		kcage_range_lock();
2319 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2320 		kcage_range_unlock();
2321 		if (rv != 0) {
2322 			cmn_err(CE_WARN,
2323 			    "unexpected kcage_range_delete_post_mem_del"
2324 			    " return value %d", rv);
2325 			return (DRMACH_INTERNAL_ERROR());
2326 		}
2327 	}
2328 
2329 	return (NULL);
2330 }
2331 
2332 sbd_error_t *
2333 drmach_mem_disable(drmachid_t id)
2334 {
2335 	if (!DRMACH_IS_MEM_ID(id))
2336 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2337 	else {
2338 		drmach_flush_all();
2339 		return (NULL);
2340 	}
2341 }
2342 
2343 sbd_error_t *
2344 drmach_mem_enable(drmachid_t id)
2345 {
2346 	if (!DRMACH_IS_MEM_ID(id))
2347 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2348 	else
2349 		return (NULL);
2350 }
2351 
2352 sbd_error_t *
2353 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2354 {
2355 	drmach_mem_t *mp;
2356 
2357 	if (!DRMACH_IS_MEM_ID(id))
2358 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2359 
2360 	mp = (drmach_mem_t *)id;
2361 
2362 	/*
2363 	 * This is only used by dr to round up/down the memory
2364 	 * for copying. Our unit of memory isolation is 64 MB.
2365 	 */
2366 
2367 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2368 	mem->mi_basepa = mp->base_pa;
2369 	mem->mi_size = mp->nbytes;
2370 	mem->mi_slice_size = mp->slice_size;
2371 
2372 	return (NULL);
2373 }
2374 
2375 sbd_error_t *
2376 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2377 {
2378 	drmach_mem_t *mp;
2379 
2380 	if (!DRMACH_IS_MEM_ID(id))
2381 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2382 
2383 	mp = (drmach_mem_t *)id;
2384 
2385 	*pa = mp->base_pa;
2386 	return (NULL);
2387 }
2388 
2389 sbd_error_t *
2390 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2391 {
2392 	drmach_mem_t	*mem;
2393 #ifdef	DEBUG
2394 	int		rv;
2395 #endif
2396 	struct memlist	*mlist;
2397 
2398 	if (!DRMACH_IS_MEM_ID(id))
2399 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2400 
2401 	mem = (drmach_mem_t *)id;
2402 	mlist = memlist_dup(mem->memlist);
2403 
2404 #ifdef DEBUG
2405 	/*
2406 	 * Make sure the incoming memlist doesn't already
2407 	 * intersect with what's present in the system (phys_install).
2408 	 */
2409 	memlist_read_lock();
2410 	rv = memlist_intersect(phys_install, mlist);
2411 	memlist_read_unlock();
2412 	if (rv) {
2413 		DRMACH_PR("Derived memlist intersects"
2414 			" with phys_install\n");
2415 		memlist_dump(mlist);
2416 
2417 		DRMACH_PR("phys_install memlist:\n");
2418 		memlist_dump(phys_install);
2419 
2420 		memlist_delete(mlist);
2421 		return (DRMACH_INTERNAL_ERROR());
2422 	}
2423 
2424 	DRMACH_PR("Derived memlist:");
2425 	memlist_dump(mlist);
2426 #endif
2427 	*ml = mlist;
2428 
2429 	return (NULL);
2430 }
2431 
2432 sbd_error_t *
2433 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2434 {
2435 	drmach_mem_t	*mem;
2436 
2437 	if (!DRMACH_IS_MEM_ID(id))
2438 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2439 
2440 	mem = (drmach_mem_t *)id;
2441 
2442 	*bytes = mem->slice_size;
2443 
2444 	return (NULL);
2445 }
2446 
2447 
2448 /* ARGSUSED */
2449 processorid_t
2450 drmach_mem_cpu_affinity(drmachid_t id)
2451 {
2452 	return (CPU_CURRENT);
2453 }
2454 
2455 static sbd_error_t *
2456 drmach_mem_release(drmachid_t id)
2457 {
2458 	if (!DRMACH_IS_MEM_ID(id))
2459 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2460 	return (NULL);
2461 }
2462 
2463 static sbd_error_t *
2464 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2465 {
2466 	drmach_mem_t *dp;
2467 	uint64_t	 pa, slice_size;
2468 	struct memlist	*ml;
2469 
2470 	ASSERT(DRMACH_IS_MEM_ID(id));
2471 	dp = id;
2472 
2473 	/* get starting physical address of target memory */
2474 	pa = dp->base_pa;
2475 
2476 	/* round down to slice boundary */
2477 	slice_size = dp->slice_size;
2478 	pa &= ~(slice_size - 1);
2479 
2480 	/* stop at first span that is in slice */
2481 	memlist_read_lock();
2482 	for (ml = phys_install; ml; ml = ml->next)
2483 		if (ml->address >= pa && ml->address < pa + slice_size)
2484 			break;
2485 	memlist_read_unlock();
2486 
2487 	stat->assigned = dp->dev.bp->assigned;
2488 	stat->powered = dp->dev.bp->powered;
2489 	stat->configured = (ml != NULL);
2490 	stat->busy = dp->dev.busy;
2491 	strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2492 	stat->info[0] = '\0';
2493 
2494 	return (NULL);
2495 }
2496 
2497 
2498 sbd_error_t *
2499 drmach_board_deprobe(drmachid_t id)
2500 {
2501 	drmach_board_t	*bp;
2502 
2503 	if (!DRMACH_IS_BOARD_ID(id))
2504 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2505 
2506 	bp = id;
2507 
2508 	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
2509 
2510 	if (bp->tree) {
2511 		drmach_node_dispose(bp->tree);
2512 		bp->tree = NULL;
2513 	}
2514 	if (bp->devices) {
2515 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2516 		bp->devices = NULL;
2517 	}
2518 
2519 	bp->boot_board = 0;
2520 
2521 	return (NULL);
2522 }
2523 
2524 /*ARGSUSED*/
2525 static sbd_error_t *
2526 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2527 {
2528 	drmach_board_t		*bp = (drmach_board_t *)id;
2529 	sbd_error_t		*err = NULL;
2530 	int	rv;
2531 
2532 	if (!DRMACH_IS_BOARD_ID(id))
2533 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2534 
2535 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2536 	rv = opl_probe_sb(bp->bnum);
2537 	if (rv != 0) {
2538 		err = drerr_new(1, EOPL_PROBE, bp->cm.name);
2539 		return (err);
2540 	}
2541 	return (err);
2542 }
2543 
2544 /*ARGSUSED*/
2545 static sbd_error_t *
2546 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2547 {
2548 	drmach_board_t	*bp;
2549 	sbd_error_t	*err = NULL;
2550 	int	rv;
2551 
2552 	if (!DRMACH_IS_BOARD_ID(id))
2553 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2554 	bp = (drmach_board_t *)id;
2555 
2556 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2557 
2558 	rv = opl_unprobe_sb(bp->bnum);
2559 	if (rv != 0) {
2560 		err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
2561 	}
2562 
2563 	return (err);
2564 }
2565 
2566 
2567 /*ARGSUSED*/
2568 sbd_error_t *
2569 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2570 {
2571 	struct memlist	*ml;
2572 	uint64_t	src_pa;
2573 	uint64_t	dst_pa;
2574 	uint64_t	dst;
2575 
2576 	dst_pa = va_to_pa(&dst);
2577 
2578 	memlist_read_lock();
2579 	for (ml = phys_install; ml; ml = ml->next) {
2580 		uint64_t	nbytes;
2581 
2582 		src_pa = ml->address;
2583 		nbytes = ml->size;
2584 
2585 		while (nbytes != 0ull) {
2586 
2587 			/* copy 32 bytes at arc_pa to dst_pa */
2588 			bcopy32_il(src_pa, dst_pa);
2589 
2590 			/* increment by 32 bytes */
2591 			src_pa += (4 * sizeof (uint64_t));
2592 
2593 			/* decrement by 32 bytes */
2594 			nbytes -= (4 * sizeof (uint64_t));
2595 		}
2596 	}
2597 	memlist_read_unlock();
2598 
2599 	return (NULL);
2600 }
2601 
/*
 * Pass-through command table searched by drmach_passthru().  Each entry
 * pairs a command name with the handler invoked for it; the table is
 * terminated by an entry whose name is NULL.
 */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "readmem",		drmach_pt_readmem		},
	{ "ikprobe",	drmach_pt_ikprobe	},
	{ "ikdeprobe",	drmach_pt_ikdeprobe	},

	/* the following line must always be last */
	{ NULL,			NULL				}
};
2613 
2614 /*ARGSUSED*/
2615 sbd_error_t *
2616 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2617 {
2618 	int		i;
2619 	sbd_error_t	*err;
2620 
2621 	i = 0;
2622 	while (drmach_pt_arr[i].name != NULL) {
2623 		int len = strlen(drmach_pt_arr[i].name);
2624 
2625 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2626 			break;
2627 
2628 		i += 1;
2629 	}
2630 
2631 	if (drmach_pt_arr[i].name == NULL)
2632 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2633 	else
2634 		err = (*drmach_pt_arr[i].handler)(id, opts);
2635 
2636 	return (err);
2637 }
2638 
2639 sbd_error_t *
2640 drmach_release(drmachid_t id)
2641 {
2642 	drmach_common_t *cp;
2643 
2644 	if (!DRMACH_IS_DEVICE_ID(id))
2645 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2646 	cp = id;
2647 
2648 	return (cp->release(id));
2649 }
2650 
2651 sbd_error_t *
2652 drmach_status(drmachid_t id, drmach_status_t *stat)
2653 {
2654 	drmach_common_t *cp;
2655 	sbd_error_t	*err;
2656 
2657 	rw_enter(&drmach_boards_rwlock, RW_READER);
2658 
2659 	if (!DRMACH_IS_ID(id)) {
2660 		rw_exit(&drmach_boards_rwlock);
2661 		return (drerr_new(0, EOPL_NOTID, NULL));
2662 	}
2663 	cp = (drmach_common_t *)id;
2664 	err = cp->status(id, stat);
2665 
2666 	rw_exit(&drmach_boards_rwlock);
2667 
2668 	return (err);
2669 }
2670 
2671 static sbd_error_t *
2672 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2673 {
2674 	drmach_common_t *cp;
2675 
2676 	if (!DRMACH_IS_ID(id))
2677 		return (drerr_new(0, EOPL_NOTID, NULL));
2678 	cp = id;
2679 
2680 	return (cp->status(id, stat));
2681 }
2682 
2683 /*ARGSUSED*/
2684 sbd_error_t *
2685 drmach_unconfigure(drmachid_t id, int flags)
2686 {
2687 	drmach_device_t *dp;
2688 	dev_info_t	*rdip, *fdip = NULL;
2689 	char name[OBP_MAXDRVNAME];
2690 	int rv;
2691 
2692 	if (DRMACH_IS_CPU_ID(id))
2693 		return (NULL);
2694 
2695 	if (!DRMACH_IS_DEVICE_ID(id))
2696 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2697 
2698 	dp = id;
2699 
2700 	rdip = dp->node->n_getdip(dp->node);
2701 
2702 	ASSERT(rdip);
2703 
2704 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2705 
2706 	if (rv)
2707 		return (NULL);
2708 
2709 	/*
2710 	 * Note: FORCE flag is no longer necessary under devfs
2711 	 */
2712 
2713 	ASSERT(e_ddi_branch_held(rdip));
2714 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2715 		sbd_error_t	*err;
2716 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2717 
2718 		/*
2719 		 * If non-NULL, fdip is returned held and must be released.
2720 		 */
2721 		if (fdip != NULL) {
2722 			(void) ddi_pathname(fdip, path);
2723 			ndi_rele_devi(fdip);
2724 		} else {
2725 			(void) ddi_pathname(rdip, path);
2726 		}
2727 
2728 		err = drerr_new(1, EOPL_DRVFAIL, path);
2729 
2730 		kmem_free(path, MAXPATHLEN);
2731 
2732 		return (err);
2733 	}
2734 
2735 	return (NULL);
2736 }
2737 
2738 
2739 int
2740 drmach_cpu_poweron(struct cpu *cp)
2741 {
2742 	int bnum, cpuid, onb_core_num, strand_id;
2743 	drmach_board_t *bp;
2744 
2745 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2746 
2747 	cpuid = cp->cpu_id;
2748 	bnum = LSB_ID(cpuid);
2749 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2750 	strand_id = STRAND_ID(cpuid);
2751 	bp = drmach_get_board_by_bnum(bnum);
2752 
2753 	ASSERT(bp);
2754 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2755 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2756 			HOTADD_CPU) != 0) {
2757 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2758 				onb_core_num, bnum);
2759 			return (EIO);
2760 		}
2761 	}
2762 
2763 	ASSERT(MUTEX_HELD(&cpu_lock));
2764 
2765 	if (drmach_cpu_start(cp) != 0) {
2766 		if (bp->cores[onb_core_num].core_started == 0) {
2767 			/*
2768 			 * we must undo the hotadd or no one will do that
2769 			 * If this fails, we will do this again in
2770 			 * drmach_board_disconnect.
2771 			 */
2772 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2773 				HOTREMOVE_CPU) != 0) {
2774 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2775 					"on board %d\n",
2776 					onb_core_num, bnum);
2777 			}
2778 		}
2779 		return (EBUSY);
2780 	} else {
2781 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2782 		return (0);
2783 	}
2784 }
2785 
2786 int
2787 drmach_cpu_poweroff(struct cpu *cp)
2788 {
2789 	int 		rv = 0;
2790 	processorid_t	cpuid = cp->cpu_id;
2791 
2792 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2793 
2794 	ASSERT(MUTEX_HELD(&cpu_lock));
2795 
2796 	/*
2797 	 * Capture all CPUs (except for detaching proc) to prevent
2798 	 * crosscalls to the detaching proc until it has cleared its
2799 	 * bit in cpu_ready_set.
2800 	 *
2801 	 * The CPU's remain paused and the prom_mutex is known to be free.
2802 	 * This prevents the x-trap victim from blocking when doing prom
2803 	 * IEEE-1275 calls at a high PIL level.
2804 	 */
2805 
2806 	promsafe_pause_cpus();
2807 
2808 	/*
2809 	 * Quiesce interrupts on the target CPU. We do this by setting
2810 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2811 	 * prevent it from receiving cross calls and cross traps.
2812 	 * This prevents the processor from receiving any new soft interrupts.
2813 	 */
2814 	mp_cpu_quiesce(cp);
2815 
2816 	rv = prom_stopcpu_bycpuid(cpuid);
2817 	if (rv == 0)
2818 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2819 
2820 	start_cpus();
2821 
2822 	if (rv == 0) {
2823 		int bnum, onb_core_num, strand_id;
2824 		drmach_board_t *bp;
2825 
2826 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2827 
2828 		bnum = LSB_ID(cpuid);
2829 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2830 		strand_id = STRAND_ID(cpuid);
2831 		bp = drmach_get_board_by_bnum(bnum);
2832 		ASSERT(bp);
2833 
2834 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2835 		if (bp->cores[onb_core_num].core_started == 0) {
2836 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2837 				HOTREMOVE_CPU) != 0) {
2838 				cmn_err(CE_WARN,
2839 					"Failed to remove CMP %d LSB %d\n",
2840 					onb_core_num, bnum);
2841 				return (EIO);
2842 			}
2843 		}
2844 	}
2845 
2846 	return (rv);
2847 }
2848 
2849 /*ARGSUSED*/
2850 int
2851 drmach_verify_sr(dev_info_t *dip, int sflag)
2852 {
2853 	return (0);
2854 }
2855 
/*
 * Suspend hook; intentionally empty in this implementation.
 */
void
drmach_suspend_last(void)
{
}
2860 
/*
 * Resume hook; intentionally empty in this implementation.
 */
void
drmach_resume_first(void)
{
}
2865 
2866 /*
2867  * Log a DR sysevent.
2868  * Return value: 0 success, non-zero failure.
2869  */
2870 int
2871 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2872 {
2873 	sysevent_t			*ev;
2874 	sysevent_id_t			eid;
2875 	int				rv, km_flag;
2876 	sysevent_value_t		evnt_val;
2877 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2878 	char				attach_pnt[MAXNAMELEN];
2879 
2880 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2881 	attach_pnt[0] = '\0';
2882 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2883 		rv = -1;
2884 		goto logexit;
2885 	}
2886 	if (verbose) {
2887 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2888 			attach_pnt, hint, flag, verbose);
2889 	}
2890 
2891 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2892 		SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2893 		rv = -2;
2894 		goto logexit;
2895 	}
2896 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2897 	evnt_val.value.sv_string = attach_pnt;
2898 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
2899 		&evnt_val, km_flag)) != 0)
2900 		goto logexit;
2901 
2902 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2903 	evnt_val.value.sv_string = hint;
2904 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
2905 		&evnt_val, km_flag)) != 0) {
2906 		sysevent_free_attr(evnt_attr_list);
2907 		goto logexit;
2908 	}
2909 
2910 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2911 
2912 	/*
2913 	 * Log the event but do not sleep waiting for its
2914 	 * delivery. This provides insulation from syseventd.
2915 	 */
2916 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2917 
2918 logexit:
2919 	if (ev)
2920 		sysevent_free(ev);
2921 	if ((rv != 0) && verbose)
2922 		cmn_err(CE_WARN,
2923 			"drmach_log_sysevent failed (rv %d) for %s  %s\n",
2924 			rv, attach_pnt, hint);
2925 
2926 	return (rv);
2927 }
2928 
2929 #define	OPL_DR_STATUS_PROP "dr-status"
2930 
2931 static int
2932 opl_check_dr_status()
2933 {
2934 	pnode_t	node;
2935 	int	rtn, len;
2936 	char	*str;
2937 
2938 	node = prom_rootnode();
2939 	if (node == OBP_BADNODE) {
2940 		return (1);
2941 	}
2942 
2943 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2944 	if (len == -1) {
2945 		/*
2946 		 * dr-status doesn't exist when DR is activated and
2947 		 * any warning messages aren't needed.
2948 		 */
2949 		return (1);
2950 	}
2951 
2952 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2953 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2954 	kmem_free(str, len + 1);
2955 	if (rtn == -1) {
2956 		return (1);
2957 	} else {
2958 		return (0);
2959 	}
2960 }
2961 
2962 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2963 
2964 static struct memlist *
2965 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2966 	struct memlist *mlist, uint64_t base, uint64_t len)
2967 {
2968 	struct memlist	*ml, *tl, *nl;
2969 
2970 	if (len == 0ull)
2971 		return (NULL);
2972 
2973 	if (mlist == NULL) {
2974 		mlist = p->free_mlist;
2975 		if (mlist == NULL)
2976 			return (NULL);
2977 		p->free_mlist = mlist->next;
2978 		mlist->address = base;
2979 		mlist->size = len;
2980 		mlist->next = mlist->prev = NULL;
2981 
2982 		return (mlist);
2983 	}
2984 
2985 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
2986 		if (base < ml->address) {
2987 			if ((base + len) < ml->address) {
2988 				nl = p->free_mlist;
2989 				if (nl == NULL)
2990 					return (NULL);
2991 				p->free_mlist = nl->next;
2992 				nl->address = base;
2993 				nl->size = len;
2994 				nl->next = ml;
2995 				if ((nl->prev = ml->prev) != NULL)
2996 					nl->prev->next = nl;
2997 				ml->prev = nl;
2998 				if (mlist == ml)
2999 					mlist = nl;
3000 			} else {
3001 				ml->size = MAX((base + len),
3002 					(ml->address + ml->size)) -
3003 					base;
3004 				ml->address = base;
3005 			}
3006 			break;
3007 
3008 		} else if (base <= (ml->address + ml->size)) {
3009 			ml->size = MAX((base + len),
3010 				(ml->address + ml->size)) -
3011 				MIN(ml->address, base);
3012 			ml->address = MIN(ml->address, base);
3013 			break;
3014 		}
3015 	}
3016 	if (ml == NULL) {
3017 		nl = p->free_mlist;
3018 		if (nl == NULL)
3019 			return (NULL);
3020 		p->free_mlist = nl->next;
3021 		nl->address = base;
3022 		nl->size = len;
3023 		nl->next = NULL;
3024 		nl->prev = tl;
3025 		tl->next = nl;
3026 	}
3027 
3028 	return (mlist);
3029 }
3030 
3031 /*
3032  * The routine performs the necessary memory COPY and MC adr SWITCH.
3033  * Both operations MUST be at the same "level" so that the stack is
3034  * maintained correctly between the copy and switch.  The switch
3035  * portion implements a caching mechanism to guarantee the code text
3036  * is cached prior to execution.  This is to guard against possible
3037  * memory access while the MC adr's are being modified.
3038  *
3039  * IMPORTANT: The _drmach_copy_rename_end() function must immediately
3040  * follow drmach_copy_rename_prog__relocatable() so that the correct
3041  * "length" of the drmach_copy_rename_prog__relocatable can be
3042  * calculated.  This routine MUST be a LEAF function, i.e. it can
3043  * make NO function calls, primarily for two reasons:
3044  *
3045  *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
3050  *
3051  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3052  * providers (and anything else, for that matter) that this
3053  * function's text is manually relocated elsewhere before it is
3054  * executed.  That is, it cannot be safely instrumented with any
3055  * methodology that is PC-relative.
3056  */
3057 
/*
 * Timeout, in seconds, for the rename (FMEM) command.  The value is
 * multiplied by the system clock frequency to obtain the delay.
 *
 * FMEM command itself should complete within 15 sec.
 * We add 2 more sec to be conservative.
 *
 * Note that there is also a SCF BUSY bit check in drmach_asm.s
 * right before the FMEM command is issued.  XSCF sets the SCF
 * BUSY bit when the other domain on the same PSB reboots and it
 * will not be able to service the FMEM command within 15 sec.
 * After setting the SCF BUSY bit, XSCF will wait a while before
 * servicing other reboot commands so there is no race condition.
 */

static int	fmem_timeout = 17;

/*
 *	Minimum copy rate, in bytes per second, assumed when timing
 *	out the copy.  The empirical data on some OPL systems shows
 *	that we can copy 250 MB per second.  We set it to 80 MB to be
 *	conservative.  In the normal case, this timeout does not
 *	affect anything.
 */

static int	min_copy_size_per_sec = 80 * 1024 * 1024;

/*
 *	This is the timeout value, in seconds, for the xcall
 *	synchronization that gets all the CPUs ready to do the
 *	parallel copying.  Even on a fully loaded system, 10 sec.
 *	should be long enough.
 */

static int	cpu_xcall_delay = 10;
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * tunable that disables the multi-CPU copy -- confirm against its users.
 */
int drmach_disable_mcopy = 0;
3097 
/*
 * DR_DELAY_IL(ms, freq): delay for roughly ms milliseconds, where freq is
 * the STICK frequency in ticks per second.
 *
 * The following delay loop executes sleep instruction to yield the
 * CPU to other strands.  If this is not done, some strand will tie
 * up the CPU in busy loops while the other strand cannot do useful
 * work.  The copy procedure will take a much longer time without this.
 *
 * Both arguments are parenthesized so that compound expressions are cast
 * and multiplied as a whole.
 */
#define	DR_DELAY_IL(ms, freq)					\
	{							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;	\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	}
3117 
/*
 * Per-CPU body of the copy-rename operation.
 *
 * prog  - copy-rename program; its data, critical and stat members are
 *	   read and updated throughout.
 * cpuid - id of the CPU executing this instance.  The CPU whose id equals
 *	   prog->data->cpuid acts as the master; every other CPU is a slave.
 *
 * Sequence: flush register windows, rendezvous (slaves post
 * FMEM_LOOP_COPY_READY, the master waits for all of cpu_slave_set), copy
 * this CPU's memlist 32 bytes at a time if it is in cpu_copy_set, flush
 * the cache, then either run critical->fmem (master) or critical->loop
 * (slave).
 *
 * Returns, on the master: EOPL_FMEM_XC_TIMEOUT, EOPL_FMEM_COPY_ERROR,
 * EOPL_FMEM_COPY_TIMEOUT, or the value returned by critical->fmem.
 * On a slave: EOPL_FMEM_TERMINATE if the master flagged an error, else 0
 * (the loop script's result is stored in prog->data->error[cpuid]).
 *
 * This text is copied elsewhere before it is run, so only the inline
 * (_il) helpers and calls through pointers in prog are used here.
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
	int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	extern void		flush_windows_il(void);
	/* assigned and read only on the master path */
	uint64_t		copy_start;

	/*
	 * flush_windows is moved here to make sure all
	 * registers used in the callers are flushed to
	 * memory before the copy.
	 *
	 * If flush_windows() is called too early in the
	 * calling function, the compiler might put some
	 * data in the local registers after flush_windows().
	 * After FMA, if there is any fill trap, the registers
	 * will contain stale data.
	 */

	flush_windows_il();

	/* announce that this CPU has reached the rendezvous point */
	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: wait for every slave, bounded by cpu_xcall_delay */
		limit = drmach_get_stick_il();
		limit += cpu_xcall_delay * system_clock_freq;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			/* wait for all CPU's to be ready */
			    for (;;) {
				if (prog->critical->stat[i] ==
					FMEM_LOOP_COPY_READY) {
					break;
				}
				DR_DELAY_IL(1, prog->data->stick_freq);
			    }
			    /*
			     * NOTE(review): the limit is tested only after
			     * the wait loop above has exited, so a slave
			     * that never posts COPY_READY is not bounded by
			     * it -- confirm whether this is intended.
			     */
			    curr = drmach_get_stick_il();
			    if (curr > limit) {
				prog->data->fmem_status.error =
					EOPL_FMEM_XC_TIMEOUT;
				return (EOPL_FMEM_XC_TIMEOUT);
			    }
			}
		}
		/* release the slaves and timestamp the start of the copy */
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's go-ahead or an error */
		for (;;) {
			if (prog->data->fmem_status.stat ==
				FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] =
					EOPL_FMEM_TERMINATE;
				return (EOPL_FMEM_TERMINATE);
			}
			DR_DELAY_IL(1, prog->data->stick_freq);
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
	    for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* memlist addresses are offsets from the copy base pa */
		s_pa = prog->data->s_copybasepa + ml->address;
		t_pa = prog->data->t_copybasepa + ml->address;
		nbytes = ml->size;

		while (nbytes != 0ull) {
			/* If the master has detected error, we just bail out */
			if (prog->data->fmem_status.error != ESBD_NOERROR) {
				prog->data->error[cpuid] =
					EOPL_FMEM_TERMINATE;
				return (EOPL_FMEM_TERMINATE);
			}
			/*
			 * This copy does NOT use an ASI
			 * that avoids the Ecache, therefore
			 * the dst_pa addresses may remain
			 * in our Ecache after the dst_pa
			 * has been removed from the system.
			 * A subsequent write-back to memory
			 * will cause an ARB-stop because the
			 * physical address no longer exists
			 * in the system. Therefore we must
			 * flush out local Ecache after we
			 * finish the copy.
			 */

			/* copy 32 bytes at src_pa to dst_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment the counter to signal that we are alive */
			prog->stat->nbytes[cpuid] += 32;

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));
		}
	    }
	    prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
	    membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.
	 */
	flush_cache_il();

	/*
	 * Master: wait for the slaves, then run the FMEM script.
	 * Slave: run the loop script.
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* slaves get copy_delay ticks from copy_start to finish */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			    for (;;) {
				/* we get FMEM_LOOP_FMEM_READY in normal case */
				if (prog->critical->stat[i] ==
					FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->data->error[i] ==
					EOPL_FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
						EOPL_FMEM_COPY_ERROR;
					return (EOPL_FMEM_COPY_ERROR);
				}
				/* if we have not reached limit, wait more */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				/* past the limit: record the straggler */
				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time =
					curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/* no progress, perhaps just finished */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
						FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->data->error[i] ==
						EOPL_FMEM_COPY_ERROR) {
						prog->data->fmem_status.error =
							EOPL_FMEM_COPY_ERROR;
						return (EOPL_FMEM_COPY_ERROR);
					}
					prog->data->fmem_status.error =
					    EOPL_FMEM_COPY_TIMEOUT;
					return (EOPL_FMEM_COPY_TIMEOUT);
				}
			    }
			}
		}

		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 */
		rtn = prog->critical->loop((void *)(prog->critical),
			PAGESIZE, (void *)&(prog->critical->stat[cpuid]));
		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}
3326 
/*
 * Empty marker function.  Its address is used as the end of
 * drmach_copy_rename_prog__relocatable when that function's text length
 * is computed.
 */
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}
3338 
3339 
3340 static void
3341 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3342 {
3343 	struct memlist *ml;
3344 	caddr_t buf;
3345 	int nbytes, s;
3346 
3347 	nbytes = PAGESIZE;
3348 	s = roundup(sizeof (struct memlist), sizeof (void *));
3349 	p->free_mlist = NULL;
3350 	buf = p->memlist_buffer;
3351 	while (nbytes >= sizeof (struct memlist)) {
3352 		ml = (struct memlist *)buf;
3353 		ml->next = p->free_mlist;
3354 		p->free_mlist = ml;
3355 		buf += s;
3356 		nbytes -= s;
3357 	}
3358 }
3359 
3360 static void
3361 drmach_lock_critical(caddr_t va, caddr_t new_va)
3362 {
3363 	tte_t tte;
3364 	int i;
3365 
3366 	kpreempt_disable();
3367 
3368 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3369 		vtag_flushpage(new_va, (uint64_t)ksfmmup);
3370 		sfmmu_memtte(&tte, va_to_pfn(va),
3371 			PROC_DATA|HAT_NOSYNC, TTE8K);
3372 		tte.tte_intlo |= TTE_LCK_INT;
3373 		sfmmu_dtlb_ld_kva(new_va, &tte);
3374 		sfmmu_itlb_ld_kva(new_va, &tte);
3375 		va += PAGESIZE;
3376 		new_va += PAGESIZE;
3377 	}
3378 }
3379 
3380 static void
3381 drmach_unlock_critical(caddr_t va)
3382 {
3383 	int i;
3384 
3385 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3386 		vtag_flushpage(va, (uint64_t)ksfmmup);
3387 		va += PAGESIZE;
3388 	}
3389 
3390 	kpreempt_enable();
3391 }
3392 
3393 sbd_error_t *
3394 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3395 	struct memlist *c_ml, drmachid_t *pgm_id)
3396 {
3397 	drmach_mem_t	*s_mem;
3398 	drmach_mem_t	*t_mem;
3399 	struct memlist	*x_ml;
3400 	uint64_t	s_copybasepa, t_copybasepa;
3401 	uint_t		len;
3402 	caddr_t		bp, wp;
3403 	int			s_bd, t_bd, cpuid, active_cpus, i;
3404 	uint64_t		c_addr;
3405 	size_t			c_size, copy_sz, sz;
3406 	extern void		drmach_fmem_loop_script();
3407 	extern void		drmach_fmem_loop_script_rtn();
3408 	extern int		drmach_fmem_exec_script();
3409 	extern void		drmach_fmem_exec_script_end();
3410 	sbd_error_t	*err;
3411 	drmach_copy_rename_program_t *prog = NULL;
3412 	drmach_copy_rename_program_t *prog_kmem = NULL;
3413 	void		(*mc_suspend)(void);
3414 	void		(*mc_resume)(void);
3415 	int		(*scf_fmem_start)(int, int);
3416 	int		(*scf_fmem_end)(void);
3417 	int		(*scf_fmem_cancel)(void);
3418 	uint64_t	(*scf_get_base_addr)(void);
3419 
3420 	if (!DRMACH_IS_MEM_ID(s_id))
3421 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3422 	if (!DRMACH_IS_MEM_ID(t_id))
3423 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3424 
3425 	for (i = 0; i < NCPU; i++) {
3426 		int lsb_id, onb_core_num, strand_id;
3427 		drmach_board_t *bp;
3428 
3429 		/*
3430 		 * this kind of CPU will spin in cache
3431 		 */
3432 		if (CPU_IN_SET(cpu_ready_set, i))
3433 			continue;
3434 
3435 		/*
3436 		 * Now check for any inactive CPU's that
3437 		 * have been hotadded.  This can only occur in
3438 		 * error condition in drmach_cpu_poweron().
3439 		 */
3440 		lsb_id = LSB_ID(i);
3441 		onb_core_num = ON_BOARD_CORE_NUM(i);
3442 		strand_id = STRAND_ID(i);
3443 		bp = drmach_get_board_by_bnum(lsb_id);
3444 		if (bp == NULL)
3445 			continue;
3446 		if (bp->cores[onb_core_num].core_hotadded &
3447 		    (1 << strand_id)) {
3448 		    if (!(bp->cores[onb_core_num].core_started &
3449 			(1 << strand_id))) {
3450 			return (drerr_new(1, EOPL_CPU_STATE, NULL));
3451 		    }
3452 		}
3453 	}
3454 
3455 	mc_suspend = (void (*)(void))
3456 	    modgetsymvalue("opl_mc_suspend", 0);
3457 	mc_resume = (void (*)(void))
3458 	    modgetsymvalue("opl_mc_resume", 0);
3459 
3460 	if (mc_suspend == NULL || mc_resume == NULL) {
3461 		return (drerr_new(1, EOPL_MC_OPL, NULL));
3462 	}
3463 
3464 	scf_fmem_start = (int (*)(int, int))
3465 	    modgetsymvalue("scf_fmem_start", 0);
3466 	if (scf_fmem_start == NULL) {
3467 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3468 	}
3469 	scf_fmem_end = (int (*)(void))
3470 	    modgetsymvalue("scf_fmem_end", 0);
3471 	if (scf_fmem_end == NULL) {
3472 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3473 	}
3474 	scf_fmem_cancel = (int (*)(void))
3475 	    modgetsymvalue("scf_fmem_cancel", 0);
3476 	if (scf_fmem_cancel == NULL) {
3477 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3478 	}
3479 	scf_get_base_addr = (uint64_t (*)(void))
3480 	    modgetsymvalue("scf_get_base_addr", 0);
3481 	if (scf_get_base_addr == NULL) {
3482 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3483 	}
3484 	s_mem = s_id;
3485 	t_mem = t_id;
3486 
3487 	s_bd = s_mem->dev.bp->bnum;
3488 	t_bd = t_mem->dev.bp->bnum;
3489 
3490 	/* calculate source and target base pa */
3491 
3492 	s_copybasepa = s_mem->slice_base;
3493 	t_copybasepa = t_mem->slice_base;
3494 
3495 	/* adjust copy memlist addresses to be relative to copy base pa */
3496 	x_ml = c_ml;
3497 	while (x_ml != NULL) {
3498 		x_ml->address -= s_copybasepa;
3499 		x_ml = x_ml->next;
3500 	}
3501 
3502 	/*
3503 	 * bp will be page aligned, since we're calling
3504 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3505 	 */
3506 
3507 	prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
3508 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
3509 
3510 	prog_kmem->prog = prog_kmem;
3511 
3512 	/*
3513 	 * To avoid MTLB hit, we allocate a new VM space and remap
3514 	 * the kmem_alloc buffer to that address.  This solves
3515 	 * 2 problems we found:
3516 	 * - the kmem_alloc buffer can be just a chunk inside
3517 	 *   a much larger, e.g. 4MB buffer and MTLB will occur
3518 	 *   if there are both a 4MB and a 8K TLB mapping to
3519 	 *   the same VA range.
3520 	 * - the kmem mapping got dropped into the TLB by other
3521 	 *   strands, unintentionally.
3522 	 * Note that the pointers like data, critical, memlist_buffer,
3523 	 * and stat inside the copy rename structure are mapped to this
3524 	 * alternate VM space so we must make sure we lock the TLB mapping
3525 	 * whenever we access data pointed to by these pointers.
3526 	 */
3527 
3528 	prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
3529 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
3530 	wp = bp = (caddr_t)prog;
3531 
3532 	/* Now remap prog_kmem to prog */
3533 	drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3534 
3535 	/* All pointers in prog are based on the alternate mapping */
3536 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3537 		sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3538 
3539 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3540 		<= ((uint64_t)prog + PAGESIZE));
3541 
3542 	prog->critical = (drmach_copy_rename_critical_t *)
3543 		(wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3544 
3545 	prog->memlist_buffer = (caddr_t)(wp +
3546 		DRMACH_FMEM_MLIST_PAGE * PAGESIZE);
3547 
3548 	prog->stat = (drmach_cr_stat_t *)(wp +
3549 		DRMACH_FMEM_STAT_PAGE * PAGESIZE);
3550 
3551 	/* LINTED */
3552 	ASSERT(sizeof (drmach_cr_stat_t)
3553 		<= ((DRMACH_FMEM_LOCKED_PAGES - DRMACH_FMEM_STAT_PAGE)
3554 		* PAGESIZE));
3555 
3556 	prog->critical->scf_reg_base = (uint64_t)-1;
3557 	prog->critical->scf_td[0] = (s_bd & 0xff);
3558 	prog->critical->scf_td[1] = (t_bd & 0xff);
3559 	for (i = 2; i < 15; i++) {
3560 		prog->critical->scf_td[i]   = 0;
3561 	}
3562 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3563 
3564 	bp = (caddr_t)prog->critical;
3565 	len = sizeof (drmach_copy_rename_critical_t);
3566 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3567 
3568 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3569 		(ulong_t)drmach_copy_rename_prog__relocatable);
3570 
3571 	/*
3572 	 * We always leave 1K nop's to prevent the processor from
3573 	 * speculative execution that causes memory access
3574 	 */
3575 	wp = wp + len + 1024;
3576 
3577 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3578 		(ulong_t)drmach_fmem_exec_script);
3579 	/* this is the entry point of the loop script */
3580 	wp = wp + len + 1024;
3581 
3582 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3583 		(ulong_t)drmach_fmem_loop_script);
3584 	wp = wp + len + 1024;
3585 
3586 	/* now we make sure there is 1K extra */
3587 
3588 	if ((wp - bp) > PAGESIZE) {
3589 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3590 		goto out;
3591 	}
3592 
3593 	bp = (caddr_t)prog->critical;
3594 	len = sizeof (drmach_copy_rename_critical_t);
3595 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3596 
3597 	prog->critical->run = (int (*)())(wp);
3598 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3599 		(ulong_t)drmach_copy_rename_prog__relocatable);
3600 
3601 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3602 
3603 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3604 
3605 	prog->critical->fmem = (int (*)())(wp);
3606 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3607 		(ulong_t)drmach_fmem_exec_script);
3608 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3609 
3610 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3611 		(ulong_t)drmach_fmem_exec_script);
3612 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3613 
3614 	prog->critical->loop = (int (*)())(wp);
3615 	len = (int)((ulong_t)drmach_fmem_exec_script -
3616 		(ulong_t)drmach_fmem_loop_script);
3617 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3618 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3619 		(ulong_t)drmach_fmem_loop_script);
3620 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3621 
3622 	prog->data->fmem_status.error = ESBD_NOERROR;
3623 
3624 	/* now we are committed, call SCF, soft suspend mac patrol */
3625 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3626 		err = drerr_new(1, EOPL_SCF_FMEM_START, NULL);
3627 		goto out;
3628 	}
3629 	prog->data->scf_fmem_end = scf_fmem_end;
3630 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3631 	prog->data->scf_get_base_addr = scf_get_base_addr;
3632 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3633 
3634 	/* soft suspend mac patrol */
3635 	(*mc_suspend)();
3636 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3637 	prog->data->mc_resume = mc_resume;
3638 
3639 	prog->critical->inst_loop_ret  =
3640 		*(uint64_t *)(prog->critical->loop_rtn);
3641 
3642 	/*
3643 	 * 0x30800000 is op code "ba,a	+0"
3644 	 */
3645 
3646 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3647 
3648 	/*
3649 	 * set the value of SCF FMEM TIMEOUT
3650 	 */
3651 	prog->critical->delay = fmem_timeout * system_clock_freq;
3652 
3653 	prog->data->s_mem = (drmachid_t)s_mem;
3654 	prog->data->t_mem = (drmachid_t)t_mem;
3655 
3656 	cpuid = CPU->cpu_id;
3657 	prog->data->cpuid = cpuid;
3658 	prog->data->cpu_ready_set = cpu_ready_set;
3659 	prog->data->cpu_slave_set = cpu_ready_set;
3660 	prog->data->slowest_cpuid = (processorid_t)-1;
3661 	prog->data->copy_wait_time = 0;
3662 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3663 
3664 	for (i = 0; i < NCPU; i++) {
3665 		prog->data->cpu_ml[i] = NULL;
3666 	}
3667 
3668 	active_cpus = 0;
3669 	if (drmach_disable_mcopy) {
3670 		active_cpus = 1;
3671 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3672 	} else {
3673 		for (i = 0; i < NCPU; i++) {
3674 			if (CPU_IN_SET(cpu_ready_set, i) &&
3675 				CPU_ACTIVE(cpu[i])) {
3676 				CPUSET_ADD(prog->data->cpu_copy_set, i);
3677 				active_cpus++;
3678 			}
3679 		}
3680 	}
3681 
3682 	drmach_setup_memlist(prog);
3683 
3684 	x_ml = c_ml;
3685 	sz = 0;
3686 	while (x_ml != NULL) {
3687 		sz += x_ml->size;
3688 		x_ml = x_ml->next;
3689 	}
3690 
3691 	copy_sz = sz/active_cpus;
3692 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3693 
3694 	while (sz > copy_sz*active_cpus) {
3695 		copy_sz += MMU_PAGESIZE4M;
3696 	}
3697 
3698 	prog->data->stick_freq = system_clock_freq;
3699 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3700 		system_clock_freq;
3701 
3702 	x_ml = c_ml;
3703 	c_addr = x_ml->address;
3704 	c_size = x_ml->size;
3705 
3706 	for (i = 0; i < NCPU; i++) {
3707 		prog->stat->nbytes[i] = 0;
3708 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3709 			continue;
3710 		}
3711 		sz = copy_sz;
3712 
3713 		while (sz) {
3714 			if (c_size > sz) {
3715 				prog->data->cpu_ml[i] =
3716 					drmach_memlist_add_span(prog,
3717 					prog->data->cpu_ml[i],
3718 					c_addr, sz);
3719 				c_addr += sz;
3720 				c_size -= sz;
3721 				break;
3722 			} else {
3723 				sz -= c_size;
3724 				prog->data->cpu_ml[i] = drmach_memlist_add_span(
3725 					prog, prog->data->cpu_ml[i],
3726 						c_addr, c_size);
3727 				x_ml = x_ml->next;
3728 				if (x_ml != NULL) {
3729 					c_addr = x_ml->address;
3730 					c_size = x_ml->size;
3731 				} else {
3732 					goto end;
3733 				}
3734 			}
3735 		}
3736 	}
3737 end:
3738 	prog->data->s_copybasepa = s_copybasepa;
3739 	prog->data->t_copybasepa = t_copybasepa;
3740 	prog->data->c_ml = c_ml;
3741 	*pgm_id = prog_kmem;
3742 
3743 	/* Unmap the alternate space.  It will have to be remapped again */
3744 	drmach_unlock_critical((caddr_t)prog);
3745 	return (NULL);
3746 out:
3747 	if (prog != NULL) {
3748 		drmach_unlock_critical((caddr_t)prog);
3749 		vmem_free(heap_arena, prog,
3750 			DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3751 	}
3752 	if (prog_kmem != NULL) {
3753 		kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3754 	}
3755 	return (err);
3756 }
3757 
3758 sbd_error_t *
3759 drmach_copy_rename_fini(drmachid_t id)
3760 {
3761 	drmach_copy_rename_program_t	*prog = id;
3762 	sbd_error_t			*err = NULL;
3763 	int				rv;
3764 	uint_t				fmem_error;
3765 
3766 	/*
3767 	 * Note that we have to delay calling SCF to find out the
3768 	 * status of the FMEM operation here because SCF cannot
3769 	 * respond while it is suspended.
3770 	 * This create a small window when we are sure about the
3771 	 * base address of the system board.
3772 	 * If there is any call to mc-opl to get memory unum,
3773 	 * mc-opl will return UNKNOWN as the unum.
3774 	 */
3775 
3776 	/*
3777 	 * we have to remap again because all the pointer like data,
3778 	 * critical in prog are based on the alternate vmem space.
3779 	 */
3780 	(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
3781 
3782 	if (prog->data->c_ml != NULL)
3783 		memlist_delete(prog->data->c_ml);
3784 
3785 	if ((prog->data->fmem_status.op &
3786 		(OPL_FMEM_SCF_START| OPL_FMEM_MC_SUSPEND)) !=
3787 		(OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3788 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: "
3789 			"invalid op code %x\n",
3790 				prog->data->fmem_status.op);
3791 	}
3792 
3793 	fmem_error = prog->data->fmem_status.error;
3794 	if (fmem_error != ESBD_NOERROR) {
3795 		err = drerr_new(1, fmem_error, NULL);
3796 	}
3797 
3798 	/* possible ops are SCF_START, MC_SUSPEND */
3799 	if (prog->critical->fmem_issued) {
3800 		if (fmem_error != ESBD_NOERROR) {
3801 		    cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n",
3802 			fmem_error);
3803 		}
3804 		rv = (*prog->data->scf_fmem_end)();
3805 		if (rv) {
3806 			cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
3807 		}
3808 		/*
3809 		 * If we get here, rename is successful.
3810 		 * Do all the copy rename post processing.
3811 		 */
3812 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3813 			(drmach_mem_t *)prog->data->t_mem);
3814 	} else {
3815 		rv = (*prog->data->scf_fmem_cancel)();
3816 		if (rv) {
3817 		    cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv);
3818 		    if (!err)
3819 			err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
3820 			    "scf_fmem_cancel() failed. rv = 0x%x", rv);
3821 		}
3822 	}
3823 	/* soft resume mac patrol */
3824 	(*prog->data->mc_resume)();
3825 
3826 	drmach_unlock_critical((caddr_t)prog->locked_prog);
3827 
3828 	vmem_free(heap_arena, prog->locked_prog,
3829 		DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3830 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3831 	return (err);
3832 }
3833 
/*
 * Slave-CPU side of the copy-rename operation.
 *
 * drmach_copy_rename() dispatches this routine to every other CPU in the
 * ready set via xt_some()/drmach_sys_trap, passing the alternate-mapping
 * address of the copy-rename program as id.  rp is not used.
 *
 * The routine runs the relocated program (prog->critical->run) under
 * on_trap(OT_DATA_EC) protection and, on every exit path, stores
 * FMEM_LOOP_EXIT into prog->critical->stat[cpuid]; the master polls that
 * slot to learn that this CPU has left the program.  If the protected
 * region traps, EOPL_FMEM_COPY_ERROR is recorded in
 * prog->data->error[cpuid] first.
 */
/*ARGSUSED*/
static void
drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
{
	drmach_copy_rename_program_t	*prog =
		(drmach_copy_rename_program_t *)id;
	register int			cpuid;
	extern void			drmach_flush();
	extern void			membar_sync_il();
	extern void			drmach_flush_icache();
	on_trap_data_t			otd;

	cpuid = CPU->cpu_id;

	/*
	 * A non-zero return from on_trap() is the trap path: drop the
	 * protection, record the copy error for this CPU and report that
	 * this CPU is out of the loop.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		no_trap();
		prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR;
		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
		drmach_flush_icache();
		membar_sync_il();
		return;
	}


	/*
	 * jmp drmach_copy_rename_prog().
	 */

	/* The return value of the program is deliberately ignored here. */
	drmach_flush(prog->critical, PAGESIZE);
	(void) prog->critical->run(prog, cpuid);
	drmach_flush_icache();

	no_trap();

	/* Tell the master that this CPU has left the program. */
	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;

	membar_sync_il();
}
3872 
3873 static void
3874 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3875 {
3876 	uint64_t s_base, t_base;
3877 	drmach_board_t *s_board, *t_board;
3878 	struct memlist *ml;
3879 
3880 	s_board = s_mem->dev.bp;
3881 	t_board = t_mem->dev.bp;
3882 	if (s_board == NULL || t_board == NULL) {
3883 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3884 		return;
3885 	}
3886 	s_base = s_mem->slice_base;
3887 	t_base = t_mem->slice_base;
3888 
3889 	s_mem->slice_base = t_base;
3890 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3891 
3892 	for (ml = s_mem->memlist; ml; ml = ml->next) {
3893 		ml->address = ml->address - s_base + t_base;
3894 	}
3895 
3896 	t_mem->slice_base = s_base;
3897 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3898 
3899 	for (ml = t_mem->memlist; ml; ml = ml->next) {
3900 		ml->address = ml->address - t_base + s_base;
3901 	}
3902 
3903 	/*
3904 	 * IKP has to update the sb-mem-ranges for mac patrol driver
3905 	 * when it resumes, it will re-read the sb-mem-range property
3906 	 * to get the new base address
3907 	 */
3908 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
3909 		cmn_err(CE_PANIC, "Could not update device nodes\n");
3910 }
3911 
/*
 * Master-CPU side of the copy-rename operation.
 *
 * id is the kmem-backed copy-rename program; its locked_prog field is
 * the alternate-mapping address through which all pointers inside the
 * program (data, critical, stat) must be followed.
 *
 * Sequence, as implemented below:
 *   1. Lock the alternate mapping on this CPU and fetch the SCF register
 *      base; bail out with EOPL_FMEM_SCF_ERR if it is unusable.
 *   2. Reset per-CPU status/error slots for every CPU in cpu_ready_set.
 *   3. Lock the mapping on every other ready CPU and start
 *      drmach_copy_rename_slave() on them.
 *   4. Run the relocated program on this CPU under on_trap() protection.
 *   5. Patch the loop-return instruction back so spinning slaves can
 *      leave, then wait for each slave to report FMEM_LOOP_EXIT.
 *   6. Unlock the mapping on the slaves, then record the result.
 *
 * Nothing is returned; the outcome is left in
 * prog->data->fmem_status.error for drmach_copy_rename_fini() to read.
 * Hardware errors and hung slave CPUs panic the system.
 */
void
drmach_copy_rename(drmachid_t id)
{
	drmach_copy_rename_program_t	*prog_kmem = id;
	drmach_copy_rename_program_t	*prog;
	cpuset_t	cpuset;
	int		cpuid;
	uint64_t	inst;
	register int	rtn;
	extern int	in_sync;
	int		old_in_sync;
	extern void	drmach_sys_trap();
	extern void	drmach_flush();
	extern void	drmach_flush_icache();
	extern uint64_t	patch_inst(uint64_t *, uint64_t);
	on_trap_data_t	otd;


	prog = prog_kmem->locked_prog;


	/*
	 * We must immediately drop in the TLB because all pointers
	 * are based on the alternate vmem space.
	 */

	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);

	/*
	 * we call scf to get the base address here because if scf
	 * has not been suspended yet, the active path can be changing and
	 * sometimes it is not even mapped.  We call the interface when
	 * the OS has been quiesced.
	 */
	prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();

	/* Both (uint64_t)-1 and 0 are treated as "no usable base address". */
	if (prog->critical->scf_reg_base == (uint64_t)-1 ||
		prog->critical->scf_reg_base == NULL) {
		prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR;
		drmach_unlock_critical((caddr_t)prog);
		return;
	}

	cpuset = prog->data->cpu_ready_set;

	/* Reset the status and error slot of every participating CPU. */
	for (cpuid = 0; cpuid < NCPU; cpuid++) {
		if (CPU_IN_SET(cpuset, cpuid)) {
			prog->critical->stat[cpuid] = FMEM_LOOP_START;
			prog->data->error[cpuid] = ESBD_NOERROR;
		}
	}

	/* in_sync is forced to 1 for the duration and restored below. */
	old_in_sync = in_sync;
	in_sync = 1;
	cpuid = CPU->cpu_id;

	/* From here on cpuset holds only the slave CPUs. */
	CPUSET_DEL(cpuset, cpuid);

	/* Establish the alternate mapping on each slave CPU. */
	for (cpuid = 0; cpuid < NCPU; cpuid++) {
		if (CPU_IN_SET(cpuset, cpuid)) {
			xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
				(uint64_t)prog_kmem, (uint64_t)prog);
		}
	}

	/* The loop above clobbered cpuid; reload this CPU's id. */
	cpuid = CPU->cpu_id;

	/* Start drmach_copy_rename_slave() on all slave CPUs. */
	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
				(uint64_t)drmach_copy_rename_slave,
				(uint64_t)prog);
	xt_sync(cpuset);

	/*
	 * A non-zero return from on_trap() is the trap path: report a
	 * copy error and skip running (the rest of) the program.
	 */
	if (on_trap(&otd, OT_DATA_EC)) {
		rtn = EOPL_FMEM_COPY_ERROR;
		drmach_flush_icache();
		goto done;
	}

	/*
	 * jmp drmach_copy_rename_prog().
	 */

	drmach_flush(prog->critical, PAGESIZE);
	rtn = prog->critical->run(prog, cpuid);
	drmach_flush_icache();


done:
	no_trap();
	if (rtn == EOPL_FMEM_HW_ERROR) {
		/*
		 * NOTE(review): presumably balances the kpreempt count held
		 * by drmach_lock_critical() before panicking -- confirm.
		 */
		kpreempt_enable();
		prom_panic("URGENT_ERROR_TRAP is "
			"detected during FMEM.\n");
	}

	/*
	 * In normal case, all slave CPU's are still spinning in
	 * the assembly code.  The master has to patch the instruction
	 * to get them out.
	 * In error case, e.g. COPY_ERROR, some slave CPU's might
	 * have aborted and already returned and set LOOP_EXIT status.
	 * Some CPU might still be copying.
	 * In any case, some delay is necessary to give them
	 * enough time to set the LOOP_EXIT status.
	 */

	/*
	 * Put the saved instruction (inst_loop_ret) back at loop_rtn,
	 * repeating until patch_inst() hands back that same value.
	 */
	for (;;) {
		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
			prog->critical->inst_loop_ret);
		if (prog->critical->inst_loop_ret == inst) {
			break;
		}
	}

	for (cpuid = 0; cpuid < NCPU; cpuid++) {
		uint64_t	last, now;
		if (!CPU_IN_SET(cpuset, cpuid)) {
			continue;
		}
		last = prog->stat->nbytes[cpuid];
		/*
		 * Wait for all CPU to exit.
		 * However we do not want an infinite loop
		 * so we detect hangup situation here.
		 * If the slave CPU is still copying data,
		 * we will continue to wait.
		 * In error cases, the master has already set
		 * fmem_status.error to abort the copying.
		 * 1 m.s delay for them to abort copying and
		 * return to drmach_copy_rename_slave to set
		 * FMEM_LOOP_EXIT status should be enough.
		 */
		for (;;) {
			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
				break;
			drmach_sleep_il();
			drv_usecwait(1000);
			now = prog->stat->nbytes[cpuid];
			/*
			 * No progress in the byte counter: give the CPU one
			 * more wait, then declare it hung.
			 */
			if (now <= last) {
			    drv_usecwait(1000);
			    if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
				break;
			    cmn_err(CE_PANIC,
				"CPU %d hang during Copy Rename", cpuid);
			}
			last = now;
		}
		/* A hardware error reported by a slave is fatal as well. */
		if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) {
			prom_panic("URGENT_ERROR_TRAP is "
				"detected during FMEM.\n");
		}
	}

	/*
	 * This must be done after all strands have exited.
	 * Removing the TLB entry will affect both strands
	 * in the same core.
	 */

	for (cpuid = 0; cpuid < NCPU; cpuid++) {
		if (CPU_IN_SET(cpuset, cpuid)) {
			xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
				(uint64_t)prog, 0);
		}
	}

	in_sync = old_in_sync;

	/*
	 * we should unlock before the following lock to keep the kpreempt
	 * count correct.
	 */
	(void) drmach_unlock_critical((caddr_t)prog);

	/*
	 * we must remap again.  TLB might have been removed in above xcall.
	 */

	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);

	/* Keep an error that was already recorded; otherwise store rtn. */
	if (prog->data->fmem_status.error == ESBD_NOERROR)
		prog->data->fmem_status.error = rtn;

	if (prog->data->copy_wait_time > 0) {
		DRMACH_PR("Unexpected long wait time %ld seconds "
			"during copy rename on CPU %d\n",
			prog->data->copy_wait_time/prog->data->stick_freq,
			prog->data->slowest_cpuid);
	}
	drmach_unlock_critical((caddr_t)prog);
}
4103