xref: /titanic_41/usr/src/uts/sun4u/opl/io/drmach.c (revision 7801e5e8b5bc4af34929c54a02cfb78398da08dd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/debug.h>
28 #include <sys/types.h>
29 #include <sys/varargs.h>
30 #include <sys/errno.h>
31 #include <sys/cred.h>
32 #include <sys/dditypes.h>
33 #include <sys/devops.h>
34 #include <sys/modctl.h>
35 #include <sys/poll.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/sunndi.h>
40 #include <sys/ndi_impldefs.h>
41 #include <sys/stat.h>
42 #include <sys/kmem.h>
43 #include <sys/vmem.h>
44 #include <sys/opl_olympus_regs.h>
45 #include <sys/cpuvar.h>
46 #include <sys/cpupart.h>
47 #include <sys/mem_config.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/systm.h>
50 #include <sys/machsystm.h>
51 #include <sys/autoconf.h>
52 #include <sys/cmn_err.h>
53 #include <sys/sysmacros.h>
54 #include <sys/x_call.h>
55 #include <sys/promif.h>
56 #include <sys/prom_plat.h>
57 #include <sys/membar.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/mem_cage.h>
60 #include <sys/stack.h>
61 #include <sys/archsystm.h>
62 #include <vm/hat_sfmmu.h>
63 #include <sys/pte.h>
64 #include <sys/mmu.h>
65 #include <sys/cpu_module.h>
66 #include <sys/obpdefs.h>
67 #include <sys/note.h>
68 #include <sys/ontrap.h>
69 #include <sys/cpu_sgnblk_defs.h>
70 #include <sys/opl.h>
71 #include <sys/cpu_impl.h>
72 
73 
74 #include <sys/promimpl.h>
75 #include <sys/prom_plat.h>
76 #include <sys/kobj.h>
77 
78 #include <sys/sysevent.h>
79 #include <sys/sysevent/dr.h>
80 #include <sys/sysevent/eventdefs.h>
81 
82 #include <sys/drmach.h>
83 #include <sys/dr_util.h>
84 
85 #include <sys/fcode.h>
86 #include <sys/opl_cfg.h>
87 
/*
 * Routines defined outside this file.  NOTE(review): the "_il" suffix
 * suggests inline-assembly templates -- confirm against the platform
 * sources before relying on that.
 */
extern void		bcopy32_il(uint64_t, uint64_t);
extern void		flush_cache_il(void);
extern void		drmach_sleep_il(void);
91 
/*
 * Argument bundle handed to every per-node callback of a drmach_node
 * walk: the node object being walked and the caller's opaque cookie.
 */
typedef struct {
	struct drmach_node	*node;
	void			*data;
} drmach_node_walk_args_t;
96 
/*
 * Device tree node abstraction.  "here" holds the current position; the
 * DDI implementation in this file stores a dev_info_t pointer in it.
 * The remaining members are the operations of the node implementation
 * and are filled in by drmach_node_new().
 */
typedef struct drmach_node {
	void		*here;

	pnode_t		(*get_dnode)(struct drmach_node *node);
	int		(*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	int		(*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	int		(*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	int		(*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
111 
/*
 * Bounds-checked array of drmachid_t values indexed from min_index to
 * max_index inclusive.  arr_sz is the size of "arr" in bytes, as needed
 * by kmem_free().
 */
typedef struct {
	int		 min_index;
	int		 max_index;
	int		 arr_sz;
	drmachid_t	*arr;
} drmach_array_t;
118 
/*
 * Header shared by every drmach object (boards and devices).  "isa" is
 * a type tag: each constructor stores its own address there and the
 * DRMACH_IS_*_ID() macros compare against it.
 */
typedef struct {
	void		*isa;

	void		(*dispose)(drmachid_t);
	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];
} drmach_common_t;
128 
/*
 * Per-core strand bookkeeping.  Each member is a bitmask indexed by
 * strand id (bit = 1 << STRAND_ID(cpuid)).
 */
typedef	struct {
	uint32_t	core_present;
	uint32_t	core_hotadded;
	uint32_t	core_started;
} drmach_cmp_t;
134 
/*
 * A system board.  Created by drmach_board_new(), which seeds assigned,
 * powered and connected from its boot_board argument and registers the
 * object in drmach_boards under bnum.
 */
typedef struct {
	drmach_common_t	 cm;
	int		 bnum;
	int		 assigned;
	int		 powered;
	int		 connected;
	int		 cond;
	drmach_node_t	*tree;
	drmach_array_t	*devices;
	int		boot_board;	/* if board exists on bootup */
	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD];
} drmach_board_t;
147 
/*
 * State common to every device object on a board.  drmach_device_new()
 * builds a prototype of this structure (type, bp, node, portid) and the
 * device specific constructor copies it and fills in the rest,
 * including unum.
 */
typedef struct {
	drmach_common_t	 cm;
	drmach_board_t	*bp;
	int		 unum;
	int		portid;
	int		 busy;
	int		 powered;
	const char	*type;
	drmach_node_t	*node;
} drmach_device_t;
158 
/*
 * CPU device object.  NOTE(review): the code that fills in sb, chipid,
 * coreid, strandid and status is outside this part of the file;
 * OPL_CPU_HOTADDED is presumably a value for "status" -- confirm.
 */
typedef struct drmach_cpu {
	drmach_device_t  dev;
	processorid_t    cpuid;
	int		sb;
	int		chipid;
	int		coreid;
	int		strandid;
	int		status;
#define	OPL_CPU_HOTADDED	1
} drmach_cpu_t;
169 
/*
 * Memory device object.  slice_base and slice_size come from the first
 * two cells of the "sb-mem-ranges" property; base_pa, nbytes and
 * memlist are computed by drmach_setup_mc_info().
 */
typedef struct drmach_mem {
	drmach_device_t  dev;
	uint64_t	slice_base;
	uint64_t	slice_size;
	uint64_t	base_pa;	/* lowest installed memory base */
	uint64_t	nbytes;		/* size of installed memory */
	struct memlist *memlist;
} drmach_mem_t;
178 
/*
 * I/O device object.  drmach_io_new() derives both members from the
 * port id: channel is bits 3:1 and leaf is bit 0.
 */
typedef struct drmach_io {
	drmach_device_t  dev;
	int	channel;
	int	leaf;
} drmach_io_t;
184 
/*
 * Domain wide DR state, filled in by drmach_init().  "floating" is a
 * bitmask of board numbers taken from the root node's "floating-boards"
 * property; allow_dr is the result of opl_check_dr_status().
 */
typedef struct drmach_domain_info {
	uint32_t	floating;
	int		allow_dr;
} drmach_domain_info_t;

drmach_domain_info_t drmach_domain;
191 
/*
 * NOTE(review): no user of this structure is visible in this part of
 * the file; the name suggests it carries arguments for a device
 * configure operation -- confirm against its users.
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;
	sbd_error_t	*err;
	dev_info_t	*dip;
} drmach_config_args_t;
198 
/*
 * NOTE(review): no user of this structure is visible in this part of
 * the file; it looks like the cookie for a per-board device discovery
 * walk ("found" being the per-device callback) -- confirm against its
 * users.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
206 
/* All known boards, indexed by board number; allocated in drmach_init(). */
static drmach_array_t	*drmach_boards;

/* Object constructors; their addresses double as the "isa" type tags. */
static sbd_error_t	*drmach_device_new(drmach_node_t *,
				drmach_board_t *, int, drmachid_t *);
static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);

/* DDI backed implementations of the drmach_node_t operations. */
static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
				char *name, void *buf, int len);
static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
				char *name, int *len);

static int 		drmach_get_portid(drmach_node_t *);
static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
static int		opl_check_dr_status();
static void		drmach_io_dispose(drmachid_t);
static sbd_error_t	*drmach_io_release(drmachid_t);
static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
static int 		drmach_init(void);
static void 		drmach_fini(void);
static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
static drmach_board_t	*drmach_get_board_by_bnum(int);

/* options for the third argument ("option") of drmach_add_remove_cpu() */
#define	HOTADD_CPU	1
#define	HOTREMOVE_CPU	2

/*
 * Board relative core number of a cpuid: drop the strand bits, then
 * keep only the bits that select a core within one board.
 */
#define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
	(OPL_MAX_COREID_PER_BOARD - 1))

extern struct cpu	*SIGBCPU;

static int		drmach_name2type_idx(char *);
static drmach_board_t	*drmach_board_new(int, int);
243 
/*
 * DRMACH_PR is a printf() that only produces output on DEBUG kernels
 * with drmach_debug set; on non-DEBUG kernels the call is compiled
 * under a constant-false condition.  Because it expands to a bare "if",
 * take care when using it as the body of an if/else.
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
int drmach_debug = 1;		 /* set to non-zero to enable debug messages */
#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
252 
253 
/* View any drmachid_t as the common header every drmach object starts with. */
#define	DRMACH_OBJ(id)		((drmach_common_t *)id)

#define	DRMACH_NULL_ID(id)	((id) == 0)

/*
 * Type predicates.  Each object stores the address of its constructor
 * in cm.isa, so comparing isa against a constructor identifies the
 * object's type.  The macros evaluate "id" more than once.
 */
#define	DRMACH_IS_BOARD_ID(id)	\
	((id != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	((id != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	((id != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	((id != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* True for any device object (cpu, mem or io), false for boards. */
#define	DRMACH_IS_DEVICE_ID(id)					\
	((id != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* True for any drmach object: a board or any device. */
#define	DRMACH_IS_ID(id)					\
	((id != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* Build an EOPL_INTERNAL error whose text records the source line number. */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)

static char		*drmach_ie_fmt = "drmach.c %d";
291 
/*
 * Maps a device node's "name" property to the drmach device type string
 * and to the constructor used to build the matching device object.
 * drmach_device_new() ignores nodes whose name is not listed here.
 */
static struct {
	const char	*name;
	const char	*type;
	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *);
} drmach_name2type[] = {
	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
};
301 
/* utility: one megabyte (2^20 bytes) as an unsigned long long constant */
#define	MBYTE	(1048576ull)
304 
305 /*
306  * drmach autoconfiguration data structures and interfaces
307  */
308 
extern struct mod_ops mod_miscops;

/* drmach is a "misc" module; it has no device or filesystem ops. */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"OPL DR 1.1"
};

/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

/* Initialised in drmach_init(); drmach_fini() takes it around the dispose. */
static krwlock_t drmach_boards_rwlock;

typedef const char	*fn_t;
325 
326 int
327 _init(void)
328 {
329 	int err;
330 
331 	if ((err = drmach_init()) != 0) {
332 		return (err);
333 	}
334 
335 	if ((err = mod_install(&modlinkage)) != 0) {
336 		drmach_fini();
337 	}
338 
339 	return (err);
340 }
341 
342 int
343 _fini(void)
344 {
345 	int	err;
346 
347 	if ((err = mod_remove(&modlinkage)) == 0)
348 		drmach_fini();
349 
350 	return (err);
351 }
352 
353 int
354 _info(struct modinfo *modinfop)
355 {
356 	return (mod_info(&modlinkage, modinfop));
357 }
358 
/*
 * NOTE(review): no user of this structure is visible in this part of
 * the file; it appears to be the cookie for a memory controller node
 * lookup keyed by board number -- confirm against its users.
 */
struct drmach_mc_lookup {
	int	bnum;
	drmach_board_t	*bp;
	dev_info_t *dip;	/* rv - set if found */
};
364 
/* Page count <-> byte count conversions carried out in 64 bits. */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
367 
368 static int
369 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
370 {
371 	uint64_t	memory_ranges[128];
372 	int len;
373 	struct memlist	*ml;
374 	int rv;
375 	hwd_sb_t *hwd;
376 	hwd_memory_t *pm;
377 
378 	len = sizeof (memory_ranges);
379 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
380 	    "sb-mem-ranges", (caddr_t)&memory_ranges[0], &len) !=
381 	    DDI_PROP_SUCCESS) {
382 		mp->slice_base = 0;
383 		mp->slice_size = 0;
384 		return (-1);
385 	}
386 	mp->slice_base = memory_ranges[0];
387 	mp->slice_size = memory_ranges[1];
388 
389 	if (!mp->dev.bp->boot_board) {
390 		int i;
391 
392 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
393 
394 		if (rv != 0) {
395 			return (-1);
396 		}
397 
398 		ml = NULL;
399 		pm = &hwd->sb_cmu.cmu_memory;
400 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
401 			if (pm->mem_chunks[i].chnk_size > 0) {
402 				ml = memlist_add_span(ml,
403 				    pm->mem_chunks[i].chnk_start_address,
404 				    pm->mem_chunks[i].chnk_size);
405 			}
406 		}
407 	} else {
408 		/*
409 		 * we intersect phys_install to get base_pa.
410 		 * This only works at bootup time.
411 		 */
412 
413 		memlist_read_lock();
414 		ml = memlist_dup(phys_install);
415 		memlist_read_unlock();
416 
417 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
418 		if (ml) {
419 			uint64_t basepa, endpa;
420 			endpa = _ptob64(physmax + 1);
421 
422 			basepa = mp->slice_base + mp->slice_size;
423 
424 			ml = memlist_del_span(ml, basepa, endpa - basepa);
425 		}
426 	}
427 
428 	if (ml) {
429 		uint64_t nbytes = 0;
430 		struct memlist *p;
431 		for (p = ml; p; p = p->ml_next) {
432 			nbytes += p->ml_size;
433 		}
434 		if ((mp->nbytes = nbytes) > 0)
435 			mp->base_pa = ml->ml_address;
436 		else
437 			mp->base_pa = 0;
438 		mp->memlist = ml;
439 	} else {
440 		mp->base_pa = 0;
441 		mp->nbytes = 0;
442 	}
443 	return (0);
444 }
445 
446 
/*
 * Cookie for the device tree walk started by drmach_add_remove_cpu():
 * the target board and core, the operation to perform (HOTADD_CPU or
 * HOTREMOVE_CPU) and the result (rv stays 0 unless a step fails).
 */
struct drmach_hotcpu {
	drmach_board_t *bp;
	int	bnum;
	int	core_id;
	int 	rv;
	int	option;
};
454 
455 static int
456 drmach_cpu_cb(dev_info_t *dip, void *arg)
457 {
458 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
459 	char name[OBP_MAXDRVNAME];
460 	int len = OBP_MAXDRVNAME;
461 	int bnum, core_id, strand_id;
462 	drmach_board_t *bp;
463 
464 	if (dip == ddi_root_node()) {
465 		return (DDI_WALK_CONTINUE);
466 	}
467 
468 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
469 	    DDI_PROP_DONTPASS, "name",
470 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
471 		return (DDI_WALK_PRUNECHILD);
472 	}
473 
474 	/* only cmp has board number */
475 	bnum = -1;
476 	len = sizeof (bnum);
477 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
478 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
479 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
480 		bnum = -1;
481 	}
482 
483 	if (strcmp(name, "cmp") == 0) {
484 		if (bnum != p->bnum)
485 			return (DDI_WALK_PRUNECHILD);
486 		return (DDI_WALK_CONTINUE);
487 	}
488 	/* we have already pruned all unwanted cores and cpu's above */
489 	if (strcmp(name, "core") == 0) {
490 		return (DDI_WALK_CONTINUE);
491 	}
492 	if (strcmp(name, "cpu") == 0) {
493 		processorid_t cpuid;
494 		len = sizeof (cpuid);
495 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
496 		    DDI_PROP_DONTPASS, "cpuid",
497 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
498 			p->rv = -1;
499 			return (DDI_WALK_TERMINATE);
500 		}
501 
502 		core_id = p->core_id;
503 
504 		bnum = LSB_ID(cpuid);
505 
506 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
507 			return (DDI_WALK_CONTINUE);
508 
509 		bp = p->bp;
510 		ASSERT(bnum == bp->bnum);
511 
512 		if (p->option == HOTADD_CPU) {
513 			if (prom_hotaddcpu(cpuid) != 0) {
514 				p->rv = -1;
515 				return (DDI_WALK_TERMINATE);
516 			}
517 			strand_id = STRAND_ID(cpuid);
518 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
519 		} else if (p->option == HOTREMOVE_CPU) {
520 			if (prom_hotremovecpu(cpuid) != 0) {
521 				p->rv = -1;
522 				return (DDI_WALK_TERMINATE);
523 			}
524 			strand_id = STRAND_ID(cpuid);
525 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
526 		}
527 		return (DDI_WALK_CONTINUE);
528 	}
529 
530 	return (DDI_WALK_PRUNECHILD);
531 }
532 
533 
534 static int
535 drmach_add_remove_cpu(int bnum, int core_id, int option)
536 {
537 	struct drmach_hotcpu arg;
538 	drmach_board_t *bp;
539 
540 	bp = drmach_get_board_by_bnum(bnum);
541 	ASSERT(bp);
542 
543 	arg.bp = bp;
544 	arg.bnum = bnum;
545 	arg.core_id = core_id;
546 	arg.rv = 0;
547 	arg.option = option;
548 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
549 	return (arg.rv);
550 }
551 
/* Cookie for drmach_setup_core_cb(): the board whose cores are recorded. */
struct drmach_setup_core_arg {
	drmach_board_t *bp;
};
555 
556 static int
557 drmach_setup_core_cb(dev_info_t *dip, void *arg)
558 {
559 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
560 	char name[OBP_MAXDRVNAME];
561 	int len = OBP_MAXDRVNAME;
562 	int bnum;
563 	int core_id, strand_id;
564 
565 	if (dip == ddi_root_node()) {
566 		return (DDI_WALK_CONTINUE);
567 	}
568 
569 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
570 	    DDI_PROP_DONTPASS, "name",
571 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
572 		return (DDI_WALK_PRUNECHILD);
573 	}
574 
575 	/* only cmp has board number */
576 	bnum = -1;
577 	len = sizeof (bnum);
578 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
579 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
580 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
581 		bnum = -1;
582 	}
583 
584 	if (strcmp(name, "cmp") == 0) {
585 		if (bnum != p->bp->bnum)
586 			return (DDI_WALK_PRUNECHILD);
587 		return (DDI_WALK_CONTINUE);
588 	}
589 	/* we have already pruned all unwanted cores and cpu's above */
590 	if (strcmp(name, "core") == 0) {
591 		return (DDI_WALK_CONTINUE);
592 	}
593 	if (strcmp(name, "cpu") == 0) {
594 		processorid_t cpuid;
595 		len = sizeof (cpuid);
596 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
597 		    DDI_PROP_DONTPASS, "cpuid",
598 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
599 			return (DDI_WALK_TERMINATE);
600 		}
601 		bnum = LSB_ID(cpuid);
602 		ASSERT(bnum == p->bp->bnum);
603 		core_id = ON_BOARD_CORE_NUM(cpuid);
604 		strand_id = STRAND_ID(cpuid);
605 		p->bp->cores[core_id].core_present |= (1 << strand_id);
606 		return (DDI_WALK_CONTINUE);
607 	}
608 
609 	return (DDI_WALK_PRUNECHILD);
610 }
611 
612 
613 static void
614 drmach_setup_core_info(drmach_board_t *obj)
615 {
616 	struct drmach_setup_core_arg arg;
617 	int i;
618 
619 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
620 		obj->cores[i].core_present = 0;
621 		obj->cores[i].core_hotadded = 0;
622 		obj->cores[i].core_started = 0;
623 	}
624 	arg.bp = obj;
625 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
626 
627 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
628 		if (obj->boot_board) {
629 			obj->cores[i].core_hotadded =
630 			    obj->cores[i].core_started =
631 			    obj->cores[i].core_present;
632 		}
633 	}
634 }
635 
636 /*
637  * drmach_node_* routines serve the purpose of separating the
638  * rest of the code from the device tree and OBP.  This is necessary
639  * because of In-Kernel-Probing.  Devices probed after stod, are probed
640  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
641  * have dnode ids.
642  */
643 
/*
 * Cookie passed through ddi_walk_devs() by drmach_node_ddi_walk(): the
 * arguments for the caller's callback, the callback itself, and the
 * value it last returned (a non-zero value stops the walk).
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;
	int 			(*cb)(drmach_node_walk_args_t *args);
	int			err;
} drmach_node_ddi_walk_args_t;
649 
650 static int
651 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
652 {
653 	drmach_node_ddi_walk_args_t	*nargs;
654 
655 	nargs = (drmach_node_ddi_walk_args_t *)arg;
656 
657 	/*
658 	 * dip doesn't have to be held here as we are called
659 	 * from ddi_walk_devs() which holds the dip.
660 	 */
661 	nargs->nwargs->node->here = (void *)dip;
662 
663 	nargs->err = nargs->cb(nargs->nwargs);
664 
665 
666 	/*
667 	 * Set "here" to NULL so that unheld dip is not accessible
668 	 * outside ddi_walk_devs()
669 	 */
670 	nargs->nwargs->node->here = NULL;
671 
672 	if (nargs->err)
673 		return (DDI_WALK_TERMINATE);
674 	else
675 		return (DDI_WALK_CONTINUE);
676 }
677 
678 static int
679 drmach_node_ddi_walk(drmach_node_t *np, void *data,
680 		int (*cb)(drmach_node_walk_args_t *args))
681 {
682 	drmach_node_walk_args_t		args;
683 	drmach_node_ddi_walk_args_t	nargs;
684 
685 
686 	/* initialized args structure for callback */
687 	args.node = np;
688 	args.data = data;
689 
690 	nargs.nwargs = &args;
691 	nargs.cb = cb;
692 	nargs.err = 0;
693 
694 	/*
695 	 * Root node doesn't have to be held in any way.
696 	 */
697 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
698 
699 	return (nargs.err);
700 }
701 
702 static int
703 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
704 {
705 	dev_info_t	*ndip;
706 	static char	*fn = "drmach_node_ddi_get_parent";
707 
708 	ndip = np->n_getdip(np);
709 	if (ndip == NULL) {
710 		cmn_err(CE_WARN, "%s: NULL dip", fn);
711 		return (-1);
712 	}
713 
714 	bcopy(np, pp, sizeof (drmach_node_t));
715 
716 	pp->here = (void *)ddi_get_parent(ndip);
717 	if (pp->here == NULL) {
718 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
719 		return (-1);
720 	}
721 
722 	return (0);
723 }
724 
725 /*ARGSUSED*/
726 static pnode_t
727 drmach_node_ddi_get_dnode(drmach_node_t *np)
728 {
729 	return ((pnode_t)NULL);
730 }
731 
732 static drmach_node_t *
733 drmach_node_new(void)
734 {
735 	drmach_node_t *np;
736 
737 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
738 
739 	np->get_dnode = drmach_node_ddi_get_dnode;
740 	np->walk = drmach_node_ddi_walk;
741 	np->n_getdip = drmach_node_ddi_get_dip;
742 	np->n_getproplen = drmach_node_ddi_get_proplen;
743 	np->n_getprop = drmach_node_ddi_get_prop;
744 	np->get_parent = drmach_node_ddi_get_parent;
745 
746 	return (np);
747 }
748 
749 static void
750 drmach_node_dispose(drmach_node_t *np)
751 {
752 	kmem_free(np, sizeof (*np));
753 }
754 
755 static dev_info_t *
756 drmach_node_ddi_get_dip(drmach_node_t *np)
757 {
758 	return ((dev_info_t *)np->here);
759 }
760 
761 static int
762 drmach_node_walk(drmach_node_t *np, void *param,
763 		int (*cb)(drmach_node_walk_args_t *args))
764 {
765 	return (np->walk(np, param, cb));
766 }
767 
768 static int
769 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
770 {
771 	int		rv = 0;
772 	dev_info_t	*ndip;
773 	static char	*fn = "drmach_node_ddi_get_prop";
774 
775 
776 	ndip = np->n_getdip(np);
777 	if (ndip == NULL) {
778 		cmn_err(CE_WARN, "%s: NULL dip", fn);
779 		rv = -1;
780 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
781 	    DDI_PROP_DONTPASS, name,
782 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
783 		rv = -1;
784 	}
785 
786 	return (rv);
787 }
788 
789 static int
790 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
791 {
792 	int		rv = 0;
793 	dev_info_t	*ndip;
794 
795 	ndip = np->n_getdip(np);
796 	if (ndip == NULL) {
797 		rv = -1;
798 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name,
799 	    len) != DDI_PROP_SUCCESS) {
800 		rv = -1;
801 	}
802 
803 	return (rv);
804 }
805 
806 static drmachid_t
807 drmach_node_dup(drmach_node_t *np)
808 {
809 	drmach_node_t *dup;
810 
811 	dup = drmach_node_new();
812 	dup->here = np->here;
813 	dup->get_dnode = np->get_dnode;
814 	dup->walk = np->walk;
815 	dup->n_getdip = np->n_getdip;
816 	dup->n_getproplen = np->n_getproplen;
817 	dup->n_getprop = np->n_getprop;
818 	dup->get_parent = np->get_parent;
819 
820 	return (dup);
821 }
822 
823 /*
824  * drmach_array provides convenient array construction, access,
825  * bounds checking and array destruction logic.
826  */
827 
828 static drmach_array_t *
829 drmach_array_new(int min_index, int max_index)
830 {
831 	drmach_array_t *arr;
832 
833 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
834 
835 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
836 	if (arr->arr_sz > 0) {
837 		arr->min_index = min_index;
838 		arr->max_index = max_index;
839 
840 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
841 		return (arr);
842 	} else {
843 		kmem_free(arr, sizeof (*arr));
844 		return (0);
845 	}
846 }
847 
848 static int
849 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
850 {
851 	if (idx < arr->min_index || idx > arr->max_index)
852 		return (-1);
853 	else {
854 		arr->arr[idx - arr->min_index] = val;
855 		return (0);
856 	}
857 	/*NOTREACHED*/
858 }
859 
860 static int
861 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
862 {
863 	if (idx < arr->min_index || idx > arr->max_index)
864 		return (-1);
865 	else {
866 		*val = arr->arr[idx - arr->min_index];
867 		return (0);
868 	}
869 	/*NOTREACHED*/
870 }
871 
872 static int
873 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
874 {
875 	int rv;
876 
877 	*idx = arr->min_index;
878 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
879 		*idx += 1;
880 
881 	return (rv);
882 }
883 
884 static int
885 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
886 {
887 	int rv;
888 
889 	*idx += 1;
890 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
891 		*idx += 1;
892 
893 	return (rv);
894 }
895 
896 static void
897 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
898 {
899 	drmachid_t	val;
900 	int		idx;
901 	int		rv;
902 
903 	rv = drmach_array_first(arr, &idx, &val);
904 	while (rv == 0) {
905 		(*disposer)(val);
906 		rv = drmach_array_next(arr, &idx, &val);
907 	}
908 
909 	kmem_free(arr->arr, arr->arr_sz);
910 	kmem_free(arr, sizeof (*arr));
911 }
912 
913 static drmach_board_t *
914 drmach_get_board_by_bnum(int bnum)
915 {
916 	drmachid_t id;
917 
918 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
919 		return ((drmach_board_t *)id);
920 	else
921 		return (NULL);
922 }
923 
924 static pnode_t
925 drmach_node_get_dnode(drmach_node_t *np)
926 {
927 	return (np->get_dnode(np));
928 }
929 
930 /*ARGSUSED*/
931 sbd_error_t *
932 drmach_configure(drmachid_t id, int flags)
933 {
934 	drmach_device_t		*dp;
935 	sbd_error_t		*err = NULL;
936 	dev_info_t		*rdip;
937 	dev_info_t		*fdip = NULL;
938 
939 	if (DRMACH_IS_CPU_ID(id)) {
940 		return (NULL);
941 	}
942 	if (!DRMACH_IS_DEVICE_ID(id))
943 		return (drerr_new(0, EOPL_INAPPROP, NULL));
944 	dp = id;
945 	rdip = dp->node->n_getdip(dp->node);
946 
947 	ASSERT(rdip);
948 
949 	ASSERT(e_ddi_branch_held(rdip));
950 
951 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
952 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
953 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
954 
955 		(void) ddi_pathname(dip, path);
956 		err = drerr_new(1,  EOPL_DRVFAIL, path);
957 
958 		kmem_free(path, MAXPATHLEN);
959 
960 		/* If non-NULL, fdip is returned held and must be released */
961 		if (fdip != NULL)
962 			ddi_release_devi(fdip);
963 	}
964 
965 	return (err);
966 }
967 
968 
969 static sbd_error_t *
970 drmach_device_new(drmach_node_t *node,
971 	drmach_board_t *bp, int portid, drmachid_t *idp)
972 {
973 	int		 i;
974 	int		 rv;
975 	drmach_device_t	proto;
976 	sbd_error_t	*err;
977 	char		 name[OBP_MAXDRVNAME];
978 
979 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
980 	if (rv) {
981 		/* every node is expected to have a name */
982 		err = drerr_new(1, EOPL_GETPROP, "device node %s: property %s",
983 		    ddi_node_name(node->n_getdip(node)), "name");
984 		return (err);
985 	}
986 
987 	/*
988 	 * The node currently being examined is not listed in the name2type[]
989 	 * array.  In this case, the node is no interest to drmach.  Both
990 	 * dp and err are initialized here to yield nothing (no device or
991 	 * error structure) for this case.
992 	 */
993 	i = drmach_name2type_idx(name);
994 
995 
996 	if (i < 0) {
997 		*idp = (drmachid_t)0;
998 		return (NULL);
999 	}
1000 
1001 	/* device specific new function will set unum */
1002 
1003 	bzero(&proto, sizeof (proto));
1004 	proto.type = drmach_name2type[i].type;
1005 	proto.bp = bp;
1006 	proto.node = node;
1007 	proto.portid = portid;
1008 
1009 	return (drmach_name2type[i].new(&proto, idp));
1010 }
1011 
1012 static void
1013 drmach_device_dispose(drmachid_t id)
1014 {
1015 	drmach_device_t *self = id;
1016 
1017 	self->cm.dispose(id);
1018 }
1019 
1020 
1021 static drmach_board_t *
1022 drmach_board_new(int bnum, int boot_board)
1023 {
1024 	static sbd_error_t *drmach_board_release(drmachid_t);
1025 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1026 
1027 	drmach_board_t	*bp;
1028 
1029 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1030 
1031 	bp->cm.isa = (void *)drmach_board_new;
1032 	bp->cm.release = drmach_board_release;
1033 	bp->cm.status = drmach_board_status;
1034 
1035 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1036 
1037 	bp->bnum = bnum;
1038 	bp->devices = NULL;
1039 	bp->connected = boot_board;
1040 	bp->tree = drmach_node_new();
1041 	bp->assigned = boot_board;
1042 	bp->powered = boot_board;
1043 	bp->boot_board = boot_board;
1044 
1045 	/*
1046 	 * If this is not bootup initialization, we have to wait till
1047 	 * IKP sets up the device nodes in drmach_board_connect().
1048 	 */
1049 	if (boot_board)
1050 		drmach_setup_core_info(bp);
1051 
1052 	(void) drmach_array_set(drmach_boards, bnum, bp);
1053 	return (bp);
1054 }
1055 
1056 static void
1057 drmach_board_dispose(drmachid_t id)
1058 {
1059 	drmach_board_t *bp;
1060 
1061 	ASSERT(DRMACH_IS_BOARD_ID(id));
1062 	bp = id;
1063 
1064 	if (bp->tree)
1065 		drmach_node_dispose(bp->tree);
1066 
1067 	if (bp->devices)
1068 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1069 
1070 	kmem_free(bp, sizeof (*bp));
1071 }
1072 
1073 static sbd_error_t *
1074 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1075 {
1076 	sbd_error_t	*err = NULL;
1077 	drmach_board_t	*bp;
1078 
1079 	if (!DRMACH_IS_BOARD_ID(id))
1080 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1081 	bp = id;
1082 
1083 	stat->assigned = bp->assigned;
1084 	stat->powered = bp->powered;
1085 	stat->busy = 0;			/* assume not busy */
1086 	stat->configured = 0;		/* assume not configured */
1087 	stat->empty = 0;
1088 	stat->cond = bp->cond = SBD_COND_OK;
1089 	(void) strncpy(stat->type, "System Brd", sizeof (stat->type));
1090 	stat->info[0] = '\0';
1091 
1092 	if (bp->devices) {
1093 		int		 rv;
1094 		int		 d_idx;
1095 		drmachid_t	 d_id;
1096 
1097 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1098 		while (rv == 0) {
1099 			drmach_status_t	d_stat;
1100 
1101 			err = drmach_i_status(d_id, &d_stat);
1102 			if (err)
1103 				break;
1104 
1105 			stat->busy |= d_stat.busy;
1106 			stat->configured |= d_stat.configured;
1107 
1108 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1109 		}
1110 	}
1111 
1112 	return (err);
1113 }
1114 
1115 int
1116 drmach_board_is_floating(drmachid_t id)
1117 {
1118 	drmach_board_t *bp;
1119 
1120 	if (!DRMACH_IS_BOARD_ID(id))
1121 		return (0);
1122 
1123 	bp = (drmach_board_t *)id;
1124 
1125 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1126 }
1127 
1128 static int
1129 drmach_init(void)
1130 {
1131 	dev_info_t	*rdip;
1132 	int		i, rv, len;
1133 	int		*floating;
1134 
1135 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1136 
1137 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1138 
1139 	rdip = ddi_root_node();
1140 
1141 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1142 	    "floating-boards", &len) != DDI_PROP_SUCCESS) {
1143 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1144 	} else {
1145 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1146 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip, PROP_LEN_AND_VAL_BUF,
1147 		    DDI_PROP_DONTPASS, "floating-boards", (caddr_t)floating,
1148 		    &len);
1149 		if (rv != DDI_PROP_SUCCESS) {
1150 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1151 		} else {
1152 			drmach_domain.floating = 0;
1153 			for (i = 0; i < len / sizeof (int); i++) {
1154 				drmach_domain.floating |= (1 << floating[i]);
1155 			}
1156 		}
1157 		kmem_free(floating, len);
1158 	}
1159 	drmach_domain.allow_dr = opl_check_dr_status();
1160 
1161 	rdip = ddi_get_child(ddi_root_node());
1162 	do {
1163 		int		 bnum;
1164 		drmachid_t	 id;
1165 
1166 		bnum = -1;
1167 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1168 		    OBP_BOARDNUM, -1);
1169 		if (bnum == -1)
1170 			continue;
1171 
1172 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1173 			cmn_err(CE_WARN, "Device node 0x%p has invalid "
1174 			    "property value, %s=%d", (void *)rdip,
1175 			    OBP_BOARDNUM, bnum);
1176 			goto error;
1177 		} else if (id == NULL) {
1178 			(void) drmach_board_new(bnum, 1);
1179 		}
1180 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1181 
1182 	opl_hold_devtree();
1183 
1184 	/*
1185 	 * Initialize the IKP feature.
1186 	 *
1187 	 * This can be done only after DR has acquired a hold on all the
1188 	 * device nodes that are interesting to IKP.
1189 	 */
1190 	if (opl_init_cfg() != 0) {
1191 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1192 
1193 		opl_release_devtree();
1194 
1195 		goto error;
1196 	}
1197 
1198 	return (0);
1199 error:
1200 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1201 	rw_destroy(&drmach_boards_rwlock);
1202 	return (ENXIO);
1203 }
1204 
1205 static void
1206 drmach_fini(void)
1207 {
1208 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1209 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1210 	drmach_boards = NULL;
1211 	rw_exit(&drmach_boards_rwlock);
1212 
1213 	/*
1214 	 * Walk immediate children of the root devinfo node
1215 	 * releasing holds acquired on branches in drmach_init()
1216 	 */
1217 
1218 	opl_release_devtree();
1219 
1220 	rw_destroy(&drmach_boards_rwlock);
1221 }
1222 
1223 /*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
1229  *
1230  *
1231  *	Device Path:
1232  *	/pci@<portid>,reg
1233  *
1234  *	where
1235  *	portid[10] = 0
1236  *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
1237  *
1238  *	LLEAF_ID[9:8] = 0
1239  *	LLEAF_ID[8:4] = LSB_ID[4:0]
1240  *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
1241  *			channel 4 is pcicmu
1242  *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
1243  *
1244  *	Properties:
1245  *	name = pci
1246  *	device_type = "pciex"
1247  *	board# = LSBID
1248  *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
1249  *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
1250  */
1251 
1252 static sbd_error_t *
1253 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1254 {
1255 	drmach_io_t	*ip;
1256 
1257 	int		 portid;
1258 
1259 	portid = proto->portid;
1260 	ASSERT(portid != -1);
1261 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1262 
1263 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1264 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1265 	ip->dev.node = drmach_node_dup(proto->node);
1266 	ip->dev.cm.isa = (void *)drmach_io_new;
1267 	ip->dev.cm.dispose = drmach_io_dispose;
1268 	ip->dev.cm.release = drmach_io_release;
1269 	ip->dev.cm.status = drmach_io_status;
1270 	ip->channel = (portid >> 1) & 0x7;
1271 	ip->leaf = (portid & 0x1);
1272 
1273 	(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1274 	    ip->dev.type, ip->dev.unum);
1275 
1276 	*idp = (drmachid_t)ip;
1277 	return (NULL);
1278 }
1279 
1280 
1281 static void
1282 drmach_io_dispose(drmachid_t id)
1283 {
1284 	drmach_io_t *self;
1285 
1286 	ASSERT(DRMACH_IS_IO_ID(id));
1287 
1288 	self = id;
1289 	if (self->dev.node)
1290 		drmach_node_dispose(self->dev.node);
1291 
1292 	kmem_free(self, sizeof (*self));
1293 }
1294 
1295 /*ARGSUSED*/
1296 sbd_error_t *
1297 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1298 {
1299 	drmach_board_t	*bp = (drmach_board_t *)id;
1300 	sbd_error_t	*err = NULL;
1301 
1302 	/* allow status and ncm operations to always succeed */
1303 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1304 		return (NULL);
1305 	}
1306 
1307 	/* check all other commands for the required option string */
1308 
1309 	if ((opts->size > 0) && (opts->copts != NULL)) {
1310 
1311 		DRMACH_PR("platform options: %s\n", opts->copts);
1312 
1313 		if (strstr(opts->copts, "opldr") == NULL) {
1314 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1315 		}
1316 	} else {
1317 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1318 	}
1319 
1320 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1321 		switch (cmd) {
1322 			case SBD_CMD_TEST:
1323 			case SBD_CMD_STATUS:
1324 			case SBD_CMD_GETNCM:
1325 				break;
1326 			case SBD_CMD_CONNECT:
1327 				if (bp->connected)
1328 					err = drerr_new(0, ESBD_STATE, NULL);
1329 				else if (!drmach_domain.allow_dr)
1330 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1331 				break;
1332 			case SBD_CMD_DISCONNECT:
1333 				if (!bp->connected)
1334 					err = drerr_new(0, ESBD_STATE, NULL);
1335 				else if (!drmach_domain.allow_dr)
1336 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1337 				break;
1338 			default:
1339 				if (!drmach_domain.allow_dr)
1340 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1341 				break;
1342 
1343 		}
1344 	}
1345 
1346 	return (err);
1347 }
1348 
1349 /*ARGSUSED*/
1350 sbd_error_t *
1351 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1352 {
1353 	return (NULL);
1354 }
1355 
1356 sbd_error_t *
1357 drmach_board_assign(int bnum, drmachid_t *id)
1358 {
1359 	sbd_error_t	*err = NULL;
1360 
1361 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1362 
1363 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1364 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1365 	} else {
1366 		drmach_board_t	*bp;
1367 
1368 		if (*id)
1369 			rw_downgrade(&drmach_boards_rwlock);
1370 
1371 		bp = *id;
1372 		if (!(*id))
1373 			bp = *id  =
1374 			    (drmachid_t)drmach_board_new(bnum, 0);
1375 		bp->assigned = 1;
1376 	}
1377 
1378 	rw_exit(&drmach_boards_rwlock);
1379 
1380 	return (err);
1381 }
1382 
1383 /*ARGSUSED*/
1384 sbd_error_t *
1385 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1386 {
1387 	extern int	cpu_alljupiter;
1388 	drmach_board_t	*obj = (drmach_board_t *)id;
1389 	unsigned	cpu_impl;
1390 
1391 	if (!DRMACH_IS_BOARD_ID(id))
1392 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1393 
1394 	if (opl_probe_sb(obj->bnum, &cpu_impl) != 0)
1395 		return (drerr_new(1, EOPL_PROBE, NULL));
1396 
1397 	if (cpu_alljupiter) {
1398 		if (cpu_impl & (1 << OLYMPUS_C_IMPL)) {
1399 			(void) opl_unprobe_sb(obj->bnum);
1400 			return (drerr_new(1, EOPL_MIXED_CPU, NULL));
1401 		}
1402 	}
1403 
1404 	(void) prom_attach_notice(obj->bnum);
1405 
1406 	drmach_setup_core_info(obj);
1407 
1408 	obj->connected = 1;
1409 
1410 	return (NULL);
1411 }
1412 
1413 static int drmach_cache_flush_flag[NCPU];
1414 
1415 /*ARGSUSED*/
1416 static void
1417 drmach_flush_cache(uint64_t id, uint64_t dummy)
1418 {
1419 	extern void cpu_flush_ecache(void);
1420 
1421 	cpu_flush_ecache();
1422 	drmach_cache_flush_flag[id] = 0;
1423 }
1424 
1425 static void
1426 drmach_flush_all()
1427 {
1428 	cpuset_t	xc_cpuset;
1429 	int		i;
1430 
1431 	xc_cpuset = cpu_ready_set;
1432 	for (i = 0; i < NCPU; i++) {
1433 		if (CPU_IN_SET(xc_cpuset, i)) {
1434 			drmach_cache_flush_flag[i] = 1;
1435 			xc_one(i, drmach_flush_cache, i, 0);
1436 			while (drmach_cache_flush_flag[i]) {
1437 				DELAY(1000);
1438 			}
1439 		}
1440 	}
1441 }
1442 
1443 static int
1444 drmach_disconnect_cpus(drmach_board_t *bp)
1445 {
1446 	int i, bnum;
1447 
1448 	bnum = bp->bnum;
1449 
1450 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1451 		if (bp->cores[i].core_present) {
1452 			if (bp->cores[i].core_started)
1453 				return (-1);
1454 			if (bp->cores[i].core_hotadded) {
1455 				if (drmach_add_remove_cpu(bnum, i,
1456 				    HOTREMOVE_CPU)) {
1457 					cmn_err(CE_WARN, "Failed to remove "
1458 					    "CMP %d on board %d\n", i, bnum);
1459 					return (-1);
1460 				}
1461 			}
1462 		}
1463 	}
1464 	return (0);
1465 }
1466 
1467 /*ARGSUSED*/
1468 sbd_error_t *
1469 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1470 {
1471 	drmach_board_t *obj;
1472 	int rv = 0;
1473 	sbd_error_t		*err = NULL;
1474 
1475 	if (DRMACH_NULL_ID(id))
1476 		return (NULL);
1477 
1478 	if (!DRMACH_IS_BOARD_ID(id))
1479 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1480 
1481 	obj = (drmach_board_t *)id;
1482 
1483 	if (drmach_disconnect_cpus(obj)) {
1484 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1485 		return (err);
1486 	}
1487 
1488 	rv = opl_unprobe_sb(obj->bnum);
1489 
1490 	if (rv == 0) {
1491 		(void) prom_detach_notice(obj->bnum);
1492 		obj->connected = 0;
1493 
1494 	} else
1495 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1496 
1497 	return (err);
1498 }
1499 
1500 static int
1501 drmach_get_portid(drmach_node_t *np)
1502 {
1503 	int		portid;
1504 	char		type[OBP_MAXPROPNAME];
1505 
1506 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1507 		return (portid);
1508 
1509 	/*
1510 	 * Get the device_type property to see if we should
1511 	 * continue processing this node.
1512 	 */
1513 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1514 		return (-1);
1515 
1516 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1517 		/*
1518 		 * We return cpuid because it has no portid
1519 		 */
1520 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1521 			return (portid);
1522 	}
1523 
1524 	return (-1);
1525 }
1526 
1527 /*
1528  * This is a helper function to determine if a given
1529  * node should be considered for a dr operation according
1530  * to predefined dr type nodes and the node's name.
1531  * Formal Parameter : The name of a device node.
1532  * Return Value: -1, name does not map to a valid dr type.
1533  *		 A value greater or equal to 0, name is a valid dr type.
1534  */
1535 static int
1536 drmach_name2type_idx(char *name)
1537 {
1538 	int 	index, ntypes;
1539 
1540 	if (name == NULL)
1541 		return (-1);
1542 
1543 	/*
1544 	 * Determine how many possible types are currently supported
1545 	 * for dr.
1546 	 */
1547 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1548 
1549 	/* Determine if the node's name correspond to a predefined type. */
1550 	for (index = 0; index < ntypes; index++) {
1551 		if (strcmp(drmach_name2type[index].name, name) == 0)
1552 			/* The node is an allowed type for dr. */
1553 			return (index);
1554 	}
1555 
1556 	/*
1557 	 * If the name of the node does not map to any of the
1558 	 * types in the array drmach_name2type then the node is not of
1559 	 * interest to dr.
1560 	 */
1561 	return (-1);
1562 }
1563 
1564 /*
1565  * there is some complication on OPL:
1566  * - pseudo-mc nodes do not have portid property
1567  * - portid[9:5] of cmp node is LSB #, portid[7:3] of pci is LSB#
1568  * - cmp has board#
1569  * - core and cpu nodes do not have portid and board# properties
1570  * starcat uses portid to derive the board# but that does not work
1571  * for us.  starfire reads board# property to filter the devices.
1572  * That does not work either.  So for these specific device,
1573  * we use specific hard coded methods to get the board# -
1574  * cpu: LSB# = CPUID[9:5]
1575  */
1576 
1577 static int
1578 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1579 {
1580 	drmach_node_t			*node = args->node;
1581 	drmach_board_cb_data_t		*data = args->data;
1582 	drmach_board_t			*obj = data->obj;
1583 
1584 	int		rv, portid;
1585 	int		bnum;
1586 	drmachid_t	id;
1587 	drmach_device_t	*device;
1588 	char name[OBP_MAXDRVNAME];
1589 
1590 	portid = drmach_get_portid(node);
1591 	/*
1592 	 * core, cpu and pseudo-mc do not have portid
1593 	 * we use cpuid as the portid of the cpu node
1594 	 * for pseudo-mc, we do not use portid info.
1595 	 */
1596 
1597 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1598 	if (rv)
1599 		return (0);
1600 
1601 
1602 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1603 
1604 	if (rv) {
1605 		/*
1606 		 * cpu does not have board# property.  We use
1607 		 * CPUID[9:5]
1608 		 */
1609 		if (strcmp("cpu", name) == 0) {
1610 			bnum = (portid >> 5) & 0x1f;
1611 		} else
1612 			return (0);
1613 	}
1614 
1615 
1616 	if (bnum != obj->bnum)
1617 		return (0);
1618 
1619 	if (drmach_name2type_idx(name) < 0) {
1620 		return (0);
1621 	}
1622 
1623 	/*
1624 	 * Create a device data structure from this node data.
1625 	 * The call may yield nothing if the node is not of interest
1626 	 * to drmach.
1627 	 */
1628 	data->err = drmach_device_new(node, obj, portid, &id);
1629 	if (data->err)
1630 		return (-1);
1631 	else if (!id) {
1632 		/*
1633 		 * drmach_device_new examined the node we passed in
1634 		 * and determined that it was one not of interest to
1635 		 * drmach.  So, it is skipped.
1636 		 */
1637 		return (0);
1638 	}
1639 
1640 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1641 	if (rv) {
1642 		data->err = DRMACH_INTERNAL_ERROR();
1643 		return (-1);
1644 	}
1645 	device = id;
1646 
1647 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1648 	return (data->err == NULL ? 0 : -1);
1649 }
1650 
1651 sbd_error_t *
1652 drmach_board_find_devices(drmachid_t id, void *a,
1653 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1654 {
1655 	drmach_board_t		*bp = (drmach_board_t *)id;
1656 	sbd_error_t		*err;
1657 	int			 max_devices;
1658 	int			 rv;
1659 	drmach_board_cb_data_t	data;
1660 
1661 
1662 	if (!DRMACH_IS_BOARD_ID(id))
1663 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1664 
1665 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1666 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1667 	max_devices += MAX_IO_UNITS_PER_BOARD;
1668 
1669 	bp->devices = drmach_array_new(0, max_devices);
1670 
1671 	if (bp->tree == NULL)
1672 		bp->tree = drmach_node_new();
1673 
1674 	data.obj = bp;
1675 	data.ndevs = 0;
1676 	data.found = found;
1677 	data.a = a;
1678 	data.err = NULL;
1679 
1680 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1681 	if (rv == 0)
1682 		err = NULL;
1683 	else {
1684 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1685 		bp->devices = NULL;
1686 
1687 		if (data.err)
1688 			err = data.err;
1689 		else
1690 			err = DRMACH_INTERNAL_ERROR();
1691 	}
1692 
1693 	return (err);
1694 }
1695 
1696 int
1697 drmach_board_lookup(int bnum, drmachid_t *id)
1698 {
1699 	int	rv = 0;
1700 
1701 	rw_enter(&drmach_boards_rwlock, RW_READER);
1702 	if (drmach_array_get(drmach_boards, bnum, id)) {
1703 		*id = 0;
1704 		rv = -1;
1705 	}
1706 	rw_exit(&drmach_boards_rwlock);
1707 	return (rv);
1708 }
1709 
1710 sbd_error_t *
1711 drmach_board_name(int bnum, char *buf, int buflen)
1712 {
1713 	(void) snprintf(buf, buflen, "SB%d", bnum);
1714 	return (NULL);
1715 }
1716 
1717 sbd_error_t *
1718 drmach_board_poweroff(drmachid_t id)
1719 {
1720 	drmach_board_t	*bp;
1721 	sbd_error_t	*err;
1722 	drmach_status_t	 stat;
1723 
1724 	if (DRMACH_NULL_ID(id))
1725 		return (NULL);
1726 
1727 	if (!DRMACH_IS_BOARD_ID(id))
1728 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1729 	bp = id;
1730 
1731 	err = drmach_board_status(id, &stat);
1732 
1733 	if (!err) {
1734 		if (stat.configured || stat.busy)
1735 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1736 		else {
1737 			bp->powered = 0;
1738 		}
1739 	}
1740 	return (err);
1741 }
1742 
1743 sbd_error_t *
1744 drmach_board_poweron(drmachid_t id)
1745 {
1746 	drmach_board_t	*bp;
1747 
1748 	if (!DRMACH_IS_BOARD_ID(id))
1749 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1750 	bp = id;
1751 
1752 	bp->powered = 1;
1753 
1754 	return (NULL);
1755 }
1756 
1757 static sbd_error_t *
1758 drmach_board_release(drmachid_t id)
1759 {
1760 	if (!DRMACH_IS_BOARD_ID(id))
1761 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1762 	return (NULL);
1763 }
1764 
1765 /*ARGSUSED*/
1766 sbd_error_t *
1767 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1768 {
1769 	return (NULL);
1770 }
1771 
1772 sbd_error_t *
1773 drmach_board_unassign(drmachid_t id)
1774 {
1775 	drmach_board_t	*bp;
1776 	sbd_error_t	*err;
1777 	drmach_status_t	 stat;
1778 
1779 	if (DRMACH_NULL_ID(id))
1780 		return (NULL);
1781 
1782 	if (!DRMACH_IS_BOARD_ID(id)) {
1783 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1784 	}
1785 	bp = id;
1786 
1787 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1788 
1789 	err = drmach_board_status(id, &stat);
1790 	if (err) {
1791 		rw_exit(&drmach_boards_rwlock);
1792 		return (err);
1793 	}
1794 	if (stat.configured || stat.busy) {
1795 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1796 	} else {
1797 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1798 			err = DRMACH_INTERNAL_ERROR();
1799 		else
1800 			drmach_board_dispose(bp);
1801 	}
1802 	rw_exit(&drmach_boards_rwlock);
1803 	return (err);
1804 }
1805 
1806 /*
1807  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1808  * implementation #, etc
1809  */
1810 
1811 static sbd_error_t *
1812 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1813 {
1814 	static void drmach_cpu_dispose(drmachid_t);
1815 	static sbd_error_t *drmach_cpu_release(drmachid_t);
1816 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
1817 
1818 	int		 portid;
1819 	drmach_cpu_t	*cp = NULL;
1820 
1821 	/* portid is CPUID of the node */
1822 	portid = proto->portid;
1823 	ASSERT(portid != -1);
1824 
1825 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1826 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1827 	    (OPL_MAX_CMPID_PER_BOARD - 1)) +
1828 	    ((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1829 	    (OPL_MAX_CMPID_PER_BOARD));
1830 
1831 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1832 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1833 	cp->dev.node = drmach_node_dup(proto->node);
1834 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1835 	cp->dev.cm.dispose = drmach_cpu_dispose;
1836 	cp->dev.cm.release = drmach_cpu_release;
1837 	cp->dev.cm.status = drmach_cpu_status;
1838 
1839 	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1840 	    cp->dev.type, cp->dev.unum);
1841 
1842 /*
1843  *	CPU ID representation
1844  *	CPUID[9:5] = SB#
1845  *	CPUID[4:3] = Chip#
1846  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1847  *	CPUID[0:0] = Strand#
1848  */
1849 
1850 /*
1851  *	reg property of the strand contains strand ID
1852  *	reg property of the parent node contains core ID
1853  *	We should use them.
1854  */
1855 	cp->cpuid = portid;
1856 	cp->sb = (portid >> 5) & 0x1f;
1857 	cp->chipid = (portid >> 3) & 0x3;
1858 	cp->coreid = (portid >> 1) & 0x3;
1859 	cp->strandid = portid & 0x1;
1860 
1861 	*idp = (drmachid_t)cp;
1862 	return (NULL);
1863 }
1864 
1865 
1866 static void
1867 drmach_cpu_dispose(drmachid_t id)
1868 {
1869 	drmach_cpu_t	*self;
1870 
1871 	ASSERT(DRMACH_IS_CPU_ID(id));
1872 
1873 	self = id;
1874 	if (self->dev.node)
1875 		drmach_node_dispose(self->dev.node);
1876 
1877 	kmem_free(self, sizeof (*self));
1878 }
1879 
1880 static int
1881 drmach_cpu_start(struct cpu *cp)
1882 {
1883 	int		cpuid = cp->cpu_id;
1884 	extern int	restart_other_cpu(int);
1885 
1886 	ASSERT(MUTEX_HELD(&cpu_lock));
1887 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1888 
1889 	cp->cpu_flags &= ~CPU_POWEROFF;
1890 
1891 	/*
1892 	 * NOTE: restart_other_cpu pauses cpus during the
1893 	 *	 slave cpu start.  This helps to quiesce the
1894 	 *	 bus traffic a bit which makes the tick sync
1895 	 *	 routine in the prom more robust.
1896 	 */
1897 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1898 
1899 	(void) restart_other_cpu(cpuid);
1900 
1901 	return (0);
1902 }
1903 
1904 static sbd_error_t *
1905 drmach_cpu_release(drmachid_t id)
1906 {
1907 	if (!DRMACH_IS_CPU_ID(id))
1908 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1909 
1910 	return (NULL);
1911 }
1912 
1913 static sbd_error_t *
1914 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1915 {
1916 	drmach_cpu_t *cp;
1917 	drmach_device_t *dp;
1918 
1919 	ASSERT(DRMACH_IS_CPU_ID(id));
1920 	cp = (drmach_cpu_t *)id;
1921 	dp = &cp->dev;
1922 
1923 	stat->assigned = dp->bp->assigned;
1924 	stat->powered = dp->bp->powered;
1925 	mutex_enter(&cpu_lock);
1926 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1927 	mutex_exit(&cpu_lock);
1928 	stat->busy = dp->busy;
1929 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
1930 	stat->info[0] = '\0';
1931 
1932 	return (NULL);
1933 }
1934 
1935 sbd_error_t *
1936 drmach_cpu_disconnect(drmachid_t id)
1937 {
1938 
1939 	if (!DRMACH_IS_CPU_ID(id))
1940 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1941 
1942 	return (NULL);
1943 }
1944 
1945 sbd_error_t *
1946 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1947 {
1948 	drmach_cpu_t *cpu;
1949 
1950 	if (!DRMACH_IS_CPU_ID(id))
1951 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1952 	cpu = (drmach_cpu_t *)id;
1953 
1954 	/* get from cpu directly on OPL */
1955 	*cpuid = cpu->cpuid;
1956 	return (NULL);
1957 }
1958 
1959 sbd_error_t *
1960 drmach_cpu_get_impl(drmachid_t id, int *ip)
1961 {
1962 	drmach_device_t *cpu;
1963 	drmach_node_t	*np;
1964 	drmach_node_t	pp;
1965 	int		impl;
1966 	char		type[OBP_MAXPROPNAME];
1967 
1968 	if (!DRMACH_IS_CPU_ID(id))
1969 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1970 
1971 	cpu = id;
1972 	np = cpu->node;
1973 
1974 	if (np->get_parent(np, &pp) != 0) {
1975 		return (DRMACH_INTERNAL_ERROR());
1976 	}
1977 
1978 	/* the parent should be core */
1979 
1980 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1981 		return (drerr_new(0, EOPL_GETPROP, NULL));
1982 	}
1983 
1984 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1985 		if (pp.n_getprop(&pp, "implementation#", &impl,
1986 		    sizeof (impl)) != 0) {
1987 			return (drerr_new(0, EOPL_GETPROP, NULL));
1988 		}
1989 	} else {
1990 		return (DRMACH_INTERNAL_ERROR());
1991 	}
1992 
1993 	*ip = impl;
1994 
1995 	return (NULL);
1996 }
1997 
1998 sbd_error_t *
1999 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2000 {
2001 	drmach_device_t	*dp;
2002 
2003 	if (!DRMACH_IS_DEVICE_ID(id))
2004 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2005 	dp = id;
2006 
2007 	*dip = dp->node->n_getdip(dp->node);
2008 	return (NULL);
2009 }
2010 
2011 sbd_error_t *
2012 drmach_io_is_attached(drmachid_t id, int *yes)
2013 {
2014 	drmach_device_t *dp;
2015 	dev_info_t	*dip;
2016 	int		state;
2017 
2018 	if (!DRMACH_IS_IO_ID(id))
2019 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2020 	dp = id;
2021 
2022 	dip = dp->node->n_getdip(dp->node);
2023 	if (dip == NULL) {
2024 		*yes = 0;
2025 		return (NULL);
2026 	}
2027 
2028 	state = ddi_get_devstate(dip);
2029 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2030 	    (state == DDI_DEVSTATE_UP));
2031 
2032 	return (NULL);
2033 }
2034 
2035 struct drmach_io_cb {
2036 	char	*name;	/* name of the node */
2037 	int	(*func)(dev_info_t *);
2038 	int	rv;
2039 	dev_info_t *dip;
2040 };
2041 
2042 #define	DRMACH_IO_POST_ATTACH	0
2043 #define	DRMACH_IO_PRE_RELEASE	1
2044 
2045 static int
2046 drmach_io_cb_check(dev_info_t *dip, void *arg)
2047 {
2048 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2049 	char name[OBP_MAXDRVNAME];
2050 	int len = OBP_MAXDRVNAME;
2051 
2052 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name",
2053 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2054 		return (DDI_WALK_PRUNECHILD);
2055 	}
2056 
2057 	if (strcmp(name, p->name) == 0) {
2058 		ndi_hold_devi(dip);
2059 		p->dip = dip;
2060 		return (DDI_WALK_TERMINATE);
2061 	}
2062 
2063 	return (DDI_WALK_CONTINUE);
2064 }
2065 
2066 
2067 static int
2068 drmach_console_ops(drmachid_t *id, int state)
2069 {
2070 	drmach_io_t *obj = (drmach_io_t *)id;
2071 	struct drmach_io_cb arg;
2072 	int (*msudetp)(dev_info_t *);
2073 	int (*msuattp)(dev_info_t *);
2074 	dev_info_t *dip, *pdip;
2075 	int circ;
2076 
2077 	/* 4 is pcicmu channel */
2078 	if (obj->channel != 4)
2079 		return (0);
2080 
2081 	arg.name = "serial";
2082 	arg.func = NULL;
2083 	if (state == DRMACH_IO_PRE_RELEASE) {
2084 		msudetp = (int (*)(dev_info_t *))
2085 		    modgetsymvalue("oplmsu_dr_detach", 0);
2086 		if (msudetp != NULL)
2087 			arg.func = msudetp;
2088 	} else if (state == DRMACH_IO_POST_ATTACH) {
2089 		msuattp = (int (*)(dev_info_t *))
2090 		    modgetsymvalue("oplmsu_dr_attach", 0);
2091 		if (msuattp != NULL)
2092 			arg.func = msuattp;
2093 	} else {
2094 		return (0);
2095 	}
2096 
2097 	if (arg.func == NULL) {
2098 		return (0);
2099 	}
2100 
2101 	arg.rv = 0;
2102 	arg.dip = NULL;
2103 
2104 	dip = obj->dev.node->n_getdip(obj->dev.node);
2105 	if (pdip = ddi_get_parent(dip)) {
2106 		ndi_hold_devi(pdip);
2107 		ndi_devi_enter(pdip, &circ);
2108 	} else {
2109 		/* this cannot happen unless something bad happens */
2110 		return (-1);
2111 	}
2112 
2113 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2114 
2115 	ndi_devi_exit(pdip, circ);
2116 	ndi_rele_devi(pdip);
2117 
2118 	if (arg.dip) {
2119 		arg.rv = (*arg.func)(arg.dip);
2120 		ndi_rele_devi(arg.dip);
2121 	} else {
2122 		arg.rv = -1;
2123 	}
2124 
2125 	return (arg.rv);
2126 }
2127 
2128 sbd_error_t *
2129 drmach_io_pre_release(drmachid_t id)
2130 {
2131 	int rv;
2132 
2133 	if (!DRMACH_IS_IO_ID(id))
2134 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2135 
2136 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2137 
2138 	if (rv != 0)
2139 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2140 
2141 	return (NULL);
2142 }
2143 
2144 static sbd_error_t *
2145 drmach_io_release(drmachid_t id)
2146 {
2147 	if (!DRMACH_IS_IO_ID(id))
2148 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2149 	return (NULL);
2150 }
2151 
2152 sbd_error_t *
2153 drmach_io_unrelease(drmachid_t id)
2154 {
2155 	if (!DRMACH_IS_IO_ID(id))
2156 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2157 	return (NULL);
2158 }
2159 
2160 /*ARGSUSED*/
2161 sbd_error_t *
2162 drmach_io_post_release(drmachid_t id)
2163 {
2164 	return (NULL);
2165 }
2166 
2167 /*ARGSUSED*/
2168 sbd_error_t *
2169 drmach_io_post_attach(drmachid_t id)
2170 {
2171 	int rv;
2172 
2173 	if (!DRMACH_IS_IO_ID(id))
2174 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2175 
2176 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2177 
2178 	if (rv != 0)
2179 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2180 
2181 	return (0);
2182 }
2183 
2184 static sbd_error_t *
2185 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2186 {
2187 	drmach_device_t *dp;
2188 	sbd_error_t	*err;
2189 	int		 configured;
2190 
2191 	ASSERT(DRMACH_IS_IO_ID(id));
2192 	dp = id;
2193 
2194 	err = drmach_io_is_attached(id, &configured);
2195 	if (err)
2196 		return (err);
2197 
2198 	stat->assigned = dp->bp->assigned;
2199 	stat->powered = dp->bp->powered;
2200 	stat->configured = (configured != 0);
2201 	stat->busy = dp->busy;
2202 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
2203 	stat->info[0] = '\0';
2204 
2205 	return (NULL);
2206 }
2207 
2208 static sbd_error_t *
2209 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2210 {
2211 	static void drmach_mem_dispose(drmachid_t);
2212 	static sbd_error_t *drmach_mem_release(drmachid_t);
2213 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2214 	dev_info_t *dip;
2215 	int rv;
2216 
2217 	drmach_mem_t	*mp;
2218 
2219 	rv = 0;
2220 
2221 	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
2222 	    (rv <= 0)) {
2223 		*idp = (drmachid_t)0;
2224 		return (NULL);
2225 	}
2226 
2227 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2228 	proto->unum = 0;
2229 
2230 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2231 	mp->dev.node = drmach_node_dup(proto->node);
2232 	mp->dev.cm.isa = (void *)drmach_mem_new;
2233 	mp->dev.cm.dispose = drmach_mem_dispose;
2234 	mp->dev.cm.release = drmach_mem_release;
2235 	mp->dev.cm.status = drmach_mem_status;
2236 
2237 	(void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s",
2238 	    mp->dev.type);
2239 
2240 	dip = mp->dev.node->n_getdip(mp->dev.node);
2241 	if (drmach_setup_mc_info(dip, mp) != 0) {
2242 		return (drerr_new(1, EOPL_MC_SETUP, NULL));
2243 	}
2244 
2245 	/* make sure we do not create memoryless nodes */
2246 	if (mp->nbytes == 0) {
2247 		*idp = (drmachid_t)NULL;
2248 		kmem_free(mp, sizeof (drmach_mem_t));
2249 	} else
2250 		*idp = (drmachid_t)mp;
2251 
2252 	return (NULL);
2253 }
2254 
2255 static void
2256 drmach_mem_dispose(drmachid_t id)
2257 {
2258 	drmach_mem_t *mp;
2259 
2260 	ASSERT(DRMACH_IS_MEM_ID(id));
2261 
2262 
2263 	mp = id;
2264 
2265 	if (mp->dev.node)
2266 		drmach_node_dispose(mp->dev.node);
2267 
2268 	if (mp->memlist) {
2269 		memlist_delete(mp->memlist);
2270 		mp->memlist = NULL;
2271 	}
2272 
2273 	kmem_free(mp, sizeof (*mp));
2274 }
2275 
2276 sbd_error_t *
2277 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2278 {
2279 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2280 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2281 	int		rv;
2282 
2283 	ASSERT(size != 0);
2284 
2285 	if (!DRMACH_IS_MEM_ID(id))
2286 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2287 
2288 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
2289 	if (rv == ENOMEM) {
2290 		cmn_err(CE_WARN, "%lu megabytes not available to kernel cage",
2291 		    (ulong_t)(size == 0 ? 0 : size / MBYTE));
2292 	} else if (rv != 0) {
2293 		/* catch this in debug kernels */
2294 		ASSERT(0);
2295 
2296 		cmn_err(CE_WARN, "unexpected kcage_range_add return value %d",
2297 		    rv);
2298 	}
2299 
2300 	if (rv) {
2301 		return (DRMACH_INTERNAL_ERROR());
2302 	}
2303 	else
2304 		return (NULL);
2305 }
2306 
2307 sbd_error_t *
2308 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2309 {
2310 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2311 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2312 	int		rv;
2313 
2314 	if (!DRMACH_IS_MEM_ID(id))
2315 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2316 
2317 	if (size > 0) {
2318 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2319 		if (rv != 0) {
2320 			cmn_err(CE_WARN,
2321 			    "unexpected kcage_range_delete_post_mem_del"
2322 			    " return value %d", rv);
2323 			return (DRMACH_INTERNAL_ERROR());
2324 		}
2325 	}
2326 
2327 	return (NULL);
2328 }
2329 
2330 sbd_error_t *
2331 drmach_mem_disable(drmachid_t id)
2332 {
2333 	if (!DRMACH_IS_MEM_ID(id))
2334 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2335 	else {
2336 		drmach_flush_all();
2337 		return (NULL);
2338 	}
2339 }
2340 
2341 sbd_error_t *
2342 drmach_mem_enable(drmachid_t id)
2343 {
2344 	if (!DRMACH_IS_MEM_ID(id))
2345 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2346 	else
2347 		return (NULL);
2348 }
2349 
2350 sbd_error_t *
2351 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2352 {
2353 	drmach_mem_t *mp;
2354 
2355 	if (!DRMACH_IS_MEM_ID(id))
2356 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2357 
2358 	mp = (drmach_mem_t *)id;
2359 
2360 	/*
2361 	 * This is only used by dr to round up/down the memory
2362 	 * for copying. Our unit of memory isolation is 64 MB.
2363 	 */
2364 
2365 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2366 	mem->mi_basepa = mp->base_pa;
2367 	mem->mi_size = mp->nbytes;
2368 	mem->mi_slice_size = mp->slice_size;
2369 
2370 	return (NULL);
2371 }
2372 
2373 sbd_error_t *
2374 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2375 {
2376 	drmach_mem_t *mp;
2377 
2378 	if (!DRMACH_IS_MEM_ID(id))
2379 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2380 
2381 	mp = (drmach_mem_t *)id;
2382 
2383 	*pa = mp->base_pa;
2384 	return (NULL);
2385 }
2386 
2387 sbd_error_t *
2388 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2389 {
2390 	drmach_mem_t	*mem;
2391 #ifdef	DEBUG
2392 	int		rv;
2393 #endif
2394 	struct memlist	*mlist;
2395 
2396 	if (!DRMACH_IS_MEM_ID(id))
2397 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2398 
2399 	mem = (drmach_mem_t *)id;
2400 	mlist = memlist_dup(mem->memlist);
2401 
2402 #ifdef DEBUG
2403 	/*
2404 	 * Make sure the incoming memlist doesn't already
2405 	 * intersect with what's present in the system (phys_install).
2406 	 */
2407 	memlist_read_lock();
2408 	rv = memlist_intersect(phys_install, mlist);
2409 	memlist_read_unlock();
2410 	if (rv) {
2411 		DRMACH_PR("Derived memlist intersects with phys_install\n");
2412 		memlist_dump(mlist);
2413 
2414 		DRMACH_PR("phys_install memlist:\n");
2415 		memlist_dump(phys_install);
2416 
2417 		memlist_delete(mlist);
2418 		return (DRMACH_INTERNAL_ERROR());
2419 	}
2420 
2421 	DRMACH_PR("Derived memlist:");
2422 	memlist_dump(mlist);
2423 #endif
2424 	*ml = mlist;
2425 
2426 	return (NULL);
2427 }
2428 
2429 sbd_error_t *
2430 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2431 {
2432 	drmach_mem_t	*mem;
2433 
2434 	if (!DRMACH_IS_MEM_ID(id))
2435 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2436 
2437 	mem = (drmach_mem_t *)id;
2438 
2439 	*bytes = mem->slice_size;
2440 
2441 	return (NULL);
2442 }
2443 
2444 
2445 /* ARGSUSED */
2446 processorid_t
2447 drmach_mem_cpu_affinity(drmachid_t id)
2448 {
2449 	return (CPU_CURRENT);
2450 }
2451 
2452 static sbd_error_t *
2453 drmach_mem_release(drmachid_t id)
2454 {
2455 	if (!DRMACH_IS_MEM_ID(id))
2456 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2457 	return (NULL);
2458 }
2459 
2460 static sbd_error_t *
2461 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2462 {
2463 	drmach_mem_t *dp;
2464 	uint64_t	 pa, slice_size;
2465 	struct memlist	*ml;
2466 
2467 	ASSERT(DRMACH_IS_MEM_ID(id));
2468 	dp = id;
2469 
2470 	/* get starting physical address of target memory */
2471 	pa = dp->base_pa;
2472 
2473 	/* round down to slice boundary */
2474 	slice_size = dp->slice_size;
2475 	pa &= ~(slice_size - 1);
2476 
2477 	/* stop at first span that is in slice */
2478 	memlist_read_lock();
2479 	for (ml = phys_install; ml; ml = ml->ml_next)
2480 		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
2481 			break;
2482 	memlist_read_unlock();
2483 
2484 	stat->assigned = dp->dev.bp->assigned;
2485 	stat->powered = dp->dev.bp->powered;
2486 	stat->configured = (ml != NULL);
2487 	stat->busy = dp->dev.busy;
2488 	(void) strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2489 	stat->info[0] = '\0';
2490 
2491 	return (NULL);
2492 }
2493 
2494 
2495 sbd_error_t *
2496 drmach_board_deprobe(drmachid_t id)
2497 {
2498 	drmach_board_t	*bp;
2499 
2500 	if (!DRMACH_IS_BOARD_ID(id))
2501 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2502 
2503 	bp = id;
2504 
2505 	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
2506 
2507 	if (bp->tree) {
2508 		drmach_node_dispose(bp->tree);
2509 		bp->tree = NULL;
2510 	}
2511 	if (bp->devices) {
2512 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2513 		bp->devices = NULL;
2514 	}
2515 
2516 	bp->boot_board = 0;
2517 
2518 	return (NULL);
2519 }
2520 
2521 /*ARGSUSED*/
2522 static sbd_error_t *
2523 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2524 {
2525 	drmach_board_t		*bp = (drmach_board_t *)id;
2526 	sbd_error_t		*err = NULL;
2527 	int	rv;
2528 	unsigned cpu_impl;
2529 
2530 	if (!DRMACH_IS_BOARD_ID(id))
2531 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2532 
2533 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2534 	rv = opl_probe_sb(bp->bnum, &cpu_impl);
2535 	if (rv != 0) {
2536 		err = drerr_new(1, EOPL_PROBE, bp->cm.name);
2537 		return (err);
2538 	}
2539 	return (err);
2540 }
2541 
2542 /*ARGSUSED*/
2543 static sbd_error_t *
2544 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2545 {
2546 	drmach_board_t	*bp;
2547 	sbd_error_t	*err = NULL;
2548 	int	rv;
2549 
2550 	if (!DRMACH_IS_BOARD_ID(id))
2551 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2552 	bp = (drmach_board_t *)id;
2553 
2554 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2555 
2556 	rv = opl_unprobe_sb(bp->bnum);
2557 	if (rv != 0) {
2558 		err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
2559 	}
2560 
2561 	return (err);
2562 }
2563 
2564 
2565 /*ARGSUSED*/
2566 sbd_error_t *
2567 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2568 {
2569 	struct memlist	*ml;
2570 	uint64_t	src_pa;
2571 	uint64_t	dst_pa;
2572 	uint64_t	dst;
2573 
2574 	dst_pa = va_to_pa(&dst);
2575 
2576 	memlist_read_lock();
2577 	for (ml = phys_install; ml; ml = ml->ml_next) {
2578 		uint64_t	nbytes;
2579 
2580 		src_pa = ml->ml_address;
2581 		nbytes = ml->ml_size;
2582 
2583 		while (nbytes != 0ull) {
2584 
2585 			/* copy 32 bytes at arc_pa to dst_pa */
2586 			bcopy32_il(src_pa, dst_pa);
2587 
2588 			/* increment by 32 bytes */
2589 			src_pa += (4 * sizeof (uint64_t));
2590 
2591 			/* decrement by 32 bytes */
2592 			nbytes -= (4 * sizeof (uint64_t));
2593 		}
2594 	}
2595 	memlist_read_unlock();
2596 
2597 	return (NULL);
2598 }
2599 
2600 static struct {
2601 	const char	*name;
2602 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
2603 } drmach_pt_arr[] = {
2604 	{ "readmem",		drmach_pt_readmem		},
2605 	{ "ikprobe",	drmach_pt_ikprobe	},
2606 	{ "ikdeprobe",	drmach_pt_ikdeprobe	},
2607 
2608 	/* the following line must always be last */
2609 	{ NULL,			NULL				}
2610 };
2611 
2612 /*ARGSUSED*/
2613 sbd_error_t *
2614 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2615 {
2616 	int		i;
2617 	sbd_error_t	*err;
2618 
2619 	i = 0;
2620 	while (drmach_pt_arr[i].name != NULL) {
2621 		int len = strlen(drmach_pt_arr[i].name);
2622 
2623 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2624 			break;
2625 
2626 		i += 1;
2627 	}
2628 
2629 	if (drmach_pt_arr[i].name == NULL)
2630 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2631 	else
2632 		err = (*drmach_pt_arr[i].handler)(id, opts);
2633 
2634 	return (err);
2635 }
2636 
2637 sbd_error_t *
2638 drmach_release(drmachid_t id)
2639 {
2640 	drmach_common_t *cp;
2641 
2642 	if (!DRMACH_IS_DEVICE_ID(id))
2643 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2644 	cp = id;
2645 
2646 	return (cp->release(id));
2647 }
2648 
2649 sbd_error_t *
2650 drmach_status(drmachid_t id, drmach_status_t *stat)
2651 {
2652 	drmach_common_t *cp;
2653 	sbd_error_t	*err;
2654 
2655 	rw_enter(&drmach_boards_rwlock, RW_READER);
2656 
2657 	if (!DRMACH_IS_ID(id)) {
2658 		rw_exit(&drmach_boards_rwlock);
2659 		return (drerr_new(0, EOPL_NOTID, NULL));
2660 	}
2661 	cp = (drmach_common_t *)id;
2662 	err = cp->status(id, stat);
2663 
2664 	rw_exit(&drmach_boards_rwlock);
2665 
2666 	return (err);
2667 }
2668 
2669 static sbd_error_t *
2670 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2671 {
2672 	drmach_common_t *cp;
2673 
2674 	if (!DRMACH_IS_ID(id))
2675 		return (drerr_new(0, EOPL_NOTID, NULL));
2676 	cp = id;
2677 
2678 	return (cp->status(id, stat));
2679 }
2680 
2681 /*ARGSUSED*/
2682 sbd_error_t *
2683 drmach_unconfigure(drmachid_t id, int flags)
2684 {
2685 	drmach_device_t *dp;
2686 	dev_info_t	*rdip, *fdip = NULL;
2687 	char name[OBP_MAXDRVNAME];
2688 	int rv;
2689 
2690 	if (DRMACH_IS_CPU_ID(id))
2691 		return (NULL);
2692 
2693 	if (!DRMACH_IS_DEVICE_ID(id))
2694 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2695 
2696 	dp = id;
2697 
2698 	rdip = dp->node->n_getdip(dp->node);
2699 
2700 	ASSERT(rdip);
2701 
2702 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2703 
2704 	if (rv)
2705 		return (NULL);
2706 
2707 	/*
2708 	 * Note: FORCE flag is no longer necessary under devfs
2709 	 */
2710 
2711 	ASSERT(e_ddi_branch_held(rdip));
2712 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2713 		sbd_error_t	*err;
2714 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2715 
2716 		/*
2717 		 * If non-NULL, fdip is returned held and must be released.
2718 		 */
2719 		if (fdip != NULL) {
2720 			(void) ddi_pathname(fdip, path);
2721 			ndi_rele_devi(fdip);
2722 		} else {
2723 			(void) ddi_pathname(rdip, path);
2724 		}
2725 
2726 		err = drerr_new(1, EOPL_DRVFAIL, path);
2727 
2728 		kmem_free(path, MAXPATHLEN);
2729 
2730 		return (err);
2731 	}
2732 
2733 	return (NULL);
2734 }
2735 
2736 
2737 int
2738 drmach_cpu_poweron(struct cpu *cp)
2739 {
2740 	int bnum, cpuid, onb_core_num, strand_id;
2741 	drmach_board_t *bp;
2742 
2743 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2744 
2745 	cpuid = cp->cpu_id;
2746 	bnum = LSB_ID(cpuid);
2747 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2748 	strand_id = STRAND_ID(cpuid);
2749 	bp = drmach_get_board_by_bnum(bnum);
2750 
2751 	ASSERT(bp);
2752 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2753 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2754 		    HOTADD_CPU) != 0) {
2755 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2756 			    onb_core_num, bnum);
2757 			return (EIO);
2758 		}
2759 	}
2760 
2761 	ASSERT(MUTEX_HELD(&cpu_lock));
2762 
2763 	if (drmach_cpu_start(cp) != 0) {
2764 		if (bp->cores[onb_core_num].core_started == 0) {
2765 			/*
2766 			 * we must undo the hotadd or no one will do that
2767 			 * If this fails, we will do this again in
2768 			 * drmach_board_disconnect.
2769 			 */
2770 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2771 			    HOTREMOVE_CPU) != 0) {
2772 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2773 				    "on board %d\n", onb_core_num, bnum);
2774 			}
2775 		}
2776 		return (EBUSY);
2777 	} else {
2778 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2779 		return (0);
2780 	}
2781 }
2782 
2783 int
2784 drmach_cpu_poweroff(struct cpu *cp)
2785 {
2786 	int 		rv = 0;
2787 	processorid_t	cpuid = cp->cpu_id;
2788 
2789 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2790 
2791 	ASSERT(MUTEX_HELD(&cpu_lock));
2792 
2793 	/*
2794 	 * Capture all CPUs (except for detaching proc) to prevent
2795 	 * crosscalls to the detaching proc until it has cleared its
2796 	 * bit in cpu_ready_set.
2797 	 *
2798 	 * The CPU's remain paused and the prom_mutex is known to be free.
2799 	 * This prevents the x-trap victim from blocking when doing prom
2800 	 * IEEE-1275 calls at a high PIL level.
2801 	 */
2802 
2803 	promsafe_pause_cpus();
2804 
2805 	/*
2806 	 * Quiesce interrupts on the target CPU. We do this by setting
2807 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2808 	 * prevent it from receiving cross calls and cross traps.
2809 	 * This prevents the processor from receiving any new soft interrupts.
2810 	 */
2811 	mp_cpu_quiesce(cp);
2812 
2813 	rv = prom_stopcpu_bycpuid(cpuid);
2814 	if (rv == 0)
2815 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2816 
2817 	start_cpus();
2818 
2819 	if (rv == 0) {
2820 		int bnum, onb_core_num, strand_id;
2821 		drmach_board_t *bp;
2822 
2823 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2824 
2825 		bnum = LSB_ID(cpuid);
2826 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2827 		strand_id = STRAND_ID(cpuid);
2828 		bp = drmach_get_board_by_bnum(bnum);
2829 		ASSERT(bp);
2830 
2831 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2832 		if (bp->cores[onb_core_num].core_started == 0) {
2833 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2834 			    HOTREMOVE_CPU) != 0) {
2835 				cmn_err(CE_WARN, "Failed to remove CMP %d LSB "
2836 				    "%d\n", onb_core_num, bnum);
2837 				return (EIO);
2838 			}
2839 		}
2840 	}
2841 
2842 	return (rv);
2843 }
2844 
2845 /*ARGSUSED*/
2846 int
2847 drmach_verify_sr(dev_info_t *dip, int sflag)
2848 {
2849 	return (0);
2850 }
2851 
/*
 * Suspend hook; intentionally empty in this implementation.
 */
void
drmach_suspend_last(void)
{
	/* nothing to do */
}
2856 
/*
 * Resume hook; intentionally empty in this implementation.
 */
void
drmach_resume_first(void)
{
	/* nothing to do */
}
2861 
2862 /*
2863  * Log a DR sysevent.
2864  * Return value: 0 success, non-zero failure.
2865  */
2866 int
2867 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2868 {
2869 	sysevent_t			*ev;
2870 	sysevent_id_t			eid;
2871 	int				rv, km_flag;
2872 	sysevent_value_t		evnt_val;
2873 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2874 	char				attach_pnt[MAXNAMELEN];
2875 
2876 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2877 	attach_pnt[0] = '\0';
2878 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2879 		rv = -1;
2880 		goto logexit;
2881 	}
2882 	if (verbose) {
2883 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2884 		    attach_pnt, hint, flag, verbose);
2885 	}
2886 
2887 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2888 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2889 		rv = -2;
2890 		goto logexit;
2891 	}
2892 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2893 	evnt_val.value.sv_string = attach_pnt;
2894 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
2895 	    km_flag)) != 0)
2896 		goto logexit;
2897 
2898 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2899 	evnt_val.value.sv_string = hint;
2900 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
2901 	    km_flag)) != 0) {
2902 		sysevent_free_attr(evnt_attr_list);
2903 		goto logexit;
2904 	}
2905 
2906 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2907 
2908 	/*
2909 	 * Log the event but do not sleep waiting for its
2910 	 * delivery. This provides insulation from syseventd.
2911 	 */
2912 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2913 
2914 logexit:
2915 	if (ev)
2916 		sysevent_free(ev);
2917 	if ((rv != 0) && verbose)
2918 		cmn_err(CE_WARN, "drmach_log_sysevent failed (rv %d) for %s "
2919 		    " %s\n", rv, attach_pnt, hint);
2920 
2921 	return (rv);
2922 }
2923 
2924 #define	OPL_DR_STATUS_PROP "dr-status"
2925 
2926 static int
2927 opl_check_dr_status()
2928 {
2929 	pnode_t	node;
2930 	int	rtn, len;
2931 	char	*str;
2932 
2933 	node = prom_rootnode();
2934 	if (node == OBP_BADNODE) {
2935 		return (1);
2936 	}
2937 
2938 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2939 	if (len == -1) {
2940 		/*
2941 		 * dr-status doesn't exist when DR is activated and
2942 		 * any warning messages aren't needed.
2943 		 */
2944 		return (1);
2945 	}
2946 
2947 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2948 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2949 	kmem_free(str, len + 1);
2950 	if (rtn == -1) {
2951 		return (1);
2952 	} else {
2953 		return (0);
2954 	}
2955 }
2956 
2957 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2958 
2959 static struct memlist *
2960 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2961 	struct memlist *mlist, uint64_t base, uint64_t len)
2962 {
2963 	struct memlist	*ml, *tl, *nl;
2964 
2965 	if (len == 0ull)
2966 		return (NULL);
2967 
2968 	if (mlist == NULL) {
2969 		mlist = p->free_mlist;
2970 		if (mlist == NULL)
2971 			return (NULL);
2972 		p->free_mlist = mlist->ml_next;
2973 		mlist->ml_address = base;
2974 		mlist->ml_size = len;
2975 		mlist->ml_next = mlist->ml_prev = NULL;
2976 
2977 		return (mlist);
2978 	}
2979 
2980 	for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
2981 		if (base < ml->ml_address) {
2982 			if ((base + len) < ml->ml_address) {
2983 				nl = p->free_mlist;
2984 				if (nl == NULL)
2985 					return (NULL);
2986 				p->free_mlist = nl->ml_next;
2987 				nl->ml_address = base;
2988 				nl->ml_size = len;
2989 				nl->ml_next = ml;
2990 				if ((nl->ml_prev = ml->ml_prev) != NULL)
2991 					nl->ml_prev->ml_next = nl;
2992 				ml->ml_prev = nl;
2993 				if (mlist == ml)
2994 					mlist = nl;
2995 			} else {
2996 				ml->ml_size = MAX((base + len),
2997 				    (ml->ml_address + ml->ml_size)) - base;
2998 				ml->ml_address = base;
2999 			}
3000 			break;
3001 
3002 		} else if (base <= (ml->ml_address + ml->ml_size)) {
3003 			ml->ml_size =
3004 			    MAX((base + len), (ml->ml_address + ml->ml_size)) -
3005 			    MIN(ml->ml_address, base);
3006 			ml->ml_address = MIN(ml->ml_address, base);
3007 			break;
3008 		}
3009 	}
3010 	if (ml == NULL) {
3011 		nl = p->free_mlist;
3012 		if (nl == NULL)
3013 			return (NULL);
3014 		p->free_mlist = nl->ml_next;
3015 		nl->ml_address = base;
3016 		nl->ml_size = len;
3017 		nl->ml_next = NULL;
3018 		nl->ml_prev = tl;
3019 		tl->ml_next = nl;
3020 	}
3021 
3022 	return (mlist);
3023 }
3024 
3025 /*
3026  * The routine performs the necessary memory COPY and MC adr SWITCH.
3027  * Both operations MUST be at the same "level" so that the stack is
3028  * maintained correctly between the copy and switch.  The switch
3029  * portion implements a caching mechanism to guarantee the code text
3030  * is cached prior to execution.  This is to guard against possible
3031  * memory access while the MC adr's are being modified.
3032  *
3033  * IMPORTANT: The _drmach_copy_rename_end() function must immediately
3034  * follow drmach_copy_rename_prog__relocatable() so that the correct
3035  * "length" of the drmach_copy_rename_prog__relocatable can be
3036  * calculated.  This routine MUST be a LEAF function, i.e. it can
3037  * make NO function calls, primarily for two reasons:
3038  *
3039  *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
3044  *
3045  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3046  * providers (and anything else, for that matter) that this
3047  * function's text is manually relocated elsewhere before it is
3048  * executed.  That is, it cannot be safely instrumented with any
3049  * methodology that is PC-relative.
3050  */
3051 
/*
 * Timeout, in seconds, for the rename (FMEM) command.  The value is
 * multiplied by system_clock_frequency to obtain the delay.
 *
 * The FMEM command itself should complete within 15 seconds; 2 more
 * seconds are added to be conservative.
 *
 * Note that drmach_asm.s also checks the SCF BUSY bit right before the
 * FMEM command is issued.  XSCF sets the SCF BUSY bit when the other
 * domain on the same PSB reboots, in which case it will not be able to
 * service the FMEM command within 15 seconds.  After setting the SCF BUSY
 * bit, XSCF waits a while before servicing another reboot command, so
 * there is no race condition.
 */

static int	fmem_timeout = 17;
3072 
/*
 * Assumed minimum copy throughput, in bytes per second.
 *
 * Empirical data from some OPL systems shows a copy rate of 250 MB per
 * second; 80 MB is used here to be conservative.  In the normal case this
 * timeout does not affect anything.
 */

static int	min_copy_size_per_sec = 80 * 1024 * 1024;
3081 
/*
 * Timeout, in seconds, for the xcall synchronization that gets all the
 * CPUs ready to do the parallel copying.  Even on a fully loaded system
 * 10 seconds should be long enough.
 */

static int	cpu_xcall_delay = 10;

/*
 * NOTE(review): not referenced in this part of the file; by its name it
 * presumably disables the multi-CPU copy when non-zero -- confirm against
 * its users.
 */
int drmach_disable_mcopy = 0;
3091 
/*
 * The following delay loop executes sleep instruction to yield the
 * CPU to other strands.  If this is not done, some strand will tie
 * up the CPU in busy loops while the other strand cannot do useful
 * work.  The copy procedure will take a much longer time without this.
 *
 *	ms	delay in milliseconds
 *	freq	stick frequency in ticks per second
 *
 * The loop runs until more than (ms * freq / 1000) stick ticks have
 * elapsed since entry.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms exactly one statement (safe in unbraced if/else), and
 * the arguments are parenthesized so that expressions may be passed.
 */
#define	DR_DELAY_IL(ms, freq)					\
	do {							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;	\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	} while (0)
3111 
/*
 * Number of no-progress polls of a copying slave that the master tolerates
 * before giving up.  Each poll takes 2ms, so 500 polls is 1000ms.
 */
static int drmach_copy_rename_timeout = 500;
3114 
3115 static int
3116 drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
3117 	int cpuid)
3118 {
3119 	struct memlist		*ml;
3120 	register int		rtn;
3121 	int			i;
3122 	register uint64_t	curr, limit;
3123 	extern uint64_t		drmach_get_stick_il();
3124 	extern void		membar_sync_il();
3125 	extern void		flush_instr_mem_il(void*);
3126 	extern void		flush_windows_il(void);
3127 	uint64_t		copy_start;
3128 
3129 	/*
3130 	 * flush_windows is moved here to make sure all
3131 	 * registers used in the callers are flushed to
3132 	 * memory before the copy.
3133 	 *
3134 	 * If flush_windows() is called too early in the
3135 	 * calling function, the compiler might put some
3136 	 * data in the local registers after flush_windows().
3137 	 * After FMA, if there is any fill trap, the registers
3138 	 * will contain stale data.
3139 	 */
3140 
3141 	flush_windows_il();
3142 
3143 	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
3144 	membar_sync_il();
3145 
3146 	if (prog->data->cpuid == cpuid) {
3147 		limit = drmach_get_stick_il();
3148 		limit += cpu_xcall_delay * system_clock_freq;
3149 		for (i = 0; i < NCPU; i++) {
3150 			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
3151 				/* wait for all CPU's to be ready */
3152 				for (;;) {
3153 					if (prog->critical->stat[i] ==
3154 					    FMEM_LOOP_COPY_READY) {
3155 						break;
3156 					}
3157 					DR_DELAY_IL(1, prog->data->stick_freq);
3158 				}
3159 				curr = drmach_get_stick_il();
3160 				if (curr > limit) {
3161 					prog->data->fmem_status.error =
3162 					    EOPL_FMEM_XC_TIMEOUT;
3163 					return (EOPL_FMEM_XC_TIMEOUT);
3164 				}
3165 			}
3166 		}
3167 		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
3168 		membar_sync_il();
3169 		copy_start = drmach_get_stick_il();
3170 	} else {
3171 		for (;;) {
3172 			if (prog->data->fmem_status.stat ==
3173 			    FMEM_LOOP_COPY_READY) {
3174 				break;
3175 			}
3176 			if (prog->data->fmem_status.error) {
3177 				prog->data->error[cpuid] = EOPL_FMEM_TERMINATE;
3178 				return (EOPL_FMEM_TERMINATE);
3179 			}
3180 			DR_DELAY_IL(1, prog->data->stick_freq);
3181 		}
3182 	}
3183 
3184 	/*
3185 	 * DO COPY.
3186 	 */
3187 	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
3188 		for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->ml_next) {
3189 			uint64_t	s_pa, t_pa;
3190 			uint64_t	nbytes;
3191 
3192 			s_pa = prog->data->s_copybasepa + ml->ml_address;
3193 			t_pa = prog->data->t_copybasepa + ml->ml_address;
3194 			nbytes = ml->ml_size;
3195 
3196 			while (nbytes != 0ull) {
3197 				/*
3198 				 * If the master has detected error, we just
3199 				 * bail out
3200 				 */
3201 				if (prog->data->fmem_status.error !=
3202 				    ESBD_NOERROR) {
3203 					prog->data->error[cpuid] =
3204 					    EOPL_FMEM_TERMINATE;
3205 					return (EOPL_FMEM_TERMINATE);
3206 				}
3207 				/*
3208 				 * This copy does NOT use an ASI
3209 				 * that avoids the Ecache, therefore
3210 				 * the dst_pa addresses may remain
3211 				 * in our Ecache after the dst_pa
3212 				 * has been removed from the system.
3213 				 * A subsequent write-back to memory
3214 				 * will cause an ARB-stop because the
3215 				 * physical address no longer exists
3216 				 * in the system. Therefore we must
3217 				 * flush out local Ecache after we
3218 				 * finish the copy.
3219 				 */
3220 
3221 				/* copy 32 bytes at src_pa to dst_pa */
3222 				bcopy32_il(s_pa, t_pa);
3223 
3224 				/*
3225 				 * increment the counter to signal that we are
3226 				 * alive
3227 				 */
3228 				prog->stat->nbytes[cpuid] += 32;
3229 
3230 				/* increment by 32 bytes */
3231 				s_pa += (4 * sizeof (uint64_t));
3232 				t_pa += (4 * sizeof (uint64_t));
3233 
3234 				/* decrement by 32 bytes */
3235 				nbytes -= (4 * sizeof (uint64_t));
3236 			}
3237 		}
3238 		prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
3239 		membar_sync_il();
3240 	}
3241 
3242 	/*
3243 	 * Since bcopy32_il() does NOT use an ASI to bypass
3244 	 * the Ecache, we need to flush our Ecache after
3245 	 * the copy is complete.
3246 	 */
3247 	flush_cache_il();
3248 
3249 	/*
3250 	 * drmach_fmem_exec_script()
3251 	 */
3252 	if (prog->data->cpuid == cpuid) {
3253 		uint64_t	last, now;
3254 
3255 		limit = copy_start + prog->data->copy_delay;
3256 		for (i = 0; i < NCPU; i++) {
3257 			if (!CPU_IN_SET(prog->data->cpu_slave_set, i))
3258 				continue;
3259 
3260 			for (;;) {
3261 				/*
3262 				 * we get FMEM_LOOP_FMEM_READY in
3263 				 * normal case
3264 				 */
3265 				if (prog->critical->stat[i] ==
3266 				    FMEM_LOOP_FMEM_READY) {
3267 					break;
3268 				}
3269 				/* got error traps */
3270 				if (prog->data->error[i] ==
3271 				    EOPL_FMEM_COPY_ERROR) {
3272 					prog->data->fmem_status.error =
3273 					    EOPL_FMEM_COPY_ERROR;
3274 					return (EOPL_FMEM_COPY_ERROR);
3275 				}
3276 				/*
3277 				 * if we have not reached limit, wait
3278 				 * more
3279 				 */
3280 				curr = drmach_get_stick_il();
3281 				if (curr <= limit)
3282 					continue;
3283 
3284 				prog->data->slowest_cpuid = i;
3285 				prog->data->copy_wait_time = curr - copy_start;
3286 
3287 				/* now check if slave is alive */
3288 				last = prog->stat->nbytes[i];
3289 
3290 				DR_DELAY_IL(1, prog->data->stick_freq);
3291 
3292 				now = prog->stat->nbytes[i];
3293 				if (now <= last) {
3294 					/*
3295 					 * no progress, perhaps just
3296 					 * finished
3297 					 */
3298 					DR_DELAY_IL(1, prog->data->stick_freq);
3299 					if (prog->critical->stat[i] ==
3300 					    FMEM_LOOP_FMEM_READY)
3301 						break;
3302 					/* copy error */
3303 					if (prog->data->error[i] ==
3304 					    EOPL_FMEM_COPY_ERROR) {
3305 						prog->data-> fmem_status.error =
3306 						    EOPL_FMEM_COPY_ERROR;
3307 						return (EOPL_FMEM_COPY_ERROR);
3308 					}
3309 
3310 					prog->data->copy_rename_count++;
3311 					if (prog->data->copy_rename_count
3312 					    < drmach_copy_rename_timeout) {
3313 						continue;
3314 					} else {
3315 						prog->data->fmem_status.error =
3316 						    EOPL_FMEM_COPY_TIMEOUT;
3317 						return (EOPL_FMEM_COPY_TIMEOUT);
3318 					}
3319 				}
3320 			}
3321 		}
3322 
3323 		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
3324 		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;
3325 
3326 		membar_sync_il();
3327 		flush_instr_mem_il((void*) (prog->critical));
3328 		/*
3329 		 * drmach_fmem_exec_script()
3330 		 */
3331 		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
3332 		return (rtn);
3333 	} else {
3334 		flush_instr_mem_il((void*) (prog->critical));
3335 		/*
3336 		 * drmach_fmem_loop_script()
3337 		 */
3338 		rtn = prog->critical->loop((void *)(prog->critical), PAGESIZE,
3339 		    (void *)&(prog->critical->stat[cpuid]));
3340 		prog->data->error[cpuid] = rtn;
3341 		/* slave thread does not care the rv */
3342 		return (0);
3343 	}
3344 }
3345 
/*
 * End marker for drmach_copy_rename_prog__relocatable().
 *
 * IMPORTANT:	This empty function MUST stay immediately after
 *		drmach_copy_rename_prog__relocatable, since the distance
 *		between the two is what is used to estimate that
 *		function's size.  Note that this assumes the compiler
 *		keeps these functions in the order in which they
 *		appear :-o
 */
static void
drmach_copy_rename_end(void)
{
	/* deliberately empty */
}
3357 
3358 
3359 static int
3360 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3361 {
3362 	struct memlist *ml;
3363 	caddr_t buf;
3364 	int nbytes, s, n_elements;
3365 
3366 	nbytes = PAGESIZE;
3367 	n_elements = 0;
3368 	s = roundup(sizeof (struct memlist), sizeof (void *));
3369 	p->free_mlist = NULL;
3370 	buf = p->memlist_buffer;
3371 	while (nbytes >= sizeof (struct memlist)) {
3372 		ml = (struct memlist *)buf;
3373 		ml->ml_next = p->free_mlist;
3374 		p->free_mlist = ml;
3375 		buf += s;
3376 		n_elements++;
3377 		nbytes -= s;
3378 	}
3379 	return (n_elements);
3380 }
3381 
3382 static void
3383 drmach_lock_critical(caddr_t va, caddr_t new_va)
3384 {
3385 	tte_t tte;
3386 	int i;
3387 
3388 	kpreempt_disable();
3389 
3390 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3391 		vtag_flushpage(new_va, (uint64_t)ksfmmup);
3392 		sfmmu_memtte(&tte, va_to_pfn(va), PROC_DATA|HAT_NOSYNC, TTE8K);
3393 		tte.tte_intlo |= TTE_LCK_INT;
3394 		sfmmu_dtlb_ld_kva(new_va, &tte);
3395 		sfmmu_itlb_ld_kva(new_va, &tte);
3396 		va += PAGESIZE;
3397 		new_va += PAGESIZE;
3398 	}
3399 }
3400 
3401 static void
3402 drmach_unlock_critical(caddr_t va)
3403 {
3404 	int i;
3405 
3406 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3407 		vtag_flushpage(va, (uint64_t)ksfmmup);
3408 		va += PAGESIZE;
3409 	}
3410 
3411 	kpreempt_enable();
3412 }
3413 
3414 sbd_error_t *
3415 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3416 	struct memlist *c_ml, drmachid_t *pgm_id)
3417 {
3418 	drmach_mem_t	*s_mem;
3419 	drmach_mem_t	*t_mem;
3420 	struct memlist	*x_ml;
3421 	uint64_t	s_copybasepa, t_copybasepa;
3422 	uint_t		len;
3423 	caddr_t		bp, wp;
3424 	int		s_bd, t_bd, cpuid, active_cpus, i;
3425 	int		max_elms, mlist_size, rv;
3426 	uint64_t	c_addr;
3427 	size_t		c_size, copy_sz, sz;
3428 	extern void	drmach_fmem_loop_script();
3429 	extern void	drmach_fmem_loop_script_rtn();
3430 	extern int	drmach_fmem_exec_script();
3431 	extern void	drmach_fmem_exec_script_end();
3432 	sbd_error_t	*err;
3433 	drmach_copy_rename_program_t *prog = NULL;
3434 	drmach_copy_rename_program_t *prog_kmem = NULL;
3435 	void		(*mc_suspend)(void);
3436 	void		(*mc_resume)(void);
3437 	int		(*scf_fmem_start)(int, int);
3438 	int		(*scf_fmem_end)(void);
3439 	int		(*scf_fmem_cancel)(void);
3440 	uint64_t	(*scf_get_base_addr)(void);
3441 
3442 	if (!DRMACH_IS_MEM_ID(s_id))
3443 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3444 	if (!DRMACH_IS_MEM_ID(t_id))
3445 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3446 
3447 	for (i = 0; i < NCPU; i++) {
3448 		int lsb_id, onb_core_num, strand_id;
3449 		drmach_board_t *bp;
3450 
3451 		/*
3452 		 * this kind of CPU will spin in cache
3453 		 */
3454 		if (CPU_IN_SET(cpu_ready_set, i))
3455 			continue;
3456 
3457 		/*
3458 		 * Now check for any inactive CPU's that
3459 		 * have been hotadded.  This can only occur in
3460 		 * error condition in drmach_cpu_poweron().
3461 		 */
3462 		lsb_id = LSB_ID(i);
3463 		onb_core_num = ON_BOARD_CORE_NUM(i);
3464 		strand_id = STRAND_ID(i);
3465 		bp = drmach_get_board_by_bnum(lsb_id);
3466 		if (bp == NULL)
3467 			continue;
3468 		if (bp->cores[onb_core_num].core_hotadded &
3469 		    (1 << strand_id)) {
3470 			if (!(bp->cores[onb_core_num].core_started &
3471 			    (1 << strand_id))) {
3472 				return (drerr_new(1, EOPL_CPU_STATE, NULL));
3473 			}
3474 		}
3475 	}
3476 
3477 	mc_suspend = (void (*)(void))
3478 	    modgetsymvalue("opl_mc_suspend", 0);
3479 	mc_resume = (void (*)(void))
3480 	    modgetsymvalue("opl_mc_resume", 0);
3481 
3482 	if (mc_suspend == NULL || mc_resume == NULL) {
3483 		return (drerr_new(1, EOPL_MC_OPL, NULL));
3484 	}
3485 
3486 	scf_fmem_start = (int (*)(int, int))
3487 	    modgetsymvalue("scf_fmem_start", 0);
3488 	if (scf_fmem_start == NULL) {
3489 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3490 	}
3491 	scf_fmem_end = (int (*)(void))
3492 	    modgetsymvalue("scf_fmem_end", 0);
3493 	if (scf_fmem_end == NULL) {
3494 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3495 	}
3496 	scf_fmem_cancel = (int (*)(void))
3497 	    modgetsymvalue("scf_fmem_cancel", 0);
3498 	if (scf_fmem_cancel == NULL) {
3499 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3500 	}
3501 	scf_get_base_addr = (uint64_t (*)(void))
3502 	    modgetsymvalue("scf_get_base_addr", 0);
3503 	if (scf_get_base_addr == NULL) {
3504 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3505 	}
3506 	s_mem = s_id;
3507 	t_mem = t_id;
3508 
3509 	s_bd = s_mem->dev.bp->bnum;
3510 	t_bd = t_mem->dev.bp->bnum;
3511 
3512 	/* calculate source and target base pa */
3513 
3514 	s_copybasepa = s_mem->slice_base;
3515 	t_copybasepa = t_mem->slice_base;
3516 
3517 	/* adjust copy memlist addresses to be relative to copy base pa */
3518 	x_ml = c_ml;
3519 	mlist_size = 0;
3520 	while (x_ml != NULL) {
3521 		x_ml->ml_address -= s_copybasepa;
3522 		x_ml = x_ml->ml_next;
3523 		mlist_size++;
3524 	}
3525 
3526 	/*
3527 	 * bp will be page aligned, since we're calling
3528 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3529 	 */
3530 
3531 	prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
3532 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
3533 
3534 	prog_kmem->prog = prog_kmem;
3535 
3536 	/*
3537 	 * To avoid MTLB hit, we allocate a new VM space and remap
3538 	 * the kmem_alloc buffer to that address.  This solves
3539 	 * 2 problems we found:
3540 	 * - the kmem_alloc buffer can be just a chunk inside
3541 	 *   a much larger, e.g. 4MB buffer and MTLB will occur
3542 	 *   if there are both a 4MB and a 8K TLB mapping to
3543 	 *   the same VA range.
3544 	 * - the kmem mapping got dropped into the TLB by other
3545 	 *   strands, unintentionally.
3546 	 * Note that the pointers like data, critical, memlist_buffer,
3547 	 * and stat inside the copy rename structure are mapped to this
3548 	 * alternate VM space so we must make sure we lock the TLB mapping
3549 	 * whenever we access data pointed to by these pointers.
3550 	 */
3551 
3552 	prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
3553 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
3554 	wp = bp = (caddr_t)prog;
3555 
3556 	/* Now remap prog_kmem to prog */
3557 	drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3558 
3559 	/* All pointers in prog are based on the alternate mapping */
3560 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3561 	    sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3562 
3563 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3564 	    <= ((uint64_t)prog + PAGESIZE));
3565 
3566 	prog->critical = (drmach_copy_rename_critical_t *)
3567 	    (wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3568 
3569 	prog->memlist_buffer = (caddr_t)(wp + DRMACH_FMEM_MLIST_PAGE *
3570 	    PAGESIZE);
3571 
3572 	prog->stat = (drmach_cr_stat_t *)(wp + DRMACH_FMEM_STAT_PAGE *
3573 	    PAGESIZE);
3574 
3575 	/* LINTED */
3576 	ASSERT(sizeof (drmach_cr_stat_t) <= ((DRMACH_FMEM_LOCKED_PAGES -
3577 	    DRMACH_FMEM_STAT_PAGE) * PAGESIZE));
3578 
3579 	prog->critical->scf_reg_base = (uint64_t)-1;
3580 	prog->critical->scf_td[0] = (s_bd & 0xff);
3581 	prog->critical->scf_td[1] = (t_bd & 0xff);
3582 	for (i = 2; i < 15; i++) {
3583 		prog->critical->scf_td[i]   = 0;
3584 	}
3585 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3586 
3587 	bp = (caddr_t)prog->critical;
3588 	len = sizeof (drmach_copy_rename_critical_t);
3589 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3590 
3591 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3592 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3593 
3594 	/*
3595 	 * We always leave 1K nop's to prevent the processor from
3596 	 * speculative execution that causes memory access
3597 	 */
3598 	wp = wp + len + 1024;
3599 
3600 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3601 	    (ulong_t)drmach_fmem_exec_script);
3602 	/* this is the entry point of the loop script */
3603 	wp = wp + len + 1024;
3604 
3605 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3606 	    (ulong_t)drmach_fmem_loop_script);
3607 	wp = wp + len + 1024;
3608 
3609 	/* now we make sure there is 1K extra */
3610 
3611 	if ((wp - bp) > PAGESIZE) {
3612 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3613 		goto out;
3614 	}
3615 
3616 	bp = (caddr_t)prog->critical;
3617 	len = sizeof (drmach_copy_rename_critical_t);
3618 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3619 
3620 	prog->critical->run = (int (*)())(wp);
3621 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3622 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3623 
3624 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3625 
3626 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3627 
3628 	prog->critical->fmem = (int (*)())(wp);
3629 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3630 	    (ulong_t)drmach_fmem_exec_script);
3631 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3632 
3633 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3634 	    (ulong_t)drmach_fmem_exec_script);
3635 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3636 
3637 	prog->critical->loop = (int (*)())(wp);
3638 	len = (int)((ulong_t)drmach_fmem_exec_script -
3639 	    (ulong_t)drmach_fmem_loop_script);
3640 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3641 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3642 	    (ulong_t)drmach_fmem_loop_script);
3643 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3644 
3645 	prog->data->fmem_status.error = ESBD_NOERROR;
3646 
3647 	/* now we are committed, call SCF, soft suspend mac patrol */
3648 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3649 		err = drerr_new(1, EOPL_SCF_FMEM_START, NULL);
3650 		goto out;
3651 	}
3652 	prog->data->scf_fmem_end = scf_fmem_end;
3653 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3654 	prog->data->scf_get_base_addr = scf_get_base_addr;
3655 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3656 
3657 	/* soft suspend mac patrol */
3658 	(*mc_suspend)();
3659 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3660 	prog->data->mc_resume = mc_resume;
3661 
3662 	prog->critical->inst_loop_ret  =
3663 	    *(uint64_t *)(prog->critical->loop_rtn);
3664 
3665 	/*
3666 	 * 0x30800000 is op code "ba,a	+0"
3667 	 */
3668 
3669 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3670 
3671 	/*
3672 	 * set the value of SCF FMEM TIMEOUT
3673 	 */
3674 	prog->critical->delay = fmem_timeout * system_clock_freq;
3675 
3676 	prog->data->s_mem = (drmachid_t)s_mem;
3677 	prog->data->t_mem = (drmachid_t)t_mem;
3678 
3679 	cpuid = CPU->cpu_id;
3680 	prog->data->cpuid = cpuid;
3681 	prog->data->cpu_ready_set = cpu_ready_set;
3682 	prog->data->cpu_slave_set = cpu_ready_set;
3683 	prog->data->slowest_cpuid = (processorid_t)-1;
3684 	prog->data->copy_wait_time = 0;
3685 	prog->data->copy_rename_count = 0;
3686 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3687 
3688 	for (i = 0; i < NCPU; i++) {
3689 		prog->data->cpu_ml[i] = NULL;
3690 	}
3691 
3692 	/*
3693 	 * max_elms -	max number of memlist structures that
3694 	 * 		may be allocated for the CPU memory list.
3695 	 *		If there are too many memory span (because
3696 	 *		of fragmentation) than number of memlist
3697 	 *		available, we should return error.
3698 	 */
3699 	max_elms = drmach_setup_memlist(prog);
3700 	if (max_elms < mlist_size) {
3701 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3702 		goto err_out;
3703 	}
3704 
3705 	active_cpus = 0;
3706 	if (drmach_disable_mcopy) {
3707 		active_cpus = 1;
3708 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3709 	} else {
3710 		int max_cpu_num;
3711 		/*
3712 		 * The parallel copy procedure is going to split some
3713 		 * of the elements of the original memory copy list.
3714 		 * The number of added elements can be up to
3715 		 * (max_cpu_num - 1).  It means that max_cpu_num
3716 		 * should satisfy the following condition:
3717 		 * (max_cpu_num - 1) + mlist_size <= max_elms.
3718 		 */
3719 		max_cpu_num = max_elms - mlist_size + 1;
3720 
3721 		for (i = 0; i < NCPU; i++) {
3722 			if (CPU_IN_SET(cpu_ready_set, i) &&
3723 			    CPU_ACTIVE(cpu[i])) {
3724 				/*
3725 				 * To reduce the level-2 cache contention only
3726 				 * one strand per core will participate
3727 				 * in the copy. If the strand with even cpu_id
3728 				 * number is present in the ready set, we will
3729 				 * include this strand in the copy set. If it
3730 				 * is not present in the ready set, we check for
3731 				 * the strand with the consecutive odd cpu_id
3732 				 * and include it, provided that it is
3733 				 * present in the ready set.
3734 				 */
3735 				if (!(i & 0x1) ||
3736 				    !CPU_IN_SET(prog->data->cpu_copy_set,
3737 				    i - 1)) {
3738 					CPUSET_ADD(prog->data->cpu_copy_set, i);
3739 					active_cpus++;
3740 					/*
3741 					 * We cannot have more than
3742 					 * max_cpu_num CPUs in the copy
3743 					 * set, because each CPU has to
3744 					 * have at least one element
3745 					 * long memory copy list.
3746 					 */
3747 					if (active_cpus >= max_cpu_num)
3748 						break;
3749 
3750 				}
3751 			}
3752 		}
3753 	}
3754 
3755 	x_ml = c_ml;
3756 	sz = 0;
3757 	while (x_ml != NULL) {
3758 		sz += x_ml->ml_size;
3759 		x_ml = x_ml->ml_next;
3760 	}
3761 
3762 	copy_sz = sz/active_cpus;
3763 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3764 
3765 	while (sz > copy_sz*active_cpus) {
3766 		copy_sz += MMU_PAGESIZE4M;
3767 	}
3768 
3769 	prog->data->stick_freq = system_clock_freq;
3770 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3771 	    system_clock_freq;
3772 
3773 	x_ml = c_ml;
3774 	c_addr = x_ml->ml_address;
3775 	c_size = x_ml->ml_size;
3776 
3777 	for (i = 0; i < NCPU; i++) {
3778 		prog->stat->nbytes[i] = 0;
3779 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3780 			continue;
3781 		}
3782 		sz = copy_sz;
3783 
3784 		while (sz) {
3785 			if (c_size > sz) {
3786 				if ((prog->data->cpu_ml[i] =
3787 				    drmach_memlist_add_span(prog,
3788 				    prog->data->cpu_ml[i],
3789 				    c_addr, sz)) == NULL) {
3790 					cmn_err(CE_WARN,
3791 					    "Unexpected drmach_memlist_add_span"
3792 					    " failure.");
3793 					err = drerr_new(1, EOPL_FMEM_SETUP,
3794 					    NULL);
3795 					mc_resume();
3796 					goto out;
3797 				}
3798 				c_addr += sz;
3799 				c_size -= sz;
3800 				break;
3801 			} else {
3802 				sz -= c_size;
3803 				if ((prog->data->cpu_ml[i] =
3804 				    drmach_memlist_add_span(prog,
3805 				    prog->data->cpu_ml[i],
3806 				    c_addr, c_size)) == NULL) {
3807 					cmn_err(CE_WARN,
3808 					    "Unexpected drmach_memlist_add_span"
3809 					    " failure.");
3810 					err = drerr_new(1, EOPL_FMEM_SETUP,
3811 					    NULL);
3812 					mc_resume();
3813 					goto out;
3814 				}
3815 
3816 				x_ml = x_ml->ml_next;
3817 				if (x_ml != NULL) {
3818 					c_addr = x_ml->ml_address;
3819 					c_size = x_ml->ml_size;
3820 				} else {
3821 					goto end;
3822 				}
3823 			}
3824 		}
3825 	}
3826 end:
3827 	prog->data->s_copybasepa = s_copybasepa;
3828 	prog->data->t_copybasepa = t_copybasepa;
3829 	prog->data->c_ml = c_ml;
3830 	*pgm_id = prog_kmem;
3831 
3832 	/* Unmap the alternate space.  It will have to be remapped again */
3833 	drmach_unlock_critical((caddr_t)prog);
3834 	return (NULL);
3835 
3836 err_out:
3837 	mc_resume();
3838 	rv = (*prog->data->scf_fmem_cancel)();
3839 	if (rv) {
3840 		cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv);
3841 	}
3842 out:
3843 	if (prog != NULL) {
3844 		drmach_unlock_critical((caddr_t)prog);
3845 		vmem_free(heap_arena, prog, DRMACH_FMEM_LOCKED_PAGES *
3846 		    PAGESIZE);
3847 	}
3848 	if (prog_kmem != NULL) {
3849 		kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3850 	}
3851 	return (err);
3852 }
3853 
3854 sbd_error_t *
3855 drmach_copy_rename_fini(drmachid_t id)
3856 {
3857 	drmach_copy_rename_program_t	*prog = id;
3858 	sbd_error_t			*err = NULL;
3859 	int				rv;
3860 	uint_t				fmem_error;
3861 
3862 	/*
3863 	 * Note that we have to delay calling SCF to find out the
3864 	 * status of the FMEM operation here because SCF cannot
3865 	 * respond while it is suspended.
3866 	 * This create a small window when we are sure about the
3867 	 * base address of the system board.
3868 	 * If there is any call to mc-opl to get memory unum,
3869 	 * mc-opl will return UNKNOWN as the unum.
3870 	 */
3871 
3872 	/*
3873 	 * we have to remap again because all the pointer like data,
3874 	 * critical in prog are based on the alternate vmem space.
3875 	 */
3876 	(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
3877 
3878 	if (prog->data->c_ml != NULL)
3879 		memlist_delete(prog->data->c_ml);
3880 
3881 	if ((prog->data->fmem_status.op &
3882 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) !=
3883 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3884 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: invalid op "
3885 		    "code %x\n", prog->data->fmem_status.op);
3886 	}
3887 
3888 	fmem_error = prog->data->fmem_status.error;
3889 	if (fmem_error != ESBD_NOERROR) {
3890 		err = drerr_new(1, fmem_error, NULL);
3891 	}
3892 
3893 	/* possible ops are SCF_START, MC_SUSPEND */
3894 	if (prog->critical->fmem_issued) {
3895 		if (fmem_error != ESBD_NOERROR) {
3896 			cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n",
3897 			    fmem_error);
3898 		}
3899 		rv = (*prog->data->scf_fmem_end)();
3900 		if (rv) {
3901 			cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
3902 		}
3903 		/*
3904 		 * If we get here, rename is successful.
3905 		 * Do all the copy rename post processing.
3906 		 */
3907 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3908 		    (drmach_mem_t *)prog->data->t_mem);
3909 	} else {
3910 		rv = (*prog->data->scf_fmem_cancel)();
3911 		if (rv) {
3912 			cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x",
3913 			    rv);
3914 			if (!err) {
3915 				err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
3916 				    "scf_fmem_cancel() failed. rv = 0x%x", rv);
3917 			}
3918 		}
3919 	}
3920 	/* soft resume mac patrol */
3921 	(*prog->data->mc_resume)();
3922 
3923 	drmach_unlock_critical((caddr_t)prog->locked_prog);
3924 
3925 	vmem_free(heap_arena, prog->locked_prog,
3926 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3927 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3928 	return (err);
3929 }
3930 
3931 /*ARGSUSED*/
3932 static void
3933 drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
3934 {
3935 	drmach_copy_rename_program_t	*prog =
3936 	    (drmach_copy_rename_program_t *)id;
3937 	register int			cpuid;
3938 	extern void			drmach_flush();
3939 	extern void			membar_sync_il();
3940 	extern void			drmach_flush_icache();
3941 	on_trap_data_t			otd;
3942 
3943 	cpuid = CPU->cpu_id;
3944 
3945 	if (on_trap(&otd, OT_DATA_EC)) {
3946 		no_trap();
3947 		prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR;
3948 		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3949 		drmach_flush_icache();
3950 		membar_sync_il();
3951 		return;
3952 	}
3953 
3954 
3955 	/*
3956 	 * jmp drmach_copy_rename_prog().
3957 	 */
3958 
3959 	drmach_flush(prog->critical, PAGESIZE);
3960 	(void) prog->critical->run(prog, cpuid);
3961 	drmach_flush_icache();
3962 
3963 	no_trap();
3964 
3965 	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3966 
3967 	membar_sync_il();
3968 }
3969 
3970 static void
3971 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3972 {
3973 	uint64_t s_base, t_base;
3974 	drmach_board_t *s_board, *t_board;
3975 	struct memlist *ml;
3976 
3977 	s_board = s_mem->dev.bp;
3978 	t_board = t_mem->dev.bp;
3979 	if (s_board == NULL || t_board == NULL) {
3980 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3981 		return;
3982 	}
3983 	s_base = s_mem->slice_base;
3984 	t_base = t_mem->slice_base;
3985 
3986 	s_mem->slice_base = t_base;
3987 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3988 
3989 	for (ml = s_mem->memlist; ml; ml = ml->ml_next) {
3990 		ml->ml_address = ml->ml_address - s_base + t_base;
3991 	}
3992 
3993 	t_mem->slice_base = s_base;
3994 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3995 
3996 	for (ml = t_mem->memlist; ml; ml = ml->ml_next) {
3997 		ml->ml_address = ml->ml_address - t_base + s_base;
3998 	}
3999 
4000 	/*
4001 	 * IKP has to update the sb-mem-ranges for mac patrol driver
4002 	 * when it resumes, it will re-read the sb-mem-range property
4003 	 * to get the new base address
4004 	 */
4005 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
4006 		cmn_err(CE_PANIC, "Could not update device nodes\n");
4007 }
4008 
4009 void
4010 drmach_copy_rename(drmachid_t id)
4011 {
4012 	drmach_copy_rename_program_t	*prog_kmem = id;
4013 	drmach_copy_rename_program_t	*prog;
4014 	cpuset_t	cpuset;
4015 	int		cpuid;
4016 	uint64_t	inst;
4017 	register int	rtn;
4018 	extern int	in_sync;
4019 	int		old_in_sync;
4020 	extern void	drmach_sys_trap();
4021 	extern void	drmach_flush();
4022 	extern void	drmach_flush_icache();
4023 	extern uint64_t	patch_inst(uint64_t *, uint64_t);
4024 	on_trap_data_t	otd;
4025 
4026 
4027 	prog = prog_kmem->locked_prog;
4028 
4029 
4030 	/*
4031 	 * We must immediately drop in the TLB because all pointers
4032 	 * are based on the alternate vmem space.
4033 	 */
4034 
4035 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4036 
4037 	/*
4038 	 * we call scf to get the base address here becuase if scf
4039 	 * has not been suspended yet, the active path can be changing and
4040 	 * sometimes it is not even mapped.  We call the interface when
4041 	 * the OS has been quiesced.
4042 	 */
4043 	prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();
4044 
4045 	if (prog->critical->scf_reg_base == (uint64_t)-1 ||
4046 	    prog->critical->scf_reg_base == NULL) {
4047 		prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR;
4048 		drmach_unlock_critical((caddr_t)prog);
4049 		return;
4050 	}
4051 
4052 	cpuset = prog->data->cpu_ready_set;
4053 
4054 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4055 		if (CPU_IN_SET(cpuset, cpuid)) {
4056 			prog->critical->stat[cpuid] = FMEM_LOOP_START;
4057 			prog->data->error[cpuid] = ESBD_NOERROR;
4058 		}
4059 	}
4060 
4061 	old_in_sync = in_sync;
4062 	in_sync = 1;
4063 	cpuid = CPU->cpu_id;
4064 
4065 	CPUSET_DEL(cpuset, cpuid);
4066 
4067 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4068 		if (CPU_IN_SET(cpuset, cpuid)) {
4069 			xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
4070 			    (uint64_t)prog_kmem, (uint64_t)prog);
4071 		}
4072 	}
4073 
4074 	cpuid = CPU->cpu_id;
4075 
4076 	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
4077 	    (uint64_t)drmach_copy_rename_slave, (uint64_t)prog);
4078 	xt_sync(cpuset);
4079 
4080 	if (on_trap(&otd, OT_DATA_EC)) {
4081 		rtn = EOPL_FMEM_COPY_ERROR;
4082 		drmach_flush_icache();
4083 		goto done;
4084 	}
4085 
4086 	/*
4087 	 * jmp drmach_copy_rename_prog().
4088 	 */
4089 
4090 	drmach_flush(prog->critical, PAGESIZE);
4091 	rtn = prog->critical->run(prog, cpuid);
4092 
4093 	drmach_flush_icache();
4094 
4095 
4096 done:
4097 	no_trap();
4098 	if (rtn == EOPL_FMEM_HW_ERROR) {
4099 		kpreempt_enable();
4100 		prom_panic("URGENT_ERROR_TRAP is detected during FMEM.\n");
4101 	}
4102 
4103 	/*
4104 	 * In normal case, all slave CPU's are still spinning in
4105 	 * the assembly code.  The master has to patch the instruction
4106 	 * to get them out.
4107 	 * In error case, e.g. COPY_ERROR, some slave CPU's might
4108 	 * have aborted and already returned and sset LOOP_EXIT status.
4109 	 * Some CPU might still be copying.
4110 	 * In any case, some delay is necessary to give them
4111 	 * enough time to set the LOOP_EXIT status.
4112 	 */
4113 
4114 	for (;;) {
4115 		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
4116 		    prog->critical->inst_loop_ret);
4117 		if (prog->critical->inst_loop_ret == inst) {
4118 			break;
4119 		}
4120 	}
4121 
4122 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4123 		uint64_t	last, now;
4124 		if (!CPU_IN_SET(cpuset, cpuid)) {
4125 			continue;
4126 		}
4127 		last = prog->stat->nbytes[cpuid];
4128 		/*
4129 		 * Wait for all CPU to exit.
4130 		 * However we do not want an infinite loop
4131 		 * so we detect hangup situation here.
4132 		 * If the slave CPU is still copying data,
4133 		 * we will continue to wait.
4134 		 * In error cases, the master has already set
4135 		 * fmem_status.error to abort the copying.
4136 		 * 1 m.s delay for them to abort copying and
4137 		 * return to drmach_copy_rename_slave to set
4138 		 * FMEM_LOOP_EXIT status should be enough.
4139 		 */
4140 		for (;;) {
4141 			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
4142 				break;
4143 			drmach_sleep_il();
4144 			drv_usecwait(1000);
4145 			now = prog->stat->nbytes[cpuid];
4146 			if (now <= last) {
4147 				drv_usecwait(1000);
4148 				if (prog->critical->stat[cpuid] ==
4149 				    FMEM_LOOP_EXIT)
4150 					break;
4151 				cmn_err(CE_PANIC, "CPU %d hang during Copy "
4152 				    "Rename", cpuid);
4153 			}
4154 			last = now;
4155 		}
4156 		if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) {
4157 			prom_panic("URGENT_ERROR_TRAP is detected during "
4158 			    "FMEM.\n");
4159 		}
4160 	}
4161 
4162 	/*
4163 	 * This must be done after all strands have exit.
4164 	 * Removing the TLB entry will affect both strands
4165 	 * in the same core.
4166 	 */
4167 
4168 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4169 		if (CPU_IN_SET(cpuset, cpuid)) {
4170 			xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
4171 			    (uint64_t)prog, 0);
4172 		}
4173 	}
4174 
4175 	in_sync = old_in_sync;
4176 
4177 	/*
4178 	 * we should unlock before the following lock to keep the kpreempt
4179 	 * count correct.
4180 	 */
4181 	(void) drmach_unlock_critical((caddr_t)prog);
4182 
4183 	/*
4184 	 * we must remap again.  TLB might have been removed in above xcall.
4185 	 */
4186 
4187 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4188 
4189 	if (prog->data->fmem_status.error == ESBD_NOERROR)
4190 		prog->data->fmem_status.error = rtn;
4191 
4192 	if (prog->data->copy_wait_time > 0) {
4193 		DRMACH_PR("Unexpected long wait time %ld seconds "
4194 		    "during copy rename on CPU %d\n",
4195 		    prog->data->copy_wait_time/prog->data->stick_freq,
4196 		    prog->data->slowest_cpuid);
4197 	}
4198 	drmach_unlock_critical((caddr_t)prog);
4199 }
4200