xref: /titanic_41/usr/src/uts/sun4u/opl/io/drmach.c (revision f63f7506be0210195779706f51c58646e568cc40)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/debug.h>
29 #include <sys/types.h>
30 #include <sys/varargs.h>
31 #include <sys/errno.h>
32 #include <sys/cred.h>
33 #include <sys/dditypes.h>
34 #include <sys/devops.h>
35 #include <sys/modctl.h>
36 #include <sys/poll.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ndi_impldefs.h>
42 #include <sys/stat.h>
43 #include <sys/kmem.h>
44 #include <sys/vmem.h>
45 #include <sys/opl_olympus_regs.h>
46 #include <sys/cpuvar.h>
47 #include <sys/cpupart.h>
48 #include <sys/mem_config.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/systm.h>
51 #include <sys/machsystm.h>
52 #include <sys/autoconf.h>
53 #include <sys/cmn_err.h>
54 #include <sys/sysmacros.h>
55 #include <sys/x_call.h>
56 #include <sys/promif.h>
57 #include <sys/prom_plat.h>
58 #include <sys/membar.h>
59 #include <vm/seg_kmem.h>
60 #include <sys/mem_cage.h>
61 #include <sys/stack.h>
62 #include <sys/archsystm.h>
63 #include <vm/hat_sfmmu.h>
64 #include <sys/pte.h>
65 #include <sys/mmu.h>
66 #include <sys/cpu_module.h>
67 #include <sys/obpdefs.h>
68 #include <sys/note.h>
69 #include <sys/ontrap.h>
70 #include <sys/cpu_sgnblk_defs.h>
71 #include <sys/opl.h>
72 
73 
74 #include <sys/promimpl.h>
75 #include <sys/prom_plat.h>
76 #include <sys/kobj.h>
77 
78 #include <sys/sysevent.h>
79 #include <sys/sysevent/dr.h>
80 #include <sys/sysevent/eventdefs.h>
81 
82 #include <sys/drmach.h>
83 #include <sys/dr_util.h>
84 
85 #include <sys/fcode.h>
86 #include <sys/opl_cfg.h>
87 
88 extern void		bcopy32_il(uint64_t, uint64_t);	/* external; no definition in the visible part of this file */
89 extern void		flush_cache_il(void);	/* external; no definition in the visible part of this file */
90 extern void		drmach_sleep_il(void);	/* external; no definition in the visible part of this file */
91 
92 typedef struct {	/* argument bundle handed to a node-walk callback */
93 	struct drmach_node	*node;	/* node object the walk was started on */
94 	void			*data;	/* caller-supplied cookie passed through unchanged */
95 } drmach_node_walk_args_t;
96 
97 typedef struct drmach_node {	/* device-tree node handle plus its accessor methods */
98 	void		*here;	/* dev_info_t * of the node, stored untyped (see drmach_node_ddi_get_dip) */
99 
100 	pnode_t		(*get_dnode)(struct drmach_node *node);	/* the DDI implementation always returns a null pnode */
101 	int		(*walk)(struct drmach_node *node, void *data,
102 				int (*cb)(drmach_node_walk_args_t *args));	/* visit nodes, stopping when cb returns non-zero */
103 	dev_info_t	*(*n_getdip)(struct drmach_node *node);	/* returns `here' as a dev_info_t * */
104 	int		(*n_getproplen)(struct drmach_node *node, char *name,
105 				int *len);	/* 0 on success, -1 on failure */
106 	int		(*n_getprop)(struct drmach_node *node, char *name,
107 				void *buf, int len);	/* 0 on success, -1 on failure */
108 	int		(*get_parent)(struct drmach_node *node,
109 				struct drmach_node *pnode);	/* fills pnode with a copy that points at the parent dip */
110 } drmach_node_t;
111 
112 typedef struct {	/* bounds-checked array of drmachid_t, indexed min_index..max_index */
113 	int		 min_index;	/* lowest valid index */
114 	int		 max_index;	/* highest valid index (inclusive) */
115 	int		 arr_sz;	/* size of arr in bytes, as passed to kmem_zalloc/kmem_free */
116 	drmachid_t	*arr;	/* slot storage; a NULL slot is treated as empty by the iterators */
117 } drmach_array_t;
118 
119 typedef struct {	/* header shared by board and device objects; must be their first member */
120 	void		*isa;	/* address of the constructor; used as a type tag by the DRMACH_IS_*_ID macros */
121 
122 	void		(*dispose)(drmachid_t);	/* destructor; not set for boards by drmach_board_new() */
123 	sbd_error_t	*(*release)(drmachid_t);
124 	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);
125 
126 	char		 name[MAXNAMELEN];	/* printable object name */
127 } drmach_common_t;
128 
129 typedef	struct {	/* per-core strand state; each field is a bit mask indexed by strand id */
130 	uint32_t	core_present;	/* strands found in the device tree (drmach_setup_core_cb) */
131 	uint32_t	core_hotadded;	/* strands added via prom_hotaddcpu and not yet removed */
132 	uint32_t	core_started;	/* set equal to core_present for boot boards */
133 } drmach_cmp_t;
134 
135 typedef struct {	/* one system board */
136 	drmach_common_t	 cm;	/* common header; isa == drmach_board_new */
137 	int		 bnum;	/* board number; index into drmach_boards */
138 	int		 assigned;	/* initialized from boot_board in drmach_board_new() */
139 	int		 powered;	/* initialized from boot_board in drmach_board_new() */
140 	int		 connected;	/* initialized from boot_board in drmach_board_new() */
141 	int		 cond;	/* set to SBD_COND_OK by drmach_board_status() */
142 	drmach_node_t	*tree;	/* node handle allocated by drmach_node_new() */
143 	drmach_array_t	*devices;	/* device objects on this board; NULL until populated */
144 	int		boot_board;	/* if board exists on bootup */
145 	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD];	/* indexed by ON_BOARD_CORE_NUM(cpuid) */
146 } drmach_board_t;
147 
148 typedef struct {	/* header shared by cpu, mem and io device objects */
149 	drmach_common_t	 cm;	/* common header; isa identifies the device kind */
150 	drmach_board_t	*bp;	/* board the device belongs to */
151 	int		 unum;	/* unit number; filled in by the device-specific constructor */
152 	int		portid;	/* port id passed to drmach_device_new() */
153 	int		 busy;
154 	int		 powered;
155 	const char	*type;	/* type string taken from drmach_name2type[] */
156 	drmach_node_t	*node;	/* device-tree node handle for this device */
157 } drmach_device_t;
158 
159 typedef struct drmach_cpu {	/* cpu device object */
160 	drmach_device_t  dev;	/* generic device header (first member) */
161 	processorid_t    cpuid;
162 	int		sb;	/* presumably the system board number -- TODO confirm against drmach_cpu_new() */
163 	int		chipid;
164 	int		coreid;
165 	int		strandid;
166 	int		status;	/* takes OPL_CPU_HOTADDED, defined just below */
167 #define	OPL_CPU_HOTADDED	1
168 } drmach_cpu_t;
169 
170 typedef struct drmach_mem {	/* memory device object; filled in by drmach_setup_mc_info() */
171 	drmach_device_t  dev;	/* generic device header (first member) */
172 	uint64_t	slice_base;	/* first cell of the "sb-mem-ranges" property */
173 	uint64_t	slice_size;	/* second cell of the "sb-mem-ranges" property */
174 	uint64_t	base_pa;	/* lowest installed memory base */
175 	uint64_t	nbytes;		/* size of installed memory */
176 	struct memlist *memlist;	/* list of installed spans, when one was built */
177 } drmach_mem_t;
178 
179 typedef struct drmach_io {	/* io device object; see drmach_io_new() */
180 	drmach_device_t  dev;	/* generic device header (first member) */
181 	int	channel;	/* bits 3:1 of the port id */
182 	int	leaf;	/* bit 0 of the port id */
183 } drmach_io_t;
184 
185 typedef struct drmach_domain_info {	/* domain-wide DR settings, filled in by drmach_init() */
186 	uint32_t	floating;	/* bit n set => board n is listed in the root "floating-boards" property */
187 	int		allow_dr;	/* result of opl_check_dr_status() */
188 } drmach_domain_info_t;
189 
190 drmach_domain_info_t drmach_domain;	/* the single instance; non-static, so visible outside this file */
191 
192 typedef struct {	/* not used in the visible part of this file */
193 	int		 flags;
194 	drmach_device_t	*dp;
195 	sbd_error_t	*err;
196 	dev_info_t	*dip;
197 } drmach_config_args_t;
198 
199 typedef struct {	/* not used in the visible part of this file; presumably state for a board device search -- TODO confirm */
200 	drmach_board_t	*obj;
201 	int		 ndevs;
202 	void		*a;
203 	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
204 	sbd_error_t	*err;
205 } drmach_board_cb_data_t;
206 
207 static drmach_array_t	*drmach_boards;	/* all known boards, indexed by board number; created in drmach_init() */
208 
209 static sbd_error_t	*drmach_device_new(drmach_node_t *,
210 				drmach_board_t *, int, drmachid_t *);
211 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
212 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
213 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
214 
215 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
216 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
217 				char *name, void *buf, int len);
218 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
219 				char *name, int *len);
220 
221 static int 		drmach_get_portid(drmach_node_t *);
222 static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
223 static int		opl_check_dr_status();
224 static void		drmach_io_dispose(drmachid_t);
225 static sbd_error_t	*drmach_io_release(drmachid_t);
226 static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
227 static int 		drmach_init(void);
228 static void 		drmach_fini(void);
229 static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
230 static drmach_board_t	*drmach_get_board_by_bnum(int);
231 
232 /* options for the third argument (option) in drmach_add_remove_cpu() */
233 #define	HOTADD_CPU	1
234 #define	HOTREMOVE_CPU	2
235 
236 #define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
237 	(OPL_MAX_COREID_PER_BOARD - 1))	/* cpuid -> index into drmach_board_t.cores[] */
238 
239 extern struct cpu	*SIGBCPU;
240 
241 static int		drmach_name2type_idx(char *);
242 static drmach_board_t	*drmach_board_new(int, int);
243 
244 #ifdef DEBUG
245 
246 #define	DRMACH_PR		if (drmach_debug) printf	/* expands to a bare `if'; beware of a following `else' */
247 int drmach_debug = 1;		 /* set to non-zero to enable debug messages */
248 #else
249 
250 #define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf	/* non-DEBUG: the call is never executed */
251 #endif /* DEBUG */
252 
253 
/*
 * Object identification helpers.
 *
 * Every drmach object starts with a drmach_common_t whose `isa' member
 * holds the address of the constructor that created it.  The macros below
 * compare that tag against the known constructors.
 *
 * The argument is fully parenthesized so that an expression (for example
 * pointer arithmetic) can be passed safely.  Note that `id' is still
 * evaluated more than once; do not pass an expression with side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any device object (cpu, mem or io), false for boards */
#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

/* true for any drmach object, board or device */
#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
284 
285 #define	DRMACH_INTERNAL_ERROR() \
286 	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)	/* error object that records the source line of the failure */
287 
288 static char		*drmach_ie_fmt = "drmach.c %d";	/* format used by DRMACH_INTERNAL_ERROR(); %d receives __LINE__ */
289 
290 static struct {	/* maps a device node name to a drmach device type and its constructor */
291 	const char	*name;	/* value of the node's "name" property */
292 	const char	*type;	/* DRMACH_DEVTYPE_* string stored in drmach_device_t.type */
293 	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *);	/* constructor called by drmach_device_new() */
294 } drmach_name2type[] = {
295 	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
296 	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
297 	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
298 };
299 
300 /* utility */
301 #define	MBYTE	(1048576ull)	/* 2^20 bytes */
302 
303 /*
304  * drmach autoconfiguration data structures and interfaces
305  */
306 
307 extern struct mod_ops mod_miscops;
308 
309 static struct modlmisc modlmisc = {	/* module linkage record using mod_miscops */
310 	&mod_miscops,
311 	"OPL DR 1.1"
312 };
313 
314 static struct modlinkage modlinkage = {	/* passed to mod_install(), mod_remove() and mod_info() */
315 	MODREV_1,
316 	(void *)&modlmisc,
317 	NULL
318 };
319 
320 static krwlock_t drmach_boards_rwlock;	/* initialized in drmach_init(); held as writer in drmach_fini() while drmach_boards is disposed */
321 
322 typedef const char	*fn_t;
322 typedef const char	*fn_t;
323 
324 int
325 _init(void)
326 {
327 	int err;
328 
329 	if ((err = drmach_init()) != 0) {
330 		return (err);
331 	}
332 
333 	if ((err = mod_install(&modlinkage)) != 0) {
334 		drmach_fini();
335 	}
336 
337 	return (err);
338 }
339 
340 int
341 _fini(void)
342 {
343 	int	err;
344 
345 	if ((err = mod_remove(&modlinkage)) == 0)
346 		drmach_fini();
347 
348 	return (err);
349 }
350 
351 int
352 _info(struct modinfo *modinfop)
353 {
354 	return (mod_info(&modlinkage, modinfop));
355 }
356 
357 /*
358  * The following routines are used to set up the memory
359  * properties in the board structure.
360  */
361 
362 struct drmach_mc_lookup {	/* not used in the visible part of this file */
363 	int	bnum;
364 	drmach_board_t	*bp;
365 	dev_info_t *dip;	/* rv - set if found */
366 };
367 
368 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)	/* pages -> bytes, widened to 64 bits before shifting */
369 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))	/* bytes -> pages (truncating) */
370 
371 static int
372 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
373 {
374 	uint64_t	memory_ranges[128];
375 	int len;
376 	struct memlist	*ml;
377 	int rv;
378 	hwd_sb_t *hwd;
379 	hwd_memory_t *pm;
380 
381 	len = sizeof (memory_ranges);
382 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
383 		DDI_PROP_DONTPASS, "sb-mem-ranges",
384 	    (caddr_t)&memory_ranges[0], &len) != DDI_PROP_SUCCESS) {
385 		mp->slice_base = 0;
386 		mp->slice_size = 0;
387 		return (-1);
388 	}
389 	mp->slice_base = memory_ranges[0];
390 	mp->slice_size = memory_ranges[1];
391 
392 	if (!mp->dev.bp->boot_board) {
393 		int i;
394 
395 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
396 
397 		if (rv != 0) {
398 			return (-1);
399 		}
400 
401 		ml = NULL;
402 		pm = &hwd->sb_cmu.cmu_memory;
403 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
404 			if (pm->mem_chunks[i].chnk_size > 0) {
405 				ml = memlist_add_span(ml,
406 					pm->mem_chunks[i].chnk_start_address,
407 					pm->mem_chunks[i].chnk_size);
408 			}
409 		}
410 	} else {
411 		/*
412 		 * we intersect phys_install to get base_pa.
413 		 * This only works at bootup time.
414 		 */
415 
416 		memlist_read_lock();
417 		ml = memlist_dup(phys_install);
418 		memlist_read_unlock();
419 
420 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
421 		if (ml) {
422 			uint64_t basepa, endpa;
423 			endpa = _ptob64(physmax + 1);
424 
425 			basepa = mp->slice_base + mp->slice_size;
426 
427 			ml = memlist_del_span(ml, basepa, endpa - basepa);
428 		}
429 	}
430 
431 	if (ml) {
432 		uint64_t nbytes = 0;
433 		struct memlist *p;
434 		for (p = ml; p; p = p->next) {
435 			nbytes += p->size;
436 		}
437 		if ((mp->nbytes = nbytes) > 0)
438 			mp->base_pa = ml->address;
439 		else
440 			mp->base_pa = 0;
441 		mp->memlist = ml;
442 	} else {
443 		mp->base_pa = 0;
444 		mp->nbytes = 0;
445 	}
446 	return (0);
447 }
448 
449 
450 struct drmach_hotcpu {
451 	drmach_board_t *bp;
452 	int	bnum;
453 	int	core_id;
454 	int 	rv;
455 	int	option;
456 };
457 
458 static int
459 drmach_cpu_cb(dev_info_t *dip, void *arg)
460 {
461 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
462 	char name[OBP_MAXDRVNAME];
463 	int len = OBP_MAXDRVNAME;
464 	int bnum, core_id, strand_id;
465 	drmach_board_t *bp;
466 
467 	if (dip == ddi_root_node()) {
468 		return (DDI_WALK_CONTINUE);
469 	}
470 
471 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
472 	    DDI_PROP_DONTPASS, "name",
473 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
474 		return (DDI_WALK_PRUNECHILD);
475 	}
476 
477 	/* only cmp has board number */
478 	bnum = -1;
479 	len = sizeof (bnum);
480 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
481 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
482 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
483 		bnum = -1;
484 	}
485 
486 	if (strcmp(name, "cmp") == 0) {
487 		if (bnum != p->bnum)
488 			return (DDI_WALK_PRUNECHILD);
489 		return (DDI_WALK_CONTINUE);
490 	}
491 	/* we have already pruned all unwanted cores and cpu's above */
492 	if (strcmp(name, "core") == 0) {
493 		return (DDI_WALK_CONTINUE);
494 	}
495 	if (strcmp(name, "cpu") == 0) {
496 		processorid_t cpuid;
497 		len = sizeof (cpuid);
498 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
499 		    DDI_PROP_DONTPASS, "cpuid",
500 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
501 			p->rv = -1;
502 			return (DDI_WALK_TERMINATE);
503 		}
504 
505 		core_id = p->core_id;
506 
507 		bnum = LSB_ID(cpuid);
508 
509 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
510 			return (DDI_WALK_CONTINUE);
511 
512 		bp = p->bp;
513 		ASSERT(bnum == bp->bnum);
514 
515 		if (p->option == HOTADD_CPU) {
516 			if (prom_hotaddcpu(cpuid) != 0) {
517 				p->rv = -1;
518 				return (DDI_WALK_TERMINATE);
519 			}
520 			strand_id = STRAND_ID(cpuid);
521 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
522 		} else if (p->option == HOTREMOVE_CPU) {
523 			if (prom_hotremovecpu(cpuid) != 0) {
524 				p->rv = -1;
525 				return (DDI_WALK_TERMINATE);
526 			}
527 			strand_id = STRAND_ID(cpuid);
528 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
529 		}
530 		return (DDI_WALK_CONTINUE);
531 	}
532 
533 	return (DDI_WALK_PRUNECHILD);
534 }
535 
536 
537 static int
538 drmach_add_remove_cpu(int bnum, int core_id, int option)
539 {
540 	struct drmach_hotcpu arg;
541 	drmach_board_t *bp;
542 
543 	bp = drmach_get_board_by_bnum(bnum);
544 	ASSERT(bp);
545 
546 	arg.bp = bp;
547 	arg.bnum = bnum;
548 	arg.core_id = core_id;
549 	arg.rv = 0;
550 	arg.option = option;
551 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
552 	return (arg.rv);
553 }
554 
555 struct drmach_setup_core_arg {
556 	drmach_board_t *bp;
557 };
558 
559 static int
560 drmach_setup_core_cb(dev_info_t *dip, void *arg)
561 {
562 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
563 	char name[OBP_MAXDRVNAME];
564 	int len = OBP_MAXDRVNAME;
565 	int bnum;
566 	int core_id, strand_id;
567 
568 	if (dip == ddi_root_node()) {
569 		return (DDI_WALK_CONTINUE);
570 	}
571 
572 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
573 	    DDI_PROP_DONTPASS, "name",
574 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
575 		return (DDI_WALK_PRUNECHILD);
576 	}
577 
578 	/* only cmp has board number */
579 	bnum = -1;
580 	len = sizeof (bnum);
581 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
582 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
583 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
584 		bnum = -1;
585 	}
586 
587 	if (strcmp(name, "cmp") == 0) {
588 		if (bnum != p->bp->bnum)
589 			return (DDI_WALK_PRUNECHILD);
590 		return (DDI_WALK_CONTINUE);
591 	}
592 	/* we have already pruned all unwanted cores and cpu's above */
593 	if (strcmp(name, "core") == 0) {
594 		return (DDI_WALK_CONTINUE);
595 	}
596 	if (strcmp(name, "cpu") == 0) {
597 		processorid_t cpuid;
598 		len = sizeof (cpuid);
599 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
600 		    DDI_PROP_DONTPASS, "cpuid",
601 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
602 			return (DDI_WALK_TERMINATE);
603 		}
604 		bnum = LSB_ID(cpuid);
605 		ASSERT(bnum == p->bp->bnum);
606 		core_id = ON_BOARD_CORE_NUM(cpuid);
607 		strand_id = STRAND_ID(cpuid);
608 		p->bp->cores[core_id].core_present |= (1 << strand_id);
609 		return (DDI_WALK_CONTINUE);
610 	}
611 
612 	return (DDI_WALK_PRUNECHILD);
613 }
614 
615 
616 static void
617 drmach_setup_core_info(drmach_board_t *obj)
618 {
619 	struct drmach_setup_core_arg arg;
620 	int i;
621 
622 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
623 		obj->cores[i].core_present = 0;
624 		obj->cores[i].core_hotadded = 0;
625 		obj->cores[i].core_started = 0;
626 	}
627 	arg.bp = obj;
628 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
629 
630 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
631 		if (obj->boot_board) {
632 			obj->cores[i].core_hotadded =
633 				obj->cores[i].core_started =
634 				obj->cores[i].core_present;
635 		}
636 	}
637 }
638 
639 /*
640  * drmach_node_* routines serve the purpose of separating the
641  * rest of the code from the device tree and OBP.  This is necessary
642  * because of In-Kernel-Probing.  Devices probed after stod are probed
643  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
644  * have dnode ids.
645  */
646 
647 typedef struct {
648 	drmach_node_walk_args_t	*nwargs;
649 	int 			(*cb)(drmach_node_walk_args_t *args);
650 	int			err;
651 } drmach_node_ddi_walk_args_t;
652 
653 static int
654 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
655 {
656 	drmach_node_ddi_walk_args_t	*nargs;
657 
658 	nargs = (drmach_node_ddi_walk_args_t *)arg;
659 
660 	/*
661 	 * dip doesn't have to be held here as we are called
662 	 * from ddi_walk_devs() which holds the dip.
663 	 */
664 	nargs->nwargs->node->here = (void *)dip;
665 
666 	nargs->err = nargs->cb(nargs->nwargs);
667 
668 
669 	/*
670 	 * Set "here" to NULL so that unheld dip is not accessible
671 	 * outside ddi_walk_devs()
672 	 */
673 	nargs->nwargs->node->here = NULL;
674 
675 	if (nargs->err)
676 		return (DDI_WALK_TERMINATE);
677 	else
678 		return (DDI_WALK_CONTINUE);
679 }
680 
681 static int
682 drmach_node_ddi_walk(drmach_node_t *np, void *data,
683 		int (*cb)(drmach_node_walk_args_t *args))
684 {
685 	drmach_node_walk_args_t		args;
686 	drmach_node_ddi_walk_args_t	nargs;
687 
688 
689 	/* initialized args structure for callback */
690 	args.node = np;
691 	args.data = data;
692 
693 	nargs.nwargs = &args;
694 	nargs.cb = cb;
695 	nargs.err = 0;
696 
697 	/*
698 	 * Root node doesn't have to be held in any way.
699 	 */
700 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb,
701 		(void *)&nargs);
702 
703 	return (nargs.err);
704 }
705 
706 static int
707 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
708 {
709 	dev_info_t	*ndip;
710 	static char	*fn = "drmach_node_ddi_get_parent";
711 
712 	ndip = np->n_getdip(np);
713 	if (ndip == NULL) {
714 		cmn_err(CE_WARN, "%s: NULL dip", fn);
715 		return (-1);
716 	}
717 
718 	bcopy(np, pp, sizeof (drmach_node_t));
719 
720 	pp->here = (void *)ddi_get_parent(ndip);
721 	if (pp->here == NULL) {
722 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
723 		return (-1);
724 	}
725 
726 	return (0);
727 }
728 
729 /*ARGSUSED*/
730 static pnode_t
731 drmach_node_ddi_get_dnode(drmach_node_t *np)
732 {
733 	return ((pnode_t)NULL);
734 }
735 
736 static drmach_node_t *
737 drmach_node_new(void)
738 {
739 	drmach_node_t *np;
740 
741 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
742 
743 	np->get_dnode = drmach_node_ddi_get_dnode;
744 	np->walk = drmach_node_ddi_walk;
745 	np->n_getdip = drmach_node_ddi_get_dip;
746 	np->n_getproplen = drmach_node_ddi_get_proplen;
747 	np->n_getprop = drmach_node_ddi_get_prop;
748 	np->get_parent = drmach_node_ddi_get_parent;
749 
750 	return (np);
751 }
752 
753 static void
754 drmach_node_dispose(drmach_node_t *np)
755 {
756 	kmem_free(np, sizeof (*np));
757 }
758 
759 static dev_info_t *
760 drmach_node_ddi_get_dip(drmach_node_t *np)
761 {
762 	return ((dev_info_t *)np->here);
763 }
764 
765 static int
766 drmach_node_walk(drmach_node_t *np, void *param,
767 		int (*cb)(drmach_node_walk_args_t *args))
768 {
769 	return (np->walk(np, param, cb));
770 }
771 
772 static int
773 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
774 {
775 	int		rv = 0;
776 	dev_info_t	*ndip;
777 	static char	*fn = "drmach_node_ddi_get_prop";
778 
779 
780 	ndip = np->n_getdip(np);
781 	if (ndip == NULL) {
782 		cmn_err(CE_WARN, "%s: NULL dip", fn);
783 		rv = -1;
784 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
785 	    DDI_PROP_DONTPASS, name,
786 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
787 		rv = -1;
788 	}
789 
790 	return (rv);
791 }
792 
793 static int
794 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
795 {
796 	int		rv = 0;
797 	dev_info_t	*ndip;
798 
799 	ndip = np->n_getdip(np);
800 	if (ndip == NULL) {
801 		rv = -1;
802 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS,
803 		name, len) != DDI_PROP_SUCCESS) {
804 		rv = -1;
805 	}
806 
807 	return (rv);
808 }
809 
810 static drmachid_t
811 drmach_node_dup(drmach_node_t *np)
812 {
813 	drmach_node_t *dup;
814 
815 	dup = drmach_node_new();
816 	dup->here = np->here;
817 	dup->get_dnode = np->get_dnode;
818 	dup->walk = np->walk;
819 	dup->n_getdip = np->n_getdip;
820 	dup->n_getproplen = np->n_getproplen;
821 	dup->n_getprop = np->n_getprop;
822 	dup->get_parent = np->get_parent;
823 
824 	return (dup);
825 }
826 
827 /*
828  * drmach_array provides convenient array construction, access,
829  * bounds checking and array destruction logic.
830  */
831 
832 static drmach_array_t *
833 drmach_array_new(int min_index, int max_index)
834 {
835 	drmach_array_t *arr;
836 
837 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
838 
839 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
840 	if (arr->arr_sz > 0) {
841 		arr->min_index = min_index;
842 		arr->max_index = max_index;
843 
844 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
845 		return (arr);
846 	} else {
847 		kmem_free(arr, sizeof (*arr));
848 		return (0);
849 	}
850 }
851 
852 static int
853 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
854 {
855 	if (idx < arr->min_index || idx > arr->max_index)
856 		return (-1);
857 	else {
858 		arr->arr[idx - arr->min_index] = val;
859 		return (0);
860 	}
861 	/*NOTREACHED*/
862 }
863 
864 static int
865 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
866 {
867 	if (idx < arr->min_index || idx > arr->max_index)
868 		return (-1);
869 	else {
870 		*val = arr->arr[idx - arr->min_index];
871 		return (0);
872 	}
873 	/*NOTREACHED*/
874 }
875 
876 static int
877 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
878 {
879 	int rv;
880 
881 	*idx = arr->min_index;
882 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
883 		*idx += 1;
884 
885 	return (rv);
886 }
887 
888 static int
889 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
890 {
891 	int rv;
892 
893 	*idx += 1;
894 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
895 		*idx += 1;
896 
897 	return (rv);
898 }
899 
900 static void
901 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
902 {
903 	drmachid_t	val;
904 	int		idx;
905 	int		rv;
906 
907 	rv = drmach_array_first(arr, &idx, &val);
908 	while (rv == 0) {
909 		(*disposer)(val);
910 		rv = drmach_array_next(arr, &idx, &val);
911 	}
912 
913 	kmem_free(arr->arr, arr->arr_sz);
914 	kmem_free(arr, sizeof (*arr));
915 }
916 
917 static drmach_board_t *
918 drmach_get_board_by_bnum(int bnum)
919 {
920 	drmachid_t id;
921 
922 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
923 		return ((drmach_board_t *)id);
924 	else
925 		return (NULL);
926 }
927 
928 static pnode_t
929 drmach_node_get_dnode(drmach_node_t *np)
930 {
931 	return (np->get_dnode(np));
932 }
933 
934 /*ARGSUSED*/
935 sbd_error_t *
936 drmach_configure(drmachid_t id, int flags)
937 {
938 	drmach_device_t		*dp;
939 	sbd_error_t		*err = NULL;
940 	dev_info_t		*rdip;
941 	dev_info_t		*fdip = NULL;
942 
943 	if (DRMACH_IS_CPU_ID(id)) {
944 		return (NULL);
945 	}
946 	if (!DRMACH_IS_DEVICE_ID(id))
947 		return (drerr_new(0, EOPL_INAPPROP, NULL));
948 	dp = id;
949 	rdip = dp->node->n_getdip(dp->node);
950 
951 	ASSERT(rdip);
952 
953 	ASSERT(e_ddi_branch_held(rdip));
954 
955 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
956 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
957 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
958 
959 		(void) ddi_pathname(dip, path);
960 		err = drerr_new(1,  EOPL_DRVFAIL, path);
961 
962 		kmem_free(path, MAXPATHLEN);
963 
964 		/* If non-NULL, fdip is returned held and must be released */
965 		if (fdip != NULL)
966 			ddi_release_devi(fdip);
967 	}
968 
969 	return (err);
970 }
971 
972 
973 static sbd_error_t *
974 drmach_device_new(drmach_node_t *node,
975 	drmach_board_t *bp, int portid, drmachid_t *idp)
976 {
977 	int		 i;
978 	int		 rv;
979 	drmach_device_t	proto;
980 	sbd_error_t	*err;
981 	char		 name[OBP_MAXDRVNAME];
982 
983 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
984 	if (rv) {
985 		/* every node is expected to have a name */
986 		err = drerr_new(1, EOPL_GETPROP,
987 			"device node %s: property %s",
988 			ddi_node_name(node->n_getdip(node)), "name");
989 		return (err);
990 	}
991 
992 	/*
993 	 * The node currently being examined is not listed in the name2type[]
994 	 * array.  In this case, the node is no interest to drmach.  Both
995 	 * dp and err are initialized here to yield nothing (no device or
996 	 * error structure) for this case.
997 	 */
998 	i = drmach_name2type_idx(name);
999 
1000 
1001 	if (i < 0) {
1002 		*idp = (drmachid_t)0;
1003 		return (NULL);
1004 	}
1005 
1006 	/* device specific new function will set unum */
1007 
1008 	bzero(&proto, sizeof (proto));
1009 	proto.type = drmach_name2type[i].type;
1010 	proto.bp = bp;
1011 	proto.node = node;
1012 	proto.portid = portid;
1013 
1014 	return (drmach_name2type[i].new(&proto, idp));
1015 }
1016 
1017 static void
1018 drmach_device_dispose(drmachid_t id)
1019 {
1020 	drmach_device_t *self = id;
1021 
1022 	self->cm.dispose(id);
1023 }
1024 
1025 
1026 static drmach_board_t *
1027 drmach_board_new(int bnum, int boot_board)
1028 {
1029 	static sbd_error_t *drmach_board_release(drmachid_t);
1030 	static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
1031 
1032 	drmach_board_t	*bp;
1033 
1034 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1035 
1036 	bp->cm.isa = (void *)drmach_board_new;
1037 	bp->cm.release = drmach_board_release;
1038 	bp->cm.status = drmach_board_status;
1039 
1040 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1041 
1042 	bp->bnum = bnum;
1043 	bp->devices = NULL;
1044 	bp->connected = boot_board;
1045 	bp->tree = drmach_node_new();
1046 	bp->assigned = boot_board;
1047 	bp->powered = boot_board;
1048 	bp->boot_board = boot_board;
1049 
1050 	/*
1051 	 * If this is not bootup initialization, we have to wait till
1052 	 * IKP sets up the device nodes in drmach_board_connect().
1053 	 */
1054 	if (boot_board)
1055 		drmach_setup_core_info(bp);
1056 
1057 	drmach_array_set(drmach_boards, bnum, bp);
1058 	return (bp);
1059 }
1060 
1061 static void
1062 drmach_board_dispose(drmachid_t id)
1063 {
1064 	drmach_board_t *bp;
1065 
1066 	ASSERT(DRMACH_IS_BOARD_ID(id));
1067 	bp = id;
1068 
1069 	if (bp->tree)
1070 		drmach_node_dispose(bp->tree);
1071 
1072 	if (bp->devices)
1073 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1074 
1075 	kmem_free(bp, sizeof (*bp));
1076 }
1077 
1078 static sbd_error_t *
1079 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1080 {
1081 	sbd_error_t	*err = NULL;
1082 	drmach_board_t	*bp;
1083 
1084 	if (!DRMACH_IS_BOARD_ID(id))
1085 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1086 	bp = id;
1087 
1088 	stat->assigned = bp->assigned;
1089 	stat->powered = bp->powered;
1090 	stat->busy = 0;			/* assume not busy */
1091 	stat->configured = 0;		/* assume not configured */
1092 	stat->empty = 0;
1093 	stat->cond = bp->cond = SBD_COND_OK;
1094 	strncpy(stat->type, "System Brd", sizeof (stat->type));
1095 	stat->info[0] = '\0';
1096 
1097 	if (bp->devices) {
1098 		int		 rv;
1099 		int		 d_idx;
1100 		drmachid_t	 d_id;
1101 
1102 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1103 		while (rv == 0) {
1104 			drmach_status_t	d_stat;
1105 
1106 			err = drmach_i_status(d_id, &d_stat);
1107 			if (err)
1108 				break;
1109 
1110 			stat->busy |= d_stat.busy;
1111 			stat->configured |= d_stat.configured;
1112 
1113 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1114 		}
1115 	}
1116 
1117 	return (err);
1118 }
1119 
1120 int
1121 drmach_board_is_floating(drmachid_t id)
1122 {
1123 	drmach_board_t *bp;
1124 
1125 	if (!DRMACH_IS_BOARD_ID(id))
1126 		return (0);
1127 
1128 	bp = (drmach_board_t *)id;
1129 
1130 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1131 }
1132 
1133 static int
1134 drmach_init(void)
1135 {
1136 	dev_info_t	*rdip;
1137 	int		i, rv, len;
1138 	int		*floating;
1139 
1140 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1141 
1142 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1143 
1144 	rdip = ddi_root_node();
1145 
1146 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1147 		"floating-boards", &len) != DDI_PROP_SUCCESS) {
1148 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1149 	} else {
1150 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1151 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip,
1152 			PROP_LEN_AND_VAL_BUF, DDI_PROP_DONTPASS,
1153 			"floating-boards", (caddr_t)floating, &len);
1154 		if (rv != DDI_PROP_SUCCESS) {
1155 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1156 		} else {
1157 			drmach_domain.floating = 0;
1158 			for (i = 0; i < len / sizeof (int); i++) {
1159 				drmach_domain.floating |= (1 << floating[i]);
1160 			}
1161 		}
1162 		kmem_free(floating, len);
1163 	}
1164 	drmach_domain.allow_dr = opl_check_dr_status();
1165 
1166 	rdip = ddi_get_child(ddi_root_node());
1167 	do {
1168 		int		 bnum;
1169 		drmachid_t	 id;
1170 
1171 		bnum = -1;
1172 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip,
1173 			DDI_PROP_DONTPASS, OBP_BOARDNUM, -1);
1174 		if (bnum == -1)
1175 			continue;
1176 
1177 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1178 			cmn_err(CE_WARN, "Device node 0x%p has"
1179 				" invalid property value, %s=%d",
1180 					rdip, OBP_BOARDNUM, bnum);
1181 			goto error;
1182 		} else if (id == NULL) {
1183 			(void) drmach_board_new(bnum, 1);
1184 		}
1185 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1186 
1187 	opl_hold_devtree();
1188 
1189 	/*
1190 	 * Initialize the IKP feature.
1191 	 *
1192 	 * This can be done only after DR has acquired a hold on all the
1193 	 * device nodes that are interesting to IKP.
1194 	 */
1195 	if (opl_init_cfg() != 0) {
1196 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1197 
1198 		opl_release_devtree();
1199 
1200 		goto error;
1201 	}
1202 
1203 	return (0);
1204 error:
1205 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1206 	rw_destroy(&drmach_boards_rwlock);
1207 	return (ENXIO);
1208 }
1209 
1210 static void
1211 drmach_fini(void)
1212 {
1213 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1214 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1215 	drmach_boards = NULL;
1216 	rw_exit(&drmach_boards_rwlock);
1217 
1218 	/*
1219 	 * Walk immediate children of the root devinfo node
1220 	 * releasing holds acquired on branches in drmach_init()
1221 	 */
1222 
1223 	opl_release_devtree();
1224 
1225 	rw_destroy(&drmach_boards_rwlock);
1226 }
1227 
1228 /*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
1234  *
1235  *
1236  *	Device Path:
1237  *	/pci@<portid>,reg
1238  *
1239  *	where
1240  *	portid[10] = 0
1241  *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
1242  *
1243  *	LLEAF_ID[9:8] = 0
1244  *	LLEAF_ID[8:4] = LSB_ID[4:0]
1245  *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
1246  *			channel 4 is pcicmu
1247  *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
1248  *
1249  *	Properties:
1250  *	name = pci
1251  *	device_type = "pciex"
1252  *	board# = LSBID
1253  *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
1254  *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
1255  */
1256 
1257 static sbd_error_t *
1258 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1259 {
1260 	drmach_io_t	*ip;
1261 
1262 	int		 portid;
1263 
1264 	portid = proto->portid;
1265 	ASSERT(portid != -1);
1266 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1267 
1268 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1269 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1270 	ip->dev.node = drmach_node_dup(proto->node);
1271 	ip->dev.cm.isa = (void *)drmach_io_new;
1272 	ip->dev.cm.dispose = drmach_io_dispose;
1273 	ip->dev.cm.release = drmach_io_release;
1274 	ip->dev.cm.status = drmach_io_status;
1275 	ip->channel = (portid >> 1) & 0x7;
1276 	ip->leaf = (portid & 0x1);
1277 
1278 	snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1279 		ip->dev.type, ip->dev.unum);
1280 
1281 	*idp = (drmachid_t)ip;
1282 	return (NULL);
1283 }
1284 
1285 
1286 static void
1287 drmach_io_dispose(drmachid_t id)
1288 {
1289 	drmach_io_t *self;
1290 
1291 	ASSERT(DRMACH_IS_IO_ID(id));
1292 
1293 	self = id;
1294 	if (self->dev.node)
1295 		drmach_node_dispose(self->dev.node);
1296 
1297 	kmem_free(self, sizeof (*self));
1298 }
1299 
1300 /*ARGSUSED*/
1301 sbd_error_t *
1302 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1303 {
1304 	drmach_board_t	*bp = (drmach_board_t *)id;
1305 	sbd_error_t	*err = NULL;
1306 
1307 	/* allow status and ncm operations to always succeed */
1308 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1309 		return (NULL);
1310 	}
1311 
1312 	/* check all other commands for the required option string */
1313 
1314 	if ((opts->size > 0) && (opts->copts != NULL)) {
1315 
1316 		DRMACH_PR("platform options: %s\n", opts->copts);
1317 
1318 		if (strstr(opts->copts, "opldr") == NULL) {
1319 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1320 		}
1321 	} else {
1322 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1323 	}
1324 
1325 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1326 		switch (cmd) {
1327 			case SBD_CMD_TEST:
1328 			case SBD_CMD_STATUS:
1329 			case SBD_CMD_GETNCM:
1330 				break;
1331 			case SBD_CMD_CONNECT:
1332 				if (bp->connected)
1333 					err = drerr_new(0, ESBD_STATE, NULL);
1334 				else if (!drmach_domain.allow_dr)
1335 					err = drerr_new(1, EOPL_SUPPORT,
1336 						NULL);
1337 				break;
1338 			case SBD_CMD_DISCONNECT:
1339 				if (!bp->connected)
1340 					err = drerr_new(0, ESBD_STATE, NULL);
1341 				else if (!drmach_domain.allow_dr)
1342 					err = drerr_new(1, EOPL_SUPPORT,
1343 						NULL);
1344 				break;
1345 			default:
1346 				if (!drmach_domain.allow_dr)
1347 					err = drerr_new(1, EOPL_SUPPORT,
1348 						NULL);
1349 				break;
1350 
1351 		}
1352 	}
1353 
1354 	return (err);
1355 }
1356 
1357 /*ARGSUSED*/
1358 sbd_error_t *
1359 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1360 {
1361 	return (NULL);
1362 }
1363 
1364 sbd_error_t *
1365 drmach_board_assign(int bnum, drmachid_t *id)
1366 {
1367 	sbd_error_t	*err = NULL;
1368 
1369 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1370 
1371 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1372 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1373 	} else {
1374 		drmach_board_t	*bp;
1375 
1376 		if (*id)
1377 			rw_downgrade(&drmach_boards_rwlock);
1378 
1379 		bp = *id;
1380 		if (!(*id))
1381 			bp = *id  =
1382 				(drmachid_t)drmach_board_new(bnum, 0);
1383 		bp->assigned = 1;
1384 	}
1385 
1386 	rw_exit(&drmach_boards_rwlock);
1387 
1388 	return (err);
1389 }
1390 
1391 /*ARGSUSED*/
1392 sbd_error_t *
1393 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1394 {
1395 	drmach_board_t	*obj = (drmach_board_t *)id;
1396 
1397 	if (!DRMACH_IS_BOARD_ID(id))
1398 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1399 
1400 	if (opl_probe_sb(obj->bnum) != 0)
1401 		return (DRMACH_INTERNAL_ERROR());
1402 
1403 	(void) prom_attach_notice(obj->bnum);
1404 
1405 	drmach_setup_core_info(obj);
1406 
1407 	obj->connected = 1;
1408 
1409 	return (NULL);
1410 }
1411 
1412 static int drmach_cache_flush_flag[NCPU];
1413 
1414 /*ARGSUSED*/
1415 static void
1416 drmach_flush_cache(uint64_t id, uint64_t dummy)
1417 {
1418 	extern void cpu_flush_ecache(void);
1419 
1420 	cpu_flush_ecache();
1421 	drmach_cache_flush_flag[id] = 0;
1422 }
1423 
1424 static void
1425 drmach_flush_all()
1426 {
1427 	cpuset_t	xc_cpuset;
1428 	int		i;
1429 
1430 	xc_cpuset = cpu_ready_set;
1431 	for (i = 0; i < NCPU; i++) {
1432 		if (CPU_IN_SET(xc_cpuset, i)) {
1433 			drmach_cache_flush_flag[i] = 1;
1434 			xc_one(i, drmach_flush_cache, i, 0);
1435 			while (drmach_cache_flush_flag[i]) {
1436 				DELAY(1000);
1437 			}
1438 		}
1439 	}
1440 }
1441 
1442 static int
1443 drmach_disconnect_cpus(drmach_board_t *bp)
1444 {
1445 	int i, bnum;
1446 
1447 	bnum = bp->bnum;
1448 
1449 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1450 	    if (bp->cores[i].core_present) {
1451 		if (bp->cores[i].core_started)
1452 		    return (-1);
1453 		if (bp->cores[i].core_hotadded) {
1454 		    if (drmach_add_remove_cpu(bnum, i, HOTREMOVE_CPU)) {
1455 			cmn_err(CE_WARN,
1456 			    "Failed to remove CMP %d on board %d\n",
1457 			    i, bnum);
1458 			return (-1);
1459 		    }
1460 		}
1461 	    }
1462 	}
1463 	return (0);
1464 }
1465 
1466 /*ARGSUSED*/
1467 sbd_error_t *
1468 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1469 {
1470 	drmach_board_t *obj;
1471 	int rv = 0;
1472 	sbd_error_t		*err = NULL;
1473 
1474 
1475 	if (!DRMACH_IS_BOARD_ID(id))
1476 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1477 
1478 
1479 
1480 	obj = (drmach_board_t *)id;
1481 
1482 	if (drmach_disconnect_cpus(obj)) {
1483 		err = drerr_new(0, EOPL_DEPROBE, obj->cm.name);
1484 		return (err);
1485 	}
1486 
1487 	rv = opl_unprobe_sb(obj->bnum);
1488 
1489 	if (rv == 0) {
1490 		prom_detach_notice(obj->bnum);
1491 		obj->connected = 0;
1492 
1493 	} else
1494 		err = drerr_new(0, EOPL_DEPROBE, obj->cm.name);
1495 
1496 	return (err);
1497 }
1498 
1499 static int
1500 drmach_get_portid(drmach_node_t *np)
1501 {
1502 	int		portid;
1503 	char		type[OBP_MAXPROPNAME];
1504 
1505 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1506 		return (portid);
1507 
1508 	/*
1509 	 * Get the device_type property to see if we should
1510 	 * continue processing this node.
1511 	 */
1512 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1513 		return (-1);
1514 
1515 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1516 		/*
1517 		 * We return cpuid because it has no portid
1518 		 */
1519 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1520 			return (portid);
1521 	}
1522 
1523 	return (-1);
1524 }
1525 
1526 /*
1527  * This is a helper function to determine if a given
1528  * node should be considered for a dr operation according
1529  * to predefined dr type nodes and the node's name.
1530  * Formal Parameter : The name of a device node.
1531  * Return Value: -1, name does not map to a valid dr type.
1532  *		 A value greater or equal to 0, name is a valid dr type.
1533  */
1534 static int
1535 drmach_name2type_idx(char *name)
1536 {
1537 	int 	index, ntypes;
1538 
1539 	if (name == NULL)
1540 		return (-1);
1541 
1542 	/*
1543 	 * Determine how many possible types are currently supported
1544 	 * for dr.
1545 	 */
1546 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1547 
1548 	/* Determine if the node's name correspond to a predefined type. */
1549 	for (index = 0; index < ntypes; index++) {
1550 		if (strcmp(drmach_name2type[index].name, name) == 0)
1551 			/* The node is an allowed type for dr. */
1552 			return (index);
1553 	}
1554 
1555 	/*
1556 	 * If the name of the node does not map to any of the
1557 	 * types in the array drmach_name2type then the node is not of
1558 	 * interest to dr.
1559 	 */
1560 	return (-1);
1561 }
1562 
/*
 * There are some complications on OPL:
 * - pseudo-mc nodes do not have a portid property
 * - portid[9:5] of a cmp node is the LSB#, portid[7:3] of a pci node
 *   is the LSB#
 * - cmp has board#
 * - core and cpu nodes do not have portid and board# properties
 * starcat uses portid to derive the board# but that does not work
 * for us.  starfire reads the board# property to filter the devices.
 * That does not work either.  So for these specific devices,
 * we use specific hard coded methods to get the board# -
 * cpu: LSB# = CPUID[9:5]
 */
1575 
1576 static int
1577 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1578 {
1579 	drmach_node_t			*node = args->node;
1580 	drmach_board_cb_data_t		*data = args->data;
1581 	drmach_board_t			*obj = data->obj;
1582 
1583 	int		rv, portid;
1584 	int		bnum;
1585 	drmachid_t	id;
1586 	drmach_device_t	*device;
1587 	char name[OBP_MAXDRVNAME];
1588 
1589 	portid = drmach_get_portid(node);
1590 	/*
1591 	 * core, cpu and pseudo-mc do not have portid
1592 	 * we use cpuid as the portid of the cpu node
1593 	 * for pseudo-mc, we do not use portid info.
1594 	 */
1595 
1596 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1597 	if (rv)
1598 		return (0);
1599 
1600 
1601 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1602 
1603 	if (rv) {
1604 		/*
1605 		 * cpu does not have board# property.  We use
1606 		 * CPUID[9:5]
1607 		 */
1608 		if (strcmp("cpu", name) == 0) {
1609 			bnum = (portid >> 5) & 0x1f;
1610 		} else
1611 			return (0);
1612 	}
1613 
1614 
1615 	if (bnum != obj->bnum)
1616 		return (0);
1617 
1618 	if (drmach_name2type_idx(name) < 0) {
1619 		return (0);
1620 	}
1621 
1622 	/*
1623 	 * Create a device data structure from this node data.
1624 	 * The call may yield nothing if the node is not of interest
1625 	 * to drmach.
1626 	 */
1627 	data->err = drmach_device_new(node, obj, portid, &id);
1628 	if (data->err)
1629 		return (-1);
1630 	else if (!id) {
1631 		/*
1632 		 * drmach_device_new examined the node we passed in
1633 		 * and determined that it was one not of interest to
1634 		 * drmach.  So, it is skipped.
1635 		 */
1636 		return (0);
1637 	}
1638 
1639 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1640 	if (rv) {
1641 		data->err = DRMACH_INTERNAL_ERROR();
1642 		return (-1);
1643 	}
1644 	device = id;
1645 
1646 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1647 	return (data->err == NULL ? 0 : -1);
1648 }
1649 
1650 sbd_error_t *
1651 drmach_board_find_devices(drmachid_t id, void *a,
1652 	sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1653 {
1654 	drmach_board_t		*bp = (drmach_board_t *)id;
1655 	sbd_error_t		*err;
1656 	int			 max_devices;
1657 	int			 rv;
1658 	drmach_board_cb_data_t	data;
1659 
1660 
1661 	if (!DRMACH_IS_BOARD_ID(id))
1662 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1663 
1664 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1665 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1666 	max_devices += MAX_IO_UNITS_PER_BOARD;
1667 
1668 	bp->devices = drmach_array_new(0, max_devices);
1669 
1670 	if (bp->tree == NULL)
1671 		bp->tree = drmach_node_new();
1672 
1673 	data.obj = bp;
1674 	data.ndevs = 0;
1675 	data.found = found;
1676 	data.a = a;
1677 	data.err = NULL;
1678 
1679 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1680 	if (rv == 0)
1681 		err = NULL;
1682 	else {
1683 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1684 		bp->devices = NULL;
1685 
1686 		if (data.err)
1687 			err = data.err;
1688 		else
1689 			err = DRMACH_INTERNAL_ERROR();
1690 	}
1691 
1692 	return (err);
1693 }
1694 
1695 int
1696 drmach_board_lookup(int bnum, drmachid_t *id)
1697 {
1698 	int	rv = 0;
1699 
1700 	rw_enter(&drmach_boards_rwlock, RW_READER);
1701 	if (drmach_array_get(drmach_boards, bnum, id)) {
1702 		*id = 0;
1703 		rv = -1;
1704 	}
1705 	rw_exit(&drmach_boards_rwlock);
1706 	return (rv);
1707 }
1708 
1709 sbd_error_t *
1710 drmach_board_name(int bnum, char *buf, int buflen)
1711 {
1712 	snprintf(buf, buflen, "SB%d", bnum);
1713 	return (NULL);
1714 }
1715 
1716 sbd_error_t *
1717 drmach_board_poweroff(drmachid_t id)
1718 {
1719 	drmach_board_t	*bp;
1720 	sbd_error_t	*err;
1721 	drmach_status_t	 stat;
1722 
1723 	if (!DRMACH_IS_BOARD_ID(id))
1724 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1725 	bp = id;
1726 
1727 	err = drmach_board_status(id, &stat);
1728 
1729 	if (!err) {
1730 		if (stat.configured || stat.busy)
1731 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1732 		else {
1733 			bp->powered = 0;
1734 		}
1735 	}
1736 	return (err);
1737 }
1738 
1739 sbd_error_t *
1740 drmach_board_poweron(drmachid_t id)
1741 {
1742 	drmach_board_t	*bp;
1743 
1744 	if (!DRMACH_IS_BOARD_ID(id))
1745 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1746 	bp = id;
1747 
1748 	bp->powered = 1;
1749 
1750 	return (NULL);
1751 }
1752 
1753 static sbd_error_t *
1754 drmach_board_release(drmachid_t id)
1755 {
1756 	if (!DRMACH_IS_BOARD_ID(id))
1757 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1758 	return (NULL);
1759 }
1760 
1761 /*ARGSUSED*/
1762 sbd_error_t *
1763 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1764 {
1765 	return (NULL);
1766 }
1767 
1768 sbd_error_t *
1769 drmach_board_unassign(drmachid_t id)
1770 {
1771 	drmach_board_t	*bp;
1772 	sbd_error_t	*err;
1773 	drmach_status_t	 stat;
1774 
1775 
1776 	if (!DRMACH_IS_BOARD_ID(id)) {
1777 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1778 	}
1779 	bp = id;
1780 
1781 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1782 
1783 	err = drmach_board_status(id, &stat);
1784 	if (err) {
1785 		rw_exit(&drmach_boards_rwlock);
1786 		return (err);
1787 	}
1788 	if (stat.configured || stat.busy) {
1789 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1790 	} else {
1791 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1792 			err = DRMACH_INTERNAL_ERROR();
1793 		else
1794 			drmach_board_dispose(bp);
1795 	}
1796 	rw_exit(&drmach_boards_rwlock);
1797 	return (err);
1798 }
1799 
1800 /*
1801  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1802  * implementation #, etc
1803  */
1804 
1805 static sbd_error_t *
1806 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1807 {
1808 	static void drmach_cpu_dispose(drmachid_t);
1809 	static sbd_error_t *drmach_cpu_release(drmachid_t);
1810 	static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
1811 
1812 	int		 portid;
1813 	drmach_cpu_t	*cp = NULL;
1814 
1815 	/* portid is CPUID of the node */
1816 	portid = proto->portid;
1817 	ASSERT(portid != -1);
1818 
1819 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1820 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1821 		(OPL_MAX_CMPID_PER_BOARD - 1)) +
1822 		((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1823 		(OPL_MAX_CMPID_PER_BOARD));
1824 
1825 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1826 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1827 	cp->dev.node = drmach_node_dup(proto->node);
1828 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1829 	cp->dev.cm.dispose = drmach_cpu_dispose;
1830 	cp->dev.cm.release = drmach_cpu_release;
1831 	cp->dev.cm.status = drmach_cpu_status;
1832 
1833 	snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1834 		cp->dev.type, cp->dev.unum);
1835 
1836 /*
1837  *	CPU ID representation
1838  *	CPUID[9:5] = SB#
1839  *	CPUID[4:3] = Chip#
1840  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1841  *	CPUID[0:0] = Strand#
1842  */
1843 
1844 /*
1845  *	reg property of the strand contains strand ID
1846  *	reg property of the parent node contains core ID
1847  *	We should use them.
1848  */
1849 	cp->cpuid = portid;
1850 	cp->sb = (portid >> 5) & 0x1f;
1851 	cp->chipid = (portid >> 3) & 0x3;
1852 	cp->coreid = (portid >> 1) & 0x3;
1853 	cp->strandid = portid & 0x1;
1854 
1855 	*idp = (drmachid_t)cp;
1856 	return (NULL);
1857 }
1858 
1859 
1860 static void
1861 drmach_cpu_dispose(drmachid_t id)
1862 {
1863 	drmach_cpu_t	*self;
1864 
1865 	ASSERT(DRMACH_IS_CPU_ID(id));
1866 
1867 	self = id;
1868 	if (self->dev.node)
1869 		drmach_node_dispose(self->dev.node);
1870 
1871 	kmem_free(self, sizeof (*self));
1872 }
1873 
1874 static int
1875 drmach_cpu_start(struct cpu *cp)
1876 {
1877 	int		cpuid = cp->cpu_id;
1878 	extern int	restart_other_cpu(int);
1879 
1880 	ASSERT(MUTEX_HELD(&cpu_lock));
1881 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1882 
1883 	cp->cpu_flags &= ~CPU_POWEROFF;
1884 
1885 	/*
1886 	 * NOTE: restart_other_cpu pauses cpus during the
1887 	 *	 slave cpu start.  This helps to quiesce the
1888 	 *	 bus traffic a bit which makes the tick sync
1889 	 *	 routine in the prom more robust.
1890 	 */
1891 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1892 
1893 	restart_other_cpu(cpuid);
1894 
1895 	return (0);
1896 }
1897 
1898 static sbd_error_t *
1899 drmach_cpu_release(drmachid_t id)
1900 {
1901 	if (!DRMACH_IS_CPU_ID(id))
1902 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1903 
1904 	return (NULL);
1905 }
1906 
1907 static sbd_error_t *
1908 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1909 {
1910 	drmach_cpu_t *cp;
1911 	drmach_device_t *dp;
1912 
1913 	ASSERT(DRMACH_IS_CPU_ID(id));
1914 	cp = (drmach_cpu_t *)id;
1915 	dp = &cp->dev;
1916 
1917 	stat->assigned = dp->bp->assigned;
1918 	stat->powered = dp->bp->powered;
1919 	mutex_enter(&cpu_lock);
1920 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1921 	mutex_exit(&cpu_lock);
1922 	stat->busy = dp->busy;
1923 	strncpy(stat->type, dp->type, sizeof (stat->type));
1924 	stat->info[0] = '\0';
1925 
1926 	return (NULL);
1927 }
1928 
1929 sbd_error_t *
1930 drmach_cpu_disconnect(drmachid_t id)
1931 {
1932 
1933 	if (!DRMACH_IS_CPU_ID(id))
1934 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1935 
1936 	return (NULL);
1937 }
1938 
1939 sbd_error_t *
1940 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1941 {
1942 	drmach_cpu_t *cpu;
1943 
1944 	if (!DRMACH_IS_CPU_ID(id))
1945 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1946 	cpu = (drmach_cpu_t *)id;
1947 
1948 	/* get from cpu directly on OPL */
1949 	*cpuid = cpu->cpuid;
1950 	return (NULL);
1951 }
1952 
1953 sbd_error_t *
1954 drmach_cpu_get_impl(drmachid_t id, int *ip)
1955 {
1956 	drmach_device_t *cpu;
1957 	drmach_node_t	*np;
1958 	drmach_node_t	pp;
1959 	int		impl;
1960 	char		type[OBP_MAXPROPNAME];
1961 
1962 	if (!DRMACH_IS_CPU_ID(id))
1963 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1964 
1965 	cpu = id;
1966 	np = cpu->node;
1967 
1968 	if (np->get_parent(np, &pp) != 0) {
1969 		return (DRMACH_INTERNAL_ERROR());
1970 	}
1971 
1972 	/* the parent should be core */
1973 
1974 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1975 		return (drerr_new(0, EOPL_GETPROP, NULL));
1976 	}
1977 
1978 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1979 		if (pp.n_getprop(&pp, "implementation#",
1980 			&impl, sizeof (impl)) != 0) {
1981 			return (drerr_new(0, EOPL_GETPROP, NULL));
1982 		}
1983 	} else {
1984 		return (DRMACH_INTERNAL_ERROR());
1985 	}
1986 
1987 	*ip = impl;
1988 
1989 	return (NULL);
1990 }
1991 
1992 sbd_error_t *
1993 drmach_get_dip(drmachid_t id, dev_info_t **dip)
1994 {
1995 	drmach_device_t	*dp;
1996 
1997 	if (!DRMACH_IS_DEVICE_ID(id))
1998 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1999 	dp = id;
2000 
2001 	*dip = dp->node->n_getdip(dp->node);
2002 	return (NULL);
2003 }
2004 
2005 sbd_error_t *
2006 drmach_io_is_attached(drmachid_t id, int *yes)
2007 {
2008 	drmach_device_t *dp;
2009 	dev_info_t	*dip;
2010 	int		state;
2011 
2012 	if (!DRMACH_IS_IO_ID(id))
2013 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2014 	dp = id;
2015 
2016 	dip = dp->node->n_getdip(dp->node);
2017 	if (dip == NULL) {
2018 		*yes = 0;
2019 		return (NULL);
2020 	}
2021 
2022 	state = ddi_get_devstate(dip);
2023 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2024 	    (state == DDI_DEVSTATE_UP));
2025 
2026 	return (NULL);
2027 }
2028 
2029 struct drmach_io_cb {
2030 	char	*name;	/* name of the node */
2031 	int	(*func)(dev_info_t *);
2032 	int	rv;
2033 };
2034 
2035 #define	DRMACH_IO_POST_ATTACH	0
2036 #define	DRMACH_IO_PRE_RELEASE	1
2037 
2038 static int
2039 drmach_io_cb_check(dev_info_t *dip, void *arg)
2040 {
2041 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2042 	char name[OBP_MAXDRVNAME];
2043 	int len = OBP_MAXDRVNAME;
2044 
2045 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2046 		DDI_PROP_DONTPASS, "name",
2047 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2048 		return (DDI_WALK_PRUNECHILD);
2049 	}
2050 
2051 	if (strcmp(name, p->name) == 0) {
2052 		p->rv = (*p->func)(dip);
2053 		return (DDI_WALK_TERMINATE);
2054 	}
2055 
2056 	return (DDI_WALK_CONTINUE);
2057 }
2058 
2059 
2060 static int
2061 drmach_console_ops(drmachid_t *id, int state)
2062 {
2063 	drmach_io_t *obj = (drmach_io_t *)id;
2064 	struct drmach_io_cb arg;
2065 	int (*msudetp)(dev_info_t *);
2066 	int (*msuattp)(dev_info_t *);
2067 	dev_info_t *dip, *pdip;
2068 	int circ;
2069 
2070 	/* 4 is pcicmu channel */
2071 	if (obj->channel != 4)
2072 		return (0);
2073 
2074 	arg.name = "serial";
2075 	arg.func = NULL;
2076 	if (state == DRMACH_IO_PRE_RELEASE) {
2077 		msudetp = (int (*)(dev_info_t *))
2078 		    modgetsymvalue("oplmsu_dr_detach", 0);
2079 		if (msudetp != NULL)
2080 			arg.func = msudetp;
2081 	} else if (state == DRMACH_IO_POST_ATTACH) {
2082 		msuattp = (int (*)(dev_info_t *))
2083 		    modgetsymvalue("oplmsu_dr_attach", 0);
2084 		if (msuattp != NULL)
2085 			arg.func = msuattp;
2086 	}
2087 	else
2088 		return (0);
2089 
2090 	if (arg.func == NULL) {
2091 		return (0);
2092 	}
2093 
2094 	arg.rv = 0;
2095 
2096 	dip = obj->dev.node->n_getdip(obj->dev.node);
2097 	if (pdip = ddi_get_parent(dip)) {
2098 		ndi_hold_devi(pdip);
2099 		ndi_devi_enter(pdip, &circ);
2100 	} else {
2101 		/* this cannot happen unless something bad happens */
2102 		return (-1);
2103 	}
2104 
2105 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2106 
2107 	if (pdip) {
2108 		ndi_devi_exit(pdip, circ);
2109 		ndi_rele_devi(pdip);
2110 	}
2111 
2112 	return (arg.rv);
2113 }
2114 
2115 sbd_error_t *
2116 drmach_io_pre_release(drmachid_t id)
2117 {
2118 	int rv;
2119 
2120 	if (!DRMACH_IS_IO_ID(id))
2121 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2122 
2123 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2124 
2125 	if (rv != 0)
2126 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2127 
2128 	return (NULL);
2129 }
2130 
2131 static sbd_error_t *
2132 drmach_io_release(drmachid_t id)
2133 {
2134 	if (!DRMACH_IS_IO_ID(id))
2135 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2136 	return (NULL);
2137 }
2138 
2139 sbd_error_t *
2140 drmach_io_unrelease(drmachid_t id)
2141 {
2142 	if (!DRMACH_IS_IO_ID(id))
2143 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2144 	return (NULL);
2145 }
2146 
2147 /*ARGSUSED*/
2148 sbd_error_t *
2149 drmach_io_post_release(drmachid_t id)
2150 {
2151 	return (NULL);
2152 }
2153 
2154 /*ARGSUSED*/
2155 sbd_error_t *
2156 drmach_io_post_attach(drmachid_t id)
2157 {
2158 	int rv;
2159 
2160 	if (!DRMACH_IS_IO_ID(id))
2161 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2162 
2163 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2164 
2165 	if (rv != 0)
2166 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2167 
2168 	return (0);
2169 }
2170 
2171 static sbd_error_t *
2172 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2173 {
2174 	drmach_device_t *dp;
2175 	sbd_error_t	*err;
2176 	int		 configured;
2177 
2178 	ASSERT(DRMACH_IS_IO_ID(id));
2179 	dp = id;
2180 
2181 	err = drmach_io_is_attached(id, &configured);
2182 	if (err)
2183 		return (err);
2184 
2185 	stat->assigned = dp->bp->assigned;
2186 	stat->powered = dp->bp->powered;
2187 	stat->configured = (configured != 0);
2188 	stat->busy = dp->busy;
2189 	strncpy(stat->type, dp->type, sizeof (stat->type));
2190 	stat->info[0] = '\0';
2191 
2192 	return (NULL);
2193 }
2194 
2195 static sbd_error_t *
2196 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2197 {
2198 	static void drmach_mem_dispose(drmachid_t);
2199 	static sbd_error_t *drmach_mem_release(drmachid_t);
2200 	static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
2201 	dev_info_t *dip;
2202 
2203 	drmach_mem_t	*mp;
2204 
2205 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2206 	proto->unum = 0;
2207 
2208 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2209 	mp->dev.node = drmach_node_dup(proto->node);
2210 	mp->dev.cm.isa = (void *)drmach_mem_new;
2211 	mp->dev.cm.dispose = drmach_mem_dispose;
2212 	mp->dev.cm.release = drmach_mem_release;
2213 	mp->dev.cm.status = drmach_mem_status;
2214 
2215 	snprintf(mp->dev.cm.name,
2216 		sizeof (mp->dev.cm.name), "%s", mp->dev.type);
2217 
2218 	dip = mp->dev.node->n_getdip(mp->dev.node);
2219 	if (drmach_setup_mc_info(dip, mp) != 0) {
2220 		return (DRMACH_INTERNAL_ERROR());
2221 	}
2222 
2223 	*idp = (drmachid_t)mp;
2224 	return (NULL);
2225 }
2226 
2227 static void
2228 drmach_mem_dispose(drmachid_t id)
2229 {
2230 	drmach_mem_t *mp;
2231 
2232 	ASSERT(DRMACH_IS_MEM_ID(id));
2233 
2234 
2235 	mp = id;
2236 
2237 	if (mp->dev.node)
2238 		drmach_node_dispose(mp->dev.node);
2239 
2240 	if (mp->memlist) {
2241 		memlist_delete(mp->memlist);
2242 		mp->memlist = NULL;
2243 	}
2244 }
2245 
2246 sbd_error_t *
2247 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2248 {
2249 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2250 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2251 	int		rv;
2252 
2253 	ASSERT(size != 0);
2254 
2255 	if (!DRMACH_IS_MEM_ID(id))
2256 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2257 
2258 	kcage_range_lock();
2259 	rv = kcage_range_add(basepfn, npages, 1);
2260 	kcage_range_unlock();
2261 	if (rv == ENOMEM) {
2262 		cmn_err(CE_WARN, "%ld megabytes not available to kernel cage",
2263 			(size == 0 ? 0 : size / MBYTE));
2264 	} else if (rv != 0) {
2265 		/* catch this in debug kernels */
2266 		ASSERT(0);
2267 
2268 		cmn_err(CE_WARN, "unexpected kcage_range_add"
2269 			" return value %d", rv);
2270 	}
2271 
2272 	if (rv) {
2273 		return (DRMACH_INTERNAL_ERROR());
2274 	}
2275 	else
2276 		return (NULL);
2277 }
2278 
2279 sbd_error_t *
2280 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2281 {
2282 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2283 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2284 	int		rv;
2285 
2286 	if (!DRMACH_IS_MEM_ID(id))
2287 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2288 
2289 	if (size > 0) {
2290 		kcage_range_lock();
2291 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2292 		kcage_range_unlock();
2293 		if (rv != 0) {
2294 			cmn_err(CE_WARN,
2295 			    "unexpected kcage_range_delete_post_mem_del"
2296 			    " return value %d", rv);
2297 			return (DRMACH_INTERNAL_ERROR());
2298 		}
2299 	}
2300 
2301 	return (NULL);
2302 }
2303 
2304 sbd_error_t *
2305 drmach_mem_disable(drmachid_t id)
2306 {
2307 	if (!DRMACH_IS_MEM_ID(id))
2308 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2309 	else {
2310 		drmach_flush_all();
2311 		return (NULL);
2312 	}
2313 }
2314 
2315 sbd_error_t *
2316 drmach_mem_enable(drmachid_t id)
2317 {
2318 	if (!DRMACH_IS_MEM_ID(id))
2319 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2320 	else
2321 		return (NULL);
2322 }
2323 
2324 sbd_error_t *
2325 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2326 {
2327 	drmach_mem_t *mp;
2328 
2329 	if (!DRMACH_IS_MEM_ID(id))
2330 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2331 
2332 	mp = (drmach_mem_t *)id;
2333 
2334 	/*
2335 	 * This is only used by dr to round up/down the memory
2336 	 * for copying. Our unit of memory isolation is 64 MB.
2337 	 */
2338 
2339 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2340 	mem->mi_basepa = mp->base_pa;
2341 	mem->mi_size = mp->nbytes;
2342 	mem->mi_slice_size = mp->slice_size;
2343 
2344 	return (NULL);
2345 }
2346 
2347 sbd_error_t *
2348 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2349 {
2350 	drmach_mem_t *mp;
2351 
2352 	if (!DRMACH_IS_MEM_ID(id))
2353 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2354 
2355 	mp = (drmach_mem_t *)id;
2356 
2357 	*pa = mp->base_pa;
2358 	return (NULL);
2359 }
2360 
2361 sbd_error_t *
2362 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2363 {
2364 	drmach_mem_t	*mem;
2365 	int		rv;
2366 	struct memlist	*mlist;
2367 
2368 	if (!DRMACH_IS_MEM_ID(id))
2369 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2370 
2371 	mem = (drmach_mem_t *)id;
2372 	mlist = memlist_dup(mem->memlist);
2373 
2374 #ifdef DEBUG
2375 	/*
2376 	 * Make sure the incoming memlist doesn't already
2377 	 * intersect with what's present in the system (phys_install).
2378 	 */
2379 	memlist_read_lock();
2380 	rv = memlist_intersect(phys_install, mlist);
2381 	memlist_read_unlock();
2382 	if (rv) {
2383 		DRMACH_PR("Derived memlist intersects"
2384 			" with phys_install\n");
2385 		memlist_dump(mlist);
2386 
2387 		DRMACH_PR("phys_install memlist:\n");
2388 		memlist_dump(phys_install);
2389 
2390 		memlist_delete(mlist);
2391 		return (DRMACH_INTERNAL_ERROR());
2392 	}
2393 
2394 	DRMACH_PR("Derived memlist:");
2395 	memlist_dump(mlist);
2396 #endif
2397 
2398 	*ml = mlist;
2399 
2400 	return (NULL);
2401 }
2402 
2403 sbd_error_t *
2404 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2405 {
2406 	drmach_mem_t	*mem;
2407 
2408 	if (!DRMACH_IS_MEM_ID(id))
2409 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2410 
2411 	mem = (drmach_mem_t *)id;
2412 
2413 	*bytes = mem->slice_size;
2414 
2415 	return (NULL);
2416 }
2417 
2418 
2419 /* ARGSUSED */
2420 processorid_t
2421 drmach_mem_cpu_affinity(drmachid_t id)
2422 {
2423 	return (CPU_CURRENT);
2424 }
2425 
2426 static sbd_error_t *
2427 drmach_mem_release(drmachid_t id)
2428 {
2429 	if (!DRMACH_IS_MEM_ID(id))
2430 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2431 	return (NULL);
2432 }
2433 
2434 static sbd_error_t *
2435 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2436 {
2437 	drmach_mem_t *dp;
2438 	uint64_t	 pa, slice_size;
2439 	struct memlist	*ml;
2440 
2441 	ASSERT(DRMACH_IS_MEM_ID(id));
2442 	dp = id;
2443 
2444 	/* get starting physical address of target memory */
2445 	pa = dp->base_pa;
2446 
2447 	/* round down to slice boundary */
2448 	slice_size = dp->slice_size;
2449 	pa &= ~(slice_size - 1);
2450 
2451 	/* stop at first span that is in slice */
2452 	memlist_read_lock();
2453 	for (ml = phys_install; ml; ml = ml->next)
2454 		if (ml->address >= pa && ml->address < pa + slice_size)
2455 			break;
2456 	memlist_read_unlock();
2457 
2458 	stat->assigned = dp->dev.bp->assigned;
2459 	stat->powered = dp->dev.bp->powered;
2460 	stat->configured = (ml != NULL);
2461 	stat->busy = dp->dev.busy;
2462 	strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2463 	stat->info[0] = '\0';
2464 
2465 	return (NULL);
2466 }
2467 
2468 
2469 sbd_error_t *
2470 drmach_board_deprobe(drmachid_t id)
2471 {
2472 	drmach_board_t	*bp;
2473 
2474 	if (!DRMACH_IS_BOARD_ID(id))
2475 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2476 
2477 	bp = id;
2478 
2479 	cmn_err(CE_CONT, "DR: PROM detach board %d\n", bp->bnum);
2480 
2481 	if (bp->tree) {
2482 		drmach_node_dispose(bp->tree);
2483 		bp->tree = NULL;
2484 	}
2485 	if (bp->devices) {
2486 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2487 		bp->devices = NULL;
2488 	}
2489 
2490 	bp->boot_board = 0;
2491 
2492 	return (NULL);
2493 }
2494 
2495 /*ARGSUSED*/
2496 static sbd_error_t *
2497 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2498 {
2499 	drmach_board_t		*bp = (drmach_board_t *)id;
2500 	sbd_error_t		*err = NULL;
2501 	int	rv;
2502 
2503 	if (!DRMACH_IS_BOARD_ID(id))
2504 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2505 
2506 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2507 	rv = opl_probe_sb(bp->bnum);
2508 	if (rv != 0) {
2509 		err = drerr_new(0, EOPL_PROBE, bp->cm.name);
2510 		return (err);
2511 	}
2512 	return (err);
2513 }
2514 
2515 /*ARGSUSED*/
2516 static sbd_error_t *
2517 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2518 {
2519 	drmach_board_t	*bp;
2520 	sbd_error_t	*err = NULL;
2521 	int	rv;
2522 
2523 	if (!DRMACH_IS_BOARD_ID(id))
2524 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2525 	bp = (drmach_board_t *)id;
2526 
2527 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2528 
2529 	rv = opl_unprobe_sb(bp->bnum);
2530 	if (rv != 0) {
2531 		err = drerr_new(0, EOPL_DEPROBE, bp->cm.name);
2532 	}
2533 
2534 	return (err);
2535 }
2536 
2537 
2538 /*ARGSUSED*/
2539 sbd_error_t *
2540 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2541 {
2542 	struct memlist	*ml;
2543 	uint64_t	src_pa;
2544 	uint64_t	dst_pa;
2545 	uint64_t	dst;
2546 
2547 	dst_pa = va_to_pa(&dst);
2548 
2549 	memlist_read_lock();
2550 	for (ml = phys_install; ml; ml = ml->next) {
2551 		uint64_t	nbytes;
2552 
2553 		src_pa = ml->address;
2554 		nbytes = ml->size;
2555 
2556 		while (nbytes != 0ull) {
2557 
2558 			/* copy 32 bytes at arc_pa to dst_pa */
2559 			bcopy32_il(src_pa, dst_pa);
2560 
2561 			/* increment by 32 bytes */
2562 			src_pa += (4 * sizeof (uint64_t));
2563 
2564 			/* decrement by 32 bytes */
2565 			nbytes -= (4 * sizeof (uint64_t));
2566 		}
2567 	}
2568 	memlist_read_unlock();
2569 
2570 	return (NULL);
2571 }
2572 
2573 static struct {
2574 	const char	*name;
2575 	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
2576 } drmach_pt_arr[] = {
2577 	{ "readmem",		drmach_pt_readmem		},
2578 	{ "ikprobe",	drmach_pt_ikprobe	},
2579 	{ "ikdeprobe",	drmach_pt_ikdeprobe	},
2580 
2581 	/* the following line must always be last */
2582 	{ NULL,			NULL				}
2583 };
2584 
2585 /*ARGSUSED*/
2586 sbd_error_t *
2587 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2588 {
2589 	int		i;
2590 	sbd_error_t	*err;
2591 
2592 	i = 0;
2593 	while (drmach_pt_arr[i].name != NULL) {
2594 		int len = strlen(drmach_pt_arr[i].name);
2595 
2596 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2597 			break;
2598 
2599 		i += 1;
2600 	}
2601 
2602 	if (drmach_pt_arr[i].name == NULL)
2603 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2604 	else
2605 		err = (*drmach_pt_arr[i].handler)(id, opts);
2606 
2607 	return (err);
2608 }
2609 
2610 sbd_error_t *
2611 drmach_release(drmachid_t id)
2612 {
2613 	drmach_common_t *cp;
2614 
2615 	if (!DRMACH_IS_DEVICE_ID(id))
2616 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2617 	cp = id;
2618 
2619 	return (cp->release(id));
2620 }
2621 
2622 sbd_error_t *
2623 drmach_status(drmachid_t id, drmach_status_t *stat)
2624 {
2625 	drmach_common_t *cp;
2626 	sbd_error_t	*err;
2627 
2628 	rw_enter(&drmach_boards_rwlock, RW_READER);
2629 
2630 	if (!DRMACH_IS_ID(id)) {
2631 		rw_exit(&drmach_boards_rwlock);
2632 		return (drerr_new(0, EOPL_NOTID, NULL));
2633 	}
2634 	cp = (drmach_common_t *)id;
2635 	err = cp->status(id, stat);
2636 
2637 	rw_exit(&drmach_boards_rwlock);
2638 
2639 	return (err);
2640 }
2641 
2642 static sbd_error_t *
2643 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2644 {
2645 	drmach_common_t *cp;
2646 
2647 	if (!DRMACH_IS_ID(id))
2648 		return (drerr_new(0, EOPL_NOTID, NULL));
2649 	cp = id;
2650 
2651 	return (cp->status(id, stat));
2652 }
2653 
2654 /*ARGSUSED*/
2655 sbd_error_t *
2656 drmach_unconfigure(drmachid_t id, int flags)
2657 {
2658 	drmach_device_t *dp;
2659 	dev_info_t	*rdip, *fdip = NULL;
2660 	char name[OBP_MAXDRVNAME];
2661 	int rv;
2662 
2663 	if (DRMACH_IS_CPU_ID(id))
2664 		return (NULL);
2665 
2666 	if (!DRMACH_IS_DEVICE_ID(id))
2667 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2668 
2669 	dp = id;
2670 
2671 	rdip = dp->node->n_getdip(dp->node);
2672 
2673 	ASSERT(rdip);
2674 
2675 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2676 
2677 	if (rv)
2678 		return (NULL);
2679 
2680 	/*
2681 	 * Note: FORCE flag is no longer necessary under devfs
2682 	 */
2683 
2684 	ASSERT(e_ddi_branch_held(rdip));
2685 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2686 		sbd_error_t	*err;
2687 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2688 
2689 		/*
2690 		 * If non-NULL, fdip is returned held and must be released.
2691 		 */
2692 		if (fdip != NULL) {
2693 			(void) ddi_pathname(fdip, path);
2694 			ndi_rele_devi(fdip);
2695 		} else {
2696 			(void) ddi_pathname(rdip, path);
2697 		}
2698 
2699 		err = drerr_new(1, EOPL_DRVFAIL, path);
2700 
2701 		kmem_free(path, MAXPATHLEN);
2702 
2703 		return (err);
2704 	}
2705 
2706 	return (NULL);
2707 }
2708 
2709 
2710 int
2711 drmach_cpu_poweron(struct cpu *cp)
2712 {
2713 	int bnum, cpuid, onb_core_num, strand_id;
2714 	drmach_board_t *bp;
2715 
2716 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2717 
2718 	cpuid = cp->cpu_id;
2719 	bnum = LSB_ID(cpuid);
2720 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2721 	strand_id = STRAND_ID(cpuid);
2722 	bp = drmach_get_board_by_bnum(bnum);
2723 
2724 	ASSERT(bp);
2725 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2726 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2727 			HOTADD_CPU) != 0) {
2728 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2729 				onb_core_num, bnum);
2730 			return (EIO);
2731 		}
2732 	}
2733 
2734 	ASSERT(MUTEX_HELD(&cpu_lock));
2735 
2736 	if (drmach_cpu_start(cp) != 0) {
2737 		if (bp->cores[onb_core_num].core_started == 0) {
2738 			/*
2739 			 * we must undo the hotadd or no one will do that
2740 			 * If this fails, we will do this again in
2741 			 * drmach_board_disconnect.
2742 			 */
2743 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2744 				HOTREMOVE_CPU) != 0) {
2745 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2746 					"on board %d\n",
2747 					onb_core_num, bnum);
2748 			}
2749 		}
2750 		return (EBUSY);
2751 	} else {
2752 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2753 		return (0);
2754 	}
2755 }
2756 
2757 int
2758 drmach_cpu_poweroff(struct cpu *cp)
2759 {
2760 	int 		rv = 0;
2761 	processorid_t	cpuid = cp->cpu_id;
2762 
2763 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2764 
2765 	ASSERT(MUTEX_HELD(&cpu_lock));
2766 
2767 	/*
2768 	 * Capture all CPUs (except for detaching proc) to prevent
2769 	 * crosscalls to the detaching proc until it has cleared its
2770 	 * bit in cpu_ready_set.
2771 	 *
2772 	 * The CPU's remain paused and the prom_mutex is known to be free.
2773 	 * This prevents the x-trap victim from blocking when doing prom
2774 	 * IEEE-1275 calls at a high PIL level.
2775 	 */
2776 
2777 	promsafe_pause_cpus();
2778 
2779 	/*
2780 	 * Quiesce interrupts on the target CPU. We do this by setting
2781 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2782 	 * prevent it from receiving cross calls and cross traps.
2783 	 * This prevents the processor from receiving any new soft interrupts.
2784 	 */
2785 	mp_cpu_quiesce(cp);
2786 
2787 	rv = prom_stopcpu_bycpuid(cpuid);
2788 	if (rv == 0)
2789 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2790 
2791 	start_cpus();
2792 
2793 	if (rv == 0) {
2794 		int bnum, onb_core_num, strand_id;
2795 		drmach_board_t *bp;
2796 
2797 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2798 
2799 		bnum = LSB_ID(cpuid);
2800 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2801 		strand_id = STRAND_ID(cpuid);
2802 		bp = drmach_get_board_by_bnum(bnum);
2803 		ASSERT(bp);
2804 
2805 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2806 		if (bp->cores[onb_core_num].core_started == 0) {
2807 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2808 				HOTREMOVE_CPU) != 0) {
2809 				cmn_err(CE_WARN,
2810 					"Failed to remove CMP %d LSB %d\n",
2811 					onb_core_num, bnum);
2812 				return (EIO);
2813 			}
2814 		}
2815 	}
2816 
2817 	return (rv);
2818 }
2819 
/*
 * Both arguments are ignored; 0 is returned unconditionally.
 */
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	return (0);
}
2826 
/*
 * Intentionally empty: this implementation does nothing here.
 */
void
drmach_suspend_last(void)
{
}
2831 
/*
 * Intentionally empty: this implementation does nothing here.
 */
void
drmach_resume_first(void)
{
}
2836 
2837 /*
2838  * Log a DR sysevent.
2839  * Return value: 0 success, non-zero failure.
2840  */
2841 int
2842 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2843 {
2844 	sysevent_t			*ev;
2845 	sysevent_id_t			eid;
2846 	int				rv, km_flag;
2847 	sysevent_value_t		evnt_val;
2848 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2849 	char				attach_pnt[MAXNAMELEN];
2850 
2851 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2852 	attach_pnt[0] = '\0';
2853 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2854 		rv = -1;
2855 		goto logexit;
2856 	}
2857 	if (verbose)
2858 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2859 			attach_pnt, hint, flag, verbose);
2860 
2861 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2862 		SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2863 		rv = -2;
2864 		goto logexit;
2865 	}
2866 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2867 	evnt_val.value.sv_string = attach_pnt;
2868 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID,
2869 		&evnt_val, km_flag)) != 0)
2870 		goto logexit;
2871 
2872 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2873 	evnt_val.value.sv_string = hint;
2874 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT,
2875 		&evnt_val, km_flag)) != 0) {
2876 		sysevent_free_attr(evnt_attr_list);
2877 		goto logexit;
2878 	}
2879 
2880 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2881 
2882 	/*
2883 	 * Log the event but do not sleep waiting for its
2884 	 * delivery. This provides insulation from syseventd.
2885 	 */
2886 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2887 
2888 logexit:
2889 	if (ev)
2890 		sysevent_free(ev);
2891 	if ((rv != 0) && verbose)
2892 		cmn_err(CE_WARN,
2893 			"drmach_log_sysevent failed (rv %d) for %s  %s\n",
2894 			rv, attach_pnt, hint);
2895 
2896 	return (rv);
2897 }
2898 
2899 #define	OPL_DR_STATUS_PROP "dr-status"
2900 
2901 static int
2902 opl_check_dr_status()
2903 {
2904 	pnode_t	node;
2905 	int	rtn, len;
2906 	char	*str;
2907 
2908 	node = prom_rootnode();
2909 	if (node == OBP_BADNODE) {
2910 		return (1);
2911 	}
2912 
2913 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2914 	if (len == -1) {
2915 		/*
2916 		 * dr-status doesn't exist when DR is activated and
2917 		 * any warning messages aren't needed.
2918 		 */
2919 		return (1);
2920 	}
2921 
2922 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2923 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2924 	kmem_free(str, len + 1);
2925 	if (rtn == -1) {
2926 		return (1);
2927 	} else {
2928 		return (0);
2929 	}
2930 }
2931 
2932 static sbd_error_t *
2933 drmach_get_scf_addr(uint64_t *addr)
2934 {
2935 	caddr_t *scf_cmd_addr;
2936 	uint64_t pa;
2937 	scf_cmd_addr = (caddr_t *)modgetsymvalue("scf_avail_cmd_reg_vaddr", 0);
2938 	if (scf_cmd_addr != NULL) {
2939 		pa = (uint64_t)va_to_pa(*scf_cmd_addr);
2940 		*addr = pa;
2941 		return (NULL);
2942 	}
2943 
2944 	return (DRMACH_INTERNAL_ERROR());
2945 }
2946 
2947 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2948 
2949 static struct memlist *
2950 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2951 	struct memlist *mlist, uint64_t base, uint64_t len)
2952 {
2953 	struct memlist	*ml, *tl, *nl;
2954 
2955 	if (len == 0ull)
2956 		return (NULL);
2957 
2958 	if (mlist == NULL) {
2959 		mlist = p->free_mlist;
2960 		if (mlist == NULL)
2961 			return (NULL);
2962 		p->free_mlist = mlist->next;
2963 		mlist->address = base;
2964 		mlist->size = len;
2965 		mlist->next = mlist->prev = NULL;
2966 
2967 		return (mlist);
2968 	}
2969 
2970 	for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
2971 		if (base < ml->address) {
2972 			if ((base + len) < ml->address) {
2973 				nl = p->free_mlist;
2974 				if (nl == NULL)
2975 					return (NULL);
2976 				p->free_mlist = nl->next;
2977 				nl->address = base;
2978 				nl->size = len;
2979 				nl->next = ml;
2980 				if ((nl->prev = ml->prev) != NULL)
2981 					nl->prev->next = nl;
2982 				ml->prev = nl;
2983 				if (mlist == ml)
2984 					mlist = nl;
2985 			} else {
2986 				ml->size = MAX((base + len),
2987 					(ml->address + ml->size)) -
2988 					base;
2989 				ml->address = base;
2990 			}
2991 			break;
2992 
2993 		} else if (base <= (ml->address + ml->size)) {
2994 			ml->size = MAX((base + len),
2995 				(ml->address + ml->size)) -
2996 				MIN(ml->address, base);
2997 			ml->address = MIN(ml->address, base);
2998 			break;
2999 		}
3000 	}
3001 	if (ml == NULL) {
3002 		nl = p->free_mlist;
3003 		if (nl == NULL)
3004 			return (NULL);
3005 		p->free_mlist = nl->next;
3006 		nl->address = base;
3007 		nl->size = len;
3008 		nl->next = NULL;
3009 		nl->prev = tl;
3010 		tl->next = nl;
3011 	}
3012 
3013 	return (mlist);
3014 }
3015 
3016 /*
3017  * The routine performs the necessary memory COPY and MC adr SWITCH.
3018  * Both operations MUST be at the same "level" so that the stack is
3019  * maintained correctly between the copy and switch.  The switch
3020  * portion implements a caching mechanism to guarantee the code text
3021  * is cached prior to execution.  This is to guard against possible
3022  * memory access while the MC adr's are being modified.
3023  *
 * IMPORTANT: The drmach_copy_rename_end() function must immediately
3025  * follow drmach_copy_rename_prog__relocatable() so that the correct
3026  * "length" of the drmach_copy_rename_prog__relocatable can be
3027  * calculated.  This routine MUST be a LEAF function, i.e. it can
3028  * make NO function calls, primarily for two reasons:
3029  *
3030  *	1. We must keep the stack consistent across the "switch".
3031  *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
3033  *	   a copied version in a different area of memory, thus
3034  *	   the relative offsets will be bogus.
3035  *
3036  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3037  * providers (and anything else, for that matter) that this
3038  * function's text is manually relocated elsewhere before it is
3039  * executed.  That is, it cannot be safely instrumented with any
3040  * methodology that is PC-relative.
3041  */
3042 
/*
 * fmem_timeout is multiplied by system_clock_freq in
 * drmach_copy_rename_init() to set a delay of fmem_timeout
 * seconds for the rename command.  The spec says 15 seconds
 * is enough but the Fujitsu HW team suggested 17 seconds.
 *
 * min_copy_size_per_sec is the divisor drmach_copy_rename_init()
 * applies to the per-CPU copy size (plus 2 seconds of slack) to
 * derive the copy delay.
 *
 * drmach_disable_mcopy, when non-zero, makes
 * drmach_copy_rename_init() put only the current CPU in the
 * copy set instead of every ready, active CPU.
 */
static int	fmem_timeout = 17;
static int	min_copy_size_per_sec = 20 * 1024 * 1024;
int drmach_disable_mcopy = 0;
3052 
/*
 * Spin until more than (ms * freq) / 1000 ticks, as reported by
 * drmach_get_stick_il(), have elapsed, calling drmach_sleep_il() on
 * each iteration.
 *
 * ms   - delay in milliseconds
 * freq - tick frequency (ticks per second)
 *
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions may be passed.  The expansion is a brace-enclosed block;
 * use it as a stand-alone statement.
 */
#define	DR_DELAY_IL(ms, freq)					\
	{							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;	\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	}
3066 
/*
 * Copy-rename routine.  drmach_copy_rename_init() copies this function's
 * text into the critical page and records the copy's address in
 * prog->critical->run.
 *
 * prog  - the copy-rename program state
 * cpuid - id of the CPU executing this invocation
 *
 * The CPU whose id equals prog->data->cpuid acts as the master; all
 * others are slaves.  The routine proceeds in three phases:
 *
 *   1. Rendezvous: every CPU posts FMEM_LOOP_COPY_READY in its
 *	prog->critical->stat[] slot.  The master waits for each CPU in
 *	cpu_slave_set, then publishes FMEM_LOOP_COPY_READY in
 *	prog->data->fmem_status.stat, which releases the slaves.
 *   2. Copy: each CPU in cpu_copy_set copies the spans on its own
 *	prog->data->cpu_ml[] list 32 bytes at a time, then flushes its
 *	cache.
 *   3. Switch: the master waits for each slave to reach
 *	FMEM_LOOP_FMEM_READY and then calls prog->critical->fmem(); a
 *	slave calls prog->critical->loop().
 *
 * Returns, on the master: FMEM_XC_TIMEOUT, FMEM_COPY_ERROR or
 * FMEM_COPY_TIMEOUT if a wait fails, otherwise the value returned by
 * prog->critical->fmem().  On a slave: FMEM_TERMINATE if the master has
 * flagged an error, otherwise 0 after storing the result of
 * prog->critical->loop() in prog->data->error[cpuid].
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
	int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	/* copy_start is assigned and used on the master path only */
	uint64_t		copy_start;

	/* announce that this CPU has reached the rendezvous */
	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: deadline for the slaves to show up */
		limit = drmach_get_stick_il();
		limit += prog->critical->delay;

		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			/* wait for all CPU's to be ready */
			/*
			 * NOTE(review): the deadline is tested only after
			 * the spin below has exited, so a slave that never
			 * becomes ready is not caught by it -- confirm
			 * whether that is intended.
			 */
			    for (;;) {
				if (prog->critical->stat[i] ==
					FMEM_LOOP_COPY_READY) {
					break;
				}
			    }
			    curr = drmach_get_stick_il();
			    if (curr > limit) {
				prog->data->fmem_status.error =
					FMEM_XC_TIMEOUT;
				return (FMEM_XC_TIMEOUT);
			    }
			}
		}
		/* release the slaves and note when the copy started */
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's go-ahead or an error */
		for (;;) {
			if (prog->data->fmem_status.stat ==
				FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = FMEM_TERMINATE;
				return (FMEM_TERMINATE);
			}
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
	    for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->next) {
		uint64_t	s_pa, t_pa;
		uint64_t	nbytes;

		/* span addresses are offsets from the copy base addresses */
		s_pa = prog->data->s_copybasepa + ml->address;
		t_pa = prog->data->t_copybasepa + ml->address;
		nbytes = ml->size;

		/* the loop only ends if ml->size is a multiple of 32 */
		while (nbytes != 0ull) {
			/* If the master has detected error, we just bail out */
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = FMEM_TERMINATE;
				return (FMEM_TERMINATE);
			}
			/*
			 * This copy does NOT use an ASI
			 * that avoids the Ecache, therefore
			 * the dst_pa addresses may remain
			 * in our Ecache after the dst_pa
			 * has been removed from the system.
			 * A subsequent write-back to memory
			 * will cause an ARB-stop because the
			 * physical address no longer exists
			 * in the system. Therefore we must
			 * flush out local Ecache after we
			 * finish the copy.
			 */

			/* copy 32 bytes at s_pa to t_pa */
			bcopy32_il(s_pa, t_pa);

			/* increment the counter to signal that we are alive */
			prog->stat->nbytes[cpuid] += 32;

			/* increment by 32 bytes */
			s_pa += (4 * sizeof (uint64_t));
			t_pa += (4 * sizeof (uint64_t));

			/* decrement by 32 bytes */
			nbytes -= (4 * sizeof (uint64_t));
		}
	    }
	    prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
	    membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.
	 */
	flush_cache_il();

	/*
	 * drmach_fmem_exec_script()
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* deadline for the slaves to finish copying */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
			    for (;;) {
				/* we get FMEM_LOOP_FMEM_READY in normal case */
				if (prog->critical->stat[i] ==
					FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->critical->stat[i] ==
					FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
						FMEM_COPY_ERROR;
					return (FMEM_COPY_ERROR);
				}
				/* if we have not reached limit, wait more */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				/* past the deadline: record the straggler */
				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time =
					curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/* no progress, perhaps just finished */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
						FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->critical->stat[i] ==
						FMEM_COPY_ERROR) {
						prog->data->fmem_status.error =
							FMEM_COPY_ERROR;
						return (FMEM_COPY_ERROR);
					}
					/* byte counter did not move: give up */
					prog->data->fmem_status.error =
					    FMEM_COPY_TIMEOUT;
					return (FMEM_COPY_TIMEOUT);
				}
			    }
			}
		}
		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 */
		rtn = prog->critical->loop((void *)(prog->critical),
			PAGESIZE, (void *)&(prog->critical->stat[cpuid]));

		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}
3257 
/*
 * Empty marker function.  drmach_copy_rename_init() subtracts the address
 * of drmach_copy_rename_prog__relocatable from the address of this
 * function to obtain the number of bytes of text to copy.
 */
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function's location MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}
3269 
3270 
3271 static void
3272 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3273 {
3274 	struct memlist *ml;
3275 	caddr_t buf;
3276 	int nbytes, s;
3277 
3278 	nbytes = PAGESIZE;
3279 	s = roundup(sizeof (struct memlist), sizeof (void *));
3280 	p->free_mlist = NULL;
3281 	buf = p->memlist_buffer;
3282 	while (nbytes >= sizeof (struct memlist)) {
3283 		ml = (struct memlist *)buf;
3284 		ml->next = p->free_mlist;
3285 		p->free_mlist = ml;
3286 		buf += s;
3287 		nbytes -= s;
3288 	}
3289 }
3290 
3291 sbd_error_t *
3292 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3293 	struct memlist *c_ml, drmachid_t *pgm_id)
3294 {
3295 	drmach_mem_t	*s_mem;
3296 	drmach_mem_t	*t_mem;
3297 	struct memlist	*x_ml;
3298 	uint64_t	s_copybasepa, t_copybasepa;
3299 	uint_t		len;
3300 	caddr_t		bp, wp;
3301 	int			s_bd, t_bd, cpuid, active_cpus, i;
3302 	uint64_t		c_addr;
3303 	size_t			c_size, copy_sz, sz;
3304 	static sbd_error_t	*drmach_get_scf_addr(uint64_t *);
3305 	extern void		drmach_fmem_loop_script();
3306 	extern void		drmach_fmem_loop_script_rtn();
3307 	extern int		drmach_fmem_exec_script();
3308 	extern void		drmach_fmem_exec_script_end();
3309 	sbd_error_t	*err;
3310 	drmach_copy_rename_program_t *prog;
3311 	void		(*mc_suspend)(void);
3312 	void		(*mc_resume)(void);
3313 	int		(*scf_fmem_start)(int, int);
3314 	int		(*scf_fmem_end)(void);
3315 	int		(*scf_fmem_cancel)(void);
3316 
3317 	if (!DRMACH_IS_MEM_ID(s_id))
3318 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3319 	if (!DRMACH_IS_MEM_ID(t_id))
3320 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3321 
3322 	for (i = 0; i < NCPU; i++) {
3323 		int lsb_id, onb_core_num, strand_id;
3324 		drmach_board_t *bp;
3325 
3326 		/*
3327 		 * this kind of CPU will spin in cache
3328 		 */
3329 		if (CPU_IN_SET(cpu_ready_set, i))
3330 			continue;
3331 
3332 		/*
3333 		 * Now check for any inactive CPU's that
3334 		 * have been hotadded.  This can only occur in
3335 		 * error condition in drmach_cpu_poweron().
3336 		 */
3337 		lsb_id = LSB_ID(i);
3338 		onb_core_num = ON_BOARD_CORE_NUM(i);
3339 		strand_id = STRAND_ID(i);
3340 		bp = drmach_get_board_by_bnum(lsb_id);
3341 		if (bp == NULL)
3342 			continue;
3343 		if (bp->cores[onb_core_num].core_hotadded &
3344 		    (1 << strand_id)) {
3345 		    if (!(bp->cores[onb_core_num].core_started &
3346 			(1 << strand_id))) {
3347 			return (DRMACH_INTERNAL_ERROR());
3348 		    }
3349 		}
3350 	}
3351 
3352 	mc_suspend = (void (*)(void))
3353 	    modgetsymvalue("opl_mc_suspend", 0);
3354 	mc_resume = (void (*)(void))
3355 	    modgetsymvalue("opl_mc_resume", 0);
3356 
3357 	if (mc_suspend == NULL || mc_resume == NULL) {
3358 		return (DRMACH_INTERNAL_ERROR());
3359 	}
3360 
3361 	scf_fmem_start = (int (*)(int, int))
3362 	    modgetsymvalue("scf_fmem_start", 0);
3363 	if (scf_fmem_start == NULL) {
3364 		return (DRMACH_INTERNAL_ERROR());
3365 	}
3366 	scf_fmem_end = (int (*)(void))
3367 	    modgetsymvalue("scf_fmem_end", 0);
3368 	if (scf_fmem_end == NULL) {
3369 		return (DRMACH_INTERNAL_ERROR());
3370 	}
3371 	scf_fmem_cancel = (int (*)(void))
3372 	    modgetsymvalue("scf_fmem_cancel", 0);
3373 	if (scf_fmem_cancel == NULL) {
3374 		return (DRMACH_INTERNAL_ERROR());
3375 	}
3376 	s_mem = s_id;
3377 	t_mem = t_id;
3378 
3379 	s_bd = s_mem->dev.bp->bnum;
3380 	t_bd = t_mem->dev.bp->bnum;
3381 
3382 	/* calculate source and target base pa */
3383 
3384 	s_copybasepa = s_mem->slice_base;
3385 	t_copybasepa = t_mem->slice_base;
3386 
3387 	/* adjust copy memlist addresses to be relative to copy base pa */
3388 	x_ml = c_ml;
3389 	while (x_ml != NULL) {
3390 		x_ml->address -= s_copybasepa;
3391 		x_ml = x_ml->next;
3392 	}
3393 
3394 	/*
3395 	 * bp will be page aligned, since we're calling
3396 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3397 	 */
3398 	wp = bp = kmem_zalloc(DRMACH_FMEM_LOCKED_PAGES * PAGESIZE,
3399 		KM_SLEEP);
3400 
3401 	prog = (drmach_copy_rename_program_t *)(wp +
3402 		DRMACH_FMEM_DATA_PAGE * PAGESIZE);
3403 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3404 		sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3405 
3406 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3407 		<= ((uint64_t)prog + PAGESIZE));
3408 
3409 	prog->critical = (drmach_copy_rename_critical_t *)
3410 		(wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3411 
3412 	prog->memlist_buffer = (caddr_t)(wp +
3413 		DRMACH_FMEM_MLIST_PAGE * PAGESIZE);
3414 
3415 	prog->stat = (drmach_cr_stat_t *)(wp +
3416 		DRMACH_FMEM_STAT_PAGE * PAGESIZE);
3417 
3418 	/* LINTED */
3419 	ASSERT(sizeof (drmach_cr_stat_t)
3420 		<= ((DRMACH_FMEM_LOCKED_PAGES - DRMACH_FMEM_STAT_PAGE)
3421 		* PAGESIZE));
3422 
3423 	prog->critical->scf_reg_base = (uint64_t)-1;
3424 	err = drmach_get_scf_addr(&(prog->critical->scf_reg_base));
3425 	if (err) {
3426 		kmem_free(wp, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3427 		return (err);
3428 	}
3429 
3430 	prog->critical->scf_td[0] = (s_bd & 0xff);
3431 	prog->critical->scf_td[1] = (t_bd & 0xff);
3432 	for (i = 2; i < 15; i++) {
3433 		prog->critical->scf_td[i]   = 0;
3434 	}
3435 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3436 
3437 	bp = (caddr_t)prog->critical;
3438 	len = sizeof (drmach_copy_rename_critical_t);
3439 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3440 
3441 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3442 		(ulong_t)drmach_copy_rename_prog__relocatable);
3443 
3444 	/*
3445 	 * We always leave 1K nop's to prevent the processor from
3446 	 * speculative execution that causes memory access
3447 	 */
3448 	wp = wp + len + 1024;
3449 
3450 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3451 		(ulong_t)drmach_fmem_exec_script);
3452 	/* this is the entry point of the loop script */
3453 	wp = wp + len + 1024;
3454 
3455 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3456 		(ulong_t)drmach_fmem_loop_script);
3457 	wp = wp + len + 1024;
3458 
3459 	/* now we make sure there is 1K extra */
3460 
3461 	if ((wp - bp) > PAGESIZE) {
3462 		kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3463 		return (DRMACH_INTERNAL_ERROR());
3464 	}
3465 
3466 	bp = (caddr_t)prog->critical;
3467 	len = sizeof (drmach_copy_rename_critical_t);
3468 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3469 
3470 	prog->critical->run = (int (*)())(wp);
3471 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3472 		(ulong_t)drmach_copy_rename_prog__relocatable);
3473 
3474 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3475 
3476 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3477 
3478 	prog->critical->fmem = (int (*)())(wp);
3479 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3480 		(ulong_t)drmach_fmem_exec_script);
3481 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3482 
3483 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3484 		(ulong_t)drmach_fmem_exec_script);
3485 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3486 
3487 	prog->critical->loop = (int (*)())(wp);
3488 	len = (int)((ulong_t)drmach_fmem_exec_script -
3489 		(ulong_t)drmach_fmem_loop_script);
3490 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3491 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3492 		(ulong_t)drmach_fmem_loop_script);
3493 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3494 
3495 	/* now we are committed, call SCF, soft suspend mac patrol */
3496 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3497 		kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3498 		return (DRMACH_INTERNAL_ERROR());
3499 	}
3500 	prog->data->scf_fmem_end = scf_fmem_end;
3501 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3502 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3503 	/* soft suspend mac patrol */
3504 	(*mc_suspend)();
3505 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3506 	prog->data->mc_resume = mc_resume;
3507 
3508 	prog->critical->inst_loop_ret  =
3509 		*(uint64_t *)(prog->critical->loop_rtn);
3510 
3511 	/*
3512 	 * 0x30800000 is op code "ba,a	+0"
3513 	 */
3514 
3515 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3516 
3517 	/*
3518 	 * set the value of SCF FMEM TIMEOUT
3519 	 */
3520 	prog->critical->delay = fmem_timeout * system_clock_freq;
3521 
3522 	prog->data->s_mem = (drmachid_t)s_mem;
3523 	prog->data->t_mem = (drmachid_t)t_mem;
3524 
3525 	cpuid = CPU->cpu_id;
3526 	prog->data->cpuid = cpuid;
3527 	prog->data->cpu_ready_set = cpu_ready_set;
3528 	prog->data->cpu_slave_set = cpu_ready_set;
3529 	prog->data->slowest_cpuid = (processorid_t)-1;
3530 	prog->data->copy_wait_time = 0;
3531 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3532 
3533 	for (i = 0; i < NCPU; i++) {
3534 		prog->data->cpu_ml[i] = NULL;
3535 	}
3536 
3537 	active_cpus = 0;
3538 	if (drmach_disable_mcopy) {
3539 		active_cpus = 1;
3540 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3541 	} else {
3542 		for (i = 0; i < NCPU; i++) {
3543 			if (CPU_IN_SET(cpu_ready_set, i) &&
3544 				CPU_ACTIVE(cpu[i])) {
3545 				CPUSET_ADD(prog->data->cpu_copy_set, i);
3546 				active_cpus++;
3547 			}
3548 		}
3549 	}
3550 
3551 	drmach_setup_memlist(prog);
3552 
3553 	x_ml = c_ml;
3554 	sz = 0;
3555 	while (x_ml != NULL) {
3556 		sz += x_ml->size;
3557 		x_ml = x_ml->next;
3558 	}
3559 
3560 	copy_sz = sz/active_cpus;
3561 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3562 
3563 	while (sz > copy_sz*active_cpus) {
3564 		copy_sz += MMU_PAGESIZE4M;
3565 	}
3566 
3567 	prog->data->stick_freq = system_clock_freq;
3568 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3569 		system_clock_freq;
3570 
3571 	x_ml = c_ml;
3572 	c_addr = x_ml->address;
3573 	c_size = x_ml->size;
3574 
3575 	for (i = 0; i < NCPU; i++) {
3576 		prog->stat->nbytes[i] = 0;
3577 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3578 			continue;
3579 		}
3580 		sz = copy_sz;
3581 
3582 		while (sz) {
3583 			if (c_size > sz) {
3584 				prog->data->cpu_ml[i] =
3585 					drmach_memlist_add_span(prog,
3586 					prog->data->cpu_ml[i],
3587 					c_addr, sz);
3588 				c_addr += sz;
3589 				c_size -= sz;
3590 				break;
3591 			} else {
3592 				sz -= c_size;
3593 				prog->data->cpu_ml[i] = drmach_memlist_add_span(
3594 					prog, prog->data->cpu_ml[i],
3595 						c_addr, c_size);
3596 				x_ml = x_ml->next;
3597 				if (x_ml != NULL) {
3598 					c_addr = x_ml->address;
3599 					c_size = x_ml->size;
3600 				} else {
3601 					goto end;
3602 				}
3603 			}
3604 		}
3605 	}
3606 end:
3607 	prog->data->s_copybasepa = s_copybasepa;
3608 	prog->data->t_copybasepa = t_copybasepa;
3609 	prog->data->c_ml = c_ml;
3610 	*pgm_id = prog;
3611 
3612 	return (NULL);
3613 }
3614 
3615 sbd_error_t *
3616 drmach_copy_rename_fini(drmachid_t id)
3617 {
3618 	drmach_copy_rename_program_t	*prog = id;
3619 	sbd_error_t			*err = NULL;
3620 	int				rv;
3621 
3622 	/*
3623 	 * Note that we have to delay calling SCF to find out the
3624 	 * status of the FMEM operation here because SCF cannot
3625 	 * respond while it is suspended.
3626 	 * This create a small window when we are sure about the
3627 	 * base address of the system board.
3628 	 * If there is any call to mc-opl to get memory unum,
3629 	 * mc-opl will return UNKNOWN as the unum.
3630 	 */
3631 
3632 	if (prog->data->c_ml != NULL)
3633 		memlist_delete(prog->data->c_ml);
3634 
3635 	if ((prog->data->fmem_status.op &
3636 		(OPL_FMEM_SCF_START| OPL_FMEM_MC_SUSPEND)) !=
3637 		(OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3638 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: "
3639 			"invalid op code %x\n",
3640 				prog->data->fmem_status.op);
3641 	}
3642 
3643 	/* possible ops are SCF_START, MC_SUSPEND */
3644 	if (prog->critical->fmem_issued) {
3645 		if (prog->data->fmem_status.error != FMEM_NO_ERROR)
3646 			cmn_err(CE_PANIC, "scf fmem request failed");
3647 		rv = (*prog->data->scf_fmem_end)();
3648 		if (rv) {
3649 			cmn_err(CE_PANIC, "scf_fmem_end() failed");
3650 		}
3651 		/*
3652 		 * If we get here, rename is successful.
3653 		 * Do all the copy rename post processing.
3654 		 */
3655 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3656 			(drmach_mem_t *)prog->data->t_mem);
3657 	} else {
3658 		if (prog->data->fmem_status.error != 0) {
3659 			cmn_err(CE_WARN, "Kernel Migration fails. 0x%x",
3660 				prog->data->fmem_status.error);
3661 			err = DRMACH_INTERNAL_ERROR();
3662 		}
3663 		rv = (*prog->data->scf_fmem_cancel)();
3664 		if (rv) {
3665 			cmn_err(CE_WARN, "scf_fmem_cancel() failed");
3666 			if (!err)
3667 				err = DRMACH_INTERNAL_ERROR();
3668 		}
3669 	}
3670 	/* soft resume mac patrol */
3671 	(*prog->data->mc_resume)();
3672 
3673 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3674 	return (err);
3675 }
3676 
3677 static void
3678 drmach_lock_critical(caddr_t va)
3679 {
3680 	tte_t tte;
3681 	int i;
3682 
3683 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3684 		vtag_flushpage(va, (uint64_t)ksfmmup);
3685 		sfmmu_memtte(&tte, va_to_pfn(va),
3686 			PROC_DATA|HAT_NOSYNC, TTE8K);
3687 		tte.tte_intlo |= TTE_LCK_INT;
3688 		sfmmu_dtlb_ld_kva(va, &tte);
3689 		sfmmu_itlb_ld_kva(va, &tte);
3690 		va += PAGESIZE;
3691 	}
3692 }
3693 
3694 static void
3695 drmach_unlock_critical(caddr_t va)
3696 {
3697 	int i;
3698 
3699 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3700 		vtag_flushpage(va, (uint64_t)ksfmmup);
3701 		va += PAGESIZE;
3702 	}
3703 }
3704 
/*
 * Slave-side entry point of the copy-rename operation.
 *
 * drmach_copy_rename() hands this function (together with the program
 * pointer) to drmach_sys_trap on every other CPU of the ready set.
 *
 *	rp	unused
 *	id	the copy-rename program (drmach_copy_rename_program_t *)
 *
 * With preemption disabled, the CPU locks the program pages, runs
 * prog->critical->run() under OT_DATA_EC trap protection and finally
 * publishes FMEM_LOOP_EXIT in prog->critical->stat[] for its own cpuid.
 * If a protected trap is taken, FMEM_COPY_ERROR is recorded in
 * prog->data->error[] before FMEM_LOOP_EXIT is published.
 */
3705 /*ARGSUSED*/
3706 static void
3707 drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
3708 {
3709 	drmach_copy_rename_program_t	*prog = id;
3710 	register int			cpuid;
3711 	extern void			drmach_flush();
3712 	extern void			membar_sync_il();
3713 	extern void			drmach_flush_icache();
3714 	on_trap_data_t			otd;
3715 
3716 	kpreempt_disable();
3717 	cpuid = CPU->cpu_id;
3718 
	/*
	 * Trap recovery path: undo the protection and the page locking,
	 * then report a copy error and mark this CPU as having exited.
	 */
3719 	if (on_trap(&otd, OT_DATA_EC)) {
3720 		no_trap();
3721 		drmach_unlock_critical((caddr_t)prog);
3722 		kpreempt_enable();
3723 		prog->data->error[cpuid] = FMEM_COPY_ERROR;
3724 		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3725 		return;
3726 	}
3727 
3728 
3729 	(void) drmach_lock_critical((caddr_t)prog);
3730 
3731 	flush_windows();
3732 
3733 	/*
3734 	 * jmp drmach_copy_rename_prog().
3735 	 */
3736 
3737 	drmach_flush(prog->critical, PAGESIZE);
3738 	(void) prog->critical->run(prog, cpuid);
3739 	drmach_flush_icache();
3740 
3741 	no_trap();
3742 	drmach_unlock_critical((caddr_t)prog);
3743 
3744 	kpreempt_enable();
3745 
	/* The master polls this status while waiting for slaves to exit. */
3746 	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3747 	membar_sync_il();
3748 }
3749 
3750 static void
3751 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3752 {
3753 	uint64_t s_base, t_base;
3754 	drmach_board_t *s_board, *t_board;
3755 	struct memlist *ml;
3756 
3757 	s_board = s_mem->dev.bp;
3758 	t_board = t_mem->dev.bp;
3759 	if (s_board == NULL || t_board == NULL) {
3760 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3761 		return;
3762 	}
3763 	s_base = s_mem->slice_base;
3764 	t_base = t_mem->slice_base;
3765 
3766 	s_mem->slice_base = t_base;
3767 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3768 
3769 	for (ml = s_mem->memlist; ml; ml = ml->next) {
3770 		ml->address = ml->address - s_base + t_base;
3771 	}
3772 
3773 	t_mem->slice_base = s_base;
3774 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3775 
3776 	for (ml = t_mem->memlist; ml; ml = ml->next) {
3777 		ml->address = ml->address - t_base + s_base;
3778 	}
3779 
3780 	/*
3781 	 * IKP has to update the sb-mem-ranges for mac patrol driver
3782 	 * when it resumes, it will re-read the sb-mem-range property
3783 	 * to get the new base address
3784 	 */
3785 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
3786 		cmn_err(CE_PANIC, "Could not update device nodes\n");
3787 }
3788 
/*
 * Master-side driver of the copy-rename operation.
 *
 *	id	the copy-rename program (drmach_copy_rename_program_t *)
 *
 * Outline:
 *   - give up immediately with FMEM_SCF_ERR if no SCF register base
 *     is recorded in the program;
 *   - mark every CPU of cpu_ready_set as FMEM_LOOP_START with no error;
 *   - on all other ready CPUs, lock the program pages (cross call) and
 *     start drmach_copy_rename_slave() through drmach_sys_trap;
 *   - lock the program pages locally and run prog->critical->run()
 *     under OT_DATA_EC trap protection;
 *   - restore the saved instruction at loop_rtn, then wait for every
 *     slave to report FMEM_LOOP_EXIT, panicking if one stops making
 *     progress;
 *   - unlock the pages, restore in_sync and store the result in
 *     prog->data->fmem_status.error unless an error is already there.
 *
 * Nothing is returned; callers read the outcome from
 * prog->data->fmem_status.error.
 */
3789 void
3790 drmach_copy_rename(drmachid_t id)
3791 {
3792 	drmach_copy_rename_program_t	*prog = id;
3793 	cpuset_t	cpuset;
3794 	int		cpuid;
3795 	uint64_t	inst;
3796 	register int	rtn;
3797 	extern int	in_sync;
3798 	int		old_in_sync;
3799 	extern void	drmach_sys_trap();
3800 	extern void	drmach_flush();
3801 	extern void	drmach_flush_icache();
3802 	extern uint64_t	patch_inst(uint64_t *, uint64_t);
3803 	on_trap_data_t	otd;
3804 
	/* No SCF register base recorded: report an SCF error and bail out. */
3805 	if (prog->critical->scf_reg_base == (uint64_t)-1) {
3806 		prog->data->fmem_status.error = FMEM_SCF_ERR;
3807 		return;
3808 	}
3809 
3810 	kpreempt_disable();
3811 	cpuset = prog->data->cpu_ready_set;
3812 
	/* Reset the per-CPU status and error slots of all participants. */
3813 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3814 		if (CPU_IN_SET(cpuset, cpuid)) {
3815 			prog->critical->stat[cpuid] = FMEM_LOOP_START;
3816 			prog->data->error[cpuid] = FMEM_NO_ERROR;
3817 		}
3818 	}
3819 
	/* in_sync is forced to 1 for the duration and restored afterwards. */
3820 	old_in_sync = in_sync;
3821 	in_sync = 1;
3822 	cpuid = CPU->cpu_id;
3823 
	/* From here on cpuset holds only the slave CPUs. */
3824 	CPUSET_DEL(cpuset, cpuid);
3825 
3826 	xc_some(cpuset, (xcfunc_t *)drmach_lock_critical,
3827 		(uint64_t)prog, (uint64_t)0);
3828 
3829 	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
3830 		(uint64_t)drmach_copy_rename_slave, (uint64_t)prog);
3831 	xt_sync(cpuset);
3832 
3833 	(void) drmach_lock_critical((caddr_t)prog);
3834 
	/* A protected trap while running the program counts as a copy error. */
3835 	if (on_trap(&otd, OT_DATA_EC)) {
3836 		rtn = FMEM_COPY_ERROR;
3837 		goto done;
3838 	}
3839 
3840 	flush_windows();
3841 
3842 	/*
3843 	 * jmp drmach_copy_rename_prog().
3844 	 */
3845 	drmach_flush(prog->critical, PAGESIZE);
3846 	rtn = prog->critical->run(prog, cpuid);
3847 	drmach_flush_icache();
3848 
3849 
3850 done:
3851 	no_trap();
3852 	if (rtn == FMEM_HW_ERROR) {
3853 		kpreempt_enable();
3854 		prom_panic("URGENT_ERROR_TRAP is "
3855 			"detected during FMEM.\n");
3856 	}
3857 
3858 	/*
3859 	 * In the normal case, all slave CPUs are still spinning in
3860 	 * the assembly code.  The master has to patch the instruction
3861 	 * to get them out.
3862 	 * In an error case, e.g. COPY_ERROR, some slave CPUs might
3863 	 * have aborted, already returned and set the LOOP_EXIT status.
3864 	 * Some CPUs might still be copying.
3865 	 * In any case, some delay is necessary to give them
3866 	 * enough time to set the LOOP_EXIT status.
3867 	 */
3868 
	/*
	 * Keep calling patch_inst() on loop_rtn until the value it returns
	 * equals the saved instruction (inst_loop_ret).
	 */
3869 	for (;;) {
3870 		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
3871 			prog->critical->inst_loop_ret);
3872 		if (prog->critical->inst_loop_ret == inst) {
3873 			break;
3874 		}
3875 	}
3876 
3877 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
3878 		uint64_t	last, now;
3879 		if (!CPU_IN_SET(cpuset, cpuid)) {
3880 			continue;
3881 		}
3882 		last = prog->stat->nbytes[cpuid];
3883 		/*
3884 		 * Wait for all CPUs to exit.
3885 		 * However we do not want an infinite loop
3886 		 * so we detect a hang situation here.
3887 		 * If the slave CPU is still copying data,
3888 		 * we will continue to wait.
3889 		 * In error cases, the master has already set
3890 		 * fmem_status.error to abort the copying.
3891 		 * A 1 ms delay for them to abort copying and
3892 		 * return to drmach_copy_rename_slave to set
3893 		 * FMEM_LOOP_EXIT status should be enough.
3894 		 */
3895 		for (;;) {
3896 			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
3897 				break;
3898 			drmach_sleep_il();
3899 			drv_usecwait(1000);
3900 			now = prog->stat->nbytes[cpuid];
			/*
			 * No growth in the byte count since the last
			 * sample: allow one more wait, then treat the
			 * CPU as hung unless it has exited meanwhile.
			 */
3901 			if (now <= last) {
3902 			    drv_usecwait(1000);
3903 			    if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
3904 				break;
3905 			    cmn_err(CE_PANIC,
3906 				"CPU %d hang during Copy Rename", cpuid);
3907 			}
3908 			last = now;
3909 		}
3910 		if (prog->data->error[cpuid] == FMEM_HW_ERROR) {
3911 			prom_panic("URGENT_ERROR_TRAP is "
3912 				"detected during FMEM.\n");
3913 		}
3914 	}
3915 	drmach_unlock_critical((caddr_t)prog);
3916 
3917 	in_sync = old_in_sync;
3918 
3919 	kpreempt_enable();
	/* An error already recorded in fmem_status takes precedence over rtn. */
3920 	if (prog->data->fmem_status.error == 0)
3921 		prog->data->fmem_status.error = rtn;
3922 
3923 	if (prog->data->copy_wait_time > 0) {
3924 		DRMACH_PR("Unexpected long wait time %ld seconds "
3925 			"during copy rename on CPU %d\n",
3926 			prog->data->copy_wait_time/prog->data->stick_freq,
3927 			prog->data->slowest_cpuid);
3928 	}
3929 }
3930