xref: /illumos-gate/usr/src/uts/sun4u/opl/io/drmach.c (revision bc0ee17c150fbf29e52c0ff365163e4e7b1c2f0a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2023 Oxide Computer Company
29  */
30 
31 #include <sys/debug.h>
32 #include <sys/types.h>
33 #include <sys/varargs.h>
34 #include <sys/errno.h>
35 #include <sys/cred.h>
36 #include <sys/dditypes.h>
37 #include <sys/devops.h>
38 #include <sys/modctl.h>
39 #include <sys/poll.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/sunndi.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/stat.h>
46 #include <sys/kmem.h>
47 #include <sys/vmem.h>
48 #include <sys/opl_olympus_regs.h>
49 #include <sys/cpuvar.h>
50 #include <sys/cpupart.h>
51 #include <sys/mem_config.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/systm.h>
54 #include <sys/machsystm.h>
55 #include <sys/autoconf.h>
56 #include <sys/cmn_err.h>
57 #include <sys/sysmacros.h>
58 #include <sys/x_call.h>
59 #include <sys/promif.h>
60 #include <sys/prom_plat.h>
61 #include <sys/membar.h>
62 #include <vm/seg_kmem.h>
63 #include <sys/mem_cage.h>
64 #include <sys/stack.h>
65 #include <sys/archsystm.h>
66 #include <vm/hat_sfmmu.h>
67 #include <sys/pte.h>
68 #include <sys/mmu.h>
69 #include <sys/cpu_module.h>
70 #include <sys/obpdefs.h>
71 #include <sys/note.h>
72 #include <sys/ontrap.h>
73 #include <sys/cpu_sgnblk_defs.h>
74 #include <sys/opl.h>
75 #include <sys/cpu_impl.h>
76 
77 
78 #include <sys/promimpl.h>
79 #include <sys/prom_plat.h>
80 #include <sys/kobj.h>
81 
82 #include <sys/sysevent.h>
83 #include <sys/sysevent/dr.h>
84 #include <sys/sysevent/eventdefs.h>
85 
86 #include <sys/drmach.h>
87 #include <sys/dr_util.h>
88 
89 #include <sys/fcode.h>
90 #include <sys/opl_cfg.h>
91 
92 extern void		bcopy32_il(uint64_t, uint64_t);
93 extern void		flush_cache_il(void);
94 extern void		drmach_sleep_il(void);
95 
/*
 * Argument bundle handed to a node-walk callback: the node object being
 * walked plus the caller's opaque data pointer.
 */
typedef struct {
	struct drmach_node	*node;	/* node whose "here" tracks the walk */
	void			*data;	/* caller cookie, passed through as-is */
} drmach_node_walk_args_t;
100 
/*
 * Device-tree node abstraction.  "here" records the current position and
 * the function pointers are the access methods; drmach_node_new() installs
 * the drmach_node_ddi_* implementations of them.
 */
typedef struct drmach_node {
	void		*here;		/* current dev_info_t, kept as void * */

	/* return the node's pnode_t */
	pnode_t		(*get_dnode)(struct drmach_node *node);
	/* invoke cb on the nodes of the tree; returns the last cb result */
	int		(*walk)(struct drmach_node *node, void *data,
				int (*cb)(drmach_node_walk_args_t *args));
	/* return the dev_info_t stored in "here" */
	dev_info_t	*(*n_getdip)(struct drmach_node *node);
	/* fetch the length of property "name"; 0 on success, -1 on failure */
	int		(*n_getproplen)(struct drmach_node *node, char *name,
				int *len);
	/* copy property "name" into buf; 0 on success, -1 on failure */
	int		(*n_getprop)(struct drmach_node *node, char *name,
				void *buf, int len);
	/* fill pnode with a copy of node positioned at node's parent */
	int		(*get_parent)(struct drmach_node *node,
				struct drmach_node *pnode);
} drmach_node_t;
115 
/*
 * Bounds-checked array of drmachid_t values indexed from min_index to
 * max_index inclusive.  See the drmach_array_* routines.
 */
typedef struct {
	int		 min_index;	/* lowest valid index */
	int		 max_index;	/* highest valid index */
	int		 arr_sz;	/* size of arr in bytes (for kmem_free) */
	drmachid_t	*arr;		/* slot storage; slot = idx - min_index */
} drmach_array_t;
122 
/*
 * Header shared by every drmach object.  It is the first member of the
 * board and device structures, which is what lets DRMACH_OBJ() view any
 * drmachid_t as a drmach_common_t.
 */
typedef struct {
	/* type tag: address of the constructor, tested by DRMACH_IS_*_ID() */
	void		*isa;

	void		(*dispose)(drmachid_t);		/* destructor */
	sbd_error_t	*(*release)(drmachid_t);
	sbd_error_t	*(*status)(drmachid_t, drmach_status_t *);

	char		 name[MAXNAMELEN];		/* object name */
} drmach_common_t;
132 
/*
 * Per-core strand state.  Each field is a bitmask in which bit N refers
 * to strand N of the core (see the "1 << strand_id" updates below).
 */
typedef	struct {
	uint32_t	core_present;	/* strands found in the device tree */
	uint32_t	core_hotadded;	/* strands added via prom_hotaddcpu() */
	uint32_t	core_started;	/* copied from core_present on boot boards */
} drmach_cmp_t;
138 
/*
 * A system board.  Created by drmach_board_new(), which also registers
 * the object in drmach_boards under its board number.
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: common object header */
	int		 bnum;		/* board number (index in drmach_boards) */
	int		 assigned;	/* initialized from boot_board */
	int		 powered;	/* initialized from boot_board */
	int		 connected;	/* initialized from boot_board */
	int		 cond;		/* set to SBD_COND_OK by the status op */
	drmach_node_t	*tree;		/* node object used to walk the tree */
	drmach_array_t	*devices;	/* devices on the board; may be NULL */
	int		boot_board;	/* non-zero if board existed at bootup */
	drmach_cmp_t	cores[OPL_MAX_COREID_PER_BOARD]; /* per-core strands */
} drmach_board_t;
151 
/*
 * Fields common to every device object (cpu, mem, io).  drmach_device_new()
 * fills in a prototype of this structure and hands it to the type-specific
 * constructor.
 */
typedef struct {
	drmach_common_t	 cm;		/* must be first: common object header */
	drmach_board_t	*bp;		/* board the device belongs to */
	int		 unum;		/* unit number, set by the type's new() */
	int		portid;		/* port id supplied to drmach_device_new() */
	int		 busy;
	int		 powered;
	const char	*type;		/* type string from drmach_name2type[] */
	drmach_node_t	*node;		/* device-tree node for the device */
} drmach_device_t;
162 
/*
 * CPU device object.  "dev" is the first member so the object can also be
 * viewed as a drmach_device_t / drmach_common_t.  The remaining fields are
 * populated outside this part of the file.
 */
typedef struct drmach_cpu {
	drmach_device_t  dev;		/* must be first: common device fields */
	processorid_t    cpuid;
	int		sb;
	int		chipid;
	int		coreid;
	int		strandid;
	int		status;		/* holds OPL_CPU_HOTADDED, presumably */
#define	OPL_CPU_HOTADDED	1
} drmach_cpu_t;
173 
/*
 * Memory device object.  The slice and installed-memory fields are filled
 * in by drmach_setup_mc_info().
 */
typedef struct drmach_mem {
	drmach_device_t  dev;		/* must be first: common device fields */
	uint64_t	slice_base;	/* "sb-mem-ranges" word 0 */
	uint64_t	slice_size;	/* "sb-mem-ranges" word 1 */
	uint64_t	base_pa;	/* lowest installed memory base */
	uint64_t	nbytes;		/* size of installed memory */
	struct memlist *memlist;	/* spans of installed memory */
} drmach_mem_t;
182 
/*
 * I/O (pci) device object.
 * NOTE(review): channel/leaf presumably mirror the "IO Channel#" and
 * "PCI Leaf Number" portid fields described above drmach_io_new() --
 * confirm against the code that sets them.
 */
typedef struct drmach_io {
	drmach_device_t  dev;		/* must be first: common device fields */
	int	channel;
	int	leaf;
} drmach_io_t;
188 
/*
 * Domain-wide DR state, initialized by drmach_init().
 */
typedef struct drmach_domain_info {
	uint32_t	floating;	/* bit N set => board N is floating */
	int		allow_dr;	/* result of opl_check_dr_status() */
} drmach_domain_info_t;

/* The single domain-wide instance. */
drmach_domain_info_t drmach_domain;
195 
/*
 * Argument block for a device configuration operation.
 * NOTE(review): no user of this type is visible in this part of the file;
 * confirm field usage against its consumers.
 */
typedef struct {
	int		 flags;
	drmach_device_t	*dp;
	sbd_error_t	*err;
	dev_info_t	*dip;
} drmach_config_args_t;
202 
/*
 * State carried through a per-board device search: the board, a device
 * count, and a "found" callback with its own argument "a".
 * NOTE(review): the consumer is outside this part of the file; confirm
 * the exact calling convention of "found" there.
 */
typedef struct {
	drmach_board_t	*obj;
	int		 ndevs;
	void		*a;
	sbd_error_t	*(*found)(void *a, const char *, int, drmachid_t);
	sbd_error_t	*err;
} drmach_board_cb_data_t;
210 
/*
 * All known boards, indexed by board number.  Allocated in drmach_init()
 * and torn down in drmach_fini().
 */
static drmach_array_t	*drmach_boards;
212 
213 static sbd_error_t	*drmach_device_new(drmach_node_t *,
214 				drmach_board_t *, int, drmachid_t *);
215 static sbd_error_t	*drmach_cpu_new(drmach_device_t *, drmachid_t *);
216 static sbd_error_t	*drmach_mem_new(drmach_device_t *, drmachid_t *);
217 static sbd_error_t	*drmach_io_new(drmach_device_t *, drmachid_t *);
218 
219 static dev_info_t	*drmach_node_ddi_get_dip(drmach_node_t *np);
220 static int		 drmach_node_ddi_get_prop(drmach_node_t *np,
221 				char *name, void *buf, int len);
222 static int		 drmach_node_ddi_get_proplen(drmach_node_t *np,
223 				char *name, int *len);
224 
225 static int		drmach_get_portid(drmach_node_t *);
226 static	sbd_error_t	*drmach_i_status(drmachid_t, drmach_status_t *);
227 static int		opl_check_dr_status();
228 static void		drmach_io_dispose(drmachid_t);
229 static sbd_error_t	*drmach_io_release(drmachid_t);
230 static sbd_error_t	*drmach_io_status(drmachid_t, drmach_status_t *);
231 static int		drmach_init(void);
232 static void		drmach_fini(void);
233 static void		drmach_swap_pa(drmach_mem_t *, drmach_mem_t *);
234 static drmach_board_t	*drmach_get_board_by_bnum(int);
235 
236 static sbd_error_t	*drmach_board_release(drmachid_t);
237 static sbd_error_t	*drmach_board_status(drmachid_t, drmach_status_t *);
238 static void		drmach_cpu_dispose(drmachid_t);
239 static sbd_error_t	*drmach_cpu_release(drmachid_t);
240 static sbd_error_t	*drmach_cpu_status(drmachid_t, drmach_status_t *);
241 static void		drmach_mem_dispose(drmachid_t);
242 static sbd_error_t	*drmach_mem_release(drmachid_t);
243 static sbd_error_t	*drmach_mem_status(drmachid_t, drmach_status_t *);
244 
/* options for the third argument ("option") of drmach_add_remove_cpu() */
#define	HOTADD_CPU	1
#define	HOTREMOVE_CPU	2

/*
 * Map a cpuid to the index of its core within the board: drop the strand
 * bits by dividing, then mask down to the per-board core range.
 * NOTE(review): the mask assumes OPL_MAX_COREID_PER_BOARD is a power of
 * two -- confirm against its definition.
 */
#define	ON_BOARD_CORE_NUM(x)	(((uint_t)(x) / OPL_MAX_STRANDID_PER_CORE) & \
	(OPL_MAX_COREID_PER_BOARD - 1))
251 
252 extern struct cpu	*SIGBCPU;
253 
254 static int		drmach_name2type_idx(char *);
255 static drmach_board_t	*drmach_board_new(int, int);
256 
/*
 * DRMACH_PR is a printf-style debug trace.  It expands to an unbraced
 * "if (...) printf", so do not use it as the body of an unbraced if/else.
 * On DEBUG kernels output is gated by drmach_debug (enabled by default);
 * on non-DEBUG kernels the condition is constant false.
 */
#ifdef DEBUG

#define	DRMACH_PR		if (drmach_debug) printf
int drmach_debug = 1;		 /* non-zero enables debug messages (default on) */
#else

#define	DRMACH_PR		_NOTE(CONSTANTCONDITION) if (0) printf
#endif /* DEBUG */
265 
266 
/*
 * Object identification helpers.
 *
 * Every drmach object starts with a drmach_common_t whose "isa" member
 * holds the address of the constructor that created it; the DRMACH_IS_*
 * macros compare that tag against the known constructors.
 *
 * The "id" argument is fully parenthesized so that compound expressions
 * (for example a conditional expression) expand correctly.  The DRMACH_IS_*
 * macros evaluate "id" more than once, so the argument must be free of
 * side effects.
 */
#define	DRMACH_OBJ(id)		((drmach_common_t *)(id))

#define	DRMACH_NULL_ID(id)	((id) == 0)

#define	DRMACH_IS_BOARD_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new))

#define	DRMACH_IS_CPU_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))

#define	DRMACH_IS_MEM_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))

#define	DRMACH_IS_IO_ID(id)	\
	(((id) != 0) &&		\
	(DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_DEVICE_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))

#define	DRMACH_IS_ID(id)					\
	(((id) != 0) &&						\
	(DRMACH_OBJ(id)->isa == (void *)drmach_board_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_mem_new ||	\
	    DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
299 
/*
 * Build a logged EOPL_INTERNAL error whose resource string records the
 * source line at which the macro was expanded ("drmach.c <line>").
 */
#define	DRMACH_INTERNAL_ERROR() \
	drerr_new(1, EOPL_INTERNAL, drmach_ie_fmt, __LINE__)

/* format used by DRMACH_INTERNAL_ERROR(); %d receives __LINE__ */
static char		*drmach_ie_fmt = "drmach.c %d";
304 
/*
 * Maps a device node's "name" property to the drmach device type string
 * and the constructor for that type.  drmach_device_new() ignores nodes
 * whose name is not listed here.
 */
static struct {
	const char	*name;		/* value of the "name" property */
	const char	*type;		/* DRMACH_DEVTYPE_* string */
	sbd_error_t	*(*new)(drmach_device_t *, drmachid_t *); /* ctor */
} drmach_name2type[] = {
	{ "cpu",	DRMACH_DEVTYPE_CPU,		drmach_cpu_new },
	{ "pseudo-mc",	DRMACH_DEVTYPE_MEM,		drmach_mem_new },
	{ "pci",	DRMACH_DEVTYPE_PCI,		drmach_io_new  },
};
314 
315 /* utility */
316 #define	MBYTE	(1048576ull)
317 
318 /*
319  * drmach autoconfiguration data structures and interfaces
320  */
321 
322 extern struct mod_ops mod_miscops;
323 
/* Module description: a "misc" module named "OPL DR 1.1". */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"OPL DR 1.1"
};
328 
/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
334 
/*
 * Reader/writer lock associated with drmach_boards; drmach_fini() takes it
 * as writer while tearing the array down.
 */
static krwlock_t drmach_boards_rwlock;
336 
/*
 * NOTE(review): presumably used to hold a function name for diagnostics;
 * no user is visible in this part of the file -- confirm.
 */
typedef const char	*fn_t;
338 
339 int
340 _init(void)
341 {
342 	int err;
343 
344 	if ((err = drmach_init()) != 0) {
345 		return (err);
346 	}
347 
348 	if ((err = mod_install(&modlinkage)) != 0) {
349 		drmach_fini();
350 	}
351 
352 	return (err);
353 }
354 
355 int
356 _fini(void)
357 {
358 	int	err;
359 
360 	if ((err = mod_remove(&modlinkage)) == 0)
361 		drmach_fini();
362 
363 	return (err);
364 }
365 
366 int
367 _info(struct modinfo *modinfop)
368 {
369 	return (mod_info(&modlinkage, modinfop));
370 }
371 
/*
 * Search state for locating a board's memory-controller node.
 * NOTE(review): the walker that uses this is outside this part of the
 * file; bnum/bp look like inputs and dip the result -- confirm.
 */
struct drmach_mc_lookup {
	int	bnum;
	drmach_board_t	*bp;
	dev_info_t *dip;	/* rv - set if found */
};
377 
/* page count -> byte count, widened to 64 bits before shifting */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
/* byte count -> page count (truncating) */
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
380 
381 static int
382 drmach_setup_mc_info(dev_info_t *dip, drmach_mem_t *mp)
383 {
384 	uint64_t	memory_ranges[128];
385 	int len;
386 	struct memlist	*ml;
387 	int rv;
388 	hwd_sb_t *hwd;
389 	hwd_memory_t *pm;
390 
391 	len = sizeof (memory_ranges);
392 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
393 	    "sb-mem-ranges", (caddr_t)&memory_ranges[0], &len) !=
394 	    DDI_PROP_SUCCESS) {
395 		mp->slice_base = 0;
396 		mp->slice_size = 0;
397 		return (-1);
398 	}
399 	mp->slice_base = memory_ranges[0];
400 	mp->slice_size = memory_ranges[1];
401 
402 	if (!mp->dev.bp->boot_board) {
403 		int i;
404 
405 		rv = opl_read_hwd(mp->dev.bp->bnum, NULL,  NULL, NULL, &hwd);
406 
407 		if (rv != 0) {
408 			return (-1);
409 		}
410 
411 		ml = NULL;
412 		pm = &hwd->sb_cmu.cmu_memory;
413 		for (i = 0; i < HWD_MAX_MEM_CHUNKS; i++) {
414 			if (pm->mem_chunks[i].chnk_size > 0) {
415 				ml = memlist_add_span(ml,
416 				    pm->mem_chunks[i].chnk_start_address,
417 				    pm->mem_chunks[i].chnk_size);
418 			}
419 		}
420 	} else {
421 		/*
422 		 * we intersect phys_install to get base_pa.
423 		 * This only works at bootup time.
424 		 */
425 
426 		memlist_read_lock();
427 		ml = memlist_dup(phys_install);
428 		memlist_read_unlock();
429 
430 		ml = memlist_del_span(ml, 0ull, mp->slice_base);
431 		if (ml) {
432 			uint64_t basepa, endpa;
433 			endpa = _ptob64(physmax + 1);
434 
435 			basepa = mp->slice_base + mp->slice_size;
436 
437 			ml = memlist_del_span(ml, basepa, endpa - basepa);
438 		}
439 	}
440 
441 	if (ml) {
442 		uint64_t nbytes = 0;
443 		struct memlist *p;
444 		for (p = ml; p; p = p->ml_next) {
445 			nbytes += p->ml_size;
446 		}
447 		if ((mp->nbytes = nbytes) > 0)
448 			mp->base_pa = ml->ml_address;
449 		else
450 			mp->base_pa = 0;
451 		mp->memlist = ml;
452 	} else {
453 		mp->base_pa = 0;
454 		mp->nbytes = 0;
455 	}
456 	return (0);
457 }
458 
459 
/*
 * Argument block for drmach_cpu_cb(), filled in by
 * drmach_add_remove_cpu().
 */
struct drmach_hotcpu {
	drmach_board_t *bp;	/* board whose core is being changed */
	int	bnum;		/* board number; selects the matching cmp nodes */
	int	core_id;	/* on-board core number to act on */
	int	rv;		/* result: 0, or -1 if any step failed */
	int	option;		/* HOTADD_CPU or HOTREMOVE_CPU */
};
467 
468 static int
469 drmach_cpu_cb(dev_info_t *dip, void *arg)
470 {
471 	struct drmach_hotcpu *p = (struct drmach_hotcpu *)arg;
472 	char name[OBP_MAXDRVNAME];
473 	int len = OBP_MAXDRVNAME;
474 	int bnum, core_id, strand_id;
475 	drmach_board_t *bp;
476 
477 	if (dip == ddi_root_node()) {
478 		return (DDI_WALK_CONTINUE);
479 	}
480 
481 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
482 	    DDI_PROP_DONTPASS, "name",
483 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
484 		return (DDI_WALK_PRUNECHILD);
485 	}
486 
487 	/* only cmp has board number */
488 	bnum = -1;
489 	len = sizeof (bnum);
490 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
491 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
492 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
493 		bnum = -1;
494 	}
495 
496 	if (strcmp(name, "cmp") == 0) {
497 		if (bnum != p->bnum)
498 			return (DDI_WALK_PRUNECHILD);
499 		return (DDI_WALK_CONTINUE);
500 	}
501 	/* we have already pruned all unwanted cores and cpu's above */
502 	if (strcmp(name, "core") == 0) {
503 		return (DDI_WALK_CONTINUE);
504 	}
505 	if (strcmp(name, "cpu") == 0) {
506 		processorid_t cpuid;
507 		len = sizeof (cpuid);
508 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
509 		    DDI_PROP_DONTPASS, "cpuid",
510 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
511 			p->rv = -1;
512 			return (DDI_WALK_TERMINATE);
513 		}
514 
515 		core_id = p->core_id;
516 
517 		bnum = LSB_ID(cpuid);
518 
519 		if (ON_BOARD_CORE_NUM(cpuid) != core_id)
520 			return (DDI_WALK_CONTINUE);
521 
522 		bp = p->bp;
523 		ASSERT(bnum == bp->bnum);
524 
525 		if (p->option == HOTADD_CPU) {
526 			if (prom_hotaddcpu(cpuid) != 0) {
527 				p->rv = -1;
528 				return (DDI_WALK_TERMINATE);
529 			}
530 			strand_id = STRAND_ID(cpuid);
531 			bp->cores[core_id].core_hotadded |= (1 << strand_id);
532 		} else if (p->option == HOTREMOVE_CPU) {
533 			if (prom_hotremovecpu(cpuid) != 0) {
534 				p->rv = -1;
535 				return (DDI_WALK_TERMINATE);
536 			}
537 			strand_id = STRAND_ID(cpuid);
538 			bp->cores[core_id].core_hotadded &= ~(1 << strand_id);
539 		}
540 		return (DDI_WALK_CONTINUE);
541 	}
542 
543 	return (DDI_WALK_PRUNECHILD);
544 }
545 
546 
547 static int
548 drmach_add_remove_cpu(int bnum, int core_id, int option)
549 {
550 	struct drmach_hotcpu arg;
551 	drmach_board_t *bp;
552 
553 	bp = drmach_get_board_by_bnum(bnum);
554 	ASSERT(bp);
555 
556 	arg.bp = bp;
557 	arg.bnum = bnum;
558 	arg.core_id = core_id;
559 	arg.rv = 0;
560 	arg.option = option;
561 	ddi_walk_devs(ddi_root_node(), drmach_cpu_cb, (void *)&arg);
562 	return (arg.rv);
563 }
564 
/* Argument block for drmach_setup_core_cb(). */
struct drmach_setup_core_arg {
	drmach_board_t *bp;	/* board whose cores[] masks are being filled */
};
568 
569 static int
570 drmach_setup_core_cb(dev_info_t *dip, void *arg)
571 {
572 	struct drmach_setup_core_arg *p = (struct drmach_setup_core_arg *)arg;
573 	char name[OBP_MAXDRVNAME];
574 	int len = OBP_MAXDRVNAME;
575 	int bnum;
576 	int core_id, strand_id;
577 
578 	if (dip == ddi_root_node()) {
579 		return (DDI_WALK_CONTINUE);
580 	}
581 
582 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
583 	    DDI_PROP_DONTPASS, "name",
584 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
585 		return (DDI_WALK_PRUNECHILD);
586 	}
587 
588 	/* only cmp has board number */
589 	bnum = -1;
590 	len = sizeof (bnum);
591 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
592 	    DDI_PROP_DONTPASS, OBP_BOARDNUM,
593 	    (caddr_t)&bnum, &len) != DDI_PROP_SUCCESS) {
594 		bnum = -1;
595 	}
596 
597 	if (strcmp(name, "cmp") == 0) {
598 		if (bnum != p->bp->bnum)
599 			return (DDI_WALK_PRUNECHILD);
600 		return (DDI_WALK_CONTINUE);
601 	}
602 	/* we have already pruned all unwanted cores and cpu's above */
603 	if (strcmp(name, "core") == 0) {
604 		return (DDI_WALK_CONTINUE);
605 	}
606 	if (strcmp(name, "cpu") == 0) {
607 		processorid_t cpuid;
608 		len = sizeof (cpuid);
609 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
610 		    DDI_PROP_DONTPASS, "cpuid",
611 		    (caddr_t)&cpuid, &len) != DDI_PROP_SUCCESS) {
612 			return (DDI_WALK_TERMINATE);
613 		}
614 		bnum = LSB_ID(cpuid);
615 		ASSERT(bnum == p->bp->bnum);
616 		core_id = ON_BOARD_CORE_NUM(cpuid);
617 		strand_id = STRAND_ID(cpuid);
618 		p->bp->cores[core_id].core_present |= (1 << strand_id);
619 		return (DDI_WALK_CONTINUE);
620 	}
621 
622 	return (DDI_WALK_PRUNECHILD);
623 }
624 
625 
626 static void
627 drmach_setup_core_info(drmach_board_t *obj)
628 {
629 	struct drmach_setup_core_arg arg;
630 	int i;
631 
632 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
633 		obj->cores[i].core_present = 0;
634 		obj->cores[i].core_hotadded = 0;
635 		obj->cores[i].core_started = 0;
636 	}
637 	arg.bp = obj;
638 	ddi_walk_devs(ddi_root_node(), drmach_setup_core_cb, (void *)&arg);
639 
640 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
641 		if (obj->boot_board) {
642 			obj->cores[i].core_hotadded =
643 			    obj->cores[i].core_started =
644 			    obj->cores[i].core_present;
645 		}
646 	}
647 }
648 
649 /*
650  * drmach_node_* routines serve the purpose of separating the
651  * rest of the code from the device tree and OBP.  This is necessary
652  * because of In-Kernel-Probing.  Devices probed after stod, are probed
653  * by the in-kernel-prober, not OBP.  These devices, therefore, do not
654  * have dnode ids.
655  */
656 
/*
 * State passed from drmach_node_ddi_walk() to drmach_node_ddi_walk_cb()
 * through ddi_walk_devs().
 */
typedef struct {
	drmach_node_walk_args_t	*nwargs;	/* args for the user callback */
	int			(*cb)(drmach_node_walk_args_t *args); /* user cb */
	int			err;		/* last value returned by cb */
} drmach_node_ddi_walk_args_t;
662 
663 static int
664 drmach_node_ddi_walk_cb(dev_info_t *dip, void *arg)
665 {
666 	drmach_node_ddi_walk_args_t	*nargs;
667 
668 	nargs = (drmach_node_ddi_walk_args_t *)arg;
669 
670 	/*
671 	 * dip doesn't have to be held here as we are called
672 	 * from ddi_walk_devs() which holds the dip.
673 	 */
674 	nargs->nwargs->node->here = (void *)dip;
675 
676 	nargs->err = nargs->cb(nargs->nwargs);
677 
678 
679 	/*
680 	 * Set "here" to NULL so that unheld dip is not accessible
681 	 * outside ddi_walk_devs()
682 	 */
683 	nargs->nwargs->node->here = NULL;
684 
685 	if (nargs->err)
686 		return (DDI_WALK_TERMINATE);
687 	else
688 		return (DDI_WALK_CONTINUE);
689 }
690 
691 static int
692 drmach_node_ddi_walk(drmach_node_t *np, void *data,
693     int (*cb)(drmach_node_walk_args_t *args))
694 {
695 	drmach_node_walk_args_t		args;
696 	drmach_node_ddi_walk_args_t	nargs;
697 
698 
699 	/* initialized args structure for callback */
700 	args.node = np;
701 	args.data = data;
702 
703 	nargs.nwargs = &args;
704 	nargs.cb = cb;
705 	nargs.err = 0;
706 
707 	/*
708 	 * Root node doesn't have to be held in any way.
709 	 */
710 	ddi_walk_devs(ddi_root_node(), drmach_node_ddi_walk_cb, (void *)&nargs);
711 
712 	return (nargs.err);
713 }
714 
715 static int
716 drmach_node_ddi_get_parent(drmach_node_t *np, drmach_node_t *pp)
717 {
718 	dev_info_t	*ndip;
719 	static char	*fn = "drmach_node_ddi_get_parent";
720 
721 	ndip = np->n_getdip(np);
722 	if (ndip == NULL) {
723 		cmn_err(CE_WARN, "%s: NULL dip", fn);
724 		return (-1);
725 	}
726 
727 	bcopy(np, pp, sizeof (drmach_node_t));
728 
729 	pp->here = (void *)ddi_get_parent(ndip);
730 	if (pp->here == NULL) {
731 		cmn_err(CE_WARN, "%s: NULL parent dip", fn);
732 		return (-1);
733 	}
734 
735 	return (0);
736 }
737 
738 /*ARGSUSED*/
739 static pnode_t
740 drmach_node_ddi_get_dnode(drmach_node_t *np)
741 {
742 	return (0);
743 }
744 
745 static drmach_node_t *
746 drmach_node_new(void)
747 {
748 	drmach_node_t *np;
749 
750 	np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
751 
752 	np->get_dnode = drmach_node_ddi_get_dnode;
753 	np->walk = drmach_node_ddi_walk;
754 	np->n_getdip = drmach_node_ddi_get_dip;
755 	np->n_getproplen = drmach_node_ddi_get_proplen;
756 	np->n_getprop = drmach_node_ddi_get_prop;
757 	np->get_parent = drmach_node_ddi_get_parent;
758 
759 	return (np);
760 }
761 
762 static void
763 drmach_node_dispose(drmach_node_t *np)
764 {
765 	kmem_free(np, sizeof (*np));
766 }
767 
768 static dev_info_t *
769 drmach_node_ddi_get_dip(drmach_node_t *np)
770 {
771 	return ((dev_info_t *)np->here);
772 }
773 
774 static int
775 drmach_node_walk(drmach_node_t *np, void *param,
776     int (*cb)(drmach_node_walk_args_t *args))
777 {
778 	return (np->walk(np, param, cb));
779 }
780 
781 static int
782 drmach_node_ddi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
783 {
784 	int		rv = 0;
785 	dev_info_t	*ndip;
786 	static char	*fn = "drmach_node_ddi_get_prop";
787 
788 
789 	ndip = np->n_getdip(np);
790 	if (ndip == NULL) {
791 		cmn_err(CE_WARN, "%s: NULL dip", fn);
792 		rv = -1;
793 	} else if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ndip,
794 	    DDI_PROP_DONTPASS, name,
795 	    (caddr_t)buf, &len) != DDI_PROP_SUCCESS) {
796 		rv = -1;
797 	}
798 
799 	return (rv);
800 }
801 
802 static int
803 drmach_node_ddi_get_proplen(drmach_node_t *np, char *name, int *len)
804 {
805 	int		rv = 0;
806 	dev_info_t	*ndip;
807 
808 	ndip = np->n_getdip(np);
809 	if (ndip == NULL) {
810 		rv = -1;
811 	} else if (ddi_getproplen(DDI_DEV_T_ANY, ndip, DDI_PROP_DONTPASS, name,
812 	    len) != DDI_PROP_SUCCESS) {
813 		rv = -1;
814 	}
815 
816 	return (rv);
817 }
818 
819 static drmachid_t
820 drmach_node_dup(drmach_node_t *np)
821 {
822 	drmach_node_t *dup;
823 
824 	dup = drmach_node_new();
825 	dup->here = np->here;
826 	dup->get_dnode = np->get_dnode;
827 	dup->walk = np->walk;
828 	dup->n_getdip = np->n_getdip;
829 	dup->n_getproplen = np->n_getproplen;
830 	dup->n_getprop = np->n_getprop;
831 	dup->get_parent = np->get_parent;
832 
833 	return (dup);
834 }
835 
836 /*
837  * drmach_array provides convenient array construction, access,
838  * bounds checking and array destruction logic.
839  */
840 
841 static drmach_array_t *
842 drmach_array_new(int min_index, int max_index)
843 {
844 	drmach_array_t *arr;
845 
846 	arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
847 
848 	arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
849 	if (arr->arr_sz > 0) {
850 		arr->min_index = min_index;
851 		arr->max_index = max_index;
852 
853 		arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
854 		return (arr);
855 	} else {
856 		kmem_free(arr, sizeof (*arr));
857 		return (0);
858 	}
859 }
860 
861 static int
862 drmach_array_set(drmach_array_t *arr, int idx, drmachid_t val)
863 {
864 	if (idx < arr->min_index || idx > arr->max_index)
865 		return (-1);
866 	else {
867 		arr->arr[idx - arr->min_index] = val;
868 		return (0);
869 	}
870 	/*NOTREACHED*/
871 }
872 
873 static int
874 drmach_array_get(drmach_array_t *arr, int idx, drmachid_t *val)
875 {
876 	if (idx < arr->min_index || idx > arr->max_index)
877 		return (-1);
878 	else {
879 		*val = arr->arr[idx - arr->min_index];
880 		return (0);
881 	}
882 	/*NOTREACHED*/
883 }
884 
885 static int
886 drmach_array_first(drmach_array_t *arr, int *idx, drmachid_t *val)
887 {
888 	int rv;
889 
890 	*idx = arr->min_index;
891 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
892 		*idx += 1;
893 
894 	return (rv);
895 }
896 
897 static int
898 drmach_array_next(drmach_array_t *arr, int *idx, drmachid_t *val)
899 {
900 	int rv;
901 
902 	*idx += 1;
903 	while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
904 		*idx += 1;
905 
906 	return (rv);
907 }
908 
909 static void
910 drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
911 {
912 	drmachid_t	val;
913 	int		idx;
914 	int		rv;
915 
916 	rv = drmach_array_first(arr, &idx, &val);
917 	while (rv == 0) {
918 		(*disposer)(val);
919 		rv = drmach_array_next(arr, &idx, &val);
920 	}
921 
922 	kmem_free(arr->arr, arr->arr_sz);
923 	kmem_free(arr, sizeof (*arr));
924 }
925 
926 static drmach_board_t *
927 drmach_get_board_by_bnum(int bnum)
928 {
929 	drmachid_t id;
930 
931 	if (drmach_array_get(drmach_boards, bnum, &id) == 0)
932 		return ((drmach_board_t *)id);
933 	else
934 		return (NULL);
935 }
936 
937 static pnode_t
938 drmach_node_get_dnode(drmach_node_t *np)
939 {
940 	return (np->get_dnode(np));
941 }
942 
943 /*ARGSUSED*/
944 sbd_error_t *
945 drmach_configure(drmachid_t id, int flags)
946 {
947 	drmach_device_t		*dp;
948 	sbd_error_t		*err = NULL;
949 	dev_info_t		*rdip;
950 	dev_info_t		*fdip = NULL;
951 
952 	if (DRMACH_IS_CPU_ID(id)) {
953 		return (NULL);
954 	}
955 	if (!DRMACH_IS_DEVICE_ID(id))
956 		return (drerr_new(0, EOPL_INAPPROP, NULL));
957 	dp = id;
958 	rdip = dp->node->n_getdip(dp->node);
959 
960 	ASSERT(rdip);
961 
962 	ASSERT(e_ddi_branch_held(rdip));
963 
964 	if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
965 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
966 		dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
967 
968 		(void) ddi_pathname(dip, path);
969 		err = drerr_new(1,  EOPL_DRVFAIL, path);
970 
971 		kmem_free(path, MAXPATHLEN);
972 
973 		/* If non-NULL, fdip is returned held and must be released */
974 		if (fdip != NULL)
975 			ddi_release_devi(fdip);
976 	}
977 
978 	return (err);
979 }
980 
981 
982 static sbd_error_t *
983 drmach_device_new(drmach_node_t *node,
984     drmach_board_t *bp, int portid, drmachid_t *idp)
985 {
986 	int		 i;
987 	int		 rv;
988 	drmach_device_t	proto;
989 	sbd_error_t	*err;
990 	char		 name[OBP_MAXDRVNAME];
991 
992 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
993 	if (rv) {
994 		/* every node is expected to have a name */
995 		err = drerr_new(1, EOPL_GETPROP, "device node %s: property %s",
996 		    ddi_node_name(node->n_getdip(node)), "name");
997 		return (err);
998 	}
999 
1000 	/*
1001 	 * The node currently being examined is not listed in the name2type[]
1002 	 * array.  In this case, the node is no interest to drmach.  Both
1003 	 * dp and err are initialized here to yield nothing (no device or
1004 	 * error structure) for this case.
1005 	 */
1006 	i = drmach_name2type_idx(name);
1007 
1008 
1009 	if (i < 0) {
1010 		*idp = (drmachid_t)0;
1011 		return (NULL);
1012 	}
1013 
1014 	/* device specific new function will set unum */
1015 
1016 	bzero(&proto, sizeof (proto));
1017 	proto.type = drmach_name2type[i].type;
1018 	proto.bp = bp;
1019 	proto.node = node;
1020 	proto.portid = portid;
1021 
1022 	return (drmach_name2type[i].new(&proto, idp));
1023 }
1024 
1025 static void
1026 drmach_device_dispose(drmachid_t id)
1027 {
1028 	drmach_device_t *self = id;
1029 
1030 	self->cm.dispose(id);
1031 }
1032 
1033 
1034 static drmach_board_t *
1035 drmach_board_new(int bnum, int boot_board)
1036 {
1037 	drmach_board_t	*bp;
1038 
1039 	bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
1040 
1041 	bp->cm.isa = (void *)drmach_board_new;
1042 	bp->cm.release = drmach_board_release;
1043 	bp->cm.status = drmach_board_status;
1044 
1045 	(void) drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
1046 
1047 	bp->bnum = bnum;
1048 	bp->devices = NULL;
1049 	bp->connected = boot_board;
1050 	bp->tree = drmach_node_new();
1051 	bp->assigned = boot_board;
1052 	bp->powered = boot_board;
1053 	bp->boot_board = boot_board;
1054 
1055 	/*
1056 	 * If this is not bootup initialization, we have to wait till
1057 	 * IKP sets up the device nodes in drmach_board_connect().
1058 	 */
1059 	if (boot_board)
1060 		drmach_setup_core_info(bp);
1061 
1062 	(void) drmach_array_set(drmach_boards, bnum, bp);
1063 	return (bp);
1064 }
1065 
1066 static void
1067 drmach_board_dispose(drmachid_t id)
1068 {
1069 	drmach_board_t *bp;
1070 
1071 	ASSERT(DRMACH_IS_BOARD_ID(id));
1072 	bp = id;
1073 
1074 	if (bp->tree)
1075 		drmach_node_dispose(bp->tree);
1076 
1077 	if (bp->devices)
1078 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1079 
1080 	kmem_free(bp, sizeof (*bp));
1081 }
1082 
1083 static sbd_error_t *
1084 drmach_board_status(drmachid_t id, drmach_status_t *stat)
1085 {
1086 	sbd_error_t	*err = NULL;
1087 	drmach_board_t	*bp;
1088 
1089 	if (!DRMACH_IS_BOARD_ID(id))
1090 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1091 	bp = id;
1092 
1093 	stat->assigned = bp->assigned;
1094 	stat->powered = bp->powered;
1095 	stat->busy = 0;			/* assume not busy */
1096 	stat->configured = 0;		/* assume not configured */
1097 	stat->empty = 0;
1098 	stat->cond = bp->cond = SBD_COND_OK;
1099 	(void) strncpy(stat->type, "System Brd", sizeof (stat->type));
1100 	stat->info[0] = '\0';
1101 
1102 	if (bp->devices) {
1103 		int		 rv;
1104 		int		 d_idx;
1105 		drmachid_t	 d_id;
1106 
1107 		rv = drmach_array_first(bp->devices, &d_idx, &d_id);
1108 		while (rv == 0) {
1109 			drmach_status_t	d_stat;
1110 
1111 			err = drmach_i_status(d_id, &d_stat);
1112 			if (err)
1113 				break;
1114 
1115 			stat->busy |= d_stat.busy;
1116 			stat->configured |= d_stat.configured;
1117 
1118 			rv = drmach_array_next(bp->devices, &d_idx, &d_id);
1119 		}
1120 	}
1121 
1122 	return (err);
1123 }
1124 
1125 int
1126 drmach_board_is_floating(drmachid_t id)
1127 {
1128 	drmach_board_t *bp;
1129 
1130 	if (!DRMACH_IS_BOARD_ID(id))
1131 		return (0);
1132 
1133 	bp = (drmach_board_t *)id;
1134 
1135 	return ((drmach_domain.floating & (1 << bp->bnum)) ? 1 : 0);
1136 }
1137 
1138 static int
1139 drmach_init(void)
1140 {
1141 	dev_info_t	*rdip;
1142 	int		i, rv, len;
1143 	int		*floating;
1144 
1145 	rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
1146 
1147 	drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
1148 
1149 	rdip = ddi_root_node();
1150 
1151 	if (ddi_getproplen(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1152 	    "floating-boards", &len) != DDI_PROP_SUCCESS) {
1153 		cmn_err(CE_WARN, "Cannot get floating-boards proplen\n");
1154 	} else {
1155 		floating = (int *)kmem_alloc(len, KM_SLEEP);
1156 		rv = ddi_prop_op(DDI_DEV_T_ANY, rdip, PROP_LEN_AND_VAL_BUF,
1157 		    DDI_PROP_DONTPASS, "floating-boards", (caddr_t)floating,
1158 		    &len);
1159 		if (rv != DDI_PROP_SUCCESS) {
1160 			cmn_err(CE_WARN, "Cannot get floating-boards prop\n");
1161 		} else {
1162 			drmach_domain.floating = 0;
1163 			for (i = 0; i < len / sizeof (int); i++) {
1164 				drmach_domain.floating |= (1 << floating[i]);
1165 			}
1166 		}
1167 		kmem_free(floating, len);
1168 	}
1169 	drmach_domain.allow_dr = opl_check_dr_status();
1170 
1171 	rdip = ddi_get_child(ddi_root_node());
1172 	do {
1173 		int		 bnum;
1174 		drmachid_t	 id;
1175 
1176 		bnum = -1;
1177 		bnum = ddi_getprop(DDI_DEV_T_ANY, rdip, DDI_PROP_DONTPASS,
1178 		    OBP_BOARDNUM, -1);
1179 		if (bnum == -1)
1180 			continue;
1181 
1182 		if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
1183 			cmn_err(CE_WARN, "Device node 0x%p has invalid "
1184 			    "property value, %s=%d", (void *)rdip,
1185 			    OBP_BOARDNUM, bnum);
1186 			goto error;
1187 		} else if (id == NULL) {
1188 			(void) drmach_board_new(bnum, 1);
1189 		}
1190 	} while ((rdip = ddi_get_next_sibling(rdip)) != NULL);
1191 
1192 	opl_hold_devtree();
1193 
1194 	/*
1195 	 * Initialize the IKP feature.
1196 	 *
1197 	 * This can be done only after DR has acquired a hold on all the
1198 	 * device nodes that are interesting to IKP.
1199 	 */
1200 	if (opl_init_cfg() != 0) {
1201 		cmn_err(CE_WARN, "DR - IKP initialization failed");
1202 
1203 		opl_release_devtree();
1204 
1205 		goto error;
1206 	}
1207 
1208 	return (0);
1209 error:
1210 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1211 	rw_destroy(&drmach_boards_rwlock);
1212 	return (ENXIO);
1213 }
1214 
1215 static void
1216 drmach_fini(void)
1217 {
1218 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1219 	drmach_array_dispose(drmach_boards, drmach_board_dispose);
1220 	drmach_boards = NULL;
1221 	rw_exit(&drmach_boards_rwlock);
1222 
1223 	/*
1224 	 * Walk immediate children of the root devinfo node
1225 	 * releasing holds acquired on branches in drmach_init()
1226 	 */
1227 
1228 	opl_release_devtree();
1229 
1230 	rw_destroy(&drmach_boards_rwlock);
1231 }
1232 
1233 /*
 *	Each system board contains 2 Oberon PCI bridges and
 *	1 CMUCH.
 *	Each Oberon has 2 channels.
 *	Each channel has 2 pci-ex leaves.
 *	Each CMUCH has 1 pci bus.
1239  *
1240  *
1241  *	Device Path:
1242  *	/pci@<portid>,reg
1243  *
1244  *	where
1245  *	portid[10] = 0
1246  *	portid[9:0] = LLEAF_ID[9:0] of the Oberon Channel
1247  *
 *	LLEAF_ID[9] = 0
1249  *	LLEAF_ID[8:4] = LSB_ID[4:0]
1250  *	LLEAF_ID[3:1] = IO Channel#[2:0] (0,1,2,3 for Oberon)
1251  *			channel 4 is pcicmu
1252  *	LLEAF_ID[0] = PCI Leaf Number (0 for leaf-A, 1 for leaf-B)
1253  *
1254  *	Properties:
1255  *	name = pci
1256  *	device_type = "pciex"
1257  *	board# = LSBID
1258  *	reg = int32 * 2, Oberon CSR space of the leaf and the UBC space
1259  *	portid = Jupiter Bus Device ID ((LSB_ID << 3)|pciport#)
1260  */
1261 
1262 static sbd_error_t *
1263 drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
1264 {
1265 	drmach_io_t	*ip;
1266 
1267 	int		 portid;
1268 
1269 	portid = proto->portid;
1270 	ASSERT(portid != -1);
1271 	proto->unum = portid & (MAX_IO_UNITS_PER_BOARD - 1);
1272 
1273 	ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
1274 	bcopy(proto, &ip->dev, sizeof (ip->dev));
1275 	ip->dev.node = drmach_node_dup(proto->node);
1276 	ip->dev.cm.isa = (void *)drmach_io_new;
1277 	ip->dev.cm.dispose = drmach_io_dispose;
1278 	ip->dev.cm.release = drmach_io_release;
1279 	ip->dev.cm.status = drmach_io_status;
1280 	ip->channel = (portid >> 1) & 0x7;
1281 	ip->leaf = (portid & 0x1);
1282 
1283 	(void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
1284 	    ip->dev.type, ip->dev.unum);
1285 
1286 	*idp = (drmachid_t)ip;
1287 	return (NULL);
1288 }
1289 
1290 
1291 static void
1292 drmach_io_dispose(drmachid_t id)
1293 {
1294 	drmach_io_t *self;
1295 
1296 	ASSERT(DRMACH_IS_IO_ID(id));
1297 
1298 	self = id;
1299 	if (self->dev.node)
1300 		drmach_node_dispose(self->dev.node);
1301 
1302 	kmem_free(self, sizeof (*self));
1303 }
1304 
1305 /*ARGSUSED*/
1306 sbd_error_t *
1307 drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1308 {
1309 	drmach_board_t	*bp = (drmach_board_t *)id;
1310 	sbd_error_t	*err = NULL;
1311 
1312 	/* allow status and ncm operations to always succeed */
1313 	if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
1314 		return (NULL);
1315 	}
1316 
1317 	/* check all other commands for the required option string */
1318 
1319 	if ((opts->size > 0) && (opts->copts != NULL)) {
1320 
1321 		DRMACH_PR("platform options: %s\n", opts->copts);
1322 
1323 		if (strstr(opts->copts, "opldr") == NULL) {
1324 			err = drerr_new(1, EOPL_SUPPORT, NULL);
1325 		}
1326 	} else {
1327 		err = drerr_new(1, EOPL_SUPPORT, NULL);
1328 	}
1329 
1330 	if (!err && id && DRMACH_IS_BOARD_ID(id)) {
1331 		switch (cmd) {
1332 			case SBD_CMD_TEST:
1333 			case SBD_CMD_STATUS:
1334 			case SBD_CMD_GETNCM:
1335 				break;
1336 			case SBD_CMD_CONNECT:
1337 				if (bp->connected)
1338 					err = drerr_new(0, ESBD_STATE, NULL);
1339 				else if (!drmach_domain.allow_dr)
1340 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1341 				break;
1342 			case SBD_CMD_DISCONNECT:
1343 				if (!bp->connected)
1344 					err = drerr_new(0, ESBD_STATE, NULL);
1345 				else if (!drmach_domain.allow_dr)
1346 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1347 				break;
1348 			default:
1349 				if (!drmach_domain.allow_dr)
1350 					err = drerr_new(1, EOPL_SUPPORT, NULL);
1351 				break;
1352 
1353 		}
1354 	}
1355 
1356 	return (err);
1357 }
1358 
1359 /*ARGSUSED*/
1360 sbd_error_t *
1361 drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts)
1362 {
1363 	return (NULL);
1364 }
1365 
1366 sbd_error_t *
1367 drmach_board_assign(int bnum, drmachid_t *id)
1368 {
1369 	sbd_error_t	*err = NULL;
1370 
1371 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1372 
1373 	if (drmach_array_get(drmach_boards, bnum, id) == -1) {
1374 		err = drerr_new(1, EOPL_BNUM, "%d", bnum);
1375 	} else {
1376 		drmach_board_t	*bp;
1377 
1378 		if (*id)
1379 			rw_downgrade(&drmach_boards_rwlock);
1380 
1381 		bp = *id;
1382 		if (!(*id))
1383 			bp = *id  =
1384 			    (drmachid_t)drmach_board_new(bnum, 0);
1385 		bp->assigned = 1;
1386 	}
1387 
1388 	rw_exit(&drmach_boards_rwlock);
1389 
1390 	return (err);
1391 }
1392 
1393 /*ARGSUSED*/
1394 sbd_error_t *
1395 drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
1396 {
1397 	extern int	cpu_alljupiter;
1398 	drmach_board_t	*obj = (drmach_board_t *)id;
1399 	unsigned	cpu_impl;
1400 
1401 	if (!DRMACH_IS_BOARD_ID(id))
1402 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1403 
1404 	if (opl_probe_sb(obj->bnum, &cpu_impl) != 0)
1405 		return (drerr_new(1, EOPL_PROBE, NULL));
1406 
1407 	if (cpu_alljupiter) {
1408 		if (cpu_impl & (1 << OLYMPUS_C_IMPL)) {
1409 			(void) opl_unprobe_sb(obj->bnum);
1410 			return (drerr_new(1, EOPL_MIXED_CPU, NULL));
1411 		}
1412 	}
1413 
1414 	(void) prom_attach_notice(obj->bnum);
1415 
1416 	drmach_setup_core_info(obj);
1417 
1418 	obj->connected = 1;
1419 
1420 	return (NULL);
1421 }
1422 
1423 static int drmach_cache_flush_flag[NCPU];
1424 
1425 /*ARGSUSED*/
1426 static void
1427 drmach_flush_cache(uint64_t id, uint64_t dummy)
1428 {
1429 	extern void cpu_flush_ecache(void);
1430 
1431 	cpu_flush_ecache();
1432 	drmach_cache_flush_flag[id] = 0;
1433 }
1434 
1435 static void
1436 drmach_flush_all()
1437 {
1438 	cpuset_t	xc_cpuset;
1439 	int		i;
1440 
1441 	xc_cpuset = cpu_ready_set;
1442 	for (i = 0; i < NCPU; i++) {
1443 		if (CPU_IN_SET(xc_cpuset, i)) {
1444 			drmach_cache_flush_flag[i] = 1;
1445 			xc_one(i, drmach_flush_cache, i, 0);
1446 			while (drmach_cache_flush_flag[i]) {
1447 				DELAY(1000);
1448 			}
1449 		}
1450 	}
1451 }
1452 
1453 static int
1454 drmach_disconnect_cpus(drmach_board_t *bp)
1455 {
1456 	int i, bnum;
1457 
1458 	bnum = bp->bnum;
1459 
1460 	for (i = 0; i < OPL_MAX_COREID_PER_BOARD; i++) {
1461 		if (bp->cores[i].core_present) {
1462 			if (bp->cores[i].core_started)
1463 				return (-1);
1464 			if (bp->cores[i].core_hotadded) {
1465 				if (drmach_add_remove_cpu(bnum, i,
1466 				    HOTREMOVE_CPU)) {
1467 					cmn_err(CE_WARN, "Failed to remove "
1468 					    "CMP %d on board %d\n", i, bnum);
1469 					return (-1);
1470 				}
1471 			}
1472 		}
1473 	}
1474 	return (0);
1475 }
1476 
1477 /*ARGSUSED*/
1478 sbd_error_t *
1479 drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
1480 {
1481 	drmach_board_t *obj;
1482 	int rv = 0;
1483 	sbd_error_t		*err = NULL;
1484 
1485 	if (DRMACH_NULL_ID(id))
1486 		return (NULL);
1487 
1488 	if (!DRMACH_IS_BOARD_ID(id))
1489 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1490 
1491 	obj = (drmach_board_t *)id;
1492 
1493 	if (drmach_disconnect_cpus(obj)) {
1494 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1495 		return (err);
1496 	}
1497 
1498 	rv = opl_unprobe_sb(obj->bnum);
1499 
1500 	if (rv == 0) {
1501 		(void) prom_detach_notice(obj->bnum);
1502 		obj->connected = 0;
1503 
1504 	} else
1505 		err = drerr_new(1, EOPL_DEPROBE, obj->cm.name);
1506 
1507 	return (err);
1508 }
1509 
1510 static int
1511 drmach_get_portid(drmach_node_t *np)
1512 {
1513 	int		portid;
1514 	char		type[OBP_MAXPROPNAME];
1515 
1516 	if (np->n_getprop(np, "portid", &portid, sizeof (portid)) == 0)
1517 		return (portid);
1518 
1519 	/*
1520 	 * Get the device_type property to see if we should
1521 	 * continue processing this node.
1522 	 */
1523 	if (np->n_getprop(np, "device_type", &type, sizeof (type)) != 0)
1524 		return (-1);
1525 
1526 	if (strcmp(type, OPL_CPU_NODE) == 0) {
1527 		/*
1528 		 * We return cpuid because it has no portid
1529 		 */
1530 		if (np->n_getprop(np, "cpuid", &portid, sizeof (portid)) == 0)
1531 			return (portid);
1532 	}
1533 
1534 	return (-1);
1535 }
1536 
1537 /*
1538  * This is a helper function to determine if a given
1539  * node should be considered for a dr operation according
1540  * to predefined dr type nodes and the node's name.
1541  * Formal Parameter : The name of a device node.
1542  * Return Value: -1, name does not map to a valid dr type.
1543  *		 A value greater or equal to 0, name is a valid dr type.
1544  */
1545 static int
1546 drmach_name2type_idx(char *name)
1547 {
1548 	int	index, ntypes;
1549 
1550 	if (name == NULL)
1551 		return (-1);
1552 
1553 	/*
1554 	 * Determine how many possible types are currently supported
1555 	 * for dr.
1556 	 */
1557 	ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
1558 
1559 	/* Determine if the node's name correspond to a predefined type. */
1560 	for (index = 0; index < ntypes; index++) {
1561 		if (strcmp(drmach_name2type[index].name, name) == 0)
1562 			/* The node is an allowed type for dr. */
1563 			return (index);
1564 	}
1565 
1566 	/*
1567 	 * If the name of the node does not map to any of the
1568 	 * types in the array drmach_name2type then the node is not of
1569 	 * interest to dr.
1570 	 */
1571 	return (-1);
1572 }
1573 
/*
 * There are some complications on OPL:
 * - pseudo-mc nodes do not have a portid property
 * - portid[9:5] of a cmp node is the LSB#, portid[7:3] of a pci node
 *   is the LSB#
 * - cmp nodes have a board# property
 * - core and cpu nodes have neither portid nor board# properties
 * Starcat uses portid to derive the board#, but that does not work
 * for us.  Starfire reads the board# property to filter the devices;
 * that does not work either.  So for these specific devices we use
 * hard-coded methods to get the board#:
 * - cpu: LSB# = CPUID[9:5]
 */
1586 
1587 static int
1588 drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
1589 {
1590 	drmach_node_t			*node = args->node;
1591 	drmach_board_cb_data_t		*data = args->data;
1592 	drmach_board_t			*obj = data->obj;
1593 
1594 	int		rv, portid;
1595 	int		bnum;
1596 	drmachid_t	id;
1597 	drmach_device_t	*device;
1598 	char name[OBP_MAXDRVNAME];
1599 
1600 	portid = drmach_get_portid(node);
1601 	/*
1602 	 * core, cpu and pseudo-mc do not have portid
1603 	 * we use cpuid as the portid of the cpu node
1604 	 * for pseudo-mc, we do not use portid info.
1605 	 */
1606 
1607 	rv = node->n_getprop(node, "name", name, OBP_MAXDRVNAME);
1608 	if (rv)
1609 		return (0);
1610 
1611 
1612 	rv = node->n_getprop(node, OBP_BOARDNUM, &bnum, sizeof (bnum));
1613 
1614 	if (rv) {
1615 		/*
1616 		 * cpu does not have board# property.  We use
1617 		 * CPUID[9:5]
1618 		 */
1619 		if (strcmp("cpu", name) == 0) {
1620 			bnum = (portid >> 5) & 0x1f;
1621 		} else
1622 			return (0);
1623 	}
1624 
1625 
1626 	if (bnum != obj->bnum)
1627 		return (0);
1628 
1629 	if (drmach_name2type_idx(name) < 0) {
1630 		return (0);
1631 	}
1632 
1633 	/*
1634 	 * Create a device data structure from this node data.
1635 	 * The call may yield nothing if the node is not of interest
1636 	 * to drmach.
1637 	 */
1638 	data->err = drmach_device_new(node, obj, portid, &id);
1639 	if (data->err)
1640 		return (-1);
1641 	else if (!id) {
1642 		/*
1643 		 * drmach_device_new examined the node we passed in
1644 		 * and determined that it was one not of interest to
1645 		 * drmach.  So, it is skipped.
1646 		 */
1647 		return (0);
1648 	}
1649 
1650 	rv = drmach_array_set(obj->devices, data->ndevs++, id);
1651 	if (rv) {
1652 		data->err = DRMACH_INTERNAL_ERROR();
1653 		return (-1);
1654 	}
1655 	device = id;
1656 
1657 	data->err = (*data->found)(data->a, device->type, device->unum, id);
1658 	return (data->err == NULL ? 0 : -1);
1659 }
1660 
1661 sbd_error_t *
1662 drmach_board_find_devices(drmachid_t id, void *a,
1663     sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
1664 {
1665 	drmach_board_t		*bp = (drmach_board_t *)id;
1666 	sbd_error_t		*err;
1667 	int			 max_devices;
1668 	int			 rv;
1669 	drmach_board_cb_data_t	data;
1670 
1671 
1672 	if (!DRMACH_IS_BOARD_ID(id))
1673 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1674 
1675 	max_devices  = MAX_CPU_UNITS_PER_BOARD;
1676 	max_devices += MAX_MEM_UNITS_PER_BOARD;
1677 	max_devices += MAX_IO_UNITS_PER_BOARD;
1678 
1679 	bp->devices = drmach_array_new(0, max_devices);
1680 
1681 	if (bp->tree == NULL)
1682 		bp->tree = drmach_node_new();
1683 
1684 	data.obj = bp;
1685 	data.ndevs = 0;
1686 	data.found = found;
1687 	data.a = a;
1688 	data.err = NULL;
1689 
1690 	rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
1691 	if (rv == 0)
1692 		err = NULL;
1693 	else {
1694 		drmach_array_dispose(bp->devices, drmach_device_dispose);
1695 		bp->devices = NULL;
1696 
1697 		if (data.err)
1698 			err = data.err;
1699 		else
1700 			err = DRMACH_INTERNAL_ERROR();
1701 	}
1702 
1703 	return (err);
1704 }
1705 
1706 int
1707 drmach_board_lookup(int bnum, drmachid_t *id)
1708 {
1709 	int	rv = 0;
1710 
1711 	rw_enter(&drmach_boards_rwlock, RW_READER);
1712 	if (drmach_array_get(drmach_boards, bnum, id)) {
1713 		*id = 0;
1714 		rv = -1;
1715 	}
1716 	rw_exit(&drmach_boards_rwlock);
1717 	return (rv);
1718 }
1719 
1720 sbd_error_t *
1721 drmach_board_name(int bnum, char *buf, int buflen)
1722 {
1723 	(void) snprintf(buf, buflen, "SB%d", bnum);
1724 	return (NULL);
1725 }
1726 
1727 sbd_error_t *
1728 drmach_board_poweroff(drmachid_t id)
1729 {
1730 	drmach_board_t	*bp;
1731 	sbd_error_t	*err;
1732 	drmach_status_t	 stat;
1733 
1734 	if (DRMACH_NULL_ID(id))
1735 		return (NULL);
1736 
1737 	if (!DRMACH_IS_BOARD_ID(id))
1738 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1739 	bp = id;
1740 
1741 	err = drmach_board_status(id, &stat);
1742 
1743 	if (!err) {
1744 		if (stat.configured || stat.busy)
1745 			err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1746 		else {
1747 			bp->powered = 0;
1748 		}
1749 	}
1750 	return (err);
1751 }
1752 
1753 sbd_error_t *
1754 drmach_board_poweron(drmachid_t id)
1755 {
1756 	drmach_board_t	*bp;
1757 
1758 	if (!DRMACH_IS_BOARD_ID(id))
1759 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1760 	bp = id;
1761 
1762 	bp->powered = 1;
1763 
1764 	return (NULL);
1765 }
1766 
1767 static sbd_error_t *
1768 drmach_board_release(drmachid_t id)
1769 {
1770 	if (!DRMACH_IS_BOARD_ID(id))
1771 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1772 	return (NULL);
1773 }
1774 
1775 /*ARGSUSED*/
1776 sbd_error_t *
1777 drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
1778 {
1779 	return (NULL);
1780 }
1781 
1782 sbd_error_t *
1783 drmach_board_unassign(drmachid_t id)
1784 {
1785 	drmach_board_t	*bp;
1786 	sbd_error_t	*err;
1787 	drmach_status_t	 stat;
1788 
1789 	if (DRMACH_NULL_ID(id))
1790 		return (NULL);
1791 
1792 	if (!DRMACH_IS_BOARD_ID(id)) {
1793 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1794 	}
1795 	bp = id;
1796 
1797 	rw_enter(&drmach_boards_rwlock, RW_WRITER);
1798 
1799 	err = drmach_board_status(id, &stat);
1800 	if (err) {
1801 		rw_exit(&drmach_boards_rwlock);
1802 		return (err);
1803 	}
1804 	if (stat.configured || stat.busy) {
1805 		err = drerr_new(0, EOPL_CONFIGBUSY, bp->cm.name);
1806 	} else {
1807 		if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
1808 			err = DRMACH_INTERNAL_ERROR();
1809 		else
1810 			drmach_board_dispose(bp);
1811 	}
1812 	rw_exit(&drmach_boards_rwlock);
1813 	return (err);
1814 }
1815 
1816 /*
1817  * We have to do more on OPL - e.g. set up sram tte, read cpuid, strand id,
1818  * implementation #, etc
1819  */
1820 
1821 static sbd_error_t *
1822 drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
1823 {
1824 	int		 portid;
1825 	drmach_cpu_t	*cp = NULL;
1826 
1827 	/* portid is CPUID of the node */
1828 	portid = proto->portid;
1829 	ASSERT(portid != -1);
1830 
1831 	/* unum = (CMP/CHIP ID) + (ON_BOARD_CORE_NUM * MAX_CMPID_PER_BOARD) */
1832 	proto->unum = ((portid/OPL_MAX_CPUID_PER_CMP) &
1833 	    (OPL_MAX_CMPID_PER_BOARD - 1)) +
1834 	    ((portid & (OPL_MAX_CPUID_PER_CMP - 1)) *
1835 	    (OPL_MAX_CMPID_PER_BOARD));
1836 
1837 	cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
1838 	bcopy(proto, &cp->dev, sizeof (cp->dev));
1839 	cp->dev.node = drmach_node_dup(proto->node);
1840 	cp->dev.cm.isa = (void *)drmach_cpu_new;
1841 	cp->dev.cm.dispose = drmach_cpu_dispose;
1842 	cp->dev.cm.release = drmach_cpu_release;
1843 	cp->dev.cm.status = drmach_cpu_status;
1844 
1845 	(void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
1846 	    cp->dev.type, cp->dev.unum);
1847 
1848 /*
1849  *	CPU ID representation
1850  *	CPUID[9:5] = SB#
1851  *	CPUID[4:3] = Chip#
1852  *	CPUID[2:1] = Core# (Only 2 core for OPL)
1853  *	CPUID[0:0] = Strand#
1854  */
1855 
1856 /*
1857  *	reg property of the strand contains strand ID
1858  *	reg property of the parent node contains core ID
1859  *	We should use them.
1860  */
1861 	cp->cpuid = portid;
1862 	cp->sb = (portid >> 5) & 0x1f;
1863 	cp->chipid = (portid >> 3) & 0x3;
1864 	cp->coreid = (portid >> 1) & 0x3;
1865 	cp->strandid = portid & 0x1;
1866 
1867 	*idp = (drmachid_t)cp;
1868 	return (NULL);
1869 }
1870 
1871 
1872 static void
1873 drmach_cpu_dispose(drmachid_t id)
1874 {
1875 	drmach_cpu_t	*self;
1876 
1877 	ASSERT(DRMACH_IS_CPU_ID(id));
1878 
1879 	self = id;
1880 	if (self->dev.node)
1881 		drmach_node_dispose(self->dev.node);
1882 
1883 	kmem_free(self, sizeof (*self));
1884 }
1885 
1886 static int
1887 drmach_cpu_start(struct cpu *cp)
1888 {
1889 	int		cpuid = cp->cpu_id;
1890 	extern int	restart_other_cpu(int);
1891 
1892 	ASSERT(MUTEX_HELD(&cpu_lock));
1893 	ASSERT(cpunodes[cpuid].nodeid != (pnode_t)0);
1894 
1895 	cp->cpu_flags &= ~CPU_POWEROFF;
1896 
1897 	/*
1898 	 * NOTE: restart_other_cpu pauses cpus during the
1899 	 *	 slave cpu start.  This helps to quiesce the
1900 	 *	 bus traffic a bit which makes the tick sync
1901 	 *	 routine in the prom more robust.
1902 	 */
1903 	DRMACH_PR("COLD START for cpu (%d)\n", cpuid);
1904 
1905 	(void) restart_other_cpu(cpuid);
1906 
1907 	return (0);
1908 }
1909 
1910 static sbd_error_t *
1911 drmach_cpu_release(drmachid_t id)
1912 {
1913 	if (!DRMACH_IS_CPU_ID(id))
1914 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1915 
1916 	return (NULL);
1917 }
1918 
1919 static sbd_error_t *
1920 drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
1921 {
1922 	drmach_cpu_t *cp;
1923 	drmach_device_t *dp;
1924 
1925 	ASSERT(DRMACH_IS_CPU_ID(id));
1926 	cp = (drmach_cpu_t *)id;
1927 	dp = &cp->dev;
1928 
1929 	stat->assigned = dp->bp->assigned;
1930 	stat->powered = dp->bp->powered;
1931 	mutex_enter(&cpu_lock);
1932 	stat->configured = (cpu_get(cp->cpuid) != NULL);
1933 	mutex_exit(&cpu_lock);
1934 	stat->busy = dp->busy;
1935 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
1936 	stat->info[0] = '\0';
1937 
1938 	return (NULL);
1939 }
1940 
1941 sbd_error_t *
1942 drmach_cpu_disconnect(drmachid_t id)
1943 {
1944 
1945 	if (!DRMACH_IS_CPU_ID(id))
1946 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1947 
1948 	return (NULL);
1949 }
1950 
1951 sbd_error_t *
1952 drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
1953 {
1954 	drmach_cpu_t *cpu;
1955 
1956 	if (!DRMACH_IS_CPU_ID(id))
1957 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1958 	cpu = (drmach_cpu_t *)id;
1959 
1960 	/* get from cpu directly on OPL */
1961 	*cpuid = cpu->cpuid;
1962 	return (NULL);
1963 }
1964 
1965 sbd_error_t *
1966 drmach_cpu_get_impl(drmachid_t id, int *ip)
1967 {
1968 	drmach_device_t *cpu;
1969 	drmach_node_t	*np;
1970 	drmach_node_t	pp;
1971 	int		impl;
1972 	char		type[OBP_MAXPROPNAME];
1973 
1974 	if (!DRMACH_IS_CPU_ID(id))
1975 		return (drerr_new(0, EOPL_INAPPROP, NULL));
1976 
1977 	cpu = id;
1978 	np = cpu->node;
1979 
1980 	if (np->get_parent(np, &pp) != 0) {
1981 		return (DRMACH_INTERNAL_ERROR());
1982 	}
1983 
1984 	/* the parent should be core */
1985 
1986 	if (pp.n_getprop(&pp, "device_type", &type, sizeof (type)) != 0) {
1987 		return (drerr_new(0, EOPL_GETPROP, NULL));
1988 	}
1989 
1990 	if (strcmp(type, OPL_CORE_NODE) == 0) {
1991 		if (pp.n_getprop(&pp, "implementation#", &impl,
1992 		    sizeof (impl)) != 0) {
1993 			return (drerr_new(0, EOPL_GETPROP, NULL));
1994 		}
1995 	} else {
1996 		return (DRMACH_INTERNAL_ERROR());
1997 	}
1998 
1999 	*ip = impl;
2000 
2001 	return (NULL);
2002 }
2003 
2004 sbd_error_t *
2005 drmach_get_dip(drmachid_t id, dev_info_t **dip)
2006 {
2007 	drmach_device_t	*dp;
2008 
2009 	if (!DRMACH_IS_DEVICE_ID(id))
2010 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2011 	dp = id;
2012 
2013 	*dip = dp->node->n_getdip(dp->node);
2014 	return (NULL);
2015 }
2016 
2017 sbd_error_t *
2018 drmach_io_is_attached(drmachid_t id, int *yes)
2019 {
2020 	drmach_device_t *dp;
2021 	dev_info_t	*dip;
2022 	int		state;
2023 
2024 	if (!DRMACH_IS_IO_ID(id))
2025 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2026 	dp = id;
2027 
2028 	dip = dp->node->n_getdip(dp->node);
2029 	if (dip == NULL) {
2030 		*yes = 0;
2031 		return (NULL);
2032 	}
2033 
2034 	state = ddi_get_devstate(dip);
2035 	*yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
2036 	    (state == DDI_DEVSTATE_UP));
2037 
2038 	return (NULL);
2039 }
2040 
/*
 * Argument block that drmach_console_ops() passes to
 * drmach_io_cb_check() through ddi_walk_devs().
 */
struct drmach_io_cb {
	char	*name;	/* name of the node */
	/* routine drmach_console_ops() calls on the matching node */
	int	(*func)(dev_info_t *);
	/* value returned by func; -1 when no node matched */
	int	rv;
	/* node whose "name" property matched, held; NULL if none */
	dev_info_t *dip;
};
2047 
2048 #define	DRMACH_IO_POST_ATTACH	0
2049 #define	DRMACH_IO_PRE_RELEASE	1
2050 
2051 static int
2052 drmach_io_cb_check(dev_info_t *dip, void *arg)
2053 {
2054 	struct drmach_io_cb *p = (struct drmach_io_cb *)arg;
2055 	char name[OBP_MAXDRVNAME];
2056 	int len = OBP_MAXDRVNAME;
2057 
2058 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "name",
2059 	    (caddr_t)name, &len) != DDI_PROP_SUCCESS) {
2060 		return (DDI_WALK_PRUNECHILD);
2061 	}
2062 
2063 	if (strcmp(name, p->name) == 0) {
2064 		ndi_hold_devi(dip);
2065 		p->dip = dip;
2066 		return (DDI_WALK_TERMINATE);
2067 	}
2068 
2069 	return (DDI_WALK_CONTINUE);
2070 }
2071 
2072 
2073 static int
2074 drmach_console_ops(drmachid_t *id, int state)
2075 {
2076 	drmach_io_t *obj = (drmach_io_t *)id;
2077 	struct drmach_io_cb arg;
2078 	int (*msudetp)(dev_info_t *);
2079 	int (*msuattp)(dev_info_t *);
2080 	dev_info_t *dip, *pdip;
2081 
2082 	/* 4 is pcicmu channel */
2083 	if (obj->channel != 4)
2084 		return (0);
2085 
2086 	arg.name = "serial";
2087 	arg.func = NULL;
2088 	if (state == DRMACH_IO_PRE_RELEASE) {
2089 		msudetp = (int (*)(dev_info_t *))
2090 		    modgetsymvalue("oplmsu_dr_detach", 0);
2091 		if (msudetp != NULL)
2092 			arg.func = msudetp;
2093 	} else if (state == DRMACH_IO_POST_ATTACH) {
2094 		msuattp = (int (*)(dev_info_t *))
2095 		    modgetsymvalue("oplmsu_dr_attach", 0);
2096 		if (msuattp != NULL)
2097 			arg.func = msuattp;
2098 	} else {
2099 		return (0);
2100 	}
2101 
2102 	if (arg.func == NULL) {
2103 		return (0);
2104 	}
2105 
2106 	arg.rv = 0;
2107 	arg.dip = NULL;
2108 
2109 	dip = obj->dev.node->n_getdip(obj->dev.node);
2110 	pdip = ddi_get_parent(dip);
2111 	if (pdip == NULL) {
2112 		/* this cannot happen unless something bad happens */
2113 		return (-1);
2114 	}
2115 	ndi_hold_devi(pdip);
2116 	ndi_devi_enter(pdip);
2117 
2118 	ddi_walk_devs(dip, drmach_io_cb_check, (void *)&arg);
2119 
2120 	ndi_devi_exit(pdip);
2121 	ndi_rele_devi(pdip);
2122 
2123 	if (arg.dip) {
2124 		arg.rv = (*arg.func)(arg.dip);
2125 		ndi_rele_devi(arg.dip);
2126 	} else {
2127 		arg.rv = -1;
2128 	}
2129 
2130 	return (arg.rv);
2131 }
2132 
2133 sbd_error_t *
2134 drmach_io_pre_release(drmachid_t id)
2135 {
2136 	int rv;
2137 
2138 	if (!DRMACH_IS_IO_ID(id))
2139 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2140 
2141 	rv = drmach_console_ops(id, DRMACH_IO_PRE_RELEASE);
2142 
2143 	if (rv != 0)
2144 		cmn_err(CE_WARN, "IO callback failed in pre-release\n");
2145 
2146 	return (NULL);
2147 }
2148 
2149 static sbd_error_t *
2150 drmach_io_release(drmachid_t id)
2151 {
2152 	if (!DRMACH_IS_IO_ID(id))
2153 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2154 	return (NULL);
2155 }
2156 
2157 sbd_error_t *
2158 drmach_io_unrelease(drmachid_t id)
2159 {
2160 	if (!DRMACH_IS_IO_ID(id))
2161 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2162 	return (NULL);
2163 }
2164 
2165 /*ARGSUSED*/
2166 sbd_error_t *
2167 drmach_io_post_release(drmachid_t id)
2168 {
2169 	return (NULL);
2170 }
2171 
2172 /*ARGSUSED*/
2173 sbd_error_t *
2174 drmach_io_post_attach(drmachid_t id)
2175 {
2176 	int rv;
2177 
2178 	if (!DRMACH_IS_IO_ID(id))
2179 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2180 
2181 	rv = drmach_console_ops(id, DRMACH_IO_POST_ATTACH);
2182 
2183 	if (rv != 0)
2184 		cmn_err(CE_WARN, "IO callback failed in post-attach\n");
2185 
2186 	return (0);
2187 }
2188 
2189 static sbd_error_t *
2190 drmach_io_status(drmachid_t id, drmach_status_t *stat)
2191 {
2192 	drmach_device_t *dp;
2193 	sbd_error_t	*err;
2194 	int		 configured;
2195 
2196 	ASSERT(DRMACH_IS_IO_ID(id));
2197 	dp = id;
2198 
2199 	err = drmach_io_is_attached(id, &configured);
2200 	if (err)
2201 		return (err);
2202 
2203 	stat->assigned = dp->bp->assigned;
2204 	stat->powered = dp->bp->powered;
2205 	stat->configured = (configured != 0);
2206 	stat->busy = dp->busy;
2207 	(void) strncpy(stat->type, dp->type, sizeof (stat->type));
2208 	stat->info[0] = '\0';
2209 
2210 	return (NULL);
2211 }
2212 
2213 static sbd_error_t *
2214 drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
2215 {
2216 	dev_info_t *dip;
2217 	int rv;
2218 
2219 	drmach_mem_t	*mp;
2220 
2221 	rv = 0;
2222 
2223 	if ((proto->node->n_getproplen(proto->node, "mc-addr", &rv) < 0) ||
2224 	    (rv <= 0)) {
2225 		*idp = (drmachid_t)0;
2226 		return (NULL);
2227 	}
2228 
2229 	mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
2230 	proto->unum = 0;
2231 
2232 	bcopy(proto, &mp->dev, sizeof (mp->dev));
2233 	mp->dev.node = drmach_node_dup(proto->node);
2234 	mp->dev.cm.isa = (void *)drmach_mem_new;
2235 	mp->dev.cm.dispose = drmach_mem_dispose;
2236 	mp->dev.cm.release = drmach_mem_release;
2237 	mp->dev.cm.status = drmach_mem_status;
2238 
2239 	(void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s",
2240 	    mp->dev.type);
2241 
2242 	dip = mp->dev.node->n_getdip(mp->dev.node);
2243 	if (drmach_setup_mc_info(dip, mp) != 0) {
2244 		return (drerr_new(1, EOPL_MC_SETUP, NULL));
2245 	}
2246 
2247 	/* make sure we do not create memoryless nodes */
2248 	if (mp->nbytes == 0) {
2249 		*idp = (drmachid_t)NULL;
2250 		kmem_free(mp, sizeof (drmach_mem_t));
2251 	} else
2252 		*idp = (drmachid_t)mp;
2253 
2254 	return (NULL);
2255 }
2256 
2257 static void
2258 drmach_mem_dispose(drmachid_t id)
2259 {
2260 	drmach_mem_t *mp;
2261 
2262 	ASSERT(DRMACH_IS_MEM_ID(id));
2263 
2264 
2265 	mp = id;
2266 
2267 	if (mp->dev.node)
2268 		drmach_node_dispose(mp->dev.node);
2269 
2270 	if (mp->memlist) {
2271 		memlist_delete(mp->memlist);
2272 		mp->memlist = NULL;
2273 	}
2274 
2275 	kmem_free(mp, sizeof (*mp));
2276 }
2277 
2278 sbd_error_t *
2279 drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
2280 {
2281 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2282 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2283 	int		rv;
2284 
2285 	ASSERT(size != 0);
2286 
2287 	if (!DRMACH_IS_MEM_ID(id))
2288 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2289 
2290 	rv = kcage_range_add(basepfn, npages, KCAGE_DOWN);
2291 	if (rv == ENOMEM) {
2292 		cmn_err(CE_WARN, "%lu megabytes not available to kernel cage",
2293 		    (ulong_t)(size == 0 ? 0 : size / MBYTE));
2294 	} else if (rv != 0) {
2295 		/* catch this in debug kernels */
2296 		ASSERT(0);
2297 
2298 		cmn_err(CE_WARN, "unexpected kcage_range_add return value %d",
2299 		    rv);
2300 	}
2301 
2302 	if (rv) {
2303 		return (DRMACH_INTERNAL_ERROR());
2304 	}
2305 	else
2306 		return (NULL);
2307 }
2308 
2309 sbd_error_t *
2310 drmach_mem_del_span(drmachid_t id, uint64_t basepa, uint64_t size)
2311 {
2312 	pfn_t		basepfn = (pfn_t)(basepa >> PAGESHIFT);
2313 	pgcnt_t		npages = (pgcnt_t)(size >> PAGESHIFT);
2314 	int		rv;
2315 
2316 	if (!DRMACH_IS_MEM_ID(id))
2317 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2318 
2319 	if (size > 0) {
2320 		rv = kcage_range_delete_post_mem_del(basepfn, npages);
2321 		if (rv != 0) {
2322 			cmn_err(CE_WARN,
2323 			    "unexpected kcage_range_delete_post_mem_del"
2324 			    " return value %d", rv);
2325 			return (DRMACH_INTERNAL_ERROR());
2326 		}
2327 	}
2328 
2329 	return (NULL);
2330 }
2331 
2332 sbd_error_t *
2333 drmach_mem_disable(drmachid_t id)
2334 {
2335 	if (!DRMACH_IS_MEM_ID(id))
2336 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2337 	else {
2338 		drmach_flush_all();
2339 		return (NULL);
2340 	}
2341 }
2342 
2343 sbd_error_t *
2344 drmach_mem_enable(drmachid_t id)
2345 {
2346 	if (!DRMACH_IS_MEM_ID(id))
2347 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2348 	else
2349 		return (NULL);
2350 }
2351 
2352 sbd_error_t *
2353 drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
2354 {
2355 	drmach_mem_t *mp;
2356 
2357 	if (!DRMACH_IS_MEM_ID(id))
2358 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2359 
2360 	mp = (drmach_mem_t *)id;
2361 
2362 	/*
2363 	 * This is only used by dr to round up/down the memory
2364 	 * for copying. Our unit of memory isolation is 64 MB.
2365 	 */
2366 
2367 	mem->mi_alignment_mask = (64 * 1024 * 1024 - 1);
2368 	mem->mi_basepa = mp->base_pa;
2369 	mem->mi_size = mp->nbytes;
2370 	mem->mi_slice_size = mp->slice_size;
2371 
2372 	return (NULL);
2373 }
2374 
2375 sbd_error_t *
2376 drmach_mem_get_base_physaddr(drmachid_t id, uint64_t *pa)
2377 {
2378 	drmach_mem_t *mp;
2379 
2380 	if (!DRMACH_IS_MEM_ID(id))
2381 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2382 
2383 	mp = (drmach_mem_t *)id;
2384 
2385 	*pa = mp->base_pa;
2386 	return (NULL);
2387 }
2388 
2389 sbd_error_t *
2390 drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
2391 {
2392 	drmach_mem_t	*mem;
2393 #ifdef	DEBUG
2394 	int		rv;
2395 #endif
2396 	struct memlist	*mlist;
2397 
2398 	if (!DRMACH_IS_MEM_ID(id))
2399 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2400 
2401 	mem = (drmach_mem_t *)id;
2402 	mlist = memlist_dup(mem->memlist);
2403 
2404 #ifdef DEBUG
2405 	/*
2406 	 * Make sure the incoming memlist doesn't already
2407 	 * intersect with what's present in the system (phys_install).
2408 	 */
2409 	memlist_read_lock();
2410 	rv = memlist_intersect(phys_install, mlist);
2411 	memlist_read_unlock();
2412 	if (rv) {
2413 		DRMACH_PR("Derived memlist intersects with phys_install\n");
2414 		memlist_dump(mlist);
2415 
2416 		DRMACH_PR("phys_install memlist:\n");
2417 		memlist_dump(phys_install);
2418 
2419 		memlist_delete(mlist);
2420 		return (DRMACH_INTERNAL_ERROR());
2421 	}
2422 
2423 	DRMACH_PR("Derived memlist:");
2424 	memlist_dump(mlist);
2425 #endif
2426 	*ml = mlist;
2427 
2428 	return (NULL);
2429 }
2430 
2431 sbd_error_t *
2432 drmach_mem_get_slice_size(drmachid_t id, uint64_t *bytes)
2433 {
2434 	drmach_mem_t	*mem;
2435 
2436 	if (!DRMACH_IS_MEM_ID(id))
2437 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2438 
2439 	mem = (drmach_mem_t *)id;
2440 
2441 	*bytes = mem->slice_size;
2442 
2443 	return (NULL);
2444 }
2445 
2446 
2447 /* ARGSUSED */
2448 processorid_t
2449 drmach_mem_cpu_affinity(drmachid_t id)
2450 {
2451 	return (CPU_CURRENT);
2452 }
2453 
2454 static sbd_error_t *
2455 drmach_mem_release(drmachid_t id)
2456 {
2457 	if (!DRMACH_IS_MEM_ID(id))
2458 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2459 	return (NULL);
2460 }
2461 
2462 static sbd_error_t *
2463 drmach_mem_status(drmachid_t id, drmach_status_t *stat)
2464 {
2465 	drmach_mem_t *dp;
2466 	uint64_t	 pa, slice_size;
2467 	struct memlist	*ml;
2468 
2469 	ASSERT(DRMACH_IS_MEM_ID(id));
2470 	dp = id;
2471 
2472 	/* get starting physical address of target memory */
2473 	pa = dp->base_pa;
2474 
2475 	/* round down to slice boundary */
2476 	slice_size = dp->slice_size;
2477 	pa &= ~(slice_size - 1);
2478 
2479 	/* stop at first span that is in slice */
2480 	memlist_read_lock();
2481 	for (ml = phys_install; ml; ml = ml->ml_next)
2482 		if (ml->ml_address >= pa && ml->ml_address < pa + slice_size)
2483 			break;
2484 	memlist_read_unlock();
2485 
2486 	stat->assigned = dp->dev.bp->assigned;
2487 	stat->powered = dp->dev.bp->powered;
2488 	stat->configured = (ml != NULL);
2489 	stat->busy = dp->dev.busy;
2490 	(void) strncpy(stat->type, dp->dev.type, sizeof (stat->type));
2491 	stat->info[0] = '\0';
2492 
2493 	return (NULL);
2494 }
2495 
2496 
2497 sbd_error_t *
2498 drmach_board_deprobe(drmachid_t id)
2499 {
2500 	drmach_board_t	*bp;
2501 
2502 	if (!DRMACH_IS_BOARD_ID(id))
2503 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2504 
2505 	bp = id;
2506 
2507 	cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
2508 
2509 	if (bp->tree) {
2510 		drmach_node_dispose(bp->tree);
2511 		bp->tree = NULL;
2512 	}
2513 	if (bp->devices) {
2514 		drmach_array_dispose(bp->devices, drmach_device_dispose);
2515 		bp->devices = NULL;
2516 	}
2517 
2518 	bp->boot_board = 0;
2519 
2520 	return (NULL);
2521 }
2522 
2523 /*ARGSUSED*/
2524 static sbd_error_t *
2525 drmach_pt_ikprobe(drmachid_t id, drmach_opts_t *opts)
2526 {
2527 	drmach_board_t		*bp = (drmach_board_t *)id;
2528 	sbd_error_t		*err = NULL;
2529 	int	rv;
2530 	unsigned cpu_impl;
2531 
2532 	if (!DRMACH_IS_BOARD_ID(id))
2533 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2534 
2535 	DRMACH_PR("calling opl_probe_board for bnum=%d\n", bp->bnum);
2536 	rv = opl_probe_sb(bp->bnum, &cpu_impl);
2537 	if (rv != 0) {
2538 		err = drerr_new(1, EOPL_PROBE, bp->cm.name);
2539 		return (err);
2540 	}
2541 	return (err);
2542 }
2543 
2544 /*ARGSUSED*/
2545 static sbd_error_t *
2546 drmach_pt_ikdeprobe(drmachid_t id, drmach_opts_t *opts)
2547 {
2548 	drmach_board_t	*bp;
2549 	sbd_error_t	*err = NULL;
2550 	int	rv;
2551 
2552 	if (!DRMACH_IS_BOARD_ID(id))
2553 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2554 	bp = (drmach_board_t *)id;
2555 
2556 	cmn_err(CE_CONT, "DR: in-kernel unprobe board %d\n", bp->bnum);
2557 
2558 	rv = opl_unprobe_sb(bp->bnum);
2559 	if (rv != 0) {
2560 		err = drerr_new(1, EOPL_DEPROBE, bp->cm.name);
2561 	}
2562 
2563 	return (err);
2564 }
2565 
2566 
2567 /*ARGSUSED*/
2568 sbd_error_t *
2569 drmach_pt_readmem(drmachid_t id, drmach_opts_t *opts)
2570 {
2571 	struct memlist	*ml;
2572 	uint64_t	src_pa;
2573 	uint64_t	dst_pa;
2574 	uint64_t	dst;
2575 
2576 	dst_pa = va_to_pa(&dst);
2577 
2578 	memlist_read_lock();
2579 	for (ml = phys_install; ml; ml = ml->ml_next) {
2580 		uint64_t	nbytes;
2581 
2582 		src_pa = ml->ml_address;
2583 		nbytes = ml->ml_size;
2584 
2585 		while (nbytes != 0ull) {
2586 
2587 			/* copy 32 bytes at arc_pa to dst_pa */
2588 			bcopy32_il(src_pa, dst_pa);
2589 
2590 			/* increment by 32 bytes */
2591 			src_pa += (4 * sizeof (uint64_t));
2592 
2593 			/* decrement by 32 bytes */
2594 			nbytes -= (4 * sizeof (uint64_t));
2595 		}
2596 	}
2597 	memlist_read_unlock();
2598 
2599 	return (NULL);
2600 }
2601 
/*
 * Passthru command table, searched in order by drmach_passthru().
 * Each entry pairs a command name with the handler invoked when the
 * caller's option string begins with that name.
 */
static struct {
	const char	*name;
	sbd_error_t	*(*handler)(drmachid_t id, drmach_opts_t *opts);
} drmach_pt_arr[] = {
	{ "readmem",		drmach_pt_readmem		},
	{ "ikprobe",	drmach_pt_ikprobe	},
	{ "ikdeprobe",	drmach_pt_ikdeprobe	},

	/* the following line must always be last (NULL name ends the search) */
	{ NULL,			NULL				}
};
2613 
2614 /*ARGSUSED*/
2615 sbd_error_t *
2616 drmach_passthru(drmachid_t id, drmach_opts_t *opts)
2617 {
2618 	int		i;
2619 	sbd_error_t	*err;
2620 
2621 	i = 0;
2622 	while (drmach_pt_arr[i].name != NULL) {
2623 		int len = strlen(drmach_pt_arr[i].name);
2624 
2625 		if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
2626 			break;
2627 
2628 		i += 1;
2629 	}
2630 
2631 	if (drmach_pt_arr[i].name == NULL)
2632 		err = drerr_new(0, EOPL_UNKPTCMD, opts->copts);
2633 	else
2634 		err = (*drmach_pt_arr[i].handler)(id, opts);
2635 
2636 	return (err);
2637 }
2638 
2639 sbd_error_t *
2640 drmach_release(drmachid_t id)
2641 {
2642 	drmach_common_t *cp;
2643 
2644 	if (!DRMACH_IS_DEVICE_ID(id))
2645 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2646 	cp = id;
2647 
2648 	return (cp->release(id));
2649 }
2650 
2651 sbd_error_t *
2652 drmach_status(drmachid_t id, drmach_status_t *stat)
2653 {
2654 	drmach_common_t *cp;
2655 	sbd_error_t	*err;
2656 
2657 	rw_enter(&drmach_boards_rwlock, RW_READER);
2658 
2659 	if (!DRMACH_IS_ID(id)) {
2660 		rw_exit(&drmach_boards_rwlock);
2661 		return (drerr_new(0, EOPL_NOTID, NULL));
2662 	}
2663 	cp = (drmach_common_t *)id;
2664 	err = cp->status(id, stat);
2665 
2666 	rw_exit(&drmach_boards_rwlock);
2667 
2668 	return (err);
2669 }
2670 
2671 static sbd_error_t *
2672 drmach_i_status(drmachid_t id, drmach_status_t *stat)
2673 {
2674 	drmach_common_t *cp;
2675 
2676 	if (!DRMACH_IS_ID(id))
2677 		return (drerr_new(0, EOPL_NOTID, NULL));
2678 	cp = id;
2679 
2680 	return (cp->status(id, stat));
2681 }
2682 
2683 /*ARGSUSED*/
2684 sbd_error_t *
2685 drmach_unconfigure(drmachid_t id, int flags)
2686 {
2687 	drmach_device_t *dp;
2688 	dev_info_t	*rdip, *fdip = NULL;
2689 	char name[OBP_MAXDRVNAME];
2690 	int rv;
2691 
2692 	if (DRMACH_IS_CPU_ID(id))
2693 		return (NULL);
2694 
2695 	if (!DRMACH_IS_DEVICE_ID(id))
2696 		return (drerr_new(0, EOPL_INAPPROP, NULL));
2697 
2698 	dp = id;
2699 
2700 	rdip = dp->node->n_getdip(dp->node);
2701 
2702 	ASSERT(rdip);
2703 
2704 	rv = dp->node->n_getprop(dp->node, "name", name, OBP_MAXDRVNAME);
2705 
2706 	if (rv)
2707 		return (NULL);
2708 
2709 	/*
2710 	 * Note: FORCE flag is no longer necessary under devfs
2711 	 */
2712 
2713 	ASSERT(e_ddi_branch_held(rdip));
2714 	if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
2715 		sbd_error_t	*err;
2716 		char		*path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2717 
2718 		/*
2719 		 * If non-NULL, fdip is returned held and must be released.
2720 		 */
2721 		if (fdip != NULL) {
2722 			(void) ddi_pathname(fdip, path);
2723 			ndi_rele_devi(fdip);
2724 		} else {
2725 			(void) ddi_pathname(rdip, path);
2726 		}
2727 
2728 		err = drerr_new(1, EOPL_DRVFAIL, path);
2729 
2730 		kmem_free(path, MAXPATHLEN);
2731 
2732 		return (err);
2733 	}
2734 
2735 	return (NULL);
2736 }
2737 
2738 
2739 int
2740 drmach_cpu_poweron(struct cpu *cp)
2741 {
2742 	int bnum, cpuid, onb_core_num, strand_id;
2743 	drmach_board_t *bp;
2744 
2745 	DRMACH_PR("drmach_cpu_poweron: starting cpuid %d\n", cp->cpu_id);
2746 
2747 	cpuid = cp->cpu_id;
2748 	bnum = LSB_ID(cpuid);
2749 	onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2750 	strand_id = STRAND_ID(cpuid);
2751 	bp = drmach_get_board_by_bnum(bnum);
2752 
2753 	ASSERT(bp);
2754 	if (bp->cores[onb_core_num].core_hotadded == 0) {
2755 		if (drmach_add_remove_cpu(bnum, onb_core_num,
2756 		    HOTADD_CPU) != 0) {
2757 			cmn_err(CE_WARN, "Failed to add CMP %d on board %d\n",
2758 			    onb_core_num, bnum);
2759 			return (EIO);
2760 		}
2761 	}
2762 
2763 	ASSERT(MUTEX_HELD(&cpu_lock));
2764 
2765 	if (drmach_cpu_start(cp) != 0) {
2766 		if (bp->cores[onb_core_num].core_started == 0) {
2767 			/*
2768 			 * we must undo the hotadd or no one will do that
2769 			 * If this fails, we will do this again in
2770 			 * drmach_board_disconnect.
2771 			 */
2772 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2773 			    HOTREMOVE_CPU) != 0) {
2774 				cmn_err(CE_WARN, "Failed to remove CMP %d "
2775 				    "on board %d\n", onb_core_num, bnum);
2776 			}
2777 		}
2778 		return (EBUSY);
2779 	} else {
2780 		bp->cores[onb_core_num].core_started |= (1 << strand_id);
2781 		return (0);
2782 	}
2783 }
2784 
2785 int
2786 drmach_cpu_poweroff(struct cpu *cp)
2787 {
2788 	int		rv = 0;
2789 	processorid_t	cpuid = cp->cpu_id;
2790 
2791 	DRMACH_PR("drmach_cpu_poweroff: stopping cpuid %d\n", cp->cpu_id);
2792 
2793 	ASSERT(MUTEX_HELD(&cpu_lock));
2794 
2795 	/*
2796 	 * Capture all CPUs (except for detaching proc) to prevent
2797 	 * crosscalls to the detaching proc until it has cleared its
2798 	 * bit in cpu_ready_set.
2799 	 *
2800 	 * The CPU's remain paused and the prom_mutex is known to be free.
2801 	 * This prevents the x-trap victim from blocking when doing prom
2802 	 * IEEE-1275 calls at a high PIL level.
2803 	 */
2804 
2805 	promsafe_pause_cpus();
2806 
2807 	/*
2808 	 * Quiesce interrupts on the target CPU. We do this by setting
2809 	 * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set) to
2810 	 * prevent it from receiving cross calls and cross traps.
2811 	 * This prevents the processor from receiving any new soft interrupts.
2812 	 */
2813 	mp_cpu_quiesce(cp);
2814 
2815 	rv = prom_stopcpu_bycpuid(cpuid);
2816 	if (rv == 0)
2817 		cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
2818 
2819 	start_cpus();
2820 
2821 	if (rv == 0) {
2822 		int bnum, onb_core_num, strand_id;
2823 		drmach_board_t *bp;
2824 
2825 		CPU_SIGNATURE(OS_SIG, SIGST_DETACHED, SIGSUBST_NULL, cpuid);
2826 
2827 		bnum = LSB_ID(cpuid);
2828 		onb_core_num = ON_BOARD_CORE_NUM(cpuid);
2829 		strand_id = STRAND_ID(cpuid);
2830 		bp = drmach_get_board_by_bnum(bnum);
2831 		ASSERT(bp);
2832 
2833 		bp->cores[onb_core_num].core_started &= ~(1 << strand_id);
2834 		if (bp->cores[onb_core_num].core_started == 0) {
2835 			if (drmach_add_remove_cpu(bnum, onb_core_num,
2836 			    HOTREMOVE_CPU) != 0) {
2837 				cmn_err(CE_WARN, "Failed to remove CMP %d LSB "
2838 				    "%d\n", onb_core_num, bnum);
2839 				return (EIO);
2840 			}
2841 		}
2842 	}
2843 
2844 	return (rv);
2845 }
2846 
/*
 * Suspend/resume verification hook.  Both arguments are ignored and the
 * function always returns 0.
 */
/*ARGSUSED*/
int
drmach_verify_sr(dev_info_t *dip, int sflag)
{
	return (0);
}
2853 
/*
 * Suspend hook; intentionally empty in this implementation.
 */
void
drmach_suspend_last(void)
{
}
2858 
/*
 * Resume hook; intentionally empty in this implementation.
 */
void
drmach_resume_first(void)
{
}
2863 
2864 /*
2865  * Log a DR sysevent.
2866  * Return value: 0 success, non-zero failure.
2867  */
2868 int
2869 drmach_log_sysevent(int board, char *hint, int flag, int verbose)
2870 {
2871 	sysevent_t			*ev;
2872 	sysevent_id_t			eid;
2873 	int				rv, km_flag;
2874 	sysevent_value_t		evnt_val;
2875 	sysevent_attr_list_t		*evnt_attr_list = NULL;
2876 	char				attach_pnt[MAXNAMELEN];
2877 
2878 	km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
2879 	attach_pnt[0] = '\0';
2880 	if (drmach_board_name(board, attach_pnt, MAXNAMELEN)) {
2881 		rv = -1;
2882 		goto logexit;
2883 	}
2884 	if (verbose) {
2885 		DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
2886 		    attach_pnt, hint, flag, verbose);
2887 	}
2888 
2889 	if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
2890 	    SUNW_KERN_PUB"dr", km_flag)) == NULL) {
2891 		rv = -2;
2892 		goto logexit;
2893 	}
2894 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2895 	evnt_val.value.sv_string = attach_pnt;
2896 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
2897 	    km_flag)) != 0)
2898 		goto logexit;
2899 
2900 	evnt_val.value_type = SE_DATA_TYPE_STRING;
2901 	evnt_val.value.sv_string = hint;
2902 	if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
2903 	    km_flag)) != 0) {
2904 		sysevent_free_attr(evnt_attr_list);
2905 		goto logexit;
2906 	}
2907 
2908 	(void) sysevent_attach_attributes(ev, evnt_attr_list);
2909 
2910 	/*
2911 	 * Log the event but do not sleep waiting for its
2912 	 * delivery. This provides insulation from syseventd.
2913 	 */
2914 	rv = log_sysevent(ev, SE_NOSLEEP, &eid);
2915 
2916 logexit:
2917 	if (ev)
2918 		sysevent_free(ev);
2919 	if ((rv != 0) && verbose)
2920 		cmn_err(CE_WARN, "drmach_log_sysevent failed (rv %d) for %s "
2921 		    " %s\n", rv, attach_pnt, hint);
2922 
2923 	return (rv);
2924 }
2925 
2926 #define	OPL_DR_STATUS_PROP "dr-status"
2927 
2928 static int
2929 opl_check_dr_status()
2930 {
2931 	pnode_t	node;
2932 	int	rtn, len;
2933 	char	*str;
2934 
2935 	node = prom_rootnode();
2936 	if (node == OBP_BADNODE) {
2937 		return (1);
2938 	}
2939 
2940 	len = prom_getproplen(node, OPL_DR_STATUS_PROP);
2941 	if (len == -1) {
2942 		/*
2943 		 * dr-status doesn't exist when DR is activated and
2944 		 * any warning messages aren't needed.
2945 		 */
2946 		return (1);
2947 	}
2948 
2949 	str = (char *)kmem_zalloc(len+1, KM_SLEEP);
2950 	rtn = prom_getprop(node, OPL_DR_STATUS_PROP, str);
2951 	kmem_free(str, len + 1);
2952 	if (rtn == -1) {
2953 		return (1);
2954 	} else {
2955 		return (0);
2956 	}
2957 }
2958 
2959 /* we are allocating memlist from TLB locked pages to avoid tlbmisses */
2960 
2961 static struct memlist *
2962 drmach_memlist_add_span(drmach_copy_rename_program_t *p,
2963     struct memlist *mlist, uint64_t base, uint64_t len)
2964 {
2965 	struct memlist	*ml, *tl, *nl;
2966 
2967 	if (len == 0ull)
2968 		return (NULL);
2969 
2970 	if (mlist == NULL) {
2971 		mlist = p->free_mlist;
2972 		if (mlist == NULL)
2973 			return (NULL);
2974 		p->free_mlist = mlist->ml_next;
2975 		mlist->ml_address = base;
2976 		mlist->ml_size = len;
2977 		mlist->ml_next = mlist->ml_prev = NULL;
2978 
2979 		return (mlist);
2980 	}
2981 
2982 	for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
2983 		if (base < ml->ml_address) {
2984 			if ((base + len) < ml->ml_address) {
2985 				nl = p->free_mlist;
2986 				if (nl == NULL)
2987 					return (NULL);
2988 				p->free_mlist = nl->ml_next;
2989 				nl->ml_address = base;
2990 				nl->ml_size = len;
2991 				nl->ml_next = ml;
2992 				if ((nl->ml_prev = ml->ml_prev) != NULL)
2993 					nl->ml_prev->ml_next = nl;
2994 				ml->ml_prev = nl;
2995 				if (mlist == ml)
2996 					mlist = nl;
2997 			} else {
2998 				ml->ml_size = MAX((base + len),
2999 				    (ml->ml_address + ml->ml_size)) - base;
3000 				ml->ml_address = base;
3001 			}
3002 			break;
3003 
3004 		} else if (base <= (ml->ml_address + ml->ml_size)) {
3005 			ml->ml_size =
3006 			    MAX((base + len), (ml->ml_address + ml->ml_size)) -
3007 			    MIN(ml->ml_address, base);
3008 			ml->ml_address = MIN(ml->ml_address, base);
3009 			break;
3010 		}
3011 	}
3012 	if (ml == NULL) {
3013 		nl = p->free_mlist;
3014 		if (nl == NULL)
3015 			return (NULL);
3016 		p->free_mlist = nl->ml_next;
3017 		nl->ml_address = base;
3018 		nl->ml_size = len;
3019 		nl->ml_next = NULL;
3020 		nl->ml_prev = tl;
3021 		tl->ml_next = nl;
3022 	}
3023 
3024 	return (mlist);
3025 }
3026 
3027 /*
3028  * The routine performs the necessary memory COPY and MC adr SWITCH.
3029  * Both operations MUST be at the same "level" so that the stack is
3030  * maintained correctly between the copy and switch.  The switch
3031  * portion implements a caching mechanism to guarantee the code text
3032  * is cached prior to execution.  This is to guard against possible
3033  * memory access while the MC adr's are being modified.
3034  *
 * IMPORTANT: The drmach_copy_rename_end() function must immediately
 * follow drmach_copy_rename_prog__relocatable() so that the correct
 * "length" of drmach_copy_rename_prog__relocatable() can be
 * calculated.  This routine MUST be a LEAF function, i.e. it can
 * make NO function calls, primarily for two reasons:
 *
 *	1. We must keep the stack consistent across the "switch".
 *	2. Function calls are compiled to relative offsets, and
 *	   when we execute this function we'll be executing it from
 *	   a copied version in a different area of memory, thus
 *	   the relative offsets will be bogus.
3046  *
3047  * Moreover, it must have the "__relocatable" suffix to inform DTrace
3048  * providers (and anything else, for that matter) that this
3049  * function's text is manually relocated elsewhere before it is
3050  * executed.  That is, it cannot be safely instrumented with any
3051  * methodology that is PC-relative.
3052  */
3053 
/*
 * Timeout, in seconds, for the rename (FMEM) command.  It is
 * multiplied by the system clock frequency to obtain the delay.
 *
 * FMEM command itself should complete within 15 sec.
 * We add 2 more sec to be conservative.
 *
 * Note that there is also a SCF BUSY bit checking
 * in drmach_asm.s right before FMEM command is
 * issued.  XSCF sets the SCF BUSY bit when the
 * other domain on the same PSB reboots and it
 * will not be able to service the FMEM command
 * within 15 sec.   After setting the SCF BUSY
 * bit, XSCF will wait a while before servicing
 * other reboot command so there is no race
 * condition.
 */

static int	fmem_timeout = 17;

/*
 *	Minimum expected copy rate, in bytes per second.
 *
 *	The empirical data on some OPL system shows that
 *	we can copy 250 MB per second.  We set it to
 *	80 MB to be conservative.  In normal case,
 *	this timeout does not affect anything.
 */

static int	min_copy_size_per_sec = 80 * 1024 * 1024;

/*
 *	This is the timeout value, in seconds, for the xcall
 *	synchronization to get all the CPU ready to do the parallel
 *	copying.  drmach_copy_rename_prog__relocatable() multiplies it
 *	by system_clock_freq to form its deadline.
 *	Even on a fully loaded system, 10 sec. should be long
 *	enough.
 */

static int	cpu_xcall_delay = 10;
/*
 * NOTE(review): not referenced in this part of the file; presumably a
 * switch that disables the multi-CPU copy -- confirm against its users.
 */
int drmach_disable_mcopy = 0;
3093 
/*
 * The following delay loop executes sleep instruction to yield the
 * CPU to other strands.  If this is not done, some strand will tie
 * up the CPU in busy loops while the other strand cannot do useful
 * work.  The copy procedure will take a much longer time without this.
 *
 * ms:		delay in milliseconds
 * freq:	stick frequency (ticks per second) used to convert ms
 *		into a stick count
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions are converted and multiplied as a whole.  The macro
 * expands to a braced block declaring its own locals (start, nstick,
 * now), so it must be used as a statement and its arguments must not
 * refer to variables with those names.
 */
#define	DR_DELAY_IL(ms, freq)					\
	{							\
		uint64_t start;					\
		uint64_t nstick;				\
		volatile uint64_t now;				\
		nstick = ((uint64_t)(ms) * (freq)) / 1000;	\
		start = drmach_get_stick_il();			\
		now = start;					\
		while ((now - start) <= nstick) {		\
			drmach_sleep_il();			\
			now = drmach_get_stick_il();		\
		}						\
	}
3113 
/*
 * Number of consecutive "no progress" checks the master tolerates from
 * a slave CPU before giving up with EOPL_FMEM_COPY_TIMEOUT.
 * Each loop is 2ms (two 1ms DR_DELAY_IL() calls), timeout at 1000ms.
 */
static int drmach_copy_rename_timeout = 500;
3116 
/*
 * Copy-rename program run by every participating CPU.
 *
 * prog:	copy-rename program descriptor
 * cpuid:	id of the CPU executing this invocation
 *
 * The CPU whose id equals prog->data->cpuid acts as the master; every
 * other caller acts as a slave.  The routine proceeds in three phases:
 *
 *   1. Rendezvous.  Each CPU posts FMEM_LOOP_COPY_READY in
 *	prog->critical->stat[cpuid].  The master waits until every CPU
 *	in cpu_slave_set has posted it and then publishes
 *	FMEM_LOOP_COPY_READY in fmem_status.stat; slaves wait for that
 *	(or for fmem_status.error to become non-zero).
 *
 *   2. Copy.  Each CPU in cpu_copy_set copies the spans of its own
 *	memlist (cpu_ml[cpuid]) 32 bytes at a time from the source to
 *	the target copy base, bumping prog->stat->nbytes[cpuid] as a
 *	liveness counter, and then flushes its cache.
 *
 *   3. Rename.  The master waits for every slave to reach
 *	FMEM_LOOP_FMEM_READY and then calls prog->critical->fmem();
 *	slaves call prog->critical->loop().
 *
 * Returns, on the master, the result of prog->critical->fmem() or one
 * of EOPL_FMEM_XC_TIMEOUT, EOPL_FMEM_COPY_ERROR, EOPL_FMEM_COPY_TIMEOUT;
 * on a slave, 0 after the loop script returns or EOPL_FMEM_TERMINATE
 * when the master has flagged an error.
 */
static int
drmach_copy_rename_prog__relocatable(drmach_copy_rename_program_t *prog,
    int cpuid)
{
	struct memlist		*ml;
	register int		rtn;
	int			i;
	register uint64_t	curr, limit;
	extern uint64_t		drmach_get_stick_il();
	extern void		membar_sync_il();
	extern void		flush_instr_mem_il(void*);
	extern void		flush_windows_il(void);
	/* written and read on the master path only */
	uint64_t		copy_start;

	/*
	 * flush_windows is moved here to make sure all
	 * registers used in the callers are flushed to
	 * memory before the copy.
	 *
	 * If flush_windows() is called too early in the
	 * calling function, the compiler might put some
	 * data in the local registers after flush_windows().
	 * After FMA, if there is any fill trap, the registers
	 * will contain stale data.
	 */

	flush_windows_il();

	/* announce that this CPU has arrived at the rendezvous */
	prog->critical->stat[cpuid] = FMEM_LOOP_COPY_READY;
	membar_sync_il();

	if (prog->data->cpuid == cpuid) {
		/* master: deadline for all slaves to check in */
		limit = drmach_get_stick_il();
		limit += cpu_xcall_delay * system_clock_freq;
		for (i = 0; i < NCPU; i++) {
			if (CPU_IN_SET(prog->data->cpu_slave_set, i)) {
				/* wait for all CPU's to be ready */
				for (;;) {
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_COPY_READY) {
						break;
					}
					DR_DELAY_IL(1, prog->data->stick_freq);
				}
				/*
				 * NOTE(review): the deadline is only
				 * compared once the slave has reported
				 * ready; the wait loop above has no
				 * timeout of its own.
				 */
				curr = drmach_get_stick_il();
				if (curr > limit) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_XC_TIMEOUT;
					return (EOPL_FMEM_XC_TIMEOUT);
				}
			}
		}
		/* release the slaves and start the copy clock */
		prog->data->fmem_status.stat = FMEM_LOOP_COPY_READY;
		membar_sync_il();
		copy_start = drmach_get_stick_il();
	} else {
		/* slave: wait for the master's go-ahead or an error */
		for (;;) {
			if (prog->data->fmem_status.stat ==
			    FMEM_LOOP_COPY_READY) {
				break;
			}
			if (prog->data->fmem_status.error) {
				prog->data->error[cpuid] = EOPL_FMEM_TERMINATE;
				return (EOPL_FMEM_TERMINATE);
			}
			DR_DELAY_IL(1, prog->data->stick_freq);
		}
	}

	/*
	 * DO COPY.
	 */
	if (CPU_IN_SET(prog->data->cpu_copy_set, cpuid)) {
		for (ml = prog->data->cpu_ml[cpuid]; ml; ml = ml->ml_next) {
			uint64_t	s_pa, t_pa;
			uint64_t	nbytes;

			/* memlist addresses are offsets from the copy bases */
			s_pa = prog->data->s_copybasepa + ml->ml_address;
			t_pa = prog->data->t_copybasepa + ml->ml_address;
			nbytes = ml->ml_size;

			while (nbytes != 0ull) {
				/*
				 * If the master has detected error, we just
				 * bail out
				 */
				if (prog->data->fmem_status.error !=
				    ESBD_NOERROR) {
					prog->data->error[cpuid] =
					    EOPL_FMEM_TERMINATE;
					return (EOPL_FMEM_TERMINATE);
				}
				/*
				 * This copy does NOT use an ASI
				 * that avoids the Ecache, therefore
				 * the dst_pa addresses may remain
				 * in our Ecache after the dst_pa
				 * has been removed from the system.
				 * A subsequent write-back to memory
				 * will cause an ARB-stop because the
				 * physical address no longer exists
				 * in the system. Therefore we must
				 * flush out local Ecache after we
				 * finish the copy.
				 */

				/* copy 32 bytes at src_pa to dst_pa */
				bcopy32_il(s_pa, t_pa);

				/*
				 * increment the counter to signal that we are
				 * alive
				 */
				prog->stat->nbytes[cpuid] += 32;

				/* increment by 32 bytes */
				s_pa += (4 * sizeof (uint64_t));
				t_pa += (4 * sizeof (uint64_t));

				/* decrement by 32 bytes */
				nbytes -= (4 * sizeof (uint64_t));
			}
		}
		prog->critical->stat[cpuid] = FMEM_LOOP_COPY_DONE;
		membar_sync_il();
	}

	/*
	 * Since bcopy32_il() does NOT use an ASI to bypass
	 * the Ecache, we need to flush our Ecache after
	 * the copy is complete.
	 */
	flush_cache_il();

	/*
	 * drmach_fmem_exec_script()
	 */
	if (prog->data->cpuid == cpuid) {
		uint64_t	last, now;

		/* slaves are given copy_delay ticks from the copy start */
		limit = copy_start + prog->data->copy_delay;
		for (i = 0; i < NCPU; i++) {
			if (!CPU_IN_SET(prog->data->cpu_slave_set, i))
				continue;

			for (;;) {
				/*
				 * we get FMEM_LOOP_FMEM_READY in
				 * normal case
				 */
				if (prog->critical->stat[i] ==
				    FMEM_LOOP_FMEM_READY) {
					break;
				}
				/* got error traps */
				if (prog->data->error[i] ==
				    EOPL_FMEM_COPY_ERROR) {
					prog->data->fmem_status.error =
					    EOPL_FMEM_COPY_ERROR;
					return (EOPL_FMEM_COPY_ERROR);
				}
				/*
				 * if we have not reached limit, wait
				 * more
				 */
				curr = drmach_get_stick_il();
				if (curr <= limit)
					continue;

				/* past the limit: record who is being waited on */
				prog->data->slowest_cpuid = i;
				prog->data->copy_wait_time = curr - copy_start;

				/* now check if slave is alive */
				last = prog->stat->nbytes[i];

				DR_DELAY_IL(1, prog->data->stick_freq);

				now = prog->stat->nbytes[i];
				if (now <= last) {
					/*
					 * no progress, perhaps just
					 * finished
					 */
					DR_DELAY_IL(1, prog->data->stick_freq);
					if (prog->critical->stat[i] ==
					    FMEM_LOOP_FMEM_READY)
						break;
					/* copy error */
					if (prog->data->error[i] ==
					    EOPL_FMEM_COPY_ERROR) {
						prog->data-> fmem_status.error =
						    EOPL_FMEM_COPY_ERROR;
						return (EOPL_FMEM_COPY_ERROR);
					}

					/*
					 * count the stalled check; give up
					 * after drmach_copy_rename_timeout
					 * of them
					 */
					prog->data->copy_rename_count++;
					if (prog->data->copy_rename_count
					    < drmach_copy_rename_timeout) {
						continue;
					} else {
						prog->data->fmem_status.error =
						    EOPL_FMEM_COPY_TIMEOUT;
						return (EOPL_FMEM_COPY_TIMEOUT);
					}
				}
			}
		}

		prog->critical->stat[cpuid] = FMEM_LOOP_FMEM_READY;
		prog->data->fmem_status.stat  = FMEM_LOOP_FMEM_READY;

		membar_sync_il();
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_exec_script()
		 */
		rtn = prog->critical->fmem((void *)prog->critical, PAGESIZE);
		return (rtn);
	} else {
		flush_instr_mem_il((void*) (prog->critical));
		/*
		 * drmach_fmem_loop_script()
		 */
		rtn = prog->critical->loop((void *)(prog->critical), PAGESIZE,
		    (void *)&(prog->critical->stat[cpuid]));
		/* the loop script's result is reported through error[] */
		prog->data->error[cpuid] = rtn;
		/* slave thread does not care the rv */
		return (0);
	}
}
3347 
/*
 * Empty marker function: its address marks the end of
 * drmach_copy_rename_prog__relocatable() so that the size of that
 * routine can be computed as the difference of the two addresses.
 */
static void
drmach_copy_rename_end(void)
{
	/*
	 * IMPORTANT:	This function's location MUST be located immediately
	 *		following drmach_copy_rename_prog__relocatable to
	 *		accurately estimate its size.  Note that this assumes
	 *		the compiler keeps these functions in the order in
	 *		which they appear :-o
	 */
}
3359 
3360 
3361 static int
3362 drmach_setup_memlist(drmach_copy_rename_program_t *p)
3363 {
3364 	struct memlist *ml;
3365 	caddr_t buf;
3366 	int nbytes, s, n_elements;
3367 
3368 	nbytes = PAGESIZE;
3369 	n_elements = 0;
3370 	s = roundup(sizeof (struct memlist), sizeof (void *));
3371 	p->free_mlist = NULL;
3372 	buf = p->memlist_buffer;
3373 	while (nbytes >= sizeof (struct memlist)) {
3374 		ml = (struct memlist *)buf;
3375 		ml->ml_next = p->free_mlist;
3376 		p->free_mlist = ml;
3377 		buf += s;
3378 		n_elements++;
3379 		nbytes -= s;
3380 	}
3381 	return (n_elements);
3382 }
3383 
3384 static void
3385 drmach_lock_critical(caddr_t va, caddr_t new_va)
3386 {
3387 	tte_t tte;
3388 	int i;
3389 
3390 	kpreempt_disable();
3391 
3392 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3393 		vtag_flushpage(new_va, (uint64_t)ksfmmup);
3394 		sfmmu_memtte(&tte, va_to_pfn(va), PROC_DATA|HAT_NOSYNC, TTE8K);
3395 		tte.tte_intlo |= TTE_LCK_INT;
3396 		sfmmu_dtlb_ld_kva(new_va, &tte);
3397 		sfmmu_itlb_ld_kva(new_va, &tte);
3398 		va += PAGESIZE;
3399 		new_va += PAGESIZE;
3400 	}
3401 }
3402 
3403 static void
3404 drmach_unlock_critical(caddr_t va)
3405 {
3406 	int i;
3407 
3408 	for (i = 0; i < DRMACH_FMEM_LOCKED_PAGES; i++) {
3409 		vtag_flushpage(va, (uint64_t)ksfmmup);
3410 		va += PAGESIZE;
3411 	}
3412 
3413 	kpreempt_enable();
3414 }
3415 
3416 sbd_error_t *
3417 drmach_copy_rename_init(drmachid_t t_id, drmachid_t s_id,
3418     struct memlist *c_ml, drmachid_t *pgm_id)
3419 {
3420 	drmach_mem_t	*s_mem;
3421 	drmach_mem_t	*t_mem;
3422 	struct memlist	*x_ml;
3423 	uint64_t	s_copybasepa, t_copybasepa;
3424 	uint_t		len;
3425 	caddr_t		bp, wp;
3426 	int		s_bd, t_bd, cpuid, active_cpus, i;
3427 	int		max_elms, mlist_size, rv;
3428 	uint64_t	c_addr;
3429 	size_t		c_size, copy_sz, sz;
3430 	extern void	drmach_fmem_loop_script();
3431 	extern void	drmach_fmem_loop_script_rtn();
3432 	extern int	drmach_fmem_exec_script();
3433 	extern void	drmach_fmem_exec_script_end();
3434 	sbd_error_t	*err;
3435 	drmach_copy_rename_program_t *prog = NULL;
3436 	drmach_copy_rename_program_t *prog_kmem = NULL;
3437 	void		(*mc_suspend)(void);
3438 	void		(*mc_resume)(void);
3439 	int		(*scf_fmem_start)(int, int);
3440 	int		(*scf_fmem_end)(void);
3441 	int		(*scf_fmem_cancel)(void);
3442 	uint64_t	(*scf_get_base_addr)(void);
3443 
3444 	if (!DRMACH_IS_MEM_ID(s_id))
3445 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3446 	if (!DRMACH_IS_MEM_ID(t_id))
3447 		return (drerr_new(0, EOPL_INAPPROP, NULL));
3448 
3449 	for (i = 0; i < NCPU; i++) {
3450 		int lsb_id, onb_core_num, strand_id;
3451 		drmach_board_t *bp;
3452 
3453 		/*
3454 		 * this kind of CPU will spin in cache
3455 		 */
3456 		if (CPU_IN_SET(cpu_ready_set, i))
3457 			continue;
3458 
3459 		/*
3460 		 * Now check for any inactive CPU's that
3461 		 * have been hotadded.  This can only occur in
3462 		 * error condition in drmach_cpu_poweron().
3463 		 */
3464 		lsb_id = LSB_ID(i);
3465 		onb_core_num = ON_BOARD_CORE_NUM(i);
3466 		strand_id = STRAND_ID(i);
3467 		bp = drmach_get_board_by_bnum(lsb_id);
3468 		if (bp == NULL)
3469 			continue;
3470 		if (bp->cores[onb_core_num].core_hotadded &
3471 		    (1 << strand_id)) {
3472 			if (!(bp->cores[onb_core_num].core_started &
3473 			    (1 << strand_id))) {
3474 				return (drerr_new(1, EOPL_CPU_STATE, NULL));
3475 			}
3476 		}
3477 	}
3478 
3479 	mc_suspend = (void (*)(void))
3480 	    modgetsymvalue("opl_mc_suspend", 0);
3481 	mc_resume = (void (*)(void))
3482 	    modgetsymvalue("opl_mc_resume", 0);
3483 
3484 	if (mc_suspend == NULL || mc_resume == NULL) {
3485 		return (drerr_new(1, EOPL_MC_OPL, NULL));
3486 	}
3487 
3488 	scf_fmem_start = (int (*)(int, int))
3489 	    modgetsymvalue("scf_fmem_start", 0);
3490 	if (scf_fmem_start == NULL) {
3491 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3492 	}
3493 	scf_fmem_end = (int (*)(void))
3494 	    modgetsymvalue("scf_fmem_end", 0);
3495 	if (scf_fmem_end == NULL) {
3496 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3497 	}
3498 	scf_fmem_cancel = (int (*)(void))
3499 	    modgetsymvalue("scf_fmem_cancel", 0);
3500 	if (scf_fmem_cancel == NULL) {
3501 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3502 	}
3503 	scf_get_base_addr = (uint64_t (*)(void))
3504 	    modgetsymvalue("scf_get_base_addr", 0);
3505 	if (scf_get_base_addr == NULL) {
3506 		return (drerr_new(1, EOPL_SCF_FMEM, NULL));
3507 	}
3508 	s_mem = s_id;
3509 	t_mem = t_id;
3510 
3511 	s_bd = s_mem->dev.bp->bnum;
3512 	t_bd = t_mem->dev.bp->bnum;
3513 
3514 	/* calculate source and target base pa */
3515 
3516 	s_copybasepa = s_mem->slice_base;
3517 	t_copybasepa = t_mem->slice_base;
3518 
3519 	/* adjust copy memlist addresses to be relative to copy base pa */
3520 	x_ml = c_ml;
3521 	mlist_size = 0;
3522 	while (x_ml != NULL) {
3523 		x_ml->ml_address -= s_copybasepa;
3524 		x_ml = x_ml->ml_next;
3525 		mlist_size++;
3526 	}
3527 
3528 	/*
3529 	 * bp will be page aligned, since we're calling
3530 	 * kmem_zalloc() with an exact multiple of PAGESIZE.
3531 	 */
3532 
3533 	prog_kmem = (drmach_copy_rename_program_t *)kmem_zalloc(
3534 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, KM_SLEEP);
3535 
3536 	prog_kmem->prog = prog_kmem;
3537 
3538 	/*
3539 	 * To avoid MTLB hit, we allocate a new VM space and remap
3540 	 * the kmem_alloc buffer to that address.  This solves
3541 	 * 2 problems we found:
3542 	 * - the kmem_alloc buffer can be just a chunk inside
3543 	 *   a much larger, e.g. 4MB buffer and MTLB will occur
3544 	 *   if there are both a 4MB and a 8K TLB mapping to
3545 	 *   the same VA range.
3546 	 * - the kmem mapping got dropped into the TLB by other
3547 	 *   strands, unintentionally.
3548 	 * Note that the pointers like data, critical, memlist_buffer,
3549 	 * and stat inside the copy rename structure are mapped to this
3550 	 * alternate VM space so we must make sure we lock the TLB mapping
3551 	 * whenever we access data pointed to by these pointers.
3552 	 */
3553 
3554 	prog = prog_kmem->locked_prog = vmem_alloc(heap_arena,
3555 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE, VM_SLEEP);
3556 	wp = bp = (caddr_t)prog;
3557 
3558 	/* Now remap prog_kmem to prog */
3559 	drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
3560 
3561 	/* All pointers in prog are based on the alternate mapping */
3562 	prog->data = (drmach_copy_rename_data_t *)roundup(((uint64_t)prog +
3563 	    sizeof (drmach_copy_rename_program_t)), sizeof (void *));
3564 
3565 	ASSERT(((uint64_t)prog->data + sizeof (drmach_copy_rename_data_t))
3566 	    <= ((uint64_t)prog + PAGESIZE));
3567 
3568 	prog->critical = (drmach_copy_rename_critical_t *)
3569 	    (wp + DRMACH_FMEM_CRITICAL_PAGE * PAGESIZE);
3570 
3571 	prog->memlist_buffer = (caddr_t)(wp + DRMACH_FMEM_MLIST_PAGE *
3572 	    PAGESIZE);
3573 
3574 	prog->stat = (drmach_cr_stat_t *)(wp + DRMACH_FMEM_STAT_PAGE *
3575 	    PAGESIZE);
3576 
3577 	/* LINTED */
3578 	ASSERT(sizeof (drmach_cr_stat_t) <= ((DRMACH_FMEM_LOCKED_PAGES -
3579 	    DRMACH_FMEM_STAT_PAGE) * PAGESIZE));
3580 
3581 	prog->critical->scf_reg_base = (uint64_t)-1;
3582 	prog->critical->scf_td[0] = (s_bd & 0xff);
3583 	prog->critical->scf_td[1] = (t_bd & 0xff);
3584 	for (i = 2; i < 15; i++) {
3585 		prog->critical->scf_td[i]   = 0;
3586 	}
3587 	prog->critical->scf_td[15] = ((0xaa + s_bd + t_bd) & 0xff);
3588 
3589 	bp = (caddr_t)prog->critical;
3590 	len = sizeof (drmach_copy_rename_critical_t);
3591 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3592 
3593 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3594 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3595 
3596 	/*
3597 	 * We always leave 1K nop's to prevent the processor from
3598 	 * speculative execution that causes memory access
3599 	 */
3600 	wp = wp + len + 1024;
3601 
3602 	len = (uint_t)((ulong_t)drmach_fmem_exec_script_end -
3603 	    (ulong_t)drmach_fmem_exec_script);
3604 	/* this is the entry point of the loop script */
3605 	wp = wp + len + 1024;
3606 
3607 	len = (uint_t)((ulong_t)drmach_fmem_exec_script -
3608 	    (ulong_t)drmach_fmem_loop_script);
3609 	wp = wp + len + 1024;
3610 
3611 	/* now we make sure there is 1K extra */
3612 
3613 	if ((wp - bp) > PAGESIZE) {
3614 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3615 		goto out;
3616 	}
3617 
3618 	bp = (caddr_t)prog->critical;
3619 	len = sizeof (drmach_copy_rename_critical_t);
3620 	wp = (caddr_t)roundup((uint64_t)bp + len, sizeof (void *));
3621 
3622 	prog->critical->run = (int (*)())(wp);
3623 	len = (uint_t)((ulong_t)drmach_copy_rename_end -
3624 	    (ulong_t)drmach_copy_rename_prog__relocatable);
3625 
3626 	bcopy((caddr_t)drmach_copy_rename_prog__relocatable, wp, len);
3627 
3628 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3629 
3630 	prog->critical->fmem = (int (*)())(wp);
3631 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3632 	    (ulong_t)drmach_fmem_exec_script);
3633 	bcopy((caddr_t)drmach_fmem_exec_script, wp, len);
3634 
3635 	len = (int)((ulong_t)drmach_fmem_exec_script_end -
3636 	    (ulong_t)drmach_fmem_exec_script);
3637 	wp = (caddr_t)roundup((uint64_t)wp + len, 1024);
3638 
3639 	prog->critical->loop = (int (*)())(wp);
3640 	len = (int)((ulong_t)drmach_fmem_exec_script -
3641 	    (ulong_t)drmach_fmem_loop_script);
3642 	bcopy((caddr_t)drmach_fmem_loop_script, (void *)wp, len);
3643 	len = (int)((ulong_t)drmach_fmem_loop_script_rtn-
3644 	    (ulong_t)drmach_fmem_loop_script);
3645 	prog->critical->loop_rtn = (void (*)()) (wp+len);
3646 
3647 	prog->data->fmem_status.error = ESBD_NOERROR;
3648 
3649 	/* now we are committed, call SCF, soft suspend mac patrol */
3650 	if ((*scf_fmem_start)(s_bd, t_bd)) {
3651 		err = drerr_new(1, EOPL_SCF_FMEM_START, NULL);
3652 		goto out;
3653 	}
3654 	prog->data->scf_fmem_end = scf_fmem_end;
3655 	prog->data->scf_fmem_cancel = scf_fmem_cancel;
3656 	prog->data->scf_get_base_addr = scf_get_base_addr;
3657 	prog->data->fmem_status.op |= OPL_FMEM_SCF_START;
3658 
3659 	/* soft suspend mac patrol */
3660 	(*mc_suspend)();
3661 	prog->data->fmem_status.op |= OPL_FMEM_MC_SUSPEND;
3662 	prog->data->mc_resume = mc_resume;
3663 
3664 	prog->critical->inst_loop_ret  =
3665 	    *(uint64_t *)(prog->critical->loop_rtn);
3666 
3667 	/*
3668 	 * 0x30800000 is op code "ba,a	+0"
3669 	 */
3670 
3671 	*(uint_t *)(prog->critical->loop_rtn) = (uint_t)(0x30800000);
3672 
3673 	/*
3674 	 * set the value of SCF FMEM TIMEOUT
3675 	 */
3676 	prog->critical->delay = fmem_timeout * system_clock_freq;
3677 
3678 	prog->data->s_mem = (drmachid_t)s_mem;
3679 	prog->data->t_mem = (drmachid_t)t_mem;
3680 
3681 	cpuid = CPU->cpu_id;
3682 	prog->data->cpuid = cpuid;
3683 	prog->data->cpu_ready_set = cpu_ready_set;
3684 	prog->data->cpu_slave_set = cpu_ready_set;
3685 	prog->data->slowest_cpuid = (processorid_t)-1;
3686 	prog->data->copy_wait_time = 0;
3687 	prog->data->copy_rename_count = 0;
3688 	CPUSET_DEL(prog->data->cpu_slave_set, cpuid);
3689 
3690 	for (i = 0; i < NCPU; i++) {
3691 		prog->data->cpu_ml[i] = NULL;
3692 	}
3693 
3694 	/*
3695 	 * max_elms -	max number of memlist structures that
3696 	 *		may be allocated for the CPU memory list.
3697 	 *		If there are too many memory span (because
3698 	 *		of fragmentation) than number of memlist
3699 	 *		available, we should return error.
3700 	 */
3701 	max_elms = drmach_setup_memlist(prog);
3702 	if (max_elms < mlist_size) {
3703 		err = drerr_new(1, EOPL_FMEM_SETUP, NULL);
3704 		goto err_out;
3705 	}
3706 
3707 	active_cpus = 0;
3708 	if (drmach_disable_mcopy) {
3709 		active_cpus = 1;
3710 		CPUSET_ADD(prog->data->cpu_copy_set, cpuid);
3711 	} else {
3712 		int max_cpu_num;
3713 		/*
3714 		 * The parallel copy procedure is going to split some
3715 		 * of the elements of the original memory copy list.
3716 		 * The number of added elements can be up to
3717 		 * (max_cpu_num - 1).  It means that max_cpu_num
3718 		 * should satisfy the following condition:
3719 		 * (max_cpu_num - 1) + mlist_size <= max_elms.
3720 		 */
3721 		max_cpu_num = max_elms - mlist_size + 1;
3722 
3723 		for (i = 0; i < NCPU; i++) {
3724 			if (CPU_IN_SET(cpu_ready_set, i) &&
3725 			    CPU_ACTIVE(cpu[i])) {
3726 				/*
3727 				 * To reduce the level-2 cache contention only
3728 				 * one strand per core will participate
3729 				 * in the copy. If the strand with even cpu_id
3730 				 * number is present in the ready set, we will
3731 				 * include this strand in the copy set. If it
3732 				 * is not present in the ready set, we check for
3733 				 * the strand with the consecutive odd cpu_id
3734 				 * and include it, provided that it is
3735 				 * present in the ready set.
3736 				 */
3737 				if (!(i & 0x1) ||
3738 				    !CPU_IN_SET(prog->data->cpu_copy_set,
3739 				    i - 1)) {
3740 					CPUSET_ADD(prog->data->cpu_copy_set, i);
3741 					active_cpus++;
3742 					/*
3743 					 * We cannot have more than
3744 					 * max_cpu_num CPUs in the copy
3745 					 * set, because each CPU has to
3746 					 * have at least one element
3747 					 * long memory copy list.
3748 					 */
3749 					if (active_cpus >= max_cpu_num)
3750 						break;
3751 
3752 				}
3753 			}
3754 		}
3755 	}
3756 
3757 	x_ml = c_ml;
3758 	sz = 0;
3759 	while (x_ml != NULL) {
3760 		sz += x_ml->ml_size;
3761 		x_ml = x_ml->ml_next;
3762 	}
3763 
3764 	copy_sz = sz/active_cpus;
3765 	copy_sz = roundup(copy_sz, MMU_PAGESIZE4M);
3766 
3767 	while (sz > copy_sz*active_cpus) {
3768 		copy_sz += MMU_PAGESIZE4M;
3769 	}
3770 
3771 	prog->data->stick_freq = system_clock_freq;
3772 	prog->data->copy_delay = ((copy_sz / min_copy_size_per_sec) + 2) *
3773 	    system_clock_freq;
3774 
3775 	x_ml = c_ml;
3776 	c_addr = x_ml->ml_address;
3777 	c_size = x_ml->ml_size;
3778 
3779 	for (i = 0; i < NCPU; i++) {
3780 		prog->stat->nbytes[i] = 0;
3781 		if (!CPU_IN_SET(prog->data->cpu_copy_set, i)) {
3782 			continue;
3783 		}
3784 		sz = copy_sz;
3785 
3786 		while (sz) {
3787 			if (c_size > sz) {
3788 				if ((prog->data->cpu_ml[i] =
3789 				    drmach_memlist_add_span(prog,
3790 				    prog->data->cpu_ml[i],
3791 				    c_addr, sz)) == NULL) {
3792 					cmn_err(CE_WARN,
3793 					    "Unexpected drmach_memlist_add_span"
3794 					    " failure.");
3795 					err = drerr_new(1, EOPL_FMEM_SETUP,
3796 					    NULL);
3797 					mc_resume();
3798 					goto out;
3799 				}
3800 				c_addr += sz;
3801 				c_size -= sz;
3802 				break;
3803 			} else {
3804 				sz -= c_size;
3805 				if ((prog->data->cpu_ml[i] =
3806 				    drmach_memlist_add_span(prog,
3807 				    prog->data->cpu_ml[i],
3808 				    c_addr, c_size)) == NULL) {
3809 					cmn_err(CE_WARN,
3810 					    "Unexpected drmach_memlist_add_span"
3811 					    " failure.");
3812 					err = drerr_new(1, EOPL_FMEM_SETUP,
3813 					    NULL);
3814 					mc_resume();
3815 					goto out;
3816 				}
3817 
3818 				x_ml = x_ml->ml_next;
3819 				if (x_ml != NULL) {
3820 					c_addr = x_ml->ml_address;
3821 					c_size = x_ml->ml_size;
3822 				} else {
3823 					goto end;
3824 				}
3825 			}
3826 		}
3827 	}
3828 end:
3829 	prog->data->s_copybasepa = s_copybasepa;
3830 	prog->data->t_copybasepa = t_copybasepa;
3831 	prog->data->c_ml = c_ml;
3832 	*pgm_id = prog_kmem;
3833 
3834 	/* Unmap the alternate space.  It will have to be remapped again */
3835 	drmach_unlock_critical((caddr_t)prog);
3836 	return (NULL);
3837 
3838 err_out:
3839 	mc_resume();
3840 	rv = (*prog->data->scf_fmem_cancel)();
3841 	if (rv) {
3842 		cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x", rv);
3843 	}
3844 out:
3845 	if (prog != NULL) {
3846 		drmach_unlock_critical((caddr_t)prog);
3847 		vmem_free(heap_arena, prog, DRMACH_FMEM_LOCKED_PAGES *
3848 		    PAGESIZE);
3849 	}
3850 	if (prog_kmem != NULL) {
3851 		kmem_free(prog_kmem, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3852 	}
3853 	return (err);
3854 }
3855 
3856 sbd_error_t *
3857 drmach_copy_rename_fini(drmachid_t id)
3858 {
3859 	drmach_copy_rename_program_t	*prog = id;
3860 	sbd_error_t			*err = NULL;
3861 	int				rv;
3862 	uint_t				fmem_error;
3863 
3864 	/*
3865 	 * Note that we have to delay calling SCF to find out the
3866 	 * status of the FMEM operation here because SCF cannot
3867 	 * respond while it is suspended.
3868 	 * This create a small window when we are sure about the
3869 	 * base address of the system board.
3870 	 * If there is any call to mc-opl to get memory unum,
3871 	 * mc-opl will return UNKNOWN as the unum.
3872 	 */
3873 
3874 	/*
3875 	 * we have to remap again because all the pointer like data,
3876 	 * critical in prog are based on the alternate vmem space.
3877 	 */
3878 	(void) drmach_lock_critical((caddr_t)prog, (caddr_t)prog->locked_prog);
3879 
3880 	if (prog->data->c_ml != NULL)
3881 		memlist_delete(prog->data->c_ml);
3882 
3883 	if ((prog->data->fmem_status.op &
3884 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) !=
3885 	    (OPL_FMEM_SCF_START | OPL_FMEM_MC_SUSPEND)) {
3886 		cmn_err(CE_PANIC, "drmach_copy_rename_fini: invalid op "
3887 		    "code %x\n", prog->data->fmem_status.op);
3888 	}
3889 
3890 	fmem_error = prog->data->fmem_status.error;
3891 	if (fmem_error != ESBD_NOERROR) {
3892 		err = drerr_new(1, fmem_error, NULL);
3893 	}
3894 
3895 	/* possible ops are SCF_START, MC_SUSPEND */
3896 	if (prog->critical->fmem_issued) {
3897 		if (fmem_error != ESBD_NOERROR) {
3898 			cmn_err(CE_PANIC, "Irrecoverable FMEM error %d\n",
3899 			    fmem_error);
3900 		}
3901 		rv = (*prog->data->scf_fmem_end)();
3902 		if (rv) {
3903 			cmn_err(CE_PANIC, "scf_fmem_end() failed rv=%d", rv);
3904 		}
3905 		/*
3906 		 * If we get here, rename is successful.
3907 		 * Do all the copy rename post processing.
3908 		 */
3909 		drmach_swap_pa((drmach_mem_t *)prog->data->s_mem,
3910 		    (drmach_mem_t *)prog->data->t_mem);
3911 	} else {
3912 		rv = (*prog->data->scf_fmem_cancel)();
3913 		if (rv) {
3914 			cmn_err(CE_WARN, "scf_fmem_cancel() failed rv=0x%x",
3915 			    rv);
3916 			if (!err) {
3917 				err = drerr_new(1, EOPL_SCF_FMEM_CANCEL,
3918 				    "scf_fmem_cancel() failed. rv = 0x%x", rv);
3919 			}
3920 		}
3921 	}
3922 	/* soft resume mac patrol */
3923 	(*prog->data->mc_resume)();
3924 
3925 	drmach_unlock_critical((caddr_t)prog->locked_prog);
3926 
3927 	vmem_free(heap_arena, prog->locked_prog,
3928 	    DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3929 	kmem_free(prog, DRMACH_FMEM_LOCKED_PAGES * PAGESIZE);
3930 	return (err);
3931 }
3932 
3933 /*ARGSUSED*/
3934 static void
3935 drmach_copy_rename_slave(struct regs *rp, drmachid_t id)
3936 {
3937 	drmach_copy_rename_program_t	*prog =
3938 	    (drmach_copy_rename_program_t *)id;
3939 	register int			cpuid;
3940 	extern void			drmach_flush();
3941 	extern void			membar_sync_il();
3942 	extern void			drmach_flush_icache();
3943 	on_trap_data_t			otd;
3944 
3945 	cpuid = CPU->cpu_id;
3946 
3947 	if (on_trap(&otd, OT_DATA_EC)) {
3948 		no_trap();
3949 		prog->data->error[cpuid] = EOPL_FMEM_COPY_ERROR;
3950 		prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3951 		drmach_flush_icache();
3952 		membar_sync_il();
3953 		return;
3954 	}
3955 
3956 
3957 	/*
3958 	 * jmp drmach_copy_rename_prog().
3959 	 */
3960 
3961 	drmach_flush(prog->critical, PAGESIZE);
3962 	(void) prog->critical->run(prog, cpuid);
3963 	drmach_flush_icache();
3964 
3965 	no_trap();
3966 
3967 	prog->critical->stat[cpuid] = FMEM_LOOP_EXIT;
3968 
3969 	membar_sync_il();
3970 }
3971 
3972 static void
3973 drmach_swap_pa(drmach_mem_t *s_mem, drmach_mem_t *t_mem)
3974 {
3975 	uint64_t s_base, t_base;
3976 	drmach_board_t *s_board, *t_board;
3977 	struct memlist *ml;
3978 
3979 	s_board = s_mem->dev.bp;
3980 	t_board = t_mem->dev.bp;
3981 	if (s_board == NULL || t_board == NULL) {
3982 		cmn_err(CE_PANIC, "Cannot locate source or target board\n");
3983 		return;
3984 	}
3985 	s_base = s_mem->slice_base;
3986 	t_base = t_mem->slice_base;
3987 
3988 	s_mem->slice_base = t_base;
3989 	s_mem->base_pa = (s_mem->base_pa - s_base) + t_base;
3990 
3991 	for (ml = s_mem->memlist; ml; ml = ml->ml_next) {
3992 		ml->ml_address = ml->ml_address - s_base + t_base;
3993 	}
3994 
3995 	t_mem->slice_base = s_base;
3996 	t_mem->base_pa = (t_mem->base_pa - t_base) + s_base;
3997 
3998 	for (ml = t_mem->memlist; ml; ml = ml->ml_next) {
3999 		ml->ml_address = ml->ml_address - t_base + s_base;
4000 	}
4001 
4002 	/*
4003 	 * IKP has to update the sb-mem-ranges for mac patrol driver
4004 	 * when it resumes, it will re-read the sb-mem-range property
4005 	 * to get the new base address
4006 	 */
4007 	if (oplcfg_pa_swap(s_board->bnum, t_board->bnum) != 0)
4008 		cmn_err(CE_PANIC, "Could not update device nodes\n");
4009 }
4010 
4011 void
4012 drmach_copy_rename(drmachid_t id)
4013 {
4014 	drmach_copy_rename_program_t	*prog_kmem = id;
4015 	drmach_copy_rename_program_t	*prog;
4016 	cpuset_t	cpuset;
4017 	int		cpuid;
4018 	uint64_t	inst;
4019 	register int	rtn;
4020 	extern int	in_sync;
4021 	int		old_in_sync;
4022 	extern void	drmach_sys_trap();
4023 	extern void	drmach_flush();
4024 	extern void	drmach_flush_icache();
4025 	extern uint64_t	patch_inst(uint64_t *, uint64_t);
4026 	on_trap_data_t	otd;
4027 
4028 
4029 	prog = prog_kmem->locked_prog;
4030 
4031 
4032 	/*
4033 	 * We must immediately drop in the TLB because all pointers
4034 	 * are based on the alternate vmem space.
4035 	 */
4036 
4037 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4038 
4039 	/*
4040 	 * we call scf to get the base address here becuase if scf
4041 	 * has not been suspended yet, the active path can be changing and
4042 	 * sometimes it is not even mapped.  We call the interface when
4043 	 * the OS has been quiesced.
4044 	 */
4045 	prog->critical->scf_reg_base = (*prog->data->scf_get_base_addr)();
4046 
4047 	if (prog->critical->scf_reg_base == (uint64_t)-1 ||
4048 	    prog->critical->scf_reg_base == 0) {
4049 		prog->data->fmem_status.error = EOPL_FMEM_SCF_ERR;
4050 		drmach_unlock_critical((caddr_t)prog);
4051 		return;
4052 	}
4053 
4054 	cpuset = prog->data->cpu_ready_set;
4055 
4056 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4057 		if (CPU_IN_SET(cpuset, cpuid)) {
4058 			prog->critical->stat[cpuid] = FMEM_LOOP_START;
4059 			prog->data->error[cpuid] = ESBD_NOERROR;
4060 		}
4061 	}
4062 
4063 	old_in_sync = in_sync;
4064 	in_sync = 1;
4065 	cpuid = CPU->cpu_id;
4066 
4067 	CPUSET_DEL(cpuset, cpuid);
4068 
4069 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4070 		if (CPU_IN_SET(cpuset, cpuid)) {
4071 			xc_one(cpuid, (xcfunc_t *)drmach_lock_critical,
4072 			    (uint64_t)prog_kmem, (uint64_t)prog);
4073 		}
4074 	}
4075 
4076 	cpuid = CPU->cpu_id;
4077 
4078 	xt_some(cpuset, (xcfunc_t *)drmach_sys_trap,
4079 	    (uint64_t)drmach_copy_rename_slave, (uint64_t)prog);
4080 	xt_sync(cpuset);
4081 
4082 	if (on_trap(&otd, OT_DATA_EC)) {
4083 		rtn = EOPL_FMEM_COPY_ERROR;
4084 		drmach_flush_icache();
4085 		goto done;
4086 	}
4087 
4088 	/*
4089 	 * jmp drmach_copy_rename_prog().
4090 	 */
4091 
4092 	drmach_flush(prog->critical, PAGESIZE);
4093 	rtn = prog->critical->run(prog, cpuid);
4094 
4095 	drmach_flush_icache();
4096 
4097 
4098 done:
4099 	no_trap();
4100 	if (rtn == EOPL_FMEM_HW_ERROR) {
4101 		kpreempt_enable();
4102 		prom_panic("URGENT_ERROR_TRAP is detected during FMEM.\n");
4103 	}
4104 
4105 	/*
4106 	 * In normal case, all slave CPU's are still spinning in
4107 	 * the assembly code.  The master has to patch the instruction
4108 	 * to get them out.
4109 	 * In error case, e.g. COPY_ERROR, some slave CPU's might
4110 	 * have aborted and already returned and sset LOOP_EXIT status.
4111 	 * Some CPU might still be copying.
4112 	 * In any case, some delay is necessary to give them
4113 	 * enough time to set the LOOP_EXIT status.
4114 	 */
4115 
4116 	for (;;) {
4117 		inst = patch_inst((uint64_t *)prog->critical->loop_rtn,
4118 		    prog->critical->inst_loop_ret);
4119 		if (prog->critical->inst_loop_ret == inst) {
4120 			break;
4121 		}
4122 	}
4123 
4124 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4125 		uint64_t	last, now;
4126 		if (!CPU_IN_SET(cpuset, cpuid)) {
4127 			continue;
4128 		}
4129 		last = prog->stat->nbytes[cpuid];
4130 		/*
4131 		 * Wait for all CPU to exit.
4132 		 * However we do not want an infinite loop
4133 		 * so we detect hangup situation here.
4134 		 * If the slave CPU is still copying data,
4135 		 * we will continue to wait.
4136 		 * In error cases, the master has already set
4137 		 * fmem_status.error to abort the copying.
4138 		 * 1 m.s delay for them to abort copying and
4139 		 * return to drmach_copy_rename_slave to set
4140 		 * FMEM_LOOP_EXIT status should be enough.
4141 		 */
4142 		for (;;) {
4143 			if (prog->critical->stat[cpuid] == FMEM_LOOP_EXIT)
4144 				break;
4145 			drmach_sleep_il();
4146 			drv_usecwait(1000);
4147 			now = prog->stat->nbytes[cpuid];
4148 			if (now <= last) {
4149 				drv_usecwait(1000);
4150 				if (prog->critical->stat[cpuid] ==
4151 				    FMEM_LOOP_EXIT)
4152 					break;
4153 				cmn_err(CE_PANIC, "CPU %d hang during Copy "
4154 				    "Rename", cpuid);
4155 			}
4156 			last = now;
4157 		}
4158 		if (prog->data->error[cpuid] == EOPL_FMEM_HW_ERROR) {
4159 			prom_panic("URGENT_ERROR_TRAP is detected during "
4160 			    "FMEM.\n");
4161 		}
4162 	}
4163 
4164 	/*
4165 	 * This must be done after all strands have exit.
4166 	 * Removing the TLB entry will affect both strands
4167 	 * in the same core.
4168 	 */
4169 
4170 	for (cpuid = 0; cpuid < NCPU; cpuid++) {
4171 		if (CPU_IN_SET(cpuset, cpuid)) {
4172 			xc_one(cpuid, (xcfunc_t *)drmach_unlock_critical,
4173 			    (uint64_t)prog, 0);
4174 		}
4175 	}
4176 
4177 	in_sync = old_in_sync;
4178 
4179 	/*
4180 	 * we should unlock before the following lock to keep the kpreempt
4181 	 * count correct.
4182 	 */
4183 	(void) drmach_unlock_critical((caddr_t)prog);
4184 
4185 	/*
4186 	 * we must remap again.  TLB might have been removed in above xcall.
4187 	 */
4188 
4189 	(void) drmach_lock_critical((caddr_t)prog_kmem, (caddr_t)prog);
4190 
4191 	if (prog->data->fmem_status.error == ESBD_NOERROR)
4192 		prog->data->fmem_status.error = rtn;
4193 
4194 	if (prog->data->copy_wait_time > 0) {
4195 		DRMACH_PR("Unexpected long wait time %ld seconds "
4196 		    "during copy rename on CPU %d\n",
4197 		    prog->data->copy_wait_time/prog->data->stick_freq,
4198 		    prog->data->slowest_cpuid);
4199 	}
4200 	drmach_unlock_critical((caddr_t)prog);
4201 }
4202