xref: /titanic_50/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 44cd46cadd9aab751dae6a4023c1cb5bf316d274)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * DR memory support routines.
30  */
31 
32 #include <sys/note.h>
33 #include <sys/debug.h>
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>
37 #include <sys/dditypes.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/sysmacros.h>
46 #include <sys/machsystm.h>
47 #include <sys/spitregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/promif.h>
50 #include <vm/seg_kmem.h>
51 #include <sys/lgrp.h>
52 #include <sys/platform_module.h>
53 
54 #include <vm/page.h>
55 
56 #include <sys/dr.h>
57 #include <sys/dr_util.h>
58 #include <sys/drmach.h>
59 
60 extern struct memlist	*phys_install;
61 extern vnode_t		retired_pages;
62 
63 /* TODO: push this reference below drmach line */
64 extern int		kcage_on;
65 
66 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
67 static char *dr_ie_fmt = "%M% %d";
68 
/*
 * Value type returned by dr_get_target_preference().  The enumerators
 * are numbered consecutively starting at -1.
 */
typedef enum {
	DR_TP_INVALID	= -1,
	DR_TP_SAME	= 0,
	DR_TP_LARGE	= 1,
	DR_TP_NONRELOC	= 2,
	DR_TP_FLOATING	= 3
} dr_target_pref_t;
76 
77 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
78 static int		dr_reserve_mem_spans(memhandle_t *mhp,
79 				struct memlist *mlist);
80 static int		dr_select_mem_target(dr_handle_t *hp,
81 				dr_mem_unit_t *mp, struct memlist *ml);
82 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
83 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
84 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
85 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
86 				struct memlist *s_ml, struct memlist *x_ml,
87 				struct memlist *b_ml);
88 
89 static int		memlist_canfit(struct memlist *s_mlist,
90 				struct memlist *t_mlist);
91 static int		dr_del_mlist_query(struct memlist *mlist,
92 				memquery_t *mp);
93 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
94 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
95 				dr_mem_unit_t *t_mp);
96 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
97 				dr_mem_unit_t *s_mp);
98 static int		dr_memlist_canfit(struct memlist *s_mlist,
99 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
100 				dr_mem_unit_t *t_mp);
101 
102 extern void		page_unretire_pages(void);
103 
104 /*
105  * dr_mem_unit_t.sbm_flags
106  */
107 #define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
108 #define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
109 #define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
110 #define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
111 #define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
112 
113 /* helper macros */
114 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
115 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
116 
117 static struct memlist *
118 dr_get_memlist(dr_mem_unit_t *mp)
119 {
120 	struct memlist	*mlist = NULL;
121 	sbd_error_t	*err;
122 	static fn_t	f = "dr_get_memlist";
123 
124 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
125 
126 	/*
127 	 * Return cached memlist, if present.
128 	 * This memlist will be present following an
129 	 * unconfigure (a.k.a: detach) of this memunit.
130 	 * It should only be used in the case were a configure
131 	 * is bringing this memunit back in without going
132 	 * through the disconnect and connect states.
133 	 */
134 	if (mp->sbm_mlist) {
135 		PR_MEM("%s: found cached memlist\n", f);
136 
137 		mlist = memlist_dup(mp->sbm_mlist);
138 	} else {
139 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
140 
141 		/* attempt to construct a memlist using phys_install */
142 
143 		/* round down to slice base address */
144 		basepa &= ~(mp->sbm_slice_size - 1);
145 
146 		/* get a copy of phys_install to edit */
147 		memlist_read_lock();
148 		mlist = memlist_dup(phys_install);
149 		memlist_read_unlock();
150 
151 		/* trim lower irrelevant span */
152 		if (mlist)
153 			mlist = memlist_del_span(mlist, 0ull, basepa);
154 
155 		/* trim upper irrelevant span */
156 		if (mlist) {
157 			uint64_t endpa;
158 
159 			basepa += mp->sbm_slice_size;
160 			endpa = _ptob64(physmax + 1);
161 			if (endpa > basepa)
162 				mlist = memlist_del_span(
163 					mlist, basepa,
164 					endpa - basepa);
165 		}
166 
167 		if (mlist) {
168 			/* successfully built a memlist */
169 			PR_MEM("%s: derived memlist from phys_install\n", f);
170 		}
171 
172 		/* if no mlist yet, try platform layer */
173 		if (!mlist) {
174 			err = drmach_mem_get_memlist(
175 				mp->sbm_cm.sbdev_id, &mlist);
176 			if (err) {
177 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
178 				mlist = NULL; /* paranoia */
179 			}
180 		}
181 	}
182 
183 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
184 	PR_MEMLIST_DUMP(mlist);
185 
186 	return (mlist);
187 }
188 
189 typedef struct {
190 	kcondvar_t cond;
191 	kmutex_t lock;
192 	int error;
193 	int done;
194 } dr_release_mem_sync_t;
195 
196 /*
197  * Memory has been logically removed by the time this routine is called.
198  */
199 static void
200 dr_mem_del_done(void *arg, int error)
201 {
202 	dr_release_mem_sync_t *ds = arg;
203 
204 	mutex_enter(&ds->lock);
205 	ds->error = error;
206 	ds->done = 1;
207 	cv_signal(&ds->cond);
208 	mutex_exit(&ds->lock);
209 }
210 
211 /*
212  * When we reach here the memory being drained should have
213  * already been reserved in dr_pre_release_mem().
214  * Our only task here is to kick off the "drain" and wait
215  * for it to finish.
216  */
217 void
218 dr_release_mem(dr_common_unit_t *cp)
219 {
220 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
221 	int		err;
222 	dr_release_mem_sync_t rms;
223 	static fn_t	f = "dr_release_mem";
224 
225 	/* check that this memory unit has been reserved */
226 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
227 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
228 		return;
229 	}
230 
231 	bzero((void *) &rms, sizeof (rms));
232 
233 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
234 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
235 
236 	mutex_enter(&rms.lock);
237 	err = kphysm_del_start(mp->sbm_memhandle,
238 		dr_mem_del_done, (void *) &rms);
239 	if (err == KPHYSM_OK) {
240 		/* wait for completion or interrupt */
241 		while (!rms.done) {
242 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
243 				/* then there is a pending UNIX signal */
244 				(void) kphysm_del_cancel(mp->sbm_memhandle);
245 
246 				/* wait for completion */
247 				while (!rms.done)
248 					cv_wait(&rms.cond, &rms.lock);
249 			}
250 		}
251 		/* get the result of the memory delete operation */
252 		err = rms.error;
253 	}
254 	mutex_exit(&rms.lock);
255 
256 	cv_destroy(&rms.cond);
257 	mutex_destroy(&rms.lock);
258 
259 	if (err != KPHYSM_OK) {
260 		int e_code;
261 
262 		switch (err) {
263 			case KPHYSM_ENOWORK:
264 				e_code = ESBD_NOERROR;
265 				break;
266 
267 			case KPHYSM_EHANDLE:
268 			case KPHYSM_ESEQUENCE:
269 				e_code = ESBD_INTERNAL;
270 				break;
271 
272 			case KPHYSM_ENOTVIABLE:
273 				e_code = ESBD_MEM_NOTVIABLE;
274 				break;
275 
276 			case KPHYSM_EREFUSED:
277 				e_code = ESBD_MEM_REFUSED;
278 				break;
279 
280 			case KPHYSM_ENONRELOC:
281 				e_code = ESBD_MEM_NONRELOC;
282 				break;
283 
284 			case KPHYSM_ECANCELLED:
285 				e_code = ESBD_MEM_CANCELLED;
286 				break;
287 
288 			case KPHYSM_ERESOURCE:
289 				e_code = ESBD_MEMFAIL;
290 				break;
291 
292 			default:
293 				cmn_err(CE_WARN,
294 					"%s: unexpected kphysm error code %d,"
295 					" id 0x%p",
296 					f, err, mp->sbm_cm.sbdev_id);
297 
298 				e_code = ESBD_IO;
299 				break;
300 		}
301 
302 		if (e_code != ESBD_NOERROR) {
303 			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
304 		}
305 	}
306 }
307 
308 void
309 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
310 {
311 	_NOTE(ARGUNUSED(hp))
312 
313 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
314 	struct memlist	*ml, *mc;
315 	sbd_error_t	*err;
316 	static fn_t	f = "dr_attach_mem";
317 
318 	PR_MEM("%s...\n", f);
319 
320 	dr_lock_status(hp->h_bd);
321 	err = drmach_configure(cp->sbdev_id, 0);
322 	dr_unlock_status(hp->h_bd);
323 	if (err) {
324 		DRERR_SET_C(&cp->sbdev_error, &err);
325 		return;
326 	}
327 
328 	ml = dr_get_memlist(mp);
329 	for (mc = ml; mc; mc = mc->next) {
330 		int		 rv;
331 		sbd_error_t	*err;
332 
333 		rv = kphysm_add_memory_dynamic(
334 			(pfn_t)(mc->address >> PAGESHIFT),
335 			(pgcnt_t)(mc->size >> PAGESHIFT));
336 		if (rv != KPHYSM_OK) {
337 			/*
338 			 * translate kphysm error and
339 			 * store in devlist error
340 			 */
341 			switch (rv) {
342 			case KPHYSM_ERESOURCE:
343 				rv = ESBD_NOMEM;
344 				break;
345 
346 			case KPHYSM_EFAULT:
347 				rv = ESBD_FAULT;
348 				break;
349 
350 			default:
351 				rv = ESBD_INTERNAL;
352 				break;
353 			}
354 
355 			if (rv == ESBD_INTERNAL) {
356 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
357 			} else
358 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
359 			break;
360 		}
361 
362 		err = drmach_mem_add_span(
363 			mp->sbm_cm.sbdev_id, mc->address, mc->size);
364 		if (err) {
365 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
366 			break;
367 		}
368 	}
369 
370 	memlist_delete(ml);
371 
372 	/* back out if configure failed */
373 	if (mp->sbm_cm.sbdev_error != NULL) {
374 		dr_lock_status(hp->h_bd);
375 		err = drmach_unconfigure(cp->sbdev_id, 0);
376 		if (err)
377 			sbd_err_clear(&err);
378 		dr_unlock_status(hp->h_bd);
379 	}
380 }
381 
382 static struct memlist *
383 dr_memlist_del_retired_pages(struct memlist *mlist)
384 {
385 	page_t		*pp;
386 	pfn_t		pfn;
387 	kmutex_t	*vphm;
388 	vnode_t		*vp = &retired_pages;
389 	static fn_t	f = "dr_memlist_del_retired_pages";
390 
391 	vphm = page_vnode_mutex(vp);
392 	mutex_enter(vphm);
393 
394 	PR_MEM("%s\n", f);
395 
396 	if ((pp = vp->v_pages) == NULL) {
397 		mutex_exit(vphm);
398 		return (mlist);
399 	}
400 
401 	do {
402 		ASSERT(pp != NULL);
403 		/*
404 		 * page_downgrade happens after page_hashin, so we
405 		 * can't assert PAGE_SE. Just assert locked to catch
406 		 * changes to the retired vnode locking scheme.
407 		 */
408 		ASSERT(PAGE_LOCKED(pp));
409 		ASSERT(pp->p_vnode == &retired_pages);
410 
411 		if (!page_trylock(pp, SE_SHARED))
412 			continue;
413 
414 		pfn = page_pptonum(pp);
415 
416 		ASSERT((pp->p_offset >> PAGESHIFT) == pfn);
417 		/*
418 		 * Page retirement currently breaks large pages into PAGESIZE
419 		 * pages. If this changes, need to remove the assert and deal
420 		 * with different page sizes.
421 		 */
422 		ASSERT(pp->p_szc == 0);
423 
424 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
425 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
426 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
427 			    "from memlist\n", ptob(pfn), pfn);
428 		}
429 
430 		page_unlock(pp);
431 	} while ((pp = pp->p_vpnext) != vp->v_pages);
432 
433 	mutex_exit(vphm);
434 
435 	return (mlist);
436 }
437 
#ifdef	DEBUG
/*
 * Upper bound on the number of pages dbg_page_retire() retires per
 * call.  dr_move_memory() skips the call entirely when this is zero.
 */
int dbg_retirecnt = 10;

/*
 * DEBUG-only hook: retire a pattern of free pages inside the spans of
 * r_ml so that the retired-page handling of copy-rename is exercised.
 *
 * Within each memseg that overlaps a span, runs of `dbg_seq' free
 * pages are retired, separated by gaps of `dbg_skip' free pages; the
 * run length grows by one after each gap.  The routine returns as soon
 * as dbg_retirecnt pages have been retired.
 */
static void
dbg_page_retire(struct memlist *r_ml)
{
	struct memlist	*t_ml;
	struct memseg	*seg;
	page_t		*pp, *epp;
	pfn_t		pfn, epfn;

	int dbg_retired = 0;
	int dbg_skip = 10;
	int dbg_seq = 1;

	if (r_ml == NULL)
		return;

	for (t_ml = r_ml; t_ml != NULL; t_ml = t_ml->next) {
		/* span as the half-open pfn range [pfn, epfn) */
		pfn = _b64top(t_ml->address);
		epfn = _b64top(t_ml->address + t_ml->size);

		for (seg = memsegs; seg != NULL; seg = seg->next) {
			int retire = 0;
			int skip = 0;

			/*
			 * Skip memsegs that do not overlap the span.
			 * Both ranges are half-open, so a span ending
			 * exactly at pages_base does not overlap.
			 */
			if (pfn >= seg->pages_end || epfn <= seg->pages_base)
				continue;

			/* clip the page_t range to the overlap */
			pp = seg->pages;
			if (pfn > seg->pages_base)
				pp += pfn - seg->pages_base;

			epp = seg->epages;
			if (epfn < seg->pages_end)
				epp -= seg->pages_end - epfn;

			ASSERT(pp < epp);

			for (; pp < epp; pp++) {
				if (!PP_ISFREE(pp))
					continue;

				if (retire++ < dbg_seq) {
					/* retire the free page just found */
					(void) page_retire(
					    _ptob64(page_pptonum(pp)),
					    PR_OK);
					if (++dbg_retired >= dbg_retirecnt)
						return;
				} else if (skip++ >= dbg_skip) {
					/* gap done; start a longer run */
					skip = 0;
					retire = 0;
					dbg_seq++;
				}
			}
		}
	}
}
#endif
515 
516 static int
517 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
518 {
519 	int		rv = -1;
520 	time_t		 copytime;
521 	drmachid_t	 cr_id;
522 	dr_sr_handle_t	*srhp = NULL;
523 	dr_board_t	*t_bp, *s_bp;
524 	struct memlist	*c_ml, *d_ml;
525 	sbd_error_t	*err;
526 	static fn_t	 f = "dr_move_memory";
527 
528 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
529 		f,
530 		s_mp->sbm_cm.sbdev_path,
531 		t_mp->sbm_cm.sbdev_path);
532 
533 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
534 	ASSERT(s_mp->sbm_peer == t_mp);
535 	ASSERT(s_mp->sbm_mlist);
536 
537 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
538 	ASSERT(t_mp->sbm_peer == s_mp);
539 
540 #ifdef	DEBUG
541 	if (dbg_retirecnt)
542 		dbg_page_retire(s_mp->sbm_mlist);
543 #endif
544 
545 	/*
546 	 * create a memlist of spans to copy by removing
547 	 * the spans that have been deleted, if any, from
548 	 * the full source board memlist.  s_mp->sbm_del_mlist
549 	 * will be NULL if there were no spans deleted from
550 	 * the source board.
551 	 */
552 	c_ml = memlist_dup(s_mp->sbm_mlist);
553 	d_ml = s_mp->sbm_del_mlist;
554 	while (d_ml != NULL) {
555 		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
556 		d_ml = d_ml->next;
557 	}
558 
559 	/*
560 	 * Remove retired pages from the copy list. The page content
561 	 * need not be copied since the pages are no longer in use.
562 	 */
563 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
564 	PR_MEMLIST_DUMP(c_ml);
565 
566 	c_ml = dr_memlist_del_retired_pages(c_ml);
567 
568 	PR_MEM("%s: copy list after removing retired pages:\n", f);
569 	PR_MEMLIST_DUMP(c_ml);
570 
571 	/*
572 	 * With parallel copy, it shouldn't make a difference which
573 	 * CPU is the actual master during copy-rename since all
574 	 * CPUs participate in the parallel copy anyway.
575 	 */
576 	affinity_set(CPU_CURRENT);
577 
578 	err = drmach_copy_rename_init(
579 		t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
580 	if (err) {
581 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
582 		affinity_clear();
583 		memlist_delete(c_ml);
584 		return (-1);
585 	}
586 
587 	srhp = dr_get_sr_handle(hp);
588 	ASSERT(srhp);
589 
590 	copytime = lbolt;
591 
592 	/* Quiesce the OS.  */
593 	if (dr_suspend(srhp)) {
594 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
595 			" for copy-rename", f);
596 
597 		err = drmach_copy_rename_fini(cr_id);
598 		if (err) {
599 			/*
600 			 * no error is expected since the program has
601 			 * not yet run.
602 			 */
603 
604 			/* catch this in debug kernels */
605 			ASSERT(0);
606 
607 			sbd_err_clear(&err);
608 		}
609 
610 		/* suspend error reached via hp */
611 		s_mp->sbm_cm.sbdev_error = hp->h_err;
612 		hp->h_err = NULL;
613 		goto done;
614 	}
615 
616 	drmach_copy_rename(cr_id);
617 
618 	/* Resume the OS.  */
619 	dr_resume(srhp);
620 
621 	copytime = lbolt - copytime;
622 
623 	if (err = drmach_copy_rename_fini(cr_id))
624 		goto done;
625 
626 	/*
627 	 * Rename memory for lgroup.
628 	 * Source and target board numbers are packaged in arg.
629 	 */
630 	s_bp = s_mp->sbm_cm.sbdev_bp;
631 	t_bp = t_mp->sbm_cm.sbdev_bp;
632 
633 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
634 		(uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
635 
636 
637 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
638 		f, copytime, copytime / hz);
639 
640 	rv = 0;
641 done:
642 	if (srhp)
643 		dr_release_sr_handle(srhp);
644 	if (err)
645 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
646 	affinity_clear();
647 
648 	return (rv);
649 }
650 
651 /*
652  * If detaching node contains memory that is "non-permanent"
653  * then the memory adr's are simply cleared.  If the memory
654  * is non-relocatable, then do a copy-rename.
655  */
656 void
657 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
658 {
659 	int			rv = 0;
660 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
661 	dr_mem_unit_t		*t_mp;
662 	dr_state_t		state;
663 	static fn_t		f = "dr_detach_mem";
664 
665 	PR_MEM("%s...\n", f);
666 
667 	/* lookup target mem unit and target board structure, if any */
668 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
669 		t_mp = s_mp->sbm_peer;
670 		ASSERT(t_mp != NULL);
671 		ASSERT(t_mp->sbm_peer == s_mp);
672 	} else {
673 		t_mp = NULL;
674 	}
675 
676 	/* verify mem unit's state is UNREFERENCED */
677 	state = s_mp->sbm_cm.sbdev_state;
678 	if (state != DR_STATE_UNREFERENCED) {
679 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
680 		return;
681 	}
682 
683 	/* verify target mem unit's state is UNREFERENCED, if any */
684 	if (t_mp != NULL) {
685 		state = t_mp->sbm_cm.sbdev_state;
686 		if (state != DR_STATE_UNREFERENCED) {
687 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
688 			return;
689 		}
690 	}
691 
692 	/*
693 	 * If there is no target board (no copy/rename was needed), then
694 	 * we're done!
695 	 */
696 	if (t_mp == NULL) {
697 		sbd_error_t *err;
698 		/*
699 		 * Reprogram interconnect hardware and disable
700 		 * memory controllers for memory node that's going away.
701 		 */
702 
703 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
704 		if (err) {
705 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
706 			rv = -1;
707 		}
708 	} else {
709 		rv = dr_move_memory(hp, s_mp, t_mp);
710 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
711 			f,
712 			rv ? "FAILED" : "COMPLETED",
713 			s_mp->sbm_cm.sbdev_bp->b_num,
714 			t_mp->sbm_cm.sbdev_bp->b_num);
715 
716 		if (rv != 0)
717 			(void) dr_cancel_mem(s_mp);
718 	}
719 
720 	if (rv == 0) {
721 		sbd_error_t *err;
722 
723 		dr_lock_status(hp->h_bd);
724 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
725 		dr_unlock_status(hp->h_bd);
726 		if (err)
727 			sbd_err_clear(&err);
728 	}
729 }
730 
731 /*
732  * This routine acts as a wrapper for kphysm_del_span_query in order to
733  * support potential memory holes in a board's physical address space.
734  * It calls kphysm_del_span_query for each node in a memlist and accumulates
735  * the results in *mp.
736  */
737 static int
738 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
739 {
740 	struct memlist	*ml;
741 	int		 rv = 0;
742 
743 
744 	if (mlist == NULL)
745 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
746 
747 	mp->phys_pages = 0;
748 	mp->managed = 0;
749 	mp->nonrelocatable = 0;
750 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
751 	mp->last_nonrelocatable = 0;
752 
753 	for (ml = mlist; ml; ml = ml->next) {
754 		memquery_t mq;
755 
756 		rv = kphysm_del_span_query(
757 			_b64top(ml->address), _b64top(ml->size), &mq);
758 		if (rv)
759 			break;
760 
761 		mp->phys_pages += mq.phys_pages;
762 		mp->managed += mq.managed;
763 		mp->nonrelocatable += mq.nonrelocatable;
764 
765 		if (mq.nonrelocatable != 0) {
766 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
767 				mp->first_nonrelocatable =
768 					mq.first_nonrelocatable;
769 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
770 				mp->last_nonrelocatable =
771 					mq.last_nonrelocatable;
772 		}
773 	}
774 
775 	if (mp->nonrelocatable == 0)
776 		mp->first_nonrelocatable = 0;	/* XXX */
777 
778 	return (rv);
779 }
780 
781 /*
782  * NOTE: This routine is only partially smart about multiple
783  *	 mem-units.  Need to make mem-status structure smart
784  *	 about them also.
785  */
786 int
787 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
788 {
789 	int		m, mix;
790 	memdelstat_t	mdst;
791 	memquery_t	mq;
792 	dr_board_t	*bp;
793 	dr_mem_unit_t	*mp;
794 	sbd_mem_stat_t	*msp;
795 	static fn_t	f = "dr_mem_status";
796 
797 	bp = hp->h_bd;
798 	devset &= DR_DEVS_PRESENT(bp);
799 
800 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
801 		int		rv;
802 		sbd_error_t	*err;
803 		drmach_status_t	 pstat;
804 		dr_mem_unit_t	*p_mp;
805 
806 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
807 			continue;
808 
809 		mp = dr_get_mem_unit(bp, m);
810 
811 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
812 			/* present, but not fully initialized */
813 			continue;
814 		}
815 
816 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
817 			continue;
818 
819 		/* fetch platform status */
820 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
821 		if (err) {
822 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
823 			continue;
824 		}
825 
826 		msp = &dsp->d_mem;
827 		bzero((caddr_t)msp, sizeof (*msp));
828 
829 		strncpy(msp->ms_cm.c_id.c_name, pstat.type,
830 			sizeof (msp->ms_cm.c_id.c_name));
831 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
832 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
833 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
834 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
835 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
836 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
837 
838 		msp->ms_totpages = mp->sbm_npages;
839 		msp->ms_basepfn = mp->sbm_basepfn;
840 		msp->ms_pageslost = mp->sbm_pageslost;
841 		msp->ms_cage_enabled = kcage_on;
842 
843 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
844 			p_mp = mp->sbm_peer;
845 		else
846 			p_mp = NULL;
847 
848 		if (p_mp == NULL) {
849 			msp->ms_peer_is_target = 0;
850 			msp->ms_peer_ap_id[0] = '\0';
851 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
852 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
853 			char *minor;
854 
855 			/*
856 			 * b_dip doesn't have to be held for ddi_pathname()
857 			 * because the board struct (dr_board_t) will be
858 			 * destroyed before b_dip detaches.
859 			 */
860 			(void) ddi_pathname(bp->b_dip, path);
861 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
862 
863 			snprintf(msp->ms_peer_ap_id,
864 			    sizeof (msp->ms_peer_ap_id), "%s%s",
865 			    path, (minor == NULL) ? "" : minor);
866 
867 			kmem_free(path, MAXPATHLEN);
868 
869 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
870 				msp->ms_peer_is_target = 1;
871 		}
872 
873 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
874 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
875 		else
876 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
877 
878 		if (rv == KPHYSM_OK) {
879 			/*
880 			 * Any pages above managed is "free",
881 			 * i.e. it's collected.
882 			 */
883 			msp->ms_detpages += (uint_t)(mdst.collected +
884 			    mdst.phys_pages - mdst.managed);
885 		} else {
886 			/*
887 			 * If we're UNREFERENCED or UNCONFIGURED,
888 			 * then the number of detached pages is
889 			 * however many pages are on the board.
890 			 * I.e. detached = not in use by OS.
891 			 */
892 			switch (msp->ms_cm.c_ostate) {
893 			/*
894 			 * changed to use cfgadm states
895 			 *
896 			 * was:
897 			 *	case DR_STATE_UNREFERENCED:
898 			 *	case DR_STATE_UNCONFIGURED:
899 			 */
900 			case SBD_STAT_UNCONFIGURED:
901 				msp->ms_detpages = msp->ms_totpages;
902 				break;
903 
904 			default:
905 				break;
906 			}
907 		}
908 
909 		/*
910 		 * kphysm_del_span_query can report non-reloc pages = total
911 		 * pages for memory that is not yet configured
912 		 */
913 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
914 			struct memlist *ml;
915 
916 			ml = dr_get_memlist(mp);
917 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
918 			memlist_delete(ml);
919 
920 			if (rv == KPHYSM_OK) {
921 				msp->ms_managed_pages = mq.managed;
922 				msp->ms_noreloc_pages = mq.nonrelocatable;
923 				msp->ms_noreloc_first =
924 				    mq.first_nonrelocatable;
925 				msp->ms_noreloc_last =
926 				    mq.last_nonrelocatable;
927 				msp->ms_cm.c_sflags = 0;
928 				if (mq.nonrelocatable) {
929 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
930 					    msp->ms_cm.c_sflags);
931 				}
932 			} else {
933 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
934 				    f, rv);
935 			}
936 		}
937 
938 		/*
939 		 * Check source unit state during copy-rename
940 		 */
941 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
942 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
943 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
944 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
945 
946 		mix++;
947 		dsp++;
948 	}
949 
950 	return (mix);
951 }
952 
953 int
954 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
955 {
956 	_NOTE(ARGUNUSED(hp))
957 
958 	int		err_flag = 0;
959 	int		d;
960 	sbd_error_t	*err;
961 	static fn_t	f = "dr_pre_attach_mem";
962 
963 	PR_MEM("%s...\n", f);
964 
965 	for (d = 0; d < devnum; d++) {
966 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
967 		dr_state_t	state;
968 
969 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
970 
971 		state = mp->sbm_cm.sbdev_state;
972 		switch (state) {
973 		case DR_STATE_UNCONFIGURED:
974 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
975 				f,
976 				mp->sbm_cm.sbdev_path);
977 
978 			/* use memlist cached by dr_post_detach_mem_unit */
979 			ASSERT(mp->sbm_mlist != NULL);
980 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
981 				f, mp->sbm_cm.sbdev_path);
982 			PR_MEMLIST_DUMP(mp->sbm_mlist);
983 
984 			/* kphysm del handle should be have been freed */
985 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
986 
987 			/*FALLTHROUGH*/
988 
989 		case DR_STATE_CONNECTED:
990 			PR_MEM("%s: reprogramming mem hardware on %s\n",
991 				f, mp->sbm_cm.sbdev_bp->b_path);
992 
993 			PR_MEM("%s: enabling %s\n",
994 				f, mp->sbm_cm.sbdev_path);
995 
996 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
997 			if (err) {
998 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
999 				err_flag = 1;
1000 			}
1001 			break;
1002 
1003 		default:
1004 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
1005 			err_flag = 1;
1006 			break;
1007 		}
1008 
1009 		/* exit for loop if error encountered */
1010 		if (err_flag)
1011 			break;
1012 	}
1013 
1014 	return (err_flag ? -1 : 0);
1015 }
1016 
1017 int
1018 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1019 {
1020 	_NOTE(ARGUNUSED(hp))
1021 
1022 	int		d;
1023 	static fn_t	f = "dr_post_attach_mem";
1024 
1025 	PR_MEM("%s...\n", f);
1026 
1027 	for (d = 0; d < devnum; d++) {
1028 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1029 		struct memlist	*mlist, *ml;
1030 
1031 		mlist = dr_get_memlist(mp);
1032 		if (mlist == NULL) {
1033 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
1034 			continue;
1035 		}
1036 
1037 		/*
1038 		 * Verify the memory really did successfully attach
1039 		 * by checking for its existence in phys_install.
1040 		 */
1041 		memlist_read_lock();
1042 		if (memlist_intersect(phys_install, mlist) == 0) {
1043 			memlist_read_unlock();
1044 
1045 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1046 
1047 			PR_MEM("%s: %s memlist not in phys_install",
1048 				f, mp->sbm_cm.sbdev_path);
1049 
1050 			memlist_delete(mlist);
1051 			continue;
1052 		}
1053 		memlist_read_unlock();
1054 
1055 		for (ml = mlist; ml != NULL; ml = ml->next) {
1056 			sbd_error_t *err;
1057 
1058 			err = drmach_mem_add_span(
1059 				mp->sbm_cm.sbdev_id,
1060 				ml->address,
1061 				ml->size);
1062 			if (err)
1063 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1064 		}
1065 
1066 		memlist_delete(mlist);
1067 
1068 		/*
1069 		 * Destroy cached memlist, if any.
1070 		 * There will be a cached memlist in sbm_mlist if
1071 		 * this board is being configured directly after
1072 		 * an unconfigure.
1073 		 * To support this transition, dr_post_detach_mem
1074 		 * left a copy of the last known memlist in sbm_mlist.
1075 		 * This memlist could differ from any derived from
1076 		 * hardware if while this memunit was last configured
1077 		 * the system detected and deleted bad pages from
1078 		 * phys_install.  The location of those bad pages
1079 		 * will be reflected in the cached memlist.
1080 		 */
1081 		if (mp->sbm_mlist) {
1082 			memlist_delete(mp->sbm_mlist);
1083 			mp->sbm_mlist = NULL;
1084 		}
1085 	}
1086 
1087 	return (0);
1088 }
1089 
1090 int
1091 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1092 {
1093 	_NOTE(ARGUNUSED(hp))
1094 
1095 	int d;
1096 
1097 	for (d = 0; d < devnum; d++) {
1098 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1099 
1100 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1101 	}
1102 
1103 	return (0);
1104 }
1105 
1106 int
1107 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1108 {
1109 	_NOTE(ARGUNUSED(hp))
1110 
1111 	int		d, rv;
1112 	static fn_t	f = "dr_post_detach_mem";
1113 
1114 	PR_MEM("%s...\n", f);
1115 
1116 	rv = 0;
1117 	for (d = 0; d < devnum; d++) {
1118 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1119 
1120 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1121 
1122 		if (dr_post_detach_mem_unit(mp))
1123 			rv = -1;
1124 	}
1125 
1126 	return (rv);
1127 }
1128 
1129 static void
1130 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1131 {
1132 	static fn_t	f = "dr_add_memory_spans";
1133 
1134 	PR_MEM("%s...", f);
1135 	PR_MEMLIST_DUMP(ml);
1136 
1137 #ifdef DEBUG
1138 	memlist_read_lock();
1139 	if (memlist_intersect(phys_install, ml)) {
1140 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1141 	}
1142 	memlist_read_unlock();
1143 #endif
1144 
1145 	for (; ml; ml = ml->next) {
1146 		pfn_t		 base;
1147 		pgcnt_t		 npgs;
1148 		int		 rv;
1149 		sbd_error_t	*err;
1150 
1151 		base = _b64top(ml->address);
1152 		npgs = _b64top(ml->size);
1153 
1154 		rv = kphysm_add_memory_dynamic(base, npgs);
1155 
1156 		err = drmach_mem_add_span(
1157 			mp->sbm_cm.sbdev_id,
1158 			ml->address,
1159 			ml->size);
1160 
1161 		if (err)
1162 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1163 
1164 		if (rv != KPHYSM_OK) {
1165 			cmn_err(CE_WARN, "%s:"
1166 				" unexpected kphysm_add_memory_dynamic"
1167 				" return value %d;"
1168 				" basepfn=0x%lx, npages=%ld\n",
1169 				f, rv, base, npgs);
1170 
1171 			continue;
1172 		}
1173 	}
1174 }
1175 
1176 static int
1177 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1178 {
1179 	uint64_t	sz = s_mp->sbm_slice_size;
1180 	uint64_t	sm = sz - 1;
1181 	/* old and new below refer to PAs before and after copy-rename */
1182 	uint64_t	s_old_basepa, s_new_basepa;
1183 	uint64_t	t_old_basepa, t_new_basepa;
1184 	dr_mem_unit_t	*t_mp, *x_mp;
1185 	drmach_mem_info_t	minfo;
1186 	struct memlist	*ml;
1187 	struct memlist	*t_excess_mlist;
1188 	int		rv;
1189 	int		s_excess_mem_deleted = 0;
1190 	sbd_error_t	*err;
1191 	static fn_t	f = "dr_post_detach_mem_unit";
1192 
1193 	PR_MEM("%s...\n", f);
1194 
1195 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1196 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1197 		f, s_mp->sbm_cm.sbdev_path);
1198 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1199 
1200 	/* sanity check */
1201 	ASSERT(s_mp->sbm_del_mlist == NULL ||
1202 		(s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1203 
1204 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1205 		t_mp = s_mp->sbm_peer;
1206 		ASSERT(t_mp != NULL);
1207 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1208 		ASSERT(t_mp->sbm_peer == s_mp);
1209 
1210 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1211 		ASSERT(t_mp->sbm_del_mlist);
1212 
1213 		PR_MEM("%s: target %s: deleted memlist:\n",
1214 			f, t_mp->sbm_cm.sbdev_path);
1215 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1216 	} else {
1217 		/* this is no target unit */
1218 		t_mp = NULL;
1219 	}
1220 
1221 	/*
1222 	 * Verify the memory really did successfully detach
1223 	 * by checking for its non-existence in phys_install.
1224 	 */
1225 	rv = 0;
1226 	memlist_read_lock();
1227 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1228 		x_mp = s_mp;
1229 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1230 	}
1231 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1232 		x_mp = t_mp;
1233 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1234 	}
1235 	memlist_read_unlock();
1236 
1237 	if (rv) {
1238 		/* error: memlist still in phys_install */
1239 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1240 	}
1241 
1242 	/*
1243 	 * clean mem unit state and bail out if an error has been recorded.
1244 	 */
1245 	rv = 0;
1246 	if (s_mp->sbm_cm.sbdev_error) {
1247 		PR_MEM("%s: %s flags=%x", f,
1248 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1249 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1250 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1251 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1252 		rv = -1;
1253 	}
1254 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1255 		PR_MEM("%s: %s flags=%x", f,
1256 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1257 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1258 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1259 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1260 		rv = -1;
1261 	}
1262 	if (rv)
1263 		goto cleanup;
1264 
1265 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1266 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1267 	ASSERT(err == NULL);
1268 	s_new_basepa = minfo.mi_basepa;
1269 
1270 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1271 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1272 
1273 	if (t_mp != NULL) {
1274 		struct memlist *s_copy_mlist;
1275 
1276 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1277 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1278 		ASSERT(err == NULL);
1279 		t_new_basepa = minfo.mi_basepa;
1280 
1281 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1282 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1283 
1284 		/*
1285 		 * Construct copy list with original source addresses.
1286 		 * Used to add back excess target mem.
1287 		 */
1288 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1289 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1290 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1291 			    ml->address, ml->size);
1292 		}
1293 
1294 		PR_MEM("%s: source copy list:\n:", f);
1295 		PR_MEMLIST_DUMP(s_copy_mlist);
1296 
1297 		/*
1298 		 * We had to swap mem-units, so update
1299 		 * memlists accordingly with new base
1300 		 * addresses.
1301 		 */
1302 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
1303 			ml->address -= t_old_basepa;
1304 			ml->address += t_new_basepa;
1305 		}
1306 
1307 		/*
1308 		 * There is no need to explicitly rename the target delete
1309 		 * memlist, because sbm_del_mlist and sbm_mlist always
1310 		 * point to the same memlist for a copy/rename operation.
1311 		 */
1312 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1313 
1314 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1315 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1316 
1317 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
1318 			ml->address -= s_old_basepa;
1319 			ml->address += s_new_basepa;
1320 		}
1321 
1322 		PR_MEM("%s: renamed source memlist:\n", f);
1323 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1324 
1325 		/*
1326 		 * Keep track of dynamically added segments
1327 		 * since they cannot be split if we need to delete
1328 		 * excess source memory later for this board.
1329 		 */
1330 		if (t_mp->sbm_dyn_segs)
1331 			memlist_delete(t_mp->sbm_dyn_segs);
1332 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1333 		s_mp->sbm_dyn_segs = NULL;
1334 
1335 		/*
1336 		 * Add back excess target memory.
1337 		 * Subtract out the portion of the target memory
1338 		 * node that was taken over by the source memory
1339 		 * node.
1340 		 */
1341 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1342 		for (ml = s_copy_mlist; ml; ml = ml->next) {
1343 			t_excess_mlist =
1344 			    memlist_del_span(t_excess_mlist,
1345 			    ml->address, ml->size);
1346 		}
1347 
1348 		/*
1349 		 * Update dynamically added segs
1350 		 */
1351 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1352 			t_mp->sbm_dyn_segs =
1353 			    memlist_del_span(t_mp->sbm_dyn_segs,
1354 			    ml->address, ml->size);
1355 		}
1356 		for (ml = t_excess_mlist; ml; ml = ml->next) {
1357 			t_mp->sbm_dyn_segs =
1358 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1359 			    ml->address, ml->size);
1360 		}
1361 		PR_MEM("%s: %s: updated dynamic seg list:\n",
1362 		    f, t_mp->sbm_cm.sbdev_path);
1363 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1364 
1365 		if (t_excess_mlist != NULL) {
1366 			/*
1367 			 * After the small <-> big copy-rename,
1368 			 * the original address space for the
1369 			 * source board may have excess to be
1370 			 * deleted. This is a case different
1371 			 * from the big->small excess source
1372 			 * memory case listed below.
1373 			 * Remove s_mp->sbm_del_mlist from
1374 			 * the kernel cage glist.
1375 			 */
1376 			for (ml = s_mp->sbm_del_mlist; ml;
1377 				ml = ml->next) {
1378 				PR_MEM("%s: delete small<->big copy-"
1379 				    "rename source excess memory", f);
1380 				PR_MEMLIST_DUMP(ml);
1381 
1382 				err = drmach_mem_del_span(
1383 					s_mp->sbm_cm.sbdev_id,
1384 					    ml->address, ml->size);
1385 				if (err)
1386 					DRERR_SET_C(&s_mp->
1387 					    sbm_cm.sbdev_error, &err);
1388 				ASSERT(err == NULL);
1389 			}
1390 
1391 			/*
1392 			 * mark sbm_del_mlist as been deleted so that
1393 			 * we won't end up to delete it twice later
1394 			 * from the span list
1395 			 */
1396 			s_excess_mem_deleted = 1;
1397 
1398 			PR_MEM("%s: adding back remaining portion"
1399 				" of %s, memlist:\n",
1400 				f, t_mp->sbm_cm.sbdev_path);
1401 			PR_MEMLIST_DUMP(t_excess_mlist);
1402 
1403 			dr_add_memory_spans(s_mp, t_excess_mlist);
1404 			memlist_delete(t_excess_mlist);
1405 		}
1406 		memlist_delete(s_copy_mlist);
1407 
1408 #ifdef DEBUG
1409 		/*
1410 		 * s_mp->sbm_del_mlist may still needed
1411 		 */
1412 		PR_MEM("%s: source delete memeory flag %d",
1413 		    f, s_excess_mem_deleted);
1414 		PR_MEM("%s: source delete memlist", f);
1415 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1416 #endif
1417 
1418 	}
1419 
1420 	if (t_mp != NULL) {
1421 		/* delete target's entire address space */
1422 		err = drmach_mem_del_span(
1423 			t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1424 		if (err)
1425 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1426 		ASSERT(err == NULL);
1427 
1428 		/*
1429 		 * After the copy/rename, the original address space
1430 		 * for the source board (which is now located on the
1431 		 * target board) may now have some excess to be deleted.
1432 		 * Those excess memory on the source board are kept in
1433 		 * source board's sbm_del_mlist
1434 		 */
1435 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1436 			ml = ml->next) {
1437 			PR_MEM("%s: delete source excess memory", f);
1438 			PR_MEMLIST_DUMP(ml);
1439 
1440 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1441 				ml->address, ml->size);
1442 			if (err)
1443 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1444 			ASSERT(err == NULL);
1445 		}
1446 
1447 	} else {
1448 		/* delete board's entire address space */
1449 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1450 						s_old_basepa & ~ sm, sz);
1451 		if (err)
1452 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1453 		ASSERT(err == NULL);
1454 	}
1455 
1456 cleanup:
1457 	/* clean up target mem unit */
1458 	if (t_mp != NULL) {
1459 		memlist_delete(t_mp->sbm_del_mlist);
1460 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
1461 
1462 		t_mp->sbm_del_mlist = NULL;
1463 		t_mp->sbm_mlist = NULL;
1464 		t_mp->sbm_peer = NULL;
1465 		t_mp->sbm_flags = 0;
1466 		t_mp->sbm_cm.sbdev_busy = 0;
1467 		dr_init_mem_unit_data(t_mp);
1468 
1469 	}
1470 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1471 		/*
1472 		 * now that copy/rename has completed, undo this
1473 		 * work that was done in dr_release_mem_done.
1474 		 */
1475 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1476 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1477 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1478 	}
1479 
1480 	/*
1481 	 * clean up (source) board's mem unit structure.
1482 	 * NOTE: sbm_mlist is retained if no error has been record (in other
1483 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1484 	 * referred to elsewhere as the cached memlist.  The cached memlist
1485 	 * is used to re-attach (configure back in) this memunit from the
1486 	 * unconfigured state.  The memlist is retained because it may
1487 	 * represent bad pages that were detected while the memory was
1488 	 * configured into the OS.  The OS deletes bad pages from phys_install.
1489 	 * Those deletes, if any, will be represented in the cached mlist.
1490 	 */
1491 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1492 		memlist_delete(s_mp->sbm_del_mlist);
1493 
1494 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1495 		memlist_delete(s_mp->sbm_mlist);
1496 		s_mp->sbm_mlist = NULL;
1497 	}
1498 
1499 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1500 		memlist_delete(s_mp->sbm_dyn_segs);
1501 		s_mp->sbm_dyn_segs = NULL;
1502 	}
1503 
1504 	s_mp->sbm_del_mlist = NULL;
1505 	s_mp->sbm_peer = NULL;
1506 	s_mp->sbm_flags = 0;
1507 	s_mp->sbm_cm.sbdev_busy = 0;
1508 	dr_init_mem_unit_data(s_mp);
1509 
1510 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1511 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1512 
1513 	return (0);
1514 }
1515 
1516 /*
1517  * Successful return from this function will have the memory
1518  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1519  * and waiting.  This routine's job is to select the memory that
1520  * actually has to be released (detached) which may not necessarily
1521  * be the same memory node that came in in devlist[],
1522  * i.e. a copy-rename is needed.
1523  */
1524 int
1525 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1526 {
1527 	int		d;
1528 	int		err_flag = 0;
1529 	static fn_t	f = "dr_pre_release_mem";
1530 
1531 	PR_MEM("%s...\n", f);
1532 
1533 	for (d = 0; d < devnum; d++) {
1534 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1535 		int		rv;
1536 		memquery_t	mq;
1537 		struct memlist	*ml;
1538 
1539 		if (mp->sbm_cm.sbdev_error) {
1540 			err_flag = 1;
1541 			continue;
1542 		} else if (!kcage_on) {
1543 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1544 			err_flag = 1;
1545 			continue;
1546 		}
1547 
1548 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1549 			/*
1550 			 * Board is currently involved in a delete
1551 			 * memory operation. Can't detach this guy until
1552 			 * that operation completes.
1553 			 */
1554 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1555 			err_flag = 1;
1556 			break;
1557 		}
1558 
1559 		/* flags should be clean at this time */
1560 		ASSERT(mp->sbm_flags == 0);
1561 
1562 		ASSERT(mp->sbm_mlist == NULL);
1563 		ASSERT(mp->sbm_del_mlist == NULL);
1564 		if (mp->sbm_mlist != NULL) {
1565 			memlist_delete(mp->sbm_mlist);
1566 			mp->sbm_mlist = NULL;
1567 		}
1568 
1569 		ml = dr_get_memlist(mp);
1570 		if (ml == NULL) {
1571 			err_flag = 1;
1572 			PR_MEM("%s: no memlist found for %s\n",
1573 			    f, mp->sbm_cm.sbdev_path);
1574 			continue;
1575 		}
1576 
1577 		/*
1578 		 * Check whether the detaching memory requires a
1579 		 * copy-rename.
1580 		 */
1581 		ASSERT(mp->sbm_npages != 0);
1582 		rv = dr_del_mlist_query(ml, &mq);
1583 		if (rv != KPHYSM_OK) {
1584 			memlist_delete(ml);
1585 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1586 			err_flag = 1;
1587 			break;
1588 		}
1589 
1590 		if (mq.nonrelocatable != 0) {
1591 			if (!(dr_cmd_flags(hp) &
1592 				(SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1593 				memlist_delete(ml);
1594 				/* caller wasn't prompted for a suspend */
1595 				dr_dev_err(CE_WARN, &mp->sbm_cm,
1596 					ESBD_QUIESCE_REQD);
1597 				err_flag = 1;
1598 				break;
1599 			}
1600 		}
1601 
1602 		/* allocate a kphysm handle */
1603 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1604 		if (rv != KPHYSM_OK) {
1605 			memlist_delete(ml);
1606 
1607 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1608 			err_flag = 1;
1609 			break;
1610 		}
1611 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
1612 
1613 		if ((mq.nonrelocatable != 0) ||
1614 			dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1615 			/*
1616 			 * Either the detaching memory node contains
1617 			 * non-reloc memory or we failed to reserve the
1618 			 * detaching memory node (which did _not_ have
1619 			 * any non-reloc memory, i.e. some non-reloc mem
1620 			 * got onboard).
1621 			 */
1622 
1623 			if (dr_select_mem_target(hp, mp, ml)) {
1624 				int rv;
1625 
1626 				/*
1627 				 * We had no luck locating a target
1628 				 * memory node to be the recipient of
1629 				 * the non-reloc memory on the node
1630 				 * we're trying to detach.
1631 				 * Clean up be disposing the mem handle
1632 				 * and the mem list.
1633 				 */
1634 				rv = kphysm_del_release(mp->sbm_memhandle);
1635 				if (rv != KPHYSM_OK) {
1636 					/*
1637 					 * can do nothing but complain
1638 					 * and hope helpful for debug
1639 					 */
1640 					cmn_err(CE_WARN, "%s: unexpected"
1641 						" kphysm_del_release return"
1642 						" value %d",
1643 						f, rv);
1644 				}
1645 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1646 
1647 				memlist_delete(ml);
1648 
1649 				/* make sure sbm_flags is clean */
1650 				ASSERT(mp->sbm_flags == 0);
1651 
1652 				dr_dev_err(CE_WARN,
1653 					&mp->sbm_cm, ESBD_NO_TARGET);
1654 
1655 				err_flag = 1;
1656 				break;
1657 			}
1658 
1659 			/*
1660 			 * ml is not memlist_delete'd here because
1661 			 * it has been assigned to mp->sbm_mlist
1662 			 * by dr_select_mem_target.
1663 			 */
1664 		} else {
1665 			/* no target needed to detach this board */
1666 			mp->sbm_flags |= DR_MFLAG_RESERVED;
1667 			mp->sbm_peer = NULL;
1668 			mp->sbm_del_mlist = ml;
1669 			mp->sbm_mlist = ml;
1670 			mp->sbm_cm.sbdev_busy = 1;
1671 		}
1672 #ifdef DEBUG
1673 		ASSERT(mp->sbm_mlist != NULL);
1674 
1675 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1676 			PR_MEM("%s: release of %s requires copy/rename;"
1677 				" selected target board %s\n",
1678 				f,
1679 				mp->sbm_cm.sbdev_path,
1680 				mp->sbm_peer->sbm_cm.sbdev_path);
1681 		} else {
1682 			PR_MEM("%s: copy/rename not required to release %s\n",
1683 				f, mp->sbm_cm.sbdev_path);
1684 		}
1685 
1686 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1687 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1688 #endif
1689 	}
1690 
1691 	return (err_flag ? -1 : 0);
1692 }
1693 
1694 void
1695 dr_release_mem_done(dr_common_unit_t *cp)
1696 {
1697 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
1698 	dr_mem_unit_t *t_mp, *mp;
1699 	int		rv;
1700 	static fn_t	f = "dr_release_mem_done";
1701 
1702 	/*
1703 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1704 	 * has a target unit.
1705 	 */
1706 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1707 		t_mp = s_mp->sbm_peer;
1708 		ASSERT(t_mp != NULL);
1709 		ASSERT(t_mp->sbm_peer == s_mp);
1710 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1711 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1712 	} else {
1713 		/* this is no target unit */
1714 		t_mp = NULL;
1715 	}
1716 
1717 	/* free delete handle */
1718 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1719 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1720 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1721 	if (rv != KPHYSM_OK) {
1722 		/*
1723 		 * can do nothing but complain
1724 		 * and hope helpful for debug
1725 		 */
1726 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1727 			" return value %d", f, rv);
1728 	}
1729 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1730 
1731 	/*
1732 	 * If an error was encountered during release, clean up
1733 	 * the source (and target, if present) unit data.
1734 	 */
1735 /* XXX Can we know that sbdev_error was encountered during release? */
1736 	if (s_mp->sbm_cm.sbdev_error != NULL) {
1737 		PR_MEM("%s: %s: error %d noted\n",
1738 			f,
1739 			s_mp->sbm_cm.sbdev_path,
1740 			s_mp->sbm_cm.sbdev_error->e_code);
1741 
1742 		if (t_mp != NULL) {
1743 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1744 			t_mp->sbm_del_mlist = NULL;
1745 
1746 			if (t_mp->sbm_mlist != NULL) {
1747 				memlist_delete(t_mp->sbm_mlist);
1748 				t_mp->sbm_mlist = NULL;
1749 			}
1750 
1751 			t_mp->sbm_peer = NULL;
1752 			t_mp->sbm_flags = 0;
1753 			t_mp->sbm_cm.sbdev_busy = 0;
1754 		}
1755 
1756 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1757 			memlist_delete(s_mp->sbm_del_mlist);
1758 		s_mp->sbm_del_mlist = NULL;
1759 
1760 		if (s_mp->sbm_mlist != NULL) {
1761 			memlist_delete(s_mp->sbm_mlist);
1762 			s_mp->sbm_mlist = NULL;
1763 		}
1764 
1765 		s_mp->sbm_peer = NULL;
1766 		s_mp->sbm_flags = 0;
1767 		s_mp->sbm_cm.sbdev_busy = 0;
1768 
1769 		/* bail out */
1770 		return;
1771 	}
1772 
1773 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1774 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1775 
1776 	if (t_mp != NULL) {
1777 		/*
1778 		 * the kphysm delete operation that drained the source
1779 		 * board also drained this target board.  Since the source
1780 		 * board drain is now known to have succeeded, we know this
1781 		 * target board is drained too.
1782 		 *
1783 		 * because DR_DEV_SET_RELEASED and dr_device_transition
1784 		 * is done here, the dr_release_dev_done should not
1785 		 * fail.
1786 		 */
1787 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1788 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1789 
1790 		/*
1791 		 * NOTE: do not transition target's board state,
1792 		 * even if the mem-unit was the last configure
1793 		 * unit of the board.  When copy/rename completes
1794 		 * this mem-unit will transitioned back to
1795 		 * the configured state.  In the meantime, the
1796 		 * board's must remain as is.
1797 		 */
1798 	}
1799 
1800 	/* if board(s) had deleted memory, verify it is gone */
1801 	rv = 0;
1802 	memlist_read_lock();
1803 	if (s_mp->sbm_del_mlist != NULL) {
1804 		mp = s_mp;
1805 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1806 	}
1807 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1808 		mp = t_mp;
1809 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1810 	}
1811 	memlist_read_unlock();
1812 	if (rv) {
1813 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1814 			"deleted memory still found in phys_install",
1815 			f,
1816 			(mp == t_mp ? "target " : ""),
1817 			mp->sbm_cm.sbdev_bp->b_num,
1818 			mp->sbm_cm.sbdev_unum);
1819 
1820 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1821 		return;
1822 	}
1823 
1824 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1825 	if (t_mp != NULL)
1826 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1827 
1828 	/* this should not fail */
1829 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1830 		/* catch this in debug kernels */
1831 		ASSERT(0);
1832 		return;
1833 	}
1834 
1835 	PR_MEM("%s: marking %s release DONE\n",
1836 		f, s_mp->sbm_cm.sbdev_path);
1837 
1838 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1839 
1840 	if (t_mp != NULL) {
1841 		/* should not fail */
1842 		rv = dr_release_dev_done(&t_mp->sbm_cm);
1843 		if (rv != 0) {
1844 			/* catch this in debug kernels */
1845 			ASSERT(0);
1846 			return;
1847 		}
1848 
1849 		PR_MEM("%s: marking %s release DONE\n",
1850 			f, t_mp->sbm_cm.sbdev_path);
1851 
1852 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1853 	}
1854 }
1855 
1856 /*ARGSUSED*/
1857 int
1858 dr_disconnect_mem(dr_mem_unit_t *mp)
1859 {
1860 	static fn_t	f = "dr_disconnect_mem";
1861 	update_membounds_t umb;
1862 
1863 #ifdef DEBUG
1864 	int state = mp->sbm_cm.sbdev_state;
1865 	ASSERT(state == DR_STATE_CONNECTED ||
1866 		state == DR_STATE_UNCONFIGURED);
1867 #endif
1868 
1869 	PR_MEM("%s...\n", f);
1870 
1871 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1872 		memlist_delete(mp->sbm_del_mlist);
1873 	mp->sbm_del_mlist = NULL;
1874 
1875 	if (mp->sbm_mlist) {
1876 		memlist_delete(mp->sbm_mlist);
1877 		mp->sbm_mlist = NULL;
1878 	}
1879 
1880 	/*
1881 	 * Remove memory from lgroup
1882 	 * For now, only board info is required.
1883 	 */
1884 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1885 	umb.u_base = (uint64_t)-1;
1886 	umb.u_len = (uint64_t)-1;
1887 
1888 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1889 
1890 	return (0);
1891 }
1892 
1893 int
1894 dr_cancel_mem(dr_mem_unit_t *s_mp)
1895 {
1896 	dr_mem_unit_t	*t_mp;
1897 	dr_state_t	state;
1898 	static fn_t	f = "dr_cancel_mem";
1899 
1900 	state = s_mp->sbm_cm.sbdev_state;
1901 
1902 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1903 		/* must cancel source board, not target board */
1904 		/* TODO: set error */
1905 		return (-1);
1906 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1907 		t_mp = s_mp->sbm_peer;
1908 		ASSERT(t_mp != NULL);
1909 		ASSERT(t_mp->sbm_peer == s_mp);
1910 
1911 		/* must always match the source board's state */
1912 		/* TODO: is this assertion correct? */
1913 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1914 	} else {
1915 		/* this is no target unit */
1916 		t_mp = NULL;
1917 	}
1918 
1919 	switch (state) {
1920 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
1921 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1922 
1923 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1924 			PR_MEM("%s: undoing target %s memory delete\n",
1925 				f, t_mp->sbm_cm.sbdev_path);
1926 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1927 
1928 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1929 		}
1930 
1931 		if (s_mp->sbm_del_mlist != NULL) {
1932 			PR_MEM("%s: undoing %s memory delete\n",
1933 				f, s_mp->sbm_cm.sbdev_path);
1934 
1935 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1936 		}
1937 
1938 		/*FALLTHROUGH*/
1939 
1940 /* TODO: should no longer be possible to see the release state here */
1941 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
1942 
1943 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1944 
1945 		if (t_mp != NULL) {
1946 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1947 			t_mp->sbm_del_mlist = NULL;
1948 
1949 			if (t_mp->sbm_mlist != NULL) {
1950 				memlist_delete(t_mp->sbm_mlist);
1951 				t_mp->sbm_mlist = NULL;
1952 			}
1953 
1954 			t_mp->sbm_peer = NULL;
1955 			t_mp->sbm_flags = 0;
1956 			t_mp->sbm_cm.sbdev_busy = 0;
1957 			dr_init_mem_unit_data(t_mp);
1958 
1959 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1960 
1961 			dr_device_transition(
1962 				&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1963 		}
1964 
1965 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1966 			memlist_delete(s_mp->sbm_del_mlist);
1967 		s_mp->sbm_del_mlist = NULL;
1968 
1969 		if (s_mp->sbm_mlist != NULL) {
1970 			memlist_delete(s_mp->sbm_mlist);
1971 			s_mp->sbm_mlist = NULL;
1972 		}
1973 
1974 		s_mp->sbm_peer = NULL;
1975 		s_mp->sbm_flags = 0;
1976 		s_mp->sbm_cm.sbdev_busy = 0;
1977 		dr_init_mem_unit_data(s_mp);
1978 
1979 		return (0);
1980 
1981 	default:
1982 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
1983 			f, (int)state, s_mp->sbm_cm.sbdev_path);
1984 
1985 		return (-1);
1986 	}
1987 	/*NOTREACHED*/
1988 }
1989 
1990 void
1991 dr_init_mem_unit(dr_mem_unit_t *mp)
1992 {
1993 	dr_state_t	new_state;
1994 
1995 
1996 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
1997 		new_state = DR_STATE_CONFIGURED;
1998 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1999 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2000 		new_state = DR_STATE_CONNECTED;
2001 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2002 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2003 		new_state = DR_STATE_OCCUPIED;
2004 	} else {
2005 		new_state = DR_STATE_EMPTY;
2006 	}
2007 
2008 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2009 		dr_init_mem_unit_data(mp);
2010 
2011 	/* delay transition until fully initialized */
2012 	dr_device_transition(&mp->sbm_cm, new_state);
2013 }
2014 
2015 static void
2016 dr_init_mem_unit_data(dr_mem_unit_t *mp)
2017 {
2018 	drmachid_t	id = mp->sbm_cm.sbdev_id;
2019 	drmach_mem_info_t	minfo;
2020 	sbd_error_t	*err;
2021 	static fn_t	f = "dr_init_mem_unit_data";
2022 	update_membounds_t umb;
2023 
2024 	PR_MEM("%s...\n", f);
2025 
2026 	/* a little sanity checking */
2027 	ASSERT(mp->sbm_peer == NULL);
2028 	ASSERT(mp->sbm_flags == 0);
2029 
2030 	if (err = drmach_mem_get_info(id, &minfo)) {
2031 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2032 		return;
2033 	}
2034 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2035 	mp->sbm_npages = _b64top(minfo.mi_size);
2036 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2037 	mp->sbm_slice_size = minfo.mi_slice_size;
2038 
2039 	/*
2040 	 * Add memory to lgroup
2041 	 */
2042 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2043 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2044 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2045 
2046 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2047 
2048 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2049 		f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2050 }
2051 
2052 static int
2053 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2054 {
2055 	int		err;
2056 	pfn_t		base;
2057 	pgcnt_t		npgs;
2058 	struct memlist	*mc;
2059 	static fn_t	f = "dr_reserve_mem_spans";
2060 
2061 	PR_MEM("%s...\n", f);
2062 
2063 	/*
2064 	 * Walk the supplied memlist scheduling each span for removal
2065 	 * with kphysm_del_span.  It is possible that a span may intersect
2066 	 * an area occupied by the cage.
2067 	 */
2068 	for (mc = ml; mc != NULL; mc = mc->next) {
2069 		base = _b64top(mc->address);
2070 		npgs = _b64top(mc->size);
2071 
2072 		err = kphysm_del_span(*mhp, base, npgs);
2073 		if (err != KPHYSM_OK) {
2074 			cmn_err(CE_WARN, "%s memory reserve failed."
2075 				" unexpected kphysm_del_span return value %d;"
2076 				" basepfn=0x%lx npages=%ld",
2077 				f, err, base, npgs);
2078 
2079 			return (-1);
2080 		}
2081 	}
2082 
2083 	return (0);
2084 }
2085 
/* number of preference sets used when selecting a copy/rename target */
#define	DR_SMT_NPREF_SETS	6
/*
 * Number of slots in one preference set: one per memory unit per board.
 * The replacement list is parenthesized so that the product stays one
 * operand when the macro appears inside a larger expression; without
 * the parentheses "idx / DR_SMT_NUNITS_PER_SET" would be evaluated as
 * "(idx / MAX_BOARDS) * MAX_MEM_UNITS_PER_BOARD".
 */
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters; dr_smt_preference has one counter per preference set */
int dr_smt_realigned;
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
2096 
2097 /*
2098  * Find and reserve a copy/rename target board suitable for the
2099  * given source board.
2100  * All boards in the system are examined and categorized in relation to
2101  * their memory size versus the source board's memory size.  Order of
2102  * preference is:
2103  *	1st copy all source, source/target same size
2104  *	2nd copy all source, larger target
2105  * 	3rd copy nonrelocatable source span
2106  */
2107 static int
2108 dr_select_mem_target(dr_handle_t *hp,
2109 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
2110 {
2111 	dr_target_pref_t preference; /* lower value is higher preference */
2112 	int		idx;
2113 	dr_mem_unit_t	**sets;
2114 
2115 	int		t_bd;
2116 	int		t_unit;
2117 	int		rv;
2118 	dr_board_t	*s_bp, *t_bp;
2119 	dr_mem_unit_t	*t_mp, *c_mp;
2120 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2121 	memquery_t	s_mq = {0};
2122 	static fn_t	f = "dr_select_mem_target";
2123 
2124 	PR_MEM("%s...\n", f);
2125 
2126 	ASSERT(s_ml != NULL);
2127 
2128 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2129 	    DR_SMT_NPREF_SETS);
2130 
2131 	s_bp = hp->h_bd;
2132 	/* calculate the offset into the slice of the last source board pfn */
2133 	ASSERT(s_mp->sbm_npages != 0);
2134 
2135 	/*
2136 	 * Find non-relocatable span on source board.
2137 	 */
2138 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2139 	if (rv != KPHYSM_OK) {
2140 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2141 		    " return value %d; basepfn 0x%lx, npages %ld\n",
2142 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2143 		    s_mp->sbm_npages);
2144 		return (-1);
2145 	}
2146 
2147 	ASSERT(s_mq.phys_pages != 0);
2148 	ASSERT(s_mq.nonrelocatable != 0);
2149 
2150 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2151 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2152 	    s_mq.last_nonrelocatable);
2153 
2154 	/* break down s_ml if it contains dynamic segments */
2155 	b_ml = memlist_dup(s_ml);
2156 
2157 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
2158 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
2159 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
2160 	}
2161 
2162 
2163 	/*
2164 	 * Make one pass through all memory units on all boards
2165 	 * and categorize them with respect to the source board.
2166 	 */
2167 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2168 		/*
2169 		 * The board structs are a contiguous array
2170 		 * so we take advantage of that to find the
2171 		 * correct board struct pointer for a given
2172 		 * board number.
2173 		 */
2174 		t_bp = dr_lookup_board(t_bd);
2175 
2176 		/* source board can not be its own target */
2177 		if (s_bp->b_num == t_bp->b_num)
2178 			continue;
2179 
2180 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2181 
2182 			t_mp = dr_get_mem_unit(t_bp, t_unit);
2183 
2184 			/* this memory node must be attached */
2185 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2186 				continue;
2187 
2188 			/* source unit can not be its own target */
2189 			if (s_mp == t_mp) {
2190 				/* catch this is debug kernels */
2191 				ASSERT(0);
2192 				continue;
2193 			}
2194 
2195 			/*
2196 			 * this memory node must not already be reserved
2197 			 * by some other memory delete operation.
2198 			 */
2199 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2200 				continue;
2201 
2202 			/* get target board memlist */
2203 			t_ml = dr_get_memlist(t_mp);
2204 			if (t_ml == NULL) {
2205 				cmn_err(CE_WARN, "%s: no memlist for"
2206 				    " mem-unit %d, board %d", f,
2207 				    t_mp->sbm_cm.sbdev_bp->b_num,
2208 				    t_mp->sbm_cm.sbdev_unum);
2209 				continue;
2210 			}
2211 
2212 			preference = dr_get_target_preference(hp, t_mp, s_mp,
2213 			    t_ml, s_ml, b_ml);
2214 
2215 			if (preference == DR_TP_INVALID)
2216 				continue;
2217 
2218 			dr_smt_preference[preference]++;
2219 
2220 			/* calculate index to start of preference set */
2221 			idx  = DR_SMT_NUNITS_PER_SET * preference;
2222 			/* calculate offset to respective element */
2223 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2224 
2225 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2226 			sets[idx] = t_mp;
2227 		}
2228 	}
2229 
2230 	if (b_ml != NULL)
2231 		memlist_delete(b_ml);
2232 
2233 	/*
2234 	 * NOTE: this would be a good place to sort each candidate
2235 	 * set in to some desired order, e.g. memory size in ascending
2236 	 * order.  Without an additional sorting step here, the order
2237 	 * within a set is ascending board number order.
2238 	 */
2239 
2240 	c_mp = NULL;
2241 	x_ml = NULL;
2242 	t_ml = NULL;
2243 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2244 		memquery_t mq;
2245 
2246 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2247 
2248 		ASSERT(preference != DR_TP_INVALID);
2249 
2250 		/* cleanup t_ml after previous pass */
2251 		if (t_ml != NULL) {
2252 			memlist_delete(t_ml);
2253 			t_ml = NULL;
2254 		}
2255 
2256 		/* get candidate target board mem unit */
2257 		t_mp = sets[idx];
2258 		if (t_mp == NULL)
2259 			continue;
2260 
2261 		/* get target board memlist */
2262 		t_ml = dr_get_memlist(t_mp);
2263 		if (t_ml == NULL) {
2264 			cmn_err(CE_WARN, "%s: no memlist for"
2265 				" mem-unit %d, board %d",
2266 				f,
2267 				t_mp->sbm_cm.sbdev_bp->b_num,
2268 				t_mp->sbm_cm.sbdev_unum);
2269 
2270 			continue;
2271 		}
2272 
2273 		PR_MEM("%s: checking for no-reloc in %s, "
2274 			" basepfn=0x%lx, npages=%ld\n",
2275 			f,
2276 			t_mp->sbm_cm.sbdev_path,
2277 			t_mp->sbm_basepfn,
2278 			t_mp->sbm_npages);
2279 
2280 		rv = dr_del_mlist_query(t_ml, &mq);
2281 		if (rv != KPHYSM_OK) {
2282 			PR_MEM("%s: kphysm_del_span_query:"
2283 				" unexpected return value %d\n", f, rv);
2284 
2285 			continue;
2286 		}
2287 
2288 		if (mq.nonrelocatable != 0) {
2289 			PR_MEM("%s: candidate %s has"
2290 				" nonrelocatable span [0x%lx..0x%lx]\n",
2291 				f,
2292 				t_mp->sbm_cm.sbdev_path,
2293 				mq.first_nonrelocatable,
2294 				mq.last_nonrelocatable);
2295 
2296 			continue;
2297 		}
2298 
2299 #ifdef DEBUG
2300 		/*
2301 		 * This is a debug tool for excluding certain boards
2302 		 * from being selected as a target board candidate.
2303 		 * dr_ignore_board is only tested by this driver.
2304 		 * It must be set with adb, obp, /etc/system or your
2305 		 * favorite debugger.
2306 		 */
2307 		if (dr_ignore_board &
2308 			(1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2309 			PR_MEM("%s: dr_ignore_board flag set,"
2310 				" ignoring %s as candidate\n",
2311 				f, t_mp->sbm_cm.sbdev_path);
2312 			continue;
2313 		}
2314 #endif
2315 
2316 		/*
2317 		 * Reserve excess source board memory, if any.
2318 		 *
2319 		 * Only the nonrelocatable source span will be copied
2320 		 * so schedule the rest of the source mem to be deleted.
2321 		 */
2322 		switch (preference) {
2323 		case DR_TP_NONRELOC:
2324 			/*
2325 			 * Get source copy memlist and use it to construct
2326 			 * delete memlist.
2327 			 */
2328 			d_ml = memlist_dup(s_ml);
2329 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2330 
2331 			/* XXX */
2332 			ASSERT(d_ml != NULL);
2333 			ASSERT(x_ml != NULL);
2334 
2335 			for (ml = x_ml; ml != NULL; ml = ml->next) {
2336 				d_ml = memlist_del_span(d_ml, ml->address,
2337 				    ml->size);
2338 			}
2339 
2340 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2341 			    s_mp->sbm_cm.sbdev_path);
2342 			PR_MEMLIST_DUMP(d_ml);
2343 
2344 			/* reserve excess spans */
2345 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2346 			    d_ml) != 0) {
2347 				/* likely more non-reloc pages appeared */
2348 				/* TODO: restart from top? */
2349 				continue;
2350 			}
2351 			break;
2352 		default:
2353 			d_ml = NULL;
2354 			break;
2355 		}
2356 
2357 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2358 
2359 		/*
2360 		 * reserve all memory on target board.
2361 		 * NOTE: source board's memhandle is used.
2362 		 *
2363 		 * If this succeeds (eq 0), then target selection is
2364 		 * complete and all unwanted memory spans, both source and
2365 		 * target, have been reserved.  Loop is terminated.
2366 		 */
2367 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2368 			PR_MEM("%s: %s: target board memory reserved\n",
2369 				f, t_mp->sbm_cm.sbdev_path);
2370 
2371 			/* a candidate target board is now reserved */
2372 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2373 			c_mp = t_mp;
2374 
2375 			/* *** EXITING LOOP *** */
2376 			break;
2377 		}
2378 
2379 		/* did not successfully reserve the target board. */
2380 		PR_MEM("%s: could not reserve target %s\n",
2381 			f, t_mp->sbm_cm.sbdev_path);
2382 
2383 		/*
2384 		 * NOTE: an undo of the dr_reserve_mem_span work
2385 		 * will happen automatically when the memhandle
2386 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2387 		 */
2388 
2389 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2390 	}
2391 
2392 	/* clean up after memlist editing logic */
2393 	if (x_ml != NULL)
2394 		memlist_delete(x_ml);
2395 
2396 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2397 	    DR_SMT_NPREF_SETS);
2398 
2399 	/*
2400 	 * c_mp will be NULL when the entire sets[] array
2401 	 * has been searched without reserving a target board.
2402 	 */
2403 	if (c_mp == NULL) {
2404 		PR_MEM("%s: %s: target selection failed.\n",
2405 			f, s_mp->sbm_cm.sbdev_path);
2406 
2407 		if (t_ml != NULL)
2408 			memlist_delete(t_ml);
2409 
2410 		return (-1);
2411 	}
2412 
2413 	PR_MEM("%s: found target %s for source %s\n",
2414 		f,
2415 		c_mp->sbm_cm.sbdev_path,
2416 		s_mp->sbm_cm.sbdev_path);
2417 
2418 	s_mp->sbm_peer = c_mp;
2419 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2420 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2421 	s_mp->sbm_mlist = s_ml;
2422 	s_mp->sbm_cm.sbdev_busy = 1;
2423 
2424 	c_mp->sbm_peer = s_mp;
2425 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
2426 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2427 	c_mp->sbm_mlist = t_ml;
2428 	c_mp->sbm_cm.sbdev_busy = 1;
2429 
2430 	return (0);
2431 }
2432 
2433 /*
2434  * Returns target preference rank:
2435  *     -1 not a valid copy-rename target board
2436  *	0 copy all source, source/target same size
2437  *	1 copy all source, larger target
2438  * 	2 copy nonrelocatable source span
2439  */
2440 static dr_target_pref_t
2441 dr_get_target_preference(dr_handle_t *hp,
2442     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2443     struct memlist *t_ml, struct memlist *s_ml,
2444     struct memlist *b_ml)
2445 {
2446 	dr_target_pref_t preference;
2447 	struct memlist *s_nonreloc_ml = NULL;
2448 	drmachid_t t_id;
2449 	static fn_t	f = "dr_get_target_preference";
2450 
2451 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2452 
2453 	/*
2454 	 * Can the entire source board be copied?
2455 	 */
2456 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2457 		if (s_mp->sbm_npages == t_mp->sbm_npages)
2458 			preference = DR_TP_SAME;	/* same size */
2459 		else
2460 			preference = DR_TP_LARGE;	/* larger target */
2461 	} else {
2462 		/*
2463 		 * Entire source won't fit so try non-relocatable memory only
2464 		 * (target aligned).
2465 		 */
2466 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2467 		if (s_nonreloc_ml == NULL) {
2468 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2469 			preference = DR_TP_INVALID;
2470 		}
2471 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
2472 			preference = DR_TP_NONRELOC;
2473 		else
2474 			preference = DR_TP_INVALID;
2475 	}
2476 
2477 	if (s_nonreloc_ml != NULL)
2478 		memlist_delete(s_nonreloc_ml);
2479 
2480 	/*
2481 	 * Force floating board preference lower than all other boards
2482 	 * if the force flag is present; otherwise disallow the board.
2483 	 */
2484 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2485 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2486 			preference += DR_TP_FLOATING;
2487 		else
2488 			preference = DR_TP_INVALID;
2489 	}
2490 
2491 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2492 	    preference);
2493 
2494 	return (preference);
2495 }
2496 
2497 /*
2498  * Create a memlist representing the source memory that will be copied to
2499  * the target board.  The memory to be copied is the maximum amount that
2500  * will fit on the target board.
2501  */
2502 static struct memlist *
2503 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2504     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2505 {
2506 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2507 	uint64_t	s_slice_mask, s_slice_base;
2508 	uint64_t	t_slice_mask, t_slice_base;
2509 	static fn_t	f = "dr_get_copy_mlist";
2510 
2511 	ASSERT(s_mlist != NULL);
2512 	ASSERT(t_mlist != NULL);
2513 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2514 
2515 	s_slice_mask = s_mp->sbm_slice_size - 1;
2516 	s_slice_base = s_mlist->address & ~s_slice_mask;
2517 
2518 	t_slice_mask = t_mp->sbm_slice_size - 1;
2519 	t_slice_base = t_mlist->address & ~t_slice_mask;
2520 
2521 	t_ml = memlist_dup(t_mlist);
2522 	s_del_ml = memlist_dup(s_mlist);
2523 	s_copy_ml = memlist_dup(s_mlist);
2524 
2525 	/* XXX */
2526 	ASSERT(t_ml != NULL);
2527 	ASSERT(s_del_ml != NULL);
2528 	ASSERT(s_copy_ml != NULL);
2529 
2530 	/*
2531 	 * To construct the source copy memlist:
2532 	 *
2533 	 * The target memlist is converted to the post-rename
2534 	 * source addresses.  This is the physical address range
2535 	 * the target will have after the copy-rename.  Overlaying
2536 	 * and deleting this from the current source memlist will
2537 	 * give the source delete memlist.  The copy memlist is
2538 	 * the reciprocal of the source delete memlist.
2539 	 */
2540 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2541 		/*
2542 		 * Normalize relative to target slice base PA
2543 		 * in order to preseve slice offsets.
2544 		 */
2545 		ml->address -= t_slice_base;
2546 		/*
2547 		 * Convert to source slice PA address.
2548 		 */
2549 		ml->address += s_slice_base;
2550 	}
2551 
2552 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2553 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
2554 	}
2555 
2556 	/*
2557 	 * Expand the delete mlist to fully include any dynamic segments
2558 	 * it intersects with.
2559 	 */
2560 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
2561 		uint64_t del_base = ml->address;
2562 		uint64_t del_end = ml->address + ml->size;
2563 		struct memlist *dyn;
2564 
2565 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2566 			uint64_t dyn_base = dyn->address;
2567 			uint64_t dyn_end = dyn->address + dyn->size;
2568 
2569 			if (del_base > dyn_base && del_base < dyn_end)
2570 				del_base = dyn_base;
2571 
2572 			if (del_end > dyn_base && del_end < dyn_end)
2573 				del_end = dyn_end;
2574 		}
2575 
2576 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2577 	}
2578 
2579 	memlist_delete(s_del_ml);
2580 	s_del_ml = x_ml;
2581 
2582 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
2583 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
2584 	}
2585 
2586 	PR_MEM("%s: source delete mlist\n", f);
2587 	PR_MEMLIST_DUMP(s_del_ml);
2588 
2589 	PR_MEM("%s: source copy mlist\n", f);
2590 	PR_MEMLIST_DUMP(s_copy_ml);
2591 
2592 	memlist_delete(t_ml);
2593 	memlist_delete(s_del_ml);
2594 
2595 	return (s_copy_ml);
2596 }
2597 
2598 /*
2599  * Scan the non-relocatable spans on the source memory
2600  * and construct a minimum mlist that includes all non-reloc
2601  * memory subject to target alignment, and dynamic segment
2602  * constraints where only whole dynamic segments may be deleted.
2603  */
2604 static struct memlist *
2605 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2606 {
2607 	struct memlist	*x_ml = NULL;
2608 	struct memlist	*ml;
2609 	static fn_t	f = "dr_get_nonreloc_mlist";
2610 
2611 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
2612 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2613 
2614 	for (ml = s_ml; ml; ml = ml->next) {
2615 		int rv;
2616 		uint64_t nr_base, nr_end;
2617 		memquery_t mq;
2618 		struct memlist *dyn;
2619 
2620 		rv = kphysm_del_span_query(
2621 			_b64top(ml->address), _b64top(ml->size), &mq);
2622 		if (rv) {
2623 			memlist_delete(x_ml);
2624 			return (NULL);
2625 		}
2626 
2627 		if (mq.nonrelocatable == 0)
2628 			continue;
2629 
2630 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2631 			_ptob64(mq.first_nonrelocatable),
2632 			_ptob64(mq.last_nonrelocatable),
2633 			mq.first_nonrelocatable,
2634 			mq.last_nonrelocatable);
2635 
2636 		/*
2637 		 * Align the span at both ends to allow for possible
2638 		 * cage expansion.
2639 		 */
2640 		nr_base = _ptob64(mq.first_nonrelocatable);
2641 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
2642 
2643 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2644 			f, nr_base, nr_end);
2645 
2646 		/*
2647 		 * Expand the non-reloc span to fully include any
2648 		 * dynamic segments it intersects with.
2649 		 */
2650 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2651 			uint64_t dyn_base = dyn->address;
2652 			uint64_t dyn_end = dyn->address + dyn->size;
2653 
2654 			if (nr_base > dyn_base && nr_base < dyn_end)
2655 				nr_base = dyn_base;
2656 
2657 			if (nr_end > dyn_base && nr_end < dyn_end)
2658 				nr_end = dyn_end;
2659 		}
2660 
2661 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2662 	}
2663 
2664 	if (x_ml == NULL) {
2665 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2666 		return (NULL);
2667 	}
2668 
2669 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2670 	PR_MEMLIST_DUMP(x_ml);
2671 
2672 	return (x_ml);
2673 }
2674 
2675 /*
2676  * Check if source memlist can fit in target memlist while maintaining
2677  * relative offsets within board.
2678  */
2679 static int
2680 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2681     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2682 {
2683 	int		canfit = 0;
2684 	struct memlist	*s_ml, *t_ml, *ml;
2685 	uint64_t	s_slice_mask, t_slice_mask;
2686 	static fn_t	f = "dr_mlist_canfit";
2687 
2688 	s_ml = memlist_dup(s_mlist);
2689 	t_ml = memlist_dup(t_mlist);
2690 
2691 	if (s_ml == NULL || t_ml == NULL) {
2692 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2693 		goto done;
2694 	}
2695 
2696 	s_slice_mask = s_mp->sbm_slice_size - 1;
2697 	t_slice_mask = t_mp->sbm_slice_size - 1;
2698 
2699 	/*
2700 	 * Normalize to slice relative offsets.
2701 	 */
2702 	for (ml = s_ml; ml; ml = ml->next)
2703 		ml->address &= s_slice_mask;
2704 
2705 	for (ml = t_ml; ml; ml = ml->next)
2706 		ml->address &= t_slice_mask;
2707 
2708 	canfit = memlist_canfit(s_ml, t_ml);
2709 done:
2710 	memlist_delete(s_ml);
2711 	memlist_delete(t_ml);
2712 
2713 	return (canfit);
2714 }
2715 
2716 /*
2717  * Memlist support.
2718  */
2719 
2720 /*
2721  * Determine whether the source memlist (s_mlist) will
2722  * fit into the target memlist (t_mlist) in terms of
2723  * size and holes.  Assumes the caller has normalized the
2724  * memlist physical addresses for comparison.
2725  */
2726 static int
2727 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2728 {
2729 	int		rv = 0;
2730 	struct memlist	*s_ml, *t_ml;
2731 
2732 	if ((s_mlist == NULL) || (t_mlist == NULL))
2733 		return (0);
2734 
2735 	s_ml = s_mlist;
2736 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2737 		uint64_t	s_start, s_end;
2738 		uint64_t	t_start, t_end;
2739 
2740 		t_start = t_ml->address;
2741 		t_end = t_start + t_ml->size;
2742 
2743 		for (; s_ml; s_ml = s_ml->next) {
2744 			s_start = s_ml->address;
2745 			s_end = s_start + s_ml->size;
2746 
2747 			if ((s_start < t_start) || (s_end > t_end))
2748 				break;
2749 		}
2750 	}
2751 
2752 	/*
2753 	 * If we ran out of source memlist chunks that mean
2754 	 * we found a home for all of them.
2755 	 */
2756 	if (s_ml == NULL)
2757 		rv = 1;
2758 
2759 	return (rv);
2760 }
2761