xref: /titanic_41/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 07d06da50d310a325b457d6330165aebab1e0064)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * DR memory support routines.
28  */
29 
30 #include <sys/note.h>
31 #include <sys/debug.h>
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>
35 #include <sys/dditypes.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/sysmacros.h>
44 #include <sys/machsystm.h>
45 #include <sys/spitregs.h>
46 #include <sys/cpuvar.h>
47 #include <sys/promif.h>
48 #include <vm/seg_kmem.h>
49 #include <sys/lgrp.h>
50 #include <sys/platform_module.h>
51 
52 #include <vm/page.h>
53 
54 #include <sys/dr.h>
55 #include <sys/dr_util.h>
56 #include <sys/drmach.h>
57 #include <sys/kobj.h>
58 
59 extern struct memlist	*phys_install;
60 extern vnode_t		*retired_pages;
61 
62 /* TODO: push this reference below drmach line */
63 extern int		kcage_on;
64 
65 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
66 static char *dr_ie_fmt = "dr_mem.c %d";
67 
/*
 * Target preference classification, used as the return type of
 * dr_get_target_preference().  DR_TP_INVALID is pinned to -1; the
 * remaining enumerators take consecutive values starting at 0.
 * NOTE(review): the meaning and relative ranking of the individual
 * preferences is established by code outside this excerpt -- confirm
 * against dr_get_target_preference() before relying on the ordering.
 */
typedef enum {
	DR_TP_INVALID = -1,
	DR_TP_SAME,
	DR_TP_LARGE,
	DR_TP_NONRELOC,
	DR_TP_FLOATING
} dr_target_pref_t;
75 
76 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
77 static int		dr_reserve_mem_spans(memhandle_t *mhp,
78 				struct memlist *mlist);
79 static int		dr_select_mem_target(dr_handle_t *hp,
80 				dr_mem_unit_t *mp, struct memlist *ml);
81 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
82 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
83 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
84 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
85 				struct memlist *s_ml, struct memlist *x_ml,
86 				struct memlist *b_ml);
87 
88 static int		memlist_canfit(struct memlist *s_mlist,
89 				struct memlist *t_mlist);
90 static int		dr_del_mlist_query(struct memlist *mlist,
91 				memquery_t *mp);
92 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
93 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
94 				dr_mem_unit_t *t_mp);
95 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
96 				dr_mem_unit_t *s_mp);
97 static int		dr_memlist_canfit(struct memlist *s_mlist,
98 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
99 				dr_mem_unit_t *t_mp);
100 
/*
 * dr_mem_unit_t.sbm_flags
 *
 * Each flag is a distinct single bit so that flags can be combined and
 * tested with bitwise operators.
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
#define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
#define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
109 
/*
 * helper macros
 *
 *	_ptob64(p)	widen p to uint64_t and shift left by PAGESHIFT
 *			(pages, or a page frame number, to bytes).
 *	_b64top(b)	shift b right by PAGESHIFT and cast to pgcnt_t
 *			(bytes to pages; any partial page is dropped).
 */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
113 
114 static struct memlist *
115 dr_get_memlist(dr_mem_unit_t *mp)
116 {
117 	struct memlist	*mlist = NULL;
118 	sbd_error_t	*err;
119 	static fn_t	f = "dr_get_memlist";
120 
121 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
122 
123 	/*
124 	 * Return cached memlist, if present.
125 	 * This memlist will be present following an
126 	 * unconfigure (a.k.a: detach) of this memunit.
127 	 * It should only be used in the case were a configure
128 	 * is bringing this memunit back in without going
129 	 * through the disconnect and connect states.
130 	 */
131 	if (mp->sbm_mlist) {
132 		PR_MEM("%s: found cached memlist\n", f);
133 
134 		mlist = memlist_dup(mp->sbm_mlist);
135 	} else {
136 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
137 
138 		/* attempt to construct a memlist using phys_install */
139 
140 		/* round down to slice base address */
141 		basepa &= ~(mp->sbm_slice_size - 1);
142 
143 		/* get a copy of phys_install to edit */
144 		memlist_read_lock();
145 		mlist = memlist_dup(phys_install);
146 		memlist_read_unlock();
147 
148 		/* trim lower irrelevant span */
149 		if (mlist)
150 			mlist = memlist_del_span(mlist, 0ull, basepa);
151 
152 		/* trim upper irrelevant span */
153 		if (mlist) {
154 			uint64_t endpa;
155 
156 			basepa += mp->sbm_slice_size;
157 			endpa = _ptob64(physmax + 1);
158 			if (endpa > basepa)
159 				mlist = memlist_del_span(
160 				    mlist, basepa,
161 				    endpa - basepa);
162 		}
163 
164 		if (mlist) {
165 			/* successfully built a memlist */
166 			PR_MEM("%s: derived memlist from phys_install\n", f);
167 		}
168 
169 		/* if no mlist yet, try platform layer */
170 		if (!mlist) {
171 			err = drmach_mem_get_memlist(
172 			    mp->sbm_cm.sbdev_id, &mlist);
173 			if (err) {
174 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
175 				mlist = NULL; /* paranoia */
176 			}
177 		}
178 	}
179 
180 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
181 	PR_MEMLIST_DUMP(mlist);
182 
183 	return (mlist);
184 }
185 
/*
 * Rendezvous state shared between dr_release_mem(), which waits, and
 * the dr_mem_del_done() completion callback, which signals.
 */
typedef struct {
	kcondvar_t cond;	/* signalled by dr_mem_del_done() */
	kmutex_t lock;		/* held while reading/writing error and done */
	int error;		/* error value passed to dr_mem_del_done() */
	int done;		/* set to 1 by dr_mem_del_done() */
} dr_release_mem_sync_t;
192 
193 /*
194  * Memory has been logically removed by the time this routine is called.
195  */
196 static void
197 dr_mem_del_done(void *arg, int error)
198 {
199 	dr_release_mem_sync_t *ds = arg;
200 
201 	mutex_enter(&ds->lock);
202 	ds->error = error;
203 	ds->done = 1;
204 	cv_signal(&ds->cond);
205 	mutex_exit(&ds->lock);
206 }
207 
208 /*
209  * When we reach here the memory being drained should have
210  * already been reserved in dr_pre_release_mem().
211  * Our only task here is to kick off the "drain" and wait
212  * for it to finish.
213  */
214 void
215 dr_release_mem(dr_common_unit_t *cp)
216 {
217 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
218 	int		err;
219 	dr_release_mem_sync_t rms;
220 	static fn_t	f = "dr_release_mem";
221 
222 	/* check that this memory unit has been reserved */
223 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
224 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
225 		return;
226 	}
227 
228 	bzero((void *) &rms, sizeof (rms));
229 
230 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
231 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
232 
233 	mutex_enter(&rms.lock);
234 	err = kphysm_del_start(mp->sbm_memhandle,
235 	    dr_mem_del_done, (void *) &rms);
236 	if (err == KPHYSM_OK) {
237 		/* wait for completion or interrupt */
238 		while (!rms.done) {
239 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
240 				/* then there is a pending UNIX signal */
241 				(void) kphysm_del_cancel(mp->sbm_memhandle);
242 
243 				/* wait for completion */
244 				while (!rms.done)
245 					cv_wait(&rms.cond, &rms.lock);
246 			}
247 		}
248 		/* get the result of the memory delete operation */
249 		err = rms.error;
250 	}
251 	mutex_exit(&rms.lock);
252 
253 	cv_destroy(&rms.cond);
254 	mutex_destroy(&rms.lock);
255 
256 	if (err != KPHYSM_OK) {
257 		int e_code;
258 
259 		switch (err) {
260 			case KPHYSM_ENOWORK:
261 				e_code = ESBD_NOERROR;
262 				break;
263 
264 			case KPHYSM_EHANDLE:
265 			case KPHYSM_ESEQUENCE:
266 				e_code = ESBD_INTERNAL;
267 				break;
268 
269 			case KPHYSM_ENOTVIABLE:
270 				e_code = ESBD_MEM_NOTVIABLE;
271 				break;
272 
273 			case KPHYSM_EREFUSED:
274 				e_code = ESBD_MEM_REFUSED;
275 				break;
276 
277 			case KPHYSM_ENONRELOC:
278 				e_code = ESBD_MEM_NONRELOC;
279 				break;
280 
281 			case KPHYSM_ECANCELLED:
282 				e_code = ESBD_MEM_CANCELLED;
283 				break;
284 
285 			case KPHYSM_ERESOURCE:
286 				e_code = ESBD_MEMFAIL;
287 				break;
288 
289 			default:
290 				cmn_err(CE_WARN,
291 				    "%s: unexpected kphysm error code %d,"
292 				    " id 0x%p",
293 				    f, err, mp->sbm_cm.sbdev_id);
294 
295 				e_code = ESBD_IO;
296 				break;
297 		}
298 
299 		if (e_code != ESBD_NOERROR) {
300 			dr_dev_err(CE_WARN, &mp->sbm_cm, e_code);
301 		}
302 	}
303 }
304 
305 void
306 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
307 {
308 	_NOTE(ARGUNUSED(hp))
309 
310 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
311 	struct memlist	*ml, *mc;
312 	sbd_error_t	*err;
313 	static fn_t	f = "dr_attach_mem";
314 
315 	PR_MEM("%s...\n", f);
316 
317 	dr_lock_status(hp->h_bd);
318 	err = drmach_configure(cp->sbdev_id, 0);
319 	dr_unlock_status(hp->h_bd);
320 	if (err) {
321 		DRERR_SET_C(&cp->sbdev_error, &err);
322 		return;
323 	}
324 
325 	ml = dr_get_memlist(mp);
326 	for (mc = ml; mc; mc = mc->next) {
327 		int		 rv;
328 		sbd_error_t	*err;
329 
330 		rv = kphysm_add_memory_dynamic(
331 		    (pfn_t)(mc->address >> PAGESHIFT),
332 		    (pgcnt_t)(mc->size >> PAGESHIFT));
333 		if (rv != KPHYSM_OK) {
334 			/*
335 			 * translate kphysm error and
336 			 * store in devlist error
337 			 */
338 			switch (rv) {
339 			case KPHYSM_ERESOURCE:
340 				rv = ESBD_NOMEM;
341 				break;
342 
343 			case KPHYSM_EFAULT:
344 				rv = ESBD_FAULT;
345 				break;
346 
347 			default:
348 				rv = ESBD_INTERNAL;
349 				break;
350 			}
351 
352 			if (rv == ESBD_INTERNAL) {
353 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
354 			} else
355 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
356 			break;
357 		}
358 
359 		err = drmach_mem_add_span(
360 		    mp->sbm_cm.sbdev_id, mc->address, mc->size);
361 		if (err) {
362 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
363 			break;
364 		}
365 	}
366 
367 	memlist_delete(ml);
368 
369 	/* back out if configure failed */
370 	if (mp->sbm_cm.sbdev_error != NULL) {
371 		dr_lock_status(hp->h_bd);
372 		err = drmach_unconfigure(cp->sbdev_id, 0);
373 		if (err)
374 			sbd_err_clear(&err);
375 		dr_unlock_status(hp->h_bd);
376 	}
377 }
378 
379 static struct memlist *
380 dr_memlist_del_retired_pages(struct memlist *mlist)
381 {
382 	page_t		*pp;
383 	pfn_t		pfn;
384 	kmutex_t	*vphm;
385 	vnode_t		*vp = retired_pages;
386 	static fn_t	f = "dr_memlist_del_retired_pages";
387 
388 	vphm = page_vnode_mutex(vp);
389 	mutex_enter(vphm);
390 
391 	PR_MEM("%s\n", f);
392 
393 	if ((pp = vp->v_pages) == NULL) {
394 		mutex_exit(vphm);
395 		return (mlist);
396 	}
397 
398 	do {
399 		ASSERT(pp != NULL);
400 		ASSERT(pp->p_vnode == retired_pages);
401 
402 		if (!page_try_reclaim_lock(pp, SE_SHARED, SE_RETIRED))
403 			continue;
404 
405 		pfn = page_pptonum(pp);
406 
407 		/*
408 		 * Page retirement currently breaks large pages into PAGESIZE
409 		 * pages. If this changes, need to remove the assert and deal
410 		 * with different page sizes.
411 		 */
412 		ASSERT(pp->p_szc == 0);
413 
414 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
415 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
416 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
417 			    "from memlist\n", ptob(pfn), pfn);
418 		}
419 
420 		page_unlock(pp);
421 	} while ((pp = pp->p_vpnext) != vp->v_pages);
422 
423 	mutex_exit(vphm);
424 
425 	return (mlist);
426 }
427 
428 static int
429 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
430 {
431 	int		rv = -1;
432 	time_t		 copytime;
433 	drmachid_t	 cr_id;
434 	dr_sr_handle_t	*srhp = NULL;
435 	dr_board_t	*t_bp, *s_bp;
436 	struct memlist	*c_ml, *d_ml;
437 	sbd_error_t	*err;
438 	static fn_t	 f = "dr_move_memory";
439 
440 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
441 	    f,
442 	    s_mp->sbm_cm.sbdev_path,
443 	    t_mp->sbm_cm.sbdev_path);
444 
445 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
446 	ASSERT(s_mp->sbm_peer == t_mp);
447 	ASSERT(s_mp->sbm_mlist);
448 
449 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
450 	ASSERT(t_mp->sbm_peer == s_mp);
451 
452 	/*
453 	 * create a memlist of spans to copy by removing
454 	 * the spans that have been deleted, if any, from
455 	 * the full source board memlist.  s_mp->sbm_del_mlist
456 	 * will be NULL if there were no spans deleted from
457 	 * the source board.
458 	 */
459 	c_ml = memlist_dup(s_mp->sbm_mlist);
460 	d_ml = s_mp->sbm_del_mlist;
461 	while (d_ml != NULL) {
462 		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
463 		d_ml = d_ml->next;
464 	}
465 
466 	/*
467 	 * Remove retired pages from the copy list. The page content
468 	 * need not be copied since the pages are no longer in use.
469 	 */
470 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
471 	PR_MEMLIST_DUMP(c_ml);
472 
473 	c_ml = dr_memlist_del_retired_pages(c_ml);
474 
475 	PR_MEM("%s: copy list after removing retired pages:\n", f);
476 	PR_MEMLIST_DUMP(c_ml);
477 
478 	/*
479 	 * With parallel copy, it shouldn't make a difference which
480 	 * CPU is the actual master during copy-rename since all
481 	 * CPUs participate in the parallel copy anyway.
482 	 */
483 	affinity_set(CPU_CURRENT);
484 
485 	err = drmach_copy_rename_init(
486 	    t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
487 	if (err) {
488 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
489 		affinity_clear();
490 		memlist_delete(c_ml);
491 		return (-1);
492 	}
493 
494 	srhp = dr_get_sr_handle(hp);
495 	ASSERT(srhp);
496 
497 	copytime = ddi_get_lbolt();
498 
499 	/* Quiesce the OS.  */
500 	if (dr_suspend(srhp)) {
501 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
502 		    " for copy-rename", f);
503 
504 		err = drmach_copy_rename_fini(cr_id);
505 		if (err) {
506 			/*
507 			 * no error is expected since the program has
508 			 * not yet run.
509 			 */
510 
511 			/* catch this in debug kernels */
512 			ASSERT(0);
513 
514 			sbd_err_clear(&err);
515 		}
516 
517 		/* suspend error reached via hp */
518 		s_mp->sbm_cm.sbdev_error = hp->h_err;
519 		hp->h_err = NULL;
520 		goto done;
521 	}
522 
523 	drmach_copy_rename(cr_id);
524 
525 	/* Resume the OS.  */
526 	dr_resume(srhp);
527 
528 	copytime = ddi_get_lbolt() - copytime;
529 
530 	if (err = drmach_copy_rename_fini(cr_id))
531 		goto done;
532 
533 	/*
534 	 * Rename memory for lgroup.
535 	 * Source and target board numbers are packaged in arg.
536 	 */
537 	s_bp = s_mp->sbm_cm.sbdev_bp;
538 	t_bp = t_mp->sbm_cm.sbdev_bp;
539 
540 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
541 	    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
542 
543 
544 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
545 	    f, copytime, copytime / hz);
546 
547 	rv = 0;
548 done:
549 	if (srhp)
550 		dr_release_sr_handle(srhp);
551 	if (err)
552 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
553 	affinity_clear();
554 
555 	return (rv);
556 }
557 
558 /*
559  * If detaching node contains memory that is "non-permanent"
560  * then the memory adr's are simply cleared.  If the memory
561  * is non-relocatable, then do a copy-rename.
562  */
563 void
564 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
565 {
566 	int			rv = 0;
567 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
568 	dr_mem_unit_t		*t_mp;
569 	dr_state_t		state;
570 	static fn_t		f = "dr_detach_mem";
571 
572 	PR_MEM("%s...\n", f);
573 
574 	/* lookup target mem unit and target board structure, if any */
575 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
576 		t_mp = s_mp->sbm_peer;
577 		ASSERT(t_mp != NULL);
578 		ASSERT(t_mp->sbm_peer == s_mp);
579 	} else {
580 		t_mp = NULL;
581 	}
582 
583 	/* verify mem unit's state is UNREFERENCED */
584 	state = s_mp->sbm_cm.sbdev_state;
585 	if (state != DR_STATE_UNREFERENCED) {
586 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
587 		return;
588 	}
589 
590 	/* verify target mem unit's state is UNREFERENCED, if any */
591 	if (t_mp != NULL) {
592 		state = t_mp->sbm_cm.sbdev_state;
593 		if (state != DR_STATE_UNREFERENCED) {
594 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
595 			return;
596 		}
597 	}
598 
599 	/*
600 	 * If there is no target board (no copy/rename was needed), then
601 	 * we're done!
602 	 */
603 	if (t_mp == NULL) {
604 		sbd_error_t *err;
605 		/*
606 		 * Reprogram interconnect hardware and disable
607 		 * memory controllers for memory node that's going away.
608 		 */
609 
610 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
611 		if (err) {
612 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
613 			rv = -1;
614 		}
615 	} else {
616 		rv = dr_move_memory(hp, s_mp, t_mp);
617 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
618 		    f,
619 		    rv ? "FAILED" : "COMPLETED",
620 		    s_mp->sbm_cm.sbdev_bp->b_num,
621 		    t_mp->sbm_cm.sbdev_bp->b_num);
622 
623 		if (rv != 0)
624 			(void) dr_cancel_mem(s_mp);
625 	}
626 
627 	if (rv == 0) {
628 		sbd_error_t *err;
629 
630 		dr_lock_status(hp->h_bd);
631 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
632 		dr_unlock_status(hp->h_bd);
633 		if (err)
634 			sbd_err_clear(&err);
635 	}
636 }
637 
638 /*
639  * This routine acts as a wrapper for kphysm_del_span_query in order to
640  * support potential memory holes in a board's physical address space.
641  * It calls kphysm_del_span_query for each node in a memlist and accumulates
642  * the results in *mp.
643  */
644 static int
645 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
646 {
647 	struct memlist	*ml;
648 	int		 rv = 0;
649 
650 
651 	if (mlist == NULL)
652 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
653 
654 	mp->phys_pages = 0;
655 	mp->managed = 0;
656 	mp->nonrelocatable = 0;
657 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
658 	mp->last_nonrelocatable = 0;
659 
660 	for (ml = mlist; ml; ml = ml->next) {
661 		memquery_t mq;
662 
663 		rv = kphysm_del_span_query(
664 		    _b64top(ml->address), _b64top(ml->size), &mq);
665 		if (rv)
666 			break;
667 
668 		mp->phys_pages += mq.phys_pages;
669 		mp->managed += mq.managed;
670 		mp->nonrelocatable += mq.nonrelocatable;
671 
672 		if (mq.nonrelocatable != 0) {
673 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
674 				mp->first_nonrelocatable =
675 				    mq.first_nonrelocatable;
676 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
677 				mp->last_nonrelocatable =
678 				    mq.last_nonrelocatable;
679 		}
680 	}
681 
682 	if (mp->nonrelocatable == 0)
683 		mp->first_nonrelocatable = 0;	/* XXX */
684 
685 	return (rv);
686 }
687 
/*
 * Fill in status for the memory units of the handle's board.
 *
 * hp     - operation handle; hp->h_bd is the board being queried.
 * devset - set of devices of interest; it is first restricted to the
 *          devices present on the board.
 * dsp    - array of status entries to fill.  One entry is consumed
 *          (and dsp advanced) for each memory unit reported.
 *          NOTE(review): no bound on dsp is checked here; the caller
 *          presumably sizes it for MAX_MEM_UNITS_PER_BOARD -- confirm.
 *
 * Returns the number of entries filled in.
 *
 * A unit is skipped (no entry consumed) if it is not in devset, is in
 * the EMPTY state, has a zero id, or drmach_status() fails for it (in
 * which case the error is recorded in the unit's sbdev_error).
 *
 * NOTE: This routine is only partially smart about multiple
 *	 mem-units.  Need to make mem-status structure smart
 *	 about them also.
 */
int
dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
{
	int		m, mix;
	memdelstat_t	mdst;
	memquery_t	mq;
	dr_board_t	*bp;
	dr_mem_unit_t	*mp;
	sbd_mem_stat_t	*msp;
	static fn_t	f = "dr_mem_status";

	bp = hp->h_bd;
	devset &= DR_DEVS_PRESENT(bp);

	/* m walks the unit numbers; mix counts the entries filled in */
	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
		int		rv;
		sbd_error_t	*err;
		drmach_status_t	 pstat;
		dr_mem_unit_t	*p_mp;

		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
			continue;

		mp = dr_get_mem_unit(bp, m);

		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
			/* present, but not fully initialized */
			continue;
		}

		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
			continue;

		/* fetch platform status */
		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			continue;
		}

		/* start from an all-zero entry; fields not set stay 0 */
		msp = &dsp->d_mem;
		bzero((caddr_t)msp, sizeof (*msp));

		/*
		 * NOTE(review): strncpy() does not NUL-terminate when the
		 * source fills the destination; this relies on pstat.type
		 * being a terminated string no longer than c_name --
		 * confirm the two buffer sizes.
		 */
		(void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
		    sizeof (msp->ms_cm.c_id.c_name));
		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
		/* busy if either the DR layer or the platform says so */
		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;

		msp->ms_totpages = mp->sbm_npages;
		msp->ms_basepfn = mp->sbm_basepfn;
		msp->ms_pageslost = mp->sbm_pageslost;
		msp->ms_cage_enabled = kcage_on;

		/* the peer is only looked at while this unit is reserved */
		if (mp->sbm_flags & DR_MFLAG_RESERVED)
			p_mp = mp->sbm_peer;
		else
			p_mp = NULL;

		if (p_mp == NULL) {
			msp->ms_peer_is_target = 0;
			msp->ms_peer_ap_id[0] = '\0';
		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			char *minor;

			/*
			 * b_dip doesn't have to be held for ddi_pathname()
			 * because the board struct (dr_board_t) will be
			 * destroyed before b_dip detaches.
			 */
			(void) ddi_pathname(bp->b_dip, path);
			/* the part of the peer's path from ':' on, if any */
			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');

			(void) snprintf(msp->ms_peer_ap_id,
			    sizeof (msp->ms_peer_ap_id), "%s%s",
			    path, (minor == NULL) ? "" : minor);

			kmem_free(path, MAXPATHLEN);

			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
				msp->ms_peer_is_target = 1;
		}

		/* delete progress is only available to the release owner */
		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
		else
			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */

		if (rv == KPHYSM_OK) {
			/*
			 * Any pages above managed is "free",
			 * i.e. it's collected.
			 */
			msp->ms_detpages += (uint_t)(mdst.collected +
			    mdst.phys_pages - mdst.managed);
		} else {
			/*
			 * If we're UNREFERENCED or UNCONFIGURED,
			 * then the number of detached pages is
			 * however many pages are on the board.
			 * I.e. detached = not in use by OS.
			 */
			switch (msp->ms_cm.c_ostate) {
			/*
			 * changed to use cfgadm states
			 *
			 * was:
			 *	case DR_STATE_UNREFERENCED:
			 *	case DR_STATE_UNCONFIGURED:
			 */
			case SBD_STAT_UNCONFIGURED:
				msp->ms_detpages = msp->ms_totpages;
				break;

			default:
				break;
			}
		}

		/*
		 * kphysm_del_span_query can report non-reloc pages = total
		 * pages for memory that is not yet configured
		 */
		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
			struct memlist *ml;

			/* query the unit's spans; -1 if there is no memlist */
			ml = dr_get_memlist(mp);
			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
			memlist_delete(ml);

			if (rv == KPHYSM_OK) {
				msp->ms_managed_pages = mq.managed;
				msp->ms_noreloc_pages = mq.nonrelocatable;
				msp->ms_noreloc_first =
				    mq.first_nonrelocatable;
				msp->ms_noreloc_last =
				    mq.last_nonrelocatable;
				msp->ms_cm.c_sflags = 0;
				/* flag unconfigure when non-reloc pages exist */
				if (mq.nonrelocatable) {
					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
					    msp->ms_cm.c_sflags);
				}
			} else {
				PR_MEM("%s: kphysm_del_span_query() = %d\n",
				    f, rv);
			}
		}

		/*
		 * Check source unit state during copy-rename
		 */
		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;

		/* entry complete: count it and move to the next slot */
		mix++;
		dsp++;
	}

	return (mix);
}
859 
860 int
861 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
862 {
863 	_NOTE(ARGUNUSED(hp))
864 
865 	int		err_flag = 0;
866 	int		d;
867 	sbd_error_t	*err;
868 	static fn_t	f = "dr_pre_attach_mem";
869 
870 	PR_MEM("%s...\n", f);
871 
872 	for (d = 0; d < devnum; d++) {
873 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
874 		dr_state_t	state;
875 
876 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
877 
878 		state = mp->sbm_cm.sbdev_state;
879 		switch (state) {
880 		case DR_STATE_UNCONFIGURED:
881 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
882 			    f,
883 			    mp->sbm_cm.sbdev_path);
884 
885 			/* use memlist cached by dr_post_detach_mem_unit */
886 			ASSERT(mp->sbm_mlist != NULL);
887 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
888 			    f, mp->sbm_cm.sbdev_path);
889 			PR_MEMLIST_DUMP(mp->sbm_mlist);
890 
891 			/* kphysm del handle should be have been freed */
892 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
893 
894 			/*FALLTHROUGH*/
895 
896 		case DR_STATE_CONNECTED:
897 			PR_MEM("%s: reprogramming mem hardware on %s\n",
898 			    f, mp->sbm_cm.sbdev_bp->b_path);
899 
900 			PR_MEM("%s: enabling %s\n",
901 			    f, mp->sbm_cm.sbdev_path);
902 
903 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
904 			if (err) {
905 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
906 				err_flag = 1;
907 			}
908 			break;
909 
910 		default:
911 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
912 			err_flag = 1;
913 			break;
914 		}
915 
916 		/* exit for loop if error encountered */
917 		if (err_flag)
918 			break;
919 	}
920 
921 	return (err_flag ? -1 : 0);
922 }
923 
924 static void
925 dr_update_mc_memory()
926 {
927 	void		(*mc_update_mlist)(void);
928 
929 	/*
930 	 * mc-opl is configured during drmach_mem_new but the memory
931 	 * has not been added to phys_install at that time.
932 	 * we must inform mc-opl to update the mlist after we
933 	 * attach or detach a system board.
934 	 */
935 
936 	mc_update_mlist = (void (*)(void))
937 	    modgetsymvalue("opl_mc_update_mlist", 0);
938 
939 	if (mc_update_mlist != NULL) {
940 		(*mc_update_mlist)();
941 	}
942 }
943 
944 int
945 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
946 {
947 	_NOTE(ARGUNUSED(hp))
948 
949 	int		d;
950 	static fn_t	f = "dr_post_attach_mem";
951 
952 	PR_MEM("%s...\n", f);
953 
954 	for (d = 0; d < devnum; d++) {
955 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
956 		struct memlist	*mlist, *ml;
957 
958 		mlist = dr_get_memlist(mp);
959 		if (mlist == NULL) {
960 			/* OPL supports memoryless board */
961 			continue;
962 		}
963 
964 		/*
965 		 * Verify the memory really did successfully attach
966 		 * by checking for its existence in phys_install.
967 		 */
968 		memlist_read_lock();
969 		if (memlist_intersect(phys_install, mlist) == 0) {
970 			memlist_read_unlock();
971 
972 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
973 
974 			PR_MEM("%s: %s memlist not in phys_install",
975 			    f, mp->sbm_cm.sbdev_path);
976 
977 			memlist_delete(mlist);
978 			continue;
979 		}
980 		memlist_read_unlock();
981 
982 		for (ml = mlist; ml != NULL; ml = ml->next) {
983 			sbd_error_t *err;
984 
985 			err = drmach_mem_add_span(
986 			    mp->sbm_cm.sbdev_id,
987 			    ml->address,
988 			    ml->size);
989 			if (err)
990 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
991 		}
992 
993 		memlist_delete(mlist);
994 
995 		/*
996 		 * Destroy cached memlist, if any.
997 		 * There will be a cached memlist in sbm_mlist if
998 		 * this board is being configured directly after
999 		 * an unconfigure.
1000 		 * To support this transition, dr_post_detach_mem
1001 		 * left a copy of the last known memlist in sbm_mlist.
1002 		 * This memlist could differ from any derived from
1003 		 * hardware if while this memunit was last configured
1004 		 * the system detected and deleted bad pages from
1005 		 * phys_install.  The location of those bad pages
1006 		 * will be reflected in the cached memlist.
1007 		 */
1008 		if (mp->sbm_mlist) {
1009 			memlist_delete(mp->sbm_mlist);
1010 			mp->sbm_mlist = NULL;
1011 		}
1012 	}
1013 
1014 	dr_update_mc_memory();
1015 
1016 	return (0);
1017 }
1018 
1019 int
1020 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1021 {
1022 	_NOTE(ARGUNUSED(hp))
1023 
1024 	int d;
1025 
1026 	for (d = 0; d < devnum; d++) {
1027 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1028 
1029 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1030 	}
1031 
1032 	return (0);
1033 }
1034 
1035 int
1036 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1037 {
1038 	_NOTE(ARGUNUSED(hp))
1039 
1040 	int		d, rv;
1041 	static fn_t	f = "dr_post_detach_mem";
1042 
1043 	PR_MEM("%s...\n", f);
1044 
1045 	rv = 0;
1046 	for (d = 0; d < devnum; d++) {
1047 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1048 
1049 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1050 
1051 		if (dr_post_detach_mem_unit(mp))
1052 			rv = -1;
1053 	}
1054 	dr_update_mc_memory();
1055 
1056 	return (rv);
1057 }
1058 
1059 static void
1060 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1061 {
1062 	static fn_t	f = "dr_add_memory_spans";
1063 
1064 	PR_MEM("%s...", f);
1065 	PR_MEMLIST_DUMP(ml);
1066 
1067 #ifdef DEBUG
1068 	memlist_read_lock();
1069 	if (memlist_intersect(phys_install, ml)) {
1070 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1071 	}
1072 	memlist_read_unlock();
1073 #endif
1074 
1075 	for (; ml; ml = ml->next) {
1076 		pfn_t		 base;
1077 		pgcnt_t		 npgs;
1078 		int		 rv;
1079 		sbd_error_t	*err;
1080 
1081 		base = _b64top(ml->address);
1082 		npgs = _b64top(ml->size);
1083 
1084 		rv = kphysm_add_memory_dynamic(base, npgs);
1085 
1086 		err = drmach_mem_add_span(
1087 		    mp->sbm_cm.sbdev_id,
1088 		    ml->address,
1089 		    ml->size);
1090 
1091 		if (err)
1092 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1093 
1094 		if (rv != KPHYSM_OK) {
1095 			cmn_err(CE_WARN, "%s:"
1096 			    " unexpected kphysm_add_memory_dynamic"
1097 			    " return value %d;"
1098 			    " basepfn=0x%lx, npages=%ld\n",
1099 			    f, rv, base, npgs);
1100 
1101 			continue;
1102 		}
1103 	}
1104 }
1105 
1106 static int
1107 memlist_touch(struct memlist *ml, uint64_t add)
1108 {
1109 	while (ml != NULL) {
1110 		if ((add == ml->address) ||
1111 		    (add == (ml->address + ml->size)))
1112 			return (1);
1113 		ml = ml->next;
1114 	}
1115 	return (0);
1116 }
1117 
1118 static sbd_error_t *
1119 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
1120 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
1121 {
1122 	struct memlist	*ml;
1123 	sbd_error_t	*err;
1124 	static fn_t	f = "dr_process_excess_mlist";
1125 	uint64_t	new_pa, nbytes;
1126 	int rv;
1127 
1128 	err = NULL;
1129 
1130 	/*
1131 	 * After the small <-> big copy-rename,
1132 	 * the original address space for the
1133 	 * source board may have excess to be
1134 	 * deleted. This is a case different
1135 	 * from the big->small excess source
1136 	 * memory case listed below.
1137 	 * Remove s_mp->sbm_del_mlist from
1138 	 * the kernel cage glist.
1139 	 */
1140 	for (ml = s_mp->sbm_del_mlist; ml;
1141 	    ml = ml->next) {
1142 		PR_MEM("%s: delete small<->big copy-"
1143 		    "rename source excess memory", f);
1144 		PR_MEMLIST_DUMP(ml);
1145 
1146 		err = drmach_mem_del_span(
1147 		    s_mp->sbm_cm.sbdev_id,
1148 		    ml->address, ml->size);
1149 		if (err)
1150 			DRERR_SET_C(&s_mp->
1151 			    sbm_cm.sbdev_error, &err);
1152 		ASSERT(err == NULL);
1153 	}
1154 
1155 	PR_MEM("%s: adding back remaining portion"
1156 	    " of %s, memlist:\n",
1157 	    f, t_mp->sbm_cm.sbdev_path);
1158 	PR_MEMLIST_DUMP(t_excess_mlist);
1159 
1160 	for (ml = t_excess_mlist; ml; ml = ml->next) {
1161 		struct memlist ml0;
1162 
1163 		ml0.address = ml->address;
1164 		ml0.size = ml->size;
1165 		ml0.next = ml0.prev = NULL;
1166 
1167 		/*
1168 		 * If the memory object is 256 MB aligned (max page size
1169 		 * on OPL, it will not be coalesced to the adjacent memory
1170 		 * chunks.  The coalesce logic assumes contiguous page
1171 		 * structures for contiguous memory and we hit panic.
1172 		 * For anything less than 256 MB alignment, we have
1173 		 * to make sure that it is not adjacent to anything.
1174 		 * If the new chunk is adjacent to phys_install, we
1175 		 * truncate it to 4MB boundary.  4 MB is somewhat
1176 		 * arbitrary.  However we do not want to create
1177 		 * very small segments because they can cause problem.
1178 		 * The extreme case of 8K segment will fail
1179 		 * kphysm_add_memory_dynamic(), e.g.
1180 		 */
1181 		if ((ml->address & (MH_MPSS_ALIGNMENT - 1)) ||
1182 		    (ml->size & (MH_MPSS_ALIGNMENT - 1))) {
1183 
1184 		memlist_read_lock();
1185 		rv = memlist_touch(phys_install, ml0.address);
1186 		memlist_read_unlock();
1187 
1188 		if (rv) {
1189 			new_pa = roundup(ml0.address + 1, MH_MIN_ALIGNMENT);
1190 			nbytes = (new_pa -  ml0.address);
1191 			if (nbytes >= ml0.size) {
1192 				t_mp->sbm_dyn_segs =
1193 				    memlist_del_span(t_mp->sbm_dyn_segs,
1194 				    ml0.address, ml0.size);
1195 				continue;
1196 			}
1197 			t_mp->sbm_dyn_segs =
1198 			    memlist_del_span(t_mp->sbm_dyn_segs,
1199 			    ml0.address, nbytes);
1200 			ml0.size -= nbytes;
1201 			ml0.address = new_pa;
1202 		}
1203 
1204 		if (ml0.size == 0) {
1205 			continue;
1206 		}
1207 
1208 		memlist_read_lock();
1209 		rv = memlist_touch(phys_install, ml0.address + ml0.size);
1210 		memlist_read_unlock();
1211 
1212 		if (rv) {
1213 			new_pa = rounddown(ml0.address + ml0.size - 1,
1214 			    MH_MIN_ALIGNMENT);
1215 			nbytes = (ml0.address + ml0.size - new_pa);
1216 			if (nbytes >= ml0.size) {
1217 				t_mp->sbm_dyn_segs =
1218 				    memlist_del_span(t_mp->sbm_dyn_segs,
1219 				    ml0.address, ml0.size);
1220 				continue;
1221 			}
1222 			t_mp->sbm_dyn_segs =
1223 			    memlist_del_span(t_mp->sbm_dyn_segs,
1224 			    new_pa, nbytes);
1225 			ml0.size -= nbytes;
1226 		}
1227 
1228 		if (ml0.size > 0) {
1229 			dr_add_memory_spans(s_mp, &ml0);
1230 		}
1231 		} else if (ml0.size > 0) {
1232 			dr_add_memory_spans(s_mp, &ml0);
1233 		}
1234 	}
1235 	memlist_delete(t_excess_mlist);
1236 	return (err);
1237 }
1238 
1239 static int
1240 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1241 {
1242 	uint64_t	sz = s_mp->sbm_slice_size;
1243 	uint64_t	sm = sz - 1;
1244 	/* old and new below refer to PAs before and after copy-rename */
1245 	uint64_t	s_old_basepa, s_new_basepa;
1246 	uint64_t	t_old_basepa, t_new_basepa;
1247 	dr_mem_unit_t	*t_mp, *x_mp;
1248 	drmach_mem_info_t	minfo;
1249 	struct memlist	*ml;
1250 	struct memlist	*t_excess_mlist;
1251 	int		rv;
1252 	int		s_excess_mem_deleted = 0;
1253 	sbd_error_t	*err;
1254 	static fn_t	f = "dr_post_detach_mem_unit";
1255 
1256 	PR_MEM("%s...\n", f);
1257 
1258 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1259 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1260 	    f, s_mp->sbm_cm.sbdev_path);
1261 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1262 
1263 	/* sanity check */
1264 	ASSERT(s_mp->sbm_del_mlist == NULL ||
1265 	    (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1266 
1267 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1268 		t_mp = s_mp->sbm_peer;
1269 		ASSERT(t_mp != NULL);
1270 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1271 		ASSERT(t_mp->sbm_peer == s_mp);
1272 
1273 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1274 		ASSERT(t_mp->sbm_del_mlist);
1275 
1276 		PR_MEM("%s: target %s: deleted memlist:\n",
1277 		    f, t_mp->sbm_cm.sbdev_path);
1278 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1279 	} else {
1280 		/* this is no target unit */
1281 		t_mp = NULL;
1282 	}
1283 
1284 	/*
1285 	 * Verify the memory really did successfully detach
1286 	 * by checking for its non-existence in phys_install.
1287 	 */
1288 	rv = 0;
1289 	memlist_read_lock();
1290 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1291 		x_mp = s_mp;
1292 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1293 	}
1294 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1295 		x_mp = t_mp;
1296 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1297 	}
1298 	memlist_read_unlock();
1299 
1300 	if (rv) {
1301 		/* error: memlist still in phys_install */
1302 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1303 	}
1304 
1305 	/*
1306 	 * clean mem unit state and bail out if an error has been recorded.
1307 	 */
1308 	rv = 0;
1309 	if (s_mp->sbm_cm.sbdev_error) {
1310 		PR_MEM("%s: %s flags=%x", f,
1311 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1312 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1313 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1314 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1315 		rv = -1;
1316 	}
1317 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1318 		PR_MEM("%s: %s flags=%x", f,
1319 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1320 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1321 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1322 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1323 		rv = -1;
1324 	}
1325 	if (rv)
1326 		goto cleanup;
1327 
1328 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1329 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1330 	ASSERT(err == NULL);
1331 	s_new_basepa = minfo.mi_basepa;
1332 
1333 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1334 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1335 
1336 	if (t_mp != NULL) {
1337 		struct memlist *s_copy_mlist;
1338 
1339 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1340 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1341 		ASSERT(err == NULL);
1342 		t_new_basepa = minfo.mi_basepa;
1343 
1344 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1345 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1346 
1347 		/*
1348 		 * Construct copy list with original source addresses.
1349 		 * Used to add back excess target mem.
1350 		 */
1351 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1352 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1353 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1354 			    ml->address, ml->size);
1355 		}
1356 
1357 		PR_MEM("%s: source copy list:\n:", f);
1358 		PR_MEMLIST_DUMP(s_copy_mlist);
1359 
1360 		/*
1361 		 * We had to swap mem-units, so update
1362 		 * memlists accordingly with new base
1363 		 * addresses.
1364 		 */
1365 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
1366 			ml->address -= t_old_basepa;
1367 			ml->address += t_new_basepa;
1368 		}
1369 
1370 		/*
1371 		 * There is no need to explicitly rename the target delete
1372 		 * memlist, because sbm_del_mlist and sbm_mlist always
1373 		 * point to the same memlist for a copy/rename operation.
1374 		 */
1375 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1376 
1377 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1378 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1379 
1380 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
1381 			ml->address -= s_old_basepa;
1382 			ml->address += s_new_basepa;
1383 		}
1384 
1385 		PR_MEM("%s: renamed source memlist:\n", f);
1386 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1387 		PR_MEM("%s: source dyn seg memlist:\n", f);
1388 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
1389 
1390 		/*
1391 		 * Keep track of dynamically added segments
1392 		 * since they cannot be split if we need to delete
1393 		 * excess source memory later for this board.
1394 		 */
1395 		if (t_mp->sbm_dyn_segs)
1396 			memlist_delete(t_mp->sbm_dyn_segs);
1397 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1398 		s_mp->sbm_dyn_segs = NULL;
1399 
1400 		/*
1401 		 * Add back excess target memory.
1402 		 * Subtract out the portion of the target memory
1403 		 * node that was taken over by the source memory
1404 		 * node.
1405 		 */
1406 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1407 		for (ml = s_copy_mlist; ml; ml = ml->next) {
1408 			t_excess_mlist =
1409 			    memlist_del_span(t_excess_mlist,
1410 			    ml->address, ml->size);
1411 		}
1412 		PR_MEM("%s: excess memlist:\n", f);
1413 		PR_MEMLIST_DUMP(t_excess_mlist);
1414 
1415 		/*
1416 		 * Update dynamically added segs
1417 		 */
1418 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1419 			t_mp->sbm_dyn_segs =
1420 			    memlist_del_span(t_mp->sbm_dyn_segs,
1421 			    ml->address, ml->size);
1422 		}
1423 		for (ml = t_excess_mlist; ml; ml = ml->next) {
1424 			t_mp->sbm_dyn_segs =
1425 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1426 			    ml->address, ml->size);
1427 		}
1428 		PR_MEM("%s: %s: updated dynamic seg list:\n",
1429 		    f, t_mp->sbm_cm.sbdev_path);
1430 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1431 
1432 		if (t_excess_mlist != NULL) {
1433 			err = dr_process_excess_mlist(s_mp, t_mp,
1434 			    t_excess_mlist);
1435 			s_excess_mem_deleted = 1;
1436 		}
1437 
1438 		memlist_delete(s_copy_mlist);
1439 
1440 #ifdef DEBUG
1441 		/*
1442 		 * s_mp->sbm_del_mlist may still needed
1443 		 */
1444 		PR_MEM("%s: source delete memeory flag %d",
1445 		    f, s_excess_mem_deleted);
1446 		PR_MEM("%s: source delete memlist", f);
1447 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1448 #endif
1449 
1450 	}
1451 
1452 	if (t_mp != NULL) {
1453 		/* delete target's entire address space */
1454 		err = drmach_mem_del_span(
1455 		    t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1456 		if (err)
1457 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1458 		ASSERT(err == NULL);
1459 
1460 		/*
1461 		 * After the copy/rename, the original address space
1462 		 * for the source board (which is now located on the
1463 		 * target board) may now have some excess to be deleted.
1464 		 * Those excess memory on the source board are kept in
1465 		 * source board's sbm_del_mlist
1466 		 */
1467 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1468 		    ml = ml->next) {
1469 			PR_MEM("%s: delete source excess memory", f);
1470 			PR_MEMLIST_DUMP(ml);
1471 
1472 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1473 			    ml->address, ml->size);
1474 			if (err)
1475 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1476 			ASSERT(err == NULL);
1477 		}
1478 
1479 	} else {
1480 		/* delete board's entire address space */
1481 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1482 		    s_old_basepa & ~ sm, sz);
1483 		if (err)
1484 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1485 		ASSERT(err == NULL);
1486 	}
1487 
1488 cleanup:
1489 	/* clean up target mem unit */
1490 	if (t_mp != NULL) {
1491 		memlist_delete(t_mp->sbm_del_mlist);
1492 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
1493 
1494 		t_mp->sbm_del_mlist = NULL;
1495 		t_mp->sbm_mlist = NULL;
1496 		t_mp->sbm_peer = NULL;
1497 		t_mp->sbm_flags = 0;
1498 		t_mp->sbm_cm.sbdev_busy = 0;
1499 		dr_init_mem_unit_data(t_mp);
1500 
1501 	}
1502 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1503 		/*
1504 		 * now that copy/rename has completed, undo this
1505 		 * work that was done in dr_release_mem_done.
1506 		 */
1507 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1508 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1509 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1510 	}
1511 
1512 	/*
1513 	 * clean up (source) board's mem unit structure.
1514 	 * NOTE: sbm_mlist is retained if no error has been record (in other
1515 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1516 	 * referred to elsewhere as the cached memlist.  The cached memlist
1517 	 * is used to re-attach (configure back in) this memunit from the
1518 	 * unconfigured state.  The memlist is retained because it may
1519 	 * represent bad pages that were detected while the memory was
1520 	 * configured into the OS.  The OS deletes bad pages from phys_install.
1521 	 * Those deletes, if any, will be represented in the cached mlist.
1522 	 */
1523 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1524 		memlist_delete(s_mp->sbm_del_mlist);
1525 
1526 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1527 		memlist_delete(s_mp->sbm_mlist);
1528 		s_mp->sbm_mlist = NULL;
1529 	}
1530 
1531 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1532 		memlist_delete(s_mp->sbm_dyn_segs);
1533 		s_mp->sbm_dyn_segs = NULL;
1534 	}
1535 
1536 	s_mp->sbm_del_mlist = NULL;
1537 	s_mp->sbm_peer = NULL;
1538 	s_mp->sbm_flags = 0;
1539 	s_mp->sbm_cm.sbdev_busy = 0;
1540 	dr_init_mem_unit_data(s_mp);
1541 
1542 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1543 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1544 
1545 	return (0);
1546 }
1547 
1548 /*
1549  * Successful return from this function will have the memory
1550  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1551  * and waiting.  This routine's job is to select the memory that
1552  * actually has to be released (detached) which may not necessarily
1553  * be the same memory node that came in in devlist[],
1554  * i.e. a copy-rename is needed.
1555  */
1556 int
1557 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1558 {
1559 	int		d;
1560 	int		err_flag = 0;
1561 	static fn_t	f = "dr_pre_release_mem";
1562 
1563 	PR_MEM("%s...\n", f);
1564 
1565 	for (d = 0; d < devnum; d++) {
1566 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1567 		int		rv;
1568 		memquery_t	mq;
1569 		struct memlist	*ml;
1570 
1571 		if (mp->sbm_cm.sbdev_error) {
1572 			err_flag = 1;
1573 			continue;
1574 		} else if (!kcage_on) {
1575 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1576 			err_flag = 1;
1577 			continue;
1578 		}
1579 
1580 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1581 			/*
1582 			 * Board is currently involved in a delete
1583 			 * memory operation. Can't detach this guy until
1584 			 * that operation completes.
1585 			 */
1586 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1587 			err_flag = 1;
1588 			break;
1589 		}
1590 
1591 		/* flags should be clean at this time */
1592 		ASSERT(mp->sbm_flags == 0);
1593 
1594 		ASSERT(mp->sbm_mlist == NULL);
1595 		ASSERT(mp->sbm_del_mlist == NULL);
1596 		if (mp->sbm_mlist != NULL) {
1597 			memlist_delete(mp->sbm_mlist);
1598 			mp->sbm_mlist = NULL;
1599 		}
1600 
1601 		ml = dr_get_memlist(mp);
1602 		if (ml == NULL) {
1603 			err_flag = 1;
1604 			PR_MEM("%s: no memlist found for %s\n",
1605 			    f, mp->sbm_cm.sbdev_path);
1606 			continue;
1607 		}
1608 
1609 		/*
1610 		 * Check whether the detaching memory requires a
1611 		 * copy-rename.
1612 		 */
1613 		ASSERT(mp->sbm_npages != 0);
1614 
1615 		rv = dr_del_mlist_query(ml, &mq);
1616 		if (rv != KPHYSM_OK) {
1617 			memlist_delete(ml);
1618 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1619 			err_flag = 1;
1620 			break;
1621 		}
1622 
1623 		if (mq.nonrelocatable != 0) {
1624 			if (!(dr_cmd_flags(hp) &
1625 			    (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1626 				memlist_delete(ml);
1627 				/* caller wasn't prompted for a suspend */
1628 				dr_dev_err(CE_WARN, &mp->sbm_cm,
1629 				    ESBD_QUIESCE_REQD);
1630 				err_flag = 1;
1631 				break;
1632 			}
1633 		}
1634 
1635 		/* allocate a kphysm handle */
1636 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1637 		if (rv != KPHYSM_OK) {
1638 			memlist_delete(ml);
1639 
1640 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1641 			err_flag = 1;
1642 			break;
1643 		}
1644 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
1645 
1646 		if ((mq.nonrelocatable != 0) ||
1647 		    dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1648 			/*
1649 			 * Either the detaching memory node contains
1650 			 * non-reloc memory or we failed to reserve the
1651 			 * detaching memory node (which did _not_ have
1652 			 * any non-reloc memory, i.e. some non-reloc mem
1653 			 * got onboard).
1654 			 */
1655 
1656 			if (dr_select_mem_target(hp, mp, ml)) {
1657 				int rv;
1658 
1659 				/*
1660 				 * We had no luck locating a target
1661 				 * memory node to be the recipient of
1662 				 * the non-reloc memory on the node
1663 				 * we're trying to detach.
1664 				 * Clean up be disposing the mem handle
1665 				 * and the mem list.
1666 				 */
1667 				rv = kphysm_del_release(mp->sbm_memhandle);
1668 				if (rv != KPHYSM_OK) {
1669 					/*
1670 					 * can do nothing but complain
1671 					 * and hope helpful for debug
1672 					 */
1673 					cmn_err(CE_WARN, "%s: unexpected"
1674 					    " kphysm_del_release return"
1675 					    " value %d",
1676 					    f, rv);
1677 				}
1678 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1679 
1680 				memlist_delete(ml);
1681 
1682 				/* make sure sbm_flags is clean */
1683 				ASSERT(mp->sbm_flags == 0);
1684 
1685 				dr_dev_err(CE_WARN,
1686 				    &mp->sbm_cm, ESBD_NO_TARGET);
1687 
1688 				err_flag = 1;
1689 				break;
1690 			}
1691 
1692 			/*
1693 			 * ml is not memlist_delete'd here because
1694 			 * it has been assigned to mp->sbm_mlist
1695 			 * by dr_select_mem_target.
1696 			 */
1697 		} else {
1698 			/* no target needed to detach this board */
1699 			mp->sbm_flags |= DR_MFLAG_RESERVED;
1700 			mp->sbm_peer = NULL;
1701 			mp->sbm_del_mlist = ml;
1702 			mp->sbm_mlist = ml;
1703 			mp->sbm_cm.sbdev_busy = 1;
1704 		}
1705 #ifdef DEBUG
1706 		ASSERT(mp->sbm_mlist != NULL);
1707 
1708 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1709 			PR_MEM("%s: release of %s requires copy/rename;"
1710 			    " selected target board %s\n",
1711 			    f,
1712 			    mp->sbm_cm.sbdev_path,
1713 			    mp->sbm_peer->sbm_cm.sbdev_path);
1714 		} else {
1715 			PR_MEM("%s: copy/rename not required to release %s\n",
1716 			    f, mp->sbm_cm.sbdev_path);
1717 		}
1718 
1719 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1720 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1721 #endif
1722 	}
1723 
1724 	return (err_flag ? -1 : 0);
1725 }
1726 
1727 void
1728 dr_release_mem_done(dr_common_unit_t *cp)
1729 {
1730 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
1731 	dr_mem_unit_t *t_mp, *mp;
1732 	int		rv;
1733 	static fn_t	f = "dr_release_mem_done";
1734 
1735 	/*
1736 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1737 	 * has a target unit.
1738 	 */
1739 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1740 		t_mp = s_mp->sbm_peer;
1741 		ASSERT(t_mp != NULL);
1742 		ASSERT(t_mp->sbm_peer == s_mp);
1743 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1744 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1745 	} else {
1746 		/* this is no target unit */
1747 		t_mp = NULL;
1748 	}
1749 
1750 	/* free delete handle */
1751 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1752 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1753 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1754 	if (rv != KPHYSM_OK) {
1755 		/*
1756 		 * can do nothing but complain
1757 		 * and hope helpful for debug
1758 		 */
1759 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1760 		    " return value %d", f, rv);
1761 	}
1762 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1763 
1764 	/*
1765 	 * If an error was encountered during release, clean up
1766 	 * the source (and target, if present) unit data.
1767 	 */
1768 /* XXX Can we know that sbdev_error was encountered during release? */
1769 	if (s_mp->sbm_cm.sbdev_error != NULL) {
1770 
1771 		if (t_mp != NULL) {
1772 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1773 			t_mp->sbm_del_mlist = NULL;
1774 
1775 			if (t_mp->sbm_mlist != NULL) {
1776 				memlist_delete(t_mp->sbm_mlist);
1777 				t_mp->sbm_mlist = NULL;
1778 			}
1779 
1780 			t_mp->sbm_peer = NULL;
1781 			t_mp->sbm_flags = 0;
1782 			t_mp->sbm_cm.sbdev_busy = 0;
1783 		}
1784 
1785 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1786 			memlist_delete(s_mp->sbm_del_mlist);
1787 		s_mp->sbm_del_mlist = NULL;
1788 
1789 		if (s_mp->sbm_mlist != NULL) {
1790 			memlist_delete(s_mp->sbm_mlist);
1791 			s_mp->sbm_mlist = NULL;
1792 		}
1793 
1794 		s_mp->sbm_peer = NULL;
1795 		s_mp->sbm_flags = 0;
1796 		s_mp->sbm_cm.sbdev_busy = 0;
1797 
1798 		/* bail out */
1799 		return;
1800 	}
1801 
1802 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1803 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1804 
1805 	if (t_mp != NULL) {
1806 		/*
1807 		 * the kphysm delete operation that drained the source
1808 		 * board also drained this target board.  Since the source
1809 		 * board drain is now known to have succeeded, we know this
1810 		 * target board is drained too.
1811 		 *
1812 		 * because DR_DEV_SET_RELEASED and dr_device_transition
1813 		 * is done here, the dr_release_dev_done should not
1814 		 * fail.
1815 		 */
1816 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1817 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1818 
1819 		/*
1820 		 * NOTE: do not transition target's board state,
1821 		 * even if the mem-unit was the last configure
1822 		 * unit of the board.  When copy/rename completes
1823 		 * this mem-unit will transitioned back to
1824 		 * the configured state.  In the meantime, the
1825 		 * board's must remain as is.
1826 		 */
1827 	}
1828 
1829 	/* if board(s) had deleted memory, verify it is gone */
1830 	rv = 0;
1831 	memlist_read_lock();
1832 	if (s_mp->sbm_del_mlist != NULL) {
1833 		mp = s_mp;
1834 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1835 	}
1836 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1837 		mp = t_mp;
1838 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1839 	}
1840 	memlist_read_unlock();
1841 	if (rv) {
1842 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1843 		    "deleted memory still found in phys_install",
1844 		    f,
1845 		    (mp == t_mp ? "target " : ""),
1846 		    mp->sbm_cm.sbdev_bp->b_num,
1847 		    mp->sbm_cm.sbdev_unum);
1848 
1849 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1850 		return;
1851 	}
1852 
1853 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1854 	if (t_mp != NULL)
1855 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1856 
1857 	/* this should not fail */
1858 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1859 		/* catch this in debug kernels */
1860 		ASSERT(0);
1861 		return;
1862 	}
1863 
1864 	PR_MEM("%s: marking %s release DONE\n",
1865 	    f, s_mp->sbm_cm.sbdev_path);
1866 
1867 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1868 
1869 	if (t_mp != NULL) {
1870 		/* should not fail */
1871 		rv = dr_release_dev_done(&t_mp->sbm_cm);
1872 		if (rv != 0) {
1873 			/* catch this in debug kernels */
1874 			ASSERT(0);
1875 			return;
1876 		}
1877 
1878 		PR_MEM("%s: marking %s release DONE\n",
1879 		    f, t_mp->sbm_cm.sbdev_path);
1880 
1881 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1882 	}
1883 }
1884 
1885 /*ARGSUSED*/
1886 int
1887 dr_disconnect_mem(dr_mem_unit_t *mp)
1888 {
1889 	static fn_t	f = "dr_disconnect_mem";
1890 	update_membounds_t umb;
1891 
1892 #ifdef DEBUG
1893 	int state = mp->sbm_cm.sbdev_state;
1894 	ASSERT(state == DR_STATE_CONNECTED ||
1895 	    state == DR_STATE_UNCONFIGURED);
1896 #endif
1897 
1898 	PR_MEM("%s...\n", f);
1899 
1900 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1901 		memlist_delete(mp->sbm_del_mlist);
1902 	mp->sbm_del_mlist = NULL;
1903 
1904 	if (mp->sbm_mlist) {
1905 		memlist_delete(mp->sbm_mlist);
1906 		mp->sbm_mlist = NULL;
1907 	}
1908 
1909 	/*
1910 	 * Remove memory from lgroup
1911 	 * For now, only board info is required.
1912 	 */
1913 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1914 	umb.u_base = (uint64_t)-1;
1915 	umb.u_len = (uint64_t)-1;
1916 
1917 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1918 
1919 	return (0);
1920 }
1921 
1922 int
1923 dr_cancel_mem(dr_mem_unit_t *s_mp)
1924 {
1925 	dr_mem_unit_t	*t_mp;
1926 	dr_state_t	state;
1927 	static fn_t	f = "dr_cancel_mem";
1928 
1929 	state = s_mp->sbm_cm.sbdev_state;
1930 
1931 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1932 		/* must cancel source board, not target board */
1933 		/* TODO: set error */
1934 		return (-1);
1935 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1936 		t_mp = s_mp->sbm_peer;
1937 		ASSERT(t_mp != NULL);
1938 		ASSERT(t_mp->sbm_peer == s_mp);
1939 
1940 		/* must always match the source board's state */
1941 		/* TODO: is this assertion correct? */
1942 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1943 	} else {
1944 		/* this is no target unit */
1945 		t_mp = NULL;
1946 	}
1947 
1948 	switch (state) {
1949 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
1950 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1951 
1952 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1953 			PR_MEM("%s: undoing target %s memory delete\n",
1954 			    f, t_mp->sbm_cm.sbdev_path);
1955 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1956 
1957 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1958 		}
1959 
1960 		if (s_mp->sbm_del_mlist != NULL) {
1961 			PR_MEM("%s: undoing %s memory delete\n",
1962 			    f, s_mp->sbm_cm.sbdev_path);
1963 
1964 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1965 		}
1966 
1967 		/*FALLTHROUGH*/
1968 
1969 /* TODO: should no longer be possible to see the release state here */
1970 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
1971 
1972 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1973 
1974 		if (t_mp != NULL) {
1975 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1976 			t_mp->sbm_del_mlist = NULL;
1977 
1978 			if (t_mp->sbm_mlist != NULL) {
1979 				memlist_delete(t_mp->sbm_mlist);
1980 				t_mp->sbm_mlist = NULL;
1981 			}
1982 
1983 			t_mp->sbm_peer = NULL;
1984 			t_mp->sbm_flags = 0;
1985 			t_mp->sbm_cm.sbdev_busy = 0;
1986 			dr_init_mem_unit_data(t_mp);
1987 
1988 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1989 
1990 			dr_device_transition(
1991 			    &t_mp->sbm_cm, DR_STATE_CONFIGURED);
1992 		}
1993 
1994 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1995 			memlist_delete(s_mp->sbm_del_mlist);
1996 		s_mp->sbm_del_mlist = NULL;
1997 
1998 		if (s_mp->sbm_mlist != NULL) {
1999 			memlist_delete(s_mp->sbm_mlist);
2000 			s_mp->sbm_mlist = NULL;
2001 		}
2002 
2003 		s_mp->sbm_peer = NULL;
2004 		s_mp->sbm_flags = 0;
2005 		s_mp->sbm_cm.sbdev_busy = 0;
2006 		dr_init_mem_unit_data(s_mp);
2007 
2008 		return (0);
2009 
2010 	default:
2011 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
2012 		    f, (int)state, s_mp->sbm_cm.sbdev_path);
2013 
2014 		return (-1);
2015 	}
2016 	/*NOTREACHED*/
2017 }
2018 
2019 void
2020 dr_init_mem_unit(dr_mem_unit_t *mp)
2021 {
2022 	dr_state_t	new_state;
2023 
2024 
2025 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
2026 		new_state = DR_STATE_CONFIGURED;
2027 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2028 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2029 		new_state = DR_STATE_CONNECTED;
2030 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2031 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2032 		new_state = DR_STATE_OCCUPIED;
2033 	} else {
2034 		new_state = DR_STATE_EMPTY;
2035 	}
2036 
2037 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2038 		dr_init_mem_unit_data(mp);
2039 
2040 	/* delay transition until fully initialized */
2041 	dr_device_transition(&mp->sbm_cm, new_state);
2042 }
2043 
2044 static void
2045 dr_init_mem_unit_data(dr_mem_unit_t *mp)
2046 {
2047 	drmachid_t	id = mp->sbm_cm.sbdev_id;
2048 	drmach_mem_info_t	minfo;
2049 	sbd_error_t	*err;
2050 	static fn_t	f = "dr_init_mem_unit_data";
2051 	update_membounds_t umb;
2052 
2053 	PR_MEM("%s...\n", f);
2054 
2055 	/* a little sanity checking */
2056 	ASSERT(mp->sbm_peer == NULL);
2057 	ASSERT(mp->sbm_flags == 0);
2058 
2059 	if (err = drmach_mem_get_info(id, &minfo)) {
2060 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2061 		return;
2062 	}
2063 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2064 	mp->sbm_npages = _b64top(minfo.mi_size);
2065 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2066 	mp->sbm_slice_size = minfo.mi_slice_size;
2067 
2068 	/*
2069 	 * Add memory to lgroup
2070 	 */
2071 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2072 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2073 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2074 
2075 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2076 
2077 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2078 	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2079 }
2080 
2081 static int
2082 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2083 {
2084 	int		err;
2085 	pfn_t		base;
2086 	pgcnt_t		npgs;
2087 	struct memlist	*mc;
2088 	static fn_t	f = "dr_reserve_mem_spans";
2089 
2090 	PR_MEM("%s...\n", f);
2091 
2092 	/*
2093 	 * Walk the supplied memlist scheduling each span for removal
2094 	 * with kphysm_del_span.  It is possible that a span may intersect
2095 	 * an area occupied by the cage.
2096 	 */
2097 	for (mc = ml; mc != NULL; mc = mc->next) {
2098 		base = _b64top(mc->address);
2099 		npgs = _b64top(mc->size);
2100 
2101 		err = kphysm_del_span(*mhp, base, npgs);
2102 		if (err != KPHYSM_OK) {
2103 			cmn_err(CE_WARN, "%s memory reserve failed."
2104 			    " unexpected kphysm_del_span return value %d;"
2105 			    " basepfn=0x%lx npages=%ld",
2106 			    f, err, base, npgs);
2107 
2108 			return (-1);
2109 		}
2110 	}
2111 
2112 	return (0);
2113 }
2114 
/*
 * Dimensions of the copy/rename target candidate table:
 * DR_SMT_NPREF_SETS preference sets, each with one slot for every
 * memory unit of every board.  The product is parenthesized so the
 * macro expands correctly inside any expression (division, modulo,
 * unary operators), not only in multiplications.
 */
#define	DR_SMT_NPREF_SETS	6
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters */
int dr_smt_realigned;
/* number of candidate units seen per preference value */
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
2125 
2126 /*
2127  * Find and reserve a copy/rename target board suitable for the
2128  * given source board.
2129  * All boards in the system are examined and categorized in relation to
2130  * their memory size versus the source board's memory size.  Order of
2131  * preference is:
2132  *	1st copy all source, source/target same size
2133  *	2nd copy all source, larger target
2134  * 	3rd copy nonrelocatable source span
2135  */
2136 static int
2137 dr_select_mem_target(dr_handle_t *hp,
2138 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
2139 {
2140 	dr_target_pref_t preference; /* lower value is higher preference */
2141 	int		idx;
2142 	dr_mem_unit_t	**sets;
2143 
2144 	int		t_bd;
2145 	int		t_unit;
2146 	int		rv;
2147 	dr_board_t	*s_bp, *t_bp;
2148 	dr_mem_unit_t	*t_mp, *c_mp;
2149 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2150 	memquery_t	s_mq = {0};
2151 	static fn_t	f = "dr_select_mem_target";
2152 
2153 	PR_MEM("%s...\n", f);
2154 
2155 	ASSERT(s_ml != NULL);
2156 
2157 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2158 	    DR_SMT_NPREF_SETS);
2159 
2160 	s_bp = hp->h_bd;
2161 	/* calculate the offset into the slice of the last source board pfn */
2162 	ASSERT(s_mp->sbm_npages != 0);
2163 
2164 	/*
2165 	 * Find non-relocatable span on source board.
2166 	 */
2167 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2168 	if (rv != KPHYSM_OK) {
2169 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2170 		    " return value %d; basepfn 0x%lx, npages %ld\n",
2171 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2172 		    s_mp->sbm_npages);
2173 		return (-1);
2174 	}
2175 
2176 	ASSERT(s_mq.phys_pages != 0);
2177 	ASSERT(s_mq.nonrelocatable != 0);
2178 
2179 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2180 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2181 	    s_mq.last_nonrelocatable);
2182 
2183 	/* break down s_ml if it contains dynamic segments */
2184 	b_ml = memlist_dup(s_ml);
2185 
2186 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
2187 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
2188 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
2189 	}
2190 
2191 
2192 	/*
2193 	 * Make one pass through all memory units on all boards
2194 	 * and categorize them with respect to the source board.
2195 	 */
2196 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2197 		/*
2198 		 * The board structs are a contiguous array
2199 		 * so we take advantage of that to find the
2200 		 * correct board struct pointer for a given
2201 		 * board number.
2202 		 */
2203 		t_bp = dr_lookup_board(t_bd);
2204 
2205 		/* source board can not be its own target */
2206 		if (s_bp->b_num == t_bp->b_num)
2207 			continue;
2208 
2209 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2210 
2211 			t_mp = dr_get_mem_unit(t_bp, t_unit);
2212 
2213 			/* this memory node must be attached */
2214 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2215 				continue;
2216 
2217 			/* source unit can not be its own target */
2218 			if (s_mp == t_mp) {
2219 				/* catch this is debug kernels */
2220 				ASSERT(0);
2221 				continue;
2222 			}
2223 
2224 			/*
2225 			 * this memory node must not already be reserved
2226 			 * by some other memory delete operation.
2227 			 */
2228 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2229 				continue;
2230 
2231 			/* get target board memlist */
2232 			t_ml = dr_get_memlist(t_mp);
2233 			if (t_ml == NULL) {
2234 				cmn_err(CE_WARN, "%s: no memlist for"
2235 				    " mem-unit %d, board %d", f,
2236 				    t_mp->sbm_cm.sbdev_bp->b_num,
2237 				    t_mp->sbm_cm.sbdev_unum);
2238 				continue;
2239 			}
2240 
2241 			preference = dr_get_target_preference(hp, t_mp, s_mp,
2242 			    t_ml, s_ml, b_ml);
2243 
2244 			memlist_delete(t_ml);
2245 
2246 			if (preference == DR_TP_INVALID)
2247 				continue;
2248 
2249 			dr_smt_preference[preference]++;
2250 
2251 			/* calculate index to start of preference set */
2252 			idx  = DR_SMT_NUNITS_PER_SET * preference;
2253 			/* calculate offset to respective element */
2254 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2255 
2256 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2257 			sets[idx] = t_mp;
2258 		}
2259 	}
2260 
2261 	if (b_ml != NULL)
2262 		memlist_delete(b_ml);
2263 
2264 	/*
2265 	 * NOTE: this would be a good place to sort each candidate
2266 	 * set in to some desired order, e.g. memory size in ascending
2267 	 * order.  Without an additional sorting step here, the order
2268 	 * within a set is ascending board number order.
2269 	 */
2270 
2271 	c_mp = NULL;
2272 	x_ml = NULL;
2273 	t_ml = NULL;
2274 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2275 		memquery_t mq;
2276 
2277 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2278 
2279 		ASSERT(preference != DR_TP_INVALID);
2280 
2281 		/* cleanup t_ml after previous pass */
2282 		if (t_ml != NULL) {
2283 			memlist_delete(t_ml);
2284 			t_ml = NULL;
2285 		}
2286 
2287 		/* get candidate target board mem unit */
2288 		t_mp = sets[idx];
2289 		if (t_mp == NULL)
2290 			continue;
2291 
2292 		/* get target board memlist */
2293 		t_ml = dr_get_memlist(t_mp);
2294 		if (t_ml == NULL) {
2295 			cmn_err(CE_WARN, "%s: no memlist for"
2296 			    " mem-unit %d, board %d",
2297 			    f,
2298 			    t_mp->sbm_cm.sbdev_bp->b_num,
2299 			    t_mp->sbm_cm.sbdev_unum);
2300 
2301 			continue;
2302 		}
2303 
2304 		PR_MEM("%s: checking for no-reloc in %s, "
2305 		    " basepfn=0x%lx, npages=%ld\n",
2306 		    f,
2307 		    t_mp->sbm_cm.sbdev_path,
2308 		    t_mp->sbm_basepfn,
2309 		    t_mp->sbm_npages);
2310 
2311 		rv = dr_del_mlist_query(t_ml, &mq);
2312 		if (rv != KPHYSM_OK) {
2313 			PR_MEM("%s: kphysm_del_span_query:"
2314 			    " unexpected return value %d\n", f, rv);
2315 
2316 			continue;
2317 		}
2318 
2319 		if (mq.nonrelocatable != 0) {
2320 			PR_MEM("%s: candidate %s has"
2321 			    " nonrelocatable span [0x%lx..0x%lx]\n",
2322 			    f,
2323 			    t_mp->sbm_cm.sbdev_path,
2324 			    mq.first_nonrelocatable,
2325 			    mq.last_nonrelocatable);
2326 
2327 			continue;
2328 		}
2329 
2330 #ifdef DEBUG
2331 		/*
2332 		 * This is a debug tool for excluding certain boards
2333 		 * from being selected as a target board candidate.
2334 		 * dr_ignore_board is only tested by this driver.
2335 		 * It must be set with adb, obp, /etc/system or your
2336 		 * favorite debugger.
2337 		 */
2338 		if (dr_ignore_board &
2339 		    (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2340 			PR_MEM("%s: dr_ignore_board flag set,"
2341 			    " ignoring %s as candidate\n",
2342 			    f, t_mp->sbm_cm.sbdev_path);
2343 			continue;
2344 		}
2345 #endif
2346 
2347 		/*
2348 		 * Reserve excess source board memory, if any.
2349 		 *
2350 		 * Only the nonrelocatable source span will be copied
2351 		 * so schedule the rest of the source mem to be deleted.
2352 		 */
2353 		switch (preference) {
2354 		case DR_TP_NONRELOC:
2355 			/*
2356 			 * Get source copy memlist and use it to construct
2357 			 * delete memlist.
2358 			 */
2359 			d_ml = memlist_dup(s_ml);
2360 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2361 
2362 			/* XXX */
2363 			ASSERT(d_ml != NULL);
2364 			ASSERT(x_ml != NULL);
2365 
2366 			for (ml = x_ml; ml != NULL; ml = ml->next) {
2367 				d_ml = memlist_del_span(d_ml, ml->address,
2368 				    ml->size);
2369 			}
2370 
2371 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2372 			    s_mp->sbm_cm.sbdev_path);
2373 			PR_MEMLIST_DUMP(d_ml);
2374 
2375 			/* reserve excess spans */
2376 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2377 			    d_ml) != 0) {
2378 				/* likely more non-reloc pages appeared */
2379 				/* TODO: restart from top? */
2380 				continue;
2381 			}
2382 			break;
2383 		default:
2384 			d_ml = NULL;
2385 			break;
2386 		}
2387 
2388 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2389 
2390 		/*
2391 		 * reserve all memory on target board.
2392 		 * NOTE: source board's memhandle is used.
2393 		 *
2394 		 * If this succeeds (eq 0), then target selection is
2395 		 * complete and all unwanted memory spans, both source and
2396 		 * target, have been reserved.  Loop is terminated.
2397 		 */
2398 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2399 			PR_MEM("%s: %s: target board memory reserved\n",
2400 			    f, t_mp->sbm_cm.sbdev_path);
2401 
2402 			/* a candidate target board is now reserved */
2403 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2404 			c_mp = t_mp;
2405 
2406 			/* *** EXITING LOOP *** */
2407 			break;
2408 		}
2409 
2410 		/* did not successfully reserve the target board. */
2411 		PR_MEM("%s: could not reserve target %s\n",
2412 		    f, t_mp->sbm_cm.sbdev_path);
2413 
2414 		/*
2415 		 * NOTE: an undo of the dr_reserve_mem_span work
2416 		 * will happen automatically when the memhandle
2417 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2418 		 */
2419 
2420 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2421 	}
2422 
2423 	/* clean up after memlist editing logic */
2424 	if (x_ml != NULL)
2425 		memlist_delete(x_ml);
2426 
2427 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2428 	    DR_SMT_NPREF_SETS);
2429 
2430 	/*
2431 	 * c_mp will be NULL when the entire sets[] array
2432 	 * has been searched without reserving a target board.
2433 	 */
2434 	if (c_mp == NULL) {
2435 		PR_MEM("%s: %s: target selection failed.\n",
2436 		    f, s_mp->sbm_cm.sbdev_path);
2437 
2438 		if (t_ml != NULL)
2439 			memlist_delete(t_ml);
2440 
2441 		return (-1);
2442 	}
2443 
2444 	PR_MEM("%s: found target %s for source %s\n",
2445 	    f,
2446 	    c_mp->sbm_cm.sbdev_path,
2447 	    s_mp->sbm_cm.sbdev_path);
2448 
2449 	s_mp->sbm_peer = c_mp;
2450 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2451 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2452 	s_mp->sbm_mlist = s_ml;
2453 	s_mp->sbm_cm.sbdev_busy = 1;
2454 
2455 	c_mp->sbm_peer = s_mp;
2456 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
2457 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2458 	c_mp->sbm_mlist = t_ml;
2459 	c_mp->sbm_cm.sbdev_busy = 1;
2460 
2461 	return (0);
2462 }
2463 
2464 /*
2465  * Returns target preference rank:
2466  *     -1 not a valid copy-rename target board
2467  *	0 copy all source, source/target same size
2468  *	1 copy all source, larger target
2469  * 	2 copy nonrelocatable source span
2470  */
2471 static dr_target_pref_t
2472 dr_get_target_preference(dr_handle_t *hp,
2473     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2474     struct memlist *t_ml, struct memlist *s_ml,
2475     struct memlist *b_ml)
2476 {
2477 	dr_target_pref_t preference;
2478 	struct memlist *s_nonreloc_ml = NULL;
2479 	drmachid_t t_id;
2480 	static fn_t	f = "dr_get_target_preference";
2481 
2482 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2483 
2484 	/*
2485 	 * Can the entire source board be copied?
2486 	 */
2487 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2488 		if (s_mp->sbm_npages == t_mp->sbm_npages)
2489 			preference = DR_TP_SAME;	/* same size */
2490 		else
2491 			preference = DR_TP_LARGE;	/* larger target */
2492 	} else {
2493 		/*
2494 		 * Entire source won't fit so try non-relocatable memory only
2495 		 * (target aligned).
2496 		 */
2497 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2498 		if (s_nonreloc_ml == NULL) {
2499 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2500 			preference = DR_TP_INVALID;
2501 		}
2502 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
2503 			preference = DR_TP_NONRELOC;
2504 		else
2505 			preference = DR_TP_INVALID;
2506 	}
2507 
2508 	if (s_nonreloc_ml != NULL)
2509 		memlist_delete(s_nonreloc_ml);
2510 
2511 	/*
2512 	 * Force floating board preference lower than all other boards
2513 	 * if the force flag is present; otherwise disallow the board.
2514 	 */
2515 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2516 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2517 			preference += DR_TP_FLOATING;
2518 		else
2519 			preference = DR_TP_INVALID;
2520 	}
2521 
2522 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2523 	    preference);
2524 
2525 	return (preference);
2526 }
2527 
2528 /*
2529  * Create a memlist representing the source memory that will be copied to
2530  * the target board.  The memory to be copied is the maximum amount that
2531  * will fit on the target board.
2532  */
2533 static struct memlist *
2534 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2535     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2536 {
2537 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2538 	uint64_t	s_slice_mask, s_slice_base;
2539 	uint64_t	t_slice_mask, t_slice_base;
2540 	static fn_t	f = "dr_get_copy_mlist";
2541 
2542 	ASSERT(s_mlist != NULL);
2543 	ASSERT(t_mlist != NULL);
2544 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2545 
2546 	s_slice_mask = s_mp->sbm_slice_size - 1;
2547 	s_slice_base = s_mlist->address & ~s_slice_mask;
2548 
2549 	t_slice_mask = t_mp->sbm_slice_size - 1;
2550 	t_slice_base = t_mlist->address & ~t_slice_mask;
2551 
2552 	t_ml = memlist_dup(t_mlist);
2553 	s_del_ml = memlist_dup(s_mlist);
2554 	s_copy_ml = memlist_dup(s_mlist);
2555 
2556 	/* XXX */
2557 	ASSERT(t_ml != NULL);
2558 	ASSERT(s_del_ml != NULL);
2559 	ASSERT(s_copy_ml != NULL);
2560 
2561 	/*
2562 	 * To construct the source copy memlist:
2563 	 *
2564 	 * The target memlist is converted to the post-rename
2565 	 * source addresses.  This is the physical address range
2566 	 * the target will have after the copy-rename.  Overlaying
2567 	 * and deleting this from the current source memlist will
2568 	 * give the source delete memlist.  The copy memlist is
2569 	 * the reciprocal of the source delete memlist.
2570 	 */
2571 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2572 		/*
2573 		 * Normalize relative to target slice base PA
2574 		 * in order to preseve slice offsets.
2575 		 */
2576 		ml->address -= t_slice_base;
2577 		/*
2578 		 * Convert to source slice PA address.
2579 		 */
2580 		ml->address += s_slice_base;
2581 	}
2582 
2583 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2584 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
2585 	}
2586 
2587 	/*
2588 	 * Expand the delete mlist to fully include any dynamic segments
2589 	 * it intersects with.
2590 	 */
2591 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
2592 		uint64_t del_base = ml->address;
2593 		uint64_t del_end = ml->address + ml->size;
2594 		struct memlist *dyn;
2595 
2596 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2597 			uint64_t dyn_base = dyn->address;
2598 			uint64_t dyn_end = dyn->address + dyn->size;
2599 
2600 			if (del_base > dyn_base && del_base < dyn_end)
2601 				del_base = dyn_base;
2602 
2603 			if (del_end > dyn_base && del_end < dyn_end)
2604 				del_end = dyn_end;
2605 		}
2606 
2607 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2608 	}
2609 
2610 	memlist_delete(s_del_ml);
2611 	s_del_ml = x_ml;
2612 
2613 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
2614 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
2615 	}
2616 
2617 	PR_MEM("%s: source delete mlist\n", f);
2618 	PR_MEMLIST_DUMP(s_del_ml);
2619 
2620 	PR_MEM("%s: source copy mlist\n", f);
2621 	PR_MEMLIST_DUMP(s_copy_ml);
2622 
2623 	memlist_delete(t_ml);
2624 	memlist_delete(s_del_ml);
2625 
2626 	return (s_copy_ml);
2627 }
2628 
2629 /*
2630  * Scan the non-relocatable spans on the source memory
2631  * and construct a minimum mlist that includes all non-reloc
2632  * memory subject to target alignment, and dynamic segment
2633  * constraints where only whole dynamic segments may be deleted.
2634  */
2635 static struct memlist *
2636 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2637 {
2638 	struct memlist	*x_ml = NULL;
2639 	struct memlist	*ml;
2640 	static fn_t	f = "dr_get_nonreloc_mlist";
2641 
2642 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
2643 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2644 
2645 	for (ml = s_ml; ml; ml = ml->next) {
2646 		int rv;
2647 		uint64_t nr_base, nr_end;
2648 		memquery_t mq;
2649 		struct memlist *dyn;
2650 
2651 		rv = kphysm_del_span_query(
2652 		    _b64top(ml->address), _b64top(ml->size), &mq);
2653 		if (rv) {
2654 			memlist_delete(x_ml);
2655 			return (NULL);
2656 		}
2657 
2658 		if (mq.nonrelocatable == 0)
2659 			continue;
2660 
2661 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2662 		    _ptob64(mq.first_nonrelocatable),
2663 		    _ptob64(mq.last_nonrelocatable),
2664 		    mq.first_nonrelocatable,
2665 		    mq.last_nonrelocatable);
2666 
2667 		/*
2668 		 * Align the span at both ends to allow for possible
2669 		 * cage expansion.
2670 		 */
2671 		nr_base = _ptob64(mq.first_nonrelocatable);
2672 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
2673 
2674 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2675 		    f, nr_base, nr_end);
2676 
2677 		/*
2678 		 * Expand the non-reloc span to fully include any
2679 		 * dynamic segments it intersects with.
2680 		 */
2681 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2682 			uint64_t dyn_base = dyn->address;
2683 			uint64_t dyn_end = dyn->address + dyn->size;
2684 
2685 			if (nr_base > dyn_base && nr_base < dyn_end)
2686 				nr_base = dyn_base;
2687 
2688 			if (nr_end > dyn_base && nr_end < dyn_end)
2689 				nr_end = dyn_end;
2690 		}
2691 
2692 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2693 	}
2694 
2695 	if (x_ml == NULL) {
2696 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2697 		return (NULL);
2698 	}
2699 
2700 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2701 	PR_MEMLIST_DUMP(x_ml);
2702 
2703 	return (x_ml);
2704 }
2705 
2706 /*
2707  * Check if source memlist can fit in target memlist while maintaining
2708  * relative offsets within board.
2709  */
2710 static int
2711 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2712     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2713 {
2714 	int		canfit = 0;
2715 	struct memlist	*s_ml, *t_ml, *ml;
2716 	uint64_t	s_slice_mask, t_slice_mask;
2717 	static fn_t	f = "dr_mlist_canfit";
2718 
2719 	s_ml = memlist_dup(s_mlist);
2720 	t_ml = memlist_dup(t_mlist);
2721 
2722 	if (s_ml == NULL || t_ml == NULL) {
2723 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2724 		goto done;
2725 	}
2726 
2727 	s_slice_mask = s_mp->sbm_slice_size - 1;
2728 	t_slice_mask = t_mp->sbm_slice_size - 1;
2729 
2730 	/*
2731 	 * Normalize to slice relative offsets.
2732 	 */
2733 	for (ml = s_ml; ml; ml = ml->next)
2734 		ml->address &= s_slice_mask;
2735 
2736 	for (ml = t_ml; ml; ml = ml->next)
2737 		ml->address &= t_slice_mask;
2738 
2739 	canfit = memlist_canfit(s_ml, t_ml);
2740 done:
2741 	memlist_delete(s_ml);
2742 	memlist_delete(t_ml);
2743 
2744 	return (canfit);
2745 }
2746 
2747 /*
2748  * Memlist support.
2749  */
2750 
2751 /*
2752  * Determine whether the source memlist (s_mlist) will
2753  * fit into the target memlist (t_mlist) in terms of
2754  * size and holes.  Assumes the caller has normalized the
2755  * memlist physical addresses for comparison.
2756  */
2757 static int
2758 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2759 {
2760 	int		rv = 0;
2761 	struct memlist	*s_ml, *t_ml;
2762 
2763 	if ((s_mlist == NULL) || (t_mlist == NULL))
2764 		return (0);
2765 
2766 	s_ml = s_mlist;
2767 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2768 		uint64_t	s_start, s_end;
2769 		uint64_t	t_start, t_end;
2770 
2771 		t_start = t_ml->address;
2772 		t_end = t_start + t_ml->size;
2773 
2774 		for (; s_ml; s_ml = s_ml->next) {
2775 			s_start = s_ml->address;
2776 			s_end = s_start + s_ml->size;
2777 
2778 			if ((s_start < t_start) || (s_end > t_end))
2779 				break;
2780 		}
2781 	}
2782 
2783 	/*
2784 	 * If we ran out of source memlist chunks that mean
2785 	 * we found a home for all of them.
2786 	 */
2787 	if (s_ml == NULL)
2788 		rv = 1;
2789 
2790 	return (rv);
2791 }
2792