xref: /titanic_41/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 20ae46ebaff1237662e05edf9db61538aa85d448)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * DR memory support routines.
30  */
31 
32 #include <sys/note.h>
33 #include <sys/debug.h>
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>
37 #include <sys/dditypes.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/sysmacros.h>
46 #include <sys/machsystm.h>
47 #include <sys/spitregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/promif.h>
50 #include <vm/seg_kmem.h>
51 #include <sys/lgrp.h>
52 #include <sys/platform_module.h>
53 
54 #include <vm/page.h>
55 
56 #include <sys/dr.h>
57 #include <sys/dr_util.h>
58 #include <sys/drmach.h>
59 #include <sys/kobj.h>
60 
61 extern struct memlist	*phys_install;
62 extern vnode_t		*retired_pages;
63 
64 /* TODO: push this reference below drmach line */
65 extern int		kcage_on;
66 
67 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
68 static char *dr_ie_fmt = "dr_mem.c %d";
69 
70 typedef enum {	/* return type of dr_get_target_preference() */
71 	DR_TP_INVALID = -1,	/* explicit -1, so DR_TP_SAME is 0 */
72 	DR_TP_SAME,	/* NOTE(review): per-value meaning not visible here; */
73 	DR_TP_LARGE,	/* names suggest classes of copy-rename target -- */
74 	DR_TP_NONRELOC,	/* confirm against dr_get_target_preference() */
75 	DR_TP_FLOATING
76 } dr_target_pref_t;
77 
78 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
79 static int		dr_reserve_mem_spans(memhandle_t *mhp,
80 				struct memlist *mlist);
81 static int		dr_select_mem_target(dr_handle_t *hp,
82 				dr_mem_unit_t *mp, struct memlist *ml);
83 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
84 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
85 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
86 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
87 				struct memlist *s_ml, struct memlist *x_ml,
88 				struct memlist *b_ml);
89 
90 static int		memlist_canfit(struct memlist *s_mlist,
91 				struct memlist *t_mlist);
92 static int		dr_del_mlist_query(struct memlist *mlist,
93 				memquery_t *mp);
94 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
95 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
96 				dr_mem_unit_t *t_mp);
97 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
98 				dr_mem_unit_t *s_mp);
99 static int		dr_memlist_canfit(struct memlist *s_mlist,
100 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
101 				dr_mem_unit_t *t_mp);
102 
103 /*
104  * dr_mem_unit_t.sbm_flags
105  */
105 #define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
106 #define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
107 #define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
108 #define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
109 #define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
110 
111 /* helper macros */
112 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)	/* pages -> bytes, widened to 64 bits before the shift */
113 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))	/* bytes -> pages; low PAGESHIFT bits are dropped */
115 
116 static struct memlist *
117 dr_get_memlist(dr_mem_unit_t *mp)
118 {
119 	struct memlist	*mlist = NULL;
120 	sbd_error_t	*err;
121 	static fn_t	f = "dr_get_memlist";
122 
123 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
124 
125 	/*
126 	 * Return cached memlist, if present.
127 	 * This memlist will be present following an
128 	 * unconfigure (a.k.a: detach) of this memunit.
129 	 * It should only be used in the case were a configure
130 	 * is bringing this memunit back in without going
131 	 * through the disconnect and connect states.
132 	 */
133 	if (mp->sbm_mlist) {
134 		PR_MEM("%s: found cached memlist\n", f);
135 
136 		mlist = memlist_dup(mp->sbm_mlist);
137 	} else {
138 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
139 
140 		/* attempt to construct a memlist using phys_install */
141 
142 		/* round down to slice base address */
143 		basepa &= ~(mp->sbm_slice_size - 1);
144 
145 		/* get a copy of phys_install to edit */
146 		memlist_read_lock();
147 		mlist = memlist_dup(phys_install);
148 		memlist_read_unlock();
149 
150 		/* trim lower irrelevant span */
151 		if (mlist)
152 			mlist = memlist_del_span(mlist, 0ull, basepa);
153 
154 		/* trim upper irrelevant span */
155 		if (mlist) {
156 			uint64_t endpa;
157 
158 			basepa += mp->sbm_slice_size;
159 			endpa = _ptob64(physmax + 1);
160 			if (endpa > basepa)
161 				mlist = memlist_del_span(
162 				    mlist, basepa,
163 				    endpa - basepa);
164 		}
165 
166 		if (mlist) {
167 			/* successfully built a memlist */
168 			PR_MEM("%s: derived memlist from phys_install\n", f);
169 		}
170 
171 		/* if no mlist yet, try platform layer */
172 		if (!mlist) {
173 			err = drmach_mem_get_memlist(
174 			    mp->sbm_cm.sbdev_id, &mlist);
175 			if (err) {
176 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
177 				mlist = NULL; /* paranoia */
178 			}
179 		}
180 	}
181 
182 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
183 	PR_MEMLIST_DUMP(mlist);
184 
185 	return (mlist);
186 }
187 
188 typedef struct {	/* dr_release_mem() <-> dr_mem_del_done() handshake */
189 	kcondvar_t cond;	/* signalled by dr_mem_del_done() */
190 	kmutex_t lock;		/* held while error/done are read or written */
191 	int error;		/* error value passed to dr_mem_del_done() */
192 	int done;		/* set to 1 by dr_mem_del_done() */
193 } dr_release_mem_sync_t;
194 
195 /*
196  * Memory has been logically removed by the time this routine is called.
197  */
198 static void
199 dr_mem_del_done(void *arg, int error)
200 {
201 	dr_release_mem_sync_t *ds = arg;
202 
203 	mutex_enter(&ds->lock);
204 	ds->error = error;
205 	ds->done = 1;
206 	cv_signal(&ds->cond);
207 	mutex_exit(&ds->lock);
208 }
209 
210 /*
211  * When we reach here the memory being drained should have
212  * already been reserved in dr_pre_release_mem().
213  * Our only task here is to kick off the "drain" and wait
214  * for it to finish.
215  */
216 void
217 dr_release_mem(dr_common_unit_t *cp)
218 {
219 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
220 	int		err;
221 	dr_release_mem_sync_t rms;
222 	static fn_t	f = "dr_release_mem";
223 
224 	/* check that this memory unit has been reserved */
225 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
226 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
227 		return;
228 	}
229 
230 	bzero((void *) &rms, sizeof (rms));
231 
232 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
233 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
234 
235 	mutex_enter(&rms.lock);
236 	err = kphysm_del_start(mp->sbm_memhandle,
237 	    dr_mem_del_done, (void *) &rms);
238 	if (err == KPHYSM_OK) {
239 		/* wait for completion or interrupt */
240 		while (!rms.done) {
241 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
242 				/* then there is a pending UNIX signal */
243 				(void) kphysm_del_cancel(mp->sbm_memhandle);
244 
245 				/* wait for completion */
246 				while (!rms.done)
247 					cv_wait(&rms.cond, &rms.lock);
248 			}
249 		}
250 		/* get the result of the memory delete operation */
251 		err = rms.error;
252 	}
253 	mutex_exit(&rms.lock);
254 
255 	cv_destroy(&rms.cond);
256 	mutex_destroy(&rms.lock);
257 
258 	if (err != KPHYSM_OK) {
259 		int e_code;
260 
261 		switch (err) {
262 			case KPHYSM_ENOWORK:
263 				e_code = ESBD_NOERROR;
264 				break;
265 
266 			case KPHYSM_EHANDLE:
267 			case KPHYSM_ESEQUENCE:
268 				e_code = ESBD_INTERNAL;
269 				break;
270 
271 			case KPHYSM_ENOTVIABLE:
272 				e_code = ESBD_MEM_NOTVIABLE;
273 				break;
274 
275 			case KPHYSM_EREFUSED:
276 				e_code = ESBD_MEM_REFUSED;
277 				break;
278 
279 			case KPHYSM_ENONRELOC:
280 				e_code = ESBD_MEM_NONRELOC;
281 				break;
282 
283 			case KPHYSM_ECANCELLED:
284 				e_code = ESBD_MEM_CANCELLED;
285 				break;
286 
287 			case KPHYSM_ERESOURCE:
288 				e_code = ESBD_MEMFAIL;
289 				break;
290 
291 			default:
292 				cmn_err(CE_WARN,
293 				    "%s: unexpected kphysm error code %d,"
294 				    " id 0x%p",
295 				    f, err, mp->sbm_cm.sbdev_id);
296 
297 				e_code = ESBD_IO;
298 				break;
299 		}
300 
301 		if (e_code != ESBD_NOERROR) {
302 			dr_dev_err(CE_WARN, &mp->sbm_cm, e_code);
303 		}
304 	}
305 }
306 
307 void
308 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
309 {
310 	_NOTE(ARGUNUSED(hp))
311 
312 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
313 	struct memlist	*ml, *mc;
314 	sbd_error_t	*err;
315 	static fn_t	f = "dr_attach_mem";
316 
317 	PR_MEM("%s...\n", f);
318 
319 	dr_lock_status(hp->h_bd);
320 	err = drmach_configure(cp->sbdev_id, 0);
321 	dr_unlock_status(hp->h_bd);
322 	if (err) {
323 		DRERR_SET_C(&cp->sbdev_error, &err);
324 		return;
325 	}
326 
327 	ml = dr_get_memlist(mp);
328 	for (mc = ml; mc; mc = mc->next) {
329 		int		 rv;
330 		sbd_error_t	*err;
331 
332 		rv = kphysm_add_memory_dynamic(
333 		    (pfn_t)(mc->address >> PAGESHIFT),
334 		    (pgcnt_t)(mc->size >> PAGESHIFT));
335 		if (rv != KPHYSM_OK) {
336 			/*
337 			 * translate kphysm error and
338 			 * store in devlist error
339 			 */
340 			switch (rv) {
341 			case KPHYSM_ERESOURCE:
342 				rv = ESBD_NOMEM;
343 				break;
344 
345 			case KPHYSM_EFAULT:
346 				rv = ESBD_FAULT;
347 				break;
348 
349 			default:
350 				rv = ESBD_INTERNAL;
351 				break;
352 			}
353 
354 			if (rv == ESBD_INTERNAL) {
355 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
356 			} else
357 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
358 			break;
359 		}
360 
361 		err = drmach_mem_add_span(
362 		    mp->sbm_cm.sbdev_id, mc->address, mc->size);
363 		if (err) {
364 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
365 			break;
366 		}
367 	}
368 
369 	memlist_delete(ml);
370 
371 	/* back out if configure failed */
372 	if (mp->sbm_cm.sbdev_error != NULL) {
373 		dr_lock_status(hp->h_bd);
374 		err = drmach_unconfigure(cp->sbdev_id, 0);
375 		if (err)
376 			sbd_err_clear(&err);
377 		dr_unlock_status(hp->h_bd);
378 	}
379 }
380 
381 static struct memlist *
382 dr_memlist_del_retired_pages(struct memlist *mlist)
383 {
384 	page_t		*pp;
385 	pfn_t		pfn;
386 	kmutex_t	*vphm;
387 	vnode_t		*vp = retired_pages;
388 	static fn_t	f = "dr_memlist_del_retired_pages";
389 
390 	vphm = page_vnode_mutex(vp);
391 	mutex_enter(vphm);
392 
393 	PR_MEM("%s\n", f);
394 
395 	if ((pp = vp->v_pages) == NULL) {
396 		mutex_exit(vphm);
397 		return (mlist);
398 	}
399 
400 	do {
401 		ASSERT(pp != NULL);
402 		ASSERT(pp->p_vnode == retired_pages);
403 
404 		if (!page_try_reclaim_lock(pp, SE_SHARED, SE_RETIRED))
405 			continue;
406 
407 		pfn = page_pptonum(pp);
408 
409 		/*
410 		 * Page retirement currently breaks large pages into PAGESIZE
411 		 * pages. If this changes, need to remove the assert and deal
412 		 * with different page sizes.
413 		 */
414 		ASSERT(pp->p_szc == 0);
415 
416 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
417 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
418 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
419 			    "from memlist\n", ptob(pfn), pfn);
420 		}
421 
422 		page_unlock(pp);
423 	} while ((pp = pp->p_vpnext) != vp->v_pages);
424 
425 	mutex_exit(vphm);
426 
427 	return (mlist);
428 }
429 
430 static int
431 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
432 {
433 	int		rv = -1;
434 	time_t		 copytime;
435 	drmachid_t	 cr_id;
436 	dr_sr_handle_t	*srhp = NULL;
437 	dr_board_t	*t_bp, *s_bp;
438 	struct memlist	*c_ml, *d_ml;
439 	sbd_error_t	*err;
440 	static fn_t	 f = "dr_move_memory";
441 
442 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
443 	    f,
444 	    s_mp->sbm_cm.sbdev_path,
445 	    t_mp->sbm_cm.sbdev_path);
446 
447 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
448 	ASSERT(s_mp->sbm_peer == t_mp);
449 	ASSERT(s_mp->sbm_mlist);
450 
451 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
452 	ASSERT(t_mp->sbm_peer == s_mp);
453 
454 	/*
455 	 * create a memlist of spans to copy by removing
456 	 * the spans that have been deleted, if any, from
457 	 * the full source board memlist.  s_mp->sbm_del_mlist
458 	 * will be NULL if there were no spans deleted from
459 	 * the source board.
460 	 */
461 	c_ml = memlist_dup(s_mp->sbm_mlist);
462 	d_ml = s_mp->sbm_del_mlist;
463 	while (d_ml != NULL) {
464 		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
465 		d_ml = d_ml->next;
466 	}
467 
468 	/*
469 	 * Remove retired pages from the copy list. The page content
470 	 * need not be copied since the pages are no longer in use.
471 	 */
472 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
473 	PR_MEMLIST_DUMP(c_ml);
474 
475 	c_ml = dr_memlist_del_retired_pages(c_ml);
476 
477 	PR_MEM("%s: copy list after removing retired pages:\n", f);
478 	PR_MEMLIST_DUMP(c_ml);
479 
480 	/*
481 	 * With parallel copy, it shouldn't make a difference which
482 	 * CPU is the actual master during copy-rename since all
483 	 * CPUs participate in the parallel copy anyway.
484 	 */
485 	affinity_set(CPU_CURRENT);
486 
487 	err = drmach_copy_rename_init(
488 	    t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
489 	if (err) {
490 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
491 		affinity_clear();
492 		memlist_delete(c_ml);
493 		return (-1);
494 	}
495 
496 	srhp = dr_get_sr_handle(hp);
497 	ASSERT(srhp);
498 
499 	copytime = lbolt;
500 
501 	/* Quiesce the OS.  */
502 	if (dr_suspend(srhp)) {
503 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
504 		    " for copy-rename", f);
505 
506 		err = drmach_copy_rename_fini(cr_id);
507 		if (err) {
508 			/*
509 			 * no error is expected since the program has
510 			 * not yet run.
511 			 */
512 
513 			/* catch this in debug kernels */
514 			ASSERT(0);
515 
516 			sbd_err_clear(&err);
517 		}
518 
519 		/* suspend error reached via hp */
520 		s_mp->sbm_cm.sbdev_error = hp->h_err;
521 		hp->h_err = NULL;
522 		goto done;
523 	}
524 
525 	drmach_copy_rename(cr_id);
526 
527 	/* Resume the OS.  */
528 	dr_resume(srhp);
529 
530 	copytime = lbolt - copytime;
531 
532 	if (err = drmach_copy_rename_fini(cr_id))
533 		goto done;
534 
535 	/*
536 	 * Rename memory for lgroup.
537 	 * Source and target board numbers are packaged in arg.
538 	 */
539 	s_bp = s_mp->sbm_cm.sbdev_bp;
540 	t_bp = t_mp->sbm_cm.sbdev_bp;
541 
542 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
543 	    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
544 
545 
546 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
547 	    f, copytime, copytime / hz);
548 
549 	rv = 0;
550 done:
551 	if (srhp)
552 		dr_release_sr_handle(srhp);
553 	if (err)
554 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
555 	affinity_clear();
556 
557 	return (rv);
558 }
559 
560 /*
561  * If detaching node contains memory that is "non-permanent"
562  * then the memory adr's are simply cleared.  If the memory
563  * is non-relocatable, then do a copy-rename.
564  */
565 void
566 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
567 {
568 	int			rv = 0;
569 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
570 	dr_mem_unit_t		*t_mp;
571 	dr_state_t		state;
572 	static fn_t		f = "dr_detach_mem";
573 
574 	PR_MEM("%s...\n", f);
575 
576 	/* lookup target mem unit and target board structure, if any */
577 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
578 		t_mp = s_mp->sbm_peer;
579 		ASSERT(t_mp != NULL);
580 		ASSERT(t_mp->sbm_peer == s_mp);
581 	} else {
582 		t_mp = NULL;
583 	}
584 
585 	/* verify mem unit's state is UNREFERENCED */
586 	state = s_mp->sbm_cm.sbdev_state;
587 	if (state != DR_STATE_UNREFERENCED) {
588 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
589 		return;
590 	}
591 
592 	/* verify target mem unit's state is UNREFERENCED, if any */
593 	if (t_mp != NULL) {
594 		state = t_mp->sbm_cm.sbdev_state;
595 		if (state != DR_STATE_UNREFERENCED) {
596 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
597 			return;
598 		}
599 	}
600 
601 	/*
602 	 * If there is no target board (no copy/rename was needed), then
603 	 * we're done!
604 	 */
605 	if (t_mp == NULL) {
606 		sbd_error_t *err;
607 		/*
608 		 * Reprogram interconnect hardware and disable
609 		 * memory controllers for memory node that's going away.
610 		 */
611 
612 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
613 		if (err) {
614 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
615 			rv = -1;
616 		}
617 	} else {
618 		rv = dr_move_memory(hp, s_mp, t_mp);
619 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
620 		    f,
621 		    rv ? "FAILED" : "COMPLETED",
622 		    s_mp->sbm_cm.sbdev_bp->b_num,
623 		    t_mp->sbm_cm.sbdev_bp->b_num);
624 
625 		if (rv != 0)
626 			(void) dr_cancel_mem(s_mp);
627 	}
628 
629 	if (rv == 0) {
630 		sbd_error_t *err;
631 
632 		dr_lock_status(hp->h_bd);
633 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
634 		dr_unlock_status(hp->h_bd);
635 		if (err)
636 			sbd_err_clear(&err);
637 	}
638 }
639 
640 /*
641  * This routine acts as a wrapper for kphysm_del_span_query in order to
642  * support potential memory holes in a board's physical address space.
643  * It calls kphysm_del_span_query for each node in a memlist and accumulates
644  * the results in *mp.
645  */
646 static int
647 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
648 {
649 	struct memlist	*ml;
650 	int		 rv = 0;
651 
652 
653 	if (mlist == NULL)
654 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
655 
656 	mp->phys_pages = 0;
657 	mp->managed = 0;
658 	mp->nonrelocatable = 0;
659 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
660 	mp->last_nonrelocatable = 0;
661 
662 	for (ml = mlist; ml; ml = ml->next) {
663 		memquery_t mq;
664 
665 		rv = kphysm_del_span_query(
666 		    _b64top(ml->address), _b64top(ml->size), &mq);
667 		if (rv)
668 			break;
669 
670 		mp->phys_pages += mq.phys_pages;
671 		mp->managed += mq.managed;
672 		mp->nonrelocatable += mq.nonrelocatable;
673 
674 		if (mq.nonrelocatable != 0) {
675 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
676 				mp->first_nonrelocatable =
677 				    mq.first_nonrelocatable;
678 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
679 				mp->last_nonrelocatable =
680 				    mq.last_nonrelocatable;
681 		}
682 	}
683 
684 	if (mp->nonrelocatable == 0)
685 		mp->first_nonrelocatable = 0;	/* XXX */
686 
687 	return (rv);
688 }
689 
690 /*
691  * NOTE: This routine is only partially smart about multiple
692  *	 mem-units.  Need to make mem-status structure smart
693  *	 about them also.
694  */
695 int
696 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
697 {
698 	int		m, mix;
699 	memdelstat_t	mdst;
700 	memquery_t	mq;
701 	dr_board_t	*bp;
702 	dr_mem_unit_t	*mp;
703 	sbd_mem_stat_t	*msp;
704 	static fn_t	f = "dr_mem_status";
705 
706 	bp = hp->h_bd;
707 	devset &= DR_DEVS_PRESENT(bp);
708 
709 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
710 		int		rv;
711 		sbd_error_t	*err;
712 		drmach_status_t	 pstat;
713 		dr_mem_unit_t	*p_mp;
714 
715 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
716 			continue;
717 
718 		mp = dr_get_mem_unit(bp, m);
719 
720 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
721 			/* present, but not fully initialized */
722 			continue;
723 		}
724 
725 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
726 			continue;
727 
728 		/* fetch platform status */
729 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
730 		if (err) {
731 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
732 			continue;
733 		}
734 
735 		msp = &dsp->d_mem;
736 		bzero((caddr_t)msp, sizeof (*msp));
737 
738 		strncpy(msp->ms_cm.c_id.c_name, pstat.type,
739 		    sizeof (msp->ms_cm.c_id.c_name));
740 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
741 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
742 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
743 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
744 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
745 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
746 
747 		msp->ms_totpages = mp->sbm_npages;
748 		msp->ms_basepfn = mp->sbm_basepfn;
749 		msp->ms_pageslost = mp->sbm_pageslost;
750 		msp->ms_cage_enabled = kcage_on;
751 
752 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
753 			p_mp = mp->sbm_peer;
754 		else
755 			p_mp = NULL;
756 
757 		if (p_mp == NULL) {
758 			msp->ms_peer_is_target = 0;
759 			msp->ms_peer_ap_id[0] = '\0';
760 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
761 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
762 			char *minor;
763 
764 			/*
765 			 * b_dip doesn't have to be held for ddi_pathname()
766 			 * because the board struct (dr_board_t) will be
767 			 * destroyed before b_dip detaches.
768 			 */
769 			(void) ddi_pathname(bp->b_dip, path);
770 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
771 
772 			snprintf(msp->ms_peer_ap_id,
773 			    sizeof (msp->ms_peer_ap_id), "%s%s",
774 			    path, (minor == NULL) ? "" : minor);
775 
776 			kmem_free(path, MAXPATHLEN);
777 
778 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
779 				msp->ms_peer_is_target = 1;
780 		}
781 
782 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
783 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
784 		else
785 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
786 
787 		if (rv == KPHYSM_OK) {
788 			/*
789 			 * Any pages above managed is "free",
790 			 * i.e. it's collected.
791 			 */
792 			msp->ms_detpages += (uint_t)(mdst.collected +
793 			    mdst.phys_pages - mdst.managed);
794 		} else {
795 			/*
796 			 * If we're UNREFERENCED or UNCONFIGURED,
797 			 * then the number of detached pages is
798 			 * however many pages are on the board.
799 			 * I.e. detached = not in use by OS.
800 			 */
801 			switch (msp->ms_cm.c_ostate) {
802 			/*
803 			 * changed to use cfgadm states
804 			 *
805 			 * was:
806 			 *	case DR_STATE_UNREFERENCED:
807 			 *	case DR_STATE_UNCONFIGURED:
808 			 */
809 			case SBD_STAT_UNCONFIGURED:
810 				msp->ms_detpages = msp->ms_totpages;
811 				break;
812 
813 			default:
814 				break;
815 			}
816 		}
817 
818 		/*
819 		 * kphysm_del_span_query can report non-reloc pages = total
820 		 * pages for memory that is not yet configured
821 		 */
822 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
823 			struct memlist *ml;
824 
825 			ml = dr_get_memlist(mp);
826 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
827 			memlist_delete(ml);
828 
829 			if (rv == KPHYSM_OK) {
830 				msp->ms_managed_pages = mq.managed;
831 				msp->ms_noreloc_pages = mq.nonrelocatable;
832 				msp->ms_noreloc_first =
833 				    mq.first_nonrelocatable;
834 				msp->ms_noreloc_last =
835 				    mq.last_nonrelocatable;
836 				msp->ms_cm.c_sflags = 0;
837 				if (mq.nonrelocatable) {
838 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
839 					    msp->ms_cm.c_sflags);
840 				}
841 			} else {
842 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
843 				    f, rv);
844 			}
845 		}
846 
847 		/*
848 		 * Check source unit state during copy-rename
849 		 */
850 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
851 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
852 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
853 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
854 
855 		mix++;
856 		dsp++;
857 	}
858 
859 	return (mix);
860 }
861 
862 int
863 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
864 {
865 	_NOTE(ARGUNUSED(hp))
866 
867 	int		err_flag = 0;
868 	int		d;
869 	sbd_error_t	*err;
870 	static fn_t	f = "dr_pre_attach_mem";
871 
872 	PR_MEM("%s...\n", f);
873 
874 	for (d = 0; d < devnum; d++) {
875 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
876 		dr_state_t	state;
877 
878 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
879 
880 		state = mp->sbm_cm.sbdev_state;
881 		switch (state) {
882 		case DR_STATE_UNCONFIGURED:
883 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
884 			    f,
885 			    mp->sbm_cm.sbdev_path);
886 
887 			/* use memlist cached by dr_post_detach_mem_unit */
888 			ASSERT(mp->sbm_mlist != NULL);
889 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
890 			    f, mp->sbm_cm.sbdev_path);
891 			PR_MEMLIST_DUMP(mp->sbm_mlist);
892 
893 			/* kphysm del handle should be have been freed */
894 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
895 
896 			/*FALLTHROUGH*/
897 
898 		case DR_STATE_CONNECTED:
899 			PR_MEM("%s: reprogramming mem hardware on %s\n",
900 			    f, mp->sbm_cm.sbdev_bp->b_path);
901 
902 			PR_MEM("%s: enabling %s\n",
903 			    f, mp->sbm_cm.sbdev_path);
904 
905 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
906 			if (err) {
907 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
908 				err_flag = 1;
909 			}
910 			break;
911 
912 		default:
913 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
914 			err_flag = 1;
915 			break;
916 		}
917 
918 		/* exit for loop if error encountered */
919 		if (err_flag)
920 			break;
921 	}
922 
923 	return (err_flag ? -1 : 0);
924 }
925 
926 static void
927 dr_update_mc_memory()
928 {
929 	void		(*mc_update_mlist)(void);
930 
931 	/*
932 	 * mc-opl is configured during drmach_mem_new but the memory
933 	 * has not been added to phys_install at that time.
934 	 * we must inform mc-opl to update the mlist after we
935 	 * attach or detach a system board.
936 	 */
937 
938 	mc_update_mlist = (void (*)(void))
939 	    modgetsymvalue("opl_mc_update_mlist", 0);
940 
941 	if (mc_update_mlist != NULL) {
942 		(*mc_update_mlist)();
943 	}
944 }
945 
946 int
947 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
948 {
949 	_NOTE(ARGUNUSED(hp))
950 
951 	int		d;
952 	static fn_t	f = "dr_post_attach_mem";
953 
954 	PR_MEM("%s...\n", f);
955 
956 	for (d = 0; d < devnum; d++) {
957 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
958 		struct memlist	*mlist, *ml;
959 
960 		mlist = dr_get_memlist(mp);
961 		if (mlist == NULL) {
962 			/* OPL supports memoryless board */
963 			continue;
964 		}
965 
966 		/*
967 		 * Verify the memory really did successfully attach
968 		 * by checking for its existence in phys_install.
969 		 */
970 		memlist_read_lock();
971 		if (memlist_intersect(phys_install, mlist) == 0) {
972 			memlist_read_unlock();
973 
974 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
975 
976 			PR_MEM("%s: %s memlist not in phys_install",
977 			    f, mp->sbm_cm.sbdev_path);
978 
979 			memlist_delete(mlist);
980 			continue;
981 		}
982 		memlist_read_unlock();
983 
984 		for (ml = mlist; ml != NULL; ml = ml->next) {
985 			sbd_error_t *err;
986 
987 			err = drmach_mem_add_span(
988 			    mp->sbm_cm.sbdev_id,
989 			    ml->address,
990 			    ml->size);
991 			if (err)
992 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
993 		}
994 
995 		memlist_delete(mlist);
996 
997 		/*
998 		 * Destroy cached memlist, if any.
999 		 * There will be a cached memlist in sbm_mlist if
1000 		 * this board is being configured directly after
1001 		 * an unconfigure.
1002 		 * To support this transition, dr_post_detach_mem
1003 		 * left a copy of the last known memlist in sbm_mlist.
1004 		 * This memlist could differ from any derived from
1005 		 * hardware if while this memunit was last configured
1006 		 * the system detected and deleted bad pages from
1007 		 * phys_install.  The location of those bad pages
1008 		 * will be reflected in the cached memlist.
1009 		 */
1010 		if (mp->sbm_mlist) {
1011 			memlist_delete(mp->sbm_mlist);
1012 			mp->sbm_mlist = NULL;
1013 		}
1014 	}
1015 
1016 	dr_update_mc_memory();
1017 
1018 	return (0);
1019 }
1020 
1021 int
1022 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1023 {
1024 	_NOTE(ARGUNUSED(hp))
1025 
1026 	int d;
1027 
1028 	for (d = 0; d < devnum; d++) {
1029 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1030 
1031 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1032 	}
1033 
1034 	return (0);
1035 }
1036 
1037 int
1038 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1039 {
1040 	_NOTE(ARGUNUSED(hp))
1041 
1042 	int		d, rv;
1043 	static fn_t	f = "dr_post_detach_mem";
1044 
1045 	PR_MEM("%s...\n", f);
1046 
1047 	rv = 0;
1048 	for (d = 0; d < devnum; d++) {
1049 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1050 
1051 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1052 
1053 		if (dr_post_detach_mem_unit(mp))
1054 			rv = -1;
1055 	}
1056 	dr_update_mc_memory();
1057 
1058 	return (rv);
1059 }
1060 
1061 static void
1062 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1063 {
1064 	static fn_t	f = "dr_add_memory_spans";
1065 
1066 	PR_MEM("%s...", f);
1067 	PR_MEMLIST_DUMP(ml);
1068 
1069 #ifdef DEBUG
1070 	memlist_read_lock();
1071 	if (memlist_intersect(phys_install, ml)) {
1072 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1073 	}
1074 	memlist_read_unlock();
1075 #endif
1076 
1077 	for (; ml; ml = ml->next) {
1078 		pfn_t		 base;
1079 		pgcnt_t		 npgs;
1080 		int		 rv;
1081 		sbd_error_t	*err;
1082 
1083 		base = _b64top(ml->address);
1084 		npgs = _b64top(ml->size);
1085 
1086 		rv = kphysm_add_memory_dynamic(base, npgs);
1087 
1088 		err = drmach_mem_add_span(
1089 		    mp->sbm_cm.sbdev_id,
1090 		    ml->address,
1091 		    ml->size);
1092 
1093 		if (err)
1094 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1095 
1096 		if (rv != KPHYSM_OK) {
1097 			cmn_err(CE_WARN, "%s:"
1098 			    " unexpected kphysm_add_memory_dynamic"
1099 			    " return value %d;"
1100 			    " basepfn=0x%lx, npages=%ld\n",
1101 			    f, rv, base, npgs);
1102 
1103 			continue;
1104 		}
1105 	}
1106 }
1107 
1108 static int
1109 memlist_touch(struct memlist *ml, uint64_t add)
1110 {
1111 	while (ml != NULL) {
1112 		if ((add == ml->address) ||
1113 		    (add == (ml->address + ml->size)))
1114 			return (1);
1115 		ml = ml->next;
1116 	}
1117 	return (0);
1118 }
1119 
1120 static sbd_error_t *
1121 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
1122 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
1123 {
1124 	struct memlist	*ml;
1125 	sbd_error_t	*err;
1126 	static fn_t	f = "dr_process_excess_mlist";
1127 	uint64_t	new_pa, nbytes;
1128 	int rv;
1129 
1130 	err = NULL;
1131 
1132 	/*
1133 	 * After the small <-> big copy-rename,
1134 	 * the original address space for the
1135 	 * source board may have excess to be
1136 	 * deleted. This is a case different
1137 	 * from the big->small excess source
1138 	 * memory case listed below.
1139 	 * Remove s_mp->sbm_del_mlist from
1140 	 * the kernel cage glist.
1141 	 */
1142 	for (ml = s_mp->sbm_del_mlist; ml;
1143 	    ml = ml->next) {
1144 		PR_MEM("%s: delete small<->big copy-"
1145 		    "rename source excess memory", f);
1146 		PR_MEMLIST_DUMP(ml);
1147 
1148 		err = drmach_mem_del_span(
1149 		    s_mp->sbm_cm.sbdev_id,
1150 		    ml->address, ml->size);
1151 		if (err)
1152 			DRERR_SET_C(&s_mp->
1153 			    sbm_cm.sbdev_error, &err);
1154 		ASSERT(err == NULL);
1155 	}
1156 
1157 	PR_MEM("%s: adding back remaining portion"
1158 	    " of %s, memlist:\n",
1159 	    f, t_mp->sbm_cm.sbdev_path);
1160 	PR_MEMLIST_DUMP(t_excess_mlist);
1161 
1162 	for (ml = t_excess_mlist; ml; ml = ml->next) {
1163 		struct memlist ml0;
1164 
1165 		ml0.address = ml->address;
1166 		ml0.size = ml->size;
1167 		ml0.next = ml0.prev = NULL;
1168 
1169 		/*
1170 		 * If the memory object is 256 MB aligned (max page size
1171 		 * on OPL), it will not be coalesced to the adjacent memory
1172 		 * chunks.  The coalesce logic assumes contiguous page
1173 		 * structures for contiguous memory and we hit panic.
1174 		 * For anything less than 256 MB alignment, we have
1175 		 * to make sure that it is not adjacent to anything.
1176 		 * If the new chunk is adjacent to phys_install, we
1177 		 * truncate it to 4MB boundary.  4 MB is somewhat
1178 		 * arbitrary.  However we do not want to create
1179 		 * very small segments because they can cause problem.
1180 		 * The extreme case of 8K segment will fail
1181 		 * kphysm_add_memory_dynamic(), e.g.
1182 		 */
1183 		if ((ml->address & (MH_MPSS_ALIGNMENT - 1)) ||
1184 		    (ml->size & (MH_MPSS_ALIGNMENT - 1))) {
1185 
1186 		memlist_read_lock();
1187 		rv = memlist_touch(phys_install, ml0.address);
1188 		memlist_read_unlock();
1189 
1190 		if (rv) {
1191 			new_pa = roundup(ml0.address + 1, MH_MIN_ALIGNMENT);
1192 			nbytes = (new_pa -  ml0.address);
1193 			if (nbytes >= ml0.size) {
1194 				t_mp->sbm_dyn_segs =
1195 				    memlist_del_span(t_mp->sbm_dyn_segs,
1196 				    ml0.address, ml0.size);
1197 				continue;
1198 			}
1199 			t_mp->sbm_dyn_segs =
1200 			    memlist_del_span(t_mp->sbm_dyn_segs,
1201 			    ml0.address, nbytes);
1202 			ml0.size -= nbytes;
1203 			ml0.address = new_pa;
1204 		}
1205 
1206 		if (ml0.size == 0) {
1207 			continue;
1208 		}
1209 
1210 		memlist_read_lock();
1211 		rv = memlist_touch(phys_install, ml0.address + ml0.size);
1212 		memlist_read_unlock();
1213 
1214 		if (rv) {
1215 			new_pa = rounddown(ml0.address + ml0.size - 1,
1216 			    MH_MIN_ALIGNMENT);
1217 			nbytes = (ml0.address + ml0.size - new_pa);
1218 			if (nbytes >= ml0.size) {
1219 				t_mp->sbm_dyn_segs =
1220 				    memlist_del_span(t_mp->sbm_dyn_segs,
1221 				    ml0.address, ml0.size);
1222 				continue;
1223 			}
1224 			t_mp->sbm_dyn_segs =
1225 			    memlist_del_span(t_mp->sbm_dyn_segs,
1226 			    new_pa, nbytes);
1227 			ml0.size -= nbytes;
1228 		}
1229 
1230 		if (ml0.size > 0) {
1231 			dr_add_memory_spans(s_mp, &ml0);
1232 		}
1233 		} else if (ml0.size > 0) {
1234 			dr_add_memory_spans(s_mp, &ml0);
1235 		}
1236 	}
1237 	memlist_delete(t_excess_mlist);
1238 	return (err);
1239 }
1240 
1241 static int
1242 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1243 {
1244 	uint64_t	sz = s_mp->sbm_slice_size;
1245 	uint64_t	sm = sz - 1;
1246 	/* old and new below refer to PAs before and after copy-rename */
1247 	uint64_t	s_old_basepa, s_new_basepa;
1248 	uint64_t	t_old_basepa, t_new_basepa;
1249 	dr_mem_unit_t	*t_mp, *x_mp;
1250 	drmach_mem_info_t	minfo;
1251 	struct memlist	*ml;
1252 	struct memlist	*t_excess_mlist;
1253 	int		rv;
1254 	int		s_excess_mem_deleted = 0;
1255 	sbd_error_t	*err;
1256 	static fn_t	f = "dr_post_detach_mem_unit";
1257 
1258 	PR_MEM("%s...\n", f);
1259 
1260 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1261 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1262 	    f, s_mp->sbm_cm.sbdev_path);
1263 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1264 
1265 	/* sanity check */
1266 	ASSERT(s_mp->sbm_del_mlist == NULL ||
1267 	    (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1268 
1269 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1270 		t_mp = s_mp->sbm_peer;
1271 		ASSERT(t_mp != NULL);
1272 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1273 		ASSERT(t_mp->sbm_peer == s_mp);
1274 
1275 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1276 		ASSERT(t_mp->sbm_del_mlist);
1277 
1278 		PR_MEM("%s: target %s: deleted memlist:\n",
1279 		    f, t_mp->sbm_cm.sbdev_path);
1280 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1281 	} else {
1282 		/* this is no target unit */
1283 		t_mp = NULL;
1284 	}
1285 
1286 	/*
1287 	 * Verify the memory really did successfully detach
1288 	 * by checking for its non-existence in phys_install.
1289 	 */
1290 	rv = 0;
1291 	memlist_read_lock();
1292 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1293 		x_mp = s_mp;
1294 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1295 	}
1296 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1297 		x_mp = t_mp;
1298 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1299 	}
1300 	memlist_read_unlock();
1301 
1302 	if (rv) {
1303 		/* error: memlist still in phys_install */
1304 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1305 	}
1306 
1307 	/*
1308 	 * clean mem unit state and bail out if an error has been recorded.
1309 	 */
1310 	rv = 0;
1311 	if (s_mp->sbm_cm.sbdev_error) {
1312 		PR_MEM("%s: %s flags=%x", f,
1313 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1314 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1315 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1316 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1317 		rv = -1;
1318 	}
1319 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1320 		PR_MEM("%s: %s flags=%x", f,
1321 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1322 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1323 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1324 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1325 		rv = -1;
1326 	}
1327 	if (rv)
1328 		goto cleanup;
1329 
1330 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1331 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1332 	ASSERT(err == NULL);
1333 	s_new_basepa = minfo.mi_basepa;
1334 
1335 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1336 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1337 
1338 	if (t_mp != NULL) {
1339 		struct memlist *s_copy_mlist;
1340 
1341 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1342 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1343 		ASSERT(err == NULL);
1344 		t_new_basepa = minfo.mi_basepa;
1345 
1346 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1347 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1348 
1349 		/*
1350 		 * Construct copy list with original source addresses.
1351 		 * Used to add back excess target mem.
1352 		 */
1353 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1354 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1355 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1356 			    ml->address, ml->size);
1357 		}
1358 
1359 		PR_MEM("%s: source copy list:\n:", f);
1360 		PR_MEMLIST_DUMP(s_copy_mlist);
1361 
1362 		/*
1363 		 * We had to swap mem-units, so update
1364 		 * memlists accordingly with new base
1365 		 * addresses.
1366 		 */
1367 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
1368 			ml->address -= t_old_basepa;
1369 			ml->address += t_new_basepa;
1370 		}
1371 
1372 		/*
1373 		 * There is no need to explicitly rename the target delete
1374 		 * memlist, because sbm_del_mlist and sbm_mlist always
1375 		 * point to the same memlist for a copy/rename operation.
1376 		 */
1377 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1378 
1379 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1380 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1381 
1382 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
1383 			ml->address -= s_old_basepa;
1384 			ml->address += s_new_basepa;
1385 		}
1386 
1387 		PR_MEM("%s: renamed source memlist:\n", f);
1388 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1389 		PR_MEM("%s: source dyn seg memlist:\n", f);
1390 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
1391 
1392 		/*
1393 		 * Keep track of dynamically added segments
1394 		 * since they cannot be split if we need to delete
1395 		 * excess source memory later for this board.
1396 		 */
1397 		if (t_mp->sbm_dyn_segs)
1398 			memlist_delete(t_mp->sbm_dyn_segs);
1399 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1400 		s_mp->sbm_dyn_segs = NULL;
1401 
1402 		/*
1403 		 * Add back excess target memory.
1404 		 * Subtract out the portion of the target memory
1405 		 * node that was taken over by the source memory
1406 		 * node.
1407 		 */
1408 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1409 		for (ml = s_copy_mlist; ml; ml = ml->next) {
1410 			t_excess_mlist =
1411 			    memlist_del_span(t_excess_mlist,
1412 			    ml->address, ml->size);
1413 		}
1414 		PR_MEM("%s: excess memlist:\n", f);
1415 		PR_MEMLIST_DUMP(t_excess_mlist);
1416 
1417 		/*
1418 		 * Update dynamically added segs
1419 		 */
1420 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1421 			t_mp->sbm_dyn_segs =
1422 			    memlist_del_span(t_mp->sbm_dyn_segs,
1423 			    ml->address, ml->size);
1424 		}
1425 		for (ml = t_excess_mlist; ml; ml = ml->next) {
1426 			t_mp->sbm_dyn_segs =
1427 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1428 			    ml->address, ml->size);
1429 		}
1430 		PR_MEM("%s: %s: updated dynamic seg list:\n",
1431 		    f, t_mp->sbm_cm.sbdev_path);
1432 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1433 
1434 		if (t_excess_mlist != NULL) {
1435 			err = dr_process_excess_mlist(s_mp, t_mp,
1436 			    t_excess_mlist);
1437 			s_excess_mem_deleted = 1;
1438 		}
1439 
1440 		memlist_delete(s_copy_mlist);
1441 
1442 #ifdef DEBUG
1443 		/*
1444 		 * s_mp->sbm_del_mlist may still needed
1445 		 */
1446 		PR_MEM("%s: source delete memeory flag %d",
1447 		    f, s_excess_mem_deleted);
1448 		PR_MEM("%s: source delete memlist", f);
1449 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1450 #endif
1451 
1452 	}
1453 
1454 	if (t_mp != NULL) {
1455 		/* delete target's entire address space */
1456 		err = drmach_mem_del_span(
1457 		    t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1458 		if (err)
1459 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1460 		ASSERT(err == NULL);
1461 
1462 		/*
1463 		 * After the copy/rename, the original address space
1464 		 * for the source board (which is now located on the
1465 		 * target board) may now have some excess to be deleted.
1466 		 * Those excess memory on the source board are kept in
1467 		 * source board's sbm_del_mlist
1468 		 */
1469 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1470 		    ml = ml->next) {
1471 			PR_MEM("%s: delete source excess memory", f);
1472 			PR_MEMLIST_DUMP(ml);
1473 
1474 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1475 			    ml->address, ml->size);
1476 			if (err)
1477 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1478 			ASSERT(err == NULL);
1479 		}
1480 
1481 	} else {
1482 		/* delete board's entire address space */
1483 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1484 		    s_old_basepa & ~ sm, sz);
1485 		if (err)
1486 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1487 		ASSERT(err == NULL);
1488 	}
1489 
1490 cleanup:
1491 	/* clean up target mem unit */
1492 	if (t_mp != NULL) {
1493 		memlist_delete(t_mp->sbm_del_mlist);
1494 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
1495 
1496 		t_mp->sbm_del_mlist = NULL;
1497 		t_mp->sbm_mlist = NULL;
1498 		t_mp->sbm_peer = NULL;
1499 		t_mp->sbm_flags = 0;
1500 		t_mp->sbm_cm.sbdev_busy = 0;
1501 		dr_init_mem_unit_data(t_mp);
1502 
1503 	}
1504 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1505 		/*
1506 		 * now that copy/rename has completed, undo this
1507 		 * work that was done in dr_release_mem_done.
1508 		 */
1509 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1510 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1511 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1512 	}
1513 
1514 	/*
1515 	 * clean up (source) board's mem unit structure.
1516 	 * NOTE: sbm_mlist is retained if no error has been record (in other
1517 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1518 	 * referred to elsewhere as the cached memlist.  The cached memlist
1519 	 * is used to re-attach (configure back in) this memunit from the
1520 	 * unconfigured state.  The memlist is retained because it may
1521 	 * represent bad pages that were detected while the memory was
1522 	 * configured into the OS.  The OS deletes bad pages from phys_install.
1523 	 * Those deletes, if any, will be represented in the cached mlist.
1524 	 */
1525 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1526 		memlist_delete(s_mp->sbm_del_mlist);
1527 
1528 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1529 		memlist_delete(s_mp->sbm_mlist);
1530 		s_mp->sbm_mlist = NULL;
1531 	}
1532 
1533 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1534 		memlist_delete(s_mp->sbm_dyn_segs);
1535 		s_mp->sbm_dyn_segs = NULL;
1536 	}
1537 
1538 	s_mp->sbm_del_mlist = NULL;
1539 	s_mp->sbm_peer = NULL;
1540 	s_mp->sbm_flags = 0;
1541 	s_mp->sbm_cm.sbdev_busy = 0;
1542 	dr_init_mem_unit_data(s_mp);
1543 
1544 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1545 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1546 
1547 	return (0);
1548 }
1549 
1550 /*
1551  * Successful return from this function will have the memory
1552  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1553  * and waiting.  This routine's job is to select the memory that
1554  * actually has to be released (detached) which may not necessarily
1555  * be the same memory node that came in in devlist[],
1556  * i.e. a copy-rename is needed.
1557  */
1558 int
1559 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1560 {
1561 	int		d;
1562 	int		err_flag = 0;
1563 	static fn_t	f = "dr_pre_release_mem";
1564 
1565 	PR_MEM("%s...\n", f);
1566 
1567 	for (d = 0; d < devnum; d++) {
1568 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1569 		int		rv;
1570 		memquery_t	mq;
1571 		struct memlist	*ml;
1572 
1573 		if (mp->sbm_cm.sbdev_error) {
1574 			err_flag = 1;
1575 			continue;
1576 		} else if (!kcage_on) {
1577 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1578 			err_flag = 1;
1579 			continue;
1580 		}
1581 
1582 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1583 			/*
1584 			 * Board is currently involved in a delete
1585 			 * memory operation. Can't detach this guy until
1586 			 * that operation completes.
1587 			 */
1588 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1589 			err_flag = 1;
1590 			break;
1591 		}
1592 
1593 		/* flags should be clean at this time */
1594 		ASSERT(mp->sbm_flags == 0);
1595 
1596 		ASSERT(mp->sbm_mlist == NULL);
1597 		ASSERT(mp->sbm_del_mlist == NULL);
1598 		if (mp->sbm_mlist != NULL) {
1599 			memlist_delete(mp->sbm_mlist);
1600 			mp->sbm_mlist = NULL;
1601 		}
1602 
1603 		ml = dr_get_memlist(mp);
1604 		if (ml == NULL) {
1605 			err_flag = 1;
1606 			PR_MEM("%s: no memlist found for %s\n",
1607 			    f, mp->sbm_cm.sbdev_path);
1608 			continue;
1609 		}
1610 
1611 		/*
1612 		 * Check whether the detaching memory requires a
1613 		 * copy-rename.
1614 		 */
1615 		ASSERT(mp->sbm_npages != 0);
1616 
1617 		rv = dr_del_mlist_query(ml, &mq);
1618 		if (rv != KPHYSM_OK) {
1619 			memlist_delete(ml);
1620 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1621 			err_flag = 1;
1622 			break;
1623 		}
1624 
1625 		if (mq.nonrelocatable != 0) {
1626 			if (!(dr_cmd_flags(hp) &
1627 			    (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1628 				memlist_delete(ml);
1629 				/* caller wasn't prompted for a suspend */
1630 				dr_dev_err(CE_WARN, &mp->sbm_cm,
1631 				    ESBD_QUIESCE_REQD);
1632 				err_flag = 1;
1633 				break;
1634 			}
1635 		}
1636 
1637 		/* allocate a kphysm handle */
1638 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1639 		if (rv != KPHYSM_OK) {
1640 			memlist_delete(ml);
1641 
1642 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1643 			err_flag = 1;
1644 			break;
1645 		}
1646 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
1647 
1648 		if ((mq.nonrelocatable != 0) ||
1649 		    dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1650 			/*
1651 			 * Either the detaching memory node contains
1652 			 * non-reloc memory or we failed to reserve the
1653 			 * detaching memory node (which did _not_ have
1654 			 * any non-reloc memory, i.e. some non-reloc mem
1655 			 * got onboard).
1656 			 */
1657 
1658 			if (dr_select_mem_target(hp, mp, ml)) {
1659 				int rv;
1660 
1661 				/*
1662 				 * We had no luck locating a target
1663 				 * memory node to be the recipient of
1664 				 * the non-reloc memory on the node
1665 				 * we're trying to detach.
1666 				 * Clean up be disposing the mem handle
1667 				 * and the mem list.
1668 				 */
1669 				rv = kphysm_del_release(mp->sbm_memhandle);
1670 				if (rv != KPHYSM_OK) {
1671 					/*
1672 					 * can do nothing but complain
1673 					 * and hope helpful for debug
1674 					 */
1675 					cmn_err(CE_WARN, "%s: unexpected"
1676 					    " kphysm_del_release return"
1677 					    " value %d",
1678 					    f, rv);
1679 				}
1680 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1681 
1682 				memlist_delete(ml);
1683 
1684 				/* make sure sbm_flags is clean */
1685 				ASSERT(mp->sbm_flags == 0);
1686 
1687 				dr_dev_err(CE_WARN,
1688 				    &mp->sbm_cm, ESBD_NO_TARGET);
1689 
1690 				err_flag = 1;
1691 				break;
1692 			}
1693 
1694 			/*
1695 			 * ml is not memlist_delete'd here because
1696 			 * it has been assigned to mp->sbm_mlist
1697 			 * by dr_select_mem_target.
1698 			 */
1699 		} else {
1700 			/* no target needed to detach this board */
1701 			mp->sbm_flags |= DR_MFLAG_RESERVED;
1702 			mp->sbm_peer = NULL;
1703 			mp->sbm_del_mlist = ml;
1704 			mp->sbm_mlist = ml;
1705 			mp->sbm_cm.sbdev_busy = 1;
1706 		}
1707 #ifdef DEBUG
1708 		ASSERT(mp->sbm_mlist != NULL);
1709 
1710 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1711 			PR_MEM("%s: release of %s requires copy/rename;"
1712 			    " selected target board %s\n",
1713 			    f,
1714 			    mp->sbm_cm.sbdev_path,
1715 			    mp->sbm_peer->sbm_cm.sbdev_path);
1716 		} else {
1717 			PR_MEM("%s: copy/rename not required to release %s\n",
1718 			    f, mp->sbm_cm.sbdev_path);
1719 		}
1720 
1721 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1722 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1723 #endif
1724 	}
1725 
1726 	return (err_flag ? -1 : 0);
1727 }
1728 
1729 void
1730 dr_release_mem_done(dr_common_unit_t *cp)
1731 {
1732 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
1733 	dr_mem_unit_t *t_mp, *mp;
1734 	int		rv;
1735 	static fn_t	f = "dr_release_mem_done";
1736 
1737 	/*
1738 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1739 	 * has a target unit.
1740 	 */
1741 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1742 		t_mp = s_mp->sbm_peer;
1743 		ASSERT(t_mp != NULL);
1744 		ASSERT(t_mp->sbm_peer == s_mp);
1745 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1746 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1747 	} else {
1748 		/* this is no target unit */
1749 		t_mp = NULL;
1750 	}
1751 
1752 	/* free delete handle */
1753 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1754 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1755 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1756 	if (rv != KPHYSM_OK) {
1757 		/*
1758 		 * can do nothing but complain
1759 		 * and hope helpful for debug
1760 		 */
1761 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1762 		    " return value %d", f, rv);
1763 	}
1764 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1765 
1766 	/*
1767 	 * If an error was encountered during release, clean up
1768 	 * the source (and target, if present) unit data.
1769 	 */
1770 /* XXX Can we know that sbdev_error was encountered during release? */
1771 	if (s_mp->sbm_cm.sbdev_error != NULL) {
1772 
1773 		if (t_mp != NULL) {
1774 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1775 			t_mp->sbm_del_mlist = NULL;
1776 
1777 			if (t_mp->sbm_mlist != NULL) {
1778 				memlist_delete(t_mp->sbm_mlist);
1779 				t_mp->sbm_mlist = NULL;
1780 			}
1781 
1782 			t_mp->sbm_peer = NULL;
1783 			t_mp->sbm_flags = 0;
1784 			t_mp->sbm_cm.sbdev_busy = 0;
1785 		}
1786 
1787 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1788 			memlist_delete(s_mp->sbm_del_mlist);
1789 		s_mp->sbm_del_mlist = NULL;
1790 
1791 		if (s_mp->sbm_mlist != NULL) {
1792 			memlist_delete(s_mp->sbm_mlist);
1793 			s_mp->sbm_mlist = NULL;
1794 		}
1795 
1796 		s_mp->sbm_peer = NULL;
1797 		s_mp->sbm_flags = 0;
1798 		s_mp->sbm_cm.sbdev_busy = 0;
1799 
1800 		/* bail out */
1801 		return;
1802 	}
1803 
1804 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1805 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1806 
1807 	if (t_mp != NULL) {
1808 		/*
1809 		 * the kphysm delete operation that drained the source
1810 		 * board also drained this target board.  Since the source
1811 		 * board drain is now known to have succeeded, we know this
1812 		 * target board is drained too.
1813 		 *
1814 		 * because DR_DEV_SET_RELEASED and dr_device_transition
1815 		 * is done here, the dr_release_dev_done should not
1816 		 * fail.
1817 		 */
1818 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1819 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1820 
1821 		/*
1822 		 * NOTE: do not transition target's board state,
1823 		 * even if the mem-unit was the last configure
1824 		 * unit of the board.  When copy/rename completes
1825 		 * this mem-unit will transitioned back to
1826 		 * the configured state.  In the meantime, the
1827 		 * board's must remain as is.
1828 		 */
1829 	}
1830 
1831 	/* if board(s) had deleted memory, verify it is gone */
1832 	rv = 0;
1833 	memlist_read_lock();
1834 	if (s_mp->sbm_del_mlist != NULL) {
1835 		mp = s_mp;
1836 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1837 	}
1838 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1839 		mp = t_mp;
1840 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1841 	}
1842 	memlist_read_unlock();
1843 	if (rv) {
1844 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1845 		    "deleted memory still found in phys_install",
1846 		    f,
1847 		    (mp == t_mp ? "target " : ""),
1848 		    mp->sbm_cm.sbdev_bp->b_num,
1849 		    mp->sbm_cm.sbdev_unum);
1850 
1851 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1852 		return;
1853 	}
1854 
1855 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1856 	if (t_mp != NULL)
1857 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1858 
1859 	/* this should not fail */
1860 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1861 		/* catch this in debug kernels */
1862 		ASSERT(0);
1863 		return;
1864 	}
1865 
1866 	PR_MEM("%s: marking %s release DONE\n",
1867 	    f, s_mp->sbm_cm.sbdev_path);
1868 
1869 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1870 
1871 	if (t_mp != NULL) {
1872 		/* should not fail */
1873 		rv = dr_release_dev_done(&t_mp->sbm_cm);
1874 		if (rv != 0) {
1875 			/* catch this in debug kernels */
1876 			ASSERT(0);
1877 			return;
1878 		}
1879 
1880 		PR_MEM("%s: marking %s release DONE\n",
1881 		    f, t_mp->sbm_cm.sbdev_path);
1882 
1883 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1884 	}
1885 }
1886 
1887 /*ARGSUSED*/
1888 int
1889 dr_disconnect_mem(dr_mem_unit_t *mp)
1890 {
1891 	static fn_t	f = "dr_disconnect_mem";
1892 	update_membounds_t umb;
1893 
1894 #ifdef DEBUG
1895 	int state = mp->sbm_cm.sbdev_state;
1896 	ASSERT(state == DR_STATE_CONNECTED ||
1897 	    state == DR_STATE_UNCONFIGURED);
1898 #endif
1899 
1900 	PR_MEM("%s...\n", f);
1901 
1902 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1903 		memlist_delete(mp->sbm_del_mlist);
1904 	mp->sbm_del_mlist = NULL;
1905 
1906 	if (mp->sbm_mlist) {
1907 		memlist_delete(mp->sbm_mlist);
1908 		mp->sbm_mlist = NULL;
1909 	}
1910 
1911 	/*
1912 	 * Remove memory from lgroup
1913 	 * For now, only board info is required.
1914 	 */
1915 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1916 	umb.u_base = (uint64_t)-1;
1917 	umb.u_len = (uint64_t)-1;
1918 
1919 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1920 
1921 	return (0);
1922 }
1923 
1924 int
1925 dr_cancel_mem(dr_mem_unit_t *s_mp)
1926 {
1927 	dr_mem_unit_t	*t_mp;
1928 	dr_state_t	state;
1929 	static fn_t	f = "dr_cancel_mem";
1930 
1931 	state = s_mp->sbm_cm.sbdev_state;
1932 
1933 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1934 		/* must cancel source board, not target board */
1935 		/* TODO: set error */
1936 		return (-1);
1937 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1938 		t_mp = s_mp->sbm_peer;
1939 		ASSERT(t_mp != NULL);
1940 		ASSERT(t_mp->sbm_peer == s_mp);
1941 
1942 		/* must always match the source board's state */
1943 		/* TODO: is this assertion correct? */
1944 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1945 	} else {
1946 		/* this is no target unit */
1947 		t_mp = NULL;
1948 	}
1949 
1950 	switch (state) {
1951 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
1952 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1953 
1954 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1955 			PR_MEM("%s: undoing target %s memory delete\n",
1956 			    f, t_mp->sbm_cm.sbdev_path);
1957 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1958 
1959 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1960 		}
1961 
1962 		if (s_mp->sbm_del_mlist != NULL) {
1963 			PR_MEM("%s: undoing %s memory delete\n",
1964 			    f, s_mp->sbm_cm.sbdev_path);
1965 
1966 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1967 		}
1968 
1969 		/*FALLTHROUGH*/
1970 
1971 /* TODO: should no longer be possible to see the release state here */
1972 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
1973 
1974 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1975 
1976 		if (t_mp != NULL) {
1977 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1978 			t_mp->sbm_del_mlist = NULL;
1979 
1980 			if (t_mp->sbm_mlist != NULL) {
1981 				memlist_delete(t_mp->sbm_mlist);
1982 				t_mp->sbm_mlist = NULL;
1983 			}
1984 
1985 			t_mp->sbm_peer = NULL;
1986 			t_mp->sbm_flags = 0;
1987 			t_mp->sbm_cm.sbdev_busy = 0;
1988 			dr_init_mem_unit_data(t_mp);
1989 
1990 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1991 
1992 			dr_device_transition(
1993 			    &t_mp->sbm_cm, DR_STATE_CONFIGURED);
1994 		}
1995 
1996 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1997 			memlist_delete(s_mp->sbm_del_mlist);
1998 		s_mp->sbm_del_mlist = NULL;
1999 
2000 		if (s_mp->sbm_mlist != NULL) {
2001 			memlist_delete(s_mp->sbm_mlist);
2002 			s_mp->sbm_mlist = NULL;
2003 		}
2004 
2005 		s_mp->sbm_peer = NULL;
2006 		s_mp->sbm_flags = 0;
2007 		s_mp->sbm_cm.sbdev_busy = 0;
2008 		dr_init_mem_unit_data(s_mp);
2009 
2010 		return (0);
2011 
2012 	default:
2013 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
2014 		    f, (int)state, s_mp->sbm_cm.sbdev_path);
2015 
2016 		return (-1);
2017 	}
2018 	/*NOTREACHED*/
2019 }
2020 
2021 void
2022 dr_init_mem_unit(dr_mem_unit_t *mp)
2023 {
2024 	dr_state_t	new_state;
2025 
2026 
2027 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
2028 		new_state = DR_STATE_CONFIGURED;
2029 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2030 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2031 		new_state = DR_STATE_CONNECTED;
2032 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2033 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2034 		new_state = DR_STATE_OCCUPIED;
2035 	} else {
2036 		new_state = DR_STATE_EMPTY;
2037 	}
2038 
2039 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2040 		dr_init_mem_unit_data(mp);
2041 
2042 	/* delay transition until fully initialized */
2043 	dr_device_transition(&mp->sbm_cm, new_state);
2044 }
2045 
2046 static void
2047 dr_init_mem_unit_data(dr_mem_unit_t *mp)
2048 {
2049 	drmachid_t	id = mp->sbm_cm.sbdev_id;
2050 	drmach_mem_info_t	minfo;
2051 	sbd_error_t	*err;
2052 	static fn_t	f = "dr_init_mem_unit_data";
2053 	update_membounds_t umb;
2054 
2055 	PR_MEM("%s...\n", f);
2056 
2057 	/* a little sanity checking */
2058 	ASSERT(mp->sbm_peer == NULL);
2059 	ASSERT(mp->sbm_flags == 0);
2060 
2061 	if (err = drmach_mem_get_info(id, &minfo)) {
2062 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2063 		return;
2064 	}
2065 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2066 	mp->sbm_npages = _b64top(minfo.mi_size);
2067 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2068 	mp->sbm_slice_size = minfo.mi_slice_size;
2069 
2070 	/*
2071 	 * Add memory to lgroup
2072 	 */
2073 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2074 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2075 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2076 
2077 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2078 
2079 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2080 	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2081 }
2082 
2083 static int
2084 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2085 {
2086 	int		err;
2087 	pfn_t		base;
2088 	pgcnt_t		npgs;
2089 	struct memlist	*mc;
2090 	static fn_t	f = "dr_reserve_mem_spans";
2091 
2092 	PR_MEM("%s...\n", f);
2093 
2094 	/*
2095 	 * Walk the supplied memlist scheduling each span for removal
2096 	 * with kphysm_del_span.  It is possible that a span may intersect
2097 	 * an area occupied by the cage.
2098 	 */
2099 	for (mc = ml; mc != NULL; mc = mc->next) {
2100 		base = _b64top(mc->address);
2101 		npgs = _b64top(mc->size);
2102 
2103 		err = kphysm_del_span(*mhp, base, npgs);
2104 		if (err != KPHYSM_OK) {
2105 			cmn_err(CE_WARN, "%s memory reserve failed."
2106 			    " unexpected kphysm_del_span return value %d;"
2107 			    " basepfn=0x%lx npages=%ld",
2108 			    f, err, base, npgs);
2109 
2110 			return (-1);
2111 		}
2112 	}
2113 
2114 	return (0);
2115 }
2116 
/*
 * Sizing of the candidate table used when selecting a copy/rename
 * target: DR_SMT_NPREF_SETS preference sets, each with one slot per
 * memory unit in the system.  The product is parenthesized so the
 * macro expands safely inside any larger expression.
 */
#define	DR_SMT_NPREF_SETS	6
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters */
int dr_smt_realigned;
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
2127 
2128 /*
2129  * Find and reserve a copy/rename target board suitable for the
2130  * given source board.
2131  * All boards in the system are examined and categorized in relation to
2132  * their memory size versus the source board's memory size.  Order of
2133  * preference is:
2134  *	1st copy all source, source/target same size
2135  *	2nd copy all source, larger target
2136  * 	3rd copy nonrelocatable source span
2137  */
2138 static int
2139 dr_select_mem_target(dr_handle_t *hp,
2140 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
2141 {
2142 	dr_target_pref_t preference; /* lower value is higher preference */
2143 	int		idx;
2144 	dr_mem_unit_t	**sets;
2145 
2146 	int		t_bd;
2147 	int		t_unit;
2148 	int		rv;
2149 	dr_board_t	*s_bp, *t_bp;
2150 	dr_mem_unit_t	*t_mp, *c_mp;
2151 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2152 	memquery_t	s_mq = {0};
2153 	static fn_t	f = "dr_select_mem_target";
2154 
2155 	PR_MEM("%s...\n", f);
2156 
2157 	ASSERT(s_ml != NULL);
2158 
2159 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2160 	    DR_SMT_NPREF_SETS);
2161 
2162 	s_bp = hp->h_bd;
2163 	/* calculate the offset into the slice of the last source board pfn */
2164 	ASSERT(s_mp->sbm_npages != 0);
2165 
2166 	/*
2167 	 * Find non-relocatable span on source board.
2168 	 */
2169 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2170 	if (rv != KPHYSM_OK) {
2171 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2172 		    " return value %d; basepfn 0x%lx, npages %ld\n",
2173 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2174 		    s_mp->sbm_npages);
2175 		return (-1);
2176 	}
2177 
2178 	ASSERT(s_mq.phys_pages != 0);
2179 	ASSERT(s_mq.nonrelocatable != 0);
2180 
2181 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2182 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2183 	    s_mq.last_nonrelocatable);
2184 
2185 	/* break down s_ml if it contains dynamic segments */
2186 	b_ml = memlist_dup(s_ml);
2187 
2188 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
2189 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
2190 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
2191 	}
2192 
2193 
2194 	/*
2195 	 * Make one pass through all memory units on all boards
2196 	 * and categorize them with respect to the source board.
2197 	 */
2198 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2199 		/*
2200 		 * The board structs are a contiguous array
2201 		 * so we take advantage of that to find the
2202 		 * correct board struct pointer for a given
2203 		 * board number.
2204 		 */
2205 		t_bp = dr_lookup_board(t_bd);
2206 
2207 		/* source board can not be its own target */
2208 		if (s_bp->b_num == t_bp->b_num)
2209 			continue;
2210 
2211 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2212 
2213 			t_mp = dr_get_mem_unit(t_bp, t_unit);
2214 
2215 			/* this memory node must be attached */
2216 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2217 				continue;
2218 
2219 			/* source unit can not be its own target */
2220 			if (s_mp == t_mp) {
2221 				/* catch this is debug kernels */
2222 				ASSERT(0);
2223 				continue;
2224 			}
2225 
2226 			/*
2227 			 * this memory node must not already be reserved
2228 			 * by some other memory delete operation.
2229 			 */
2230 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2231 				continue;
2232 
2233 			/* get target board memlist */
2234 			t_ml = dr_get_memlist(t_mp);
2235 			if (t_ml == NULL) {
2236 				cmn_err(CE_WARN, "%s: no memlist for"
2237 				    " mem-unit %d, board %d", f,
2238 				    t_mp->sbm_cm.sbdev_bp->b_num,
2239 				    t_mp->sbm_cm.sbdev_unum);
2240 				continue;
2241 			}
2242 
2243 			preference = dr_get_target_preference(hp, t_mp, s_mp,
2244 			    t_ml, s_ml, b_ml);
2245 
2246 			memlist_delete(t_ml);
2247 
2248 			if (preference == DR_TP_INVALID)
2249 				continue;
2250 
2251 			dr_smt_preference[preference]++;
2252 
2253 			/* calculate index to start of preference set */
2254 			idx  = DR_SMT_NUNITS_PER_SET * preference;
2255 			/* calculate offset to respective element */
2256 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2257 
2258 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2259 			sets[idx] = t_mp;
2260 		}
2261 	}
2262 
2263 	if (b_ml != NULL)
2264 		memlist_delete(b_ml);
2265 
2266 	/*
2267 	 * NOTE: this would be a good place to sort each candidate
2268 	 * set in to some desired order, e.g. memory size in ascending
2269 	 * order.  Without an additional sorting step here, the order
2270 	 * within a set is ascending board number order.
2271 	 */
2272 
2273 	c_mp = NULL;
2274 	x_ml = NULL;
2275 	t_ml = NULL;
2276 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2277 		memquery_t mq;
2278 
2279 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2280 
2281 		ASSERT(preference != DR_TP_INVALID);
2282 
2283 		/* cleanup t_ml after previous pass */
2284 		if (t_ml != NULL) {
2285 			memlist_delete(t_ml);
2286 			t_ml = NULL;
2287 		}
2288 
2289 		/* get candidate target board mem unit */
2290 		t_mp = sets[idx];
2291 		if (t_mp == NULL)
2292 			continue;
2293 
2294 		/* get target board memlist */
2295 		t_ml = dr_get_memlist(t_mp);
2296 		if (t_ml == NULL) {
2297 			cmn_err(CE_WARN, "%s: no memlist for"
2298 			    " mem-unit %d, board %d",
2299 			    f,
2300 			    t_mp->sbm_cm.sbdev_bp->b_num,
2301 			    t_mp->sbm_cm.sbdev_unum);
2302 
2303 			continue;
2304 		}
2305 
2306 		PR_MEM("%s: checking for no-reloc in %s, "
2307 		    " basepfn=0x%lx, npages=%ld\n",
2308 		    f,
2309 		    t_mp->sbm_cm.sbdev_path,
2310 		    t_mp->sbm_basepfn,
2311 		    t_mp->sbm_npages);
2312 
2313 		rv = dr_del_mlist_query(t_ml, &mq);
2314 		if (rv != KPHYSM_OK) {
2315 			PR_MEM("%s: kphysm_del_span_query:"
2316 			    " unexpected return value %d\n", f, rv);
2317 
2318 			continue;
2319 		}
2320 
2321 		if (mq.nonrelocatable != 0) {
2322 			PR_MEM("%s: candidate %s has"
2323 			    " nonrelocatable span [0x%lx..0x%lx]\n",
2324 			    f,
2325 			    t_mp->sbm_cm.sbdev_path,
2326 			    mq.first_nonrelocatable,
2327 			    mq.last_nonrelocatable);
2328 
2329 			continue;
2330 		}
2331 
2332 #ifdef DEBUG
2333 		/*
2334 		 * This is a debug tool for excluding certain boards
2335 		 * from being selected as a target board candidate.
2336 		 * dr_ignore_board is only tested by this driver.
2337 		 * It must be set with adb, obp, /etc/system or your
2338 		 * favorite debugger.
2339 		 */
2340 		if (dr_ignore_board &
2341 		    (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2342 			PR_MEM("%s: dr_ignore_board flag set,"
2343 			    " ignoring %s as candidate\n",
2344 			    f, t_mp->sbm_cm.sbdev_path);
2345 			continue;
2346 		}
2347 #endif
2348 
2349 		/*
2350 		 * Reserve excess source board memory, if any.
2351 		 *
2352 		 * Only the nonrelocatable source span will be copied
2353 		 * so schedule the rest of the source mem to be deleted.
2354 		 */
2355 		switch (preference) {
2356 		case DR_TP_NONRELOC:
2357 			/*
2358 			 * Get source copy memlist and use it to construct
2359 			 * delete memlist.
2360 			 */
2361 			d_ml = memlist_dup(s_ml);
2362 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2363 
2364 			/* XXX */
2365 			ASSERT(d_ml != NULL);
2366 			ASSERT(x_ml != NULL);
2367 
2368 			for (ml = x_ml; ml != NULL; ml = ml->next) {
2369 				d_ml = memlist_del_span(d_ml, ml->address,
2370 				    ml->size);
2371 			}
2372 
2373 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2374 			    s_mp->sbm_cm.sbdev_path);
2375 			PR_MEMLIST_DUMP(d_ml);
2376 
2377 			/* reserve excess spans */
2378 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2379 			    d_ml) != 0) {
2380 				/* likely more non-reloc pages appeared */
2381 				/* TODO: restart from top? */
2382 				continue;
2383 			}
2384 			break;
2385 		default:
2386 			d_ml = NULL;
2387 			break;
2388 		}
2389 
2390 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2391 
2392 		/*
2393 		 * reserve all memory on target board.
2394 		 * NOTE: source board's memhandle is used.
2395 		 *
2396 		 * If this succeeds (eq 0), then target selection is
2397 		 * complete and all unwanted memory spans, both source and
2398 		 * target, have been reserved.  Loop is terminated.
2399 		 */
2400 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2401 			PR_MEM("%s: %s: target board memory reserved\n",
2402 			    f, t_mp->sbm_cm.sbdev_path);
2403 
2404 			/* a candidate target board is now reserved */
2405 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2406 			c_mp = t_mp;
2407 
2408 			/* *** EXITING LOOP *** */
2409 			break;
2410 		}
2411 
2412 		/* did not successfully reserve the target board. */
2413 		PR_MEM("%s: could not reserve target %s\n",
2414 		    f, t_mp->sbm_cm.sbdev_path);
2415 
2416 		/*
2417 		 * NOTE: an undo of the dr_reserve_mem_span work
2418 		 * will happen automatically when the memhandle
2419 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2420 		 */
2421 
2422 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2423 	}
2424 
2425 	/* clean up after memlist editing logic */
2426 	if (x_ml != NULL)
2427 		memlist_delete(x_ml);
2428 
2429 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2430 	    DR_SMT_NPREF_SETS);
2431 
2432 	/*
2433 	 * c_mp will be NULL when the entire sets[] array
2434 	 * has been searched without reserving a target board.
2435 	 */
2436 	if (c_mp == NULL) {
2437 		PR_MEM("%s: %s: target selection failed.\n",
2438 		    f, s_mp->sbm_cm.sbdev_path);
2439 
2440 		if (t_ml != NULL)
2441 			memlist_delete(t_ml);
2442 
2443 		return (-1);
2444 	}
2445 
2446 	PR_MEM("%s: found target %s for source %s\n",
2447 	    f,
2448 	    c_mp->sbm_cm.sbdev_path,
2449 	    s_mp->sbm_cm.sbdev_path);
2450 
2451 	s_mp->sbm_peer = c_mp;
2452 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2453 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2454 	s_mp->sbm_mlist = s_ml;
2455 	s_mp->sbm_cm.sbdev_busy = 1;
2456 
2457 	c_mp->sbm_peer = s_mp;
2458 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
2459 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2460 	c_mp->sbm_mlist = t_ml;
2461 	c_mp->sbm_cm.sbdev_busy = 1;
2462 
2463 	return (0);
2464 }
2465 
2466 /*
2467  * Returns target preference rank:
2468  *     -1 not a valid copy-rename target board
2469  *	0 copy all source, source/target same size
2470  *	1 copy all source, larger target
2471  * 	2 copy nonrelocatable source span
2472  */
2473 static dr_target_pref_t
2474 dr_get_target_preference(dr_handle_t *hp,
2475     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2476     struct memlist *t_ml, struct memlist *s_ml,
2477     struct memlist *b_ml)
2478 {
2479 	dr_target_pref_t preference;
2480 	struct memlist *s_nonreloc_ml = NULL;
2481 	drmachid_t t_id;
2482 	static fn_t	f = "dr_get_target_preference";
2483 
2484 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2485 
2486 	/*
2487 	 * Can the entire source board be copied?
2488 	 */
2489 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2490 		if (s_mp->sbm_npages == t_mp->sbm_npages)
2491 			preference = DR_TP_SAME;	/* same size */
2492 		else
2493 			preference = DR_TP_LARGE;	/* larger target */
2494 	} else {
2495 		/*
2496 		 * Entire source won't fit so try non-relocatable memory only
2497 		 * (target aligned).
2498 		 */
2499 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2500 		if (s_nonreloc_ml == NULL) {
2501 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2502 			preference = DR_TP_INVALID;
2503 		}
2504 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
2505 			preference = DR_TP_NONRELOC;
2506 		else
2507 			preference = DR_TP_INVALID;
2508 	}
2509 
2510 	if (s_nonreloc_ml != NULL)
2511 		memlist_delete(s_nonreloc_ml);
2512 
2513 	/*
2514 	 * Force floating board preference lower than all other boards
2515 	 * if the force flag is present; otherwise disallow the board.
2516 	 */
2517 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2518 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2519 			preference += DR_TP_FLOATING;
2520 		else
2521 			preference = DR_TP_INVALID;
2522 	}
2523 
2524 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2525 	    preference);
2526 
2527 	return (preference);
2528 }
2529 
2530 /*
2531  * Create a memlist representing the source memory that will be copied to
2532  * the target board.  The memory to be copied is the maximum amount that
2533  * will fit on the target board.
2534  */
2535 static struct memlist *
2536 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2537     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2538 {
2539 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2540 	uint64_t	s_slice_mask, s_slice_base;
2541 	uint64_t	t_slice_mask, t_slice_base;
2542 	static fn_t	f = "dr_get_copy_mlist";
2543 
2544 	ASSERT(s_mlist != NULL);
2545 	ASSERT(t_mlist != NULL);
2546 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2547 
2548 	s_slice_mask = s_mp->sbm_slice_size - 1;
2549 	s_slice_base = s_mlist->address & ~s_slice_mask;
2550 
2551 	t_slice_mask = t_mp->sbm_slice_size - 1;
2552 	t_slice_base = t_mlist->address & ~t_slice_mask;
2553 
2554 	t_ml = memlist_dup(t_mlist);
2555 	s_del_ml = memlist_dup(s_mlist);
2556 	s_copy_ml = memlist_dup(s_mlist);
2557 
2558 	/* XXX */
2559 	ASSERT(t_ml != NULL);
2560 	ASSERT(s_del_ml != NULL);
2561 	ASSERT(s_copy_ml != NULL);
2562 
2563 	/*
2564 	 * To construct the source copy memlist:
2565 	 *
2566 	 * The target memlist is converted to the post-rename
2567 	 * source addresses.  This is the physical address range
2568 	 * the target will have after the copy-rename.  Overlaying
2569 	 * and deleting this from the current source memlist will
2570 	 * give the source delete memlist.  The copy memlist is
2571 	 * the reciprocal of the source delete memlist.
2572 	 */
2573 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2574 		/*
2575 		 * Normalize relative to target slice base PA
2576 		 * in order to preseve slice offsets.
2577 		 */
2578 		ml->address -= t_slice_base;
2579 		/*
2580 		 * Convert to source slice PA address.
2581 		 */
2582 		ml->address += s_slice_base;
2583 	}
2584 
2585 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2586 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
2587 	}
2588 
2589 	/*
2590 	 * Expand the delete mlist to fully include any dynamic segments
2591 	 * it intersects with.
2592 	 */
2593 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
2594 		uint64_t del_base = ml->address;
2595 		uint64_t del_end = ml->address + ml->size;
2596 		struct memlist *dyn;
2597 
2598 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2599 			uint64_t dyn_base = dyn->address;
2600 			uint64_t dyn_end = dyn->address + dyn->size;
2601 
2602 			if (del_base > dyn_base && del_base < dyn_end)
2603 				del_base = dyn_base;
2604 
2605 			if (del_end > dyn_base && del_end < dyn_end)
2606 				del_end = dyn_end;
2607 		}
2608 
2609 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2610 	}
2611 
2612 	memlist_delete(s_del_ml);
2613 	s_del_ml = x_ml;
2614 
2615 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
2616 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
2617 	}
2618 
2619 	PR_MEM("%s: source delete mlist\n", f);
2620 	PR_MEMLIST_DUMP(s_del_ml);
2621 
2622 	PR_MEM("%s: source copy mlist\n", f);
2623 	PR_MEMLIST_DUMP(s_copy_ml);
2624 
2625 	memlist_delete(t_ml);
2626 	memlist_delete(s_del_ml);
2627 
2628 	return (s_copy_ml);
2629 }
2630 
2631 /*
2632  * Scan the non-relocatable spans on the source memory
2633  * and construct a minimum mlist that includes all non-reloc
2634  * memory subject to target alignment, and dynamic segment
2635  * constraints where only whole dynamic segments may be deleted.
2636  */
2637 static struct memlist *
2638 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2639 {
2640 	struct memlist	*x_ml = NULL;
2641 	struct memlist	*ml;
2642 	static fn_t	f = "dr_get_nonreloc_mlist";
2643 
2644 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
2645 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2646 
2647 	for (ml = s_ml; ml; ml = ml->next) {
2648 		int rv;
2649 		uint64_t nr_base, nr_end;
2650 		memquery_t mq;
2651 		struct memlist *dyn;
2652 
2653 		rv = kphysm_del_span_query(
2654 		    _b64top(ml->address), _b64top(ml->size), &mq);
2655 		if (rv) {
2656 			memlist_delete(x_ml);
2657 			return (NULL);
2658 		}
2659 
2660 		if (mq.nonrelocatable == 0)
2661 			continue;
2662 
2663 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2664 		    _ptob64(mq.first_nonrelocatable),
2665 		    _ptob64(mq.last_nonrelocatable),
2666 		    mq.first_nonrelocatable,
2667 		    mq.last_nonrelocatable);
2668 
2669 		/*
2670 		 * Align the span at both ends to allow for possible
2671 		 * cage expansion.
2672 		 */
2673 		nr_base = _ptob64(mq.first_nonrelocatable);
2674 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
2675 
2676 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2677 		    f, nr_base, nr_end);
2678 
2679 		/*
2680 		 * Expand the non-reloc span to fully include any
2681 		 * dynamic segments it intersects with.
2682 		 */
2683 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2684 			uint64_t dyn_base = dyn->address;
2685 			uint64_t dyn_end = dyn->address + dyn->size;
2686 
2687 			if (nr_base > dyn_base && nr_base < dyn_end)
2688 				nr_base = dyn_base;
2689 
2690 			if (nr_end > dyn_base && nr_end < dyn_end)
2691 				nr_end = dyn_end;
2692 		}
2693 
2694 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2695 	}
2696 
2697 	if (x_ml == NULL) {
2698 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2699 		return (NULL);
2700 	}
2701 
2702 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2703 	PR_MEMLIST_DUMP(x_ml);
2704 
2705 	return (x_ml);
2706 }
2707 
2708 /*
2709  * Check if source memlist can fit in target memlist while maintaining
2710  * relative offsets within board.
2711  */
2712 static int
2713 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2714     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2715 {
2716 	int		canfit = 0;
2717 	struct memlist	*s_ml, *t_ml, *ml;
2718 	uint64_t	s_slice_mask, t_slice_mask;
2719 	static fn_t	f = "dr_mlist_canfit";
2720 
2721 	s_ml = memlist_dup(s_mlist);
2722 	t_ml = memlist_dup(t_mlist);
2723 
2724 	if (s_ml == NULL || t_ml == NULL) {
2725 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2726 		goto done;
2727 	}
2728 
2729 	s_slice_mask = s_mp->sbm_slice_size - 1;
2730 	t_slice_mask = t_mp->sbm_slice_size - 1;
2731 
2732 	/*
2733 	 * Normalize to slice relative offsets.
2734 	 */
2735 	for (ml = s_ml; ml; ml = ml->next)
2736 		ml->address &= s_slice_mask;
2737 
2738 	for (ml = t_ml; ml; ml = ml->next)
2739 		ml->address &= t_slice_mask;
2740 
2741 	canfit = memlist_canfit(s_ml, t_ml);
2742 done:
2743 	memlist_delete(s_ml);
2744 	memlist_delete(t_ml);
2745 
2746 	return (canfit);
2747 }
2748 
2749 /*
2750  * Memlist support.
2751  */
2752 
2753 /*
2754  * Determine whether the source memlist (s_mlist) will
2755  * fit into the target memlist (t_mlist) in terms of
2756  * size and holes.  Assumes the caller has normalized the
2757  * memlist physical addresses for comparison.
2758  */
2759 static int
2760 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2761 {
2762 	int		rv = 0;
2763 	struct memlist	*s_ml, *t_ml;
2764 
2765 	if ((s_mlist == NULL) || (t_mlist == NULL))
2766 		return (0);
2767 
2768 	s_ml = s_mlist;
2769 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2770 		uint64_t	s_start, s_end;
2771 		uint64_t	t_start, t_end;
2772 
2773 		t_start = t_ml->address;
2774 		t_end = t_start + t_ml->size;
2775 
2776 		for (; s_ml; s_ml = s_ml->next) {
2777 			s_start = s_ml->address;
2778 			s_end = s_start + s_ml->size;
2779 
2780 			if ((s_start < t_start) || (s_end > t_end))
2781 				break;
2782 		}
2783 	}
2784 
2785 	/*
2786 	 * If we ran out of source memlist chunks that mean
2787 	 * we found a home for all of them.
2788 	 */
2789 	if (s_ml == NULL)
2790 		rv = 1;
2791 
2792 	return (rv);
2793 }
2794