xref: /titanic_51/usr/src/uts/sun4u/opl/io/dr_mem.c (revision bfe60e20c2f727eab7a71b13a2183a856ae0c22f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * DR memory support routines.
30  */
31 
32 #include <sys/note.h>
33 #include <sys/debug.h>
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>
37 #include <sys/dditypes.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/ndi_impldefs.h>
45 #include <sys/sysmacros.h>
46 #include <sys/machsystm.h>
47 #include <sys/spitregs.h>
48 #include <sys/cpuvar.h>
49 #include <sys/promif.h>
50 #include <vm/seg_kmem.h>
51 #include <sys/lgrp.h>
52 #include <sys/platform_module.h>
53 
54 #include <vm/page.h>
55 
56 #include <sys/dr.h>
57 #include <sys/dr_util.h>
58 #include <sys/drmach.h>
59 #include <sys/kobj.h>
60 
61 extern struct memlist	*phys_install;
62 extern vnode_t		*retired_pages;
63 
64 /* TODO: push this reference below drmach line */
65 extern int		kcage_on;
66 
67 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
68 static char *dr_ie_fmt = "dr_mem.c %d";
69 
/*
 * Values returned by dr_get_target_preference().
 */
typedef enum {
	DR_TP_INVALID	= -1,
	DR_TP_SAME	= 0,
	DR_TP_LARGE	= 1,
	DR_TP_NONRELOC	= 2,
	DR_TP_FLOATING	= 3
} dr_target_pref_t;
77 
78 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
79 static int		dr_reserve_mem_spans(memhandle_t *mhp,
80 				struct memlist *mlist);
81 static int		dr_select_mem_target(dr_handle_t *hp,
82 				dr_mem_unit_t *mp, struct memlist *ml);
83 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
84 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
85 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
86 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
87 				struct memlist *s_ml, struct memlist *x_ml,
88 				struct memlist *b_ml);
89 
90 static int		memlist_canfit(struct memlist *s_mlist,
91 				struct memlist *t_mlist);
92 static int		dr_del_mlist_query(struct memlist *mlist,
93 				memquery_t *mp);
94 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
95 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
96 				dr_mem_unit_t *t_mp);
97 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
98 				dr_mem_unit_t *s_mp);
99 static int		dr_memlist_canfit(struct memlist *s_mlist,
100 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
101 				dr_mem_unit_t *t_mp);
102 
/*
 * Bit values kept in dr_mem_unit_t.sbm_flags.
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
#define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
#define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */

/*
 * Page <-> byte conversions.  _ptob64() widens to 64 bits before
 * shifting; _b64top() narrows the shifted result to pgcnt_t.
 */
#define	_ptob64(p)	(((uint64_t)(p)) << PAGESHIFT)
#define	_b64top(b)	((pgcnt_t)(((b)) >> PAGESHIFT))
115 
116 static struct memlist *
117 dr_get_memlist(dr_mem_unit_t *mp)
118 {
119 	struct memlist	*mlist = NULL;
120 	sbd_error_t	*err;
121 	static fn_t	f = "dr_get_memlist";
122 
123 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
124 
125 	/*
126 	 * Return cached memlist, if present.
127 	 * This memlist will be present following an
128 	 * unconfigure (a.k.a: detach) of this memunit.
129 	 * It should only be used in the case were a configure
130 	 * is bringing this memunit back in without going
131 	 * through the disconnect and connect states.
132 	 */
133 	if (mp->sbm_mlist) {
134 		PR_MEM("%s: found cached memlist\n", f);
135 
136 		mlist = memlist_dup(mp->sbm_mlist);
137 	} else {
138 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
139 
140 		/* attempt to construct a memlist using phys_install */
141 
142 		/* round down to slice base address */
143 		basepa &= ~(mp->sbm_slice_size - 1);
144 
145 		/* get a copy of phys_install to edit */
146 		memlist_read_lock();
147 		mlist = memlist_dup(phys_install);
148 		memlist_read_unlock();
149 
150 		/* trim lower irrelevant span */
151 		if (mlist)
152 			mlist = memlist_del_span(mlist, 0ull, basepa);
153 
154 		/* trim upper irrelevant span */
155 		if (mlist) {
156 			uint64_t endpa;
157 
158 			basepa += mp->sbm_slice_size;
159 			endpa = _ptob64(physmax + 1);
160 			if (endpa > basepa)
161 				mlist = memlist_del_span(
162 					mlist, basepa,
163 					endpa - basepa);
164 		}
165 
166 		if (mlist) {
167 			/* successfully built a memlist */
168 			PR_MEM("%s: derived memlist from phys_install\n", f);
169 		}
170 
171 		/* if no mlist yet, try platform layer */
172 		if (!mlist) {
173 			err = drmach_mem_get_memlist(
174 				mp->sbm_cm.sbdev_id, &mlist);
175 			if (err) {
176 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
177 				mlist = NULL; /* paranoia */
178 			}
179 		}
180 	}
181 
182 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
183 	PR_MEMLIST_DUMP(mlist);
184 
185 	return (mlist);
186 }
187 
188 typedef struct {
189 	kcondvar_t cond;
190 	kmutex_t lock;
191 	int error;
192 	int done;
193 } dr_release_mem_sync_t;
194 
195 /*
196  * Memory has been logically removed by the time this routine is called.
197  */
198 static void
199 dr_mem_del_done(void *arg, int error)
200 {
201 	dr_release_mem_sync_t *ds = arg;
202 
203 	mutex_enter(&ds->lock);
204 	ds->error = error;
205 	ds->done = 1;
206 	cv_signal(&ds->cond);
207 	mutex_exit(&ds->lock);
208 }
209 
210 /*
211  * When we reach here the memory being drained should have
212  * already been reserved in dr_pre_release_mem().
213  * Our only task here is to kick off the "drain" and wait
214  * for it to finish.
215  */
216 void
217 dr_release_mem(dr_common_unit_t *cp)
218 {
219 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
220 	int		err;
221 	dr_release_mem_sync_t rms;
222 	static fn_t	f = "dr_release_mem";
223 
224 	/* check that this memory unit has been reserved */
225 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
226 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
227 		return;
228 	}
229 
230 	bzero((void *) &rms, sizeof (rms));
231 
232 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
233 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
234 
235 	mutex_enter(&rms.lock);
236 	err = kphysm_del_start(mp->sbm_memhandle,
237 		dr_mem_del_done, (void *) &rms);
238 	if (err == KPHYSM_OK) {
239 		/* wait for completion or interrupt */
240 		while (!rms.done) {
241 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
242 				/* then there is a pending UNIX signal */
243 				(void) kphysm_del_cancel(mp->sbm_memhandle);
244 
245 				/* wait for completion */
246 				while (!rms.done)
247 					cv_wait(&rms.cond, &rms.lock);
248 			}
249 		}
250 		/* get the result of the memory delete operation */
251 		err = rms.error;
252 	}
253 	mutex_exit(&rms.lock);
254 
255 	cv_destroy(&rms.cond);
256 	mutex_destroy(&rms.lock);
257 
258 	if (err != KPHYSM_OK) {
259 		int e_code;
260 
261 		switch (err) {
262 			case KPHYSM_ENOWORK:
263 				e_code = ESBD_NOERROR;
264 				break;
265 
266 			case KPHYSM_EHANDLE:
267 			case KPHYSM_ESEQUENCE:
268 				e_code = ESBD_INTERNAL;
269 				break;
270 
271 			case KPHYSM_ENOTVIABLE:
272 				e_code = ESBD_MEM_NOTVIABLE;
273 				break;
274 
275 			case KPHYSM_EREFUSED:
276 				e_code = ESBD_MEM_REFUSED;
277 				break;
278 
279 			case KPHYSM_ENONRELOC:
280 				e_code = ESBD_MEM_NONRELOC;
281 				break;
282 
283 			case KPHYSM_ECANCELLED:
284 				e_code = ESBD_MEM_CANCELLED;
285 				break;
286 
287 			case KPHYSM_ERESOURCE:
288 				e_code = ESBD_MEMFAIL;
289 				break;
290 
291 			default:
292 				cmn_err(CE_WARN,
293 					"%s: unexpected kphysm error code %d,"
294 					" id 0x%p",
295 					f, err, mp->sbm_cm.sbdev_id);
296 
297 				e_code = ESBD_IO;
298 				break;
299 		}
300 
301 		if (e_code != ESBD_NOERROR) {
302 			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
303 		}
304 	}
305 }
306 
307 void
308 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
309 {
310 	_NOTE(ARGUNUSED(hp))
311 
312 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
313 	struct memlist	*ml, *mc;
314 	sbd_error_t	*err;
315 	static fn_t	f = "dr_attach_mem";
316 
317 	PR_MEM("%s...\n", f);
318 
319 	dr_lock_status(hp->h_bd);
320 	err = drmach_configure(cp->sbdev_id, 0);
321 	dr_unlock_status(hp->h_bd);
322 	if (err) {
323 		DRERR_SET_C(&cp->sbdev_error, &err);
324 		return;
325 	}
326 
327 	ml = dr_get_memlist(mp);
328 	for (mc = ml; mc; mc = mc->next) {
329 		int		 rv;
330 		sbd_error_t	*err;
331 
332 		rv = kphysm_add_memory_dynamic(
333 			(pfn_t)(mc->address >> PAGESHIFT),
334 			(pgcnt_t)(mc->size >> PAGESHIFT));
335 		if (rv != KPHYSM_OK) {
336 			/*
337 			 * translate kphysm error and
338 			 * store in devlist error
339 			 */
340 			switch (rv) {
341 			case KPHYSM_ERESOURCE:
342 				rv = ESBD_NOMEM;
343 				break;
344 
345 			case KPHYSM_EFAULT:
346 				rv = ESBD_FAULT;
347 				break;
348 
349 			default:
350 				rv = ESBD_INTERNAL;
351 				break;
352 			}
353 
354 			if (rv == ESBD_INTERNAL) {
355 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
356 			} else
357 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
358 			break;
359 		}
360 
361 		err = drmach_mem_add_span(
362 			mp->sbm_cm.sbdev_id, mc->address, mc->size);
363 		if (err) {
364 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
365 			break;
366 		}
367 	}
368 
369 	memlist_delete(ml);
370 
371 	/* back out if configure failed */
372 	if (mp->sbm_cm.sbdev_error != NULL) {
373 		dr_lock_status(hp->h_bd);
374 		err = drmach_unconfigure(cp->sbdev_id, 0);
375 		if (err)
376 			sbd_err_clear(&err);
377 		dr_unlock_status(hp->h_bd);
378 	}
379 }
380 
381 static struct memlist *
382 dr_memlist_del_retired_pages(struct memlist *mlist)
383 {
384 	page_t		*pp;
385 	pfn_t		pfn;
386 	kmutex_t	*vphm;
387 	vnode_t		*vp = retired_pages;
388 	static fn_t	f = "dr_memlist_del_retired_pages";
389 
390 	vphm = page_vnode_mutex(vp);
391 	mutex_enter(vphm);
392 
393 	PR_MEM("%s\n", f);
394 
395 	if ((pp = vp->v_pages) == NULL) {
396 		mutex_exit(vphm);
397 		return (mlist);
398 	}
399 
400 	do {
401 		ASSERT(pp != NULL);
402 		/*
403 		 * page_downgrade happens after page_hashin, so we
404 		 * can't assert PAGE_SE. Just assert locked to catch
405 		 * changes to the retired vnode locking scheme.
406 		 */
407 		ASSERT(PAGE_LOCKED(pp));
408 		ASSERT(pp->p_vnode == retired_pages);
409 
410 		if (!page_trylock(pp, SE_SHARED))
411 			continue;
412 
413 		pfn = page_pptonum(pp);
414 
415 		ASSERT((pp->p_offset >> PAGESHIFT) == pfn);
416 		/*
417 		 * Page retirement currently breaks large pages into PAGESIZE
418 		 * pages. If this changes, need to remove the assert and deal
419 		 * with different page sizes.
420 		 */
421 		ASSERT(pp->p_szc == 0);
422 
423 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
424 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
425 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
426 			    "from memlist\n", ptob(pfn), pfn);
427 		}
428 
429 		page_unlock(pp);
430 	} while ((pp = pp->p_vpnext) != vp->v_pages);
431 
432 	mutex_exit(vphm);
433 
434 	return (mlist);
435 }
436 
437 static int
438 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
439 {
440 	int		rv = -1;
441 	time_t		 copytime;
442 	drmachid_t	 cr_id;
443 	dr_sr_handle_t	*srhp = NULL;
444 	dr_board_t	*t_bp, *s_bp;
445 	struct memlist	*c_ml, *d_ml;
446 	sbd_error_t	*err;
447 	static fn_t	 f = "dr_move_memory";
448 
449 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
450 		f,
451 		s_mp->sbm_cm.sbdev_path,
452 		t_mp->sbm_cm.sbdev_path);
453 
454 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
455 	ASSERT(s_mp->sbm_peer == t_mp);
456 	ASSERT(s_mp->sbm_mlist);
457 
458 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
459 	ASSERT(t_mp->sbm_peer == s_mp);
460 
461 	/*
462 	 * create a memlist of spans to copy by removing
463 	 * the spans that have been deleted, if any, from
464 	 * the full source board memlist.  s_mp->sbm_del_mlist
465 	 * will be NULL if there were no spans deleted from
466 	 * the source board.
467 	 */
468 	c_ml = memlist_dup(s_mp->sbm_mlist);
469 	d_ml = s_mp->sbm_del_mlist;
470 	while (d_ml != NULL) {
471 		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
472 		d_ml = d_ml->next;
473 	}
474 
475 	/*
476 	 * Remove retired pages from the copy list. The page content
477 	 * need not be copied since the pages are no longer in use.
478 	 */
479 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
480 	PR_MEMLIST_DUMP(c_ml);
481 
482 	c_ml = dr_memlist_del_retired_pages(c_ml);
483 
484 	PR_MEM("%s: copy list after removing retired pages:\n", f);
485 	PR_MEMLIST_DUMP(c_ml);
486 
487 	/*
488 	 * With parallel copy, it shouldn't make a difference which
489 	 * CPU is the actual master during copy-rename since all
490 	 * CPUs participate in the parallel copy anyway.
491 	 */
492 	affinity_set(CPU_CURRENT);
493 
494 	err = drmach_copy_rename_init(
495 		t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
496 	if (err) {
497 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
498 		affinity_clear();
499 		memlist_delete(c_ml);
500 		return (-1);
501 	}
502 
503 	srhp = dr_get_sr_handle(hp);
504 	ASSERT(srhp);
505 
506 	copytime = lbolt;
507 
508 	/* Quiesce the OS.  */
509 	if (dr_suspend(srhp)) {
510 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
511 			" for copy-rename", f);
512 
513 		err = drmach_copy_rename_fini(cr_id);
514 		if (err) {
515 			/*
516 			 * no error is expected since the program has
517 			 * not yet run.
518 			 */
519 
520 			/* catch this in debug kernels */
521 			ASSERT(0);
522 
523 			sbd_err_clear(&err);
524 		}
525 
526 		/* suspend error reached via hp */
527 		s_mp->sbm_cm.sbdev_error = hp->h_err;
528 		hp->h_err = NULL;
529 		goto done;
530 	}
531 
532 	drmach_copy_rename(cr_id);
533 
534 	/* Resume the OS.  */
535 	dr_resume(srhp);
536 
537 	copytime = lbolt - copytime;
538 
539 	if (err = drmach_copy_rename_fini(cr_id))
540 		goto done;
541 
542 	/*
543 	 * Rename memory for lgroup.
544 	 * Source and target board numbers are packaged in arg.
545 	 */
546 	s_bp = s_mp->sbm_cm.sbdev_bp;
547 	t_bp = t_mp->sbm_cm.sbdev_bp;
548 
549 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
550 		(uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
551 
552 
553 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
554 		f, copytime, copytime / hz);
555 
556 	rv = 0;
557 done:
558 	if (srhp)
559 		dr_release_sr_handle(srhp);
560 	if (err)
561 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
562 	affinity_clear();
563 
564 	return (rv);
565 }
566 
567 /*
568  * If detaching node contains memory that is "non-permanent"
569  * then the memory adr's are simply cleared.  If the memory
570  * is non-relocatable, then do a copy-rename.
571  */
572 void
573 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
574 {
575 	int			rv = 0;
576 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
577 	dr_mem_unit_t		*t_mp;
578 	dr_state_t		state;
579 	static fn_t		f = "dr_detach_mem";
580 
581 	PR_MEM("%s...\n", f);
582 
583 	/* lookup target mem unit and target board structure, if any */
584 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
585 		t_mp = s_mp->sbm_peer;
586 		ASSERT(t_mp != NULL);
587 		ASSERT(t_mp->sbm_peer == s_mp);
588 	} else {
589 		t_mp = NULL;
590 	}
591 
592 	/* verify mem unit's state is UNREFERENCED */
593 	state = s_mp->sbm_cm.sbdev_state;
594 	if (state != DR_STATE_UNREFERENCED) {
595 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
596 		return;
597 	}
598 
599 	/* verify target mem unit's state is UNREFERENCED, if any */
600 	if (t_mp != NULL) {
601 		state = t_mp->sbm_cm.sbdev_state;
602 		if (state != DR_STATE_UNREFERENCED) {
603 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
604 			return;
605 		}
606 	}
607 
608 	/*
609 	 * If there is no target board (no copy/rename was needed), then
610 	 * we're done!
611 	 */
612 	if (t_mp == NULL) {
613 		sbd_error_t *err;
614 		/*
615 		 * Reprogram interconnect hardware and disable
616 		 * memory controllers for memory node that's going away.
617 		 */
618 
619 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
620 		if (err) {
621 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
622 			rv = -1;
623 		}
624 	} else {
625 		rv = dr_move_memory(hp, s_mp, t_mp);
626 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
627 			f,
628 			rv ? "FAILED" : "COMPLETED",
629 			s_mp->sbm_cm.sbdev_bp->b_num,
630 			t_mp->sbm_cm.sbdev_bp->b_num);
631 
632 		if (rv != 0)
633 			(void) dr_cancel_mem(s_mp);
634 	}
635 
636 	if (rv == 0) {
637 		sbd_error_t *err;
638 
639 		dr_lock_status(hp->h_bd);
640 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
641 		dr_unlock_status(hp->h_bd);
642 		if (err)
643 			sbd_err_clear(&err);
644 	}
645 }
646 
647 /*
648  * This routine acts as a wrapper for kphysm_del_span_query in order to
649  * support potential memory holes in a board's physical address space.
650  * It calls kphysm_del_span_query for each node in a memlist and accumulates
651  * the results in *mp.
652  */
653 static int
654 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
655 {
656 	struct memlist	*ml;
657 	int		 rv = 0;
658 
659 
660 	if (mlist == NULL)
661 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
662 
663 	mp->phys_pages = 0;
664 	mp->managed = 0;
665 	mp->nonrelocatable = 0;
666 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
667 	mp->last_nonrelocatable = 0;
668 
669 	for (ml = mlist; ml; ml = ml->next) {
670 		memquery_t mq;
671 
672 		rv = kphysm_del_span_query(
673 			_b64top(ml->address), _b64top(ml->size), &mq);
674 		if (rv)
675 			break;
676 
677 		mp->phys_pages += mq.phys_pages;
678 		mp->managed += mq.managed;
679 		mp->nonrelocatable += mq.nonrelocatable;
680 
681 		if (mq.nonrelocatable != 0) {
682 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
683 				mp->first_nonrelocatable =
684 					mq.first_nonrelocatable;
685 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
686 				mp->last_nonrelocatable =
687 					mq.last_nonrelocatable;
688 		}
689 	}
690 
691 	if (mp->nonrelocatable == 0)
692 		mp->first_nonrelocatable = 0;	/* XXX */
693 
694 	return (rv);
695 }
696 
697 /*
698  * NOTE: This routine is only partially smart about multiple
699  *	 mem-units.  Need to make mem-status structure smart
700  *	 about them also.
701  */
702 int
703 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
704 {
705 	int		m, mix;
706 	memdelstat_t	mdst;
707 	memquery_t	mq;
708 	dr_board_t	*bp;
709 	dr_mem_unit_t	*mp;
710 	sbd_mem_stat_t	*msp;
711 	static fn_t	f = "dr_mem_status";
712 
713 	bp = hp->h_bd;
714 	devset &= DR_DEVS_PRESENT(bp);
715 
716 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
717 		int		rv;
718 		sbd_error_t	*err;
719 		drmach_status_t	 pstat;
720 		dr_mem_unit_t	*p_mp;
721 
722 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
723 			continue;
724 
725 		mp = dr_get_mem_unit(bp, m);
726 
727 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
728 			/* present, but not fully initialized */
729 			continue;
730 		}
731 
732 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
733 			continue;
734 
735 		/* fetch platform status */
736 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
737 		if (err) {
738 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
739 			continue;
740 		}
741 
742 		msp = &dsp->d_mem;
743 		bzero((caddr_t)msp, sizeof (*msp));
744 
745 		strncpy(msp->ms_cm.c_id.c_name, pstat.type,
746 			sizeof (msp->ms_cm.c_id.c_name));
747 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
748 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
749 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
750 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
751 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
752 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
753 
754 		msp->ms_totpages = mp->sbm_npages;
755 		msp->ms_basepfn = mp->sbm_basepfn;
756 		msp->ms_pageslost = mp->sbm_pageslost;
757 		msp->ms_cage_enabled = kcage_on;
758 
759 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
760 			p_mp = mp->sbm_peer;
761 		else
762 			p_mp = NULL;
763 
764 		if (p_mp == NULL) {
765 			msp->ms_peer_is_target = 0;
766 			msp->ms_peer_ap_id[0] = '\0';
767 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
768 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
769 			char *minor;
770 
771 			/*
772 			 * b_dip doesn't have to be held for ddi_pathname()
773 			 * because the board struct (dr_board_t) will be
774 			 * destroyed before b_dip detaches.
775 			 */
776 			(void) ddi_pathname(bp->b_dip, path);
777 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
778 
779 			snprintf(msp->ms_peer_ap_id,
780 			    sizeof (msp->ms_peer_ap_id), "%s%s",
781 			    path, (minor == NULL) ? "" : minor);
782 
783 			kmem_free(path, MAXPATHLEN);
784 
785 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
786 				msp->ms_peer_is_target = 1;
787 		}
788 
789 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
790 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
791 		else
792 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
793 
794 		if (rv == KPHYSM_OK) {
795 			/*
796 			 * Any pages above managed is "free",
797 			 * i.e. it's collected.
798 			 */
799 			msp->ms_detpages += (uint_t)(mdst.collected +
800 			    mdst.phys_pages - mdst.managed);
801 		} else {
802 			/*
803 			 * If we're UNREFERENCED or UNCONFIGURED,
804 			 * then the number of detached pages is
805 			 * however many pages are on the board.
806 			 * I.e. detached = not in use by OS.
807 			 */
808 			switch (msp->ms_cm.c_ostate) {
809 			/*
810 			 * changed to use cfgadm states
811 			 *
812 			 * was:
813 			 *	case DR_STATE_UNREFERENCED:
814 			 *	case DR_STATE_UNCONFIGURED:
815 			 */
816 			case SBD_STAT_UNCONFIGURED:
817 				msp->ms_detpages = msp->ms_totpages;
818 				break;
819 
820 			default:
821 				break;
822 			}
823 		}
824 
825 		/*
826 		 * kphysm_del_span_query can report non-reloc pages = total
827 		 * pages for memory that is not yet configured
828 		 */
829 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
830 			struct memlist *ml;
831 
832 			ml = dr_get_memlist(mp);
833 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
834 			memlist_delete(ml);
835 
836 			if (rv == KPHYSM_OK) {
837 				msp->ms_managed_pages = mq.managed;
838 				msp->ms_noreloc_pages = mq.nonrelocatable;
839 				msp->ms_noreloc_first =
840 				    mq.first_nonrelocatable;
841 				msp->ms_noreloc_last =
842 				    mq.last_nonrelocatable;
843 				msp->ms_cm.c_sflags = 0;
844 				if (mq.nonrelocatable) {
845 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
846 					    msp->ms_cm.c_sflags);
847 				}
848 			} else {
849 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
850 				    f, rv);
851 			}
852 		}
853 
854 		/*
855 		 * Check source unit state during copy-rename
856 		 */
857 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
858 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
859 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
860 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
861 
862 		mix++;
863 		dsp++;
864 	}
865 
866 	return (mix);
867 }
868 
869 int
870 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
871 {
872 	_NOTE(ARGUNUSED(hp))
873 
874 	int		err_flag = 0;
875 	int		d;
876 	sbd_error_t	*err;
877 	static fn_t	f = "dr_pre_attach_mem";
878 
879 	PR_MEM("%s...\n", f);
880 
881 	for (d = 0; d < devnum; d++) {
882 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
883 		dr_state_t	state;
884 
885 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
886 
887 		state = mp->sbm_cm.sbdev_state;
888 		switch (state) {
889 		case DR_STATE_UNCONFIGURED:
890 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
891 				f,
892 				mp->sbm_cm.sbdev_path);
893 
894 			/* use memlist cached by dr_post_detach_mem_unit */
895 			ASSERT(mp->sbm_mlist != NULL);
896 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
897 				f, mp->sbm_cm.sbdev_path);
898 			PR_MEMLIST_DUMP(mp->sbm_mlist);
899 
900 			/* kphysm del handle should be have been freed */
901 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
902 
903 			/*FALLTHROUGH*/
904 
905 		case DR_STATE_CONNECTED:
906 			PR_MEM("%s: reprogramming mem hardware on %s\n",
907 				f, mp->sbm_cm.sbdev_bp->b_path);
908 
909 			PR_MEM("%s: enabling %s\n",
910 				f, mp->sbm_cm.sbdev_path);
911 
912 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
913 			if (err) {
914 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
915 				err_flag = 1;
916 			}
917 			break;
918 
919 		default:
920 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
921 			err_flag = 1;
922 			break;
923 		}
924 
925 		/* exit for loop if error encountered */
926 		if (err_flag)
927 			break;
928 	}
929 
930 	return (err_flag ? -1 : 0);
931 }
932 
933 static void
934 dr_update_mc_memory()
935 {
936 	void		(*mc_update_mlist)(void);
937 
938 	/*
939 	 * mc-opl is configured during drmach_mem_new but the memory
940 	 * has not been added to phys_install at that time.
941 	 * we must inform mc-opl to update the mlist after we
942 	 * attach or detach a system board.
943 	 */
944 
945 	mc_update_mlist = (void (*)(void))
946 	    modgetsymvalue("opl_mc_update_mlist", 0);
947 
948 	if (mc_update_mlist != NULL) {
949 		(*mc_update_mlist)();
950 	}
951 }
952 
953 int
954 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
955 {
956 	_NOTE(ARGUNUSED(hp))
957 
958 	int		d;
959 	static fn_t	f = "dr_post_attach_mem";
960 
961 	PR_MEM("%s...\n", f);
962 
963 	for (d = 0; d < devnum; d++) {
964 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
965 		struct memlist	*mlist, *ml;
966 
967 		mlist = dr_get_memlist(mp);
968 		if (mlist == NULL) {
969 			/* OPL supports memoryless board */
970 			continue;
971 		}
972 
973 		/*
974 		 * Verify the memory really did successfully attach
975 		 * by checking for its existence in phys_install.
976 		 */
977 		memlist_read_lock();
978 		if (memlist_intersect(phys_install, mlist) == 0) {
979 			memlist_read_unlock();
980 
981 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
982 
983 			PR_MEM("%s: %s memlist not in phys_install",
984 				f, mp->sbm_cm.sbdev_path);
985 
986 			memlist_delete(mlist);
987 			continue;
988 		}
989 		memlist_read_unlock();
990 
991 		for (ml = mlist; ml != NULL; ml = ml->next) {
992 			sbd_error_t *err;
993 
994 			err = drmach_mem_add_span(
995 				mp->sbm_cm.sbdev_id,
996 				ml->address,
997 				ml->size);
998 			if (err)
999 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1000 		}
1001 
1002 		memlist_delete(mlist);
1003 
1004 		/*
1005 		 * Destroy cached memlist, if any.
1006 		 * There will be a cached memlist in sbm_mlist if
1007 		 * this board is being configured directly after
1008 		 * an unconfigure.
1009 		 * To support this transition, dr_post_detach_mem
1010 		 * left a copy of the last known memlist in sbm_mlist.
1011 		 * This memlist could differ from any derived from
1012 		 * hardware if while this memunit was last configured
1013 		 * the system detected and deleted bad pages from
1014 		 * phys_install.  The location of those bad pages
1015 		 * will be reflected in the cached memlist.
1016 		 */
1017 		if (mp->sbm_mlist) {
1018 			memlist_delete(mp->sbm_mlist);
1019 			mp->sbm_mlist = NULL;
1020 		}
1021 	}
1022 
1023 	dr_update_mc_memory();
1024 
1025 	return (0);
1026 }
1027 
1028 int
1029 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1030 {
1031 	_NOTE(ARGUNUSED(hp))
1032 
1033 	int d;
1034 
1035 	for (d = 0; d < devnum; d++) {
1036 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1037 
1038 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1039 	}
1040 
1041 	return (0);
1042 }
1043 
1044 int
1045 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1046 {
1047 	_NOTE(ARGUNUSED(hp))
1048 
1049 	int		d, rv;
1050 	static fn_t	f = "dr_post_detach_mem";
1051 
1052 	PR_MEM("%s...\n", f);
1053 
1054 	rv = 0;
1055 	for (d = 0; d < devnum; d++) {
1056 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1057 
1058 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1059 
1060 		if (dr_post_detach_mem_unit(mp))
1061 			rv = -1;
1062 	}
1063 	dr_update_mc_memory();
1064 
1065 	return (rv);
1066 }
1067 
1068 static void
1069 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1070 {
1071 	static fn_t	f = "dr_add_memory_spans";
1072 
1073 	PR_MEM("%s...", f);
1074 	PR_MEMLIST_DUMP(ml);
1075 
1076 #ifdef DEBUG
1077 	memlist_read_lock();
1078 	if (memlist_intersect(phys_install, ml)) {
1079 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1080 	}
1081 	memlist_read_unlock();
1082 #endif
1083 
1084 	for (; ml; ml = ml->next) {
1085 		pfn_t		 base;
1086 		pgcnt_t		 npgs;
1087 		int		 rv;
1088 		sbd_error_t	*err;
1089 
1090 		base = _b64top(ml->address);
1091 		npgs = _b64top(ml->size);
1092 
1093 		rv = kphysm_add_memory_dynamic(base, npgs);
1094 
1095 		err = drmach_mem_add_span(
1096 			mp->sbm_cm.sbdev_id,
1097 			ml->address,
1098 			ml->size);
1099 
1100 		if (err)
1101 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1102 
1103 		if (rv != KPHYSM_OK) {
1104 			cmn_err(CE_WARN, "%s:"
1105 				" unexpected kphysm_add_memory_dynamic"
1106 				" return value %d;"
1107 				" basepfn=0x%lx, npages=%ld\n",
1108 				f, rv, base, npgs);
1109 
1110 			continue;
1111 		}
1112 	}
1113 }
1114 
1115 static int
1116 memlist_touch(struct memlist *ml, uint64_t add)
1117 {
1118 	while (ml != NULL) {
1119 		if ((add == ml->address) ||
1120 			(add == (ml->address + ml->size)))
1121 			return (1);
1122 		ml = ml->next;
1123 	}
1124 	return (0);
1125 }
1126 
1127 static sbd_error_t *
1128 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
1129 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
1130 {
1131 	struct memlist	*ml;
1132 	sbd_error_t	*err;
1133 	static fn_t	f = "dr_process_excess_mlist";
1134 	uint64_t	new_pa, nbytes;
1135 	int rv;
1136 
1137 	err = NULL;
1138 
1139 	/*
1140 	 * After the small <-> big copy-rename,
1141 	 * the original address space for the
1142 	 * source board may have excess to be
1143 	 * deleted. This is a case different
1144 	 * from the big->small excess source
1145 	 * memory case listed below.
1146 	 * Remove s_mp->sbm_del_mlist from
1147 	 * the kernel cage glist.
1148 	 */
1149 	for (ml = s_mp->sbm_del_mlist; ml;
1150 		ml = ml->next) {
1151 		PR_MEM("%s: delete small<->big copy-"
1152 		    "rename source excess memory", f);
1153 		PR_MEMLIST_DUMP(ml);
1154 
1155 		err = drmach_mem_del_span(
1156 			s_mp->sbm_cm.sbdev_id,
1157 			    ml->address, ml->size);
1158 		if (err)
1159 			DRERR_SET_C(&s_mp->
1160 			    sbm_cm.sbdev_error, &err);
1161 		ASSERT(err == NULL);
1162 	}
1163 
1164 	PR_MEM("%s: adding back remaining portion"
1165 		" of %s, memlist:\n",
1166 		f, t_mp->sbm_cm.sbdev_path);
1167 	PR_MEMLIST_DUMP(t_excess_mlist);
1168 
1169 	for (ml = t_excess_mlist; ml; ml = ml->next) {
1170 	    struct memlist ml0;
1171 
1172 	    ml0.address = ml->address;
1173 	    ml0.size = ml->size;
1174 	    ml0.next = ml0.prev = NULL;
1175 
1176 	/*
1177 	 * If the memory object is 256 MB aligned (max page size
1178 	 * on OPL, it will not be coalesced to the adjacent memory
1179 	 * chunks.  The coalesce logic assumes contiguous page
1180 	 * structures for contiguous memory and we hit panic.
1181 	 * For anything less than 256 MB alignment, we have
1182 	 * to make sure that it is not adjacent to anything.
1183 	 * If the new chunk is adjacent to phys_install, we
1184 	 * truncate it to 4MB boundary.  4 MB is somewhat
1185 	 * arbitrary.  However we do not want to create
1186 	 * very small segments because they can cause problem.
1187 	 * The extreme case of 8K segment will fail
1188 	 * kphysm_add_memory_dynamic(), e.g.
1189 	 */
1190 	    if ((ml->address & (MH_MPSS_ALIGNMENT - 1)) ||
1191 		(ml->size & (MH_MPSS_ALIGNMENT - 1))) {
1192 
1193 		memlist_read_lock();
1194 		rv = memlist_touch(phys_install, ml0.address);
1195 		memlist_read_unlock();
1196 
1197 		if (rv) {
1198 		    new_pa = roundup(ml0.address + 1, MH_MIN_ALIGNMENT);
1199 		    nbytes = (new_pa -  ml0.address);
1200 		    if (nbytes >= ml0.size) {
1201 			t_mp->sbm_dyn_segs =
1202 			    memlist_del_span(t_mp->sbm_dyn_segs,
1203 				ml0.address, ml0.size);
1204 			continue;
1205 		    }
1206 		    t_mp->sbm_dyn_segs =
1207 			memlist_del_span(t_mp->sbm_dyn_segs,
1208 			    ml0.address, nbytes);
1209 		    ml0.size -= nbytes;
1210 		    ml0.address = new_pa;
1211 		}
1212 
1213 		if (ml0.size == 0) {
1214 		    continue;
1215 		}
1216 
1217 		memlist_read_lock();
1218 		rv = memlist_touch(phys_install, ml0.address + ml0.size);
1219 		memlist_read_unlock();
1220 
1221 		if (rv) {
1222 		    new_pa = rounddown(ml0.address + ml0.size - 1,
1223 			MH_MIN_ALIGNMENT);
1224 		    nbytes = (ml0.address + ml0.size - new_pa);
1225 		    if (nbytes >= ml0.size) {
1226 			t_mp->sbm_dyn_segs =
1227 			    memlist_del_span(t_mp->sbm_dyn_segs,
1228 				ml0.address, ml0.size);
1229 			continue;
1230 		    }
1231 		    t_mp->sbm_dyn_segs =
1232 			memlist_del_span(t_mp->sbm_dyn_segs,
1233 			    new_pa, nbytes);
1234 		    ml0.size -= nbytes;
1235 		}
1236 
1237 		if (ml0.size > 0) {
1238 		    dr_add_memory_spans(s_mp, &ml0);
1239 		}
1240 	    } else if (ml0.size > 0) {
1241 		dr_add_memory_spans(s_mp, &ml0);
1242 	    }
1243 	}
1244 	memlist_delete(t_excess_mlist);
1245 	return (err);
1246 }
1247 
1248 static int
1249 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1250 {
1251 	uint64_t	sz = s_mp->sbm_slice_size;
1252 	uint64_t	sm = sz - 1;
1253 	/* old and new below refer to PAs before and after copy-rename */
1254 	uint64_t	s_old_basepa, s_new_basepa;
1255 	uint64_t	t_old_basepa, t_new_basepa;
1256 	dr_mem_unit_t	*t_mp, *x_mp;
1257 	drmach_mem_info_t	minfo;
1258 	struct memlist	*ml;
1259 	struct memlist	*t_excess_mlist;
1260 	int		rv;
1261 	int		s_excess_mem_deleted = 0;
1262 	sbd_error_t	*err;
1263 	static fn_t	f = "dr_post_detach_mem_unit";
1264 
1265 	PR_MEM("%s...\n", f);
1266 
1267 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1268 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1269 		f, s_mp->sbm_cm.sbdev_path);
1270 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1271 
1272 	/* sanity check */
1273 	ASSERT(s_mp->sbm_del_mlist == NULL ||
1274 		(s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1275 
1276 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1277 		t_mp = s_mp->sbm_peer;
1278 		ASSERT(t_mp != NULL);
1279 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1280 		ASSERT(t_mp->sbm_peer == s_mp);
1281 
1282 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1283 		ASSERT(t_mp->sbm_del_mlist);
1284 
1285 		PR_MEM("%s: target %s: deleted memlist:\n",
1286 			f, t_mp->sbm_cm.sbdev_path);
1287 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1288 	} else {
1289 		/* this is no target unit */
1290 		t_mp = NULL;
1291 	}
1292 
1293 	/*
1294 	 * Verify the memory really did successfully detach
1295 	 * by checking for its non-existence in phys_install.
1296 	 */
1297 	rv = 0;
1298 	memlist_read_lock();
1299 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1300 		x_mp = s_mp;
1301 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1302 	}
1303 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1304 		x_mp = t_mp;
1305 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1306 	}
1307 	memlist_read_unlock();
1308 
1309 	if (rv) {
1310 		/* error: memlist still in phys_install */
1311 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1312 	}
1313 
1314 	/*
1315 	 * clean mem unit state and bail out if an error has been recorded.
1316 	 */
1317 	rv = 0;
1318 	if (s_mp->sbm_cm.sbdev_error) {
1319 		PR_MEM("%s: %s flags=%x", f,
1320 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1321 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1322 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1323 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1324 		rv = -1;
1325 	}
1326 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1327 		PR_MEM("%s: %s flags=%x", f,
1328 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1329 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1330 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1331 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1332 		rv = -1;
1333 	}
1334 	if (rv)
1335 		goto cleanup;
1336 
1337 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1338 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1339 	ASSERT(err == NULL);
1340 	s_new_basepa = minfo.mi_basepa;
1341 
1342 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1343 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1344 
1345 	if (t_mp != NULL) {
1346 		struct memlist *s_copy_mlist;
1347 
1348 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1349 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1350 		ASSERT(err == NULL);
1351 		t_new_basepa = minfo.mi_basepa;
1352 
1353 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1354 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1355 
1356 		/*
1357 		 * Construct copy list with original source addresses.
1358 		 * Used to add back excess target mem.
1359 		 */
1360 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1361 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1362 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1363 			    ml->address, ml->size);
1364 		}
1365 
1366 		PR_MEM("%s: source copy list:\n:", f);
1367 		PR_MEMLIST_DUMP(s_copy_mlist);
1368 
1369 		/*
1370 		 * We had to swap mem-units, so update
1371 		 * memlists accordingly with new base
1372 		 * addresses.
1373 		 */
1374 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
1375 			ml->address -= t_old_basepa;
1376 			ml->address += t_new_basepa;
1377 		}
1378 
1379 		/*
1380 		 * There is no need to explicitly rename the target delete
1381 		 * memlist, because sbm_del_mlist and sbm_mlist always
1382 		 * point to the same memlist for a copy/rename operation.
1383 		 */
1384 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1385 
1386 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1387 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1388 
1389 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
1390 			ml->address -= s_old_basepa;
1391 			ml->address += s_new_basepa;
1392 		}
1393 
1394 		PR_MEM("%s: renamed source memlist:\n", f);
1395 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1396 		PR_MEM("%s: source dyn seg memlist:\n", f);
1397 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
1398 
1399 		/*
1400 		 * Keep track of dynamically added segments
1401 		 * since they cannot be split if we need to delete
1402 		 * excess source memory later for this board.
1403 		 */
1404 		if (t_mp->sbm_dyn_segs)
1405 			memlist_delete(t_mp->sbm_dyn_segs);
1406 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1407 		s_mp->sbm_dyn_segs = NULL;
1408 
1409 		/*
1410 		 * Add back excess target memory.
1411 		 * Subtract out the portion of the target memory
1412 		 * node that was taken over by the source memory
1413 		 * node.
1414 		 */
1415 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1416 		for (ml = s_copy_mlist; ml; ml = ml->next) {
1417 			t_excess_mlist =
1418 			    memlist_del_span(t_excess_mlist,
1419 			    ml->address, ml->size);
1420 		}
1421 		PR_MEM("%s: excess memlist:\n", f);
1422 		PR_MEMLIST_DUMP(t_excess_mlist);
1423 
1424 		/*
1425 		 * Update dynamically added segs
1426 		 */
1427 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1428 			t_mp->sbm_dyn_segs =
1429 			    memlist_del_span(t_mp->sbm_dyn_segs,
1430 			    ml->address, ml->size);
1431 		}
1432 		for (ml = t_excess_mlist; ml; ml = ml->next) {
1433 			t_mp->sbm_dyn_segs =
1434 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1435 			    ml->address, ml->size);
1436 		}
1437 		PR_MEM("%s: %s: updated dynamic seg list:\n",
1438 		    f, t_mp->sbm_cm.sbdev_path);
1439 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1440 
1441 		if (t_excess_mlist != NULL) {
1442 			err = dr_process_excess_mlist(s_mp, t_mp,
1443 				t_excess_mlist);
1444 			s_excess_mem_deleted = 1;
1445 		}
1446 
1447 		memlist_delete(s_copy_mlist);
1448 
1449 #ifdef DEBUG
1450 		/*
1451 		 * s_mp->sbm_del_mlist may still needed
1452 		 */
1453 		PR_MEM("%s: source delete memeory flag %d",
1454 		    f, s_excess_mem_deleted);
1455 		PR_MEM("%s: source delete memlist", f);
1456 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1457 #endif
1458 
1459 	}
1460 
1461 	if (t_mp != NULL) {
1462 		/* delete target's entire address space */
1463 		err = drmach_mem_del_span(
1464 			t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1465 		if (err)
1466 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1467 		ASSERT(err == NULL);
1468 
1469 		/*
1470 		 * After the copy/rename, the original address space
1471 		 * for the source board (which is now located on the
1472 		 * target board) may now have some excess to be deleted.
1473 		 * Those excess memory on the source board are kept in
1474 		 * source board's sbm_del_mlist
1475 		 */
1476 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1477 			ml = ml->next) {
1478 			PR_MEM("%s: delete source excess memory", f);
1479 			PR_MEMLIST_DUMP(ml);
1480 
1481 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1482 				ml->address, ml->size);
1483 			if (err)
1484 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1485 			ASSERT(err == NULL);
1486 		}
1487 
1488 	} else {
1489 		/* delete board's entire address space */
1490 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1491 						s_old_basepa & ~ sm, sz);
1492 		if (err)
1493 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1494 		ASSERT(err == NULL);
1495 	}
1496 
1497 cleanup:
1498 	/* clean up target mem unit */
1499 	if (t_mp != NULL) {
1500 		memlist_delete(t_mp->sbm_del_mlist);
1501 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
1502 
1503 		t_mp->sbm_del_mlist = NULL;
1504 		t_mp->sbm_mlist = NULL;
1505 		t_mp->sbm_peer = NULL;
1506 		t_mp->sbm_flags = 0;
1507 		t_mp->sbm_cm.sbdev_busy = 0;
1508 		dr_init_mem_unit_data(t_mp);
1509 
1510 	}
1511 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1512 		/*
1513 		 * now that copy/rename has completed, undo this
1514 		 * work that was done in dr_release_mem_done.
1515 		 */
1516 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1517 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1518 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1519 	}
1520 
1521 	/*
1522 	 * clean up (source) board's mem unit structure.
1523 	 * NOTE: sbm_mlist is retained if no error has been record (in other
1524 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1525 	 * referred to elsewhere as the cached memlist.  The cached memlist
1526 	 * is used to re-attach (configure back in) this memunit from the
1527 	 * unconfigured state.  The memlist is retained because it may
1528 	 * represent bad pages that were detected while the memory was
1529 	 * configured into the OS.  The OS deletes bad pages from phys_install.
1530 	 * Those deletes, if any, will be represented in the cached mlist.
1531 	 */
1532 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1533 		memlist_delete(s_mp->sbm_del_mlist);
1534 
1535 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1536 		memlist_delete(s_mp->sbm_mlist);
1537 		s_mp->sbm_mlist = NULL;
1538 	}
1539 
1540 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1541 		memlist_delete(s_mp->sbm_dyn_segs);
1542 		s_mp->sbm_dyn_segs = NULL;
1543 	}
1544 
1545 	s_mp->sbm_del_mlist = NULL;
1546 	s_mp->sbm_peer = NULL;
1547 	s_mp->sbm_flags = 0;
1548 	s_mp->sbm_cm.sbdev_busy = 0;
1549 	dr_init_mem_unit_data(s_mp);
1550 
1551 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1552 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1553 
1554 	return (0);
1555 }
1556 
1557 /*
1558  * Successful return from this function will have the memory
1559  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1560  * and waiting.  This routine's job is to select the memory that
1561  * actually has to be released (detached) which may not necessarily
1562  * be the same memory node that came in in devlist[],
1563  * i.e. a copy-rename is needed.
1564  */
1565 int
1566 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1567 {
1568 	int		d;
1569 	int		err_flag = 0;
1570 	static fn_t	f = "dr_pre_release_mem";
1571 
1572 	PR_MEM("%s...\n", f);
1573 
1574 	for (d = 0; d < devnum; d++) {
1575 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1576 		int		rv;
1577 		memquery_t	mq;
1578 		struct memlist	*ml;
1579 
1580 		if (mp->sbm_cm.sbdev_error) {
1581 			err_flag = 1;
1582 			continue;
1583 		} else if (!kcage_on) {
1584 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1585 			err_flag = 1;
1586 			continue;
1587 		}
1588 
1589 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1590 			/*
1591 			 * Board is currently involved in a delete
1592 			 * memory operation. Can't detach this guy until
1593 			 * that operation completes.
1594 			 */
1595 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1596 			err_flag = 1;
1597 			break;
1598 		}
1599 
1600 		/* flags should be clean at this time */
1601 		ASSERT(mp->sbm_flags == 0);
1602 
1603 		ASSERT(mp->sbm_mlist == NULL);
1604 		ASSERT(mp->sbm_del_mlist == NULL);
1605 		if (mp->sbm_mlist != NULL) {
1606 			memlist_delete(mp->sbm_mlist);
1607 			mp->sbm_mlist = NULL;
1608 		}
1609 
1610 		ml = dr_get_memlist(mp);
1611 		if (ml == NULL) {
1612 			err_flag = 1;
1613 			PR_MEM("%s: no memlist found for %s\n",
1614 			    f, mp->sbm_cm.sbdev_path);
1615 			continue;
1616 		}
1617 
1618 		/*
1619 		 * Check whether the detaching memory requires a
1620 		 * copy-rename.
1621 		 */
1622 		ASSERT(mp->sbm_npages != 0);
1623 
1624 		rv = dr_del_mlist_query(ml, &mq);
1625 		if (rv != KPHYSM_OK) {
1626 			memlist_delete(ml);
1627 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1628 			err_flag = 1;
1629 			break;
1630 		}
1631 
1632 		if (mq.nonrelocatable != 0) {
1633 			if (!(dr_cmd_flags(hp) &
1634 				(SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1635 				memlist_delete(ml);
1636 				/* caller wasn't prompted for a suspend */
1637 				dr_dev_err(CE_WARN, &mp->sbm_cm,
1638 					ESBD_QUIESCE_REQD);
1639 				err_flag = 1;
1640 				break;
1641 			}
1642 		}
1643 
1644 		/* allocate a kphysm handle */
1645 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1646 		if (rv != KPHYSM_OK) {
1647 			memlist_delete(ml);
1648 
1649 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1650 			err_flag = 1;
1651 			break;
1652 		}
1653 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
1654 
1655 		if ((mq.nonrelocatable != 0) ||
1656 			dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1657 			/*
1658 			 * Either the detaching memory node contains
1659 			 * non-reloc memory or we failed to reserve the
1660 			 * detaching memory node (which did _not_ have
1661 			 * any non-reloc memory, i.e. some non-reloc mem
1662 			 * got onboard).
1663 			 */
1664 
1665 			if (dr_select_mem_target(hp, mp, ml)) {
1666 				int rv;
1667 
1668 				/*
1669 				 * We had no luck locating a target
1670 				 * memory node to be the recipient of
1671 				 * the non-reloc memory on the node
1672 				 * we're trying to detach.
1673 				 * Clean up be disposing the mem handle
1674 				 * and the mem list.
1675 				 */
1676 				rv = kphysm_del_release(mp->sbm_memhandle);
1677 				if (rv != KPHYSM_OK) {
1678 					/*
1679 					 * can do nothing but complain
1680 					 * and hope helpful for debug
1681 					 */
1682 					cmn_err(CE_WARN, "%s: unexpected"
1683 						" kphysm_del_release return"
1684 						" value %d",
1685 						f, rv);
1686 				}
1687 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1688 
1689 				memlist_delete(ml);
1690 
1691 				/* make sure sbm_flags is clean */
1692 				ASSERT(mp->sbm_flags == 0);
1693 
1694 				dr_dev_err(CE_WARN,
1695 					&mp->sbm_cm, ESBD_NO_TARGET);
1696 
1697 				err_flag = 1;
1698 				break;
1699 			}
1700 
1701 			/*
1702 			 * ml is not memlist_delete'd here because
1703 			 * it has been assigned to mp->sbm_mlist
1704 			 * by dr_select_mem_target.
1705 			 */
1706 		} else {
1707 			/* no target needed to detach this board */
1708 			mp->sbm_flags |= DR_MFLAG_RESERVED;
1709 			mp->sbm_peer = NULL;
1710 			mp->sbm_del_mlist = ml;
1711 			mp->sbm_mlist = ml;
1712 			mp->sbm_cm.sbdev_busy = 1;
1713 		}
1714 #ifdef DEBUG
1715 		ASSERT(mp->sbm_mlist != NULL);
1716 
1717 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1718 			PR_MEM("%s: release of %s requires copy/rename;"
1719 				" selected target board %s\n",
1720 				f,
1721 				mp->sbm_cm.sbdev_path,
1722 				mp->sbm_peer->sbm_cm.sbdev_path);
1723 		} else {
1724 			PR_MEM("%s: copy/rename not required to release %s\n",
1725 				f, mp->sbm_cm.sbdev_path);
1726 		}
1727 
1728 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1729 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1730 #endif
1731 	}
1732 
1733 	return (err_flag ? -1 : 0);
1734 }
1735 
1736 void
1737 dr_release_mem_done(dr_common_unit_t *cp)
1738 {
1739 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
1740 	dr_mem_unit_t *t_mp, *mp;
1741 	int		rv;
1742 	static fn_t	f = "dr_release_mem_done";
1743 
1744 	/*
1745 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1746 	 * has a target unit.
1747 	 */
1748 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1749 		t_mp = s_mp->sbm_peer;
1750 		ASSERT(t_mp != NULL);
1751 		ASSERT(t_mp->sbm_peer == s_mp);
1752 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1753 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1754 	} else {
1755 		/* this is no target unit */
1756 		t_mp = NULL;
1757 	}
1758 
1759 	/* free delete handle */
1760 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1761 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1762 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1763 	if (rv != KPHYSM_OK) {
1764 		/*
1765 		 * can do nothing but complain
1766 		 * and hope helpful for debug
1767 		 */
1768 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1769 			" return value %d", f, rv);
1770 	}
1771 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1772 
1773 	/*
1774 	 * If an error was encountered during release, clean up
1775 	 * the source (and target, if present) unit data.
1776 	 */
1777 /* XXX Can we know that sbdev_error was encountered during release? */
1778 	if (s_mp->sbm_cm.sbdev_error != NULL) {
1779 		PR_MEM("%s: %s: error %d noted\n",
1780 			f,
1781 			s_mp->sbm_cm.sbdev_path,
1782 			s_mp->sbm_cm.sbdev_error->e_code);
1783 
1784 		if (t_mp != NULL) {
1785 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1786 			t_mp->sbm_del_mlist = NULL;
1787 
1788 			if (t_mp->sbm_mlist != NULL) {
1789 				memlist_delete(t_mp->sbm_mlist);
1790 				t_mp->sbm_mlist = NULL;
1791 			}
1792 
1793 			t_mp->sbm_peer = NULL;
1794 			t_mp->sbm_flags = 0;
1795 			t_mp->sbm_cm.sbdev_busy = 0;
1796 		}
1797 
1798 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1799 			memlist_delete(s_mp->sbm_del_mlist);
1800 		s_mp->sbm_del_mlist = NULL;
1801 
1802 		if (s_mp->sbm_mlist != NULL) {
1803 			memlist_delete(s_mp->sbm_mlist);
1804 			s_mp->sbm_mlist = NULL;
1805 		}
1806 
1807 		s_mp->sbm_peer = NULL;
1808 		s_mp->sbm_flags = 0;
1809 		s_mp->sbm_cm.sbdev_busy = 0;
1810 
1811 		/* bail out */
1812 		return;
1813 	}
1814 
1815 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1816 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1817 
1818 	if (t_mp != NULL) {
1819 		/*
1820 		 * the kphysm delete operation that drained the source
1821 		 * board also drained this target board.  Since the source
1822 		 * board drain is now known to have succeeded, we know this
1823 		 * target board is drained too.
1824 		 *
1825 		 * because DR_DEV_SET_RELEASED and dr_device_transition
1826 		 * is done here, the dr_release_dev_done should not
1827 		 * fail.
1828 		 */
1829 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1830 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1831 
1832 		/*
1833 		 * NOTE: do not transition target's board state,
1834 		 * even if the mem-unit was the last configure
1835 		 * unit of the board.  When copy/rename completes
1836 		 * this mem-unit will transitioned back to
1837 		 * the configured state.  In the meantime, the
1838 		 * board's must remain as is.
1839 		 */
1840 	}
1841 
1842 	/* if board(s) had deleted memory, verify it is gone */
1843 	rv = 0;
1844 	memlist_read_lock();
1845 	if (s_mp->sbm_del_mlist != NULL) {
1846 		mp = s_mp;
1847 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1848 	}
1849 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1850 		mp = t_mp;
1851 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1852 	}
1853 	memlist_read_unlock();
1854 	if (rv) {
1855 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1856 			"deleted memory still found in phys_install",
1857 			f,
1858 			(mp == t_mp ? "target " : ""),
1859 			mp->sbm_cm.sbdev_bp->b_num,
1860 			mp->sbm_cm.sbdev_unum);
1861 
1862 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1863 		return;
1864 	}
1865 
1866 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1867 	if (t_mp != NULL)
1868 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1869 
1870 	/* this should not fail */
1871 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1872 		/* catch this in debug kernels */
1873 		ASSERT(0);
1874 		return;
1875 	}
1876 
1877 	PR_MEM("%s: marking %s release DONE\n",
1878 		f, s_mp->sbm_cm.sbdev_path);
1879 
1880 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1881 
1882 	if (t_mp != NULL) {
1883 		/* should not fail */
1884 		rv = dr_release_dev_done(&t_mp->sbm_cm);
1885 		if (rv != 0) {
1886 			/* catch this in debug kernels */
1887 			ASSERT(0);
1888 			return;
1889 		}
1890 
1891 		PR_MEM("%s: marking %s release DONE\n",
1892 			f, t_mp->sbm_cm.sbdev_path);
1893 
1894 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1895 	}
1896 }
1897 
1898 /*ARGSUSED*/
1899 int
1900 dr_disconnect_mem(dr_mem_unit_t *mp)
1901 {
1902 	static fn_t	f = "dr_disconnect_mem";
1903 	update_membounds_t umb;
1904 
1905 #ifdef DEBUG
1906 	int state = mp->sbm_cm.sbdev_state;
1907 	ASSERT(state == DR_STATE_CONNECTED ||
1908 		state == DR_STATE_UNCONFIGURED);
1909 #endif
1910 
1911 	PR_MEM("%s...\n", f);
1912 
1913 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1914 		memlist_delete(mp->sbm_del_mlist);
1915 	mp->sbm_del_mlist = NULL;
1916 
1917 	if (mp->sbm_mlist) {
1918 		memlist_delete(mp->sbm_mlist);
1919 		mp->sbm_mlist = NULL;
1920 	}
1921 
1922 	/*
1923 	 * Remove memory from lgroup
1924 	 * For now, only board info is required.
1925 	 */
1926 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1927 	umb.u_base = (uint64_t)-1;
1928 	umb.u_len = (uint64_t)-1;
1929 
1930 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1931 
1932 	return (0);
1933 }
1934 
1935 int
1936 dr_cancel_mem(dr_mem_unit_t *s_mp)
1937 {
1938 	dr_mem_unit_t	*t_mp;
1939 	dr_state_t	state;
1940 	static fn_t	f = "dr_cancel_mem";
1941 
1942 	state = s_mp->sbm_cm.sbdev_state;
1943 
1944 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1945 		/* must cancel source board, not target board */
1946 		/* TODO: set error */
1947 		return (-1);
1948 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1949 		t_mp = s_mp->sbm_peer;
1950 		ASSERT(t_mp != NULL);
1951 		ASSERT(t_mp->sbm_peer == s_mp);
1952 
1953 		/* must always match the source board's state */
1954 		/* TODO: is this assertion correct? */
1955 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1956 	} else {
1957 		/* this is no target unit */
1958 		t_mp = NULL;
1959 	}
1960 
1961 	switch (state) {
1962 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
1963 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1964 
1965 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1966 			PR_MEM("%s: undoing target %s memory delete\n",
1967 				f, t_mp->sbm_cm.sbdev_path);
1968 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1969 
1970 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1971 		}
1972 
1973 		if (s_mp->sbm_del_mlist != NULL) {
1974 			PR_MEM("%s: undoing %s memory delete\n",
1975 				f, s_mp->sbm_cm.sbdev_path);
1976 
1977 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1978 		}
1979 
1980 		/*FALLTHROUGH*/
1981 
1982 /* TODO: should no longer be possible to see the release state here */
1983 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
1984 
1985 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1986 
1987 		if (t_mp != NULL) {
1988 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1989 			t_mp->sbm_del_mlist = NULL;
1990 
1991 			if (t_mp->sbm_mlist != NULL) {
1992 				memlist_delete(t_mp->sbm_mlist);
1993 				t_mp->sbm_mlist = NULL;
1994 			}
1995 
1996 			t_mp->sbm_peer = NULL;
1997 			t_mp->sbm_flags = 0;
1998 			t_mp->sbm_cm.sbdev_busy = 0;
1999 			dr_init_mem_unit_data(t_mp);
2000 
2001 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
2002 
2003 			dr_device_transition(
2004 				&t_mp->sbm_cm, DR_STATE_CONFIGURED);
2005 		}
2006 
2007 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
2008 			memlist_delete(s_mp->sbm_del_mlist);
2009 		s_mp->sbm_del_mlist = NULL;
2010 
2011 		if (s_mp->sbm_mlist != NULL) {
2012 			memlist_delete(s_mp->sbm_mlist);
2013 			s_mp->sbm_mlist = NULL;
2014 		}
2015 
2016 		s_mp->sbm_peer = NULL;
2017 		s_mp->sbm_flags = 0;
2018 		s_mp->sbm_cm.sbdev_busy = 0;
2019 		dr_init_mem_unit_data(s_mp);
2020 
2021 		return (0);
2022 
2023 	default:
2024 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
2025 			f, (int)state, s_mp->sbm_cm.sbdev_path);
2026 
2027 		return (-1);
2028 	}
2029 	/*NOTREACHED*/
2030 }
2031 
2032 void
2033 dr_init_mem_unit(dr_mem_unit_t *mp)
2034 {
2035 	dr_state_t	new_state;
2036 
2037 
2038 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
2039 		new_state = DR_STATE_CONFIGURED;
2040 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2041 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2042 		new_state = DR_STATE_CONNECTED;
2043 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2044 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2045 		new_state = DR_STATE_OCCUPIED;
2046 	} else {
2047 		new_state = DR_STATE_EMPTY;
2048 	}
2049 
2050 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2051 		dr_init_mem_unit_data(mp);
2052 
2053 	/* delay transition until fully initialized */
2054 	dr_device_transition(&mp->sbm_cm, new_state);
2055 }
2056 
2057 static void
2058 dr_init_mem_unit_data(dr_mem_unit_t *mp)
2059 {
2060 	drmachid_t	id = mp->sbm_cm.sbdev_id;
2061 	drmach_mem_info_t	minfo;
2062 	sbd_error_t	*err;
2063 	static fn_t	f = "dr_init_mem_unit_data";
2064 	update_membounds_t umb;
2065 
2066 	PR_MEM("%s...\n", f);
2067 
2068 	/* a little sanity checking */
2069 	ASSERT(mp->sbm_peer == NULL);
2070 	ASSERT(mp->sbm_flags == 0);
2071 
2072 	if (err = drmach_mem_get_info(id, &minfo)) {
2073 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2074 		return;
2075 	}
2076 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2077 	mp->sbm_npages = _b64top(minfo.mi_size);
2078 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2079 	mp->sbm_slice_size = minfo.mi_slice_size;
2080 
2081 	/*
2082 	 * Add memory to lgroup
2083 	 */
2084 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2085 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2086 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2087 
2088 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2089 
2090 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2091 		f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2092 }
2093 
2094 static int
2095 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2096 {
2097 	int		err;
2098 	pfn_t		base;
2099 	pgcnt_t		npgs;
2100 	struct memlist	*mc;
2101 	static fn_t	f = "dr_reserve_mem_spans";
2102 
2103 	PR_MEM("%s...\n", f);
2104 
2105 	/*
2106 	 * Walk the supplied memlist scheduling each span for removal
2107 	 * with kphysm_del_span.  It is possible that a span may intersect
2108 	 * an area occupied by the cage.
2109 	 */
2110 	for (mc = ml; mc != NULL; mc = mc->next) {
2111 		base = _b64top(mc->address);
2112 		npgs = _b64top(mc->size);
2113 
2114 		err = kphysm_del_span(*mhp, base, npgs);
2115 		if (err != KPHYSM_OK) {
2116 			cmn_err(CE_WARN, "%s memory reserve failed."
2117 				" unexpected kphysm_del_span return value %d;"
2118 				" basepfn=0x%lx npages=%ld",
2119 				f, err, base, npgs);
2120 
2121 			return (-1);
2122 		}
2123 	}
2124 
2125 	return (0);
2126 }
2127 
/*
 * Sizing of the candidate table used when selecting a copy/rename
 * target: DR_SMT_NPREF_SETS preference sets, each with one slot per
 * memory unit in the system.
 *
 * DR_SMT_NUNITS_PER_SET is parenthesized so that it expands as a
 * single value inside any larger expression (division, subtraction,
 * etc.), not only in the multiplications it is used in today.
 */
#define	DR_SMT_NPREF_SETS	6
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters */
int dr_smt_realigned;
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
2138 
2139 /*
2140  * Find and reserve a copy/rename target board suitable for the
2141  * given source board.
2142  * All boards in the system are examined and categorized in relation to
2143  * their memory size versus the source board's memory size.  Order of
2144  * preference is:
2145  *	1st copy all source, source/target same size
2146  *	2nd copy all source, larger target
2147  * 	3rd copy nonrelocatable source span
2148  */
2149 static int
2150 dr_select_mem_target(dr_handle_t *hp,
2151 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
2152 {
2153 	dr_target_pref_t preference; /* lower value is higher preference */
2154 	int		idx;
2155 	dr_mem_unit_t	**sets;
2156 
2157 	int		t_bd;
2158 	int		t_unit;
2159 	int		rv;
2160 	dr_board_t	*s_bp, *t_bp;
2161 	dr_mem_unit_t	*t_mp, *c_mp;
2162 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2163 	memquery_t	s_mq = {0};
2164 	static fn_t	f = "dr_select_mem_target";
2165 
2166 	PR_MEM("%s...\n", f);
2167 
2168 	ASSERT(s_ml != NULL);
2169 
2170 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2171 	    DR_SMT_NPREF_SETS);
2172 
2173 	s_bp = hp->h_bd;
2174 	/* calculate the offset into the slice of the last source board pfn */
2175 	ASSERT(s_mp->sbm_npages != 0);
2176 
2177 	/*
2178 	 * Find non-relocatable span on source board.
2179 	 */
2180 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2181 	if (rv != KPHYSM_OK) {
2182 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2183 		    " return value %d; basepfn 0x%lx, npages %ld\n",
2184 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2185 		    s_mp->sbm_npages);
2186 		return (-1);
2187 	}
2188 
2189 	ASSERT(s_mq.phys_pages != 0);
2190 	ASSERT(s_mq.nonrelocatable != 0);
2191 
2192 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2193 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2194 	    s_mq.last_nonrelocatable);
2195 
2196 	/* break down s_ml if it contains dynamic segments */
2197 	b_ml = memlist_dup(s_ml);
2198 
2199 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
2200 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
2201 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
2202 	}
2203 
2204 
2205 	/*
2206 	 * Make one pass through all memory units on all boards
2207 	 * and categorize them with respect to the source board.
2208 	 */
2209 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2210 		/*
2211 		 * The board structs are a contiguous array
2212 		 * so we take advantage of that to find the
2213 		 * correct board struct pointer for a given
2214 		 * board number.
2215 		 */
2216 		t_bp = dr_lookup_board(t_bd);
2217 
2218 		/* source board can not be its own target */
2219 		if (s_bp->b_num == t_bp->b_num)
2220 			continue;
2221 
2222 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2223 
2224 			t_mp = dr_get_mem_unit(t_bp, t_unit);
2225 
2226 			/* this memory node must be attached */
2227 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2228 				continue;
2229 
2230 			/* source unit can not be its own target */
2231 			if (s_mp == t_mp) {
2232 				/* catch this is debug kernels */
2233 				ASSERT(0);
2234 				continue;
2235 			}
2236 
2237 			/*
2238 			 * this memory node must not already be reserved
2239 			 * by some other memory delete operation.
2240 			 */
2241 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2242 				continue;
2243 
2244 			/* get target board memlist */
2245 			t_ml = dr_get_memlist(t_mp);
2246 			if (t_ml == NULL) {
2247 				cmn_err(CE_WARN, "%s: no memlist for"
2248 				    " mem-unit %d, board %d", f,
2249 				    t_mp->sbm_cm.sbdev_bp->b_num,
2250 				    t_mp->sbm_cm.sbdev_unum);
2251 				continue;
2252 			}
2253 
2254 			preference = dr_get_target_preference(hp, t_mp, s_mp,
2255 			    t_ml, s_ml, b_ml);
2256 
2257 			if (preference == DR_TP_INVALID)
2258 				continue;
2259 
2260 			dr_smt_preference[preference]++;
2261 
2262 			/* calculate index to start of preference set */
2263 			idx  = DR_SMT_NUNITS_PER_SET * preference;
2264 			/* calculate offset to respective element */
2265 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2266 
2267 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2268 			sets[idx] = t_mp;
2269 		}
2270 	}
2271 
2272 	if (b_ml != NULL)
2273 		memlist_delete(b_ml);
2274 
2275 	/*
2276 	 * NOTE: this would be a good place to sort each candidate
2277 	 * set in to some desired order, e.g. memory size in ascending
2278 	 * order.  Without an additional sorting step here, the order
2279 	 * within a set is ascending board number order.
2280 	 */
2281 
2282 	c_mp = NULL;
2283 	x_ml = NULL;
2284 	t_ml = NULL;
2285 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2286 		memquery_t mq;
2287 
2288 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2289 
2290 		ASSERT(preference != DR_TP_INVALID);
2291 
2292 		/* cleanup t_ml after previous pass */
2293 		if (t_ml != NULL) {
2294 			memlist_delete(t_ml);
2295 			t_ml = NULL;
2296 		}
2297 
2298 		/* get candidate target board mem unit */
2299 		t_mp = sets[idx];
2300 		if (t_mp == NULL)
2301 			continue;
2302 
2303 		/* get target board memlist */
2304 		t_ml = dr_get_memlist(t_mp);
2305 		if (t_ml == NULL) {
2306 			cmn_err(CE_WARN, "%s: no memlist for"
2307 				" mem-unit %d, board %d",
2308 				f,
2309 				t_mp->sbm_cm.sbdev_bp->b_num,
2310 				t_mp->sbm_cm.sbdev_unum);
2311 
2312 			continue;
2313 		}
2314 
2315 		PR_MEM("%s: checking for no-reloc in %s, "
2316 			" basepfn=0x%lx, npages=%ld\n",
2317 			f,
2318 			t_mp->sbm_cm.sbdev_path,
2319 			t_mp->sbm_basepfn,
2320 			t_mp->sbm_npages);
2321 
2322 		rv = dr_del_mlist_query(t_ml, &mq);
2323 		if (rv != KPHYSM_OK) {
2324 			PR_MEM("%s: kphysm_del_span_query:"
2325 				" unexpected return value %d\n", f, rv);
2326 
2327 			continue;
2328 		}
2329 
2330 		if (mq.nonrelocatable != 0) {
2331 			PR_MEM("%s: candidate %s has"
2332 				" nonrelocatable span [0x%lx..0x%lx]\n",
2333 				f,
2334 				t_mp->sbm_cm.sbdev_path,
2335 				mq.first_nonrelocatable,
2336 				mq.last_nonrelocatable);
2337 
2338 			continue;
2339 		}
2340 
2341 #ifdef DEBUG
2342 		/*
2343 		 * This is a debug tool for excluding certain boards
2344 		 * from being selected as a target board candidate.
2345 		 * dr_ignore_board is only tested by this driver.
2346 		 * It must be set with adb, obp, /etc/system or your
2347 		 * favorite debugger.
2348 		 */
2349 		if (dr_ignore_board &
2350 			(1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2351 			PR_MEM("%s: dr_ignore_board flag set,"
2352 				" ignoring %s as candidate\n",
2353 				f, t_mp->sbm_cm.sbdev_path);
2354 			continue;
2355 		}
2356 #endif
2357 
2358 		/*
2359 		 * Reserve excess source board memory, if any.
2360 		 *
2361 		 * Only the nonrelocatable source span will be copied
2362 		 * so schedule the rest of the source mem to be deleted.
2363 		 */
2364 		switch (preference) {
2365 		case DR_TP_NONRELOC:
2366 			/*
2367 			 * Get source copy memlist and use it to construct
2368 			 * delete memlist.
2369 			 */
2370 			d_ml = memlist_dup(s_ml);
2371 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2372 
2373 			/* XXX */
2374 			ASSERT(d_ml != NULL);
2375 			ASSERT(x_ml != NULL);
2376 
2377 			for (ml = x_ml; ml != NULL; ml = ml->next) {
2378 				d_ml = memlist_del_span(d_ml, ml->address,
2379 				    ml->size);
2380 			}
2381 
2382 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2383 			    s_mp->sbm_cm.sbdev_path);
2384 			PR_MEMLIST_DUMP(d_ml);
2385 
2386 			/* reserve excess spans */
2387 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2388 			    d_ml) != 0) {
2389 				/* likely more non-reloc pages appeared */
2390 				/* TODO: restart from top? */
2391 				continue;
2392 			}
2393 			break;
2394 		default:
2395 			d_ml = NULL;
2396 			break;
2397 		}
2398 
2399 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2400 
2401 		/*
2402 		 * reserve all memory on target board.
2403 		 * NOTE: source board's memhandle is used.
2404 		 *
2405 		 * If this succeeds (eq 0), then target selection is
2406 		 * complete and all unwanted memory spans, both source and
2407 		 * target, have been reserved.  Loop is terminated.
2408 		 */
2409 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2410 			PR_MEM("%s: %s: target board memory reserved\n",
2411 				f, t_mp->sbm_cm.sbdev_path);
2412 
2413 			/* a candidate target board is now reserved */
2414 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2415 			c_mp = t_mp;
2416 
2417 			/* *** EXITING LOOP *** */
2418 			break;
2419 		}
2420 
2421 		/* did not successfully reserve the target board. */
2422 		PR_MEM("%s: could not reserve target %s\n",
2423 			f, t_mp->sbm_cm.sbdev_path);
2424 
2425 		/*
2426 		 * NOTE: an undo of the dr_reserve_mem_span work
2427 		 * will happen automatically when the memhandle
2428 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2429 		 */
2430 
2431 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2432 	}
2433 
2434 	/* clean up after memlist editing logic */
2435 	if (x_ml != NULL)
2436 		memlist_delete(x_ml);
2437 
2438 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2439 	    DR_SMT_NPREF_SETS);
2440 
2441 	/*
2442 	 * c_mp will be NULL when the entire sets[] array
2443 	 * has been searched without reserving a target board.
2444 	 */
2445 	if (c_mp == NULL) {
2446 		PR_MEM("%s: %s: target selection failed.\n",
2447 			f, s_mp->sbm_cm.sbdev_path);
2448 
2449 		if (t_ml != NULL)
2450 			memlist_delete(t_ml);
2451 
2452 		return (-1);
2453 	}
2454 
2455 	PR_MEM("%s: found target %s for source %s\n",
2456 		f,
2457 		c_mp->sbm_cm.sbdev_path,
2458 		s_mp->sbm_cm.sbdev_path);
2459 
2460 	s_mp->sbm_peer = c_mp;
2461 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2462 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2463 	s_mp->sbm_mlist = s_ml;
2464 	s_mp->sbm_cm.sbdev_busy = 1;
2465 
2466 	c_mp->sbm_peer = s_mp;
2467 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
2468 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2469 	c_mp->sbm_mlist = t_ml;
2470 	c_mp->sbm_cm.sbdev_busy = 1;
2471 
2472 	return (0);
2473 }
2474 
2475 /*
2476  * Returns target preference rank:
2477  *     -1 not a valid copy-rename target board
2478  *	0 copy all source, source/target same size
2479  *	1 copy all source, larger target
2480  * 	2 copy nonrelocatable source span
2481  */
2482 static dr_target_pref_t
2483 dr_get_target_preference(dr_handle_t *hp,
2484     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2485     struct memlist *t_ml, struct memlist *s_ml,
2486     struct memlist *b_ml)
2487 {
2488 	dr_target_pref_t preference;
2489 	struct memlist *s_nonreloc_ml = NULL;
2490 	drmachid_t t_id;
2491 	static fn_t	f = "dr_get_target_preference";
2492 
2493 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2494 
2495 	/*
2496 	 * Can the entire source board be copied?
2497 	 */
2498 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2499 		if (s_mp->sbm_npages == t_mp->sbm_npages)
2500 			preference = DR_TP_SAME;	/* same size */
2501 		else
2502 			preference = DR_TP_LARGE;	/* larger target */
2503 	} else {
2504 		/*
2505 		 * Entire source won't fit so try non-relocatable memory only
2506 		 * (target aligned).
2507 		 */
2508 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2509 		if (s_nonreloc_ml == NULL) {
2510 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2511 			preference = DR_TP_INVALID;
2512 		}
2513 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
2514 			preference = DR_TP_NONRELOC;
2515 		else
2516 			preference = DR_TP_INVALID;
2517 	}
2518 
2519 	if (s_nonreloc_ml != NULL)
2520 		memlist_delete(s_nonreloc_ml);
2521 
2522 	/*
2523 	 * Force floating board preference lower than all other boards
2524 	 * if the force flag is present; otherwise disallow the board.
2525 	 */
2526 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2527 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2528 			preference += DR_TP_FLOATING;
2529 		else
2530 			preference = DR_TP_INVALID;
2531 	}
2532 
2533 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2534 	    preference);
2535 
2536 	return (preference);
2537 }
2538 
2539 /*
2540  * Create a memlist representing the source memory that will be copied to
2541  * the target board.  The memory to be copied is the maximum amount that
2542  * will fit on the target board.
2543  */
2544 static struct memlist *
2545 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2546     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2547 {
2548 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2549 	uint64_t	s_slice_mask, s_slice_base;
2550 	uint64_t	t_slice_mask, t_slice_base;
2551 	static fn_t	f = "dr_get_copy_mlist";
2552 
2553 	ASSERT(s_mlist != NULL);
2554 	ASSERT(t_mlist != NULL);
2555 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2556 
2557 	s_slice_mask = s_mp->sbm_slice_size - 1;
2558 	s_slice_base = s_mlist->address & ~s_slice_mask;
2559 
2560 	t_slice_mask = t_mp->sbm_slice_size - 1;
2561 	t_slice_base = t_mlist->address & ~t_slice_mask;
2562 
2563 	t_ml = memlist_dup(t_mlist);
2564 	s_del_ml = memlist_dup(s_mlist);
2565 	s_copy_ml = memlist_dup(s_mlist);
2566 
2567 	/* XXX */
2568 	ASSERT(t_ml != NULL);
2569 	ASSERT(s_del_ml != NULL);
2570 	ASSERT(s_copy_ml != NULL);
2571 
2572 	/*
2573 	 * To construct the source copy memlist:
2574 	 *
2575 	 * The target memlist is converted to the post-rename
2576 	 * source addresses.  This is the physical address range
2577 	 * the target will have after the copy-rename.  Overlaying
2578 	 * and deleting this from the current source memlist will
2579 	 * give the source delete memlist.  The copy memlist is
2580 	 * the reciprocal of the source delete memlist.
2581 	 */
2582 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2583 		/*
2584 		 * Normalize relative to target slice base PA
2585 		 * in order to preseve slice offsets.
2586 		 */
2587 		ml->address -= t_slice_base;
2588 		/*
2589 		 * Convert to source slice PA address.
2590 		 */
2591 		ml->address += s_slice_base;
2592 	}
2593 
2594 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2595 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
2596 	}
2597 
2598 	/*
2599 	 * Expand the delete mlist to fully include any dynamic segments
2600 	 * it intersects with.
2601 	 */
2602 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
2603 		uint64_t del_base = ml->address;
2604 		uint64_t del_end = ml->address + ml->size;
2605 		struct memlist *dyn;
2606 
2607 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2608 			uint64_t dyn_base = dyn->address;
2609 			uint64_t dyn_end = dyn->address + dyn->size;
2610 
2611 			if (del_base > dyn_base && del_base < dyn_end)
2612 				del_base = dyn_base;
2613 
2614 			if (del_end > dyn_base && del_end < dyn_end)
2615 				del_end = dyn_end;
2616 		}
2617 
2618 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2619 	}
2620 
2621 	memlist_delete(s_del_ml);
2622 	s_del_ml = x_ml;
2623 
2624 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
2625 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
2626 	}
2627 
2628 	PR_MEM("%s: source delete mlist\n", f);
2629 	PR_MEMLIST_DUMP(s_del_ml);
2630 
2631 	PR_MEM("%s: source copy mlist\n", f);
2632 	PR_MEMLIST_DUMP(s_copy_ml);
2633 
2634 	memlist_delete(t_ml);
2635 	memlist_delete(s_del_ml);
2636 
2637 	return (s_copy_ml);
2638 }
2639 
2640 /*
2641  * Scan the non-relocatable spans on the source memory
2642  * and construct a minimum mlist that includes all non-reloc
2643  * memory subject to target alignment, and dynamic segment
2644  * constraints where only whole dynamic segments may be deleted.
2645  */
2646 static struct memlist *
2647 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2648 {
2649 	struct memlist	*x_ml = NULL;
2650 	struct memlist	*ml;
2651 	static fn_t	f = "dr_get_nonreloc_mlist";
2652 
2653 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
2654 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2655 
2656 	for (ml = s_ml; ml; ml = ml->next) {
2657 		int rv;
2658 		uint64_t nr_base, nr_end;
2659 		memquery_t mq;
2660 		struct memlist *dyn;
2661 
2662 		rv = kphysm_del_span_query(
2663 			_b64top(ml->address), _b64top(ml->size), &mq);
2664 		if (rv) {
2665 			memlist_delete(x_ml);
2666 			return (NULL);
2667 		}
2668 
2669 		if (mq.nonrelocatable == 0)
2670 			continue;
2671 
2672 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2673 			_ptob64(mq.first_nonrelocatable),
2674 			_ptob64(mq.last_nonrelocatable),
2675 			mq.first_nonrelocatable,
2676 			mq.last_nonrelocatable);
2677 
2678 		/*
2679 		 * Align the span at both ends to allow for possible
2680 		 * cage expansion.
2681 		 */
2682 		nr_base = _ptob64(mq.first_nonrelocatable);
2683 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
2684 
2685 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2686 			f, nr_base, nr_end);
2687 
2688 		/*
2689 		 * Expand the non-reloc span to fully include any
2690 		 * dynamic segments it intersects with.
2691 		 */
2692 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2693 			uint64_t dyn_base = dyn->address;
2694 			uint64_t dyn_end = dyn->address + dyn->size;
2695 
2696 			if (nr_base > dyn_base && nr_base < dyn_end)
2697 				nr_base = dyn_base;
2698 
2699 			if (nr_end > dyn_base && nr_end < dyn_end)
2700 				nr_end = dyn_end;
2701 		}
2702 
2703 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2704 	}
2705 
2706 	if (x_ml == NULL) {
2707 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2708 		return (NULL);
2709 	}
2710 
2711 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2712 	PR_MEMLIST_DUMP(x_ml);
2713 
2714 	return (x_ml);
2715 }
2716 
2717 /*
2718  * Check if source memlist can fit in target memlist while maintaining
2719  * relative offsets within board.
2720  */
2721 static int
2722 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2723     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2724 {
2725 	int		canfit = 0;
2726 	struct memlist	*s_ml, *t_ml, *ml;
2727 	uint64_t	s_slice_mask, t_slice_mask;
2728 	static fn_t	f = "dr_mlist_canfit";
2729 
2730 	s_ml = memlist_dup(s_mlist);
2731 	t_ml = memlist_dup(t_mlist);
2732 
2733 	if (s_ml == NULL || t_ml == NULL) {
2734 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2735 		goto done;
2736 	}
2737 
2738 	s_slice_mask = s_mp->sbm_slice_size - 1;
2739 	t_slice_mask = t_mp->sbm_slice_size - 1;
2740 
2741 	/*
2742 	 * Normalize to slice relative offsets.
2743 	 */
2744 	for (ml = s_ml; ml; ml = ml->next)
2745 		ml->address &= s_slice_mask;
2746 
2747 	for (ml = t_ml; ml; ml = ml->next)
2748 		ml->address &= t_slice_mask;
2749 
2750 	canfit = memlist_canfit(s_ml, t_ml);
2751 done:
2752 	memlist_delete(s_ml);
2753 	memlist_delete(t_ml);
2754 
2755 	return (canfit);
2756 }
2757 
2758 /*
2759  * Memlist support.
2760  */
2761 
2762 /*
2763  * Determine whether the source memlist (s_mlist) will
2764  * fit into the target memlist (t_mlist) in terms of
2765  * size and holes.  Assumes the caller has normalized the
2766  * memlist physical addresses for comparison.
2767  */
2768 static int
2769 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2770 {
2771 	int		rv = 0;
2772 	struct memlist	*s_ml, *t_ml;
2773 
2774 	if ((s_mlist == NULL) || (t_mlist == NULL))
2775 		return (0);
2776 
2777 	s_ml = s_mlist;
2778 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2779 		uint64_t	s_start, s_end;
2780 		uint64_t	t_start, t_end;
2781 
2782 		t_start = t_ml->address;
2783 		t_end = t_start + t_ml->size;
2784 
2785 		for (; s_ml; s_ml = s_ml->next) {
2786 			s_start = s_ml->address;
2787 			s_end = s_start + s_ml->size;
2788 
2789 			if ((s_start < t_start) || (s_end > t_end))
2790 				break;
2791 		}
2792 	}
2793 
2794 	/*
2795 	 * If we ran out of source memlist chunks that mean
2796 	 * we found a home for all of them.
2797 	 */
2798 	if (s_ml == NULL)
2799 		rv = 1;
2800 
2801 	return (rv);
2802 }
2803