xref: /titanic_50/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
125cf1a30Sjl139090 /*
225cf1a30Sjl139090  * CDDL HEADER START
325cf1a30Sjl139090  *
425cf1a30Sjl139090  * The contents of this file are subject to the terms of the
525cf1a30Sjl139090  * Common Development and Distribution License (the "License").
625cf1a30Sjl139090  * You may not use this file except in compliance with the License.
725cf1a30Sjl139090  *
825cf1a30Sjl139090  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925cf1a30Sjl139090  * or http://www.opensolaris.org/os/licensing.
1025cf1a30Sjl139090  * See the License for the specific language governing permissions
1125cf1a30Sjl139090  * and limitations under the License.
1225cf1a30Sjl139090  *
1325cf1a30Sjl139090  * When distributing Covered Code, include this CDDL HEADER in each
1425cf1a30Sjl139090  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525cf1a30Sjl139090  * If applicable, add the following below this CDDL HEADER, with the
1625cf1a30Sjl139090  * fields enclosed by brackets "[]" replaced with your own identifying
1725cf1a30Sjl139090  * information: Portions Copyright [yyyy] [name of copyright owner]
1825cf1a30Sjl139090  *
1925cf1a30Sjl139090  * CDDL HEADER END
2025cf1a30Sjl139090  */
2125cf1a30Sjl139090 /*
22*56f33205SJonathan Adams  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
2325cf1a30Sjl139090  * Use is subject to license terms.
2425cf1a30Sjl139090  */
2525cf1a30Sjl139090 
2625cf1a30Sjl139090 /*
2725cf1a30Sjl139090  * DR memory support routines.
2825cf1a30Sjl139090  */
2925cf1a30Sjl139090 
3025cf1a30Sjl139090 #include <sys/note.h>
3125cf1a30Sjl139090 #include <sys/debug.h>
3225cf1a30Sjl139090 #include <sys/types.h>
3325cf1a30Sjl139090 #include <sys/errno.h>
3425cf1a30Sjl139090 #include <sys/param.h>
3525cf1a30Sjl139090 #include <sys/dditypes.h>
3625cf1a30Sjl139090 #include <sys/kmem.h>
3725cf1a30Sjl139090 #include <sys/conf.h>
3825cf1a30Sjl139090 #include <sys/ddi.h>
3925cf1a30Sjl139090 #include <sys/sunddi.h>
4025cf1a30Sjl139090 #include <sys/sunndi.h>
4125cf1a30Sjl139090 #include <sys/ddi_impldefs.h>
4225cf1a30Sjl139090 #include <sys/ndi_impldefs.h>
4325cf1a30Sjl139090 #include <sys/sysmacros.h>
4425cf1a30Sjl139090 #include <sys/machsystm.h>
4525cf1a30Sjl139090 #include <sys/spitregs.h>
4625cf1a30Sjl139090 #include <sys/cpuvar.h>
4725cf1a30Sjl139090 #include <sys/promif.h>
4825cf1a30Sjl139090 #include <vm/seg_kmem.h>
4925cf1a30Sjl139090 #include <sys/lgrp.h>
5025cf1a30Sjl139090 #include <sys/platform_module.h>
5125cf1a30Sjl139090 
5225cf1a30Sjl139090 #include <vm/page.h>
5325cf1a30Sjl139090 
5425cf1a30Sjl139090 #include <sys/dr.h>
5525cf1a30Sjl139090 #include <sys/dr_util.h>
5625cf1a30Sjl139090 #include <sys/drmach.h>
5768ac2337Sjl139090 #include <sys/kobj.h>
5825cf1a30Sjl139090 
5925cf1a30Sjl139090 extern struct memlist	*phys_install;
6068ac2337Sjl139090 extern vnode_t		*retired_pages;
6125cf1a30Sjl139090 
6225cf1a30Sjl139090 /* TODO: push this reference below drmach line */
6325cf1a30Sjl139090 extern int		kcage_on;
6425cf1a30Sjl139090 
6525cf1a30Sjl139090 /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
6668ac2337Sjl139090 static char *dr_ie_fmt = "dr_mem.c %d";
6725cf1a30Sjl139090 
/*
 * Target preference classes returned by dr_get_target_preference().
 * The numeric values are spelled out so that the ordering is explicit;
 * DR_TP_INVALID is the only negative value.
 */
typedef enum {
	DR_TP_INVALID	= -1,
	DR_TP_SAME	= 0,
	DR_TP_LARGE	= 1,
	DR_TP_NONRELOC	= 2,
	DR_TP_FLOATING	= 3
} dr_target_pref_t;
7525cf1a30Sjl139090 
7625cf1a30Sjl139090 static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
7725cf1a30Sjl139090 static int		dr_reserve_mem_spans(memhandle_t *mhp,
7825cf1a30Sjl139090 				struct memlist *mlist);
7925cf1a30Sjl139090 static int		dr_select_mem_target(dr_handle_t *hp,
8025cf1a30Sjl139090 				dr_mem_unit_t *mp, struct memlist *ml);
8125cf1a30Sjl139090 static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
8225cf1a30Sjl139090 static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
8325cf1a30Sjl139090 static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
8425cf1a30Sjl139090 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
8525cf1a30Sjl139090 				struct memlist *s_ml, struct memlist *x_ml,
8625cf1a30Sjl139090 				struct memlist *b_ml);
8725cf1a30Sjl139090 
8825cf1a30Sjl139090 static int		memlist_canfit(struct memlist *s_mlist,
8925cf1a30Sjl139090 				struct memlist *t_mlist);
9025cf1a30Sjl139090 static int		dr_del_mlist_query(struct memlist *mlist,
9125cf1a30Sjl139090 				memquery_t *mp);
9225cf1a30Sjl139090 static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
9325cf1a30Sjl139090 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
9425cf1a30Sjl139090 				dr_mem_unit_t *t_mp);
9525cf1a30Sjl139090 static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
9625cf1a30Sjl139090 				dr_mem_unit_t *s_mp);
9725cf1a30Sjl139090 static int		dr_memlist_canfit(struct memlist *s_mlist,
9825cf1a30Sjl139090 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
9925cf1a30Sjl139090 				dr_mem_unit_t *t_mp);
10025cf1a30Sjl139090 
/*
 * Bit values stored in dr_mem_unit_t.sbm_flags.
 * Note that 0x08 and 0x10 are not assigned here.
 */
#define	DR_MFLAG_RESERVED	0x01	/* unit is reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* copy/rename source board */
#define	DR_MFLAG_TARGET		0x04	/* copy/rename target board */
#define	DR_MFLAG_RELOWNER	0x20	/* owner of memory release (delete) */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) finished */

/*
 * Page <-> byte conversions.  _ptob64() widens to 64 bits before
 * shifting so a page count never overflows a narrower type;
 * _b64top() shifts first and then narrows the result to pgcnt_t.
 */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
11325cf1a30Sjl139090 
11425cf1a30Sjl139090 static struct memlist *
dr_get_memlist(dr_mem_unit_t * mp)11525cf1a30Sjl139090 dr_get_memlist(dr_mem_unit_t *mp)
11625cf1a30Sjl139090 {
11725cf1a30Sjl139090 	struct memlist	*mlist = NULL;
11825cf1a30Sjl139090 	sbd_error_t	*err;
11925cf1a30Sjl139090 	static fn_t	f = "dr_get_memlist";
12025cf1a30Sjl139090 
12125cf1a30Sjl139090 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
12225cf1a30Sjl139090 
12325cf1a30Sjl139090 	/*
12425cf1a30Sjl139090 	 * Return cached memlist, if present.
12525cf1a30Sjl139090 	 * This memlist will be present following an
12625cf1a30Sjl139090 	 * unconfigure (a.k.a: detach) of this memunit.
12725cf1a30Sjl139090 	 * It should only be used in the case were a configure
12825cf1a30Sjl139090 	 * is bringing this memunit back in without going
12925cf1a30Sjl139090 	 * through the disconnect and connect states.
13025cf1a30Sjl139090 	 */
13125cf1a30Sjl139090 	if (mp->sbm_mlist) {
13225cf1a30Sjl139090 		PR_MEM("%s: found cached memlist\n", f);
13325cf1a30Sjl139090 
13425cf1a30Sjl139090 		mlist = memlist_dup(mp->sbm_mlist);
13525cf1a30Sjl139090 	} else {
13625cf1a30Sjl139090 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
13725cf1a30Sjl139090 
13825cf1a30Sjl139090 		/* attempt to construct a memlist using phys_install */
13925cf1a30Sjl139090 
14025cf1a30Sjl139090 		/* round down to slice base address */
14125cf1a30Sjl139090 		basepa &= ~(mp->sbm_slice_size - 1);
14225cf1a30Sjl139090 
14325cf1a30Sjl139090 		/* get a copy of phys_install to edit */
14425cf1a30Sjl139090 		memlist_read_lock();
14525cf1a30Sjl139090 		mlist = memlist_dup(phys_install);
14625cf1a30Sjl139090 		memlist_read_unlock();
14725cf1a30Sjl139090 
14825cf1a30Sjl139090 		/* trim lower irrelevant span */
14925cf1a30Sjl139090 		if (mlist)
15025cf1a30Sjl139090 			mlist = memlist_del_span(mlist, 0ull, basepa);
15125cf1a30Sjl139090 
15225cf1a30Sjl139090 		/* trim upper irrelevant span */
15325cf1a30Sjl139090 		if (mlist) {
15425cf1a30Sjl139090 			uint64_t endpa;
15525cf1a30Sjl139090 
15625cf1a30Sjl139090 			basepa += mp->sbm_slice_size;
15725cf1a30Sjl139090 			endpa = _ptob64(physmax + 1);
15825cf1a30Sjl139090 			if (endpa > basepa)
15925cf1a30Sjl139090 				mlist = memlist_del_span(
16025cf1a30Sjl139090 				    mlist, basepa,
16125cf1a30Sjl139090 				    endpa - basepa);
16225cf1a30Sjl139090 		}
16325cf1a30Sjl139090 
16425cf1a30Sjl139090 		if (mlist) {
16525cf1a30Sjl139090 			/* successfully built a memlist */
16625cf1a30Sjl139090 			PR_MEM("%s: derived memlist from phys_install\n", f);
16725cf1a30Sjl139090 		}
16825cf1a30Sjl139090 
16925cf1a30Sjl139090 		/* if no mlist yet, try platform layer */
17025cf1a30Sjl139090 		if (!mlist) {
17125cf1a30Sjl139090 			err = drmach_mem_get_memlist(
17225cf1a30Sjl139090 			    mp->sbm_cm.sbdev_id, &mlist);
17325cf1a30Sjl139090 			if (err) {
17425cf1a30Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
17525cf1a30Sjl139090 				mlist = NULL; /* paranoia */
17625cf1a30Sjl139090 			}
17725cf1a30Sjl139090 		}
17825cf1a30Sjl139090 	}
17925cf1a30Sjl139090 
18025cf1a30Sjl139090 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
18125cf1a30Sjl139090 	PR_MEMLIST_DUMP(mlist);
18225cf1a30Sjl139090 
18325cf1a30Sjl139090 	return (mlist);
18425cf1a30Sjl139090 }
18525cf1a30Sjl139090 
18625cf1a30Sjl139090 typedef struct {
18725cf1a30Sjl139090 	kcondvar_t cond;
18825cf1a30Sjl139090 	kmutex_t lock;
18925cf1a30Sjl139090 	int error;
19025cf1a30Sjl139090 	int done;
19125cf1a30Sjl139090 } dr_release_mem_sync_t;
19225cf1a30Sjl139090 
19325cf1a30Sjl139090 /*
19425cf1a30Sjl139090  * Memory has been logically removed by the time this routine is called.
19525cf1a30Sjl139090  */
19625cf1a30Sjl139090 static void
dr_mem_del_done(void * arg,int error)19725cf1a30Sjl139090 dr_mem_del_done(void *arg, int error)
19825cf1a30Sjl139090 {
19925cf1a30Sjl139090 	dr_release_mem_sync_t *ds = arg;
20025cf1a30Sjl139090 
20125cf1a30Sjl139090 	mutex_enter(&ds->lock);
20225cf1a30Sjl139090 	ds->error = error;
20325cf1a30Sjl139090 	ds->done = 1;
20425cf1a30Sjl139090 	cv_signal(&ds->cond);
20525cf1a30Sjl139090 	mutex_exit(&ds->lock);
20625cf1a30Sjl139090 }
20725cf1a30Sjl139090 
20825cf1a30Sjl139090 /*
20925cf1a30Sjl139090  * When we reach here the memory being drained should have
21025cf1a30Sjl139090  * already been reserved in dr_pre_release_mem().
21125cf1a30Sjl139090  * Our only task here is to kick off the "drain" and wait
21225cf1a30Sjl139090  * for it to finish.
21325cf1a30Sjl139090  */
21425cf1a30Sjl139090 void
dr_release_mem(dr_common_unit_t * cp)21525cf1a30Sjl139090 dr_release_mem(dr_common_unit_t *cp)
21625cf1a30Sjl139090 {
21725cf1a30Sjl139090 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
21825cf1a30Sjl139090 	int		err;
21925cf1a30Sjl139090 	dr_release_mem_sync_t rms;
22025cf1a30Sjl139090 	static fn_t	f = "dr_release_mem";
22125cf1a30Sjl139090 
22225cf1a30Sjl139090 	/* check that this memory unit has been reserved */
22325cf1a30Sjl139090 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
22425cf1a30Sjl139090 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
22525cf1a30Sjl139090 		return;
22625cf1a30Sjl139090 	}
22725cf1a30Sjl139090 
22825cf1a30Sjl139090 	bzero((void *) &rms, sizeof (rms));
22925cf1a30Sjl139090 
23025cf1a30Sjl139090 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
23125cf1a30Sjl139090 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
23225cf1a30Sjl139090 
23325cf1a30Sjl139090 	mutex_enter(&rms.lock);
23425cf1a30Sjl139090 	err = kphysm_del_start(mp->sbm_memhandle,
23525cf1a30Sjl139090 	    dr_mem_del_done, (void *) &rms);
23625cf1a30Sjl139090 	if (err == KPHYSM_OK) {
23725cf1a30Sjl139090 		/* wait for completion or interrupt */
23825cf1a30Sjl139090 		while (!rms.done) {
23925cf1a30Sjl139090 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
24025cf1a30Sjl139090 				/* then there is a pending UNIX signal */
24125cf1a30Sjl139090 				(void) kphysm_del_cancel(mp->sbm_memhandle);
24225cf1a30Sjl139090 
24325cf1a30Sjl139090 				/* wait for completion */
24425cf1a30Sjl139090 				while (!rms.done)
24525cf1a30Sjl139090 					cv_wait(&rms.cond, &rms.lock);
24625cf1a30Sjl139090 			}
24725cf1a30Sjl139090 		}
24825cf1a30Sjl139090 		/* get the result of the memory delete operation */
24925cf1a30Sjl139090 		err = rms.error;
25025cf1a30Sjl139090 	}
25125cf1a30Sjl139090 	mutex_exit(&rms.lock);
25225cf1a30Sjl139090 
25325cf1a30Sjl139090 	cv_destroy(&rms.cond);
25425cf1a30Sjl139090 	mutex_destroy(&rms.lock);
25525cf1a30Sjl139090 
25625cf1a30Sjl139090 	if (err != KPHYSM_OK) {
25725cf1a30Sjl139090 		int e_code;
25825cf1a30Sjl139090 
25925cf1a30Sjl139090 		switch (err) {
26025cf1a30Sjl139090 			case KPHYSM_ENOWORK:
26125cf1a30Sjl139090 				e_code = ESBD_NOERROR;
26225cf1a30Sjl139090 				break;
26325cf1a30Sjl139090 
26425cf1a30Sjl139090 			case KPHYSM_EHANDLE:
26525cf1a30Sjl139090 			case KPHYSM_ESEQUENCE:
26625cf1a30Sjl139090 				e_code = ESBD_INTERNAL;
26725cf1a30Sjl139090 				break;
26825cf1a30Sjl139090 
26925cf1a30Sjl139090 			case KPHYSM_ENOTVIABLE:
27025cf1a30Sjl139090 				e_code = ESBD_MEM_NOTVIABLE;
27125cf1a30Sjl139090 				break;
27225cf1a30Sjl139090 
27325cf1a30Sjl139090 			case KPHYSM_EREFUSED:
27425cf1a30Sjl139090 				e_code = ESBD_MEM_REFUSED;
27525cf1a30Sjl139090 				break;
27625cf1a30Sjl139090 
27725cf1a30Sjl139090 			case KPHYSM_ENONRELOC:
27825cf1a30Sjl139090 				e_code = ESBD_MEM_NONRELOC;
27925cf1a30Sjl139090 				break;
28025cf1a30Sjl139090 
28125cf1a30Sjl139090 			case KPHYSM_ECANCELLED:
28225cf1a30Sjl139090 				e_code = ESBD_MEM_CANCELLED;
28325cf1a30Sjl139090 				break;
28425cf1a30Sjl139090 
28525cf1a30Sjl139090 			case KPHYSM_ERESOURCE:
28625cf1a30Sjl139090 				e_code = ESBD_MEMFAIL;
28725cf1a30Sjl139090 				break;
28825cf1a30Sjl139090 
28925cf1a30Sjl139090 			default:
29025cf1a30Sjl139090 				cmn_err(CE_WARN,
29125cf1a30Sjl139090 				    "%s: unexpected kphysm error code %d,"
29225cf1a30Sjl139090 				    " id 0x%p",
29325cf1a30Sjl139090 				    f, err, mp->sbm_cm.sbdev_id);
29425cf1a30Sjl139090 
29525cf1a30Sjl139090 				e_code = ESBD_IO;
29625cf1a30Sjl139090 				break;
29725cf1a30Sjl139090 		}
29825cf1a30Sjl139090 
29925cf1a30Sjl139090 		if (e_code != ESBD_NOERROR) {
300b307f191Sbm42561 			dr_dev_err(CE_WARN, &mp->sbm_cm, e_code);
30125cf1a30Sjl139090 		}
30225cf1a30Sjl139090 	}
30325cf1a30Sjl139090 }
30425cf1a30Sjl139090 
30525cf1a30Sjl139090 void
dr_attach_mem(dr_handle_t * hp,dr_common_unit_t * cp)30625cf1a30Sjl139090 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
30725cf1a30Sjl139090 {
30825cf1a30Sjl139090 	_NOTE(ARGUNUSED(hp))
30925cf1a30Sjl139090 
31025cf1a30Sjl139090 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
31125cf1a30Sjl139090 	struct memlist	*ml, *mc;
31225cf1a30Sjl139090 	sbd_error_t	*err;
31325cf1a30Sjl139090 	static fn_t	f = "dr_attach_mem";
31425cf1a30Sjl139090 
31525cf1a30Sjl139090 	PR_MEM("%s...\n", f);
31625cf1a30Sjl139090 
31725cf1a30Sjl139090 	dr_lock_status(hp->h_bd);
31825cf1a30Sjl139090 	err = drmach_configure(cp->sbdev_id, 0);
31925cf1a30Sjl139090 	dr_unlock_status(hp->h_bd);
32025cf1a30Sjl139090 	if (err) {
32125cf1a30Sjl139090 		DRERR_SET_C(&cp->sbdev_error, &err);
32225cf1a30Sjl139090 		return;
32325cf1a30Sjl139090 	}
32425cf1a30Sjl139090 
32525cf1a30Sjl139090 	ml = dr_get_memlist(mp);
326*56f33205SJonathan Adams 	for (mc = ml; mc; mc = mc->ml_next) {
32725cf1a30Sjl139090 		int		 rv;
32825cf1a30Sjl139090 		sbd_error_t	*err;
32925cf1a30Sjl139090 
33025cf1a30Sjl139090 		rv = kphysm_add_memory_dynamic(
331*56f33205SJonathan Adams 		    (pfn_t)(mc->ml_address >> PAGESHIFT),
332*56f33205SJonathan Adams 		    (pgcnt_t)(mc->ml_size >> PAGESHIFT));
33325cf1a30Sjl139090 		if (rv != KPHYSM_OK) {
33425cf1a30Sjl139090 			/*
33525cf1a30Sjl139090 			 * translate kphysm error and
33625cf1a30Sjl139090 			 * store in devlist error
33725cf1a30Sjl139090 			 */
33825cf1a30Sjl139090 			switch (rv) {
33925cf1a30Sjl139090 			case KPHYSM_ERESOURCE:
34025cf1a30Sjl139090 				rv = ESBD_NOMEM;
34125cf1a30Sjl139090 				break;
34225cf1a30Sjl139090 
34325cf1a30Sjl139090 			case KPHYSM_EFAULT:
34425cf1a30Sjl139090 				rv = ESBD_FAULT;
34525cf1a30Sjl139090 				break;
34625cf1a30Sjl139090 
34725cf1a30Sjl139090 			default:
34825cf1a30Sjl139090 				rv = ESBD_INTERNAL;
34925cf1a30Sjl139090 				break;
35025cf1a30Sjl139090 			}
35125cf1a30Sjl139090 
35225cf1a30Sjl139090 			if (rv == ESBD_INTERNAL) {
35325cf1a30Sjl139090 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
35425cf1a30Sjl139090 			} else
35525cf1a30Sjl139090 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
35625cf1a30Sjl139090 			break;
35725cf1a30Sjl139090 		}
35825cf1a30Sjl139090 
35925cf1a30Sjl139090 		err = drmach_mem_add_span(
360*56f33205SJonathan Adams 		    mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
36125cf1a30Sjl139090 		if (err) {
36225cf1a30Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
36325cf1a30Sjl139090 			break;
36425cf1a30Sjl139090 		}
36525cf1a30Sjl139090 	}
36625cf1a30Sjl139090 
36725cf1a30Sjl139090 	memlist_delete(ml);
36825cf1a30Sjl139090 
36925cf1a30Sjl139090 	/* back out if configure failed */
37025cf1a30Sjl139090 	if (mp->sbm_cm.sbdev_error != NULL) {
37125cf1a30Sjl139090 		dr_lock_status(hp->h_bd);
37225cf1a30Sjl139090 		err = drmach_unconfigure(cp->sbdev_id, 0);
37325cf1a30Sjl139090 		if (err)
37425cf1a30Sjl139090 			sbd_err_clear(&err);
37525cf1a30Sjl139090 		dr_unlock_status(hp->h_bd);
37625cf1a30Sjl139090 	}
37725cf1a30Sjl139090 }
37825cf1a30Sjl139090 
37925cf1a30Sjl139090 static struct memlist *
dr_memlist_del_retired_pages(struct memlist * mlist)38025cf1a30Sjl139090 dr_memlist_del_retired_pages(struct memlist *mlist)
38125cf1a30Sjl139090 {
38225cf1a30Sjl139090 	page_t		*pp;
38325cf1a30Sjl139090 	pfn_t		pfn;
38425cf1a30Sjl139090 	kmutex_t	*vphm;
38568ac2337Sjl139090 	vnode_t		*vp = retired_pages;
38625cf1a30Sjl139090 	static fn_t	f = "dr_memlist_del_retired_pages";
38725cf1a30Sjl139090 
38825cf1a30Sjl139090 	vphm = page_vnode_mutex(vp);
38925cf1a30Sjl139090 	mutex_enter(vphm);
39025cf1a30Sjl139090 
39125cf1a30Sjl139090 	PR_MEM("%s\n", f);
39225cf1a30Sjl139090 
39325cf1a30Sjl139090 	if ((pp = vp->v_pages) == NULL) {
39425cf1a30Sjl139090 		mutex_exit(vphm);
39525cf1a30Sjl139090 		return (mlist);
39625cf1a30Sjl139090 	}
39725cf1a30Sjl139090 
39825cf1a30Sjl139090 	do {
39925cf1a30Sjl139090 		ASSERT(pp != NULL);
40068ac2337Sjl139090 		ASSERT(pp->p_vnode == retired_pages);
40125cf1a30Sjl139090 
4020c48eb93Sbm42561 		if (!page_try_reclaim_lock(pp, SE_SHARED, SE_RETIRED))
40325cf1a30Sjl139090 			continue;
40425cf1a30Sjl139090 
40525cf1a30Sjl139090 		pfn = page_pptonum(pp);
40625cf1a30Sjl139090 
40725cf1a30Sjl139090 		/*
40825cf1a30Sjl139090 		 * Page retirement currently breaks large pages into PAGESIZE
40925cf1a30Sjl139090 		 * pages. If this changes, need to remove the assert and deal
41025cf1a30Sjl139090 		 * with different page sizes.
41125cf1a30Sjl139090 		 */
41225cf1a30Sjl139090 		ASSERT(pp->p_szc == 0);
41325cf1a30Sjl139090 
41425cf1a30Sjl139090 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
41525cf1a30Sjl139090 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
41625cf1a30Sjl139090 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
41725cf1a30Sjl139090 			    "from memlist\n", ptob(pfn), pfn);
41825cf1a30Sjl139090 		}
41925cf1a30Sjl139090 
42025cf1a30Sjl139090 		page_unlock(pp);
42125cf1a30Sjl139090 	} while ((pp = pp->p_vpnext) != vp->v_pages);
42225cf1a30Sjl139090 
42325cf1a30Sjl139090 	mutex_exit(vphm);
42425cf1a30Sjl139090 
42525cf1a30Sjl139090 	return (mlist);
42625cf1a30Sjl139090 }
42725cf1a30Sjl139090 
42825cf1a30Sjl139090 static int
dr_move_memory(dr_handle_t * hp,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)42925cf1a30Sjl139090 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
43025cf1a30Sjl139090 {
43125cf1a30Sjl139090 	int		rv = -1;
43225cf1a30Sjl139090 	time_t		 copytime;
43325cf1a30Sjl139090 	drmachid_t	 cr_id;
43425cf1a30Sjl139090 	dr_sr_handle_t	*srhp = NULL;
43525cf1a30Sjl139090 	dr_board_t	*t_bp, *s_bp;
43625cf1a30Sjl139090 	struct memlist	*c_ml, *d_ml;
43725cf1a30Sjl139090 	sbd_error_t	*err;
43825cf1a30Sjl139090 	static fn_t	 f = "dr_move_memory";
43925cf1a30Sjl139090 
44025cf1a30Sjl139090 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
44125cf1a30Sjl139090 	    f,
44225cf1a30Sjl139090 	    s_mp->sbm_cm.sbdev_path,
44325cf1a30Sjl139090 	    t_mp->sbm_cm.sbdev_path);
44425cf1a30Sjl139090 
44525cf1a30Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
44625cf1a30Sjl139090 	ASSERT(s_mp->sbm_peer == t_mp);
44725cf1a30Sjl139090 	ASSERT(s_mp->sbm_mlist);
44825cf1a30Sjl139090 
44925cf1a30Sjl139090 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
45025cf1a30Sjl139090 	ASSERT(t_mp->sbm_peer == s_mp);
45125cf1a30Sjl139090 
45225cf1a30Sjl139090 	/*
45325cf1a30Sjl139090 	 * create a memlist of spans to copy by removing
45425cf1a30Sjl139090 	 * the spans that have been deleted, if any, from
45525cf1a30Sjl139090 	 * the full source board memlist.  s_mp->sbm_del_mlist
45625cf1a30Sjl139090 	 * will be NULL if there were no spans deleted from
45725cf1a30Sjl139090 	 * the source board.
45825cf1a30Sjl139090 	 */
45925cf1a30Sjl139090 	c_ml = memlist_dup(s_mp->sbm_mlist);
46025cf1a30Sjl139090 	d_ml = s_mp->sbm_del_mlist;
46125cf1a30Sjl139090 	while (d_ml != NULL) {
462*56f33205SJonathan Adams 		c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
463*56f33205SJonathan Adams 		d_ml = d_ml->ml_next;
46425cf1a30Sjl139090 	}
46525cf1a30Sjl139090 
46625cf1a30Sjl139090 	/*
46725cf1a30Sjl139090 	 * Remove retired pages from the copy list. The page content
46825cf1a30Sjl139090 	 * need not be copied since the pages are no longer in use.
46925cf1a30Sjl139090 	 */
47025cf1a30Sjl139090 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
47125cf1a30Sjl139090 	PR_MEMLIST_DUMP(c_ml);
47225cf1a30Sjl139090 
47325cf1a30Sjl139090 	c_ml = dr_memlist_del_retired_pages(c_ml);
47425cf1a30Sjl139090 
47525cf1a30Sjl139090 	PR_MEM("%s: copy list after removing retired pages:\n", f);
47625cf1a30Sjl139090 	PR_MEMLIST_DUMP(c_ml);
47725cf1a30Sjl139090 
47825cf1a30Sjl139090 	/*
47925cf1a30Sjl139090 	 * With parallel copy, it shouldn't make a difference which
48025cf1a30Sjl139090 	 * CPU is the actual master during copy-rename since all
48125cf1a30Sjl139090 	 * CPUs participate in the parallel copy anyway.
48225cf1a30Sjl139090 	 */
48325cf1a30Sjl139090 	affinity_set(CPU_CURRENT);
48425cf1a30Sjl139090 
48525cf1a30Sjl139090 	err = drmach_copy_rename_init(
48625cf1a30Sjl139090 	    t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
48725cf1a30Sjl139090 	if (err) {
48825cf1a30Sjl139090 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
48925cf1a30Sjl139090 		affinity_clear();
49025cf1a30Sjl139090 		memlist_delete(c_ml);
49125cf1a30Sjl139090 		return (-1);
49225cf1a30Sjl139090 	}
49325cf1a30Sjl139090 
49425cf1a30Sjl139090 	srhp = dr_get_sr_handle(hp);
49525cf1a30Sjl139090 	ASSERT(srhp);
49625cf1a30Sjl139090 
497d3d50737SRafael Vanoni 	copytime = ddi_get_lbolt();
49825cf1a30Sjl139090 
49925cf1a30Sjl139090 	/* Quiesce the OS.  */
50025cf1a30Sjl139090 	if (dr_suspend(srhp)) {
50125cf1a30Sjl139090 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
50225cf1a30Sjl139090 		    " for copy-rename", f);
50325cf1a30Sjl139090 
50425cf1a30Sjl139090 		err = drmach_copy_rename_fini(cr_id);
50525cf1a30Sjl139090 		if (err) {
50625cf1a30Sjl139090 			/*
50725cf1a30Sjl139090 			 * no error is expected since the program has
50825cf1a30Sjl139090 			 * not yet run.
50925cf1a30Sjl139090 			 */
51025cf1a30Sjl139090 
51125cf1a30Sjl139090 			/* catch this in debug kernels */
51225cf1a30Sjl139090 			ASSERT(0);
51325cf1a30Sjl139090 
51425cf1a30Sjl139090 			sbd_err_clear(&err);
51525cf1a30Sjl139090 		}
51625cf1a30Sjl139090 
51725cf1a30Sjl139090 		/* suspend error reached via hp */
51825cf1a30Sjl139090 		s_mp->sbm_cm.sbdev_error = hp->h_err;
51925cf1a30Sjl139090 		hp->h_err = NULL;
52025cf1a30Sjl139090 		goto done;
52125cf1a30Sjl139090 	}
52225cf1a30Sjl139090 
52325cf1a30Sjl139090 	drmach_copy_rename(cr_id);
52425cf1a30Sjl139090 
52525cf1a30Sjl139090 	/* Resume the OS.  */
52625cf1a30Sjl139090 	dr_resume(srhp);
52725cf1a30Sjl139090 
528d3d50737SRafael Vanoni 	copytime = ddi_get_lbolt() - copytime;
52925cf1a30Sjl139090 
53025cf1a30Sjl139090 	if (err = drmach_copy_rename_fini(cr_id))
53125cf1a30Sjl139090 		goto done;
53225cf1a30Sjl139090 
53325cf1a30Sjl139090 	/*
53425cf1a30Sjl139090 	 * Rename memory for lgroup.
53525cf1a30Sjl139090 	 * Source and target board numbers are packaged in arg.
53625cf1a30Sjl139090 	 */
53725cf1a30Sjl139090 	s_bp = s_mp->sbm_cm.sbdev_bp;
53825cf1a30Sjl139090 	t_bp = t_mp->sbm_cm.sbdev_bp;
53925cf1a30Sjl139090 
54025cf1a30Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
54125cf1a30Sjl139090 	    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
54225cf1a30Sjl139090 
54325cf1a30Sjl139090 
54425cf1a30Sjl139090 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
54525cf1a30Sjl139090 	    f, copytime, copytime / hz);
54625cf1a30Sjl139090 
54725cf1a30Sjl139090 	rv = 0;
54825cf1a30Sjl139090 done:
54925cf1a30Sjl139090 	if (srhp)
55025cf1a30Sjl139090 		dr_release_sr_handle(srhp);
55125cf1a30Sjl139090 	if (err)
55225cf1a30Sjl139090 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
55325cf1a30Sjl139090 	affinity_clear();
55425cf1a30Sjl139090 
55525cf1a30Sjl139090 	return (rv);
55625cf1a30Sjl139090 }
55725cf1a30Sjl139090 
55825cf1a30Sjl139090 /*
55925cf1a30Sjl139090  * If detaching node contains memory that is "non-permanent"
56025cf1a30Sjl139090  * then the memory adr's are simply cleared.  If the memory
56125cf1a30Sjl139090  * is non-relocatable, then do a copy-rename.
56225cf1a30Sjl139090  */
56325cf1a30Sjl139090 void
dr_detach_mem(dr_handle_t * hp,dr_common_unit_t * cp)56425cf1a30Sjl139090 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
56525cf1a30Sjl139090 {
56625cf1a30Sjl139090 	int			rv = 0;
56725cf1a30Sjl139090 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
56825cf1a30Sjl139090 	dr_mem_unit_t		*t_mp;
56925cf1a30Sjl139090 	dr_state_t		state;
57025cf1a30Sjl139090 	static fn_t		f = "dr_detach_mem";
57125cf1a30Sjl139090 
57225cf1a30Sjl139090 	PR_MEM("%s...\n", f);
57325cf1a30Sjl139090 
57425cf1a30Sjl139090 	/* lookup target mem unit and target board structure, if any */
57525cf1a30Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
57625cf1a30Sjl139090 		t_mp = s_mp->sbm_peer;
57725cf1a30Sjl139090 		ASSERT(t_mp != NULL);
57825cf1a30Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
57925cf1a30Sjl139090 	} else {
58025cf1a30Sjl139090 		t_mp = NULL;
58125cf1a30Sjl139090 	}
58225cf1a30Sjl139090 
58325cf1a30Sjl139090 	/* verify mem unit's state is UNREFERENCED */
58425cf1a30Sjl139090 	state = s_mp->sbm_cm.sbdev_state;
58525cf1a30Sjl139090 	if (state != DR_STATE_UNREFERENCED) {
58625cf1a30Sjl139090 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
58725cf1a30Sjl139090 		return;
58825cf1a30Sjl139090 	}
58925cf1a30Sjl139090 
59025cf1a30Sjl139090 	/* verify target mem unit's state is UNREFERENCED, if any */
59125cf1a30Sjl139090 	if (t_mp != NULL) {
59225cf1a30Sjl139090 		state = t_mp->sbm_cm.sbdev_state;
59325cf1a30Sjl139090 		if (state != DR_STATE_UNREFERENCED) {
59425cf1a30Sjl139090 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
59525cf1a30Sjl139090 			return;
59625cf1a30Sjl139090 		}
59725cf1a30Sjl139090 	}
59825cf1a30Sjl139090 
59925cf1a30Sjl139090 	/*
60025cf1a30Sjl139090 	 * If there is no target board (no copy/rename was needed), then
60125cf1a30Sjl139090 	 * we're done!
60225cf1a30Sjl139090 	 */
60325cf1a30Sjl139090 	if (t_mp == NULL) {
60425cf1a30Sjl139090 		sbd_error_t *err;
60525cf1a30Sjl139090 		/*
60625cf1a30Sjl139090 		 * Reprogram interconnect hardware and disable
60725cf1a30Sjl139090 		 * memory controllers for memory node that's going away.
60825cf1a30Sjl139090 		 */
60925cf1a30Sjl139090 
61025cf1a30Sjl139090 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
61125cf1a30Sjl139090 		if (err) {
61225cf1a30Sjl139090 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
61325cf1a30Sjl139090 			rv = -1;
61425cf1a30Sjl139090 		}
61525cf1a30Sjl139090 	} else {
61625cf1a30Sjl139090 		rv = dr_move_memory(hp, s_mp, t_mp);
61725cf1a30Sjl139090 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
61825cf1a30Sjl139090 		    f,
61925cf1a30Sjl139090 		    rv ? "FAILED" : "COMPLETED",
62025cf1a30Sjl139090 		    s_mp->sbm_cm.sbdev_bp->b_num,
62125cf1a30Sjl139090 		    t_mp->sbm_cm.sbdev_bp->b_num);
62225cf1a30Sjl139090 
62325cf1a30Sjl139090 		if (rv != 0)
62425cf1a30Sjl139090 			(void) dr_cancel_mem(s_mp);
62525cf1a30Sjl139090 	}
62625cf1a30Sjl139090 
62725cf1a30Sjl139090 	if (rv == 0) {
62825cf1a30Sjl139090 		sbd_error_t *err;
62925cf1a30Sjl139090 
63025cf1a30Sjl139090 		dr_lock_status(hp->h_bd);
63125cf1a30Sjl139090 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
63225cf1a30Sjl139090 		dr_unlock_status(hp->h_bd);
63325cf1a30Sjl139090 		if (err)
63425cf1a30Sjl139090 			sbd_err_clear(&err);
63525cf1a30Sjl139090 	}
63625cf1a30Sjl139090 }
63725cf1a30Sjl139090 
63825cf1a30Sjl139090 /*
63925cf1a30Sjl139090  * This routine acts as a wrapper for kphysm_del_span_query in order to
64025cf1a30Sjl139090  * support potential memory holes in a board's physical address space.
64125cf1a30Sjl139090  * It calls kphysm_del_span_query for each node in a memlist and accumulates
64225cf1a30Sjl139090  * the results in *mp.
64325cf1a30Sjl139090  */
64425cf1a30Sjl139090 static int
dr_del_mlist_query(struct memlist * mlist,memquery_t * mp)64525cf1a30Sjl139090 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
64625cf1a30Sjl139090 {
64725cf1a30Sjl139090 	struct memlist	*ml;
64825cf1a30Sjl139090 	int		 rv = 0;
64925cf1a30Sjl139090 
65025cf1a30Sjl139090 
65125cf1a30Sjl139090 	if (mlist == NULL)
65225cf1a30Sjl139090 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
65325cf1a30Sjl139090 
65425cf1a30Sjl139090 	mp->phys_pages = 0;
65525cf1a30Sjl139090 	mp->managed = 0;
65625cf1a30Sjl139090 	mp->nonrelocatable = 0;
65725cf1a30Sjl139090 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
65825cf1a30Sjl139090 	mp->last_nonrelocatable = 0;
65925cf1a30Sjl139090 
660*56f33205SJonathan Adams 	for (ml = mlist; ml; ml = ml->ml_next) {
66125cf1a30Sjl139090 		memquery_t mq;
66225cf1a30Sjl139090 
66325cf1a30Sjl139090 		rv = kphysm_del_span_query(
664*56f33205SJonathan Adams 		    _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
66525cf1a30Sjl139090 		if (rv)
66625cf1a30Sjl139090 			break;
66725cf1a30Sjl139090 
66825cf1a30Sjl139090 		mp->phys_pages += mq.phys_pages;
66925cf1a30Sjl139090 		mp->managed += mq.managed;
67025cf1a30Sjl139090 		mp->nonrelocatable += mq.nonrelocatable;
67125cf1a30Sjl139090 
67225cf1a30Sjl139090 		if (mq.nonrelocatable != 0) {
67325cf1a30Sjl139090 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
67425cf1a30Sjl139090 				mp->first_nonrelocatable =
67525cf1a30Sjl139090 				    mq.first_nonrelocatable;
67625cf1a30Sjl139090 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
67725cf1a30Sjl139090 				mp->last_nonrelocatable =
67825cf1a30Sjl139090 				    mq.last_nonrelocatable;
67925cf1a30Sjl139090 		}
68025cf1a30Sjl139090 	}
68125cf1a30Sjl139090 
68225cf1a30Sjl139090 	if (mp->nonrelocatable == 0)
68325cf1a30Sjl139090 		mp->first_nonrelocatable = 0;	/* XXX */
68425cf1a30Sjl139090 
68525cf1a30Sjl139090 	return (rv);
68625cf1a30Sjl139090 }
68725cf1a30Sjl139090 
68825cf1a30Sjl139090 /*
68925cf1a30Sjl139090  * NOTE: This routine is only partially smart about multiple
69025cf1a30Sjl139090  *	 mem-units.  Need to make mem-status structure smart
69125cf1a30Sjl139090  *	 about them also.
69225cf1a30Sjl139090  */
69325cf1a30Sjl139090 int
dr_mem_status(dr_handle_t * hp,dr_devset_t devset,sbd_dev_stat_t * dsp)69425cf1a30Sjl139090 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
69525cf1a30Sjl139090 {
69625cf1a30Sjl139090 	int		m, mix;
69725cf1a30Sjl139090 	memdelstat_t	mdst;
69825cf1a30Sjl139090 	memquery_t	mq;
69925cf1a30Sjl139090 	dr_board_t	*bp;
70025cf1a30Sjl139090 	dr_mem_unit_t	*mp;
70125cf1a30Sjl139090 	sbd_mem_stat_t	*msp;
70225cf1a30Sjl139090 	static fn_t	f = "dr_mem_status";
70325cf1a30Sjl139090 
70425cf1a30Sjl139090 	bp = hp->h_bd;
70525cf1a30Sjl139090 	devset &= DR_DEVS_PRESENT(bp);
70625cf1a30Sjl139090 
70725cf1a30Sjl139090 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
70825cf1a30Sjl139090 		int		rv;
70925cf1a30Sjl139090 		sbd_error_t	*err;
71025cf1a30Sjl139090 		drmach_status_t	 pstat;
71125cf1a30Sjl139090 		dr_mem_unit_t	*p_mp;
71225cf1a30Sjl139090 
71325cf1a30Sjl139090 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
71425cf1a30Sjl139090 			continue;
71525cf1a30Sjl139090 
71625cf1a30Sjl139090 		mp = dr_get_mem_unit(bp, m);
71725cf1a30Sjl139090 
71825cf1a30Sjl139090 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
71925cf1a30Sjl139090 			/* present, but not fully initialized */
72025cf1a30Sjl139090 			continue;
72125cf1a30Sjl139090 		}
72225cf1a30Sjl139090 
72325cf1a30Sjl139090 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
72425cf1a30Sjl139090 			continue;
72525cf1a30Sjl139090 
72625cf1a30Sjl139090 		/* fetch platform status */
72725cf1a30Sjl139090 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
72825cf1a30Sjl139090 		if (err) {
72925cf1a30Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
73025cf1a30Sjl139090 			continue;
73125cf1a30Sjl139090 		}
73225cf1a30Sjl139090 
73325cf1a30Sjl139090 		msp = &dsp->d_mem;
73425cf1a30Sjl139090 		bzero((caddr_t)msp, sizeof (*msp));
73525cf1a30Sjl139090 
73607d06da5SSurya Prakki 		(void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
73725cf1a30Sjl139090 		    sizeof (msp->ms_cm.c_id.c_name));
73825cf1a30Sjl139090 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
73925cf1a30Sjl139090 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
74025cf1a30Sjl139090 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
74125cf1a30Sjl139090 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
74225cf1a30Sjl139090 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
74325cf1a30Sjl139090 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
74425cf1a30Sjl139090 
74525cf1a30Sjl139090 		msp->ms_totpages = mp->sbm_npages;
74625cf1a30Sjl139090 		msp->ms_basepfn = mp->sbm_basepfn;
74725cf1a30Sjl139090 		msp->ms_pageslost = mp->sbm_pageslost;
74825cf1a30Sjl139090 		msp->ms_cage_enabled = kcage_on;
74925cf1a30Sjl139090 
75025cf1a30Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
75125cf1a30Sjl139090 			p_mp = mp->sbm_peer;
75225cf1a30Sjl139090 		else
75325cf1a30Sjl139090 			p_mp = NULL;
75425cf1a30Sjl139090 
75525cf1a30Sjl139090 		if (p_mp == NULL) {
75625cf1a30Sjl139090 			msp->ms_peer_is_target = 0;
75725cf1a30Sjl139090 			msp->ms_peer_ap_id[0] = '\0';
75825cf1a30Sjl139090 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
75925cf1a30Sjl139090 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
76025cf1a30Sjl139090 			char *minor;
76125cf1a30Sjl139090 
76225cf1a30Sjl139090 			/*
76325cf1a30Sjl139090 			 * b_dip doesn't have to be held for ddi_pathname()
76425cf1a30Sjl139090 			 * because the board struct (dr_board_t) will be
76525cf1a30Sjl139090 			 * destroyed before b_dip detaches.
76625cf1a30Sjl139090 			 */
76725cf1a30Sjl139090 			(void) ddi_pathname(bp->b_dip, path);
76825cf1a30Sjl139090 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
76925cf1a30Sjl139090 
77007d06da5SSurya Prakki 			(void) snprintf(msp->ms_peer_ap_id,
77125cf1a30Sjl139090 			    sizeof (msp->ms_peer_ap_id), "%s%s",
77225cf1a30Sjl139090 			    path, (minor == NULL) ? "" : minor);
77325cf1a30Sjl139090 
77425cf1a30Sjl139090 			kmem_free(path, MAXPATHLEN);
77525cf1a30Sjl139090 
77625cf1a30Sjl139090 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
77725cf1a30Sjl139090 				msp->ms_peer_is_target = 1;
77825cf1a30Sjl139090 		}
77925cf1a30Sjl139090 
78025cf1a30Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
78125cf1a30Sjl139090 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
78225cf1a30Sjl139090 		else
78325cf1a30Sjl139090 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
78425cf1a30Sjl139090 
78525cf1a30Sjl139090 		if (rv == KPHYSM_OK) {
78625cf1a30Sjl139090 			/*
78725cf1a30Sjl139090 			 * Any pages above managed is "free",
78825cf1a30Sjl139090 			 * i.e. it's collected.
78925cf1a30Sjl139090 			 */
79025cf1a30Sjl139090 			msp->ms_detpages += (uint_t)(mdst.collected +
79125cf1a30Sjl139090 			    mdst.phys_pages - mdst.managed);
79225cf1a30Sjl139090 		} else {
79325cf1a30Sjl139090 			/*
79425cf1a30Sjl139090 			 * If we're UNREFERENCED or UNCONFIGURED,
79525cf1a30Sjl139090 			 * then the number of detached pages is
79625cf1a30Sjl139090 			 * however many pages are on the board.
79725cf1a30Sjl139090 			 * I.e. detached = not in use by OS.
79825cf1a30Sjl139090 			 */
79925cf1a30Sjl139090 			switch (msp->ms_cm.c_ostate) {
80025cf1a30Sjl139090 			/*
80125cf1a30Sjl139090 			 * changed to use cfgadm states
80225cf1a30Sjl139090 			 *
80325cf1a30Sjl139090 			 * was:
80425cf1a30Sjl139090 			 *	case DR_STATE_UNREFERENCED:
80525cf1a30Sjl139090 			 *	case DR_STATE_UNCONFIGURED:
80625cf1a30Sjl139090 			 */
80725cf1a30Sjl139090 			case SBD_STAT_UNCONFIGURED:
80825cf1a30Sjl139090 				msp->ms_detpages = msp->ms_totpages;
80925cf1a30Sjl139090 				break;
81025cf1a30Sjl139090 
81125cf1a30Sjl139090 			default:
81225cf1a30Sjl139090 				break;
81325cf1a30Sjl139090 			}
81425cf1a30Sjl139090 		}
81525cf1a30Sjl139090 
81625cf1a30Sjl139090 		/*
81725cf1a30Sjl139090 		 * kphysm_del_span_query can report non-reloc pages = total
81825cf1a30Sjl139090 		 * pages for memory that is not yet configured
81925cf1a30Sjl139090 		 */
82025cf1a30Sjl139090 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
82125cf1a30Sjl139090 			struct memlist *ml;
82225cf1a30Sjl139090 
82325cf1a30Sjl139090 			ml = dr_get_memlist(mp);
82425cf1a30Sjl139090 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
82525cf1a30Sjl139090 			memlist_delete(ml);
82625cf1a30Sjl139090 
82725cf1a30Sjl139090 			if (rv == KPHYSM_OK) {
82825cf1a30Sjl139090 				msp->ms_managed_pages = mq.managed;
82925cf1a30Sjl139090 				msp->ms_noreloc_pages = mq.nonrelocatable;
83025cf1a30Sjl139090 				msp->ms_noreloc_first =
83125cf1a30Sjl139090 				    mq.first_nonrelocatable;
83225cf1a30Sjl139090 				msp->ms_noreloc_last =
83325cf1a30Sjl139090 				    mq.last_nonrelocatable;
83425cf1a30Sjl139090 				msp->ms_cm.c_sflags = 0;
83525cf1a30Sjl139090 				if (mq.nonrelocatable) {
83625cf1a30Sjl139090 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
83725cf1a30Sjl139090 					    msp->ms_cm.c_sflags);
83825cf1a30Sjl139090 				}
83925cf1a30Sjl139090 			} else {
84025cf1a30Sjl139090 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
84125cf1a30Sjl139090 				    f, rv);
84225cf1a30Sjl139090 			}
84325cf1a30Sjl139090 		}
84425cf1a30Sjl139090 
84525cf1a30Sjl139090 		/*
84625cf1a30Sjl139090 		 * Check source unit state during copy-rename
84725cf1a30Sjl139090 		 */
84825cf1a30Sjl139090 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
84925cf1a30Sjl139090 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
85025cf1a30Sjl139090 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
85125cf1a30Sjl139090 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
85225cf1a30Sjl139090 
85325cf1a30Sjl139090 		mix++;
85425cf1a30Sjl139090 		dsp++;
85525cf1a30Sjl139090 	}
85625cf1a30Sjl139090 
85725cf1a30Sjl139090 	return (mix);
85825cf1a30Sjl139090 }
85925cf1a30Sjl139090 
86025cf1a30Sjl139090 int
dr_pre_attach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)86125cf1a30Sjl139090 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
86225cf1a30Sjl139090 {
86325cf1a30Sjl139090 	_NOTE(ARGUNUSED(hp))
86425cf1a30Sjl139090 
86525cf1a30Sjl139090 	int		err_flag = 0;
86625cf1a30Sjl139090 	int		d;
86725cf1a30Sjl139090 	sbd_error_t	*err;
86825cf1a30Sjl139090 	static fn_t	f = "dr_pre_attach_mem";
86925cf1a30Sjl139090 
87025cf1a30Sjl139090 	PR_MEM("%s...\n", f);
87125cf1a30Sjl139090 
87225cf1a30Sjl139090 	for (d = 0; d < devnum; d++) {
87325cf1a30Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
87425cf1a30Sjl139090 		dr_state_t	state;
87525cf1a30Sjl139090 
87625cf1a30Sjl139090 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
87725cf1a30Sjl139090 
87825cf1a30Sjl139090 		state = mp->sbm_cm.sbdev_state;
87925cf1a30Sjl139090 		switch (state) {
88025cf1a30Sjl139090 		case DR_STATE_UNCONFIGURED:
88125cf1a30Sjl139090 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
88225cf1a30Sjl139090 			    f,
88325cf1a30Sjl139090 			    mp->sbm_cm.sbdev_path);
88425cf1a30Sjl139090 
88525cf1a30Sjl139090 			/* use memlist cached by dr_post_detach_mem_unit */
88625cf1a30Sjl139090 			ASSERT(mp->sbm_mlist != NULL);
88725cf1a30Sjl139090 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
88825cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_path);
88925cf1a30Sjl139090 			PR_MEMLIST_DUMP(mp->sbm_mlist);
89025cf1a30Sjl139090 
89125cf1a30Sjl139090 			/* kphysm del handle should be have been freed */
89225cf1a30Sjl139090 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
89325cf1a30Sjl139090 
89425cf1a30Sjl139090 			/*FALLTHROUGH*/
89525cf1a30Sjl139090 
89625cf1a30Sjl139090 		case DR_STATE_CONNECTED:
89725cf1a30Sjl139090 			PR_MEM("%s: reprogramming mem hardware on %s\n",
89825cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_bp->b_path);
89925cf1a30Sjl139090 
90025cf1a30Sjl139090 			PR_MEM("%s: enabling %s\n",
90125cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_path);
90225cf1a30Sjl139090 
90325cf1a30Sjl139090 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
90425cf1a30Sjl139090 			if (err) {
90525cf1a30Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
90625cf1a30Sjl139090 				err_flag = 1;
90725cf1a30Sjl139090 			}
90825cf1a30Sjl139090 			break;
90925cf1a30Sjl139090 
91025cf1a30Sjl139090 		default:
91125cf1a30Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
91225cf1a30Sjl139090 			err_flag = 1;
91325cf1a30Sjl139090 			break;
91425cf1a30Sjl139090 		}
91525cf1a30Sjl139090 
91625cf1a30Sjl139090 		/* exit for loop if error encountered */
91725cf1a30Sjl139090 		if (err_flag)
91825cf1a30Sjl139090 			break;
91925cf1a30Sjl139090 	}
92025cf1a30Sjl139090 
92125cf1a30Sjl139090 	return (err_flag ? -1 : 0);
92225cf1a30Sjl139090 }
92325cf1a30Sjl139090 
92468ac2337Sjl139090 static void
dr_update_mc_memory()92568ac2337Sjl139090 dr_update_mc_memory()
92668ac2337Sjl139090 {
92768ac2337Sjl139090 	void		(*mc_update_mlist)(void);
92868ac2337Sjl139090 
92968ac2337Sjl139090 	/*
93068ac2337Sjl139090 	 * mc-opl is configured during drmach_mem_new but the memory
93168ac2337Sjl139090 	 * has not been added to phys_install at that time.
93268ac2337Sjl139090 	 * we must inform mc-opl to update the mlist after we
93368ac2337Sjl139090 	 * attach or detach a system board.
93468ac2337Sjl139090 	 */
93568ac2337Sjl139090 
93668ac2337Sjl139090 	mc_update_mlist = (void (*)(void))
93768ac2337Sjl139090 	    modgetsymvalue("opl_mc_update_mlist", 0);
93868ac2337Sjl139090 
93968ac2337Sjl139090 	if (mc_update_mlist != NULL) {
94068ac2337Sjl139090 		(*mc_update_mlist)();
94168ac2337Sjl139090 	}
94268ac2337Sjl139090 }
94368ac2337Sjl139090 
94425cf1a30Sjl139090 int
dr_post_attach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)94525cf1a30Sjl139090 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
94625cf1a30Sjl139090 {
94725cf1a30Sjl139090 	_NOTE(ARGUNUSED(hp))
94825cf1a30Sjl139090 
94925cf1a30Sjl139090 	int		d;
95025cf1a30Sjl139090 	static fn_t	f = "dr_post_attach_mem";
95125cf1a30Sjl139090 
95225cf1a30Sjl139090 	PR_MEM("%s...\n", f);
95325cf1a30Sjl139090 
95425cf1a30Sjl139090 	for (d = 0; d < devnum; d++) {
95525cf1a30Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
95625cf1a30Sjl139090 		struct memlist	*mlist, *ml;
95725cf1a30Sjl139090 
95825cf1a30Sjl139090 		mlist = dr_get_memlist(mp);
95925cf1a30Sjl139090 		if (mlist == NULL) {
96068ac2337Sjl139090 			/* OPL supports memoryless board */
96125cf1a30Sjl139090 			continue;
96225cf1a30Sjl139090 		}
96325cf1a30Sjl139090 
96425cf1a30Sjl139090 		/*
96525cf1a30Sjl139090 		 * Verify the memory really did successfully attach
96625cf1a30Sjl139090 		 * by checking for its existence in phys_install.
96725cf1a30Sjl139090 		 */
96825cf1a30Sjl139090 		memlist_read_lock();
96925cf1a30Sjl139090 		if (memlist_intersect(phys_install, mlist) == 0) {
97025cf1a30Sjl139090 			memlist_read_unlock();
97125cf1a30Sjl139090 
97225cf1a30Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
97325cf1a30Sjl139090 
97425cf1a30Sjl139090 			PR_MEM("%s: %s memlist not in phys_install",
97525cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_path);
97625cf1a30Sjl139090 
97725cf1a30Sjl139090 			memlist_delete(mlist);
97825cf1a30Sjl139090 			continue;
97925cf1a30Sjl139090 		}
98025cf1a30Sjl139090 		memlist_read_unlock();
98125cf1a30Sjl139090 
982*56f33205SJonathan Adams 		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
98325cf1a30Sjl139090 			sbd_error_t *err;
98425cf1a30Sjl139090 
98525cf1a30Sjl139090 			err = drmach_mem_add_span(
98625cf1a30Sjl139090 			    mp->sbm_cm.sbdev_id,
987*56f33205SJonathan Adams 			    ml->ml_address,
988*56f33205SJonathan Adams 			    ml->ml_size);
98925cf1a30Sjl139090 			if (err)
99025cf1a30Sjl139090 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
99125cf1a30Sjl139090 		}
99225cf1a30Sjl139090 
99325cf1a30Sjl139090 		memlist_delete(mlist);
99425cf1a30Sjl139090 
99525cf1a30Sjl139090 		/*
99625cf1a30Sjl139090 		 * Destroy cached memlist, if any.
99725cf1a30Sjl139090 		 * There will be a cached memlist in sbm_mlist if
99825cf1a30Sjl139090 		 * this board is being configured directly after
99925cf1a30Sjl139090 		 * an unconfigure.
100025cf1a30Sjl139090 		 * To support this transition, dr_post_detach_mem
100125cf1a30Sjl139090 		 * left a copy of the last known memlist in sbm_mlist.
100225cf1a30Sjl139090 		 * This memlist could differ from any derived from
100325cf1a30Sjl139090 		 * hardware if while this memunit was last configured
100425cf1a30Sjl139090 		 * the system detected and deleted bad pages from
100525cf1a30Sjl139090 		 * phys_install.  The location of those bad pages
100625cf1a30Sjl139090 		 * will be reflected in the cached memlist.
100725cf1a30Sjl139090 		 */
100825cf1a30Sjl139090 		if (mp->sbm_mlist) {
100925cf1a30Sjl139090 			memlist_delete(mp->sbm_mlist);
101025cf1a30Sjl139090 			mp->sbm_mlist = NULL;
101125cf1a30Sjl139090 		}
101225cf1a30Sjl139090 	}
101325cf1a30Sjl139090 
101468ac2337Sjl139090 	dr_update_mc_memory();
101568ac2337Sjl139090 
101625cf1a30Sjl139090 	return (0);
101725cf1a30Sjl139090 }
101825cf1a30Sjl139090 
101925cf1a30Sjl139090 int
dr_pre_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)102025cf1a30Sjl139090 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
102125cf1a30Sjl139090 {
102225cf1a30Sjl139090 	_NOTE(ARGUNUSED(hp))
102325cf1a30Sjl139090 
102425cf1a30Sjl139090 	int d;
102525cf1a30Sjl139090 
102625cf1a30Sjl139090 	for (d = 0; d < devnum; d++) {
102725cf1a30Sjl139090 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
102825cf1a30Sjl139090 
102925cf1a30Sjl139090 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
103025cf1a30Sjl139090 	}
103125cf1a30Sjl139090 
103225cf1a30Sjl139090 	return (0);
103325cf1a30Sjl139090 }
103425cf1a30Sjl139090 
103525cf1a30Sjl139090 int
dr_post_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)103625cf1a30Sjl139090 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
103725cf1a30Sjl139090 {
103825cf1a30Sjl139090 	_NOTE(ARGUNUSED(hp))
103925cf1a30Sjl139090 
104025cf1a30Sjl139090 	int		d, rv;
104125cf1a30Sjl139090 	static fn_t	f = "dr_post_detach_mem";
104225cf1a30Sjl139090 
104325cf1a30Sjl139090 	PR_MEM("%s...\n", f);
104425cf1a30Sjl139090 
104525cf1a30Sjl139090 	rv = 0;
104625cf1a30Sjl139090 	for (d = 0; d < devnum; d++) {
104725cf1a30Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
104825cf1a30Sjl139090 
104925cf1a30Sjl139090 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
105025cf1a30Sjl139090 
105125cf1a30Sjl139090 		if (dr_post_detach_mem_unit(mp))
105225cf1a30Sjl139090 			rv = -1;
105325cf1a30Sjl139090 	}
105468ac2337Sjl139090 	dr_update_mc_memory();
105525cf1a30Sjl139090 
105625cf1a30Sjl139090 	return (rv);
105725cf1a30Sjl139090 }
105825cf1a30Sjl139090 
105925cf1a30Sjl139090 static void
dr_add_memory_spans(dr_mem_unit_t * mp,struct memlist * ml)106025cf1a30Sjl139090 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
106125cf1a30Sjl139090 {
106225cf1a30Sjl139090 	static fn_t	f = "dr_add_memory_spans";
106325cf1a30Sjl139090 
106425cf1a30Sjl139090 	PR_MEM("%s...", f);
106525cf1a30Sjl139090 	PR_MEMLIST_DUMP(ml);
106625cf1a30Sjl139090 
106725cf1a30Sjl139090 #ifdef DEBUG
106825cf1a30Sjl139090 	memlist_read_lock();
106925cf1a30Sjl139090 	if (memlist_intersect(phys_install, ml)) {
107025cf1a30Sjl139090 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
107125cf1a30Sjl139090 	}
107225cf1a30Sjl139090 	memlist_read_unlock();
107325cf1a30Sjl139090 #endif
107425cf1a30Sjl139090 
1075*56f33205SJonathan Adams 	for (; ml; ml = ml->ml_next) {
107625cf1a30Sjl139090 		pfn_t		 base;
107725cf1a30Sjl139090 		pgcnt_t		 npgs;
107825cf1a30Sjl139090 		int		 rv;
107925cf1a30Sjl139090 		sbd_error_t	*err;
108025cf1a30Sjl139090 
1081*56f33205SJonathan Adams 		base = _b64top(ml->ml_address);
1082*56f33205SJonathan Adams 		npgs = _b64top(ml->ml_size);
108325cf1a30Sjl139090 
108425cf1a30Sjl139090 		rv = kphysm_add_memory_dynamic(base, npgs);
108525cf1a30Sjl139090 
108625cf1a30Sjl139090 		err = drmach_mem_add_span(
108725cf1a30Sjl139090 		    mp->sbm_cm.sbdev_id,
1088*56f33205SJonathan Adams 		    ml->ml_address,
1089*56f33205SJonathan Adams 		    ml->ml_size);
109025cf1a30Sjl139090 
109125cf1a30Sjl139090 		if (err)
109225cf1a30Sjl139090 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
109325cf1a30Sjl139090 
109425cf1a30Sjl139090 		if (rv != KPHYSM_OK) {
109525cf1a30Sjl139090 			cmn_err(CE_WARN, "%s:"
109625cf1a30Sjl139090 			    " unexpected kphysm_add_memory_dynamic"
109725cf1a30Sjl139090 			    " return value %d;"
109825cf1a30Sjl139090 			    " basepfn=0x%lx, npages=%ld\n",
109925cf1a30Sjl139090 			    f, rv, base, npgs);
110025cf1a30Sjl139090 
110125cf1a30Sjl139090 			continue;
110225cf1a30Sjl139090 		}
110325cf1a30Sjl139090 	}
110425cf1a30Sjl139090 }
110525cf1a30Sjl139090 
110625cf1a30Sjl139090 static int
memlist_touch(struct memlist * ml,uint64_t add)110768ac2337Sjl139090 memlist_touch(struct memlist *ml, uint64_t add)
110868ac2337Sjl139090 {
110968ac2337Sjl139090 	while (ml != NULL) {
1110*56f33205SJonathan Adams 		if ((add == ml->ml_address) ||
1111*56f33205SJonathan Adams 		    (add == (ml->ml_address + ml->ml_size)))
111268ac2337Sjl139090 			return (1);
1113*56f33205SJonathan Adams 		ml = ml->ml_next;
111468ac2337Sjl139090 	}
111568ac2337Sjl139090 	return (0);
111668ac2337Sjl139090 }
111768ac2337Sjl139090 
111868ac2337Sjl139090 static sbd_error_t *
dr_process_excess_mlist(dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp,struct memlist * t_excess_mlist)111968ac2337Sjl139090 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
112068ac2337Sjl139090 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
112168ac2337Sjl139090 {
112268ac2337Sjl139090 	struct memlist	*ml;
112368ac2337Sjl139090 	sbd_error_t	*err;
112468ac2337Sjl139090 	static fn_t	f = "dr_process_excess_mlist";
112568ac2337Sjl139090 	uint64_t	new_pa, nbytes;
112668ac2337Sjl139090 	int rv;
112768ac2337Sjl139090 
112868ac2337Sjl139090 	err = NULL;
112968ac2337Sjl139090 
113068ac2337Sjl139090 	/*
113168ac2337Sjl139090 	 * After the small <-> big copy-rename,
113268ac2337Sjl139090 	 * the original address space for the
113368ac2337Sjl139090 	 * source board may have excess to be
113468ac2337Sjl139090 	 * deleted. This is a case different
113568ac2337Sjl139090 	 * from the big->small excess source
113668ac2337Sjl139090 	 * memory case listed below.
113768ac2337Sjl139090 	 * Remove s_mp->sbm_del_mlist from
113868ac2337Sjl139090 	 * the kernel cage glist.
113968ac2337Sjl139090 	 */
114068ac2337Sjl139090 	for (ml = s_mp->sbm_del_mlist; ml;
1141*56f33205SJonathan Adams 	    ml = ml->ml_next) {
114268ac2337Sjl139090 		PR_MEM("%s: delete small<->big copy-"
114368ac2337Sjl139090 		    "rename source excess memory", f);
114468ac2337Sjl139090 		PR_MEMLIST_DUMP(ml);
114568ac2337Sjl139090 
114668ac2337Sjl139090 		err = drmach_mem_del_span(
114768ac2337Sjl139090 		    s_mp->sbm_cm.sbdev_id,
1148*56f33205SJonathan Adams 		    ml->ml_address, ml->ml_size);
114968ac2337Sjl139090 		if (err)
115068ac2337Sjl139090 			DRERR_SET_C(&s_mp->
115168ac2337Sjl139090 			    sbm_cm.sbdev_error, &err);
115268ac2337Sjl139090 		ASSERT(err == NULL);
115368ac2337Sjl139090 	}
115468ac2337Sjl139090 
115568ac2337Sjl139090 	PR_MEM("%s: adding back remaining portion"
115668ac2337Sjl139090 	    " of %s, memlist:\n",
115768ac2337Sjl139090 	    f, t_mp->sbm_cm.sbdev_path);
115868ac2337Sjl139090 	PR_MEMLIST_DUMP(t_excess_mlist);
115968ac2337Sjl139090 
1160*56f33205SJonathan Adams 	for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
116168ac2337Sjl139090 		struct memlist ml0;
116268ac2337Sjl139090 
1163*56f33205SJonathan Adams 		ml0.ml_address = ml->ml_address;
1164*56f33205SJonathan Adams 		ml0.ml_size = ml->ml_size;
1165*56f33205SJonathan Adams 		ml0.ml_next = ml0.ml_prev = NULL;
116668ac2337Sjl139090 
116768ac2337Sjl139090 		/*
116868ac2337Sjl139090 		 * If the memory object is 256 MB aligned (max page size
116968ac2337Sjl139090 		 * on OPL, it will not be coalesced to the adjacent memory
117068ac2337Sjl139090 		 * chunks.  The coalesce logic assumes contiguous page
117168ac2337Sjl139090 		 * structures for contiguous memory and we hit panic.
117268ac2337Sjl139090 		 * For anything less than 256 MB alignment, we have
117368ac2337Sjl139090 		 * to make sure that it is not adjacent to anything.
117468ac2337Sjl139090 		 * If the new chunk is adjacent to phys_install, we
117568ac2337Sjl139090 		 * truncate it to 4MB boundary.  4 MB is somewhat
117668ac2337Sjl139090 		 * arbitrary.  However we do not want to create
117768ac2337Sjl139090 		 * very small segments because they can cause problem.
117868ac2337Sjl139090 		 * The extreme case of 8K segment will fail
117968ac2337Sjl139090 		 * kphysm_add_memory_dynamic(), e.g.
118068ac2337Sjl139090 		 */
1181*56f33205SJonathan Adams 		if ((ml->ml_address & (MH_MPSS_ALIGNMENT - 1)) ||
1182*56f33205SJonathan Adams 		    (ml->ml_size & (MH_MPSS_ALIGNMENT - 1))) {
118368ac2337Sjl139090 
118468ac2337Sjl139090 		memlist_read_lock();
1185*56f33205SJonathan Adams 		rv = memlist_touch(phys_install, ml0.ml_address);
118668ac2337Sjl139090 		memlist_read_unlock();
118768ac2337Sjl139090 
118868ac2337Sjl139090 		if (rv) {
1189*56f33205SJonathan Adams 			new_pa = roundup(ml0.ml_address + 1, MH_MIN_ALIGNMENT);
1190*56f33205SJonathan Adams 			nbytes = (new_pa -  ml0.ml_address);
1191*56f33205SJonathan Adams 			if (nbytes >= ml0.ml_size) {
119268ac2337Sjl139090 				t_mp->sbm_dyn_segs =
119368ac2337Sjl139090 				    memlist_del_span(t_mp->sbm_dyn_segs,
1194*56f33205SJonathan Adams 				    ml0.ml_address, ml0.ml_size);
119568ac2337Sjl139090 				continue;
119668ac2337Sjl139090 			}
119768ac2337Sjl139090 			t_mp->sbm_dyn_segs =
119868ac2337Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
1199*56f33205SJonathan Adams 			    ml0.ml_address, nbytes);
1200*56f33205SJonathan Adams 			ml0.ml_size -= nbytes;
1201*56f33205SJonathan Adams 			ml0.ml_address = new_pa;
120268ac2337Sjl139090 		}
120368ac2337Sjl139090 
1204*56f33205SJonathan Adams 		if (ml0.ml_size == 0) {
120568ac2337Sjl139090 			continue;
120668ac2337Sjl139090 		}
120768ac2337Sjl139090 
120868ac2337Sjl139090 		memlist_read_lock();
1209*56f33205SJonathan Adams 		rv = memlist_touch(phys_install, ml0.ml_address + ml0.ml_size);
121068ac2337Sjl139090 		memlist_read_unlock();
121168ac2337Sjl139090 
121268ac2337Sjl139090 		if (rv) {
1213*56f33205SJonathan Adams 			new_pa = rounddown(ml0.ml_address + ml0.ml_size - 1,
121468ac2337Sjl139090 			    MH_MIN_ALIGNMENT);
1215*56f33205SJonathan Adams 			nbytes = (ml0.ml_address + ml0.ml_size - new_pa);
1216*56f33205SJonathan Adams 			if (nbytes >= ml0.ml_size) {
121768ac2337Sjl139090 				t_mp->sbm_dyn_segs =
121868ac2337Sjl139090 				    memlist_del_span(t_mp->sbm_dyn_segs,
1219*56f33205SJonathan Adams 				    ml0.ml_address, ml0.ml_size);
122068ac2337Sjl139090 				continue;
122168ac2337Sjl139090 			}
122268ac2337Sjl139090 			t_mp->sbm_dyn_segs =
122368ac2337Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
122468ac2337Sjl139090 			    new_pa, nbytes);
1225*56f33205SJonathan Adams 			ml0.ml_size -= nbytes;
122668ac2337Sjl139090 		}
122768ac2337Sjl139090 
1228*56f33205SJonathan Adams 		if (ml0.ml_size > 0) {
122968ac2337Sjl139090 			dr_add_memory_spans(s_mp, &ml0);
123068ac2337Sjl139090 		}
1231*56f33205SJonathan Adams 		} else if (ml0.ml_size > 0) {
123268ac2337Sjl139090 			dr_add_memory_spans(s_mp, &ml0);
123368ac2337Sjl139090 		}
123468ac2337Sjl139090 	}
123568ac2337Sjl139090 	memlist_delete(t_excess_mlist);
123668ac2337Sjl139090 	return (err);
123768ac2337Sjl139090 }
123868ac2337Sjl139090 
123968ac2337Sjl139090 static int
dr_post_detach_mem_unit(dr_mem_unit_t * s_mp)124025cf1a30Sjl139090 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
124125cf1a30Sjl139090 {
124225cf1a30Sjl139090 	uint64_t	sz = s_mp->sbm_slice_size;
124325cf1a30Sjl139090 	uint64_t	sm = sz - 1;
124425cf1a30Sjl139090 	/* old and new below refer to PAs before and after copy-rename */
124525cf1a30Sjl139090 	uint64_t	s_old_basepa, s_new_basepa;
124625cf1a30Sjl139090 	uint64_t	t_old_basepa, t_new_basepa;
124725cf1a30Sjl139090 	dr_mem_unit_t	*t_mp, *x_mp;
124825cf1a30Sjl139090 	drmach_mem_info_t	minfo;
124925cf1a30Sjl139090 	struct memlist	*ml;
125025cf1a30Sjl139090 	struct memlist	*t_excess_mlist;
125125cf1a30Sjl139090 	int		rv;
125225cf1a30Sjl139090 	int		s_excess_mem_deleted = 0;
125325cf1a30Sjl139090 	sbd_error_t	*err;
125425cf1a30Sjl139090 	static fn_t	f = "dr_post_detach_mem_unit";
125525cf1a30Sjl139090 
125625cf1a30Sjl139090 	PR_MEM("%s...\n", f);
125725cf1a30Sjl139090 
125825cf1a30Sjl139090 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
125925cf1a30Sjl139090 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
126025cf1a30Sjl139090 	    f, s_mp->sbm_cm.sbdev_path);
126125cf1a30Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
126225cf1a30Sjl139090 
126325cf1a30Sjl139090 	/* sanity check */
126425cf1a30Sjl139090 	ASSERT(s_mp->sbm_del_mlist == NULL ||
126525cf1a30Sjl139090 	    (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
126625cf1a30Sjl139090 
126725cf1a30Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
126825cf1a30Sjl139090 		t_mp = s_mp->sbm_peer;
126925cf1a30Sjl139090 		ASSERT(t_mp != NULL);
127025cf1a30Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
127125cf1a30Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
127225cf1a30Sjl139090 
127325cf1a30Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
127425cf1a30Sjl139090 		ASSERT(t_mp->sbm_del_mlist);
127525cf1a30Sjl139090 
127625cf1a30Sjl139090 		PR_MEM("%s: target %s: deleted memlist:\n",
127725cf1a30Sjl139090 		    f, t_mp->sbm_cm.sbdev_path);
127825cf1a30Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
127925cf1a30Sjl139090 	} else {
128025cf1a30Sjl139090 		/* this is no target unit */
128125cf1a30Sjl139090 		t_mp = NULL;
128225cf1a30Sjl139090 	}
128325cf1a30Sjl139090 
128425cf1a30Sjl139090 	/*
128525cf1a30Sjl139090 	 * Verify the memory really did successfully detach
128625cf1a30Sjl139090 	 * by checking for its non-existence in phys_install.
128725cf1a30Sjl139090 	 */
128825cf1a30Sjl139090 	rv = 0;
128925cf1a30Sjl139090 	memlist_read_lock();
129025cf1a30Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
129125cf1a30Sjl139090 		x_mp = s_mp;
129225cf1a30Sjl139090 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
129325cf1a30Sjl139090 	}
129425cf1a30Sjl139090 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
129525cf1a30Sjl139090 		x_mp = t_mp;
129625cf1a30Sjl139090 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
129725cf1a30Sjl139090 	}
129825cf1a30Sjl139090 	memlist_read_unlock();
129925cf1a30Sjl139090 
130025cf1a30Sjl139090 	if (rv) {
130125cf1a30Sjl139090 		/* error: memlist still in phys_install */
130225cf1a30Sjl139090 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
130325cf1a30Sjl139090 	}
130425cf1a30Sjl139090 
130525cf1a30Sjl139090 	/*
130625cf1a30Sjl139090 	 * clean mem unit state and bail out if an error has been recorded.
130725cf1a30Sjl139090 	 */
130825cf1a30Sjl139090 	rv = 0;
130925cf1a30Sjl139090 	if (s_mp->sbm_cm.sbdev_error) {
131025cf1a30Sjl139090 		PR_MEM("%s: %s flags=%x", f,
131125cf1a30Sjl139090 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
131225cf1a30Sjl139090 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
131325cf1a30Sjl139090 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
131425cf1a30Sjl139090 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
131525cf1a30Sjl139090 		rv = -1;
131625cf1a30Sjl139090 	}
131725cf1a30Sjl139090 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
131825cf1a30Sjl139090 		PR_MEM("%s: %s flags=%x", f,
131925cf1a30Sjl139090 		    s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
132025cf1a30Sjl139090 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
132125cf1a30Sjl139090 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
132225cf1a30Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
132325cf1a30Sjl139090 		rv = -1;
132425cf1a30Sjl139090 	}
132525cf1a30Sjl139090 	if (rv)
132625cf1a30Sjl139090 		goto cleanup;
132725cf1a30Sjl139090 
132825cf1a30Sjl139090 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
132925cf1a30Sjl139090 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
133025cf1a30Sjl139090 	ASSERT(err == NULL);
133125cf1a30Sjl139090 	s_new_basepa = minfo.mi_basepa;
133225cf1a30Sjl139090 
133325cf1a30Sjl139090 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
133425cf1a30Sjl139090 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
133525cf1a30Sjl139090 
133625cf1a30Sjl139090 	if (t_mp != NULL) {
133725cf1a30Sjl139090 		struct memlist *s_copy_mlist;
133825cf1a30Sjl139090 
133925cf1a30Sjl139090 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
134025cf1a30Sjl139090 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
134125cf1a30Sjl139090 		ASSERT(err == NULL);
134225cf1a30Sjl139090 		t_new_basepa = minfo.mi_basepa;
134325cf1a30Sjl139090 
134425cf1a30Sjl139090 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
134525cf1a30Sjl139090 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
134625cf1a30Sjl139090 
134725cf1a30Sjl139090 		/*
134825cf1a30Sjl139090 		 * Construct copy list with original source addresses.
134925cf1a30Sjl139090 		 * Used to add back excess target mem.
135025cf1a30Sjl139090 		 */
135125cf1a30Sjl139090 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1352*56f33205SJonathan Adams 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
135325cf1a30Sjl139090 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1354*56f33205SJonathan Adams 			    ml->ml_address, ml->ml_size);
135525cf1a30Sjl139090 		}
135625cf1a30Sjl139090 
135725cf1a30Sjl139090 		PR_MEM("%s: source copy list:\n:", f);
135825cf1a30Sjl139090 		PR_MEMLIST_DUMP(s_copy_mlist);
135925cf1a30Sjl139090 
136025cf1a30Sjl139090 		/*
136125cf1a30Sjl139090 		 * We had to swap mem-units, so update
136225cf1a30Sjl139090 		 * memlists accordingly with new base
136325cf1a30Sjl139090 		 * addresses.
136425cf1a30Sjl139090 		 */
1365*56f33205SJonathan Adams 		for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
1366*56f33205SJonathan Adams 			ml->ml_address -= t_old_basepa;
1367*56f33205SJonathan Adams 			ml->ml_address += t_new_basepa;
136825cf1a30Sjl139090 		}
136925cf1a30Sjl139090 
137025cf1a30Sjl139090 		/*
137125cf1a30Sjl139090 		 * There is no need to explicitly rename the target delete
137225cf1a30Sjl139090 		 * memlist, because sbm_del_mlist and sbm_mlist always
137325cf1a30Sjl139090 		 * point to the same memlist for a copy/rename operation.
137425cf1a30Sjl139090 		 */
137525cf1a30Sjl139090 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
137625cf1a30Sjl139090 
137725cf1a30Sjl139090 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
137825cf1a30Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
137925cf1a30Sjl139090 
1380*56f33205SJonathan Adams 		for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
1381*56f33205SJonathan Adams 			ml->ml_address -= s_old_basepa;
1382*56f33205SJonathan Adams 			ml->ml_address += s_new_basepa;
138325cf1a30Sjl139090 		}
138425cf1a30Sjl139090 
138525cf1a30Sjl139090 		PR_MEM("%s: renamed source memlist:\n", f);
138625cf1a30Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
138768ac2337Sjl139090 		PR_MEM("%s: source dyn seg memlist:\n", f);
138868ac2337Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
138925cf1a30Sjl139090 
139025cf1a30Sjl139090 		/*
139125cf1a30Sjl139090 		 * Keep track of dynamically added segments
139225cf1a30Sjl139090 		 * since they cannot be split if we need to delete
139325cf1a30Sjl139090 		 * excess source memory later for this board.
139425cf1a30Sjl139090 		 */
139525cf1a30Sjl139090 		if (t_mp->sbm_dyn_segs)
139625cf1a30Sjl139090 			memlist_delete(t_mp->sbm_dyn_segs);
139725cf1a30Sjl139090 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
139825cf1a30Sjl139090 		s_mp->sbm_dyn_segs = NULL;
139925cf1a30Sjl139090 
140025cf1a30Sjl139090 		/*
140125cf1a30Sjl139090 		 * Add back excess target memory.
140225cf1a30Sjl139090 		 * Subtract out the portion of the target memory
140325cf1a30Sjl139090 		 * node that was taken over by the source memory
140425cf1a30Sjl139090 		 * node.
140525cf1a30Sjl139090 		 */
140625cf1a30Sjl139090 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1407*56f33205SJonathan Adams 		for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
140825cf1a30Sjl139090 			t_excess_mlist =
140925cf1a30Sjl139090 			    memlist_del_span(t_excess_mlist,
1410*56f33205SJonathan Adams 			    ml->ml_address, ml->ml_size);
141125cf1a30Sjl139090 		}
141268ac2337Sjl139090 		PR_MEM("%s: excess memlist:\n", f);
141368ac2337Sjl139090 		PR_MEMLIST_DUMP(t_excess_mlist);
141425cf1a30Sjl139090 
141525cf1a30Sjl139090 		/*
141625cf1a30Sjl139090 		 * Update dynamically added segs
141725cf1a30Sjl139090 		 */
1418*56f33205SJonathan Adams 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
141925cf1a30Sjl139090 			t_mp->sbm_dyn_segs =
142025cf1a30Sjl139090 			    memlist_del_span(t_mp->sbm_dyn_segs,
1421*56f33205SJonathan Adams 			    ml->ml_address, ml->ml_size);
142225cf1a30Sjl139090 		}
1423*56f33205SJonathan Adams 		for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
142425cf1a30Sjl139090 			t_mp->sbm_dyn_segs =
142525cf1a30Sjl139090 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1426*56f33205SJonathan Adams 			    ml->ml_address, ml->ml_size);
142725cf1a30Sjl139090 		}
142825cf1a30Sjl139090 		PR_MEM("%s: %s: updated dynamic seg list:\n",
142925cf1a30Sjl139090 		    f, t_mp->sbm_cm.sbdev_path);
143025cf1a30Sjl139090 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
143125cf1a30Sjl139090 
143225cf1a30Sjl139090 		if (t_excess_mlist != NULL) {
143368ac2337Sjl139090 			err = dr_process_excess_mlist(s_mp, t_mp,
143468ac2337Sjl139090 			    t_excess_mlist);
143525cf1a30Sjl139090 			s_excess_mem_deleted = 1;
143625cf1a30Sjl139090 		}
143768ac2337Sjl139090 
143825cf1a30Sjl139090 		memlist_delete(s_copy_mlist);
143925cf1a30Sjl139090 
144025cf1a30Sjl139090 #ifdef DEBUG
144125cf1a30Sjl139090 		/*
144225cf1a30Sjl139090 		 * s_mp->sbm_del_mlist may still be needed
144325cf1a30Sjl139090 		 */
144425cf1a30Sjl139090 		PR_MEM("%s: source delete memeory flag %d",
144525cf1a30Sjl139090 		    f, s_excess_mem_deleted);
144625cf1a30Sjl139090 		PR_MEM("%s: source delete memlist", f);
144725cf1a30Sjl139090 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
144825cf1a30Sjl139090 #endif
144925cf1a30Sjl139090 
145025cf1a30Sjl139090 	}
145125cf1a30Sjl139090 
145225cf1a30Sjl139090 	if (t_mp != NULL) {
145325cf1a30Sjl139090 		/* delete target's entire address space */
145425cf1a30Sjl139090 		err = drmach_mem_del_span(
145525cf1a30Sjl139090 		    t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
145625cf1a30Sjl139090 		if (err)
145725cf1a30Sjl139090 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
145825cf1a30Sjl139090 		ASSERT(err == NULL);
145925cf1a30Sjl139090 
146025cf1a30Sjl139090 		/*
146125cf1a30Sjl139090 		 * After the copy/rename, the original address space
146225cf1a30Sjl139090 		 * for the source board (which is now located on the
146325cf1a30Sjl139090 		 * target board) may now have some excess to be deleted.
146425cf1a30Sjl139090 		 * Those excess memory on the source board are kept in
146525cf1a30Sjl139090 		 * source board's sbm_del_mlist
146625cf1a30Sjl139090 		 */
146725cf1a30Sjl139090 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1468*56f33205SJonathan Adams 		    ml = ml->ml_next) {
146925cf1a30Sjl139090 			PR_MEM("%s: delete source excess memory", f);
147025cf1a30Sjl139090 			PR_MEMLIST_DUMP(ml);
147125cf1a30Sjl139090 
147225cf1a30Sjl139090 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1473*56f33205SJonathan Adams 			    ml->ml_address, ml->ml_size);
147425cf1a30Sjl139090 			if (err)
147525cf1a30Sjl139090 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
147625cf1a30Sjl139090 			ASSERT(err == NULL);
147725cf1a30Sjl139090 		}
147825cf1a30Sjl139090 
147925cf1a30Sjl139090 	} else {
148025cf1a30Sjl139090 		/* delete board's entire address space */
148125cf1a30Sjl139090 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
148225cf1a30Sjl139090 		    s_old_basepa & ~ sm, sz);
148325cf1a30Sjl139090 		if (err)
148425cf1a30Sjl139090 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
148525cf1a30Sjl139090 		ASSERT(err == NULL);
148625cf1a30Sjl139090 	}
148725cf1a30Sjl139090 
148825cf1a30Sjl139090 cleanup:
148925cf1a30Sjl139090 	/* clean up target mem unit */
149025cf1a30Sjl139090 	if (t_mp != NULL) {
149125cf1a30Sjl139090 		memlist_delete(t_mp->sbm_del_mlist);
149225cf1a30Sjl139090 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
149325cf1a30Sjl139090 
149425cf1a30Sjl139090 		t_mp->sbm_del_mlist = NULL;
149525cf1a30Sjl139090 		t_mp->sbm_mlist = NULL;
149625cf1a30Sjl139090 		t_mp->sbm_peer = NULL;
149725cf1a30Sjl139090 		t_mp->sbm_flags = 0;
149825cf1a30Sjl139090 		t_mp->sbm_cm.sbdev_busy = 0;
149925cf1a30Sjl139090 		dr_init_mem_unit_data(t_mp);
150025cf1a30Sjl139090 
150125cf1a30Sjl139090 	}
150225cf1a30Sjl139090 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
150325cf1a30Sjl139090 		/*
150425cf1a30Sjl139090 		 * now that copy/rename has completed, undo this
150525cf1a30Sjl139090 		 * work that was done in dr_release_mem_done.
150625cf1a30Sjl139090 		 */
150725cf1a30Sjl139090 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
150825cf1a30Sjl139090 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
150925cf1a30Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
151025cf1a30Sjl139090 	}
151125cf1a30Sjl139090 
151225cf1a30Sjl139090 	/*
151325cf1a30Sjl139090 	 * clean up (source) board's mem unit structure.
151425cf1a30Sjl139090 	 * NOTE: sbm_mlist is retained if no error has been recorded (in other
151525cf1a30Sjl139090 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
151625cf1a30Sjl139090 	 * referred to elsewhere as the cached memlist.  The cached memlist
151725cf1a30Sjl139090 	 * is used to re-attach (configure back in) this memunit from the
151825cf1a30Sjl139090 	 * unconfigured state.  The memlist is retained because it may
151925cf1a30Sjl139090 	 * represent bad pages that were detected while the memory was
152025cf1a30Sjl139090 	 * configured into the OS.  The OS deletes bad pages from phys_install.
152125cf1a30Sjl139090 	 * Those deletes, if any, will be represented in the cached mlist.
152225cf1a30Sjl139090 	 */
152325cf1a30Sjl139090 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
152425cf1a30Sjl139090 		memlist_delete(s_mp->sbm_del_mlist);
152525cf1a30Sjl139090 
152625cf1a30Sjl139090 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
152725cf1a30Sjl139090 		memlist_delete(s_mp->sbm_mlist);
152825cf1a30Sjl139090 		s_mp->sbm_mlist = NULL;
152925cf1a30Sjl139090 	}
153025cf1a30Sjl139090 
153125cf1a30Sjl139090 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
153225cf1a30Sjl139090 		memlist_delete(s_mp->sbm_dyn_segs);
153325cf1a30Sjl139090 		s_mp->sbm_dyn_segs = NULL;
153425cf1a30Sjl139090 	}
153525cf1a30Sjl139090 
153625cf1a30Sjl139090 	s_mp->sbm_del_mlist = NULL;
153725cf1a30Sjl139090 	s_mp->sbm_peer = NULL;
153825cf1a30Sjl139090 	s_mp->sbm_flags = 0;
153925cf1a30Sjl139090 	s_mp->sbm_cm.sbdev_busy = 0;
154025cf1a30Sjl139090 	dr_init_mem_unit_data(s_mp);
154125cf1a30Sjl139090 
154225cf1a30Sjl139090 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
154325cf1a30Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
154425cf1a30Sjl139090 
154525cf1a30Sjl139090 	return (0);
154625cf1a30Sjl139090 }
154725cf1a30Sjl139090 
154825cf1a30Sjl139090 /*
154925cf1a30Sjl139090  * Successful return from this function will have the memory
155025cf1a30Sjl139090  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
155125cf1a30Sjl139090  * and waiting.  This routine's job is to select the memory that
155225cf1a30Sjl139090  * actually has to be released (detached) which may not necessarily
155325cf1a30Sjl139090  * be the same memory node that came in via devlist[],
155425cf1a30Sjl139090  * i.e. a copy-rename is needed.
 *
 * Arguments:
 *	hp	- handle for the current operation; its command flags are
 *		  read via dr_cmd_flags() and it is handed on to
 *		  dr_select_mem_target().
 *	devlist	- array of units to release; each entry is used as a
 *		  dr_mem_unit_t.
 *	devnum	- number of entries in devlist.
 *
 * Returns 0 if no unit hit an error and -1 otherwise.  Some failures
 * only skip the current unit (continue) while others abandon the
 * remaining units (break).  Nothing in this function undoes the state
 * already set up for units handled earlier in the loop.
155525cf1a30Sjl139090  */
155625cf1a30Sjl139090 int
dr_pre_release_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)155725cf1a30Sjl139090 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
155825cf1a30Sjl139090 {
155925cf1a30Sjl139090 	int		d;
156025cf1a30Sjl139090 	int		err_flag = 0;
156125cf1a30Sjl139090 	static fn_t	f = "dr_pre_release_mem";
156225cf1a30Sjl139090 
156325cf1a30Sjl139090 	PR_MEM("%s...\n", f);
156425cf1a30Sjl139090 
156525cf1a30Sjl139090 	for (d = 0; d < devnum; d++) {
156625cf1a30Sjl139090 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
156725cf1a30Sjl139090 		int		rv;
156825cf1a30Sjl139090 		memquery_t	mq;
156925cf1a30Sjl139090 		struct memlist	*ml;
157025cf1a30Sjl139090 
		/*
		 * Units that already carry an error are skipped, and a
		 * unit is rejected with ESBD_KCAGE_OFF while kcage_on is
		 * clear.  Both cases move on to the next unit.
		 */
157125cf1a30Sjl139090 		if (mp->sbm_cm.sbdev_error) {
157225cf1a30Sjl139090 			err_flag = 1;
157325cf1a30Sjl139090 			continue;
157425cf1a30Sjl139090 		} else if (!kcage_on) {
157525cf1a30Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
157625cf1a30Sjl139090 			err_flag = 1;
157725cf1a30Sjl139090 			continue;
157825cf1a30Sjl139090 		}
157925cf1a30Sjl139090 
158025cf1a30Sjl139090 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
158125cf1a30Sjl139090 			/*
158225cf1a30Sjl139090 			 * Board is currently involved in a delete
158325cf1a30Sjl139090 			 * memory operation. Can't detach this guy until
158425cf1a30Sjl139090 			 * that operation completes.
158525cf1a30Sjl139090 			 */
158625cf1a30Sjl139090 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
158725cf1a30Sjl139090 			err_flag = 1;
158825cf1a30Sjl139090 			break;
158925cf1a30Sjl139090 		}
159025cf1a30Sjl139090 
159125cf1a30Sjl139090 		/* flags should be clean at this time */
159225cf1a30Sjl139090 		ASSERT(mp->sbm_flags == 0);
159325cf1a30Sjl139090 
		/*
		 * Both list pointers are expected to be NULL here.  The
		 * explicit delete below still frees a leftover sbm_mlist
		 * in builds where ASSERT does not fire (presumably
		 * non-DEBUG kernels).
		 * NOTE(review): a leftover sbm_del_mlist is not freed here.
		 */
159425cf1a30Sjl139090 		ASSERT(mp->sbm_mlist == NULL);
159525cf1a30Sjl139090 		ASSERT(mp->sbm_del_mlist == NULL);
159625cf1a30Sjl139090 		if (mp->sbm_mlist != NULL) {
159725cf1a30Sjl139090 			memlist_delete(mp->sbm_mlist);
159825cf1a30Sjl139090 			mp->sbm_mlist = NULL;
159925cf1a30Sjl139090 		}
160025cf1a30Sjl139090 
		/*
		 * If no memlist is returned, only err_flag reports the
		 * failure; no error is recorded on the unit in this
		 * function.
		 */
160125cf1a30Sjl139090 		ml = dr_get_memlist(mp);
160225cf1a30Sjl139090 		if (ml == NULL) {
160325cf1a30Sjl139090 			err_flag = 1;
160425cf1a30Sjl139090 			PR_MEM("%s: no memlist found for %s\n",
160525cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_path);
160625cf1a30Sjl139090 			continue;
160725cf1a30Sjl139090 		}
160825cf1a30Sjl139090 
160925cf1a30Sjl139090 		/*
161025cf1a30Sjl139090 		 * Check whether the detaching memory requires a
161125cf1a30Sjl139090 		 * copy-rename.
161225cf1a30Sjl139090 		 */
161325cf1a30Sjl139090 		ASSERT(mp->sbm_npages != 0);
161468ac2337Sjl139090 
161525cf1a30Sjl139090 		rv = dr_del_mlist_query(ml, &mq);
161625cf1a30Sjl139090 		if (rv != KPHYSM_OK) {
161725cf1a30Sjl139090 			memlist_delete(ml);
161825cf1a30Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
161925cf1a30Sjl139090 			err_flag = 1;
162025cf1a30Sjl139090 			break;
162125cf1a30Sjl139090 		}
162225cf1a30Sjl139090 
		/*
		 * Non-relocatable memory is only accepted when the
		 * command carries SBD_FLAG_FORCE or SBD_FLAG_QUIESCE_OKAY.
		 */
162325cf1a30Sjl139090 		if (mq.nonrelocatable != 0) {
162425cf1a30Sjl139090 			if (!(dr_cmd_flags(hp) &
162525cf1a30Sjl139090 			    (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
162625cf1a30Sjl139090 				memlist_delete(ml);
162725cf1a30Sjl139090 				/* caller wasn't prompted for a suspend */
162825cf1a30Sjl139090 				dr_dev_err(CE_WARN, &mp->sbm_cm,
162925cf1a30Sjl139090 				    ESBD_QUIESCE_REQD);
163025cf1a30Sjl139090 				err_flag = 1;
163125cf1a30Sjl139090 				break;
163225cf1a30Sjl139090 			}
163325cf1a30Sjl139090 		}
163425cf1a30Sjl139090 
163525cf1a30Sjl139090 		/* allocate a kphysm handle */
163625cf1a30Sjl139090 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
163725cf1a30Sjl139090 		if (rv != KPHYSM_OK) {
163825cf1a30Sjl139090 			memlist_delete(ml);
163925cf1a30Sjl139090 
164025cf1a30Sjl139090 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
164125cf1a30Sjl139090 			err_flag = 1;
164225cf1a30Sjl139090 			break;
164325cf1a30Sjl139090 		}
		/* from here on this unit holds the delete handle */
164425cf1a30Sjl139090 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
164525cf1a30Sjl139090 
		/*
		 * dr_reserve_mem_spans() is only called when the query
		 * found no non-relocatable memory; a non-zero return is
		 * treated as failure to reserve.
		 */
164625cf1a30Sjl139090 		if ((mq.nonrelocatable != 0) ||
164725cf1a30Sjl139090 		    dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
164825cf1a30Sjl139090 			/*
164925cf1a30Sjl139090 			 * Either the detaching memory node contains
165025cf1a30Sjl139090 			 * non-reloc memory or we failed to reserve the
165125cf1a30Sjl139090 			 * detaching memory node (which did _not_ have
165225cf1a30Sjl139090 			 * any non-reloc memory, i.e. some non-reloc mem
165325cf1a30Sjl139090 			 * got onboard).
165425cf1a30Sjl139090 			 */
165525cf1a30Sjl139090 
165625cf1a30Sjl139090 			if (dr_select_mem_target(hp, mp, ml)) {
				/* NOTE: shadows the loop-scope rv above */
165725cf1a30Sjl139090 				int rv;
165825cf1a30Sjl139090 
165925cf1a30Sjl139090 				/*
166025cf1a30Sjl139090 				 * We had no luck locating a target
166125cf1a30Sjl139090 				 * memory node to be the recipient of
166225cf1a30Sjl139090 				 * the non-reloc memory on the node
166325cf1a30Sjl139090 				 * we're trying to detach.
166425cf1a30Sjl139090 				 * Clean up by disposing the mem handle
166525cf1a30Sjl139090 				 * and the mem list.
166625cf1a30Sjl139090 				 */
166725cf1a30Sjl139090 				rv = kphysm_del_release(mp->sbm_memhandle);
166825cf1a30Sjl139090 				if (rv != KPHYSM_OK) {
166925cf1a30Sjl139090 					/*
167025cf1a30Sjl139090 					 * can do nothing but complain
167125cf1a30Sjl139090 					 * and hope helpful for debug
167225cf1a30Sjl139090 					 */
167325cf1a30Sjl139090 					cmn_err(CE_WARN, "%s: unexpected"
167425cf1a30Sjl139090 					    " kphysm_del_release return"
167525cf1a30Sjl139090 					    " value %d",
167625cf1a30Sjl139090 					    f, rv);
167725cf1a30Sjl139090 				}
167825cf1a30Sjl139090 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
167925cf1a30Sjl139090 
168025cf1a30Sjl139090 				memlist_delete(ml);
168125cf1a30Sjl139090 
168225cf1a30Sjl139090 				/* make sure sbm_flags is clean */
168325cf1a30Sjl139090 				ASSERT(mp->sbm_flags == 0);
168425cf1a30Sjl139090 
168525cf1a30Sjl139090 				dr_dev_err(CE_WARN,
168625cf1a30Sjl139090 				    &mp->sbm_cm, ESBD_NO_TARGET);
168725cf1a30Sjl139090 
168825cf1a30Sjl139090 				err_flag = 1;
168925cf1a30Sjl139090 				break;
169025cf1a30Sjl139090 			}
169125cf1a30Sjl139090 
169225cf1a30Sjl139090 			/*
169325cf1a30Sjl139090 			 * ml is not memlist_delete'd here because
169425cf1a30Sjl139090 			 * it has been assigned to mp->sbm_mlist
169525cf1a30Sjl139090 			 * by dr_select_mem_target.
169625cf1a30Sjl139090 			 */
169725cf1a30Sjl139090 		} else {
169825cf1a30Sjl139090 			/* no target needed to detach this board */
			/*
			 * sbm_del_mlist and sbm_mlist alias the same list
			 * in this case, so it must only be freed once.
			 */
169925cf1a30Sjl139090 			mp->sbm_flags |= DR_MFLAG_RESERVED;
170025cf1a30Sjl139090 			mp->sbm_peer = NULL;
170125cf1a30Sjl139090 			mp->sbm_del_mlist = ml;
170225cf1a30Sjl139090 			mp->sbm_mlist = ml;
170325cf1a30Sjl139090 			mp->sbm_cm.sbdev_busy = 1;
170425cf1a30Sjl139090 		}
170525cf1a30Sjl139090 #ifdef DEBUG
		/* sanity-check the state established for this unit */
170625cf1a30Sjl139090 		ASSERT(mp->sbm_mlist != NULL);
170725cf1a30Sjl139090 
170825cf1a30Sjl139090 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
170925cf1a30Sjl139090 			PR_MEM("%s: release of %s requires copy/rename;"
171025cf1a30Sjl139090 			    " selected target board %s\n",
171125cf1a30Sjl139090 			    f,
171225cf1a30Sjl139090 			    mp->sbm_cm.sbdev_path,
171325cf1a30Sjl139090 			    mp->sbm_peer->sbm_cm.sbdev_path);
171425cf1a30Sjl139090 		} else {
171525cf1a30Sjl139090 			PR_MEM("%s: copy/rename not required to release %s\n",
171625cf1a30Sjl139090 			    f, mp->sbm_cm.sbdev_path);
171725cf1a30Sjl139090 		}
171825cf1a30Sjl139090 
171925cf1a30Sjl139090 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
172025cf1a30Sjl139090 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
172125cf1a30Sjl139090 #endif
172225cf1a30Sjl139090 	}
172325cf1a30Sjl139090 
172425cf1a30Sjl139090 	return (err_flag ? -1 : 0);
172525cf1a30Sjl139090 }
172625cf1a30Sjl139090 
/*
 * Finish the release of a memory unit.  cp is used as the source
 * dr_mem_unit_t; when that unit is flagged DR_MFLAG_SOURCE, its
 * sbm_peer is handled as the target unit of a copy-rename.
 *
 * The kphysm delete handle is released and DR_MFLAG_RELOWNER cleared
 * first.  If the source unit carries an error, the memlists, peer
 * links, flags and busy indication of both units are reset and the
 * function returns.  Otherwise both units are marked released and
 * moved to DR_STATE_RELEASE, the delete memlists are checked against
 * phys_install, and on success DR_MFLAG_RELDONE is set,
 * dr_release_dev_done() is called and sbdev_ostate is set to
 * SBD_STAT_UNCONFIGURED for each unit.
 */
172725cf1a30Sjl139090 void
dr_release_mem_done(dr_common_unit_t * cp)172825cf1a30Sjl139090 dr_release_mem_done(dr_common_unit_t *cp)
172925cf1a30Sjl139090 {
173025cf1a30Sjl139090 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
173125cf1a30Sjl139090 	dr_mem_unit_t *t_mp, *mp;
173225cf1a30Sjl139090 	int		rv;
173325cf1a30Sjl139090 	static fn_t	f = "dr_release_mem_done";
173425cf1a30Sjl139090 
173525cf1a30Sjl139090 	/*
173625cf1a30Sjl139090 	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
173725cf1a30Sjl139090 	 * has a target unit.
173825cf1a30Sjl139090 	 */
173925cf1a30Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
174025cf1a30Sjl139090 		t_mp = s_mp->sbm_peer;
174125cf1a30Sjl139090 		ASSERT(t_mp != NULL);
174225cf1a30Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
174325cf1a30Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
174425cf1a30Sjl139090 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
174525cf1a30Sjl139090 	} else {
174625cf1a30Sjl139090 		/* there is no target unit */
174725cf1a30Sjl139090 		t_mp = NULL;
174825cf1a30Sjl139090 	}
174925cf1a30Sjl139090 
175025cf1a30Sjl139090 	/* free delete handle */
175125cf1a30Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
175225cf1a30Sjl139090 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
175325cf1a30Sjl139090 	rv = kphysm_del_release(s_mp->sbm_memhandle);
175425cf1a30Sjl139090 	if (rv != KPHYSM_OK) {
175525cf1a30Sjl139090 		/*
175625cf1a30Sjl139090 		 * can do nothing but complain
175725cf1a30Sjl139090 		 * and hope helpful for debug
175825cf1a30Sjl139090 		 */
175925cf1a30Sjl139090 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
176025cf1a30Sjl139090 		    " return value %d", f, rv);
176125cf1a30Sjl139090 	}
	/* the flag is cleared whether or not the release succeeded */
176225cf1a30Sjl139090 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
176325cf1a30Sjl139090 
176425cf1a30Sjl139090 	/*
176525cf1a30Sjl139090 	 * If an error was encountered during release, clean up
176625cf1a30Sjl139090 	 * the source (and target, if present) unit data.
176725cf1a30Sjl139090 	 */
176825cf1a30Sjl139090 /* XXX Can we know that sbdev_error was encountered during release? */
176925cf1a30Sjl139090 	if (s_mp->sbm_cm.sbdev_error != NULL) {
177025cf1a30Sjl139090 
177125cf1a30Sjl139090 		if (t_mp != NULL) {
			/*
			 * The target's two list pointers are asserted to
			 * alias one list, so it is freed once, through
			 * sbm_mlist.
			 */
177225cf1a30Sjl139090 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
177325cf1a30Sjl139090 			t_mp->sbm_del_mlist = NULL;
177425cf1a30Sjl139090 
177525cf1a30Sjl139090 			if (t_mp->sbm_mlist != NULL) {
177625cf1a30Sjl139090 				memlist_delete(t_mp->sbm_mlist);
177725cf1a30Sjl139090 				t_mp->sbm_mlist = NULL;
177825cf1a30Sjl139090 			}
177925cf1a30Sjl139090 
178025cf1a30Sjl139090 			t_mp->sbm_peer = NULL;
178125cf1a30Sjl139090 			t_mp->sbm_flags = 0;
178225cf1a30Sjl139090 			t_mp->sbm_cm.sbdev_busy = 0;
178325cf1a30Sjl139090 		}
178425cf1a30Sjl139090 
		/*
		 * Free the source delete list here only when it is a
		 * separate list; when it aliases sbm_mlist, the delete
		 * of sbm_mlist just below covers it.
		 */
178525cf1a30Sjl139090 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
178625cf1a30Sjl139090 			memlist_delete(s_mp->sbm_del_mlist);
178725cf1a30Sjl139090 		s_mp->sbm_del_mlist = NULL;
178825cf1a30Sjl139090 
178925cf1a30Sjl139090 		if (s_mp->sbm_mlist != NULL) {
179025cf1a30Sjl139090 			memlist_delete(s_mp->sbm_mlist);
179125cf1a30Sjl139090 			s_mp->sbm_mlist = NULL;
179225cf1a30Sjl139090 		}
179325cf1a30Sjl139090 
179425cf1a30Sjl139090 		s_mp->sbm_peer = NULL;
179525cf1a30Sjl139090 		s_mp->sbm_flags = 0;
179625cf1a30Sjl139090 		s_mp->sbm_cm.sbdev_busy = 0;
179725cf1a30Sjl139090 
179825cf1a30Sjl139090 		/* bail out */
179925cf1a30Sjl139090 		return;
180025cf1a30Sjl139090 	}
180125cf1a30Sjl139090 
180225cf1a30Sjl139090 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
180325cf1a30Sjl139090 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
180425cf1a30Sjl139090 
180525cf1a30Sjl139090 	if (t_mp != NULL) {
180625cf1a30Sjl139090 		/*
180725cf1a30Sjl139090 		 * the kphysm delete operation that drained the source
180825cf1a30Sjl139090 		 * board also drained this target board.  Since the source
180925cf1a30Sjl139090 		 * board drain is now known to have succeeded, we know this
181025cf1a30Sjl139090 		 * target board is drained too.
181125cf1a30Sjl139090 		 *
181225cf1a30Sjl139090 		 * because DR_DEV_SET_RELEASED and dr_device_transition
181325cf1a30Sjl139090 		 * is done here, the dr_release_dev_done should not
181425cf1a30Sjl139090 		 * fail.
181525cf1a30Sjl139090 		 */
181625cf1a30Sjl139090 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
181725cf1a30Sjl139090 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
181825cf1a30Sjl139090 
181925cf1a30Sjl139090 		/*
182025cf1a30Sjl139090 		 * NOTE: do not transition target's board state,
182125cf1a30Sjl139090 		 * even if the mem-unit was the last configure
182225cf1a30Sjl139090 		 * unit of the board.  When copy/rename completes
182325cf1a30Sjl139090 		 * this mem-unit will transitioned back to
182425cf1a30Sjl139090 		 * the configured state.  In the meantime, the
182525cf1a30Sjl139090 		 * board's must remain as is.
182625cf1a30Sjl139090 		 */
182725cf1a30Sjl139090 	}
182825cf1a30Sjl139090 
182925cf1a30Sjl139090 	/* if board(s) had deleted memory, verify it is gone */
	/*
	 * mp is only assigned immediately before an intersect call, so
	 * whenever rv ends up non-zero mp names the unit whose delete
	 * list still intersects phys_install.
	 */
183025cf1a30Sjl139090 	rv = 0;
183125cf1a30Sjl139090 	memlist_read_lock();
183225cf1a30Sjl139090 	if (s_mp->sbm_del_mlist != NULL) {
183325cf1a30Sjl139090 		mp = s_mp;
183425cf1a30Sjl139090 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
183525cf1a30Sjl139090 	}
183625cf1a30Sjl139090 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
183725cf1a30Sjl139090 		mp = t_mp;
183825cf1a30Sjl139090 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
183925cf1a30Sjl139090 	}
184025cf1a30Sjl139090 	memlist_read_unlock();
184125cf1a30Sjl139090 	if (rv) {
184225cf1a30Sjl139090 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
184325cf1a30Sjl139090 		    "deleted memory still found in phys_install",
184425cf1a30Sjl139090 		    f,
184525cf1a30Sjl139090 		    (mp == t_mp ? "target " : ""),
184625cf1a30Sjl139090 		    mp->sbm_cm.sbdev_bp->b_num,
184725cf1a30Sjl139090 		    mp->sbm_cm.sbdev_unum);
184825cf1a30Sjl139090 
		/*
		 * The error is recorded on the source unit even when the
		 * target's list was the one that intersected.  Both units
		 * have already been moved to DR_STATE_RELEASE above.
		 */
184925cf1a30Sjl139090 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
185025cf1a30Sjl139090 		return;
185125cf1a30Sjl139090 	}
185225cf1a30Sjl139090 
185325cf1a30Sjl139090 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
185425cf1a30Sjl139090 	if (t_mp != NULL)
185525cf1a30Sjl139090 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
185625cf1a30Sjl139090 
185725cf1a30Sjl139090 	/* this should not fail */
185825cf1a30Sjl139090 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
185925cf1a30Sjl139090 		/* catch this in debug kernels */
186025cf1a30Sjl139090 		ASSERT(0);
186125cf1a30Sjl139090 		return;
186225cf1a30Sjl139090 	}
186325cf1a30Sjl139090 
186425cf1a30Sjl139090 	PR_MEM("%s: marking %s release DONE\n",
186525cf1a30Sjl139090 	    f, s_mp->sbm_cm.sbdev_path);
186625cf1a30Sjl139090 
186725cf1a30Sjl139090 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
186825cf1a30Sjl139090 
186925cf1a30Sjl139090 	if (t_mp != NULL) {
187025cf1a30Sjl139090 		/* should not fail */
187125cf1a30Sjl139090 		rv = dr_release_dev_done(&t_mp->sbm_cm);
187225cf1a30Sjl139090 		if (rv != 0) {
187325cf1a30Sjl139090 			/* catch this in debug kernels */
187425cf1a30Sjl139090 			ASSERT(0);
187525cf1a30Sjl139090 			return;
187625cf1a30Sjl139090 		}
187725cf1a30Sjl139090 
187825cf1a30Sjl139090 		PR_MEM("%s: marking %s release DONE\n",
187925cf1a30Sjl139090 		    f, t_mp->sbm_cm.sbdev_path);
188025cf1a30Sjl139090 
188125cf1a30Sjl139090 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
188225cf1a30Sjl139090 	}
188325cf1a30Sjl139090 }
188425cf1a30Sjl139090 
/*
 * Disconnect a memory unit: free any memlists still attached to mp
 * and report the removal to the lgroup platform code through
 * lgrp_plat_config(LGRP_CONFIG_MEM_DEL).  Always returns 0.
 *
 * On DEBUG kernels the unit is asserted to be in DR_STATE_CONNECTED
 * or DR_STATE_UNCONFIGURED on entry.
 */
188525cf1a30Sjl139090 /*ARGSUSED*/
188625cf1a30Sjl139090 int
dr_disconnect_mem(dr_mem_unit_t * mp)188725cf1a30Sjl139090 dr_disconnect_mem(dr_mem_unit_t *mp)
188825cf1a30Sjl139090 {
188925cf1a30Sjl139090 	static fn_t	f = "dr_disconnect_mem";
189025cf1a30Sjl139090 	update_membounds_t umb;
189125cf1a30Sjl139090 
189225cf1a30Sjl139090 #ifdef DEBUG
189325cf1a30Sjl139090 	int state = mp->sbm_cm.sbdev_state;
189425cf1a30Sjl139090 	ASSERT(state == DR_STATE_CONNECTED ||
189525cf1a30Sjl139090 	    state == DR_STATE_UNCONFIGURED);
189625cf1a30Sjl139090 #endif
189725cf1a30Sjl139090 
189825cf1a30Sjl139090 	PR_MEM("%s...\n", f);
189925cf1a30Sjl139090 
	/*
	 * sbm_del_mlist may alias sbm_mlist; free it separately only
	 * when it is a distinct list so the shared list is freed once.
	 */
190025cf1a30Sjl139090 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
190125cf1a30Sjl139090 		memlist_delete(mp->sbm_del_mlist);
190225cf1a30Sjl139090 	mp->sbm_del_mlist = NULL;
190325cf1a30Sjl139090 
190425cf1a30Sjl139090 	if (mp->sbm_mlist) {
190525cf1a30Sjl139090 		memlist_delete(mp->sbm_mlist);
190625cf1a30Sjl139090 		mp->sbm_mlist = NULL;
190725cf1a30Sjl139090 	}
190825cf1a30Sjl139090 
190925cf1a30Sjl139090 	/*
191025cf1a30Sjl139090 	 * Remove memory from lgroup
191125cf1a30Sjl139090 	 * For now, only board info is required.
	 * The base and length fields are filled with (uint64_t)-1.
191225cf1a30Sjl139090 	 */
191325cf1a30Sjl139090 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
191425cf1a30Sjl139090 	umb.u_base = (uint64_t)-1;
191525cf1a30Sjl139090 	umb.u_len = (uint64_t)-1;
191625cf1a30Sjl139090 
191725cf1a30Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
191825cf1a30Sjl139090 
191925cf1a30Sjl139090 	return (0);
192025cf1a30Sjl139090 }
192125cf1a30Sjl139090 
/*
 * Cancel a memory release for the unit s_mp.
 *
 * Returns -1 (without recording an error) if s_mp is flagged as a
 * copy-rename target or if its state is neither
 * DR_STATE_UNREFERENCED nor DR_STATE_RELEASE; returns 0 otherwise.
 *
 * In DR_STATE_UNREFERENCED the delete memlists of the target (if any)
 * and of the source are handed to dr_add_memory_spans(), after which
 * control falls through to the DR_STATE_RELEASE handling.  That
 * handling frees the memlists, clears peer, flags and busy state,
 * and refreshes both units with dr_init_mem_unit_data().  Only the
 * target is transitioned (to DR_STATE_CONFIGURED) in this function.
 */
192225cf1a30Sjl139090 int
dr_cancel_mem(dr_mem_unit_t * s_mp)192325cf1a30Sjl139090 dr_cancel_mem(dr_mem_unit_t *s_mp)
192425cf1a30Sjl139090 {
192525cf1a30Sjl139090 	dr_mem_unit_t	*t_mp;
192625cf1a30Sjl139090 	dr_state_t	state;
192725cf1a30Sjl139090 	static fn_t	f = "dr_cancel_mem";
192825cf1a30Sjl139090 
192925cf1a30Sjl139090 	state = s_mp->sbm_cm.sbdev_state;
193025cf1a30Sjl139090 
193125cf1a30Sjl139090 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
193225cf1a30Sjl139090 		/* must cancel source board, not target board */
193325cf1a30Sjl139090 		/* TODO: set error */
193425cf1a30Sjl139090 		return (-1);
193525cf1a30Sjl139090 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
193625cf1a30Sjl139090 		t_mp = s_mp->sbm_peer;
193725cf1a30Sjl139090 		ASSERT(t_mp != NULL);
193825cf1a30Sjl139090 		ASSERT(t_mp->sbm_peer == s_mp);
193925cf1a30Sjl139090 
194025cf1a30Sjl139090 		/* must always match the source board's state */
194125cf1a30Sjl139090 		/* TODO: is this assertion correct? */
194225cf1a30Sjl139090 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
194325cf1a30Sjl139090 	} else {
194425cf1a30Sjl139090 		/* there is no target unit */
194525cf1a30Sjl139090 		t_mp = NULL;
194625cf1a30Sjl139090 	}
194725cf1a30Sjl139090 
194825cf1a30Sjl139090 	switch (state) {
194925cf1a30Sjl139090 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
195025cf1a30Sjl139090 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
195125cf1a30Sjl139090 
		/*
		 * The target's UNREFERENCED indication is only cleared
		 * when it actually has a delete memlist to re-add.
		 */
195225cf1a30Sjl139090 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
195325cf1a30Sjl139090 			PR_MEM("%s: undoing target %s memory delete\n",
195425cf1a30Sjl139090 			    f, t_mp->sbm_cm.sbdev_path);
195525cf1a30Sjl139090 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
195625cf1a30Sjl139090 
195725cf1a30Sjl139090 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
195825cf1a30Sjl139090 		}
195925cf1a30Sjl139090 
196025cf1a30Sjl139090 		if (s_mp->sbm_del_mlist != NULL) {
196125cf1a30Sjl139090 			PR_MEM("%s: undoing %s memory delete\n",
196225cf1a30Sjl139090 			    f, s_mp->sbm_cm.sbdev_path);
196325cf1a30Sjl139090 
196425cf1a30Sjl139090 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
196525cf1a30Sjl139090 		}
196625cf1a30Sjl139090 
196725cf1a30Sjl139090 		/*FALLTHROUGH*/
196825cf1a30Sjl139090 
196925cf1a30Sjl139090 /* TODO: should no longer be possible to see the release state here */
197025cf1a30Sjl139090 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
197125cf1a30Sjl139090 
197225cf1a30Sjl139090 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
197325cf1a30Sjl139090 
197425cf1a30Sjl139090 		if (t_mp != NULL) {
			/*
			 * The target's list pointers are asserted to alias
			 * one list, so it is freed once via sbm_mlist.
			 */
197525cf1a30Sjl139090 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
197625cf1a30Sjl139090 			t_mp->sbm_del_mlist = NULL;
197725cf1a30Sjl139090 
197825cf1a30Sjl139090 			if (t_mp->sbm_mlist != NULL) {
197925cf1a30Sjl139090 				memlist_delete(t_mp->sbm_mlist);
198025cf1a30Sjl139090 				t_mp->sbm_mlist = NULL;
198125cf1a30Sjl139090 			}
198225cf1a30Sjl139090 
			/*
			 * sbm_peer and sbm_flags are cleared first because
			 * dr_init_mem_unit_data() asserts both are clear.
			 */
198325cf1a30Sjl139090 			t_mp->sbm_peer = NULL;
198425cf1a30Sjl139090 			t_mp->sbm_flags = 0;
198525cf1a30Sjl139090 			t_mp->sbm_cm.sbdev_busy = 0;
198625cf1a30Sjl139090 			dr_init_mem_unit_data(t_mp);
198725cf1a30Sjl139090 
198825cf1a30Sjl139090 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
198925cf1a30Sjl139090 
199025cf1a30Sjl139090 			dr_device_transition(
199125cf1a30Sjl139090 			    &t_mp->sbm_cm, DR_STATE_CONFIGURED);
199225cf1a30Sjl139090 		}
199325cf1a30Sjl139090 
		/*
		 * Free the source delete list here only when it is a
		 * separate list; an aliased list is freed once through
		 * sbm_mlist below.
		 */
199425cf1a30Sjl139090 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
199525cf1a30Sjl139090 			memlist_delete(s_mp->sbm_del_mlist);
199625cf1a30Sjl139090 		s_mp->sbm_del_mlist = NULL;
199725cf1a30Sjl139090 
199825cf1a30Sjl139090 		if (s_mp->sbm_mlist != NULL) {
199925cf1a30Sjl139090 			memlist_delete(s_mp->sbm_mlist);
200025cf1a30Sjl139090 			s_mp->sbm_mlist = NULL;
200125cf1a30Sjl139090 		}
200225cf1a30Sjl139090 
200325cf1a30Sjl139090 		s_mp->sbm_peer = NULL;
200425cf1a30Sjl139090 		s_mp->sbm_flags = 0;
200525cf1a30Sjl139090 		s_mp->sbm_cm.sbdev_busy = 0;
200625cf1a30Sjl139090 		dr_init_mem_unit_data(s_mp);
200725cf1a30Sjl139090 
200825cf1a30Sjl139090 		return (0);
200925cf1a30Sjl139090 
201025cf1a30Sjl139090 	default:
201125cf1a30Sjl139090 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
201225cf1a30Sjl139090 		    f, (int)state, s_mp->sbm_cm.sbdev_path);
201325cf1a30Sjl139090 
201425cf1a30Sjl139090 		return (-1);
201525cf1a30Sjl139090 	}
201625cf1a30Sjl139090 	/*NOTREACHED*/
201725cf1a30Sjl139090 }
201825cf1a30Sjl139090 
/*
 * Initialize a memory unit and place it in its starting state.
 *
 * The state is chosen in priority order: DR_STATE_CONFIGURED if the
 * unit is attached, DR_STATE_CONNECTED if it is present,
 * DR_STATE_OCCUPIED if it has a non-zero sbdev_id, and
 * DR_STATE_EMPTY otherwise.  sbdev_cond is set to SBD_COND_OK for
 * the attached and present cases.  When the unit is present,
 * dr_init_mem_unit_data() is called before the state transition.
 */
201925cf1a30Sjl139090 void
dr_init_mem_unit(dr_mem_unit_t * mp)202025cf1a30Sjl139090 dr_init_mem_unit(dr_mem_unit_t *mp)
202125cf1a30Sjl139090 {
202225cf1a30Sjl139090 	dr_state_t	new_state;
202325cf1a30Sjl139090 
202425cf1a30Sjl139090 
202525cf1a30Sjl139090 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
202625cf1a30Sjl139090 		new_state = DR_STATE_CONFIGURED;
202725cf1a30Sjl139090 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
202825cf1a30Sjl139090 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
202925cf1a30Sjl139090 		new_state = DR_STATE_CONNECTED;
203025cf1a30Sjl139090 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
203125cf1a30Sjl139090 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
203225cf1a30Sjl139090 		new_state = DR_STATE_OCCUPIED;
203325cf1a30Sjl139090 	} else {
203425cf1a30Sjl139090 		new_state = DR_STATE_EMPTY;
203525cf1a30Sjl139090 	}
203625cf1a30Sjl139090 
	/* this test is independent of which branch was taken above */
203725cf1a30Sjl139090 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
203825cf1a30Sjl139090 		dr_init_mem_unit_data(mp);
203925cf1a30Sjl139090 
204025cf1a30Sjl139090 	/* delay transition until fully initialized */
204125cf1a30Sjl139090 	dr_device_transition(&mp->sbm_cm, new_state);
204225cf1a30Sjl139090 }
204325cf1a30Sjl139090 
/*
 * Refresh the cached memory parameters of mp from
 * drmach_mem_get_info() and report the unit's memory range to the
 * lgroup platform code through lgrp_plat_config(LGRP_CONFIG_MEM_ADD).
 *
 * If drmach_mem_get_info() returns an error, it is recorded on the
 * unit with DRERR_SET_C() and the function returns without touching
 * the cached fields or calling lgrp_plat_config().
 *
 * Callers must have cleared sbm_peer and sbm_flags first; both are
 * asserted below.
 */
204425cf1a30Sjl139090 static void
dr_init_mem_unit_data(dr_mem_unit_t * mp)204525cf1a30Sjl139090 dr_init_mem_unit_data(dr_mem_unit_t *mp)
204625cf1a30Sjl139090 {
204725cf1a30Sjl139090 	drmachid_t	id = mp->sbm_cm.sbdev_id;
204825cf1a30Sjl139090 	drmach_mem_info_t	minfo;
204925cf1a30Sjl139090 	sbd_error_t	*err;
205025cf1a30Sjl139090 	static fn_t	f = "dr_init_mem_unit_data";
205125cf1a30Sjl139090 	update_membounds_t umb;
205225cf1a30Sjl139090 
205325cf1a30Sjl139090 	PR_MEM("%s...\n", f);
205425cf1a30Sjl139090 
205525cf1a30Sjl139090 	/* a little sanity checking */
205625cf1a30Sjl139090 	ASSERT(mp->sbm_peer == NULL);
205725cf1a30Sjl139090 	ASSERT(mp->sbm_flags == 0);
205825cf1a30Sjl139090 
	/* the assignment inside the condition is intentional */
205925cf1a30Sjl139090 	if (err = drmach_mem_get_info(id, &minfo)) {
206025cf1a30Sjl139090 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
206125cf1a30Sjl139090 		return;
206225cf1a30Sjl139090 	}
	/*
	 * Base, size and alignment mask go through _b64top()
	 * (presumably a byte-to-page conversion; confirm against its
	 * definition).  The slice size is stored as returned.
	 */
206325cf1a30Sjl139090 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
206425cf1a30Sjl139090 	mp->sbm_npages = _b64top(minfo.mi_size);
206525cf1a30Sjl139090 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
206625cf1a30Sjl139090 	mp->sbm_slice_size = minfo.mi_slice_size;
206725cf1a30Sjl139090 
206825cf1a30Sjl139090 	/*
206925cf1a30Sjl139090 	 * Add memory to lgroup
	 * Base and length are passed as the page values shifted left by
	 * MMU_PAGESHIFT.
207025cf1a30Sjl139090 	 */
207125cf1a30Sjl139090 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
207225cf1a30Sjl139090 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
207325cf1a30Sjl139090 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
207425cf1a30Sjl139090 
207525cf1a30Sjl139090 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
207625cf1a30Sjl139090 
207725cf1a30Sjl139090 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
207825cf1a30Sjl139090 	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
207925cf1a30Sjl139090 }
208025cf1a30Sjl139090 
208125cf1a30Sjl139090 static int
dr_reserve_mem_spans(memhandle_t * mhp,struct memlist * ml)208225cf1a30Sjl139090 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
208325cf1a30Sjl139090 {
208425cf1a30Sjl139090 	int		err;
208525cf1a30Sjl139090 	pfn_t		base;
208625cf1a30Sjl139090 	pgcnt_t		npgs;
208725cf1a30Sjl139090 	struct memlist	*mc;
208825cf1a30Sjl139090 	static fn_t	f = "dr_reserve_mem_spans";
208925cf1a30Sjl139090 
209025cf1a30Sjl139090 	PR_MEM("%s...\n", f);
209125cf1a30Sjl139090 
209225cf1a30Sjl139090 	/*
209325cf1a30Sjl139090 	 * Walk the supplied memlist scheduling each span for removal
209425cf1a30Sjl139090 	 * with kphysm_del_span.  It is possible that a span may intersect
209525cf1a30Sjl139090 	 * an area occupied by the cage.
209625cf1a30Sjl139090 	 */
2097*56f33205SJonathan Adams 	for (mc = ml; mc != NULL; mc = mc->ml_next) {
2098*56f33205SJonathan Adams 		base = _b64top(mc->ml_address);
2099*56f33205SJonathan Adams 		npgs = _b64top(mc->ml_size);
210025cf1a30Sjl139090 
210125cf1a30Sjl139090 		err = kphysm_del_span(*mhp, base, npgs);
210225cf1a30Sjl139090 		if (err != KPHYSM_OK) {
210325cf1a30Sjl139090 			cmn_err(CE_WARN, "%s memory reserve failed."
210425cf1a30Sjl139090 			    " unexpected kphysm_del_span return value %d;"
210525cf1a30Sjl139090 			    " basepfn=0x%lx npages=%ld",
210625cf1a30Sjl139090 			    f, err, base, npgs);
210725cf1a30Sjl139090 
210825cf1a30Sjl139090 			return (-1);
210925cf1a30Sjl139090 		}
211025cf1a30Sjl139090 	}
211125cf1a30Sjl139090 
211225cf1a30Sjl139090 	return (0);
211325cf1a30Sjl139090 }
211425cf1a30Sjl139090 
/* number of preference sets used when categorizing candidate targets */
#define	DR_SMT_NPREF_SETS	6
/*
 * Number of slots in one preference set.  The product is parenthesized
 * so that it stays a single operand wherever the macro is expanded,
 * e.g. as the divisor in "idx / DR_SMT_NUNITS_PER_SET".
 */
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)

/* debug counters */
int dr_smt_realigned;
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
212525cf1a30Sjl139090 
212625cf1a30Sjl139090 /*
212725cf1a30Sjl139090  * Find and reserve a copy/rename target board suitable for the
212825cf1a30Sjl139090  * given source board.
212925cf1a30Sjl139090  * All boards in the system are examined and categorized in relation to
213025cf1a30Sjl139090  * their memory size versus the source board's memory size.  Order of
213125cf1a30Sjl139090  * preference is:
213225cf1a30Sjl139090  *	1st copy all source, source/target same size
213325cf1a30Sjl139090  *	2nd copy all source, larger target
213425cf1a30Sjl139090  * 	3rd copy nonrelocatable source span
213525cf1a30Sjl139090  */
213625cf1a30Sjl139090 static int
dr_select_mem_target(dr_handle_t * hp,dr_mem_unit_t * s_mp,struct memlist * s_ml)213725cf1a30Sjl139090 dr_select_mem_target(dr_handle_t *hp,
213825cf1a30Sjl139090 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
213925cf1a30Sjl139090 {
214025cf1a30Sjl139090 	dr_target_pref_t preference; /* lower value is higher preference */
214125cf1a30Sjl139090 	int		idx;
214225cf1a30Sjl139090 	dr_mem_unit_t	**sets;
214325cf1a30Sjl139090 
214425cf1a30Sjl139090 	int		t_bd;
214525cf1a30Sjl139090 	int		t_unit;
214625cf1a30Sjl139090 	int		rv;
214725cf1a30Sjl139090 	dr_board_t	*s_bp, *t_bp;
214825cf1a30Sjl139090 	dr_mem_unit_t	*t_mp, *c_mp;
214925cf1a30Sjl139090 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
215025cf1a30Sjl139090 	memquery_t	s_mq = {0};
215125cf1a30Sjl139090 	static fn_t	f = "dr_select_mem_target";
215225cf1a30Sjl139090 
215325cf1a30Sjl139090 	PR_MEM("%s...\n", f);
215425cf1a30Sjl139090 
215525cf1a30Sjl139090 	ASSERT(s_ml != NULL);
215625cf1a30Sjl139090 
215725cf1a30Sjl139090 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
215825cf1a30Sjl139090 	    DR_SMT_NPREF_SETS);
215925cf1a30Sjl139090 
216025cf1a30Sjl139090 	s_bp = hp->h_bd;
216125cf1a30Sjl139090 	/* calculate the offset into the slice of the last source board pfn */
216225cf1a30Sjl139090 	ASSERT(s_mp->sbm_npages != 0);
216325cf1a30Sjl139090 
216425cf1a30Sjl139090 	/*
216525cf1a30Sjl139090 	 * Find non-relocatable span on source board.
216625cf1a30Sjl139090 	 */
216725cf1a30Sjl139090 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
216825cf1a30Sjl139090 	if (rv != KPHYSM_OK) {
216925cf1a30Sjl139090 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
217025cf1a30Sjl139090 		    " return value %d; basepfn 0x%lx, npages %ld\n",
217125cf1a30Sjl139090 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
217225cf1a30Sjl139090 		    s_mp->sbm_npages);
217325cf1a30Sjl139090 		return (-1);
217425cf1a30Sjl139090 	}
217525cf1a30Sjl139090 
217625cf1a30Sjl139090 	ASSERT(s_mq.phys_pages != 0);
217725cf1a30Sjl139090 	ASSERT(s_mq.nonrelocatable != 0);
217825cf1a30Sjl139090 
217925cf1a30Sjl139090 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
218025cf1a30Sjl139090 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
218125cf1a30Sjl139090 	    s_mq.last_nonrelocatable);
218225cf1a30Sjl139090 
218325cf1a30Sjl139090 	/* break down s_ml if it contains dynamic segments */
218425cf1a30Sjl139090 	b_ml = memlist_dup(s_ml);
218525cf1a30Sjl139090 
2186*56f33205SJonathan Adams 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
2187*56f33205SJonathan Adams 		b_ml = memlist_del_span(b_ml, ml->ml_address, ml->ml_size);
2188*56f33205SJonathan Adams 		b_ml = memlist_cat_span(b_ml, ml->ml_address, ml->ml_size);
218925cf1a30Sjl139090 	}
219025cf1a30Sjl139090 
219125cf1a30Sjl139090 
219225cf1a30Sjl139090 	/*
219325cf1a30Sjl139090 	 * Make one pass through all memory units on all boards
219425cf1a30Sjl139090 	 * and categorize them with respect to the source board.
219525cf1a30Sjl139090 	 */
219625cf1a30Sjl139090 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
219725cf1a30Sjl139090 		/*
219825cf1a30Sjl139090 		 * The board structs are a contiguous array
219925cf1a30Sjl139090 		 * so we take advantage of that to find the
220025cf1a30Sjl139090 		 * correct board struct pointer for a given
220125cf1a30Sjl139090 		 * board number.
220225cf1a30Sjl139090 		 */
220325cf1a30Sjl139090 		t_bp = dr_lookup_board(t_bd);
220425cf1a30Sjl139090 
220525cf1a30Sjl139090 		/* source board can not be its own target */
220625cf1a30Sjl139090 		if (s_bp->b_num == t_bp->b_num)
220725cf1a30Sjl139090 			continue;
220825cf1a30Sjl139090 
220925cf1a30Sjl139090 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
221025cf1a30Sjl139090 
221125cf1a30Sjl139090 			t_mp = dr_get_mem_unit(t_bp, t_unit);
221225cf1a30Sjl139090 
221325cf1a30Sjl139090 			/* this memory node must be attached */
221425cf1a30Sjl139090 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
221525cf1a30Sjl139090 				continue;
221625cf1a30Sjl139090 
221725cf1a30Sjl139090 			/* source unit can not be its own target */
221825cf1a30Sjl139090 			if (s_mp == t_mp) {
221925cf1a30Sjl139090 				/* catch this is debug kernels */
222025cf1a30Sjl139090 				ASSERT(0);
222125cf1a30Sjl139090 				continue;
222225cf1a30Sjl139090 			}
222325cf1a30Sjl139090 
222425cf1a30Sjl139090 			/*
222525cf1a30Sjl139090 			 * this memory node must not already be reserved
222625cf1a30Sjl139090 			 * by some other memory delete operation.
222725cf1a30Sjl139090 			 */
222825cf1a30Sjl139090 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
222925cf1a30Sjl139090 				continue;
223025cf1a30Sjl139090 
223125cf1a30Sjl139090 			/* get target board memlist */
223225cf1a30Sjl139090 			t_ml = dr_get_memlist(t_mp);
223325cf1a30Sjl139090 			if (t_ml == NULL) {
223425cf1a30Sjl139090 				cmn_err(CE_WARN, "%s: no memlist for"
223525cf1a30Sjl139090 				    " mem-unit %d, board %d", f,
223625cf1a30Sjl139090 				    t_mp->sbm_cm.sbdev_bp->b_num,
223725cf1a30Sjl139090 				    t_mp->sbm_cm.sbdev_unum);
223825cf1a30Sjl139090 				continue;
223925cf1a30Sjl139090 			}
224025cf1a30Sjl139090 
224125cf1a30Sjl139090 			preference = dr_get_target_preference(hp, t_mp, s_mp,
224225cf1a30Sjl139090 			    t_ml, s_ml, b_ml);
224325cf1a30Sjl139090 
22443103d4ceSjesusm 			memlist_delete(t_ml);
22453103d4ceSjesusm 
224625cf1a30Sjl139090 			if (preference == DR_TP_INVALID)
224725cf1a30Sjl139090 				continue;
224825cf1a30Sjl139090 
224925cf1a30Sjl139090 			dr_smt_preference[preference]++;
225025cf1a30Sjl139090 
225125cf1a30Sjl139090 			/* calculate index to start of preference set */
225225cf1a30Sjl139090 			idx  = DR_SMT_NUNITS_PER_SET * preference;
225325cf1a30Sjl139090 			/* calculate offset to respective element */
225425cf1a30Sjl139090 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
225525cf1a30Sjl139090 
225625cf1a30Sjl139090 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
225725cf1a30Sjl139090 			sets[idx] = t_mp;
225825cf1a30Sjl139090 		}
225925cf1a30Sjl139090 	}
226025cf1a30Sjl139090 
226125cf1a30Sjl139090 	if (b_ml != NULL)
226225cf1a30Sjl139090 		memlist_delete(b_ml);
226325cf1a30Sjl139090 
226425cf1a30Sjl139090 	/*
226525cf1a30Sjl139090 	 * NOTE: this would be a good place to sort each candidate
226625cf1a30Sjl139090 	 * set in to some desired order, e.g. memory size in ascending
226725cf1a30Sjl139090 	 * order.  Without an additional sorting step here, the order
226825cf1a30Sjl139090 	 * within a set is ascending board number order.
226925cf1a30Sjl139090 	 */
227025cf1a30Sjl139090 
227125cf1a30Sjl139090 	c_mp = NULL;
227225cf1a30Sjl139090 	x_ml = NULL;
227325cf1a30Sjl139090 	t_ml = NULL;
227425cf1a30Sjl139090 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
227525cf1a30Sjl139090 		memquery_t mq;
227625cf1a30Sjl139090 
227725cf1a30Sjl139090 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
227825cf1a30Sjl139090 
227925cf1a30Sjl139090 		ASSERT(preference != DR_TP_INVALID);
228025cf1a30Sjl139090 
228125cf1a30Sjl139090 		/* cleanup t_ml after previous pass */
228225cf1a30Sjl139090 		if (t_ml != NULL) {
228325cf1a30Sjl139090 			memlist_delete(t_ml);
228425cf1a30Sjl139090 			t_ml = NULL;
228525cf1a30Sjl139090 		}
228625cf1a30Sjl139090 
228725cf1a30Sjl139090 		/* get candidate target board mem unit */
228825cf1a30Sjl139090 		t_mp = sets[idx];
228925cf1a30Sjl139090 		if (t_mp == NULL)
229025cf1a30Sjl139090 			continue;
229125cf1a30Sjl139090 
229225cf1a30Sjl139090 		/* get target board memlist */
229325cf1a30Sjl139090 		t_ml = dr_get_memlist(t_mp);
229425cf1a30Sjl139090 		if (t_ml == NULL) {
229525cf1a30Sjl139090 			cmn_err(CE_WARN, "%s: no memlist for"
229625cf1a30Sjl139090 			    " mem-unit %d, board %d",
229725cf1a30Sjl139090 			    f,
229825cf1a30Sjl139090 			    t_mp->sbm_cm.sbdev_bp->b_num,
229925cf1a30Sjl139090 			    t_mp->sbm_cm.sbdev_unum);
230025cf1a30Sjl139090 
230125cf1a30Sjl139090 			continue;
230225cf1a30Sjl139090 		}
230325cf1a30Sjl139090 
230425cf1a30Sjl139090 		PR_MEM("%s: checking for no-reloc in %s, "
230525cf1a30Sjl139090 		    " basepfn=0x%lx, npages=%ld\n",
230625cf1a30Sjl139090 		    f,
230725cf1a30Sjl139090 		    t_mp->sbm_cm.sbdev_path,
230825cf1a30Sjl139090 		    t_mp->sbm_basepfn,
230925cf1a30Sjl139090 		    t_mp->sbm_npages);
231025cf1a30Sjl139090 
231125cf1a30Sjl139090 		rv = dr_del_mlist_query(t_ml, &mq);
231225cf1a30Sjl139090 		if (rv != KPHYSM_OK) {
231325cf1a30Sjl139090 			PR_MEM("%s: kphysm_del_span_query:"
231425cf1a30Sjl139090 			    " unexpected return value %d\n", f, rv);
231525cf1a30Sjl139090 
231625cf1a30Sjl139090 			continue;
231725cf1a30Sjl139090 		}
231825cf1a30Sjl139090 
231925cf1a30Sjl139090 		if (mq.nonrelocatable != 0) {
232025cf1a30Sjl139090 			PR_MEM("%s: candidate %s has"
232125cf1a30Sjl139090 			    " nonrelocatable span [0x%lx..0x%lx]\n",
232225cf1a30Sjl139090 			    f,
232325cf1a30Sjl139090 			    t_mp->sbm_cm.sbdev_path,
232425cf1a30Sjl139090 			    mq.first_nonrelocatable,
232525cf1a30Sjl139090 			    mq.last_nonrelocatable);
232625cf1a30Sjl139090 
232725cf1a30Sjl139090 			continue;
232825cf1a30Sjl139090 		}
232925cf1a30Sjl139090 
233025cf1a30Sjl139090 #ifdef DEBUG
233125cf1a30Sjl139090 		/*
233225cf1a30Sjl139090 		 * This is a debug tool for excluding certain boards
233325cf1a30Sjl139090 		 * from being selected as a target board candidate.
233425cf1a30Sjl139090 		 * dr_ignore_board is only tested by this driver.
233525cf1a30Sjl139090 		 * It must be set with adb, obp, /etc/system or your
233625cf1a30Sjl139090 		 * favorite debugger.
233725cf1a30Sjl139090 		 */
233825cf1a30Sjl139090 		if (dr_ignore_board &
233925cf1a30Sjl139090 		    (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
234025cf1a30Sjl139090 			PR_MEM("%s: dr_ignore_board flag set,"
234125cf1a30Sjl139090 			    " ignoring %s as candidate\n",
234225cf1a30Sjl139090 			    f, t_mp->sbm_cm.sbdev_path);
234325cf1a30Sjl139090 			continue;
234425cf1a30Sjl139090 		}
234525cf1a30Sjl139090 #endif
234625cf1a30Sjl139090 
234725cf1a30Sjl139090 		/*
234825cf1a30Sjl139090 		 * Reserve excess source board memory, if any.
234925cf1a30Sjl139090 		 *
235025cf1a30Sjl139090 		 * Only the nonrelocatable source span will be copied
235125cf1a30Sjl139090 		 * so schedule the rest of the source mem to be deleted.
235225cf1a30Sjl139090 		 */
235325cf1a30Sjl139090 		switch (preference) {
235425cf1a30Sjl139090 		case DR_TP_NONRELOC:
235525cf1a30Sjl139090 			/*
235625cf1a30Sjl139090 			 * Get source copy memlist and use it to construct
235725cf1a30Sjl139090 			 * delete memlist.
235825cf1a30Sjl139090 			 */
235925cf1a30Sjl139090 			d_ml = memlist_dup(s_ml);
236025cf1a30Sjl139090 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
236125cf1a30Sjl139090 
236225cf1a30Sjl139090 			/* XXX */
236325cf1a30Sjl139090 			ASSERT(d_ml != NULL);
236425cf1a30Sjl139090 			ASSERT(x_ml != NULL);
236525cf1a30Sjl139090 
2366*56f33205SJonathan Adams 			for (ml = x_ml; ml != NULL; ml = ml->ml_next) {
2367*56f33205SJonathan Adams 				d_ml = memlist_del_span(d_ml, ml->ml_address,
2368*56f33205SJonathan Adams 				    ml->ml_size);
236925cf1a30Sjl139090 			}
237025cf1a30Sjl139090 
237125cf1a30Sjl139090 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
237225cf1a30Sjl139090 			    s_mp->sbm_cm.sbdev_path);
237325cf1a30Sjl139090 			PR_MEMLIST_DUMP(d_ml);
237425cf1a30Sjl139090 
237525cf1a30Sjl139090 			/* reserve excess spans */
237625cf1a30Sjl139090 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
237725cf1a30Sjl139090 			    d_ml) != 0) {
237825cf1a30Sjl139090 				/* likely more non-reloc pages appeared */
237925cf1a30Sjl139090 				/* TODO: restart from top? */
238025cf1a30Sjl139090 				continue;
238125cf1a30Sjl139090 			}
238225cf1a30Sjl139090 			break;
238325cf1a30Sjl139090 		default:
238425cf1a30Sjl139090 			d_ml = NULL;
238525cf1a30Sjl139090 			break;
238625cf1a30Sjl139090 		}
238725cf1a30Sjl139090 
238825cf1a30Sjl139090 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
238925cf1a30Sjl139090 
239025cf1a30Sjl139090 		/*
239125cf1a30Sjl139090 		 * reserve all memory on target board.
239225cf1a30Sjl139090 		 * NOTE: source board's memhandle is used.
239325cf1a30Sjl139090 		 *
239425cf1a30Sjl139090 		 * If this succeeds (eq 0), then target selection is
239525cf1a30Sjl139090 		 * complete and all unwanted memory spans, both source and
239625cf1a30Sjl139090 		 * target, have been reserved.  Loop is terminated.
239725cf1a30Sjl139090 		 */
239825cf1a30Sjl139090 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
239925cf1a30Sjl139090 			PR_MEM("%s: %s: target board memory reserved\n",
240025cf1a30Sjl139090 			    f, t_mp->sbm_cm.sbdev_path);
240125cf1a30Sjl139090 
240225cf1a30Sjl139090 			/* a candidate target board is now reserved */
240325cf1a30Sjl139090 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
240425cf1a30Sjl139090 			c_mp = t_mp;
240525cf1a30Sjl139090 
240625cf1a30Sjl139090 			/* *** EXITING LOOP *** */
240725cf1a30Sjl139090 			break;
240825cf1a30Sjl139090 		}
240925cf1a30Sjl139090 
241025cf1a30Sjl139090 		/* did not successfully reserve the target board. */
241125cf1a30Sjl139090 		PR_MEM("%s: could not reserve target %s\n",
241225cf1a30Sjl139090 		    f, t_mp->sbm_cm.sbdev_path);
241325cf1a30Sjl139090 
241425cf1a30Sjl139090 		/*
241525cf1a30Sjl139090 		 * NOTE: an undo of the dr_reserve_mem_span work
241625cf1a30Sjl139090 		 * will happen automatically when the memhandle
241725cf1a30Sjl139090 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
241825cf1a30Sjl139090 		 */
241925cf1a30Sjl139090 
242025cf1a30Sjl139090 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
242125cf1a30Sjl139090 	}
242225cf1a30Sjl139090 
242325cf1a30Sjl139090 	/* clean up after memlist editing logic */
242425cf1a30Sjl139090 	if (x_ml != NULL)
242525cf1a30Sjl139090 		memlist_delete(x_ml);
242625cf1a30Sjl139090 
242725cf1a30Sjl139090 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
242825cf1a30Sjl139090 	    DR_SMT_NPREF_SETS);
242925cf1a30Sjl139090 
243025cf1a30Sjl139090 	/*
243125cf1a30Sjl139090 	 * c_mp will be NULL when the entire sets[] array
243225cf1a30Sjl139090 	 * has been searched without reserving a target board.
243325cf1a30Sjl139090 	 */
243425cf1a30Sjl139090 	if (c_mp == NULL) {
243525cf1a30Sjl139090 		PR_MEM("%s: %s: target selection failed.\n",
243625cf1a30Sjl139090 		    f, s_mp->sbm_cm.sbdev_path);
243725cf1a30Sjl139090 
243825cf1a30Sjl139090 		if (t_ml != NULL)
243925cf1a30Sjl139090 			memlist_delete(t_ml);
244025cf1a30Sjl139090 
244125cf1a30Sjl139090 		return (-1);
244225cf1a30Sjl139090 	}
244325cf1a30Sjl139090 
244425cf1a30Sjl139090 	PR_MEM("%s: found target %s for source %s\n",
244525cf1a30Sjl139090 	    f,
244625cf1a30Sjl139090 	    c_mp->sbm_cm.sbdev_path,
244725cf1a30Sjl139090 	    s_mp->sbm_cm.sbdev_path);
244825cf1a30Sjl139090 
244925cf1a30Sjl139090 	s_mp->sbm_peer = c_mp;
245025cf1a30Sjl139090 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
245125cf1a30Sjl139090 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
245225cf1a30Sjl139090 	s_mp->sbm_mlist = s_ml;
245325cf1a30Sjl139090 	s_mp->sbm_cm.sbdev_busy = 1;
245425cf1a30Sjl139090 
245525cf1a30Sjl139090 	c_mp->sbm_peer = s_mp;
245625cf1a30Sjl139090 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
245725cf1a30Sjl139090 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
245825cf1a30Sjl139090 	c_mp->sbm_mlist = t_ml;
245925cf1a30Sjl139090 	c_mp->sbm_cm.sbdev_busy = 1;
246025cf1a30Sjl139090 
246125cf1a30Sjl139090 	return (0);
246225cf1a30Sjl139090 }
246325cf1a30Sjl139090 
246425cf1a30Sjl139090 /*
246525cf1a30Sjl139090  * Returns target preference rank:
246625cf1a30Sjl139090  *     -1 not a valid copy-rename target board
246725cf1a30Sjl139090  *	0 copy all source, source/target same size
246825cf1a30Sjl139090  *	1 copy all source, larger target
246925cf1a30Sjl139090  * 	2 copy nonrelocatable source span
247025cf1a30Sjl139090  */
247125cf1a30Sjl139090 static dr_target_pref_t
dr_get_target_preference(dr_handle_t * hp,dr_mem_unit_t * t_mp,dr_mem_unit_t * s_mp,struct memlist * t_ml,struct memlist * s_ml,struct memlist * b_ml)247225cf1a30Sjl139090 dr_get_target_preference(dr_handle_t *hp,
247325cf1a30Sjl139090     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
247425cf1a30Sjl139090     struct memlist *t_ml, struct memlist *s_ml,
247525cf1a30Sjl139090     struct memlist *b_ml)
247625cf1a30Sjl139090 {
247725cf1a30Sjl139090 	dr_target_pref_t preference;
247825cf1a30Sjl139090 	struct memlist *s_nonreloc_ml = NULL;
247925cf1a30Sjl139090 	drmachid_t t_id;
248025cf1a30Sjl139090 	static fn_t	f = "dr_get_target_preference";
248125cf1a30Sjl139090 
248225cf1a30Sjl139090 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
248325cf1a30Sjl139090 
248425cf1a30Sjl139090 	/*
248525cf1a30Sjl139090 	 * Can the entire source board be copied?
248625cf1a30Sjl139090 	 */
248725cf1a30Sjl139090 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
248825cf1a30Sjl139090 		if (s_mp->sbm_npages == t_mp->sbm_npages)
248925cf1a30Sjl139090 			preference = DR_TP_SAME;	/* same size */
249025cf1a30Sjl139090 		else
249125cf1a30Sjl139090 			preference = DR_TP_LARGE;	/* larger target */
249225cf1a30Sjl139090 	} else {
249325cf1a30Sjl139090 		/*
249425cf1a30Sjl139090 		 * Entire source won't fit so try non-relocatable memory only
249525cf1a30Sjl139090 		 * (target aligned).
249625cf1a30Sjl139090 		 */
249725cf1a30Sjl139090 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
249825cf1a30Sjl139090 		if (s_nonreloc_ml == NULL) {
249925cf1a30Sjl139090 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
250025cf1a30Sjl139090 			preference = DR_TP_INVALID;
250125cf1a30Sjl139090 		}
250225cf1a30Sjl139090 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
250325cf1a30Sjl139090 			preference = DR_TP_NONRELOC;
250425cf1a30Sjl139090 		else
250525cf1a30Sjl139090 			preference = DR_TP_INVALID;
250625cf1a30Sjl139090 	}
250725cf1a30Sjl139090 
250825cf1a30Sjl139090 	if (s_nonreloc_ml != NULL)
250925cf1a30Sjl139090 		memlist_delete(s_nonreloc_ml);
251025cf1a30Sjl139090 
251125cf1a30Sjl139090 	/*
251225cf1a30Sjl139090 	 * Force floating board preference lower than all other boards
251325cf1a30Sjl139090 	 * if the force flag is present; otherwise disallow the board.
251425cf1a30Sjl139090 	 */
251525cf1a30Sjl139090 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
251625cf1a30Sjl139090 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
251725cf1a30Sjl139090 			preference += DR_TP_FLOATING;
251825cf1a30Sjl139090 		else
251925cf1a30Sjl139090 			preference = DR_TP_INVALID;
252025cf1a30Sjl139090 	}
252125cf1a30Sjl139090 
252225cf1a30Sjl139090 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
252325cf1a30Sjl139090 	    preference);
252425cf1a30Sjl139090 
252525cf1a30Sjl139090 	return (preference);
252625cf1a30Sjl139090 }
252725cf1a30Sjl139090 
252825cf1a30Sjl139090 /*
252925cf1a30Sjl139090  * Create a memlist representing the source memory that will be copied to
253025cf1a30Sjl139090  * the target board.  The memory to be copied is the maximum amount that
253125cf1a30Sjl139090  * will fit on the target board.
253225cf1a30Sjl139090  */
253325cf1a30Sjl139090 static struct memlist *
dr_get_copy_mlist(struct memlist * s_mlist,struct memlist * t_mlist,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)253425cf1a30Sjl139090 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
253525cf1a30Sjl139090     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
253625cf1a30Sjl139090 {
253725cf1a30Sjl139090 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
253825cf1a30Sjl139090 	uint64_t	s_slice_mask, s_slice_base;
253925cf1a30Sjl139090 	uint64_t	t_slice_mask, t_slice_base;
254025cf1a30Sjl139090 	static fn_t	f = "dr_get_copy_mlist";
254125cf1a30Sjl139090 
254225cf1a30Sjl139090 	ASSERT(s_mlist != NULL);
254325cf1a30Sjl139090 	ASSERT(t_mlist != NULL);
254425cf1a30Sjl139090 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
254525cf1a30Sjl139090 
254625cf1a30Sjl139090 	s_slice_mask = s_mp->sbm_slice_size - 1;
2547*56f33205SJonathan Adams 	s_slice_base = s_mlist->ml_address & ~s_slice_mask;
254825cf1a30Sjl139090 
254925cf1a30Sjl139090 	t_slice_mask = t_mp->sbm_slice_size - 1;
2550*56f33205SJonathan Adams 	t_slice_base = t_mlist->ml_address & ~t_slice_mask;
255125cf1a30Sjl139090 
255225cf1a30Sjl139090 	t_ml = memlist_dup(t_mlist);
255325cf1a30Sjl139090 	s_del_ml = memlist_dup(s_mlist);
255425cf1a30Sjl139090 	s_copy_ml = memlist_dup(s_mlist);
255525cf1a30Sjl139090 
255625cf1a30Sjl139090 	/* XXX */
255725cf1a30Sjl139090 	ASSERT(t_ml != NULL);
255825cf1a30Sjl139090 	ASSERT(s_del_ml != NULL);
255925cf1a30Sjl139090 	ASSERT(s_copy_ml != NULL);
256025cf1a30Sjl139090 
256125cf1a30Sjl139090 	/*
256225cf1a30Sjl139090 	 * To construct the source copy memlist:
256325cf1a30Sjl139090 	 *
256425cf1a30Sjl139090 	 * The target memlist is converted to the post-rename
256525cf1a30Sjl139090 	 * source addresses.  This is the physical address range
256625cf1a30Sjl139090 	 * the target will have after the copy-rename.  Overlaying
256725cf1a30Sjl139090 	 * and deleting this from the current source memlist will
256825cf1a30Sjl139090 	 * give the source delete memlist.  The copy memlist is
256925cf1a30Sjl139090 	 * the reciprocal of the source delete memlist.
257025cf1a30Sjl139090 	 */
2571*56f33205SJonathan Adams 	for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
257225cf1a30Sjl139090 		/*
257325cf1a30Sjl139090 		 * Normalize relative to target slice base PA
257425cf1a30Sjl139090 		 * in order to preseve slice offsets.
257525cf1a30Sjl139090 		 */
2576*56f33205SJonathan Adams 		ml->ml_address -= t_slice_base;
257725cf1a30Sjl139090 		/*
257825cf1a30Sjl139090 		 * Convert to source slice PA address.
257925cf1a30Sjl139090 		 */
2580*56f33205SJonathan Adams 		ml->ml_address += s_slice_base;
258125cf1a30Sjl139090 	}
258225cf1a30Sjl139090 
2583*56f33205SJonathan Adams 	for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
2584*56f33205SJonathan Adams 		s_del_ml = memlist_del_span(s_del_ml,
2585*56f33205SJonathan Adams 		    ml->ml_address, ml->ml_size);
258625cf1a30Sjl139090 	}
258725cf1a30Sjl139090 
258825cf1a30Sjl139090 	/*
258925cf1a30Sjl139090 	 * Expand the delete mlist to fully include any dynamic segments
259025cf1a30Sjl139090 	 * it intersects with.
259125cf1a30Sjl139090 	 */
2592*56f33205SJonathan Adams 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2593*56f33205SJonathan Adams 		uint64_t del_base = ml->ml_address;
2594*56f33205SJonathan Adams 		uint64_t del_end = ml->ml_address + ml->ml_size;
259525cf1a30Sjl139090 		struct memlist *dyn;
259625cf1a30Sjl139090 
2597*56f33205SJonathan Adams 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2598*56f33205SJonathan Adams 		    dyn = dyn->ml_next) {
2599*56f33205SJonathan Adams 			uint64_t dyn_base = dyn->ml_address;
2600*56f33205SJonathan Adams 			uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
260125cf1a30Sjl139090 
260225cf1a30Sjl139090 			if (del_base > dyn_base && del_base < dyn_end)
260325cf1a30Sjl139090 				del_base = dyn_base;
260425cf1a30Sjl139090 
260525cf1a30Sjl139090 			if (del_end > dyn_base && del_end < dyn_end)
260625cf1a30Sjl139090 				del_end = dyn_end;
260725cf1a30Sjl139090 		}
260825cf1a30Sjl139090 
260925cf1a30Sjl139090 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
261025cf1a30Sjl139090 	}
261125cf1a30Sjl139090 
261225cf1a30Sjl139090 	memlist_delete(s_del_ml);
261325cf1a30Sjl139090 	s_del_ml = x_ml;
261425cf1a30Sjl139090 
2615*56f33205SJonathan Adams 	for (ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2616*56f33205SJonathan Adams 		s_copy_ml = memlist_del_span(s_copy_ml,
2617*56f33205SJonathan Adams 		    ml->ml_address, ml->ml_size);
261825cf1a30Sjl139090 	}
261925cf1a30Sjl139090 
262025cf1a30Sjl139090 	PR_MEM("%s: source delete mlist\n", f);
262125cf1a30Sjl139090 	PR_MEMLIST_DUMP(s_del_ml);
262225cf1a30Sjl139090 
262325cf1a30Sjl139090 	PR_MEM("%s: source copy mlist\n", f);
262425cf1a30Sjl139090 	PR_MEMLIST_DUMP(s_copy_ml);
262525cf1a30Sjl139090 
262625cf1a30Sjl139090 	memlist_delete(t_ml);
262725cf1a30Sjl139090 	memlist_delete(s_del_ml);
262825cf1a30Sjl139090 
262925cf1a30Sjl139090 	return (s_copy_ml);
263025cf1a30Sjl139090 }
263125cf1a30Sjl139090 
263225cf1a30Sjl139090 /*
263325cf1a30Sjl139090  * Scan the non-relocatable spans on the source memory
263425cf1a30Sjl139090  * and construct a minimum mlist that includes all non-reloc
263525cf1a30Sjl139090  * memory subject to target alignment, and dynamic segment
263625cf1a30Sjl139090  * constraints where only whole dynamic segments may be deleted.
263725cf1a30Sjl139090  */
263825cf1a30Sjl139090 static struct memlist *
dr_get_nonreloc_mlist(struct memlist * s_ml,dr_mem_unit_t * s_mp)263925cf1a30Sjl139090 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
264025cf1a30Sjl139090 {
264125cf1a30Sjl139090 	struct memlist	*x_ml = NULL;
264225cf1a30Sjl139090 	struct memlist	*ml;
264325cf1a30Sjl139090 	static fn_t	f = "dr_get_nonreloc_mlist";
264425cf1a30Sjl139090 
264525cf1a30Sjl139090 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
264625cf1a30Sjl139090 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
264725cf1a30Sjl139090 
2648*56f33205SJonathan Adams 	for (ml = s_ml; ml; ml = ml->ml_next) {
264925cf1a30Sjl139090 		int rv;
265025cf1a30Sjl139090 		uint64_t nr_base, nr_end;
265125cf1a30Sjl139090 		memquery_t mq;
265225cf1a30Sjl139090 		struct memlist *dyn;
265325cf1a30Sjl139090 
265425cf1a30Sjl139090 		rv = kphysm_del_span_query(
2655*56f33205SJonathan Adams 		    _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
265625cf1a30Sjl139090 		if (rv) {
265725cf1a30Sjl139090 			memlist_delete(x_ml);
265825cf1a30Sjl139090 			return (NULL);
265925cf1a30Sjl139090 		}
266025cf1a30Sjl139090 
266125cf1a30Sjl139090 		if (mq.nonrelocatable == 0)
266225cf1a30Sjl139090 			continue;
266325cf1a30Sjl139090 
266425cf1a30Sjl139090 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
266525cf1a30Sjl139090 		    _ptob64(mq.first_nonrelocatable),
266625cf1a30Sjl139090 		    _ptob64(mq.last_nonrelocatable),
266725cf1a30Sjl139090 		    mq.first_nonrelocatable,
266825cf1a30Sjl139090 		    mq.last_nonrelocatable);
266925cf1a30Sjl139090 
267025cf1a30Sjl139090 		/*
267125cf1a30Sjl139090 		 * Align the span at both ends to allow for possible
267225cf1a30Sjl139090 		 * cage expansion.
267325cf1a30Sjl139090 		 */
267425cf1a30Sjl139090 		nr_base = _ptob64(mq.first_nonrelocatable);
267525cf1a30Sjl139090 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
267625cf1a30Sjl139090 
267725cf1a30Sjl139090 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
267825cf1a30Sjl139090 		    f, nr_base, nr_end);
267925cf1a30Sjl139090 
268025cf1a30Sjl139090 		/*
268125cf1a30Sjl139090 		 * Expand the non-reloc span to fully include any
268225cf1a30Sjl139090 		 * dynamic segments it intersects with.
268325cf1a30Sjl139090 		 */
2684*56f33205SJonathan Adams 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2685*56f33205SJonathan Adams 		    dyn = dyn->ml_next) {
2686*56f33205SJonathan Adams 			uint64_t dyn_base = dyn->ml_address;
2687*56f33205SJonathan Adams 			uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
268825cf1a30Sjl139090 
268925cf1a30Sjl139090 			if (nr_base > dyn_base && nr_base < dyn_end)
269025cf1a30Sjl139090 				nr_base = dyn_base;
269125cf1a30Sjl139090 
269225cf1a30Sjl139090 			if (nr_end > dyn_base && nr_end < dyn_end)
269325cf1a30Sjl139090 				nr_end = dyn_end;
269425cf1a30Sjl139090 		}
269525cf1a30Sjl139090 
269625cf1a30Sjl139090 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
269725cf1a30Sjl139090 	}
269825cf1a30Sjl139090 
269925cf1a30Sjl139090 	if (x_ml == NULL) {
270025cf1a30Sjl139090 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
270125cf1a30Sjl139090 		return (NULL);
270225cf1a30Sjl139090 	}
270325cf1a30Sjl139090 
270425cf1a30Sjl139090 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
270525cf1a30Sjl139090 	PR_MEMLIST_DUMP(x_ml);
270625cf1a30Sjl139090 
270725cf1a30Sjl139090 	return (x_ml);
270825cf1a30Sjl139090 }
270925cf1a30Sjl139090 
271025cf1a30Sjl139090 /*
271125cf1a30Sjl139090  * Check if source memlist can fit in target memlist while maintaining
271225cf1a30Sjl139090  * relative offsets within board.
271325cf1a30Sjl139090  */
271425cf1a30Sjl139090 static int
dr_memlist_canfit(struct memlist * s_mlist,struct memlist * t_mlist,dr_mem_unit_t * s_mp,dr_mem_unit_t * t_mp)271525cf1a30Sjl139090 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
271625cf1a30Sjl139090     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
271725cf1a30Sjl139090 {
271825cf1a30Sjl139090 	int		canfit = 0;
271925cf1a30Sjl139090 	struct memlist	*s_ml, *t_ml, *ml;
272025cf1a30Sjl139090 	uint64_t	s_slice_mask, t_slice_mask;
272125cf1a30Sjl139090 	static fn_t	f = "dr_mlist_canfit";
272225cf1a30Sjl139090 
272325cf1a30Sjl139090 	s_ml = memlist_dup(s_mlist);
272425cf1a30Sjl139090 	t_ml = memlist_dup(t_mlist);
272525cf1a30Sjl139090 
272625cf1a30Sjl139090 	if (s_ml == NULL || t_ml == NULL) {
272725cf1a30Sjl139090 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
272825cf1a30Sjl139090 		goto done;
272925cf1a30Sjl139090 	}
273025cf1a30Sjl139090 
273125cf1a30Sjl139090 	s_slice_mask = s_mp->sbm_slice_size - 1;
273225cf1a30Sjl139090 	t_slice_mask = t_mp->sbm_slice_size - 1;
273325cf1a30Sjl139090 
273425cf1a30Sjl139090 	/*
273525cf1a30Sjl139090 	 * Normalize to slice relative offsets.
273625cf1a30Sjl139090 	 */
2737*56f33205SJonathan Adams 	for (ml = s_ml; ml; ml = ml->ml_next)
2738*56f33205SJonathan Adams 		ml->ml_address &= s_slice_mask;
273925cf1a30Sjl139090 
2740*56f33205SJonathan Adams 	for (ml = t_ml; ml; ml = ml->ml_next)
2741*56f33205SJonathan Adams 		ml->ml_address &= t_slice_mask;
274225cf1a30Sjl139090 
274325cf1a30Sjl139090 	canfit = memlist_canfit(s_ml, t_ml);
274425cf1a30Sjl139090 done:
274525cf1a30Sjl139090 	memlist_delete(s_ml);
274625cf1a30Sjl139090 	memlist_delete(t_ml);
274725cf1a30Sjl139090 
274825cf1a30Sjl139090 	return (canfit);
274925cf1a30Sjl139090 }
275025cf1a30Sjl139090 
275125cf1a30Sjl139090 /*
275225cf1a30Sjl139090  * Memlist support.
275325cf1a30Sjl139090  */
275425cf1a30Sjl139090 
275525cf1a30Sjl139090 /*
275625cf1a30Sjl139090  * Determine whether the source memlist (s_mlist) will
275725cf1a30Sjl139090  * fit into the target memlist (t_mlist) in terms of
275825cf1a30Sjl139090  * size and holes.  Assumes the caller has normalized the
275925cf1a30Sjl139090  * memlist physical addresses for comparison.
276025cf1a30Sjl139090  */
276125cf1a30Sjl139090 static int
memlist_canfit(struct memlist * s_mlist,struct memlist * t_mlist)276225cf1a30Sjl139090 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
276325cf1a30Sjl139090 {
276425cf1a30Sjl139090 	int		rv = 0;
276525cf1a30Sjl139090 	struct memlist	*s_ml, *t_ml;
276625cf1a30Sjl139090 
276725cf1a30Sjl139090 	if ((s_mlist == NULL) || (t_mlist == NULL))
276825cf1a30Sjl139090 		return (0);
276925cf1a30Sjl139090 
277025cf1a30Sjl139090 	s_ml = s_mlist;
2771*56f33205SJonathan Adams 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
277225cf1a30Sjl139090 		uint64_t	s_start, s_end;
277325cf1a30Sjl139090 		uint64_t	t_start, t_end;
277425cf1a30Sjl139090 
2775*56f33205SJonathan Adams 		t_start = t_ml->ml_address;
2776*56f33205SJonathan Adams 		t_end = t_start + t_ml->ml_size;
277725cf1a30Sjl139090 
2778*56f33205SJonathan Adams 		for (; s_ml; s_ml = s_ml->ml_next) {
2779*56f33205SJonathan Adams 			s_start = s_ml->ml_address;
2780*56f33205SJonathan Adams 			s_end = s_start + s_ml->ml_size;
278125cf1a30Sjl139090 
278225cf1a30Sjl139090 			if ((s_start < t_start) || (s_end > t_end))
278325cf1a30Sjl139090 				break;
278425cf1a30Sjl139090 		}
278525cf1a30Sjl139090 	}
278625cf1a30Sjl139090 
278725cf1a30Sjl139090 	/*
278825cf1a30Sjl139090 	 * If we ran out of source memlist chunks that mean
278925cf1a30Sjl139090 	 * we found a home for all of them.
279025cf1a30Sjl139090 	 */
279125cf1a30Sjl139090 	if (s_ml == NULL)
279225cf1a30Sjl139090 		rv = 1;
279325cf1a30Sjl139090 
279425cf1a30Sjl139090 	return (rv);
279525cf1a30Sjl139090 }
2796