xref: /titanic_50/usr/src/uts/sun4u/io/sbd_mem.c (revision 1e6f4912c04ba197d638cc6eb5b35eeae672df40)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * memory support routines for sbd.
29  */
30 
31 #include <sys/debug.h>
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>
35 #include <sys/dditypes.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sysmacros.h>
43 #include <sys/machsystm.h>
44 #include <sys/spitregs.h>
45 #include <sys/cpuvar.h>
46 #include <sys/cpu_module.h>
47 #include <sys/promif.h>
48 #include <sys/memlist_impl.h>
49 #include <sys/mem_cage.h>
50 #include <sys/lgrp.h>
51 #include <sys/platform_module.h>
52 #include <vm/seg_kmem.h>
53 
54 #include <sys/sbdpriv.h>
55 
/*
 * 64-bit page/byte conversions.
 * _ptob64 widens a page count (or pfn) to uint64_t before shifting it
 * left by PAGESHIFT, yielding a byte count (or physical address).
 * _b64top shifts a byte quantity right by PAGESHIFT and casts the
 * result to pgcnt_t.
 */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
58 
/*
 * Forward declarations of the file-local (static) helpers used by the
 * attach/detach/release entry points in this file.
 */
static int		sbd_post_detach_mem_unit(sbd_mem_unit_t *mp,
				sbderror_t *ep);
static int		sbd_reserve_mem_spans(memhandle_t *mhp,
					struct memlist *mlist);
static int		sbd_check_boundaries(struct memlist *orig_memlist,
					sbd_mem_unit_t *s_mp,
					sbd_mem_unit_t *t_mp);
static int		sbd_select_mem_target(sbd_handle_t *hp,
				sbd_mem_unit_t *mp, struct memlist *ml);
static void		sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t
					*ep);
static int		memlist_canfit(struct memlist *s_mlist,
					struct memlist *t_mlist);
static void		sbd_mem_cleanup(sbd_mem_unit_t *s_mp,
				sbd_mem_unit_t *t_mp, sbderror_t *ep);
static void		sbd_flush_ecache(uint64_t a, uint64_t b);
75 
76 struct memlist *
77 sbd_get_memlist(sbd_mem_unit_t *mp, sbderror_t *ep)
78 {
79 	struct memlist	*mlist;
80 	static fn_t	f = "sbd_get_memlist";
81 	sbd_board_t 	*sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
82 	sbdp_handle_t	*hdp;
83 	sbd_handle_t	*hp = MACHBD2HD(sbp);
84 
85 	PR_MEM("%s...\n", f);
86 
87 	/*
88 	 * Return cached memlist, if present.
89 	 * This memlist will be present following an
90 	 * unconfigure (a.k.a: detach) of this memunit.
91 	 * It should only be used in the case were a configure
92 	 * is bringing this memunit back in without going
93 	 * through the disconnect and connect states.
94 	 */
95 	if (mp->sbm_mlist) {
96 		PR_MEM("%s: found cached memlist\n", f);
97 
98 		mlist = memlist_dup(mp->sbm_mlist);
99 	} else {
100 		/* attempt to construct a memlist using phys_install */
101 
102 		/*
103 		 * NOTE: this code block assumes only one memunit per
104 		 * board.  This is currently safe because the function
105 		 * sbd_init_mem_devlist() forces this assumption to be
106 		 * valid.
107 		 */
108 
109 		/* round down to slice base address */
110 		/* build mlist from the lower layer */
111 		hdp = sbd_get_sbdp_handle(sbp, hp);
112 		mlist = sbdp_get_memlist(hdp, mp->sbm_cm.sbdev_dip);
113 		if (mlist == NULL) {
114 			SBD_GET_PERR(hdp->h_err, ep);
115 			PR_MEM("sbd:%s: failed to get memlist for "
116 				"dip (0x%p) ecode %d errno %d", f,
117 				(void *)mp->sbm_cm.sbdev_dip,
118 				ep->e_code, ep->e_errno);
119 			sbd_release_sbdp_handle(hdp);
120 			return (NULL);
121 		}
122 		sbd_release_sbdp_handle(hdp);
123 	}
124 
125 	PR_MEM("%s: memlist for mem-unit (%d.%d), dip 0x%p:\n",
126 		f, sbp->sb_num,
127 		mp->sbm_cm.sbdev_unum,
128 		(void *)mp->sbm_cm.sbdev_dip);
129 	SBD_MEMLIST_DUMP(mlist);
130 
131 	return (mlist);
132 }
133 
134 int
135 sbd_pre_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
136 {
137 	int		err_flag = 0;
138 	sbderror_t	*ep = SBD_HD2ERR(hp);
139 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
140 	int		d, i;
141 	sbdp_handle_t	*hdp;
142 	static fn_t	f = "sbd_pre_attach_mem";
143 
144 	PR_MEM("%s...\n", f);
145 
146 	SBD_SET_ERR(ep, 0);
147 	hdp = sbd_get_sbdp_handle(sbp, hp);
148 
149 	for (d = 0; d < devnum; d++) {
150 		sbd_mem_unit_t	*mp;
151 		int		unit;
152 		dev_info_t	*dip;
153 		sbd_istate_t	state;
154 		int		rv;
155 
156 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
157 		ASSERT(devlist[d].dv_dip != NULL);
158 
159 		dip = devlist[d].dv_dip;
160 		unit = sbdp_get_unit_num(hdp, dip);
161 		if (unit == -1) {
162 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
163 				continue;
164 			else {
165 				SBD_GET_PERR(hdp->h_err, ep);
166 				err_flag = 1;
167 				break;
168 			}
169 		}
170 
171 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
172 
173 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
174 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
175 
176 		PR_MEM("sbd: OS attach mem-unit (%d.%d)\n",
177 			sbp->sb_num,
178 			mp->sbm_cm.sbdev_unum);
179 
180 		state = mp->sbm_cm.sbdev_state;
181 		switch (state) {
182 		case SBD_STATE_UNCONFIGURED:
183 			/* use memlist cached by sbd_post_detach_mem_unit */
184 			if (mp->sbm_mlist != NULL) {
185 				PR_MEM("%s: recovering from UNCONFIG"
186 					" mem-unit (%d.%d)\n",
187 					f, sbp->sb_num,
188 					mp->sbm_cm.sbdev_unum);
189 
190 				PR_MEM("%s: re-configure cached memlist:\n", f);
191 				SBD_MEMLIST_DUMP(mp->sbm_mlist);
192 
193 				/*
194 				 * kphysm del handle should have been freed
195 				 */
196 				ASSERT((mp->sbm_flags & SBD_MFLAG_RELOWNER)
197 					== 0);
198 			} else {
199 				if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
200 					continue;
201 				else {
202 					SBD_GET_PERR(hdp->h_err, ep);
203 					err_flag = 1;
204 					PR_MEM("%s: mem-unit (%d.%d)"
205 						" unusable\n",
206 						f, sbp->sb_num,
207 						mp->sbm_cm.sbdev_unum);
208 					break;
209 				}
210 			}
211 
212 			/*FALLTHROUGH*/
213 
214 		case SBD_STATE_CONNECTED:
215 			PR_MEM("%s: reprogramming mem hardware (board %d)\n",
216 				f, sbp->sb_num);
217 
218 			for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
219 				if (mp->sbm_dip[i] == NULL)
220 					continue;
221 				dip = mp->sbm_dip[i];
222 
223 				PR_MEM("%s: enabling mc 0x%p on board %d\n",
224 					f, (void *)dip, sbp->sb_num);
225 
226 				rv = sbdphw_enable_memctrl(hdp, dip);
227 				if (rv < 0) {
228 					SBD_GET_PERR(hdp->h_err, ep);
229 					cmn_err(CE_WARN,
230 					"%s: failed to program mem ctrlr %p on "
231 					"board %d", f, (void *)mp->sbm_dip[i],
232 					sbp->sb_num);
233 					err_flag = 1;
234 				}
235 			}
236 			break;
237 
238 		default:
239 			cmn_err(CE_WARN,
240 				"%s: unexpected state (%d) for mem-unit "
241 				"(%d.%d)", f, state, sbp->sb_num,
242 				mp->sbm_cm.sbdev_unum);
243 			if (SBD_GET_ERR(ep) == 0) {
244 				SBD_SET_ERR(ep, ESBD_STATE);
245 				err_flag = 1;
246 			}
247 			break;
248 		}
249 
250 		/* exit for loop if error encountered */
251 		if (err_flag) {
252 			SBD_SET_ERRSTR(ep,
253 			    sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
254 			break;
255 		}
256 	}
257 	sbd_release_sbdp_handle(hdp);
258 
259 	return (err_flag ? -1 : 0);
260 }
261 
262 int
263 sbd_post_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
264 {
265 	int		d;
266 	sbdp_handle_t	*hdp;
267 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
268 	sbderror_t	*ep = SBD_HD2ERR(hp);
269 	static fn_t	f = "sbd_post_attach_mem";
270 
271 	PR_MEM("%s...\n", f);
272 	hdp = sbd_get_sbdp_handle(sbp, hp);
273 
274 	for (d = 0; d < devnum; d++) {
275 		sbd_mem_unit_t	*mp;
276 		dev_info_t	*dip;
277 		int		unit;
278 		struct memlist	*mlist, *ml;
279 
280 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
281 		ASSERT(devlist[d].dv_dip != NULL);
282 
283 		dip = devlist[d].dv_dip;
284 		unit = sbdp_get_unit_num(hdp, dip);
285 		if (unit == -1) {
286 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
287 				continue;
288 			else {
289 				SBD_GET_PERR(hdp->h_err, ep);
290 				break;
291 			}
292 		}
293 
294 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
295 
296 		mlist = sbd_get_memlist(mp, ep);
297 		if (mlist == NULL) {
298 			cmn_err(CE_WARN,
299 				"%s: no memlist for mem-unit (%d.%d)",
300 				f,
301 				sbp->sb_num,
302 				mp->sbm_cm.sbdev_unum);
303 
304 			if (SBD_GET_ERR(ep) == 0) {
305 				SBD_SET_ERR(ep, ESBD_MEMFAIL);
306 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
307 			}
308 
309 			continue;
310 		}
311 
312 		/*
313 		 * Verify the memory really did successfully attach
314 		 * by checking for its existence in phys_install.
315 		 */
316 
317 		memlist_read_lock();
318 		if (memlist_intersect(phys_install, mlist) == 0) {
319 			memlist_read_unlock();
320 
321 			cmn_err(CE_WARN,
322 				"%s: mem-unit (%d.%d) memlist not in"
323 				" phys_install", f, sbp->sb_num,
324 				mp->sbm_cm.sbdev_unum);
325 
326 			if (SBD_GET_ERR(ep) == 0) {
327 				SBD_SET_ERR(ep, ESBD_INTERNAL);
328 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
329 			}
330 
331 			memlist_delete(mlist);
332 			continue;
333 		}
334 		memlist_read_unlock();
335 
336 		for (ml = mlist; ml != NULL; ml = ml->next) {
337 			(void) sbdp_mem_add_span(hdp, ml->address, ml->size);
338 		}
339 
340 		memlist_delete(mlist);
341 
342 		/*
343 		 * Destroy cached memlist, if any.
344 		 * There will be a cached memlist in sbm_mlist if
345 		 * this board is being configured directly after
346 		 * an unconfigure.
347 		 * To support this transition, sbd_post_detach_mem
348 		 * left a copy of the last known memlist in sbm_mlist.
349 		 * This memlist could differ from any derived from
350 		 * hardware if while this memunit was last configured
351 		 * the system detected and deleted bad pages from
352 		 * phys_install.  The location of those bad pages
353 		 * will be reflected in the cached memlist.
354 		 */
355 		if (mp->sbm_mlist) {
356 			memlist_delete(mp->sbm_mlist);
357 			mp->sbm_mlist = NULL;
358 		}
359 		sbd_init_mem_unit_data(mp, ep);
360 	}
361 
362 	sbd_release_sbdp_handle(hdp);
363 	return (0);
364 }
365 
366 int
367 sbd_pre_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
368 {
369 	int		d;
370 	int		unit;
371 	sbdp_handle_t	*hdp;
372 	sbderror_t	*ep = SBD_HD2ERR(hp);
373 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
374 	dev_info_t	*dip;
375 
376 	hdp = sbd_get_sbdp_handle(sbp, hp);
377 
378 	for (d = 0; d < devnum; d++) {
379 		sbd_mem_unit_t *mp;
380 
381 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
382 		ASSERT(devlist[d].dv_dip != NULL);
383 
384 		dip = devlist[d].dv_dip;
385 		unit = sbdp_get_unit_num(hdp, dip);
386 		if (unit == -1) {
387 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
388 				continue;
389 			else {
390 				SBD_GET_PERR(hdp->h_err, ep);
391 				sbd_release_sbdp_handle(hdp);
392 				return (-1);
393 			}
394 		}
395 
396 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
397 
398 		/* sanity check */
399 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
400 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
401 
402 		PR_MEM("sbd: OS detach mem-unit (%d.%d)\n",
403 			sbp->sb_num, mp->sbm_cm.sbdev_unum);
404 	}
405 
406 	sbd_release_sbdp_handle(hdp);
407 	return (0);
408 }
409 
410 int
411 sbd_post_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
412 {
413 	int		d, rv;
414 	sbdp_handle_t	*hdp;
415 	sbd_board_t	*sbp;
416 	sbd_mem_unit_t	*s_mp, *t_mp;
417 	static fn_t	f = "sbd_post_detach_mem";
418 
419 	PR_MEM("%s...\n", f);
420 
421 	sbp = SBDH2BD(hp->h_sbd);
422 
423 	hdp = sbd_get_sbdp_handle(sbp, hp);
424 
425 
426 	rv = 0;
427 	for (d = 0; d < devnum; d++) {
428 		sbderror_t	*ep;
429 		dev_info_t	*dip;
430 		int		unit;
431 
432 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
433 		ASSERT(devlist[d].dv_dip != NULL);
434 
435 		ep = &devlist[d].dv_error;
436 		if ((SBD_GET_ERR(SBD_HD2ERR(hp)) != 0) ||
437 		    (sbd_set_err_in_hdl(hp, ep) == 0)) {
438 			rv = -1;
439 		}
440 
441 		dip = devlist[d].dv_dip;
442 		unit = sbdp_get_unit_num(hdp, dip);
443 		if (unit == -1) {
444 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
445 				continue;
446 			else {
447 				if (rv != -1)
448 					SBD_GET_PERR(hdp->h_err, ep);
449 				break;
450 			}
451 		}
452 
453 		s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
454 
455 		ASSERT(s_mp->sbm_cm.sbdev_sbp == sbp);
456 
457 		if (rv == -1) {
458 			if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
459 				t_mp = s_mp->sbm_peer;
460 			} else {
461 				/* this is no target unit */
462 				t_mp = NULL;
463 			}
464 
465 			sbd_mem_cleanup(s_mp, t_mp, ep);
466 		} else if (sbd_post_detach_mem_unit(s_mp, ep))
467 			rv = -1;
468 	}
469 
470 	sbd_release_sbdp_handle(hdp);
471 	return (rv);
472 }
473 
474 static void
475 sbd_add_memory_spans(sbd_board_t *sbp, struct memlist *ml)
476 {
477 	sbdp_handle_t	*hdp;
478 	static fn_t	f = "sbd_add_memory_spans";
479 
480 	PR_MEM("%s...", f);
481 	SBD_MEMLIST_DUMP(ml);
482 
483 #ifdef DEBUG
484 	memlist_read_lock();
485 	if (memlist_intersect(phys_install, ml)) {
486 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
487 	}
488 	memlist_read_unlock();
489 #endif
490 	hdp = sbd_get_sbdp_handle(NULL, NULL);
491 
492 	for (; ml; ml = ml->next) {
493 		update_membounds_t umb;
494 		pfn_t	base;
495 		pgcnt_t	npgs;
496 		int	rv;
497 
498 		base = _b64top(ml->address);
499 		npgs = _b64top(ml->size);
500 
501 		umb.u_board = sbp->sb_num;
502 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
503 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
504 
505 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
506 		rv = kphysm_add_memory_dynamic(base, npgs);
507 
508 		(void) sbdp_mem_add_span(hdp, ml->address, ml->size);
509 
510 		if (rv != KPHYSM_OK) {
511 			cmn_err(CE_WARN, "sbd:%s:"
512 				" unexpected kphysm_add_memory_dynamic"
513 				" return value %d;"
514 				" basepfn=0x%lx, npages=%ld\n",
515 				f, rv, base, npgs);
516 
517 			continue;
518 		}
519 		rv = kcage_range_add(base, npgs, KCAGE_DOWN);
520 		if (rv != 0)
521 			continue;
522 	}
523 	sbd_release_sbdp_handle(hdp);
524 }
525 
/*
 * hack for test scripts.  *** remove before code finalized ***
 * sbd_mem_cleanup() stores here the board number of the target mem-unit
 * it cleans up.
 */
int sbd_last_target;
528 
/*
 * Complete the detach of source mem-unit s_mp.
 *
 * Steps, in order:
 *   - fetch the memory alignment for the unit from the sbdp layer;
 *   - verify that the deleted memlists of the source (and of its
 *     copy-rename target, when s_mp is flagged SBD_MFLAG_SOURCE) no
 *     longer intersect phys_install;
 *   - for a copy-rename, rebase the target and source memlists onto
 *     their swapped base addresses and, when SBD_MFLAG_MEMUPSIZE is set,
 *     add back the part of the target's memory not taken over by the
 *     source;
 *   - delete the affected address spans through sbdp_mem_del_span();
 *   - clean up both mem-unit structures via sbd_mem_cleanup().
 *
 * Returns 0 on success.  Returns -1 with ep filled in if the alignment
 * cannot be obtained or deleted memory is still present in phys_install.
 */
static int
sbd_post_detach_mem_unit(sbd_mem_unit_t *s_mp, sbderror_t *ep)
{
	uint64_t	sz;
	uint64_t	sm;
	uint64_t	t_basepa;
	uint64_t	tmp_basepa;
	uint64_t	s_basepa;
	sbd_board_t 	*sbp;
	sbdp_handle_t	*hdp;
	uint64_t	s_nbytes;
	uint64_t	s_new_basepa;
	sbd_mem_unit_t	*t_mp, *x_mp;
	struct memlist	*ml;
	int		rv;
	static fn_t	f = "sbd_post_detach_mem_unit";
	sbd_handle_t	*hp;

	PR_MEM("%s...\n", f);

	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
	hp = MACHBD2HD(sbp);
	hdp = sbd_get_sbdp_handle(sbp, hp);

	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
		cmn_err(CE_WARN,
			"sbd:%s: no alignment for mem-unit (%d.%d)",
			f, sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
		SBD_GET_PERR(hdp->h_err, ep);
		sbd_release_sbdp_handle(hdp);
		return (-1);
	}
	/*
	 * sm is used below as a mask: (pa & ~sm) rounds down to an
	 * alignment boundary and (pa & sm) extracts the offset within it.
	 * NOTE(review): this presumes sz is a power of two -- confirm.
	 */
	sm = sz - 1;

	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
	PR_MEM("%s: brd %d: deleted memlist (EMPTY maybe okay):\n",
		f, sbp->sb_num);
	SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);

	/* sanity check */
	ASSERT(s_mp->sbm_del_mlist == NULL ||
		(s_mp->sbm_flags & SBD_MFLAG_RELDONE) != 0);

	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
		/* copy-rename: the peer of a source unit is its target */
		t_mp = s_mp->sbm_peer;

		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
		ASSERT(t_mp->sbm_peer == s_mp);

		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RELDONE);
		ASSERT(t_mp->sbm_del_mlist);

		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
		PR_MEM("%s: target brd %d: deleted memlist:\n",
			f, sbp->sb_num);
		SBD_MEMLIST_DUMP(t_mp->sbm_del_mlist);
	} else {
		/* there is no target unit */
		t_mp = NULL;
	}

	/*
	 * Verify the memory really did successfully detach
	 * by checking for its non-existence in phys_install.
	 * x_mp records which unit was checked last; it is only read
	 * below when rv is non-zero, which implies it was assigned.
	 */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_flags & SBD_MFLAG_RELDONE) {
		x_mp = s_mp;
		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && (t_mp->sbm_flags & SBD_MFLAG_RELDONE)) {
		x_mp = t_mp;
		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
	}
	memlist_read_unlock();

	if (rv) {
		/* deleted memory is still installed: report and bail out */
		sbp = (sbd_board_t *)x_mp->sbm_cm.sbdev_sbp;

		cmn_err(CE_WARN,
			"%s: %smem-unit (%d.%d) memlist still in phys_install",
			f,
			x_mp == t_mp ? "target " : "",
			sbp->sb_num,
			x_mp->sbm_cm.sbdev_unum);
		SBD_SET_ERR(ep, ESBD_INTERNAL);
		SBD_SET_ERRSTR(ep, sbp->sb_mempath[x_mp->sbm_cm.sbdev_unum]);
		sbd_release_sbdp_handle(hdp);
		return (-1);
	}

	s_basepa	= _ptob64(s_mp->sbm_basepfn);
	s_nbytes	= _ptob64(s_mp->sbm_npages);

	if (t_mp != NULL) {
		t_basepa	= _ptob64(t_mp->sbm_basepfn);
		/*
		 * New base for the target's memlist: the source's aligned
		 * base plus the target's slice offset.
		 */
		s_new_basepa	= (s_basepa & ~ sm) +
					_ptob64(t_mp->sbm_slice_offset);

		/*
		 * We had to swap mem-units, so update
		 * memlists accordingly with new base
		 * addresses.
		 */
		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
			ml->address -= t_basepa;
			ml->address += s_new_basepa;
		}

		/*
		 * There is no need to explicitly rename the target delete
		 * memlist, because sbm_del_mlist and sbm_mlist always
		 * point to the same memlist for a copy/rename operation.
		 */
		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);

		PR_MEM("%s: renamed target memlist and delete memlist", f);
		SBD_MEMLIST_DUMP(t_mp->sbm_mlist);

		/* the source's memlist moves to the target's old base */
		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
			ml->address -= s_basepa;
			ml->address += t_basepa;
		}

		PR_MEM("%s: renamed source memlist", f);
		SBD_MEMLIST_DUMP(s_mp->sbm_mlist);

#ifdef DEBUG
		ASSERT(s_mp->sbm_mlist != s_mp->sbm_del_mlist);
		/*
		 * Renaming s_mp->sbm_del_mlist is not necessary.  This
		 * list is not used beyond this point, and in fact, is
		 * disposed of at the end of this function.
		 */
		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
			ml->address -= s_basepa;
			ml->address += t_basepa;
		}

		PR_MEM("%s: renamed source delete memlist", f);
		SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);
#endif

		if (s_mp->sbm_flags & SBD_MFLAG_MEMUPSIZE) {
			struct memlist	*nl;
			int mlret;

			/*
			 * We had to perform a copy-rename from a
			 * small memory node to a big memory node.
			 * Need to add back the remaining memory on
			 * the big board that wasn't used by that
			 * from the small board during the copy.
			 * Subtract out the portion of the target memory
			 * node that was taken over by the source memory
			 * node.
			 */
			nl = memlist_dup(t_mp->sbm_mlist);
			/* mlret is only reported in the debug trace below */
			mlret = memlist_delete_span(s_basepa, s_nbytes, &nl);
			PR_MEM("%s: mlret = %d\n", f, mlret);

			sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
			PR_MEM("%s: adding back remaining portion"
				" of mem-unit (%d.%d), memlist:\n",
				f, sbp->sb_num,
				t_mp->sbm_cm.sbdev_unum);

			SBD_MEMLIST_DUMP(nl);

			sbd_add_memory_spans(sbp, nl);

			memlist_delete(nl);
		}
	}


	/*
	 * Delete the address spans from the sbdp layer.  hdp->h_board is
	 * pointed at the board whose span is being deleted before each
	 * call.
	 */
	if (t_mp != NULL) {
		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
		hdp->h_board = sbp->sb_num;
		/* delete target's entire address space */
		tmp_basepa = t_basepa & ~ sm;
		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
		ASSERT(rv == 0);

		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
		hdp->h_board = sbp->sb_num;
		tmp_basepa = s_basepa & ~ sm;
		/*
		 * sz is reused here: it becomes the offset of the new base
		 * within its aligned region, i.e. the length of the span
		 * deleted below.
		 */
		sz = s_new_basepa & sm;
		/* delete source board's vacant address space */
		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
		ASSERT(rv == 0);
	} else {
		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
		hdp->h_board = sbp->sb_num;
		tmp_basepa = s_basepa & ~ sm;
		/* delete board's entire address space */
		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
		ASSERT(rv == 0);
	}

	/* rv is otherwise only consumed by the ASSERTs above */
#ifdef LINT
	rv = rv;
#endif

	sbd_mem_cleanup(s_mp, t_mp, ep);

	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
	PR_MEM("%s: board %d's memlist:", f, sbp->sb_num);
	SBD_MEMLIST_DUMP(s_mp->sbm_mlist);

	sbd_release_sbdp_handle(hdp);
	return (0);
}
744 
745 static void
746 sbd_mem_cleanup(sbd_mem_unit_t *s_mp, sbd_mem_unit_t *t_mp, sbderror_t *ep)
747 {
748 	sbd_board_t *sbp;
749 
750 	/* clean up target mem unit */
751 	if (t_mp != NULL) {
752 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
753 
754 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
755 		/*
756 		 * sbm_del_mlist and sbm_mlist point at the same list
757 		 * We only need to delete one and then set both pointers
758 		 * to NULL
759 		 */
760 		memlist_delete(t_mp->sbm_del_mlist);
761 
762 		t_mp->sbm_del_mlist = NULL;
763 		t_mp->sbm_mlist = NULL;
764 		t_mp->sbm_peer = NULL;
765 		t_mp->sbm_flags = 0;
766 		t_mp->sbm_cm.sbdev_busy = 0;
767 		sbd_init_mem_unit_data(t_mp, ep);
768 
769 		/*
770 		 * now that copy/rename has completed, undo this
771 		 * work that was done in sbd_release_mem_done.
772 		 */
773 		/*
774 		 * If error don't set the target to configured
775 		 */
776 		if (SBD_GET_ERR(ep) == 0) {
777 			SBD_DEV_CLR_UNREFERENCED(sbp, SBD_COMP_MEM, 0);
778 			SBD_DEV_CLR_RELEASED(sbp, SBD_COMP_MEM, 0);
779 			SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, 0,
780 				SBD_STATE_CONFIGURED);
781 		}
782 
783 /* hack for test scripts.  *** remove before code finalized *** */
784 sbd_last_target = sbp->sb_num;
785 	}
786 
787 	/*
788 	 * clean up (source) board's mem unit structure.
789 	 * NOTE: sbm_mlist is retained.  It is referred to as the
790 	 * cached memlist.  The cached memlist is used to re-attach
791 	 * (configure back in) this memunit from the unconfigured
792 	 * state.
793 	 */
794 	if (s_mp != NULL) {
795 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
796 
797 		/*
798 		 * Don't want to call memlist_delete for sbm_del_mlist,
799 		 * since that list points to the sbm_list
800 		 */
801 		s_mp->sbm_del_mlist = NULL;
802 		s_mp->sbm_peer = NULL;
803 		s_mp->sbm_flags = 0;
804 		s_mp->sbm_cm.sbdev_busy = 0;
805 		sbd_init_mem_unit_data(s_mp, ep);
806 	}
807 }
808 
809 /*
810  * Successful return from this function will have the memory
811  * handle in sbp->sb_dev[..mem-unit...].sbm_memhandle allocated
812  * and waiting.  This routine's job is to select the memory that
813  * actually has to be released (detached) which may not necessarily
814  * be the same memory node that came in in devlist[],
815  * i.e. a copy-rename is needed.
816  */
817 int
818 sbd_pre_release_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
819 {
820 	extern int	kcage_on;
821 	int		d;
822 	int		err_flag = 0;
823 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
824 	sbderror_t	*ep = SBD_HD2ERR(hp);
825 	sbderror_t	*lep;
826 	static fn_t	f = "sbd_pre_release_mem";
827 
828 	PR_MEM("%s...\n", f);
829 
830 	if (kcage_on == 0) {
831 		/*
832 		 * Can't Detach memory if Cage is OFF.
833 		 */
834 		cmn_err(CE_WARN, "%s: kernel cage is disabled", f);
835 		SBD_SET_ERR(ep, ESBD_KCAGE_OFF);
836 		return (-1);
837 	}
838 
839 	for (d = 0; d < devnum; d++) {
840 		int		rv;
841 		memquery_t	mq;
842 		sbd_mem_unit_t	*mp;
843 		struct memlist	*ml;
844 
845 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
846 		ASSERT(devlist[d].dv_dip != NULL);
847 
848 		mp = SBD_GET_BOARD_MEMUNIT(sbp, d);
849 
850 		/*
851 		 * If all the mem unit is marked as failed then don't allow the
852 		 * operation
853 		 */
854 		if (mp->sbm_cm.sbdev_cond == SBD_COND_FAILED) {
855 			SBD_SET_ERR(ep, ESBD_STATE);
856 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
857 			err_flag = -1;
858 			break;
859 		}
860 
861 		ASSERT(d == mp->sbm_cm.sbdev_unum);
862 
863 		/*
864 		 * if interleave is set to across boards fail the op
865 		 */
866 		if (mp->sbm_interleave) {
867 			SBD_SET_ERR(ep, ESBD_MEMINTLV);
868 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
869 			err_flag = -1;
870 			break;
871 		}
872 
873 		lep = &devlist[d].dv_error;
874 		if (SBD_GET_ERR(lep) != 0) {
875 			err_flag = -1;
876 			(void) sbd_set_err_in_hdl(hp, lep);
877 			break;
878 		}
879 
880 		if (mp->sbm_flags & SBD_MFLAG_RESERVED) {
881 			/*
882 			 * Board is currently involved in a delete
883 			 * memory operation. Can't detach this guy until
884 			 * that operation completes.
885 			 */
886 			cmn_err(CE_WARN,
887 				"%s: ineligible mem-unit (%d.%d) for detach",
888 				f, sbp->sb_num,
889 				mp->sbm_cm.sbdev_unum);
890 
891 			SBD_SET_ERR(lep, ESBD_INVAL);
892 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
893 			(void) sbd_set_err_in_hdl(hp, lep);
894 			err_flag = -1;
895 			break;
896 		}
897 
898 		/*
899 		 * Check whether the detaching memory requires a
900 		 * copy-rename.
901 		 */
902 		ASSERT(mp->sbm_npages != 0);
903 		rv = kphysm_del_span_query(
904 			mp->sbm_basepfn, mp->sbm_npages, &mq);
905 		if (rv != KPHYSM_OK) {
906 			cmn_err(CE_WARN,
907 				"%s: unexpected kphysm_del_span_query"
908 				" return value %d;"
909 				" basepfn 0x%lx, npages 0x%lx,"
910 				" mem-unit (%d.%d), dip 0x%p",
911 				f,
912 				rv,
913 				mp->sbm_basepfn,
914 				mp->sbm_npages,
915 				sbp->sb_num,
916 				mp->sbm_cm.sbdev_unum,
917 				(void *)mp->sbm_cm.sbdev_dip);
918 
919 			SBD_SET_ERR(lep, ESBD_INTERNAL);
920 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
921 			(void) sbd_set_err_in_hdl(hp, lep);
922 			err_flag = -1;
923 			break;
924 		}
925 
926 		if (mq.nonrelocatable != 0) {
927 			if (!(hp->h_iap->i_flags & SBD_FLAG_QUIESCE_OKAY)) {
928 				/* caller wasn't prompted for a suspend */
929 					SBD_SET_ERR(lep, ESBD_QUIESCE_REQD);
930 					SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
931 					(void) sbd_set_err_in_hdl(hp, lep);
932 					err_flag = 1;
933 					break;
934 			}
935 		}
936 
937 		/* flags should be clean at this time */
938 		ASSERT(mp->sbm_flags == 0);
939 
940 		ASSERT(mp->sbm_del_mlist == NULL);	/* should be null */
941 
942 		if (mp->sbm_mlist != NULL) {
943 			memlist_delete(mp->sbm_mlist);
944 			mp->sbm_mlist = NULL;
945 		}
946 
947 		ml = sbd_get_memlist(mp, lep);
948 		(void) sbd_set_err_in_hdl(hp, lep);
949 		if (ml == NULL) {
950 			PR_MEM("%s: no memlist found for board %d\n",
951 				f, sbp->sb_num);
952 			err_flag = -1;
953 			break;
954 		}
955 
956 		/* allocate a kphysm handle */
957 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
958 		if (rv != KPHYSM_OK) {
959 			memlist_delete(ml);
960 
961 			cmn_err(CE_WARN,
962 				"%s: unexpected kphysm_del_gethandle"
963 				" return value %d", f, rv);
964 
965 			SBD_SET_ERR(lep, ESBD_INTERNAL);
966 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
967 			(void) sbd_set_err_in_hdl(hp, lep);
968 			err_flag = -1;
969 			break;
970 		}
971 		mp->sbm_flags |= SBD_MFLAG_RELOWNER;
972 
973 		if ((mq.nonrelocatable != 0) ||
974 			sbd_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
975 			/*
976 			 * Either the detaching memory node contains
977 			 * non-reloc memory or we failed to reserve the
978 			 * detaching memory node (which did _not_ have
979 			 * any non-reloc memory, i.e. some non-reloc mem
980 			 * got onboard).
981 			 */
982 
983 			if (sbd_select_mem_target(hp, mp, ml)) {
984 				int rv;
985 
986 				/*
987 				 * We had no luck locating a target
988 				 * memory node to be the recipient of
989 				 * the non-reloc memory on the node
990 				 * we're trying to detach.
991 				 * Clean up be disposing the mem handle
992 				 * and the mem list.
993 				 */
994 				rv = kphysm_del_release(mp->sbm_memhandle);
995 				if (rv != KPHYSM_OK) {
996 					/*
997 					 * can do nothing but complain
998 					 * and hope helpful for debug
999 					 */
1000 					cmn_err(CE_WARN, "sbd:%s: unexpected"
1001 						" kphysm_del_release return"
1002 						" value %d",
1003 						f, rv);
1004 				}
1005 				mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1006 
1007 				memlist_delete(ml);
1008 
1009 				/* make sure sbm_flags is clean */
1010 				ASSERT(mp->sbm_flags == 0);
1011 
1012 				cmn_err(CE_WARN,
1013 					"%s: no available target for "
1014 					"mem-unit (%d.%d)",
1015 					f, sbp->sb_num,
1016 					mp->sbm_cm.sbdev_unum);
1017 
1018 				SBD_SET_ERR(lep, ESBD_NO_TARGET);
1019 				SBD_SET_ERRSTR(lep,
1020 					sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1021 				(void) sbd_set_err_in_hdl(hp, lep);
1022 
1023 				err_flag = -1;
1024 				break;
1025 			}
1026 
1027 			/*
1028 			 * ml is not memlist_deleted here because
1029 			 * it has been assigned to mp->sbm_mlist
1030 			 * by sbd_select_mem_target.
1031 			 */
1032 		} else {
1033 			/* no target needed to detach this board */
1034 			mp->sbm_flags |= SBD_MFLAG_RESERVED;
1035 			mp->sbm_peer = NULL;
1036 			mp->sbm_del_mlist = ml;
1037 			mp->sbm_mlist = ml;
1038 			mp->sbm_cm.sbdev_busy = 1;
1039 		}
1040 #ifdef DEBUG
1041 		ASSERT(mp->sbm_mlist != NULL);
1042 
1043 		if (mp->sbm_flags & SBD_MFLAG_SOURCE) {
1044 			int src, targ;
1045 
1046 			sbp = (sbd_board_t *)
1047 				mp->sbm_peer->sbm_cm.sbdev_sbp;
1048 			targ = sbp->sb_num;
1049 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1050 			src = sbp->sb_num;
1051 			PR_MEM("%s: release of board %d requires copy/rename;"
1052 				" selected target board %d\n",
1053 				f, src, targ);
1054 		} else {
1055 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1056 			PR_MEM("%s: copy/rename not required to release"
1057 				" board %d\n", f, sbp->sb_num);
1058 		}
1059 
1060 		ASSERT(mp->sbm_flags & SBD_MFLAG_RELOWNER);
1061 		ASSERT(mp->sbm_flags & SBD_MFLAG_RESERVED);
1062 #endif
1063 	}
1064 
1065 	return (err_flag);
1066 }
1067 
1068 void
1069 sbd_release_mem_done(sbd_handle_t *hp, int unit)
1070 {
1071 	sbd_mem_unit_t	*s_mp, *t_mp, *mp;
1072 	sbderror_t	*ep = SBD_HD2ERR(hp);
1073 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1074 	int		rv;
1075 	static fn_t	f = "sbd_release_mem_done";
1076 
1077 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1078 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1079 
1080 	/*
1081 	 * This unit will be flagged with SBD_MFLAG_SOURCE, if it
1082 	 * has a target unit.
1083 	 */
1084 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1085 		t_mp = s_mp->sbm_peer;
1086 		ASSERT(t_mp != NULL);
1087 		ASSERT(t_mp->sbm_peer == s_mp);
1088 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
1089 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RESERVED);
1090 	} else {
1091 		/* this is no target unit */
1092 		t_mp = NULL;
1093 	}
1094 
1095 	/* free delete handle */
1096 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RELOWNER);
1097 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RESERVED);
1098 
1099 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1100 	if (rv != KPHYSM_OK) {
1101 		/*
1102 		 * can do nothing but complain
1103 		 * and hope helpful for debug
1104 		 */
1105 		cmn_err(CE_WARN, "sbd:%s: unexpected kphysm_del_release"
1106 			" return value %d", f, rv);
1107 	}
1108 	s_mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1109 
1110 	/*
1111 	 * If an error was encountered during release, clean up
1112 	 * the source (and target, if present) unit data.
1113 	 */
1114 	if (SBD_GET_ERR(ep) != 0) {
1115 
1116 		PR_MEM("%s: unit %d.%d: error %d noted\n",
1117 			f, sbp->sb_num,
1118 			s_mp->sbm_cm.sbdev_unum,
1119 			SBD_GET_ERR(ep));
1120 
1121 		sbd_mem_cleanup(s_mp, t_mp, ep);
1122 
1123 		/* bail out */
1124 		return;
1125 	}
1126 
1127 	SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM, unit);
1128 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_RELEASE);
1129 
1130 	if (t_mp != NULL) {
1131 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1132 		/*
1133 		 * the kphysm delete operation that drained the source
1134 		 * board also drained this target board.  Since the source
1135 		 * board drain is now known to have succeeded, we know this
1136 		 * target board is drained too.
1137 		 */
1138 		SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM,
1139 			t_mp->sbm_cm.sbdev_unum);
1140 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1141 			t_mp->sbm_cm.sbdev_unum,
1142 			SBD_STATE_RELEASE);
1143 
1144 		/*
1145 		 * NOTE: do not transition target's board state,
1146 		 * even if the mem-unit was the last configure
1147 		 * unit of the board.  When copy/rename completes
1148 		 * this mem-unit will transitioned back to
1149 		 * the configured state.  In the meantime, the
1150 		 * board's must remain as is.
1151 		 */
1152 	}
1153 
1154 	/* if board(s) had deleted memory, verify it is gone */
1155 	rv = 0;
1156 	memlist_read_lock();
1157 	if (s_mp->sbm_del_mlist != NULL) {
1158 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1159 		mp = s_mp;
1160 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1161 	}
1162 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1163 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1164 		mp = t_mp;
1165 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1166 	}
1167 	memlist_read_unlock();
1168 	if (rv) {
1169 		cmn_err(CE_WARN, "sbd:%s: %smem-unit (%d.%d): "
1170 			"deleted memory still found in phys_install",
1171 			f,
1172 			(mp == t_mp ? "target " : ""),
1173 			sbp->sb_num,
1174 			mp->sbm_cm.sbdev_unum);
1175 
1176 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1177 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1178 		return;
1179 	}
1180 
1181 	s_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1182 	if (t_mp != NULL) {
1183 		t_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1184 		t_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1185 	}
1186 
1187 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1188 
1189 	SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM, unit);
1190 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_UNREFERENCED);
1191 
1192 	PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1193 		f, sbp->sb_num,
1194 		s_mp->sbm_cm.sbdev_unum);
1195 
1196 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1197 
1198 	if (t_mp != NULL) {
1199 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1200 
1201 		SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM,
1202 			t_mp->sbm_cm.sbdev_unum);
1203 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1204 			t_mp->sbm_cm.sbdev_unum,
1205 			SBD_STATE_UNREFERENCED);
1206 
1207 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1208 
1209 		PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1210 			f, sbp->sb_num,
1211 			t_mp->sbm_cm.sbdev_unum);
1212 
1213 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1214 	}
1215 }
1216 
1217 int
1218 sbd_disconnect_mem(sbd_handle_t *hp, int unit)
1219 {
1220 	static fn_t	f = "sbd_disconnect_mem";
1221 	sbd_mem_unit_t	*mp;
1222 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1223 
1224 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1225 
1226 	ASSERT(mp->sbm_cm.sbdev_state == SBD_STATE_CONNECTED ||
1227 	    mp->sbm_cm.sbdev_state == SBD_STATE_UNCONFIGURED);
1228 
1229 	PR_MEM("%s...\n", f);
1230 
1231 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1232 		memlist_delete(mp->sbm_del_mlist);
1233 	mp->sbm_del_mlist = NULL;
1234 
1235 	if (mp->sbm_mlist) {
1236 		memlist_delete(mp->sbm_mlist);
1237 		mp->sbm_mlist = NULL;
1238 	}
1239 
1240 	return (0);
1241 }
1242 
1243 int
1244 sbd_cancel_mem(sbd_handle_t *hp, int unit)
1245 {
1246 	sbd_mem_unit_t	*s_mp, *t_mp;
1247 	sbd_istate_t	state;
1248 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1249 	sbd_board_t	*tsbp;
1250 	static fn_t	f = "sbd_cancel_mem";
1251 	sbderror_t	*ep = SBD_HD2ERR(hp);
1252 
1253 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1254 
1255 	state = s_mp->sbm_cm.sbdev_state;
1256 
1257 	if (s_mp->sbm_flags & SBD_MFLAG_TARGET) {
1258 		/* must cancel source board, not target board */
1259 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1260 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
1261 		return (-1);
1262 	} else if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1263 		t_mp = s_mp->sbm_peer;
1264 		tsbp = t_mp->sbm_cm.sbdev_sbp;
1265 		ASSERT(t_mp != NULL);
1266 		ASSERT(t_mp->sbm_peer == s_mp);
1267 
1268 		/* must always match the source board's state */
1269 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1270 	} else {
1271 		/* this is no target unit */
1272 		t_mp = NULL;
1273 	}
1274 
1275 	switch (state) {
1276 	case SBD_STATE_UNREFERENCED:	/* state set by sbd_release_mem_done */
1277 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1278 
1279 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1280 			PR_MEM("%s: undoing target board %d memory delete\n",
1281 				f, tsbp->sb_num);
1282 			sbd_add_memory_spans(tsbp, t_mp->sbm_del_mlist);
1283 			SBD_DEV_CLR_UNREFERENCED(tsbp, SBD_COMP_MEM,
1284 				t_mp->sbm_cm.sbdev_unum);
1285 		}
1286 
1287 		if (s_mp->sbm_del_mlist != NULL) {
1288 			PR_MEM("%s: undoing board %d memory delete\n",
1289 				f, sbp->sb_num);
1290 			sbd_add_memory_spans(sbp, s_mp->sbm_del_mlist);
1291 		}
1292 
1293 		/*FALLTHROUGH*/
1294 
1295 	case SBD_STATE_CONFIGURED:
1296 		/*
1297 		 * we got here because of an error early in the release process
1298 		 * Just leave the memory as is and report the error
1299 		 */
1300 
1301 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1302 
1303 		if (t_mp != NULL) {
1304 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1305 			t_mp->sbm_del_mlist = NULL;
1306 
1307 			if (t_mp->sbm_mlist != NULL) {
1308 				memlist_delete(t_mp->sbm_mlist);
1309 				t_mp->sbm_mlist = NULL;
1310 			}
1311 
1312 			t_mp->sbm_peer = NULL;
1313 			t_mp->sbm_flags = 0;
1314 			t_mp->sbm_cm.sbdev_busy = 0;
1315 			sbd_init_mem_unit_data(t_mp, ep);
1316 
1317 			SBD_DEV_CLR_RELEASED(tsbp, SBD_COMP_MEM,
1318 				t_mp->sbm_cm.sbdev_unum);
1319 
1320 			SBD_DEVICE_TRANSITION(tsbp, SBD_COMP_MEM,
1321 				t_mp->sbm_cm.sbdev_unum,
1322 				SBD_STATE_CONFIGURED);
1323 		}
1324 
1325 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1326 			memlist_delete(s_mp->sbm_del_mlist);
1327 		s_mp->sbm_del_mlist = NULL;
1328 
1329 		if (s_mp->sbm_mlist != NULL) {
1330 			memlist_delete(s_mp->sbm_mlist);
1331 			s_mp->sbm_mlist = NULL;
1332 		}
1333 
1334 		s_mp->sbm_peer = NULL;
1335 		s_mp->sbm_flags = 0;
1336 		s_mp->sbm_cm.sbdev_busy = 0;
1337 		sbd_init_mem_unit_data(s_mp, ep);
1338 
1339 		return (0);
1340 	default:
1341 		PR_MEM("%s: WARNING unexpected state (%d) for "
1342 			"mem-unit %d.%d\n",
1343 			f,
1344 			(int)state,
1345 			sbp->sb_num,
1346 			s_mp->sbm_cm.sbdev_unum);
1347 
1348 		return (-1);
1349 	}
1350 	/*NOTREACHED*/
1351 }
1352 
1353 void
1354 sbd_init_mem_unit(sbd_board_t *sbp, int unit, sbderror_t *ep)
1355 {
1356 	sbd_istate_t	new_state;
1357 	sbd_mem_unit_t	*mp;
1358 	dev_info_t	*cur_mc_dip;
1359 	int		failed_mcs = 0, present_mcs = 0;
1360 	sbd_cond_t	mc_cond;
1361 	int		i;
1362 
1363 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1364 
1365 	if (SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_MEM, unit)) {
1366 		new_state = SBD_STATE_CONFIGURED;
1367 	} else if (SBD_DEV_IS_PRESENT(sbp, SBD_COMP_MEM, unit)) {
1368 		new_state = SBD_STATE_CONNECTED;
1369 	} else if (mp->sbm_cm.sbdev_dip != NULL) {
1370 		new_state = SBD_STATE_OCCUPIED;
1371 	} else {
1372 		new_state = SBD_STATE_EMPTY;
1373 	}
1374 
1375 	/*
1376 	 * Check all the possible memory nodes on the board.  If all of them
1377 	 * have a failed status mark memory as failed. Otherwise mem is ok
1378 	 */
1379 	if (!sbp->sb_memaccess_ok) {
1380 		mp->sbm_cm.sbdev_cond = SBD_COND_UNKNOWN;
1381 		return;
1382 	}
1383 
1384 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
1385 		cur_mc_dip = mp->sbm_dip[i];
1386 
1387 		if (cur_mc_dip == NULL)
1388 			continue;
1389 
1390 		present_mcs |= (1 << i);
1391 
1392 		mc_cond = sbd_get_comp_cond(cur_mc_dip);
1393 		if (mc_cond == SBD_COND_FAILED) {
1394 			failed_mcs |= (1 << i);
1395 		}
1396 	}
1397 
1398 	if (failed_mcs == present_mcs) {
1399 		/*
1400 		 * All mem nodes failed, therefore mark all mem
1401 		 * as failed
1402 		 */
1403 		mp->sbm_cm.sbdev_cond = SBD_COND_FAILED;
1404 	} else {
1405 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1406 	}
1407 
1408 	sbd_init_mem_unit_data(mp, ep);
1409 
1410 	/*
1411 	 * Any changes to this memory unit should be performed above
1412 	 * this call to ensure the unit is fully initialized
1413 	 * before transitioning to the new state.
1414 	 */
1415 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, new_state);
1416 
1417 }
1418 
1419 static void
1420 sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t *ep)
1421 {
1422 	uint64_t	basepa;
1423 	uint64_t	sz;
1424 	sbd_board_t	*sbp = mp->sbm_cm.sbdev_sbp;
1425 	sbdp_handle_t	*hdp;
1426 	static fn_t	f = "sbd_init_mem_unit_data";
1427 	sbd_handle_t	*hp = MACHBD2HD(sbp);
1428 
1429 	PR_MEM("%s...\n", f);
1430 
1431 	/* a little sanity checking */
1432 	ASSERT(mp->sbm_peer == NULL);
1433 	ASSERT(mp->sbm_flags == 0);
1434 
1435 	hdp = sbd_get_sbdp_handle(sbp, hp);
1436 
1437 	/* get basepfn of mem unit */
1438 	if (sbdphw_get_base_physaddr(hdp, mp->sbm_cm.sbdev_dip, &basepa)) {
1439 		cmn_err(CE_WARN, "sbd:%s: failed to get physaddr"
1440 			" for mem-unit (%d.%d)",
1441 			f,
1442 			sbp->sb_num,
1443 			mp->sbm_cm.sbdev_unum);
1444 		SBD_GET_PERR(hdp->h_err, ep);
1445 		sbd_release_sbdp_handle(hdp);
1446 		return;
1447 	}
1448 	mp->sbm_basepfn = _b64top(basepa);
1449 
1450 	/* attempt to get number of pages from PDA */
1451 	mp->sbm_npages = sbdp_get_mem_size(hdp);
1452 
1453 	/* if didn't work, calculate using memlist */
1454 	if (mp->sbm_npages == 0) {
1455 		struct memlist	*ml, *mlist;
1456 		mlist = sbd_get_memlist(mp, ep);
1457 		for (ml = mlist; ml; ml = ml->next)
1458 			mp->sbm_npages += btop(ml->size);
1459 		memlist_delete(mlist);
1460 	}
1461 
1462 
1463 	if (sbdp_get_mem_alignment(hdp, mp->sbm_cm.sbdev_dip, &sz)) {
1464 		cmn_err(CE_WARN,
1465 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1466 			f, sbp->sb_num, mp->sbm_cm.sbdev_unum);
1467 		SBD_GET_PERR(hdp->h_err, ep);
1468 		sbd_release_sbdp_handle(hdp);
1469 		return;
1470 	}
1471 	mp->sbm_alignment_mask = _b64top(sz);
1472 
1473 
1474 	mp->sbm_interleave = sbdp_isinterleaved(hdp,
1475 	    mp->sbm_cm.sbdev_dip);
1476 
1477 	PR_MEM("%s: board %d (basepfn = 0x%lx, npgs = 0x%lx interleave %d)\n",
1478 		f, sbp->sb_num,
1479 		mp->sbm_basepfn,
1480 		mp->sbm_npages,
1481 		mp->sbm_interleave);
1482 
1483 	sbd_release_sbdp_handle(hdp);
1484 }
1485 
1486 static int
1487 sbd_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
1488 {
1489 	int		err;
1490 	pfn_t		base;
1491 	pgcnt_t		npgs;
1492 	struct memlist	*mc;
1493 	static fn_t	f = "sbd_reserve_mem_spans";
1494 
1495 	PR_MEM("%s...\n", f);
1496 
1497 	/*
1498 	 * Walk the supplied memlist scheduling each span for removal
1499 	 * with kphysm_del_span.  It is possible that a span may intersect
1500 	 * an area occupied by the cage.
1501 	 */
1502 	for (mc = ml; mc != NULL; mc = mc->next) {
1503 		base = _b64top(mc->address);
1504 		npgs = _b64top(mc->size);
1505 
1506 		err = kphysm_del_span(*mhp, base, npgs);
1507 		if (err != KPHYSM_OK) {
1508 			cmn_err(CE_WARN, "sbd:%s memory reserve failed."
1509 				" unexpected kphysm_del_span return value %d;"
1510 				" basepfn=0x%lx npages=%ld",
1511 				f, err, base, npgs);
1512 			return (-1);
1513 		}
1514 	}
1515 	return (0);
1516 }
1517 
/* debug counters */
/*
 * NOTE(review): sbd_smt_realigned is not updated by the target selection
 * code that follows; presumably a leftover counter -- confirm against the
 * rest of the file before relying on it.
 */
int sbd_smt_realigned;
/*
 * Bumped by sbd_select_mem_target() once for every candidate memory unit
 * it categorizes, indexed by the unit's preference value (0 = same size
 * as the source, 1 = larger than the source).
 */
int sbd_smt_preference[4];

#ifdef DEBUG
/*
 * Bitmask consulted by sbd_select_mem_target() in DEBUG kernels to keep
 * selected boards from being chosen as a copy/rename target.
 */
uint_t sbd_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
1525 
1526 /*
1527  * Verify that there is no memory overlapping if copy-rename is
1528  * done with the selected target board.
1529  *
1530  * Returns 0 if OK, -1 otherwise.
1531  */
1532 static int
1533 sbd_check_boundaries(struct memlist *orig_memlist, sbd_mem_unit_t *s_mp,
1534 	sbd_mem_unit_t *t_mp)
1535 {
1536 	struct memlist	*new_memlist;
1537 	int mlret;
1538 	static fn_t	f = "sbd_check_boundaries";
1539 
1540 	new_memlist = memlist_dup(orig_memlist);
1541 	if (new_memlist == NULL) {
1542 		PR_MEM("%s: can't dup original memlist\n", f);
1543 		return (-1);
1544 	}
1545 
1546 	mlret = memlist_delete_span(
1547 		_ptob64(s_mp->sbm_basepfn),
1548 		_ptob64(s_mp->sbm_npages),
1549 		&new_memlist);
1550 	if (mlret != MEML_SPANOP_OK) {
1551 		PR_MEM("%s: del s/s mlret = %d\n", f, mlret);
1552 		goto check_done;
1553 	}
1554 
1555 	mlret = memlist_delete_span(
1556 		_ptob64(t_mp->sbm_basepfn),
1557 		_ptob64(t_mp->sbm_npages),
1558 		&new_memlist);
1559 	if (mlret != MEML_SPANOP_OK) {
1560 		PR_MEM("%s: del t/t mlret = %d\n", f, mlret);
1561 		goto check_done;
1562 	}
1563 
1564 	mlret = memlist_add_span(
1565 		_ptob64(t_mp->sbm_basepfn),
1566 		_ptob64(s_mp->sbm_npages),
1567 		&new_memlist);
1568 	if (mlret != MEML_SPANOP_OK) {
1569 		PR_MEM("%s: add t/s mlret = %d\n", f, mlret);
1570 		goto check_done;
1571 	}
1572 
1573 	mlret = memlist_add_span(
1574 		_ptob64(s_mp->sbm_basepfn),
1575 		_ptob64(t_mp->sbm_npages),
1576 		&new_memlist);
1577 	if (mlret != MEML_SPANOP_OK) {
1578 		PR_MEM("%s: add s/t mlret = %d\n", f, mlret);
1579 	}
1580 
1581 check_done:
1582 	memlist_delete(new_memlist);
1583 
1584 	if (mlret == MEML_SPANOP_OK)
1585 		return (0);
1586 	else
1587 		return (-1);
1588 }
1589 
1590 /*
1591  * Find and reserve a copy/rename target board suitable for the
1592  * given source board.
1593  * All boards in the system are examined and categorized in relation to
1594  * their memory size versus the source board's memory size.  Order of
1595  * preference is:
1596  *	1st: board has same memory size
1597  * 	2nd: board has larger memory size
1598  *	3rd: board has smaller memory size
1599  *	4th: board has smaller memory size, available memory will be reduced.
1600  * Boards in category 3 and 4 will have their MC's reprogrammed to locate the
1601  * span to which the MC responds to address span that appropriately covers
1602  * the nonrelocatable span of the source board.
1603  */
1604 static int
1605 sbd_select_mem_target(sbd_handle_t *hp,
1606 	sbd_mem_unit_t *s_mp, struct memlist *s_ml)
1607 {
1608 	uint64_t	sz;
1609 	pgcnt_t		sm;
1610 	int		n_sets = 4; /* same, larger, smaller, clipped */
1611 	int		preference; /* lower value is higher preference */
1612 	int		n_units_per_set;
1613 	int		idx;
1614 	sbd_mem_unit_t	**sets;
1615 	sbdp_handle_t	*hdp;
1616 	int		t_bd;
1617 	sbd_softstate_t	*softsp;
1618 	int		t_unit;
1619 	int		max_boards;
1620 	int		rv;
1621 	sbd_board_t	*s_sbp, *t_sbp;
1622 	sbd_mem_unit_t	*t_mp, *c_mp;
1623 	struct memlist	*d_ml, *t_ml, *x_ml;
1624 	memquery_t	s_mq = {0};
1625 	static fn_t	f = "sbd_select_mem_target";
1626 
1627 	PR_MEM("%s...\n", f);
1628 
1629 	ASSERT(s_ml != NULL);
1630 
1631 	s_sbp = s_mp->sbm_cm.sbdev_sbp;
1632 
1633 	hdp = sbd_get_sbdp_handle(s_sbp, hp);
1634 
1635 	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
1636 		sbderror_t	*ep = SBD_HD2ERR(hp);
1637 		cmn_err(CE_WARN,
1638 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1639 			f, s_sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
1640 		SBD_GET_PERR(hdp->h_err, ep);
1641 		sbd_release_sbdp_handle(hdp);
1642 		return (-1);
1643 	}
1644 	sm = sz - 1;
1645 	sbd_release_sbdp_handle(hdp);
1646 
1647 	softsp = (sbd_softstate_t *)s_sbp->sb_softsp;
1648 
1649 	max_boards = softsp->max_boards;
1650 	n_units_per_set = max_boards * MAX_MEM_UNITS_PER_BOARD;
1651 	sets = GETSTRUCT(sbd_mem_unit_t *, n_units_per_set * n_sets);
1652 
1653 	/*
1654 	 * Make one pass through all memory units on all boards
1655 	 * and categorize them with respect to the source board.
1656 	 */
1657 	for (t_bd = 0; t_bd < max_boards; t_bd++) {
1658 		/*
1659 		 * The board structs are a contiguous array
1660 		 * so we take advantage of that to find the
1661 		 * correct board struct pointer for a given
1662 		 * board number.
1663 		 */
1664 		t_sbp = (sbd_board_t *)softsp->sbd_boardlist;
1665 		t_sbp += t_bd;
1666 
1667 		/* source board can not be its own target */
1668 		if (s_sbp->sb_num == t_sbp->sb_num)
1669 			continue;
1670 
1671 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
1672 
1673 			t_mp = SBD_GET_BOARD_MEMUNIT(t_sbp, t_unit);
1674 
1675 			/* this memory node must be attached */
1676 			if (!SBD_DEV_IS_ATTACHED(t_sbp, SBD_COMP_MEM, t_unit))
1677 				continue;
1678 
1679 			/* source unit can not be its own target */
1680 			if (s_mp == t_mp) {
1681 				/* catch this in debug kernels */
1682 				ASSERT(0);
1683 				continue;
1684 			}
1685 
1686 			/*
1687 			 * this memory node must not already be reserved
1688 			 * by some other memory delete operation.
1689 			 */
1690 			if (t_mp->sbm_flags & SBD_MFLAG_RESERVED)
1691 				continue;
1692 
1693 			/*
1694 			 * categorize the memory node
1695 			 * If this is a smaller memory node, create a
1696 			 * temporary, edited copy of the source board's
1697 			 * memlist containing only the span of the non-
1698 			 * relocatable pages.
1699 			 */
1700 			if (t_mp->sbm_npages == s_mp->sbm_npages) {
1701 				preference = 0;
1702 				t_mp->sbm_slice_offset = 0;
1703 			} else if (t_mp->sbm_npages > s_mp->sbm_npages) {
1704 				preference = 1;
1705 				t_mp->sbm_slice_offset = 0;
1706 			} else {
1707 				/*
1708 				 * We do not allow other options right now
1709 				 */
1710 				continue;
1711 			}
1712 
1713 			sbd_smt_preference[preference]++;
1714 
1715 			/* calculate index to start of preference set */
1716 			idx  = n_units_per_set * preference;
1717 			/* calculate offset to respective element */
1718 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
1719 
1720 			ASSERT(idx < n_units_per_set * n_sets);
1721 			sets[idx] = t_mp;
1722 		}
1723 	}
1724 
1725 	/*
1726 	 * NOTE: this would be a good place to sort each candidate
1727 	 * set in to some desired order, e.g. memory size in ascending
1728 	 * order.  Without an additional sorting step here, the order
1729 	 * within a set is ascending board number order.
1730 	 */
1731 
1732 	c_mp = NULL;
1733 	x_ml = NULL;
1734 	t_ml = NULL;
1735 	for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
1736 		memquery_t mq;
1737 
1738 		/* cleanup t_ml after previous pass */
1739 		if (t_ml != NULL) {
1740 			memlist_delete(t_ml);
1741 			t_ml = NULL;
1742 		}
1743 
1744 		/* get candidate target board mem unit */
1745 		t_mp = sets[idx];
1746 		if (t_mp == NULL)
1747 			continue;
1748 
1749 		t_sbp = t_mp->sbm_cm.sbdev_sbp;
1750 
1751 		/* get target board memlist */
1752 		t_ml = sbd_get_memlist(t_mp, SBD_HD2ERR(hp));
1753 		if (t_ml == NULL) {
1754 			cmn_err(CE_WARN, "sbd:%s: no memlist for"
1755 				" mem-unit %d, board %d",
1756 				f,
1757 				t_sbp->sb_num,
1758 				t_mp->sbm_cm.sbdev_unum);
1759 
1760 			continue;
1761 		}
1762 
1763 		/* get appropriate source board memlist */
1764 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1765 			spgcnt_t excess;
1766 
1767 			/*
1768 			 * make a copy of the source board memlist
1769 			 * then edit it to remove the spans that
1770 			 * are outside the calculated span of
1771 			 * [pfn..s_mq.last_nonrelocatable].
1772 			 */
1773 			if (x_ml != NULL)
1774 				memlist_delete(x_ml);
1775 
1776 			x_ml = memlist_dup(s_ml);
1777 			if (x_ml == NULL) {
1778 				PR_MEM("%s: memlist_dup failed\n", f);
1779 				/* TODO: should abort */
1780 				continue;
1781 			}
1782 
1783 			/* trim off lower portion */
1784 			excess = t_mp->sbm_slice_offset;
1785 			if (excess > 0) {
1786 				int mlret;
1787 
1788 				mlret = memlist_delete_span(
1789 					_ptob64(s_mp->sbm_basepfn),
1790 					_ptob64(excess),
1791 					&x_ml);
1792 				PR_MEM("%s: mlret = %d\n", f, mlret);
1793 			}
1794 
1795 			/*
1796 			 * Since this candidate target board is smaller
1797 			 * than the source board, s_mq must have been
1798 			 * initialized in previous loop while processing
1799 			 * this or some other candidate board.
1800 			 * FIXME: this is weak.
1801 			 */
1802 			ASSERT(s_mq.phys_pages != 0);
1803 
1804 			/* trim off upper portion */
1805 			excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
1806 				- (s_mq.last_nonrelocatable + 1);
1807 			if (excess > 0) {
1808 				pfn_t p;
1809 				int mlret;
1810 
1811 				p  = s_mq.last_nonrelocatable + 1;
1812 				p -= excess;
1813 
1814 				mlret = memlist_delete_span(
1815 					_ptob64(p),
1816 					_ptob64(excess),
1817 					&x_ml);
1818 				PR_MEM("%s: mlret = %d\n", f, mlret);
1819 			}
1820 
1821 			PR_MEM("%s: brd %d: edited source memlist:\n",
1822 				f, s_sbp->sb_num);
1823 			SBD_MEMLIST_DUMP(x_ml);
1824 
1825 #ifdef DEBUG
1826 			/* sanity check memlist */
1827 			d_ml = x_ml;
1828 			while (d_ml->next != NULL)
1829 				d_ml = d_ml->next;
1830 			ASSERT(x_ml->address == _ptob64(s_mp->sbm_basepfn) +
1831 				_ptob64(t_mp->sbm_slice_offset));
1832 			ASSERT(d_ml->address + d_ml->size ==
1833 				_ptob64(s_mq.last_nonrelocatable + 1));
1834 #endif
1835 
1836 			/*
1837 			 * x_ml now describes only the portion of the
1838 			 * source board that will be moved during the
1839 			 * copy/rename operation.
1840 			 */
1841 			d_ml = x_ml;
1842 		} else {
1843 			/* use original memlist; all spans will be moved */
1844 			d_ml = s_ml;
1845 		}
1846 
1847 		/* verify target can support source memory spans. */
1848 		if (memlist_canfit(d_ml, t_ml) == 0) {
1849 			PR_MEM("%s: source memlist won't"
1850 				" fit in target memlist\n", f);
1851 			PR_MEM("%s: source memlist:\n", f);
1852 			SBD_MEMLIST_DUMP(d_ml);
1853 			PR_MEM("%s: target memlist:\n", f);
1854 			SBD_MEMLIST_DUMP(t_ml);
1855 
1856 			continue;
1857 		}
1858 
1859 		/* NOTE: the value of d_ml is not used beyond this point */
1860 
1861 		PR_MEM("%s: checking for no-reloc on board %d, "
1862 			" basepfn=0x%lx, npages=%ld\n",
1863 			f,
1864 			t_sbp->sb_num,
1865 			t_mp->sbm_basepfn,
1866 			t_mp->sbm_npages);
1867 
1868 		rv = kphysm_del_span_query(
1869 			t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
1870 		if (rv != KPHYSM_OK) {
1871 			PR_MEM("%s: kphysm_del_span_query:"
1872 				" unexpected return value %d\n", f, rv);
1873 
1874 			continue;
1875 		}
1876 
1877 		if (mq.nonrelocatable != 0) {
1878 			PR_MEM("%s: candidate board %d has"
1879 				" nonrelocatable span [0x%lx..0x%lx]\n",
1880 				f,
1881 				t_sbp->sb_num,
1882 				mq.first_nonrelocatable,
1883 				mq.last_nonrelocatable);
1884 
1885 			continue;
1886 		}
1887 
1888 #ifdef DEBUG
1889 		/*
1890 		 * This is a debug tool for excluding certain boards
1891 		 * from being selected as a target board candidate.
1892 		 * sbd_ignore_board is only tested by this driver.
1893 		 * It must be set with adb, obp, /etc/system or your
1894 		 * favorite debugger.
1895 		 */
1896 		if (sbd_ignore_board &
1897 			(1 << (t_sbp->sb_num - 1))) {
1898 			PR_MEM("%s: sbd_ignore_board flag set,"
1899 				" ignoring board %d as candidate\n",
1900 				f, t_sbp->sb_num);
1901 			continue;
1902 		}
1903 #endif
1904 
1905 		/*
1906 		 * Make sure there is no memory overlap if this
1907 		 * target board is used for copy-rename.
1908 		 */
1909 		if (sbd_check_boundaries(phys_install, s_mp, t_mp) != 0)
1910 			continue;
1911 
1912 		/*
1913 		 * Reserve excess source board memory, if any.
1914 		 *
1915 		 * When the number of pages on the candidate target
1916 		 * board is less than the number of pages on the source,
1917 		 * then some spans (clearly) of the source board's address
1918 		 * space will not be covered by physical memory after the
1919 		 * copy/rename completes.  The following code block
1920 		 * schedules those spans to be deleted.
1921 		 */
1922 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1923 			pfn_t pfn;
1924 			int mlret;
1925 
1926 			d_ml = memlist_dup(s_ml);
1927 			if (d_ml == NULL) {
1928 				PR_MEM("%s: cant dup src brd memlist\n", f);
1929 				/* TODO: should abort */
1930 				continue;
1931 			}
1932 
1933 			/* calculate base pfn relative to target board */
1934 			pfn  = s_mp->sbm_basepfn & ~sm;
1935 			pfn += t_mp->sbm_slice_offset;
1936 
1937 			/* remove span that will reside on candidate board */
1938 			mlret = memlist_delete_span(
1939 				_ptob64(pfn),
1940 				_ptob64(t_mp->sbm_npages),
1941 				&d_ml);
1942 			PR_MEM("%s: mlret = %d\n", f, mlret);
1943 
1944 			PR_MEM("%s: brd %d: reserving src brd memlist:\n",
1945 				f, s_sbp->sb_num);
1946 			SBD_MEMLIST_DUMP(d_ml);
1947 
1948 			/* reserve excess spans */
1949 			if (sbd_reserve_mem_spans(
1950 				&s_mp->sbm_memhandle, d_ml) != 0) {
1951 
1952 				/* likely more non-reloc pages appeared */
1953 				/* TODO: restart from top? */
1954 				continue;
1955 			}
1956 		} else {
1957 			/* no excess source board memory */
1958 			d_ml = NULL;
1959 		}
1960 
1961 		s_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1962 
1963 		/*
1964 		 * reserve all memory on target board.
1965 		 * NOTE: source board's memhandle is used.
1966 		 *
1967 		 * If this succeeds (eq 0), then target selection is
1968 		 * complete and all unwanted memory spans, both source and
1969 		 * target, have been reserved.  Loop is terminated.
1970 		 */
1971 		if (sbd_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
1972 			PR_MEM("%s: brd %d: target board memory reserved\n",
1973 				f, t_sbp->sb_num);
1974 
1975 			/* a candidate target board is now reserved */
1976 			t_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1977 			c_mp = t_mp;
1978 
1979 			/* *** EXITING LOOP *** */
1980 			break;
1981 		}
1982 
1983 		/* did not successfully reserve the target board. */
1984 		PR_MEM("%s: could not reserve target board %d\n",
1985 			f, t_sbp->sb_num);
1986 
1987 		/*
1988 		 * NOTE: an undo of the sbd_reserve_mem_span work
1989 		 * will happen automatically when the memhandle
1990 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
1991 		 */
1992 
1993 		s_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1994 	}
1995 
1996 	/* clean up after memlist editing logic */
1997 	if (x_ml != NULL)
1998 		memlist_delete(x_ml);
1999 
2000 	FREESTRUCT(sets, sbd_mem_unit_t *, n_units_per_set * n_sets);
2001 
2002 	/*
2003 	 * c_mp will be NULL when the entire sets[] array
2004 	 * has been searched without reserving a target board.
2005 	 */
2006 	if (c_mp == NULL) {
2007 		PR_MEM("%s: brd %d: target selection failed.\n",
2008 			f, s_sbp->sb_num);
2009 
2010 		if (t_ml != NULL)
2011 			memlist_delete(t_ml);
2012 
2013 		return (-1);
2014 	}
2015 
2016 	PR_MEM("%s: found target board %d for source board %d\n",
2017 		f,
2018 		t_sbp->sb_num,
2019 		s_sbp->sb_num);
2020 
2021 	s_mp->sbm_peer = c_mp;
2022 	s_mp->sbm_flags |= SBD_MFLAG_SOURCE;
2023 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2024 	s_mp->sbm_mlist = s_ml;
2025 	s_mp->sbm_cm.sbdev_busy = 1;
2026 
2027 	c_mp->sbm_peer = s_mp;
2028 	c_mp->sbm_flags |= SBD_MFLAG_TARGET;
2029 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2030 	c_mp->sbm_mlist = t_ml;
2031 	c_mp->sbm_cm.sbdev_busy = 1;
2032 
2033 	s_mp->sbm_flags &= ~SBD_MFLAG_MEMRESIZE;
2034 	if (c_mp->sbm_npages > s_mp->sbm_npages) {
2035 		s_mp->sbm_flags |= SBD_MFLAG_MEMUPSIZE;
2036 		PR_MEM("%s: upsize (source pgs 0x%lx < target pgs 0x%lx)\n",
2037 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2038 	} else if (c_mp->sbm_npages < s_mp->sbm_npages) {
2039 		s_mp->sbm_flags |= SBD_MFLAG_MEMDOWNSIZE;
2040 		PR_MEM("%s: downsize (source pgs 0x%lx > target pgs 0x%lx)\n",
2041 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2042 	}
2043 
2044 	return (0);
2045 }
2046 
2047 int
2048 sbd_move_memory(sbd_handle_t *hp, sbd_board_t *s_bp, sbd_board_t *t_bp)
2049 {
2050 	int	ret;
2051 	sbdp_handle_t	*hdp;
2052 	sbderror_t	*ep = SBD_HD2ERR(hp);
2053 
2054 	hdp = sbd_get_sbdp_handle(s_bp, hp);
2055 
2056 	ret = sbdp_move_memory(hdp, t_bp->sb_num);
2057 	if (ret != 0)
2058 		SBD_GET_PERR(hdp->h_err, ep);
2059 
2060 	sbd_release_sbdp_handle(hdp);
2061 
2062 	return (ret);
2063 }
2064 
2065 /*
2066  * Memlist support.
2067  */
2068 void
2069 memlist_delete(struct memlist *mlist)
2070 {
2071 	sbdp_handle_t	*hdp;
2072 
2073 	hdp = sbd_get_sbdp_handle(NULL, NULL);
2074 
2075 	(void) sbdp_del_memlist(hdp, mlist);
2076 
2077 	sbd_release_sbdp_handle(hdp);
2078 }
2079 
2080 struct memlist *
2081 memlist_dup(struct memlist *mlist)
2082 {
2083 	struct memlist *hl, *prev;
2084 
2085 	if (mlist == NULL)
2086 		return (NULL);
2087 
2088 	prev = NULL;
2089 	hl = NULL;
2090 	for (; mlist; mlist = mlist->next) {
2091 		struct memlist *mp;
2092 
2093 		mp = memlist_get_one();
2094 		if (mp == NULL) {
2095 			if (hl != NULL)
2096 				memlist_free_list(hl);
2097 			hl = NULL;
2098 			break;
2099 		}
2100 		mp->address = mlist->address;
2101 		mp->size = mlist->size;
2102 		mp->next = NULL;
2103 		mp->prev = prev;
2104 
2105 		if (prev == NULL)
2106 			hl = mp;
2107 		else
2108 			prev->next = mp;
2109 		prev = mp;
2110 	}
2111 
2112 	return (hl);
2113 }
2114 
2115 void
2116 memlist_dump(struct memlist *mlist)
2117 {
2118 	register struct memlist *ml;
2119 
2120 	if (mlist == NULL) {
2121 		PR_MEM("memlist> EMPTY\n");
2122 	} else {
2123 		for (ml = mlist; ml; ml = ml->next)
2124 			PR_MEM("memlist> 0x%" PRIx64 " "
2125 				"0x%" PRIx64 " \n",
2126 				ml->address, ml->size);
2127 	}
2128 }
2129 
2130 int
2131 memlist_intersect(struct memlist *al, struct memlist *bl)
2132 {
2133 	uint64_t	astart, aend, bstart, bend;
2134 
2135 	if ((al == NULL) || (bl == NULL))
2136 		return (0);
2137 
2138 	aend = al->address + al->size;
2139 	bstart = bl->address;
2140 	bend = bl->address + bl->size;
2141 
2142 	while (al && bl) {
2143 		while (al && (aend <= bstart))
2144 			if ((al = al->next) != NULL)
2145 				aend = al->address + al->size;
2146 		if (al == NULL)
2147 			return (0);
2148 
2149 		if ((astart = al->address) <= bstart)
2150 			return (1);
2151 
2152 		while (bl && (bend <= astart))
2153 			if ((bl = bl->next) != NULL)
2154 				bend = bl->address + bl->size;
2155 		if (bl == NULL)
2156 			return (0);
2157 
2158 		if ((bstart = bl->address) <= astart)
2159 			return (1);
2160 	}
2161 
2162 	return (0);
2163 }
2164 
2165 /*
2166  * Determine whether the source memlist (s_mlist) will
2167  * fit into the target memlist (t_mlist) in terms of
2168  * size and holes (i.e. based on same relative base address).
2169  */
2170 static int
2171 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2172 {
2173 	int		rv = 0;
2174 	uint64_t	s_basepa, t_basepa;
2175 	struct memlist	*s_ml, *t_ml;
2176 
2177 	if ((s_mlist == NULL) || (t_mlist == NULL))
2178 		return (0);
2179 
2180 	/*
2181 	 * Base both memlists on common base address (0).
2182 	 */
2183 	s_basepa = s_mlist->address;
2184 	t_basepa = t_mlist->address;
2185 
2186 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
2187 		s_ml->address -= s_basepa;
2188 
2189 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
2190 		t_ml->address -= t_basepa;
2191 
2192 	s_ml = s_mlist;
2193 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2194 		uint64_t	s_start, s_end;
2195 		uint64_t	t_start, t_end;
2196 
2197 		t_start = t_ml->address;
2198 		t_end = t_start + t_ml->size;
2199 
2200 		for (; s_ml; s_ml = s_ml->next) {
2201 			s_start = s_ml->address;
2202 			s_end = s_start + s_ml->size;
2203 
2204 			if ((s_start < t_start) || (s_end > t_end))
2205 				break;
2206 		}
2207 	}
2208 	/*
2209 	 * If we ran out of source memlist chunks that mean
2210 	 * we found a home for all of them.
2211 	 */
2212 	if (s_ml == NULL)
2213 		rv = 1;
2214 
2215 	/*
2216 	 * Need to add base addresses back since memlists
2217 	 * are probably in use by caller.
2218 	 */
2219 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
2220 		s_ml->address += s_basepa;
2221 
2222 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
2223 		t_ml->address += t_basepa;
2224 
2225 	return (rv);
2226 }
2227 
2228 void
2229 sbd_attach_mem(sbd_handle_t *hp, sbderror_t *ep)
2230 {
2231 	sbd_mem_unit_t	*mp;
2232 	dev_info_t	*dip;
2233 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2234 	sbdp_handle_t	*hdp;
2235 	int		err, unit;
2236 	struct memlist	*ml, *mc;
2237 	static fn_t	f = "sbd_attach_mem";
2238 	int		i;
2239 
2240 	PR_MEM("%s...\n", f);
2241 
2242 	/*
2243 	 * all four cpus have to be attached before
2244 	 * configuring mem
2245 	 */
2246 	for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
2247 		sbd_cpu_unit_t	*cpup;
2248 		struct cpu	*cp;
2249 
2250 		if (!SBD_DEV_IS_PRESENT(sbp, SBD_COMP_CPU, i))
2251 			continue;
2252 
2253 		if (!SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_CPU, i))
2254 			goto error;
2255 
2256 		cpup = SBD_GET_BOARD_CPUUNIT(sbp, i);
2257 
2258 		if (cpup == NULL)
2259 			goto error;
2260 
2261 		mutex_enter(&cpu_lock);
2262 		cp = cpu_get(cpup->sbc_cpu_id);
2263 		if (cp == NULL) {
2264 			mutex_exit(&cpu_lock);
2265 			cmn_err(CE_WARN,
2266 			    "sbd:%s: cpu_get failed for cpu %d",
2267 			    f, cpup->sbc_cpu_id);
2268 			goto error;
2269 		}
2270 		if (cpu_is_poweredoff(cp)) {
2271 			mutex_exit(&cpu_lock);
2272 			goto error;
2273 		}
2274 		mutex_exit(&cpu_lock);
2275 		continue;
2276 
2277 error:
2278 		SBD_SET_ERR(ep, ESBD_CPUONLINE);
2279 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[i]);
2280 		(void) sbd_set_err_in_hdl(hp, ep);
2281 		return;
2282 	}
2283 
2284 	dip = *(sbp->sb_devlist[NIX(SBD_COMP_MEM)]);
2285 
2286 	hdp = sbd_get_sbdp_handle(sbp, hp);
2287 	unit = sbdp_get_unit_num(hdp, dip);
2288 	if (unit < 0) {
2289 		SBD_GET_PERR(hdp->h_err, ep);
2290 		sbd_release_sbdp_handle(hdp);
2291 		return;
2292 	}
2293 
2294 	ASSERT(sbp->sb_mempath[unit] != NULL);
2295 	ASSERT(e_ddi_branch_held(dip));
2296 
2297 	(void) ddi_pathname(dip, sbp->sb_mempath[unit]);
2298 
2299 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
2300 
2301 	ml = sbd_get_memlist(mp, ep);
2302 	if (ml == NULL) {
2303 		cmn_err(CE_WARN,
2304 			"sbd:%s: failed to get memlist for "
2305 			"board %d", f, sbp->sb_num);
2306 		/*
2307 		 * Need to record an error and return.
2308 		 */
2309 		SBD_SET_ERR(ep, ESBD_MEMFAIL);
2310 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2311 		sbd_release_sbdp_handle(hdp);
2312 		return;
2313 	}
2314 
2315 	SBD_MEMLIST_DUMP(ml);
2316 	err = 0;
2317 	for (mc = ml; mc; mc = mc->next) {
2318 		update_membounds_t umb;
2319 		pfn_t	base;
2320 		pgcnt_t npgs;
2321 
2322 		base = (pfn_t)(mc->address >> PAGESHIFT);
2323 		npgs = (pgcnt_t)(mc->size >> PAGESHIFT);
2324 
2325 		umb.u_board = sbp->sb_num;
2326 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
2327 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
2328 
2329 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2330 		err = kphysm_add_memory_dynamic(base, npgs);
2331 
2332 		if (err != KPHYSM_OK) {
2333 			cmn_err(CE_WARN,
2334 			    "%s: kphysm_add_memory_dynamic fail %d", f, err);
2335 
2336 			/* translate kphysm error */
2337 			switch (err) {
2338 			case KPHYSM_ERESOURCE:
2339 				err = ESBD_NOMEM;
2340 				break;
2341 
2342 			case KPHYSM_EFAULT:
2343 				err = ESBD_FAULT;
2344 				break;
2345 
2346 			default:
2347 				err = ESBD_INVAL;
2348 				break;
2349 			}
2350 			break;
2351 		}
2352 
2353 		err = kcage_range_add(base, npgs, KCAGE_DOWN);
2354 		if (err != 0) {
2355 			cmn_err(CE_WARN,
2356 			    "%s: kcage_range_add fail %d", f, err);
2357 
2358 			/* Translate kcage error. */
2359 			switch (err) {
2360 			case ENOMEM:
2361 				err = ESBD_NOMEM;
2362 				break;
2363 			default:
2364 				err = ESBD_INVAL;
2365 				break;
2366 			}
2367 			break;
2368 		}
2369 		(void) sbdp_mem_add_span(hdp, mc->address, mc->size);
2370 	}
2371 
2372 	if (err != 0) {
2373 		SBD_SET_ERR(ep, err);
2374 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2375 	}
2376 
2377 	memlist_delete(ml);
2378 	sbd_release_sbdp_handle(hdp);
2379 
2380 	/*
2381 	 * Now attach all mem devinfo nodes to the device tree.
2382 	 */
2383 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
2384 		if (mp->sbm_dip[i] == NULL)
2385 			continue;
2386 		ASSERT(e_ddi_branch_held(mp->sbm_dip[i]));
2387 		if (e_ddi_branch_configure(mp->sbm_dip[i], NULL, 0) &&
2388 		    SBD_GET_ERR(ep) == 0) {
2389 			SBD_SET_ERR(ep, ESBD_INVAL);
2390 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2391 		}
2392 	}
2393 }
2394 
/*
 * State shared between sbd_release_mem(), which waits for a memory
 * delete to finish, and sbd_release_memory_done(), the completion
 * callback that wakes it up.
 */
typedef struct {
	kcondvar_t cond;	/* signalled by the callback when finished */
	kmutex_t lock;		/* held while reading/writing error and done */
	int error;		/* status value passed to the callback */
	int done;		/* set to 1 by the callback */
} sbd_release_mem_sync_t;
2401 
2402 /*
2403  * When we reach here the memory being drained should have
2404  * already been reserved in sbd_pre_release_mem().
2405  * Our only task here is to kick off the "drain".
2406  * Returns -1 when error encountered or zero for success.
2407  */
2408 int
2409 sbd_release_mem(sbd_handle_t *hp, dev_info_t *dip, int unit)
2410 {
2411 	memhandle_t	mh;
2412 	int		err;
2413 	int		cancel_flag = 0;
2414 	int		e_code = 0;
2415 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2416 	sbd_release_mem_sync_t rms;
2417 	static fn_t	f = "sbd_release_mem";
2418 
2419 	/*
2420 	 * If this node has a scheduled memory delete operation,
2421 	 * it will have a memhandle.  If it does have a memhandle (the
2422 	 * return value of sbd_get_memhandle is zero when true),
2423 	 * then perform the delete.
2424 	 */
2425 
2426 	if ((cancel_flag = sbd_get_memhandle(hp, dip, &mh)) != 0) {
2427 		cmn_err(CE_WARN, "%s: couldn't get the memhandle\n", f);
2428 		return (cancel_flag);
2429 	}
2430 
2431 	bzero((void *) &rms, sizeof (rms));
2432 
2433 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
2434 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
2435 
2436 	mutex_enter(&rms.lock);
2437 	err = kphysm_del_start(mh, sbd_release_memory_done, (void *) &rms);
2438 	if (err == KPHYSM_OK) {
2439 		/* wait for completion */
2440 		while (!rms.done) {
2441 			if (cancel_flag) {
2442 				/* previously canceled */
2443 				cv_wait(&rms.cond, &rms.lock);
2444 			} else if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
2445 				/* interrupted: cancel and wait */
2446 				cancel_flag = -1;
2447 				(void) kphysm_del_cancel(mh);
2448 			}
2449 		}
2450 		/* get the result of the memory delete operation */
2451 		err = rms.error;
2452 	} else {
2453 		(void) kphysm_del_release(mh);
2454 	}
2455 
2456 	mutex_exit(&rms.lock);
2457 
2458 	cv_destroy(&rms.cond);
2459 	mutex_destroy(&rms.lock);
2460 
2461 	if (err != KPHYSM_OK) {
2462 		switch (err) {
2463 			case KPHYSM_ENOWORK:
2464 				e_code = ESBD_NOERROR;
2465 				break;
2466 
2467 			case KPHYSM_EHANDLE:
2468 			case KPHYSM_ESEQUENCE:
2469 				e_code = ESBD_INTERNAL;
2470 				break;
2471 
2472 			case KPHYSM_ENOTVIABLE:
2473 				e_code = ESBD_MEM_NOTVIABLE;
2474 				break;
2475 
2476 			case KPHYSM_EREFUSED:
2477 				e_code = ESBD_MEM_REFUSED;
2478 				break;
2479 
2480 			case KPHYSM_ENONRELOC:
2481 				e_code = ESBD_MEM_NONRELOC;
2482 				break;
2483 
2484 			case KPHYSM_ECANCELLED:
2485 				e_code = ESBD_MEM_CANCELLED;
2486 				break;
2487 
2488 			case KPHYSM_ERESOURCE:
2489 				e_code = ESBD_MEMFAIL;
2490 				break;
2491 
2492 			default:
2493 				cmn_err(CE_WARN, "sbd:%s:"
2494 					" unexpected kphysm error code %d,"
2495 					" dip 0x%p",
2496 					f, err, (void *)dip);
2497 
2498 				e_code = ESBD_IO;
2499 				break;
2500 		}
2501 
2502 		if (e_code != 0) {
2503 			cancel_flag = -1;
2504 			SBD_SET_ERR(SBD_HD2ERR(hp), e_code);
2505 			SBD_SET_ERRSTR(SBD_HD2ERR(hp), sbp->sb_mempath[unit]);
2506 		}
2507 	}
2508 
2509 	return (cancel_flag);
2510 }
2511 
2512 /*
2513  * Memory has been logically removed by the time this routine is called.
2514  */
2515 void
2516 sbd_release_memory_done(void *arg, int error)
2517 {
2518 	sbd_release_mem_sync_t *ds = arg;
2519 
2520 	mutex_enter(&ds->lock);
2521 	ds->error = error;
2522 	ds->done = 1;
2523 	cv_signal(&ds->cond);
2524 	mutex_exit(&ds->lock);
2525 }
2526 
2527 /*
2528  * If detaching node contains memory that is "non-permanent"
2529  * then the memory adr's are simply cleared.  If the memory
2530  * is non-relocatable, then do a copy-rename.
2531  */
2532 int
2533 sbd_detach_memory(sbd_handle_t *hp, sbderror_t *ep, sbd_mem_unit_t *s_mp,
2534 	int unit)
2535 {
2536 	int			rv;
2537 	sbd_mem_unit_t		*t_mp;
2538 	sbd_istate_t		state;
2539 	sbdp_handle_t		*hdp;
2540 	sbd_board_t 		*sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
2541 	sbd_board_t		*tbp;
2542 	static fn_t		f = "sbd_detach_memory";
2543 
2544 	PR_MEM("%s...\n", f);
2545 
2546 	/* lookup target mem unit and target board structure, if any */
2547 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
2548 		t_mp = s_mp->sbm_peer;
2549 		ASSERT(t_mp != NULL);
2550 		ASSERT(t_mp->sbm_peer == s_mp);
2551 		tbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
2552 	} else {
2553 		t_mp = NULL;
2554 	}
2555 
2556 	/* verify mem unit's state is UNREFERENCED */
2557 	state = s_mp->sbm_cm.sbdev_state;
2558 	if (state != SBD_STATE_UNREFERENCED) {
2559 		cmn_err(CE_WARN, "%s: invalid state transition for"
2560 			" mem-unit (%d.%d)",
2561 			f,
2562 			sbp->sb_num,
2563 			s_mp->sbm_cm.sbdev_unum);
2564 		SBD_SET_ERR(ep, ESBD_STATE);
2565 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2566 		return (-1);
2567 	}
2568 
2569 	/* verify target mem unit's state is UNREFERENCED, if any */
2570 	if (t_mp != NULL) {
2571 		state = t_mp->sbm_cm.sbdev_state;
2572 		if (state != SBD_STATE_UNREFERENCED) {
2573 			cmn_err(CE_WARN, "%s: invalid state transition for"
2574 				" target mem-unit (%d.%d)",
2575 				f,
2576 				tbp->sb_num,
2577 				t_mp->sbm_cm.sbdev_unum);
2578 			SBD_SET_ERR(ep, ESBD_STATE);
2579 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2580 			return (-1);
2581 		}
2582 	}
2583 
2584 	/*
2585 	 * Displacement flush all ecaches in the system.
2586 	 * That's the fastest way to remove all cache references
2587 	 * to the detaching memory.
2588 	 */
2589 	xc_all(sbd_flush_ecache, 0, 0);
2590 
2591 	hdp = sbd_get_sbdp_handle(sbp, hp);
2592 
2593 	/*
2594 	 * If there is no target board (no copy/rename was needed), then
2595 	 * we're done!
2596 	 */
2597 	if (t_mp == NULL) {
2598 		/*
2599 		 * Reprogram interconnect hardware and disable
2600 		 * memory controllers for memory node that's going away.
2601 		 */
2602 
2603 		rv = sbdphw_disable_memctrl(hdp, s_mp->sbm_cm.sbdev_dip);
2604 		if (rv) {
2605 			cmn_err(CE_WARN,
2606 				"%s: failed to deprogram mem-unit (%d.%d),"
2607 				" dip 0x%p",
2608 				f,
2609 				sbp->sb_num,
2610 				s_mp->sbm_cm.sbdev_unum,
2611 				(void *)s_mp->sbm_cm.sbdev_dip);
2612 			/*
2613 			 * Make sure we don't rewrite an sbdp error
2614 			 */
2615 			if (SBD_GET_ERR(ep) != 0) {
2616 				SBD_SET_ERR(ep, ESBD_HW_PROGRAM);
2617 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2618 			}
2619 		}
2620 	} else {
2621 		rv = sbd_move_memory(hp, sbp, tbp);
2622 		if (rv) {
2623 			int i;
2624 
2625 			cmn_err(CE_WARN, "%s: failed to move memory"
2626 				" from board %d to board %d",
2627 				f,
2628 				sbp->sb_num,
2629 				tbp->sb_num);
2630 			/*
2631 			 * Make sure we don't rewrite an sbdp error
2632 			 */
2633 			if (SBD_GET_ERR(ep) != 0) {
2634 				SBD_SET_ERR(ep, ESBD_INTERNAL);
2635 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2636 			}
2637 			/*
2638 			 * If we failed here, it means that the target board's
2639 			 * memory has been unconfigured.  We need to configure
2640 			 * it back
2641 			 */
2642 			for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
2643 				int		unit;
2644 				dev_info_t	*dip;
2645 				dev_info_t	**devlist;
2646 
2647 
2648 				devlist = tbp->sb_devlist[NIX(SBD_COMP_MEM)];
2649 				dip = devlist[i];
2650 				sbd_reset_error_sbdph(hdp);
2651 				unit = sbdp_get_unit_num(hdp, dip);
2652 
2653 				/*
2654 				 * We already saved the error that created
2655 				 * this mess.  If we fail, make sure not
2656 				 * to overwrite the original error
2657 				 */
2658 				if (unit == -1) {
2659 					continue;
2660 				}
2661 				if (sbd_cancel_mem(hp, unit) != 0)
2662 					continue;
2663 
2664 				t_mp->sbm_flags = 0;
2665 				/*
2666 				 * clean up
2667 				 */
2668 				sbd_mem_cleanup(s_mp, t_mp, ep);
2669 				if (s_mp->sbm_mlist) {
2670 					memlist_delete(s_mp->sbm_mlist);
2671 					s_mp->sbm_mlist = NULL;
2672 				}
2673 
2674 				SBD_DEVICE_TRANSITION(tbp, SBD_COMP_MEM,
2675 				    unit, SBD_STATE_CONFIGURED);
2676 			}
2677 		}
2678 
2679 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
2680 			f,
2681 			rv ? "FAILED" : "COMPLETED",
2682 			sbp->sb_num,
2683 			tbp->sb_num);
2684 	}
2685 
2686 	if (rv == 0) {
2687 		update_membounds_t umb;
2688 
2689 		umb.u_board = sbp->sb_num;
2690 		umb.u_base = (uint64_t)-1;
2691 		umb.u_len = (uint64_t)-1;
2692 
2693 		lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
2694 	}
2695 
2696 	sbd_release_sbdp_handle(hdp);
2697 	return (rv);
2698 }
2699 
/*
 * Cross-call handler passed to xc_all() by sbd_detach_memory().
 * It flushes the ecache by calling cpu_flush_ecache(); both
 * arguments are ignored.
 */
/*ARGSUSED*/
static void
sbd_flush_ecache(uint64_t a, uint64_t b)
{
	cpu_flush_ecache();
}
2706