1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * DR memory support routines.
28 */
29
30 #include <sys/note.h>
31 #include <sys/debug.h>
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>
35 #include <sys/dditypes.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/ndi_impldefs.h>
43 #include <sys/sysmacros.h>
44 #include <sys/machsystm.h>
45 #include <sys/spitregs.h>
46 #include <sys/cpuvar.h>
47 #include <sys/promif.h>
48 #include <vm/seg_kmem.h>
49 #include <sys/lgrp.h>
50 #include <sys/platform_module.h>
51
52 #include <vm/page.h>
53
54 #include <sys/dr.h>
55 #include <sys/dr_util.h>
56 #include <sys/drmach.h>
57 #include <sys/kobj.h>
58
59 extern struct memlist *phys_install;
60 extern vnode_t *retired_pages;
61
62 /* TODO: push this reference below drmach line */
63 extern int kcage_on;
64
65 /* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
66 static char *dr_ie_fmt = "dr_mem.c %d";
67
68 typedef enum {
69 DR_TP_INVALID = -1,
70 DR_TP_SAME,
71 DR_TP_LARGE,
72 DR_TP_NONRELOC,
73 DR_TP_FLOATING
74 } dr_target_pref_t;
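/*
 * A numerically lower dr_target_pref_t value denotes a more preferred
 * copy/rename target (dr_select_mem_target() below notes that a lower
 * value is a higher preference), so DR_TP_SAME outranks DR_TP_LARGE,
 * and DR_TP_INVALID indicates that no usable target was found.
 */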
75
76 static int dr_post_detach_mem_unit(dr_mem_unit_t *mp);
77 static int dr_reserve_mem_spans(memhandle_t *mhp,
78 struct memlist *mlist);
79 static int dr_select_mem_target(dr_handle_t *hp,
80 dr_mem_unit_t *mp, struct memlist *ml);
81 static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
82 static struct memlist *dr_memlist_del_retired_pages(struct memlist *ml);
83 static dr_target_pref_t dr_get_target_preference(dr_handle_t *hp,
84 dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
85 struct memlist *s_ml, struct memlist *x_ml,
86 struct memlist *b_ml);
87
88 static int memlist_canfit(struct memlist *s_mlist,
89 struct memlist *t_mlist);
90 static int dr_del_mlist_query(struct memlist *mlist,
91 memquery_t *mp);
92 static struct memlist *dr_get_copy_mlist(struct memlist *s_ml,
93 struct memlist *t_ml, dr_mem_unit_t *s_mp,
94 dr_mem_unit_t *t_mp);
95 static struct memlist *dr_get_nonreloc_mlist(struct memlist *s_ml,
96 dr_mem_unit_t *s_mp);
97 static int dr_memlist_canfit(struct memlist *s_mlist,
98 struct memlist *t_mlist, dr_mem_unit_t *s_mp,
99 dr_mem_unit_t *t_mp);
100
101 /*
102 * dr_mem_unit_t.sbm_flags
103 */
104 #define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
105 #define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
106 #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
107 #define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
108 #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
109
110 /* helper macros */
111 #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
112 #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
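/*
 * For example, assuming the sun4u 8 KB base page (PAGESHIFT == 13):
 * _ptob64(0x100) == 0x200000 (pfn 0x100 maps to the 2 MB physical
 * address), and _b64top(0x200000) == 0x100. The cast in _ptob64
 * widens the pfn to 64 bits before shifting so that large pfns
 * cannot overflow the calculation.
 */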
113
114 static struct memlist *
115 dr_get_memlist(dr_mem_unit_t *mp)
116 {
117 struct memlist *mlist = NULL;
118 sbd_error_t *err;
119 static fn_t f = "dr_get_memlist";
120
121 PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
122
123 /*
124 * Return cached memlist, if present.
125 * This memlist will be present following an
126 * unconfigure (a.k.a. detach) of this memunit.
127 * It should only be used in the case where a configure
128 * is bringing this memunit back in without going
129 * through the disconnect and connect states.
130 */
131 if (mp->sbm_mlist) {
132 PR_MEM("%s: found cached memlist\n", f);
133
134 mlist = memlist_dup(mp->sbm_mlist);
135 } else {
136 uint64_t basepa = _ptob64(mp->sbm_basepfn);
137
138 /* attempt to construct a memlist using phys_install */
139
140 /* round down to slice base address */
141 basepa &= ~(mp->sbm_slice_size - 1);
142
143 /* get a copy of phys_install to edit */
144 memlist_read_lock();
145 mlist = memlist_dup(phys_install);
146 memlist_read_unlock();
147
148 /* trim lower irrelevant span */
149 if (mlist)
150 mlist = memlist_del_span(mlist, 0ull, basepa);
151
152 /* trim upper irrelevant span */
153 if (mlist) {
154 uint64_t endpa;
155
156 basepa += mp->sbm_slice_size;
157 endpa = _ptob64(physmax + 1);
158 if (endpa > basepa)
159 mlist = memlist_del_span(
160 mlist, basepa,
161 endpa - basepa);
162 }
163
164 if (mlist) {
165 /* successfully built a memlist */
166 PR_MEM("%s: derived memlist from phys_install\n", f);
167 }
168
169 /* if no mlist yet, try platform layer */
170 if (!mlist) {
171 err = drmach_mem_get_memlist(
172 mp->sbm_cm.sbdev_id, &mlist);
173 if (err) {
174 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
175 mlist = NULL; /* paranoia */
176 }
177 }
178 }
179
180 PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
181 PR_MEMLIST_DUMP(mlist);
182
183 return (mlist);
184 }
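/*
 * Illustration of the slice trimming above, assuming a hypothetical
 * 4 GB slice (sbm_slice_size == 0x100000000): a base PA of
 * 0x123458000 rounds down to 0x100000000, and the memlist is then
 * clipped to the window [0x100000000, 0x200000000), so only the
 * spans of phys_install that fall within this unit's slice survive.
 */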
185
186 typedef struct {
187 kcondvar_t cond;
188 kmutex_t lock;
189 int error;
190 int done;
191 } dr_release_mem_sync_t;
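/*
 * dr_release_mem_sync_t implements a standard condition-variable
 * completion handshake: the callback (dr_mem_del_done() below) sets
 * 'error' and 'done' and signals 'cond' while holding 'lock', and the
 * waiter in dr_release_mem() re-checks 'done' in a loop under the
 * same lock, so a completion can never be missed.
 */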
192
193 /*
194 * Memory has been logically removed by the time this routine is called.
195 */
196 static void
197 dr_mem_del_done(void *arg, int error)
198 {
199 dr_release_mem_sync_t *ds = arg;
200
201 mutex_enter(&ds->lock);
202 ds->error = error;
203 ds->done = 1;
204 cv_signal(&ds->cond);
205 mutex_exit(&ds->lock);
206 }
207
208 /*
209 * When we reach here, the memory being drained should have
210 * already been reserved in dr_pre_release_mem().
211 * Our only task here is to kick off the "drain" and wait
212 * for it to finish.
213 */
214 void
215 dr_release_mem(dr_common_unit_t *cp)
216 {
217 dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
218 int err;
219 dr_release_mem_sync_t rms;
220 static fn_t f = "dr_release_mem";
221
222 /* check that this memory unit has been reserved */
223 if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
224 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
225 return;
226 }
227
228 bzero((void *) &rms, sizeof (rms));
229
230 mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
231 cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
232
233 mutex_enter(&rms.lock);
234 err = kphysm_del_start(mp->sbm_memhandle,
235 dr_mem_del_done, (void *) &rms);
236 if (err == KPHYSM_OK) {
237 /* wait for completion or interrupt */
238 while (!rms.done) {
239 if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
240 /* then there is a pending UNIX signal */
241 (void) kphysm_del_cancel(mp->sbm_memhandle);
242
243 /* wait for completion */
244 while (!rms.done)
245 cv_wait(&rms.cond, &rms.lock);
246 }
247 }
248 /* get the result of the memory delete operation */
249 err = rms.error;
250 }
251 mutex_exit(&rms.lock);
252
253 cv_destroy(&rms.cond);
254 mutex_destroy(&rms.lock);
255
256 if (err != KPHYSM_OK) {
257 int e_code;
258
259 switch (err) {
260 case KPHYSM_ENOWORK:
261 e_code = ESBD_NOERROR;
262 break;
263
264 case KPHYSM_EHANDLE:
265 case KPHYSM_ESEQUENCE:
266 e_code = ESBD_INTERNAL;
267 break;
268
269 case KPHYSM_ENOTVIABLE:
270 e_code = ESBD_MEM_NOTVIABLE;
271 break;
272
273 case KPHYSM_EREFUSED:
274 e_code = ESBD_MEM_REFUSED;
275 break;
276
277 case KPHYSM_ENONRELOC:
278 e_code = ESBD_MEM_NONRELOC;
279 break;
280
281 case KPHYSM_ECANCELLED:
282 e_code = ESBD_MEM_CANCELLED;
283 break;
284
285 case KPHYSM_ERESOURCE:
286 e_code = ESBD_MEMFAIL;
287 break;
288
289 default:
290 cmn_err(CE_WARN,
291 "%s: unexpected kphysm error code %d,"
292 " id 0x%p",
293 f, err, mp->sbm_cm.sbdev_id);
294
295 e_code = ESBD_IO;
296 break;
297 }
298
299 if (e_code != ESBD_NOERROR) {
300 dr_dev_err(CE_WARN, &mp->sbm_cm, e_code);
301 }
302 }
303 }
304
305 void
306 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
307 {
308 _NOTE(ARGUNUSED(hp))
309
310 dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
311 struct memlist *ml, *mc;
312 sbd_error_t *err;
313 static fn_t f = "dr_attach_mem";
314
315 PR_MEM("%s...\n", f);
316
317 dr_lock_status(hp->h_bd);
318 err = drmach_configure(cp->sbdev_id, 0);
319 dr_unlock_status(hp->h_bd);
320 if (err) {
321 DRERR_SET_C(&cp->sbdev_error, &err);
322 return;
323 }
324
325 ml = dr_get_memlist(mp);
326 for (mc = ml; mc; mc = mc->ml_next) {
327 int rv;
328 sbd_error_t *err;
329
330 rv = kphysm_add_memory_dynamic(
331 (pfn_t)(mc->ml_address >> PAGESHIFT),
332 (pgcnt_t)(mc->ml_size >> PAGESHIFT));
333 if (rv != KPHYSM_OK) {
334 /*
335 * translate kphysm error and
336 * store in devlist error
337 */
338 switch (rv) {
339 case KPHYSM_ERESOURCE:
340 rv = ESBD_NOMEM;
341 break;
342
343 case KPHYSM_EFAULT:
344 rv = ESBD_FAULT;
345 break;
346
347 default:
348 rv = ESBD_INTERNAL;
349 break;
350 }
351
352 if (rv == ESBD_INTERNAL) {
353 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
354 } else
355 dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
356 break;
357 }
358
359 err = drmach_mem_add_span(
360 mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
361 if (err) {
362 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
363 break;
364 }
365 }
366
367 memlist_delete(ml);
368
369 /* back out if configure failed */
370 if (mp->sbm_cm.sbdev_error != NULL) {
371 dr_lock_status(hp->h_bd);
372 err = drmach_unconfigure(cp->sbdev_id, 0);
373 if (err)
374 sbd_err_clear(&err);
375 dr_unlock_status(hp->h_bd);
376 }
377 }
378
379 static struct memlist *
380 dr_memlist_del_retired_pages(struct memlist *mlist)
381 {
382 page_t *pp;
383 pfn_t pfn;
384 kmutex_t *vphm;
385 vnode_t *vp = retired_pages;
386 static fn_t f = "dr_memlist_del_retired_pages";
387
388 vphm = page_vnode_mutex(vp);
389 mutex_enter(vphm);
390
391 PR_MEM("%s\n", f);
392
393 if ((pp = vp->v_pages) == NULL) {
394 mutex_exit(vphm);
395 return (mlist);
396 }
397
398 do {
399 ASSERT(pp != NULL);
400 ASSERT(pp->p_vnode == retired_pages);
401
402 if (!page_try_reclaim_lock(pp, SE_SHARED, SE_RETIRED))
403 continue;
404
405 pfn = page_pptonum(pp);
406
407 /*
408 * Page retirement currently breaks large pages into PAGESIZE
409 * pages. If this changes, we will need to remove the assert
410 * and deal with different page sizes.
411 */
412 ASSERT(pp->p_szc == 0);
413
414 if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
415 mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
416 PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
417 "from memlist\n", ptob(pfn), pfn);
418 }
419
420 page_unlock(pp);
421 } while ((pp = pp->p_vpnext) != vp->v_pages);
422
423 mutex_exit(vphm);
424
425 return (mlist);
426 }
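/*
 * Note on the walk above: v_pages is a circular list linked through
 * p_vpnext, hence the do/while terminating when the walk returns to
 * vp->v_pages. SE_RETIRED is what lets page_try_reclaim_lock()
 * succeed on a retired page; a page whose lock cannot be taken is
 * simply left in the memlist and its contents are copied as usual.
 */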
427
428 static int
429 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
430 {
431 int rv = -1;
432 time_t copytime;
433 drmachid_t cr_id;
434 dr_sr_handle_t *srhp = NULL;
435 dr_board_t *t_bp, *s_bp;
436 struct memlist *c_ml, *d_ml;
437 sbd_error_t *err;
438 static fn_t f = "dr_move_memory";
439
440 PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
441 f,
442 s_mp->sbm_cm.sbdev_path,
443 t_mp->sbm_cm.sbdev_path);
444
445 ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
446 ASSERT(s_mp->sbm_peer == t_mp);
447 ASSERT(s_mp->sbm_mlist);
448
449 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
450 ASSERT(t_mp->sbm_peer == s_mp);
451
452 /*
453 * create a memlist of spans to copy by removing
454 * the spans that have been deleted, if any, from
455 * the full source board memlist. s_mp->sbm_del_mlist
456 * will be NULL if there were no spans deleted from
457 * the source board.
458 */
459 c_ml = memlist_dup(s_mp->sbm_mlist);
460 d_ml = s_mp->sbm_del_mlist;
461 while (d_ml != NULL) {
462 c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
463 d_ml = d_ml->ml_next;
464 }
465
466 /*
467 * Remove retired pages from the copy list. The page content
468 * need not be copied since the pages are no longer in use.
469 */
470 PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
471 PR_MEMLIST_DUMP(c_ml);
472
473 c_ml = dr_memlist_del_retired_pages(c_ml);
474
475 PR_MEM("%s: copy list after removing retired pages:\n", f);
476 PR_MEMLIST_DUMP(c_ml);
477
478 /*
479 * With parallel copy, it shouldn't make a difference which
480 * CPU is the actual master during copy-rename since all
481 * CPUs participate in the parallel copy anyway.
482 */
483 affinity_set(CPU_CURRENT);
484
485 err = drmach_copy_rename_init(
486 t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
487 if (err) {
488 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
489 affinity_clear();
490 memlist_delete(c_ml);
491 return (-1);
492 }
493
494 srhp = dr_get_sr_handle(hp);
495 ASSERT(srhp);
496
497 copytime = ddi_get_lbolt();
498
499 /* Quiesce the OS. */
500 if (dr_suspend(srhp)) {
501 cmn_err(CE_WARN, "%s: failed to quiesce OS"
502 " for copy-rename", f);
503
504 err = drmach_copy_rename_fini(cr_id);
505 if (err) {
506 /*
507 * no error is expected since the program has
508 * not yet run.
509 */
510
511 /* catch this in debug kernels */
512 ASSERT(0);
513
514 sbd_err_clear(&err);
515 }
516
517 /* suspend error reached via hp */
518 s_mp->sbm_cm.sbdev_error = hp->h_err;
519 hp->h_err = NULL;
520 goto done;
521 }
522
523 drmach_copy_rename(cr_id);
524
525 /* Resume the OS. */
526 dr_resume(srhp);
527
528 copytime = ddi_get_lbolt() - copytime;
529
530 if (err = drmach_copy_rename_fini(cr_id))
531 goto done;
532
533 /*
534 * Rename memory for lgroup.
535 * Source and target board numbers are packaged in arg.
536 */
537 s_bp = s_mp->sbm_cm.sbdev_bp;
538 t_bp = t_mp->sbm_cm.sbdev_bp;
539
540 lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
541 (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
542
543
544 PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
545 f, copytime, copytime / hz);
546
547 rv = 0;
548 done:
549 if (srhp)
550 dr_release_sr_handle(srhp);
551 if (err)
552 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
553 affinity_clear();
554
555 return (rv);
556 }
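/*
 * The copy-rename sequence above, in outline:
 *
 *   drmach_copy_rename_init()  - set up the platform copy program
 *   dr_suspend()               - quiesce the OS
 *   drmach_copy_rename()       - parallel copy; board bases swap
 *   dr_resume()                - restart the OS
 *   drmach_copy_rename_fini()  - tear down and report errors
 *
 * The OS must remain quiesced across the copy because physical
 * addresses are being renamed underneath it; afterwards the source
 * and target mem-units refer to swapped base PAs (see
 * dr_post_detach_mem_unit()).
 */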
557
558 /*
559 * If the detaching node contains memory that is "non-permanent",
560 * then the memory adrs are simply cleared. If the memory
561 * is non-relocatable, then a copy-rename is done.
562 */
563 void
564 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
565 {
566 int rv = 0;
567 dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
568 dr_mem_unit_t *t_mp;
569 dr_state_t state;
570 static fn_t f = "dr_detach_mem";
571
572 PR_MEM("%s...\n", f);
573
574 /* lookup target mem unit and target board structure, if any */
575 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
576 t_mp = s_mp->sbm_peer;
577 ASSERT(t_mp != NULL);
578 ASSERT(t_mp->sbm_peer == s_mp);
579 } else {
580 t_mp = NULL;
581 }
582
583 /* verify mem unit's state is UNREFERENCED */
584 state = s_mp->sbm_cm.sbdev_state;
585 if (state != DR_STATE_UNREFERENCED) {
586 dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
587 return;
588 }
589
590 /* verify target mem unit's state is UNREFERENCED, if any */
591 if (t_mp != NULL) {
592 state = t_mp->sbm_cm.sbdev_state;
593 if (state != DR_STATE_UNREFERENCED) {
594 dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
595 return;
596 }
597 }
598
599 /*
600 * If there is no target board (no copy/rename was needed), then
601 * we're done!
602 */
603 if (t_mp == NULL) {
604 sbd_error_t *err;
605 /*
606 * Reprogram interconnect hardware and disable
607 * memory controllers for memory node that's going away.
608 */
609
610 err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
611 if (err) {
612 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
613 rv = -1;
614 }
615 } else {
616 rv = dr_move_memory(hp, s_mp, t_mp);
617 PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
618 f,
619 rv ? "FAILED" : "COMPLETED",
620 s_mp->sbm_cm.sbdev_bp->b_num,
621 t_mp->sbm_cm.sbdev_bp->b_num);
622
623 if (rv != 0)
624 (void) dr_cancel_mem(s_mp);
625 }
626
627 if (rv == 0) {
628 sbd_error_t *err;
629
630 dr_lock_status(hp->h_bd);
631 err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
632 dr_unlock_status(hp->h_bd);
633 if (err)
634 sbd_err_clear(&err);
635 }
636 }
637
638 /*
639 * This routine acts as a wrapper for kphysm_del_span_query in order to
640 * support potential memory holes in a board's physical address space.
641 * It calls kphysm_del_span_query for each node in a memlist and accumulates
642 * the results in *mp.
643 */
644 static int
645 dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
646 {
647 struct memlist *ml;
648 int rv = 0;
649
650
651 if (mlist == NULL)
652 cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
653
654 mp->phys_pages = 0;
655 mp->managed = 0;
656 mp->nonrelocatable = 0;
657 mp->first_nonrelocatable = (pfn_t)-1; /* XXX */
658 mp->last_nonrelocatable = 0;
659
660 for (ml = mlist; ml; ml = ml->ml_next) {
661 memquery_t mq;
662
663 rv = kphysm_del_span_query(
664 _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
665 if (rv)
666 break;
667
668 mp->phys_pages += mq.phys_pages;
669 mp->managed += mq.managed;
670 mp->nonrelocatable += mq.nonrelocatable;
671
672 if (mq.nonrelocatable != 0) {
673 if (mq.first_nonrelocatable < mp->first_nonrelocatable)
674 mp->first_nonrelocatable =
675 mq.first_nonrelocatable;
676 if (mq.last_nonrelocatable > mp->last_nonrelocatable)
677 mp->last_nonrelocatable =
678 mq.last_nonrelocatable;
679 }
680 }
681
682 if (mp->nonrelocatable == 0)
683 mp->first_nonrelocatable = 0; /* XXX */
684
685 return (rv);
686 }
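/*
 * Example of the accumulation: if one span reports 3 nonrelocatable
 * pages spanning pfns 0x800..0x802 and another reports 2 spanning
 * 0x400..0x401, the merged result is nonrelocatable == 5 with
 * first_nonrelocatable == 0x400 and last_nonrelocatable == 0x802.
 * Seeding first_nonrelocatable with (pfn_t)-1 makes the first
 * comparison always win; it is reset to 0 if no nonrelocatable
 * pages were found at all.
 */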
687
688 /*
689 * NOTE: This routine is only partially smart about multiple
690 * mem-units. Need to make mem-status structure smart
691 * about them also.
692 */
693 int
694 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
695 {
696 int m, mix;
697 memdelstat_t mdst;
698 memquery_t mq;
699 dr_board_t *bp;
700 dr_mem_unit_t *mp;
701 sbd_mem_stat_t *msp;
702 static fn_t f = "dr_mem_status";
703
704 bp = hp->h_bd;
705 devset &= DR_DEVS_PRESENT(bp);
706
707 for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
708 int rv;
709 sbd_error_t *err;
710 drmach_status_t pstat;
711 dr_mem_unit_t *p_mp;
712
713 if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
714 continue;
715
716 mp = dr_get_mem_unit(bp, m);
717
718 if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
719 /* present, but not fully initialized */
720 continue;
721 }
722
723 if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
724 continue;
725
726 /* fetch platform status */
727 err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
728 if (err) {
729 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
730 continue;
731 }
732
733 msp = &dsp->d_mem;
734 bzero((caddr_t)msp, sizeof (*msp));
735
736 (void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
737 sizeof (msp->ms_cm.c_id.c_name));
738 msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
739 msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
740 msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
741 msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
742 msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
743 msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
744
745 msp->ms_totpages = mp->sbm_npages;
746 msp->ms_basepfn = mp->sbm_basepfn;
747 msp->ms_pageslost = mp->sbm_pageslost;
748 msp->ms_cage_enabled = kcage_on;
749
750 if (mp->sbm_flags & DR_MFLAG_RESERVED)
751 p_mp = mp->sbm_peer;
752 else
753 p_mp = NULL;
754
755 if (p_mp == NULL) {
756 msp->ms_peer_is_target = 0;
757 msp->ms_peer_ap_id[0] = '\0';
758 } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
759 char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760 char *minor;
761
762 /*
763 * b_dip doesn't have to be held for ddi_pathname()
764 * because the board struct (dr_board_t) will be
765 * destroyed before b_dip detaches.
766 */
767 (void) ddi_pathname(bp->b_dip, path);
768 minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
769
770 (void) snprintf(msp->ms_peer_ap_id,
771 sizeof (msp->ms_peer_ap_id), "%s%s",
772 path, (minor == NULL) ? "" : minor);
773
774 kmem_free(path, MAXPATHLEN);
775
776 if (p_mp->sbm_flags & DR_MFLAG_TARGET)
777 msp->ms_peer_is_target = 1;
778 }
779
780 if (mp->sbm_flags & DR_MFLAG_RELOWNER)
781 rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
782 else
783 rv = KPHYSM_EHANDLE; /* force 'if' to fail */
784
785 if (rv == KPHYSM_OK) {
786 /*
787 * Any pages above managed are "free",
788 * i.e. they have been collected.
789 */
790 msp->ms_detpages += (uint_t)(mdst.collected +
791 mdst.phys_pages - mdst.managed);
792 } else {
793 /*
794 * If we're UNREFERENCED or UNCONFIGURED,
795 * then the number of detached pages is
796 * however many pages are on the board.
797 * I.e. detached = not in use by OS.
798 */
799 switch (msp->ms_cm.c_ostate) {
800 /*
801 * changed to use cfgadm states
802 *
803 * was:
804 * case DR_STATE_UNREFERENCED:
805 * case DR_STATE_UNCONFIGURED:
806 */
807 case SBD_STAT_UNCONFIGURED:
808 msp->ms_detpages = msp->ms_totpages;
809 break;
810
811 default:
812 break;
813 }
814 }
815
816 /*
817 * kphysm_del_span_query can report non-reloc pages = total
818 * pages for memory that is not yet configured
819 */
820 if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
821 struct memlist *ml;
822
823 ml = dr_get_memlist(mp);
824 rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
825 memlist_delete(ml);
826
827 if (rv == KPHYSM_OK) {
828 msp->ms_managed_pages = mq.managed;
829 msp->ms_noreloc_pages = mq.nonrelocatable;
830 msp->ms_noreloc_first =
831 mq.first_nonrelocatable;
832 msp->ms_noreloc_last =
833 mq.last_nonrelocatable;
834 msp->ms_cm.c_sflags = 0;
835 if (mq.nonrelocatable) {
836 SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
837 msp->ms_cm.c_sflags);
838 }
839 } else {
840 PR_MEM("%s: kphysm_del_span_query() = %d\n",
841 f, rv);
842 }
843 }
844
845 /*
846 * Check source unit state during copy-rename
847 */
848 if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
849 (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
850 mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
851 msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
852
853 mix++;
854 dsp++;
855 }
856
857 return (mix);
858 }
859
860 int
861 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
862 {
863 _NOTE(ARGUNUSED(hp))
864
865 int err_flag = 0;
866 int d;
867 sbd_error_t *err;
868 static fn_t f = "dr_pre_attach_mem";
869
870 PR_MEM("%s...\n", f);
871
872 for (d = 0; d < devnum; d++) {
873 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
874 dr_state_t state;
875
876 cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
877
878 state = mp->sbm_cm.sbdev_state;
879 switch (state) {
880 case DR_STATE_UNCONFIGURED:
881 PR_MEM("%s: recovering from UNCONFIG for %s\n",
882 f,
883 mp->sbm_cm.sbdev_path);
884
885 /* use memlist cached by dr_post_detach_mem_unit */
886 ASSERT(mp->sbm_mlist != NULL);
887 PR_MEM("%s: re-configuring cached memlist for %s:\n",
888 f, mp->sbm_cm.sbdev_path);
889 PR_MEMLIST_DUMP(mp->sbm_mlist);
890
891 /* kphysm del handle should have been freed */
892 ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
893
894 /*FALLTHROUGH*/
895
896 case DR_STATE_CONNECTED:
897 PR_MEM("%s: reprogramming mem hardware on %s\n",
898 f, mp->sbm_cm.sbdev_bp->b_path);
899
900 PR_MEM("%s: enabling %s\n",
901 f, mp->sbm_cm.sbdev_path);
902
903 err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
904 if (err) {
905 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
906 err_flag = 1;
907 }
908 break;
909
910 default:
911 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
912 err_flag = 1;
913 break;
914 }
915
916 /* exit for loop if error encountered */
917 if (err_flag)
918 break;
919 }
920
921 return (err_flag ? -1 : 0);
922 }
923
924 static void
925 dr_update_mc_memory()
926 {
927 void (*mc_update_mlist)(void);
928
929 /*
930 * mc-opl is configured during drmach_mem_new but the memory
931 * has not been added to phys_install at that time.
932 * we must inform mc-opl to update the mlist after we
933 * attach or detach a system board.
934 */
935
936 mc_update_mlist = (void (*)(void))
937 modgetsymvalue("opl_mc_update_mlist", 0);
938
939 if (mc_update_mlist != NULL) {
940 (*mc_update_mlist)();
941 }
942 }
943
944 int
945 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
946 {
947 _NOTE(ARGUNUSED(hp))
948
949 int d;
950 static fn_t f = "dr_post_attach_mem";
951
952 PR_MEM("%s...\n", f);
953
954 for (d = 0; d < devnum; d++) {
955 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
956 struct memlist *mlist, *ml;
957
958 mlist = dr_get_memlist(mp);
959 if (mlist == NULL) {
960 /* OPL supports memoryless boards */
961 continue;
962 }
963
964 /*
965 * Verify the memory really did successfully attach
966 * by checking for its existence in phys_install.
967 */
968 memlist_read_lock();
969 if (memlist_intersect(phys_install, mlist) == 0) {
970 memlist_read_unlock();
971
972 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
973
974 PR_MEM("%s: %s memlist not in phys_install",
975 f, mp->sbm_cm.sbdev_path);
976
977 memlist_delete(mlist);
978 continue;
979 }
980 memlist_read_unlock();
981
982 for (ml = mlist; ml != NULL; ml = ml->ml_next) {
983 sbd_error_t *err;
984
985 err = drmach_mem_add_span(
986 mp->sbm_cm.sbdev_id,
987 ml->ml_address,
988 ml->ml_size);
989 if (err)
990 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
991 }
992
993 memlist_delete(mlist);
994
995 /*
996 * Destroy cached memlist, if any.
997 * There will be a cached memlist in sbm_mlist if
998 * this board is being configured directly after
999 * an unconfigure.
1000 * To support this transition, dr_post_detach_mem
1001 * left a copy of the last known memlist in sbm_mlist.
1002 * This memlist could differ from one derived from
1003 * hardware if, while this memunit was last configured,
1004 * the system detected and deleted bad pages from
1005 * phys_install. The location of those bad pages
1006 * will be reflected in the cached memlist.
1007 */
1008 if (mp->sbm_mlist) {
1009 memlist_delete(mp->sbm_mlist);
1010 mp->sbm_mlist = NULL;
1011 }
1012 }
1013
1014 dr_update_mc_memory();
1015
1016 return (0);
1017 }
1018
1019 int
1020 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1021 {
1022 _NOTE(ARGUNUSED(hp))
1023
1024 int d;
1025
1026 for (d = 0; d < devnum; d++) {
1027 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1028
1029 cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1030 }
1031
1032 return (0);
1033 }
1034
1035 int
1036 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1037 {
1038 _NOTE(ARGUNUSED(hp))
1039
1040 int d, rv;
1041 static fn_t f = "dr_post_detach_mem";
1042
1043 PR_MEM("%s...\n", f);
1044
1045 rv = 0;
1046 for (d = 0; d < devnum; d++) {
1047 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1048
1049 ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1050
1051 if (dr_post_detach_mem_unit(mp))
1052 rv = -1;
1053 }
1054 dr_update_mc_memory();
1055
1056 return (rv);
1057 }
1058
1059 static void
1060 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1061 {
1062 static fn_t f = "dr_add_memory_spans";
1063
1064 PR_MEM("%s...", f);
1065 PR_MEMLIST_DUMP(ml);
1066
1067 #ifdef DEBUG
1068 memlist_read_lock();
1069 if (memlist_intersect(phys_install, ml)) {
1070 PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1071 }
1072 memlist_read_unlock();
1073 #endif
1074
1075 for (; ml; ml = ml->ml_next) {
1076 pfn_t base;
1077 pgcnt_t npgs;
1078 int rv;
1079 sbd_error_t *err;
1080
1081 base = _b64top(ml->ml_address);
1082 npgs = _b64top(ml->ml_size);
1083
1084 rv = kphysm_add_memory_dynamic(base, npgs);
1085
1086 err = drmach_mem_add_span(
1087 mp->sbm_cm.sbdev_id,
1088 ml->ml_address,
1089 ml->ml_size);
1090
1091 if (err)
1092 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1093
1094 if (rv != KPHYSM_OK) {
1095 cmn_err(CE_WARN, "%s:"
1096 " unexpected kphysm_add_memory_dynamic"
1097 " return value %d;"
1098 " basepfn=0x%lx, npages=%ld\n",
1099 f, rv, base, npgs);
1100
1101 continue;
1102 }
1103 }
1104 }
1105
1106 static int
1107 memlist_touch(struct memlist *ml, uint64_t add)
1108 {
1109 while (ml != NULL) {
1110 if ((add == ml->ml_address) ||
1111 (add == (ml->ml_address + ml->ml_size)))
1112 return (1);
1113 ml = ml->ml_next;
1114 }
1115 return (0);
1116 }
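/*
 * memlist_touch() is an adjacency test: for a span covering
 * [0x10000000, 0x14000000), probes at exactly 0x10000000 or
 * 0x14000000 return 1. dr_process_excess_mlist() below uses it to
 * detect whether a chunk being added back would butt up against
 * phys_install, in which case the touching edge must be trimmed
 * first.
 */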
1117
1118 static sbd_error_t *
1119 dr_process_excess_mlist(dr_mem_unit_t *s_mp,
1120 dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
1121 {
1122 struct memlist *ml;
1123 sbd_error_t *err;
1124 static fn_t f = "dr_process_excess_mlist";
1125 uint64_t new_pa, nbytes;
1126 int rv;
1127
1128 err = NULL;
1129
1130 /*
1131 * After the small <-> big copy-rename,
1132 * the original address space for the
1133 * source board may have excess to be
1134 * deleted. This is a case different
1135 * from the big->small excess source
1136 * memory case listed below.
1137 * Remove s_mp->sbm_del_mlist from
1138 * the kernel cage glist.
1139 */
1140 for (ml = s_mp->sbm_del_mlist; ml;
1141 ml = ml->ml_next) {
1142 PR_MEM("%s: delete small<->big copy-"
1143 "rename source excess memory", f);
1144 PR_MEMLIST_DUMP(ml);
1145
1146 err = drmach_mem_del_span(
1147 s_mp->sbm_cm.sbdev_id,
1148 ml->ml_address, ml->ml_size);
1149 if (err)
1150 DRERR_SET_C(&s_mp->
1151 sbm_cm.sbdev_error, &err);
1152 ASSERT(err == NULL);
1153 }
1154
1155 PR_MEM("%s: adding back remaining portion"
1156 " of %s, memlist:\n",
1157 f, t_mp->sbm_cm.sbdev_path);
1158 PR_MEMLIST_DUMP(t_excess_mlist);
1159
1160 for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
1161 struct memlist ml0;
1162
1163 ml0.ml_address = ml->ml_address;
1164 ml0.ml_size = ml->ml_size;
1165 ml0.ml_next = ml0.ml_prev = NULL;
1166
1167 /*
1168 * If the memory object is 256 MB aligned (the max page size
1169 * on OPL), it will not be coalesced with the adjacent memory
1170 * chunks. The coalesce logic assumes contiguous page
1171 * structures for contiguous memory, and we would hit a panic.
1172 * For anything less than 256 MB alignment, we have
1173 * to make sure that it is not adjacent to anything.
1174 * If the new chunk is adjacent to phys_install, we
1175 * truncate it to a 4 MB boundary. 4 MB is somewhat
1176 * arbitrary. However, we do not want to create
1177 * very small segments because they can cause problems.
1178 * For example, the extreme case of an 8K segment would fail
1179 * kphysm_add_memory_dynamic().
1180 */
1181 if ((ml->ml_address & (MH_MPSS_ALIGNMENT - 1)) ||
1182 (ml->ml_size & (MH_MPSS_ALIGNMENT - 1))) {
1183
1184 memlist_read_lock();
1185 rv = memlist_touch(phys_install, ml0.ml_address);
1186 memlist_read_unlock();
1187
1188 if (rv) {
1189 new_pa = roundup(ml0.ml_address + 1, MH_MIN_ALIGNMENT);
1190 nbytes = (new_pa - ml0.ml_address);
1191 if (nbytes >= ml0.ml_size) {
1192 t_mp->sbm_dyn_segs =
1193 memlist_del_span(t_mp->sbm_dyn_segs,
1194 ml0.ml_address, ml0.ml_size);
1195 continue;
1196 }
1197 t_mp->sbm_dyn_segs =
1198 memlist_del_span(t_mp->sbm_dyn_segs,
1199 ml0.ml_address, nbytes);
1200 ml0.ml_size -= nbytes;
1201 ml0.ml_address = new_pa;
1202 }
1203
1204 if (ml0.ml_size == 0) {
1205 continue;
1206 }
1207
1208 memlist_read_lock();
1209 rv = memlist_touch(phys_install, ml0.ml_address + ml0.ml_size);
1210 memlist_read_unlock();
1211
1212 if (rv) {
1213 new_pa = rounddown(ml0.ml_address + ml0.ml_size - 1,
1214 MH_MIN_ALIGNMENT);
1215 nbytes = (ml0.ml_address + ml0.ml_size - new_pa);
1216 if (nbytes >= ml0.ml_size) {
1217 t_mp->sbm_dyn_segs =
1218 memlist_del_span(t_mp->sbm_dyn_segs,
1219 ml0.ml_address, ml0.ml_size);
1220 continue;
1221 }
1222 t_mp->sbm_dyn_segs =
1223 memlist_del_span(t_mp->sbm_dyn_segs,
1224 new_pa, nbytes);
1225 ml0.ml_size -= nbytes;
1226 }
1227
1228 if (ml0.ml_size > 0) {
1229 dr_add_memory_spans(s_mp, &ml0);
1230 }
1231 } else if (ml0.ml_size > 0) {
1232 dr_add_memory_spans(s_mp, &ml0);
1233 }
1234 }
1235 memlist_delete(t_excess_mlist);
1236 return (err);
1237 }
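/*
 * A sketch of the edge trimming above, assuming MH_MIN_ALIGNMENT is
 * the 4 MB boundary mentioned in the comment: if a misaligned chunk
 * begins at 0x10000000 and phys_install already ends exactly there,
 * then new_pa = roundup(0x10000001, 4 MB) == 0x10400000, so the
 * first 4 MB of the chunk is dropped (and deleted from sbm_dyn_segs)
 * rather than allowed to coalesce with the neighboring segment.
 */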
1238
1239 static int
1240 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1241 {
1242 uint64_t sz = s_mp->sbm_slice_size;
1243 uint64_t sm = sz - 1;
1244 /* old and new below refer to PAs before and after copy-rename */
1245 uint64_t s_old_basepa, s_new_basepa;
1246 uint64_t t_old_basepa, t_new_basepa;
1247 dr_mem_unit_t *t_mp, *x_mp;
1248 drmach_mem_info_t minfo;
1249 struct memlist *ml;
1250 struct memlist *t_excess_mlist;
1251 int rv;
1252 int s_excess_mem_deleted = 0;
1253 sbd_error_t *err;
1254 static fn_t f = "dr_post_detach_mem_unit";
1255
1256 PR_MEM("%s...\n", f);
1257
1258 /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1259 PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1260 f, s_mp->sbm_cm.sbdev_path);
1261 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1262
1263 /* sanity check */
1264 ASSERT(s_mp->sbm_del_mlist == NULL ||
1265 (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1266
1267 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1268 t_mp = s_mp->sbm_peer;
1269 ASSERT(t_mp != NULL);
1270 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1271 ASSERT(t_mp->sbm_peer == s_mp);
1272
1273 ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1274 ASSERT(t_mp->sbm_del_mlist);
1275
1276 PR_MEM("%s: target %s: deleted memlist:\n",
1277 f, t_mp->sbm_cm.sbdev_path);
1278 PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1279 } else {
1280 /* there is no target unit */
1281 t_mp = NULL;
1282 }
1283
1284 /*
1285 * Verify the memory really did successfully detach
1286 * by checking for its non-existence in phys_install.
1287 */
1288 rv = 0;
1289 memlist_read_lock();
1290 if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1291 x_mp = s_mp;
1292 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1293 }
1294 if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1295 x_mp = t_mp;
1296 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1297 }
1298 memlist_read_unlock();
1299
1300 if (rv) {
1301 /* error: memlist still in phys_install */
1302 DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1303 }
1304
1305 /*
1306 * clean mem unit state and bail out if an error has been recorded.
1307 */
1308 rv = 0;
1309 if (s_mp->sbm_cm.sbdev_error) {
1310 PR_MEM("%s: %s flags=%x", f,
1311 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1312 DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1313 DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1314 dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1315 rv = -1;
1316 }
1317 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1318 PR_MEM("%s: %s flags=%x", f,
1319 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1320 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1321 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1322 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1323 rv = -1;
1324 }
1325 if (rv)
1326 goto cleanup;
1327
1328 s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1329 err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1330 ASSERT(err == NULL);
1331 s_new_basepa = minfo.mi_basepa;
1332
1333 PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1334 PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1335
1336 if (t_mp != NULL) {
1337 struct memlist *s_copy_mlist;
1338
1339 t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1340 err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1341 ASSERT(err == NULL);
1342 t_new_basepa = minfo.mi_basepa;
1343
1344 PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1345 PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1346
1347 /*
1348 * Construct copy list with original source addresses.
1349 * Used to add back excess target mem.
1350 */
1351 s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1352 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1353 s_copy_mlist = memlist_del_span(s_copy_mlist,
1354 ml->ml_address, ml->ml_size);
1355 }
1356
1357 PR_MEM("%s: source copy list:\n:", f);
1358 PR_MEMLIST_DUMP(s_copy_mlist);
1359
1360 /*
1361 * We had to swap mem-units, so update
1362 * memlists accordingly with new base
1363 * addresses.
1364 */
1365 for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
1366 ml->ml_address -= t_old_basepa;
1367 ml->ml_address += t_new_basepa;
1368 }
1369
1370 /*
1371 * There is no need to explicitly rename the target delete
1372 * memlist, because sbm_del_mlist and sbm_mlist always
1373 * point to the same memlist for a copy/rename operation.
1374 */
1375 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1376
1377 PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1378 PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1379
1380 for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
1381 ml->ml_address -= s_old_basepa;
1382 ml->ml_address += s_new_basepa;
1383 }
1384
1385 PR_MEM("%s: renamed source memlist:\n", f);
1386 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1387 PR_MEM("%s: source dyn seg memlist:\n", f);
1388 PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
1389
1390 /*
1391 * Keep track of dynamically added segments
1392 * since they cannot be split if we need to delete
1393 * excess source memory later for this board.
1394 */
1395 if (t_mp->sbm_dyn_segs)
1396 memlist_delete(t_mp->sbm_dyn_segs);
1397 t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1398 s_mp->sbm_dyn_segs = NULL;
1399
1400 /*
1401 * Add back excess target memory.
1402 * Subtract out the portion of the target memory
1403 * node that was taken over by the source memory
1404 * node.
1405 */
1406 t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1407 for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
1408 t_excess_mlist =
1409 memlist_del_span(t_excess_mlist,
1410 ml->ml_address, ml->ml_size);
1411 }
1412 PR_MEM("%s: excess memlist:\n", f);
1413 PR_MEMLIST_DUMP(t_excess_mlist);
1414
1415 /*
1416 * Update dynamically added segs
1417 */
1418 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1419 t_mp->sbm_dyn_segs =
1420 memlist_del_span(t_mp->sbm_dyn_segs,
1421 ml->ml_address, ml->ml_size);
1422 }
1423 for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
1424 t_mp->sbm_dyn_segs =
1425 memlist_cat_span(t_mp->sbm_dyn_segs,
1426 ml->ml_address, ml->ml_size);
1427 }
1428 PR_MEM("%s: %s: updated dynamic seg list:\n",
1429 f, t_mp->sbm_cm.sbdev_path);
1430 PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1431
1432 if (t_excess_mlist != NULL) {
1433 err = dr_process_excess_mlist(s_mp, t_mp,
1434 t_excess_mlist);
1435 s_excess_mem_deleted = 1;
1436 }
1437
1438 memlist_delete(s_copy_mlist);
1439
1440 #ifdef DEBUG
1441 /*
1442 * s_mp->sbm_del_mlist may still be needed
1443 */
1444 PR_MEM("%s: source delete memory flag %d",
1445 f, s_excess_mem_deleted);
1446 PR_MEM("%s: source delete memlist", f);
1447 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1448 #endif
1449
1450 }
1451
1452 if (t_mp != NULL) {
1453 /* delete target's entire address space */
1454 err = drmach_mem_del_span(
1455 t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1456 if (err)
1457 DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1458 ASSERT(err == NULL);
1459
1460 /*
1461 * After the copy/rename, the original address space
1462 * for the source board (which is now located on the
1463 * target board) may now have some excess to be deleted.
1464 * That excess memory on the source board is kept in the
1465 * source board's sbm_del_mlist.
1466 */
1467 for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1468 ml = ml->ml_next) {
1469 PR_MEM("%s: delete source excess memory", f);
1470 PR_MEMLIST_DUMP(ml);
1471
1472 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1473 ml->ml_address, ml->ml_size);
1474 if (err)
1475 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1476 ASSERT(err == NULL);
1477 }
1478
1479 } else {
1480 /* delete board's entire address space */
1481 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1482 s_old_basepa & ~ sm, sz);
1483 if (err)
1484 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1485 ASSERT(err == NULL);
1486 }
1487
1488 cleanup:
1489 /* clean up target mem unit */
1490 if (t_mp != NULL) {
1491 memlist_delete(t_mp->sbm_del_mlist);
1492 /* no need to delete sbm_mlist, it shares sbm_del_mlist */
1493
1494 t_mp->sbm_del_mlist = NULL;
1495 t_mp->sbm_mlist = NULL;
1496 t_mp->sbm_peer = NULL;
1497 t_mp->sbm_flags = 0;
1498 t_mp->sbm_cm.sbdev_busy = 0;
1499 dr_init_mem_unit_data(t_mp);
1500
1501 }
1502 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1503 /*
1504 * now that copy/rename has completed, undo this
1505 * work that was done in dr_release_mem_done.
1506 */
1507 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1508 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1509 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1510 }
1511
1512 /*
1513 * clean up (source) board's mem unit structure.
1514 * NOTE: sbm_mlist is retained if no error has been recorded (in other
1515 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1516 * referred to elsewhere as the cached memlist. The cached memlist
1517 * is used to re-attach (configure back in) this memunit from the
1518 * unconfigured state. The memlist is retained because it may
1519 * represent bad pages that were detected while the memory was
1520 * configured into the OS. The OS deletes bad pages from phys_install.
1521 * Those deletes, if any, will be represented in the cached mlist.
1522 */
1523 if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1524 memlist_delete(s_mp->sbm_del_mlist);
1525
1526 if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1527 memlist_delete(s_mp->sbm_mlist);
1528 s_mp->sbm_mlist = NULL;
1529 }
1530
1531 if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1532 memlist_delete(s_mp->sbm_dyn_segs);
1533 s_mp->sbm_dyn_segs = NULL;
1534 }
1535
1536 s_mp->sbm_del_mlist = NULL;
1537 s_mp->sbm_peer = NULL;
1538 s_mp->sbm_flags = 0;
1539 s_mp->sbm_cm.sbdev_busy = 0;
1540 dr_init_mem_unit_data(s_mp);
1541
1542 PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1543 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1544
1545 return (0);
1546 }
1547
1548 /*
1549 * Successful return from this function will have the memory
1550 * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1551 * and waiting. This routine's job is to select the memory that
1552 * actually has to be released (detached) which may not necessarily
1553 * be the same memory node that came in via devlist[],
1554 * i.e. a copy-rename is needed.
1555 */
1556 int
1557 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1558 {
1559 int d;
1560 int err_flag = 0;
1561 static fn_t f = "dr_pre_release_mem";
1562
1563 PR_MEM("%s...\n", f);
1564
1565 for (d = 0; d < devnum; d++) {
1566 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1567 int rv;
1568 memquery_t mq;
1569 struct memlist *ml;
1570
1571 if (mp->sbm_cm.sbdev_error) {
1572 err_flag = 1;
1573 continue;
1574 } else if (!kcage_on) {
1575 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1576 err_flag = 1;
1577 continue;
1578 }
1579
1580 if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1581 /*
1582 * Board is currently involved in a delete
1583 * memory operation. Can't detach this guy until
1584 * that operation completes.
1585 */
1586 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1587 err_flag = 1;
1588 break;
1589 }
1590
1591 /* flags should be clean at this time */
1592 ASSERT(mp->sbm_flags == 0);
1593
1594 ASSERT(mp->sbm_mlist == NULL);
1595 ASSERT(mp->sbm_del_mlist == NULL);
1596 if (mp->sbm_mlist != NULL) {
1597 memlist_delete(mp->sbm_mlist);
1598 mp->sbm_mlist = NULL;
1599 }
1600
1601 ml = dr_get_memlist(mp);
1602 if (ml == NULL) {
1603 err_flag = 1;
1604 PR_MEM("%s: no memlist found for %s\n",
1605 f, mp->sbm_cm.sbdev_path);
1606 continue;
1607 }
1608
1609 /*
1610 * Check whether the detaching memory requires a
1611 * copy-rename.
1612 */
1613 ASSERT(mp->sbm_npages != 0);
1614
1615 rv = dr_del_mlist_query(ml, &mq);
1616 if (rv != KPHYSM_OK) {
1617 memlist_delete(ml);
1618 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1619 err_flag = 1;
1620 break;
1621 }
1622
1623 if (mq.nonrelocatable != 0) {
1624 if (!(dr_cmd_flags(hp) &
1625 (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1626 memlist_delete(ml);
1627 /* caller wasn't prompted for a suspend */
1628 dr_dev_err(CE_WARN, &mp->sbm_cm,
1629 ESBD_QUIESCE_REQD);
1630 err_flag = 1;
1631 break;
1632 }
1633 }
1634
1635 /* allocate a kphysm handle */
1636 rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1637 if (rv != KPHYSM_OK) {
1638 memlist_delete(ml);
1639
1640 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1641 err_flag = 1;
1642 break;
1643 }
1644 mp->sbm_flags |= DR_MFLAG_RELOWNER;
1645
1646 if ((mq.nonrelocatable != 0) ||
1647 dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1648 /*
1649 * Either the detaching memory node contains
1650 * non-reloc memory or we failed to reserve the
1651 * detaching memory node (which did _not_ have
1652 * any non-reloc memory, i.e. some non-reloc
1653 * memory came onto the board in the meantime).
1654 */
1655
1656 if (dr_select_mem_target(hp, mp, ml)) {
1657 int rv;
1658
1659 /*
1660 * We had no luck locating a target
1661 * memory node to be the recipient of
1662 * the non-reloc memory on the node
1663 * we're trying to detach.
1664 * Clean up by disposing of the mem handle
1665 * and the mem list.
1666 */
1667 rv = kphysm_del_release(mp->sbm_memhandle);
1668 if (rv != KPHYSM_OK) {
1669 /*
1670 * can do nothing but complain and
1671 * hope it is helpful for debugging
1672 */
1673 cmn_err(CE_WARN, "%s: unexpected"
1674 " kphysm_del_release return"
1675 " value %d",
1676 f, rv);
1677 }
1678 mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1679
1680 memlist_delete(ml);
1681
1682 /* make sure sbm_flags is clean */
1683 ASSERT(mp->sbm_flags == 0);
1684
1685 dr_dev_err(CE_WARN,
1686 &mp->sbm_cm, ESBD_NO_TARGET);
1687
1688 err_flag = 1;
1689 break;
1690 }
1691
1692 /*
1693 * ml is not memlist_delete'd here because
1694 * it has been assigned to mp->sbm_mlist
1695 * by dr_select_mem_target.
1696 */
1697 } else {
1698 /* no target needed to detach this board */
1699 mp->sbm_flags |= DR_MFLAG_RESERVED;
1700 mp->sbm_peer = NULL;
1701 mp->sbm_del_mlist = ml;
1702 mp->sbm_mlist = ml;
1703 mp->sbm_cm.sbdev_busy = 1;
1704 }
1705 #ifdef DEBUG
1706 ASSERT(mp->sbm_mlist != NULL);
1707
1708 if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1709 PR_MEM("%s: release of %s requires copy/rename;"
1710 " selected target board %s\n",
1711 f,
1712 mp->sbm_cm.sbdev_path,
1713 mp->sbm_peer->sbm_cm.sbdev_path);
1714 } else {
1715 PR_MEM("%s: copy/rename not required to release %s\n",
1716 f, mp->sbm_cm.sbdev_path);
1717 }
1718
1719 ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1720 ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1721 #endif
1722 }
1723
1724 return (err_flag ? -1 : 0);
1725 }
1726
1727 void
1728 dr_release_mem_done(dr_common_unit_t *cp)
1729 {
1730 dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
1731 dr_mem_unit_t *t_mp, *mp;
1732 int rv;
1733 static fn_t f = "dr_release_mem_done";
1734
1735 /*
1736 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1737 * has a target unit.
1738 */
1739 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1740 t_mp = s_mp->sbm_peer;
1741 ASSERT(t_mp != NULL);
1742 ASSERT(t_mp->sbm_peer == s_mp);
1743 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1744 ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1745 } else {
1746 /* there is no target unit */
1747 t_mp = NULL;
1748 }
1749
1750 /* free delete handle */
1751 ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1752 ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1753 rv = kphysm_del_release(s_mp->sbm_memhandle);
1754 if (rv != KPHYSM_OK) {
1755 /*
1756 * can do nothing but complain and
1757 * hope it is helpful for debugging
1758 */
1759 cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1760 " return value %d", f, rv);
1761 }
1762 s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1763
1764 /*
1765 * If an error was encountered during release, clean up
1766 * the source (and target, if present) unit data.
1767 */
1768 /* XXX Can we know that sbdev_error was encountered during release? */
1769 if (s_mp->sbm_cm.sbdev_error != NULL) {
1770
1771 if (t_mp != NULL) {
1772 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1773 t_mp->sbm_del_mlist = NULL;
1774
1775 if (t_mp->sbm_mlist != NULL) {
1776 memlist_delete(t_mp->sbm_mlist);
1777 t_mp->sbm_mlist = NULL;
1778 }
1779
1780 t_mp->sbm_peer = NULL;
1781 t_mp->sbm_flags = 0;
1782 t_mp->sbm_cm.sbdev_busy = 0;
1783 }
1784
1785 if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1786 memlist_delete(s_mp->sbm_del_mlist);
1787 s_mp->sbm_del_mlist = NULL;
1788
1789 if (s_mp->sbm_mlist != NULL) {
1790 memlist_delete(s_mp->sbm_mlist);
1791 s_mp->sbm_mlist = NULL;
1792 }
1793
1794 s_mp->sbm_peer = NULL;
1795 s_mp->sbm_flags = 0;
1796 s_mp->sbm_cm.sbdev_busy = 0;
1797
1798 /* bail out */
1799 return;
1800 }
1801
1802 DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1803 dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1804
1805 if (t_mp != NULL) {
1806 /*
1807 * the kphysm delete operation that drained the source
1808 * board also drained this target board. Since the source
1809 * board drain is now known to have succeeded, we know this
1810 * target board is drained too.
1811 *
1812 * Because DR_DEV_SET_RELEASED and dr_device_transition
1813 * are done here, dr_release_dev_done should not
1814 * fail.
1815 */
1816 DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1817 dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1818
1819 /*
1820 * NOTE: do not transition the target's board state,
1821 * even if the mem-unit was the last configured
1822 * unit of the board. When copy/rename completes,
1823 * this mem-unit will be transitioned back to
1824 * the configured state. In the meantime, the
1825 * board's state must remain as is.
1826 */
1827 }
1828
1829 /* if board(s) had deleted memory, verify it is gone */
1830 rv = 0;
1831 memlist_read_lock();
1832 if (s_mp->sbm_del_mlist != NULL) {
1833 mp = s_mp;
1834 rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1835 }
1836 if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1837 mp = t_mp;
1838 rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1839 }
1840 memlist_read_unlock();
1841 if (rv) {
1842 cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1843 "deleted memory still found in phys_install",
1844 f,
1845 (mp == t_mp ? "target " : ""),
1846 mp->sbm_cm.sbdev_bp->b_num,
1847 mp->sbm_cm.sbdev_unum);
1848
1849 DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1850 return;
1851 }
1852
1853 s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1854 if (t_mp != NULL)
1855 t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1856
1857 /* this should not fail */
1858 if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1859 /* catch this in debug kernels */
1860 ASSERT(0);
1861 return;
1862 }
1863
1864 PR_MEM("%s: marking %s release DONE\n",
1865 f, s_mp->sbm_cm.sbdev_path);
1866
1867 s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1868
1869 if (t_mp != NULL) {
1870 /* should not fail */
1871 rv = dr_release_dev_done(&t_mp->sbm_cm);
1872 if (rv != 0) {
1873 /* catch this in debug kernels */
1874 ASSERT(0);
1875 return;
1876 }
1877
1878 PR_MEM("%s: marking %s release DONE\n",
1879 f, t_mp->sbm_cm.sbdev_path);
1880
1881 t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1882 }
1883 }
1884
1885 /*ARGSUSED*/
1886 int
1887 dr_disconnect_mem(dr_mem_unit_t *mp)
1888 {
1889 static fn_t f = "dr_disconnect_mem";
1890 update_membounds_t umb;
1891
1892 #ifdef DEBUG
1893 int state = mp->sbm_cm.sbdev_state;
1894 ASSERT(state == DR_STATE_CONNECTED ||
1895 state == DR_STATE_UNCONFIGURED);
1896 #endif
1897
1898 PR_MEM("%s...\n", f);
1899
1900 if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1901 memlist_delete(mp->sbm_del_mlist);
1902 mp->sbm_del_mlist = NULL;
1903
1904 if (mp->sbm_mlist) {
1905 memlist_delete(mp->sbm_mlist);
1906 mp->sbm_mlist = NULL;
1907 }
1908
1909 /*
1910 * Remove memory from lgroup
1911 * For now, only board info is required.
1912 */
1913 umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1914 umb.u_base = (uint64_t)-1;
1915 umb.u_len = (uint64_t)-1;
1916
1917 lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1918
1919 return (0);
1920 }
1921
1922 int
1923 dr_cancel_mem(dr_mem_unit_t *s_mp)
1924 {
1925 dr_mem_unit_t *t_mp;
1926 dr_state_t state;
1927 static fn_t f = "dr_cancel_mem";
1928
1929 state = s_mp->sbm_cm.sbdev_state;
1930
1931 if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1932 /* must cancel source board, not target board */
1933 /* TODO: set error */
1934 return (-1);
1935 } else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1936 t_mp = s_mp->sbm_peer;
1937 ASSERT(t_mp != NULL);
1938 ASSERT(t_mp->sbm_peer == s_mp);
1939
1940 /* must always match the source board's state */
1941 /* TODO: is this assertion correct? */
1942 ASSERT(t_mp->sbm_cm.sbdev_state == state);
1943 } else {
1944 /* there is no target unit */
1945 t_mp = NULL;
1946 }
1947
1948 switch (state) {
1949 case DR_STATE_UNREFERENCED: /* state set by dr_release_dev_done */
1950 ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1951
1952 if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1953 PR_MEM("%s: undoing target %s memory delete\n",
1954 f, t_mp->sbm_cm.sbdev_path);
1955 dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1956
1957 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1958 }
1959
1960 if (s_mp->sbm_del_mlist != NULL) {
1961 PR_MEM("%s: undoing %s memory delete\n",
1962 f, s_mp->sbm_cm.sbdev_path);
1963
1964 dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1965 }
1966
1967 /*FALLTHROUGH*/
1968
1969 /* TODO: should no longer be possible to see the release state here */
1970 case DR_STATE_RELEASE: /* state set by dr_release_mem_done */
1971
1972 ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1973
1974 if (t_mp != NULL) {
1975 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1976 t_mp->sbm_del_mlist = NULL;
1977
1978 if (t_mp->sbm_mlist != NULL) {
1979 memlist_delete(t_mp->sbm_mlist);
1980 t_mp->sbm_mlist = NULL;
1981 }
1982
1983 t_mp->sbm_peer = NULL;
1984 t_mp->sbm_flags = 0;
1985 t_mp->sbm_cm.sbdev_busy = 0;
1986 dr_init_mem_unit_data(t_mp);
1987
1988 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1989
1990 dr_device_transition(
1991 &t_mp->sbm_cm, DR_STATE_CONFIGURED);
1992 }
1993
1994 if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1995 memlist_delete(s_mp->sbm_del_mlist);
1996 s_mp->sbm_del_mlist = NULL;
1997
1998 if (s_mp->sbm_mlist != NULL) {
1999 memlist_delete(s_mp->sbm_mlist);
2000 s_mp->sbm_mlist = NULL;
2001 }
2002
2003 s_mp->sbm_peer = NULL;
2004 s_mp->sbm_flags = 0;
2005 s_mp->sbm_cm.sbdev_busy = 0;
2006 dr_init_mem_unit_data(s_mp);
2007
2008 return (0);
2009
2010 default:
2011 PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
2012 f, (int)state, s_mp->sbm_cm.sbdev_path);
2013
2014 return (-1);
2015 }
2016 /*NOTREACHED*/
2017 }
2018
2019 void
2020 dr_init_mem_unit(dr_mem_unit_t *mp)
2021 {
2022 dr_state_t new_state;
2023
2024
2025 if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
2026 new_state = DR_STATE_CONFIGURED;
2027 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2028 } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2029 new_state = DR_STATE_CONNECTED;
2030 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2031 } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2032 new_state = DR_STATE_OCCUPIED;
2033 } else {
2034 new_state = DR_STATE_EMPTY;
2035 }
2036
2037 if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2038 dr_init_mem_unit_data(mp);
2039
2040 /* delay transition until fully initialized */
2041 dr_device_transition(&mp->sbm_cm, new_state);
2042 }
2043
2044 static void
2045 dr_init_mem_unit_data(dr_mem_unit_t *mp)
2046 {
2047 drmachid_t id = mp->sbm_cm.sbdev_id;
2048 drmach_mem_info_t minfo;
2049 sbd_error_t *err;
2050 static fn_t f = "dr_init_mem_unit_data";
2051 update_membounds_t umb;
2052
2053 PR_MEM("%s...\n", f);
2054
2055 /* a little sanity checking */
2056 ASSERT(mp->sbm_peer == NULL);
2057 ASSERT(mp->sbm_flags == 0);
2058
2059 if (err = drmach_mem_get_info(id, &minfo)) {
2060 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2061 return;
2062 }
2063 mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2064 mp->sbm_npages = _b64top(minfo.mi_size);
2065 mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2066 mp->sbm_slice_size = minfo.mi_slice_size;
2067
2068 /*
2069 * Add memory to lgroup
2070 */
2071 umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2072 umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2073 umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2074
2075 lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2076
2077 PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2078 f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2079 }
2080
2081 static int
2082 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2083 {
2084 int err;
2085 pfn_t base;
2086 pgcnt_t npgs;
2087 struct memlist *mc;
2088 static fn_t f = "dr_reserve_mem_spans";
2089
2090 PR_MEM("%s...\n", f);
2091
2092 /*
2093 * Walk the supplied memlist scheduling each span for removal
2094 * with kphysm_del_span. It is possible that a span may intersect
2095 * an area occupied by the cage.
2096 */
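/*
 * For reference, a minimal sketch (not driver code) of the
 * kphysm_del handle lifecycle this reservation participates in:
 *
 *	memhandle_t mh;
 *	if (kphysm_del_gethandle(&mh) == KPHYSM_OK) {
 *		(void) kphysm_del_span(mh, base, npgs);	  reserve spans
 *		... kphysm_del_start() or kphysm_del_cancel() ...
 *		(void) kphysm_del_release(mh);		  drop the handle
 *	}
 *
 * Reserving a span does not remove memory; removal only begins
 * when kphysm_del_start() is called on the handle.
 */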
2097 for (mc = ml; mc != NULL; mc = mc->ml_next) {
2098 base = _b64top(mc->ml_address);
2099 npgs = _b64top(mc->ml_size);
2100
2101 err = kphysm_del_span(*mhp, base, npgs);
2102 if (err != KPHYSM_OK) {
2103 cmn_err(CE_WARN, "%s: memory reserve failed."
2104 " unexpected kphysm_del_span return value %d;"
2105 " basepfn=0x%lx npages=%ld",
2106 f, err, base, npgs);
2107
2108 return (-1);
2109 }
2110 }
2111
2112 return (0);
2113 }
2114
2115 #define DR_SMT_NPREF_SETS 6
2116 #define DR_SMT_NUNITS_PER_SET (MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)
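/*
 * For reference: sets[] (allocated in dr_select_mem_target below) is a
 * flat array of DR_SMT_NPREF_SETS preference groups, each holding one
 * slot per (board, mem-unit) pair. A candidate's slot is:
 *
 *	idx = DR_SMT_NUNITS_PER_SET * preference +
 *	    board_num * MAX_MEM_UNITS_PER_BOARD + unit_num
 *
 * e.g. with hypothetical values MAX_BOARDS = 16 and
 * MAX_MEM_UNITS_PER_BOARD = 1, a DR_TP_LARGE (1) candidate on board 3
 * lands at 16 * 1 + 3 * 1 + 0 = 19.
 */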
2117
2118 /* debug counters */
2119 int dr_smt_realigned;
2120 int dr_smt_preference[DR_SMT_NPREF_SETS];
2121
2122 #ifdef DEBUG
2123 uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
2124 #endif
2125
2126 /*
2127 * Find and reserve a copy/rename target board suitable for the
2128 * given source board.
2129 * All boards in the system are examined and categorized according to
2130 * how their memory size compares with the source board's. Order of
2131 * preference is:
2132 * 1st copy all source, source/target same size
2133 * 2nd copy all source, larger target
2134 * 3rd copy only the nonrelocatable source span
2135 */
2136 static int
2137 dr_select_mem_target(dr_handle_t *hp,
2138 dr_mem_unit_t *s_mp, struct memlist *s_ml)
2139 {
2140 dr_target_pref_t preference; /* lower value is higher preference */
2141 int idx;
2142 dr_mem_unit_t **sets;
2143
2144 int t_bd;
2145 int t_unit;
2146 int rv;
2147 dr_board_t *s_bp, *t_bp;
2148 dr_mem_unit_t *t_mp, *c_mp;
2149 struct memlist *d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2150 memquery_t s_mq = {0};
2151 static fn_t f = "dr_select_mem_target";
2152
2153 PR_MEM("%s...\n", f);
2154
2155 ASSERT(s_ml != NULL);
2156
2157 sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2158 DR_SMT_NPREF_SETS);
2159
2160 s_bp = hp->h_bd;
2161
2162 ASSERT(s_mp->sbm_npages != 0);
2163
2164 /*
2165 * Find non-relocatable span on source board.
2166 */
2167 rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2168 if (rv != KPHYSM_OK) {
2169 PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2170 " return value %d; basepfn 0x%lx, npages %ld\n",
2171 f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2172 s_mp->sbm_npages);
2173 return (-1);
2174 }
2175
2176 ASSERT(s_mq.phys_pages != 0);
2177 ASSERT(s_mq.nonrelocatable != 0);
2178
2179 PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2180 s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2181 s_mq.last_nonrelocatable);
2182
2183 /* break down s_ml if it contains dynamic segments */
2184 b_ml = memlist_dup(s_ml);
2185
2186 for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
2187 b_ml = memlist_del_span(b_ml, ml->ml_address, ml->ml_size);
2188 b_ml = memlist_cat_span(b_ml, ml->ml_address, ml->ml_size);
2189 }
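/*
 * The del/cat pair above re-inserts each dynamic segment's span as
 * a discrete entry, leaving b_ml split at dynamic segment
 * boundaries for dr_get_nonreloc_mlist() below.
 */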
2190
2191
2192 /*
2193 * Make one pass through all memory units on all boards
2194 * and categorize them with respect to the source board.
2195 */
2196 for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2197 /*
2198 * The board structs are a contiguous array
2199 * so we take advantage of that to find the
2200 * correct board struct pointer for a given
2201 * board number.
2202 */
2203 t_bp = dr_lookup_board(t_bd);
2204
2205 /* source board cannot be its own target */
2206 if (s_bp->b_num == t_bp->b_num)
2207 continue;
2208
2209 for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2210
2211 t_mp = dr_get_mem_unit(t_bp, t_unit);
2212
2213 /* this memory node must be attached */
2214 if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2215 continue;
2216
2217 /* source unit cannot be its own target */
2218 if (s_mp == t_mp) {
2219 /* catch this in debug kernels */
2220 ASSERT(0);
2221 continue;
2222 }
2223
2224 /*
2225 * this memory node must not already be reserved
2226 * by some other memory delete operation.
2227 */
2228 if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2229 continue;
2230
2231 /* get target board memlist */
2232 t_ml = dr_get_memlist(t_mp);
2233 if (t_ml == NULL) {
2234 cmn_err(CE_WARN, "%s: no memlist for"
2235 " mem-unit %d, board %d", f,
2236 t_mp->sbm_cm.sbdev_bp->b_num,
2237 t_mp->sbm_cm.sbdev_unum);
2238 continue;
2239 }
2240
2241 preference = dr_get_target_preference(hp, t_mp, s_mp,
2242 t_ml, s_ml, b_ml);
2243
2244 memlist_delete(t_ml);
2245
2246 if (preference == DR_TP_INVALID)
2247 continue;
2248
2249 dr_smt_preference[preference]++;
2250
2251 /* calculate index to start of preference set */
2252 idx = DR_SMT_NUNITS_PER_SET * preference;
2253 /* calculate offset to respective element */
2254 idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2255
2256 ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2257 sets[idx] = t_mp;
2258 }
2259 }
2260
2261 if (b_ml != NULL)
2262 memlist_delete(b_ml);
2263
2264 /*
2265 * NOTE: this would be a good place to sort each candidate
2266 * set into some desired order, e.g. memory size in ascending
2267 * order. Without an additional sorting step here, the order
2268 * within a set is ascending board number order.
2269 */
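/*
 * A minimal sketch of such a sort (hypothetical, not part of this
 * driver): a stable insertion sort of each preference group by
 * sbm_npages ascending, with NULL slots treated as largest so they
 * sink to the end of their group:
 *
 *	#define	KEY(mp)	((mp) == NULL ? (pgcnt_t)-1 : (mp)->sbm_npages)
 *	for (i = 0; i < DR_SMT_NPREF_SETS; i++) {
 *		dr_mem_unit_t **grp = &sets[i * DR_SMT_NUNITS_PER_SET];
 *		for (j = 1; j < DR_SMT_NUNITS_PER_SET; j++) {
 *			dr_mem_unit_t *tmp = grp[j];
 *			for (k = j; k > 0 && KEY(grp[k - 1]) > KEY(tmp); k--)
 *				grp[k] = grp[k - 1];
 *			grp[k] = tmp;
 *		}
 *	}
 *
 * The loop below simply skips NULL entries, so any permutation
 * within a group is safe.
 */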
2270
2271 c_mp = NULL;
2272 x_ml = NULL;
2273 t_ml = NULL;
2274 for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2275 memquery_t mq;
2276
2277 preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2278
2279 ASSERT(preference != DR_TP_INVALID);
2280
2281 /* cleanup t_ml after previous pass */
2282 if (t_ml != NULL) {
2283 memlist_delete(t_ml);
2284 t_ml = NULL;
2285 }
2286
2287 /* get candidate target board mem unit */
2288 t_mp = sets[idx];
2289 if (t_mp == NULL)
2290 continue;
2291
2292 /* get target board memlist */
2293 t_ml = dr_get_memlist(t_mp);
2294 if (t_ml == NULL) {
2295 cmn_err(CE_WARN, "%s: no memlist for"
2296 " mem-unit %d, board %d",
2297 f,
2298 t_mp->sbm_cm.sbdev_bp->b_num,
2299 t_mp->sbm_cm.sbdev_unum);
2300
2301 continue;
2302 }
2303
2304 PR_MEM("%s: checking for no-reloc in %s, "
2305 " basepfn=0x%lx, npages=%ld\n",
2306 f,
2307 t_mp->sbm_cm.sbdev_path,
2308 t_mp->sbm_basepfn,
2309 t_mp->sbm_npages);
2310
2311 rv = dr_del_mlist_query(t_ml, &mq);
2312 if (rv != KPHYSM_OK) {
2313 PR_MEM("%s: kphysm_del_span_query:"
2314 " unexpected return value %d\n", f, rv);
2315
2316 continue;
2317 }
2318
2319 if (mq.nonrelocatable != 0) {
2320 PR_MEM("%s: candidate %s has"
2321 " nonrelocatable span [0x%lx..0x%lx]\n",
2322 f,
2323 t_mp->sbm_cm.sbdev_path,
2324 mq.first_nonrelocatable,
2325 mq.last_nonrelocatable);
2326
2327 continue;
2328 }
2329
2330 #ifdef DEBUG
2331 /*
2332 * This is a debug tool for excluding certain boards
2333 * from being selected as a target board candidate.
2334 * dr_ignore_board is only tested by this driver.
2335 * It must be set with adb, obp, /etc/system or your
2336 * favorite debugger.
2337 */
2338 if (dr_ignore_board &
2339 (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2340 PR_MEM("%s: dr_ignore_board flag set,"
2341 " ignoring %s as candidate\n",
2342 f, t_mp->sbm_cm.sbdev_path);
2343 continue;
2344 }
2345 #endif
2346
2347 /*
2348 * Reserve excess source board memory, if any.
2349 *
2350 * Only the nonrelocatable source span will be copied
2351 * so schedule the rest of the source mem to be deleted.
2352 */
2353 switch (preference) {
2354 case DR_TP_NONRELOC:
2355 /*
2356 * Get source copy memlist and use it to construct
2357 * delete memlist.
2358 */
2359 d_ml = memlist_dup(s_ml);
2360 x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2361
2362 /* XXX */
2363 ASSERT(d_ml != NULL);
2364 ASSERT(x_ml != NULL);
2365
2366 for (ml = x_ml; ml != NULL; ml = ml->ml_next) {
2367 d_ml = memlist_del_span(d_ml, ml->ml_address,
2368 ml->ml_size);
2369 }
2370
2371 PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2372 s_mp->sbm_cm.sbdev_path);
2373 PR_MEMLIST_DUMP(d_ml);
2374
2375 /* reserve excess spans */
2376 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2377 d_ml) != 0) {
2378 /* likely more non-reloc pages appeared */
2379 /* TODO: restart from top? */
2380 continue;
2381 }
2382 break;
2383 default:
2384 d_ml = NULL;
2385 break;
2386 }
2387
2388 s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2389
2390 /*
2391 * reserve all memory on target board.
2392 * NOTE: source board's memhandle is used.
2393 *
2394 * If this succeeds (eq 0), then target selection is
2395 * complete and all unwanted memory spans, both source and
2396 * target, have been reserved. Loop is terminated.
2397 */
2398 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2399 PR_MEM("%s: %s: target board memory reserved\n",
2400 f, t_mp->sbm_cm.sbdev_path);
2401
2402 /* a candidate target board is now reserved */
2403 t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2404 c_mp = t_mp;
2405
2406 /* *** EXITING LOOP *** */
2407 break;
2408 }
2409
2410 /* did not successfully reserve the target board. */
2411 PR_MEM("%s: could not reserve target %s\n",
2412 f, t_mp->sbm_cm.sbdev_path);
2413
2414 /*
2415 * NOTE: an undo of the dr_reserve_mem_span work
2416 * will happen automatically when the memhandle
2417 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2418 */
2419
2420 s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2421 }
2422
2423 /* clean up after memlist editing logic */
2424 if (x_ml != NULL)
2425 memlist_delete(x_ml);
2426
2427 FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2428 DR_SMT_NPREF_SETS);
2429
2430 /*
2431 * c_mp will be NULL when the entire sets[] array
2432 * has been searched without reserving a target board.
2433 */
2434 if (c_mp == NULL) {
2435 PR_MEM("%s: %s: target selection failed.\n",
2436 f, s_mp->sbm_cm.sbdev_path);
2437
2438 if (t_ml != NULL)
2439 memlist_delete(t_ml);
2440
2441 return (-1);
2442 }
2443
2444 PR_MEM("%s: found target %s for source %s\n",
2445 f,
2446 c_mp->sbm_cm.sbdev_path,
2447 s_mp->sbm_cm.sbdev_path);
2448
2449 s_mp->sbm_peer = c_mp;
2450 s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2451 s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */
2452 s_mp->sbm_mlist = s_ml;
2453 s_mp->sbm_cm.sbdev_busy = 1;
2454
2455 c_mp->sbm_peer = s_mp;
2456 c_mp->sbm_flags |= DR_MFLAG_TARGET;
2457 c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */
2458 c_mp->sbm_mlist = t_ml;
2459 c_mp->sbm_cm.sbdev_busy = 1;
2460
2461 return (0);
2462 }
2463
2464 /*
2465 * Returns target preference rank (lower value is higher preference):
2466 * DR_TP_INVALID (-1): not a valid copy/rename target board
2467 * DR_TP_SAME (0): copy all source, source/target same size
2468 * DR_TP_LARGE (1): copy all source, larger target
2469 * DR_TP_NONRELOC (2): copy nonrelocatable source span
2470 */
2471 static dr_target_pref_t
2472 dr_get_target_preference(dr_handle_t *hp,
2473 dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2474 struct memlist *t_ml, struct memlist *s_ml,
2475 struct memlist *b_ml)
2476 {
2477 dr_target_pref_t preference;
2478 struct memlist *s_nonreloc_ml = NULL;
2479 drmachid_t t_id;
2480 static fn_t f = "dr_get_target_preference";
2481
2482 t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2483
2484 /*
2485 * Can the entire source board be copied?
2486 */
2487 if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2488 if (s_mp->sbm_npages == t_mp->sbm_npages)
2489 preference = DR_TP_SAME; /* same size */
2490 else
2491 preference = DR_TP_LARGE; /* larger target */
2492 } else {
2493 /*
2494 * Entire source won't fit so try non-relocatable memory only
2495 * (target aligned).
2496 */
2497 s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2498 if (s_nonreloc_ml == NULL) {
2499 PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2500 preference = DR_TP_INVALID;
2501 } else if (dr_memlist_canfit(s_nonreloc_ml,
2502 t_ml, s_mp, t_mp))
2503 preference = DR_TP_NONRELOC;
2504 else
2505 preference = DR_TP_INVALID;
2506 }
2507
2508 if (s_nonreloc_ml != NULL)
2509 memlist_delete(s_nonreloc_ml);
2510
2511 /*
2512 * Force floating board preference lower than all other boards
2513 * if the force flag is present; otherwise disallow the board.
2514 */
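/*
 * E.g. a floating board that would otherwise rank DR_TP_SAME (0)
 * is forced to 0 + DR_TP_FLOATING = 3, below every non-floating
 * rank (0..2); DR_SMT_NPREF_SETS is 6 to cover both ranges.
 */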
2515 if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2516 if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2517 preference += DR_TP_FLOATING;
2518 else
2519 preference = DR_TP_INVALID;
2520 }
2521
2522 PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2523 preference);
2524
2525 return (preference);
2526 }
2527
2528 /*
2529 * Create a memlist representing the source memory that will be copied to
2530 * the target board. The memory to be copied is the maximum amount that
2531 * will fit on the target board.
2532 */
2533 static struct memlist *
2534 dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2535 dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2536 {
2537 struct memlist *t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2538 uint64_t s_slice_mask, s_slice_base;
2539 uint64_t t_slice_mask, t_slice_base;
2540 static fn_t f = "dr_get_copy_mlist";
2541
2542 ASSERT(s_mlist != NULL);
2543 ASSERT(t_mlist != NULL);
2544 ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2545
2546 s_slice_mask = s_mp->sbm_slice_size - 1;
2547 s_slice_base = s_mlist->ml_address & ~s_slice_mask;
2548
2549 t_slice_mask = t_mp->sbm_slice_size - 1;
2550 t_slice_base = t_mlist->ml_address & ~t_slice_mask;
2551
2552 t_ml = memlist_dup(t_mlist);
2553 s_del_ml = memlist_dup(s_mlist);
2554 s_copy_ml = memlist_dup(s_mlist);
2555
2556 /* XXX */
2557 ASSERT(t_ml != NULL);
2558 ASSERT(s_del_ml != NULL);
2559 ASSERT(s_copy_ml != NULL);
2560
2561 /*
2562 * To construct the source copy memlist:
2563 *
2564 * The target memlist is converted to the post-rename
2565 * source addresses. This is the physical address range
2566 * the target will have after the copy-rename. Overlaying
2567 * and deleting this from the current source memlist will
2568 * give the source delete memlist. The copy memlist is
2569 * the reciprocal of the source delete memlist.
2570 */
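/*
 * For example (hypothetical addresses, 4GB slices): with
 * t_slice_base = 0x400000000 and s_slice_base = 0x200000000, a
 * target span at PA 0x410000000 normalizes to slice offset
 * 0x10000000 and converts to source PA 0x210000000, keeping its
 * offset within the slice.
 */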
2571 for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
2572 /*
2573 * Normalize relative to target slice base PA
2574 * in order to preserve slice offsets.
2575 */
2576 ml->ml_address -= t_slice_base;
2577 /*
2578 * Convert to source slice PA address.
2579 */
2580 ml->ml_address += s_slice_base;
2581 }
2582
2583 for (ml = t_ml; ml != NULL; ml = ml->ml_next) {
2584 s_del_ml = memlist_del_span(s_del_ml,
2585 ml->ml_address, ml->ml_size);
2586 }
2587
2588 /*
2589 * Expand the delete mlist to fully include any dynamic segments
2590 * it intersects with.
2591 */
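/*
 * E.g. (hypothetical spans) a delete span [0x300..0x500) that
 * intersects a dynamic segment [0x200..0x400) is widened to
 * [0x200..0x500), since dynamic segments may only be deleted
 * whole.
 */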
2592 for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2593 uint64_t del_base = ml->ml_address;
2594 uint64_t del_end = ml->ml_address + ml->ml_size;
2595 struct memlist *dyn;
2596
2597 for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2598 dyn = dyn->ml_next) {
2599 uint64_t dyn_base = dyn->ml_address;
2600 uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
2601
2602 if (del_base > dyn_base && del_base < dyn_end)
2603 del_base = dyn_base;
2604
2605 if (del_end > dyn_base && del_end < dyn_end)
2606 del_end = dyn_end;
2607 }
2608
2609 x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2610 }
2611
2612 memlist_delete(s_del_ml);
2613 s_del_ml = x_ml;
2614
2615 for (ml = s_del_ml; ml != NULL; ml = ml->ml_next) {
2616 s_copy_ml = memlist_del_span(s_copy_ml,
2617 ml->ml_address, ml->ml_size);
2618 }
2619
2620 PR_MEM("%s: source delete mlist\n", f);
2621 PR_MEMLIST_DUMP(s_del_ml);
2622
2623 PR_MEM("%s: source copy mlist\n", f);
2624 PR_MEMLIST_DUMP(s_copy_ml);
2625
2626 memlist_delete(t_ml);
2627 memlist_delete(s_del_ml);
2628
2629 return (s_copy_ml);
2630 }
2631
2632 /*
2633 * Scan the non-relocatable spans on the source memory
2634 * and construct a minimum mlist that includes all non-reloc
2635 * memory subject to target alignment, and dynamic segment
2636 * constraints where only whole dynamic segments may be deleted.
2637 */
2638 static struct memlist *
2639 dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2640 {
2641 struct memlist *x_ml = NULL;
2642 struct memlist *ml;
2643 static fn_t f = "dr_get_nonreloc_mlist";
2644
2645 PR_MEM("%s: checking for split of dyn seg list:\n", f);
2646 PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2647
2648 for (ml = s_ml; ml; ml = ml->ml_next) {
2649 int rv;
2650 uint64_t nr_base, nr_end;
2651 memquery_t mq;
2652 struct memlist *dyn;
2653
2654 rv = kphysm_del_span_query(
2655 _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
2656 if (rv) {
2657 memlist_delete(x_ml);
2658 return (NULL);
2659 }
2660
2661 if (mq.nonrelocatable == 0)
2662 continue;
2663
2664 PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2665 _ptob64(mq.first_nonrelocatable),
2666 _ptob64(mq.last_nonrelocatable),
2667 mq.first_nonrelocatable,
2668 mq.last_nonrelocatable);
2669
2670 /*
2671 * Align the span at both ends to allow for possible
2672 * cage expansion.
2673 */
2674 nr_base = _ptob64(mq.first_nonrelocatable);
2675 nr_end = _ptob64(mq.last_nonrelocatable + 1);
2676
2677 PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2678 f, nr_base, nr_end);
2679
2680 /*
2681 * Expand the non-reloc span to fully include any
2682 * dynamic segments it intersects with.
2683 */
2684 for (dyn = s_mp->sbm_dyn_segs; dyn != NULL;
2685 dyn = dyn->ml_next) {
2686 uint64_t dyn_base = dyn->ml_address;
2687 uint64_t dyn_end = dyn->ml_address + dyn->ml_size;
2688
2689 if (nr_base > dyn_base && nr_base < dyn_end)
2690 nr_base = dyn_base;
2691
2692 if (nr_end > dyn_base && nr_end < dyn_end)
2693 nr_end = dyn_end;
2694 }
2695
2696 x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2697 }
2698
2699 if (x_ml == NULL) {
2700 PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2701 return (NULL);
2702 }
2703
2704 PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2705 PR_MEMLIST_DUMP(x_ml);
2706
2707 return (x_ml);
2708 }
2709
2710 /*
2711 * Check if source memlist can fit in target memlist while maintaining
2712 * relative offsets within board.
2713 */
2714 static int
2715 dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2716 dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2717 {
2718 int canfit = 0;
2719 struct memlist *s_ml, *t_ml, *ml;
2720 uint64_t s_slice_mask, t_slice_mask;
2721 static fn_t f = "dr_memlist_canfit";
2722
2723 s_ml = memlist_dup(s_mlist);
2724 t_ml = memlist_dup(t_mlist);
2725
2726 if (s_ml == NULL || t_ml == NULL) {
2727 cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2728 goto done;
2729 }
2730
2731 s_slice_mask = s_mp->sbm_slice_size - 1;
2732 t_slice_mask = t_mp->sbm_slice_size - 1;
2733
2734 /*
2735 * Normalize to slice relative offsets.
2736 */
2737 for (ml = s_ml; ml; ml = ml->ml_next)
2738 ml->ml_address &= s_slice_mask;
2739
2740 for (ml = t_ml; ml; ml = ml->ml_next)
2741 ml->ml_address &= t_slice_mask;
2742
2743 canfit = memlist_canfit(s_ml, t_ml);
2744 done:
2745 memlist_delete(s_ml);
2746 memlist_delete(t_ml);
2747
2748 return (canfit);
2749 }
2750
2751 /*
2752 * Memlist support.
2753 */
2754
2755 /*
2756 * Determine whether the source memlist (s_mlist) will
2757 * fit into the target memlist (t_mlist) in terms of
2758 * size and holes. Assumes the caller has normalized the
2759 * memlist physical addresses for comparison.
2760 */
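/*
 * For example (hypothetical, already-normalized chunks, both lists
 * sorted by ascending address):
 *
 *	target: [0x000..0x400) [0x600..0x800)
 *	source: [0x100..0x300) [0x600..0x700)	fits -> returns 1
 *	source: [0x300..0x500)			returns 0; the tail
 *						0x400..0x500 falls in a
 *						target hole
 */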
2761 static int
2762 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2763 {
2764 int rv = 0;
2765 struct memlist *s_ml, *t_ml;
2766
2767 if ((s_mlist == NULL) || (t_mlist == NULL))
2768 return (0);
2769
2770 s_ml = s_mlist;
2771 for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
2772 uint64_t s_start, s_end;
2773 uint64_t t_start, t_end;
2774
2775 t_start = t_ml->ml_address;
2776 t_end = t_start + t_ml->ml_size;
2777
2778 for (; s_ml; s_ml = s_ml->ml_next) {
2779 s_start = s_ml->ml_address;
2780 s_end = s_start + s_ml->ml_size;
2781
2782 if ((s_start < t_start) || (s_end > t_end))
2783 break;
2784 }
2785 }
2786
2787 /*
2788 * If we ran out of source memlist chunks, that means
2789 * we found a home for all of them.
2790 */
2791 if (s_ml == NULL)
2792 rv = 1;
2793
2794 return (rv);
2795 }
2796