1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2019 Peter Tribble.
29 */
30
31 /*
32 * DR memory support routines.
33 */
34
35 #include <sys/note.h>
36 #include <sys/debug.h>
37 #include <sys/types.h>
38 #include <sys/errno.h>
39 #include <sys/param.h>
40 #include <sys/dditypes.h>
41 #include <sys/kmem.h>
42 #include <sys/conf.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/sunndi.h>
46 #include <sys/ddi_impldefs.h>
47 #include <sys/ndi_impldefs.h>
48 #include <sys/sysmacros.h>
49 #include <sys/machsystm.h>
50 #include <sys/spitregs.h>
51 #include <sys/cpuvar.h>
52 #include <sys/promif.h>
53 #include <vm/seg_kmem.h>
54 #include <sys/lgrp.h>
55 #include <sys/platform_module.h>
56
57 #include <vm/page.h>
58
59 #include <sys/dr.h>
60 #include <sys/dr_util.h>
61
62 extern struct memlist *phys_install;
63
64 /* TODO: push this reference below drmach line */
65 extern int kcage_on;
66
67 /* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
68 static char *dr_ie_fmt = "dr_mem.c %d";
69
70 static int dr_post_detach_mem_unit(dr_mem_unit_t *mp);
71 static int dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *mlist);
72 static int dr_select_mem_target(dr_handle_t *hp, dr_mem_unit_t *mp,
73 struct memlist *ml);
74 static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
75
76 static int memlist_canfit(struct memlist *s_mlist,
77 struct memlist *t_mlist);
78
79 /*
80 * dr_mem_unit_t.sbm_flags
81 */
82 #define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
83 #define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
84 #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
85 #define DR_MFLAG_MEMUPSIZE 0x08 /* move from big to small board */
86 #define DR_MFLAG_MEMDOWNSIZE 0x10 /* move from small to big board */
87 #define DR_MFLAG_MEMRESIZE 0x18 /* move to different size board */
88 #define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
89 #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
90
91 /* helper macros */
92 #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
93 #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
94
/*
 * Construct a memlist describing the physical memory belonging to this
 * mem unit.  A memlist cached at unconfigure time is used if present;
 * otherwise one is derived from phys_install by trimming away spans
 * outside the unit's slice, falling back to the platform (drmach)
 * layer if that fails.  Returns NULL on failure (with sbdev_error
 * possibly set).  The caller frees the result with memlist_delete().
 */
static struct memlist *
dr_get_memlist(dr_mem_unit_t *mp)
{
	struct memlist	*mlist = NULL;
	sbd_error_t	*err;
	static fn_t	f = "dr_get_memlist";

	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);

	/*
	 * Return cached memlist, if present.
	 * This memlist will be present following an
	 * unconfigure (a.k.a: detach) of this memunit.
	 * It should only be used in the case where a configure
	 * is bringing this memunit back in without going
	 * through the disconnect and connect states.
	 */
	if (mp->sbm_mlist) {
		PR_MEM("%s: found cached memlist\n", f);

		mlist = memlist_dup(mp->sbm_mlist);
	} else {
		uint64_t basepa = _ptob64(mp->sbm_basepfn);

		/* attempt to construct a memlist using phys_install */

		/*
		 * Round down to slice base address.
		 * Assumes sbm_slice_size is a power of two.
		 */
		basepa &= ~(mp->sbm_slice_size - 1);

		/* get a copy of phys_install to edit */
		memlist_read_lock();
		mlist = memlist_dup(phys_install);
		memlist_read_unlock();

		/* trim lower irrelevant span */
		if (mlist)
			mlist = memlist_del_span(mlist, 0ull, basepa);

		/* trim upper irrelevant span */
		if (mlist) {
			uint64_t endpa;

			basepa += mp->sbm_slice_size;
			endpa = _ptob64(physmax + 1);
			if (endpa > basepa)
				mlist = memlist_del_span(
				    mlist,
				    basepa,
				    endpa - basepa);
		}

		if (mlist) {
			/* successfully built a memlist */
			PR_MEM("%s: derived memlist from phys_install\n", f);
		}

		/* if no mlist yet, try platform layer */
		if (!mlist) {
			err = drmach_mem_get_memlist(
			    mp->sbm_cm.sbdev_id, &mlist);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				mlist = NULL;	/* paranoia */
			}
		}
	}

	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(mlist);

	return (mlist);
}
167
/*
 * Synchronization state shared between dr_release_mem() and the
 * dr_mem_del_done() completion callback while waiting for a kphysm
 * memory delete ("drain") to finish.
 */
typedef struct {
	kcondvar_t	cond;	/* signaled by dr_mem_del_done() */
	kmutex_t	lock;	/* protects error and done */
	int		error;	/* result reported by the delete callback */
	int		done;	/* nonzero once the delete has completed */
} dr_release_mem_sync_t;
174
175 /*
176 * Memory has been logically removed by the time this routine is called.
177 */
178 static void
dr_mem_del_done(void * arg,int error)179 dr_mem_del_done(void *arg, int error)
180 {
181 dr_release_mem_sync_t *ds = arg;
182
183 mutex_enter(&ds->lock);
184 ds->error = error;
185 ds->done = 1;
186 cv_signal(&ds->cond);
187 mutex_exit(&ds->lock);
188 }
189
190 /*
191 * When we reach here the memory being drained should have
192 * already been reserved in dr_pre_release_mem().
193 * Our only task here is to kick off the "drain" and wait
194 * for it to finish.
195 */
void
dr_release_mem(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
	int		err;
	dr_release_mem_sync_t rms;
	static fn_t	f = "dr_release_mem";

	/* check that this memory unit has been reserved */
	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
		return;
	}

	bzero((void *) &rms, sizeof (rms));

	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);

	mutex_enter(&rms.lock);
	err = kphysm_del_start(mp->sbm_memhandle, dr_mem_del_done,
	    (void *) &rms);
	if (err == KPHYSM_OK) {
		/* wait for completion or interrupt */
		while (!rms.done) {
			/*
			 * cv_wait_sig() returns 0 when interrupted by
			 * a signal; in that case cancel the delete and
			 * then wait (uninterruptibly) for the callback
			 * so rms is not torn down while still in use.
			 */
			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
				/* then there is a pending UNIX signal */
				(void) kphysm_del_cancel(mp->sbm_memhandle);

				/* wait for completion */
				while (!rms.done)
					cv_wait(&rms.cond, &rms.lock);
			}
		}
		/* get the result of the memory delete operation */
		err = rms.error;
	}
	mutex_exit(&rms.lock);

	cv_destroy(&rms.cond);
	mutex_destroy(&rms.lock);

	if (err != KPHYSM_OK) {
		int e_code;

		/* translate the kphysm error to an sbd error code */
		switch (err) {
		case KPHYSM_ENOWORK:
			/* nothing to delete is not treated as an error */
			e_code = ESBD_NOERROR;
			break;

		case KPHYSM_EHANDLE:
		case KPHYSM_ESEQUENCE:
			e_code = ESBD_INTERNAL;
			break;

		case KPHYSM_ENOTVIABLE:
			e_code = ESBD_MEM_NOTVIABLE;
			break;

		case KPHYSM_EREFUSED:
			e_code = ESBD_MEM_REFUSED;
			break;

		case KPHYSM_ENONRELOC:
			e_code = ESBD_MEM_NONRELOC;
			break;

		case KPHYSM_ECANCELLED:
			e_code = ESBD_MEM_CANCELLED;
			break;

		case KPHYSM_ERESOURCE:
			e_code = ESBD_MEMFAIL;
			break;

		default:
			cmn_err(CE_WARN,
			    "%s: unexpected kphysm error code %d,"
			    " id 0x%p",
			    f, err, mp->sbm_cm.sbdev_id);

			e_code = ESBD_IO;
			break;
		}

		/* record the failure on the unit, if any */
		if (e_code != ESBD_NOERROR) {
			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
		}
	}
}
286
/*
 * Configure (attach) a memory unit: have the platform layer configure
 * the device, then add each span of the unit's memlist to the running
 * system via kphysm_add_memory_dynamic() and report each added span
 * to the platform layer.  On any failure the partial configure is
 * backed out with drmach_unconfigure().
 */
void
dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
	/*
	 * NOTE(review): hp is in fact used below (dr_lock_status);
	 * this ARGUNUSED lint note looks stale.
	 */
	_NOTE(ARGUNUSED(hp))

	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
	struct memlist	*ml, *mc;
	sbd_error_t	*err;
	static fn_t	f = "dr_attach_mem";

	PR_MEM("%s...\n", f);

	dr_lock_status(hp->h_bd);
	err = drmach_configure(cp->sbdev_id, 0);
	dr_unlock_status(hp->h_bd);
	if (err) {
		DRERR_SET_C(&cp->sbdev_error, &err);
		return;
	}

	ml = dr_get_memlist(mp);
	for (mc = ml; mc; mc = mc->ml_next) {
		int		rv;
		sbd_error_t	*err;

		rv = kphysm_add_memory_dynamic(
		    (pfn_t)(mc->ml_address >> PAGESHIFT),
		    (pgcnt_t)(mc->ml_size >> PAGESHIFT));
		if (rv != KPHYSM_OK) {
			/*
			 * translate kphysm error and
			 * store in devlist error
			 */
			switch (rv) {
			case KPHYSM_ERESOURCE:
				rv = ESBD_NOMEM;
				break;

			case KPHYSM_EFAULT:
				rv = ESBD_FAULT;
				break;

			default:
				rv = ESBD_INTERNAL;
				break;
			}

			if (rv == ESBD_INTERNAL) {
				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
			} else
				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
			break;
		}

		err = drmach_mem_add_span(
		    mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			break;
		}
	}

	memlist_delete(ml);

	/* back out if configure failed */
	if (mp->sbm_cm.sbdev_error != NULL) {
		dr_lock_status(hp->h_bd);
		err = drmach_unconfigure(cp->sbdev_id,
		    DEVI_BRANCH_DESTROY);
		if (err)
			sbd_err_clear(&err);
		dr_unlock_status(hp->h_bd);
	}
}
361
362 #define DR_SCRUB_VALUE 0x0d0e0a0d0b0e0e0fULL
363
/*
 * Scrub the given (deleted) spans by storing DR_SCRUB_VALUE through
 * them one coherency unit at a time, while bound to a cpu with
 * affinity to the memory.  Per the caller's comment in
 * dr_detach_mem(), this leaves all cachelines referencing the memory
 * only in the local cpu's ecache, which is then flushed.
 */
static void
dr_mem_ecache_scrub(dr_mem_unit_t *mp, struct memlist *mlist)
{
#ifdef DEBUG
	clock_t stime = ddi_get_lbolt();
#endif /* DEBUG */

	struct memlist	*ml;
	uint64_t	scrub_value = DR_SCRUB_VALUE;
	processorid_t	cpuid;
	static fn_t	f = "dr_mem_ecache_scrub";

	/* bind to a cpu close to the memory being scrubbed */
	cpuid = drmach_mem_cpu_affinity(mp->sbm_cm.sbdev_id);
	affinity_set(cpuid);

	PR_MEM("%s: using proc %d, memlist...\n", f,
	    (cpuid == CPU_CURRENT) ? CPU->cpu_id : cpuid);
	PR_MEMLIST_DUMP(mlist);

	for (ml = mlist; ml; ml = ml->ml_next) {
		uint64_t	dst_pa;
		uint64_t	nbytes;

		/* calculate the destination physical address */
		dst_pa = ml->ml_address;
		if (ml->ml_address & PAGEOFFSET)
			cmn_err(CE_WARN,
			    "%s: address (0x%lx) not on "
			    "page boundary", f, ml->ml_address);

		nbytes = ml->ml_size;
		if (ml->ml_size & PAGEOFFSET)
			cmn_err(CE_WARN,
			    "%s: size (0x%lx) not on "
			    "page boundary", f, ml->ml_size);

		/*LINTED*/
		while (nbytes > 0) {
			/* write 64 bits to dst_pa */
			stdphys(dst_pa, scrub_value);

			/* increment/decrement by cacheline sizes */
			dst_pa += DRMACH_COHERENCY_UNIT;
			nbytes -= DRMACH_COHERENCY_UNIT;
		}
	}

	/*
	 * flush this cpu's ecache and take care to ensure
	 * that all of it's bus transactions have retired.
	 */
	drmach_cpu_flush_ecache_sync();

	affinity_clear();

#ifdef DEBUG
	stime = ddi_get_lbolt() - stime;
	PR_MEM("%s: scrub ticks = %ld (%ld secs)\n", f, stime, stime / hz);
#endif /* DEBUG */
}
424
/*
 * Perform a memory copy-rename from source unit s_mp to target unit
 * t_mp: build the list of spans to copy (the full source memlist
 * minus any deleted spans), initialize the platform copy-rename
 * program, quiesce the OS, run the copy-rename, and resume.
 * Returns 0 on success, -1 if the suspend or the copy/rename
 * recorded an error on the source unit.
 */
static int
dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
{
	time_t		 copytime;
	drmachid_t	 cr_id;
	dr_sr_handle_t	*srhp;
	struct memlist	*c_ml, *d_ml;
	sbd_error_t	*err;
	static fn_t	 f = "dr_move_memory";

	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
	    f,
	    s_mp->sbm_cm.sbdev_path,
	    t_mp->sbm_cm.sbdev_path);

	/* source and target must be a properly linked peer pair */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
	ASSERT(s_mp->sbm_peer == t_mp);
	ASSERT(s_mp->sbm_mlist);

	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
	ASSERT(t_mp->sbm_peer == s_mp);

	/*
	 * create a memlist of spans to copy by removing
	 * the spans that have been deleted, if any, from
	 * the full source board memlist. s_mp->sbm_del_mlist
	 * will be NULL if there were no spans deleted from
	 * the source board.
	 */
	c_ml = memlist_dup(s_mp->sbm_mlist);
	d_ml = s_mp->sbm_del_mlist;
	while (d_ml != NULL) {
		c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
		d_ml = d_ml->ml_next;
	}

	/* run the copy on a cpu with affinity to the target memory */
	affinity_set(drmach_mem_cpu_affinity(t_mp->sbm_cm.sbdev_id));

	err = drmach_copy_rename_init(
	    t_mp->sbm_cm.sbdev_id, _ptob64(t_mp->sbm_slice_offset),
	    s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
	if (err) {
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
		affinity_clear();
		return (-1);
	}

	srhp = dr_get_sr_handle(hp);
	ASSERT(srhp);

	copytime = ddi_get_lbolt();

	/* Quiesce the OS. */
	if (dr_suspend(srhp)) {
		cmn_err(CE_WARN, "%s: failed to quiesce OS"
		    " for copy-rename", f);

		dr_release_sr_handle(srhp);
		err = drmach_copy_rename_fini(cr_id);
		if (err) {
			/*
			 * no error is expected since the program has
			 * not yet run.
			 */

			/* catch this in debug kernels */
			ASSERT(0);

			sbd_err_clear(&err);
		}

		/* suspend error reached via hp */
		s_mp->sbm_cm.sbdev_error = hp->h_err;
		hp->h_err = NULL;

		affinity_clear();
		return (-1);
	}

	/*
	 * Rename memory for lgroup.
	 * Source and target board numbers are packaged in arg.
	 */
	{
		dr_board_t	*t_bp, *s_bp;

		s_bp = s_mp->sbm_cm.sbdev_bp;
		t_bp = t_mp->sbm_cm.sbdev_bp;

		/* source board number in low 16 bits, target in high */
		lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
		    (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
	}

	drmach_copy_rename(cr_id);

	/* Resume the OS. */
	dr_resume(srhp);

	copytime = ddi_get_lbolt() - copytime;

	dr_release_sr_handle(srhp);
	err = drmach_copy_rename_fini(cr_id);
	if (err)
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);

	affinity_clear();

	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
	    f, copytime, copytime / hz);

	/* return -1 if dr_suspend or copy/rename recorded an error */
	return (err == NULL ? 0 : -1);
}
538
539 /*
540 * If detaching node contains memory that is "non-permanent"
541 * then the memory adr's are simply cleared. If the memory
542 * is non-relocatable, then do a copy-rename.
543 */
void
dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
	int			rv = 0;
	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t		*t_mp;
	dr_state_t		state;
	static fn_t		f = "dr_detach_mem";

	PR_MEM("%s...\n", f);

	/* lookup target mem unit and target board structure, if any */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
	} else {
		t_mp = NULL;
	}

	/* verify mem unit's state is UNREFERENCED */
	state = s_mp->sbm_cm.sbdev_state;
	if (state != DR_STATE_UNREFERENCED) {
		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
		return;
	}

	/* verify target mem unit's state is UNREFERENCED, if any */
	if (t_mp != NULL) {
		state = t_mp->sbm_cm.sbdev_state;
		if (state != DR_STATE_UNREFERENCED) {
			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
			return;
		}
	}

	/*
	 * Scrub deleted memory. This will cause all cachelines
	 * referencing the memory to only be in the local cpu's
	 * ecache.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
		/* no del mlist for src<=dst mem size copy/rename */
		if (s_mp->sbm_del_mlist)
			dr_mem_ecache_scrub(s_mp, s_mp->sbm_del_mlist);
	}
	if (t_mp != NULL && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
		ASSERT(t_mp->sbm_del_mlist);
		dr_mem_ecache_scrub(t_mp, t_mp->sbm_del_mlist);
	}

	/*
	 * If there is no target board (no copy/rename was needed), then
	 * we're done!
	 */
	if (t_mp == NULL) {
		sbd_error_t *err;
		/*
		 * Reprogram interconnect hardware and disable
		 * memory controllers for memory node that's going away.
		 */

		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
		if (err) {
			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
			rv = -1;
		}
	} else {
		rv = dr_move_memory(hp, s_mp, t_mp);
		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
		    f,
		    rv ? "FAILED" : "COMPLETED",
		    s_mp->sbm_cm.sbdev_bp->b_num,
		    t_mp->sbm_cm.sbdev_bp->b_num);

		/* undo the release reservation if the move failed */
		if (rv != 0)
			(void) dr_cancel_mem(s_mp);
	}

	/* tear down the device branch only if everything succeeded */
	if (rv == 0) {
		sbd_error_t *err;

		dr_lock_status(hp->h_bd);
		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id,
		    DEVI_BRANCH_DESTROY);
		dr_unlock_status(hp->h_bd);
		if (err)
			sbd_err_clear(&err);
	}
}
634
635 /*
636 * XXX workaround for certain lab configurations (see also starcat drmach.c)
637 * Temporary code to get around observed incorrect results from
638 * kphysm_del_span_query when the queried span contains address spans
639 * not occupied by memory in between spans that do have memory.
640 * This routine acts as a wrapper to kphysm_del_span_query. It builds
641 * a memlist from phys_install of spans that exist between base and
642 * base + npages, inclusively. Kphysm_del_span_query is called for each
643 * node in the memlist with the results accumulated in *mp.
644 */
645 static int
dr_del_span_query(pfn_t base,pgcnt_t npages,memquery_t * mp)646 dr_del_span_query(pfn_t base, pgcnt_t npages, memquery_t *mp)
647 {
648 uint64_t pa = _ptob64(base);
649 uint64_t sm = ~ (137438953472ull - 1);
650 uint64_t sa = pa & sm;
651 struct memlist *mlist, *ml;
652 int rv;
653
654 npages = npages; /* silence lint */
655 memlist_read_lock();
656 mlist = memlist_dup(phys_install);
657 memlist_read_unlock();
658
659 again:
660 for (ml = mlist; ml; ml = ml->ml_next) {
661 if ((ml->ml_address & sm) != sa) {
662 mlist = memlist_del_span(mlist,
663 ml->ml_address, ml->ml_size);
664 goto again;
665 }
666 }
667
668 mp->phys_pages = 0;
669 mp->managed = 0;
670 mp->nonrelocatable = 0;
671 mp->first_nonrelocatable = (pfn_t)-1; /* XXX */
672 mp->last_nonrelocatable = 0;
673
674 for (ml = mlist; ml; ml = ml->ml_next) {
675 memquery_t mq;
676
677 rv = kphysm_del_span_query(
678 _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
679 if (rv)
680 break;
681
682 mp->phys_pages += mq.phys_pages;
683 mp->managed += mq.managed;
684 mp->nonrelocatable += mq.nonrelocatable;
685
686 if (mq.nonrelocatable != 0) {
687 if (mq.first_nonrelocatable < mp->first_nonrelocatable)
688 mp->first_nonrelocatable =
689 mq.first_nonrelocatable;
690 if (mq.last_nonrelocatable > mp->last_nonrelocatable)
691 mp->last_nonrelocatable =
692 mq.last_nonrelocatable;
693 }
694 }
695
696 if (mp->nonrelocatable == 0)
697 mp->first_nonrelocatable = 0; /* XXX */
698
699 memlist_delete(mlist);
700 return (rv);
701 }
702
703 #define kphysm_del_span_query dr_del_span_query
704
705 /*
706 * NOTE: This routine is only partially smart about multiple
707 * mem-units. Need to make mem-status structure smart
708 * about them also.
709 */
int
dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
{
	int		m, mix;
	memdelstat_t	mdst;
	memquery_t	mq;
	dr_board_t	*bp;
	dr_mem_unit_t	*mp;
	sbd_mem_stat_t	*msp;
	static fn_t	f = "dr_mem_status";

	bp = hp->h_bd;
	devset &= DR_DEVS_PRESENT(bp);

	/* mix counts the status entries actually filled in */
	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
		int		rv;
		sbd_error_t	*err;
		drmach_status_t	 pstat;
		dr_mem_unit_t	*p_mp;

		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
			continue;

		mp = dr_get_mem_unit(bp, m);

		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
			/* present, but not fully initialized */
			continue;
		}

		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
			continue;

		/* fetch platform status */
		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			continue;
		}

		msp = &dsp->d_mem;
		bzero((caddr_t)msp, sizeof (*msp));

		/*
		 * NOTE(review): strncpy may leave c_name without a NUL
		 * terminator if pstat.type fills the buffer — confirm
		 * callers tolerate this or that type is always shorter.
		 */
		(void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
		    sizeof (msp->ms_cm.c_id.c_name));
		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;

		msp->ms_totpages = mp->sbm_npages;
		msp->ms_basepfn = mp->sbm_basepfn;
		msp->ms_pageslost = mp->sbm_pageslost;
		msp->ms_cage_enabled = kcage_on;

		/* a peer exists only while a copy/rename is reserved */
		if (mp->sbm_flags & DR_MFLAG_RESERVED)
			p_mp = mp->sbm_peer;
		else
			p_mp = NULL;

		if (p_mp == NULL) {
			msp->ms_peer_is_target = 0;
			msp->ms_peer_ap_id[0] = '\0';
		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			char *minor;

			/*
			 * b_dip doesn't have to be held for ddi_pathname()
			 * because the board struct (dr_board_t) will be
			 * destroyed before b_dip detaches.
			 */
			(void) ddi_pathname(bp->b_dip, path);
			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');

			(void) snprintf(msp->ms_peer_ap_id,
			    sizeof (msp->ms_peer_ap_id), "%s%s",
			    path, (minor == NULL) ? "" : minor);

			kmem_free(path, MAXPATHLEN);

			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
				msp->ms_peer_is_target = 1;
		}

		/* delete status is only available to the delete owner */
		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
		else
			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */

		if (rv == KPHYSM_OK) {
			/*
			 * Any pages above managed is "free",
			 * i.e. it's collected.
			 */
			msp->ms_detpages += (uint_t)(mdst.collected +
			    mdst.phys_pages - mdst.managed);
		} else {
			/*
			 * If we're UNREFERENCED or UNCONFIGURED,
			 * then the number of detached pages is
			 * however many pages are on the board.
			 * I.e. detached = not in use by OS.
			 */
			switch (msp->ms_cm.c_ostate) {
			/*
			 * changed to use cfgadm states
			 *
			 * was:
			 *	case DR_STATE_UNREFERENCED:
			 *	case DR_STATE_UNCONFIGURED:
			 */
			case SBD_STAT_UNCONFIGURED:
				msp->ms_detpages = msp->ms_totpages;
				break;

			default:
				break;
			}
		}

		/*
		 * kphysm_del_span_query can report non-reloc pages = total
		 * pages for memory that is not yet configured
		 */
		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {

			rv = kphysm_del_span_query(mp->sbm_basepfn,
			    mp->sbm_npages, &mq);

			if (rv == KPHYSM_OK) {
				msp->ms_managed_pages = mq.managed;
				msp->ms_noreloc_pages = mq.nonrelocatable;
				msp->ms_noreloc_first =
				    mq.first_nonrelocatable;
				msp->ms_noreloc_last =
				    mq.last_nonrelocatable;
				msp->ms_cm.c_sflags = 0;
				/* non-reloc pages force a suspend */
				if (mq.nonrelocatable) {
					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
					    msp->ms_cm.c_sflags);
				}
			} else {
				PR_MEM("%s: kphysm_del_span_query() = %d\n",
				    f, rv);
			}
		}

		/*
		 * Check source unit state during copy-rename
		 */
		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;

		mix++;
		dsp++;
	}

	/* return the number of status entries filled in */
	return (mix);
}
874
/*
 * Pre-attach: for each mem unit being configured, reprogram and
 * enable the memory hardware.  A unit in the UNCONFIGURED state is
 * being brought back in after an unconfigure and must still have the
 * memlist cached by dr_post_detach_mem_unit().  Returns -1 if any
 * unit could not be prepared, 0 otherwise.
 */
int
dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int		err_flag = 0;
	int		d;
	sbd_error_t	*err;
	static fn_t	f = "dr_pre_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		dr_state_t	state;

		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);

		state = mp->sbm_cm.sbdev_state;
		switch (state) {
		case DR_STATE_UNCONFIGURED:
			PR_MEM("%s: recovering from UNCONFIG for %s\n",
			    f,
			    mp->sbm_cm.sbdev_path);

			/* use memlist cached by dr_post_detach_mem_unit */
			ASSERT(mp->sbm_mlist != NULL);
			PR_MEM("%s: re-configuring cached memlist for %s:\n",
			    f, mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(mp->sbm_mlist);

			/* kphysm del handle should have been freed */
			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

			/*FALLTHROUGH*/

		case DR_STATE_CONNECTED:
			PR_MEM("%s: reprogramming mem hardware on %s\n",
			    f, mp->sbm_cm.sbdev_bp->b_path);

			PR_MEM("%s: enabling %s\n",
			    f, mp->sbm_cm.sbdev_path);

			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				err_flag = 1;
			}
			break;

		default:
			/* any other state is invalid for attach */
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
			err_flag = 1;
			break;
		}

		/* exit for loop if error encountered */
		if (err_flag)
			break;
	}

	return (err_flag ? -1 : 0);
}
938
/*
 * Post-attach: for each configured unit, verify its memory is now in
 * phys_install, report each span to the platform layer, discard any
 * memlist cached by a prior unconfigure, and refresh the unit data.
 * Always returns 0; per-unit failures are recorded on the unit.
 */
int
dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	_NOTE(ARGUNUSED(hp))

	int		d;
	static fn_t	f = "dr_post_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		struct memlist	*mlist, *ml;

		mlist = dr_get_memlist(mp);
		if (mlist == NULL) {
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
			continue;
		}

		/*
		 * Verify the memory really did successfully attach
		 * by checking for its existence in phys_install.
		 */
		memlist_read_lock();
		if (memlist_intersect(phys_install, mlist) == 0) {
			memlist_read_unlock();

			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);

			PR_MEM("%s: %s memlist not in phys_install",
			    f, mp->sbm_cm.sbdev_path);

			memlist_delete(mlist);
			continue;
		}
		memlist_read_unlock();

		/* inform the platform layer of each attached span */
		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
			sbd_error_t *err;

			err = drmach_mem_add_span(
			    mp->sbm_cm.sbdev_id,
			    ml->ml_address,
			    ml->ml_size);
			if (err)
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		}

		memlist_delete(mlist);

		/*
		 * Destroy cached memlist, if any.
		 * There will be a cached memlist in sbm_mlist if
		 * this board is being configured directly after
		 * an unconfigure.
		 * To support this transition, dr_post_detach_mem
		 * left a copy of the last known memlist in sbm_mlist.
		 * This memlist could differ from any derived from
		 * hardware if while this memunit was last configured
		 * the system detected and deleted bad pages from
		 * phys_install. The location of those bad pages
		 * will be reflected in the cached memlist.
		 */
		if (mp->sbm_mlist) {
			memlist_delete(mp->sbm_mlist);
			mp->sbm_mlist = NULL;
		}

		/*
		 * TODO: why is this call to dr_init_mem_unit_data here?
		 * this has been done at discovery or connect time, so this is
		 * probably redundant and unnecessary.
		 */
		dr_init_mem_unit_data(mp);
	}

	return (0);
}
1018
1019 int
dr_pre_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)1020 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1021 {
1022 _NOTE(ARGUNUSED(hp))
1023
1024 int d;
1025
1026 for (d = 0; d < devnum; d++) {
1027 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1028
1029 cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1030 }
1031
1032 return (0);
1033 }
1034
1035
1036 int
dr_post_detach_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)1037 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1038 {
1039 _NOTE(ARGUNUSED(hp))
1040
1041 int d, rv;
1042 static fn_t f = "dr_post_detach_mem";
1043
1044 PR_MEM("%s...\n", f);
1045
1046 rv = 0;
1047 for (d = 0; d < devnum; d++) {
1048 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1049
1050 ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1051
1052 if (dr_post_detach_mem_unit(mp))
1053 rv = -1;
1054 }
1055
1056 return (rv);
1057 }
1058
/*
 * Add each span in ml back to the running system (kphysm) and to the
 * platform layer.  Note the ordering in the loop below:
 * drmach_mem_add_span() is attempted even when
 * kphysm_add_memory_dynamic() failed for the span; the kphysm failure
 * is only logged and the loop continues with the next span.
 */
static void
dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
{
	static fn_t	f = "dr_add_memory_spans";

	PR_MEM("%s...", f);
	PR_MEMLIST_DUMP(ml);

#ifdef DEBUG
	/* sanity check: these spans should not already be installed */
	memlist_read_lock();
	if (memlist_intersect(phys_install, ml)) {
		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
	}
	memlist_read_unlock();
#endif

	for (; ml; ml = ml->ml_next) {
		pfn_t		 base;
		pgcnt_t		 npgs;
		int		 rv;
		sbd_error_t	*err;

		base = _b64top(ml->ml_address);
		npgs = _b64top(ml->ml_size);

		rv = kphysm_add_memory_dynamic(base, npgs);

		err = drmach_mem_add_span(
		    mp->sbm_cm.sbdev_id,
		    ml->ml_address,
		    ml->ml_size);

		if (err)
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);

		if (rv != KPHYSM_OK) {
			cmn_err(CE_WARN, "%s:"
			    " unexpected kphysm_add_memory_dynamic"
			    " return value %d;"
			    " basepfn=0x%lx, npages=%ld\n",
			    f, rv, base, npgs);

			continue;
		}
	}
}
1105
1106 static int
dr_post_detach_mem_unit(dr_mem_unit_t * s_mp)1107 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1108 {
1109 uint64_t sz = s_mp->sbm_slice_size;
1110 uint64_t sm = sz - 1;
1111 /* old and new below refer to PAs before and after copy-rename */
1112 uint64_t s_old_basepa, s_new_basepa;
1113 uint64_t t_old_basepa, t_new_basepa;
1114 uint64_t t_new_smallsize = 0;
1115 dr_mem_unit_t *t_mp, *x_mp;
1116 struct memlist *ml;
1117 int rv;
1118 sbd_error_t *err;
1119 static fn_t f = "dr_post_detach_mem_unit";
1120
1121 PR_MEM("%s...\n", f);
1122
1123 /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1124 PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1125 f, s_mp->sbm_cm.sbdev_path);
1126 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1127
1128 /* sanity check */
1129 ASSERT(s_mp->sbm_del_mlist == NULL ||
1130 (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1131
1132 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1133 t_mp = s_mp->sbm_peer;
1134 ASSERT(t_mp != NULL);
1135 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1136 ASSERT(t_mp->sbm_peer == s_mp);
1137
1138 ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1139 ASSERT(t_mp->sbm_del_mlist);
1140
1141 PR_MEM("%s: target %s: deleted memlist:\n",
1142 f, t_mp->sbm_cm.sbdev_path);
1143 PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1144 } else {
1145 /* this is no target unit */
1146 t_mp = NULL;
1147 }
1148
1149 /*
1150 * Verify the memory really did successfully detach
1151 * by checking for its non-existence in phys_install.
1152 */
1153 rv = 0;
1154 memlist_read_lock();
1155 if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1156 x_mp = s_mp;
1157 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1158 }
1159 if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1160 x_mp = t_mp;
1161 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1162 }
1163 memlist_read_unlock();
1164
1165 if (rv) {
1166 /* error: memlist still in phys_install */
1167 DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1168 }
1169
1170 /*
1171 * clean mem unit state and bail out if an error has been recorded.
1172 */
1173 rv = 0;
1174 if (s_mp->sbm_cm.sbdev_error) {
1175 PR_MEM("%s: %s flags=%x", f,
1176 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1177 DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1178 DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1179 dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1180 rv = -1;
1181 }
1182 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1183 PR_MEM("%s: %s flags=%x", f,
1184 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1185 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1186 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1187 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1188 rv = -1;
1189 }
1190 if (rv)
1191 goto cleanup;
1192
1193 s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1194 err = drmach_mem_get_base_physaddr(s_mp->sbm_cm.sbdev_id,
1195 &s_new_basepa);
1196 ASSERT(err == NULL);
1197
1198 PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1199 PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1200
1201 if (t_mp != NULL) {
1202 struct memlist *s_copy_mlist;
1203
1204 t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1205 err = drmach_mem_get_base_physaddr(t_mp->sbm_cm.sbdev_id,
1206 &t_new_basepa);
1207 ASSERT(err == NULL);
1208
1209 PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1210 PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1211
1212 /*
1213 * Construct copy list with original source addresses.
1214 * Used to add back excess target mem.
1215 */
1216 s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1217 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1218 s_copy_mlist = memlist_del_span(s_copy_mlist,
1219 ml->ml_address, ml->ml_size);
1220 }
1221
1222 PR_MEM("%s: source copy list:\n:", f);
1223 PR_MEMLIST_DUMP(s_copy_mlist);
1224
1225 /*
1226 * We had to swap mem-units, so update
1227 * memlists accordingly with new base
1228 * addresses.
1229 */
1230 for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
1231 ml->ml_address -= t_old_basepa;
1232 ml->ml_address += t_new_basepa;
1233 }
1234
1235 /*
1236 * There is no need to explicitly rename the target delete
1237 * memlist, because sbm_del_mlist and sbm_mlist always
1238 * point to the same memlist for a copy/rename operation.
1239 */
1240 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1241
1242 PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1243 PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1244
1245 for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
1246 ml->ml_address -= s_old_basepa;
1247 ml->ml_address += s_new_basepa;
1248 }
1249
1250 PR_MEM("%s: renamed source memlist:\n", f);
1251 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1252
1253 /*
1254 * Keep track of dynamically added segments
1255 * since they cannot be split if we need to delete
1256 * excess source memory later for this board.
1257 */
1258 if (t_mp->sbm_dyn_segs)
1259 memlist_delete(t_mp->sbm_dyn_segs);
1260 t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1261 s_mp->sbm_dyn_segs = NULL;
1262
1263 /*
1264 * If the target memory range with the new target base PA
1265 * extends beyond the usable slice, prevent any "target excess"
1266 * from being added back after this copy/rename and
1267 * calculate the new smaller size of the target board
1268 * to be set as part of target cleanup. The base + npages
1269 * must only include the range of memory up to the end of
1270 * this slice. This will only be used after a category 4
1271 * large-to-small target type copy/rename - see comments
1272 * in dr_select_mem_target.
1273 */
1274 if (((t_new_basepa & sm) + _ptob64(t_mp->sbm_npages)) > sz) {
1275 t_new_smallsize = sz - (t_new_basepa & sm);
1276 }
1277
1278 if (s_mp->sbm_flags & DR_MFLAG_MEMRESIZE &&
1279 t_new_smallsize == 0) {
1280 struct memlist *t_excess_mlist;
1281
1282 /*
1283 * Add back excess target memory.
1284 * Subtract out the portion of the target memory
1285 * node that was taken over by the source memory
1286 * node.
1287 */
1288 t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1289 for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
1290 t_excess_mlist =
1291 memlist_del_span(t_excess_mlist,
1292 ml->ml_address, ml->ml_size);
1293 }
1294
1295 /*
1296 * Update dynamically added segs
1297 */
1298 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1299 t_mp->sbm_dyn_segs =
1300 memlist_del_span(t_mp->sbm_dyn_segs,
1301 ml->ml_address, ml->ml_size);
1302 }
1303 for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
1304 t_mp->sbm_dyn_segs =
1305 memlist_cat_span(t_mp->sbm_dyn_segs,
1306 ml->ml_address, ml->ml_size);
1307 }
1308 PR_MEM("%s: %s: updated dynamic seg list:\n",
1309 f, t_mp->sbm_cm.sbdev_path);
1310 PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1311
1312 PR_MEM("%s: adding back remaining portion"
1313 " of %s, memlist:\n",
1314 f, t_mp->sbm_cm.sbdev_path);
1315 PR_MEMLIST_DUMP(t_excess_mlist);
1316
1317 dr_add_memory_spans(s_mp, t_excess_mlist);
1318 memlist_delete(t_excess_mlist);
1319 }
1320 memlist_delete(s_copy_mlist);
1321
1322 #ifdef DEBUG
1323 /*
1324 * Renaming s_mp->sbm_del_mlist is not necessary. This
1325 * list is not used beyond this point, and in fact, is
1326 * disposed of at the end of this function.
1327 */
1328 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1329 ml->ml_address -= s_old_basepa;
1330 ml->ml_address += s_new_basepa;
1331 }
1332
1333 PR_MEM("%s: renamed source delete memlist", f);
1334 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1335 #endif
1336
1337 }
1338
1339 if (t_mp != NULL) {
1340 /* delete target's entire address space */
1341 err = drmach_mem_del_span(t_mp->sbm_cm.sbdev_id,
1342 t_old_basepa & ~ sm, sz);
1343 if (err)
1344 DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1345 ASSERT(err == NULL);
1346
1347 /*
1348 * After the copy/rename, the original address space
1349 * for the source board (which is now located on the
1350 * target board) may now have some excess to be deleted.
1351 * The amount is calculated by masking the slice
1352 * info and keeping the slice offset from t_new_basepa.
1353 */
1354 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1355 s_old_basepa & ~ sm, t_new_basepa & sm);
1356 if (err)
1357 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1358 ASSERT(err == NULL);
1359
1360 } else {
1361 /* delete board's entire address space */
1362 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1363 s_old_basepa & ~ sm, sz);
1364 if (err)
1365 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1366 ASSERT(err == NULL);
1367 }
1368
1369 cleanup:
1370 /* clean up target mem unit */
1371 if (t_mp != NULL) {
1372 memlist_delete(t_mp->sbm_del_mlist);
1373 /* no need to delete sbm_mlist, it shares sbm_del_mlist */
1374
1375 t_mp->sbm_del_mlist = NULL;
1376 t_mp->sbm_mlist = NULL;
1377 t_mp->sbm_peer = NULL;
1378 t_mp->sbm_flags = 0;
1379 t_mp->sbm_cm.sbdev_busy = 0;
1380 dr_init_mem_unit_data(t_mp);
1381
1382 /* reduce target size if new PAs go past end of usable slice */
1383 if (t_new_smallsize > 0) {
1384 t_mp->sbm_npages = _b64top(t_new_smallsize);
1385 PR_MEM("%s: target new size 0x%lx bytes\n",
1386 f, t_new_smallsize);
1387 }
1388 }
1389 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1390 /*
1391 * now that copy/rename has completed, undo this
1392 * work that was done in dr_release_mem_done.
1393 */
1394 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1395 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1396 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1397 }
1398
1399 /*
1400 * clean up (source) board's mem unit structure.
1401 * NOTE: sbm_mlist is retained if no error has been record (in other
1402 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1403 * referred to elsewhere as the cached memlist. The cached memlist
1404 * is used to re-attach (configure back in) this memunit from the
1405 * unconfigured state. The memlist is retained because it may
1406 * represent bad pages that were detected while the memory was
1407 * configured into the OS. The OS deletes bad pages from phys_install.
1408 * Those deletes, if any, will be represented in the cached mlist.
1409 */
1410 if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1411 memlist_delete(s_mp->sbm_del_mlist);
1412
1413 if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1414 memlist_delete(s_mp->sbm_mlist);
1415 s_mp->sbm_mlist = NULL;
1416 }
1417
1418 if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1419 memlist_delete(s_mp->sbm_dyn_segs);
1420 s_mp->sbm_dyn_segs = NULL;
1421 }
1422
1423 s_mp->sbm_del_mlist = NULL;
1424 s_mp->sbm_peer = NULL;
1425 s_mp->sbm_flags = 0;
1426 s_mp->sbm_cm.sbdev_busy = 0;
1427 dr_init_mem_unit_data(s_mp);
1428
1429 PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1430 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1431
1432 return (0);
1433 }
1434
/*
 * Successful return from this function will have the memory
 * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
 * and waiting. This routine's job is to select the memory that
 * actually has to be released (detached) which may not necessarily
 * be the same memory node that came in via devlist[],
 * i.e. a copy-rename is needed.
 */
1443 int
dr_pre_release_mem(dr_handle_t * hp,dr_common_unit_t ** devlist,int devnum)1444 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1445 {
1446 int d;
1447 int err_flag = 0;
1448 static fn_t f = "dr_pre_release_mem";
1449
1450 PR_MEM("%s...\n", f);
1451
1452 for (d = 0; d < devnum; d++) {
1453 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1454 int rv;
1455 memquery_t mq;
1456 struct memlist *ml;
1457
1458 if (mp->sbm_cm.sbdev_error) {
1459 err_flag = 1;
1460 continue;
1461 } else if (!kcage_on) {
1462 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1463 err_flag = 1;
1464 continue;
1465 }
1466
1467 if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1468 /*
1469 * Board is currently involved in a delete
1470 * memory operation. Can't detach this guy until
1471 * that operation completes.
1472 */
1473 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1474 err_flag = 1;
1475 break;
1476 }
1477
1478 /*
1479 * Check whether the detaching memory requires a
1480 * copy-rename.
1481 */
1482 ASSERT(mp->sbm_npages != 0);
1483 rv = kphysm_del_span_query(mp->sbm_basepfn, mp->sbm_npages,
1484 &mq);
1485 if (rv != KPHYSM_OK) {
1486 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1487 err_flag = 1;
1488 break;
1489 }
1490
1491 if (mq.nonrelocatable != 0) {
1492 if (!(dr_cmd_flags(hp) &
1493 (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1494 /* caller wasn't prompted for a suspend */
1495 dr_dev_err(CE_WARN, &mp->sbm_cm,
1496 ESBD_QUIESCE_REQD);
1497 err_flag = 1;
1498 break;
1499 }
1500 }
1501
1502 /* flags should be clean at this time */
1503 ASSERT(mp->sbm_flags == 0);
1504
1505 ASSERT(mp->sbm_mlist == NULL); /* should be null */
1506 ASSERT(mp->sbm_del_mlist == NULL); /* should be null */
1507 if (mp->sbm_mlist != NULL) {
1508 memlist_delete(mp->sbm_mlist);
1509 mp->sbm_mlist = NULL;
1510 }
1511
1512 ml = dr_get_memlist(mp);
1513 if (ml == NULL) {
1514 err_flag = 1;
1515 PR_MEM("%s: no memlist found for %s\n",
1516 f, mp->sbm_cm.sbdev_path);
1517 continue;
1518 }
1519
1520 /* allocate a kphysm handle */
1521 rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1522 if (rv != KPHYSM_OK) {
1523 memlist_delete(ml);
1524
1525 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1526 err_flag = 1;
1527 break;
1528 }
1529 mp->sbm_flags |= DR_MFLAG_RELOWNER;
1530
1531 if ((mq.nonrelocatable != 0) ||
1532 dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1533 /*
1534 * Either the detaching memory node contains
1535 * non-reloc memory or we failed to reserve the
1536 * detaching memory node (which did _not_ have
1537 * any non-reloc memory, i.e. some non-reloc mem
1538 * got onboard).
1539 */
1540
1541 if (dr_select_mem_target(hp, mp, ml)) {
1542 int rv;
1543
1544 /*
1545 * We had no luck locating a target
1546 * memory node to be the recipient of
1547 * the non-reloc memory on the node
1548 * we're trying to detach.
1549 * Clean up be disposing the mem handle
1550 * and the mem list.
1551 */
1552 rv = kphysm_del_release(mp->sbm_memhandle);
1553 if (rv != KPHYSM_OK) {
1554 /*
1555 * can do nothing but complain
1556 * and hope helpful for debug
1557 */
1558 cmn_err(CE_WARN, "%s: unexpected"
1559 " kphysm_del_release return"
1560 " value %d",
1561 f, rv);
1562 }
1563 mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1564
1565 memlist_delete(ml);
1566
1567 /* make sure sbm_flags is clean */
1568 ASSERT(mp->sbm_flags == 0);
1569
1570 dr_dev_err(CE_WARN, &mp->sbm_cm,
1571 ESBD_NO_TARGET);
1572
1573 err_flag = 1;
1574 break;
1575 }
1576
1577 /*
1578 * ml is not memlist_delete'd here because
1579 * it has been assigned to mp->sbm_mlist
1580 * by dr_select_mem_target.
1581 */
1582 } else {
1583 /* no target needed to detach this board */
1584 mp->sbm_flags |= DR_MFLAG_RESERVED;
1585 mp->sbm_peer = NULL;
1586 mp->sbm_del_mlist = ml;
1587 mp->sbm_mlist = ml;
1588 mp->sbm_cm.sbdev_busy = 1;
1589 }
1590 #ifdef DEBUG
1591 ASSERT(mp->sbm_mlist != NULL);
1592
1593 if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1594 PR_MEM("%s: release of %s requires copy/rename;"
1595 " selected target board %s\n",
1596 f,
1597 mp->sbm_cm.sbdev_path,
1598 mp->sbm_peer->sbm_cm.sbdev_path);
1599 } else {
1600 PR_MEM("%s: copy/rename not required to release %s\n",
1601 f, mp->sbm_cm.sbdev_path);
1602 }
1603
1604 ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1605 ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1606 #endif
1607 }
1608
1609 return (err_flag ? -1 : 0);
1610 }
1611
/*
 * Completion handler for a memory release. Frees the kphysm delete
 * handle, and either (a) on recorded error, tears down the source and
 * target (if any) unit bookkeeping and bails, or (b) on success, marks
 * the unit(s) RELEASED, verifies the deleted spans are really gone from
 * phys_install, flags RELDONE, and finishes via dr_release_dev_done.
 */
void
dr_release_mem_done(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t	*t_mp, *mp;
	int		rv;
	static fn_t	f = "dr_release_mem_done";

	/*
	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
	 * has a target unit.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	/* free delete handle */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
	rv = kphysm_del_release(s_mp->sbm_memhandle);
	if (rv != KPHYSM_OK) {
		/*
		 * can do nothing but complain
		 * and hope helpful for debug
		 */
		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
		    " return value %d", f, rv);
	}
	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;

	/*
	 * If an error was encountered during release, clean up
	 * the source (and target, if present) unit data.
	 */
	/* XXX Can we know that sbdev_error was encountered during release? */
	if (s_mp->sbm_cm.sbdev_error != NULL) {
		PR_MEM("%s: %s: error %d noted\n",
		    f,
		    s_mp->sbm_cm.sbdev_path,
		    s_mp->sbm_cm.sbdev_error->e_code);

		if (t_mp != NULL) {
			/*
			 * For copy/rename the target's delete memlist
			 * aliases its memlist; null the alias first so
			 * the list is only freed once below.
			 */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
		}

		/* the source delete memlist may be distinct from sbm_mlist */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;

		/* bail out */
		return;
	}

	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);

	if (t_mp != NULL) {
		/*
		 * the kphysm delete operation that drained the source
		 * board also drained this target board. Since the source
		 * board drain is now known to have succeeded, we know this
		 * target board is drained too.
		 *
		 * because DR_DEV_SET_RELEASED and dr_device_transition
		 * is done here, the dr_release_dev_done should not
		 * fail.
		 */
		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);

		/*
		 * NOTE: do not transition target's board state,
		 * even if the mem-unit was the last configured
		 * unit of the board. When copy/rename completes
		 * this mem-unit will be transitioned back to
		 * the configured state. In the meantime, the
		 * board state must remain as is.
		 */
	}

	/* if board(s) had deleted memory, verify it is gone */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_del_mlist != NULL) {
		mp = s_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
		mp = t_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	memlist_read_unlock();
	if (rv) {
		/* nonzero intersection means the delete didn't complete */
		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
		    "deleted memory still found in phys_install",
		    f,
		    (mp == t_mp ? "target " : ""),
		    mp->sbm_cm.sbdev_bp->b_num,
		    mp->sbm_cm.sbdev_unum);

		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
		return;
	}

	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
	if (t_mp != NULL)
		t_mp->sbm_flags |= DR_MFLAG_RELDONE;

	/* this should not fail */
	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
		/* catch this in debug kernels */
		ASSERT(0);
		return;
	}

	PR_MEM("%s: marking %s release DONE\n",
	    f, s_mp->sbm_cm.sbdev_path);

	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;

	if (t_mp != NULL) {
		/* should not fail */
		rv = dr_release_dev_done(&t_mp->sbm_cm);
		if (rv != 0) {
			/* catch this in debug kernels */
			ASSERT(0);
			return;
		}

		PR_MEM("%s: marking %s release DONE\n",
		    f, t_mp->sbm_cm.sbdev_path);

		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
	}
}
1773
1774 /*ARGSUSED*/
1775 int
dr_disconnect_mem(dr_mem_unit_t * mp)1776 dr_disconnect_mem(dr_mem_unit_t *mp)
1777 {
1778 static fn_t f = "dr_disconnect_mem";
1779 update_membounds_t umb;
1780
1781 #ifdef DEBUG
1782 int state = mp->sbm_cm.sbdev_state;
1783 ASSERT(state == DR_STATE_CONNECTED || state == DR_STATE_UNCONFIGURED);
1784 #endif
1785
1786 PR_MEM("%s...\n", f);
1787
1788 if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1789 memlist_delete(mp->sbm_del_mlist);
1790 mp->sbm_del_mlist = NULL;
1791
1792 if (mp->sbm_mlist) {
1793 memlist_delete(mp->sbm_mlist);
1794 mp->sbm_mlist = NULL;
1795 }
1796
1797 /*
1798 * Remove memory from lgroup
1799 * For now, only board info is required.
1800 */
1801 umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1802 umb.u_base = (uint64_t)-1;
1803 umb.u_len = (uint64_t)-1;
1804
1805 lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1806
1807 return (0);
1808 }
1809
/*
 * Cancel an in-progress memory release. Only a source (or standalone)
 * unit may be cancelled; a target unit returns -1. Depending on the
 * current state, previously deleted spans are added back and both the
 * source and target (if any) units are reset to CONFIGURED.
 * Returns 0 on success, -1 on failure or unexpected state.
 */
int
dr_cancel_mem(dr_mem_unit_t *s_mp)
{
	dr_mem_unit_t	*t_mp;
	dr_state_t	state;
	static fn_t	f = "dr_cancel_mem";

	state = s_mp->sbm_cm.sbdev_state;

	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
		/* must cancel source board, not target board */
		/* TODO: set error */
		return (-1);
	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);

		/* must always match the source board's state */
		/* TODO: is this assertion correct? */
		ASSERT(t_mp->sbm_cm.sbdev_state == state);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	switch (state) {
	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		/* add the deleted spans back before resetting unit state */
		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing target %s memory delete\n",
			    f, t_mp->sbm_cm.sbdev_path);
			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);

			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		}

		if (s_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing %s memory delete\n",
			    f, s_mp->sbm_cm.sbdev_path);

			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
		}

		/*FALLTHROUGH*/

	/* TODO: should no longer be possible to see the release state here */
	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */

		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		if (t_mp != NULL) {
			/*
			 * target's delete memlist aliases its memlist;
			 * clear the alias so the list is freed only once.
			 */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
			dr_init_mem_unit_data(t_mp);

			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);

			dr_device_transition(&t_mp->sbm_cm,
			    DR_STATE_CONFIGURED);
		}

		/* source delete memlist may be distinct from sbm_mlist */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;
		dr_init_mem_unit_data(s_mp);

		return (0);

	default:
		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
		    f, (int)state, s_mp->sbm_cm.sbdev_path);

		return (-1);
	}
	/*NOTREACHED*/
}
1906
1907 void
dr_init_mem_unit(dr_mem_unit_t * mp)1908 dr_init_mem_unit(dr_mem_unit_t *mp)
1909 {
1910 dr_state_t new_state;
1911
1912
1913 if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
1914 new_state = DR_STATE_CONFIGURED;
1915 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1916 } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
1917 new_state = DR_STATE_CONNECTED;
1918 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1919 } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
1920 new_state = DR_STATE_OCCUPIED;
1921 } else {
1922 new_state = DR_STATE_EMPTY;
1923 }
1924
1925 if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
1926 dr_init_mem_unit_data(mp);
1927
1928 /* delay transition until fully initialized */
1929 dr_device_transition(&mp->sbm_cm, new_state);
1930 }
1931
/*
 * Populate a memory unit's data fields (base pfn, page count, alignment
 * mask, slice size) by querying drmach, falling back to the memlist for
 * the page count when the size query yields zero pages, and register
 * the unit's memory range with the lgroup framework. Query failures are
 * recorded in sbdev_error and the corresponding field gets a sentinel.
 */
static void
dr_init_mem_unit_data(dr_mem_unit_t *mp)
{
	drmachid_t	id = mp->sbm_cm.sbdev_id;
	uint64_t	bytes;
	sbd_error_t	*err;
	static fn_t	f = "dr_init_mem_unit_data";
	update_membounds_t umb;

	PR_MEM("%s...\n", f);

	/* a little sanity checking */
	ASSERT(mp->sbm_peer == NULL);
	ASSERT(mp->sbm_flags == 0);

	/* get basepfn of mem unit */
	err = drmach_mem_get_base_physaddr(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_basepfn = (pfn_t)-1;	/* sentinel: unknown base */
	} else
		mp->sbm_basepfn = _b64top(bytes);

	/* attempt to get number of pages from PDA */
	err = drmach_mem_get_size(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_npages = 0;
	} else
		mp->sbm_npages = _b64top(bytes);

	/* if didn't work, calculate using memlist */
	if (mp->sbm_npages == 0) {
		struct memlist	*ml, *mlist;
		/*
		 * Either we couldn't open the PDA or our
		 * PDA has garbage in it. We must have the
		 * page count consistent and whatever the
		 * OS states has precedence over the PDA
		 * so let's check the kernel.
		 */
		/* TODO: curious comment. it suggests pda query should happen if this fails */
		PR_MEM("%s: PDA query failed for npages."
		    " Checking memlist for %s\n",
		    f, mp->sbm_cm.sbdev_path);

		/* sum the sizes of all spans in the unit's memlist */
		mlist = dr_get_memlist(mp);
		for (ml = mlist; ml; ml = ml->ml_next)
			mp->sbm_npages += btop(ml->ml_size);
		memlist_delete(mlist);
	}

	/* alignment is kept as a page mask, hence the _b64top conversion */
	err = drmach_mem_get_alignment(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_alignment_mask = 0;
	} else
		mp->sbm_alignment_mask = _b64top(bytes);

	err = drmach_mem_get_slice_size(id, &bytes);
	if (err) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		mp->sbm_slice_size = 0;	/* paranoia */
	} else
		mp->sbm_slice_size = bytes;

	/*
	 * Add memory to lgroup
	 */
	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;

	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);

	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
}
2010
2011 static int
dr_reserve_mem_spans(memhandle_t * mhp,struct memlist * ml)2012 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2013 {
2014 int err;
2015 pfn_t base;
2016 pgcnt_t npgs;
2017 struct memlist *mc;
2018 static fn_t f = "dr_reserve_mem_spans";
2019
2020 PR_MEM("%s...\n", f);
2021
2022 /*
2023 * Walk the supplied memlist scheduling each span for removal
2024 * with kphysm_del_span. It is possible that a span may intersect
2025 * an area occupied by the cage.
2026 */
2027 for (mc = ml; mc != NULL; mc = mc->ml_next) {
2028 base = _b64top(mc->ml_address);
2029 npgs = _b64top(mc->ml_size);
2030
2031 err = kphysm_del_span(*mhp, base, npgs);
2032 if (err != KPHYSM_OK) {
2033 cmn_err(CE_WARN, "%s memory reserve failed."
2034 " unexpected kphysm_del_span return value %d;"
2035 " basepfn=0x%lx npages=%ld",
2036 f, err, base, npgs);
2037
2038 return (-1);
2039 }
2040 }
2041
2042 return (0);
2043 }
2044
2045 /* debug counters */
2046 int dr_smt_realigned;
2047 int dr_smt_preference[4];
2048
2049 #ifdef DEBUG
2050 uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
2051 #endif
2052
2053 /*
2054 * Find and reserve a copy/rename target board suitable for the
2055 * given source board.
2056 * All boards in the system are examined and categorized in relation to
2057 * their memory size versus the source board's memory size. Order of
2058 * preference is:
2059 * 1st: board has same memory size
2060 * 2nd: board has larger memory size
2061 * 3rd: board has smaller memory size
2062 * 4th: board has smaller memory size, available memory will be reduced.
 * Boards in category 3 and 4 will have their MC's reprogrammed so that
 * the address span to which the MC responds appropriately covers
 * the nonrelocatable span of the source board.
2066 */
2067 static int
dr_select_mem_target(dr_handle_t * hp,dr_mem_unit_t * s_mp,struct memlist * s_ml)2068 dr_select_mem_target(dr_handle_t *hp,
2069 dr_mem_unit_t *s_mp, struct memlist *s_ml)
2070 {
2071 pgcnt_t sz = _b64top(s_mp->sbm_slice_size);
2072 pgcnt_t sm = sz - 1; /* mem_slice_mask */
2073 pfn_t s_phi, t_phi;
2074
2075 int n_sets = 4; /* same, larger, smaller, clipped */
2076 int preference; /* lower value is higher preference */
2077 int n_units_per_set;
2078 int idx;
2079 dr_mem_unit_t **sets;
2080
2081 int t_bd;
2082 int t_unit;
2083 int rv;
2084 int allow_src_memrange_modify;
2085 int allow_targ_memrange_modify;
2086 drmachid_t t_id;
2087 dr_board_t *s_bp, *t_bp;
2088 dr_mem_unit_t *t_mp, *c_mp;
2089 struct memlist *d_ml, *t_ml, *x_ml;
2090 memquery_t s_mq = {0};
2091 static fn_t f = "dr_select_mem_target";
2092
2093 PR_MEM("%s...\n", f);
2094
2095 ASSERT(s_ml != NULL);
2096
2097 n_units_per_set = MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD;
2098 sets = GETSTRUCT(dr_mem_unit_t *, n_units_per_set * n_sets);
2099
2100 s_bp = hp->h_bd;
2101 /* calculate the offset into the slice of the last source board pfn */
2102 ASSERT(s_mp->sbm_npages != 0);
2103 s_phi = (s_mp->sbm_basepfn + s_mp->sbm_npages - 1) & sm;
2104
2105 allow_src_memrange_modify = drmach_allow_memrange_modify(s_bp->b_id);
2106
2107 /*
2108 * Make one pass through all memory units on all boards
2109 * and categorize them with respect to the source board.
2110 */
2111 for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2112 /*
2113 * The board structs are a contiguous array
2114 * so we take advantage of that to find the
2115 * correct board struct pointer for a given
2116 * board number.
2117 */
2118 t_bp = dr_lookup_board(t_bd);
2119
2120 /* source board can not be its own target */
2121 if (s_bp->b_num == t_bp->b_num)
2122 continue;
2123
2124 for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2125
2126 t_mp = dr_get_mem_unit(t_bp, t_unit);
2127
2128 /* this memory node must be attached */
2129 if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2130 continue;
2131
2132 /* source unit can not be its own target */
2133 if (s_mp == t_mp) {
2134 /* catch this is debug kernels */
2135 ASSERT(0);
2136 continue;
2137 }
2138
2139 /*
2140 * this memory node must not already be reserved
2141 * by some other memory delete operation.
2142 */
2143 if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2144 continue;
2145
2146 /*
2147 * categorize the memory node
2148 * If this is a smaller memory node, create a
2149 * temporary, edited copy of the source board's
2150 * memlist containing only the span of the non-
2151 * relocatable pages.
2152 */
2153 t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
2154 t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2155 allow_targ_memrange_modify =
2156 drmach_allow_memrange_modify(t_id);
2157 if (t_mp->sbm_npages == s_mp->sbm_npages &&
2158 t_phi == s_phi) {
2159 preference = 0;
2160 t_mp->sbm_slice_offset = 0;
2161 } else if (t_mp->sbm_npages > s_mp->sbm_npages &&
2162 t_phi > s_phi) {
2163 /*
2164 * Selecting this target will require modifying
2165 * the source and/or target physical address
2166 * ranges. Skip if not supported by platform.
2167 */
2168 if (!allow_src_memrange_modify ||
2169 !allow_targ_memrange_modify) {
2170 PR_MEM("%s: skip target %s, memory "
2171 "range relocation not supported "
2172 "by platform\n", f,
2173 t_mp->sbm_cm.sbdev_path);
2174 continue;
2175 }
2176 preference = 1;
2177 t_mp->sbm_slice_offset = 0;
2178 } else {
2179 pfn_t pfn = 0;
2180
2181 /*
2182 * Selecting this target will require modifying
2183 * the source and/or target physical address
2184 * ranges. Skip if not supported by platform.
2185 */
2186 if (!allow_src_memrange_modify ||
2187 !allow_targ_memrange_modify) {
2188 PR_MEM("%s: skip target %s, memory "
2189 "range relocation not supported "
2190 "by platform\n", f,
2191 t_mp->sbm_cm.sbdev_path);
2192 continue;
2193 }
2194
2195 /*
2196 * Check if its mc can be programmed to relocate
2197 * the active address range to match the
2198 * nonrelocatable span of the source board.
2199 */
2200 preference = 2;
2201
2202 if (s_mq.phys_pages == 0) {
2203 /*
2204 * find non-relocatable span on
2205 * source board.
2206 */
2207 rv = kphysm_del_span_query(
2208 s_mp->sbm_basepfn,
2209 s_mp->sbm_npages, &s_mq);
2210 if (rv != KPHYSM_OK) {
2211 PR_MEM("%s: %s: unexpected"
2212 " kphysm_del_span_query"
2213 " return value %d;"
2214 " basepfn 0x%lx,"
2215 " npages %ld\n",
2216 f,
2217 s_mp->sbm_cm.sbdev_path,
2218 rv,
2219 s_mp->sbm_basepfn,
2220 s_mp->sbm_npages);
2221
2222 /* paranoia */
2223 s_mq.phys_pages = 0;
2224
2225 continue;
2226 }
2227
2228 /* more paranoia */
2229 ASSERT(s_mq.phys_pages != 0);
2230 ASSERT(s_mq.nonrelocatable != 0);
2231
2232 /*
2233 * this should not happen
2234 * if it does, it simply means that
2235 * we can not proceed with qualifying
2236 * this target candidate.
2237 */
2238 if (s_mq.nonrelocatable == 0)
2239 continue;
2240
2241 PR_MEM("%s: %s: nonrelocatable"
2242 " span (0x%lx..0x%lx)\n",
2243 f,
2244 s_mp->sbm_cm.sbdev_path,
2245 s_mq.first_nonrelocatable,
2246 s_mq.last_nonrelocatable);
2247 }
2248
2249 /*
2250 * Round down the starting pfn of the
2251 * nonrelocatable span on the source board
2252 * to nearest programmable boundary possible
2253 * with this target candidate.
2254 */
2255 pfn = s_mq.first_nonrelocatable &
2256 ~t_mp->sbm_alignment_mask;
2257
2258 /* skip candidate if memory is too small */
2259 if (pfn + t_mp->sbm_npages <
2260 s_mq.last_nonrelocatable)
2261 continue;
2262
2263 /*
2264 * reprogramming an mc to relocate its
2265 * active address range means the beginning
2266 * address to which the DIMMS respond will
2267 * be somewhere above the slice boundary
2268 * address. The larger the size of memory
2269 * on this unit, the more likely part of it
2270 * will exist beyond the end of the slice.
2271 * The portion of the memory that does is
2272 * unavailable to the system until the mc
2273 * reprogrammed to a more favorable base
2274 * address.
2275 * An attempt is made to avoid the loss by
2276 * recalculating the mc base address relative
2277 * to the end of the slice. This may produce
2278 * a more favorable result. If not, we lower
2279 * the board's preference rating so that it
2280 * is one the last candidate boards to be
2281 * considered.
2282 */
2283 if ((pfn + t_mp->sbm_npages) & ~sm) {
2284 pfn_t p;
2285
2286 ASSERT(sz >= t_mp->sbm_npages);
2287
2288 /*
2289 * calculate an alternative starting
2290 * address relative to the end of the
2291 * slice's address space.
2292 */
2293 p = pfn & ~sm;
2294 p = p + (sz - t_mp->sbm_npages);
2295 p = p & ~t_mp->sbm_alignment_mask;
2296
2297 if ((p > s_mq.first_nonrelocatable) ||
2298 (p + t_mp->sbm_npages <
2299 s_mq.last_nonrelocatable)) {
2300
2301 /*
2302 * alternative starting addr
2303 * won't work. Lower preference
2304 * rating of this board, since
2305 * some number of pages will
2306 * unavailable for use.
2307 */
2308 preference = 3;
2309 } else {
2310 dr_smt_realigned++;
2311 pfn = p;
2312 }
2313 }
2314
2315 /*
2316 * translate calculated pfn to an offset
2317 * relative to the slice boundary. If the
2318 * candidate board is selected, this offset
2319 * will be used to calculate the values
2320 * programmed into the mc.
2321 */
2322 t_mp->sbm_slice_offset = pfn & sm;
2323 PR_MEM("%s: %s:"
2324 " proposed mc offset 0x%lx\n",
2325 f,
2326 t_mp->sbm_cm.sbdev_path,
2327 t_mp->sbm_slice_offset);
2328 }
2329
2330 dr_smt_preference[preference]++;
2331
2332 /* calculate index to start of preference set */
2333 idx = n_units_per_set * preference;
2334 /* calculate offset to respective element */
2335 idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2336
2337 ASSERT(idx < n_units_per_set * n_sets);
2338 sets[idx] = t_mp;
2339 }
2340 }
2341
2342 /*
2343 * NOTE: this would be a good place to sort each candidate
2344 * set in to some desired order, e.g. memory size in ascending
2345 * order. Without an additional sorting step here, the order
2346 * within a set is ascending board number order.
2347 */
2348
2349 c_mp = NULL;
2350 x_ml = NULL;
2351 t_ml = NULL;
2352 for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
2353 memquery_t mq;
2354
2355 /* cleanup t_ml after previous pass */
2356 if (t_ml != NULL) {
2357 memlist_delete(t_ml);
2358 t_ml = NULL;
2359 }
2360
2361 /* get candidate target board mem unit */
2362 t_mp = sets[idx];
2363 if (t_mp == NULL)
2364 continue;
2365
2366 /* get target board memlist */
2367 t_ml = dr_get_memlist(t_mp);
2368 if (t_ml == NULL) {
2369 cmn_err(CE_WARN, "%s: no memlist for"
2370 " mem-unit %d, board %d",
2371 f,
2372 t_mp->sbm_cm.sbdev_bp->b_num,
2373 t_mp->sbm_cm.sbdev_unum);
2374
2375 continue;
2376 }
2377
2378 /* get appropriate source board memlist */
2379 t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
2380 if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
2381 spgcnt_t excess;
2382
2383 /*
2384 * make a copy of the source board memlist
2385 * then edit it to remove the spans that
2386 * are outside the calculated span of
2387 * [pfn..s_mq.last_nonrelocatable].
2388 */
2389 if (x_ml != NULL)
2390 memlist_delete(x_ml);
2391
2392 x_ml = memlist_dup(s_ml);
2393 if (x_ml == NULL) {
2394 PR_MEM("%s: memlist_dup failed\n", f);
2395 /* TODO: should abort */
2396 continue;
2397 }
2398
2399 /* trim off lower portion */
2400 excess = t_mp->sbm_slice_offset -
2401 (s_mp->sbm_basepfn & sm);
2402
2403 if (excess > 0) {
2404 x_ml = memlist_del_span(
2405 x_ml,
2406 _ptob64(s_mp->sbm_basepfn),
2407 _ptob64(excess));
2408 }
2409 ASSERT(x_ml);
2410
2411 /*
2412 * Since this candidate target board is smaller
2413 * than the source board, s_mq must have been
2414 * initialized in previous loop while processing
2415 * this or some other candidate board.
2416 * FIXME: this is weak.
2417 */
2418 ASSERT(s_mq.phys_pages != 0);
2419
2420 /* trim off upper portion */
2421 excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
2422 - (s_mq.last_nonrelocatable + 1);
2423 if (excess > 0) {
2424 pfn_t p;
2425
2426 p = s_mq.last_nonrelocatable + 1;
2427 x_ml = memlist_del_span(
2428 x_ml,
2429 _ptob64(p),
2430 _ptob64(excess));
2431 }
2432
2433 PR_MEM("%s: %s: edited source memlist:\n",
2434 f, s_mp->sbm_cm.sbdev_path);
2435 PR_MEMLIST_DUMP(x_ml);
2436
2437 #ifdef DEBUG
2438 /* sanity check memlist */
2439 d_ml = x_ml;
2440 while (d_ml->ml_next != NULL)
2441 d_ml = d_ml->ml_next;
2442
2443 ASSERT(d_ml->ml_address + d_ml->ml_size ==
2444 _ptob64(s_mq.last_nonrelocatable + 1));
2445 #endif
2446
2447 /*
2448 * x_ml now describes only the portion of the
2449 * source board that will be moved during the
2450 * copy/rename operation.
2451 */
2452 d_ml = x_ml;
2453 } else {
2454 /* use original memlist; all spans will be moved */
2455 d_ml = s_ml;
2456 }
2457
2458 /* verify target can support source memory spans. */
2459 if (memlist_canfit(d_ml, t_ml) == 0) {
2460 PR_MEM("%s: source memlist won't"
2461 " fit in target memlist\n", f);
2462 PR_MEM("%s: source memlist:\n", f);
2463 PR_MEMLIST_DUMP(d_ml);
2464 PR_MEM("%s: target memlist:\n", f);
2465 PR_MEMLIST_DUMP(t_ml);
2466
2467 continue;
2468 }
2469
2470 /* NOTE: the value of d_ml is not used beyond this point */
2471
2472 PR_MEM("%s: checking for no-reloc in %s, "
2473 " basepfn=0x%lx, npages=%ld\n",
2474 f,
2475 t_mp->sbm_cm.sbdev_path,
2476 t_mp->sbm_basepfn,
2477 t_mp->sbm_npages);
2478
2479 rv = kphysm_del_span_query(
2480 t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
2481 if (rv != KPHYSM_OK) {
2482 PR_MEM("%s: kphysm_del_span_query:"
2483 " unexpected return value %d\n", f, rv);
2484
2485 continue;
2486 }
2487
2488 if (mq.nonrelocatable != 0) {
2489 PR_MEM("%s: candidate %s has"
2490 " nonrelocatable span [0x%lx..0x%lx]\n",
2491 f,
2492 t_mp->sbm_cm.sbdev_path,
2493 mq.first_nonrelocatable,
2494 mq.last_nonrelocatable);
2495
2496 continue;
2497 }
2498
2499 #ifdef DEBUG
2500 /*
2501 * This is a debug tool for excluding certain boards
2502 * from being selected as a target board candidate.
2503 * dr_ignore_board is only tested by this driver.
2504 * It must be set with adb, obp, /etc/system or your
2505 * favorite debugger.
2506 */
2507 if (dr_ignore_board &
2508 (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2509 PR_MEM("%s: dr_ignore_board flag set,"
2510 " ignoring %s as candidate\n",
2511 f, t_mp->sbm_cm.sbdev_path);
2512 continue;
2513 }
2514 #endif
2515
2516 /*
2517 * Reserve excess source board memory, if any.
2518 *
2519 * When the number of pages on the candidate target
2520 * board is less than the number of pages on the source,
2521 * then some spans (clearly) of the source board's address
2522 * space will not be covered by physical memory after the
2523 * copy/rename completes. The following code block
2524 * schedules those spans to be deleted.
2525 */
2526 if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
2527 pfn_t pfn;
2528 uint64_t s_del_pa;
2529 struct memlist *ml;
2530
2531 d_ml = memlist_dup(s_ml);
2532 if (d_ml == NULL) {
2533 PR_MEM("%s: cant dup src brd memlist\n", f);
2534 /* TODO: should abort */
2535 continue;
2536 }
2537
2538 /* calculate base pfn relative to target board */
2539 pfn = s_mp->sbm_basepfn & ~sm;
2540 pfn += t_mp->sbm_slice_offset;
2541
2542 /*
2543 * cannot split dynamically added segment
2544 */
2545 s_del_pa = _ptob64(pfn + t_mp->sbm_npages);
2546 PR_MEM("%s: proposed src delete pa=0x%lx\n", f,
2547 s_del_pa);
2548 PR_MEM("%s: checking for split of dyn seg list:\n", f);
2549 PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2550 for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
2551 if (s_del_pa > ml->ml_address &&
2552 s_del_pa < ml->ml_address + ml->ml_size) {
2553 s_del_pa = ml->ml_address;
2554 break;
2555 }
2556 }
2557
2558 /* remove span that will reside on candidate board */
2559 d_ml = memlist_del_span(d_ml, _ptob64(pfn),
2560 s_del_pa - _ptob64(pfn));
2561
2562 PR_MEM("%s: %s: reserving src brd memlist:\n",
2563 f, s_mp->sbm_cm.sbdev_path);
2564 PR_MEMLIST_DUMP(d_ml);
2565
2566 /* reserve excess spans */
2567 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, d_ml)
2568 != 0) {
2569
2570 /* likely more non-reloc pages appeared */
2571 /* TODO: restart from top? */
2572 continue;
2573 }
2574 } else {
2575 /* no excess source board memory */
2576 d_ml = NULL;
2577 }
2578
2579 s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2580
2581 /*
2582 * reserve all memory on target board.
2583 * NOTE: source board's memhandle is used.
2584 *
2585 * If this succeeds (eq 0), then target selection is
2586 * complete and all unwanted memory spans, both source and
2587 * target, have been reserved. Loop is terminated.
2588 */
2589 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2590 PR_MEM("%s: %s: target board memory reserved\n",
2591 f, t_mp->sbm_cm.sbdev_path);
2592
2593 /* a candidate target board is now reserved */
2594 t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2595 c_mp = t_mp;
2596
2597 /* *** EXITING LOOP *** */
2598 break;
2599 }
2600
2601 /* did not successfully reserve the target board. */
2602 PR_MEM("%s: could not reserve target %s\n",
2603 f, t_mp->sbm_cm.sbdev_path);
2604
2605 /*
2606 * NOTE: an undo of the dr_reserve_mem_span work
2607 * will happen automatically when the memhandle
2608 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2609 */
2610
2611 s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2612 }
2613
2614 /* clean up after memlist editing logic */
2615 if (x_ml != NULL)
2616 memlist_delete(x_ml);
2617
2618 FREESTRUCT(sets, dr_mem_unit_t *, n_units_per_set * n_sets);
2619
2620 /*
2621 * c_mp will be NULL when the entire sets[] array
2622 * has been searched without reserving a target board.
2623 */
2624 if (c_mp == NULL) {
2625 PR_MEM("%s: %s: target selection failed.\n",
2626 f, s_mp->sbm_cm.sbdev_path);
2627
2628 if (t_ml != NULL)
2629 memlist_delete(t_ml);
2630
2631 return (-1);
2632 }
2633
2634 PR_MEM("%s: found target %s for source %s\n",
2635 f,
2636 c_mp->sbm_cm.sbdev_path,
2637 s_mp->sbm_cm.sbdev_path);
2638
2639 s_mp->sbm_peer = c_mp;
2640 s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2641 s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */
2642 s_mp->sbm_mlist = s_ml;
2643 s_mp->sbm_cm.sbdev_busy = 1;
2644
2645 c_mp->sbm_peer = s_mp;
2646 c_mp->sbm_flags |= DR_MFLAG_TARGET;
2647 c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */
2648 c_mp->sbm_mlist = t_ml;
2649 c_mp->sbm_cm.sbdev_busy = 1;
2650
2651 s_mp->sbm_flags &= ~DR_MFLAG_MEMRESIZE;
2652 if (c_mp->sbm_npages > s_mp->sbm_npages) {
2653 s_mp->sbm_flags |= DR_MFLAG_MEMUPSIZE;
2654 PR_MEM("%s: upsize detected (source=%ld < target=%ld)\n",
2655 f, s_mp->sbm_npages, c_mp->sbm_npages);
2656 } else if (c_mp->sbm_npages < s_mp->sbm_npages) {
2657 s_mp->sbm_flags |= DR_MFLAG_MEMDOWNSIZE;
2658 PR_MEM("%s: downsize detected (source=%ld > target=%ld)\n",
2659 f, s_mp->sbm_npages, c_mp->sbm_npages);
2660 }
2661
2662 return (0);
2663 }
2664
2665 /*
2666 * Memlist support.
2667 */
2668
2669 /*
2670 * Determine whether the source memlist (s_mlist) will
2671 * fit into the target memlist (t_mlist) in terms of
2672 * size and holes (i.e. based on same relative base address).
2673 */
2674 static int
memlist_canfit(struct memlist * s_mlist,struct memlist * t_mlist)2675 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2676 {
2677 int rv = 0;
2678 uint64_t s_basepa, t_basepa;
2679 struct memlist *s_ml, *t_ml;
2680
2681 if ((s_mlist == NULL) || (t_mlist == NULL))
2682 return (0);
2683
2684 /*
2685 * Base both memlists on common base address (0).
2686 */
2687 s_basepa = s_mlist->ml_address;
2688 t_basepa = t_mlist->ml_address;
2689
2690 for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2691 s_ml->ml_address -= s_basepa;
2692
2693 for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2694 t_ml->ml_address -= t_basepa;
2695
2696 s_ml = s_mlist;
2697 for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
2698 uint64_t s_start, s_end;
2699 uint64_t t_start, t_end;
2700
2701 t_start = t_ml->ml_address;
2702 t_end = t_start + t_ml->ml_size;
2703
2704 for (; s_ml; s_ml = s_ml->ml_next) {
2705 s_start = s_ml->ml_address;
2706 s_end = s_start + s_ml->ml_size;
2707
2708 if ((s_start < t_start) || (s_end > t_end))
2709 break;
2710 }
2711 }
2712 /*
2713 * If we ran out of source memlist chunks that mean
2714 * we found a home for all of them.
2715 */
2716 if (s_ml == NULL)
2717 rv = 1;
2718
2719 /*
2720 * Need to add base addresses back since memlists
2721 * are probably in use by caller.
2722 */
2723 for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2724 s_ml->ml_address += s_basepa;
2725
2726 for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2727 t_ml->ml_address += t_basepa;
2728
2729 return (rv);
2730 }
2731