/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * DR memory support routines. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct memlist *phys_install; /* TODO: push this reference below drmach line */ extern int kcage_on; /* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */ static char *dr_ie_fmt = "%M% %d"; static int dr_post_detach_mem_unit(dr_mem_unit_t *mp); static int dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *mlist); static int dr_select_mem_target(dr_handle_t *hp, dr_mem_unit_t *mp, struct memlist *ml); static void dr_init_mem_unit_data(dr_mem_unit_t *mp); static struct memlist *memlist_dup(struct memlist *); static int memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist); static struct memlist *memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len); static struct memlist *memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len); extern void page_unretire_pages(void); /* * dr_mem_unit_t.sbm_flags */ #define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */ #define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */ #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */ #define DR_MFLAG_MEMUPSIZE 0x08 /* move from big to small board */ #define DR_MFLAG_MEMDOWNSIZE 0x10 /* move from small to big board */ #define DR_MFLAG_MEMRESIZE 0x18 /* move to different size board */ #define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */ #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */ /* helper macros */ #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) static struct memlist * dr_get_memlist(dr_mem_unit_t *mp) { struct memlist *mlist = NULL; sbd_error_t *err; static fn_t f = "dr_get_memlist"; PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path); /* * Return cached memlist, if present. * This memlist will be present following an * unconfigure (a.k.a: detach) of this memunit. * It should only be used in the case were a configure * is bringing this memunit back in without going * through the disconnect and connect states. */ if (mp->sbm_mlist) { PR_MEM("%s: found cached memlist\n", f); mlist = memlist_dup(mp->sbm_mlist); } else { uint64_t basepa = _ptob64(mp->sbm_basepfn); /* attempt to construct a memlist using phys_install */ /* round down to slice base address */ basepa &= ~(mp->sbm_slice_size - 1); /* get a copy of phys_install to edit */ memlist_read_lock(); mlist = memlist_dup(phys_install); memlist_read_unlock(); /* trim lower irrelevant span */ if (mlist) mlist = memlist_del_span(mlist, 0ull, basepa); /* trim upper irrelevant span */ if (mlist) { uint64_t endpa; basepa += mp->sbm_slice_size; endpa = _ptob64(physmax + 1); if (endpa > basepa) mlist = memlist_del_span( mlist, basepa, endpa - basepa); } if (mlist) { /* successfully built a memlist */ PR_MEM("%s: derived memlist from phys_install\n", f); } /* if no mlist yet, try platform layer */ if (!mlist) { err = drmach_mem_get_memlist( mp->sbm_cm.sbdev_id, &mlist); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); mlist = NULL; /* paranoia */ } } } PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(mlist); return (mlist); } typedef struct { kcondvar_t cond; kmutex_t lock; int error; int done; } dr_release_mem_sync_t; /* * Memory has been logically removed by the time this routine is called. */ static void dr_mem_del_done(void *arg, int error) { dr_release_mem_sync_t *ds = arg; mutex_enter(&ds->lock); ds->error = error; ds->done = 1; cv_signal(&ds->cond); mutex_exit(&ds->lock); } /* * When we reach here the memory being drained should have * already been reserved in dr_pre_release_mem(). * Our only task here is to kick off the "drain" and wait * for it to finish. */ void dr_release_mem(dr_common_unit_t *cp) { dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; int err; dr_release_mem_sync_t rms; static fn_t f = "dr_release_mem"; /* check that this memory unit has been reserved */ if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) { DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); return; } bzero((void *) &rms, sizeof (rms)); mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL); cv_init(&rms.cond, NULL, CV_DRIVER, NULL); mutex_enter(&rms.lock); err = kphysm_del_start(mp->sbm_memhandle, dr_mem_del_done, (void *) &rms); if (err == KPHYSM_OK) { /* wait for completion or interrupt */ while (!rms.done) { if (cv_wait_sig(&rms.cond, &rms.lock) == 0) { /* then there is a pending UNIX signal */ (void) kphysm_del_cancel(mp->sbm_memhandle); /* wait for completion */ while (!rms.done) cv_wait(&rms.cond, &rms.lock); } } /* get the result of the memory delete operation */ err = rms.error; } mutex_exit(&rms.lock); cv_destroy(&rms.cond); mutex_destroy(&rms.lock); if (err != KPHYSM_OK) { int e_code; switch (err) { case KPHYSM_ENOWORK: e_code = ESBD_NOERROR; break; case KPHYSM_EHANDLE: case KPHYSM_ESEQUENCE: e_code = ESBD_INTERNAL; break; case KPHYSM_ENOTVIABLE: e_code = ESBD_MEM_NOTVIABLE; break; case KPHYSM_EREFUSED: e_code = ESBD_MEM_REFUSED; break; case KPHYSM_ENONRELOC: e_code = ESBD_MEM_NONRELOC; break; case KPHYSM_ECANCELLED: e_code = ESBD_MEM_CANCELLED; break; case KPHYSM_ERESOURCE: e_code = ESBD_MEMFAIL; break; default: cmn_err(CE_WARN, "%s: unexpected kphysm error code %d," " id 0x%p", f, err, mp->sbm_cm.sbdev_id); e_code = ESBD_IO; break; } if (e_code != ESBD_NOERROR) { dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code); } } } void dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp) { _NOTE(ARGUNUSED(hp)) dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; struct memlist *ml, *mc; sbd_error_t *err; static fn_t f = "dr_attach_mem"; PR_MEM("%s...\n", f); dr_lock_status(hp->h_bd); err = drmach_configure(cp->sbdev_id, 0); dr_unlock_status(hp->h_bd); if (err) { DRERR_SET_C(&cp->sbdev_error, &err); return; } ml = dr_get_memlist(mp); for (mc = ml; mc; mc = mc->next) { int rv; sbd_error_t *err; rv = kphysm_add_memory_dynamic( (pfn_t)(mc->address >> PAGESHIFT), (pgcnt_t)(mc->size >> PAGESHIFT)); if (rv != KPHYSM_OK) { /* * translate kphysm error and * store in devlist error */ switch (rv) { case KPHYSM_ERESOURCE: rv = ESBD_NOMEM; break; case KPHYSM_EFAULT: rv = ESBD_FAULT; break; default: rv = ESBD_INTERNAL; break; } if (rv == ESBD_INTERNAL) { DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); } else dr_dev_err(CE_WARN, &mp->sbm_cm, rv); break; } err = drmach_mem_add_span( mp->sbm_cm.sbdev_id, mc->address, mc->size); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); break; } } memlist_delete(ml); /* back out if configure failed */ if (mp->sbm_cm.sbdev_error != NULL) { dr_lock_status(hp->h_bd); err = drmach_unconfigure(cp->sbdev_id, DRMACH_DEVI_REMOVE); if (err) sbd_err_clear(&err); dr_unlock_status(hp->h_bd); } } #define DR_SCRUB_VALUE 0x0d0e0a0d0b0e0e0fULL static void dr_mem_ecache_scrub(dr_mem_unit_t *mp, struct memlist *mlist) { #ifdef DEBUG clock_t stime = lbolt; #endif /* DEBUG */ struct memlist *ml; uint64_t scrub_value = DR_SCRUB_VALUE; processorid_t cpuid; static fn_t f = "dr_mem_ecache_scrub"; cpuid = drmach_mem_cpu_affinity(mp->sbm_cm.sbdev_id); affinity_set(cpuid); PR_MEM("%s: using proc %d, memlist...\n", f, (cpuid == CPU_CURRENT) ? CPU->cpu_id : cpuid); PR_MEMLIST_DUMP(mlist); for (ml = mlist; ml; ml = ml->next) { uint64_t dst_pa; uint64_t nbytes; /* calculate the destination physical address */ dst_pa = ml->address; if (ml->address & PAGEOFFSET) cmn_err(CE_WARN, "%s: address (0x%llx) not on " "page boundary", f, ml->address); nbytes = ml->size; if (ml->size & PAGEOFFSET) cmn_err(CE_WARN, "%s: size (0x%llx) not on " "page boundary", f, ml->size); /*LINTED*/ while (nbytes > 0) { /* write 64 bits to dst_pa */ stdphys(dst_pa, scrub_value); /* increment/decrement by cacheline sizes */ dst_pa += DRMACH_COHERENCY_UNIT; nbytes -= DRMACH_COHERENCY_UNIT; } } /* * flush this cpu's ecache and take care to ensure * that all of it's bus transactions have retired. */ drmach_cpu_flush_ecache_sync(); affinity_clear(); #ifdef DEBUG stime = lbolt - stime; PR_MEM("%s: scrub ticks = %ld (%ld secs)\n", f, stime, stime / hz); #endif /* DEBUG */ } /* * This function marks as clean, all the faulty pages that belong to the * board that is copy-renamed since they are not likely to be bad pages * after the rename. This includes the retired pages on the board. */ static void dr_memlist_clrpages(struct memlist *r_ml) { struct memlist *t_ml; page_t *pp, *epp; pfn_t pfn, epfn; struct memseg *seg; if (r_ml == NULL) return; for (t_ml = r_ml; (t_ml != NULL); t_ml = t_ml->next) { pfn = _b64top(t_ml->address); epfn = _b64top(t_ml->address + t_ml->size); for (seg = memsegs; seg != NULL; seg = seg->next) { if (pfn >= seg->pages_end || epfn < seg->pages_base) continue; pp = seg->pages; if (pfn > seg->pages_base) pp += pfn - seg->pages_base; epp = seg->epages; if (epfn < seg->pages_end) epp -= seg->pages_end - epfn; ASSERT(pp < epp); while (pp < epp) { if (page_isfaulty((page_t *)pp)) page_clrtoxic_flag((page_t *)pp, PAGE_IS_FAULTY); pp++; } } } } static int dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) { time_t copytime; drmachid_t cr_id; dr_sr_handle_t *srhp; struct memlist *c_ml, *d_ml, *r_ml; sbd_error_t *err; static fn_t f = "dr_move_memory"; PR_MEM("%s: (INLINE) moving memory from %s to %s\n", f, s_mp->sbm_cm.sbdev_path, t_mp->sbm_cm.sbdev_path); ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE); ASSERT(s_mp->sbm_peer == t_mp); ASSERT(s_mp->sbm_mlist); ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); ASSERT(t_mp->sbm_peer == s_mp); /* * create a memlist of spans to copy by removing * the spans that have been deleted, if any, from * the full source board memlist. s_mp->sbm_del_mlist * will be NULL if there were no spans deleted from * the source board. */ c_ml = memlist_dup(s_mp->sbm_mlist); d_ml = s_mp->sbm_del_mlist; while (d_ml != NULL) { c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size); d_ml = d_ml->next; } /* * create a copy of the memlist to be used for retiring pages. */ r_ml = memlist_dup(c_ml); affinity_set(drmach_mem_cpu_affinity(t_mp->sbm_cm.sbdev_id)); err = drmach_copy_rename_init( t_mp->sbm_cm.sbdev_id, _ptob64(t_mp->sbm_slice_offset), s_mp->sbm_cm.sbdev_id, c_ml, &cr_id); if (err) { DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); affinity_clear(); memlist_delete(r_ml); return (-1); } srhp = dr_get_sr_handle(hp); ASSERT(srhp); copytime = lbolt; /* Quiesce the OS. */ if (dr_suspend(srhp)) { cmn_err(CE_WARN, "%s: failed to quiesce OS" " for copy-rename", f); dr_release_sr_handle(srhp); err = drmach_copy_rename_fini(cr_id); if (err) { /* * no error is expected since the program has * not yet run. */ /* catch this in debug kernels */ ASSERT(0); sbd_err_clear(&err); } /* suspend error reached via hp */ s_mp->sbm_cm.sbdev_error = hp->h_err; hp->h_err = NULL; affinity_clear(); memlist_delete(r_ml); return (-1); } /* * Rename memory for lgroup. * Source and target board numbers are packaged in arg. */ { dr_board_t *t_bp, *s_bp; s_bp = s_mp->sbm_cm.sbdev_bp; t_bp = t_mp->sbm_cm.sbdev_bp; lgrp_plat_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16))); } drmach_copy_rename(cr_id); /* * Clear pages that have been marked as faulty since we are * changing the physical memory for the pages. */ dr_memlist_clrpages(r_ml); /* Resume the OS. */ dr_resume(srhp); copytime = lbolt - copytime; dr_release_sr_handle(srhp); err = drmach_copy_rename_fini(cr_id); if (err) DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); affinity_clear(); PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n", f, copytime, copytime / hz); memlist_delete(r_ml); /* Unretire any pages cleared after copy-rename */ page_unretire_pages(); /* return -1 if dr_suspend or copy/rename recorded an error */ return (err == NULL ? 0 : -1); } /* * If detaching node contains memory that is "non-permanent" * then the memory adr's are simply cleared. If the memory * is non-relocatable, then do a copy-rename. */ void dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp) { int rv = 0; dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp; dr_mem_unit_t *t_mp; dr_state_t state; static fn_t f = "dr_detach_mem"; PR_MEM("%s...\n", f); /* lookup target mem unit and target board structure, if any */ if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { t_mp = s_mp->sbm_peer; ASSERT(t_mp != NULL); ASSERT(t_mp->sbm_peer == s_mp); } else { t_mp = NULL; } /* verify mem unit's state is UNREFERENCED */ state = s_mp->sbm_cm.sbdev_state; if (state != DR_STATE_UNREFERENCED) { dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE); return; } /* verify target mem unit's state is UNREFERENCED, if any */ if (t_mp != NULL) { state = t_mp->sbm_cm.sbdev_state; if (state != DR_STATE_UNREFERENCED) { dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE); return; } } /* * Scrub deleted memory. This will cause all cachelines * referencing the memory to only be in the local cpu's * ecache. */ if (s_mp->sbm_flags & DR_MFLAG_RELDONE) { /* no del mlist for src<=dst mem size copy/rename */ if (s_mp->sbm_del_mlist) dr_mem_ecache_scrub(s_mp, s_mp->sbm_del_mlist); } if (t_mp != NULL && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) { ASSERT(t_mp->sbm_del_mlist); dr_mem_ecache_scrub(t_mp, t_mp->sbm_del_mlist); } /* * If there is no target board (no copy/rename was needed), then * we're done! */ if (t_mp == NULL) { sbd_error_t *err; /* * Reprogram interconnect hardware and disable * memory controllers for memory node that's going away. */ err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id); if (err) { DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); rv = -1; } } else { rv = dr_move_memory(hp, s_mp, t_mp); PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n", f, rv ? "FAILED" : "COMPLETED", s_mp->sbm_cm.sbdev_bp->b_num, t_mp->sbm_cm.sbdev_bp->b_num); if (rv != 0) (void) dr_cancel_mem(s_mp); } if (rv == 0) { sbd_error_t *err; dr_lock_status(hp->h_bd); err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, DRMACH_DEVI_REMOVE); dr_unlock_status(hp->h_bd); if (err) sbd_err_clear(&err); } } #ifndef _STARFIRE /* * XXX workaround for certain lab configurations (see also starcat drmach.c) * Temporary code to get around observed incorrect results from * kphysm_del_span_query when the queried span contains address spans * not occupied by memory in between spans that do have memory. * This routine acts as a wrapper to kphysm_del_span_query. It builds * a memlist from phys_install of spans that exist between base and * base + npages, inclusively. Kphysm_del_span_query is called for each * node in the memlist with the results accumulated in *mp. */ static int dr_del_span_query(pfn_t base, pgcnt_t npages, memquery_t *mp) { uint64_t pa = _ptob64(base); uint64_t sm = ~ (137438953472ull - 1); uint64_t sa = pa & sm; struct memlist *mlist, *ml; int rv; npages = npages; /* silence lint */ memlist_read_lock(); mlist = memlist_dup(phys_install); memlist_read_unlock(); again: for (ml = mlist; ml; ml = ml->next) { if ((ml->address & sm) != sa) { mlist = memlist_del_span(mlist, ml->address, ml->size); goto again; } } mp->phys_pages = 0; mp->managed = 0; mp->nonrelocatable = 0; mp->first_nonrelocatable = (pfn_t)-1; /* XXX */ mp->last_nonrelocatable = 0; for (ml = mlist; ml; ml = ml->next) { memquery_t mq; rv = kphysm_del_span_query( _b64top(ml->address), _b64top(ml->size), &mq); if (rv) break; mp->phys_pages += mq.phys_pages; mp->managed += mq.managed; mp->nonrelocatable += mq.nonrelocatable; if (mq.nonrelocatable != 0) { if (mq.first_nonrelocatable < mp->first_nonrelocatable) mp->first_nonrelocatable = mq.first_nonrelocatable; if (mq.last_nonrelocatable > mp->last_nonrelocatable) mp->last_nonrelocatable = mq.last_nonrelocatable; } } if (mp->nonrelocatable == 0) mp->first_nonrelocatable = 0; /* XXX */ memlist_delete(mlist); return (rv); } #define kphysm_del_span_query dr_del_span_query #endif /* _STARFIRE */ /* * NOTE: This routine is only partially smart about multiple * mem-units. Need to make mem-status structure smart * about them also. */ int dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) { int m, mix; memdelstat_t mdst; memquery_t mq; dr_board_t *bp; dr_mem_unit_t *mp; sbd_mem_stat_t *msp; static fn_t f = "dr_mem_status"; bp = hp->h_bd; devset &= DR_DEVS_PRESENT(bp); for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) { int rv; sbd_error_t *err; drmach_status_t pstat; dr_mem_unit_t *p_mp; if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0) continue; mp = dr_get_mem_unit(bp, m); if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) { /* present, but not fully initialized */ continue; } if (mp->sbm_cm.sbdev_id == (drmachid_t)0) continue; /* fetch platform status */ err = drmach_status(mp->sbm_cm.sbdev_id, &pstat); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); continue; } msp = &dsp->d_mem; bzero((caddr_t)msp, sizeof (*msp)); strncpy(msp->ms_cm.c_id.c_name, pstat.type, sizeof (msp->ms_cm.c_id.c_name)); msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type; msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT; msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond; msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy; msp->ms_cm.c_time = mp->sbm_cm.sbdev_time; msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate; msp->ms_totpages = mp->sbm_npages; msp->ms_basepfn = mp->sbm_basepfn; msp->ms_pageslost = mp->sbm_pageslost; msp->ms_cage_enabled = kcage_on; if (mp->sbm_flags & DR_MFLAG_RESERVED) p_mp = mp->sbm_peer; else p_mp = NULL; if (p_mp == NULL) { msp->ms_peer_is_target = 0; msp->ms_peer_ap_id[0] = '\0'; } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) { char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); char *minor; /* * b_dip doesn't have to be held for ddi_pathname() * because the board struct (dr_board_t) will be * destroyed before b_dip detaches. */ (void) ddi_pathname(bp->b_dip, path); minor = strchr(p_mp->sbm_cm.sbdev_path, ':'); snprintf(msp->ms_peer_ap_id, sizeof (msp->ms_peer_ap_id), "%s%s", path, (minor == NULL) ? "" : minor); kmem_free(path, MAXPATHLEN); if (p_mp->sbm_flags & DR_MFLAG_TARGET) msp->ms_peer_is_target = 1; } if (mp->sbm_flags & DR_MFLAG_RELOWNER) rv = kphysm_del_status(mp->sbm_memhandle, &mdst); else rv = KPHYSM_EHANDLE; /* force 'if' to fail */ if (rv == KPHYSM_OK) { /* * Any pages above managed is "free", * i.e. it's collected. */ msp->ms_detpages += (uint_t)(mdst.collected + mdst.phys_pages - mdst.managed); } else { /* * If we're UNREFERENCED or UNCONFIGURED, * then the number of detached pages is * however many pages are on the board. * I.e. detached = not in use by OS. */ switch (msp->ms_cm.c_ostate) { /* * changed to use cfgadm states * * was: * case DR_STATE_UNREFERENCED: * case DR_STATE_UNCONFIGURED: */ case SBD_STAT_UNCONFIGURED: msp->ms_detpages = msp->ms_totpages; break; default: break; } } /* * kphysm_del_span_query can report non-reloc pages = total * pages for memory that is not yet configured */ if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) { rv = kphysm_del_span_query(mp->sbm_basepfn, mp->sbm_npages, &mq); if (rv == KPHYSM_OK) { msp->ms_managed_pages = mq.managed; msp->ms_noreloc_pages = mq.nonrelocatable; msp->ms_noreloc_first = mq.first_nonrelocatable; msp->ms_noreloc_last = mq.last_nonrelocatable; msp->ms_cm.c_sflags = 0; if (mq.nonrelocatable) { SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE, msp->ms_cm.c_sflags); } } else { PR_MEM("%s: kphysm_del_span_query() = %d\n", f, rv); } } /* * Check source unit state during copy-rename */ if ((mp->sbm_flags & DR_MFLAG_SOURCE) && (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED || mp->sbm_cm.sbdev_state == DR_STATE_RELEASE)) msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED; mix++; dsp++; } return (mix); } int dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) { _NOTE(ARGUNUSED(hp)) int err_flag = 0; int d; sbd_error_t *err; static fn_t f = "dr_pre_attach_mem"; PR_MEM("%s...\n", f); for (d = 0; d < devnum; d++) { dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; dr_state_t state; cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path); state = mp->sbm_cm.sbdev_state; switch (state) { case DR_STATE_UNCONFIGURED: PR_MEM("%s: recovering from UNCONFIG for %s\n", f, mp->sbm_cm.sbdev_path); /* use memlist cached by dr_post_detach_mem_unit */ ASSERT(mp->sbm_mlist != NULL); PR_MEM("%s: re-configuring cached memlist for %s:\n", f, mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(mp->sbm_mlist); /* kphysm del handle should be have been freed */ ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); /*FALLTHROUGH*/ case DR_STATE_CONNECTED: PR_MEM("%s: reprogramming mem hardware on %s\n", f, mp->sbm_cm.sbdev_bp->b_path); PR_MEM("%s: enabling %s\n", f, mp->sbm_cm.sbdev_path); err = drmach_mem_enable(mp->sbm_cm.sbdev_id); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); err_flag = 1; } break; default: dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE); err_flag = 1; break; } /* exit for loop if error encountered */ if (err_flag) break; } return (err_flag ? -1 : 0); } int dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) { _NOTE(ARGUNUSED(hp)) int d; static fn_t f = "dr_post_attach_mem"; PR_MEM("%s...\n", f); for (d = 0; d < devnum; d++) { dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; struct memlist *mlist, *ml; mlist = dr_get_memlist(mp); if (mlist == NULL) { dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL); continue; } /* * Verify the memory really did successfully attach * by checking for its existence in phys_install. */ memlist_read_lock(); if (memlist_intersect(phys_install, mlist) == 0) { memlist_read_unlock(); DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); PR_MEM("%s: %s memlist not in phys_install", f, mp->sbm_cm.sbdev_path); memlist_delete(mlist); continue; } memlist_read_unlock(); for (ml = mlist; ml != NULL; ml = ml->next) { sbd_error_t *err; err = drmach_mem_add_span( mp->sbm_cm.sbdev_id, ml->address, ml->size); if (err) DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); } memlist_delete(mlist); /* * Destroy cached memlist, if any. * There will be a cached memlist in sbm_mlist if * this board is being configured directly after * an unconfigure. * To support this transition, dr_post_detach_mem * left a copy of the last known memlist in sbm_mlist. * This memlist could differ from any derived from * hardware if while this memunit was last configured * the system detected and deleted bad pages from * phys_install. The location of those bad pages * will be reflected in the cached memlist. */ if (mp->sbm_mlist) { memlist_delete(mp->sbm_mlist); mp->sbm_mlist = NULL; } /* * TODO: why is this call to dr_init_mem_unit_data here? * this has been done at discovery or connect time, so this is * probably redundant and unnecessary. */ dr_init_mem_unit_data(mp); } return (0); } int dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) { _NOTE(ARGUNUSED(hp)) int d; for (d = 0; d < devnum; d++) { dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path); } return (0); } int dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) { _NOTE(ARGUNUSED(hp)) int d, rv; static fn_t f = "dr_post_detach_mem"; PR_MEM("%s...\n", f); rv = 0; for (d = 0; d < devnum; d++) { dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd); if (dr_post_detach_mem_unit(mp)) rv = -1; } return (rv); } static void dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml) { static fn_t f = "dr_add_memory_spans"; PR_MEM("%s...", f); PR_MEMLIST_DUMP(ml); #ifdef DEBUG memlist_read_lock(); if (memlist_intersect(phys_install, ml)) { PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f); } memlist_read_unlock(); #endif for (; ml; ml = ml->next) { pfn_t base; pgcnt_t npgs; int rv; sbd_error_t *err; base = _b64top(ml->address); npgs = _b64top(ml->size); rv = kphysm_add_memory_dynamic(base, npgs); err = drmach_mem_add_span( mp->sbm_cm.sbdev_id, ml->address, ml->size); if (err) DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); if (rv != KPHYSM_OK) { cmn_err(CE_WARN, "%s:" " unexpected kphysm_add_memory_dynamic" " return value %d;" " basepfn=0x%lx, npages=%ld\n", f, rv, base, npgs); continue; } } } static int dr_post_detach_mem_unit(dr_mem_unit_t *s_mp) { uint64_t sz = s_mp->sbm_slice_size; uint64_t sm = sz - 1; /* old and new below refer to PAs before and after copy-rename */ uint64_t s_old_basepa, s_new_basepa; uint64_t t_old_basepa, t_new_basepa; uint64_t t_new_smallsize = 0; dr_mem_unit_t *t_mp, *x_mp; struct memlist *ml; int rv; sbd_error_t *err; static fn_t f = "dr_post_detach_mem_unit"; PR_MEM("%s...\n", f); /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */ PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n", f, s_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(s_mp->sbm_del_mlist); /* sanity check */ ASSERT(s_mp->sbm_del_mlist == NULL || (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0); if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { t_mp = s_mp->sbm_peer; ASSERT(t_mp != NULL); ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); ASSERT(t_mp->sbm_peer == s_mp); ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE); ASSERT(t_mp->sbm_del_mlist); PR_MEM("%s: target %s: deleted memlist:\n", f, t_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(t_mp->sbm_del_mlist); } else { /* this is no target unit */ t_mp = NULL; } /* * Verify the memory really did successfully detach * by checking for its non-existence in phys_install. */ rv = 0; memlist_read_lock(); if (s_mp->sbm_flags & DR_MFLAG_RELDONE) { x_mp = s_mp; rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); } if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) { x_mp = t_mp; rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); } memlist_read_unlock(); if (rv) { /* error: memlist still in phys_install */ DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm); } /* * clean mem unit state and bail out if an error has been recorded. */ rv = 0; if (s_mp->sbm_cm.sbdev_error) { PR_MEM("%s: %s flags=%x", f, s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm); DR_DEV_CLR_RELEASED(&s_mp->sbm_cm); dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED); rv = -1; } if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) { PR_MEM("%s: %s flags=%x", f, s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); rv = -1; } if (rv) goto cleanup; s_old_basepa = _ptob64(s_mp->sbm_basepfn); err = drmach_mem_get_base_physaddr(s_mp->sbm_cm.sbdev_id, &s_new_basepa); ASSERT(err == NULL); PR_MEM("%s:s_old_basepa: 0x%llx\n", f, s_old_basepa); PR_MEM("%s:s_new_basepa: 0x%llx\n", f, s_new_basepa); if (t_mp != NULL) { struct memlist *s_copy_mlist; t_old_basepa = _ptob64(t_mp->sbm_basepfn); err = drmach_mem_get_base_physaddr(t_mp->sbm_cm.sbdev_id, &t_new_basepa); ASSERT(err == NULL); PR_MEM("%s:t_old_basepa: 0x%llx\n", f, t_old_basepa); PR_MEM("%s:t_new_basepa: 0x%llx\n", f, t_new_basepa); /* * Construct copy list with original source addresses. * Used to add back excess target mem. */ s_copy_mlist = memlist_dup(s_mp->sbm_mlist); for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { s_copy_mlist = memlist_del_span(s_copy_mlist, ml->address, ml->size); } PR_MEM("%s: source copy list:\n:", f); PR_MEMLIST_DUMP(s_copy_mlist); /* * We had to swap mem-units, so update * memlists accordingly with new base * addresses. */ for (ml = t_mp->sbm_mlist; ml; ml = ml->next) { ml->address -= t_old_basepa; ml->address += t_new_basepa; } /* * There is no need to explicitly rename the target delete * memlist, because sbm_del_mlist and sbm_mlist always * point to the same memlist for a copy/rename operation. */ ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); PR_MEM("%s: renamed target memlist and delete memlist:\n", f); PR_MEMLIST_DUMP(t_mp->sbm_mlist); for (ml = s_mp->sbm_mlist; ml; ml = ml->next) { ml->address -= s_old_basepa; ml->address += s_new_basepa; } PR_MEM("%s: renamed source memlist:\n", f); PR_MEMLIST_DUMP(s_mp->sbm_mlist); /* * Keep track of dynamically added segments * since they cannot be split if we need to delete * excess source memory later for this board. */ if (t_mp->sbm_dyn_segs) memlist_delete(t_mp->sbm_dyn_segs); t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs; s_mp->sbm_dyn_segs = NULL; /* * If the target memory range with the new target base PA * extends beyond the usable slice, prevent any "target excess" * from being added back after this copy/rename and * calculate the new smaller size of the target board * to be set as part of target cleanup. The base + npages * must only include the range of memory up to the end of * this slice. This will only be used after a category 4 * large-to-small target type copy/rename - see comments * in dr_select_mem_target. */ if (((t_new_basepa & sm) + _ptob64(t_mp->sbm_npages)) > sz) { t_new_smallsize = sz - (t_new_basepa & sm); } if (s_mp->sbm_flags & DR_MFLAG_MEMRESIZE && t_new_smallsize == 0) { struct memlist *t_excess_mlist; /* * Add back excess target memory. * Subtract out the portion of the target memory * node that was taken over by the source memory * node. */ t_excess_mlist = memlist_dup(t_mp->sbm_mlist); for (ml = s_copy_mlist; ml; ml = ml->next) { t_excess_mlist = memlist_del_span(t_excess_mlist, ml->address, ml->size); } /* * Update dynamically added segs */ for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { t_mp->sbm_dyn_segs = memlist_del_span(t_mp->sbm_dyn_segs, ml->address, ml->size); } for (ml = t_excess_mlist; ml; ml = ml->next) { t_mp->sbm_dyn_segs = memlist_cat_span(t_mp->sbm_dyn_segs, ml->address, ml->size); } PR_MEM("%s: %s: updated dynamic seg list:\n", f, t_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs); PR_MEM("%s: adding back remaining portion" " of %s, memlist:\n", f, t_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(t_excess_mlist); dr_add_memory_spans(s_mp, t_excess_mlist); memlist_delete(t_excess_mlist); } memlist_delete(s_copy_mlist); #ifdef DEBUG /* * Renaming s_mp->sbm_del_mlist is not necessary. This * list is not used beyond this point, and in fact, is * disposed of at the end of this function. */ for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { ml->address -= s_old_basepa; ml->address += s_new_basepa; } PR_MEM("%s: renamed source delete memlist", f); PR_MEMLIST_DUMP(s_mp->sbm_del_mlist); #endif } if (t_mp != NULL) { /* delete target's entire address space */ err = drmach_mem_del_span( t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz); if (err) DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err); ASSERT(err == NULL); /* * After the copy/rename, the original address space * for the source board (which is now located on the * target board) may now have some excess to be deleted. * The amount is calculated by masking the slice * info and keeping the slice offset from t_new_basepa. */ err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, s_old_basepa & ~ sm, t_new_basepa & sm); if (err) DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); ASSERT(err == NULL); } else { /* delete board's entire address space */ err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, s_old_basepa & ~ sm, sz); if (err) DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); ASSERT(err == NULL); } cleanup: /* clean up target mem unit */ if (t_mp != NULL) { memlist_delete(t_mp->sbm_del_mlist); /* no need to delete sbm_mlist, it shares sbm_del_mlist */ t_mp->sbm_del_mlist = NULL; t_mp->sbm_mlist = NULL; t_mp->sbm_peer = NULL; t_mp->sbm_flags = 0; t_mp->sbm_cm.sbdev_busy = 0; dr_init_mem_unit_data(t_mp); /* reduce target size if new PAs go past end of usable slice */ if (t_new_smallsize > 0) { t_mp->sbm_npages = _b64top(t_new_smallsize); PR_MEM("%s: target new size 0x%llx bytes\n", f, t_new_smallsize); } } if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) { /* * now that copy/rename has completed, undo this * work that was done in dr_release_mem_done. */ DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); } /* * clean up (source) board's mem unit structure. * NOTE: sbm_mlist is retained if no error has been record (in other * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is * referred to elsewhere as the cached memlist. The cached memlist * is used to re-attach (configure back in) this memunit from the * unconfigured state. The memlist is retained because it may * represent bad pages that were detected while the memory was * configured into the OS. The OS deletes bad pages from phys_install. * Those deletes, if any, will be represented in the cached mlist. */ if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist) memlist_delete(s_mp->sbm_del_mlist); if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) { memlist_delete(s_mp->sbm_mlist); s_mp->sbm_mlist = NULL; } if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) { memlist_delete(s_mp->sbm_dyn_segs); s_mp->sbm_dyn_segs = NULL; } s_mp->sbm_del_mlist = NULL; s_mp->sbm_peer = NULL; s_mp->sbm_flags = 0; s_mp->sbm_cm.sbdev_busy = 0; dr_init_mem_unit_data(s_mp); PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(s_mp->sbm_mlist); return (0); } /* * Successful return from this function will have the memory * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated * and waiting. This routine's job is to select the memory that * actually has to be released (detached) which may not necessarily * be the same memory node that came in in devlist[], * i.e. a copy-rename is needed. */ int dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) { int d; int err_flag = 0; static fn_t f = "dr_pre_release_mem"; PR_MEM("%s...\n", f); for (d = 0; d < devnum; d++) { dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; int rv; memquery_t mq; struct memlist *ml; if (mp->sbm_cm.sbdev_error) { err_flag = 1; continue; } else if (!kcage_on) { dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF); err_flag = 1; continue; } if (mp->sbm_flags & DR_MFLAG_RESERVED) { /* * Board is currently involved in a delete * memory operation. Can't detach this guy until * that operation completes. */ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL); err_flag = 1; break; } /* * Check whether the detaching memory requires a * copy-rename. */ ASSERT(mp->sbm_npages != 0); rv = kphysm_del_span_query( mp->sbm_basepfn, mp->sbm_npages, &mq); if (rv != KPHYSM_OK) { DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); err_flag = 1; break; } if (mq.nonrelocatable != 0) { if (!(dr_cmd_flags(hp) & (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) { /* caller wasn't prompted for a suspend */ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_QUIESCE_REQD); err_flag = 1; break; } } /* flags should be clean at this time */ ASSERT(mp->sbm_flags == 0); ASSERT(mp->sbm_mlist == NULL); /* should be null */ ASSERT(mp->sbm_del_mlist == NULL); /* should be null */ if (mp->sbm_mlist != NULL) { memlist_delete(mp->sbm_mlist); mp->sbm_mlist = NULL; } ml = dr_get_memlist(mp); if (ml == NULL) { err_flag = 1; PR_MEM("%s: no memlist found for %s\n", f, mp->sbm_cm.sbdev_path); continue; } /* allocate a kphysm handle */ rv = kphysm_del_gethandle(&mp->sbm_memhandle); if (rv != KPHYSM_OK) { memlist_delete(ml); DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); err_flag = 1; break; } mp->sbm_flags |= DR_MFLAG_RELOWNER; if ((mq.nonrelocatable != 0) || dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) { /* * Either the detaching memory node contains * non-reloc memory or we failed to reserve the * detaching memory node (which did _not_ have * any non-reloc memory, i.e. some non-reloc mem * got onboard). */ if (dr_select_mem_target(hp, mp, ml)) { int rv; /* * We had no luck locating a target * memory node to be the recipient of * the non-reloc memory on the node * we're trying to detach. * Clean up be disposing the mem handle * and the mem list. */ rv = kphysm_del_release(mp->sbm_memhandle); if (rv != KPHYSM_OK) { /* * can do nothing but complain * and hope helpful for debug */ cmn_err(CE_WARN, "%s: unexpected" " kphysm_del_release return" " value %d", f, rv); } mp->sbm_flags &= ~DR_MFLAG_RELOWNER; memlist_delete(ml); /* make sure sbm_flags is clean */ ASSERT(mp->sbm_flags == 0); dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_NO_TARGET); err_flag = 1; break; } /* * ml is not memlist_delete'd here because * it has been assigned to mp->sbm_mlist * by dr_select_mem_target. */ } else { /* no target needed to detach this board */ mp->sbm_flags |= DR_MFLAG_RESERVED; mp->sbm_peer = NULL; mp->sbm_del_mlist = ml; mp->sbm_mlist = ml; mp->sbm_cm.sbdev_busy = 1; } #ifdef DEBUG ASSERT(mp->sbm_mlist != NULL); if (mp->sbm_flags & DR_MFLAG_SOURCE) { PR_MEM("%s: release of %s requires copy/rename;" " selected target board %s\n", f, mp->sbm_cm.sbdev_path, mp->sbm_peer->sbm_cm.sbdev_path); } else { PR_MEM("%s: copy/rename not required to release %s\n", f, mp->sbm_cm.sbdev_path); } ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER); ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED); #endif } return (err_flag ? -1 : 0); } void dr_release_mem_done(dr_common_unit_t *cp) { dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp; dr_mem_unit_t *t_mp, *mp; int rv; static fn_t f = "dr_release_mem_done"; /* * This unit will be flagged with DR_MFLAG_SOURCE, if it * has a target unit. */ if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { t_mp = s_mp->sbm_peer; ASSERT(t_mp != NULL); ASSERT(t_mp->sbm_peer == s_mp); ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED); } else { /* this is no target unit */ t_mp = NULL; } /* free delete handle */ ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER); ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED); rv = kphysm_del_release(s_mp->sbm_memhandle); if (rv != KPHYSM_OK) { /* * can do nothing but complain * and hope helpful for debug */ cmn_err(CE_WARN, "%s: unexpected kphysm_del_release" " return value %d", f, rv); } s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER; /* * If an error was encountered during release, clean up * the source (and target, if present) unit data. */ /* XXX Can we know that sbdev_error was encountered during release? */ if (s_mp->sbm_cm.sbdev_error != NULL) { PR_MEM("%s: %s: error %d noted\n", f, s_mp->sbm_cm.sbdev_path, s_mp->sbm_cm.sbdev_error->e_code); if (t_mp != NULL) { ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); t_mp->sbm_del_mlist = NULL; if (t_mp->sbm_mlist != NULL) { memlist_delete(t_mp->sbm_mlist); t_mp->sbm_mlist = NULL; } t_mp->sbm_peer = NULL; t_mp->sbm_flags = 0; t_mp->sbm_cm.sbdev_busy = 0; } if (s_mp->sbm_del_mlist != s_mp->sbm_mlist) memlist_delete(s_mp->sbm_del_mlist); s_mp->sbm_del_mlist = NULL; if (s_mp->sbm_mlist != NULL) { memlist_delete(s_mp->sbm_mlist); s_mp->sbm_mlist = NULL; } s_mp->sbm_peer = NULL; s_mp->sbm_flags = 0; s_mp->sbm_cm.sbdev_busy = 0; /* bail out */ return; } DR_DEV_SET_RELEASED(&s_mp->sbm_cm); dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE); if (t_mp != NULL) { /* * the kphysm delete operation that drained the source * board also drained this target board. Since the source * board drain is now known to have succeeded, we know this * target board is drained too. * * because DR_DEV_SET_RELEASED and dr_device_transition * is done here, the dr_release_dev_done should not * fail. */ DR_DEV_SET_RELEASED(&t_mp->sbm_cm); dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE); /* * NOTE: do not transition target's board state, * even if the mem-unit was the last configure * unit of the board. When copy/rename completes * this mem-unit will transitioned back to * the configured state. In the meantime, the * board's must remain as is. */ } /* if board(s) had deleted memory, verify it is gone */ rv = 0; memlist_read_lock(); if (s_mp->sbm_del_mlist != NULL) { mp = s_mp; rv = memlist_intersect(phys_install, mp->sbm_del_mlist); } if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) { mp = t_mp; rv = memlist_intersect(phys_install, mp->sbm_del_mlist); } memlist_read_unlock(); if (rv) { cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): " "deleted memory still found in phys_install", f, (mp == t_mp ? "target " : ""), mp->sbm_cm.sbdev_bp->b_num, mp->sbm_cm.sbdev_unum); DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm); return; } s_mp->sbm_flags |= DR_MFLAG_RELDONE; if (t_mp != NULL) t_mp->sbm_flags |= DR_MFLAG_RELDONE; /* this should not fail */ if (dr_release_dev_done(&s_mp->sbm_cm) != 0) { /* catch this in debug kernels */ ASSERT(0); return; } PR_MEM("%s: marking %s release DONE\n", f, s_mp->sbm_cm.sbdev_path); s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; if (t_mp != NULL) { /* should not fail */ rv = dr_release_dev_done(&t_mp->sbm_cm); if (rv != 0) { /* catch this in debug kernels */ ASSERT(0); return; } PR_MEM("%s: marking %s release DONE\n", f, t_mp->sbm_cm.sbdev_path); t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; } } /*ARGSUSED*/ int dr_disconnect_mem(dr_mem_unit_t *mp) { static fn_t f = "dr_disconnect_mem"; update_membounds_t umb; #ifdef DEBUG int state = mp->sbm_cm.sbdev_state; ASSERT(state == DR_STATE_CONNECTED || state == DR_STATE_UNCONFIGURED); #endif PR_MEM("%s...\n", f); if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist) memlist_delete(mp->sbm_del_mlist); mp->sbm_del_mlist = NULL; if (mp->sbm_mlist) { memlist_delete(mp->sbm_mlist); mp->sbm_mlist = NULL; } /* * Remove memory from lgroup * For now, only board info is required. */ umb.u_board = mp->sbm_cm.sbdev_bp->b_num; umb.u_base = (uint64_t)-1; umb.u_len = (uint64_t)-1; lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb); return (0); } int dr_cancel_mem(dr_mem_unit_t *s_mp) { dr_mem_unit_t *t_mp; dr_state_t state; static fn_t f = "dr_cancel_mem"; state = s_mp->sbm_cm.sbdev_state; if (s_mp->sbm_flags & DR_MFLAG_TARGET) { /* must cancel source board, not target board */ /* TODO: set error */ return (-1); } else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { t_mp = s_mp->sbm_peer; ASSERT(t_mp != NULL); ASSERT(t_mp->sbm_peer == s_mp); /* must always match the source board's state */ /* TODO: is this assertion correct? */ ASSERT(t_mp->sbm_cm.sbdev_state == state); } else { /* this is no target unit */ t_mp = NULL; } switch (state) { case DR_STATE_UNREFERENCED: /* state set by dr_release_dev_done */ ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) { PR_MEM("%s: undoing target %s memory delete\n", f, t_mp->sbm_cm.sbdev_path); dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist); DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); } if (s_mp->sbm_del_mlist != NULL) { PR_MEM("%s: undoing %s memory delete\n", f, s_mp->sbm_cm.sbdev_path); dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist); } /*FALLTHROUGH*/ /* TODO: should no longer be possible to see the release state here */ case DR_STATE_RELEASE: /* state set by dr_release_mem_done */ ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); if (t_mp != NULL) { ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); t_mp->sbm_del_mlist = NULL; if (t_mp->sbm_mlist != NULL) { memlist_delete(t_mp->sbm_mlist); t_mp->sbm_mlist = NULL; } t_mp->sbm_peer = NULL; t_mp->sbm_flags = 0; t_mp->sbm_cm.sbdev_busy = 0; dr_init_mem_unit_data(t_mp); DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); dr_device_transition( &t_mp->sbm_cm, DR_STATE_CONFIGURED); } if (s_mp->sbm_del_mlist != s_mp->sbm_mlist) memlist_delete(s_mp->sbm_del_mlist); s_mp->sbm_del_mlist = NULL; if (s_mp->sbm_mlist != NULL) { memlist_delete(s_mp->sbm_mlist); s_mp->sbm_mlist = NULL; } s_mp->sbm_peer = NULL; s_mp->sbm_flags = 0; s_mp->sbm_cm.sbdev_busy = 0; dr_init_mem_unit_data(s_mp); return (0); default: PR_MEM("%s: WARNING unexpected state (%d) for %s\n", f, (int)state, s_mp->sbm_cm.sbdev_path); return (-1); } /*NOTREACHED*/ } void dr_init_mem_unit(dr_mem_unit_t *mp) { dr_state_t new_state; if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) { new_state = DR_STATE_CONFIGURED; mp->sbm_cm.sbdev_cond = SBD_COND_OK; } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) { new_state = DR_STATE_CONNECTED; mp->sbm_cm.sbdev_cond = SBD_COND_OK; } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) { new_state = DR_STATE_OCCUPIED; } else { new_state = DR_STATE_EMPTY; } if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) dr_init_mem_unit_data(mp); /* delay transition until fully initialized */ dr_device_transition(&mp->sbm_cm, new_state); } static void dr_init_mem_unit_data(dr_mem_unit_t *mp) { drmachid_t id = mp->sbm_cm.sbdev_id; uint64_t bytes; sbd_error_t *err; static fn_t f = "dr_init_mem_unit_data"; update_membounds_t umb; PR_MEM("%s...\n", f); /* a little sanity checking */ ASSERT(mp->sbm_peer == NULL); ASSERT(mp->sbm_flags == 0); /* get basepfn of mem unit */ err = drmach_mem_get_base_physaddr(id, &bytes); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); mp->sbm_basepfn = (pfn_t)-1; } else mp->sbm_basepfn = _b64top(bytes); /* attempt to get number of pages from PDA */ err = drmach_mem_get_size(id, &bytes); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); mp->sbm_npages = 0; } else mp->sbm_npages = _b64top(bytes); /* if didn't work, calculate using memlist */ if (mp->sbm_npages == 0) { struct memlist *ml, *mlist; /* * Either we couldn't open the PDA or our * PDA has garbage in it. We must have the * page count consistent and whatever the * OS states has precedence over the PDA * so let's check the kernel. */ /* TODO: curious comment. it suggests pda query should happen if this fails */ PR_MEM("%s: PDA query failed for npages." " Checking memlist for %s\n", f, mp->sbm_cm.sbdev_path); mlist = dr_get_memlist(mp); for (ml = mlist; ml; ml = ml->next) mp->sbm_npages += btop(ml->size); memlist_delete(mlist); } err = drmach_mem_get_alignment(id, &bytes); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); mp->sbm_alignment_mask = 0; } else mp->sbm_alignment_mask = _b64top(bytes); err = drmach_mem_get_slice_size(id, &bytes); if (err) { DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); mp->sbm_slice_size = 0; /* paranoia */ } else mp->sbm_slice_size = bytes; /* * Add memory to lgroup */ umb.u_board = mp->sbm_cm.sbdev_bp->b_num; umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT; umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT; lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb); PR_MEM("%s: %s (basepfn = 0x%x, npgs = %d)\n", f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages); } static int dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml) { int err; pfn_t base; pgcnt_t npgs; struct memlist *mc; static fn_t f = "dr_reserve_mem_spans"; PR_MEM("%s...\n", f); /* * Walk the supplied memlist scheduling each span for removal * with kphysm_del_span. It is possible that a span may intersect * an area occupied by the cage. */ for (mc = ml; mc != NULL; mc = mc->next) { base = _b64top(mc->address); npgs = _b64top(mc->size); err = kphysm_del_span(*mhp, base, npgs); if (err != KPHYSM_OK) { cmn_err(CE_WARN, "%s memory reserve failed." " unexpected kphysm_del_span return value %d;" " basepfn=0x%lx npages=%ld", f, err, base, npgs); return (-1); } } return (0); } /* debug counters */ int dr_smt_realigned; int dr_smt_preference[4]; #ifdef DEBUG uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */ #endif /* * Find and reserve a copy/rename target board suitable for the * given source board. * All boards in the system are examined and categorized in relation to * their memory size versus the source board's memory size. Order of * preference is: * 1st: board has same memory size * 2nd: board has larger memory size * 3rd: board has smaller memory size * 4th: board has smaller memory size, available memory will be reduced. * Boards in category 3 and 4 will have their MC's reprogrammed to locate the * span to which the MC responds to address span that appropriately covers * the nonrelocatable span of the source board. */ static int dr_select_mem_target(dr_handle_t *hp, dr_mem_unit_t *s_mp, struct memlist *s_ml) { pgcnt_t sz = _b64top(s_mp->sbm_slice_size); pgcnt_t sm = sz - 1; /* mem_slice_mask */ pfn_t s_phi, t_phi; int n_sets = 4; /* same, larger, smaller, clipped */ int preference; /* lower value is higher preference */ int n_units_per_set; int idx; dr_mem_unit_t **sets; int t_bd; int t_unit; int rv; int allow_src_memrange_modify; int allow_targ_memrange_modify; drmachid_t t_id; dr_board_t *s_bp, *t_bp; dr_mem_unit_t *t_mp, *c_mp; struct memlist *d_ml, *t_ml, *x_ml; memquery_t s_mq = {0}; static fn_t f = "dr_select_mem_target"; PR_MEM("%s...\n", f); ASSERT(s_ml != NULL); n_units_per_set = MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD; sets = GETSTRUCT(dr_mem_unit_t *, n_units_per_set * n_sets); s_bp = hp->h_bd; /* calculate the offset into the slice of the last source board pfn */ ASSERT(s_mp->sbm_npages != 0); s_phi = (s_mp->sbm_basepfn + s_mp->sbm_npages - 1) & sm; allow_src_memrange_modify = drmach_allow_memrange_modify(s_bp->b_id); /* * Make one pass through all memory units on all boards * and categorize them with respect to the source board. */ for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) { /* * The board structs are a contiguous array * so we take advantage of that to find the * correct board struct pointer for a given * board number. */ t_bp = dr_lookup_board(t_bd); /* source board can not be its own target */ if (s_bp->b_num == t_bp->b_num) continue; for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) { t_mp = dr_get_mem_unit(t_bp, t_unit); /* this memory node must be attached */ if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm)) continue; /* source unit can not be its own target */ if (s_mp == t_mp) { /* catch this is debug kernels */ ASSERT(0); continue; } /* * this memory node must not already be reserved * by some other memory delete operation. */ if (t_mp->sbm_flags & DR_MFLAG_RESERVED) continue; /* * categorize the memory node * If this is a smaller memory node, create a * temporary, edited copy of the source board's * memlist containing only the span of the non- * relocatable pages. */ t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm; t_id = t_mp->sbm_cm.sbdev_bp->b_id; allow_targ_memrange_modify = drmach_allow_memrange_modify(t_id); if (t_mp->sbm_npages == s_mp->sbm_npages && t_phi == s_phi) { preference = 0; t_mp->sbm_slice_offset = 0; } else if (t_mp->sbm_npages > s_mp->sbm_npages && t_phi > s_phi) { /* * Selecting this target will require modifying * the source and/or target physical address * ranges. Skip if not supported by platform. */ if (!allow_src_memrange_modify || !allow_targ_memrange_modify) { PR_MEM("%s: skip target %s, memory " "range relocation not supported " "by platform\n", f, t_mp->sbm_cm.sbdev_path); continue; } preference = 1; t_mp->sbm_slice_offset = 0; } else { pfn_t pfn = 0; /* * Selecting this target will require modifying * the source and/or target physical address * ranges. Skip if not supported by platform. */ if (!allow_src_memrange_modify || !allow_targ_memrange_modify) { PR_MEM("%s: skip target %s, memory " "range relocation not supported " "by platform\n", f, t_mp->sbm_cm.sbdev_path); continue; } /* * Check if its mc can be programmed to relocate * the active address range to match the * nonrelocatable span of the source board. */ preference = 2; if (s_mq.phys_pages == 0) { /* * find non-relocatable span on * source board. */ rv = kphysm_del_span_query( s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq); if (rv != KPHYSM_OK) { PR_MEM("%s: %s: unexpected" " kphysm_del_span_query" " return value %d;" " basepfn 0x%lx, npages %ld\n", f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn, s_mp->sbm_npages); /* paranoia */ s_mq.phys_pages = 0; continue; } /* more paranoia */ ASSERT(s_mq.phys_pages != 0); ASSERT(s_mq.nonrelocatable != 0); /* * this should not happen * if it does, it simply means that * we can not proceed with qualifying * this target candidate. */ if (s_mq.nonrelocatable == 0) continue; PR_MEM("%s: %s: nonrelocatable" " span (0x%lx..0x%lx)\n", f, s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable, s_mq.last_nonrelocatable); } /* * Round down the starting pfn of the * nonrelocatable span on the source board * to nearest programmable boundary possible * with this target candidate. */ pfn = s_mq.first_nonrelocatable & ~t_mp->sbm_alignment_mask; /* skip candidate if memory is too small */ if (pfn + t_mp->sbm_npages < s_mq.last_nonrelocatable) continue; /* * reprogramming an mc to relocate its * active address range means the beginning * address to which the DIMMS respond will * be somewhere above the slice boundary * address. The larger the size of memory * on this unit, the more likely part of it * will exist beyond the end of the slice. * The portion of the memory that does is * unavailable to the system until the mc * reprogrammed to a more favorable base * address. * An attempt is made to avoid the loss by * recalculating the mc base address relative * to the end of the slice. This may produce * a more favorable result. If not, we lower * the board's preference rating so that it * is one the last candidate boards to be * considered. */ if ((pfn + t_mp->sbm_npages) & ~sm) { pfn_t p; ASSERT(sz >= t_mp->sbm_npages); /* * calculate an alternative starting * address relative to the end of the * slice's address space. */ p = pfn & ~sm; p = p + (sz - t_mp->sbm_npages); p = p & ~t_mp->sbm_alignment_mask; if ((p > s_mq.first_nonrelocatable) || (p + t_mp->sbm_npages < s_mq.last_nonrelocatable)) { /* * alternative starting addr * won't work. Lower preference * rating of this board, since * some number of pages will * unavailable for use. */ preference = 3; } else { dr_smt_realigned++; pfn = p; } } /* * translate calculated pfn to an offset * relative to the slice boundary. If the * candidate board is selected, this offset * will be used to calculate the values * programmed into the mc. */ t_mp->sbm_slice_offset = pfn & sm; PR_MEM("%s: %s:" " proposed mc offset 0x%lx\n", f, t_mp->sbm_cm.sbdev_path, t_mp->sbm_slice_offset); } dr_smt_preference[preference]++; /* calculate index to start of preference set */ idx = n_units_per_set * preference; /* calculate offset to respective element */ idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit; ASSERT(idx < n_units_per_set * n_sets); sets[idx] = t_mp; } } /* * NOTE: this would be a good place to sort each candidate * set in to some desired order, e.g. memory size in ascending * order. Without an additional sorting step here, the order * within a set is ascending board number order. */ c_mp = NULL; x_ml = NULL; t_ml = NULL; for (idx = 0; idx < n_units_per_set * n_sets; idx++) { memquery_t mq; /* cleanup t_ml after previous pass */ if (t_ml != NULL) { memlist_delete(t_ml); t_ml = NULL; } /* get candidate target board mem unit */ t_mp = sets[idx]; if (t_mp == NULL) continue; /* get target board memlist */ t_ml = dr_get_memlist(t_mp); if (t_ml == NULL) { cmn_err(CE_WARN, "%s: no memlist for" " mem-unit %d, board %d", f, t_mp->sbm_cm.sbdev_bp->b_num, t_mp->sbm_cm.sbdev_unum); continue; } /* get appropriate source board memlist */ t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm; if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) { spgcnt_t excess; /* * make a copy of the source board memlist * then edit it to remove the spans that * are outside the calculated span of * [pfn..s_mq.last_nonrelocatable]. */ if (x_ml != NULL) memlist_delete(x_ml); x_ml = memlist_dup(s_ml); if (x_ml == NULL) { PR_MEM("%s: memlist_dup failed\n", f); /* TODO: should abort */ continue; } /* trim off lower portion */ excess = t_mp->sbm_slice_offset - (s_mp->sbm_basepfn & sm); if (excess > 0) { x_ml = memlist_del_span( x_ml, _ptob64(s_mp->sbm_basepfn), _ptob64(excess)); } ASSERT(x_ml); /* * Since this candidate target board is smaller * than the source board, s_mq must have been * initialized in previous loop while processing * this or some other candidate board. * FIXME: this is weak. */ ASSERT(s_mq.phys_pages != 0); /* trim off upper portion */ excess = (s_mp->sbm_basepfn + s_mp->sbm_npages) - (s_mq.last_nonrelocatable + 1); if (excess > 0) { pfn_t p; p = s_mq.last_nonrelocatable + 1; x_ml = memlist_del_span( x_ml, _ptob64(p), _ptob64(excess)); } PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(x_ml); #ifdef DEBUG /* sanity check memlist */ d_ml = x_ml; while (d_ml->next != NULL) d_ml = d_ml->next; ASSERT(d_ml->address + d_ml->size == _ptob64(s_mq.last_nonrelocatable + 1)); #endif /* * x_ml now describes only the portion of the * source board that will be moved during the * copy/rename operation. */ d_ml = x_ml; } else { /* use original memlist; all spans will be moved */ d_ml = s_ml; } /* verify target can support source memory spans. */ if (memlist_canfit(d_ml, t_ml) == 0) { PR_MEM("%s: source memlist won't" " fit in target memlist\n", f); PR_MEM("%s: source memlist:\n", f); PR_MEMLIST_DUMP(d_ml); PR_MEM("%s: target memlist:\n", f); PR_MEMLIST_DUMP(t_ml); continue; } /* NOTE: the value of d_ml is not used beyond this point */ PR_MEM("%s: checking for no-reloc in %s, " " basepfn=0x%lx, npages=%ld\n", f, t_mp->sbm_cm.sbdev_path, t_mp->sbm_basepfn, t_mp->sbm_npages); rv = kphysm_del_span_query( t_mp->sbm_basepfn, t_mp->sbm_npages, &mq); if (rv != KPHYSM_OK) { PR_MEM("%s: kphysm_del_span_query:" " unexpected return value %d\n", f, rv); continue; } if (mq.nonrelocatable != 0) { PR_MEM("%s: candidate %s has" " nonrelocatable span [0x%lx..0x%lx]\n", f, t_mp->sbm_cm.sbdev_path, mq.first_nonrelocatable, mq.last_nonrelocatable); continue; } #ifdef DEBUG /* * This is a debug tool for excluding certain boards * from being selected as a target board candidate. * dr_ignore_board is only tested by this driver. * It must be set with adb, obp, /etc/system or your * favorite debugger. */ if (dr_ignore_board & (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) { PR_MEM("%s: dr_ignore_board flag set," " ignoring %s as candidate\n", f, t_mp->sbm_cm.sbdev_path); continue; } #endif /* * Reserve excess source board memory, if any. * * When the number of pages on the candidate target * board is less than the number of pages on the source, * then some spans (clearly) of the source board's address * space will not be covered by physical memory after the * copy/rename completes. The following code block * schedules those spans to be deleted. */ if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) { pfn_t pfn; uint64_t s_del_pa; struct memlist *ml; d_ml = memlist_dup(s_ml); if (d_ml == NULL) { PR_MEM("%s: cant dup src brd memlist\n", f); /* TODO: should abort */ continue; } /* calculate base pfn relative to target board */ pfn = s_mp->sbm_basepfn & ~sm; pfn += t_mp->sbm_slice_offset; /* * cannot split dynamically added segment */ s_del_pa = _ptob64(pfn + t_mp->sbm_npages); PR_MEM("%s: proposed src delete pa=0x%lx\n", f, s_del_pa); PR_MEM("%s: checking for split of dyn seg list:\n", f); PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs); for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) { if (s_del_pa > ml->address && s_del_pa < ml->address + ml->size) { s_del_pa = ml->address; break; } } /* remove span that will reside on candidate board */ d_ml = memlist_del_span(d_ml, _ptob64(pfn), s_del_pa - _ptob64(pfn)); PR_MEM("%s: %s: reserving src brd memlist:\n", f, s_mp->sbm_cm.sbdev_path); PR_MEMLIST_DUMP(d_ml); /* reserve excess spans */ if (dr_reserve_mem_spans( &s_mp->sbm_memhandle, d_ml) != 0) { /* likely more non-reloc pages appeared */ /* TODO: restart from top? */ continue; } } else { /* no excess source board memory */ d_ml = NULL; } s_mp->sbm_flags |= DR_MFLAG_RESERVED; /* * reserve all memory on target board. * NOTE: source board's memhandle is used. * * If this succeeds (eq 0), then target selection is * complete and all unwanted memory spans, both source and * target, have been reserved. Loop is terminated. */ if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) { PR_MEM("%s: %s: target board memory reserved\n", f, t_mp->sbm_cm.sbdev_path); /* a candidate target board is now reserved */ t_mp->sbm_flags |= DR_MFLAG_RESERVED; c_mp = t_mp; /* *** EXITING LOOP *** */ break; } /* did not successfully reserve the target board. */ PR_MEM("%s: could not reserve target %s\n", f, t_mp->sbm_cm.sbdev_path); /* * NOTE: an undo of the dr_reserve_mem_span work * will happen automatically when the memhandle * (s_mp->sbm_memhandle) is kphysm_del_release'd. */ s_mp->sbm_flags &= ~DR_MFLAG_RESERVED; } /* clean up after memlist editing logic */ if (x_ml != NULL) memlist_delete(x_ml); FREESTRUCT(sets, dr_mem_unit_t *, n_units_per_set * n_sets); /* * c_mp will be NULL when the entire sets[] array * has been searched without reserving a target board. */ if (c_mp == NULL) { PR_MEM("%s: %s: target selection failed.\n", f, s_mp->sbm_cm.sbdev_path); if (t_ml != NULL) memlist_delete(t_ml); return (-1); } PR_MEM("%s: found target %s for source %s\n", f, c_mp->sbm_cm.sbdev_path, s_mp->sbm_cm.sbdev_path); s_mp->sbm_peer = c_mp; s_mp->sbm_flags |= DR_MFLAG_SOURCE; s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */ s_mp->sbm_mlist = s_ml; s_mp->sbm_cm.sbdev_busy = 1; c_mp->sbm_peer = s_mp; c_mp->sbm_flags |= DR_MFLAG_TARGET; c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */ c_mp->sbm_mlist = t_ml; c_mp->sbm_cm.sbdev_busy = 1; s_mp->sbm_flags &= ~DR_MFLAG_MEMRESIZE; if (c_mp->sbm_npages > s_mp->sbm_npages) { s_mp->sbm_flags |= DR_MFLAG_MEMUPSIZE; PR_MEM("%s: upsize detected (source=%d < target=%d)\n", f, s_mp->sbm_npages, c_mp->sbm_npages); } else if (c_mp->sbm_npages < s_mp->sbm_npages) { s_mp->sbm_flags |= DR_MFLAG_MEMDOWNSIZE; PR_MEM("%s: downsize detected (source=%d > target=%d)\n", f, s_mp->sbm_npages, c_mp->sbm_npages); } return (0); } /* * Memlist support. */ static struct memlist * memlist_dup(struct memlist *mlist) { struct memlist *hl = NULL, *tl, **mlp; if (mlist == NULL) return (NULL); mlp = &hl; tl = *mlp; for (; mlist; mlist = mlist->next) { *mlp = GETSTRUCT(struct memlist, 1); (*mlp)->address = mlist->address; (*mlp)->size = mlist->size; (*mlp)->prev = tl; tl = *mlp; mlp = &((*mlp)->next); } *mlp = NULL; return (hl); } /* * Determine whether the source memlist (s_mlist) will * fit into the target memlist (t_mlist) in terms of * size and holes (i.e. based on same relative base address). */ static int memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist) { int rv = 0; uint64_t s_basepa, t_basepa; struct memlist *s_ml, *t_ml; if ((s_mlist == NULL) || (t_mlist == NULL)) return (0); /* * Base both memlists on common base address (0). */ s_basepa = s_mlist->address; t_basepa = t_mlist->address; for (s_ml = s_mlist; s_ml; s_ml = s_ml->next) s_ml->address -= s_basepa; for (t_ml = t_mlist; t_ml; t_ml = t_ml->next) t_ml->address -= t_basepa; s_ml = s_mlist; for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) { uint64_t s_start, s_end; uint64_t t_start, t_end; t_start = t_ml->address; t_end = t_start + t_ml->size; for (; s_ml; s_ml = s_ml->next) { s_start = s_ml->address; s_end = s_start + s_ml->size; if ((s_start < t_start) || (s_end > t_end)) break; } } /* * If we ran out of source memlist chunks that mean * we found a home for all of them. */ if (s_ml == NULL) rv = 1; /* * Need to add base addresses back since memlists * are probably in use by caller. */ for (s_ml = s_mlist; s_ml; s_ml = s_ml->next) s_ml->address += s_basepa; for (t_ml = t_mlist; t_ml; t_ml = t_ml->next) t_ml->address += t_basepa; return (rv); } static struct memlist * memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) { uint64_t end; struct memlist *ml, *tl, *nlp; if (mlist == NULL) return (NULL); end = base + len; if ((end <= mlist->address) || (base == end)) return (mlist); for (tl = ml = mlist; ml; tl = ml, ml = nlp) { uint64_t mend; nlp = ml->next; if (end <= ml->address) break; mend = ml->address + ml->size; if (base < mend) { if (base <= ml->address) { ml->address = end; if (end >= mend) ml->size = 0ull; else ml->size = mend - ml->address; } else { ml->size = base - ml->address; if (end < mend) { struct memlist *nl; /* * splitting an memlist entry. */ nl = GETSTRUCT(struct memlist, 1); nl->address = end; nl->size = mend - nl->address; if ((nl->next = nlp) != NULL) nlp->prev = nl; nl->prev = ml; ml->next = nl; nlp = nl; } } if (ml->size == 0ull) { if (ml == mlist) { if ((mlist = nlp) != NULL) nlp->prev = NULL; FREESTRUCT(ml, struct memlist, 1); if (mlist == NULL) break; ml = nlp; } else { if ((tl->next = nlp) != NULL) nlp->prev = tl; FREESTRUCT(ml, struct memlist, 1); ml = tl; } } } } return (mlist); } /* * add span without merging */ static struct memlist * memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len) { struct memlist *ml, *tl, *nl; if (len == 0ull) return (NULL); if (mlist == NULL) { mlist = GETSTRUCT(struct memlist, 1); mlist->address = base; mlist->size = len; mlist->next = mlist->prev = NULL; return (mlist); } for (tl = ml = mlist; ml; tl = ml, ml = ml->next) { if (base < ml->address) { nl = GETSTRUCT(struct memlist, 1); nl->address = base; nl->size = len; nl->next = ml; if ((nl->prev = ml->prev) != NULL) nl->prev->next = nl; ml->prev = nl; if (mlist == ml) mlist = nl; break; } } if (ml == NULL) { nl = GETSTRUCT(struct memlist, 1); nl->address = base; nl->size = len; nl->next = NULL; nl->prev = tl; tl->next = nl; } return (mlist); }