/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * sun4v Memory DR Module
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>
#include <sys/modctl.h>
#include <sys/sysevent/dr.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/ds.h>
#include <sys/drctl.h>
#include <sys/dr_util.h>
#include <sys/dr_mem.h>
#include <sys/suspend.h>
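/*
 * Overview: this module registers the memory DR capability with the
 * Domain Services (DS) framework.  Requests arriving from the domain
 * manager are dispatched by dr_mem_data_handler(), arbitrated through
 * the drctl interfaces, and carried out via the kphysm memory
 * add/delete routines (mem_add()/mem_del() below).
 */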
/*
 * DR operations are subject to Memory Alignment restrictions
 * for both address and the size of the request.
 */
#define	MA_ADDR	0x10000000	/* addr alignment 256M */
#define	MA_SIZE	0x10000000	/* size alignment 256M */

#define	MBLK_IS_VALID(m) \
	(IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE))

static memhandle_t dr_mh;	/* memory handle for delete */

static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v memory DR"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

static int dr_mem_allow_unload = 0;

typedef int (*fn_t)(dr_mem_blk_t *, int *);

/*
 * Global Domain Services (DS) Handle
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 */
static ds_ver_t		dr_mem_vers[] = { { 1, 0 } };
#define	DR_MEM_NVERS	(sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0]))

/*
 * DS Capability Description
 */
static ds_capability_t dr_mem_cap = {
	DR_MEM_DS_ID,		/* svc_id */
	dr_mem_vers,		/* vers */
	DR_MEM_NVERS		/* nvers */
};

/*
 * DS Callbacks
 */
static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_mem_unreg_handler(ds_cb_arg_t arg);
static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 */
static ds_clnt_ops_t dr_mem_ops = {
	dr_mem_reg_handler,	/* ds_reg_cb */
	dr_mem_unreg_handler,	/* ds_unreg_cb */
	dr_mem_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};

/*
 * Operation Results
 *
 * Used internally to gather results while an operation on a
 * list of mblks is in progress. In particular, it is used to
 * keep track of which mblks have already failed so that they are
 * not processed further, and the manner in which they failed.
 */
typedef struct {
	uint64_t	addr;
	uint64_t	size;
	uint32_t	result;
	uint32_t	status;
	char		*string;
} dr_mem_res_t;

static char *
dr_mem_estr[] = {
	"operation succeeded",		/* DR_MEM_RES_OK */
	"operation failed",		/* DR_MEM_RES_FAILURE */
	"operation was blocked",	/* DR_MEM_RES_BLOCKED */
	"memory not defined in MD",	/* DR_MEM_RES_NOT_IN_MD */
	"memory already in use",	/* DR_MEM_RES_ESPAN */
	"memory access test failed",	/* DR_MEM_RES_EFAULT */
	"resource not available",	/* DR_MEM_RES_ERESOURCE */
	"permanent pages in span",	/* DR_MEM_RES_PERM */
	"memory span busy",		/* DR_MEM_RES_EBUSY */
	"VM viability test failed",	/* DR_MEM_RES_ENOTVIABLE */
	"no pages to unconfigure",	/* DR_MEM_RES_ENOWORK */
	"operation cancelled",		/* DR_MEM_RES_ECANCELLED */
	"operation refused",		/* DR_MEM_RES_EREFUSED */
	"memory span duplicate",	/* DR_MEM_RES_EDUP */
	"invalid argument"		/* DR_MEM_RES_EINVAL */
};

static char *
dr_mem_estr_detail[] = {
	"",					/* DR_MEM_SRES_NONE */
	"memory DR disabled after migration"	/* DR_MEM_SRES_OS_SUSPENDED */
};

typedef struct {
	kcondvar_t	cond;
	kmutex_t	lock;
	int		error;
	int		done;
} mem_sync_t;

/*
 * Internal Functions
 */
static int dr_mem_init(void);
static int dr_mem_fini(void);

static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);

static int dr_mem_unconfigure(dr_mem_blk_t *, int *);
static int dr_mem_configure(dr_mem_blk_t *, int *);
static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *);

static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *,
    int);
static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres);
static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res,
    dr_mem_hdr_t **respp);

static int dr_mem_find(dr_mem_blk_t *mbp);
static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *,
    mde_cookie_t *);

static int mem_add(pfn_t, pgcnt_t);
static int mem_del(pfn_t, pgcnt_t);

extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t);
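/*
 * Message framing (see dr_mem.h for the authoritative definitions):
 * every request and response begins with a dr_mem_hdr_t.  For
 * configure, unconfigure and query requests, msg_arg holds the mblk
 * count and DR_MEM_CMD_MBLKS() yields the dr_mem_blk_t array that
 * immediately follows the header.
 */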
int
_init(void)
{
	int	status;

	/* check that Memory DR is enabled */
	if (dr_is_disabled(DR_TYPE_MEM))
		return (ENOTSUP);

	if ((status = dr_mem_init()) != 0) {
		cmn_err(CE_NOTE, "Memory DR initialization failed");
		return (status);
	}

	if ((status = mod_install(&modlinkage)) != 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if (dr_mem_allow_unload == 0)
		return (EBUSY);

	if ((status = mod_remove(&modlinkage)) == 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

static int
dr_mem_init(void)
{
	int	rv;

	if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv);
		return (rv);
	}

	return (0);
}

static int
dr_mem_fini(void)
{
	int	rv;

	if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv);
	}

	return (rv);
}

static void
dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
	DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
	    ver->major, ver->minor, hdl);

	ds_handle = hdl;
}

static void
dr_mem_unreg_handler(ds_cb_arg_t arg)
{
	DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg);

	ds_handle = DS_INVALID_HDL;
}

/*ARGSUSED*/
static void
dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
{
	dr_mem_hdr_t	*req = buf;
	dr_mem_hdr_t	err_resp;
	dr_mem_hdr_t	*resp = &err_resp;
	int		resp_len = 0;
	int		rv = EINVAL;

	/*
	 * Sanity check the message
	 */
	if (buflen < sizeof (dr_mem_hdr_t)) {
		DR_DBG_MEM("incoming message short: expected at least %ld "
		    "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen);
		goto done;
	}

	if (req == NULL) {
		DR_DBG_MEM("empty message: expected at least %ld bytes\n",
		    sizeof (dr_mem_hdr_t));
		goto done;
	}

	DR_DBG_MEM("incoming request:\n");
	DR_DBG_DUMP_MSG(buf, buflen);

	/*
	 * Process the command
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
	case DR_MEM_UNCONFIGURE:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) {
			DR_DBG_MEM("%s failed (%d)\n",
			    (req->msg_type == DR_MEM_CONFIGURE) ?
			    "Memory configure" : "Memory unconfigure", rv);
		}
		break;

	case DR_MEM_UNCONF_STATUS:
		if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete status failed (%d)\n", rv);
		break;

	case DR_MEM_UNCONF_CANCEL:
		if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv);
		break;

	case DR_MEM_QUERY:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory query failed (%d)\n", rv);
		break;

	default:
		cmn_err(CE_NOTE, "unsupported memory DR operation (%d)",
		    req->msg_type);
		break;
	}

done:
	/* check if an error occurred */
	if (resp == &err_resp) {
		resp->req_num = (req) ? req->req_num : 0;
		resp->msg_type = DR_MEM_ERROR;
		resp->msg_arg = rv;
		resp_len = sizeof (dr_mem_hdr_t);
	}

	DR_DBG_MEM("outgoing response:\n");
	DR_DBG_DUMP_MSG(resp, resp_len);

	/* send back the response */
	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
		DR_DBG_MEM("ds_send failed\n");
	}

	/* free any allocated memory */
	if (resp != &err_resp) {
		kmem_free(resp, resp_len);
	}
}
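/*
 * Build the error string for a result/subresult pair.  The string is
 * always freshly allocated; it is eventually released along with the
 * result array by dr_mem_res_array_fini().
 */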
static char *
dr_mem_get_errstr(int result, int subresult)
{
	size_t		len;
	char		*errstr;
	const char	*separator = ": ";

	if (subresult == DR_MEM_SRES_NONE)
		return (i_ddi_strdup(dr_mem_estr[result], KM_SLEEP));

	len = snprintf(NULL, 0, "%s%s%s", dr_mem_estr[result],
	    separator, dr_mem_estr_detail[subresult]) + 1;

	errstr = kmem_alloc(len, KM_SLEEP);

	(void) snprintf(errstr, len, "%s%s%s", dr_mem_estr[result],
	    separator, dr_mem_estr_detail[subresult]);

	return (errstr);
}

/*
 * Common routine to config or unconfig multiple mblks.
 *
 * Note: Do not modify result buffer or length on error.
 */
static int
dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rv;
	int		idx;
	int		count;
	int		result;
	int		subresult;
	int		status;
	boolean_t	suspend_allows_dr;
	fn_t		dr_fn;
	int		se_hint;
	dr_mem_blk_t	*req_mblks;
	dr_mem_res_t	*res;
	int		drctl_cmd;
	int		drctl_flags = 0;
	drctl_rsrc_t	*drctl_req;
	size_t		drctl_req_len;
	drctl_resp_t	*drctl_resp;
	drctl_rsrc_t	*drctl_rsrc;
	size_t		drctl_resp_len = 0;
	drctl_cookie_t	drctl_res_ck;

	ASSERT((req != NULL) && (req->msg_arg != 0));

	count = req->msg_arg;

	/*
	 * Extract all information that is specific
	 * to the various types of operations.
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
		dr_fn = dr_mem_configure;
		drctl_cmd = DRCTL_MEM_CONFIG_REQUEST;
		se_hint = SE_HINT_INSERT;
		break;
	case DR_MEM_UNCONFIGURE:
		dr_fn = dr_mem_unconfigure;
		drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST;
		se_hint = SE_HINT_REMOVE;
		break;
	default:
		/* Programming error if we reach this. */
		cmn_err(CE_NOTE, "%s: bad msg_type %d\n", __func__,
		    req->msg_type);
		ASSERT(0);
		return (-1);
	}

	/* the incoming array of mblks to operate on */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate drctl request msg based on incoming resource count */
	drctl_req_len = sizeof (drctl_rsrc_t) * count;
	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);

	/* copy the size for the drctl call from the incoming request msg */
	for (idx = 0; idx < count; idx++) {
		drctl_req[idx].res_mem_addr = req_mblks[idx].addr;
		drctl_req[idx].res_mem_size = req_mblks[idx].size;
	}

	rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
	    count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);

	ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));

	if (rv != 0) {
		DR_DBG_MEM("%s: drctl_config_init returned: %d\n",
		    __func__, rv);
		kmem_free(drctl_resp, drctl_resp_len);
		kmem_free(drctl_req, drctl_req_len);
		return (rv);
	}

	ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);

	drctl_rsrc = drctl_resp->resp_resources;

	/* create the result scratch array */
	res = dr_mem_res_array_init(req, drctl_rsrc, count);
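	/*
	 * Every mblk now has a scratch result entry.  Entries that drctl
	 * pre-marked as blocked are skipped by the loop below and simply
	 * reported back in the packed response.
	 */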
	/*
	 * Memory DR operations are not safe if we have been suspended and
	 * resumed. Until this limitation is lifted, check to see if memory
	 * DR operations are permitted at this time by the suspend subsystem.
	 */
	if ((suspend_allows_dr = suspend_memdr_allowed()) == B_FALSE) {
		result = DR_MEM_RES_BLOCKED;
		subresult = DR_MEM_SRES_OS_SUSPENDED;
	} else {
		subresult = DR_MEM_SRES_NONE;
	}

	/* perform the specified operation on each of the mblks */
	for (idx = 0; idx < count; idx++) {
		/*
		 * If no action will be taken against the current
		 * mblk, update the drctl resource information to
		 * ensure that it gets recovered properly during
		 * the drctl fini() call.
		 */
		if (res[idx].result != DR_MEM_RES_OK) {
			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
			continue;
		}

		/*
		 * If memory DR operations are permitted at this time by
		 * the suspend subsystem, call the function to perform the
		 * operation.  Otherwise, report the blocked result along
		 * with the mblk's unchanged state.
		 */
		if (suspend_allows_dr)
			result = (*dr_fn)(&req_mblks[idx], &status);
		else
			status = (req->msg_type == DR_MEM_CONFIGURE) ?
			    DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED;

		/* save off results of the operation */
		res[idx].result = result;
		res[idx].status = status;
		res[idx].addr = req_mblks[idx].addr;	/* for partial case */
		res[idx].size = req_mblks[idx].size;	/* for partial case */
		res[idx].string = dr_mem_get_errstr(result, subresult);

		/* save result for drctl fini() reusing init() msg memory */
		drctl_req[idx].status = (result != DR_MEM_RES_OK) ?
		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;

		DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n",
		    __func__, req_mblks[idx].addr, req_mblks[idx].size,
		    drctl_req[idx].status, result,
		    (res[idx].string) ? res[idx].string : "");
	}

	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
		DR_DBG_MEM("%s: drctl_config_fini returned: %d\n",
		    __func__, rv);

	/*
	 * Operation completed without any fatal errors.
	 * Pack the response for transmission.
	 */
	*resp_len = dr_mem_pack_response(req, res, resp);

	/* notify interested parties about the operation */
	dr_generate_event(DR_TYPE_MEM, se_hint);

	/*
	 * Deallocate any scratch memory.
	 */
	kmem_free(drctl_resp, drctl_resp_len);
	kmem_free(drctl_req, drctl_req_len);

	dr_mem_res_array_fini(res, count);

	return (0);
}

/*
 * Allocate and initialize a result array based on the initial
 * drctl operation. A valid result array is always returned.
 */
static dr_mem_res_t *
dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
{
	int		idx;
	dr_mem_res_t	*res;
	char		*err_str;
	size_t		err_len;

	/* allocate zero filled buffer to initialize fields */
	res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP);

	/*
	 * Fill in the result information for each resource.
	 */
	for (idx = 0; idx < nrsrc; idx++) {
		res[idx].addr = rsrc[idx].res_mem_addr;
		res[idx].size = rsrc[idx].res_mem_size;
		res[idx].result = DR_MEM_RES_OK;

		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
			continue;

		/*
		 * Update the state information for this mblk.
		 */
		res[idx].result = DR_MEM_RES_BLOCKED;
		res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ?
		    DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED;

		/*
		 * If an error string exists, copy it out of the
		 * message buffer. This eliminates any dependency
		 * on the memory allocated for the message buffer
		 * itself.
		 */
		if (rsrc[idx].offset != NULL) {
			err_str = (char *)rsrc + rsrc[idx].offset;
			err_len = strlen(err_str) + 1;

			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
			bcopy(err_str, res[idx].string, err_len);
		}
	}

	return (res);
}

static void
dr_mem_res_array_fini(dr_mem_res_t *res, int nres)
{
	int	idx;
	size_t	str_len;

	for (idx = 0; idx < nres; idx++) {
		/* deallocate the error string if present */
		if (res[idx].string) {
			str_len = strlen(res[idx].string) + 1;
			kmem_free(res[idx].string, str_len);
		}
	}

	/* deallocate the result array itself */
	kmem_free(res, sizeof (dr_mem_res_t) * nres);
}
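/*
 * Response layout produced by dr_mem_pack_response(), with string_off
 * measured from the start of the message:
 *
 *	+--------------+------------------------+----------------+
 *	| dr_mem_hdr_t | dr_mem_stat_t[msg_arg] | error strings  |
 *	+--------------+------------------------+----------------+
 */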
/*
 * Allocate and pack a response message for transmission based
 * on the specified result array. A valid response message and
 * valid size information is always returned.
 */
static size_t
dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res,
    dr_mem_hdr_t **respp)
{
	int		idx;
	dr_mem_hdr_t	*resp;
	dr_mem_stat_t	*resp_stat;
	size_t		resp_len;
	uint32_t	curr_off;
	caddr_t		curr_str;
	size_t		str_len;
	size_t		stat_len;
	int		nstat = req->msg_arg;

	/*
	 * Calculate the size of the response message
	 * and allocate an appropriately sized buffer.
	 */
	resp_len = sizeof (dr_mem_hdr_t);

	/* add the stat array size */
	stat_len = sizeof (dr_mem_stat_t) * nstat;
	resp_len += stat_len;

	/* add the size of any error strings */
	for (idx = 0; idx < nstat; idx++) {
		if (res[idx].string != NULL) {
			resp_len += strlen(res[idx].string) + 1;
		}
	}

	/* allocate the message buffer */
	resp = kmem_zalloc(resp_len, KM_SLEEP);

	/*
	 * Fill in the header information.
	 */
	resp->req_num = req->req_num;
	resp->msg_type = DR_MEM_OK;
	resp->msg_arg = nstat;

	/*
	 * Fill in the stat information.
	 */
	resp_stat = DR_MEM_RESP_STATS(resp);

	/* string offsets start immediately after stat array */
	curr_off = sizeof (dr_mem_hdr_t) + stat_len;
	curr_str = (char *)resp_stat + stat_len;

	for (idx = 0; idx < nstat; idx++) {
		resp_stat[idx].addr = res[idx].addr;
		resp_stat[idx].size = res[idx].size;
		resp_stat[idx].result = res[idx].result;
		resp_stat[idx].status = res[idx].status;

		if (res[idx].string != NULL) {
			/* copy over the error string */
			str_len = strlen(res[idx].string) + 1;
			bcopy(res[idx].string, curr_str, str_len);
			resp_stat[idx].string_off = curr_off;

			curr_off += str_len;
			curr_str += str_len;
		}
	}

	/* buffer should be exactly filled */
	ASSERT(curr_off == resp_len);

	*respp = resp;
	return (resp_len);
}

static void
dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp)
{
	memquery_t mq;

	DR_DBG_MEM("dr_mem_query...\n");

	(void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq);

	if (!mq.phys_pages)
		return;

	mqp->addr = mbp->addr;
	mqp->mq.phys_pages = ptob(mq.phys_pages);
	mqp->mq.managed = ptob(mq.managed);
	mqp->mq.nonrelocatable = ptob(mq.nonrelocatable);
	mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable);
	mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable);

	/*
	 * Set to the max byte offset within the page.
	 */
	if (mqp->mq.nonrelocatable)
		mqp->mq.last_nonrelocatable += PAGESIZE - 1;
}
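/*
 * A query mblk with addr == 0 and size == 0 requests a report on
 * every span in phys_install rather than on the listed mblks.
 */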
/*
 * Do not modify result buffer or length on error.
 */
static int
dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		idx;
	int		rlen;
	int		nml;
	struct memlist	*ml;
	struct memlist	*phys_copy = NULL;
	dr_mem_blk_t	*req_mblks, mb;
	dr_mem_hdr_t	*rp;
	dr_mem_query_t	*stat;

	drctl_block();

	/* the incoming array of req_mblks to configure */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate a response message, should be freed by caller */
	nml = 0;
	rlen = sizeof (dr_mem_hdr_t);
	if (req_mblks->addr == NULL && req_mblks->size == 0) {
		/*
		 * Request is for the domain's full view of its memory.
		 * Place a copy in phys_copy, then release the memlist lock.
		 */
		memlist_read_lock();
		phys_copy = dr_memlist_dup(phys_install);
		memlist_read_unlock();

		for (ml = phys_copy; ml; ml = ml->ml_next)
			nml++;

		rlen += nml * sizeof (dr_mem_query_t);
	} else {
		rlen += req->msg_arg * sizeof (dr_mem_query_t);
	}
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = nml ? nml : req->msg_arg;

	/* stat array for the response */
	stat = DR_MEM_RESP_QUERY(rp);

	/* get the status for each of the mblocks */
	if (nml) {
		for (idx = 0, ml = phys_copy; ml; ml = ml->ml_next, idx++) {
			mb.addr = ml->ml_address;
			mb.size = ml->ml_size;
			dr_mem_query(&mb, &stat[idx]);
		}
	} else {
		for (idx = 0; idx < req->msg_arg; idx++)
			dr_mem_query(&req_mblks[idx], &stat[idx]);
	}

	*resp = rp;
	*resp_len = rlen;

	if (phys_copy != NULL) {
		dr_memlist_delete(phys_copy);
	}
	drctl_unblock();

	return (0);
}

static int
cvt_err(int err)
{
	int	rv;

	switch (err) {
	case KPHYSM_OK:
		rv = DR_MEM_RES_OK;
		break;
	case KPHYSM_ESPAN:
		rv = DR_MEM_RES_ESPAN;
		break;
	case KPHYSM_EFAULT:
		rv = DR_MEM_RES_EFAULT;
		break;
	case KPHYSM_ERESOURCE:
		rv = DR_MEM_RES_ERESOURCE;
		break;
	case KPHYSM_ENOTSUP:
	case KPHYSM_ENOHANDLES:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENONRELOC:
		rv = DR_MEM_RES_PERM;
		break;
	case KPHYSM_EHANDLE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EBUSY:
		rv = DR_MEM_RES_EBUSY;
		break;
	case KPHYSM_ENOTVIABLE:
		rv = DR_MEM_RES_ENOTVIABLE;
		break;
	case KPHYSM_ESEQUENCE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENOWORK:
		rv = DR_MEM_RES_ENOWORK;
		break;
	case KPHYSM_ECANCELLED:
		rv = DR_MEM_RES_ECANCELLED;
		break;
	case KPHYSM_EREFUSED:
		rv = DR_MEM_RES_EREFUSED;
		break;
	case KPHYSM_ENOTFINISHED:
	case KPHYSM_ENOTRUNNING:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EDUP:
		rv = DR_MEM_RES_EDUP;
		break;
	default:
		rv = DR_MEM_RES_FAILURE;
		break;
	}

	return (rv);
}

static int
dr_mem_configure(dr_mem_blk_t *mbp, int *status)
{
	int		rv;
	uint64_t	addr, size;

	rv = 0;
	addr = mbp->addr;
	size = mbp->size;

	DR_DBG_MEM("dr_mem_configure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size);
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = dr_mem_find(mbp)) {
		DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n",
		    addr, size, rv);
		if (rv == EINVAL) {
			*status = DR_MEM_STAT_NOT_PRESENT;
			rv = DR_MEM_RES_NOT_IN_MD;
		} else {
			*status = DR_MEM_STAT_UNCONFIGURED;
			rv = DR_MEM_RES_FAILURE;
		}
	} else {
		rv = mem_add(btop(addr), btop(size));
		DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv);
		if (rv) {
			*status = DR_MEM_STAT_UNCONFIGURED;
		} else {
			*status = DR_MEM_STAT_CONFIGURED;
		}
	}

	return (rv);
}

static int
dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status)
{
	int	rv;

	DR_DBG_MEM("dr_mem_unconfigure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n",
		    mbp->addr, mbp->size);
		*status = DR_MEM_STAT_CONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) {
		*status = DR_MEM_STAT_CONFIGURED;
	} else {
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_OK;
		DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n",
		    mbp->addr, mbp->size);
	}

	return (rv);
}
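/*
 * While a delete is in progress, the status response carries
 * msg_arg == 1 and a single memdelstat_t payload; otherwise
 * msg_arg == 0 and the header is sent alone.
 */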
static int
dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		status;
	int		rlen;
	memdelstat_t	del_stat, *stat;
	dr_mem_hdr_t	*rp;

	/*
	 * If a mem delete is in progress, get its status.
	 */
	status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK));

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rlen += status * sizeof (memdelstat_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = status;

	if (status) {
		/* stat struct for the response */
		stat = DR_MEM_RESP_DEL_STAT(rp);
		stat->phys_pages = ptob(del_stat.phys_pages);
		stat->managed = ptob(del_stat.managed);
		stat->collected = ptob(del_stat.collected);
	}

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rlen;
	dr_mem_hdr_t	*rp;

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ?
	    DR_MEM_RES_EINVAL : DR_MEM_RES_OK;

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_find(dr_mem_blk_t *mbp)
{
	md_t		*mdp = NULL;
	int		num_nodes;
	int		rv = 0;
	int		listsz;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	memnode;
	char		*found = "found";

	if ((mdp = md_get_handle()) == NULL) {
		DR_DBG_MEM("unable to initialize machine description\n");
		return (-1);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	memnode = dr_mem_find_node_md(mbp, mdp, listp);

	if (memnode == MDE_INVAL_ELEM_COOKIE) {
		rv = EINVAL;
		found = "not found";
	}

	DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found);

	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);

	return (rv);
}
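/*
 * Each MD "mblock" node describes one physical memory block via its
 * "base" and "size" properties, e.g. (values illustrative only):
 *
 *	mblock
 *	    base	0x400000000
 *	    size	0x10000000
 */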
*/ static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp) { int idx; int nnodes; mde_cookie_t rootnode; uint64_t base_prop; uint64_t size_prop; mde_cookie_t result = MDE_INVAL_ELEM_COOKIE; rootnode = md_root_node(mdp); ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); /* * Scan the DAG for all the mem nodes */ nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"), md_find_name(mdp, "fwd"), listp); if (nnodes < 0) { DR_DBG_MEM("Scan for mblks failed\n"); return (result); } DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes); /* * Find the mblk of interest */ for (idx = 0; idx < nnodes; idx++) { if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) { DR_DBG_MEM("Missing 'base' property for mblk node %d\n", idx); break; } if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) { DR_DBG_MEM("Missing 'size' property for mblk node %d\n", idx); break; } if (base_prop <= mbp->addr && (base_prop + size_prop) >= (mbp->addr + mbp->size)) { /* found a match */ DR_DBG_MEM("dr_mem_find_node_md: found mblk " "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size); result = listp[idx]; break; } } if (result == MDE_INVAL_ELEM_COOKIE) { DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n", mbp->addr, mbp->size); } return (result); } static int mem_add(pfn_t base, pgcnt_t npgs) { int rv, rc; DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); if (npgs == 0) return (DR_MEM_RES_OK); rv = kphysm_add_memory_dynamic(base, npgs); DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs, rv); if (rv == KPHYSM_OK) { if (rc = kcage_range_add(base, npgs, KCAGE_DOWN)) cmn_err(CE_WARN, "kcage_range_add() = %d", rc); } rv = cvt_err(rv); return (rv); } static void del_done(void *arg, int error) { mem_sync_t *ms = arg; mutex_enter(&ms->lock); ms->error = error; ms->done = 1; cv_signal(&ms->cond); mutex_exit(&ms->lock); } static int mem_del(pfn_t base, pgcnt_t npgs) { int rv, err, del_range = 0; int convert = 1; mem_sync_t ms; memquery_t mq; memhandle_t mh; struct memlist *ml; struct memlist *d_ml = NULL; DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); if (npgs == 0) return (DR_MEM_RES_OK); if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) { cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv); rv = cvt_err(rv); return (rv); } if ((rv = kphysm_del_span_query(base, npgs, &mq)) != KPHYSM_OK) { cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv); goto done; } if (mq.nonrelocatable) { DR_DBG_MEM("%s: non-reloc pages = %ld", __func__, mq.nonrelocatable); rv = KPHYSM_ENONRELOC; goto done; } if (rv = kcage_range_delete(base, npgs)) { switch (rv) { case EBUSY: rv = DR_MEM_RES_ENOTVIABLE; break; default: rv = DR_MEM_RES_FAILURE; break; } convert = 0; /* conversion done */ cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv); goto done; } else { del_range++; } if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) { cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv); goto done; } if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml)) != MEML_SPANOP_OK) { switch (rv) { case MEML_SPANOP_ESPAN: rv = DR_MEM_RES_ESPAN; break; case MEML_SPANOP_EALLOC: rv = DR_MEM_RES_ERESOURCE; break; default: rv = DR_MEM_RES_FAILURE; break; } convert = 0; /* conversion done */ cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv); goto done; } DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs); bzero((void *) &ms, sizeof (ms)); mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL); cv_init(&ms.cond, NULL, CV_DRIVER, NULL); mutex_enter(&ms.lock); 
static int
mem_del(pfn_t base, pgcnt_t npgs)
{
	int		rv, err, del_range = 0;
	int		convert = 1;
	mem_sync_t	ms;
	memquery_t	mq;
	memhandle_t	mh;
	struct memlist	*ml;
	struct memlist	*d_ml = NULL;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv);
		rv = cvt_err(rv);
		return (rv);
	}
	if ((rv = kphysm_del_span_query(base, npgs, &mq)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv);
		goto done;
	}
	if (mq.nonrelocatable) {
		DR_DBG_MEM("%s: non-reloc pages = %ld",
		    __func__, mq.nonrelocatable);
		rv = KPHYSM_ENONRELOC;
		goto done;
	}
	if (rv = kcage_range_delete(base, npgs)) {
		switch (rv) {
		case EBUSY:
			rv = DR_MEM_RES_ENOTVIABLE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0;	/* conversion done */
		cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv);
		goto done;
	} else {
		del_range++;
	}
	if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv);
		goto done;
	}
	if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml)) !=
	    MEML_SPANOP_OK) {
		switch (rv) {
		case MEML_SPANOP_ESPAN:
			rv = DR_MEM_RES_ESPAN;
			break;
		case MEML_SPANOP_EALLOC:
			rv = DR_MEM_RES_ERESOURCE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0;	/* conversion done */
		cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv);
		goto done;
	}

	DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs);

	bzero((void *) &ms, sizeof (ms));

	mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ms.cond, NULL, CV_DRIVER, NULL);
	mutex_enter(&ms.lock);

	if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) {
		/*
		 * Since we've called drctl_config_init, we are the only
		 * DR ctl operation in progress.  Set dr_mh to the
		 * delete memhandle for use by stat and cancel.
		 */
		ASSERT(dr_mh == NULL);
		dr_mh = mh;

		/*
		 * Wait for completion or interrupt.
		 */
		while (!ms.done) {
			if (cv_wait_sig(&ms.cond, &ms.lock) == 0) {
				/*
				 * There is a pending signal.
				 */
				(void) kphysm_del_cancel(mh);
				DR_DBG_MEM("%s: cancel", __func__);
				/*
				 * Wait for completion.
				 */
				while (!ms.done)
					cv_wait(&ms.cond, &ms.lock);
			}
		}
		dr_mh = NULL;
		rv = ms.error;
	} else {
		DR_DBG_MEM("%s: del_start() = %d", __func__, rv);
	}

	mutex_exit(&ms.lock);
	cv_destroy(&ms.cond);
	mutex_destroy(&ms.lock);

done:
	if (rv && del_range) {
		/*
		 * Add back the spans to the kcage growth list.
		 */
		for (ml = d_ml; ml; ml = ml->ml_next)
			if (err = kcage_range_add(btop(ml->ml_address),
			    btop(ml->ml_size), KCAGE_DOWN))
				cmn_err(CE_WARN, "kcage_range_add() = %d",
				    err);
	}
	memlist_free_list(d_ml);

	if ((err = kphysm_del_release(mh)) != KPHYSM_OK)
		cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err);
	if (convert)
		rv = cvt_err(rv);

	DR_DBG_MEM("%s: rv=%d", __func__, rv);

	return (rv);
}