/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4v Memory DR Module
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>

#include <sys/modctl.h>
#include <sys/sysevent/dr.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/ds.h>
#include <sys/drctl.h>
#include <sys/dr_util.h>
#include <sys/dr_mem.h>

/*
 * DR operations are subject to memory alignment restrictions
 * for both the address and the size of the request.
 */
#define	MA_ADDR	0x10000000	/* addr alignment 256M */
#define	MA_SIZE	0x10000000	/* size alignment 256M */

#define	MBLK_IS_VALID(m) \
	(IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE))

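/*
 * Illustrative example (values assumed for illustration only): with
 * 256M alignment, an mblk of addr=0x30000000 size=0x20000000 passes
 * MBLK_IS_VALID(), while addr=0x38000000 (offset by 128M) fails the
 * IS_P2ALIGNED() test and the request is rejected with
 * DR_MEM_RES_EINVAL by dr_mem_configure()/dr_mem_unconfigure().
 */
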
static memhandle_t dr_mh;	/* memory handle for delete */

static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v memory DR"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

static int dr_mem_allow_unload = 0;

typedef int (*fn_t)(dr_mem_blk_t *, int *);

/*
 * Global Domain Services (DS) Handle
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 */
static ds_ver_t		dr_mem_vers[] = { { 1, 0 } };
#define	DR_MEM_NVERS	(sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0]))

/*
 * DS Capability Description
 */
static ds_capability_t dr_mem_cap = {
	DR_MEM_DS_ID,		/* svc_id */
	dr_mem_vers,		/* vers */
	DR_MEM_NVERS		/* nvers */
};

/*
 * DS Callbacks
 */
static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_mem_unreg_handler(ds_cb_arg_t arg);
static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 */
static ds_clnt_ops_t dr_mem_ops = {
	dr_mem_reg_handler,	/* ds_reg_cb */
	dr_mem_unreg_handler,	/* ds_unreg_cb */
	dr_mem_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};

/*
 * Operation Results
 *
 * Used internally to gather results while an operation on a
 * list of mblks is in progress. In particular, it is used to
 * keep track of which mblks have already failed so that they are
 * not processed further, and the manner in which they failed.
 */
typedef struct {
	uint64_t	addr;
	uint64_t	size;
	uint32_t	result;
	uint32_t	status;
	char		*string;
} dr_mem_res_t;

static char *
dr_mem_estr[] = {
	"operation succeeded",		/* DR_MEM_RES_OK */
	"operation failed",		/* DR_MEM_RES_FAILURE */
	"operation was blocked",	/* DR_MEM_RES_BLOCKED */
	"memory not defined in MD",	/* DR_MEM_RES_NOT_IN_MD */
	"memory already in use",	/* DR_MEM_RES_ESPAN */
	"memory access test failed",	/* DR_MEM_RES_EFAULT */
	"resource not available",	/* DR_MEM_RES_ERESOURCE */
	"permanent pages in span",	/* DR_MEM_RES_PERM */
	"memory span busy",		/* DR_MEM_RES_EBUSY */
	"VM viability test failed",	/* DR_MEM_RES_ENOTVIABLE */
	"no pages to unconfigure",	/* DR_MEM_RES_ENOWORK */
	"operation cancelled",		/* DR_MEM_RES_ECANCELLED */
	"operation refused",		/* DR_MEM_RES_EREFUSED */
	"memory span duplicate",	/* DR_MEM_RES_EDUP */
	"invalid argument"		/* DR_MEM_RES_EINVAL */
};

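/*
 * Note: dr_mem_estr is indexed directly by the DR_MEM_RES_* result
 * codes, so the entries above must stay in the same order as those
 * codes are defined (assumed to live in <sys/dr_mem.h>); there is no
 * bounds check when dr_mem_list_wrk() performs the lookup.
 */
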
typedef struct {
	kcondvar_t	cond;
	kmutex_t	lock;
	int		error;
	int		done;
} mem_sync_t;

/*
 * Internal Functions
 */
static int dr_mem_init(void);
static int dr_mem_fini(void);

static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);

static int dr_mem_unconfigure(dr_mem_blk_t *, int *);
static int dr_mem_configure(dr_mem_blk_t *, int *);
static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *);

static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *, int);
static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres);
static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res,
    dr_mem_hdr_t **respp);

static int dr_mem_find(dr_mem_blk_t *mbp);
static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *, mde_cookie_t *);

static int mem_add(pfn_t, pgcnt_t);
static int mem_del(pfn_t, pgcnt_t);

extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t);

int
_init(void)
{
	int	status;

	/* check that Memory DR is enabled */
	if (dr_is_disabled(DR_TYPE_MEM))
		return (ENOTSUP);

	if ((status = dr_mem_init()) != 0) {
		cmn_err(CE_NOTE, "Memory DR initialization failed");
		return (status);
	}

	if ((status = mod_install(&modlinkage)) != 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if (dr_mem_allow_unload == 0)
		return (EBUSY);

	if ((status = mod_remove(&modlinkage)) == 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

static int
dr_mem_init(void)
{
	int	rv;

	if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv);
		return (rv);
	}

	return (0);
}

static int
dr_mem_fini(void)
{
	int	rv;

	if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv);
	}

	return (rv);
}

static void
dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
	DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
	    ver->major, ver->minor, hdl);

	ds_handle = hdl;
}

static void
dr_mem_unreg_handler(ds_cb_arg_t arg)
{
	DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg);

	ds_handle = DS_INVALID_HDL;
}

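/*
 * Inbound DS message dispatch. Each request is a dr_mem_hdr_t,
 * optionally followed by an array of dr_mem_blk_t entries (the exact
 * wire layout is assumed to be defined in <sys/dr_mem.h>). On any
 * validation or processing failure, a bare DR_MEM_ERROR header
 * carrying the error code in msg_arg is sent back in place of a
 * payload-bearing response.
 */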
/*ARGSUSED*/
static void
dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
{
	dr_mem_hdr_t	*req = buf;
	dr_mem_hdr_t	err_resp;
	dr_mem_hdr_t	*resp = &err_resp;
	int		resp_len = 0;
	int		rv = EINVAL;

	/*
	 * Sanity check the message
	 */
	if (buflen < sizeof (dr_mem_hdr_t)) {
		DR_DBG_MEM("incoming message short: expected at least %ld "
		    "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen);
		goto done;
	}

	if (req == NULL) {
		DR_DBG_MEM("empty message: expected at least %ld bytes\n",
		    sizeof (dr_mem_hdr_t));
		goto done;
	}

	DR_DBG_MEM("incoming request:\n");
	DR_DBG_DUMP_MSG(buf, buflen);

	/*
	 * Process the command
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
	case DR_MEM_UNCONFIGURE:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) {
			DR_DBG_MEM("%s failed (%d)\n",
			    (req->msg_type == DR_MEM_CONFIGURE) ?
			    "Memory configure" : "Memory unconfigure", rv);
		}
		break;

	case DR_MEM_UNCONF_STATUS:
		if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete status failed (%d)\n", rv);
		break;

	case DR_MEM_UNCONF_CANCEL:
		if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv);
		break;

	case DR_MEM_QUERY:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory query failed (%d)\n", rv);
		break;

	default:
		cmn_err(CE_NOTE, "unsupported memory DR operation (%d)",
		    req->msg_type);
		break;
	}

done:
	/* check if an error occurred */
	if (resp == &err_resp) {
		resp->req_num = (req) ? req->req_num : 0;
		resp->msg_type = DR_MEM_ERROR;
		resp->msg_arg = rv;
		resp_len = sizeof (dr_mem_hdr_t);
	}

	DR_DBG_MEM("outgoing response:\n");
	DR_DBG_DUMP_MSG(resp, resp_len);

	/* send back the response */
	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
		DR_DBG_MEM("ds_send failed\n");
	}

	/* free any allocated memory */
	if (resp != &err_resp) {
		kmem_free(resp, resp_len);
	}
}

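/*
 * Response ownership convention (summarized here for maintainers):
 * dr_mem_list_wrk(), dr_mem_list_query(), dr_mem_del_stat() and
 * dr_mem_del_cancel() kmem_zalloc() their response buffers and hand
 * them back through *resp; dr_mem_data_handler() frees them after
 * transmission. On error the handler falls back to the stack-allocated
 * err_resp, which must not be freed.
 */
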
/*
 * Common routine to config or unconfig multiple mblks.
 *
 * Note: Do not modify result buffer or length on error.
 */
static int
dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rv;
	int		idx;
	int		count;
	int		result;
	int		status;
	fn_t		dr_fn;
	int		se_hint;
	dr_mem_blk_t	*req_mblks;
	dr_mem_res_t	*res;
	int		drctl_cmd;
	int		drctl_flags = 0;
	drctl_rsrc_t	*drctl_req;
	size_t		drctl_req_len;
	drctl_resp_t	*drctl_resp;
	drctl_rsrc_t	*drctl_rsrc;
	size_t		drctl_resp_len = 0;
	drctl_cookie_t	drctl_res_ck;

	ASSERT((req != NULL) && (req->msg_arg != 0));

	count = req->msg_arg;

	/*
	 * Extract all information that is specific
	 * to the various types of operations.
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
		dr_fn = dr_mem_configure;
		drctl_cmd = DRCTL_MEM_CONFIG_REQUEST;
		se_hint = SE_HINT_INSERT;
		break;
	case DR_MEM_UNCONFIGURE:
		dr_fn = dr_mem_unconfigure;
		drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST;
		se_hint = SE_HINT_REMOVE;
		break;
	default:
		/* Programming error if we reach this. */
		cmn_err(CE_NOTE, "%s: bad msg_type %d\n",
		    __func__, req->msg_type);
		ASSERT(0);
		return (-1);
	}

	/* the incoming array of mblks to operate on */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate drctl request msg based on incoming resource count */
	drctl_req_len = sizeof (drctl_rsrc_t) * count;
	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);

	/* copy the address and size for the drctl call from the request */
	for (idx = 0; idx < count; idx++) {
		drctl_req[idx].res_mem_addr = req_mblks[idx].addr;
		drctl_req[idx].res_mem_size = req_mblks[idx].size;
	}

	rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
	    count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);

	ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));

	if (rv != 0) {
		DR_DBG_MEM("%s: drctl_config_init returned: %d\n",
		    __func__, rv);
		kmem_free(drctl_resp, drctl_resp_len);
		kmem_free(drctl_req, drctl_req_len);
		return (rv);
	}

	ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);

	drctl_rsrc = drctl_resp->resp_resources;

	/* create the result scratch array */
	res = dr_mem_res_array_init(req, drctl_rsrc, count);

	/* perform the specified operation on each of the mblks */
	for (idx = 0; idx < count; idx++) {
		/*
		 * If no action will be taken against the current
		 * mblk, update the drctl resource information to
		 * ensure that it gets recovered properly during
		 * the drctl fini() call.
		 */
		if (res[idx].result != DR_MEM_RES_OK) {
			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
			continue;
		}

		/* call the function to perform the actual operation */
		result = (*dr_fn)(&req_mblks[idx], &status);

		/* save off results of the operation */
		res[idx].result = result;
		res[idx].status = status;
		res[idx].addr = req_mblks[idx].addr;	/* for partial case */
		res[idx].size = req_mblks[idx].size;	/* for partial case */
		res[idx].string = i_ddi_strdup(dr_mem_estr[result], KM_SLEEP);

		/* save result for drctl fini() reusing init() msg memory */
		drctl_req[idx].status = (result != DR_MEM_RES_OK) ?
		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;

		DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n",
		    __func__, req_mblks[idx].addr, req_mblks[idx].size,
		    drctl_req[idx].status, result,
		    (res[idx].string) ? res[idx].string : "");
	}

	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
		DR_DBG_MEM("%s: drctl_config_fini returned: %d\n",
		    __func__, rv);

	/*
	 * Operation completed without any fatal errors.
	 * Pack the response for transmission.
	 */
	*resp_len = dr_mem_pack_response(req, res, resp);

	/* notify interested parties about the operation */
	dr_generate_event(DR_TYPE_MEM, se_hint);

	/*
	 * Deallocate any scratch memory.
	 */
	kmem_free(drctl_resp, drctl_resp_len);
	kmem_free(drctl_req, drctl_req_len);

	dr_mem_res_array_fini(res, count);

	return (0);
}

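/*
 * Result-array lifecycle (summary): the strings attached to each
 * dr_mem_res_t are allocated either below in dr_mem_res_array_init()
 * or in dr_mem_list_wrk() via i_ddi_strdup(), copied into the packed
 * response by dr_mem_pack_response(), and finally released by
 * dr_mem_res_array_fini(). The outgoing response never references
 * this scratch memory directly.
 */
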
/*
 * Allocate and initialize a result array based on the initial
 * drctl operation. A valid result array is always returned.
 */
static dr_mem_res_t *
dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
{
	int		idx;
	dr_mem_res_t	*res;
	char		*err_str;
	size_t		err_len;

	/* allocate zero filled buffer to initialize fields */
	res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP);

	/*
	 * Fill in the result information for each resource.
	 */
	for (idx = 0; idx < nrsrc; idx++) {
		res[idx].addr = rsrc[idx].res_mem_addr;
		res[idx].size = rsrc[idx].res_mem_size;
		res[idx].result = DR_MEM_RES_OK;

		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
			continue;

		/*
		 * Update the state information for this mblk.
		 */
		res[idx].result = DR_MEM_RES_BLOCKED;
		res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ?
		    DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED;

		/*
		 * If an error string exists, copy it out of the
		 * message buffer. This eliminates any dependency
		 * on the memory allocated for the message buffer
		 * itself.
		 */
		if (rsrc[idx].offset != NULL) {
			err_str = (char *)rsrc + rsrc[idx].offset;
			err_len = strlen(err_str) + 1;

			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
			bcopy(err_str, res[idx].string, err_len);
		}
	}

	return (res);
}

static void
dr_mem_res_array_fini(dr_mem_res_t *res, int nres)
{
	int	idx;
	size_t	str_len;

	for (idx = 0; idx < nres; idx++) {
		/* deallocate the error string if present */
		if (res[idx].string) {
			str_len = strlen(res[idx].string) + 1;
			kmem_free(res[idx].string, str_len);
		}
	}

	/* deallocate the result array itself */
	kmem_free(res, sizeof (dr_mem_res_t) * nres);
}

/*
 * Allocate and pack a response message for transmission based
 * on the specified result array. A valid response message and
 * valid size information is always returned.
 */
static size_t
dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, dr_mem_hdr_t **respp)
{
	int		idx;
	dr_mem_hdr_t	*resp;
	dr_mem_stat_t	*resp_stat;
	size_t		resp_len;
	uint32_t	curr_off;
	caddr_t		curr_str;
	size_t		str_len;
	size_t		stat_len;
	int		nstat = req->msg_arg;

	/*
	 * Calculate the size of the response message
	 * and allocate an appropriately sized buffer.
	 */
	resp_len = sizeof (dr_mem_hdr_t);

	/* add the stat array size */
	stat_len = sizeof (dr_mem_stat_t) * nstat;
	resp_len += stat_len;

	/* add the size of any error strings */
	for (idx = 0; idx < nstat; idx++) {
		if (res[idx].string != NULL) {
			resp_len += strlen(res[idx].string) + 1;
		}
	}

	/* allocate the message buffer */
	resp = kmem_zalloc(resp_len, KM_SLEEP);

	/*
	 * Fill in the header information.
	 */
	resp->req_num = req->req_num;
	resp->msg_type = DR_MEM_OK;
	resp->msg_arg = nstat;

	/*
	 * Fill in the stat information.
	 */
	resp_stat = DR_MEM_RESP_STATS(resp);

	/* string offsets start immediately after stat array */
	curr_off = sizeof (dr_mem_hdr_t) + stat_len;
	curr_str = (char *)resp_stat + stat_len;

	for (idx = 0; idx < nstat; idx++) {
		resp_stat[idx].addr = res[idx].addr;
		resp_stat[idx].size = res[idx].size;
		resp_stat[idx].result = res[idx].result;
		resp_stat[idx].status = res[idx].status;

		if (res[idx].string != NULL) {
			/* copy over the error string */
			str_len = strlen(res[idx].string) + 1;
			bcopy(res[idx].string, curr_str, str_len);
			resp_stat[idx].string_off = curr_off;

			curr_off += str_len;
			curr_str += str_len;
		}
	}

	/* buffer should be exactly filled */
	ASSERT(curr_off == resp_len);

	*respp = resp;
	return (resp_len);
}

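/*
 * Packed response layout produced above (offsets are bytes from the
 * start of the message):
 *
 *	+-----------------+  0
 *	| dr_mem_hdr_t    |
 *	+-----------------+  sizeof (dr_mem_hdr_t)
 *	| dr_mem_stat_t   |  nstat entries
 *	|      ...        |
 *	+-----------------+  sizeof (dr_mem_hdr_t) + stat_len
 *	| NUL-terminated  |  error strings, referenced by
 *	| strings ...     |  resp_stat[i].string_off
 *	+-----------------+  resp_len
 */
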
static void
dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp)
{
	memquery_t	mq;

	DR_DBG_MEM("dr_mem_query...\n");

	(void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq);

	if (!mq.phys_pages)
		return;

	mqp->addr = mbp->addr;
	mqp->mq.phys_pages = ptob(mq.phys_pages);
	mqp->mq.managed = ptob(mq.managed);
	mqp->mq.nonrelocatable = ptob(mq.nonrelocatable);
	mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable);
	mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable);
	/*
	 * Set to the max byte offset within the page.
	 */
	if (mqp->mq.nonrelocatable)
		mqp->mq.last_nonrelocatable += PAGESIZE - 1;
}

/*
 * Do not modify result buffer or length on error.
 */
static int
dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		idx;
	int		rlen;
	int		nml;
	struct memlist	*ml;
	struct memlist	*phys_copy = NULL;
	dr_mem_blk_t	*req_mblks, mb;
	dr_mem_hdr_t	*rp;
	dr_mem_query_t	*stat;

	drctl_block();

	/* the incoming array of req_mblks to configure */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate a response message, should be freed by caller */
	nml = 0;
	rlen = sizeof (dr_mem_hdr_t);
	if (req_mblks->addr == NULL && req_mblks->size == 0) {
		/*
		 * Request is for the domain's full view of its memory.
		 * Place a copy in phys_copy, then release the memlist lock.
		 */
		memlist_read_lock();
		phys_copy = dr_memlist_dup(phys_install);
		memlist_read_unlock();

		for (ml = phys_copy; ml; ml = ml->ml_next)
			nml++;

		rlen += nml * sizeof (dr_mem_query_t);
	} else {
		rlen += req->msg_arg * sizeof (dr_mem_query_t);
	}
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = nml ? nml : req->msg_arg;

	/* stat array for the response */
	stat = DR_MEM_RESP_QUERY(rp);

	/* get the status for each of the mblocks */
	if (nml) {
		for (idx = 0, ml = phys_copy; ml; ml = ml->ml_next, idx++) {
			mb.addr = ml->ml_address;
			mb.size = ml->ml_size;
			dr_mem_query(&mb, &stat[idx]);
		}
	} else {
		for (idx = 0; idx < req->msg_arg; idx++)
			dr_mem_query(&req_mblks[idx], &stat[idx]);
	}

	*resp = rp;
	*resp_len = rlen;
	if (phys_copy != NULL) {
		dr_memlist_delete(phys_copy);
	}
	drctl_unblock();

	return (0);
}

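/*
 * Map kphysm_*() error codes onto the DR_MEM_RES_* codes carried in
 * responses. Codes with no direct DR equivalent (e.g. KPHYSM_ENOTSUP,
 * KPHYSM_ESEQUENCE) collapse to the generic DR_MEM_RES_FAILURE.
 */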
static int
cvt_err(int err)
{
	int	rv;

	switch (err) {
	case KPHYSM_OK:
		rv = DR_MEM_RES_OK;
		break;
	case KPHYSM_ESPAN:
		rv = DR_MEM_RES_ESPAN;
		break;
	case KPHYSM_EFAULT:
		rv = DR_MEM_RES_EFAULT;
		break;
	case KPHYSM_ERESOURCE:
		rv = DR_MEM_RES_ERESOURCE;
		break;
	case KPHYSM_ENOTSUP:
	case KPHYSM_ENOHANDLES:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENONRELOC:
		rv = DR_MEM_RES_PERM;
		break;
	case KPHYSM_EHANDLE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EBUSY:
		rv = DR_MEM_RES_EBUSY;
		break;
	case KPHYSM_ENOTVIABLE:
		rv = DR_MEM_RES_ENOTVIABLE;
		break;
	case KPHYSM_ESEQUENCE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENOWORK:
		rv = DR_MEM_RES_ENOWORK;
		break;
	case KPHYSM_ECANCELLED:
		rv = DR_MEM_RES_ECANCELLED;
		break;
	case KPHYSM_EREFUSED:
		rv = DR_MEM_RES_EREFUSED;
		break;
	case KPHYSM_ENOTFINISHED:
	case KPHYSM_ENOTRUNNING:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EDUP:
		rv = DR_MEM_RES_EDUP;
		break;
	default:
		rv = DR_MEM_RES_FAILURE;
		break;
	}

	return (rv);
}

static int
dr_mem_configure(dr_mem_blk_t *mbp, int *status)
{
	int		rv;
	uint64_t	addr, size;

	rv = 0;
	addr = mbp->addr;
	size = mbp->size;

	DR_DBG_MEM("dr_mem_configure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size);
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = dr_mem_find(mbp)) {
		DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n",
		    addr, size, rv);
		if (rv == EINVAL) {
			*status = DR_MEM_STAT_NOT_PRESENT;
			rv = DR_MEM_RES_NOT_IN_MD;
		} else {
			*status = DR_MEM_STAT_UNCONFIGURED;
			rv = DR_MEM_RES_FAILURE;
		}
	} else {
		rv = mem_add(btop(addr), btop(size));
		DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv);
		if (rv) {
			*status = DR_MEM_STAT_UNCONFIGURED;
		} else {
			*status = DR_MEM_STAT_CONFIGURED;
		}
	}

	return (rv);
}

static int
dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status)
{
	int	rv;

	DR_DBG_MEM("dr_mem_unconfigure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n",
		    mbp->addr, mbp->size);
		*status = DR_MEM_STAT_CONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) {
		*status = DR_MEM_STAT_CONFIGURED;
	} else {
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_OK;
		DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n",
		    mbp->addr, mbp->size);
	}
	return (rv);
}

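/*
 * dr_mh lifecycle: mem_del() publishes its delete memhandle_t in the
 * global dr_mh for the duration of a delete, which is what allows the
 * status and cancel handlers below to observe or abort an in-flight
 * unconfigure. drctl_config_init() serializes DR operations, so at
 * most one delete can own dr_mh at a time.
 */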
static int
dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		status;
	int		rlen;
	memdelstat_t	del_stat, *stat;
	dr_mem_hdr_t	*rp;

	/*
	 * If a mem delete is in progress, get its status.
	 */
	status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK));

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rlen += status * sizeof (memdelstat_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = status;

	if (status) {
		/* stat struct for the response */
		stat = DR_MEM_RESP_DEL_STAT(rp);
		stat->phys_pages = ptob(del_stat.phys_pages);
		stat->managed = ptob(del_stat.managed);
		stat->collected = ptob(del_stat.collected);
	}

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rlen;
	dr_mem_hdr_t	*rp;

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ?
	    DR_MEM_RES_EINVAL : DR_MEM_RES_OK;

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_find(dr_mem_blk_t *mbp)
{
	md_t		*mdp = NULL;
	int		num_nodes;
	int		rv = 0;
	int		listsz;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	memnode;
	char		*found = "found";

	if ((mdp = md_get_handle()) == NULL) {
		DR_DBG_MEM("unable to initialize machine description\n");
		return (-1);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	memnode = dr_mem_find_node_md(mbp, mdp, listp);

	if (memnode == MDE_INVAL_ELEM_COOKIE) {
		rv = EINVAL;
		found = "not found";
	}

	DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found);

	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);

	return (rv);
}

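/*
 * Return-value contract for dr_mem_find(): 0 when the mblk is covered
 * by an mblock node in the MD, EINVAL when no covering node exists,
 * and -1 when the MD itself cannot be opened. dr_mem_configure()
 * relies on this distinction to report DR_MEM_RES_NOT_IN_MD versus
 * the generic DR_MEM_RES_FAILURE.
 */
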
/*
 * Look up a particular mblk in the MD. Returns the mde_cookie_t
 * representing that mblk if present, and MDE_INVAL_ELEM_COOKIE
 * otherwise. It is assumed the scratch array has already been
 * allocated so that it can accommodate the worst case scenario,
 * every node in the MD.
 */
static mde_cookie_t
dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp)
{
	int		idx;
	int		nnodes;
	mde_cookie_t	rootnode;
	uint64_t	base_prop;
	uint64_t	size_prop;
	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Scan the DAG for all the mem nodes
	 */
	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"),
	    md_find_name(mdp, "fwd"), listp);

	if (nnodes < 0) {
		DR_DBG_MEM("Scan for mblks failed\n");
		return (result);
	}

	DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes);

	/*
	 * Find the mblk of interest
	 */
	for (idx = 0; idx < nnodes; idx++) {

		if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) {
			DR_DBG_MEM("Missing 'base' property for mblk node "
			    "%d\n", idx);
			break;
		}

		if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) {
			DR_DBG_MEM("Missing 'size' property for mblk node "
			    "%d\n", idx);
			break;
		}

		if (base_prop <= mbp->addr &&
		    (base_prop + size_prop) >= (mbp->addr + mbp->size)) {
			/* found a match */
			DR_DBG_MEM("dr_mem_find_node_md: found mblk "
			    "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size);
			result = listp[idx];
			break;
		}
	}

	if (result == MDE_INVAL_ELEM_COOKIE) {
		DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n",
		    mbp->addr, mbp->size);
	}

	return (result);
}

static int
mem_add(pfn_t base, pgcnt_t npgs)
{
	int rv, rc;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	rv = kphysm_add_memory_dynamic(base, npgs);
	DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs,
	    rv);
	if (rv == KPHYSM_OK) {
		if (rc = kcage_range_add(base, npgs, KCAGE_DOWN))
			cmn_err(CE_WARN, "kcage_range_add() = %d", rc);
	}
	rv = cvt_err(rv);
	return (rv);
}

static void
del_done(void *arg, int error)
{
	mem_sync_t *ms = arg;

	mutex_enter(&ms->lock);
	ms->error = error;
	ms->done = 1;
	cv_signal(&ms->cond);
	mutex_exit(&ms->lock);
}

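/*
 * mem_del() drives an asynchronous kphysm delete synchronously: it
 * starts the delete with kphysm_del_start(mh, del_done, &ms) and then
 * blocks on ms.cond until the del_done() callback above marks the
 * mem_sync_t done. A signal delivered while waiting (cv_wait_sig()
 * returning 0) cancels the delete and then waits, uninterruptibly,
 * for the cancellation itself to complete.
 */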
static int
mem_del(pfn_t base, pgcnt_t npgs)
{
	int		rv, err, del_range = 0;
	int		convert = 1;
	mem_sync_t	ms;
	memquery_t	mq;
	memhandle_t	mh;
	struct memlist	*ml;
	struct memlist	*d_ml = NULL;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv);
		rv = cvt_err(rv);
		return (rv);
	}
	if ((rv = kphysm_del_span_query(base, npgs, &mq))
	    != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv);
		goto done;
	}
	if (mq.nonrelocatable) {
		DR_DBG_MEM("%s: non-reloc pages = %ld",
		    __func__, mq.nonrelocatable);
		rv = KPHYSM_ENONRELOC;
		goto done;
	}
	if (rv = kcage_range_delete(base, npgs)) {
		switch (rv) {
		case EBUSY:
			rv = DR_MEM_RES_ENOTVIABLE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0;	/* rv already converted */
		cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv);
		goto done;
	} else {
		del_range++;
	}
	if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv);
		goto done;
	}
	if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml))
	    != MEML_SPANOP_OK) {
		switch (rv) {
		case MEML_SPANOP_ESPAN:
			rv = DR_MEM_RES_ESPAN;
			break;
		case MEML_SPANOP_EALLOC:
			rv = DR_MEM_RES_ERESOURCE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0;	/* rv already converted */
		cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv);
		goto done;
	}

	DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs);

	bzero((void *) &ms, sizeof (ms));

	mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ms.cond, NULL, CV_DRIVER, NULL);
	mutex_enter(&ms.lock);

	if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) {
		/*
		 * Since we've called drctl_config_init, we are the only
		 * DR ctl operation in progress. Set dr_mh to the
		 * delete memhandle for use by stat and cancel.
		 */
		ASSERT(dr_mh == NULL);
		dr_mh = mh;

		/*
		 * Wait for completion or interrupt.
		 */
		while (!ms.done) {
			if (cv_wait_sig(&ms.cond, &ms.lock) == 0) {
				/*
				 * There is a pending signal.
				 */
				(void) kphysm_del_cancel(mh);
				DR_DBG_MEM("%s: cancel", __func__);
				/*
				 * Wait for completion.
				 */
				while (!ms.done)
					cv_wait(&ms.cond, &ms.lock);
			}
		}
		dr_mh = NULL;
		rv = ms.error;
	} else {
		DR_DBG_MEM("%s: del_start() = %d", __func__, rv);
	}

	mutex_exit(&ms.lock);
	cv_destroy(&ms.cond);
	mutex_destroy(&ms.lock);

done:
	if (rv && del_range) {
		/*
		 * Add back the spans to the kcage growth list.
		 */
		for (ml = d_ml; ml; ml = ml->ml_next)
			if (err = kcage_range_add(btop(ml->ml_address),
			    btop(ml->ml_size), KCAGE_DOWN))
				cmn_err(CE_WARN, "kcage_range_add() = %d", err);
	}
	memlist_free_list(d_ml);

	if ((err = kphysm_del_release(mh)) != KPHYSM_OK)
		cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err);
	if (convert)
		rv = cvt_err(rv);

	DR_DBG_MEM("%s: rv=%d", __func__, rv);

	return (rv);
}