/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * sun4v Memory DR Module
 */


#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>

#include <sys/modctl.h>
#include <sys/sysevent/dr.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/ds.h>
#include <sys/drctl.h>
#include <sys/dr_util.h>
#include <sys/dr_mem.h>

/*
 * DR operations are subject to Memory Alignment restrictions
 * for both address and the size of the request.
 */
#define	MA_ADDR	0x10000000	/* addr alignment 256M */
#define	MA_SIZE	0x10000000	/* size alignment 256M */

#define	MBLK_IS_VALID(m) \
	(IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE))

static memhandle_t dr_mh;	/* memory handle for delete */

static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v memory DR"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};

static int dr_mem_allow_unload = 0;

typedef int (*fn_t)(dr_mem_blk_t *, int *);

/*
 * Global Domain Services (DS) Handle
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 */
static ds_ver_t		dr_mem_vers[] = { { 1, 0 } };
#define	DR_MEM_NVERS	(sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0]))

/*
 * DS Capability Description
 */
static ds_capability_t dr_mem_cap = {
	DR_MEM_DS_ID,		/* svc_id */
	dr_mem_vers,		/* vers */
	DR_MEM_NVERS		/* nvers */
};

/*
 * DS Callbacks
 */
static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_mem_unreg_handler(ds_cb_arg_t arg);
static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 */
static ds_clnt_ops_t dr_mem_ops = {
	dr_mem_reg_handler,	/* ds_reg_cb */
	dr_mem_unreg_handler,	/* ds_unreg_cb */
	dr_mem_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};
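/*
 * Protocol overview (informational)
 *
 * Memory DR requests arrive from the domain manager over the Domain
 * Services channel as dr_mem_hdr_t messages and are dispatched by
 * dr_mem_data_handler() based on msg_type. Each handler constructs a
 * response message that is sent back via ds_cap_send() using the
 * service handle saved by dr_mem_reg_handler(). This summary is
 * descriptive only; the message definitions live in sys/dr_mem.h.
 */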
/*
 * Operation Results
 *
 * Used internally to gather results while an operation on a
 * list of mblks is in progress. In particular, it is used to
 * keep track of which mblks have already failed so that they are
 * not processed further, and the manner in which they failed.
 */
typedef struct {
	uint64_t	addr;
	uint64_t	size;
	uint32_t	result;
	uint32_t	status;
	char		*string;
} dr_mem_res_t;

static char *
dr_mem_estr[] = {
	"operation succeeded",		/* DR_MEM_RES_OK */
	"operation failed",		/* DR_MEM_RES_FAILURE */
	"operation was blocked",	/* DR_MEM_RES_BLOCKED */
	"memory not defined in MD",	/* DR_MEM_RES_NOT_IN_MD */
	"memory already in use",	/* DR_MEM_RES_ESPAN */
	"memory access test failed",	/* DR_MEM_RES_EFAULT */
	"resource not available",	/* DR_MEM_RES_ERESOURCE */
	"permanent pages in span",	/* DR_MEM_RES_PERM */
	"memory span busy",		/* DR_MEM_RES_EBUSY */
	"VM viability test failed",	/* DR_MEM_RES_ENOTVIABLE */
	"no pages to unconfigure",	/* DR_MEM_RES_ENOWORK */
	"operation cancelled",		/* DR_MEM_RES_ECANCELLED */
	"operation refused",		/* DR_MEM_RES_EREFUSED */
	"memory span duplicate",	/* DR_MEM_RES_EDUP */
	"invalid argument"		/* DR_MEM_RES_EINVAL */
};

typedef struct {
	kcondvar_t cond;
	kmutex_t lock;
	int error;
	int done;
} mem_sync_t;

/*
 * Internal Functions
 */
static int dr_mem_init(void);
static int dr_mem_fini(void);

static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);
static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *);

static int dr_mem_unconfigure(dr_mem_blk_t *, int *);
static int dr_mem_configure(dr_mem_blk_t *, int *);
static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *);

static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *, int);
static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres);
static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res,
    dr_mem_hdr_t **respp);

static int dr_mem_find(dr_mem_blk_t *mbp);
static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *, mde_cookie_t *);

static int mem_add(pfn_t, pgcnt_t);
static int mem_del(pfn_t, pgcnt_t);

extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t);

int
_init(void)
{
	int	status;

	/* check that Memory DR is enabled */
	if (dr_is_disabled(DR_TYPE_MEM))
		return (ENOTSUP);

	if ((status = dr_mem_init()) != 0) {
		cmn_err(CE_NOTE, "Memory DR initialization failed");
		return (status);
	}

	if ((status = mod_install(&modlinkage)) != 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if (dr_mem_allow_unload == 0)
		return (EBUSY);

	if ((status = mod_remove(&modlinkage)) == 0) {
		(void) dr_mem_fini();
	}

	return (status);
}

static int
dr_mem_init(void)
{
	int rv;

	if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv);
		return (rv);
	}

	return (0);
}

static int
dr_mem_fini(void)
{
	int rv;

	if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) {
		cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv);
	}

	return (rv);
}
static void
dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
	DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
	    ver->major, ver->minor, hdl);

	ds_handle = hdl;
}

static void
dr_mem_unreg_handler(ds_cb_arg_t arg)
{
	DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg);

	ds_handle = DS_INVALID_HDL;
}

/*ARGSUSED*/
static void
dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
{
	dr_mem_hdr_t	*req = buf;
	dr_mem_hdr_t	err_resp;
	dr_mem_hdr_t	*resp = &err_resp;
	int		resp_len = 0;
	int		rv = EINVAL;

	/*
	 * Sanity check the message
	 */
	if (buflen < sizeof (dr_mem_hdr_t)) {
		DR_DBG_MEM("incoming message short: expected at least %ld "
		    "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen);
		goto done;
	}

	if (req == NULL) {
		DR_DBG_MEM("empty message: expected at least %ld bytes\n",
		    sizeof (dr_mem_hdr_t));
		goto done;
	}

	DR_DBG_MEM("incoming request:\n");
	DR_DBG_DUMP_MSG(buf, buflen);

	/*
	 * Process the command
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
	case DR_MEM_UNCONFIGURE:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) {
			DR_DBG_MEM("%s failed (%d)\n",
			    (req->msg_type == DR_MEM_CONFIGURE) ?
			    "Memory configure" : "Memory unconfigure", rv);
		}
		break;

	case DR_MEM_UNCONF_STATUS:
		if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete status failed (%d)\n", rv);
		break;

	case DR_MEM_UNCONF_CANCEL:
		if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv);
		break;

	case DR_MEM_QUERY:
		if (req->msg_arg == 0) {
			DR_DBG_MEM("No mblks specified for operation\n");
			goto done;
		}
		if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0)
			DR_DBG_MEM("Memory query failed (%d)\n", rv);
		break;

	default:
		cmn_err(CE_NOTE, "unsupported memory DR operation (%d)",
		    req->msg_type);
		break;
	}

done:
	/* check if an error occurred */
	if (resp == &err_resp) {
		resp->req_num = (req) ? req->req_num : 0;
		resp->msg_type = DR_MEM_ERROR;
		resp->msg_arg = rv;
		resp_len = sizeof (dr_mem_hdr_t);
	}

	DR_DBG_MEM("outgoing response:\n");
	DR_DBG_DUMP_MSG(resp, resp_len);

	/* send back the response */
	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
		DR_DBG_MEM("ds_send failed\n");
	}

	/* free any allocated memory */
	if (resp != &err_resp) {
		kmem_free(resp, resp_len);
	}
}
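/*
 * Message layout (informational sketch; sys/dr_mem.h is authoritative)
 *
 * Requests consist of a dr_mem_hdr_t (req_num, msg_type, msg_arg)
 * followed, for CONFIGURE/UNCONFIGURE/QUERY, by msg_arg dr_mem_blk_t
 * entries (addr, size) located via DR_MEM_CMD_MBLKS(). Responses to
 * list operations are packed by dr_mem_pack_response() as:
 *
 *	+------------------+ offset 0
 *	| dr_mem_hdr_t     |
 *	+------------------+ sizeof (dr_mem_hdr_t)
 *	| dr_mem_stat_t[]  | msg_arg entries
 *	+------------------+ header + stat array
 *	| error strings    | NUL-terminated, referenced by the
 *	+------------------+ string_off byte offsets in each stat
 */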
/*
 * Common routine to config or unconfig multiple mblks.
 *
 * Note: Do not modify result buffer or length on error.
 */
static int
dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rv;
	int		idx;
	int		count;
	int		result;
	int		status;
	fn_t		dr_fn;
	int		se_hint;
	dr_mem_blk_t	*req_mblks;
	dr_mem_res_t	*res;
	int		drctl_cmd;
	int		drctl_flags = 0;
	drctl_rsrc_t	*drctl_req;
	size_t		drctl_req_len;
	drctl_resp_t	*drctl_resp;
	drctl_rsrc_t	*drctl_rsrc;
	size_t		drctl_resp_len = 0;
	drctl_cookie_t	drctl_res_ck;

	ASSERT((req != NULL) && (req->msg_arg != 0));

	count = req->msg_arg;

	/*
	 * Extract all information that is specific
	 * to the various types of operations.
	 */
	switch (req->msg_type) {
	case DR_MEM_CONFIGURE:
		dr_fn = dr_mem_configure;
		drctl_cmd = DRCTL_MEM_CONFIG_REQUEST;
		se_hint = SE_HINT_INSERT;
		break;
	case DR_MEM_UNCONFIGURE:
		dr_fn = dr_mem_unconfigure;
		drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST;
		se_hint = SE_HINT_REMOVE;
		break;
	default:
		/* Programming error if we reach this. */
		cmn_err(CE_NOTE, "%s: bad msg_type %d\n",
		    __func__, req->msg_type);
		ASSERT(0);
		return (-1);
	}

	/* the incoming array of mblks to operate on */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate drctl request msg based on incoming resource count */
	drctl_req_len = sizeof (drctl_rsrc_t) * count;
	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);

	/* copy the addr and size for the drctl call from the request msg */
	for (idx = 0; idx < count; idx++) {
		drctl_req[idx].res_mem_addr = req_mblks[idx].addr;
		drctl_req[idx].res_mem_size = req_mblks[idx].size;
	}

	rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
	    count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);

	ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));

	if (rv != 0) {
		DR_DBG_MEM("%s: drctl_config_init returned: %d\n",
		    __func__, rv);
		kmem_free(drctl_resp, drctl_resp_len);
		kmem_free(drctl_req, drctl_req_len);
		return (rv);
	}

	ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);

	drctl_rsrc = drctl_resp->resp_resources;

	/* create the result scratch array */
	res = dr_mem_res_array_init(req, drctl_rsrc, count);

	/* perform the specified operation on each of the mblks */
	for (idx = 0; idx < count; idx++) {
		/*
		 * If no action will be taken against the current
		 * mblk, update the drctl resource information to
		 * ensure that it gets recovered properly during
		 * the drctl fini() call.
		 */
		if (res[idx].result != DR_MEM_RES_OK) {
			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
			continue;
		}

		/* call the function to perform the actual operation */
		result = (*dr_fn)(&req_mblks[idx], &status);

		/* save off results of the operation */
		res[idx].result = result;
		res[idx].status = status;
		res[idx].addr = req_mblks[idx].addr;	/* for partial case */
		res[idx].size = req_mblks[idx].size;	/* for partial case */
		res[idx].string = i_ddi_strdup(dr_mem_estr[result], KM_SLEEP);

		/* save result for drctl fini() reusing init() msg memory */
		drctl_req[idx].status = (result != DR_MEM_RES_OK) ?
		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;

		DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n",
		    __func__, req_mblks[idx].addr, req_mblks[idx].size,
		    drctl_req[idx].status, result,
		    (res[idx].string) ? res[idx].string : "");
	}

	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
		DR_DBG_MEM("%s: drctl_config_fini returned: %d\n",
		    __func__, rv);

	/*
	 * Operation completed without any fatal errors.
	 * Pack the response for transmission.
	 */
	*resp_len = dr_mem_pack_response(req, res, resp);

	/* notify interested parties about the operation */
	dr_generate_event(DR_TYPE_MEM, se_hint);

	/*
	 * Deallocate any scratch memory.
	 */
	kmem_free(drctl_resp, drctl_resp_len);
	kmem_free(drctl_req, drctl_req_len);

	dr_mem_res_array_fini(res, count);

	return (0);
}
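/*
 * The drctl interaction in dr_mem_list_wrk() is three-phase:
 * drctl_config_init() asks the DR control layer to approve each
 * resource (per-resource verdicts come back in the drctl_rsrc_t
 * array), the approved mblks are then configured or unconfigured
 * one at a time, and drctl_config_fini() reports the final
 * per-resource status back using the same request buffer. This
 * comment is descriptive only.
 */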
/*
 * Allocate and initialize a result array based on the initial
 * drctl operation. A valid result array is always returned.
 */
static dr_mem_res_t *
dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
{
	int		idx;
	dr_mem_res_t	*res;
	char		*err_str;
	size_t		err_len;

	/* allocate zero filled buffer to initialize fields */
	res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP);

	/*
	 * Fill in the result information for each resource.
	 */
	for (idx = 0; idx < nrsrc; idx++) {
		res[idx].addr = rsrc[idx].res_mem_addr;
		res[idx].size = rsrc[idx].res_mem_size;
		res[idx].result = DR_MEM_RES_OK;

		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
			continue;

		/*
		 * Update the state information for this mblk.
		 */
		res[idx].result = DR_MEM_RES_BLOCKED;
		res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ?
		    DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED;

		/*
		 * If an error string exists, copy it out of the
		 * message buffer. This eliminates any dependency
		 * on the memory allocated for the message buffer
		 * itself.
		 */
		if (rsrc[idx].offset != NULL) {
			err_str = (char *)rsrc + rsrc[idx].offset;
			err_len = strlen(err_str) + 1;

			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
			bcopy(err_str, res[idx].string, err_len);
		}
	}

	return (res);
}

static void
dr_mem_res_array_fini(dr_mem_res_t *res, int nres)
{
	int	idx;
	size_t	str_len;

	for (idx = 0; idx < nres; idx++) {
		/* deallocate the error string if present */
		if (res[idx].string) {
			str_len = strlen(res[idx].string) + 1;
			kmem_free(res[idx].string, str_len);
		}
	}

	/* deallocate the result array itself */
	kmem_free(res, sizeof (dr_mem_res_t) * nres);
}
/*
 * Allocate and pack a response message for transmission based
 * on the specified result array. A valid response message and
 * valid size information is always returned.
 */
static size_t
dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, dr_mem_hdr_t **respp)
{
	int		idx;
	dr_mem_hdr_t	*resp;
	dr_mem_stat_t	*resp_stat;
	size_t		resp_len;
	uint32_t	curr_off;
	caddr_t		curr_str;
	size_t		str_len;
	size_t		stat_len;
	int		nstat = req->msg_arg;

	/*
	 * Calculate the size of the response message
	 * and allocate an appropriately sized buffer.
	 */
	resp_len = sizeof (dr_mem_hdr_t);

	/* add the stat array size */
	stat_len = sizeof (dr_mem_stat_t) * nstat;
	resp_len += stat_len;

	/* add the size of any error strings */
	for (idx = 0; idx < nstat; idx++) {
		if (res[idx].string != NULL) {
			resp_len += strlen(res[idx].string) + 1;
		}
	}

	/* allocate the message buffer */
	resp = kmem_zalloc(resp_len, KM_SLEEP);

	/*
	 * Fill in the header information.
	 */
	resp->req_num = req->req_num;
	resp->msg_type = DR_MEM_OK;
	resp->msg_arg = nstat;

	/*
	 * Fill in the stat information.
	 */
	resp_stat = DR_MEM_RESP_STATS(resp);

	/* string offsets start immediately after stat array */
	curr_off = sizeof (dr_mem_hdr_t) + stat_len;
	curr_str = (char *)resp_stat + stat_len;

	for (idx = 0; idx < nstat; idx++) {
		resp_stat[idx].addr = res[idx].addr;
		resp_stat[idx].size = res[idx].size;
		resp_stat[idx].result = res[idx].result;
		resp_stat[idx].status = res[idx].status;

		if (res[idx].string != NULL) {
			/* copy over the error string */
			str_len = strlen(res[idx].string) + 1;
			bcopy(res[idx].string, curr_str, str_len);
			resp_stat[idx].string_off = curr_off;

			curr_off += str_len;
			curr_str += str_len;
		}
	}

	/* buffer should be exactly filled */
	ASSERT(curr_off == resp_len);

	*respp = resp;
	return (resp_len);
}

static void
dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp)
{
	memquery_t mq;

	DR_DBG_MEM("dr_mem_query...\n");

	(void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq);

	if (!mq.phys_pages)
		return;

	mqp->addr = mbp->addr;
	mqp->mq.phys_pages = ptob(mq.phys_pages);
	mqp->mq.managed = ptob(mq.managed);
	mqp->mq.nonrelocatable = ptob(mq.nonrelocatable);
	mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable);
	mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable);
	/*
	 * Set to the max byte offset within the page.
	 */
	if (mqp->mq.nonrelocatable)
		mqp->mq.last_nonrelocatable += PAGESIZE - 1;
}
/*
 * Do not modify result buffer or length on error.
 */
static int
dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		idx;
	int		rlen;
	int		nml;
	struct memlist	*ml;
	dr_mem_blk_t	*req_mblks, mb;
	dr_mem_hdr_t	*rp;
	dr_mem_query_t	*stat;

	drctl_block();

	/* the incoming array of req_mblks to configure */
	req_mblks = DR_MEM_CMD_MBLKS(req);

	/* allocate a response message, should be freed by caller */
	nml = 0;
	rlen = sizeof (dr_mem_hdr_t);
	if (req_mblks->addr == NULL && req_mblks->size == 0) {
		/*
		 * Request is for the domain's full view of its memory.
		 */
		memlist_read_lock();
		for (ml = phys_install; ml; ml = ml->next)
			nml++;

		rlen += nml * sizeof (dr_mem_query_t);
	} else {
		rlen += req->msg_arg * sizeof (dr_mem_query_t);
	}
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = nml ? nml : req->msg_arg;

	/* stat array for the response */
	stat = DR_MEM_RESP_QUERY(rp);

	/* get the status for each of the mblocks */
	if (nml) {
		for (idx = 0, ml = phys_install; ml; ml = ml->next, idx++) {
			mb.addr = ml->address;
			mb.size = ml->size;
			dr_mem_query(&mb, &stat[idx]);
		}
		memlist_read_unlock();
	} else {
		for (idx = 0; idx < req->msg_arg; idx++)
			dr_mem_query(&req_mblks[idx], &stat[idx]);
	}

	*resp = rp;
	*resp_len = rlen;

	drctl_unblock();

	return (0);
}

static int
cvt_err(int err)
{
	int rv;

	switch (err) {
	case KPHYSM_OK:
		rv = DR_MEM_RES_OK;
		break;
	case KPHYSM_ESPAN:
		rv = DR_MEM_RES_ESPAN;
		break;
	case KPHYSM_EFAULT:
		rv = DR_MEM_RES_EFAULT;
		break;
	case KPHYSM_ERESOURCE:
		rv = DR_MEM_RES_ERESOURCE;
		break;
	case KPHYSM_ENOTSUP:
	case KPHYSM_ENOHANDLES:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENONRELOC:
		rv = DR_MEM_RES_PERM;
		break;
	case KPHYSM_EHANDLE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EBUSY:
		rv = DR_MEM_RES_EBUSY;
		break;
	case KPHYSM_ENOTVIABLE:
		rv = DR_MEM_RES_ENOTVIABLE;
		break;
	case KPHYSM_ESEQUENCE:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_ENOWORK:
		rv = DR_MEM_RES_ENOWORK;
		break;
	case KPHYSM_ECANCELLED:
		rv = DR_MEM_RES_ECANCELLED;
		break;
	case KPHYSM_EREFUSED:
		rv = DR_MEM_RES_EREFUSED;
		break;
	case KPHYSM_ENOTFINISHED:
	case KPHYSM_ENOTRUNNING:
		rv = DR_MEM_RES_FAILURE;
		break;
	case KPHYSM_EDUP:
		rv = DR_MEM_RES_EDUP;
		break;
	default:
		rv = DR_MEM_RES_FAILURE;
		break;
	}

	return (rv);
}

static int
dr_mem_configure(dr_mem_blk_t *mbp, int *status)
{
	int		rv;
	uint64_t	addr, size;

	rv = 0;
	addr = mbp->addr;
	size = mbp->size;

	DR_DBG_MEM("dr_mem_configure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size);
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = dr_mem_find(mbp)) {
		DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n",
		    addr, size, rv);
		if (rv == EINVAL) {
			*status = DR_MEM_STAT_NOT_PRESENT;
			rv = DR_MEM_RES_NOT_IN_MD;
		} else {
			*status = DR_MEM_STAT_UNCONFIGURED;
			rv = DR_MEM_RES_FAILURE;
		}
	} else {
		rv = mem_add(btop(addr), btop(size));
		DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv);
		if (rv) {
			*status = DR_MEM_STAT_UNCONFIGURED;
		} else {
			*status = DR_MEM_STAT_CONFIGURED;
		}
	}

	return (rv);
}

static int
dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status)
{
	int rv;

	DR_DBG_MEM("dr_mem_unconfigure...\n");

	if (!MBLK_IS_VALID(mbp)) {
		DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n",
		    mbp->addr, mbp->size);
		*status = DR_MEM_STAT_CONFIGURED;
		rv = DR_MEM_RES_EINVAL;
	} else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) {
		*status = DR_MEM_STAT_CONFIGURED;
	} else {
		*status = DR_MEM_STAT_UNCONFIGURED;
		rv = DR_MEM_RES_OK;
		DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n",
		    mbp->addr, mbp->size);
	}
	return (rv);
}
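/*
 * Unconfigure status and cancel both operate on dr_mh, the global
 * delete memhandle. mem_del() sets it for the duration of an active
 * delete (drctl serializes DR operations, so at most one delete is
 * in progress), which is how the two handlers below find the
 * operation they are reporting on or cancelling.
 */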
static int
dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		status;
	int		rlen;
	memdelstat_t	del_stat, *stat;
	dr_mem_hdr_t	*rp;

	/*
	 * If a mem delete is in progress, get its status.
	 */
	status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK));

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rlen += status * sizeof (memdelstat_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = status;

	if (status) {
		/* stat struct for the response */
		stat = DR_MEM_RESP_DEL_STAT(rp);
		stat->phys_pages = ptob(del_stat.phys_pages);
		stat->managed = ptob(del_stat.managed);
		stat->collected = ptob(del_stat.collected);
	}

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len)
{
	int		rlen;
	dr_mem_hdr_t	*rp;

	/* allocate a response message, should be freed by caller */
	rlen = sizeof (dr_mem_hdr_t);
	rp = kmem_zalloc(rlen, KM_SLEEP);

	/* fill in the known data */
	rp->req_num = req->req_num;
	rp->msg_type = DR_MEM_OK;
	rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ?
	    DR_MEM_RES_EINVAL : DR_MEM_RES_OK;

	*resp = rp;
	*resp_len = rlen;

	return (0);
}

static int
dr_mem_find(dr_mem_blk_t *mbp)
{
	md_t		*mdp = NULL;
	int		num_nodes;
	int		rv = 0;
	int		listsz;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	memnode;
	char		*found = "found";

	if ((mdp = md_get_handle()) == NULL) {
		DR_DBG_MEM("unable to initialize machine description\n");
		return (-1);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	memnode = dr_mem_find_node_md(mbp, mdp, listp);

	if (memnode == MDE_INVAL_ELEM_COOKIE) {
		rv = EINVAL;
		found = "not found";
	}

	DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found);

	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);

	return (rv);
}
/*
 * Look up a particular mblk in the MD. Returns the mde_cookie_t
 * representing that mblk if present, and MDE_INVAL_ELEM_COOKIE
 * otherwise. It is assumed the scratch array has already been
 * allocated so that it can accommodate the worst case scenario,
 * every node in the MD.
 */
static mde_cookie_t
dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp)
{
	int		idx;
	int		nnodes;
	mde_cookie_t	rootnode;
	uint64_t	base_prop;
	uint64_t	size_prop;
	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Scan the DAG for all the mem nodes
	 */
	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"),
	    md_find_name(mdp, "fwd"), listp);

	if (nnodes < 0) {
		DR_DBG_MEM("Scan for mblks failed\n");
		return (result);
	}

	DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes);

	/*
	 * Find the mblk of interest
	 */
	for (idx = 0; idx < nnodes; idx++) {

		if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) {
			DR_DBG_MEM("Missing 'base' property for mblk node "
			    "%d\n", idx);
			break;
		}

		if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) {
			DR_DBG_MEM("Missing 'size' property for mblk node "
			    "%d\n", idx);
			break;
		}

		if (base_prop <= mbp->addr &&
		    (base_prop + size_prop) >= (mbp->addr + mbp->size)) {
			/* found a match */
			DR_DBG_MEM("dr_mem_find_node_md: found mblk "
			    "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size);
			result = listp[idx];
			break;
		}
	}

	if (result == MDE_INVAL_ELEM_COOKIE) {
		DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n",
		    mbp->addr, mbp->size);
	}

	return (result);
}

static int
mem_add(pfn_t base, pgcnt_t npgs)
{
	int rv, rc;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	rv = kphysm_add_memory_dynamic(base, npgs);
	DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs,
	    rv);
	if (rv == KPHYSM_OK) {
		if (rc = kcage_range_add(base, npgs, KCAGE_DOWN))
			cmn_err(CE_WARN, "kcage_range_add() = %d", rc);
	}
	rv = cvt_err(rv);
	return (rv);
}

static void
del_done(void *arg, int error)
{
	mem_sync_t *ms = arg;

	mutex_enter(&ms->lock);
	ms->error = error;
	ms->done = 1;
	cv_signal(&ms->cond);
	mutex_exit(&ms->lock);
}
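/*
 * Memory delete is asynchronous: kphysm_del_start() queues the work
 * and invokes del_done() when the delete completes or is cancelled.
 * mem_del() below bridges that to a synchronous interface with a
 * mem_sync_t: it waits on the condvar until del_done() marks the
 * operation done, and a signal delivered to the waiting thread is
 * turned into a kphysm_del_cancel() followed by an uninterruptible
 * wait for the completion callback.
 */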
static int
mem_del(pfn_t base, pgcnt_t npgs)
{
	int		rv, err, del_range = 0;
	int		convert = 1;
	mem_sync_t	ms;
	memquery_t	mq;
	memhandle_t	mh;
	struct memlist	*ml;
	struct memlist	*d_ml = NULL;

	DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs);

	if (npgs == 0)
		return (DR_MEM_RES_OK);

	if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv);
		rv = cvt_err(rv);
		return (rv);
	}
	if ((rv = kphysm_del_span_query(base, npgs, &mq))
	    != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv);
		goto done;
	}
	if (mq.nonrelocatable) {
		DR_DBG_MEM("%s: non-reloc pages = %ld",
		    __func__, mq.nonrelocatable);
		rv = KPHYSM_ENONRELOC;
		goto done;
	}
	if (rv = kcage_range_delete(base, npgs)) {
		switch (rv) {
		case EBUSY:
			rv = DR_MEM_RES_ENOTVIABLE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0; /* conversion done */
		cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv);
		goto done;
	} else {
		del_range++;
	}
	if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) {
		cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv);
		goto done;
	}
	if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml))
	    != MEML_SPANOP_OK) {
		switch (rv) {
		case MEML_SPANOP_ESPAN:
			rv = DR_MEM_RES_ESPAN;
			break;
		case MEML_SPANOP_EALLOC:
			rv = DR_MEM_RES_ERESOURCE;
			break;
		default:
			rv = DR_MEM_RES_FAILURE;
			break;
		}
		convert = 0; /* conversion done */
		cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv);
		goto done;
	}

	DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs);

	bzero((void *) &ms, sizeof (ms));

	mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ms.cond, NULL, CV_DRIVER, NULL);
	mutex_enter(&ms.lock);

	if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) {
		/*
		 * Since we've called drctl_config_init, we are the only
		 * DR ctl operation in progress. Set dr_mh to the
		 * delete memhandle for use by stat and cancel.
		 */
		ASSERT(dr_mh == NULL);
		dr_mh = mh;

		/*
		 * Wait for completion or interrupt.
		 */
		while (!ms.done) {
			if (cv_wait_sig(&ms.cond, &ms.lock) == 0) {
				/*
				 * There is a pending signal.
				 */
				(void) kphysm_del_cancel(mh);
				DR_DBG_MEM("%s: cancel", __func__);
				/*
				 * Wait for completion.
				 */
				while (!ms.done)
					cv_wait(&ms.cond, &ms.lock);
			}
		}
		dr_mh = NULL;
		rv = ms.error;
	} else {
		DR_DBG_MEM("%s: del_start() = %d", __func__, rv);
	}

	mutex_exit(&ms.lock);
	cv_destroy(&ms.cond);
	mutex_destroy(&ms.lock);

done:
	if (rv && del_range) {
		/*
		 * Add back the spans to the kcage growth list.
		 */
		for (ml = d_ml; ml; ml = ml->next)
			if (err = kcage_range_add(btop(ml->address),
			    btop(ml->size), KCAGE_DOWN))
				cmn_err(CE_WARN, "kcage_range_add() = %d", err);
	}
	memlist_free_list(d_ml);

	if ((err = kphysm_del_release(mh)) != KPHYSM_OK)
		cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err);
	if (convert)
		rv = cvt_err(rv);

	DR_DBG_MEM("%s: rv=%d", __func__, rv);

	return (rv);
}