1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * sun4v Memory DR Module 28 */ 29 30 31 #include <sys/types.h> 32 #include <sys/cmn_err.h> 33 #include <sys/vmem.h> 34 #include <sys/kmem.h> 35 #include <sys/systm.h> 36 #include <sys/machsystm.h> /* for page_freelist_coalesce() */ 37 #include <sys/errno.h> 38 #include <sys/memnode.h> 39 #include <sys/memlist.h> 40 #include <sys/memlist_impl.h> 41 #include <sys/tuneable.h> 42 #include <sys/proc.h> 43 #include <sys/disp.h> 44 #include <sys/debug.h> 45 #include <sys/vm.h> 46 #include <sys/callb.h> 47 #include <sys/memlist_plat.h> /* for installed_top_size() */ 48 #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */ 49 #include <sys/dumphdr.h> /* for dump_resize() */ 50 #include <sys/atomic.h> /* for use in stats collection */ 51 #include <sys/rwlock.h> 52 #include <vm/seg_kmem.h> 53 #include <vm/seg_kpm.h> 54 #include <vm/page.h> 55 #include <vm/vm_dep.h> 56 #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */ 57 #include <sys/sunddi.h> 58 #include <sys/mem_config.h> 59 #include <sys/mem_cage.h> 60 #include <sys/lgrp.h> 61 #include <sys/ddi.h> 62 63 #include <sys/modctl.h> 64 #include <sys/sysevent/dr.h> 65 #include <sys/mach_descrip.h> 66 #include <sys/mdesc.h> 67 #include <sys/ds.h> 68 #include <sys/drctl.h> 69 #include <sys/dr_util.h> 70 #include <sys/dr_mem.h> 71 #include <sys/suspend.h> 72 73 74 /* 75 * DR operations are subject to Memory Alignment restrictions 76 * for both address and the size of the request. 77 */ 78 #define MA_ADDR 0x10000000 /* addr alignment 256M */ 79 #define MA_SIZE 0x10000000 /* size alignment 256M */ 80 81 #define MBLK_IS_VALID(m) \ 82 (IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE)) 83 84 static memhandle_t dr_mh; /* memory handle for delete */ 85 86 static struct modlmisc modlmisc = { 87 &mod_miscops, 88 "sun4v memory DR" 89 }; 90 91 static struct modlinkage modlinkage = { 92 MODREV_1, 93 (void *)&modlmisc, 94 NULL 95 }; 96 97 static int dr_mem_allow_unload = 0; 98 99 typedef int (*fn_t)(dr_mem_blk_t *, int *); 100 101 /* 102 * Global Domain Services (DS) Handle 103 */ 104 static ds_svc_hdl_t ds_handle; 105 106 /* 107 * Supported DS Capability Versions 108 */ 109 static ds_ver_t dr_mem_vers[] = { { 1, 0 } }; 110 #define DR_MEM_NVERS (sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0])) 111 112 /* 113 * DS Capability Description 114 */ 115 static ds_capability_t dr_mem_cap = { 116 DR_MEM_DS_ID, /* svc_id */ 117 dr_mem_vers, /* vers */ 118 DR_MEM_NVERS /* nvers */ 119 }; 120 121 /* 122 * DS Callbacks 123 */ 124 static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t); 125 static void dr_mem_unreg_handler(ds_cb_arg_t arg); 126 static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); 127 128 /* 129 * DS Client Ops Vector 130 */ 131 static ds_clnt_ops_t dr_mem_ops = { 132 dr_mem_reg_handler, /* ds_reg_cb */ 133 dr_mem_unreg_handler, /* ds_unreg_cb */ 134 dr_mem_data_handler, /* ds_data_cb */ 135 NULL /* cb_arg */ 136 }; 137 138 /* 139 * Operation Results 140 * 141 * Used internally to gather results while an operation on a 142 * list of mblks is in progress. In particular, it is used to 143 * keep track of which mblks have already failed so that they are 144 * not processed further, and the manner in which they failed. 145 */ 146 typedef struct { 147 uint64_t addr; 148 uint64_t size; 149 uint32_t result; 150 uint32_t status; 151 char *string; 152 } dr_mem_res_t; 153 154 static char * 155 dr_mem_estr[] = { 156 "operation succeeded", /* DR_MEM_RES_OK */ 157 "operation failed", /* DR_MEM_RES_FAILURE */ 158 "operation was blocked", /* DR_MEM_RES_BLOCKED */ 159 "memory not defined in MD", /* DR_MEM_RES_NOT_IN_MD */ 160 "memory already in use", /* DR_MEM_RES_ESPAN */ 161 "memory access test failed", /* DR_MEM_RES_EFAULT */ 162 "resource not available", /* DR_MEM_RES_ERESOURCE */ 163 "permanent pages in span", /* DR_MEM_RES_PERM */ 164 "memory span busy", /* DR_MEM_RES_EBUSY */ 165 "VM viability test failed", /* DR_MEM_RES_ENOTVIABLE */ 166 "no pages to unconfigure", /* DR_MEM_RES_ENOWORK */ 167 "operation cancelled", /* DR_MEM_RES_ECANCELLED */ 168 "operation refused", /* DR_MEM_RES_EREFUSED */ 169 "memory span duplicate", /* DR_MEM_RES_EDUP */ 170 "invalid argument" /* DR_MEM_RES_EINVAL */ 171 }; 172 173 static char * 174 dr_mem_estr_detail[] = { 175 "", /* DR_MEM_SRES_NONE */ 176 "memory DR disabled after migration" /* DR_MEM_SRES_OS_SUSPENDED */ 177 }; 178 179 typedef struct { 180 kcondvar_t cond; 181 kmutex_t lock; 182 int error; 183 int done; 184 } mem_sync_t; 185 186 /* 187 * Internal Functions 188 */ 189 static int dr_mem_init(void); 190 static int dr_mem_fini(void); 191 192 static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 193 static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 194 static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 195 static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 196 197 static int dr_mem_unconfigure(dr_mem_blk_t *, int *); 198 static int dr_mem_configure(dr_mem_blk_t *, int *); 199 static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *); 200 201 static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *, int); 202 static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres); 203 static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, 204 dr_mem_hdr_t **respp); 205 206 static int dr_mem_find(dr_mem_blk_t *mbp); 207 static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *, mde_cookie_t *); 208 209 static int mem_add(pfn_t, pgcnt_t); 210 static int mem_del(pfn_t, pgcnt_t); 211 212 extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t); 213 214 int 215 _init(void) 216 { 217 int status; 218 219 /* check that Memory DR is enabled */ 220 if (dr_is_disabled(DR_TYPE_MEM)) 221 return (ENOTSUP); 222 223 if ((status = dr_mem_init()) != 0) { 224 cmn_err(CE_NOTE, "Memory DR initialization failed"); 225 return (status); 226 } 227 228 if ((status = mod_install(&modlinkage)) != 0) { 229 (void) dr_mem_fini(); 230 } 231 232 return (status); 233 } 234 235 int 236 _info(struct modinfo *modinfop) 237 { 238 return (mod_info(&modlinkage, modinfop)); 239 } 240 241 int 242 _fini(void) 243 { 244 int status; 245 246 if (dr_mem_allow_unload == 0) 247 return (EBUSY); 248 249 if ((status = mod_remove(&modlinkage)) == 0) { 250 (void) dr_mem_fini(); 251 } 252 253 return (status); 254 } 255 256 static int 257 dr_mem_init(void) 258 { 259 int rv; 260 261 if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) { 262 cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv); 263 return (rv); 264 } 265 266 return (0); 267 } 268 269 static int 270 dr_mem_fini(void) 271 { 272 int rv; 273 274 if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) { 275 cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv); 276 } 277 278 return (rv); 279 } 280 281 static void 282 dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl) 283 { 284 DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg, 285 ver->major, ver->minor, hdl); 286 287 ds_handle = hdl; 288 } 289 290 static void 291 dr_mem_unreg_handler(ds_cb_arg_t arg) 292 { 293 DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg); 294 295 ds_handle = DS_INVALID_HDL; 296 } 297 298 /*ARGSUSED*/ 299 static void 300 dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) 301 { 302 dr_mem_hdr_t *req = buf; 303 dr_mem_hdr_t err_resp; 304 dr_mem_hdr_t *resp = &err_resp; 305 int resp_len = 0; 306 int rv = EINVAL; 307 308 /* 309 * Sanity check the message 310 */ 311 if (buflen < sizeof (dr_mem_hdr_t)) { 312 DR_DBG_MEM("incoming message short: expected at least %ld " 313 "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen); 314 goto done; 315 } 316 317 if (req == NULL) { 318 DR_DBG_MEM("empty message: expected at least %ld bytes\n", 319 sizeof (dr_mem_hdr_t)); 320 goto done; 321 } 322 323 DR_DBG_MEM("incoming request:\n"); 324 DR_DBG_DUMP_MSG(buf, buflen); 325 326 /* 327 * Process the command 328 */ 329 switch (req->msg_type) { 330 case DR_MEM_CONFIGURE: 331 case DR_MEM_UNCONFIGURE: 332 if (req->msg_arg == 0) { 333 DR_DBG_MEM("No mblks specified for operation\n"); 334 goto done; 335 } 336 if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) { 337 DR_DBG_MEM("%s failed (%d)\n", 338 (req->msg_type == DR_MEM_CONFIGURE) ? 339 "Memory configure" : "Memory unconfigure", rv); 340 } 341 break; 342 343 case DR_MEM_UNCONF_STATUS: 344 if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0) 345 DR_DBG_MEM("Memory delete status failed (%d)\n", rv); 346 break; 347 348 case DR_MEM_UNCONF_CANCEL: 349 if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0) 350 DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv); 351 break; 352 353 case DR_MEM_QUERY: 354 if (req->msg_arg == 0) { 355 DR_DBG_MEM("No mblks specified for operation\n"); 356 goto done; 357 } 358 if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0) 359 DR_DBG_MEM("Memory query failed (%d)\n", rv); 360 break; 361 362 default: 363 cmn_err(CE_NOTE, "unsupported memory DR operation (%d)", 364 req->msg_type); 365 break; 366 } 367 368 done: 369 /* check if an error occurred */ 370 if (resp == &err_resp) { 371 resp->req_num = (req) ? req->req_num : 0; 372 resp->msg_type = DR_MEM_ERROR; 373 resp->msg_arg = rv; 374 resp_len = sizeof (dr_mem_hdr_t); 375 } 376 377 DR_DBG_MEM("outgoing response:\n"); 378 DR_DBG_DUMP_MSG(resp, resp_len); 379 380 /* send back the response */ 381 if (ds_cap_send(ds_handle, resp, resp_len) != 0) { 382 DR_DBG_MEM("ds_send failed\n"); 383 } 384 385 /* free any allocated memory */ 386 if (resp != &err_resp) { 387 kmem_free(resp, resp_len); 388 } 389 } 390 391 static char * 392 dr_mem_get_errstr(int result, int subresult) 393 { 394 size_t len; 395 char *errstr; 396 const char *separator = ": "; 397 398 if (subresult == DR_MEM_SRES_NONE) 399 return (i_ddi_strdup(dr_mem_estr[result], KM_SLEEP)); 400 401 len = snprintf(NULL, 0, "%s%s%s", dr_mem_estr[result], 402 separator, dr_mem_estr_detail[subresult]) + 1; 403 404 errstr = kmem_alloc(len, KM_SLEEP); 405 406 (void) snprintf(errstr, len, "%s%s%s", dr_mem_estr[result], 407 separator, dr_mem_estr_detail[subresult]); 408 409 return (errstr); 410 } 411 412 /* 413 * Common routine to config or unconfig multiple mblks. 414 * 415 * Note: Do not modify result buffer or length on error. 416 */ 417 static int 418 dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 419 { 420 int rv; 421 int idx; 422 int count; 423 int result; 424 int subresult; 425 int status; 426 boolean_t suspend_allows_dr; 427 fn_t dr_fn; 428 int se_hint; 429 dr_mem_blk_t *req_mblks; 430 dr_mem_res_t *res; 431 int drctl_cmd; 432 int drctl_flags = 0; 433 drctl_rsrc_t *drctl_req; 434 size_t drctl_req_len; 435 drctl_resp_t *drctl_resp; 436 drctl_rsrc_t *drctl_rsrc; 437 size_t drctl_resp_len = 0; 438 drctl_cookie_t drctl_res_ck; 439 440 ASSERT((req != NULL) && (req->msg_arg != 0)); 441 442 count = req->msg_arg; 443 444 /* 445 * Extract all information that is specific 446 * to the various types of operations. 447 */ 448 switch (req->msg_type) { 449 case DR_MEM_CONFIGURE: 450 dr_fn = dr_mem_configure; 451 drctl_cmd = DRCTL_MEM_CONFIG_REQUEST; 452 se_hint = SE_HINT_INSERT; 453 break; 454 case DR_MEM_UNCONFIGURE: 455 dr_fn = dr_mem_unconfigure; 456 drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST; 457 se_hint = SE_HINT_REMOVE; 458 break; 459 default: 460 /* Programming error if we reach this. */ 461 cmn_err(CE_NOTE, "%s: bad msg_type %d\n", 462 __func__, req->msg_type); 463 ASSERT(0); 464 return (-1); 465 } 466 467 /* the incoming array of mblks to operate on */ 468 req_mblks = DR_MEM_CMD_MBLKS(req); 469 470 /* allocate drctl request msg based on incoming resource count */ 471 drctl_req_len = sizeof (drctl_rsrc_t) * count; 472 drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP); 473 474 /* copy the size for the drctl call from the incoming request msg */ 475 for (idx = 0; idx < count; idx++) { 476 drctl_req[idx].res_mem_addr = req_mblks[idx].addr; 477 drctl_req[idx].res_mem_size = req_mblks[idx].size; 478 } 479 480 rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req, 481 count, &drctl_resp, &drctl_resp_len, &drctl_res_ck); 482 483 ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0)); 484 485 if (rv != 0) { 486 DR_DBG_MEM("%s: drctl_config_init returned: %d\n", 487 __func__, rv); 488 kmem_free(drctl_resp, drctl_resp_len); 489 kmem_free(drctl_req, drctl_req_len); 490 return (rv); 491 } 492 493 ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK); 494 495 drctl_rsrc = drctl_resp->resp_resources; 496 497 /* create the result scratch array */ 498 res = dr_mem_res_array_init(req, drctl_rsrc, count); 499 500 /* 501 * Memory DR operations are not safe if we have been suspended and 502 * resumed. Until this limitation is lifted, check to see if memory 503 * DR operations are permitted at this time by the suspend subsystem. 504 */ 505 if ((suspend_allows_dr = suspend_memdr_allowed()) == B_FALSE) { 506 result = DR_MEM_RES_BLOCKED; 507 subresult = DR_MEM_SRES_OS_SUSPENDED; 508 } else { 509 subresult = DR_MEM_SRES_NONE; 510 } 511 512 /* perform the specified operation on each of the mblks */ 513 for (idx = 0; idx < count; idx++) { 514 /* 515 * If no action will be taken against the current 516 * mblk, update the drctl resource information to 517 * ensure that it gets recovered properly during 518 * the drctl fini() call. 519 */ 520 if (res[idx].result != DR_MEM_RES_OK) { 521 drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE; 522 continue; 523 } 524 525 /* 526 * If memory DR operations are permitted at this time by 527 * the suspend subsystem, call the function to perform the 528 * operation, otherwise return a result indicating that the 529 * operation was blocked. 530 */ 531 if (suspend_allows_dr) 532 result = (*dr_fn)(&req_mblks[idx], &status); 533 534 /* save off results of the operation */ 535 res[idx].result = result; 536 res[idx].status = status; 537 res[idx].addr = req_mblks[idx].addr; /* for partial case */ 538 res[idx].size = req_mblks[idx].size; /* for partial case */ 539 res[idx].string = dr_mem_get_errstr(result, subresult); 540 541 /* save result for drctl fini() reusing init() msg memory */ 542 drctl_req[idx].status = (result != DR_MEM_RES_OK) ? 543 DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS; 544 545 DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n", 546 __func__, req_mblks[idx].addr, req_mblks[idx].size, 547 drctl_req[idx].status, result, 548 (res[idx].string) ? res[idx].string : ""); 549 } 550 551 if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0) 552 DR_DBG_MEM("%s: drctl_config_fini returned: %d\n", 553 __func__, rv); 554 555 /* 556 * Operation completed without any fatal errors. 557 * Pack the response for transmission. 558 */ 559 *resp_len = dr_mem_pack_response(req, res, resp); 560 561 /* notify interested parties about the operation */ 562 dr_generate_event(DR_TYPE_MEM, se_hint); 563 564 /* 565 * Deallocate any scratch memory. 566 */ 567 kmem_free(drctl_resp, drctl_resp_len); 568 kmem_free(drctl_req, drctl_req_len); 569 570 dr_mem_res_array_fini(res, count); 571 572 return (0); 573 } 574 575 /* 576 * Allocate and initialize a result array based on the initial 577 * drctl operation. A valid result array is always returned. 578 */ 579 static dr_mem_res_t * 580 dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc) 581 { 582 int idx; 583 dr_mem_res_t *res; 584 char *err_str; 585 size_t err_len; 586 587 /* allocate zero filled buffer to initialize fields */ 588 res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP); 589 590 /* 591 * Fill in the result information for each resource. 592 */ 593 for (idx = 0; idx < nrsrc; idx++) { 594 res[idx].addr = rsrc[idx].res_mem_addr; 595 res[idx].size = rsrc[idx].res_mem_size; 596 res[idx].result = DR_MEM_RES_OK; 597 598 if (rsrc[idx].status == DRCTL_STATUS_ALLOW) 599 continue; 600 601 /* 602 * Update the state information for this mblk. 603 */ 604 res[idx].result = DR_MEM_RES_BLOCKED; 605 res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ? 606 DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED; 607 608 /* 609 * If an error string exists, copy it out of the 610 * message buffer. This eliminates any dependency 611 * on the memory allocated for the message buffer 612 * itself. 613 */ 614 if (rsrc[idx].offset != 0) { 615 err_str = (char *)rsrc + rsrc[idx].offset; 616 err_len = strlen(err_str) + 1; 617 618 res[idx].string = kmem_alloc(err_len, KM_SLEEP); 619 bcopy(err_str, res[idx].string, err_len); 620 } 621 } 622 623 return (res); 624 } 625 626 static void 627 dr_mem_res_array_fini(dr_mem_res_t *res, int nres) 628 { 629 int idx; 630 size_t str_len; 631 632 for (idx = 0; idx < nres; idx++) { 633 /* deallocate the error string if present */ 634 if (res[idx].string) { 635 str_len = strlen(res[idx].string) + 1; 636 kmem_free(res[idx].string, str_len); 637 } 638 } 639 640 /* deallocate the result array itself */ 641 kmem_free(res, sizeof (dr_mem_res_t) * nres); 642 } 643 644 /* 645 * Allocate and pack a response message for transmission based 646 * on the specified result array. A valid response message and 647 * valid size information is always returned. 648 */ 649 static size_t 650 dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, dr_mem_hdr_t **respp) 651 { 652 int idx; 653 dr_mem_hdr_t *resp; 654 dr_mem_stat_t *resp_stat; 655 size_t resp_len; 656 uint32_t curr_off; 657 caddr_t curr_str; 658 size_t str_len; 659 size_t stat_len; 660 int nstat = req->msg_arg; 661 662 /* 663 * Calculate the size of the response message 664 * and allocate an appropriately sized buffer. 665 */ 666 resp_len = sizeof (dr_mem_hdr_t); 667 668 /* add the stat array size */ 669 stat_len = sizeof (dr_mem_stat_t) * nstat; 670 resp_len += stat_len; 671 672 /* add the size of any error strings */ 673 for (idx = 0; idx < nstat; idx++) { 674 if (res[idx].string != NULL) { 675 resp_len += strlen(res[idx].string) + 1; 676 } 677 } 678 679 /* allocate the message buffer */ 680 resp = kmem_zalloc(resp_len, KM_SLEEP); 681 682 /* 683 * Fill in the header information. 684 */ 685 resp->req_num = req->req_num; 686 resp->msg_type = DR_MEM_OK; 687 resp->msg_arg = nstat; 688 689 /* 690 * Fill in the stat information. 691 */ 692 resp_stat = DR_MEM_RESP_STATS(resp); 693 694 /* string offsets start immediately after stat array */ 695 curr_off = sizeof (dr_mem_hdr_t) + stat_len; 696 curr_str = (char *)resp_stat + stat_len; 697 698 for (idx = 0; idx < nstat; idx++) { 699 resp_stat[idx].addr = res[idx].addr; 700 resp_stat[idx].size = res[idx].size; 701 resp_stat[idx].result = res[idx].result; 702 resp_stat[idx].status = res[idx].status; 703 704 if (res[idx].string != NULL) { 705 /* copy over the error string */ 706 str_len = strlen(res[idx].string) + 1; 707 bcopy(res[idx].string, curr_str, str_len); 708 resp_stat[idx].string_off = curr_off; 709 710 curr_off += str_len; 711 curr_str += str_len; 712 } 713 } 714 715 /* buffer should be exactly filled */ 716 ASSERT(curr_off == resp_len); 717 718 *respp = resp; 719 return (resp_len); 720 } 721 722 static void 723 dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp) 724 { 725 memquery_t mq; 726 727 DR_DBG_MEM("dr_mem_query...\n"); 728 729 730 (void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq); 731 732 if (!mq.phys_pages) 733 return; 734 735 mqp->addr = mbp->addr; 736 mqp->mq.phys_pages = ptob(mq.phys_pages); 737 mqp->mq.managed = ptob(mq.managed); 738 mqp->mq.nonrelocatable = ptob(mq.nonrelocatable); 739 mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable); 740 mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable); 741 /* 742 * Set to the max byte offset within the page. 743 */ 744 if (mqp->mq.nonrelocatable) 745 mqp->mq.last_nonrelocatable += PAGESIZE - 1; 746 } 747 748 /* 749 * Do not modify result buffer or length on error. 750 */ 751 static int 752 dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 753 { 754 int idx; 755 int rlen; 756 int nml; 757 struct memlist *ml; 758 struct memlist *phys_copy = NULL; 759 dr_mem_blk_t *req_mblks, mb; 760 dr_mem_hdr_t *rp; 761 dr_mem_query_t *stat; 762 763 drctl_block(); 764 765 /* the incoming array of req_mblks to configure */ 766 req_mblks = DR_MEM_CMD_MBLKS(req); 767 768 /* allocate a response message, should be freed by caller */ 769 nml = 0; 770 rlen = sizeof (dr_mem_hdr_t); 771 if (req_mblks->addr == 0 && req_mblks->size == 0) { 772 /* 773 * Request is for domain's full view of it's memory. 774 * place a copy in phys_copy then release the memlist lock. 775 */ 776 memlist_read_lock(); 777 phys_copy = dr_memlist_dup(phys_install); 778 memlist_read_unlock(); 779 780 for (ml = phys_copy; ml; ml = ml->ml_next) 781 nml++; 782 783 rlen += nml * sizeof (dr_mem_query_t); 784 } else { 785 rlen += req->msg_arg * sizeof (dr_mem_query_t); 786 } 787 rp = kmem_zalloc(rlen, KM_SLEEP); 788 789 /* fill in the known data */ 790 rp->req_num = req->req_num; 791 rp->msg_type = DR_MEM_OK; 792 rp->msg_arg = nml ? nml : req->msg_arg; 793 794 /* stat array for the response */ 795 stat = DR_MEM_RESP_QUERY(rp); 796 797 /* get the status for each of the mblocks */ 798 if (nml) { 799 for (idx = 0, ml = phys_copy; ml; ml = ml->ml_next, idx++) { 800 mb.addr = ml->ml_address; 801 mb.size = ml->ml_size; 802 dr_mem_query(&mb, &stat[idx]); 803 } 804 } else { 805 for (idx = 0; idx < req->msg_arg; idx++) 806 dr_mem_query(&req_mblks[idx], &stat[idx]); 807 } 808 809 *resp = rp; 810 *resp_len = rlen; 811 if (phys_copy != NULL) { 812 dr_memlist_delete(phys_copy); 813 } 814 drctl_unblock(); 815 816 return (0); 817 } 818 819 static int 820 cvt_err(int err) 821 { 822 int rv; 823 824 switch (err) { 825 case KPHYSM_OK: 826 rv = DR_MEM_RES_OK; 827 break; 828 case KPHYSM_ESPAN: 829 rv = DR_MEM_RES_ESPAN; 830 break; 831 case KPHYSM_EFAULT: 832 rv = DR_MEM_RES_EFAULT; 833 break; 834 case KPHYSM_ERESOURCE: 835 rv = DR_MEM_RES_ERESOURCE; 836 break; 837 case KPHYSM_ENOTSUP: 838 case KPHYSM_ENOHANDLES: 839 rv = DR_MEM_RES_FAILURE; 840 break; 841 case KPHYSM_ENONRELOC: 842 rv = DR_MEM_RES_PERM; 843 break; 844 case KPHYSM_EHANDLE: 845 rv = DR_MEM_RES_FAILURE; 846 break; 847 case KPHYSM_EBUSY: 848 rv = DR_MEM_RES_EBUSY; 849 break; 850 case KPHYSM_ENOTVIABLE: 851 rv = DR_MEM_RES_ENOTVIABLE; 852 break; 853 case KPHYSM_ESEQUENCE: 854 rv = DR_MEM_RES_FAILURE; 855 break; 856 case KPHYSM_ENOWORK: 857 rv = DR_MEM_RES_ENOWORK; 858 break; 859 case KPHYSM_ECANCELLED: 860 rv = DR_MEM_RES_ECANCELLED; 861 break; 862 case KPHYSM_EREFUSED: 863 rv = DR_MEM_RES_EREFUSED; 864 break; 865 case KPHYSM_ENOTFINISHED: 866 case KPHYSM_ENOTRUNNING: 867 rv = DR_MEM_RES_FAILURE; 868 break; 869 case KPHYSM_EDUP: 870 rv = DR_MEM_RES_EDUP; 871 break; 872 default: 873 rv = DR_MEM_RES_FAILURE; 874 break; 875 } 876 877 return (rv); 878 } 879 880 static int 881 dr_mem_configure(dr_mem_blk_t *mbp, int *status) 882 { 883 int rv; 884 uint64_t addr, size; 885 886 rv = 0; 887 addr = mbp->addr; 888 size = mbp->size; 889 890 DR_DBG_MEM("dr_mem_configure...\n"); 891 892 if (!MBLK_IS_VALID(mbp)) { 893 DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size); 894 *status = DR_MEM_STAT_UNCONFIGURED; 895 rv = DR_MEM_RES_EINVAL; 896 } else if (rv = dr_mem_find(mbp)) { 897 DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n", 898 addr, size, rv); 899 if (rv == EINVAL) { 900 *status = DR_MEM_STAT_NOT_PRESENT; 901 rv = DR_MEM_RES_NOT_IN_MD; 902 } else { 903 *status = DR_MEM_STAT_UNCONFIGURED; 904 rv = DR_MEM_RES_FAILURE; 905 } 906 } else { 907 rv = mem_add(btop(addr), btop(size)); 908 DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv); 909 if (rv) { 910 *status = DR_MEM_STAT_UNCONFIGURED; 911 } else { 912 *status = DR_MEM_STAT_CONFIGURED; 913 } 914 } 915 916 return (rv); 917 } 918 919 static int 920 dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status) 921 { 922 int rv; 923 924 DR_DBG_MEM("dr_mem_unconfigure...\n"); 925 926 if (!MBLK_IS_VALID(mbp)) { 927 DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", 928 mbp->addr, mbp->size); 929 *status = DR_MEM_STAT_CONFIGURED; 930 rv = DR_MEM_RES_EINVAL; 931 } else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) { 932 *status = DR_MEM_STAT_CONFIGURED; 933 } else { 934 *status = DR_MEM_STAT_UNCONFIGURED; 935 rv = DR_MEM_RES_OK; 936 DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n", 937 mbp->addr, mbp->size); 938 } 939 return (rv); 940 } 941 942 static int 943 dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 944 { 945 int status; 946 int rlen; 947 memdelstat_t del_stat, *stat; 948 dr_mem_hdr_t *rp; 949 950 /* 951 * If a mem delete is in progress, get its status. 952 */ 953 status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK)); 954 955 /* allocate a response message, should be freed by caller */ 956 rlen = sizeof (dr_mem_hdr_t); 957 rlen += status * sizeof (memdelstat_t); 958 rp = kmem_zalloc(rlen, KM_SLEEP); 959 960 /* fill in the known data */ 961 rp->req_num = req->req_num; 962 rp->msg_type = DR_MEM_OK; 963 rp->msg_arg = status; 964 965 if (status) { 966 /* stat struct for the response */ 967 stat = DR_MEM_RESP_DEL_STAT(rp); 968 stat->phys_pages = ptob(del_stat.phys_pages); 969 stat->managed = ptob(del_stat.managed); 970 stat->collected = ptob(del_stat.collected); 971 } 972 973 *resp = rp; 974 *resp_len = rlen; 975 976 return (0); 977 } 978 979 static int 980 dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 981 { 982 int rlen; 983 dr_mem_hdr_t *rp; 984 985 /* allocate a response message, should be freed by caller */ 986 rlen = sizeof (dr_mem_hdr_t); 987 rp = kmem_zalloc(rlen, KM_SLEEP); 988 989 /* fill in the known data */ 990 rp->req_num = req->req_num; 991 rp->msg_type = DR_MEM_OK; 992 rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ? 993 DR_MEM_RES_EINVAL : DR_MEM_RES_OK; 994 995 *resp = rp; 996 *resp_len = rlen; 997 998 return (0); 999 } 1000 1001 static int 1002 dr_mem_find(dr_mem_blk_t *mbp) 1003 { 1004 md_t *mdp = NULL; 1005 int num_nodes; 1006 int rv = 0; 1007 int listsz; 1008 mde_cookie_t *listp = NULL; 1009 mde_cookie_t memnode; 1010 char *found = "found"; 1011 1012 if ((mdp = md_get_handle()) == NULL) { 1013 DR_DBG_MEM("unable to initialize machine description\n"); 1014 return (-1); 1015 } 1016 1017 num_nodes = md_node_count(mdp); 1018 ASSERT(num_nodes > 0); 1019 1020 listsz = num_nodes * sizeof (mde_cookie_t); 1021 listp = kmem_zalloc(listsz, KM_SLEEP); 1022 1023 memnode = dr_mem_find_node_md(mbp, mdp, listp); 1024 1025 if (memnode == MDE_INVAL_ELEM_COOKIE) { 1026 rv = EINVAL; 1027 found = "not found"; 1028 } 1029 1030 DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found); 1031 1032 kmem_free(listp, listsz); 1033 (void) md_fini_handle(mdp); 1034 1035 return (rv); 1036 } 1037 1038 /* 1039 * Look up a particular mblk in the MD. Returns the mde_cookie_t 1040 * representing that mblk if present, and MDE_INVAL_ELEM_COOKIE 1041 * otherwise. It is assumed the scratch array has already been 1042 * allocated so that it can accommodate the worst case scenario, 1043 * every node in the MD. 1044 */ 1045 static mde_cookie_t 1046 dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp) 1047 { 1048 int idx; 1049 int nnodes; 1050 mde_cookie_t rootnode; 1051 uint64_t base_prop; 1052 uint64_t size_prop; 1053 mde_cookie_t result = MDE_INVAL_ELEM_COOKIE; 1054 1055 rootnode = md_root_node(mdp); 1056 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1057 1058 /* 1059 * Scan the DAG for all the mem nodes 1060 */ 1061 nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"), 1062 md_find_name(mdp, "fwd"), listp); 1063 1064 if (nnodes < 0) { 1065 DR_DBG_MEM("Scan for mblks failed\n"); 1066 return (result); 1067 } 1068 1069 DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes); 1070 1071 /* 1072 * Find the mblk of interest 1073 */ 1074 for (idx = 0; idx < nnodes; idx++) { 1075 1076 if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) { 1077 DR_DBG_MEM("Missing 'base' property for mblk node %d\n", 1078 idx); 1079 break; 1080 } 1081 1082 if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) { 1083 DR_DBG_MEM("Missing 'size' property for mblk node %d\n", 1084 idx); 1085 break; 1086 } 1087 1088 if (base_prop <= mbp->addr && 1089 (base_prop + size_prop) >= (mbp->addr + mbp->size)) { 1090 /* found a match */ 1091 DR_DBG_MEM("dr_mem_find_node_md: found mblk " 1092 "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size); 1093 result = listp[idx]; 1094 break; 1095 } 1096 } 1097 1098 if (result == MDE_INVAL_ELEM_COOKIE) { 1099 DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n", 1100 mbp->addr, mbp->size); 1101 } 1102 1103 return (result); 1104 } 1105 1106 static int 1107 mem_add(pfn_t base, pgcnt_t npgs) 1108 { 1109 int rv, rc; 1110 1111 DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); 1112 1113 if (npgs == 0) 1114 return (DR_MEM_RES_OK); 1115 1116 rv = kphysm_add_memory_dynamic(base, npgs); 1117 DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs, 1118 rv); 1119 if (rv == KPHYSM_OK) { 1120 if (rc = kcage_range_add(base, npgs, KCAGE_DOWN)) 1121 cmn_err(CE_WARN, "kcage_range_add() = %d", rc); 1122 } 1123 rv = cvt_err(rv); 1124 return (rv); 1125 } 1126 1127 static void 1128 del_done(void *arg, int error) 1129 { 1130 mem_sync_t *ms = arg; 1131 1132 mutex_enter(&ms->lock); 1133 ms->error = error; 1134 ms->done = 1; 1135 cv_signal(&ms->cond); 1136 mutex_exit(&ms->lock); 1137 } 1138 1139 static int 1140 mem_del(pfn_t base, pgcnt_t npgs) 1141 { 1142 int rv, err, del_range = 0; 1143 int convert = 1; 1144 mem_sync_t ms; 1145 memquery_t mq; 1146 memhandle_t mh; 1147 struct memlist *ml; 1148 struct memlist *d_ml = NULL; 1149 1150 DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); 1151 1152 if (npgs == 0) 1153 return (DR_MEM_RES_OK); 1154 1155 if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) { 1156 cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv); 1157 rv = cvt_err(rv); 1158 return (rv); 1159 } 1160 if ((rv = kphysm_del_span_query(base, npgs, &mq)) 1161 != KPHYSM_OK) { 1162 cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv); 1163 goto done; 1164 } 1165 if (mq.nonrelocatable) { 1166 DR_DBG_MEM("%s: non-reloc pages = %ld", 1167 __func__, mq.nonrelocatable); 1168 rv = KPHYSM_ENONRELOC; 1169 goto done; 1170 } 1171 if (rv = kcage_range_delete(base, npgs)) { 1172 switch (rv) { 1173 case EBUSY: 1174 rv = DR_MEM_RES_ENOTVIABLE; 1175 break; 1176 default: 1177 rv = DR_MEM_RES_FAILURE; 1178 break; 1179 } 1180 convert = 0; /* conversion done */ 1181 cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv); 1182 goto done; 1183 } else { 1184 del_range++; 1185 } 1186 if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) { 1187 cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv); 1188 goto done; 1189 } 1190 if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml)) 1191 != MEML_SPANOP_OK) { 1192 switch (rv) { 1193 case MEML_SPANOP_ESPAN: 1194 rv = DR_MEM_RES_ESPAN; 1195 break; 1196 case MEML_SPANOP_EALLOC: 1197 rv = DR_MEM_RES_ERESOURCE; 1198 break; 1199 default: 1200 rv = DR_MEM_RES_FAILURE; 1201 break; 1202 } 1203 convert = 0; /* conversion done */ 1204 cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv); 1205 goto done; 1206 } 1207 1208 DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs); 1209 1210 bzero((void *) &ms, sizeof (ms)); 1211 1212 mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL); 1213 cv_init(&ms.cond, NULL, CV_DRIVER, NULL); 1214 mutex_enter(&ms.lock); 1215 1216 if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) { 1217 /* 1218 * Since we've called drctl_config_init, we are the only 1219 * DR ctl operation in progress. Set dr_mh to the 1220 * delete memhandle for use by stat and cancel. 1221 */ 1222 ASSERT(dr_mh == NULL); 1223 dr_mh = mh; 1224 1225 /* 1226 * Wait for completion or interrupt. 1227 */ 1228 while (!ms.done) { 1229 if (cv_wait_sig(&ms.cond, &ms.lock) == 0) { 1230 /* 1231 * There is a pending signal. 1232 */ 1233 (void) kphysm_del_cancel(mh); 1234 DR_DBG_MEM("%s: cancel", __func__); 1235 /* 1236 * Wait for completion. 1237 */ 1238 while (!ms.done) 1239 cv_wait(&ms.cond, &ms.lock); 1240 } 1241 } 1242 dr_mh = NULL; 1243 rv = ms.error; 1244 } else { 1245 DR_DBG_MEM("%s: del_start() = %d", __func__, rv); 1246 } 1247 1248 mutex_exit(&ms.lock); 1249 cv_destroy(&ms.cond); 1250 mutex_destroy(&ms.lock); 1251 1252 done: 1253 if (rv && del_range) { 1254 /* 1255 * Add back the spans to the kcage growth list. 1256 */ 1257 for (ml = d_ml; ml; ml = ml->ml_next) 1258 if (err = kcage_range_add(btop(ml->ml_address), 1259 btop(ml->ml_size), KCAGE_DOWN)) 1260 cmn_err(CE_WARN, "kcage_range_add() = %d", err); 1261 } 1262 memlist_free_list(d_ml); 1263 1264 if ((err = kphysm_del_release(mh)) != KPHYSM_OK) 1265 cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err); 1266 if (convert) 1267 rv = cvt_err(rv); 1268 1269 DR_DBG_MEM("%s: rv=%d", __func__, rv); 1270 1271 return (rv); 1272 } 1273