1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * sun4v Memory DR Module 29 */ 30 31 32 #include <sys/types.h> 33 #include <sys/cmn_err.h> 34 #include <sys/vmem.h> 35 #include <sys/kmem.h> 36 #include <sys/systm.h> 37 #include <sys/machsystm.h> /* for page_freelist_coalesce() */ 38 #include <sys/errno.h> 39 #include <sys/memnode.h> 40 #include <sys/memlist.h> 41 #include <sys/memlist_impl.h> 42 #include <sys/tuneable.h> 43 #include <sys/proc.h> 44 #include <sys/disp.h> 45 #include <sys/debug.h> 46 #include <sys/vm.h> 47 #include <sys/callb.h> 48 #include <sys/memlist_plat.h> /* for installed_top_size() */ 49 #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */ 50 #include <sys/dumphdr.h> /* for dump_resize() */ 51 #include <sys/atomic.h> /* for use in stats collection */ 52 #include <sys/rwlock.h> 53 #include <vm/seg_kmem.h> 54 #include <vm/seg_kpm.h> 55 #include <vm/page.h> 56 #include <vm/vm_dep.h> 57 #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */ 58 #include <sys/sunddi.h> 59 #include <sys/mem_config.h> 60 #include <sys/mem_cage.h> 61 #include <sys/lgrp.h> 62 #include <sys/ddi.h> 63 64 #include <sys/modctl.h> 65 #include <sys/sysevent/dr.h> 66 #include <sys/mach_descrip.h> 67 #include <sys/mdesc.h> 68 #include <sys/ds.h> 69 #include <sys/drctl.h> 70 #include <sys/dr_util.h> 71 #include <sys/dr_mem.h> 72 73 74 /* 75 * DR operations are subject to Memory Alignment restrictions 76 * for both address and the size of the request. 77 */ 78 #define MA_ADDR 0x10000000 /* addr alignment 256M */ 79 #define MA_SIZE 0x10000000 /* size alignment 256M */ 80 81 #define MBLK_IS_VALID(m) \ 82 (IS_P2ALIGNED((m)->addr, MA_ADDR) && IS_P2ALIGNED((m)->size, MA_SIZE)) 83 84 static memhandle_t dr_mh; /* memory handle for delete */ 85 86 static struct modlmisc modlmisc = { 87 &mod_miscops, 88 "sun4v memory DR" 89 }; 90 91 static struct modlinkage modlinkage = { 92 MODREV_1, 93 (void *)&modlmisc, 94 NULL 95 }; 96 97 static int dr_mem_allow_unload = 0; 98 99 typedef int (*fn_t)(dr_mem_blk_t *, int *); 100 101 /* 102 * Global Domain Services (DS) Handle 103 */ 104 static ds_svc_hdl_t ds_handle; 105 106 /* 107 * Supported DS Capability Versions 108 */ 109 static ds_ver_t dr_mem_vers[] = { { 1, 0 } }; 110 #define DR_MEM_NVERS (sizeof (dr_mem_vers) / sizeof (dr_mem_vers[0])) 111 112 /* 113 * DS Capability Description 114 */ 115 static ds_capability_t dr_mem_cap = { 116 DR_MEM_DS_ID, /* svc_id */ 117 dr_mem_vers, /* vers */ 118 DR_MEM_NVERS /* nvers */ 119 }; 120 121 /* 122 * DS Callbacks 123 */ 124 static void dr_mem_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t); 125 static void dr_mem_unreg_handler(ds_cb_arg_t arg); 126 static void dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); 127 128 /* 129 * DS Client Ops Vector 130 */ 131 static ds_clnt_ops_t dr_mem_ops = { 132 dr_mem_reg_handler, /* ds_reg_cb */ 133 dr_mem_unreg_handler, /* ds_unreg_cb */ 134 dr_mem_data_handler, /* ds_data_cb */ 135 NULL /* cb_arg */ 136 }; 137 138 /* 139 * Operation Results 140 * 141 * Used internally to gather results while an operation on a 142 * list of mblks is in progress. In particular, it is used to 143 * keep track of which mblks have already failed so that they are 144 * not processed further, and the manner in which they failed. 145 */ 146 typedef struct { 147 uint64_t addr; 148 uint64_t size; 149 uint32_t result; 150 uint32_t status; 151 char *string; 152 } dr_mem_res_t; 153 154 static char * 155 dr_mem_estr[] = { 156 "operation succeeded", /* DR_MEM_RES_OK */ 157 "operation failed", /* DR_MEM_RES_FAILURE */ 158 "operation was blocked", /* DR_MEM_RES_BLOCKED */ 159 "memory not defined in MD", /* DR_MEM_RES_NOT_IN_MD */ 160 "memory already in use", /* DR_MEM_RES_ESPAN */ 161 "memory access test failed", /* DR_MEM_RES_EFAULT */ 162 "resource not available", /* DR_MEM_RES_ERESOURCE */ 163 "permanent pages in span", /* DR_MEM_RES_PERM */ 164 "memory span busy", /* DR_MEM_RES_EBUSY */ 165 "VM viability test failed", /* DR_MEM_RES_ENOTVIABLE */ 166 "no pages to unconfigure", /* DR_MEM_RES_ENOWORK */ 167 "operation cancelled", /* DR_MEM_RES_ECANCELLED */ 168 "operation refused", /* DR_MEM_RES_EREFUSED */ 169 "memory span duplicate", /* DR_MEM_RES_EDUP */ 170 "invalid argument" /* DR_MEM_RES_EINVAL */ 171 }; 172 173 typedef struct { 174 kcondvar_t cond; 175 kmutex_t lock; 176 int error; 177 int done; 178 } mem_sync_t; 179 180 /* 181 * Internal Functions 182 */ 183 static int dr_mem_init(void); 184 static int dr_mem_fini(void); 185 186 static int dr_mem_list_wrk(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 187 static int dr_mem_list_query(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 188 static int dr_mem_del_stat(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 189 static int dr_mem_del_cancel(dr_mem_hdr_t *, dr_mem_hdr_t **, int *); 190 191 static int dr_mem_unconfigure(dr_mem_blk_t *, int *); 192 static int dr_mem_configure(dr_mem_blk_t *, int *); 193 static void dr_mem_query(dr_mem_blk_t *, dr_mem_query_t *); 194 195 static dr_mem_res_t *dr_mem_res_array_init(dr_mem_hdr_t *, drctl_rsrc_t *, int); 196 static void dr_mem_res_array_fini(dr_mem_res_t *res, int nres); 197 static size_t dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, 198 dr_mem_hdr_t **respp); 199 200 static int dr_mem_find(dr_mem_blk_t *mbp); 201 static mde_cookie_t dr_mem_find_node_md(dr_mem_blk_t *, md_t *, mde_cookie_t *); 202 203 static int mem_add(pfn_t, pgcnt_t); 204 static int mem_del(pfn_t, pgcnt_t); 205 206 static size_t rsvaddsz; 207 extern void i_dr_mem_init(uint64_t *); 208 extern void i_dr_mem_fini(); 209 extern void i_dr_mem_update(); 210 extern int kphysm_add_memory_dynamic(pfn_t, pgcnt_t); 211 212 int 213 _init(void) 214 { 215 int status; 216 217 /* check that Memory DR is enabled */ 218 if (dr_is_disabled(DR_TYPE_MEM)) 219 return (ENOTSUP); 220 221 if ((status = dr_mem_init()) != 0) { 222 cmn_err(CE_NOTE, "Memory DR initialization failed"); 223 return (status); 224 } 225 226 if ((status = mod_install(&modlinkage)) != 0) { 227 (void) dr_mem_fini(); 228 } 229 230 return (status); 231 } 232 233 int 234 _info(struct modinfo *modinfop) 235 { 236 return (mod_info(&modlinkage, modinfop)); 237 } 238 239 int 240 _fini(void) 241 { 242 int status; 243 244 if (dr_mem_allow_unload == 0) 245 return (EBUSY); 246 247 if ((status = mod_remove(&modlinkage)) == 0) { 248 (void) dr_mem_fini(); 249 } 250 251 return (status); 252 } 253 254 static int 255 dr_mem_init(void) 256 { 257 int rv; 258 259 if ((rv = ds_cap_init(&dr_mem_cap, &dr_mem_ops)) != 0) { 260 cmn_err(CE_NOTE, "dr_mem: ds_cap_init failed: %d", rv); 261 return (rv); 262 } 263 264 i_dr_mem_init(&rsvaddsz); 265 266 return (0); 267 } 268 269 static int 270 dr_mem_fini(void) 271 { 272 int rv; 273 274 i_dr_mem_fini(); 275 276 if ((rv = ds_cap_fini(&dr_mem_cap)) != 0) { 277 cmn_err(CE_NOTE, "dr_mem: ds_cap_fini failed: %d", rv); 278 } 279 280 return (rv); 281 } 282 283 static void 284 dr_mem_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl) 285 { 286 DR_DBG_MEM("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg, 287 ver->major, ver->minor, hdl); 288 289 ds_handle = hdl; 290 } 291 292 static void 293 dr_mem_unreg_handler(ds_cb_arg_t arg) 294 { 295 DR_DBG_MEM("unreg_handler: arg=0x%p\n", arg); 296 297 ds_handle = DS_INVALID_HDL; 298 } 299 300 /*ARGSUSED*/ 301 static void 302 dr_mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) 303 { 304 dr_mem_hdr_t *req = buf; 305 dr_mem_hdr_t err_resp; 306 dr_mem_hdr_t *resp = &err_resp; 307 int resp_len = 0; 308 int rv = EINVAL; 309 310 /* 311 * Sanity check the message 312 */ 313 if (buflen < sizeof (dr_mem_hdr_t)) { 314 DR_DBG_MEM("incoming message short: expected at least %ld " 315 "bytes, received %ld\n", sizeof (dr_mem_hdr_t), buflen); 316 goto done; 317 } 318 319 if (req == NULL) { 320 DR_DBG_MEM("empty message: expected at least %ld bytes\n", 321 sizeof (dr_mem_hdr_t)); 322 goto done; 323 } 324 325 DR_DBG_MEM("incoming request:\n"); 326 DR_DBG_DUMP_MSG(buf, buflen); 327 328 /* 329 * Process the command 330 */ 331 switch (req->msg_type) { 332 case DR_MEM_CONFIGURE: 333 case DR_MEM_UNCONFIGURE: 334 if (req->msg_arg == 0) { 335 DR_DBG_MEM("No mblks specified for operation\n"); 336 goto done; 337 } 338 if ((rv = dr_mem_list_wrk(req, &resp, &resp_len)) != 0) { 339 DR_DBG_MEM("%s failed (%d)\n", 340 (req->msg_type == DR_MEM_CONFIGURE) ? 341 "Memory configure" : "Memory unconfigure", rv); 342 } 343 break; 344 345 case DR_MEM_UNCONF_STATUS: 346 if ((rv = dr_mem_del_stat(req, &resp, &resp_len)) != 0) 347 DR_DBG_MEM("Memory delete status failed (%d)\n", rv); 348 break; 349 350 case DR_MEM_UNCONF_CANCEL: 351 if ((rv = dr_mem_del_cancel(req, &resp, &resp_len)) != 0) 352 DR_DBG_MEM("Memory delete cancel failed (%d)\n", rv); 353 break; 354 355 case DR_MEM_QUERY: 356 if (req->msg_arg == 0) { 357 DR_DBG_MEM("No mblks specified for operation\n"); 358 goto done; 359 } 360 if ((rv = dr_mem_list_query(req, &resp, &resp_len)) != 0) 361 DR_DBG_MEM("Memory query failed (%d)\n", rv); 362 break; 363 364 default: 365 cmn_err(CE_NOTE, "unsupported memory DR operation (%d)", 366 req->msg_type); 367 break; 368 } 369 370 done: 371 /* check if an error occurred */ 372 if (resp == &err_resp) { 373 resp->req_num = (req) ? req->req_num : 0; 374 resp->msg_type = DR_MEM_ERROR; 375 resp->msg_arg = rv; 376 resp_len = sizeof (dr_mem_hdr_t); 377 } 378 379 DR_DBG_MEM("outgoing response:\n"); 380 DR_DBG_DUMP_MSG(resp, resp_len); 381 382 /* send back the response */ 383 if (ds_cap_send(ds_handle, resp, resp_len) != 0) { 384 DR_DBG_MEM("ds_send failed\n"); 385 } 386 387 /* free any allocated memory */ 388 if (resp != &err_resp) { 389 kmem_free(resp, resp_len); 390 } 391 } 392 393 /* 394 * Common routine to config or unconfig multiple mblks. 395 * 396 * Note: Do not modify result buffer or length on error. 397 */ 398 static int 399 dr_mem_list_wrk(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 400 { 401 int rv; 402 int idx; 403 int count; 404 int result; 405 int status; 406 fn_t dr_fn; 407 int se_hint; 408 dr_mem_blk_t *req_mblks; 409 dr_mem_res_t *res; 410 int drctl_cmd; 411 int drctl_flags = 0; 412 drctl_rsrc_t *drctl_req; 413 size_t drctl_req_len; 414 drctl_resp_t *drctl_resp; 415 drctl_rsrc_t *drctl_rsrc; 416 size_t drctl_resp_len = 0; 417 drctl_cookie_t drctl_res_ck; 418 419 ASSERT((req != NULL) && (req->msg_arg != 0)); 420 421 count = req->msg_arg; 422 423 /* 424 * Extract all information that is specific 425 * to the various types of operations. 426 */ 427 switch (req->msg_type) { 428 case DR_MEM_CONFIGURE: 429 dr_fn = dr_mem_configure; 430 drctl_cmd = DRCTL_MEM_CONFIG_REQUEST; 431 se_hint = SE_HINT_INSERT; 432 break; 433 case DR_MEM_UNCONFIGURE: 434 dr_fn = dr_mem_unconfigure; 435 drctl_cmd = DRCTL_MEM_UNCONFIG_REQUEST; 436 se_hint = SE_HINT_REMOVE; 437 break; 438 default: 439 /* Programming error if we reach this. */ 440 cmn_err(CE_NOTE, "%s: bad msg_type %d\n", 441 __func__, req->msg_type); 442 ASSERT(0); 443 return (-1); 444 } 445 446 /* the incoming array of mblks to operate on */ 447 req_mblks = DR_MEM_CMD_MBLKS(req); 448 449 /* allocate drctl request msg based on incoming resource count */ 450 drctl_req_len = sizeof (drctl_rsrc_t) * count; 451 drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP); 452 453 /* copy the size for the drctl call from the incoming request msg */ 454 for (idx = 0; idx < count; idx++) { 455 drctl_req[idx].res_mem_addr = req_mblks[idx].addr; 456 drctl_req[idx].res_mem_size = req_mblks[idx].size; 457 } 458 459 rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req, 460 count, &drctl_resp, &drctl_resp_len, &drctl_res_ck); 461 462 ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0)); 463 464 if (rv != 0) { 465 DR_DBG_MEM("%s: drctl_config_init returned: %d\n", 466 __func__, rv); 467 kmem_free(drctl_resp, drctl_resp_len); 468 kmem_free(drctl_req, drctl_req_len); 469 return (rv); 470 } 471 472 ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK); 473 474 drctl_rsrc = drctl_resp->resp_resources; 475 476 /* create the result scratch array */ 477 res = dr_mem_res_array_init(req, drctl_rsrc, count); 478 479 /* perform the specified operation on each of the mblks */ 480 for (idx = 0; idx < count; idx++) { 481 /* 482 * If no action will be taken against the current 483 * mblk, update the drctl resource information to 484 * ensure that it gets recovered properly during 485 * the drctl fini() call. 486 */ 487 if (res[idx].result != DR_MEM_RES_OK) { 488 drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE; 489 continue; 490 } 491 492 /* call the function to perform the actual operation */ 493 result = (*dr_fn)(&req_mblks[idx], &status); 494 495 /* save off results of the operation */ 496 res[idx].result = result; 497 res[idx].status = status; 498 res[idx].addr = req_mblks[idx].addr; /* for partial case */ 499 res[idx].size = req_mblks[idx].size; /* for partial case */ 500 res[idx].string = i_ddi_strdup(dr_mem_estr[result], KM_SLEEP); 501 502 /* save result for drctl fini() reusing init() msg memory */ 503 drctl_req[idx].status = (result != DR_MEM_RES_OK) ? 504 DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS; 505 506 DR_DBG_MEM("%s: mblk 0x%lx.0x%lx stat %d result %d off '%s'\n", 507 __func__, req_mblks[idx].addr, req_mblks[idx].size, 508 drctl_req[idx].status, result, 509 (res[idx].string) ? res[idx].string : ""); 510 } 511 512 if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0) 513 DR_DBG_MEM("%s: drctl_config_fini returned: %d\n", 514 __func__, rv); 515 516 /* 517 * Operation completed without any fatal errors. 518 * Pack the response for transmission. 519 */ 520 *resp_len = dr_mem_pack_response(req, res, resp); 521 522 /* notify interested parties about the operation */ 523 dr_generate_event(DR_TYPE_MEM, se_hint); 524 525 /* 526 * Deallocate any scratch memory. 527 */ 528 kmem_free(drctl_resp, drctl_resp_len); 529 kmem_free(drctl_req, drctl_req_len); 530 531 dr_mem_res_array_fini(res, count); 532 533 return (0); 534 } 535 536 /* 537 * Allocate and initialize a result array based on the initial 538 * drctl operation. A valid result array is always returned. 539 */ 540 static dr_mem_res_t * 541 dr_mem_res_array_init(dr_mem_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc) 542 { 543 int idx; 544 dr_mem_res_t *res; 545 char *err_str; 546 size_t err_len; 547 548 /* allocate zero filled buffer to initialize fields */ 549 res = kmem_zalloc(nrsrc * sizeof (dr_mem_res_t), KM_SLEEP); 550 551 /* 552 * Fill in the result information for each resource. 553 */ 554 for (idx = 0; idx < nrsrc; idx++) { 555 res[idx].addr = rsrc[idx].res_mem_addr; 556 res[idx].size = rsrc[idx].res_mem_size; 557 res[idx].result = DR_MEM_RES_OK; 558 559 if (rsrc[idx].status == DRCTL_STATUS_ALLOW) 560 continue; 561 562 /* 563 * Update the state information for this mblk. 564 */ 565 res[idx].result = DR_MEM_RES_BLOCKED; 566 res[idx].status = (req->msg_type == DR_MEM_CONFIGURE) ? 567 DR_MEM_STAT_UNCONFIGURED : DR_MEM_STAT_CONFIGURED; 568 569 /* 570 * If an error string exists, copy it out of the 571 * message buffer. This eliminates any dependency 572 * on the memory allocated for the message buffer 573 * itself. 574 */ 575 if (rsrc[idx].offset != NULL) { 576 err_str = (char *)rsrc + rsrc[idx].offset; 577 err_len = strlen(err_str) + 1; 578 579 res[idx].string = kmem_alloc(err_len, KM_SLEEP); 580 bcopy(err_str, res[idx].string, err_len); 581 } 582 } 583 584 return (res); 585 } 586 587 static void 588 dr_mem_res_array_fini(dr_mem_res_t *res, int nres) 589 { 590 int idx; 591 size_t str_len; 592 593 for (idx = 0; idx < nres; idx++) { 594 /* deallocate the error string if present */ 595 if (res[idx].string) { 596 str_len = strlen(res[idx].string) + 1; 597 kmem_free(res[idx].string, str_len); 598 } 599 } 600 601 /* deallocate the result array itself */ 602 kmem_free(res, sizeof (dr_mem_res_t) * nres); 603 } 604 605 /* 606 * Allocate and pack a response message for transmission based 607 * on the specified result array. A valid response message and 608 * valid size information is always returned. 609 */ 610 static size_t 611 dr_mem_pack_response(dr_mem_hdr_t *req, dr_mem_res_t *res, dr_mem_hdr_t **respp) 612 { 613 int idx; 614 dr_mem_hdr_t *resp; 615 dr_mem_stat_t *resp_stat; 616 size_t resp_len; 617 uint32_t curr_off; 618 caddr_t curr_str; 619 size_t str_len; 620 size_t stat_len; 621 int nstat = req->msg_arg; 622 623 /* 624 * Calculate the size of the response message 625 * and allocate an appropriately sized buffer. 626 */ 627 resp_len = sizeof (dr_mem_hdr_t); 628 629 /* add the stat array size */ 630 stat_len = sizeof (dr_mem_stat_t) * nstat; 631 resp_len += stat_len; 632 633 /* add the size of any error strings */ 634 for (idx = 0; idx < nstat; idx++) { 635 if (res[idx].string != NULL) { 636 resp_len += strlen(res[idx].string) + 1; 637 } 638 } 639 640 /* allocate the message buffer */ 641 resp = kmem_zalloc(resp_len, KM_SLEEP); 642 643 /* 644 * Fill in the header information. 645 */ 646 resp->req_num = req->req_num; 647 resp->msg_type = DR_MEM_OK; 648 resp->msg_arg = nstat; 649 650 /* 651 * Fill in the stat information. 652 */ 653 resp_stat = DR_MEM_RESP_STATS(resp); 654 655 /* string offsets start immediately after stat array */ 656 curr_off = sizeof (dr_mem_hdr_t) + stat_len; 657 curr_str = (char *)resp_stat + stat_len; 658 659 for (idx = 0; idx < nstat; idx++) { 660 resp_stat[idx].addr = res[idx].addr; 661 resp_stat[idx].size = res[idx].size; 662 resp_stat[idx].result = res[idx].result; 663 resp_stat[idx].status = res[idx].status; 664 665 if (res[idx].string != NULL) { 666 /* copy over the error string */ 667 str_len = strlen(res[idx].string) + 1; 668 bcopy(res[idx].string, curr_str, str_len); 669 resp_stat[idx].string_off = curr_off; 670 671 curr_off += str_len; 672 curr_str += str_len; 673 } 674 } 675 676 /* buffer should be exactly filled */ 677 ASSERT(curr_off == resp_len); 678 679 *respp = resp; 680 return (resp_len); 681 } 682 683 static void 684 dr_mem_query(dr_mem_blk_t *mbp, dr_mem_query_t *mqp) 685 { 686 memquery_t mq; 687 688 DR_DBG_MEM("dr_mem_query...\n"); 689 690 691 (void) kphysm_del_span_query(btop(mbp->addr), btop(mbp->size), &mq); 692 693 if (!mq.phys_pages) 694 return; 695 696 mqp->addr = mbp->addr; 697 mqp->mq.phys_pages = ptob(mq.phys_pages); 698 mqp->mq.managed = ptob(mq.managed); 699 mqp->mq.nonrelocatable = ptob(mq.nonrelocatable); 700 mqp->mq.first_nonrelocatable = ptob(mq.first_nonrelocatable); 701 mqp->mq.last_nonrelocatable = ptob(mq.last_nonrelocatable); 702 /* 703 * Set to the max byte offset within the page. 704 */ 705 if (mqp->mq.nonrelocatable) 706 mqp->mq.last_nonrelocatable += PAGESIZE - 1; 707 } 708 709 /* 710 * Do not modify result buffer or length on error. 711 */ 712 static int 713 dr_mem_list_query(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 714 { 715 int idx; 716 int rlen; 717 int nml; 718 struct memlist *ml; 719 dr_mem_blk_t *req_mblks, mb; 720 dr_mem_hdr_t *rp; 721 dr_mem_query_t *stat; 722 723 /* the incoming array of req_mblks to configure */ 724 req_mblks = DR_MEM_CMD_MBLKS(req); 725 726 /* allocate a response message, should be freed by caller */ 727 nml = 0; 728 rlen = sizeof (dr_mem_hdr_t); 729 if (req_mblks->addr == NULL && req_mblks->size == 0) { 730 /* 731 * Request is for domain's full view of it's memory. 732 */ 733 memlist_read_lock(); 734 for (ml = phys_install; ml; ml = ml->next) 735 nml++; 736 737 rlen += nml * sizeof (dr_mem_query_t); 738 } else { 739 rlen += req->msg_arg * sizeof (dr_mem_query_t); 740 } 741 rp = kmem_zalloc(rlen, KM_SLEEP); 742 743 /* fill in the known data */ 744 rp->req_num = req->req_num; 745 rp->msg_type = DR_MEM_OK; 746 rp->msg_arg = nml ? nml : req->msg_arg; 747 748 /* stat array for the response */ 749 stat = DR_MEM_RESP_QUERY(rp); 750 751 /* get the status for each of the mblocks */ 752 if (nml) { 753 for (idx = 0, ml = phys_install; ml; ml = ml->next, idx++) { 754 mb.addr = ml->address; 755 mb.size = ml->size; 756 dr_mem_query(&mb, &stat[idx]); 757 } 758 memlist_read_unlock(); 759 } else { 760 for (idx = 0; idx < req->msg_arg; idx++) 761 dr_mem_query(&req_mblks[idx], &stat[idx]); 762 } 763 764 *resp = rp; 765 *resp_len = rlen; 766 767 return (0); 768 } 769 770 static int 771 cvt_err(int err) 772 { 773 int rv; 774 775 switch (err) { 776 case KPHYSM_OK: 777 rv = DR_MEM_RES_OK; 778 break; 779 case KPHYSM_ESPAN: 780 rv = DR_MEM_RES_ESPAN; 781 break; 782 case KPHYSM_EFAULT: 783 rv = DR_MEM_RES_EFAULT; 784 break; 785 case KPHYSM_ERESOURCE: 786 rv = DR_MEM_RES_ERESOURCE; 787 break; 788 case KPHYSM_ENOTSUP: 789 case KPHYSM_ENOHANDLES: 790 rv = DR_MEM_RES_FAILURE; 791 break; 792 case KPHYSM_ENONRELOC: 793 rv = DR_MEM_RES_PERM; 794 break; 795 case KPHYSM_EHANDLE: 796 rv = DR_MEM_RES_FAILURE; 797 break; 798 case KPHYSM_EBUSY: 799 rv = DR_MEM_RES_EBUSY; 800 break; 801 case KPHYSM_ENOTVIABLE: 802 rv = DR_MEM_RES_ENOTVIABLE; 803 break; 804 case KPHYSM_ESEQUENCE: 805 rv = DR_MEM_RES_FAILURE; 806 break; 807 case KPHYSM_ENOWORK: 808 rv = DR_MEM_RES_ENOWORK; 809 break; 810 case KPHYSM_ECANCELLED: 811 rv = DR_MEM_RES_ECANCELLED; 812 break; 813 case KPHYSM_EREFUSED: 814 rv = DR_MEM_RES_EREFUSED; 815 break; 816 case KPHYSM_ENOTFINISHED: 817 case KPHYSM_ENOTRUNNING: 818 rv = DR_MEM_RES_FAILURE; 819 break; 820 case KPHYSM_EDUP: 821 rv = DR_MEM_RES_EDUP; 822 break; 823 default: 824 rv = DR_MEM_RES_FAILURE; 825 break; 826 } 827 828 return (rv); 829 } 830 831 static int 832 dr_mem_configure(dr_mem_blk_t *mbp, int *status) 833 { 834 int rv; 835 uint64_t addr, size, addsz; 836 837 rv = 0; 838 addr = mbp->addr; 839 size = mbp->size; 840 841 DR_DBG_MEM("dr_mem_configure...\n"); 842 843 if (!MBLK_IS_VALID(mbp)) { 844 DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", addr, size); 845 *status = DR_MEM_STAT_UNCONFIGURED; 846 rv = DR_MEM_RES_EINVAL; 847 } else if (rv = dr_mem_find(mbp)) { 848 DR_DBG_MEM("failed to find mblk 0x%lx.0x%lx (%d)\n", 849 addr, size, rv); 850 if (rv == EINVAL) { 851 *status = DR_MEM_STAT_NOT_PRESENT; 852 rv = DR_MEM_RES_NOT_IN_MD; 853 } else { 854 *status = DR_MEM_STAT_UNCONFIGURED; 855 rv = DR_MEM_RES_FAILURE; 856 } 857 } else if (rsvaddsz) { 858 addr += size; 859 860 /* 861 * Add up to the first <rsvaddsz> portion of mblock 862 * first since that portion has reserved meta pages. 863 * This will likely guarantee an additional amount of 864 * free pages from which we may have to allocate the 865 * rest of the meta pages. 866 * 867 * Break up the request in descending order (if needed) 868 * in order to ensure that cage grows from the high end 869 * of the original request. 870 */ 871 for (addsz = MIN(size, rsvaddsz); addsz > 0; addsz = size) { 872 ASSERT(addr >= mbp->addr); 873 DR_DBG_MEM("addsz=0x%lx size=0x%lx\n", addsz, size); 874 if (rv = mem_add(btop(addr - addsz), btop(addsz))) { 875 DR_DBG_MEM("failed to configure span" 876 " 0x%lx.0x%lx (%d)\n", addr, addsz, rv); 877 break; 878 } else { 879 size -= addsz; 880 addr -= addsz; 881 } 882 } 883 884 /* 885 * Mark the mblock configured if any span 886 * in that mblock was successfully added. 887 * 888 * In case of partial success: 889 * 890 * rv != DR_MEM_RES_OK 891 * status == DR_MEM_STAT_CONFIGURED 892 * 893 * mark span actually configured. 894 */ 895 if (size == mbp->size && rv != KPHYSM_ESPAN) { 896 *status = DR_MEM_STAT_UNCONFIGURED; 897 } else { 898 DR_DBG_MEM("failed (partial) to configure span" 899 " 0x%lx.0x%lx (%d)\n", addr, addsz, rv); 900 *status = DR_MEM_STAT_CONFIGURED; 901 mbp->addr = addr; 902 mbp->size -= size; 903 } 904 905 rv = cvt_err(rv); 906 i_dr_mem_update(); 907 } else { 908 /* 909 * The reserved feature is disabled, add whole mblock. 910 */ 911 rv = mem_add(btop(addr), btop(size)); 912 DR_DBG_MEM("addr=0x%lx size=0x%lx rv=%d\n", addr, size, rv); 913 if (rv) { 914 rv = cvt_err(rv); 915 *status = DR_MEM_STAT_UNCONFIGURED; 916 } else { 917 *status = DR_MEM_STAT_CONFIGURED; 918 } 919 } 920 921 return (rv); 922 } 923 924 static int 925 dr_mem_unconfigure(dr_mem_blk_t *mbp, int *status) 926 { 927 int rv; 928 929 DR_DBG_MEM("dr_mem_unconfigure...\n"); 930 931 if (!MBLK_IS_VALID(mbp)) { 932 DR_DBG_MEM("invalid mblk 0x%lx.0x%lx\n", 933 mbp->addr, mbp->size); 934 *status = DR_MEM_STAT_CONFIGURED; 935 rv = DR_MEM_RES_EINVAL; 936 } else if (rv = mem_del(btop(mbp->addr), btop(mbp->size))) { 937 rv = cvt_err(rv); 938 *status = DR_MEM_STAT_CONFIGURED; 939 } else { 940 *status = DR_MEM_STAT_UNCONFIGURED; 941 rv = DR_MEM_RES_OK; 942 DR_DBG_MEM("mblk 0x%lx.0x%lx unconfigured\n", 943 mbp->addr, mbp->size); 944 } 945 return (rv); 946 } 947 948 static int 949 dr_mem_del_stat(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 950 { 951 int status; 952 int rlen; 953 memdelstat_t del_stat, *stat; 954 dr_mem_hdr_t *rp; 955 956 /* 957 * If a mem delete is in progress, get its status. 958 */ 959 status = (dr_mh && (kphysm_del_status(dr_mh, &del_stat) == KPHYSM_OK)); 960 961 /* allocate a response message, should be freed by caller */ 962 rlen = sizeof (dr_mem_hdr_t); 963 rlen += status * sizeof (memdelstat_t); 964 rp = kmem_zalloc(rlen, KM_SLEEP); 965 966 /* fill in the known data */ 967 rp->req_num = req->req_num; 968 rp->msg_type = DR_MEM_OK; 969 rp->msg_arg = status; 970 971 if (status) { 972 /* stat struct for the response */ 973 stat = DR_MEM_RESP_DEL_STAT(rp); 974 stat->phys_pages = ptob(del_stat.phys_pages); 975 stat->managed = ptob(del_stat.managed); 976 stat->collected = ptob(del_stat.collected); 977 } 978 979 *resp = rp; 980 *resp_len = rlen; 981 982 return (0); 983 } 984 985 static int 986 dr_mem_del_cancel(dr_mem_hdr_t *req, dr_mem_hdr_t **resp, int *resp_len) 987 { 988 int rlen; 989 dr_mem_hdr_t *rp; 990 991 /* allocate a response message, should be freed by caller */ 992 rlen = sizeof (dr_mem_hdr_t); 993 rp = kmem_zalloc(rlen, KM_SLEEP); 994 995 /* fill in the known data */ 996 rp->req_num = req->req_num; 997 rp->msg_type = DR_MEM_OK; 998 rp->msg_arg = (dr_mh && kphysm_del_cancel(dr_mh) != KPHYSM_OK) ? 999 DR_MEM_RES_EINVAL : DR_MEM_RES_OK; 1000 1001 *resp = rp; 1002 *resp_len = rlen; 1003 1004 return (0); 1005 } 1006 1007 static int 1008 dr_mem_find(dr_mem_blk_t *mbp) 1009 { 1010 md_t *mdp = NULL; 1011 int num_nodes; 1012 int rv = 0; 1013 int listsz; 1014 mde_cookie_t *listp = NULL; 1015 mde_cookie_t memnode; 1016 char *found = "found"; 1017 1018 if ((mdp = md_get_handle()) == NULL) { 1019 DR_DBG_MEM("unable to initialize machine description\n"); 1020 return (-1); 1021 } 1022 1023 num_nodes = md_node_count(mdp); 1024 ASSERT(num_nodes > 0); 1025 1026 listsz = num_nodes * sizeof (mde_cookie_t); 1027 listp = kmem_zalloc(listsz, KM_SLEEP); 1028 1029 memnode = dr_mem_find_node_md(mbp, mdp, listp); 1030 1031 if (memnode == MDE_INVAL_ELEM_COOKIE) { 1032 rv = EINVAL; 1033 found = "not found"; 1034 } 1035 1036 DR_DBG_MEM("mblk 0x%lx.0x%lx %s\n", mbp->addr, mbp->size, found); 1037 1038 kmem_free(listp, listsz); 1039 (void) md_fini_handle(mdp); 1040 1041 return (rv); 1042 } 1043 1044 /* 1045 * Look up a particular mblk in the MD. Returns the mde_cookie_t 1046 * representing that mblk if present, and MDE_INVAL_ELEM_COOKIE 1047 * otherwise. It is assumed the scratch array has already been 1048 * allocated so that it can accommodate the worst case scenario, 1049 * every node in the MD. 1050 */ 1051 static mde_cookie_t 1052 dr_mem_find_node_md(dr_mem_blk_t *mbp, md_t *mdp, mde_cookie_t *listp) 1053 { 1054 int idx; 1055 int nnodes; 1056 mde_cookie_t rootnode; 1057 uint64_t base_prop; 1058 uint64_t size_prop; 1059 mde_cookie_t result = MDE_INVAL_ELEM_COOKIE; 1060 1061 rootnode = md_root_node(mdp); 1062 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 1063 1064 /* 1065 * Scan the DAG for all the mem nodes 1066 */ 1067 nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "mblock"), 1068 md_find_name(mdp, "fwd"), listp); 1069 1070 if (nnodes < 0) { 1071 DR_DBG_MEM("Scan for mblks failed\n"); 1072 return (result); 1073 } 1074 1075 DR_DBG_MEM("dr_mem_find_node_md: found %d mblks in the MD\n", nnodes); 1076 1077 /* 1078 * Find the mblk of interest 1079 */ 1080 for (idx = 0; idx < nnodes; idx++) { 1081 1082 if (md_get_prop_val(mdp, listp[idx], "base", &base_prop)) { 1083 DR_DBG_MEM("Missing 'base' property for mblk node %d\n", 1084 idx); 1085 break; 1086 } 1087 1088 if (md_get_prop_val(mdp, listp[idx], "size", &size_prop)) { 1089 DR_DBG_MEM("Missing 'size' property for mblk node %d\n", 1090 idx); 1091 break; 1092 } 1093 1094 if (base_prop <= mbp->addr && 1095 (base_prop + size_prop) >= (mbp->addr + mbp->size)) { 1096 /* found a match */ 1097 DR_DBG_MEM("dr_mem_find_node_md: found mblk " 1098 "0x%lx.0x%lx in MD\n", mbp->addr, mbp->size); 1099 result = listp[idx]; 1100 break; 1101 } 1102 } 1103 1104 if (result == MDE_INVAL_ELEM_COOKIE) { 1105 DR_DBG_MEM("mblk 0x%lx.0x%lx not in MD\n", 1106 mbp->addr, mbp->size); 1107 } 1108 1109 return (result); 1110 } 1111 1112 static int 1113 mem_add(pfn_t base, pgcnt_t npgs) 1114 { 1115 int rv, rc; 1116 1117 DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); 1118 1119 if (npgs == 0) 1120 return (0); 1121 1122 rv = kphysm_add_memory_dynamic(base, npgs); 1123 DR_DBG_MEM("%s: kphysm_add(0x%lx, 0x%lx) = %d", __func__, base, npgs, 1124 rv); 1125 if (!rv) { 1126 if (rc = kcage_range_add(base, npgs, KCAGE_DOWN)) 1127 cmn_err(CE_WARN, "kcage_range_add() = %d", rc); 1128 } 1129 return (rv); 1130 } 1131 1132 static void 1133 del_done(void *arg, int error) 1134 { 1135 mem_sync_t *ms = arg; 1136 1137 mutex_enter(&ms->lock); 1138 ms->error = error; 1139 ms->done = 1; 1140 cv_signal(&ms->cond); 1141 mutex_exit(&ms->lock); 1142 } 1143 1144 static int 1145 mem_del(pfn_t base, pgcnt_t npgs) 1146 { 1147 int rv, err, del_range = 0; 1148 mem_sync_t ms; 1149 memquery_t mq; 1150 memhandle_t mh; 1151 struct memlist *ml; 1152 struct memlist *d_ml = NULL; 1153 1154 DR_DBG_MEM("%s: begin base=0x%lx npgs=0x%lx\n", __func__, base, npgs); 1155 1156 if (npgs == 0) 1157 return (0); 1158 1159 if ((rv = kphysm_del_gethandle(&mh)) != KPHYSM_OK) { 1160 cmn_err(CE_WARN, "%s: del_gethandle() = %d", __func__, rv); 1161 return (rv); 1162 } 1163 if ((rv = kphysm_del_span_query(base, npgs, &mq)) 1164 != KPHYSM_OK) { 1165 cmn_err(CE_WARN, "%s: del_span_query() = %d", __func__, rv); 1166 goto done; 1167 } 1168 if (mq.nonrelocatable) { 1169 DR_DBG_MEM("%s: non-reloc pages = %ld", 1170 __func__, mq.nonrelocatable); 1171 rv = KPHYSM_ENONRELOC; 1172 goto done; 1173 } 1174 if (rv = kcage_range_delete(base, npgs)) { 1175 cmn_err(CE_WARN, "%s: del_range() = %d", __func__, rv); 1176 goto done; 1177 } else { 1178 del_range++; 1179 } 1180 if ((rv = kphysm_del_span(mh, base, npgs)) != KPHYSM_OK) { 1181 cmn_err(CE_WARN, "%s: del_span() = %d", __func__, rv); 1182 goto done; 1183 } 1184 if ((rv = memlist_add_span(ptob(base), ptob(npgs), &d_ml)) 1185 != MEML_SPANOP_OK) { 1186 cmn_err(CE_WARN, "%s: add_span() = %d", __func__, rv); 1187 goto done; 1188 } 1189 1190 DR_DBG_MEM("%s: reserved=0x%lx", __func__, npgs); 1191 1192 bzero((void *) &ms, sizeof (ms)); 1193 1194 mutex_init(&ms.lock, NULL, MUTEX_DRIVER, NULL); 1195 cv_init(&ms.cond, NULL, CV_DRIVER, NULL); 1196 mutex_enter(&ms.lock); 1197 1198 if ((rv = kphysm_del_start(mh, del_done, (void *) &ms)) == KPHYSM_OK) { 1199 /* 1200 * Since we've called drctl_config_init, we are the only 1201 * DR ctl operation in progress. Set dr_mh to the 1202 * delete memhandle for use by stat and cancel. 1203 */ 1204 ASSERT(dr_mh == NULL); 1205 dr_mh = mh; 1206 1207 /* 1208 * Wait for completion or interrupt. 1209 */ 1210 while (!ms.done) { 1211 if (cv_wait_sig(&ms.cond, &ms.lock) == 0) { 1212 /* 1213 * There is a pending signal. 1214 */ 1215 (void) kphysm_del_cancel(mh); 1216 DR_DBG_MEM("%s: cancel", __func__); 1217 /* 1218 * Wait for completion. 1219 */ 1220 while (!ms.done) 1221 cv_wait(&ms.cond, &ms.lock); 1222 } 1223 } 1224 dr_mh = NULL; 1225 rv = ms.error; 1226 } else { 1227 DR_DBG_MEM("%s: del_start() = %d", __func__, rv); 1228 } 1229 1230 mutex_exit(&ms.lock); 1231 cv_destroy(&ms.cond); 1232 mutex_destroy(&ms.lock); 1233 1234 done: 1235 if (rv && del_range) { 1236 /* 1237 * Add back the spans to the kcage growth list. 1238 */ 1239 for (ml = d_ml; ml; ml = ml->next) 1240 if (err = kcage_range_add(btop(ml->address), 1241 btop(ml->size), KCAGE_DOWN)) 1242 cmn_err(CE_WARN, "kcage_range_add() = %d", err); 1243 } 1244 memlist_free_list(d_ml); 1245 1246 if ((err = kphysm_del_release(mh)) != KPHYSM_OK) 1247 cmn_err(CE_WARN, "%s: del_release() = %d", __func__, err); 1248 1249 DR_DBG_MEM("%s: rv=%d", __func__, rv); 1250 1251 return (rv); 1252 } 1253