1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <fcntl.h> 33 #include <pthread.h> 34 #include <errno.h> 35 #include <libnvpair.h> 36 #include <dlfcn.h> 37 #include <link.h> 38 39 #include <sys/processor.h> 40 #include <sys/stat.h> 41 #include <sys/mdesc.h> 42 #include <sys/param.h> 43 #include <sys/systeminfo.h> 44 #include <sys/mem.h> 45 #include <sys/bl.h> 46 #include <sys/fm/protocol.h> 47 #include <fm/fmd_fmri.h> 48 #include <sys/pri.h> 49 50 #include "ldom.h" 51 #include "ldmsvcs_utils.h" 52 53 #define MD_STR_PLATFORM "platform" 54 #define MD_STR_DOM_ENABLE "domaining-enabled" 55 56 static void *ldom_dl_hp = (void *)NULL; 57 static const char *ldom_dl_path = "libpri.so.1"; 58 static int ldom_dl_mode = (RTLD_NOW | RTLD_LOCAL); 59 60 static int (*ldom_pri_init)(void) = (int (*)(void))NULL; 61 static void (*ldom_pri_fini)(void) = (void (*)(void))NULL; 62 static ssize_t (*ldom_pri_get)(uint8_t wait, uint64_t *token, uint64_t **buf, 63 void *(*allocp)(size_t), void (*freep)(void *, size_t)) = 64 (ssize_t (*)(uint8_t wait, uint64_t *token, uint64_t **buf, 65 void *(*allocp)(size_t), void (*freep)(void *, size_t)))NULL; 66 67 static void 68 ldom_pri_config(void) 69 { 70 char isa[MAXNAMELEN]; /* used to see if machine is sun4v */ 71 72 if (sysinfo(SI_MACHINE, isa, MAXNAMELEN) < 0) 73 return; 74 if (strcmp(isa, "sun4v") != 0) 75 return; 76 if ((ldom_dl_hp = dlopen(ldom_dl_path, ldom_dl_mode)) == NULL) 77 return; 78 79 ldom_pri_init = (int (*)(void))dlsym(ldom_dl_hp, "pri_init"); 80 ldom_pri_fini = (void (*)(void))dlsym(ldom_dl_hp, "pri_fini"); 81 ldom_pri_get = (ssize_t (*)(uint8_t wait, uint64_t *token, 82 uint64_t **buf, void *(*allocp)(size_t), 83 void (*freep)(void *, size_t)))dlsym(ldom_dl_hp, "pri_get"); 84 } 85 86 static void 87 ldom_pri_unconfig(void) 88 { 89 if (ldom_dl_hp == NULL) 90 return; 91 92 ldom_pri_init = (int (*)(void))NULL; 93 ldom_pri_fini = (void (*)(void))NULL; 94 ldom_pri_get = (ssize_t (*)(uint8_t wait, uint64_t *token, 95 uint64_t **buf, void *(*allocp)(size_t), 96 void (*freep)(void *, size_t)))NULL; 97 (void) dlclose(ldom_dl_hp); 98 ldom_dl_hp = (void *)NULL; 99 } 100 101 static ssize_t 102 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf) 103 { 104 int fh; 105 size_t size; 106 ssize_t ssize; 107 uint64_t tok; 108 uint64_t *bufp; 109 110 if (ldom_pri_get != NULL) 111 if ((ssize = (*ldom_pri_get)(PRI_GET, &tok, buf, 112 lhp->allocp, lhp->freep)) >= 0) 113 return (ssize); 114 115 if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0) 116 return (-1); 117 118 if (ioctl(fh, MDESCIOCGSZ, &size) < 0) { 119 (void) close(fh); 120 return (-1); 121 } 122 123 bufp = (uint64_t *)lhp->allocp(size); 124 125 if (read(fh, bufp, size) < 0) { 126 lhp->freep(bufp, size); 127 (void) close(fh); 128 return (-1); 129 } 130 (void) close(fh); 131 132 *buf = bufp; 133 134 return ((ssize_t)size); 135 } 136 137 138 static int 139 get_local_md_prop_value(ldom_hdl_t *lhp, char *node, char *prop, uint64_t *val) 140 { 141 int rc = 1; 142 uint64_t *bufp; 143 ssize_t bufsiz; 144 145 if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) { 146 md_t *mdp; 147 148 if (mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) { 149 int num_nodes; 150 mde_cookie_t *listp; 151 152 num_nodes = md_node_count(mdp); 153 listp = lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 154 155 if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 156 md_find_name(mdp, node), 157 md_find_name(mdp, "fwd"), 158 listp) > 0 && 159 md_get_prop_val(mdp, listp[0], prop, val) >= 0) { 160 /* found the property */ 161 rc = 0; 162 } 163 164 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 165 (void) md_fini(mdp); 166 } 167 lhp->freep(bufp, bufsiz); 168 } 169 return (rc); 170 } 171 172 static int 173 ldom_getinfo(struct ldom_hdl *lhp) 174 { 175 static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER; 176 static pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 177 static int major_version = -1; 178 static int service_ldom = -1; 179 static int busy_init = 0; 180 181 int ier, rc = 0; 182 uint64_t domain_enable; 183 184 (void) pthread_mutex_lock(&mt); 185 186 while (busy_init == 1) 187 (void) pthread_cond_wait(&cv, &mt); 188 189 if (major_version != -1 && service_ldom != -1) { 190 lhp->major_version = major_version; 191 lhp->service_ldom = service_ldom; 192 (void) pthread_mutex_unlock(&mt); 193 return (0); 194 } 195 196 /* 197 * get to this point if major_version and service_ldom have not yet 198 * been determined 199 */ 200 busy_init = 1; 201 (void) pthread_mutex_unlock(&mt); 202 203 /* 204 * set defaults which correspond to the case of "LDOMS not 205 * available". note that these can (and will) also apply to 206 * non-sun4v machines. 207 */ 208 major_version = 0; 209 service_ldom = 0; 210 domain_enable = 0; 211 212 if (get_local_md_prop_value(lhp, MD_STR_PLATFORM, MD_STR_DOM_ENABLE, 213 &domain_enable) == 0 && 214 domain_enable != 0) { 215 216 /* 217 * Domaining is enable and ldmd is not in config mode 218 * so this is a ldom env. 219 */ 220 major_version = 1; 221 222 if ((ier = ldmsvcs_check_channel()) == 0) { 223 /* 224 * control ldom 225 * ldmfma channel between FMA and ldmd only exists 226 * on the control domain. 227 */ 228 service_ldom = 1; 229 } else if (ier == 1) { 230 /* 231 * guest ldom 232 * non-control ldom such as guest and io service ldom 233 */ 234 service_ldom = 0; 235 } 236 } 237 238 (void) pthread_mutex_lock(&mt); 239 lhp->major_version = major_version; 240 lhp->service_ldom = service_ldom; 241 busy_init = 0; 242 (void) pthread_mutex_unlock(&mt); 243 244 (void) pthread_cond_broadcast(&cv); 245 246 return (rc); 247 } 248 249 250 /* 251 * search the machine description for a "pid" entry (physical cpuid) and 252 * return the corresponding "id" entry (virtual cpuid) 253 */ 254 static processorid_t 255 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid) 256 { 257 char isa[MAXNAMELEN]; 258 md_t *mdp; 259 mde_cookie_t *listp; 260 ssize_t bufsize; 261 processorid_t vid; 262 uint64_t *bufp; 263 uint64_t pval; 264 int num_nodes, ncpus, i; 265 266 (void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN); 267 268 if (strcmp(isa, "sun4v") != 0) 269 return ((processorid_t)cpuid); 270 271 /* 272 * convert the physical cpuid to a virtual cpuid 273 */ 274 if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1) 275 return (-1); 276 277 if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL || 278 (num_nodes = md_node_count(mdp)) < 1) { 279 lhp->freep(bufp, bufsize); 280 return (-1); 281 } 282 283 listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 284 ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 285 md_find_name(mdp, "cpu"), 286 md_find_name(mdp, "fwd"), listp); 287 288 vid = -1; 289 for (i = 0; i < ncpus; i++) { 290 if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 && 291 pval == (uint64_t)cpuid) { 292 if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0) 293 vid = (processorid_t)pval; 294 295 break; 296 } 297 } 298 299 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 300 (void) md_fini(mdp); 301 lhp->freep(bufp, bufsize); 302 303 return (vid); 304 } 305 306 /* 307 * if checking for status of a retired page: 308 * 0 - page is retired 309 * EAGAIN - page is scheduled for retirement 310 * EIO - page not scheduled for retirement 311 * EINVAL - error 312 * 313 * if retiring a page: 314 * 0 - success in retiring page 315 * EIO - page is already retired 316 * EAGAIN - page is scheduled for retirement 317 * EINVAL - error 318 * 319 * the original decoder for ioctl() return values is 320 * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt 321 */ 322 static int 323 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl) 324 { 325 mem_page_t mpage; 326 char *fmribuf; 327 size_t fmrisz; 328 int fd, rc, err; 329 330 if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE && 331 cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED) 332 return (EINVAL); 333 334 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 335 return (EINVAL); 336 337 if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 338 fmrisz > MEM_FMRI_MAX_BUFSIZE || 339 (fmribuf = lhp->allocp(fmrisz)) == NULL) { 340 (void) close(fd); 341 return (EINVAL); 342 } 343 344 if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz, 345 NV_ENCODE_NATIVE, 0)) != 0) { 346 lhp->freep(fmribuf, fmrisz); 347 (void) close(fd); 348 return (EINVAL); 349 } 350 351 mpage.m_fmri = fmribuf; 352 mpage.m_fmrisz = fmrisz; 353 354 rc = ioctl(fd, cmd, &mpage); 355 err = errno; 356 357 lhp->freep(fmribuf, fmrisz); 358 (void) close(fd); 359 360 if (rc < 0) { 361 rc = err; 362 } 363 364 return (rc); 365 } 366 367 int 368 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl) 369 { 370 char *name; 371 int ret; 372 373 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 374 return (EINVAL); 375 376 switch (ldom_major_version(lhp)) { 377 case 0: 378 /* 379 * version == 0 means LDOMS support is not available 380 */ 381 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 382 processorid_t vid; 383 uint32_t cpuid; 384 385 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 386 &cpuid) == 0 && 387 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 388 return (p_online(vid, P_STATUS)); 389 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 390 return (os_mem_page_retire(lhp, 391 MEM_PAGE_FMRI_ISRETIRED, nvl)); 392 } 393 394 return (EINVAL); 395 /*NOTREACHED*/ 396 break; 397 case 1: 398 /* LDOMS 1.0 */ 399 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 400 uint32_t cpuid; 401 402 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 403 &cpuid) == 0) 404 ret = ldmsvcs_cpu_req_status(lhp, cpuid); 405 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 406 uint64_t pa; 407 408 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 409 &pa) == 0) 410 ret = ldmsvcs_mem_req_status(lhp, pa); 411 else 412 ret = EINVAL; 413 } else { 414 ret = ENOTSUP; 415 } 416 return (ret); 417 418 /*NOTREACHED*/ 419 break; 420 default: 421 break; 422 } 423 424 return (ENOTSUP); 425 } 426 427 428 int 429 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl) 430 { 431 char *name; 432 int ret; 433 434 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 435 return (EINVAL); 436 437 switch (ldom_major_version(lhp)) { 438 case 0: 439 /* 440 * version == 0 means LDOMS support is not available 441 */ 442 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 443 processorid_t vid; 444 uint32_t cpuid; 445 446 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 447 &cpuid) == 0 && 448 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 449 return (p_online(vid, P_FAULTED)); 450 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 451 return (os_mem_page_retire(lhp, 452 MEM_PAGE_FMRI_RETIRE, nvl)); 453 } 454 455 return (EINVAL); 456 /*NOTREACHED*/ 457 break; 458 case 1: 459 /* LDOMS 1.0 */ 460 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 461 uint32_t cpuid; 462 463 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 464 &cpuid) == 0) 465 ret = ldmsvcs_cpu_req_offline(lhp, cpuid); 466 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 467 uint64_t pa; 468 469 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 470 &pa) == 0) 471 ret = ldmsvcs_mem_req_retire(lhp, pa); 472 else 473 ret = EINVAL; 474 } else { 475 ret = ENOTSUP; 476 } 477 return (ret); 478 479 /*NOTREACHED*/ 480 break; 481 default: 482 break; 483 } 484 485 return (ENOTSUP); 486 } 487 488 489 /* 490 * blacklist cpus in a non-LDOMS environment 491 */ 492 int 493 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl) 494 { 495 char *name; 496 497 if (ldom_major_version(lhp) != 0) 498 return (0); 499 500 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 501 return (EINVAL); 502 503 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 504 bl_req_t blr; 505 char *class; 506 int fd, rc, err; 507 508 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 509 (class == NULL) || (*class == '\0')) 510 return (EINVAL); 511 512 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 513 return (EIO); 514 515 if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 || 516 blr.bl_fmrisz == 0 || 517 (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) == 518 NULL) { 519 (void) close(fd); 520 return (EINVAL); 521 } 522 523 blr.bl_class = class; 524 525 rc = ioctl(fd, BLIOC_INSERT, &blr); 526 err = errno; 527 528 lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz); 529 (void) close(fd); 530 531 if (rc < 0 && err != ENOTSUP) { 532 errno = err; 533 return (-1); 534 } 535 } 536 537 return (0); 538 } 539 540 541 ssize_t 542 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf) 543 { 544 ssize_t rv; /* return value */ 545 546 switch (ldom_major_version(lhp)) { 547 case 0: 548 return (get_local_core_md(lhp, buf)); 549 /*NOTREACHED*/ 550 break; 551 case 1: 552 /* LDOMS 1.0 */ 553 if (ldom_on_service(lhp) == 1) { 554 if ((rv = ldmsvcs_get_core_md(lhp, buf)) < 0) 555 rv = get_local_core_md(lhp, buf); 556 return (rv); 557 } else { 558 return (get_local_core_md(lhp, buf)); 559 } 560 561 /*NOTREACHED*/ 562 break; 563 default: 564 *buf = NULL; 565 break; 566 } 567 568 return (-1); 569 } 570 571 /* 572 * version 0 means no LDOMS 573 */ 574 int 575 ldom_major_version(ldom_hdl_t *lhp) 576 { 577 if (lhp == NULL) 578 return (-1); 579 580 if (ldom_getinfo(lhp) == 0) 581 return (lhp->major_version); 582 else 583 return (0); 584 } 585 586 /* 587 * in the absence of ldoms we are on a single OS instance which is the 588 * equivalent of the service ldom 589 */ 590 int 591 ldom_on_service(ldom_hdl_t *lhp) 592 { 593 if (lhp == NULL) 594 return (-1); 595 596 if (ldom_getinfo(lhp) == 0) 597 return (lhp->service_ldom); 598 else 599 return (1); 600 } 601 602 603 ldom_hdl_t * 604 ldom_init(void *(*allocp)(size_t size), 605 void (*freep)(void *addr, size_t size)) 606 { 607 struct ldom_hdl *lhp; 608 609 ldom_pri_config(); 610 if (ldom_pri_init != NULL) 611 if ((*ldom_pri_init)() < 0) 612 return (NULL); 613 614 if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) { 615 if (ldom_pri_fini != NULL) 616 (*ldom_pri_fini)(); 617 return (NULL); 618 } 619 620 lhp->major_version = -1; /* version not yet determined */ 621 lhp->allocp = allocp; 622 lhp->freep = freep; 623 624 ldmsvcs_init(lhp); 625 626 return (lhp); 627 } 628 629 630 void 631 ldom_fini(ldom_hdl_t *lhp) 632 { 633 if (lhp == NULL) 634 return; 635 636 ldmsvcs_fini(lhp); 637 lhp->freep(lhp, sizeof (struct ldom_hdl)); 638 639 if (ldom_pri_fini != NULL) 640 (*ldom_pri_fini)(); 641 ldom_pri_unconfig(); 642 } 643 644 /* end file */ 645