1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <fcntl.h> 33 #include <pthread.h> 34 #include <errno.h> 35 #include <libnvpair.h> 36 37 #include <sys/processor.h> 38 #include <sys/stat.h> 39 #include <sys/mdesc.h> 40 #include <sys/param.h> 41 #include <sys/systeminfo.h> 42 #include <sys/mem.h> 43 #include <sys/bl.h> 44 #include <sys/fm/protocol.h> 45 #include <fm/fmd_fmri.h> 46 #include <sys/pri.h> 47 48 #include "ldom.h" 49 #include "ldmsvcs_utils.h" 50 51 #define MD_STR_PLATFORM "platform" 52 #define MD_STR_DOM_ENABLE "domaining-enabled" 53 54 static ssize_t 55 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf) 56 { 57 int fh; 58 size_t size; 59 ssize_t ssize; 60 uint64_t tok; 61 uint64_t *bufp; 62 63 if ((ssize = pri_get(PRI_GET, &tok, buf, lhp->allocp, lhp->freep)) >= 0) 64 return (ssize); 65 66 if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0) 67 return (-1); 68 69 if (ioctl(fh, MDESCIOCGSZ, &size) < 0) { 70 (void) close(fh); 71 return (-1); 72 } 73 74 bufp = (uint64_t *)lhp->allocp(size); 75 76 if (read(fh, bufp, size) < 0) { 77 lhp->freep(bufp, size); 78 (void) close(fh); 79 return (-1); 80 } 81 (void) close(fh); 82 83 *buf = bufp; 84 85 return ((ssize_t)size); 86 } 87 88 89 static int 90 get_local_md_prop_value(ldom_hdl_t *lhp, char *node, char *prop, uint64_t *val) 91 { 92 int rc = 1; 93 uint64_t *bufp; 94 ssize_t bufsiz; 95 96 if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) { 97 md_t *mdp; 98 99 if (mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) { 100 int num_nodes; 101 mde_cookie_t *listp; 102 103 num_nodes = md_node_count(mdp); 104 listp = lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 105 106 if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 107 md_find_name(mdp, node), 108 md_find_name(mdp, "fwd"), 109 listp) > 0 && 110 md_get_prop_val(mdp, listp[0], prop, val) >= 0) { 111 /* found the property */ 112 rc = 0; 113 } 114 115 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 116 (void) md_fini(mdp); 117 } 118 lhp->freep(bufp, bufsiz); 119 } 120 return (rc); 121 } 122 123 static int 124 ldom_getinfo(struct ldom_hdl *lhp) 125 { 126 static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER; 127 static pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 128 static int major_version = -1; 129 static int service_ldom = -1; 130 static int busy_init = 0; 131 132 int ier, rc = 0; 133 uint64_t domain_enable; 134 135 (void) pthread_mutex_lock(&mt); 136 137 while (busy_init == 1) 138 (void) pthread_cond_wait(&cv, &mt); 139 140 if (major_version != -1 && service_ldom != -1) { 141 lhp->major_version = major_version; 142 lhp->service_ldom = service_ldom; 143 (void) pthread_mutex_unlock(&mt); 144 return (0); 145 } 146 147 /* 148 * get to this point if major_version and service_ldom have not yet 149 * been determined 150 */ 151 busy_init = 1; 152 (void) pthread_mutex_unlock(&mt); 153 154 /* 155 * set defaults which correspond to the case of "LDOMS not 156 * available". note that these can (and will) also apply to 157 * non-sun4v machines. 158 */ 159 major_version = 0; 160 service_ldom = 0; 161 domain_enable = 0; 162 163 if (get_local_md_prop_value(lhp, MD_STR_PLATFORM, MD_STR_DOM_ENABLE, 164 &domain_enable) == 0 && 165 domain_enable != 0) { 166 167 /* 168 * Domaining is enable and ldmd is not in config mode 169 * so this is a ldom env. 170 */ 171 major_version = 1; 172 173 if ((ier = ldmsvcs_check_channel()) == 0) { 174 /* 175 * control ldom 176 * ldmfma channel between FMA and ldmd only exists 177 * on the control domain. 178 */ 179 service_ldom = 1; 180 } else if (ier == 1) { 181 /* 182 * guest ldom 183 * non-control ldom such as guest and io service ldom 184 */ 185 service_ldom = 0; 186 } 187 } 188 189 (void) pthread_mutex_lock(&mt); 190 lhp->major_version = major_version; 191 lhp->service_ldom = service_ldom; 192 busy_init = 0; 193 (void) pthread_mutex_unlock(&mt); 194 195 (void) pthread_cond_broadcast(&cv); 196 197 return (rc); 198 } 199 200 201 /* 202 * search the machine description for a "pid" entry (physical cpuid) and 203 * return the corresponding "id" entry (virtual cpuid) 204 */ 205 static processorid_t 206 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid) 207 { 208 char isa[MAXNAMELEN]; 209 md_t *mdp; 210 mde_cookie_t *listp; 211 ssize_t bufsize; 212 processorid_t vid; 213 uint64_t *bufp; 214 uint64_t pval; 215 int num_nodes, ncpus, i; 216 217 (void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN); 218 219 if (strcmp(isa, "sun4v") != 0) 220 return ((processorid_t)cpuid); 221 222 /* 223 * convert the physical cpuid to a virtual cpuid 224 */ 225 if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1) 226 return (-1); 227 228 if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL || 229 (num_nodes = md_node_count(mdp)) < 1) { 230 lhp->freep(bufp, bufsize); 231 return (-1); 232 } 233 234 listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 235 ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 236 md_find_name(mdp, "cpu"), 237 md_find_name(mdp, "fwd"), listp); 238 239 vid = -1; 240 for (i = 0; i < ncpus; i++) { 241 if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 && 242 pval == (uint64_t)cpuid) { 243 if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0) 244 vid = (processorid_t)pval; 245 246 break; 247 } 248 } 249 250 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 251 (void) md_fini(mdp); 252 lhp->freep(bufp, bufsize); 253 254 return (vid); 255 } 256 257 /* 258 * if checking for status of a retired page: 259 * 0 - page is retired 260 * EAGAIN - page is scheduled for retirement 261 * EIO - page not scheduled for retirement 262 * EINVAL - error 263 * 264 * if retiring a page: 265 * 0 - success in retiring page 266 * EIO - page is already retired 267 * EAGAIN - page is scheduled for retirement 268 * EINVAL - error 269 * 270 * the original decoder for ioctl() return values is 271 * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt 272 */ 273 static int 274 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl) 275 { 276 mem_page_t mpage; 277 char *fmribuf; 278 size_t fmrisz; 279 int fd, rc, err; 280 281 if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE && 282 cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED) 283 return (EINVAL); 284 285 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 286 return (EINVAL); 287 288 if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 289 fmrisz > MEM_FMRI_MAX_BUFSIZE || 290 (fmribuf = lhp->allocp(fmrisz)) == NULL) { 291 (void) close(fd); 292 return (EINVAL); 293 } 294 295 if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz, 296 NV_ENCODE_NATIVE, 0)) != 0) { 297 lhp->freep(fmribuf, fmrisz); 298 (void) close(fd); 299 return (EINVAL); 300 } 301 302 mpage.m_fmri = fmribuf; 303 mpage.m_fmrisz = fmrisz; 304 305 rc = ioctl(fd, cmd, &mpage); 306 err = errno; 307 308 lhp->freep(fmribuf, fmrisz); 309 (void) close(fd); 310 311 if (rc < 0) { 312 rc = err; 313 } 314 315 return (rc); 316 } 317 318 int 319 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl) 320 { 321 char *name; 322 int ret; 323 324 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 325 return (EINVAL); 326 327 switch (ldom_major_version(lhp)) { 328 case 0: 329 /* 330 * version == 0 means LDOMS support is not available 331 */ 332 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 333 processorid_t vid; 334 uint32_t cpuid; 335 336 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 337 &cpuid) == 0 && 338 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 339 return (p_online(vid, P_STATUS)); 340 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 341 return (os_mem_page_retire(lhp, 342 MEM_PAGE_FMRI_ISRETIRED, nvl)); 343 } 344 345 return (EINVAL); 346 /*NOTREACHED*/ 347 break; 348 case 1: 349 /* LDOMS 1.0 */ 350 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 351 uint32_t cpuid; 352 353 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 354 &cpuid) == 0) 355 ret = ldmsvcs_cpu_req_status(lhp, cpuid); 356 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 357 uint64_t pa; 358 359 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 360 &pa) == 0) 361 ret = ldmsvcs_mem_req_status(lhp, pa); 362 else 363 ret = EINVAL; 364 } else { 365 ret = ENOTSUP; 366 } 367 return (ret); 368 369 /*NOTREACHED*/ 370 break; 371 default: 372 break; 373 } 374 375 return (ENOTSUP); 376 } 377 378 379 int 380 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl) 381 { 382 char *name; 383 int ret; 384 385 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 386 return (EINVAL); 387 388 switch (ldom_major_version(lhp)) { 389 case 0: 390 /* 391 * version == 0 means LDOMS support is not available 392 */ 393 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 394 processorid_t vid; 395 uint32_t cpuid; 396 397 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 398 &cpuid) == 0 && 399 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 400 return (p_online(vid, P_FAULTED)); 401 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 402 return (os_mem_page_retire(lhp, 403 MEM_PAGE_FMRI_RETIRE, nvl)); 404 } 405 406 return (EINVAL); 407 /*NOTREACHED*/ 408 break; 409 case 1: 410 /* LDOMS 1.0 */ 411 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 412 uint32_t cpuid; 413 414 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 415 &cpuid) == 0) 416 ret = ldmsvcs_cpu_req_offline(lhp, cpuid); 417 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 418 uint64_t pa; 419 420 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 421 &pa) == 0) 422 ret = ldmsvcs_mem_req_retire(lhp, pa); 423 else 424 ret = EINVAL; 425 } else { 426 ret = ENOTSUP; 427 } 428 return (ret); 429 430 /*NOTREACHED*/ 431 break; 432 default: 433 break; 434 } 435 436 return (ENOTSUP); 437 } 438 439 440 /* 441 * blacklist cpus in a non-LDOMS environment 442 */ 443 int 444 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl) 445 { 446 char *name; 447 448 if (ldom_major_version(lhp) != 0) 449 return (0); 450 451 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 452 return (EINVAL); 453 454 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 455 bl_req_t blr; 456 char *class; 457 int fd, rc, err; 458 459 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 460 (class == NULL) || (*class == '\0')) 461 return (EINVAL); 462 463 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 464 return (EIO); 465 466 if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 || 467 blr.bl_fmrisz == 0 || 468 (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) == 469 NULL) { 470 (void) close(fd); 471 return (EINVAL); 472 } 473 474 blr.bl_class = class; 475 476 rc = ioctl(fd, BLIOC_INSERT, &blr); 477 err = errno; 478 479 lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz); 480 (void) close(fd); 481 482 if (rc < 0 && err != ENOTSUP) { 483 errno = err; 484 return (-1); 485 } 486 } 487 488 return (0); 489 } 490 491 492 ssize_t 493 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf) 494 { 495 ssize_t rv; /* return value */ 496 497 switch (ldom_major_version(lhp)) { 498 case 0: 499 return (get_local_core_md(lhp, buf)); 500 /*NOTREACHED*/ 501 break; 502 case 1: 503 /* LDOMS 1.0 */ 504 if (ldom_on_service(lhp) == 1) { 505 if ((rv = ldmsvcs_get_core_md(lhp, buf)) < 0) 506 rv = get_local_core_md(lhp, buf); 507 return (rv); 508 } else { 509 return (get_local_core_md(lhp, buf)); 510 } 511 512 /*NOTREACHED*/ 513 break; 514 default: 515 *buf = NULL; 516 break; 517 } 518 519 return (-1); 520 } 521 522 /* 523 * version 0 means no LDOMS 524 */ 525 int 526 ldom_major_version(ldom_hdl_t *lhp) 527 { 528 if (lhp == NULL) 529 return (-1); 530 531 if (ldom_getinfo(lhp) == 0) 532 return (lhp->major_version); 533 else 534 return (0); 535 } 536 537 /* 538 * in the absence of ldoms we are on a single OS instance which is the 539 * equivalent of the service ldom 540 */ 541 int 542 ldom_on_service(ldom_hdl_t *lhp) 543 { 544 if (lhp == NULL) 545 return (-1); 546 547 if (ldom_getinfo(lhp) == 0) 548 return (lhp->service_ldom); 549 else 550 return (1); 551 } 552 553 554 ldom_hdl_t * 555 ldom_init(void *(*allocp)(size_t size), 556 void (*freep)(void *addr, size_t size)) 557 { 558 struct ldom_hdl *lhp; 559 560 if (pri_init() < 0) 561 return (NULL); 562 563 if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) { 564 pri_fini(); 565 return (NULL); 566 } 567 568 lhp->major_version = -1; /* version not yet determined */ 569 lhp->allocp = allocp; 570 lhp->freep = freep; 571 572 ldmsvcs_init(lhp); 573 574 return (lhp); 575 } 576 577 578 void 579 ldom_fini(ldom_hdl_t *lhp) 580 { 581 if (lhp == NULL) 582 return; 583 584 ldmsvcs_fini(lhp); 585 lhp->freep(lhp, sizeof (struct ldom_hdl)); 586 587 pri_fini(); 588 } 589 590 /* end file */ 591