1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <fcntl.h> 33 #include <pthread.h> 34 #include <errno.h> 35 #include <libnvpair.h> 36 37 #include <sys/processor.h> 38 #include <sys/stat.h> 39 #include <sys/mdesc.h> 40 #include <sys/param.h> 41 #include <sys/systeminfo.h> 42 #include <sys/mem.h> 43 #include <sys/bl.h> 44 #include <sys/fm/protocol.h> 45 #include <fm/fmd_fmri.h> 46 47 #include "ldom.h" 48 #include "ldmsvcs_utils.h" 49 50 51 static ssize_t 52 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf) 53 { 54 int fh; 55 size_t size; 56 uint64_t *bufp; 57 58 if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0) 59 return (-1); 60 61 if (ioctl(fh, MDESCIOCGSZ, &size) < 0) { 62 (void) close(fh); 63 return (-1); 64 } 65 66 bufp = (uint64_t *)lhp->allocp(size); 67 68 if (read(fh, bufp, size) < 0) { 69 lhp->freep(bufp, size); 70 (void) close(fh); 71 return (-1); 72 } 73 (void) close(fh); 74 75 *buf = bufp; 76 77 return ((ssize_t)size); 78 } 79 80 81 static int 82 ldom_getinfo(struct ldom_hdl *lhp) 83 { 84 static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER; 85 static pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 86 static int major_version = -1; 87 static int service_ldom = -1; 88 static int busy_init = 0; 89 90 int ier, rc = 0; 91 92 (void) pthread_mutex_lock(&mt); 93 94 while (busy_init == 1) 95 (void) pthread_cond_wait(&cv, &mt); 96 97 if (major_version != -1 && service_ldom != -1) { 98 lhp->major_version = major_version; 99 lhp->service_ldom = service_ldom; 100 (void) pthread_mutex_unlock(&mt); 101 return (0); 102 } 103 104 /* 105 * get to this point if major_version and service_ldom have not yet 106 * been determined 107 */ 108 busy_init = 1; 109 (void) pthread_mutex_unlock(&mt); 110 111 /* 112 * set defaults which correspond to the case of "LDOMS not 113 * available". note that these can (and will) also apply to 114 * non-sun4v machines. 115 */ 116 major_version = 0; 117 service_ldom = 1; 118 119 /* figure out version */ 120 if ((ier = ldmsvcs_check_channel()) == 0) { 121 /* 122 * get into this block if vldc exists. LDOMS is available 123 * and we are on the service LDOM. 124 */ 125 major_version = 1; 126 service_ldom = 1; 127 } else if (ier == 1) { 128 /* 129 * get into this block if vldc does not exist 130 * 131 * if we do not get into the following if() block [i.e., 132 * if (bufsiz <= 0)] then we are on a non-sun4v machine. 133 */ 134 uint64_t *bufp; 135 ssize_t bufsiz; 136 137 if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) { 138 md_t *mdp; 139 140 if ((mdp = md_init_intern(bufp, lhp->allocp, 141 lhp->freep)) != NULL) { 142 mde_cookie_t *listp; 143 uint64_t dval; 144 int num_nodes; 145 146 num_nodes = md_node_count(mdp); 147 listp = lhp->allocp(sizeof (mde_cookie_t) * 148 num_nodes); 149 150 /* 151 * if we do not enter the following if block, 152 * we conclude that LDOMS is not available 153 */ 154 if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 155 md_find_name(mdp, "platform"), 156 md_find_name(mdp, "fwd"), 157 listp) > 0 && 158 md_get_prop_val(mdp, listp[0], 159 "domaining-enabled", &dval) >= 0 && 160 dval == 1) { 161 /* 162 * LDOMS is available. an earlier 163 * block detected the situation of 164 * being on a service LDOM, so 165 * we get to this point only if we 166 * are not on a service LDOM. 167 */ 168 major_version = 1; 169 service_ldom = 0; 170 } 171 172 lhp->freep(listp, sizeof (mde_cookie_t) * 173 num_nodes); 174 (void) md_fini(mdp); 175 } 176 177 lhp->freep(bufp, bufsiz); 178 } 179 } else { 180 rc = 1; 181 } 182 183 (void) pthread_mutex_lock(&mt); 184 lhp->major_version = major_version; 185 lhp->service_ldom = service_ldom; 186 busy_init = 0; 187 (void) pthread_mutex_unlock(&mt); 188 189 (void) pthread_cond_broadcast(&cv); 190 191 return (rc); 192 } 193 194 195 /* 196 * search the machine description for a "pid" entry (physical cpuid) and 197 * return the corresponding "id" entry (virtual cpuid) 198 */ 199 static processorid_t 200 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid) 201 { 202 char isa[MAXNAMELEN]; 203 md_t *mdp; 204 mde_cookie_t *listp; 205 ssize_t bufsize; 206 processorid_t vid; 207 uint64_t *bufp; 208 uint64_t pval; 209 int num_nodes, ncpus, i; 210 211 (void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN); 212 213 if (strcmp(isa, "sun4v") != 0) 214 return ((processorid_t)cpuid); 215 216 /* 217 * convert the physical cpuid to a virtual cpuid 218 */ 219 if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1) 220 return (-1); 221 222 if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL || 223 (num_nodes = md_node_count(mdp)) < 1) { 224 lhp->freep(bufp, bufsize); 225 return (-1); 226 } 227 228 listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 229 ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 230 md_find_name(mdp, "cpu"), 231 md_find_name(mdp, "fwd"), listp); 232 233 vid = -1; 234 for (i = 0; i < ncpus; i++) { 235 if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 && 236 pval == (uint64_t)cpuid) { 237 if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0) 238 vid = (processorid_t)pval; 239 240 break; 241 } 242 } 243 244 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 245 (void) md_fini(mdp); 246 lhp->freep(bufp, bufsize); 247 248 return (vid); 249 } 250 251 /* 252 * if checking for status of a retired page: 253 * 0 - page is retired 254 * EAGAIN - page is scheduled for retirement 255 * EIO - page not scheduled for retirement 256 * EINVAL - error 257 * 258 * if retiring a page: 259 * 0 - success in retiring page 260 * EIO - page is already retired 261 * EAGAIN - page is scheduled for retirement 262 * EINVAL - error 263 * 264 * the original decoder for ioctl() return values is 265 * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt 266 */ 267 static int 268 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl) 269 { 270 mem_page_t mpage; 271 char *fmribuf; 272 size_t fmrisz; 273 int fd, rc, err; 274 275 if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE && 276 cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED) 277 return (EINVAL); 278 279 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 280 return (EINVAL); 281 282 if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 283 fmrisz > MEM_FMRI_MAX_BUFSIZE || 284 (fmribuf = lhp->allocp(fmrisz)) == NULL) { 285 (void) close(fd); 286 return (EINVAL); 287 } 288 289 if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz, 290 NV_ENCODE_NATIVE, 0)) != 0) { 291 lhp->freep(fmribuf, fmrisz); 292 (void) close(fd); 293 return (EINVAL); 294 } 295 296 mpage.m_fmri = fmribuf; 297 mpage.m_fmrisz = fmrisz; 298 299 rc = ioctl(fd, cmd, &mpage); 300 err = errno; 301 302 lhp->freep(fmribuf, fmrisz); 303 (void) close(fd); 304 305 if (rc < 0) { 306 rc = err; 307 } 308 309 return (rc); 310 } 311 312 int 313 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl) 314 { 315 char *name; 316 int ret; 317 318 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 319 return (EINVAL); 320 321 switch (ldom_major_version(lhp)) { 322 case 0: 323 /* 324 * version == 0 means LDOMS support is not available 325 */ 326 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 327 processorid_t vid; 328 uint32_t cpuid; 329 330 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 331 &cpuid) == 0 && 332 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 333 return (p_online(vid, P_STATUS)); 334 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 335 return (os_mem_page_retire(lhp, 336 MEM_PAGE_FMRI_ISRETIRED, nvl)); 337 } 338 339 return (EINVAL); 340 /*NOTREACHED*/ 341 break; 342 case 1: 343 /* LDOMS 1.0 */ 344 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 345 uint32_t cpuid; 346 347 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 348 &cpuid) == 0) 349 ret = ldmsvcs_cpu_req_status(lhp, cpuid); 350 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 351 uint64_t pa; 352 353 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 354 &pa) == 0) 355 ret = ldmsvcs_mem_req_status(lhp, pa); 356 else 357 ret = EINVAL; 358 } else { 359 ret = ENOTSUP; 360 } 361 return (ret); 362 363 /*NOTREACHED*/ 364 break; 365 default: 366 break; 367 } 368 369 return (ENOTSUP); 370 } 371 372 373 int 374 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl) 375 { 376 char *name; 377 int ret; 378 379 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 380 return (EINVAL); 381 382 switch (ldom_major_version(lhp)) { 383 case 0: 384 /* 385 * version == 0 means LDOMS support is not available 386 */ 387 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 388 processorid_t vid; 389 uint32_t cpuid; 390 391 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 392 &cpuid) == 0 && 393 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 394 return (p_online(vid, P_FAULTED)); 395 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 396 return (os_mem_page_retire(lhp, 397 MEM_PAGE_FMRI_RETIRE, nvl)); 398 } 399 400 return (EINVAL); 401 /*NOTREACHED*/ 402 break; 403 case 1: 404 /* LDOMS 1.0 */ 405 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 406 uint32_t cpuid; 407 408 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 409 &cpuid) == 0) 410 ret = ldmsvcs_cpu_req_offline(lhp, cpuid); 411 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 412 uint64_t pa; 413 414 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 415 &pa) == 0) 416 ret = ldmsvcs_mem_req_retire(lhp, pa); 417 else 418 ret = EINVAL; 419 } else { 420 ret = ENOTSUP; 421 } 422 return (ret); 423 424 /*NOTREACHED*/ 425 break; 426 default: 427 break; 428 } 429 430 return (ENOTSUP); 431 } 432 433 434 /* 435 * blacklist cpus in a non-LDOMS environment 436 */ 437 int 438 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl) 439 { 440 char *name; 441 442 if (ldom_major_version(lhp) != 0) 443 return (0); 444 445 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 446 return (EINVAL); 447 448 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 449 bl_req_t blr; 450 char *class; 451 int fd, rc, err; 452 453 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 454 (class == NULL) || (*class == '\0')) 455 return (EINVAL); 456 457 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 458 return (EIO); 459 460 if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 || 461 blr.bl_fmrisz == 0 || 462 (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) == 463 NULL) { 464 (void) close(fd); 465 return (EINVAL); 466 } 467 468 blr.bl_class = class; 469 470 rc = ioctl(fd, BLIOC_INSERT, &blr); 471 err = errno; 472 473 lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz); 474 (void) close(fd); 475 476 if (rc < 0 && err != ENOTSUP) { 477 errno = err; 478 return (-1); 479 } 480 } 481 482 return (0); 483 } 484 485 486 ssize_t 487 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf) 488 { 489 switch (ldom_major_version(lhp)) { 490 case 0: 491 return (get_local_core_md(lhp, buf)); 492 /*NOTREACHED*/ 493 break; 494 case 1: 495 /* LDOMS 1.0 */ 496 if (ldom_on_service(lhp) == 1) 497 return (ldmsvcs_get_core_md(lhp, buf)); 498 else 499 return (get_local_core_md(lhp, buf)); 500 501 /*NOTREACHED*/ 502 break; 503 default: 504 *buf = NULL; 505 break; 506 } 507 508 return (-1); 509 } 510 511 512 /* 513 * version 0 means no LDOMS 514 */ 515 int 516 ldom_major_version(ldom_hdl_t *lhp) 517 { 518 if (lhp == NULL) 519 return (-1); 520 521 if (ldom_getinfo(lhp) == 0) 522 return (lhp->major_version); 523 else 524 return (0); 525 } 526 527 /* 528 * in the absence of ldoms we are on a single OS instance which is the 529 * equivalent of the service ldom 530 */ 531 int 532 ldom_on_service(ldom_hdl_t *lhp) 533 { 534 if (lhp == NULL) 535 return (-1); 536 537 if (ldom_getinfo(lhp) == 0) 538 return (lhp->service_ldom); 539 else 540 return (1); 541 } 542 543 544 ldom_hdl_t * 545 ldom_init(void *(*allocp)(size_t size), 546 void (*freep)(void *addr, size_t size)) 547 { 548 struct ldom_hdl *lhp; 549 550 if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) 551 return (NULL); 552 553 lhp->major_version = -1; /* version not yet determined */ 554 lhp->allocp = allocp; 555 lhp->freep = freep; 556 557 ldmsvcs_init(lhp); 558 559 return (lhp); 560 } 561 562 563 void 564 ldom_fini(ldom_hdl_t *lhp) 565 { 566 if (lhp == NULL) 567 return; 568 569 ldmsvcs_fini(lhp); 570 lhp->freep(lhp, sizeof (struct ldom_hdl)); 571 } 572 573 /* end file */ 574