1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <stdio.h> 29 #include <unistd.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <fcntl.h> 33 #include <pthread.h> 34 #include <errno.h> 35 #include <libnvpair.h> 36 37 #include <sys/processor.h> 38 #include <sys/stat.h> 39 #include <sys/mdesc.h> 40 #include <sys/param.h> 41 #include <sys/systeminfo.h> 42 #include <sys/mem.h> 43 #include <sys/bl.h> 44 #include <sys/fm/protocol.h> 45 #include <fm/fmd_fmri.h> 46 47 #include "ldom.h" 48 #include "ldmsvcs_utils.h" 49 50 51 static ssize_t 52 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf) 53 { 54 int fh; 55 size_t size; 56 uint64_t *bufp; 57 58 if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0) 59 return (-1); 60 61 if (ioctl(fh, MDESCIOCGSZ, &size) < 0) { 62 (void) close(fh); 63 return (-1); 64 } 65 66 bufp = (uint64_t *)lhp->allocp(size); 67 68 if (read(fh, bufp, size) < 0) { 69 lhp->freep(bufp, size); 70 (void) close(fh); 71 return (-1); 72 } 73 (void) close(fh); 74 75 *buf = bufp; 76 77 return ((ssize_t)size); 78 } 79 80 81 static int 82 ldom_getinfo(struct ldom_hdl *lhp) 83 { 84 static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER; 85 static pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 86 static int major_version = -1; 87 static int service_ldom = -1; 88 static int busy_init = 0; 89 90 int ier, rc = 0; 91 92 (void) pthread_mutex_lock(&mt); 93 94 while (busy_init == 1) 95 (void) pthread_cond_wait(&cv, &mt); 96 97 if (major_version != -1 && service_ldom != -1) { 98 lhp->major_version = major_version; 99 lhp->service_ldom = service_ldom; 100 (void) pthread_mutex_unlock(&mt); 101 return (0); 102 } 103 104 /* 105 * get to this point if major_version and service_ldom have not yet 106 * been determined 107 */ 108 busy_init = 1; 109 (void) pthread_mutex_unlock(&mt); 110 111 /* 112 * set defaults which correspond to the case of "LDOMS not 113 * available". note that these can (and will) also apply to 114 * non-sun4v machines. 115 */ 116 major_version = 0; 117 service_ldom = 1; 118 119 /* figure out version */ 120 if ((ier = ldmsvcs_check_channel()) == 0) { 121 /* 122 * get into this block if vldc exists. LDOMS is available 123 * and we are on the service LDOM. 124 */ 125 major_version = 1; 126 service_ldom = 1; 127 } else if (ier == 1) { 128 /* 129 * get into this block if vldc does not exist 130 * 131 * if we do not get into the following if() block [i.e., 132 * if (bufsiz <= 0)] then we are on a non-sun4v machine. 133 */ 134 uint64_t *bufp; 135 ssize_t bufsiz; 136 137 if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) { 138 md_t *mdp; 139 140 if ((mdp = md_init_intern(bufp, lhp->allocp, 141 lhp->freep)) != NULL) { 142 mde_cookie_t *listp; 143 uint64_t dval; 144 int num_nodes; 145 146 num_nodes = md_node_count(mdp); 147 listp = lhp->allocp(sizeof (mde_cookie_t) * 148 num_nodes); 149 150 /* 151 * if we do not enter the following if block, 152 * we conclude that LDOMS is not available 153 */ 154 if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 155 md_find_name(mdp, "platform"), 156 md_find_name(mdp, "fwd"), 157 listp) > 0 && 158 md_get_prop_val(mdp, listp[0], 159 "domaining-enabled", &dval) >= 0 && 160 dval == 1) { 161 /* 162 * LDOMS is available. an earlier 163 * block detected the situation of 164 * being on a service LDOM, so 165 * we get to this point only if we 166 * are not on a service LDOM. 167 */ 168 major_version = 1; 169 service_ldom = 0; 170 } 171 172 lhp->freep(listp, sizeof (mde_cookie_t) * 173 num_nodes); 174 (void) md_fini(mdp); 175 } 176 177 lhp->freep(bufp, bufsiz); 178 } 179 } else { 180 rc = 1; 181 } 182 183 (void) pthread_mutex_lock(&mt); 184 lhp->major_version = major_version; 185 lhp->service_ldom = service_ldom; 186 busy_init = 0; 187 (void) pthread_mutex_unlock(&mt); 188 189 (void) pthread_cond_broadcast(&cv); 190 191 return (rc); 192 } 193 194 195 /* 196 * search the machine description for a "pid" entry (physical cpuid) and 197 * return the corresponding "id" entry (virtual cpuid) 198 */ 199 static processorid_t 200 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid) 201 { 202 char isa[MAXNAMELEN]; 203 md_t *mdp; 204 mde_cookie_t *listp; 205 ssize_t bufsize; 206 processorid_t vid; 207 uint64_t *bufp; 208 uint64_t pval; 209 int num_nodes, ncpus, i; 210 211 (void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN); 212 213 if (strcmp(isa, "sun4v") != 0) 214 return ((processorid_t)cpuid); 215 216 /* 217 * convert the physical cpuid to a virtual cpuid 218 */ 219 if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1) 220 return (-1); 221 222 if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL || 223 (num_nodes = md_node_count(mdp)) < 1) { 224 lhp->freep(bufp, bufsize); 225 return (-1); 226 } 227 228 listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes); 229 ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 230 md_find_name(mdp, "cpu"), 231 md_find_name(mdp, "fwd"), listp); 232 233 vid = -1; 234 for (i = 0; i < ncpus; i++) { 235 if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 && 236 pval == (uint64_t)cpuid) { 237 if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0) 238 vid = (processorid_t)pval; 239 240 break; 241 } 242 } 243 244 lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes); 245 (void) md_fini(mdp); 246 lhp->freep(bufp, bufsize); 247 248 return (vid); 249 } 250 251 /* 252 * if checking for status of a retired page: 253 * 0 - page is retired 254 * EAGAIN - page is scheduled for retirement 255 * EIO - page not scheduled for retirement 256 * EINVAL - error 257 * 258 * if retiring a page: 259 * 0 - success in retiring page 260 * EIO - page is already retired 261 * EAGAIN - page is scheduled for retirement 262 * EINVAL - error 263 * 264 * the original decoder for ioctl() return values is 265 * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt 266 */ 267 static int 268 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl) 269 { 270 mem_page_t mpage; 271 char *fmribuf; 272 size_t fmrisz; 273 int fd, rc; 274 275 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 276 return (EINVAL); 277 278 if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 279 fmrisz > MEM_FMRI_MAX_BUFSIZE || 280 (fmribuf = lhp->allocp(fmrisz)) == NULL) { 281 (void) close(fd); 282 return (EINVAL); 283 } 284 285 if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz, 286 NV_ENCODE_NATIVE, 0)) != 0) { 287 lhp->freep(fmribuf, fmrisz); 288 (void) close(fd); 289 return (EINVAL); 290 } 291 292 mpage.m_fmri = fmribuf; 293 mpage.m_fmrisz = fmrisz; 294 295 rc = ioctl(fd, cmd, &mpage); 296 lhp->freep(fmribuf, fmrisz); 297 (void) close(fd); 298 299 if (rc < 0) 300 return (EINVAL); 301 302 if ((cmd == MEM_PAGE_RETIRE || cmd == MEM_PAGE_FMRI_RETIRE || 303 cmd == MEM_PAGE_ISRETIRED || cmd == MEM_PAGE_FMRI_ISRETIRED) && 304 (rc == 0 || rc == EIO || rc == EAGAIN)) 305 return (rc); 306 307 return (EINVAL); 308 } 309 310 int 311 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl) 312 { 313 char *name; 314 int ret; 315 316 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 317 return (EINVAL); 318 319 switch (ldom_major_version(lhp)) { 320 case 0: 321 /* 322 * version == 0 means LDOMS support is not available 323 */ 324 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 325 processorid_t vid; 326 uint32_t cpuid; 327 328 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 329 &cpuid) == 0 && 330 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 331 return (p_online(vid, P_STATUS)); 332 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 333 return (os_mem_page_retire(lhp, 334 MEM_PAGE_FMRI_ISRETIRED, nvl)); 335 } 336 337 return (EINVAL); 338 /*NOTREACHED*/ 339 break; 340 case 1: 341 /* LDOMS 1.0 */ 342 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 343 uint32_t cpuid; 344 345 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 346 &cpuid) == 0) 347 ret = ldmsvcs_cpu_req_status(lhp, cpuid); 348 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 349 uint64_t pa; 350 351 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 352 &pa) == 0) 353 ret = ldmsvcs_mem_req_status(lhp, pa); 354 else 355 ret = EINVAL; 356 } else { 357 ret = ENOTSUP; 358 } 359 return (ret); 360 361 /*NOTREACHED*/ 362 break; 363 default: 364 break; 365 } 366 367 return (ENOTSUP); 368 } 369 370 371 int 372 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl) 373 { 374 char *name; 375 int ret; 376 377 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 378 return (EINVAL); 379 380 switch (ldom_major_version(lhp)) { 381 case 0: 382 /* 383 * version == 0 means LDOMS support is not available 384 */ 385 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 386 processorid_t vid; 387 uint32_t cpuid; 388 389 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 390 &cpuid) == 0 && 391 (vid = cpu_phys2virt(lhp, cpuid)) != -1) 392 return (p_online(vid, P_FAULTED)); 393 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 394 return (os_mem_page_retire(lhp, 395 MEM_PAGE_FMRI_RETIRE, nvl)); 396 } 397 398 return (EINVAL); 399 /*NOTREACHED*/ 400 break; 401 case 1: 402 /* LDOMS 1.0 */ 403 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 404 uint32_t cpuid; 405 406 if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, 407 &cpuid) == 0) 408 ret = ldmsvcs_cpu_req_offline(lhp, cpuid); 409 } else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) { 410 uint64_t pa; 411 412 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 413 &pa) == 0) 414 ret = ldmsvcs_mem_req_retire(lhp, pa); 415 else 416 ret = EINVAL; 417 } else { 418 ret = ENOTSUP; 419 } 420 return (ret); 421 422 /*NOTREACHED*/ 423 break; 424 default: 425 break; 426 } 427 428 return (ENOTSUP); 429 } 430 431 432 /* 433 * blacklist cpus in a non-LDOMS environment 434 */ 435 int 436 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl) 437 { 438 char *name; 439 440 if (ldom_major_version(lhp) != 0) 441 return (0); 442 443 if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0) 444 return (EINVAL); 445 446 if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) { 447 bl_req_t blr; 448 char *class; 449 int fd, rc, err; 450 451 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 452 (class == NULL) || (*class == '\0')) 453 return (EINVAL); 454 455 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 456 return (EIO); 457 458 if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 || 459 blr.bl_fmrisz == 0 || 460 (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) == 461 NULL) { 462 (void) close(fd); 463 return (EINVAL); 464 } 465 466 blr.bl_class = class; 467 468 rc = ioctl(fd, BLIOC_INSERT, &blr); 469 err = errno; 470 471 lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz); 472 (void) close(fd); 473 474 if (rc < 0 && err != ENOTSUP) { 475 errno = err; 476 return (-1); 477 } 478 } 479 480 return (0); 481 } 482 483 484 ssize_t 485 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf) 486 { 487 switch (ldom_major_version(lhp)) { 488 case 0: 489 return (get_local_core_md(lhp, buf)); 490 /*NOTREACHED*/ 491 break; 492 case 1: 493 /* LDOMS 1.0 */ 494 if (ldom_on_service(lhp) == 1) 495 return (ldmsvcs_get_core_md(lhp, buf)); 496 else 497 return (get_local_core_md(lhp, buf)); 498 499 /*NOTREACHED*/ 500 break; 501 default: 502 *buf = NULL; 503 break; 504 } 505 506 return (-1); 507 } 508 509 510 /* 511 * version 0 means no LDOMS 512 */ 513 int 514 ldom_major_version(ldom_hdl_t *lhp) 515 { 516 if (lhp == NULL) 517 return (-1); 518 519 if (ldom_getinfo(lhp) == 0) 520 return (lhp->major_version); 521 else 522 return (0); 523 } 524 525 /* 526 * in the absence of ldoms we are on a single OS instance which is the 527 * equivalent of the service ldom 528 */ 529 int 530 ldom_on_service(ldom_hdl_t *lhp) 531 { 532 if (lhp == NULL) 533 return (-1); 534 535 if (ldom_getinfo(lhp) == 0) 536 return (lhp->service_ldom); 537 else 538 return (1); 539 } 540 541 542 ldom_hdl_t * 543 ldom_init(void *(*allocp)(size_t size), 544 void (*freep)(void *addr, size_t size)) 545 { 546 struct ldom_hdl *lhp; 547 548 if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) 549 return (NULL); 550 551 lhp->major_version = -1; /* version not yet determined */ 552 lhp->allocp = allocp; 553 lhp->freep = freep; 554 555 ldmsvcs_init(lhp); 556 557 return (lhp); 558 } 559 560 561 void 562 ldom_fini(ldom_hdl_t *lhp) 563 { 564 if (lhp == NULL) 565 return; 566 567 ldmsvcs_fini(lhp); 568 lhp->freep(lhp, sizeof (struct ldom_hdl)); 569 } 570 571 /* end file */ 572