1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/pool.h> 30 #include <sys/pool_impl.h> 31 #include <sys/pool_pset.h> 32 #include <sys/cpuvar.h> 33 #include <sys/cpupart.h> 34 #include <sys/mutex.h> 35 #include <sys/errno.h> 36 #include <sys/systm.h> 37 #include <sys/cmn_err.h> 38 #include <sys/fss.h> 39 #include <sys/exacct.h> 40 #include <sys/time.h> 41 #include <sys/policy.h> 42 #include <sys/class.h> 43 #include <sys/list.h> 44 #include <sys/cred.h> 45 #include <sys/zone.h> 46 47 /* 48 * Processor set plugin for pools. 49 * 50 * This file contains various routines used by the common pools layer to create, 51 * modify, and destroy processor sets. All processor sets created by this 52 * plug-in are stored in the pool_pset_list doubly-linked list, which is 53 * guaranteed to always have an entry for the default processor set, 54 * pool_pset_default. 
 *
 * Interaction with zones:
 *
 * If pools are enabled, non-global zones only have visibility into the
 * pset of the pool to which they are bound.  This is accomplished by
 * changing the set of processors and processor sets which are visible
 * through both system call interfaces and system kstats.
 *
 * To avoid grabbing pool_lock() during cpu change operations, we cache
 * the pset the zone is currently bound to, and can read this value
 * while under cpu_lock.  The special psetid_t token ZONE_PS_INVAL means
 * that pools are disabled, and provides a mechanism for determining the
 * status of pools without grabbing pool_lock().
 *
 * To avoid grabbing any locks to determine the instantaneous value of
 * the number of configured and online cpus in the zone, we also cache
 * these values in a zone_t.  If these values are zero, the pools
 * facility must be disabled, in which case relevant system call
 * interfaces will return the values for the system as a whole.
 *
 * The various kstat interfaces are dealt with as follows: if pools are
 * disabled all cpu-related kstats should be exported to all zones.
 * When pools are enabled we begin maintaining a list of "permitted
 * zones" on a per-kstat basis.  There are various hooks throughout the
 * code to update this list when certain pools- or cpu-related events
 * occur.
81 */ 82 83 static list_t pool_pset_list; /* doubly-linked list of psets */ 84 pool_pset_t *pool_pset_default; /* default pset */ 85 hrtime_t pool_pset_mod; /* last modification time for psets */ 86 hrtime_t pool_cpu_mod; /* last modification time for CPUs */ 87 88 static pool_pset_t * 89 pool_lookup_pset_by_id(psetid_t psetid) 90 { 91 pool_pset_t *pset = pool_pset_default; 92 93 ASSERT(pool_lock_held()); 94 95 for (pset = list_head(&pool_pset_list); pset; 96 pset = list_next(&pool_pset_list, pset)) { 97 if (pset->pset_id == psetid) 98 return (pset); 99 } 100 return (NULL); 101 } 102 103 struct setup_arg { 104 psetid_t psetid; 105 cpu_t *cpu; 106 cpu_setup_t what; 107 }; 108 109 /* 110 * Callback function used to apply a cpu configuration event to a zone. 111 */ 112 static int 113 pool_pset_setup_cb(zone_t *zone, void *arg) 114 { 115 struct setup_arg *sa = arg; 116 117 ASSERT(MUTEX_HELD(&cpu_lock)); 118 ASSERT(INGLOBALZONE(curproc)); 119 ASSERT(zone != NULL); 120 121 if (zone == global_zone) 122 return (0); 123 if (zone_pset_get(zone) != sa->psetid) 124 return (0); /* ignore */ 125 switch (sa->what) { 126 case CPU_CONFIG: 127 cpu_visibility_configure(sa->cpu, zone); 128 break; 129 case CPU_UNCONFIG: 130 cpu_visibility_unconfigure(sa->cpu, zone); 131 break; 132 case CPU_ON: 133 cpu_visibility_online(sa->cpu, zone); 134 break; 135 case CPU_OFF: 136 cpu_visibility_offline(sa->cpu, zone); 137 break; 138 case CPU_CPUPART_IN: 139 cpu_visibility_add(sa->cpu, zone); 140 break; 141 case CPU_CPUPART_OUT: 142 cpu_visibility_remove(sa->cpu, zone); 143 break; 144 default: 145 cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what); 146 } 147 return (0); 148 } 149 150 /* 151 * Callback function to be executed when a noteworthy cpu event takes 152 * place. Will ensure that the event is reflected by the zones which 153 * were affected by it. 
154 */ 155 /* ARGSUSED */ 156 static int 157 pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg) 158 { 159 processorid_t cpuid = id; 160 struct setup_arg sarg; 161 int error; 162 cpu_t *c; 163 164 ASSERT(MUTEX_HELD(&cpu_lock)); 165 ASSERT(INGLOBALZONE(curproc)); 166 167 if (!pool_pset_enabled()) 168 return (0); 169 if (what != CPU_CONFIG && what != CPU_UNCONFIG && 170 what != CPU_ON && what != CPU_OFF && 171 what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT) 172 return (0); 173 c = cpu_get(cpuid); 174 ASSERT(c != NULL); 175 sarg.psetid = cpupart_query_cpu(c); 176 sarg.cpu = c; 177 sarg.what = what; 178 179 error = zone_walk(pool_pset_setup_cb, &sarg); 180 ASSERT(error == 0); 181 return (0); 182 } 183 184 /* 185 * Initialize processor set plugin. Called once at boot time. 186 */ 187 void 188 pool_pset_init(void) 189 { 190 ASSERT(pool_pset_default == NULL); 191 pool_pset_default = kmem_zalloc(sizeof (pool_pset_t), KM_SLEEP); 192 pool_pset_default->pset_id = PS_NONE; 193 pool_pset_default->pset_npools = 1; /* for pool_default */ 194 pool_default->pool_pset = pool_pset_default; 195 list_create(&pool_pset_list, sizeof (pool_pset_t), 196 offsetof(pool_pset_t, pset_link)); 197 list_insert_head(&pool_pset_list, pool_pset_default); 198 mutex_enter(&cpu_lock); 199 register_cpu_setup_func(pool_pset_cpu_setup, NULL); 200 mutex_exit(&cpu_lock); 201 } 202 203 /* 204 * Dummy wrapper function that returns 0 to satisfy zone_walk(). 205 */ 206 static int 207 pool_pset_zone_pset_set(zone_t *zone, void *arg) 208 { 209 psetid_t psetid = (psetid_t)(uintptr_t)arg; 210 211 ASSERT(MUTEX_HELD(&cpu_lock)); 212 zone_pset_set(zone, psetid); 213 return (0); 214 } 215 216 /* 217 * Enable processor set plugin. 218 */ 219 int 220 pool_pset_enable(void) 221 { 222 int error; 223 nvlist_t *props; 224 225 ASSERT(pool_lock_held()); 226 ASSERT(INGLOBALZONE(curproc)); 227 /* 228 * Can't enable pools if there are existing cpu partitions. 
229 */ 230 mutex_enter(&cpu_lock); 231 if (cp_numparts > 1) { 232 mutex_exit(&cpu_lock); 233 return (EEXIST); 234 } 235 236 /* 237 * We want to switch things such that everything that was tagged with 238 * the special ALL_ZONES token now is explicitly visible to all zones: 239 * first add individual zones to the visibility list then remove the 240 * special "ALL_ZONES" token. There must only be the default pset 241 * (PS_NONE) active if pools are being enabled, so we only need to 242 * deal with it. 243 * 244 * We want to make pool_pset_enabled() start returning B_TRUE before 245 * we call any of the visibility update functions. 246 */ 247 global_zone->zone_psetid = PS_NONE; 248 /* 249 * We need to explicitly handle the global zone since 250 * zone_pset_set() won't modify it. 251 */ 252 pool_pset_visibility_add(PS_NONE, global_zone); 253 /* 254 * A NULL argument means the ALL_ZONES token. 255 */ 256 pool_pset_visibility_remove(PS_NONE, NULL); 257 error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE); 258 ASSERT(error == 0); 259 260 /* 261 * It is safe to drop cpu_lock here. We're still 262 * holding pool_lock so no new cpu partitions can 263 * be created while we're here. 264 */ 265 mutex_exit(&cpu_lock); 266 (void) nvlist_alloc(&pool_pset_default->pset_props, 267 NV_UNIQUE_NAME, KM_SLEEP); 268 props = pool_pset_default->pset_props; 269 (void) nvlist_add_string(props, "pset.name", "pset_default"); 270 (void) nvlist_add_string(props, "pset.comment", ""); 271 (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE); 272 (void) nvlist_add_string(props, "pset.units", "population"); 273 (void) nvlist_add_byte(props, "pset.default", 1); 274 (void) nvlist_add_uint64(props, "pset.max", 65536); 275 (void) nvlist_add_uint64(props, "pset.min", 1); 276 pool_pset_mod = pool_cpu_mod = gethrtime(); 277 return (0); 278 } 279 280 /* 281 * Disable processor set plugin. 
282 */ 283 int 284 pool_pset_disable(void) 285 { 286 processorid_t cpuid; 287 cpu_t *cpu; 288 int error; 289 290 ASSERT(pool_lock_held()); 291 ASSERT(INGLOBALZONE(curproc)); 292 293 mutex_enter(&cpu_lock); 294 if (cp_numparts > 1) { /* make sure only default pset is left */ 295 mutex_exit(&cpu_lock); 296 return (EBUSY); 297 } 298 /* 299 * Remove all non-system CPU and processor set properties 300 */ 301 for (cpuid = 0; cpuid < NCPU; cpuid++) { 302 if ((cpu = cpu_get(cpuid)) == NULL) 303 continue; 304 if (cpu->cpu_props != NULL) { 305 (void) nvlist_free(cpu->cpu_props); 306 cpu->cpu_props = NULL; 307 } 308 } 309 310 /* 311 * We want to switch things such that everything is now visible 312 * to ALL_ZONES: first add the special "ALL_ZONES" token to the 313 * visibility list then remove individual zones. There must 314 * only be the default pset active if pools are being disabled, 315 * so we only need to deal with it. 316 */ 317 error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL); 318 ASSERT(error == 0); 319 pool_pset_visibility_add(PS_NONE, NULL); 320 pool_pset_visibility_remove(PS_NONE, global_zone); 321 /* 322 * pool_pset_enabled() will henceforth return B_FALSE. 323 */ 324 global_zone->zone_psetid = ZONE_PS_INVAL; 325 mutex_exit(&cpu_lock); 326 if (pool_pset_default->pset_props != NULL) { 327 nvlist_free(pool_pset_default->pset_props); 328 pool_pset_default->pset_props = NULL; 329 } 330 return (0); 331 } 332 333 /* 334 * Create new processor set and give it a temporary name. 
335 */ 336 int 337 pool_pset_create(psetid_t *id) 338 { 339 char pset_name[40]; 340 pool_pset_t *pset; 341 psetid_t psetid; 342 int err; 343 344 ASSERT(pool_lock_held()); 345 if ((err = cpupart_create(&psetid)) != 0) 346 return (err); 347 pset = kmem_alloc(sizeof (pool_pset_t), KM_SLEEP); 348 pset->pset_id = *id = psetid; 349 pset->pset_npools = 0; 350 (void) nvlist_alloc(&pset->pset_props, NV_UNIQUE_NAME, KM_SLEEP); 351 (void) nvlist_add_int64(pset->pset_props, "pset.sys_id", psetid); 352 (void) nvlist_add_byte(pset->pset_props, "pset.default", 0); 353 pool_pset_mod = gethrtime(); 354 (void) snprintf(pset_name, sizeof (pset_name), "pset_%lld", 355 pool_pset_mod); 356 (void) nvlist_add_string(pset->pset_props, "pset.name", pset_name); 357 list_insert_tail(&pool_pset_list, pset); 358 return (0); 359 } 360 361 /* 362 * Destroy existing processor set. 363 */ 364 int 365 pool_pset_destroy(psetid_t psetid) 366 { 367 pool_pset_t *pset; 368 int ret; 369 370 ASSERT(pool_lock_held()); 371 372 if (psetid == PS_NONE) 373 return (EINVAL); 374 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 375 return (ESRCH); 376 if (pset->pset_npools > 0) /* can't destroy associated psets */ 377 return (EBUSY); 378 if ((ret = cpupart_destroy(pset->pset_id)) != 0) 379 return (ret); 380 (void) nvlist_free(pset->pset_props); 381 list_remove(&pool_pset_list, pset); 382 pool_pset_mod = gethrtime(); 383 kmem_free(pset, sizeof (pool_pset_t)); 384 return (0); 385 } 386 387 /* 388 * Change the visibility of a pset (and all contained cpus) in a zone. 389 * A NULL zone argument implies the special ALL_ZONES token. 390 */ 391 static void 392 pool_pset_visibility_change(psetid_t psetid, zone_t *zone, boolean_t add) 393 { 394 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 395 cpupart_t *cp; 396 cpu_t *c; 397 398 ASSERT(MUTEX_HELD(&cpu_lock)); 399 ASSERT(psetid != ZONE_PS_INVAL); 400 401 cp = cpupart_find(psetid); 402 ASSERT(cp != NULL); 403 if (cp->cp_kstat != NULL) { 404 if (add) 405 kstat_zone_add(cp->cp_kstat, zoneid); 406 else 407 kstat_zone_remove(cp->cp_kstat, zoneid); 408 } 409 410 c = cpu_list; 411 do { 412 ASSERT(c != NULL); 413 if (c->cpu_part == cp && !cpu_is_poweredoff(c)) { 414 if (add) 415 cpu_visibility_add(c, zone); 416 else 417 cpu_visibility_remove(c, zone); 418 } 419 } while ((c = c->cpu_next) != cpu_list); 420 } 421 422 /* 423 * Make the processor set visible to the zone. A NULL value for 424 * the zone means that the special ALL_ZONES token should be added to 425 * the visibility list. 426 */ 427 void 428 pool_pset_visibility_add(psetid_t psetid, zone_t *zone) 429 { 430 pool_pset_visibility_change(psetid, zone, B_TRUE); 431 } 432 433 /* 434 * Remove zone's visibility into the processor set. A NULL value for 435 * the zone means that the special ALL_ZONES token should be removed 436 * from the visibility list. 437 */ 438 void 439 pool_pset_visibility_remove(psetid_t psetid, zone_t *zone) 440 { 441 pool_pset_visibility_change(psetid, zone, B_FALSE); 442 } 443 444 /* 445 * Quick way of seeing if pools are enabled (as far as processor sets are 446 * concerned) without holding pool_lock(). 447 */ 448 boolean_t 449 pool_pset_enabled(void) 450 { 451 ASSERT(MUTEX_HELD(&cpu_lock)); 452 453 return (zone_pset_get(global_zone) != ZONE_PS_INVAL); 454 } 455 456 struct assoc_zone_arg { 457 poolid_t poolid; 458 psetid_t newpsetid; 459 }; 460 461 /* 462 * Callback function to update a zone's processor set visibility when 463 * a pool is associated with a processor set. 
464 */ 465 static int 466 pool_pset_assoc_zone_cb(zone_t *zone, void *arg) 467 { 468 struct assoc_zone_arg *aza = arg; 469 pool_t *pool; 470 zoneid_t zoneid = zone->zone_id; 471 472 ASSERT(pool_lock_held()); 473 ASSERT(MUTEX_HELD(&cpu_lock)); 474 475 if (zoneid == GLOBAL_ZONEID) 476 return (0); 477 pool = zone_pool_get(zone); 478 if (pool->pool_id == aza->poolid) 479 zone_pset_set(zone, aza->newpsetid); 480 return (0); 481 } 482 483 /* 484 * Associate pool with new processor set. 485 */ 486 int 487 pool_pset_assoc(poolid_t poolid, psetid_t psetid) 488 { 489 pool_t *pool; 490 pool_pset_t *pset, *oldpset; 491 int err = 0; 492 493 ASSERT(pool_lock_held()); 494 495 if ((pool = pool_lookup_pool_by_id(poolid)) == NULL || 496 (pset = pool_lookup_pset_by_id(psetid)) == NULL) { 497 return (ESRCH); 498 } 499 if (pool->pool_pset->pset_id == psetid) { 500 /* 501 * Already associated. 502 */ 503 return (0); 504 } 505 506 /* 507 * Hang the new pset off the pool, and rebind all of the pool's 508 * processes to it. If pool_do_bind fails, all processes will remain 509 * bound to the old set. 510 */ 511 oldpset = pool->pool_pset; 512 pool->pool_pset = pset; 513 err = pool_do_bind(pool, P_POOLID, poolid, POOL_BIND_PSET); 514 if (err) { 515 pool->pool_pset = oldpset; 516 } else { 517 struct assoc_zone_arg azarg; 518 519 /* 520 * Update zones' visibility to reflect changes. 521 */ 522 azarg.poolid = poolid; 523 azarg.newpsetid = pset->pset_id; 524 mutex_enter(&cpu_lock); 525 err = zone_walk(pool_pset_assoc_zone_cb, &azarg); 526 ASSERT(err == 0); 527 mutex_exit(&cpu_lock); 528 529 oldpset->pset_npools--; 530 pset->pset_npools++; 531 } 532 return (err); 533 } 534 535 /* 536 * Transfer specified CPUs between processor sets. 
537 */ 538 int 539 pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids) 540 { 541 struct cpu *cpu; 542 int ret = 0; 543 int id; 544 545 ASSERT(pool_lock_held()); 546 ASSERT(INGLOBALZONE(curproc)); 547 548 if (size == 0 || size > max_ncpus) /* quick sanity check */ 549 return (EINVAL); 550 551 mutex_enter(&cpu_lock); 552 for (id = 0; id < size; id++) { 553 if ((cpu = cpu_get((processorid_t)ids[id])) == NULL || 554 cpupart_query_cpu(cpu) != src) { 555 ret = EINVAL; 556 break; 557 } 558 if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0) 559 break; 560 } 561 mutex_exit(&cpu_lock); 562 if (ret == 0) 563 pool_pset_mod = gethrtime(); 564 return (ret); 565 } 566 567 /* 568 * Bind process to processor set. This should never fail because 569 * we should've done all preliminary checks before calling it. 570 */ 571 void 572 pool_pset_bind(proc_t *p, psetid_t psetid, void *projbuf, void *zonebuf) 573 { 574 kthread_t *t; 575 int ret; 576 577 ASSERT(pool_lock_held()); 578 ASSERT(MUTEX_HELD(&cpu_lock)); 579 ASSERT(MUTEX_HELD(&pidlock)); 580 ASSERT(MUTEX_HELD(&p->p_lock)); 581 582 if ((t = p->p_tlist) == NULL) 583 return; 584 do { 585 ret = cpupart_bind_thread(t, psetid, 0, projbuf, zonebuf); 586 ASSERT(ret == 0); 587 t->t_bind_pset = psetid; 588 } while ((t = t->t_forw) != p->p_tlist); 589 } 590 591 /* 592 * See the comment above pool_do_bind() for the semantics of the pset_bind_*() 593 * functions. These must be kept in sync with cpupart_move_thread, and 594 * anything else that could fail a pool_pset_bind. 595 * 596 * Returns non-zero errno on failure and zero on success. 597 * Iff successful, cpu_lock is held on return. 
598 */ 599 int 600 pset_bind_start(proc_t **procs, pool_t *pool) 601 { 602 cred_t *pcred; 603 proc_t *p, **pp; 604 kthread_t *t; 605 cpupart_t *newpp; 606 int ret; 607 608 extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int); 609 610 ASSERT(pool_lock_held()); 611 ASSERT(INGLOBALZONE(curproc)); 612 613 mutex_enter(&cpu_lock); 614 weakbinding_stop(); 615 616 newpp = cpupart_find(pool->pool_pset->pset_id); 617 ASSERT(newpp != NULL); 618 if (newpp->cp_cpulist == NULL) { 619 weakbinding_start(); 620 mutex_exit(&cpu_lock); 621 return (ENOTSUP); 622 } 623 624 pcred = crgetcred(); 625 626 /* 627 * Check for the PRIV_PROC_PRIOCNTL privilege that is required 628 * to enter and exit scheduling classes. If other privileges 629 * are required by CL_ENTERCLASS/CL_CANEXIT types of routines 630 * in the future, this code will have to be updated. 631 */ 632 if (secpolicy_setpriority(pcred) != 0) { 633 weakbinding_start(); 634 mutex_exit(&cpu_lock); 635 crfree(pcred); 636 return (EPERM); 637 } 638 639 for (pp = procs; (p = *pp) != NULL; pp++) { 640 mutex_enter(&p->p_lock); 641 if ((t = p->p_tlist) == NULL) { 642 mutex_exit(&p->p_lock); 643 continue; 644 } 645 /* 646 * Check our basic permissions to control this process. 647 */ 648 if (!prochasprocperm(p, curproc, pcred)) { 649 mutex_exit(&p->p_lock); 650 weakbinding_start(); 651 mutex_exit(&cpu_lock); 652 crfree(pcred); 653 return (EPERM); 654 } 655 do { 656 /* 657 * Check that all threads can be moved to 658 * a new processor set. 
659 */ 660 thread_lock(t); 661 ret = cpupart_movable_thread(t, newpp, 0); 662 thread_unlock(t); 663 if (ret != 0) { 664 mutex_exit(&p->p_lock); 665 weakbinding_start(); 666 mutex_exit(&cpu_lock); 667 crfree(pcred); 668 return (ret); 669 } 670 } while ((t = t->t_forw) != p->p_tlist); 671 mutex_exit(&p->p_lock); 672 } 673 crfree(pcred); 674 return (0); /* with cpu_lock held and weakbinding stopped */ 675 } 676 677 /*ARGSUSED*/ 678 void 679 pset_bind_abort(proc_t **procs, pool_t *pool) 680 { 681 mutex_exit(&cpu_lock); 682 } 683 684 void 685 pset_bind_finish(void) 686 { 687 weakbinding_start(); 688 mutex_exit(&cpu_lock); 689 } 690 691 static pool_property_t pool_pset_props[] = { 692 { "pset.name", DATA_TYPE_STRING, PP_RDWR }, 693 { "pset.comment", DATA_TYPE_STRING, PP_RDWR }, 694 { "pset.sys_id", DATA_TYPE_UINT64, PP_READ }, 695 { "pset.units", DATA_TYPE_STRING, PP_RDWR }, 696 { "pset.default", DATA_TYPE_BYTE, PP_READ }, 697 { "pset.min", DATA_TYPE_UINT64, PP_RDWR }, 698 { "pset.max", DATA_TYPE_UINT64, PP_RDWR }, 699 { "pset.size", DATA_TYPE_UINT64, PP_READ }, 700 { "pset.load", DATA_TYPE_UINT64, PP_READ }, 701 { "pset.poold.objectives", DATA_TYPE_STRING, 702 PP_RDWR | PP_OPTIONAL }, 703 { NULL, 0, 0 } 704 }; 705 706 static pool_property_t pool_cpu_props[] = { 707 { "cpu.sys_id", DATA_TYPE_UINT64, PP_READ }, 708 { "cpu.comment", DATA_TYPE_STRING, PP_RDWR }, 709 { "cpu.status", DATA_TYPE_STRING, PP_RDWR }, 710 { "cpu.pinned", DATA_TYPE_BYTE, 711 PP_RDWR | PP_OPTIONAL }, 712 { NULL, 0, 0 } 713 }; 714 715 /* 716 * Put property on the specified processor set. 
717 */ 718 int 719 pool_pset_propput(psetid_t psetid, nvpair_t *pair) 720 { 721 pool_pset_t *pset; 722 int ret; 723 724 ASSERT(pool_lock_held()); 725 726 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 727 return (ESRCH); 728 ret = pool_propput_common(pset->pset_props, pair, pool_pset_props); 729 if (ret == 0) 730 pool_pset_mod = gethrtime(); 731 return (ret); 732 } 733 734 /* 735 * Remove existing processor set property. 736 */ 737 int 738 pool_pset_proprm(psetid_t psetid, char *name) 739 { 740 pool_pset_t *pset; 741 int ret; 742 743 ASSERT(pool_lock_held()); 744 745 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 746 return (EINVAL); 747 ret = pool_proprm_common(pset->pset_props, name, pool_pset_props); 748 if (ret == 0) 749 pool_pset_mod = gethrtime(); 750 return (ret); 751 } 752 753 /* 754 * Put new CPU property. 755 * Handle special case of "cpu.status". 756 */ 757 int 758 pool_cpu_propput(processorid_t cpuid, nvpair_t *pair) 759 { 760 int ret = 0; 761 cpu_t *cpu; 762 763 ASSERT(pool_lock_held()); 764 ASSERT(INGLOBALZONE(curproc)); 765 766 if (nvpair_type(pair) == DATA_TYPE_STRING && 767 strcmp(nvpair_name(pair), "cpu.status") == 0) { 768 char *val; 769 int status; 770 int old_status; 771 (void) nvpair_value_string(pair, &val); 772 if (strcmp(val, PS_OFFLINE) == 0) 773 status = P_OFFLINE; 774 else if (strcmp(val, PS_ONLINE) == 0) 775 status = P_ONLINE; 776 else if (strcmp(val, PS_NOINTR) == 0) 777 status = P_NOINTR; 778 else if (strcmp(val, PS_FAULTED) == 0) 779 status = P_FAULTED; 780 else if (strcmp(val, PS_SPARE) == 0) 781 status = P_SPARE; 782 else 783 return (EINVAL); 784 ret = p_online_internal(cpuid, status, &old_status); 785 } else { 786 mutex_enter(&cpu_lock); 787 if ((cpu = cpu_get(cpuid)) == NULL) 788 ret = EINVAL; 789 if (cpu->cpu_props == NULL) { 790 (void) nvlist_alloc(&cpu->cpu_props, 791 NV_UNIQUE_NAME, KM_SLEEP); 792 (void) nvlist_add_string(cpu->cpu_props, 793 "cpu.comment", ""); 794 } 795 ret = pool_propput_common(cpu->cpu_props, 
pair, pool_cpu_props); 796 if (ret == 0) 797 pool_cpu_mod = gethrtime(); 798 mutex_exit(&cpu_lock); 799 } 800 return (ret); 801 } 802 803 /* 804 * Remove existing CPU property. 805 */ 806 int 807 pool_cpu_proprm(processorid_t cpuid, char *name) 808 { 809 int ret; 810 cpu_t *cpu; 811 812 ASSERT(pool_lock_held()); 813 ASSERT(INGLOBALZONE(curproc)); 814 815 mutex_enter(&cpu_lock); 816 if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) { 817 ret = EINVAL; 818 } else { 819 if (cpu->cpu_props == NULL) 820 ret = EINVAL; 821 else 822 ret = pool_proprm_common(cpu->cpu_props, name, 823 pool_cpu_props); 824 } 825 if (ret == 0) 826 pool_cpu_mod = gethrtime(); 827 mutex_exit(&cpu_lock); 828 return (ret); 829 } 830 831 /* 832 * This macro returns load average multiplied by 1000 w/o losing precision 833 */ 834 #define PSET_LOAD(f) (((f >> 16) * 1000) + (((f & 0xffff) * 1000) / 0xffff)) 835 836 /* 837 * Take a snapshot of the current state of processor sets and CPUs, 838 * pack it in the exacct format, and attach it to specified exacct record. 839 */ 840 int 841 pool_pset_pack(ea_object_t *eo_system) 842 { 843 ea_object_t *eo_pset, *eo_cpu; 844 cpupart_t *cpupart; 845 psetid_t mypsetid; 846 pool_pset_t *pset; 847 nvlist_t *nvl; 848 size_t bufsz; 849 cpu_t *cpu; 850 char *buf; 851 int ncpu; 852 853 ASSERT(pool_lock_held()); 854 855 mutex_enter(&cpu_lock); 856 mypsetid = zone_pset_get(curproc->p_zone); 857 for (pset = list_head(&pool_pset_list); pset; 858 pset = list_next(&pool_pset_list, pset)) { 859 psetid_t psetid = pset->pset_id; 860 861 if (!INGLOBALZONE(curproc) && mypsetid != psetid) 862 continue; 863 cpupart = cpupart_find(psetid); 864 ASSERT(cpupart != NULL); 865 eo_pset = ea_alloc_group(EXT_GROUP | 866 EXC_LOCAL | EXD_GROUP_PSET); 867 (void) ea_attach_item(eo_pset, &psetid, sizeof (id_t), 868 EXC_LOCAL | EXD_PSET_PSETID | EXT_UINT32); 869 /* 870 * Pack info for all CPUs in this processor set. 
871 */ 872 ncpu = 0; 873 cpu = cpu_list; 874 do { 875 if (cpu->cpu_part != cpupart) /* not our pset */ 876 continue; 877 ncpu++; 878 eo_cpu = ea_alloc_group(EXT_GROUP 879 | EXC_LOCAL | EXD_GROUP_CPU); 880 (void) ea_attach_item(eo_cpu, &cpu->cpu_id, 881 sizeof (processorid_t), 882 EXC_LOCAL | EXD_CPU_CPUID | EXT_UINT32); 883 if (cpu->cpu_props == NULL) { 884 (void) nvlist_alloc(&cpu->cpu_props, 885 NV_UNIQUE_NAME, KM_SLEEP); 886 (void) nvlist_add_string(cpu->cpu_props, 887 "cpu.comment", ""); 888 } 889 (void) nvlist_dup(cpu->cpu_props, &nvl, KM_SLEEP); 890 (void) nvlist_add_int64(nvl, "cpu.sys_id", cpu->cpu_id); 891 (void) nvlist_add_string(nvl, "cpu.status", 892 (char *)cpu_get_state_str(cpu)); 893 buf = NULL; 894 bufsz = 0; 895 (void) nvlist_pack(nvl, &buf, &bufsz, 896 NV_ENCODE_NATIVE, 0); 897 (void) ea_attach_item(eo_cpu, buf, bufsz, 898 EXC_LOCAL | EXD_CPU_PROP | EXT_RAW); 899 (void) nvlist_free(nvl); 900 kmem_free(buf, bufsz); 901 (void) ea_attach_to_group(eo_pset, eo_cpu); 902 } while ((cpu = cpu->cpu_next) != cpu_list); 903 904 (void) nvlist_dup(pset->pset_props, &nvl, KM_SLEEP); 905 (void) nvlist_add_uint64(nvl, "pset.size", ncpu); 906 (void) nvlist_add_uint64(nvl, "pset.load", 907 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0])); 908 buf = NULL; 909 bufsz = 0; 910 (void) nvlist_pack(nvl, &buf, &bufsz, NV_ENCODE_NATIVE, 0); 911 (void) ea_attach_item(eo_pset, buf, bufsz, 912 EXC_LOCAL | EXD_PSET_PROP | EXT_RAW); 913 (void) nvlist_free(nvl); 914 kmem_free(buf, bufsz); 915 916 (void) ea_attach_to_group(eo_system, eo_pset); 917 } 918 mutex_exit(&cpu_lock); 919 return (0); 920 } 921 922 /* 923 * Get dynamic property for processor sets. 924 * The only dynamic property currently implemented is "pset.load". 
925 */ 926 int 927 pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl) 928 { 929 cpupart_t *cpupart; 930 pool_pset_t *pset; 931 int ret = ESRCH; 932 933 ASSERT(pool_lock_held()); 934 935 mutex_enter(&cpu_lock); 936 pset = pool_lookup_pset_by_id(psetid); 937 cpupart = cpupart_find(psetid); 938 if (cpupart == NULL || pset == NULL) { 939 mutex_exit(&cpu_lock); 940 return (EINVAL); 941 } 942 if (strcmp(name, "pset.load") == 0) 943 ret = nvlist_add_uint64(nvl, "pset.load", 944 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0])); 945 else 946 ret = EINVAL; 947 mutex_exit(&cpu_lock); 948 return (ret); 949 } 950 951 /* 952 * Get dynamic property for CPUs. 953 * The only dynamic property currently implemented is "cpu.status". 954 */ 955 int 956 pool_cpu_propget(processorid_t cpuid, char *name, nvlist_t *nvl) 957 { 958 int ret = ESRCH; 959 cpu_t *cpu; 960 961 ASSERT(pool_lock_held()); 962 963 mutex_enter(&cpu_lock); 964 if ((cpu = cpu_get(cpuid)) == NULL) { 965 mutex_exit(&cpu_lock); 966 return (ESRCH); 967 } 968 if (strcmp(name, "cpu.status") == 0) { 969 ret = nvlist_add_string(nvl, "cpu.status", 970 (char *)cpu_get_state_str(cpu)); 971 } else { 972 ret = EINVAL; 973 } 974 mutex_exit(&cpu_lock); 975 return (ret); 976 } 977