1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2019 Joyent, Inc. 29 */ 30 31 #include <sys/pool.h> 32 #include <sys/pool_impl.h> 33 #include <sys/pool_pset.h> 34 #include <sys/cpuvar.h> 35 #include <sys/cpupart.h> 36 #include <sys/mutex.h> 37 #include <sys/errno.h> 38 #include <sys/systm.h> 39 #include <sys/cmn_err.h> 40 #include <sys/fss.h> 41 #include <sys/exacct.h> 42 #include <sys/time.h> 43 #include <sys/policy.h> 44 #include <sys/class.h> 45 #include <sys/list.h> 46 #include <sys/cred.h> 47 #include <sys/zone.h> 48 49 /* 50 * Processor set plugin for pools. 51 * 52 * This file contains various routines used by the common pools layer to create, 53 * modify, and destroy processor sets. All processor sets created by this 54 * plug-in are stored in the pool_pset_list doubly-linked list, which is 55 * guaranteed to always have an entry for the default processor set, 56 * pool_pset_default. 
 *
 * Interaction with zones:
 *
 * If pools are enabled, non-global zones only have visibility into the
 * pset of the pool to which they are bound.  This is accomplished by
 * changing the set of processors and processor sets which are visible
 * through both system call interfaces and system kstats.
 *
 * To avoid grabbing pool_lock() during cpu change operations, we cache
 * the pset the zone is currently bound to, and can read this value
 * while under cpu_lock.  The special psetid_t token ZONE_PS_INVAL means
 * that pools are disabled, and provides a mechanism for determining the
 * status of pools without grabbing pool_lock().
 *
 * To avoid grabbing any locks to determine the instantaneous value of
 * the number of configured and online cpus in the zone, we also cache
 * these values in a zone_t.  If these values are zero, the pools
 * facility must be disabled, in which case relevant system call
 * interfaces will return the values for the system as a whole.
 *
 * The various kstat interfaces are dealt with as follows: if pools are
 * disabled all cpu-related kstats should be exported to all zones.
 * When pools are enabled we begin maintaining a list of "permitted
 * zones" on a per-kstat basis.  There are various hooks throughout the
 * code to update this list when certain pools- or cpu-related events
 * occur.
83 */ 84 85 static list_t pool_pset_list; /* doubly-linked list of psets */ 86 pool_pset_t *pool_pset_default; /* default pset */ 87 hrtime_t pool_pset_mod; /* last modification time for psets */ 88 hrtime_t pool_cpu_mod; /* last modification time for CPUs */ 89 90 static pool_pset_t * 91 pool_lookup_pset_by_id(psetid_t psetid) 92 { 93 pool_pset_t *pset = pool_pset_default; 94 95 ASSERT(pool_lock_held()); 96 97 for (pset = list_head(&pool_pset_list); pset; 98 pset = list_next(&pool_pset_list, pset)) { 99 if (pset->pset_id == psetid) 100 return (pset); 101 } 102 return (NULL); 103 } 104 105 struct setup_arg { 106 psetid_t psetid; 107 cpu_t *cpu; 108 cpu_setup_t what; 109 }; 110 111 /* 112 * Callback function used to apply a cpu configuration event to a zone. 113 */ 114 static int 115 pool_pset_setup_cb(zone_t *zone, void *arg) 116 { 117 struct setup_arg *sa = arg; 118 119 ASSERT(MUTEX_HELD(&cpu_lock)); 120 ASSERT(INGLOBALZONE(curproc)); 121 ASSERT(zone != NULL); 122 123 if (zone == global_zone) 124 return (0); 125 if (zone_pset_get(zone) != sa->psetid) 126 return (0); /* ignore */ 127 switch (sa->what) { 128 case CPU_CONFIG: 129 cpu_visibility_configure(sa->cpu, zone); 130 break; 131 case CPU_UNCONFIG: 132 cpu_visibility_unconfigure(sa->cpu, zone); 133 break; 134 case CPU_ON: 135 cpu_visibility_online(sa->cpu, zone); 136 break; 137 case CPU_OFF: 138 cpu_visibility_offline(sa->cpu, zone); 139 break; 140 case CPU_CPUPART_IN: 141 cpu_visibility_add(sa->cpu, zone); 142 break; 143 case CPU_CPUPART_OUT: 144 cpu_visibility_remove(sa->cpu, zone); 145 break; 146 default: 147 cmn_err(CE_PANIC, "invalid cpu_setup_t value %d", sa->what); 148 } 149 return (0); 150 } 151 152 /* 153 * Callback function to be executed when a noteworthy cpu event takes 154 * place. Will ensure that the event is reflected by the zones which 155 * were affected by it. 
156 */ 157 /* ARGSUSED */ 158 static int 159 pool_pset_cpu_setup(cpu_setup_t what, int id, void *arg) 160 { 161 processorid_t cpuid = id; 162 struct setup_arg sarg; 163 int error; 164 cpu_t *c; 165 166 ASSERT(MUTEX_HELD(&cpu_lock)); 167 ASSERT(INGLOBALZONE(curproc)); 168 169 if (!pool_pset_enabled()) 170 return (0); 171 if (what != CPU_CONFIG && what != CPU_UNCONFIG && 172 what != CPU_ON && what != CPU_OFF && 173 what != CPU_CPUPART_IN && what != CPU_CPUPART_OUT) 174 return (0); 175 c = cpu_get(cpuid); 176 ASSERT(c != NULL); 177 sarg.psetid = cpupart_query_cpu(c); 178 sarg.cpu = c; 179 sarg.what = what; 180 181 error = zone_walk(pool_pset_setup_cb, &sarg); 182 ASSERT(error == 0); 183 return (0); 184 } 185 186 /* 187 * Initialize processor set plugin. Called once at boot time. 188 */ 189 void 190 pool_pset_init(void) 191 { 192 ASSERT(pool_pset_default == NULL); 193 pool_pset_default = kmem_zalloc(sizeof (pool_pset_t), KM_SLEEP); 194 pool_pset_default->pset_id = PS_NONE; 195 pool_pset_default->pset_npools = 1; /* for pool_default */ 196 pool_default->pool_pset = pool_pset_default; 197 list_create(&pool_pset_list, sizeof (pool_pset_t), 198 offsetof(pool_pset_t, pset_link)); 199 list_insert_head(&pool_pset_list, pool_pset_default); 200 mutex_enter(&cpu_lock); 201 register_cpu_setup_func(pool_pset_cpu_setup, NULL); 202 mutex_exit(&cpu_lock); 203 } 204 205 /* 206 * Dummy wrapper function that returns 0 to satisfy zone_walk(). 207 */ 208 static int 209 pool_pset_zone_pset_set(zone_t *zone, void *arg) 210 { 211 psetid_t psetid = (psetid_t)(uintptr_t)arg; 212 213 ASSERT(MUTEX_HELD(&cpu_lock)); 214 zone_pset_set(zone, psetid); 215 return (0); 216 } 217 218 /* 219 * Enable processor set plugin. 220 */ 221 int 222 pool_pset_enable(void) 223 { 224 int error; 225 nvlist_t *props; 226 227 ASSERT(pool_lock_held()); 228 ASSERT(INGLOBALZONE(curproc)); 229 /* 230 * Can't enable pools if there are existing cpu partitions. 
231 */ 232 mutex_enter(&cpu_lock); 233 if (cp_numparts > 1) { 234 mutex_exit(&cpu_lock); 235 return (EEXIST); 236 } 237 238 /* 239 * We want to switch things such that everything that was tagged with 240 * the special ALL_ZONES token now is explicitly visible to all zones: 241 * first add individual zones to the visibility list then remove the 242 * special "ALL_ZONES" token. There must only be the default pset 243 * (PS_NONE) active if pools are being enabled, so we only need to 244 * deal with it. 245 * 246 * We want to make pool_pset_enabled() start returning B_TRUE before 247 * we call any of the visibility update functions. 248 */ 249 global_zone->zone_psetid = PS_NONE; 250 /* 251 * We need to explicitly handle the global zone since 252 * zone_pset_set() won't modify it. 253 */ 254 pool_pset_visibility_add(PS_NONE, global_zone); 255 /* 256 * A NULL argument means the ALL_ZONES token. 257 */ 258 pool_pset_visibility_remove(PS_NONE, NULL); 259 error = zone_walk(pool_pset_zone_pset_set, (void *)PS_NONE); 260 ASSERT(error == 0); 261 262 /* 263 * It is safe to drop cpu_lock here. We're still 264 * holding pool_lock so no new cpu partitions can 265 * be created while we're here. 266 */ 267 mutex_exit(&cpu_lock); 268 (void) nvlist_alloc(&pool_pset_default->pset_props, 269 NV_UNIQUE_NAME, KM_SLEEP); 270 props = pool_pset_default->pset_props; 271 (void) nvlist_add_string(props, "pset.name", "pset_default"); 272 (void) nvlist_add_string(props, "pset.comment", ""); 273 (void) nvlist_add_int64(props, "pset.sys_id", PS_NONE); 274 (void) nvlist_add_string(props, "pset.units", "population"); 275 (void) nvlist_add_byte(props, "pset.default", 1); 276 (void) nvlist_add_uint64(props, "pset.max", 65536); 277 (void) nvlist_add_uint64(props, "pset.min", 1); 278 pool_pset_mod = pool_cpu_mod = gethrtime(); 279 return (0); 280 } 281 282 /* 283 * Disable processor set plugin. 
284 */ 285 int 286 pool_pset_disable(void) 287 { 288 processorid_t cpuid; 289 cpu_t *cpu; 290 int error; 291 292 ASSERT(pool_lock_held()); 293 ASSERT(INGLOBALZONE(curproc)); 294 295 mutex_enter(&cpu_lock); 296 if (cp_numparts > 1) { /* make sure only default pset is left */ 297 mutex_exit(&cpu_lock); 298 return (EBUSY); 299 } 300 /* 301 * Remove all non-system CPU and processor set properties 302 */ 303 for (cpuid = 0; cpuid < NCPU; cpuid++) { 304 if ((cpu = cpu_get(cpuid)) == NULL) 305 continue; 306 if (cpu->cpu_props != NULL) { 307 (void) nvlist_free(cpu->cpu_props); 308 cpu->cpu_props = NULL; 309 } 310 } 311 312 /* 313 * We want to switch things such that everything is now visible 314 * to ALL_ZONES: first add the special "ALL_ZONES" token to the 315 * visibility list then remove individual zones. There must 316 * only be the default pset active if pools are being disabled, 317 * so we only need to deal with it. 318 */ 319 error = zone_walk(pool_pset_zone_pset_set, (void *)ZONE_PS_INVAL); 320 ASSERT(error == 0); 321 pool_pset_visibility_add(PS_NONE, NULL); 322 pool_pset_visibility_remove(PS_NONE, global_zone); 323 /* 324 * pool_pset_enabled() will henceforth return B_FALSE. 325 */ 326 global_zone->zone_psetid = ZONE_PS_INVAL; 327 mutex_exit(&cpu_lock); 328 if (pool_pset_default->pset_props != NULL) { 329 nvlist_free(pool_pset_default->pset_props); 330 pool_pset_default->pset_props = NULL; 331 } 332 return (0); 333 } 334 335 /* 336 * Create new processor set and give it a temporary name. 
337 */ 338 int 339 pool_pset_create(psetid_t *id) 340 { 341 char pset_name[40]; 342 pool_pset_t *pset; 343 psetid_t psetid; 344 int err; 345 346 ASSERT(pool_lock_held()); 347 if ((err = cpupart_create(&psetid)) != 0) 348 return (err); 349 pset = kmem_alloc(sizeof (pool_pset_t), KM_SLEEP); 350 pset->pset_id = *id = psetid; 351 pset->pset_npools = 0; 352 (void) nvlist_alloc(&pset->pset_props, NV_UNIQUE_NAME, KM_SLEEP); 353 (void) nvlist_add_int64(pset->pset_props, "pset.sys_id", psetid); 354 (void) nvlist_add_byte(pset->pset_props, "pset.default", 0); 355 pool_pset_mod = gethrtime(); 356 (void) snprintf(pset_name, sizeof (pset_name), "pset_%lld", 357 pool_pset_mod); 358 (void) nvlist_add_string(pset->pset_props, "pset.name", pset_name); 359 list_insert_tail(&pool_pset_list, pset); 360 return (0); 361 } 362 363 /* 364 * Destroy existing processor set. 365 */ 366 int 367 pool_pset_destroy(psetid_t psetid) 368 { 369 pool_pset_t *pset; 370 int ret; 371 372 ASSERT(pool_lock_held()); 373 374 if (psetid == PS_NONE) 375 return (EINVAL); 376 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 377 return (ESRCH); 378 if (pset->pset_npools > 0) /* can't destroy associated psets */ 379 return (EBUSY); 380 if ((ret = cpupart_destroy(pset->pset_id)) != 0) 381 return (ret); 382 (void) nvlist_free(pset->pset_props); 383 list_remove(&pool_pset_list, pset); 384 pool_pset_mod = gethrtime(); 385 kmem_free(pset, sizeof (pool_pset_t)); 386 return (0); 387 } 388 389 /* 390 * Change the visibility of a pset (and all contained cpus) in a zone. 391 * A NULL zone argument implies the special ALL_ZONES token. 392 */ 393 static void 394 pool_pset_visibility_change(psetid_t psetid, zone_t *zone, boolean_t add) 395 { 396 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 397 cpupart_t *cp; 398 cpu_t *c; 399 400 ASSERT(MUTEX_HELD(&cpu_lock)); 401 ASSERT(psetid != ZONE_PS_INVAL); 402 403 cp = cpupart_find(psetid); 404 ASSERT(cp != NULL); 405 if (cp->cp_kstat != NULL) { 406 if (add) 407 kstat_zone_add(cp->cp_kstat, zoneid); 408 else 409 kstat_zone_remove(cp->cp_kstat, zoneid); 410 } 411 412 c = cpu_list; 413 do { 414 ASSERT(c != NULL); 415 if (c->cpu_part == cp && !cpu_is_poweredoff(c)) { 416 if (add) 417 cpu_visibility_add(c, zone); 418 else 419 cpu_visibility_remove(c, zone); 420 } 421 } while ((c = c->cpu_next) != cpu_list); 422 } 423 424 /* 425 * Make the processor set visible to the zone. A NULL value for 426 * the zone means that the special ALL_ZONES token should be added to 427 * the visibility list. 428 */ 429 void 430 pool_pset_visibility_add(psetid_t psetid, zone_t *zone) 431 { 432 pool_pset_visibility_change(psetid, zone, B_TRUE); 433 } 434 435 /* 436 * Remove zone's visibility into the processor set. A NULL value for 437 * the zone means that the special ALL_ZONES token should be removed 438 * from the visibility list. 439 */ 440 void 441 pool_pset_visibility_remove(psetid_t psetid, zone_t *zone) 442 { 443 pool_pset_visibility_change(psetid, zone, B_FALSE); 444 } 445 446 /* 447 * Quick way of seeing if pools are enabled (as far as processor sets are 448 * concerned) without holding pool_lock(). 449 */ 450 boolean_t 451 pool_pset_enabled(void) 452 { 453 ASSERT(MUTEX_HELD(&cpu_lock)); 454 455 return (zone_pset_get(global_zone) != ZONE_PS_INVAL); 456 } 457 458 struct assoc_zone_arg { 459 poolid_t poolid; 460 psetid_t newpsetid; 461 }; 462 463 /* 464 * Callback function to update a zone's processor set visibility when 465 * a pool is associated with a processor set. 
466 */ 467 static int 468 pool_pset_assoc_zone_cb(zone_t *zone, void *arg) 469 { 470 struct assoc_zone_arg *aza = arg; 471 pool_t *pool; 472 zoneid_t zoneid = zone->zone_id; 473 474 ASSERT(pool_lock_held()); 475 ASSERT(MUTEX_HELD(&cpu_lock)); 476 477 if (zoneid == GLOBAL_ZONEID) 478 return (0); 479 pool = zone_pool_get(zone); 480 if (pool->pool_id == aza->poolid) 481 zone_pset_set(zone, aza->newpsetid); 482 return (0); 483 } 484 485 /* 486 * Associate pool with new processor set. 487 */ 488 int 489 pool_pset_assoc(poolid_t poolid, psetid_t psetid) 490 { 491 pool_t *pool; 492 pool_pset_t *pset, *oldpset; 493 int err = 0; 494 495 ASSERT(pool_lock_held()); 496 497 if ((pool = pool_lookup_pool_by_id(poolid)) == NULL || 498 (pset = pool_lookup_pset_by_id(psetid)) == NULL) { 499 return (ESRCH); 500 } 501 if (pool->pool_pset->pset_id == psetid) { 502 /* 503 * Already associated. 504 */ 505 return (0); 506 } 507 508 /* 509 * Hang the new pset off the pool, and rebind all of the pool's 510 * processes to it. If pool_do_bind fails, all processes will remain 511 * bound to the old set. 512 */ 513 oldpset = pool->pool_pset; 514 pool->pool_pset = pset; 515 err = pool_do_bind(pool, P_POOLID, poolid, POOL_BIND_PSET); 516 if (err) { 517 pool->pool_pset = oldpset; 518 } else { 519 struct assoc_zone_arg azarg; 520 521 /* 522 * Update zones' visibility to reflect changes. 523 */ 524 azarg.poolid = poolid; 525 azarg.newpsetid = pset->pset_id; 526 mutex_enter(&cpu_lock); 527 err = zone_walk(pool_pset_assoc_zone_cb, &azarg); 528 ASSERT(err == 0); 529 mutex_exit(&cpu_lock); 530 531 oldpset->pset_npools--; 532 pset->pset_npools++; 533 } 534 return (err); 535 } 536 537 /* 538 * Transfer specified CPUs between processor sets. 
539 */ 540 int 541 pool_pset_xtransfer(psetid_t src, psetid_t dst, size_t size, id_t *ids) 542 { 543 struct cpu *cpu; 544 int ret = 0; 545 int id; 546 547 ASSERT(pool_lock_held()); 548 ASSERT(INGLOBALZONE(curproc)); 549 550 if (size == 0 || size > max_ncpus) /* quick sanity check */ 551 return (EINVAL); 552 553 mutex_enter(&cpu_lock); 554 for (id = 0; id < size; id++) { 555 if ((cpu = cpu_get((processorid_t)ids[id])) == NULL || 556 cpupart_query_cpu(cpu) != src) { 557 ret = EINVAL; 558 break; 559 } 560 if ((ret = cpupart_attach_cpu(dst, cpu, 1)) != 0) 561 break; 562 } 563 mutex_exit(&cpu_lock); 564 if (ret == 0) 565 pool_pset_mod = gethrtime(); 566 return (ret); 567 } 568 569 /* 570 * Bind process to processor set. This should never fail because 571 * we should've done all preliminary checks before calling it. 572 */ 573 void 574 pool_pset_bind(proc_t *p, psetid_t psetid, void *projbuf, void *zonebuf) 575 { 576 kthread_t *t; 577 int ret; 578 579 ASSERT(pool_lock_held()); 580 ASSERT(MUTEX_HELD(&cpu_lock)); 581 ASSERT(MUTEX_HELD(&pidlock)); 582 ASSERT(MUTEX_HELD(&p->p_lock)); 583 584 if ((t = p->p_tlist) == NULL) 585 return; 586 do { 587 ret = cpupart_bind_thread(t, psetid, 0, projbuf, zonebuf); 588 ASSERT(ret == 0); 589 t->t_bind_pset = psetid; 590 } while ((t = t->t_forw) != p->p_tlist); 591 } 592 593 /* 594 * See the comment above pool_do_bind() for the semantics of the pset_bind_*() 595 * functions. These must be kept in sync with cpupart_move_thread, and 596 * anything else that could fail a pool_pset_bind. 597 * 598 * Returns non-zero errno on failure and zero on success. 599 * Iff successful, cpu_lock is held on return. 
600 */ 601 int 602 pset_bind_start(proc_t **procs, pool_t *pool) 603 { 604 cred_t *pcred; 605 proc_t *p, **pp; 606 kthread_t *t; 607 cpupart_t *newpp; 608 int ret; 609 610 extern int cpupart_movable_thread(kthread_id_t, cpupart_t *, int); 611 612 ASSERT(pool_lock_held()); 613 ASSERT(INGLOBALZONE(curproc)); 614 615 mutex_enter(&cpu_lock); 616 weakbinding_stop(); 617 618 newpp = cpupart_find(pool->pool_pset->pset_id); 619 ASSERT(newpp != NULL); 620 if (newpp->cp_cpulist == NULL) { 621 weakbinding_start(); 622 mutex_exit(&cpu_lock); 623 return (ENOTSUP); 624 } 625 626 pcred = crgetcred(); 627 628 /* 629 * Check for the PRIV_PROC_PRIOCNTL privilege that is required 630 * to enter and exit scheduling classes. If other privileges 631 * are required by CL_ENTERCLASS/CL_CANEXIT types of routines 632 * in the future, this code will have to be updated. 633 */ 634 if (secpolicy_setpriority(pcred) != 0) { 635 weakbinding_start(); 636 mutex_exit(&cpu_lock); 637 crfree(pcred); 638 return (EPERM); 639 } 640 641 for (pp = procs; (p = *pp) != NULL; pp++) { 642 mutex_enter(&p->p_lock); 643 if ((t = p->p_tlist) == NULL) { 644 mutex_exit(&p->p_lock); 645 continue; 646 } 647 /* 648 * Check our basic permissions to control this process. 649 */ 650 if (!prochasprocperm(p, curproc, pcred)) { 651 mutex_exit(&p->p_lock); 652 weakbinding_start(); 653 mutex_exit(&cpu_lock); 654 crfree(pcred); 655 return (EPERM); 656 } 657 do { 658 /* 659 * Check that all threads can be moved to 660 * a new processor set. 
661 */ 662 thread_lock(t); 663 ret = cpupart_movable_thread(t, newpp, 0); 664 thread_unlock(t); 665 if (ret != 0) { 666 mutex_exit(&p->p_lock); 667 weakbinding_start(); 668 mutex_exit(&cpu_lock); 669 crfree(pcred); 670 return (ret); 671 } 672 } while ((t = t->t_forw) != p->p_tlist); 673 mutex_exit(&p->p_lock); 674 } 675 crfree(pcred); 676 return (0); /* with cpu_lock held and weakbinding stopped */ 677 } 678 679 /*ARGSUSED*/ 680 void 681 pset_bind_abort(proc_t **procs, pool_t *pool) 682 { 683 mutex_exit(&cpu_lock); 684 } 685 686 void 687 pset_bind_finish(void) 688 { 689 weakbinding_start(); 690 mutex_exit(&cpu_lock); 691 } 692 693 static pool_property_t pool_pset_props[] = { 694 { "pset.name", DATA_TYPE_STRING, PP_RDWR }, 695 { "pset.comment", DATA_TYPE_STRING, PP_RDWR }, 696 { "pset.sys_id", DATA_TYPE_UINT64, PP_READ }, 697 { "pset.units", DATA_TYPE_STRING, PP_RDWR }, 698 { "pset.default", DATA_TYPE_BYTE, PP_READ }, 699 { "pset.min", DATA_TYPE_UINT64, PP_RDWR }, 700 { "pset.max", DATA_TYPE_UINT64, PP_RDWR }, 701 { "pset.size", DATA_TYPE_UINT64, PP_READ }, 702 { "pset.load", DATA_TYPE_UINT64, PP_READ }, 703 { "pset.poold.objectives", DATA_TYPE_STRING, 704 PP_RDWR | PP_OPTIONAL }, 705 { NULL, 0, 0 } 706 }; 707 708 static pool_property_t pool_cpu_props[] = { 709 { "cpu.sys_id", DATA_TYPE_UINT64, PP_READ }, 710 { "cpu.comment", DATA_TYPE_STRING, PP_RDWR }, 711 { "cpu.status", DATA_TYPE_STRING, PP_RDWR }, 712 { "cpu.pinned", DATA_TYPE_BYTE, 713 PP_RDWR | PP_OPTIONAL }, 714 { NULL, 0, 0 } 715 }; 716 717 /* 718 * Put property on the specified processor set. 
719 */ 720 int 721 pool_pset_propput(psetid_t psetid, nvpair_t *pair) 722 { 723 pool_pset_t *pset; 724 int ret; 725 726 ASSERT(pool_lock_held()); 727 728 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 729 return (ESRCH); 730 ret = pool_propput_common(pset->pset_props, pair, pool_pset_props); 731 if (ret == 0) 732 pool_pset_mod = gethrtime(); 733 return (ret); 734 } 735 736 /* 737 * Remove existing processor set property. 738 */ 739 int 740 pool_pset_proprm(psetid_t psetid, char *name) 741 { 742 pool_pset_t *pset; 743 int ret; 744 745 ASSERT(pool_lock_held()); 746 747 if ((pset = pool_lookup_pset_by_id(psetid)) == NULL) 748 return (EINVAL); 749 ret = pool_proprm_common(pset->pset_props, name, pool_pset_props); 750 if (ret == 0) 751 pool_pset_mod = gethrtime(); 752 return (ret); 753 } 754 755 /* 756 * Put new CPU property. 757 * Handle special case of "cpu.status". 758 */ 759 int 760 pool_cpu_propput(processorid_t cpuid, nvpair_t *pair) 761 { 762 int ret = 0; 763 cpu_t *cpu; 764 765 ASSERT(pool_lock_held()); 766 ASSERT(INGLOBALZONE(curproc)); 767 768 if (nvpair_type(pair) == DATA_TYPE_STRING && 769 strcmp(nvpair_name(pair), "cpu.status") == 0) { 770 char *val; 771 int status; 772 int old_status; 773 (void) nvpair_value_string(pair, &val); 774 if (strcmp(val, PS_OFFLINE) == 0) 775 status = P_OFFLINE; 776 else if (strcmp(val, PS_ONLINE) == 0) 777 status = P_ONLINE; 778 else if (strcmp(val, PS_NOINTR) == 0) 779 status = P_NOINTR; 780 else if (strcmp(val, PS_FAULTED) == 0) 781 status = P_FAULTED; 782 else if (strcmp(val, PS_SPARE) == 0) 783 status = P_SPARE; 784 else 785 return (EINVAL); 786 ret = p_online_internal(cpuid, status, &old_status); 787 } else { 788 mutex_enter(&cpu_lock); 789 if ((cpu = cpu_get(cpuid)) == NULL) 790 ret = EINVAL; 791 if (cpu->cpu_props == NULL) { 792 (void) nvlist_alloc(&cpu->cpu_props, 793 NV_UNIQUE_NAME, KM_SLEEP); 794 (void) nvlist_add_string(cpu->cpu_props, 795 "cpu.comment", ""); 796 } 797 ret = pool_propput_common(cpu->cpu_props, 
pair, pool_cpu_props); 798 if (ret == 0) 799 pool_cpu_mod = gethrtime(); 800 mutex_exit(&cpu_lock); 801 } 802 return (ret); 803 } 804 805 /* 806 * Remove existing CPU property. 807 */ 808 int 809 pool_cpu_proprm(processorid_t cpuid, char *name) 810 { 811 int ret; 812 cpu_t *cpu; 813 814 ASSERT(pool_lock_held()); 815 ASSERT(INGLOBALZONE(curproc)); 816 817 mutex_enter(&cpu_lock); 818 if ((cpu = cpu_get(cpuid)) == NULL || cpu_is_poweredoff(cpu)) { 819 ret = EINVAL; 820 } else { 821 if (cpu->cpu_props == NULL) 822 ret = EINVAL; 823 else 824 ret = pool_proprm_common(cpu->cpu_props, name, 825 pool_cpu_props); 826 } 827 if (ret == 0) 828 pool_cpu_mod = gethrtime(); 829 mutex_exit(&cpu_lock); 830 return (ret); 831 } 832 833 /* 834 * This macro returns load average multiplied by 1000 w/o losing precision 835 */ 836 #define PSET_LOAD(f) (((f >> 16) * 1000) + (((f & 0xffff) * 1000) / 0xffff)) 837 838 /* 839 * Take a snapshot of the current state of processor sets and CPUs, 840 * pack it in the exacct format, and attach it to specified exacct record. 841 */ 842 int 843 pool_pset_pack(ea_object_t *eo_system) 844 { 845 ea_object_t *eo_pset, *eo_cpu; 846 cpupart_t *cpupart; 847 psetid_t mypsetid; 848 pool_pset_t *pset; 849 nvlist_t *nvl; 850 size_t bufsz; 851 cpu_t *cpu; 852 char *buf; 853 int ncpu; 854 855 ASSERT(pool_lock_held()); 856 857 mutex_enter(&cpu_lock); 858 mypsetid = zone_pset_get(curproc->p_zone); 859 for (pset = list_head(&pool_pset_list); pset; 860 pset = list_next(&pool_pset_list, pset)) { 861 psetid_t psetid = pset->pset_id; 862 863 if (!INGLOBALZONE(curproc) && mypsetid != psetid) 864 continue; 865 cpupart = cpupart_find(psetid); 866 ASSERT(cpupart != NULL); 867 eo_pset = ea_alloc_group(EXT_GROUP | 868 EXC_LOCAL | EXD_GROUP_PSET); 869 (void) ea_attach_item(eo_pset, &psetid, sizeof (id_t), 870 EXC_LOCAL | EXD_PSET_PSETID | EXT_UINT32); 871 /* 872 * Pack info for all CPUs in this processor set. 
873 */ 874 ncpu = 0; 875 cpu = cpu_list; 876 do { 877 if (cpu->cpu_part != cpupart) /* not our pset */ 878 continue; 879 ncpu++; 880 eo_cpu = ea_alloc_group(EXT_GROUP 881 | EXC_LOCAL | EXD_GROUP_CPU); 882 (void) ea_attach_item(eo_cpu, &cpu->cpu_id, 883 sizeof (processorid_t), 884 EXC_LOCAL | EXD_CPU_CPUID | EXT_UINT32); 885 if (cpu->cpu_props == NULL) { 886 (void) nvlist_alloc(&cpu->cpu_props, 887 NV_UNIQUE_NAME, KM_SLEEP); 888 (void) nvlist_add_string(cpu->cpu_props, 889 "cpu.comment", ""); 890 } 891 (void) nvlist_dup(cpu->cpu_props, &nvl, KM_SLEEP); 892 (void) nvlist_add_int64(nvl, "cpu.sys_id", cpu->cpu_id); 893 (void) nvlist_add_string(nvl, "cpu.status", 894 (char *)cpu_get_state_str(cpu->cpu_flags)); 895 buf = NULL; 896 bufsz = 0; 897 (void) nvlist_pack(nvl, &buf, &bufsz, 898 NV_ENCODE_NATIVE, 0); 899 (void) ea_attach_item(eo_cpu, buf, bufsz, 900 EXC_LOCAL | EXD_CPU_PROP | EXT_RAW); 901 (void) nvlist_free(nvl); 902 kmem_free(buf, bufsz); 903 (void) ea_attach_to_group(eo_pset, eo_cpu); 904 } while ((cpu = cpu->cpu_next) != cpu_list); 905 906 (void) nvlist_dup(pset->pset_props, &nvl, KM_SLEEP); 907 (void) nvlist_add_uint64(nvl, "pset.size", ncpu); 908 (void) nvlist_add_uint64(nvl, "pset.load", 909 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0])); 910 buf = NULL; 911 bufsz = 0; 912 (void) nvlist_pack(nvl, &buf, &bufsz, NV_ENCODE_NATIVE, 0); 913 (void) ea_attach_item(eo_pset, buf, bufsz, 914 EXC_LOCAL | EXD_PSET_PROP | EXT_RAW); 915 (void) nvlist_free(nvl); 916 kmem_free(buf, bufsz); 917 918 (void) ea_attach_to_group(eo_system, eo_pset); 919 } 920 mutex_exit(&cpu_lock); 921 return (0); 922 } 923 924 /* 925 * Get dynamic property for processor sets. 926 * The only dynamic property currently implemented is "pset.load". 
927 */ 928 int 929 pool_pset_propget(psetid_t psetid, char *name, nvlist_t *nvl) 930 { 931 cpupart_t *cpupart; 932 pool_pset_t *pset; 933 int ret = ESRCH; 934 935 ASSERT(pool_lock_held()); 936 937 mutex_enter(&cpu_lock); 938 pset = pool_lookup_pset_by_id(psetid); 939 cpupart = cpupart_find(psetid); 940 if (cpupart == NULL || pset == NULL) { 941 mutex_exit(&cpu_lock); 942 return (EINVAL); 943 } 944 if (strcmp(name, "pset.load") == 0) 945 ret = nvlist_add_uint64(nvl, "pset.load", 946 (uint64_t)PSET_LOAD(cpupart->cp_hp_avenrun[0])); 947 else 948 ret = EINVAL; 949 mutex_exit(&cpu_lock); 950 return (ret); 951 } 952 953 /* 954 * Get dynamic property for CPUs. 955 * The only dynamic property currently implemented is "cpu.status". 956 */ 957 int 958 pool_cpu_propget(processorid_t cpuid, char *name, nvlist_t *nvl) 959 { 960 int ret = ESRCH; 961 cpu_t *cpu; 962 963 ASSERT(pool_lock_held()); 964 965 mutex_enter(&cpu_lock); 966 if ((cpu = cpu_get(cpuid)) == NULL) { 967 mutex_exit(&cpu_lock); 968 return (ESRCH); 969 } 970 if (strcmp(name, "cpu.status") == 0) { 971 ret = nvlist_add_string(nvl, "cpu.status", 972 (char *)cpu_get_state_str(cpu->cpu_flags)); 973 } else { 974 ret = EINVAL; 975 } 976 mutex_exit(&cpu_lock); 977 return (ret); 978 } 979