1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org> 5 * All rights reserved. 6 * 7 * Copyright (c) 2008 Nokia Corporation 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice unmodified, this list of conditions, and the following 15 * disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include "opt_ddb.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/ctype.h> 42 #include <sys/sysproto.h> 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mutex.h> 48 #include <sys/priv.h> 49 #include <sys/proc.h> 50 #include <sys/refcount.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/capsicum.h> 55 #include <sys/cpuset.h> 56 #include <sys/domainset.h> 57 #include <sys/sx.h> 58 #include <sys/queue.h> 59 #include <sys/libkern.h> 60 #include <sys/limits.h> 61 #include <sys/bus.h> 62 #include <sys/interrupt.h> 63 #include <sys/vmmeter.h> 64 65 #include <vm/uma.h> 66 #include <vm/vm.h> 67 #include <vm/vm_object.h> 68 #include <vm/vm_extern.h> 69 70 #ifdef DDB 71 #include <ddb/ddb.h> 72 #endif /* DDB */ 73 74 /* 75 * cpusets provide a mechanism for creating and manipulating sets of 76 * processors for the purpose of constraining the scheduling of threads to 77 * specific processors. 78 * 79 * Each process belongs to an identified set, by default this is set 1. Each 80 * thread may further restrict the cpus it may run on to a subset of this 81 * named set. This creates an anonymous set which other threads and processes 82 * may not join by number. 83 * 84 * The named set is referred to herein as the 'base' set to avoid ambiguity. 85 * This set is usually a child of a 'root' set while the anonymous set may 86 * simply be referred to as a mask. In the syscall api these are referred to 87 * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here. 88 * 89 * Threads inherit their set from their creator whether it be anonymous or 90 * not. This means that anonymous sets are immutable because they may be 91 * shared. To modify an anonymous set a new set is created with the desired 92 * mask and the same parent as the existing anonymous set. This gives the 93 * illusion of each thread having a private mask. 94 * 95 * Via the syscall apis a user may ask to retrieve or modify the root, base, 96 * or mask that is discovered via a pid, tid, or setid. Modifying a set 97 * modifies all numbered and anonymous child sets to comply with the new mask. 98 * Modifying a pid or tid's mask applies only to that tid but must still 99 * exist within the assigned parent set. 100 * 101 * A thread may not be assigned to a group separate from other threads in 102 * the process. This is to remove ambiguity when the setid is queried with 103 * a pid argument. There is no other technical limitation. 104 * 105 * This somewhat complex arrangement is intended to make it easy for 106 * applications to query available processors and bind their threads to 107 * specific processors while also allowing administrators to dynamically 108 * reprovision by changing sets which apply to groups of processes. 109 * 110 * A simple application should not concern itself with sets at all and 111 * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id 112 * meaning 'curthread'. It may query available cpus for that tid with a 113 * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...). 114 */ 115 116 LIST_HEAD(domainlist, domainset); 117 118 static uma_zone_t cpuset_zone; 119 static uma_zone_t domainset_zone; 120 static struct mtx cpuset_lock; 121 static struct setlist cpuset_ids; 122 static struct domainlist cpuset_domains; 123 static struct unrhdr *cpuset_unr; 124 static struct cpuset *cpuset_zero, *cpuset_default, *cpuset_kernel; 125 static struct domainset domainset0, domainset2; 126 127 /* Return the size of cpuset_t at the kernel level */ 128 SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD, 129 SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)"); 130 131 cpuset_t *cpuset_root; 132 cpuset_t cpuset_domain[MAXMEMDOM]; 133 134 static int domainset_valid(const struct domainset *, const struct domainset *); 135 136 /* 137 * Find the first non-anonymous set starting from 'set'. 138 */ 139 static struct cpuset * 140 cpuset_getbase(struct cpuset *set) 141 { 142 143 if (set->cs_id == CPUSET_INVALID) 144 set = set->cs_parent; 145 return (set); 146 } 147 148 /* 149 * Walks up the tree from 'set' to find the root. 150 */ 151 static struct cpuset * 152 cpuset_getroot(struct cpuset *set) 153 { 154 155 while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL) 156 set = set->cs_parent; 157 return (set); 158 } 159 160 /* 161 * Acquire a reference to a cpuset, all pointers must be tracked with refs. 162 */ 163 struct cpuset * 164 cpuset_ref(struct cpuset *set) 165 { 166 167 refcount_acquire(&set->cs_ref); 168 return (set); 169 } 170 171 /* 172 * Walks up the tree from 'set' to find the root. Returns the root 173 * referenced. 174 */ 175 static struct cpuset * 176 cpuset_refroot(struct cpuset *set) 177 { 178 179 return (cpuset_ref(cpuset_getroot(set))); 180 } 181 182 /* 183 * Find the first non-anonymous set starting from 'set'. Returns this set 184 * referenced. May return the passed in set with an extra ref if it is 185 * not anonymous. 186 */ 187 static struct cpuset * 188 cpuset_refbase(struct cpuset *set) 189 { 190 191 return (cpuset_ref(cpuset_getbase(set))); 192 } 193 194 /* 195 * Release a reference in a context where it is safe to allocate. 196 */ 197 void 198 cpuset_rel(struct cpuset *set) 199 { 200 cpusetid_t id; 201 202 if (refcount_release(&set->cs_ref) == 0) 203 return; 204 mtx_lock_spin(&cpuset_lock); 205 LIST_REMOVE(set, cs_siblings); 206 id = set->cs_id; 207 if (id != CPUSET_INVALID) 208 LIST_REMOVE(set, cs_link); 209 mtx_unlock_spin(&cpuset_lock); 210 cpuset_rel(set->cs_parent); 211 uma_zfree(cpuset_zone, set); 212 if (id != CPUSET_INVALID) 213 free_unr(cpuset_unr, id); 214 } 215 216 /* 217 * Deferred release must be used when in a context that is not safe to 218 * allocate/free. This places any unreferenced sets on the list 'head'. 219 */ 220 static void 221 cpuset_rel_defer(struct setlist *head, struct cpuset *set) 222 { 223 224 if (refcount_release(&set->cs_ref) == 0) 225 return; 226 mtx_lock_spin(&cpuset_lock); 227 LIST_REMOVE(set, cs_siblings); 228 if (set->cs_id != CPUSET_INVALID) 229 LIST_REMOVE(set, cs_link); 230 LIST_INSERT_HEAD(head, set, cs_link); 231 mtx_unlock_spin(&cpuset_lock); 232 } 233 234 /* 235 * Complete a deferred release. Removes the set from the list provided to 236 * cpuset_rel_defer. 237 */ 238 static void 239 cpuset_rel_complete(struct cpuset *set) 240 { 241 LIST_REMOVE(set, cs_link); 242 cpuset_rel(set->cs_parent); 243 uma_zfree(cpuset_zone, set); 244 } 245 246 /* 247 * Find a set based on an id. Returns it with a ref. 248 */ 249 static struct cpuset * 250 cpuset_lookup(cpusetid_t setid, struct thread *td) 251 { 252 struct cpuset *set; 253 254 if (setid == CPUSET_INVALID) 255 return (NULL); 256 mtx_lock_spin(&cpuset_lock); 257 LIST_FOREACH(set, &cpuset_ids, cs_link) 258 if (set->cs_id == setid) 259 break; 260 if (set) 261 cpuset_ref(set); 262 mtx_unlock_spin(&cpuset_lock); 263 264 KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__)); 265 if (set != NULL && jailed(td->td_ucred)) { 266 struct cpuset *jset, *tset; 267 268 jset = td->td_ucred->cr_prison->pr_cpuset; 269 for (tset = set; tset != NULL; tset = tset->cs_parent) 270 if (tset == jset) 271 break; 272 if (tset == NULL) { 273 cpuset_rel(set); 274 set = NULL; 275 } 276 } 277 278 return (set); 279 } 280 281 /* 282 * Create a set in the space provided in 'set' with the provided parameters. 283 * The set is returned with a single ref. May return EDEADLK if the set 284 * will have no valid cpu based on restrictions from the parent. 285 */ 286 static int 287 _cpuset_create(struct cpuset *set, struct cpuset *parent, 288 const cpuset_t *mask, struct domainset *domain, cpusetid_t id) 289 { 290 291 if (domain == NULL) 292 domain = parent->cs_domain; 293 if (mask == NULL) 294 mask = &parent->cs_mask; 295 if (!CPU_OVERLAP(&parent->cs_mask, mask)) 296 return (EDEADLK); 297 /* The domain must be prepared ahead of time. */ 298 if (!domainset_valid(parent->cs_domain, domain)) 299 return (EDEADLK); 300 CPU_COPY(mask, &set->cs_mask); 301 LIST_INIT(&set->cs_children); 302 refcount_init(&set->cs_ref, 1); 303 set->cs_flags = 0; 304 mtx_lock_spin(&cpuset_lock); 305 set->cs_domain = domain; 306 CPU_AND(&set->cs_mask, &parent->cs_mask); 307 set->cs_id = id; 308 set->cs_parent = cpuset_ref(parent); 309 LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings); 310 if (set->cs_id != CPUSET_INVALID) 311 LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); 312 mtx_unlock_spin(&cpuset_lock); 313 314 return (0); 315 } 316 317 /* 318 * Create a new non-anonymous set with the requested parent and mask. May 319 * return failures if the mask is invalid or a new number can not be 320 * allocated. 321 */ 322 static int 323 cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask) 324 { 325 struct cpuset *set; 326 cpusetid_t id; 327 int error; 328 329 id = alloc_unr(cpuset_unr); 330 if (id == -1) 331 return (ENFILE); 332 *setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); 333 error = _cpuset_create(set, parent, mask, NULL, id); 334 if (error == 0) 335 return (0); 336 free_unr(cpuset_unr, id); 337 uma_zfree(cpuset_zone, set); 338 339 return (error); 340 } 341 342 static void 343 cpuset_freelist_add(struct setlist *list, int count) 344 { 345 struct cpuset *set; 346 int i; 347 348 for (i = 0; i < count; i++) { 349 set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK); 350 LIST_INSERT_HEAD(list, set, cs_link); 351 } 352 } 353 354 static void 355 cpuset_freelist_init(struct setlist *list, int count) 356 { 357 358 LIST_INIT(list); 359 cpuset_freelist_add(list, count); 360 } 361 362 static void 363 cpuset_freelist_free(struct setlist *list) 364 { 365 struct cpuset *set; 366 367 while ((set = LIST_FIRST(list)) != NULL) { 368 LIST_REMOVE(set, cs_link); 369 uma_zfree(cpuset_zone, set); 370 } 371 } 372 373 static void 374 domainset_freelist_add(struct domainlist *list, int count) 375 { 376 struct domainset *set; 377 int i; 378 379 for (i = 0; i < count; i++) { 380 set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK); 381 LIST_INSERT_HEAD(list, set, ds_link); 382 } 383 } 384 385 static void 386 domainset_freelist_init(struct domainlist *list, int count) 387 { 388 389 LIST_INIT(list); 390 domainset_freelist_add(list, count); 391 } 392 393 static void 394 domainset_freelist_free(struct domainlist *list) 395 { 396 struct domainset *set; 397 398 while ((set = LIST_FIRST(list)) != NULL) { 399 LIST_REMOVE(set, ds_link); 400 uma_zfree(domainset_zone, set); 401 } 402 } 403 404 /* Copy a domainset preserving mask and policy. */ 405 static void 406 domainset_copy(const struct domainset *from, struct domainset *to) 407 { 408 409 DOMAINSET_COPY(&from->ds_mask, &to->ds_mask); 410 to->ds_policy = from->ds_policy; 411 to->ds_prefer = from->ds_prefer; 412 } 413 414 /* Return 1 if mask and policy are equal, otherwise 0. */ 415 static int 416 domainset_equal(const struct domainset *one, const struct domainset *two) 417 { 418 419 return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 && 420 one->ds_policy == two->ds_policy && 421 one->ds_prefer == two->ds_prefer); 422 } 423 424 /* Return 1 if child is a valid subset of parent. */ 425 static int 426 domainset_valid(const struct domainset *parent, const struct domainset *child) 427 { 428 if (child->ds_policy != DOMAINSET_POLICY_PREFER) 429 return (DOMAINSET_SUBSET(&parent->ds_mask, &child->ds_mask)); 430 return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask)); 431 } 432 433 static int 434 domainset_restrict(const struct domainset *parent, 435 const struct domainset *child) 436 { 437 if (child->ds_policy != DOMAINSET_POLICY_PREFER) 438 return (DOMAINSET_OVERLAP(&parent->ds_mask, &child->ds_mask)); 439 return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask)); 440 } 441 442 /* 443 * Lookup or create a domainset. The key is provided in ds_mask and 444 * ds_policy. If the domainset does not yet exist the storage in 445 * 'domain' is used to insert. Otherwise this storage is freed to the 446 * domainset_zone and the existing domainset is returned. 447 */ 448 static struct domainset * 449 _domainset_create(struct domainset *domain, struct domainlist *freelist) 450 { 451 struct domainset *ndomain; 452 int i, j, max; 453 454 mtx_lock_spin(&cpuset_lock); 455 LIST_FOREACH(ndomain, &cpuset_domains, ds_link) 456 if (domainset_equal(ndomain, domain)) 457 break; 458 /* 459 * If the domain does not yet exist we insert it and initialize 460 * various iteration helpers which are not part of the key. 461 */ 462 if (ndomain == NULL) { 463 LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link); 464 domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask); 465 max = DOMAINSET_FLS(&domain->ds_mask) + 1; 466 for (i = 0, j = 0; i < max; i++) 467 if (DOMAINSET_ISSET(i, &domain->ds_mask)) 468 domain->ds_order[j++] = i; 469 } 470 mtx_unlock_spin(&cpuset_lock); 471 if (ndomain == NULL) 472 return (domain); 473 if (freelist != NULL) 474 LIST_INSERT_HEAD(freelist, domain, ds_link); 475 else 476 uma_zfree(domainset_zone, domain); 477 return (ndomain); 478 479 } 480 481 /* 482 * Create or lookup a domainset based on the key held in 'domain'. 483 */ 484 struct domainset * 485 domainset_create(const struct domainset *domain) 486 { 487 struct domainset *ndomain; 488 489 /* 490 * Validate the policy. It must specify a useable policy number with 491 * only valid domains. Preferred must include the preferred domain 492 * in the mask. 493 */ 494 if (domain->ds_policy <= DOMAINSET_POLICY_INVALID || 495 domain->ds_policy > DOMAINSET_POLICY_MAX) 496 return (NULL); 497 if (domain->ds_policy == DOMAINSET_POLICY_PREFER && 498 !DOMAINSET_ISSET(domain->ds_prefer, &domain->ds_mask)) 499 return (NULL); 500 if (!DOMAINSET_SUBSET(&domainset0.ds_mask, &domain->ds_mask)) 501 return (NULL); 502 ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO); 503 domainset_copy(domain, ndomain); 504 return _domainset_create(ndomain, NULL); 505 } 506 507 /* 508 * Update thread domainset pointers. 509 */ 510 static void 511 domainset_notify(void) 512 { 513 struct thread *td; 514 struct proc *p; 515 516 sx_slock(&allproc_lock); 517 FOREACH_PROC_IN_SYSTEM(p) { 518 PROC_LOCK(p); 519 if (p->p_state == PRS_NEW) { 520 PROC_UNLOCK(p); 521 continue; 522 } 523 FOREACH_THREAD_IN_PROC(p, td) { 524 thread_lock(td); 525 td->td_domain.dr_policy = td->td_cpuset->cs_domain; 526 thread_unlock(td); 527 } 528 PROC_UNLOCK(p); 529 } 530 sx_sunlock(&allproc_lock); 531 kernel_object->domain.dr_policy = cpuset_kernel->cs_domain; 532 } 533 534 /* 535 * Create a new set that is a subset of a parent. 536 */ 537 static struct domainset * 538 domainset_shadow(const struct domainset *pdomain, 539 const struct domainset *domain, struct domainlist *freelist) 540 { 541 struct domainset *ndomain; 542 543 ndomain = LIST_FIRST(freelist); 544 LIST_REMOVE(ndomain, ds_link); 545 546 /* 547 * Initialize the key from the request. 548 */ 549 domainset_copy(domain, ndomain); 550 551 /* 552 * Restrict the key by the parent. 553 */ 554 DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask); 555 556 return _domainset_create(ndomain, freelist); 557 } 558 559 /* 560 * Recursively check for errors that would occur from applying mask to 561 * the tree of sets starting at 'set'. Checks for sets that would become 562 * empty as well as RDONLY flags. 563 */ 564 static int 565 cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask) 566 { 567 struct cpuset *nset; 568 cpuset_t newmask; 569 int error; 570 571 mtx_assert(&cpuset_lock, MA_OWNED); 572 if (set->cs_flags & CPU_SET_RDONLY) 573 return (EPERM); 574 if (check_mask) { 575 if (!CPU_OVERLAP(&set->cs_mask, mask)) 576 return (EDEADLK); 577 CPU_COPY(&set->cs_mask, &newmask); 578 CPU_AND(&newmask, mask); 579 } else 580 CPU_COPY(mask, &newmask); 581 error = 0; 582 LIST_FOREACH(nset, &set->cs_children, cs_siblings) 583 if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0) 584 break; 585 return (error); 586 } 587 588 /* 589 * Applies the mask 'mask' without checking for empty sets or permissions. 590 */ 591 static void 592 cpuset_update(struct cpuset *set, cpuset_t *mask) 593 { 594 struct cpuset *nset; 595 596 mtx_assert(&cpuset_lock, MA_OWNED); 597 CPU_AND(&set->cs_mask, mask); 598 LIST_FOREACH(nset, &set->cs_children, cs_siblings) 599 cpuset_update(nset, &set->cs_mask); 600 601 return; 602 } 603 604 /* 605 * Modify the set 'set' to use a copy of the mask provided. Apply this new 606 * mask to restrict all children in the tree. Checks for validity before 607 * applying the changes. 608 */ 609 static int 610 cpuset_modify(struct cpuset *set, cpuset_t *mask) 611 { 612 struct cpuset *root; 613 int error; 614 615 error = priv_check(curthread, PRIV_SCHED_CPUSET); 616 if (error) 617 return (error); 618 /* 619 * In case we are called from within the jail 620 * we do not allow modifying the dedicated root 621 * cpuset of the jail but may still allow to 622 * change child sets. 623 */ 624 if (jailed(curthread->td_ucred) && 625 set->cs_flags & CPU_SET_ROOT) 626 return (EPERM); 627 /* 628 * Verify that we have access to this set of 629 * cpus. 630 */ 631 root = cpuset_getroot(set); 632 mtx_lock_spin(&cpuset_lock); 633 if (root && !CPU_SUBSET(&root->cs_mask, mask)) { 634 error = EINVAL; 635 goto out; 636 } 637 error = cpuset_testupdate(set, mask, 0); 638 if (error) 639 goto out; 640 CPU_COPY(mask, &set->cs_mask); 641 cpuset_update(set, mask); 642 out: 643 mtx_unlock_spin(&cpuset_lock); 644 645 return (error); 646 } 647 648 /* 649 * Recursively check for errors that would occur from applying mask to 650 * the tree of sets starting at 'set'. Checks for sets that would become 651 * empty as well as RDONLY flags. 652 */ 653 static int 654 cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset, 655 struct domainset *orig, int *count, int check_mask) 656 { 657 struct cpuset *nset; 658 struct domainset *domain; 659 struct domainset newset; 660 int error; 661 662 mtx_assert(&cpuset_lock, MA_OWNED); 663 if (set->cs_flags & CPU_SET_RDONLY) 664 return (EPERM); 665 domain = set->cs_domain; 666 domainset_copy(domain, &newset); 667 if (!domainset_equal(domain, orig)) { 668 if (!domainset_restrict(domain, dset)) 669 return (EDEADLK); 670 DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask); 671 /* Count the number of domains that are changing. */ 672 (*count)++; 673 } 674 error = 0; 675 LIST_FOREACH(nset, &set->cs_children, cs_siblings) 676 if ((error = cpuset_testupdate_domain(nset, &newset, domain, 677 count, 1)) != 0) 678 break; 679 return (error); 680 } 681 682 /* 683 * Applies the mask 'mask' without checking for empty sets or permissions. 684 */ 685 static void 686 cpuset_update_domain(struct cpuset *set, struct domainset *domain, 687 struct domainset *orig, struct domainlist *domains) 688 { 689 struct cpuset *nset; 690 691 mtx_assert(&cpuset_lock, MA_OWNED); 692 /* 693 * If this domainset has changed from the parent we must calculate 694 * a new set. Otherwise it simply inherits from the parent. When 695 * we inherit from the parent we get a new mask and policy. If the 696 * set is modified from the parent we keep the policy and only 697 * update the mask. 698 */ 699 if (set->cs_domain != orig) { 700 orig = set->cs_domain; 701 set->cs_domain = domainset_shadow(domain, orig, domains); 702 } else 703 set->cs_domain = domain; 704 LIST_FOREACH(nset, &set->cs_children, cs_siblings) 705 cpuset_update_domain(nset, set->cs_domain, orig, domains); 706 707 return; 708 } 709 710 /* 711 * Modify the set 'set' to use a copy the domainset provided. Apply this new 712 * mask to restrict all children in the tree. Checks for validity before 713 * applying the changes. 714 */ 715 static int 716 cpuset_modify_domain(struct cpuset *set, struct domainset *domain) 717 { 718 struct domainlist domains; 719 struct domainset temp; 720 struct domainset *dset; 721 struct cpuset *root; 722 int ndomains, needed; 723 int error; 724 725 error = priv_check(curthread, PRIV_SCHED_CPUSET); 726 if (error) 727 return (error); 728 /* 729 * In case we are called from within the jail 730 * we do not allow modifying the dedicated root 731 * cpuset of the jail but may still allow to 732 * change child sets. 733 */ 734 if (jailed(curthread->td_ucred) && 735 set->cs_flags & CPU_SET_ROOT) 736 return (EPERM); 737 domainset_freelist_init(&domains, 0); 738 domain = domainset_create(domain); 739 ndomains = needed = 0; 740 do { 741 if (ndomains < needed) { 742 domainset_freelist_add(&domains, needed - ndomains); 743 ndomains = needed; 744 } 745 root = cpuset_getroot(set); 746 mtx_lock_spin(&cpuset_lock); 747 dset = root->cs_domain; 748 /* 749 * Verify that we have access to this set of domains. 750 */ 751 if (root && !domainset_valid(dset, domain)) { 752 error = EINVAL; 753 goto out; 754 } 755 /* 756 * If applying prefer we keep the current set as the fallback. 757 */ 758 if (domain->ds_policy == DOMAINSET_POLICY_PREFER) 759 DOMAINSET_COPY(&set->cs_domain->ds_mask, 760 &domain->ds_mask); 761 /* 762 * Determine whether we can apply this set of domains and 763 * how many new domain structures it will require. 764 */ 765 domainset_copy(domain, &temp); 766 needed = 0; 767 error = cpuset_testupdate_domain(set, &temp, set->cs_domain, 768 &needed, 0); 769 if (error) 770 goto out; 771 } while (ndomains < needed); 772 dset = set->cs_domain; 773 cpuset_update_domain(set, domain, dset, &domains); 774 out: 775 mtx_unlock_spin(&cpuset_lock); 776 domainset_freelist_free(&domains); 777 if (error == 0) 778 domainset_notify(); 779 780 return (error); 781 } 782 783 /* 784 * Resolve the 'which' parameter of several cpuset apis. 785 * 786 * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also 787 * checks for permission via p_cansched(). 788 * 789 * For WHICH_SET returns a valid set with a new reference. 790 * 791 * -1 may be supplied for any argument to mean the current proc/thread or 792 * the base set of the current thread. May fail with ESRCH/EPERM. 793 */ 794 int 795 cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp, 796 struct cpuset **setp) 797 { 798 struct cpuset *set; 799 struct thread *td; 800 struct proc *p; 801 int error; 802 803 *pp = p = NULL; 804 *tdp = td = NULL; 805 *setp = set = NULL; 806 switch (which) { 807 case CPU_WHICH_PID: 808 if (id == -1) { 809 PROC_LOCK(curproc); 810 p = curproc; 811 break; 812 } 813 if ((p = pfind(id)) == NULL) 814 return (ESRCH); 815 break; 816 case CPU_WHICH_TID: 817 if (id == -1) { 818 PROC_LOCK(curproc); 819 p = curproc; 820 td = curthread; 821 break; 822 } 823 td = tdfind(id, -1); 824 if (td == NULL) 825 return (ESRCH); 826 p = td->td_proc; 827 break; 828 case CPU_WHICH_CPUSET: 829 if (id == -1) { 830 thread_lock(curthread); 831 set = cpuset_refbase(curthread->td_cpuset); 832 thread_unlock(curthread); 833 } else 834 set = cpuset_lookup(id, curthread); 835 if (set) { 836 *setp = set; 837 return (0); 838 } 839 return (ESRCH); 840 case CPU_WHICH_JAIL: 841 { 842 /* Find `set' for prison with given id. */ 843 struct prison *pr; 844 845 sx_slock(&allprison_lock); 846 pr = prison_find_child(curthread->td_ucred->cr_prison, id); 847 sx_sunlock(&allprison_lock); 848 if (pr == NULL) 849 return (ESRCH); 850 cpuset_ref(pr->pr_cpuset); 851 *setp = pr->pr_cpuset; 852 mtx_unlock(&pr->pr_mtx); 853 return (0); 854 } 855 case CPU_WHICH_IRQ: 856 case CPU_WHICH_DOMAIN: 857 return (0); 858 default: 859 return (EINVAL); 860 } 861 error = p_cansched(curthread, p); 862 if (error) { 863 PROC_UNLOCK(p); 864 return (error); 865 } 866 if (td == NULL) 867 td = FIRST_THREAD_IN_PROC(p); 868 *pp = p; 869 *tdp = td; 870 return (0); 871 } 872 873 static int 874 cpuset_testshadow(struct cpuset *set, const cpuset_t *mask, 875 const struct domainset *domain) 876 { 877 struct cpuset *parent; 878 struct domainset *dset; 879 880 parent = cpuset_getbase(set); 881 /* 882 * If we are restricting a cpu mask it must be a subset of the 883 * parent or invalid CPUs have been specified. 884 */ 885 if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask)) 886 return (EINVAL); 887 888 /* 889 * If we are restricting a domain mask it must be a subset of the 890 * parent or invalid domains have been specified. 891 */ 892 dset = parent->cs_domain; 893 if (domain != NULL && !domainset_valid(dset, domain)) 894 return (EINVAL); 895 896 return (0); 897 } 898 899 /* 900 * Create an anonymous set with the provided mask in the space provided by 901 * 'nset'. If the passed in set is anonymous we use its parent otherwise 902 * the new set is a child of 'set'. 903 */ 904 static int 905 cpuset_shadow(struct cpuset *set, struct cpuset **nsetp, 906 const cpuset_t *mask, const struct domainset *domain, 907 struct setlist *cpusets, struct domainlist *domains) 908 { 909 struct cpuset *parent; 910 struct cpuset *nset; 911 struct domainset *dset; 912 struct domainset *d; 913 int error; 914 915 error = cpuset_testshadow(set, mask, domain); 916 if (error) 917 return (error); 918 919 parent = cpuset_getbase(set); 920 dset = parent->cs_domain; 921 if (mask == NULL) 922 mask = &set->cs_mask; 923 if (domain != NULL) 924 d = domainset_shadow(dset, domain, domains); 925 else 926 d = set->cs_domain; 927 nset = LIST_FIRST(cpusets); 928 error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID); 929 if (error == 0) { 930 LIST_REMOVE(nset, cs_link); 931 *nsetp = nset; 932 } 933 return (error); 934 } 935 936 static struct cpuset * 937 cpuset_update_thread(struct thread *td, struct cpuset *nset) 938 { 939 struct cpuset *tdset; 940 941 tdset = td->td_cpuset; 942 td->td_cpuset = nset; 943 td->td_domain.dr_policy = nset->cs_domain; 944 sched_affinity(td); 945 946 return (tdset); 947 } 948 949 static int 950 cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask, 951 struct domainset *domain) 952 { 953 struct cpuset *parent; 954 955 parent = cpuset_getbase(tdset); 956 if (mask == NULL) 957 mask = &tdset->cs_mask; 958 if (domain == NULL) 959 domain = tdset->cs_domain; 960 return cpuset_testshadow(parent, mask, domain); 961 } 962 963 static int 964 cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask, 965 struct domainset *domain, struct cpuset **nsetp, 966 struct setlist *freelist, struct domainlist *domainlist) 967 { 968 struct cpuset *parent; 969 970 parent = cpuset_getbase(tdset); 971 if (mask == NULL) 972 mask = &tdset->cs_mask; 973 if (domain == NULL) 974 domain = tdset->cs_domain; 975 return cpuset_shadow(parent, nsetp, mask, domain, freelist, 976 domainlist); 977 } 978 979 static int 980 cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set, 981 cpuset_t *mask, struct domainset *domain) 982 { 983 struct cpuset *parent; 984 985 parent = cpuset_getbase(tdset); 986 987 /* 988 * If the thread restricted its mask then apply that same 989 * restriction to the new set, otherwise take it wholesale. 990 */ 991 if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) { 992 CPU_COPY(&tdset->cs_mask, mask); 993 CPU_AND(mask, &set->cs_mask); 994 } else 995 CPU_COPY(&set->cs_mask, mask); 996 997 /* 998 * If the thread restricted the domain then we apply the 999 * restriction to the new set but retain the policy. 1000 */ 1001 if (tdset->cs_domain != parent->cs_domain) { 1002 domainset_copy(tdset->cs_domain, domain); 1003 DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask); 1004 } else 1005 domainset_copy(set->cs_domain, domain); 1006 1007 if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask)) 1008 return (EDEADLK); 1009 1010 return (0); 1011 } 1012 1013 static int 1014 cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set) 1015 { 1016 struct domainset domain; 1017 cpuset_t mask; 1018 1019 if (tdset->cs_id != CPUSET_INVALID) 1020 return (0); 1021 return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); 1022 } 1023 1024 static int 1025 cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set, 1026 struct cpuset **nsetp, struct setlist *freelist, 1027 struct domainlist *domainlist) 1028 { 1029 struct domainset domain; 1030 cpuset_t mask; 1031 int error; 1032 1033 /* 1034 * If we're replacing on a thread that has not constrained the 1035 * original set we can simply accept the new set. 1036 */ 1037 if (tdset->cs_id != CPUSET_INVALID) { 1038 *nsetp = cpuset_ref(set); 1039 return (0); 1040 } 1041 error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); 1042 if (error) 1043 return (error); 1044 1045 return cpuset_shadow(tdset, nsetp, &mask, &domain, freelist, 1046 domainlist); 1047 } 1048 1049 /* 1050 * Handle three cases for updating an entire process. 1051 * 1052 * 1) Set is non-null. This reparents all anonymous sets to the provided 1053 * set and replaces all non-anonymous td_cpusets with the provided set. 1054 * 2) Mask is non-null. This replaces or creates anonymous sets for every 1055 * thread with the existing base as a parent. 1056 * 3) domain is non-null. This creates anonymous sets for every thread 1057 * and replaces the domain set. 1058 * 1059 * This is overly complicated because we can't allocate while holding a 1060 * spinlock and spinlocks must be held while changing and examining thread 1061 * state. 1062 */ 1063 static int 1064 cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask, 1065 struct domainset *domain) 1066 { 1067 struct setlist freelist; 1068 struct setlist droplist; 1069 struct domainlist domainlist; 1070 struct cpuset *nset; 1071 struct thread *td; 1072 struct proc *p; 1073 int threads; 1074 int nfree; 1075 int error; 1076 1077 /* 1078 * The algorithm requires two passes due to locking considerations. 1079 * 1080 * 1) Lookup the process and acquire the locks in the required order. 1081 * 2) If enough cpusets have not been allocated release the locks and 1082 * allocate them. Loop. 1083 */ 1084 cpuset_freelist_init(&freelist, 1); 1085 domainset_freelist_init(&domainlist, 1); 1086 nfree = 1; 1087 LIST_INIT(&droplist); 1088 nfree = 0; 1089 for (;;) { 1090 error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset); 1091 if (error) 1092 goto out; 1093 if (nfree >= p->p_numthreads) 1094 break; 1095 threads = p->p_numthreads; 1096 PROC_UNLOCK(p); 1097 if (nfree < threads) { 1098 cpuset_freelist_add(&freelist, threads - nfree); 1099 domainset_freelist_add(&domainlist, threads - nfree); 1100 nfree = threads; 1101 } 1102 } 1103 PROC_LOCK_ASSERT(p, MA_OWNED); 1104 /* 1105 * Now that the appropriate locks are held and we have enough cpusets, 1106 * make sure the operation will succeed before applying changes. The 1107 * proc lock prevents td_cpuset from changing between calls. 1108 */ 1109 error = 0; 1110 FOREACH_THREAD_IN_PROC(p, td) { 1111 thread_lock(td); 1112 if (set != NULL) 1113 error = cpuset_setproc_test_setthread(td->td_cpuset, 1114 set); 1115 else 1116 error = cpuset_setproc_test_maskthread(td->td_cpuset, 1117 mask, domain); 1118 thread_unlock(td); 1119 if (error) 1120 goto unlock_out; 1121 } 1122 /* 1123 * Replace each thread's cpuset while using deferred release. We 1124 * must do this because the thread lock must be held while operating 1125 * on the thread and this limits the type of operations allowed. 1126 */ 1127 FOREACH_THREAD_IN_PROC(p, td) { 1128 thread_lock(td); 1129 if (set != NULL) 1130 error = cpuset_setproc_setthread(td->td_cpuset, set, 1131 &nset, &freelist, &domainlist); 1132 else 1133 error = cpuset_setproc_maskthread(td->td_cpuset, mask, 1134 domain, &nset, &freelist, &domainlist); 1135 if (error) { 1136 thread_unlock(td); 1137 break; 1138 } 1139 cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset)); 1140 thread_unlock(td); 1141 } 1142 unlock_out: 1143 PROC_UNLOCK(p); 1144 out: 1145 while ((nset = LIST_FIRST(&droplist)) != NULL) 1146 cpuset_rel_complete(nset); 1147 cpuset_freelist_free(&freelist); 1148 domainset_freelist_free(&domainlist); 1149 return (error); 1150 } 1151 1152 static int 1153 bitset_strprint(char *buf, size_t bufsiz, const struct bitset *set, int setlen) 1154 { 1155 size_t bytes; 1156 int i, once; 1157 char *p; 1158 1159 once = 0; 1160 p = buf; 1161 for (i = 0; i < __bitset_words(setlen); i++) { 1162 if (once != 0) { 1163 if (bufsiz < 1) 1164 return (0); 1165 *p = ','; 1166 p++; 1167 bufsiz--; 1168 } else 1169 once = 1; 1170 if (bufsiz < sizeof(__STRING(ULONG_MAX))) 1171 return (0); 1172 bytes = snprintf(p, bufsiz, "%lx", set->__bits[i]); 1173 p += bytes; 1174 bufsiz -= bytes; 1175 } 1176 return (p - buf); 1177 } 1178 1179 static int 1180 bitset_strscan(struct bitset *set, int setlen, const char *buf) 1181 { 1182 int i, ret; 1183 const char *p; 1184 1185 BIT_ZERO(setlen, set); 1186 p = buf; 1187 for (i = 0; i < __bitset_words(setlen); i++) { 1188 if (*p == ',') { 1189 p++; 1190 continue; 1191 } 1192 ret = sscanf(p, "%lx", &set->__bits[i]); 1193 if (ret == 0 || ret == -1) 1194 break; 1195 while (isxdigit(*p)) 1196 p++; 1197 } 1198 return (p - buf); 1199 } 1200 1201 /* 1202 * Return a string representing a valid layout for a cpuset_t object. 1203 * It expects an incoming buffer at least sized as CPUSETBUFSIZ. 1204 */ 1205 char * 1206 cpusetobj_strprint(char *buf, const cpuset_t *set) 1207 { 1208 1209 bitset_strprint(buf, CPUSETBUFSIZ, (const struct bitset *)set, 1210 CPU_SETSIZE); 1211 return (buf); 1212 } 1213 1214 /* 1215 * Build a valid cpuset_t object from a string representation. 1216 * It expects an incoming buffer at least sized as CPUSETBUFSIZ. 1217 */ 1218 int 1219 cpusetobj_strscan(cpuset_t *set, const char *buf) 1220 { 1221 char p; 1222 1223 if (strlen(buf) > CPUSETBUFSIZ - 1) 1224 return (-1); 1225 1226 p = buf[bitset_strscan((struct bitset *)set, CPU_SETSIZE, buf)]; 1227 if (p != '\0') 1228 return (-1); 1229 1230 return (0); 1231 } 1232 1233 /* 1234 * Handle a domainset specifier in the sysctl tree. A poiner to a pointer to 1235 * a domainset is in arg1. If the user specifies a valid domainset the 1236 * pointer is updated. 1237 * 1238 * Format is: 1239 * hex mask word 0,hex mask word 1,...:decimal policy:decimal preferred 1240 */ 1241 int 1242 sysctl_handle_domainset(SYSCTL_HANDLER_ARGS) 1243 { 1244 char buf[DOMAINSETBUFSIZ]; 1245 struct domainset *dset; 1246 struct domainset key; 1247 int policy, prefer, error; 1248 char *p; 1249 1250 dset = *(struct domainset **)arg1; 1251 error = 0; 1252 1253 if (dset != NULL) { 1254 p = buf + bitset_strprint(buf, DOMAINSETBUFSIZ, 1255 (const struct bitset *)&dset->ds_mask, DOMAINSET_SETSIZE); 1256 sprintf(p, ":%d:%d", dset->ds_policy, dset->ds_prefer); 1257 } else 1258 sprintf(buf, "<NULL>"); 1259 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 1260 if (error != 0 || req->newptr == NULL) 1261 return (error); 1262 1263 /* 1264 * Read in and validate the string. 1265 */ 1266 memset(&key, 0, sizeof(key)); 1267 p = &buf[bitset_strscan((struct bitset *)&key.ds_mask, 1268 DOMAINSET_SETSIZE, buf)]; 1269 if (p == buf) 1270 return (EINVAL); 1271 if (sscanf(p, ":%d:%d", &policy, &prefer) != 2) 1272 return (EINVAL); 1273 key.ds_policy = policy; 1274 key.ds_prefer = prefer; 1275 1276 /* Domainset_create() validates the policy.*/ 1277 dset = domainset_create(&key); 1278 if (dset == NULL) 1279 return (EINVAL); 1280 *(struct domainset **)arg1 = dset; 1281 1282 return (error); 1283 } 1284 1285 /* 1286 * Apply an anonymous mask or a domain to a single thread. 1287 */ 1288 static int 1289 _cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain) 1290 { 1291 struct setlist cpusets; 1292 struct domainlist domainlist; 1293 struct cpuset *nset; 1294 struct cpuset *set; 1295 struct thread *td; 1296 struct proc *p; 1297 int error; 1298 1299 cpuset_freelist_init(&cpusets, 1); 1300 domainset_freelist_init(&domainlist, domain != NULL); 1301 error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set); 1302 if (error) 1303 goto out; 1304 set = NULL; 1305 thread_lock(td); 1306 error = cpuset_shadow(td->td_cpuset, &nset, mask, domain, 1307 &cpusets, &domainlist); 1308 if (error == 0) 1309 set = cpuset_update_thread(td, nset); 1310 thread_unlock(td); 1311 PROC_UNLOCK(p); 1312 if (set) 1313 cpuset_rel(set); 1314 out: 1315 cpuset_freelist_free(&cpusets); 1316 domainset_freelist_free(&domainlist); 1317 return (error); 1318 } 1319 1320 /* 1321 * Apply an anonymous mask to a single thread. 1322 */ 1323 int 1324 cpuset_setthread(lwpid_t id, cpuset_t *mask) 1325 { 1326 1327 return _cpuset_setthread(id, mask, NULL); 1328 } 1329 1330 /* 1331 * Apply new cpumask to the ithread. 1332 */ 1333 int 1334 cpuset_setithread(lwpid_t id, int cpu) 1335 { 1336 cpuset_t mask; 1337 1338 CPU_ZERO(&mask); 1339 if (cpu == NOCPU) 1340 CPU_COPY(cpuset_root, &mask); 1341 else 1342 CPU_SET(cpu, &mask); 1343 return _cpuset_setthread(id, &mask, NULL); 1344 } 1345 1346 /* 1347 * Create the domainset for cpuset 0, 1 and cpuset 2. 1348 */ 1349 void 1350 domainset_zero(void) 1351 { 1352 struct domainset *dset; 1353 int i; 1354 1355 mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); 1356 1357 dset = &domainset0; 1358 DOMAINSET_ZERO(&dset->ds_mask); 1359 for (i = 0; i < vm_ndomains; i++) 1360 DOMAINSET_SET(i, &dset->ds_mask); 1361 dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH; 1362 dset->ds_prefer = -1; 1363 curthread->td_domain.dr_policy = _domainset_create(dset, NULL); 1364 1365 domainset_copy(dset, &domainset2); 1366 domainset2.ds_policy = DOMAINSET_POLICY_INTERLEAVE; 1367 kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL); 1368 } 1369 1370 /* 1371 * Creates system-wide cpusets and the cpuset for thread0 including three 1372 * sets: 1373 * 1374 * 0 - The root set which should represent all valid processors in the 1375 * system. It is initially created with a mask of all processors 1376 * because we don't know what processors are valid until cpuset_init() 1377 * runs. This set is immutable. 1378 * 1 - The default set which all processes are a member of until changed. 1379 * This allows an administrator to move all threads off of given cpus to 1380 * dedicate them to high priority tasks or save power etc. 1381 * 2 - The kernel set which allows restriction and policy to be applied only 1382 * to kernel threads and the kernel_object. 1383 */ 1384 struct cpuset * 1385 cpuset_thread0(void) 1386 { 1387 struct cpuset *set; 1388 int i; 1389 int error __unused; 1390 1391 cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, 1392 NULL, NULL, UMA_ALIGN_CACHE, 0); 1393 domainset_zone = uma_zcreate("domainset", sizeof(struct domainset), 1394 NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); 1395 1396 /* 1397 * Create the root system set (0) for the whole machine. Doesn't use 1398 * cpuset_create() due to NULL parent. 1399 */ 1400 set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); 1401 CPU_COPY(&all_cpus, &set->cs_mask); 1402 LIST_INIT(&set->cs_children); 1403 LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); 1404 set->cs_ref = 1; 1405 set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY; 1406 set->cs_domain = &domainset0; 1407 cpuset_zero = set; 1408 cpuset_root = &set->cs_mask; 1409 1410 /* 1411 * Now derive a default (1), modifiable set from that to give out. 1412 */ 1413 set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); 1414 error = _cpuset_create(set, cpuset_zero, NULL, NULL, 1); 1415 KASSERT(error == 0, ("Error creating default set: %d\n", error)); 1416 cpuset_default = set; 1417 /* 1418 * Create the kernel set (2). 1419 */ 1420 set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); 1421 error = _cpuset_create(set, cpuset_zero, NULL, NULL, 2); 1422 KASSERT(error == 0, ("Error creating kernel set: %d\n", error)); 1423 set->cs_domain = &domainset2; 1424 cpuset_kernel = set; 1425 1426 /* 1427 * Initialize the unit allocator. 0 and 1 are allocated above. 1428 */ 1429 cpuset_unr = new_unrhdr(2, INT_MAX, NULL); 1430 1431 /* 1432 * If MD code has not initialized per-domain cpusets, place all 1433 * CPUs in domain 0. 1434 */ 1435 for (i = 0; i < MAXMEMDOM; i++) 1436 if (!CPU_EMPTY(&cpuset_domain[i])) 1437 goto domains_set; 1438 CPU_COPY(&all_cpus, &cpuset_domain[0]); 1439 domains_set: 1440 1441 return (cpuset_default); 1442 } 1443 1444 void 1445 cpuset_kernthread(struct thread *td) 1446 { 1447 struct cpuset *set; 1448 1449 thread_lock(td); 1450 set = td->td_cpuset; 1451 td->td_cpuset = cpuset_ref(cpuset_kernel); 1452 thread_unlock(td); 1453 cpuset_rel(set); 1454 } 1455 1456 /* 1457 * Create a cpuset, which would be cpuset_create() but 1458 * mark the new 'set' as root. 1459 * 1460 * We are not going to reparent the td to it. Use cpuset_setproc_update_set() 1461 * for that. 1462 * 1463 * In case of no error, returns the set in *setp locked with a reference. 1464 */ 1465 int 1466 cpuset_create_root(struct prison *pr, struct cpuset **setp) 1467 { 1468 struct cpuset *set; 1469 int error; 1470 1471 KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__)); 1472 KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__)); 1473 1474 error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask); 1475 if (error) 1476 return (error); 1477 1478 KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data", 1479 __func__, __LINE__)); 1480 1481 /* Mark the set as root. */ 1482 set = *setp; 1483 set->cs_flags |= CPU_SET_ROOT; 1484 1485 return (0); 1486 } 1487 1488 int 1489 cpuset_setproc_update_set(struct proc *p, struct cpuset *set) 1490 { 1491 int error; 1492 1493 KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); 1494 KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); 1495 1496 cpuset_ref(set); 1497 error = cpuset_setproc(p->p_pid, set, NULL, NULL); 1498 if (error) 1499 return (error); 1500 cpuset_rel(set); 1501 return (0); 1502 } 1503 1504 #ifndef _SYS_SYSPROTO_H_ 1505 struct cpuset_args { 1506 cpusetid_t *setid; 1507 }; 1508 #endif 1509 int 1510 sys_cpuset(struct thread *td, struct cpuset_args *uap) 1511 { 1512 struct cpuset *root; 1513 struct cpuset *set; 1514 int error; 1515 1516 thread_lock(td); 1517 root = cpuset_refroot(td->td_cpuset); 1518 thread_unlock(td); 1519 error = cpuset_create(&set, root, &root->cs_mask); 1520 cpuset_rel(root); 1521 if (error) 1522 return (error); 1523 error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id)); 1524 if (error == 0) 1525 error = cpuset_setproc(-1, set, NULL, NULL); 1526 cpuset_rel(set); 1527 return (error); 1528 } 1529 1530 #ifndef _SYS_SYSPROTO_H_ 1531 struct cpuset_setid_args { 1532 cpuwhich_t which; 1533 id_t id; 1534 cpusetid_t setid; 1535 }; 1536 #endif 1537 int 1538 sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap) 1539 { 1540 1541 return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid)); 1542 } 1543 1544 int 1545 kern_cpuset_setid(struct thread *td, cpuwhich_t which, 1546 id_t id, cpusetid_t setid) 1547 { 1548 struct cpuset *set; 1549 int error; 1550 1551 /* 1552 * Presently we only support per-process sets. 1553 */ 1554 if (which != CPU_WHICH_PID) 1555 return (EINVAL); 1556 set = cpuset_lookup(setid, td); 1557 if (set == NULL) 1558 return (ESRCH); 1559 error = cpuset_setproc(id, set, NULL, NULL); 1560 cpuset_rel(set); 1561 return (error); 1562 } 1563 1564 #ifndef _SYS_SYSPROTO_H_ 1565 struct cpuset_getid_args { 1566 cpulevel_t level; 1567 cpuwhich_t which; 1568 id_t id; 1569 cpusetid_t *setid; 1570 }; 1571 #endif 1572 int 1573 sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap) 1574 { 1575 1576 return (kern_cpuset_getid(td, uap->level, uap->which, uap->id, 1577 uap->setid)); 1578 } 1579 1580 int 1581 kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, 1582 id_t id, cpusetid_t *setid) 1583 { 1584 struct cpuset *nset; 1585 struct cpuset *set; 1586 struct thread *ttd; 1587 struct proc *p; 1588 cpusetid_t tmpid; 1589 int error; 1590 1591 if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET) 1592 return (EINVAL); 1593 error = cpuset_which(which, id, &p, &ttd, &set); 1594 if (error) 1595 return (error); 1596 switch (which) { 1597 case CPU_WHICH_TID: 1598 case CPU_WHICH_PID: 1599 thread_lock(ttd); 1600 set = cpuset_refbase(ttd->td_cpuset); 1601 thread_unlock(ttd); 1602 PROC_UNLOCK(p); 1603 break; 1604 case CPU_WHICH_CPUSET: 1605 case CPU_WHICH_JAIL: 1606 break; 1607 case CPU_WHICH_IRQ: 1608 case CPU_WHICH_DOMAIN: 1609 return (EINVAL); 1610 } 1611 switch (level) { 1612 case CPU_LEVEL_ROOT: 1613 nset = cpuset_refroot(set); 1614 cpuset_rel(set); 1615 set = nset; 1616 break; 1617 case CPU_LEVEL_CPUSET: 1618 break; 1619 case CPU_LEVEL_WHICH: 1620 break; 1621 } 1622 tmpid = set->cs_id; 1623 cpuset_rel(set); 1624 if (error == 0) 1625 error = copyout(&tmpid, setid, sizeof(tmpid)); 1626 1627 return (error); 1628 } 1629 1630 #ifndef _SYS_SYSPROTO_H_ 1631 struct cpuset_getaffinity_args { 1632 cpulevel_t level; 1633 cpuwhich_t which; 1634 id_t id; 1635 size_t cpusetsize; 1636 cpuset_t *mask; 1637 }; 1638 #endif 1639 int 1640 sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap) 1641 { 1642 1643 return (kern_cpuset_getaffinity(td, uap->level, uap->which, 1644 uap->id, uap->cpusetsize, uap->mask)); 1645 } 1646 1647 int 1648 kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, 1649 id_t id, size_t cpusetsize, cpuset_t *maskp) 1650 { 1651 struct thread *ttd; 1652 struct cpuset *nset; 1653 struct cpuset *set; 1654 struct proc *p; 1655 cpuset_t *mask; 1656 int error; 1657 size_t size; 1658 1659 if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) 1660 return (ERANGE); 1661 /* In Capability mode, you can only get your own CPU set. */ 1662 if (IN_CAPABILITY_MODE(td)) { 1663 if (level != CPU_LEVEL_WHICH) 1664 return (ECAPMODE); 1665 if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 1666 return (ECAPMODE); 1667 if (id != -1) 1668 return (ECAPMODE); 1669 } 1670 size = cpusetsize; 1671 mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO); 1672 error = cpuset_which(which, id, &p, &ttd, &set); 1673 if (error) 1674 goto out; 1675 switch (level) { 1676 case CPU_LEVEL_ROOT: 1677 case CPU_LEVEL_CPUSET: 1678 switch (which) { 1679 case CPU_WHICH_TID: 1680 case CPU_WHICH_PID: 1681 thread_lock(ttd); 1682 set = cpuset_ref(ttd->td_cpuset); 1683 thread_unlock(ttd); 1684 break; 1685 case CPU_WHICH_CPUSET: 1686 case CPU_WHICH_JAIL: 1687 break; 1688 case CPU_WHICH_IRQ: 1689 case CPU_WHICH_INTRHANDLER: 1690 case CPU_WHICH_ITHREAD: 1691 case CPU_WHICH_DOMAIN: 1692 error = EINVAL; 1693 goto out; 1694 } 1695 if (level == CPU_LEVEL_ROOT) 1696 nset = cpuset_refroot(set); 1697 else 1698 nset = cpuset_refbase(set); 1699 CPU_COPY(&nset->cs_mask, mask); 1700 cpuset_rel(nset); 1701 break; 1702 case CPU_LEVEL_WHICH: 1703 switch (which) { 1704 case CPU_WHICH_TID: 1705 thread_lock(ttd); 1706 CPU_COPY(&ttd->td_cpuset->cs_mask, mask); 1707 thread_unlock(ttd); 1708 break; 1709 case CPU_WHICH_PID: 1710 FOREACH_THREAD_IN_PROC(p, ttd) { 1711 thread_lock(ttd); 1712 CPU_OR(mask, &ttd->td_cpuset->cs_mask); 1713 thread_unlock(ttd); 1714 } 1715 break; 1716 case CPU_WHICH_CPUSET: 1717 case CPU_WHICH_JAIL: 1718 CPU_COPY(&set->cs_mask, mask); 1719 break; 1720 case CPU_WHICH_IRQ: 1721 case CPU_WHICH_INTRHANDLER: 1722 case CPU_WHICH_ITHREAD: 1723 error = intr_getaffinity(id, which, mask); 1724 break; 1725 case CPU_WHICH_DOMAIN: 1726 if (id < 0 || id >= MAXMEMDOM) 1727 error = ESRCH; 1728 else 1729 CPU_COPY(&cpuset_domain[id], mask); 1730 break; 1731 } 1732 break; 1733 default: 1734 error = EINVAL; 1735 break; 1736 } 1737 if (set) 1738 cpuset_rel(set); 1739 if (p) 1740 PROC_UNLOCK(p); 1741 if (error == 0) 1742 error = copyout(mask, maskp, size); 1743 out: 1744 free(mask, M_TEMP); 1745 return (error); 1746 } 1747 1748 #ifndef _SYS_SYSPROTO_H_ 1749 struct cpuset_setaffinity_args { 1750 cpulevel_t level; 1751 cpuwhich_t which; 1752 id_t id; 1753 size_t cpusetsize; 1754 const cpuset_t *mask; 1755 }; 1756 #endif 1757 int 1758 sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap) 1759 { 1760 1761 return (kern_cpuset_setaffinity(td, uap->level, uap->which, 1762 uap->id, uap->cpusetsize, uap->mask)); 1763 } 1764 1765 int 1766 kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, 1767 id_t id, size_t cpusetsize, const cpuset_t *maskp) 1768 { 1769 struct cpuset *nset; 1770 struct cpuset *set; 1771 struct thread *ttd; 1772 struct proc *p; 1773 cpuset_t *mask; 1774 int error; 1775 1776 if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) 1777 return (ERANGE); 1778 /* In Capability mode, you can only set your own CPU set. */ 1779 if (IN_CAPABILITY_MODE(td)) { 1780 if (level != CPU_LEVEL_WHICH) 1781 return (ECAPMODE); 1782 if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 1783 return (ECAPMODE); 1784 if (id != -1) 1785 return (ECAPMODE); 1786 } 1787 mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO); 1788 error = copyin(maskp, mask, cpusetsize); 1789 if (error) 1790 goto out; 1791 /* 1792 * Verify that no high bits are set. 1793 */ 1794 if (cpusetsize > sizeof(cpuset_t)) { 1795 char *end; 1796 char *cp; 1797 1798 end = cp = (char *)&mask->__bits; 1799 end += cpusetsize; 1800 cp += sizeof(cpuset_t); 1801 while (cp != end) 1802 if (*cp++ != 0) { 1803 error = EINVAL; 1804 goto out; 1805 } 1806 1807 } 1808 switch (level) { 1809 case CPU_LEVEL_ROOT: 1810 case CPU_LEVEL_CPUSET: 1811 error = cpuset_which(which, id, &p, &ttd, &set); 1812 if (error) 1813 break; 1814 switch (which) { 1815 case CPU_WHICH_TID: 1816 case CPU_WHICH_PID: 1817 thread_lock(ttd); 1818 set = cpuset_ref(ttd->td_cpuset); 1819 thread_unlock(ttd); 1820 PROC_UNLOCK(p); 1821 break; 1822 case CPU_WHICH_CPUSET: 1823 case CPU_WHICH_JAIL: 1824 break; 1825 case CPU_WHICH_IRQ: 1826 case CPU_WHICH_INTRHANDLER: 1827 case CPU_WHICH_ITHREAD: 1828 case CPU_WHICH_DOMAIN: 1829 error = EINVAL; 1830 goto out; 1831 } 1832 if (level == CPU_LEVEL_ROOT) 1833 nset = cpuset_refroot(set); 1834 else 1835 nset = cpuset_refbase(set); 1836 error = cpuset_modify(nset, mask); 1837 cpuset_rel(nset); 1838 cpuset_rel(set); 1839 break; 1840 case CPU_LEVEL_WHICH: 1841 switch (which) { 1842 case CPU_WHICH_TID: 1843 error = cpuset_setthread(id, mask); 1844 break; 1845 case CPU_WHICH_PID: 1846 error = cpuset_setproc(id, NULL, mask, NULL); 1847 break; 1848 case CPU_WHICH_CPUSET: 1849 case CPU_WHICH_JAIL: 1850 error = cpuset_which(which, id, &p, &ttd, &set); 1851 if (error == 0) { 1852 error = cpuset_modify(set, mask); 1853 cpuset_rel(set); 1854 } 1855 break; 1856 case CPU_WHICH_IRQ: 1857 case CPU_WHICH_INTRHANDLER: 1858 case CPU_WHICH_ITHREAD: 1859 error = intr_setaffinity(id, which, mask); 1860 break; 1861 default: 1862 error = EINVAL; 1863 break; 1864 } 1865 break; 1866 default: 1867 error = EINVAL; 1868 break; 1869 } 1870 out: 1871 free(mask, M_TEMP); 1872 return (error); 1873 } 1874 1875 #ifndef _SYS_SYSPROTO_H_ 1876 struct cpuset_getdomain_args { 1877 cpulevel_t level; 1878 cpuwhich_t which; 1879 id_t id; 1880 size_t domainsetsize; 1881 domainset_t *mask; 1882 int *policy; 1883 }; 1884 #endif 1885 int 1886 sys_cpuset_getdomain(struct thread *td, struct cpuset_getdomain_args *uap) 1887 { 1888 1889 return (kern_cpuset_getdomain(td, uap->level, uap->which, 1890 uap->id, uap->domainsetsize, uap->mask, uap->policy)); 1891 } 1892 1893 int 1894 kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, 1895 id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp) 1896 { 1897 struct domainset outset; 1898 struct thread *ttd; 1899 struct cpuset *nset; 1900 struct cpuset *set; 1901 struct domainset *dset; 1902 struct proc *p; 1903 domainset_t *mask; 1904 int error; 1905 1906 if (domainsetsize < sizeof(domainset_t) || 1907 domainsetsize > DOMAINSET_MAXSIZE / NBBY) 1908 return (ERANGE); 1909 /* In Capability mode, you can only get your own domain set. */ 1910 if (IN_CAPABILITY_MODE(td)) { 1911 if (level != CPU_LEVEL_WHICH) 1912 return (ECAPMODE); 1913 if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 1914 return (ECAPMODE); 1915 if (id != -1) 1916 return (ECAPMODE); 1917 } 1918 mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); 1919 bzero(&outset, sizeof(outset)); 1920 error = cpuset_which(which, id, &p, &ttd, &set); 1921 if (error) 1922 goto out; 1923 switch (level) { 1924 case CPU_LEVEL_ROOT: 1925 case CPU_LEVEL_CPUSET: 1926 switch (which) { 1927 case CPU_WHICH_TID: 1928 case CPU_WHICH_PID: 1929 thread_lock(ttd); 1930 set = cpuset_ref(ttd->td_cpuset); 1931 thread_unlock(ttd); 1932 break; 1933 case CPU_WHICH_CPUSET: 1934 case CPU_WHICH_JAIL: 1935 break; 1936 case CPU_WHICH_IRQ: 1937 case CPU_WHICH_INTRHANDLER: 1938 case CPU_WHICH_ITHREAD: 1939 case CPU_WHICH_DOMAIN: 1940 error = EINVAL; 1941 goto out; 1942 } 1943 if (level == CPU_LEVEL_ROOT) 1944 nset = cpuset_refroot(set); 1945 else 1946 nset = cpuset_refbase(set); 1947 domainset_copy(nset->cs_domain, &outset); 1948 cpuset_rel(nset); 1949 break; 1950 case CPU_LEVEL_WHICH: 1951 switch (which) { 1952 case CPU_WHICH_TID: 1953 thread_lock(ttd); 1954 domainset_copy(ttd->td_cpuset->cs_domain, &outset); 1955 thread_unlock(ttd); 1956 break; 1957 case CPU_WHICH_PID: 1958 FOREACH_THREAD_IN_PROC(p, ttd) { 1959 thread_lock(ttd); 1960 dset = ttd->td_cpuset->cs_domain; 1961 /* Show all domains in the proc. */ 1962 DOMAINSET_OR(&outset.ds_mask, &dset->ds_mask); 1963 /* Last policy wins. */ 1964 outset.ds_policy = dset->ds_policy; 1965 outset.ds_prefer = dset->ds_prefer; 1966 thread_unlock(ttd); 1967 } 1968 break; 1969 case CPU_WHICH_CPUSET: 1970 case CPU_WHICH_JAIL: 1971 domainset_copy(set->cs_domain, &outset); 1972 break; 1973 case CPU_WHICH_IRQ: 1974 case CPU_WHICH_INTRHANDLER: 1975 case CPU_WHICH_ITHREAD: 1976 case CPU_WHICH_DOMAIN: 1977 error = EINVAL; 1978 break; 1979 } 1980 break; 1981 default: 1982 error = EINVAL; 1983 break; 1984 } 1985 if (set) 1986 cpuset_rel(set); 1987 if (p) 1988 PROC_UNLOCK(p); 1989 /* 1990 * Translate prefer into a set containing only the preferred domain, 1991 * not the entire fallback set. 1992 */ 1993 if (outset.ds_policy == DOMAINSET_POLICY_PREFER) { 1994 DOMAINSET_ZERO(&outset.ds_mask); 1995 DOMAINSET_SET(outset.ds_prefer, &outset.ds_mask); 1996 } 1997 DOMAINSET_COPY(&outset.ds_mask, mask); 1998 if (error == 0) 1999 error = copyout(mask, maskp, domainsetsize); 2000 if (error == 0) 2001 if (suword32(policyp, outset.ds_policy) != 0) 2002 error = EFAULT; 2003 out: 2004 free(mask, M_TEMP); 2005 return (error); 2006 } 2007 2008 #ifndef _SYS_SYSPROTO_H_ 2009 struct cpuset_setdomain_args { 2010 cpulevel_t level; 2011 cpuwhich_t which; 2012 id_t id; 2013 size_t domainsetsize; 2014 domainset_t *mask; 2015 int policy; 2016 }; 2017 #endif 2018 int 2019 sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap) 2020 { 2021 2022 return (kern_cpuset_setdomain(td, uap->level, uap->which, 2023 uap->id, uap->domainsetsize, uap->mask, uap->policy)); 2024 } 2025 2026 int 2027 kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, 2028 id_t id, size_t domainsetsize, const domainset_t *maskp, int policy) 2029 { 2030 struct cpuset *nset; 2031 struct cpuset *set; 2032 struct thread *ttd; 2033 struct proc *p; 2034 struct domainset domain; 2035 domainset_t *mask; 2036 int error; 2037 2038 if (domainsetsize < sizeof(domainset_t) || 2039 domainsetsize > DOMAINSET_MAXSIZE / NBBY) 2040 return (ERANGE); 2041 if (policy <= DOMAINSET_POLICY_INVALID || 2042 policy > DOMAINSET_POLICY_MAX) 2043 return (EINVAL); 2044 /* In Capability mode, you can only set your own CPU set. */ 2045 if (IN_CAPABILITY_MODE(td)) { 2046 if (level != CPU_LEVEL_WHICH) 2047 return (ECAPMODE); 2048 if (which != CPU_WHICH_TID && which != CPU_WHICH_PID) 2049 return (ECAPMODE); 2050 if (id != -1) 2051 return (ECAPMODE); 2052 } 2053 memset(&domain, 0, sizeof(domain)); 2054 mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); 2055 error = copyin(maskp, mask, domainsetsize); 2056 if (error) 2057 goto out; 2058 /* 2059 * Verify that no high bits are set. 2060 */ 2061 if (domainsetsize > sizeof(domainset_t)) { 2062 char *end; 2063 char *cp; 2064 2065 end = cp = (char *)&mask->__bits; 2066 end += domainsetsize; 2067 cp += sizeof(domainset_t); 2068 while (cp != end) 2069 if (*cp++ != 0) { 2070 error = EINVAL; 2071 goto out; 2072 } 2073 2074 } 2075 DOMAINSET_COPY(mask, &domain.ds_mask); 2076 domain.ds_policy = policy; 2077 2078 /* Translate preferred policy into a mask and fallback. */ 2079 if (policy == DOMAINSET_POLICY_PREFER) { 2080 /* Only support a single preferred domain. */ 2081 if (DOMAINSET_COUNT(&domain.ds_mask) != 1) { 2082 error = EINVAL; 2083 goto out; 2084 } 2085 domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1; 2086 /* This will be constrained by domainset_shadow(). */ 2087 DOMAINSET_FILL(&domain.ds_mask); 2088 } 2089 2090 switch (level) { 2091 case CPU_LEVEL_ROOT: 2092 case CPU_LEVEL_CPUSET: 2093 error = cpuset_which(which, id, &p, &ttd, &set); 2094 if (error) 2095 break; 2096 switch (which) { 2097 case CPU_WHICH_TID: 2098 case CPU_WHICH_PID: 2099 thread_lock(ttd); 2100 set = cpuset_ref(ttd->td_cpuset); 2101 thread_unlock(ttd); 2102 PROC_UNLOCK(p); 2103 break; 2104 case CPU_WHICH_CPUSET: 2105 case CPU_WHICH_JAIL: 2106 break; 2107 case CPU_WHICH_IRQ: 2108 case CPU_WHICH_INTRHANDLER: 2109 case CPU_WHICH_ITHREAD: 2110 case CPU_WHICH_DOMAIN: 2111 error = EINVAL; 2112 goto out; 2113 } 2114 if (level == CPU_LEVEL_ROOT) 2115 nset = cpuset_refroot(set); 2116 else 2117 nset = cpuset_refbase(set); 2118 error = cpuset_modify_domain(nset, &domain); 2119 cpuset_rel(nset); 2120 cpuset_rel(set); 2121 break; 2122 case CPU_LEVEL_WHICH: 2123 switch (which) { 2124 case CPU_WHICH_TID: 2125 error = _cpuset_setthread(id, NULL, &domain); 2126 break; 2127 case CPU_WHICH_PID: 2128 error = cpuset_setproc(id, NULL, NULL, &domain); 2129 break; 2130 case CPU_WHICH_CPUSET: 2131 case CPU_WHICH_JAIL: 2132 error = cpuset_which(which, id, &p, &ttd, &set); 2133 if (error == 0) { 2134 error = cpuset_modify_domain(set, &domain); 2135 cpuset_rel(set); 2136 } 2137 break; 2138 case CPU_WHICH_IRQ: 2139 case CPU_WHICH_INTRHANDLER: 2140 case CPU_WHICH_ITHREAD: 2141 default: 2142 error = EINVAL; 2143 break; 2144 } 2145 break; 2146 default: 2147 error = EINVAL; 2148 break; 2149 } 2150 out: 2151 free(mask, M_TEMP); 2152 return (error); 2153 } 2154 2155 #ifdef DDB 2156 2157 static void 2158 ddb_display_bitset(const struct bitset *set, int size) 2159 { 2160 int bit, once; 2161 2162 for (once = 0, bit = 0; bit < size; bit++) { 2163 if (CPU_ISSET(bit, set)) { 2164 if (once == 0) { 2165 db_printf("%d", bit); 2166 once = 1; 2167 } else 2168 db_printf(",%d", bit); 2169 } 2170 } 2171 if (once == 0) 2172 db_printf("<none>"); 2173 } 2174 2175 void 2176 ddb_display_cpuset(const cpuset_t *set) 2177 { 2178 ddb_display_bitset((const struct bitset *)set, CPU_SETSIZE); 2179 } 2180 2181 static void 2182 ddb_display_domainset(const domainset_t *set) 2183 { 2184 ddb_display_bitset((const struct bitset *)set, DOMAINSET_SETSIZE); 2185 } 2186 2187 DB_SHOW_COMMAND(cpusets, db_show_cpusets) 2188 { 2189 struct cpuset *set; 2190 2191 LIST_FOREACH(set, &cpuset_ids, cs_link) { 2192 db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n", 2193 set, set->cs_id, set->cs_ref, set->cs_flags, 2194 (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0); 2195 db_printf(" cpu mask="); 2196 ddb_display_cpuset(&set->cs_mask); 2197 db_printf("\n"); 2198 db_printf(" domain policy %d prefer %d mask=", 2199 set->cs_domain->ds_policy, set->cs_domain->ds_prefer); 2200 ddb_display_domainset(&set->cs_domain->ds_mask); 2201 db_printf("\n"); 2202 if (db_pager_quit) 2203 break; 2204 } 2205 } 2206 2207 DB_SHOW_COMMAND(domainsets, db_show_domainsets) 2208 { 2209 struct domainset *set; 2210 2211 LIST_FOREACH(set, &cpuset_domains, ds_link) { 2212 db_printf("set=%p policy %d prefer %d cnt %d\n", 2213 set, set->ds_policy, set->ds_prefer, set->ds_cnt); 2214 db_printf(" mask ="); 2215 ddb_display_domainset(&set->ds_mask); 2216 db_printf("\n"); 2217 } 2218 } 2219 #endif /* DDB */ 2220