/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Copyright (c) 2008 Nokia Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/ctype.h>
#include <sys/sysproto.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/syscallsubr.h>
#include <sys/capsicum.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <sys/sx.h>
#include <sys/queue.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif /* DDB */

/*
 * cpusets provide a mechanism for creating and manipulating sets of
 * processors for the purpose of constraining the scheduling of threads to
 * specific processors.
 *
 * Each process belongs to an identified set, by default this is set 1.  Each
 * thread may further restrict the cpus it may run on to a subset of this
 * named set.  This creates an anonymous set which other threads and processes
 * may not join by number.
 *
 * The named set is referred to herein as the 'base' set to avoid ambiguity.
 * This set is usually a child of a 'root' set while the anonymous set may
 * simply be referred to as a mask.  In the syscall api these are referred to
 * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here.
 *
 * Threads inherit their set from their creator whether it be anonymous or
 * not.  This means that anonymous sets are immutable because they may be
 * shared.  To modify an anonymous set a new set is created with the desired
 * mask and the same parent as the existing anonymous set.  This gives the
 * illusion of each thread having a private mask.
 *
 * Via the syscall apis a user may ask to retrieve or modify the root, base,
 * or mask that is discovered via a pid, tid, or setid.  Modifying a set
 * modifies all numbered and anonymous child sets to comply with the new mask.
 * Modifying a pid or tid's mask applies only to that tid but must still
 * exist within the assigned parent set.
 *
 * A thread may not be assigned to a group separate from other threads in
 * the process.  This is to remove ambiguity when the setid is queried with
 * a pid argument.  There is no other technical limitation.
 *
 * This somewhat complex arrangement is intended to make it easy for
 * applications to query available processors and bind their threads to
 * specific processors while also allowing administrators to dynamically
 * reprovision by changing sets which apply to groups of processes.
 *
 * A simple application should not concern itself with sets at all and
 * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id
 * meaning 'curthread'.  It may query available cpus for that tid with a
 * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
 */
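
/*
 * For illustration only (a userland sketch, not part of this file):
 * query the cpus available to the current thread's base set, then bind
 * the current thread to the first cpu found there.  Error handling is
 * omitted.
 *
 *	cpuset_t mask;
 *	int cpu;
 *
 *	cpuset_getaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1,
 *	    sizeof(mask), &mask);
 *	cpu = CPU_FFS(&mask) - 1;
 *	CPU_ZERO(&mask);
 *	CPU_SET(cpu, &mask);
 *	cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
 *	    sizeof(mask), &mask);
 */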

LIST_HEAD(domainlist, domainset);

static uma_zone_t cpuset_zone;
static uma_zone_t domainset_zone;
static struct mtx cpuset_lock;
static struct setlist cpuset_ids;
static struct domainlist cpuset_domains;
static struct unrhdr *cpuset_unr;
static struct cpuset *cpuset_zero, *cpuset_default, *cpuset_kernel;
static struct domainset domainset0, domainset2;

/* Return the size of cpuset_t at the kernel level */
SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD,
    SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)");

cpuset_t *cpuset_root;
cpuset_t cpuset_domain[MAXMEMDOM];

static int domainset_valid(const struct domainset *, const struct domainset *);

/*
 * Find the first non-anonymous set starting from 'set'.
 */
static struct cpuset *
cpuset_getbase(struct cpuset *set)
{

	if (set->cs_id == CPUSET_INVALID)
		set = set->cs_parent;
	return (set);
}

/*
 * Walks up the tree from 'set' to find the root.
 */
static struct cpuset *
cpuset_getroot(struct cpuset *set)
{

	while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL)
		set = set->cs_parent;
	return (set);
}

/*
 * Acquire a reference to a cpuset, all pointers must be tracked with refs.
 */
struct cpuset *
cpuset_ref(struct cpuset *set)
{

	refcount_acquire(&set->cs_ref);
	return (set);
}

/*
 * Walks up the tree from 'set' to find the root.  Returns the root
 * referenced.
 */
static struct cpuset *
cpuset_refroot(struct cpuset *set)
{

	return (cpuset_ref(cpuset_getroot(set)));
}

/*
 * Find the first non-anonymous set starting from 'set'.  Returns this set
 * referenced.  May return the passed in set with an extra ref if it is
 * not anonymous.
 */
static struct cpuset *
cpuset_refbase(struct cpuset *set)
{

	return (cpuset_ref(cpuset_getbase(set)));
}
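
/*
 * Usage note for the helpers above (illustrative): every cpuset pointer
 * that is stored or returned must hold a reference, and each reference
 * is eventually dropped with cpuset_rel().  A typical pattern:
 *
 *	set = cpuset_refroot(td->td_cpuset);
 *	(inspect set->cs_mask)
 *	cpuset_rel(set);
 */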

/*
 * Release a reference in a context where it is safe to allocate.
 */
void
cpuset_rel(struct cpuset *set)
{
	cpusetid_t id;

	if (refcount_release(&set->cs_ref) == 0)
		return;
	mtx_lock_spin(&cpuset_lock);
	LIST_REMOVE(set, cs_siblings);
	id = set->cs_id;
	if (id != CPUSET_INVALID)
		LIST_REMOVE(set, cs_link);
	mtx_unlock_spin(&cpuset_lock);
	cpuset_rel(set->cs_parent);
	uma_zfree(cpuset_zone, set);
	if (id != CPUSET_INVALID)
		free_unr(cpuset_unr, id);
}

/*
 * Deferred release must be used when in a context that is not safe to
 * allocate/free.  This places any unreferenced sets on the list 'head'.
 */
static void
cpuset_rel_defer(struct setlist *head, struct cpuset *set)
{

	if (refcount_release(&set->cs_ref) == 0)
		return;
	mtx_lock_spin(&cpuset_lock);
	LIST_REMOVE(set, cs_siblings);
	if (set->cs_id != CPUSET_INVALID)
		LIST_REMOVE(set, cs_link);
	LIST_INSERT_HEAD(head, set, cs_link);
	mtx_unlock_spin(&cpuset_lock);
}

/*
 * Complete a deferred release.  Removes the set from the list provided to
 * cpuset_rel_defer.
 */
static void
cpuset_rel_complete(struct cpuset *set)
{
	LIST_REMOVE(set, cs_link);
	cpuset_rel(set->cs_parent);
	uma_zfree(cpuset_zone, set);
}
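
/*
 * Illustrative two-phase release (this mirrors what cpuset_setproc()
 * does below): defer releases while thread locks are held, then
 * complete them once it is safe to free.  'droplist' and 'oldset' are
 * hypothetical locals.
 *
 *	LIST_INIT(&droplist);
 *	thread_lock(td);
 *	cpuset_rel_defer(&droplist, oldset);
 *	thread_unlock(td);
 *	while ((set = LIST_FIRST(&droplist)) != NULL)
 *		cpuset_rel_complete(set);
 */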

/*
 * Find a set based on an id.  Returns it with a ref.
 */
static struct cpuset *
cpuset_lookup(cpusetid_t setid, struct thread *td)
{
	struct cpuset *set;

	if (setid == CPUSET_INVALID)
		return (NULL);
	mtx_lock_spin(&cpuset_lock);
	LIST_FOREACH(set, &cpuset_ids, cs_link)
		if (set->cs_id == setid)
			break;
	if (set)
		cpuset_ref(set);
	mtx_unlock_spin(&cpuset_lock);

	KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
	if (set != NULL && jailed(td->td_ucred)) {
		struct cpuset *jset, *tset;

		jset = td->td_ucred->cr_prison->pr_cpuset;
		for (tset = set; tset != NULL; tset = tset->cs_parent)
			if (tset == jset)
				break;
		if (tset == NULL) {
			cpuset_rel(set);
			set = NULL;
		}
	}

	return (set);
}

/*
 * Create a set in the space provided in 'set' with the provided parameters.
 * The set is returned with a single ref.  May return EDEADLK if the set
 * will have no valid cpu based on restrictions from the parent.
 */
static int
_cpuset_create(struct cpuset *set, struct cpuset *parent,
    const cpuset_t *mask, struct domainset *domain, cpusetid_t id)
{

	if (domain == NULL)
		domain = parent->cs_domain;
	if (mask == NULL)
		mask = &parent->cs_mask;
	if (!CPU_OVERLAP(&parent->cs_mask, mask))
		return (EDEADLK);
	/* The domain must be prepared ahead of time. */
	if (!domainset_valid(parent->cs_domain, domain))
		return (EDEADLK);
	CPU_COPY(mask, &set->cs_mask);
	LIST_INIT(&set->cs_children);
	refcount_init(&set->cs_ref, 1);
	set->cs_flags = 0;
	mtx_lock_spin(&cpuset_lock);
	set->cs_domain = domain;
	CPU_AND(&set->cs_mask, &parent->cs_mask);
	set->cs_id = id;
	set->cs_parent = cpuset_ref(parent);
	LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings);
	if (set->cs_id != CPUSET_INVALID)
		LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
	mtx_unlock_spin(&cpuset_lock);

	return (0);
}

/*
 * Create a new non-anonymous set with the requested parent and mask.  May
 * return failures if the mask is invalid or a new number can not be
 * allocated.
 */
static int
cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask)
{
	struct cpuset *set;
	cpusetid_t id;
	int error;

	id = alloc_unr(cpuset_unr);
	if (id == -1)
		return (ENFILE);
	*setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
	error = _cpuset_create(set, parent, mask, NULL, id);
	if (error == 0)
		return (0);
	free_unr(cpuset_unr, id);
	uma_zfree(cpuset_zone, set);

	return (error);
}

static void
cpuset_freelist_add(struct setlist *list, int count)
{
	struct cpuset *set;
	int i;

	for (i = 0; i < count; i++) {
		set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK);
		LIST_INSERT_HEAD(list, set, cs_link);
	}
}

static void
cpuset_freelist_init(struct setlist *list, int count)
{

	LIST_INIT(list);
	cpuset_freelist_add(list, count);
}

static void
cpuset_freelist_free(struct setlist *list)
{
	struct cpuset *set;

	while ((set = LIST_FIRST(list)) != NULL) {
		LIST_REMOVE(set, cs_link);
		uma_zfree(cpuset_zone, set);
	}
}

static void
domainset_freelist_add(struct domainlist *list, int count)
{
	struct domainset *set;
	int i;

	for (i = 0; i < count; i++) {
		set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK);
		LIST_INSERT_HEAD(list, set, ds_link);
	}
}

static void
domainset_freelist_init(struct domainlist *list, int count)
{

	LIST_INIT(list);
	domainset_freelist_add(list, count);
}

static void
domainset_freelist_free(struct domainlist *list)
{
	struct domainset *set;

	while ((set = LIST_FIRST(list)) != NULL) {
		LIST_REMOVE(set, ds_link);
		uma_zfree(domainset_zone, set);
	}
}

/* Copy a domainset preserving mask and policy. */
static void
domainset_copy(const struct domainset *from, struct domainset *to)
{

	DOMAINSET_COPY(&from->ds_mask, &to->ds_mask);
	to->ds_policy = from->ds_policy;
	to->ds_prefer = from->ds_prefer;
}

/* Return 1 if mask and policy are equal, otherwise 0. */
static int
domainset_equal(const struct domainset *one, const struct domainset *two)
{

	return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 &&
	    one->ds_policy == two->ds_policy &&
	    one->ds_prefer == two->ds_prefer);
}

/* Return 1 if child is a valid subset of parent. */
static int
domainset_valid(const struct domainset *parent, const struct domainset *child)
{
	if (child->ds_policy != DOMAINSET_POLICY_PREFER)
		return (DOMAINSET_SUBSET(&parent->ds_mask, &child->ds_mask));
	return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask));
}

static int
domainset_restrict(const struct domainset *parent,
    const struct domainset *child)
{
	if (child->ds_policy != DOMAINSET_POLICY_PREFER)
		return (DOMAINSET_OVERLAP(&parent->ds_mask, &child->ds_mask));
	return (DOMAINSET_ISSET(child->ds_prefer, &parent->ds_mask));
}

/*
 * Lookup or create a domainset.  The key is provided in ds_mask and
 * ds_policy.  If the domainset does not yet exist the storage in
 * 'domain' is used to insert.  Otherwise this storage is freed to the
 * domainset_zone and the existing domainset is returned.
 */
static struct domainset *
_domainset_create(struct domainset *domain, struct domainlist *freelist)
{
	struct domainset *ndomain;
	int i, j, max;

	mtx_lock_spin(&cpuset_lock);
	LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
		if (domainset_equal(ndomain, domain))
			break;
	/*
	 * If the domain does not yet exist we insert it and initialize
	 * various iteration helpers which are not part of the key.
	 */
	if (ndomain == NULL) {
		LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link);
		domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask);
		max = DOMAINSET_FLS(&domain->ds_mask) + 1;
		for (i = 0, j = 0; i < max; i++)
			if (DOMAINSET_ISSET(i, &domain->ds_mask))
				domain->ds_order[j++] = i;
	}
	mtx_unlock_spin(&cpuset_lock);
	if (ndomain == NULL)
		return (domain);
	if (freelist != NULL)
		LIST_INSERT_HEAD(freelist, domain, ds_link);
	else
		uma_zfree(domainset_zone, domain);
	return (ndomain);
}

/*
 * Are any of the domains in the mask empty?  If so, silently
 * remove them.  If only empty domains are present, we must
 * return failure.
 */
static bool
domainset_empty_vm(struct domainset *domain)
{
	int i, max;

	max = DOMAINSET_FLS(&domain->ds_mask) + 1;
	for (i = 0; i < max; i++) {
		if (DOMAINSET_ISSET(i, &domain->ds_mask) &&
		    VM_DOMAIN_EMPTY(i))
			DOMAINSET_CLR(i, &domain->ds_mask);
	}

	return (DOMAINSET_EMPTY(&domain->ds_mask));
}

/*
 * Create or lookup a domainset based on the key held in 'domain'.
 */
struct domainset *
domainset_create(const struct domainset *domain)
{
	struct domainset *ndomain;

	/*
	 * Validate the policy.  It must specify a usable policy number with
	 * only valid domains.  Preferred must include the preferred domain
	 * in the mask.
	 */
	if (domain->ds_policy <= DOMAINSET_POLICY_INVALID ||
	    domain->ds_policy > DOMAINSET_POLICY_MAX)
		return (NULL);
	if (domain->ds_policy == DOMAINSET_POLICY_PREFER &&
	    !DOMAINSET_ISSET(domain->ds_prefer, &domain->ds_mask))
		return (NULL);
	if (!DOMAINSET_SUBSET(&domainset0.ds_mask, &domain->ds_mask))
		return (NULL);
	ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO);
	domainset_copy(domain, ndomain);
	return (_domainset_create(ndomain, NULL));
}
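
/*
 * A sketch of constructing a key for domainset_create() (kernel callers
 * normally arrive here via kern_cpuset_setdomain() below; 'key' and
 * 'dset' are hypothetical locals):
 *
 *	struct domainset key;
 *	struct domainset *dset;
 *
 *	memset(&key, 0, sizeof(key));
 *	DOMAINSET_SET(0, &key.ds_mask);
 *	key.ds_policy = DOMAINSET_POLICY_PREFER;
 *	key.ds_prefer = 0;
 *	dset = domainset_create(&key);	(NULL if the key is invalid)
 */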

/*
 * Update thread domainset pointers.
 */
static void
domainset_notify(void)
{
	struct thread *td;
	struct proc *p;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state == PRS_NEW) {
			PROC_UNLOCK(p);
			continue;
		}
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			td->td_domain.dr_policy = td->td_cpuset->cs_domain;
			thread_unlock(td);
		}
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	kernel_object->domain.dr_policy = cpuset_kernel->cs_domain;
}

/*
 * Create a new set that is a subset of a parent.
 */
static struct domainset *
domainset_shadow(const struct domainset *pdomain,
    const struct domainset *domain, struct domainlist *freelist)
{
	struct domainset *ndomain;

	ndomain = LIST_FIRST(freelist);
	LIST_REMOVE(ndomain, ds_link);

	/*
	 * Initialize the key from the request.
	 */
	domainset_copy(domain, ndomain);

	/*
	 * Restrict the key by the parent.
	 */
	DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask);

	return (_domainset_create(ndomain, freelist));
}

/*
 * Recursively check for errors that would occur from applying mask to
 * the tree of sets starting at 'set'.  Checks for sets that would become
 * empty as well as RDONLY flags.
 */
static int
cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask)
{
	struct cpuset *nset;
	cpuset_t newmask;
	int error;

	mtx_assert(&cpuset_lock, MA_OWNED);
	if (set->cs_flags & CPU_SET_RDONLY)
		return (EPERM);
	if (check_mask) {
		if (!CPU_OVERLAP(&set->cs_mask, mask))
			return (EDEADLK);
		CPU_COPY(&set->cs_mask, &newmask);
		CPU_AND(&newmask, mask);
	} else
		CPU_COPY(mask, &newmask);
	error = 0;
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0)
			break;
	return (error);
}

/*
 * Applies the mask 'mask' without checking for empty sets or permissions.
 */
static void
cpuset_update(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *nset;

	mtx_assert(&cpuset_lock, MA_OWNED);
	CPU_AND(&set->cs_mask, mask);
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		cpuset_update(nset, &set->cs_mask);

	return;
}

/*
 * Modify the set 'set' to use a copy of the mask provided.  Apply this new
 * mask to restrict all children in the tree.  Checks for validity before
 * applying the changes.
 */
static int
cpuset_modify(struct cpuset *set, cpuset_t *mask)
{
	struct cpuset *root;
	int error;

	error = priv_check(curthread, PRIV_SCHED_CPUSET);
	if (error)
		return (error);
	/*
	 * In case we are called from within the jail,
	 * we do not allow modifying the dedicated root
	 * cpuset of the jail but may still allow changing
	 * child sets.
	 */
	if (jailed(curthread->td_ucred) &&
	    set->cs_flags & CPU_SET_ROOT)
		return (EPERM);
	/*
	 * Verify that we have access to this set of
	 * cpus.
	 */
	root = cpuset_getroot(set);
	mtx_lock_spin(&cpuset_lock);
	if (root && !CPU_SUBSET(&root->cs_mask, mask)) {
		error = EINVAL;
		goto out;
	}
	error = cpuset_testupdate(set, mask, 0);
	if (error)
		goto out;
	CPU_COPY(mask, &set->cs_mask);
	cpuset_update(set, mask);
out:
	mtx_unlock_spin(&cpuset_lock);

	return (error);
}

/*
 * Recursively check for errors that would occur from applying mask to
 * the tree of sets starting at 'set'.  Checks for sets that would become
 * empty as well as RDONLY flags.
 */
static int
cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset,
    struct domainset *orig, int *count, int check_mask)
{
	struct cpuset *nset;
	struct domainset *domain;
	struct domainset newset;
	int error;

	mtx_assert(&cpuset_lock, MA_OWNED);
	if (set->cs_flags & CPU_SET_RDONLY)
		return (EPERM);
	domain = set->cs_domain;
	domainset_copy(domain, &newset);
	if (!domainset_equal(domain, orig)) {
		if (!domainset_restrict(domain, dset))
			return (EDEADLK);
		DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask);
		/* Count the number of domains that are changing. */
		(*count)++;
	}
	error = 0;
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		if ((error = cpuset_testupdate_domain(nset, &newset, domain,
		    count, 1)) != 0)
			break;
	return (error);
}

/*
 * Applies the mask 'mask' without checking for empty sets or permissions.
 */
static void
cpuset_update_domain(struct cpuset *set, struct domainset *domain,
    struct domainset *orig, struct domainlist *domains)
{
	struct cpuset *nset;

	mtx_assert(&cpuset_lock, MA_OWNED);
	/*
	 * If this domainset has changed from the parent we must calculate
	 * a new set.  Otherwise it simply inherits from the parent.  When
	 * we inherit from the parent we get a new mask and policy.  If the
	 * set is modified from the parent we keep the policy and only
	 * update the mask.
	 */
	if (set->cs_domain != orig) {
		orig = set->cs_domain;
		set->cs_domain = domainset_shadow(domain, orig, domains);
	} else
		set->cs_domain = domain;
	LIST_FOREACH(nset, &set->cs_children, cs_siblings)
		cpuset_update_domain(nset, set->cs_domain, orig, domains);

	return;
}

/*
 * Modify the set 'set' to use a copy of the domainset provided.  Apply
 * this new mask to restrict all children in the tree.  Checks for validity
 * before applying the changes.
 */
static int
cpuset_modify_domain(struct cpuset *set, struct domainset *domain)
{
	struct domainlist domains;
	struct domainset temp;
	struct domainset *dset;
	struct cpuset *root;
	int ndomains, needed;
	int error;

	error = priv_check(curthread, PRIV_SCHED_CPUSET);
	if (error)
		return (error);
	/*
	 * In case we are called from within the jail,
	 * we do not allow modifying the dedicated root
	 * cpuset of the jail but may still allow changing
	 * child sets.
	 */
	if (jailed(curthread->td_ucred) &&
	    set->cs_flags & CPU_SET_ROOT)
		return (EPERM);
	domainset_freelist_init(&domains, 0);
	domain = domainset_create(domain);
	ndomains = needed = 0;
	do {
		if (ndomains < needed) {
			domainset_freelist_add(&domains, needed - ndomains);
			ndomains = needed;
		}
		root = cpuset_getroot(set);
		mtx_lock_spin(&cpuset_lock);
		dset = root->cs_domain;
		/*
		 * Verify that we have access to this set of domains.
		 */
		if (root && !domainset_valid(dset, domain)) {
			error = EINVAL;
			goto out;
		}
		/*
		 * If applying prefer we keep the current set as the fallback.
		 */
		if (domain->ds_policy == DOMAINSET_POLICY_PREFER)
			DOMAINSET_COPY(&set->cs_domain->ds_mask,
			    &domain->ds_mask);
		/*
		 * Determine whether we can apply this set of domains and
		 * how many new domain structures it will require.
		 */
		domainset_copy(domain, &temp);
		needed = 0;
		error = cpuset_testupdate_domain(set, &temp, set->cs_domain,
		    &needed, 0);
		if (error)
			goto out;
		/*
		 * Drop the spin lock before looping back to allocate the
		 * additional domainsets with M_WAITOK.
		 */
		if (ndomains < needed)
			mtx_unlock_spin(&cpuset_lock);
	} while (ndomains < needed);
	dset = set->cs_domain;
	cpuset_update_domain(set, domain, dset, &domains);
out:
	mtx_unlock_spin(&cpuset_lock);
	domainset_freelist_free(&domains);
	if (error == 0)
		domainset_notify();

	return (error);
}

/*
 * Resolve the 'which' parameter of several cpuset apis.
 *
 * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
 * checks for permission via p_cansched().
 *
 * For WHICH_SET returns a valid set with a new reference.
 *
 * -1 may be supplied for any argument to mean the current proc/thread or
 * the base set of the current thread.  May fail with ESRCH/EPERM.
 */
int
cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
    struct cpuset **setp)
{
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	*pp = p = NULL;
	*tdp = td = NULL;
	*setp = set = NULL;
	switch (which) {
	case CPU_WHICH_PID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			break;
		}
		if ((p = pfind(id)) == NULL)
			return (ESRCH);
		break;
	case CPU_WHICH_TID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			td = curthread;
			break;
		}
		td = tdfind(id, -1);
		if (td == NULL)
			return (ESRCH);
		p = td->td_proc;
		break;
	case CPU_WHICH_CPUSET:
		if (id == -1) {
			thread_lock(curthread);
			set = cpuset_refbase(curthread->td_cpuset);
			thread_unlock(curthread);
		} else
			set = cpuset_lookup(id, curthread);
		if (set) {
			*setp = set;
			return (0);
		}
		return (ESRCH);
	case CPU_WHICH_JAIL:
	{
		/* Find `set' for prison with given id. */
		struct prison *pr;

		sx_slock(&allprison_lock);
		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
		sx_sunlock(&allprison_lock);
		if (pr == NULL)
			return (ESRCH);
		cpuset_ref(pr->pr_cpuset);
		*setp = pr->pr_cpuset;
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	case CPU_WHICH_IRQ:
	case CPU_WHICH_INTRHANDLER:
	case CPU_WHICH_ITHREAD:
	case CPU_WHICH_DOMAIN:
		return (0);
	default:
		return (EINVAL);
	}
	error = p_cansched(curthread, p);
	if (error) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (td == NULL)
		td = FIRST_THREAD_IN_PROC(p);
	*pp = p;
	*tdp = td;
	return (0);
}

static int
cpuset_testshadow(struct cpuset *set, const cpuset_t *mask,
    const struct domainset *domain)
{
	struct cpuset *parent;
	struct domainset *dset;

	parent = cpuset_getbase(set);
	/*
	 * If we are restricting a cpu mask it must be a subset of the
	 * parent or invalid CPUs have been specified.
	 */
	if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask))
		return (EINVAL);

	/*
	 * If we are restricting a domain mask it must be a subset of the
	 * parent or invalid domains have been specified.
	 */
	dset = parent->cs_domain;
	if (domain != NULL && !domainset_valid(dset, domain))
		return (EINVAL);

	return (0);
}

/*
 * Create an anonymous set with the provided mask in the space provided by
 * 'nset'.  If the passed in set is anonymous we use its parent otherwise
 * the new set is a child of 'set'.
 */
static int
cpuset_shadow(struct cpuset *set, struct cpuset **nsetp,
    const cpuset_t *mask, const struct domainset *domain,
    struct setlist *cpusets, struct domainlist *domains)
{
	struct cpuset *parent;
	struct cpuset *nset;
	struct domainset *dset;
	struct domainset *d;
	int error;

	error = cpuset_testshadow(set, mask, domain);
	if (error)
		return (error);

	parent = cpuset_getbase(set);
	dset = parent->cs_domain;
	if (mask == NULL)
		mask = &set->cs_mask;
	if (domain != NULL)
		d = domainset_shadow(dset, domain, domains);
	else
		d = set->cs_domain;
	nset = LIST_FIRST(cpusets);
	error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID);
	if (error == 0) {
		LIST_REMOVE(nset, cs_link);
		*nsetp = nset;
	}
	return (error);
}

static struct cpuset *
cpuset_update_thread(struct thread *td, struct cpuset *nset)
{
	struct cpuset *tdset;

	tdset = td->td_cpuset;
	td->td_cpuset = nset;
	td->td_domain.dr_policy = nset->cs_domain;
	sched_affinity(td);

	return (tdset);
}

static int
cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask,
    struct domainset *domain)
{
	struct cpuset *parent;

	parent = cpuset_getbase(tdset);
	if (mask == NULL)
		mask = &tdset->cs_mask;
	if (domain == NULL)
		domain = tdset->cs_domain;
	return (cpuset_testshadow(parent, mask, domain));
}

static int
cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask,
    struct domainset *domain, struct cpuset **nsetp,
    struct setlist *freelist, struct domainlist *domainlist)
{
	struct cpuset *parent;

	parent = cpuset_getbase(tdset);
	if (mask == NULL)
		mask = &tdset->cs_mask;
	if (domain == NULL)
		domain = tdset->cs_domain;
	return (cpuset_shadow(parent, nsetp, mask, domain, freelist,
	    domainlist));
}

static int
cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set,
    cpuset_t *mask, struct domainset *domain)
{
	struct cpuset *parent;

	parent = cpuset_getbase(tdset);

	/*
	 * If the thread restricted its mask then apply that same
	 * restriction to the new set, otherwise take it wholesale.
	 */
	if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) {
		CPU_COPY(&tdset->cs_mask, mask);
		CPU_AND(mask, &set->cs_mask);
	} else
		CPU_COPY(&set->cs_mask, mask);

	/*
	 * If the thread restricted the domain then we apply the
	 * restriction to the new set but retain the policy.
	 */
	if (tdset->cs_domain != parent->cs_domain) {
		domainset_copy(tdset->cs_domain, domain);
		DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask);
	} else
		domainset_copy(set->cs_domain, domain);

	if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask))
		return (EDEADLK);

	return (0);
}

static int
cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set)
{
	struct domainset domain;
	cpuset_t mask;

	if (tdset->cs_id != CPUSET_INVALID)
		return (0);
	return (cpuset_setproc_setthread_mask(tdset, set, &mask, &domain));
}

static int
cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
    struct cpuset **nsetp, struct setlist *freelist,
    struct domainlist *domainlist)
{
	struct domainset domain;
	cpuset_t mask;
	int error;

	/*
	 * If we're replacing on a thread that has not constrained the
	 * original set we can simply accept the new set.
	 */
	if (tdset->cs_id != CPUSET_INVALID) {
		*nsetp = cpuset_ref(set);
		return (0);
	}
	error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain);
	if (error)
		return (error);

	return (cpuset_shadow(tdset, nsetp, &mask, &domain, freelist,
	    domainlist));
}

/*
 * Handle three cases for updating an entire process.
 *
 * 1) Set is non-null.  This reparents all anonymous sets to the provided
 *    set and replaces all non-anonymous td_cpusets with the provided set.
 * 2) Mask is non-null.  This replaces or creates anonymous sets for every
 *    thread with the existing base as a parent.
 * 3) domain is non-null.  This creates anonymous sets for every thread
 *    and replaces the domain set.
 *
 * This is overly complicated because we can't allocate while holding a
 * spinlock and spinlocks must be held while changing and examining thread
 * state.
 */
static int
cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
    struct domainset *domain)
{
	struct setlist freelist;
	struct setlist droplist;
	struct domainlist domainlist;
	struct cpuset *nset;
	struct thread *td;
	struct proc *p;
	int threads;
	int nfree;
	int error;

	/*
	 * The algorithm requires two passes due to locking considerations.
	 *
	 * 1) Lookup the process and acquire the locks in the required order.
	 * 2) If enough cpusets have not been allocated release the locks and
	 *    allocate them.  Loop.
	 */
	cpuset_freelist_init(&freelist, 1);
	domainset_freelist_init(&domainlist, 1);
	nfree = 1;
	LIST_INIT(&droplist);
	for (;;) {
		error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
		if (error)
			goto out;
		if (nfree >= p->p_numthreads)
			break;
		threads = p->p_numthreads;
		PROC_UNLOCK(p);
		if (nfree < threads) {
			cpuset_freelist_add(&freelist, threads - nfree);
			domainset_freelist_add(&domainlist, threads - nfree);
			nfree = threads;
		}
	}
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/*
	 * Now that the appropriate locks are held and we have enough cpusets,
	 * make sure the operation will succeed before applying changes.  The
	 * proc lock prevents td_cpuset from changing between calls.
	 */
	error = 0;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		if (set != NULL)
			error = cpuset_setproc_test_setthread(td->td_cpuset,
			    set);
		else
			error = cpuset_setproc_test_maskthread(td->td_cpuset,
			    mask, domain);
		thread_unlock(td);
		if (error)
			goto unlock_out;
	}
	/*
	 * Replace each thread's cpuset while using deferred release.  We
	 * must do this because the thread lock must be held while operating
	 * on the thread and this limits the type of operations allowed.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		if (set != NULL)
			error = cpuset_setproc_setthread(td->td_cpuset, set,
			    &nset, &freelist, &domainlist);
		else
			error = cpuset_setproc_maskthread(td->td_cpuset, mask,
			    domain, &nset, &freelist, &domainlist);
		if (error) {
			thread_unlock(td);
			break;
		}
		cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset));
		thread_unlock(td);
	}
unlock_out:
	PROC_UNLOCK(p);
out:
	while ((nset = LIST_FIRST(&droplist)) != NULL)
		cpuset_rel_complete(nset);
	cpuset_freelist_free(&freelist);
	domainset_freelist_free(&domainlist);
	return (error);
}

static int
bitset_strprint(char *buf, size_t bufsiz, const struct bitset *set, int setlen)
{
	size_t bytes;
	int i, once;
	char *p;

	once = 0;
	p = buf;
	for (i = 0; i < __bitset_words(setlen); i++) {
		if (once != 0) {
			if (bufsiz < 1)
				return (0);
			*p = ',';
			p++;
			bufsiz--;
		} else
			once = 1;
		if (bufsiz < sizeof(__STRING(ULONG_MAX)))
			return (0);
		bytes = snprintf(p, bufsiz, "%lx", set->__bits[i]);
		p += bytes;
		bufsiz -= bytes;
	}
	return (p - buf);
}

static int
bitset_strscan(struct bitset *set, int setlen, const char *buf)
{
	int i, ret;
	const char *p;

	BIT_ZERO(setlen, set);
	p = buf;
	for (i = 0; i < __bitset_words(setlen); i++) {
		if (*p == ',') {
			p++;
			continue;
		}
		ret = sscanf(p, "%lx", &set->__bits[i]);
		if (ret == 0 || ret == -1)
			break;
		while (isxdigit(*p))
			p++;
	}
	return (p - buf);
}

/*
 * Return a string representing a valid layout for a cpuset_t object.
 * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
 */
char *
cpusetobj_strprint(char *buf, const cpuset_t *set)
{

	bitset_strprint(buf, CPUSETBUFSIZ, (const struct bitset *)set,
	    CPU_SETSIZE);
	return (buf);
}

/*
 * Build a valid cpuset_t object from a string representation.
 * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
 */
int
cpusetobj_strscan(cpuset_t *set, const char *buf)
{
	char p;

	if (strlen(buf) > CPUSETBUFSIZ - 1)
		return (-1);

	p = buf[bitset_strscan((struct bitset *)set, CPU_SETSIZE, buf)];
	if (p != '\0')
		return (-1);

	return (0);
}
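
/*
 * A worked example of the string layout handled above: bitset_strprint()
 * emits one "%lx" word per bitset word, least significant word first,
 * separated by commas.  With 64-bit longs and a CPU_SETSIZE of 256, a set
 * containing only cpus 0 and 1 prints as "3,0,0,0", and
 * cpusetobj_strscan() accepts the same layout back.
 */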

/*
 * Handle a domainset specifier in the sysctl tree.  A pointer to a pointer
 * to a domainset is in arg1.  If the user specifies a valid domainset the
 * pointer is updated.
 *
 * Format is:
 * hex mask word 0,hex mask word 1,...:decimal policy:decimal preferred
 */
int
sysctl_handle_domainset(SYSCTL_HANDLER_ARGS)
{
	char buf[DOMAINSETBUFSIZ];
	struct domainset *dset;
	struct domainset key;
	int policy, prefer, error;
	char *p;

	dset = *(struct domainset **)arg1;
	error = 0;

	if (dset != NULL) {
		p = buf + bitset_strprint(buf, DOMAINSETBUFSIZ,
		    (const struct bitset *)&dset->ds_mask, DOMAINSET_SETSIZE);
		sprintf(p, ":%d:%d", dset->ds_policy, dset->ds_prefer);
	} else
		sprintf(buf, "<NULL>");
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	/*
	 * Read in and validate the string.
	 */
	memset(&key, 0, sizeof(key));
	p = &buf[bitset_strscan((struct bitset *)&key.ds_mask,
	    DOMAINSET_SETSIZE, buf)];
	if (p == buf)
		return (EINVAL);
	if (sscanf(p, ":%d:%d", &policy, &prefer) != 2)
		return (EINVAL);
	key.ds_policy = policy;
	key.ds_prefer = prefer;

	/* domainset_create() validates the policy. */
	dset = domainset_create(&key);
	if (dset == NULL)
		return (EINVAL);
	*(struct domainset **)arg1 = dset;

	return (error);
}
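
/*
 * An example of the sysctl format above (illustrative values): a mask
 * covering domains 0 and 1, policy value 1, and no preferred domain is
 * written "3:1:-1".
 */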

/*
 * Apply an anonymous mask or a domain to a single thread.
 */
static int
_cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain)
{
	struct setlist cpusets;
	struct domainlist domainlist;
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	cpuset_freelist_init(&cpusets, 1);
	domainset_freelist_init(&domainlist, domain != NULL);
	error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set);
	if (error)
		goto out;
	set = NULL;
	thread_lock(td);
	error = cpuset_shadow(td->td_cpuset, &nset, mask, domain,
	    &cpusets, &domainlist);
	if (error == 0)
		set = cpuset_update_thread(td, nset);
	thread_unlock(td);
	PROC_UNLOCK(p);
	if (set)
		cpuset_rel(set);
out:
	cpuset_freelist_free(&cpusets);
	domainset_freelist_free(&domainlist);
	return (error);
}

/*
 * Apply an anonymous mask to a single thread.
 */
int
cpuset_setthread(lwpid_t id, cpuset_t *mask)
{

	return (_cpuset_setthread(id, mask, NULL));
}

/*
 * Apply a new cpumask to the ithread.
 */
int
cpuset_setithread(lwpid_t id, int cpu)
{
	cpuset_t mask;

	CPU_ZERO(&mask);
	if (cpu == NOCPU)
		CPU_COPY(cpuset_root, &mask);
	else
		CPU_SET(cpu, &mask);
	return (_cpuset_setthread(id, &mask, NULL));
}

/*
 * Create the domainsets for cpuset 0, 1 and cpuset 2.
 */
void
domainset_zero(void)
{
	struct domainset *dset;
	int i;

	mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE);

	dset = &domainset0;
	DOMAINSET_ZERO(&dset->ds_mask);
	for (i = 0; i < vm_ndomains; i++)
		DOMAINSET_SET(i, &dset->ds_mask);
	dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
	dset->ds_prefer = -1;
	(void)domainset_empty_vm(dset);
	curthread->td_domain.dr_policy = _domainset_create(dset, NULL);

	domainset_copy(dset, &domainset2);
	domainset2.ds_policy = DOMAINSET_POLICY_INTERLEAVE;
	kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL);
}

/*
 * Creates system-wide cpusets and the cpuset for thread0 including three
 * sets:
 *
 * 0 - The root set which should represent all valid processors in the
 *     system.  It is initially created with a mask of all processors
 *     because we don't know what processors are valid until cpuset_init()
 *     runs.  This set is immutable.
 * 1 - The default set which all processes are a member of until changed.
 *     This allows an administrator to move all threads off of given cpus to
 *     dedicate them to high priority tasks or save power etc.
 * 2 - The kernel set which allows restriction and policy to be applied only
 *     to kernel threads and the kernel_object.
 */
struct cpuset *
cpuset_thread0(void)
{
	struct cpuset *set;
	int i;
	int error __unused;

	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_CACHE, 0);
	domainset_zone = uma_zcreate("domainset", sizeof(struct domainset),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);

	/*
	 * Create the root system set (0) for the whole machine.  Doesn't use
	 * cpuset_create() due to NULL parent.
	 */
	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
	CPU_COPY(&all_cpus, &set->cs_mask);
	LIST_INIT(&set->cs_children);
	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
	set->cs_ref = 1;
	set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY;
	set->cs_domain = &domainset0;
	cpuset_zero = set;
	cpuset_root = &set->cs_mask;

	/*
	 * Now derive a default (1), modifiable set from that to give out.
	 */
	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
	error = _cpuset_create(set, cpuset_zero, NULL, NULL, 1);
	KASSERT(error == 0, ("Error creating default set: %d\n", error));
	cpuset_default = set;
	/*
	 * Create the kernel set (2).
	 */
	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
	error = _cpuset_create(set, cpuset_zero, NULL, NULL, 2);
	KASSERT(error == 0, ("Error creating kernel set: %d\n", error));
	set->cs_domain = &domainset2;
	cpuset_kernel = set;

	/*
	 * Initialize the unit allocator.  0, 1, and 2 are allocated above.
	 */
	cpuset_unr = new_unrhdr(3, INT_MAX, NULL);

	/*
	 * If MD code has not initialized per-domain cpusets, place all
	 * CPUs in domain 0.
	 */
	for (i = 0; i < MAXMEMDOM; i++)
		if (!CPU_EMPTY(&cpuset_domain[i]))
			goto domains_set;
	CPU_COPY(&all_cpus, &cpuset_domain[0]);
domains_set:

	return (cpuset_default);
}

void
cpuset_kernthread(struct thread *td)
{
	struct cpuset *set;

	thread_lock(td);
	set = td->td_cpuset;
	td->td_cpuset = cpuset_ref(cpuset_kernel);
	thread_unlock(td);
	cpuset_rel(set);
}

/*
 * Create a cpuset, which would be cpuset_create() but
 * mark the new 'set' as root.
 *
 * We are not going to reparent the td to it.  Use cpuset_setproc_update_set()
 * for that.
 *
 * In case of no error, returns the set in *setp locked with a reference.
 */
int
cpuset_create_root(struct prison *pr, struct cpuset **setp)
{
	struct cpuset *set;
	int error;

	KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
	KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));

	error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
	if (error)
		return (error);

	KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data",
	    __func__, __LINE__));

	/* Mark the set as root. */
	set = *setp;
	set->cs_flags |= CPU_SET_ROOT;

	return (0);
}
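
/*
 * Illustrative call sequence (a sketch of how a prison is expected to
 * pair the function above with cpuset_setproc_update_set() below):
 *
 *	error = cpuset_create_root(pr, &set);
 *	if (error == 0)
 *		error = cpuset_setproc_update_set(p, set);
 */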

int
cpuset_setproc_update_set(struct proc *p, struct cpuset *set)
{
	int error;

	KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__));
	KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__));

	cpuset_ref(set);
	error = cpuset_setproc(p->p_pid, set, NULL, NULL);
	if (error)
		return (error);
	cpuset_rel(set);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_args {
	cpusetid_t	*setid;
};
#endif
int
sys_cpuset(struct thread *td, struct cpuset_args *uap)
{
	struct cpuset *root;
	struct cpuset *set;
	int error;

	thread_lock(td);
	root = cpuset_refroot(td->td_cpuset);
	thread_unlock(td);
	error = cpuset_create(&set, root, &root->cs_mask);
	cpuset_rel(root);
	if (error)
		return (error);
	error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id));
	if (error == 0)
		error = cpuset_setproc(-1, set, NULL, NULL);
	cpuset_rel(set);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setid_args {
	cpuwhich_t	which;
	id_t		id;
	cpusetid_t	setid;
};
#endif
int
sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap)
{

	return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid));
}

int
kern_cpuset_setid(struct thread *td, cpuwhich_t which,
    id_t id, cpusetid_t setid)
{
	struct cpuset *set;
	int error;

	/*
	 * Presently we only support per-process sets.
	 */
	if (which != CPU_WHICH_PID)
		return (EINVAL);
	set = cpuset_lookup(setid, td);
	if (set == NULL)
		return (ESRCH);
	error = cpuset_setproc(id, set, NULL, NULL);
	cpuset_rel(set);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getid_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	cpusetid_t	*setid;
};
#endif
int
sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap)
{

	return (kern_cpuset_getid(td, uap->level, uap->which, uap->id,
	    uap->setid));
}

int
kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, cpusetid_t *setid)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *ttd;
	struct proc *p;
	cpusetid_t tmpid;
	int error;

	if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET)
		return (EINVAL);
	error = cpuset_which(which, id, &p, &ttd, &set);
	if (error)
		return (error);
	switch (which) {
	case CPU_WHICH_TID:
	case CPU_WHICH_PID:
		thread_lock(ttd);
		set = cpuset_refbase(ttd->td_cpuset);
		thread_unlock(ttd);
		PROC_UNLOCK(p);
		break;
	case CPU_WHICH_CPUSET:
	case CPU_WHICH_JAIL:
		break;
	case CPU_WHICH_IRQ:
	case CPU_WHICH_INTRHANDLER:
	case CPU_WHICH_ITHREAD:
	case CPU_WHICH_DOMAIN:
		return (EINVAL);
	}
	switch (level) {
	case CPU_LEVEL_ROOT:
		nset = cpuset_refroot(set);
		cpuset_rel(set);
		set = nset;
		break;
	case CPU_LEVEL_CPUSET:
		break;
	case CPU_LEVEL_WHICH:
		break;
	}
	tmpid = set->cs_id;
	cpuset_rel(set);
	if (error == 0)
		error = copyout(&tmpid, setid, sizeof(tmpid));

	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getaffinity_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		cpusetsize;
	cpuset_t	*mask;
};
#endif
int
sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap)
{

	return (kern_cpuset_getaffinity(td, uap->level, uap->which,
	    uap->id, uap->cpusetsize, uap->mask));
}

int
kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, size_t cpusetsize, cpuset_t *maskp)
{
	struct thread *ttd;
	struct cpuset *nset;
	struct cpuset *set;
	struct proc *p;
	cpuset_t *mask;
	int error;
	size_t size;

	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
		return (ERANGE);
	/* In Capability mode, you can only get your own CPU set. */
	if (IN_CAPABILITY_MODE(td)) {
		if (level != CPU_LEVEL_WHICH)
			return (ECAPMODE);
		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
			return (ECAPMODE);
		if (id != -1)
			return (ECAPMODE);
	}
	size = cpusetsize;
	mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
	error = cpuset_which(which, id, &p, &ttd, &set);
	if (error)
		goto out;
	switch (level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		switch (which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			goto out;
		}
		if (level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		CPU_COPY(&nset->cs_mask, mask);
		cpuset_rel(nset);
		break;
	case CPU_LEVEL_WHICH:
		switch (which) {
		case CPU_WHICH_TID:
			thread_lock(ttd);
			CPU_COPY(&ttd->td_cpuset->cs_mask, mask);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_PID:
			FOREACH_THREAD_IN_PROC(p, ttd) {
				thread_lock(ttd);
				CPU_OR(mask, &ttd->td_cpuset->cs_mask);
				thread_unlock(ttd);
			}
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			CPU_COPY(&set->cs_mask, mask);
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
			error = intr_getaffinity(id, which, mask);
			break;
		case CPU_WHICH_DOMAIN:
			if (id < 0 || id >= MAXMEMDOM)
				error = ESRCH;
			else
				CPU_COPY(&cpuset_domain[id], mask);
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	if (set)
		cpuset_rel(set);
	if (p)
		PROC_UNLOCK(p);
	if (error == 0)
		error = copyout(mask, maskp, size);
out:
	free(mask, M_TEMP);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setaffinity_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		cpusetsize;
	const cpuset_t	*mask;
};
#endif
int
sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap)
{

	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
	    uap->id, uap->cpusetsize, uap->mask));
}

int
kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, size_t cpusetsize, const cpuset_t *maskp)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *ttd;
	struct proc *p;
	cpuset_t *mask;
	int error;

	if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY)
		return (ERANGE);
	/* In Capability mode, you can only set your own CPU set. */
	if (IN_CAPABILITY_MODE(td)) {
		if (level != CPU_LEVEL_WHICH)
			return (ECAPMODE);
		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
			return (ECAPMODE);
		if (id != -1)
			return (ECAPMODE);
	}
	mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(maskp, mask, cpusetsize);
	if (error)
		goto out;
	/*
	 * Verify that no high bits are set.
	 */
	if (cpusetsize > sizeof(cpuset_t)) {
		char *end;
		char *cp;

		end = cp = (char *)&mask->__bits;
		end += cpusetsize;
		cp += sizeof(cpuset_t);
		while (cp != end)
			if (*cp++ != 0) {
				error = EINVAL;
				goto out;
			}
	}
	switch (level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		error = cpuset_which(which, id, &p, &ttd, &set);
		if (error)
			break;
		switch (which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			PROC_UNLOCK(p);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			goto out;
		}
		if (level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		error = cpuset_modify(nset, mask);
		cpuset_rel(nset);
		cpuset_rel(set);
		break;
	case CPU_LEVEL_WHICH:
		switch (which) {
		case CPU_WHICH_TID:
			error = cpuset_setthread(id, mask);
			break;
		case CPU_WHICH_PID:
			error = cpuset_setproc(id, NULL, mask, NULL);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			error = cpuset_which(which, id, &p, &ttd, &set);
			if (error == 0) {
				error = cpuset_modify(set, mask);
				cpuset_rel(set);
			}
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
			error = intr_setaffinity(id, which, mask);
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
out:
	free(mask, M_TEMP);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_getdomain_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		domainsetsize;
	domainset_t	*mask;
	int		*policy;
};
#endif
int
sys_cpuset_getdomain(struct thread *td, struct cpuset_getdomain_args *uap)
{

	return (kern_cpuset_getdomain(td, uap->level, uap->which,
	    uap->id, uap->domainsetsize, uap->mask, uap->policy));
}

int
kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp)
{
	struct domainset outset;
	struct thread *ttd;
	struct cpuset *nset;
	struct cpuset *set;
	struct domainset *dset;
	struct proc *p;
	domainset_t *mask;
	int error;

	if (domainsetsize < sizeof(domainset_t) ||
	    domainsetsize > DOMAINSET_MAXSIZE / NBBY)
		return (ERANGE);
	/* In Capability mode, you can only get your own domain set. */
	if (IN_CAPABILITY_MODE(td)) {
		if (level != CPU_LEVEL_WHICH)
			return (ECAPMODE);
		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
			return (ECAPMODE);
		if (id != -1)
			return (ECAPMODE);
	}
	mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
	bzero(&outset, sizeof(outset));
	error = cpuset_which(which, id, &p, &ttd, &set);
	if (error)
		goto out;
	switch (level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		switch (which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			goto out;
		}
		if (level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		domainset_copy(nset->cs_domain, &outset);
		cpuset_rel(nset);
		break;
	case CPU_LEVEL_WHICH:
		switch (which) {
		case CPU_WHICH_TID:
			thread_lock(ttd);
			domainset_copy(ttd->td_cpuset->cs_domain, &outset);
			thread_unlock(ttd);
			break;
		case CPU_WHICH_PID:
			FOREACH_THREAD_IN_PROC(p, ttd) {
				thread_lock(ttd);
				dset = ttd->td_cpuset->cs_domain;
				/* Show all domains in the proc. */
				DOMAINSET_OR(&outset.ds_mask, &dset->ds_mask);
				/* Last policy wins. */
				outset.ds_policy = dset->ds_policy;
				outset.ds_prefer = dset->ds_prefer;
				thread_unlock(ttd);
			}
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			domainset_copy(set->cs_domain, &outset);
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	if (set)
		cpuset_rel(set);
	if (p)
		PROC_UNLOCK(p);
	/*
	 * Translate prefer into a set containing only the preferred domain,
	 * not the entire fallback set.
	 */
	if (outset.ds_policy == DOMAINSET_POLICY_PREFER) {
		DOMAINSET_ZERO(&outset.ds_mask);
		DOMAINSET_SET(outset.ds_prefer, &outset.ds_mask);
	}
	DOMAINSET_COPY(&outset.ds_mask, mask);
	if (error == 0)
		error = copyout(mask, maskp, domainsetsize);
	if (error == 0)
		if (suword32(policyp, outset.ds_policy) != 0)
			error = EFAULT;
out:
	free(mask, M_TEMP);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct cpuset_setdomain_args {
	cpulevel_t	level;
	cpuwhich_t	which;
	id_t		id;
	size_t		domainsetsize;
	domainset_t	*mask;
	int		policy;
};
#endif
int
sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap)
{

	return (kern_cpuset_setdomain(td, uap->level, uap->which,
	    uap->id, uap->domainsetsize, uap->mask, uap->policy));
}

int
kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
    id_t id, size_t domainsetsize, const domainset_t *maskp, int policy)
{
	struct cpuset *nset;
	struct cpuset *set;
	struct thread *ttd;
	struct proc *p;
	struct domainset domain;
	domainset_t *mask;
	int error;

	if (domainsetsize < sizeof(domainset_t) ||
	    domainsetsize > DOMAINSET_MAXSIZE / NBBY)
		return (ERANGE);
	if (policy <= DOMAINSET_POLICY_INVALID ||
	    policy > DOMAINSET_POLICY_MAX)
		return (EINVAL);
	/* In Capability mode, you can only set your own domain set. */
	if (IN_CAPABILITY_MODE(td)) {
		if (level != CPU_LEVEL_WHICH)
			return (ECAPMODE);
		if (which != CPU_WHICH_TID && which != CPU_WHICH_PID)
			return (ECAPMODE);
		if (id != -1)
			return (ECAPMODE);
	}
	memset(&domain, 0, sizeof(domain));
	mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(maskp, mask, domainsetsize);
	if (error)
		goto out;
	/*
	 * Verify that no high bits are set.
	 */
	if (domainsetsize > sizeof(domainset_t)) {
		char *end;
		char *cp;

		end = cp = (char *)&mask->__bits;
		end += domainsetsize;
		cp += sizeof(domainset_t);
		while (cp != end)
			if (*cp++ != 0) {
				error = EINVAL;
				goto out;
			}
	}
	DOMAINSET_COPY(mask, &domain.ds_mask);
	domain.ds_policy = policy;

	/* Translate preferred policy into a mask and fallback. */
	if (policy == DOMAINSET_POLICY_PREFER) {
		/* Only support a single preferred domain. */
		if (DOMAINSET_COUNT(&domain.ds_mask) != 1) {
			error = EINVAL;
			goto out;
		}
		domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1;
		/* This will be constrained by domainset_shadow(). */
		DOMAINSET_FILL(&domain.ds_mask);
	}

	/*
	 * When given an impossible policy, fall back to interleaving
	 * across all domains.
	 */
	if (domainset_empty_vm(&domain))
		domainset_copy(&domainset2, &domain);

	switch (level) {
	case CPU_LEVEL_ROOT:
	case CPU_LEVEL_CPUSET:
		error = cpuset_which(which, id, &p, &ttd, &set);
		if (error)
			break;
		switch (which) {
		case CPU_WHICH_TID:
		case CPU_WHICH_PID:
			thread_lock(ttd);
			set = cpuset_ref(ttd->td_cpuset);
			thread_unlock(ttd);
			PROC_UNLOCK(p);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		case CPU_WHICH_DOMAIN:
			error = EINVAL;
			goto out;
		}
		if (level == CPU_LEVEL_ROOT)
			nset = cpuset_refroot(set);
		else
			nset = cpuset_refbase(set);
		error = cpuset_modify_domain(nset, &domain);
		cpuset_rel(nset);
		cpuset_rel(set);
		break;
	case CPU_LEVEL_WHICH:
		switch (which) {
		case CPU_WHICH_TID:
			error = _cpuset_setthread(id, NULL, &domain);
			break;
		case CPU_WHICH_PID:
			error = cpuset_setproc(id, NULL, NULL, &domain);
			break;
		case CPU_WHICH_CPUSET:
		case CPU_WHICH_JAIL:
			error = cpuset_which(which, id, &p, &ttd, &set);
			if (error == 0) {
				error = cpuset_modify_domain(set, &domain);
				cpuset_rel(set);
			}
			break;
		case CPU_WHICH_IRQ:
		case CPU_WHICH_INTRHANDLER:
		case CPU_WHICH_ITHREAD:
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
out:
	free(mask, M_TEMP);
	return (error);
}

#ifdef DDB

static void
ddb_display_bitset(const struct bitset *set, int size)
{
	int bit, once;

	for (once = 0, bit = 0; bit < size; bit++) {
		if (CPU_ISSET(bit, set)) {
			if (once == 0) {
				db_printf("%d", bit);
				once = 1;
			} else
				db_printf(",%d", bit);
		}
	}
	if (once == 0)
		db_printf("<none>");
}

void
ddb_display_cpuset(const cpuset_t *set)
{
	ddb_display_bitset((const struct bitset *)set, CPU_SETSIZE);
}

static void
ddb_display_domainset(const domainset_t *set)
{
	ddb_display_bitset((const struct bitset *)set, DOMAINSET_SETSIZE);
}

DB_SHOW_COMMAND(cpusets, db_show_cpusets)
{
	struct cpuset *set;

	LIST_FOREACH(set, &cpuset_ids, cs_link) {
		db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n",
		    set, set->cs_id, set->cs_ref, set->cs_flags,
		    (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0);
		db_printf("  cpu mask=");
		ddb_display_cpuset(&set->cs_mask);
		db_printf("\n");
		db_printf("  domain policy %d prefer %d mask=",
		    set->cs_domain->ds_policy, set->cs_domain->ds_prefer);
		ddb_display_domainset(&set->cs_domain->ds_mask);
		db_printf("\n");
		if (db_pager_quit)
			break;
	}
}

DB_SHOW_COMMAND(domainsets, db_show_domainsets)
{
	struct domainset *set;

	LIST_FOREACH(set, &cpuset_domains, ds_link) {
		db_printf("set=%p policy %d prefer %d cnt %d\n",
		    set, set->ds_policy, set->ds_prefer, set->ds_cnt);
		db_printf("  mask =");
		ddb_display_domainset(&set->ds_mask);
		db_printf("\n");
	}
}
#endif /* DDB */