1 /*- 2 * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_hwpmc_hooks.h" 31 #include "opt_sched.h" 32 33 #define kse td_sched 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kdb.h> 38 #include <sys/kernel.h> 39 #include <sys/ktr.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/proc.h> 43 #include <sys/resource.h> 44 #include <sys/resourcevar.h> 45 #include <sys/sched.h> 46 #include <sys/smp.h> 47 #include <sys/sx.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysproto.h> 50 #include <sys/turnstile.h> 51 #include <sys/umtx.h> 52 #include <sys/vmmeter.h> 53 #ifdef KTRACE 54 #include <sys/uio.h> 55 #include <sys/ktrace.h> 56 #endif 57 58 #ifdef HWPMC_HOOKS 59 #include <sys/pmckern.h> 60 #endif 61 62 #include <machine/cpu.h> 63 #include <machine/smp.h> 64 65 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 66 /* XXX This is bogus compatability crap for ps */ 67 static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 68 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 69 70 static void sched_setup(void *dummy); 71 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 72 73 static void sched_initticks(void *dummy); 74 SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL) 75 76 static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 77 78 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 79 "Scheduler name"); 80 81 static int slice_min = 1; 82 SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 83 84 static int slice_max = 10; 85 SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 86 87 int realstathz; 88 int tickincr = 1 << 10; 89 90 /* 91 * The following datastructures are allocated within their parent structure 92 * but are scheduler specific. 93 */ 94 /* 95 * The schedulable entity that can be given a context to run. A process may 96 * have several of these. 97 */ 98 struct td_sched { /* really kse */ 99 TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. 
 */
	int		ke_flags;	/* (j) KEF_* flags. */
	struct thread	*ke_thread;	/* (*) Active associated thread. */
	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ke_rqindex;	/* (j) Run queue index. */
	enum {
		KES_THREAD = 0x0,	/* slaved to thread state */
		KES_ONRUNQ
	} ke_state;			/* (j) thread sched specific status. */
	int		ke_slptime;
	int		ke_slice;
	struct runq	*ke_runq;
	u_char		ke_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ke_ltick;	/* Last tick that we were running on */
	int		ke_ftick;	/* First tick that we were running on */
	int		ke_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	td_kse		td_sched
#define	ke_assign	ke_procq.tqe_next
/* flags kept in ke_flags */
#define	KEF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	KEF_BOUND	0x0002		/* Thread can not migrate. */
#define	KEF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	KEF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	KEF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	KEF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	KEF_PREEMPTED	0x0040		/* Thread was preempted */
#define	KEF_DIDRUN	0x02000		/* Thread actually ran. */
#define	KEF_EXIT	0x04000		/* Thread is being killed. */

static struct kse kse0;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
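 *
 * As a rough example using the compile-time defaults (slice_min = 1 and
 * slice_max = 10, so SLICE_RANGE is 10 and SLICE_NTHRESH is 19): a thread
 * at the same nice value as the least nice thread on the queue gets
 * SCHED_SLICE_NICE(0) = 10 ticks, one 10 nice levels away gets
 * 10 - (10 * 10) / 19 = 5 ticks, one 19 levels away gets
 * 10 - (19 * 10) / 19 = 0, and anything further out is handled by the
 * out-of-window cases in sched_slice().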
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(td)						\
    (sched_interact_score(td) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(td, ke)						\
    ((ke->ke_thread->td_flags & TDF_BORROWING) ||			\
     (ke->ke_flags & KEF_PREEMPTED) || SCHED_INTERACTIVE(td))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int		ksq_transferable;
	LIST_ENTRY(kseq) ksq_siblings;		/* Next in kseq group. */
	struct kseq_group *ksq_group;		/* Our processor group. */
	volatile struct kse *ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a numa environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq) ksg_members;	/* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
252 */ 253 #ifdef SMP 254 static cpumask_t kseq_idle; 255 static int ksg_maxid; 256 static struct kseq kseq_cpu[MAXCPU]; 257 static struct kseq_group kseq_groups[MAXCPU]; 258 static int bal_tick; 259 static int gbal_tick; 260 static int balance_groups; 261 262 #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 263 #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 264 #define KSEQ_ID(x) ((x) - kseq_cpu) 265 #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 266 #else /* !SMP */ 267 static struct kseq kseq_cpu; 268 269 #define KSEQ_SELF() (&kseq_cpu) 270 #define KSEQ_CPU(x) (&kseq_cpu) 271 #endif 272 273 static struct kse *sched_choose(void); /* XXX Should be thread * */ 274 static void sched_slice(struct kse *); 275 static void sched_priority(struct thread *); 276 static void sched_thread_priority(struct thread *, u_char); 277 static int sched_interact_score(struct thread *); 278 static void sched_interact_update(struct thread *); 279 static void sched_interact_fork(struct thread *); 280 static void sched_pctcpu_update(struct kse *); 281 282 /* Operations on per processor queues */ 283 static struct kse * kseq_choose(struct kseq *); 284 static void kseq_setup(struct kseq *); 285 static void kseq_load_add(struct kseq *, struct kse *); 286 static void kseq_load_rem(struct kseq *, struct kse *); 287 static __inline void kseq_runq_add(struct kseq *, struct kse *, int); 288 static __inline void kseq_runq_rem(struct kseq *, struct kse *); 289 static void kseq_nice_add(struct kseq *, int); 290 static void kseq_nice_rem(struct kseq *, int); 291 void kseq_print(int cpu); 292 #ifdef SMP 293 static int kseq_transfer(struct kseq *, struct kse *, int); 294 static struct kse *runq_steal(struct runq *); 295 static void sched_balance(void); 296 static void sched_balance_groups(void); 297 static void sched_balance_group(struct kseq_group *); 298 static void sched_balance_pair(struct kseq *, struct kseq *); 299 static void kseq_move(struct kseq *, int); 300 static int kseq_idled(struct kseq *); 301 static void kseq_notify(struct kse *, int); 302 static void kseq_assign(struct kseq *); 303 static struct kse *kseq_steal(struct kseq *, int); 304 #define KSE_CAN_MIGRATE(ke) \ 305 ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 306 #endif 307 308 void 309 kseq_print(int cpu) 310 { 311 struct kseq *kseq; 312 int i; 313 314 kseq = KSEQ_CPU(cpu); 315 316 printf("kseq:\n"); 317 printf("\tload: %d\n", kseq->ksq_load); 318 printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 319 #ifdef SMP 320 printf("\tload transferable: %d\n", kseq->ksq_transferable); 321 #endif 322 printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 323 printf("\tnice counts:\n"); 324 for (i = 0; i < SCHED_PRI_NRESV; i++) 325 if (kseq->ksq_nice[i]) 326 printf("\t\t%d = %d\n", 327 i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 328 } 329 330 static __inline void 331 kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 332 { 333 #ifdef SMP 334 if (KSE_CAN_MIGRATE(ke)) { 335 kseq->ksq_transferable++; 336 kseq->ksq_group->ksg_transferable++; 337 ke->ke_flags |= KEF_XFERABLE; 338 } 339 #endif 340 if (ke->ke_flags & KEF_PREEMPTED) 341 flags |= SRQ_PREEMPTED; 342 runq_add(ke->ke_runq, ke, flags); 343 } 344 345 static __inline void 346 kseq_runq_rem(struct kseq *kseq, struct kse *ke) 347 { 348 #ifdef SMP 349 if (ke->ke_flags & KEF_XFERABLE) { 350 kseq->ksq_transferable--; 351 kseq->ksq_group->ksg_transferable--; 352 ke->ke_flags &= ~KEF_XFERABLE; 353 } 354 #endif 355 runq_remove(ke->ke_runq, ke); 356 } 357 358 static void 359 kseq_load_add(struct kseq 
*kseq, struct kse *ke) 360 { 361 int class; 362 mtx_assert(&sched_lock, MA_OWNED); 363 class = PRI_BASE(ke->ke_thread->td_pri_class); 364 if (class == PRI_TIMESHARE) 365 kseq->ksq_load_timeshare++; 366 kseq->ksq_load++; 367 CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 368 if (class != PRI_ITHD && (ke->ke_thread->td_proc->p_flag & P_NOLOAD) == 0) 369 #ifdef SMP 370 kseq->ksq_group->ksg_load++; 371 #else 372 kseq->ksq_sysload++; 373 #endif 374 if (ke->ke_thread->td_pri_class == PRI_TIMESHARE) 375 kseq_nice_add(kseq, ke->ke_thread->td_proc->p_nice); 376 } 377 378 static void 379 kseq_load_rem(struct kseq *kseq, struct kse *ke) 380 { 381 int class; 382 mtx_assert(&sched_lock, MA_OWNED); 383 class = PRI_BASE(ke->ke_thread->td_pri_class); 384 if (class == PRI_TIMESHARE) 385 kseq->ksq_load_timeshare--; 386 if (class != PRI_ITHD && (ke->ke_thread->td_proc->p_flag & P_NOLOAD) == 0) 387 #ifdef SMP 388 kseq->ksq_group->ksg_load--; 389 #else 390 kseq->ksq_sysload--; 391 #endif 392 kseq->ksq_load--; 393 CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 394 ke->ke_runq = NULL; 395 if (ke->ke_thread->td_pri_class == PRI_TIMESHARE) 396 kseq_nice_rem(kseq, ke->ke_thread->td_proc->p_nice); 397 } 398 399 static void 400 kseq_nice_add(struct kseq *kseq, int nice) 401 { 402 mtx_assert(&sched_lock, MA_OWNED); 403 /* Normalize to zero. */ 404 kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 405 if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 406 kseq->ksq_nicemin = nice; 407 } 408 409 static void 410 kseq_nice_rem(struct kseq *kseq, int nice) 411 { 412 int n; 413 414 mtx_assert(&sched_lock, MA_OWNED); 415 /* Normalize to zero. */ 416 n = nice + SCHED_PRI_NHALF; 417 kseq->ksq_nice[n]--; 418 KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 419 420 /* 421 * If this wasn't the smallest nice value or there are more in 422 * this bucket we can just return. Otherwise we have to recalculate 423 * the smallest nice. 424 */ 425 if (nice != kseq->ksq_nicemin || 426 kseq->ksq_nice[n] != 0 || 427 kseq->ksq_load_timeshare == 0) 428 return; 429 430 for (; n < SCHED_PRI_NRESV; n++) 431 if (kseq->ksq_nice[n]) { 432 kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 433 return; 434 } 435 } 436 437 #ifdef SMP 438 /* 439 * sched_balance is a simple CPU load balancing algorithm. It operates by 440 * finding the least loaded and most loaded cpu and equalizing their load 441 * by migrating some processes. 442 * 443 * Dealing only with two CPUs at a time has two advantages. Firstly, most 444 * installations will only have 2 cpus. Secondly, load balancing too much at 445 * once can have an unpleasant effect on the system. The scheduler rarely has 446 * enough information to make perfect decisions. So this algorithm chooses 447 * algorithm simplicity and more gradual effects on load in larger systems. 448 * 449 * It could be improved by considering the priorities and slices assigned to 450 * each task prior to balancing them. There are many pathological cases with 451 * any approach and so the semi random algorithm below may work as well as any. 452 * 453 */ 454 static void 455 sched_balance(void) 456 { 457 struct kseq_group *high; 458 struct kseq_group *low; 459 struct kseq_group *ksg; 460 int cnt; 461 int i; 462 463 bal_tick = ticks + (random() % (hz * 2)); 464 if (smp_started == 0) 465 return; 466 low = high = NULL; 467 i = random() % (ksg_maxid + 1); 468 for (cnt = 0; cnt <= ksg_maxid; cnt++) { 469 ksg = KSEQ_GROUP(i); 470 /* 471 * Find the CPU with the highest load that has some 472 * threads to transfer. 
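		 * (The scan starts at a randomly chosen group index, so ties
		 * between equally loaded groups are not always resolved in
		 * favor of the low-numbered ones.)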
473 */ 474 if ((high == NULL || ksg->ksg_load > high->ksg_load) 475 && ksg->ksg_transferable) 476 high = ksg; 477 if (low == NULL || ksg->ksg_load < low->ksg_load) 478 low = ksg; 479 if (++i > ksg_maxid) 480 i = 0; 481 } 482 if (low != NULL && high != NULL && high != low) 483 sched_balance_pair(LIST_FIRST(&high->ksg_members), 484 LIST_FIRST(&low->ksg_members)); 485 } 486 487 static void 488 sched_balance_groups(void) 489 { 490 int i; 491 492 gbal_tick = ticks + (random() % (hz * 2)); 493 mtx_assert(&sched_lock, MA_OWNED); 494 if (smp_started) 495 for (i = 0; i <= ksg_maxid; i++) 496 sched_balance_group(KSEQ_GROUP(i)); 497 } 498 499 static void 500 sched_balance_group(struct kseq_group *ksg) 501 { 502 struct kseq *kseq; 503 struct kseq *high; 504 struct kseq *low; 505 int load; 506 507 if (ksg->ksg_transferable == 0) 508 return; 509 low = NULL; 510 high = NULL; 511 LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 512 load = kseq->ksq_load; 513 if (high == NULL || load > high->ksq_load) 514 high = kseq; 515 if (low == NULL || load < low->ksq_load) 516 low = kseq; 517 } 518 if (high != NULL && low != NULL && high != low) 519 sched_balance_pair(high, low); 520 } 521 522 static void 523 sched_balance_pair(struct kseq *high, struct kseq *low) 524 { 525 int transferable; 526 int high_load; 527 int low_load; 528 int move; 529 int diff; 530 int i; 531 532 /* 533 * If we're transfering within a group we have to use this specific 534 * kseq's transferable count, otherwise we can steal from other members 535 * of the group. 536 */ 537 if (high->ksq_group == low->ksq_group) { 538 transferable = high->ksq_transferable; 539 high_load = high->ksq_load; 540 low_load = low->ksq_load; 541 } else { 542 transferable = high->ksq_group->ksg_transferable; 543 high_load = high->ksq_group->ksg_load; 544 low_load = low->ksq_group->ksg_load; 545 } 546 if (transferable == 0) 547 return; 548 /* 549 * Determine what the imbalance is and then adjust that to how many 550 * kses we actually have to give up (transferable). 551 */ 552 diff = high_load - low_load; 553 move = diff / 2; 554 if (diff & 0x1) 555 move++; 556 move = min(move, transferable); 557 for (i = 0; i < move; i++) 558 kseq_move(high, KSEQ_ID(low)); 559 return; 560 } 561 562 static void 563 kseq_move(struct kseq *from, int cpu) 564 { 565 struct kseq *kseq; 566 struct kseq *to; 567 struct kse *ke; 568 569 kseq = from; 570 to = KSEQ_CPU(cpu); 571 ke = kseq_steal(kseq, 1); 572 if (ke == NULL) { 573 struct kseq_group *ksg; 574 575 ksg = kseq->ksq_group; 576 LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 577 if (kseq == from || kseq->ksq_transferable == 0) 578 continue; 579 ke = kseq_steal(kseq, 1); 580 break; 581 } 582 if (ke == NULL) 583 panic("kseq_move: No KSEs available with a " 584 "transferable count of %d\n", 585 ksg->ksg_transferable); 586 } 587 if (kseq == to) 588 return; 589 ke->ke_state = KES_THREAD; 590 kseq_runq_rem(kseq, ke); 591 kseq_load_rem(kseq, ke); 592 kseq_notify(ke, cpu); 593 } 594 595 static int 596 kseq_idled(struct kseq *kseq) 597 { 598 struct kseq_group *ksg; 599 struct kseq *steal; 600 struct kse *ke; 601 602 ksg = kseq->ksq_group; 603 /* 604 * If we're in a cpu group, try and steal kses from another cpu in 605 * the group before idling. 
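	 * (kseq_steal() is called with stealidle == 0 here, so we never
	 * pull threads off another cpu's idle queue.)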
606 */ 607 if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 608 LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 609 if (steal == kseq || steal->ksq_transferable == 0) 610 continue; 611 ke = kseq_steal(steal, 0); 612 if (ke == NULL) 613 continue; 614 ke->ke_state = KES_THREAD; 615 kseq_runq_rem(steal, ke); 616 kseq_load_rem(steal, ke); 617 ke->ke_cpu = PCPU_GET(cpuid); 618 ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 619 sched_add(ke->ke_thread, SRQ_YIELDING); 620 return (0); 621 } 622 } 623 /* 624 * We only set the idled bit when all of the cpus in the group are 625 * idle. Otherwise we could get into a situation where a KSE bounces 626 * back and forth between two idle cores on seperate physical CPUs. 627 */ 628 ksg->ksg_idlemask |= PCPU_GET(cpumask); 629 if (ksg->ksg_idlemask != ksg->ksg_cpumask) 630 return (1); 631 atomic_set_int(&kseq_idle, ksg->ksg_mask); 632 return (1); 633 } 634 635 static void 636 kseq_assign(struct kseq *kseq) 637 { 638 struct kse *nke; 639 struct kse *ke; 640 641 do { 642 *(volatile struct kse **)&ke = kseq->ksq_assigned; 643 } while(!atomic_cmpset_ptr((volatile uintptr_t *)&kseq->ksq_assigned, 644 (uintptr_t)ke, (uintptr_t)NULL)); 645 for (; ke != NULL; ke = nke) { 646 nke = ke->ke_assign; 647 kseq->ksq_group->ksg_load--; 648 kseq->ksq_load--; 649 ke->ke_flags &= ~KEF_ASSIGNED; 650 if (ke->ke_flags & KEF_REMOVED) { 651 ke->ke_flags &= ~KEF_REMOVED; 652 continue; 653 } 654 ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 655 sched_add(ke->ke_thread, SRQ_YIELDING); 656 } 657 } 658 659 static void 660 kseq_notify(struct kse *ke, int cpu) 661 { 662 struct kseq *kseq; 663 struct thread *td; 664 struct pcpu *pcpu; 665 int class; 666 int prio; 667 668 kseq = KSEQ_CPU(cpu); 669 /* XXX */ 670 class = PRI_BASE(ke->ke_thread->td_pri_class); 671 if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 672 (kseq_idle & kseq->ksq_group->ksg_mask)) 673 atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 674 kseq->ksq_group->ksg_load++; 675 kseq->ksq_load++; 676 ke->ke_cpu = cpu; 677 ke->ke_flags |= KEF_ASSIGNED; 678 prio = ke->ke_thread->td_priority; 679 680 /* 681 * Place a KSE on another cpu's queue and force a resched. 682 */ 683 do { 684 *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 685 } while(!atomic_cmpset_ptr((volatile uintptr_t *)&kseq->ksq_assigned, 686 (uintptr_t)ke->ke_assign, (uintptr_t)ke)); 687 /* 688 * Without sched_lock we could lose a race where we set NEEDRESCHED 689 * on a thread that is switched out before the IPI is delivered. This 690 * would lead us to miss the resched. This will be a problem once 691 * sched_lock is pushed down. 
692 */ 693 pcpu = pcpu_find(cpu); 694 td = pcpu->pc_curthread; 695 if (ke->ke_thread->td_priority < td->td_priority || 696 td == pcpu->pc_idlethread) { 697 td->td_flags |= TDF_NEEDRESCHED; 698 ipi_selected(1 << cpu, IPI_AST); 699 } 700 } 701 702 static struct kse * 703 runq_steal(struct runq *rq) 704 { 705 struct rqhead *rqh; 706 struct rqbits *rqb; 707 struct kse *ke; 708 int word; 709 int bit; 710 711 mtx_assert(&sched_lock, MA_OWNED); 712 rqb = &rq->rq_status; 713 for (word = 0; word < RQB_LEN; word++) { 714 if (rqb->rqb_bits[word] == 0) 715 continue; 716 for (bit = 0; bit < RQB_BPW; bit++) { 717 if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 718 continue; 719 rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 720 TAILQ_FOREACH(ke, rqh, ke_procq) { 721 if (KSE_CAN_MIGRATE(ke)) 722 return (ke); 723 } 724 } 725 } 726 return (NULL); 727 } 728 729 static struct kse * 730 kseq_steal(struct kseq *kseq, int stealidle) 731 { 732 struct kse *ke; 733 734 /* 735 * Steal from next first to try to get a non-interactive task that 736 * may not have run for a while. 737 */ 738 if ((ke = runq_steal(kseq->ksq_next)) != NULL) 739 return (ke); 740 if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 741 return (ke); 742 if (stealidle) 743 return (runq_steal(&kseq->ksq_idle)); 744 return (NULL); 745 } 746 747 int 748 kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 749 { 750 struct kseq_group *nksg; 751 struct kseq_group *ksg; 752 struct kseq *old; 753 int cpu; 754 int idx; 755 756 if (smp_started == 0) 757 return (0); 758 cpu = 0; 759 /* 760 * If our load exceeds a certain threshold we should attempt to 761 * reassign this thread. The first candidate is the cpu that 762 * originally ran the thread. If it is idle, assign it there, 763 * otherwise, pick an idle cpu. 764 * 765 * The threshold at which we start to reassign kses has a large impact 766 * on the overall performance of the system. Tuned too high and 767 * some CPUs may idle. Too low and there will be excess migration 768 * and context switches. 769 */ 770 old = KSEQ_CPU(ke->ke_cpu); 771 nksg = old->ksq_group; 772 ksg = kseq->ksq_group; 773 if (kseq_idle) { 774 if (kseq_idle & nksg->ksg_mask) { 775 cpu = ffs(nksg->ksg_idlemask); 776 if (cpu) { 777 CTR2(KTR_SCHED, 778 "kseq_transfer: %p found old cpu %X " 779 "in idlemask.", ke, cpu); 780 goto migrate; 781 } 782 } 783 /* 784 * Multiple cpus could find this bit simultaneously 785 * but the race shouldn't be terrible. 786 */ 787 cpu = ffs(kseq_idle); 788 if (cpu) { 789 CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 790 "in idlemask.", ke, cpu); 791 goto migrate; 792 } 793 } 794 idx = 0; 795 #if 0 796 if (old->ksq_load < kseq->ksq_load) { 797 cpu = ke->ke_cpu + 1; 798 CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 799 "load less than ours.", ke, cpu); 800 goto migrate; 801 } 802 /* 803 * No new CPU was found, look for one with less load. 804 */ 805 for (idx = 0; idx <= ksg_maxid; idx++) { 806 nksg = KSEQ_GROUP(idx); 807 if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 808 cpu = ffs(nksg->ksg_cpumask); 809 CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 810 "than ours.", ke, cpu); 811 goto migrate; 812 } 813 } 814 #endif 815 /* 816 * If another cpu in this group has idled, assign a thread over 817 * to them after checking to see if there are idled groups. 
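	 * (Note that ffs() returns a 1-based bit number, which is why the
	 * migrate label below decrements cpu before using it.)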
818 */ 819 if (ksg->ksg_idlemask) { 820 cpu = ffs(ksg->ksg_idlemask); 821 if (cpu) { 822 CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 823 "group.", ke, cpu); 824 goto migrate; 825 } 826 } 827 return (0); 828 migrate: 829 /* 830 * Now that we've found an idle CPU, migrate the thread. 831 */ 832 cpu--; 833 ke->ke_runq = NULL; 834 kseq_notify(ke, cpu); 835 836 return (1); 837 } 838 839 #endif /* SMP */ 840 841 /* 842 * Pick the highest priority task we have and return it. 843 */ 844 845 static struct kse * 846 kseq_choose(struct kseq *kseq) 847 { 848 struct runq *swap; 849 struct kse *ke; 850 int nice; 851 852 mtx_assert(&sched_lock, MA_OWNED); 853 swap = NULL; 854 855 for (;;) { 856 ke = runq_choose(kseq->ksq_curr); 857 if (ke == NULL) { 858 /* 859 * We already swapped once and didn't get anywhere. 860 */ 861 if (swap) 862 break; 863 swap = kseq->ksq_curr; 864 kseq->ksq_curr = kseq->ksq_next; 865 kseq->ksq_next = swap; 866 continue; 867 } 868 /* 869 * If we encounter a slice of 0 the kse is in a 870 * TIMESHARE kse group and its nice was too far out 871 * of the range that receives slices. 872 */ 873 nice = ke->ke_thread->td_proc->p_nice + (0 - kseq->ksq_nicemin); 874 #if 0 875 if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 876 ke->ke_thread->td_proc->p_nice != 0)) { 877 runq_remove(ke->ke_runq, ke); 878 sched_slice(ke); 879 ke->ke_runq = kseq->ksq_next; 880 runq_add(ke->ke_runq, ke, 0); 881 continue; 882 } 883 #endif 884 return (ke); 885 } 886 887 return (runq_choose(&kseq->ksq_idle)); 888 } 889 890 static void 891 kseq_setup(struct kseq *kseq) 892 { 893 runq_init(&kseq->ksq_timeshare[0]); 894 runq_init(&kseq->ksq_timeshare[1]); 895 runq_init(&kseq->ksq_idle); 896 kseq->ksq_curr = &kseq->ksq_timeshare[0]; 897 kseq->ksq_next = &kseq->ksq_timeshare[1]; 898 kseq->ksq_load = 0; 899 kseq->ksq_load_timeshare = 0; 900 } 901 902 static void 903 sched_setup(void *dummy) 904 { 905 #ifdef SMP 906 int i; 907 #endif 908 909 /* 910 * To avoid divide-by-zero, we set realstathz a dummy value 911 * in case which sched_clock() called before sched_initticks(). 912 */ 913 realstathz = hz; 914 slice_min = (hz/100); /* 10ms */ 915 slice_max = (hz/7); /* ~140ms */ 916 917 #ifdef SMP 918 balance_groups = 0; 919 /* 920 * Initialize the kseqs. 921 */ 922 for (i = 0; i < MAXCPU; i++) { 923 struct kseq *ksq; 924 925 ksq = &kseq_cpu[i]; 926 ksq->ksq_assigned = NULL; 927 kseq_setup(&kseq_cpu[i]); 928 } 929 if (smp_topology == NULL) { 930 struct kseq_group *ksg; 931 struct kseq *ksq; 932 int cpus; 933 934 for (cpus = 0, i = 0; i < MAXCPU; i++) { 935 if (CPU_ABSENT(i)) 936 continue; 937 ksq = &kseq_cpu[i]; 938 ksg = &kseq_groups[cpus]; 939 /* 940 * Setup a kseq group with one member. 941 */ 942 ksq->ksq_transferable = 0; 943 ksq->ksq_group = ksg; 944 ksg->ksg_cpus = 1; 945 ksg->ksg_idlemask = 0; 946 ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 947 ksg->ksg_load = 0; 948 ksg->ksg_transferable = 0; 949 LIST_INIT(&ksg->ksg_members); 950 LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 951 cpus++; 952 } 953 ksg_maxid = cpus - 1; 954 } else { 955 struct kseq_group *ksg; 956 struct cpu_group *cg; 957 int j; 958 959 for (i = 0; i < smp_topology->ct_count; i++) { 960 cg = &smp_topology->ct_group[i]; 961 ksg = &kseq_groups[i]; 962 /* 963 * Initialize the group. 964 */ 965 ksg->ksg_idlemask = 0; 966 ksg->ksg_load = 0; 967 ksg->ksg_transferable = 0; 968 ksg->ksg_cpus = cg->cg_count; 969 ksg->ksg_cpumask = cg->cg_mask; 970 LIST_INIT(&ksg->ksg_members); 971 /* 972 * Find all of the group members and add them. 
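			 * (The first cpu found also supplies ksg_mask, the
			 * single bit used for this group in the global
			 * kseq_idle bitmap.)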
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	slice_min = (realstathz / 100);	/* 10ms */
	slice_max = (realstathz / 7);	/* ~140ms */

	tickincr = (hz << 10) / realstathz;
	/*
	 * XXX This does not work for values of stathz that are much
	 * larger than hz.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(td));
	pri += SCHED_PRI_BASE;
	pri += td->td_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

#ifdef KSE
	sched_user_prio(td->td_ksegrp, pri);
#else
	sched_user_prio(td, pri);
#endif

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE threads.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct thread *td;

	td = ke->ke_thread;
	kseq = KSEQ_CPU(ke->ke_cpu);

	if (td->td_flags & TDF_BORROWING) {
		ke->ke_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get a minimal slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the ksegs nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse distance from the last nice thread.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
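	 *
	 * An illustrative example: if the least nice timeshare thread on
	 * this cpu is at nice -5, a thread at nice +3 is 8 points into the
	 * window and receives SCHED_SLICE_NICE(8) ticks, while a thread at
	 * nice +15 is 20 points away, past SCHED_SLICE_NTHRESH (19), and so
	 * falls into the out-of-window cases below.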
	 */
	if (!SCHED_INTERACTIVE(td)) {
		int nice;

		nice = td->td_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    td->td_proc->p_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (td->td_proc->p_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = SCHED_SLICE_MIN; /* 0 */
	} else
		ke->ke_slice = SCHED_SLICE_INTERACTIVE;

	return;
}

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct thread *td)
{
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us back below SCHED_SLP_RUN_MAX.
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		td->td_sched->skg_runtime /= 2;
		td->td_sched->skg_slptime /= 2;
		return;
	}
	td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4;
	td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct thread *td)
{
	int ratio;
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		td->td_sched->skg_runtime /= ratio;
		td->td_sched->skg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct thread *td)
{
	int div;

	if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) {
		div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div)));
	}
	if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) {
		div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF);
		return (td->td_sched->skg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	thread0.td_sched = &kse0;
	kse0.ke_thread = &thread0;
	kse0.ke_state = KES_THREAD;
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
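	 *
	 * ke_ftick and ke_ltick bracket roughly the last SCHED_CPU_TICKS
	 * worth of history; ke_ticks is rescaled below so that samples
	 * older than that window stop contributing to %cpu.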
1211 */ 1212 if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1213 /* 1214 * Shift the tick count out so that the divide doesn't 1215 * round away our results. 1216 */ 1217 ke->ke_ticks <<= 10; 1218 ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 1219 SCHED_CPU_TICKS; 1220 ke->ke_ticks >>= 10; 1221 } else 1222 ke->ke_ticks = 0; 1223 ke->ke_ltick = ticks; 1224 ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 1225 } 1226 1227 void 1228 sched_thread_priority(struct thread *td, u_char prio) 1229 { 1230 struct kse *ke; 1231 1232 CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 1233 td, td->td_proc->p_comm, td->td_priority, prio, curthread, 1234 curthread->td_proc->p_comm); 1235 ke = td->td_kse; 1236 mtx_assert(&sched_lock, MA_OWNED); 1237 if (td->td_priority == prio) 1238 return; 1239 if (TD_ON_RUNQ(td)) { 1240 /* 1241 * If the priority has been elevated due to priority 1242 * propagation, we may have to move ourselves to a new 1243 * queue. We still call adjustrunqueue below in case kse 1244 * needs to fix things up. 1245 */ 1246 if (prio < td->td_priority && ke->ke_runq != NULL && 1247 (ke->ke_flags & KEF_ASSIGNED) == 0 && 1248 ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 1249 runq_remove(ke->ke_runq, ke); 1250 ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1251 runq_add(ke->ke_runq, ke, 0); 1252 } 1253 /* 1254 * Hold this kse on this cpu so that sched_prio() doesn't 1255 * cause excessive migration. We only want migration to 1256 * happen as the result of a wakeup. 1257 */ 1258 ke->ke_flags |= KEF_HOLD; 1259 adjustrunqueue(td, prio); 1260 ke->ke_flags &= ~KEF_HOLD; 1261 } else 1262 td->td_priority = prio; 1263 } 1264 1265 /* 1266 * Update a thread's priority when it is lent another thread's 1267 * priority. 1268 */ 1269 void 1270 sched_lend_prio(struct thread *td, u_char prio) 1271 { 1272 1273 td->td_flags |= TDF_BORROWING; 1274 sched_thread_priority(td, prio); 1275 } 1276 1277 /* 1278 * Restore a thread's priority when priority propagation is 1279 * over. The prio argument is the minimum priority the thread 1280 * needs to have to satisfy other possible priority lending 1281 * requests. If the thread's regular priority is less 1282 * important than prio, the thread will keep a priority boost 1283 * of prio. 1284 */ 1285 void 1286 sched_unlend_prio(struct thread *td, u_char prio) 1287 { 1288 u_char base_pri; 1289 1290 if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1291 td->td_base_pri <= PRI_MAX_TIMESHARE) 1292 base_pri = td->td_user_pri; 1293 else 1294 base_pri = td->td_base_pri; 1295 if (prio >= base_pri) { 1296 td->td_flags &= ~TDF_BORROWING; 1297 sched_thread_priority(td, base_pri); 1298 } else 1299 sched_lend_prio(td, prio); 1300 } 1301 1302 void 1303 sched_prio(struct thread *td, u_char prio) 1304 { 1305 u_char oldprio; 1306 1307 /* First, update the base priority. */ 1308 td->td_base_pri = prio; 1309 1310 /* 1311 * If the thread is borrowing another thread's priority, don't 1312 * ever lower the priority. 1313 */ 1314 if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1315 return; 1316 1317 /* Change the real priority. */ 1318 oldprio = td->td_priority; 1319 sched_thread_priority(td, prio); 1320 1321 /* 1322 * If the thread is on a turnstile, then let the turnstile update 1323 * its state. 
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
#ifdef KSE
sched_user_prio(struct ksegrp *kg, u_char prio)
#else
sched_user_prio(struct thread *td, u_char prio)
#endif
{
#ifdef KSE
	struct thread *td;
#endif
	u_char oldprio;

#ifdef KSE
	kg->kg_base_user_pri = prio;

	/* XXXKSE only for 1:1 */

	td = TAILQ_FIRST(&kg->kg_threads);
	if (td == NULL) {
		kg->kg_user_pri = prio;
		return;
	}

	if (td->td_flags & TDF_UBORROWING && kg->kg_user_pri <= prio)
		return;

	oldprio = kg->kg_user_pri;
	kg->kg_user_pri = prio;
#else
	td->td_base_user_pri = prio;

	oldprio = td->td_user_pri;
	td->td_user_pri = prio;
#endif

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_flags |= TDF_UBORROWING;

#ifdef KSE
	oldprio = td->td_ksegrp->kg_user_pri;
	td->td_ksegrp->kg_user_pri = prio;
#else
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;
#endif

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
#ifdef KSE
	struct ksegrp *kg = td->td_ksegrp;
#endif
	u_char base_pri;

#ifdef KSE
	base_pri = kg->kg_base_user_pri;
#else
	base_pri = td->td_base_user_pri;
#endif
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
#ifdef KSE
		sched_user_prio(kg, base_pri);
#else
		sched_user_prio(td, base_pri);
#endif
	} else
		sched_lend_user_prio(td, prio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct kseq *ksq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;
	ksq = KSEQ_SELF();

	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;

	/*
	 * If the KSE has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		kseq_load_rem(ksq, ke);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			ke->ke_flags |= KEF_HOLD;
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			ke->ke_flags &= ~KEF_HOLD;
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread account for it as if it had been
		 * added to the run queue and then chosen.
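		 * (Mark it KEF_DIDRUN, point it at ksq_curr and charge it to
		 * this cpu's load below.)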
		 */
		newtd->td_kse->ke_flags |= KEF_DIDRUN;
		newtd->td_kse->ke_runq = ksq->ksq_curr;
		TD_SET_RUNNING(newtd);
		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif

		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}

	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_pri_class == PRI_TIMESHARE) {
			ke = td->td_kse;
			if (ke->ke_runq == NULL)
				continue;
			kseq = KSEQ_CPU(ke->ke_cpu);
			kseq_nice_rem(kseq, p->p_nice);
			kseq_nice_add(kseq, nice);
		}
	}
	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		sched_priority(td);
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_kse->ke_slptime) {
		int hzticks;

		hzticks = (ticks - td->td_kse->ke_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX;
			td->td_sched->skg_runtime = 1;
		} else {
			td->td_sched->skg_slptime += hzticks;
			sched_interact_update(td);
		}
		sched_priority(td);
		sched_slice(td->td_kse);
		td->td_kse->ke_slptime = 0;
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	struct kse *ke;
	struct kse *ke2;

	mtx_assert(&sched_lock, MA_OWNED);

	child->td_sched->skg_slptime = td->td_sched->skg_slptime;
	child->td_sched->skg_runtime = td->td_sched->skg_runtime;
	child->td_user_pri = td->td_user_pri;
	child->td_base_user_pri = td->td_base_user_pri;
	sched_interact_fork(child);
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);

	sched_newthread(child);

	ke = td->td_kse;
	ke2 = child->td_kse;
	ke2->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	ke2->ke_cpu = ke->ke_cpu;
	ke2->ke_runq = NULL;

	/*
	 * Grab our parent's cpu estimation information
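	 * (ke_ticks, ke_ltick and ke_ftick), so the child starts out with
	 * the parent's %cpu rather than from zero.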
	 */
	ke2->ke_ticks = ke->ke_ticks;
	ke2->ke_ltick = ke->ke_ltick;
	ke2->ke_ftick = ke->ke_ftick;
}

void
sched_class(struct thread *td, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(td->td_pri_class);
	ke = td->td_kse;
	if ((ke->ke_state != KES_ONRUNQ &&
	    ke->ke_state != KES_THREAD) || ke->ke_runq == NULL)
		goto out;
	kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
	/*
	 * On SMP if we're on the RUNQ we must adjust the transferable
	 * count because we could be changing to or from an interrupt
	 * class.
	 */
	if (ke->ke_state == KES_ONRUNQ) {
		if (KSE_CAN_MIGRATE(ke)) {
			kseq->ksq_transferable--;
			kseq->ksq_group->ksg_transferable--;
		}
		if (KSE_CAN_MIGRATE(ke)) {
			kseq->ksq_transferable++;
			kseq->ksq_group->ksg_transferable++;
		}
	}
#endif
	if (oclass == PRI_TIMESHARE) {
		kseq->ksq_load_timeshare--;
		kseq_nice_rem(kseq, td->td_proc->p_nice);
	}
	if (nclass == PRI_TIMESHARE) {
		kseq->ksq_load_timeshare++;
		kseq_nice_add(kseq, td->td_proc->p_nice);
	}

out:
	td->td_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *childtd)
{
	struct thread *parent = FIRST_THREAD_IN_PROC(p);

	mtx_assert(&sched_lock, MA_OWNED);

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    childtd, childtd->td_proc->p_comm, childtd->td_priority);

	/* parent->td_sched->skg_slptime += childtd->td_sched->skg_slptime; */
	parent->td_sched->skg_runtime += childtd->td_sched->skg_runtime;
	sched_interact_update(parent);

	kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
	if (ticks >= bal_tick)
		sched_balance();
	if (ticks >= gbal_tick && balance_groups)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (kseq->ksq_assigned)
		kseq_assign(kseq);	/* Potentially sets NEEDRESCHED */
#endif
	ke = td->td_kse;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;
	/*
	 * We only do slicing code for TIMESHARE threads.
	 */
	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the thread so that we can compute our
	 * interactivity.
	 */
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
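	 * (The thread goes back on ksq_curr if SCHED_CURR() still considers
	 * it interactive, otherwise on ksq_next.)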
1694 */ 1695 kseq_load_rem(kseq, ke); 1696 sched_priority(td); 1697 sched_slice(ke); 1698 if (SCHED_CURR(td, ke)) 1699 ke->ke_runq = kseq->ksq_curr; 1700 else 1701 ke->ke_runq = kseq->ksq_next; 1702 kseq_load_add(kseq, ke); 1703 td->td_flags |= TDF_NEEDRESCHED; 1704 } 1705 1706 int 1707 sched_runnable(void) 1708 { 1709 struct kseq *kseq; 1710 int load; 1711 1712 load = 1; 1713 1714 kseq = KSEQ_SELF(); 1715 #ifdef SMP 1716 if (kseq->ksq_assigned) { 1717 mtx_lock_spin(&sched_lock); 1718 kseq_assign(kseq); 1719 mtx_unlock_spin(&sched_lock); 1720 } 1721 #endif 1722 if ((curthread->td_flags & TDF_IDLETD) != 0) { 1723 if (kseq->ksq_load > 0) 1724 goto out; 1725 } else 1726 if (kseq->ksq_load - 1 > 0) 1727 goto out; 1728 load = 0; 1729 out: 1730 return (load); 1731 } 1732 1733 struct kse * 1734 sched_choose(void) 1735 { 1736 struct kseq *kseq; 1737 struct kse *ke; 1738 1739 mtx_assert(&sched_lock, MA_OWNED); 1740 kseq = KSEQ_SELF(); 1741 #ifdef SMP 1742 restart: 1743 if (kseq->ksq_assigned) 1744 kseq_assign(kseq); 1745 #endif 1746 ke = kseq_choose(kseq); 1747 if (ke) { 1748 #ifdef SMP 1749 if (ke->ke_thread->td_pri_class == PRI_IDLE) 1750 if (kseq_idled(kseq) == 0) 1751 goto restart; 1752 #endif 1753 kseq_runq_rem(kseq, ke); 1754 ke->ke_state = KES_THREAD; 1755 ke->ke_flags &= ~KEF_PREEMPTED; 1756 return (ke); 1757 } 1758 #ifdef SMP 1759 if (kseq_idled(kseq) == 0) 1760 goto restart; 1761 #endif 1762 return (NULL); 1763 } 1764 1765 void 1766 sched_add(struct thread *td, int flags) 1767 { 1768 struct kseq *kseq; 1769 struct kse *ke; 1770 int preemptive; 1771 int canmigrate; 1772 int class; 1773 1774 CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 1775 td, td->td_proc->p_comm, td->td_priority, curthread, 1776 curthread->td_proc->p_comm); 1777 mtx_assert(&sched_lock, MA_OWNED); 1778 ke = td->td_kse; 1779 canmigrate = 1; 1780 preemptive = !(flags & SRQ_YIELDING); 1781 class = PRI_BASE(td->td_pri_class); 1782 kseq = KSEQ_SELF(); 1783 ke->ke_flags &= ~KEF_INTERNAL; 1784 #ifdef SMP 1785 if (ke->ke_flags & KEF_ASSIGNED) { 1786 if (ke->ke_flags & KEF_REMOVED) 1787 ke->ke_flags &= ~KEF_REMOVED; 1788 return; 1789 } 1790 canmigrate = KSE_CAN_MIGRATE(ke); 1791 /* 1792 * Don't migrate running threads here. Force the long term balancer 1793 * to do it. 1794 */ 1795 if (ke->ke_flags & KEF_HOLD) { 1796 ke->ke_flags &= ~KEF_HOLD; 1797 canmigrate = 0; 1798 } 1799 #endif 1800 KASSERT(ke->ke_state != KES_ONRUNQ, 1801 ("sched_add: kse %p (%s) already in run queue", ke, 1802 td->td_proc->p_comm)); 1803 KASSERT(td->td_proc->p_sflag & PS_INMEM, 1804 ("sched_add: process swapped out")); 1805 KASSERT(ke->ke_runq == NULL, 1806 ("sched_add: KSE %p is still assigned to a run queue", ke)); 1807 if (flags & SRQ_PREEMPTED) 1808 ke->ke_flags |= KEF_PREEMPTED; 1809 switch (class) { 1810 case PRI_ITHD: 1811 case PRI_REALTIME: 1812 ke->ke_runq = kseq->ksq_curr; 1813 ke->ke_slice = SCHED_SLICE_MAX; 1814 if (canmigrate) 1815 ke->ke_cpu = PCPU_GET(cpuid); 1816 break; 1817 case PRI_TIMESHARE: 1818 if (SCHED_CURR(td, ke)) 1819 ke->ke_runq = kseq->ksq_curr; 1820 else 1821 ke->ke_runq = kseq->ksq_next; 1822 break; 1823 case PRI_IDLE: 1824 /* 1825 * This is for priority prop. 1826 */ 1827 if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 1828 ke->ke_runq = kseq->ksq_curr; 1829 else 1830 ke->ke_runq = &kseq->ksq_idle; 1831 ke->ke_slice = SCHED_SLICE_MIN; 1832 break; 1833 default: 1834 panic("Unknown pri class."); 1835 break; 1836 } 1837 #ifdef SMP 1838 /* 1839 * If this thread is pinned or bound, notify the target cpu. 
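	 * (ke_cpu still names the cpu the thread is pinned or bound to, so
	 * hand the kse to that cpu with kseq_notify() instead of queueing
	 * it locally.)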
1840 */ 1841 if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 1842 ke->ke_runq = NULL; 1843 kseq_notify(ke, ke->ke_cpu); 1844 return; 1845 } 1846 /* 1847 * If we had been idle, clear our bit in the group and potentially 1848 * the global bitmap. If not, see if we should transfer this thread. 1849 */ 1850 if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 1851 (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 1852 /* 1853 * Check to see if our group is unidling, and if so, remove it 1854 * from the global idle mask. 1855 */ 1856 if (kseq->ksq_group->ksg_idlemask == 1857 kseq->ksq_group->ksg_cpumask) 1858 atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 1859 /* 1860 * Now remove ourselves from the group specific idle mask. 1861 */ 1862 kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1863 } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD) 1864 if (kseq_transfer(kseq, ke, class)) 1865 return; 1866 ke->ke_cpu = PCPU_GET(cpuid); 1867 #endif 1868 if (td->td_priority < curthread->td_priority && 1869 ke->ke_runq == kseq->ksq_curr) 1870 curthread->td_flags |= TDF_NEEDRESCHED; 1871 if (preemptive && maybe_preempt(td)) 1872 return; 1873 ke->ke_state = KES_ONRUNQ; 1874 1875 kseq_runq_add(kseq, ke, flags); 1876 kseq_load_add(kseq, ke); 1877 } 1878 1879 void 1880 sched_rem(struct thread *td) 1881 { 1882 struct kseq *kseq; 1883 struct kse *ke; 1884 1885 CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 1886 td, td->td_proc->p_comm, td->td_priority, curthread, 1887 curthread->td_proc->p_comm); 1888 mtx_assert(&sched_lock, MA_OWNED); 1889 ke = td->td_kse; 1890 ke->ke_flags &= ~KEF_PREEMPTED; 1891 if (ke->ke_flags & KEF_ASSIGNED) { 1892 ke->ke_flags |= KEF_REMOVED; 1893 return; 1894 } 1895 KASSERT((ke->ke_state == KES_ONRUNQ), 1896 ("sched_rem: KSE not on run queue")); 1897 1898 ke->ke_state = KES_THREAD; 1899 kseq = KSEQ_CPU(ke->ke_cpu); 1900 kseq_runq_rem(kseq, ke); 1901 kseq_load_rem(kseq, ke); 1902 } 1903 1904 fixpt_t 1905 sched_pctcpu(struct thread *td) 1906 { 1907 fixpt_t pctcpu; 1908 struct kse *ke; 1909 1910 pctcpu = 0; 1911 ke = td->td_kse; 1912 if (ke == NULL) 1913 return (0); 1914 1915 mtx_lock_spin(&sched_lock); 1916 if (ke->ke_ticks) { 1917 int rtick; 1918 1919 /* 1920 * Don't update more frequently than twice a second. Allowing 1921 * this causes the cpu usage to decay away too quickly due to 1922 * rounding errors. 1923 */ 1924 if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 1925 ke->ke_ltick < (ticks - (hz / 2))) 1926 sched_pctcpu_update(ke); 1927 /* How many rtick per second ? */ 1928 rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 1929 pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 1930 } 1931 1932 td->td_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1933 mtx_unlock_spin(&sched_lock); 1934 1935 return (pctcpu); 1936 } 1937 1938 void 1939 sched_bind(struct thread *td, int cpu) 1940 { 1941 struct kse *ke; 1942 1943 mtx_assert(&sched_lock, MA_OWNED); 1944 ke = td->td_kse; 1945 ke->ke_flags |= KEF_BOUND; 1946 #ifdef SMP 1947 if (PCPU_GET(cpuid) == cpu) 1948 return; 1949 /* sched_rem without the runq_remove */ 1950 ke->ke_state = KES_THREAD; 1951 kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1952 kseq_notify(ke, cpu); 1953 /* When we return from mi_switch we'll be on the correct cpu. 
*/ 1954 mi_switch(SW_VOL, NULL); 1955 #endif 1956 } 1957 1958 void 1959 sched_unbind(struct thread *td) 1960 { 1961 mtx_assert(&sched_lock, MA_OWNED); 1962 td->td_kse->ke_flags &= ~KEF_BOUND; 1963 } 1964 1965 int 1966 sched_is_bound(struct thread *td) 1967 { 1968 mtx_assert(&sched_lock, MA_OWNED); 1969 return (td->td_kse->ke_flags & KEF_BOUND); 1970 } 1971 1972 void 1973 sched_relinquish(struct thread *td) 1974 { 1975 #ifdef KSE 1976 struct ksegrp *kg; 1977 1978 kg = td->td_ksegrp; 1979 #endif 1980 mtx_lock_spin(&sched_lock); 1981 #ifdef KSE 1982 if (kg->kg_pri_class == PRI_TIMESHARE) 1983 #else 1984 if (td->td_pri_class == PRI_TIMESHARE) 1985 #endif 1986 sched_prio(td, PRI_MAX_TIMESHARE); 1987 mi_switch(SW_VOL, NULL); 1988 mtx_unlock_spin(&sched_lock); 1989 } 1990 1991 int 1992 sched_load(void) 1993 { 1994 #ifdef SMP 1995 int total; 1996 int i; 1997 1998 total = 0; 1999 for (i = 0; i <= ksg_maxid; i++) 2000 total += KSEQ_GROUP(i)->ksg_load; 2001 return (total); 2002 #else 2003 return (KSEQ_SELF()->ksq_sysload); 2004 #endif 2005 } 2006 2007 int 2008 sched_sizeof_proc(void) 2009 { 2010 return (sizeof(struct proc)); 2011 } 2012 2013 int 2014 sched_sizeof_thread(void) 2015 { 2016 return (sizeof(struct thread) + sizeof(struct td_sched)); 2017 } 2018 2019 void 2020 sched_tick(void) 2021 { 2022 } 2023 #define KERN_SWITCH_INCLUDE 1 2024 #include "kern/kern_switch.c" 2025