/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <opt_sched.h>

#define kse td_sched

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

/*
 * The schedulable entity that can be given a context to run.  A process may
 * have several of these.  Probably one per processor but possibly a few more.
 * In this universe they are grouped with a KSEG that contains the priority
 * and niceness for the group.
 */
struct kse {
	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
	int		ke_flags;	/* (j) KEF_* flags. */
	struct thread	*ke_thread;	/* (*) Active associated thread. */
	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
	char		ke_rqindex;	/* (j) Run queue index. */
	enum {
		KES_THREAD = 0x0,	/* slaved to thread state */
		KES_ONRUNQ
	} ke_state;			/* (j) thread sched specific status. */
	int		ke_slptime;
	int		ke_slice;
	struct runq	*ke_runq;
	u_char		ke_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ke_ltick;	/* Last tick that we were running on */
	int		ke_ftick;	/* First tick that we were running on */
	int		ke_ticks;	/* Tick count */

};

#define	td_kse		td_sched
#define	td_slptime	td_kse->ke_slptime
#define	ke_proc		ke_thread->td_proc
#define	ke_ksegrp	ke_thread->td_ksegrp

/* flags kept in ke_flags */
#define	KEF_SCHED0	0x00001	/* For scheduler-specific use. */
#define	KEF_SCHED1	0x00002	/* For scheduler-specific use. */
#define	KEF_SCHED2	0x00004	/* For scheduler-specific use. */
#define	KEF_SCHED3	0x00008	/* For scheduler-specific use. */
#define	KEF_SCHED4	0x00010
#define	KEF_SCHED5	0x00020
#define	KEF_DIDRUN	0x02000	/* Thread actually ran. */
#define	KEF_EXIT	0x04000	/* Thread is being killed. */

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	0x0001	/* Thread is being migrated. */
#define	KEF_BOUND	0x0002	/* Thread can not migrate. */
#define	KEF_XFERABLE	0x0004	/* Thread was added as transferable. */
#define	KEF_HOLD	0x0008	/* Thread is temporarily bound. */
#define	KEF_REMOVED	0x0010	/* Thread was removed while ASSIGNED */
#define	KEF_INTERNAL	0x0020

struct kg_sched {
	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
					    /* the system scheduler */
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
	int	skg_avail_opennings;	/* (j) Num unfilled slots in group.*/
	int	skg_concurrency;	/* (j) Num threads requested in group.*/
};
#define	kg_last_assigned	kg_sched->skg_last_assigned
#define	kg_avail_opennings	kg_sched->skg_avail_opennings
#define	kg_concurrency		kg_sched->skg_concurrency
#define	kg_runtime		kg_sched->skg_runtime
#define	kg_slptime		kg_sched->skg_slptime

#define	SLOT_RELEASE(kg)						\
do {									\
	kg->kg_avail_opennings++;					\
	CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)",		\
	    kg,								\
	    kg->kg_concurrency,						\
	    kg->kg_avail_opennings);					\
	/*KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency),	\
	    ("slots out of whack")); */					\
} while (0)

#define	SLOT_USE(kg)							\
do {									\
	kg->kg_avail_opennings--;					\
	CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)",			\
	    kg,								\
	    kg->kg_concurrency,						\
	    kg->kg_avail_opennings);					\
	/*KASSERT((kg->kg_avail_opennings >= 0),			\
	    ("slots out of whack"));*/					\
} while (0)

static struct kse kse0;
static struct kg_sched kg_sched0;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower(better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int			ksq_transferable;
	LIST_ENTRY(kseq)	ksq_siblings;	/* Next in kseq group. */
	struct kseq_group	*ksq_group;	/* Our processor group. */
	volatile struct kse	*ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};
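
/*
 * The two ksq_timeshare[] runqs act as a current and a next queue:
 * kseq_choose() drains ksq_curr and swaps the two pointers when it empties,
 * while non-interactive threads whose slice expires in sched_clock() are
 * requeued on ksq_next.  This gives expired timeshare threads a round-robin
 * turn once the current queue is exhausted.
 */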

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Simultaneous Multi-Threading) and not
 * NUMA.  In a NUMA environment we'd want an idle bitmap per group and a two
 * tiered load balancer.
 */
struct kseq_group {
	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq) ksg_members;	/* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t kseq_idle;
static int ksg_maxid;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#define	KSEQ_ID(x)	((x) - kseq_cpu)
#define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
#else	/* !SMP */
static struct kseq	kseq_cpu;

#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void slot_fill(struct ksegrp *kg);
static struct kse *sched_choose(void);	/* XXX Should be thread * */
static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static void sched_thread_priority(struct thread *td, u_char prio);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke, int);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
#define	KSE_CAN_MIGRATE(ke)						\
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke)) {
		kseq->ksq_transferable++;
		kseq->ksq_group->ksg_transferable++;
		ke->ke_flags |= KEF_XFERABLE;
	}
#endif
	runq_add(ke->ke_runq, ke, flags);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (ke->ke_flags & KEF_XFERABLE) {
		kseq->ksq_transferable--;
		kseq->ksq_group->ksg_transferable--;
		ke->ke_flags &= ~KEF_XFERABLE;
	}
#endif
	runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
	CTR1(KTR_SCHED, "load: %d", kseq->ksq_load);
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load++;
#else
		kseq->ksq_sysload++;
#endif
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_add(kseq, ke->ke_proc->p_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load--;
#else
		kseq->ksq_sysload--;
#endif
	kseq->ksq_load--;
	CTR1(KTR_SCHED, "load: %d", kseq->ksq_load);
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_rem(kseq, ke->ke_proc->p_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
		kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	kseq->ksq_nice[n]--;
	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != kseq->ksq_nicemin ||
	    kseq->ksq_nice[n] != 0 ||
	    kseq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (kseq->ksq_nice[n]) {
			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}
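
/*
 * Worked example of the nice bookkeeping above (with the usual PRIO_MIN/
 * PRIO_MAX of -20/20, SCHED_PRI_NHALF is 20): adding a timeshare kse whose
 * process is at nice -5 increments ksq_nice[15]; if -5 is lower than the
 * current ksq_nicemin (or it is the only timeshare kse), ksq_nicemin becomes
 * -5 and sched_slice() then sizes every other timeshare kse's slice by its
 * distance from that value.
 */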

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void)
{
	struct kseq_group *high;
	struct kseq_group *low;
	struct kseq_group *ksg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (ksg_maxid + 1);
	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
		ksg = KSEQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || ksg->ksg_load > high->ksg_load)
		    && ksg->ksg_transferable)
			high = ksg;
		if (low == NULL || ksg->ksg_load < low->ksg_load)
			low = ksg;
		if (++i > ksg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->ksg_members),
		    LIST_FIRST(&low->ksg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= ksg_maxid; i++)
			sched_balance_group(KSEQ_GROUP(i));
}

static void
sched_balance_group(struct kseq_group *ksg)
{
	struct kseq *kseq;
	struct kseq *high;
	struct kseq *low;
	int load;

	if (ksg->ksg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
		load = kseq->ksq_load;
		if (high == NULL || load > high->ksq_load)
			high = kseq;
		if (low == NULL || load < low->ksq_load)
			low = kseq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->ksq_group == low->ksq_group) {
		transferable = high->ksq_transferable;
		high_load = high->ksq_load;
		low_load = low->ksq_load;
	} else {
		transferable = high->ksq_group->ksg_transferable;
		high_load = high->ksq_group->ksg_load;
		low_load = low->ksq_group->ksg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
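	/*
	 * For example, a high_load of 5 against a low_load of 2 gives
	 * diff = 3, which rounds up to move = 2 kses before being clamped
	 * to the transferable count.
	 */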
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		kseq_move(high, KSEQ_ID(low));
	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kseq *kseq;
	struct kseq *to;
	struct kse *ke;

	kseq = from;
	to = KSEQ_CPU(cpu);
	ke = kseq_steal(kseq, 1);
	if (ke == NULL) {
		struct kseq_group *ksg;

		ksg = kseq->ksq_group;
		LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
			if (kseq == from || kseq->ksq_transferable == 0)
				continue;
			ke = kseq_steal(kseq, 1);
			break;
		}
		if (ke == NULL)
			panic("kseq_move: No KSEs available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (kseq == to)
		return;
	ke->ke_state = KES_THREAD;
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
	kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
	struct kseq_group *ksg;
	struct kseq *steal;
	struct kse *ke;

	ksg = kseq->ksq_group;
	/*
	 * If we're in a cpu group, try and steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == kseq || steal->ksq_transferable == 0)
				continue;
			ke = kseq_steal(steal, 0);
			if (ke == NULL)
				continue;
			ke->ke_state = KES_THREAD;
			kseq_runq_rem(steal, ke);
			kseq_load_rem(steal, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			ke->ke_flags |= KEF_INTERNAL | KEF_HOLD;
			sched_add(ke->ke_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a KSE bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&kseq_idle, ksg->ksg_mask);
	return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		*(volatile struct kse **)&ke = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		kseq->ksq_group->ksg_load--;
		kseq->ksq_load--;
		ke->ke_flags &= ~KEF_ASSIGNED;
		ke->ke_flags |= KEF_INTERNAL | KEF_HOLD;
		sched_add(ke->ke_thread, SRQ_YIELDING);
	}
}

static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	kseq = KSEQ_CPU(cpu);
	/* XXX */
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq_idle & kseq->ksq_group->ksg_mask))
		atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
	kseq->ksq_group->ksg_load++;
	kseq->ksq_load++;
	ke->ke_cpu = cpu;
	ke->ke_flags |= KEF_ASSIGNED;
	prio = ke->ke_thread->td_priority;

	/*
	 * Place a KSE on another cpu's queue and force a resched.
	 */
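	/*
	 * The loop below atomically pushes the kse onto the front of the
	 * remote kseq's ksq_assigned list; ke_assign links the entries, and
	 * kseq_assign() on the target cpu later detaches the whole chain in
	 * one atomic swap and sched_add()s each kse on it.
	 */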
	do {
		*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
	struct kse *ke;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if (stealidle)
		return (runq_steal(&kseq->ksq_idle));
	return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
	struct kseq_group *nksg;
	struct kseq_group *ksg;
	struct kseq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = KSEQ_CPU(ke->ke_cpu);
	nksg = old->ksq_group;
	ksg = kseq->ksq_group;
	if (kseq_idle) {
		if (kseq_idle & nksg->ksg_mask) {
			cpu = ffs(nksg->ksg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "kseq_transfer: %p found old cpu %X "
				    "in idlemask.", ke, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "kseq_transfer: %p found %X "
			    "in idlemask.", ke, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->ksq_load < kseq->ksq_load) {
		cpu = ke->ke_cpu + 1;
		CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X "
		    "load less than ours.", ke, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= ksg_maxid; idx++) {
		nksg = KSEQ_GROUP(idx);
		if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) {
			cpu = ffs(nksg->ksg_cpumask);
			CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less "
			    "than ours.", ke, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in "
			    "group.", ke, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ke->ke_runq = NULL;
	kseq_notify(ke, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct runq *swap;
	struct kse *ke;
	int nice;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH &&
		    ke->ke_proc->p_nice != 0)) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke, 0);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the kseqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct kseq *ksq;

		ksq = &kseq_cpu[i];
		ksq->ksq_assigned = NULL;
		kseq_setup(&kseq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct kseq_group *ksg;
		struct kseq *ksq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			ksq = &kseq_cpu[cpus];
			ksg = &kseq_groups[cpus];
			/*
			 * Setup a kseq group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
			cpus++;
		}
		ksg_maxid = cpus - 1;
	} else {
		struct kseq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &kseq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	if (ke->ke_thread->td_flags & TDF_BORROWING) {
		ke->ke_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get a minimal slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the ksegs nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse distance from the least nice ksegrp.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
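	/*
	 * Illustrative numbers (assuming hz = 1000, so sched_setup() picks
	 * slice_min = 10 and slice_max = 142 ticks): a kse whose nice equals
	 * ksq_nicemin receives SCHED_SLICE_MAX (142 ticks), one that is 10
	 * nice levels above the least nice receives
	 * SCHED_SLICE_NICE(10) = 142 - (10 * 133) / 19 = 72 ticks, and one
	 * more than SCHED_SLICE_NTHRESH (19) levels above gets no slice at
	 * all unless its nice is 0.  Exact values depend on hz and the
	 * slice_min/slice_max sysctls.
	 */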
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_proc->p_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_proc->p_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_INTERACTIVE;

	return;
}

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	}
	if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);

}

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	ksegrp0.kg_sched = &kg_sched0;
	thread0.td_sched = &kse0;
	kse0.ke_thread = &thread0;
	kse0.ke_state = KES_THREAD;
	kg_sched0.skg_concurrency = 1;
	kg_sched0.skg_avail_opennings = 0; /* we are already running */
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}
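
/*
 * ke_ftick and ke_ltick bracket the window over which ke_ticks was
 * accumulated.  sched_pctcpu_update() rescales ke_ticks so that it always
 * describes roughly the last SCHED_CPU_TIME (10) seconds of history and then
 * advances the watermarks; ticks older than that window decay away entirely.
 */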
static void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
		/*
		 * Shift the tick count out so that the divide doesn't
		 * round away our results.
		 */
		ke->ke_ticks <<= 10;
		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
		    SCHED_CPU_TICKS;
		ke->ke_ticks >>= 10;
	} else
		ke->ke_ticks = 0;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct kse *ke;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ke = td->td_kse;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;
	if (TD_ON_RUNQ(td)) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  We still call adjustrunqueue below in case kse
		 * needs to fix things up.
		 */
		if (prio < td->td_priority && ke->ke_runq != NULL &&
		    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
			runq_remove(ke->ke_runq, ke);
			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
			runq_add(ke->ke_runq, ke, 0);
		}
		/*
		 * Hold this kse on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		ke->ke_flags |= KEF_HOLD;
		adjustrunqueue(td, prio);
		ke->ke_flags &= ~KEF_HOLD;
	} else
		td->td_priority = prio;
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_ksegrp->kg_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct kseq *ksq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;
	ksq = KSEQ_SELF();

	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;

	/*
	 * If the KSE has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		SLOT_RELEASE(td->td_ksegrp);
		kseq_load_rem(ksq, ke);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			ke->ke_flags |= KEF_HOLD;
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			ke->ke_flags &= ~KEF_HOLD;
		} else if ((td->td_proc->p_flag & P_HADTHREADS) &&
		    (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp))
			/*
			 * We will not be on the run queue.
			 * So we must be sleeping or similar.
			 * Don't use the slot if we will need it
			 * for newtd.
			 */
			slot_fill(td->td_ksegrp);
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread,
		 * then account for it as if it had been added to the
		 * run queue and then chosen.
		 */
		newtd->td_kse->ke_flags |= KEF_DIDRUN;
		newtd->td_kse->ke_runq = ksq->ksq_curr;
		SLOT_USE(newtd->td_ksegrp);
		TD_SET_RUNNING(newtd);
		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef	HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
		cpu_switch(td, newtd);
#ifdef	HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}

	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_pri_class == PRI_TIMESHARE) {
			FOREACH_THREAD_IN_GROUP(kg, td) {
				ke = td->td_kse;
				if (ke->ke_runq == NULL)
					continue;
				kseq = KSEQ_CPU(ke->ke_cpu);
				kseq_nice_rem(kseq, p->p_nice);
				kseq_nice_add(kseq, nice);
			}
		}
	}
	p->p_nice = nice;
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_priority(kg);
		FOREACH_THREAD_IN_GROUP(kg, td)
			td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
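	/*
	 * Rough effect on the interactivity score (see
	 * sched_interact_score()): when accumulated sleep time dominates,
	 * the score is about 50 * runtime / slptime, so a kseg that sleeps
	 * roughly three times as long as it runs scores around 16, well
	 * under SCHED_INTERACT_THRESH (30), and its threads are queued on
	 * ksq_curr as interactive.
	 */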
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		sched_slice(td->td_kse);
		td->td_slptime = 0;
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *childtd)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(td, childtd->td_ksegrp);
	sched_fork_thread(td, childtd);
}

void
sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
	struct ksegrp *kg = td->td_ksegrp;
	mtx_assert(&sched_lock, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct kse *ke;
	struct kse *ke2;

	sched_newthread(child);
	ke = td->td_kse;
	ke2 = child->td_kse;
	ke2->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	ke2->ke_cpu = ke->ke_cpu;
	ke2->ke_runq = NULL;

	/* Grab our parents cpu estimation information. */
	ke2->ke_ticks = ke->ke_ticks;
	ke2->ke_ltick = ke->ke_ltick;
	ke2->ke_ftick = ke->ke_ftick;
}

void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	struct thread *td;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		ke = td->td_kse;
		if ((ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD) || ke->ke_runq == NULL)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			if (KSE_CAN_MIGRATE(ke)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_proc->p_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_proc->p_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *childtd)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd);
	sched_exit_thread(NULL, childtd);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *td)
{
	/* kg->kg_slptime += td->td_ksegrp->kg_slptime; */
	kg->kg_runtime += td->td_ksegrp->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *childtd)
{
	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    childtd, childtd->td_proc->p_comm, childtd->td_priority);
	kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
	if (ticks >= bal_tick)
		sched_balance();
	if (ticks >= gbal_tick && balance_groups)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (kseq->ksq_assigned)
		kseq_assign(kseq);	/* Potentially sets NEEDRESCHED */
#endif
	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute
	 * our interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	kg = td->td_ksegrp;
	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		td->td_base_pri = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td, int flags)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int preemptive;
	int canmigrate;
	int class;

	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	canmigrate = 1;
	preemptive = !(flags & SRQ_YIELDING);
	class = PRI_BASE(kg->kg_pri_class);
	kseq = KSEQ_SELF();
	if ((ke->ke_flags & KEF_INTERNAL) == 0)
		SLOT_USE(td->td_ksegrp);
	ke->ke_flags &= ~KEF_INTERNAL;
#ifdef SMP
	if (ke->ke_flags & KEF_ASSIGNED) {
		if (ke->ke_flags & KEF_REMOVED)
			ke->ke_flags &= ~KEF_REMOVED;
		return;
	}
	canmigrate = KSE_CAN_MIGRATE(ke);
#endif
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		if (canmigrate)
			ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority prop.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	/*
	 * Don't migrate running threads here.  Force the long term balancer
	 * to do it.
	 */
	if (ke->ke_flags & KEF_HOLD) {
		ke->ke_flags &= ~KEF_HOLD;
		canmigrate = 0;
	}
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) {
		ke->ke_runq = NULL;
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD)
		if (kseq_transfer(kseq, ke, class))
			return;
	ke->ke_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority &&
	    ke->ke_runq == kseq->ksq_curr)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke, flags);
	kseq_load_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	SLOT_RELEASE(td->td_ksegrp);
	if (ke->ke_flags & KEF_ASSIGNED) {
		ke->ke_flags |= KEF_REMOVED;
		return;
	}
	KASSERT((ke->ke_state == KES_ONRUNQ),
	    ("sched_rem: KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
		    ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rtick per second ? */
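		/*
		 * Example: a thread that was on cpu for the entire
		 * SCHED_CPU_TIME (10 second) window accumulates roughly
		 * realstathz ticks per second, so rtick is about realstathz
		 * and the expression below works out to roughly FSCALE,
		 * i.e. 100%.
		 */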
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_is_bound(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	return (td->td_kse->ke_flags & KEF_BOUND);
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= ksg_maxid; i++)
		total += KSEQ_GROUP(i)->ksg_load;
	return (total);
#else
	return (KSEQ_SELF()->ksq_sysload);
#endif
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"