/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <opt_sched.h>

#define kse td_sched

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define KTR_ULE	KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

/*
 * The schedulable entity that can be given a context to run.  A process may
 * have several of these.  Probably one per processor but possibly a few more.
 * In this universe they are grouped with a KSEG that contains the priority
 * and niceness for the group.
 */
struct kse {
	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
	int		ke_flags;	/* (j) KEF_* flags. */
	struct thread	*ke_thread;	/* (*) Active associated thread. */
	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
	char		ke_rqindex;	/* (j) Run queue index. */
	enum {
		KES_THREAD = 0x0,	/* slaved to thread state */
		KES_ONRUNQ
	} ke_state;			/* (j) thread sched specific status. */
	int		ke_slptime;
	int		ke_slice;
	struct runq	*ke_runq;
	u_char		ke_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ke_ltick;	/* Last tick that we were running on */
	int		ke_ftick;	/* First tick that we were running on */
	int		ke_ticks;	/* Tick count */

};

#define td_kse		td_sched
#define td_slptime	td_kse->ke_slptime
#define ke_proc		ke_thread->td_proc
#define ke_ksegrp	ke_thread->td_ksegrp

/* flags kept in ke_flags */
#define	KEF_SCHED0	0x00001		/* For scheduler-specific use. */
#define	KEF_SCHED1	0x00002		/* For scheduler-specific use. */
#define	KEF_SCHED2	0x00004		/* For scheduler-specific use. */
#define	KEF_SCHED3	0x00008		/* For scheduler-specific use. */
#define	KEF_SCHED4	0x00010
#define	KEF_SCHED5	0x00020
#define	KEF_DIDRUN	0x02000		/* Thread actually ran. */
#define	KEF_EXIT	0x04000		/* Thread is being killed. */

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	KEF_SCHED0	/* Thread is being migrated. */
#define	KEF_BOUND	KEF_SCHED1	/* Thread can not migrate. */
#define	KEF_XFERABLE	KEF_SCHED2	/* Thread was added as transferable. */
#define	KEF_HOLD	KEF_SCHED3	/* Thread is temporarily bound. */
#define	KEF_REMOVED	KEF_SCHED4	/* Thread was removed while ASSIGNED */
#define	KEF_PRIOELEV	KEF_SCHED5	/* Thread has had its prio elevated. */

struct kg_sched {
	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
					   /* the system scheduler */
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
	int	skg_avail_opennings;	/* (j) Num unfilled slots in group.*/
	int	skg_concurrency;	/* (j) Num threads requested in group.*/
};
#define kg_last_assigned	kg_sched->skg_last_assigned
#define kg_avail_opennings	kg_sched->skg_avail_opennings
#define kg_concurrency		kg_sched->skg_concurrency
#define kg_runtime		kg_sched->skg_runtime
#define kg_slptime		kg_sched->skg_slptime

#define SLOT_RELEASE(kg)						\
do {									\
	kg->kg_avail_opennings++;					\
	CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)",		\
	kg,								\
	kg->kg_concurrency,						\
	kg->kg_avail_opennings);					\
	/*KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency),	\
	    ("slots out of whack")); */					\
} while (0)

#define SLOT_USE(kg)							\
do {									\
	kg->kg_avail_opennings--;					\
	CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)",			\
	kg,								\
	kg->kg_concurrency,						\
	kg->kg_avail_opennings);					\
	/*KASSERT((kg->kg_avail_opennings >= 0),			\
	    ("slots out of whack"));*/					\
} while (0)

static struct kse kse0;
static struct kg_sched kg_sched0;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower(better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
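 *
 * Illustrative (not authoritative) example: with a timeshare range of, say,
 * 64 priority levels and SCHED_INTERACT_MAX of 100, an interactivity score
 * of 50 yields SCHED_PRI_INTERACT(50) == 50 * 64 / 100 == 32 priority steps
 * above PRI_MIN_TIMESHARE, before the nice value is added in sched_priority().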
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    ((ke->ke_flags & KEF_PRIOELEV) || SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int			ksq_transferable;
	LIST_ENTRY(kseq)	ksq_siblings;	/* Next in kseq group. */
	struct kseq_group	*ksq_group;	/* Our processor group. */
	volatile struct kse	*ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq)	ksg_members; /* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t kseq_idle;
static int ksg_maxid;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;

#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#define	KSEQ_ID(x)	((x) - kseq_cpu)
#define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
#else	/* !SMP */
static struct kseq	kseq_cpu;

#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void slot_fill(struct ksegrp *kg);
static struct kse *sched_choose(void);	/* XXX Should be thread * */
static void sched_add_internal(struct thread *td, int preemptive);
static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
/*
 * On P4 Xeons the round-robin interrupt delivery is broken.  As a result of
 * this, we can't pin interrupts to the cpu that they were delivered to,
 * otherwise all ithreads only run on CPU 0.
 */
#ifdef __i386__
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
#else /* !__i386__ */
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&		\
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif /* !__i386__ */
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
		kseq->ksq_transferable++;
		kseq->ksq_group->ksg_transferable++;
		ke->ke_flags |= KEF_XFERABLE;
	}
#endif
	runq_add(ke->ke_runq, ke, 0);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (ke->ke_flags & KEF_XFERABLE) {
		kseq->ksq_transferable--;
		kseq->ksq_group->ksg_transferable--;
		ke->ke_flags &= ~KEF_XFERABLE;
	}
#endif
	runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load++;
#else
		kseq->ksq_sysload++;
#endif
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		CTR6(KTR_ULE,
		    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
		    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
		    ke->ke_proc->p_nice, kseq->ksq_nicemin);
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_add(kseq, ke->ke_proc->p_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load--;
#else
		kseq->ksq_sysload--;
#endif
	kseq->ksq_load--;
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_rem(kseq, ke->ke_proc->p_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
		kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
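	/*
	 * Illustrative note: with the stock PRIO_MIN/PRIO_MAX of -20/20,
	 * SCHED_PRI_NHALF is 20, so nice values map to ksq_nice[] indices
	 * 0 through 40.
	 */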
	n = nice + SCHED_PRI_NHALF;
	kseq->ksq_nice[n]--;
	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != kseq->ksq_nicemin ||
	    kseq->ksq_nice[n] != 0 ||
	    kseq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (kseq->ksq_nice[n]) {
			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void)
{
	struct kseq_group *high;
	struct kseq_group *low;
	struct kseq_group *ksg;
	int cnt;
	int i;

	if (smp_started == 0)
		goto out;
	low = high = NULL;
	i = random() % (ksg_maxid + 1);
	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
		ksg = KSEQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || ksg->ksg_load > high->ksg_load)
		    && ksg->ksg_transferable)
			high = ksg;
		if (low == NULL || ksg->ksg_load < low->ksg_load)
			low = ksg;
		if (++i > ksg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->ksg_members),
		    LIST_FIRST(&low->ksg_members));
out:
	bal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_groups(void)
{
	int i;

	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= ksg_maxid; i++)
			sched_balance_group(KSEQ_GROUP(i));
	gbal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_group(struct kseq_group *ksg)
{
	struct kseq *kseq;
	struct kseq *high;
	struct kseq *low;
	int load;

	if (ksg->ksg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
		load = kseq->ksq_load;
		if (high == NULL || load > high->ksq_load)
			high = kseq;
		if (low == NULL || load < low->ksq_load)
			low = kseq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->ksq_group == low->ksq_group) {
		transferable = high->ksq_transferable;
		high_load = high->ksq_load;
		low_load = low->ksq_load;
	} else {
		transferable = high->ksq_group->ksg_transferable;
		high_load = high->ksq_group->ksg_load;
		low_load = low->ksq_group->ksg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		kseq_move(high, KSEQ_ID(low));
	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kseq *kseq;
	struct kseq *to;
	struct kse *ke;

	kseq = from;
	to = KSEQ_CPU(cpu);
	ke = kseq_steal(kseq, 1);
	if (ke == NULL) {
		struct kseq_group *ksg;

		ksg = kseq->ksq_group;
		LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
			if (kseq == from || kseq->ksq_transferable == 0)
				continue;
			ke = kseq_steal(kseq, 1);
			break;
		}
		if (ke == NULL)
			panic("kseq_move: No KSEs available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (kseq == to)
		return;
	ke->ke_state = KES_THREAD;
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
	kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
	struct kseq_group *ksg;
	struct kseq *steal;
	struct kse *ke;

	ksg = kseq->ksq_group;
	/*
	 * If we're in a cpu group, try and steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == kseq || steal->ksq_transferable == 0)
				continue;
			ke = kseq_steal(steal, 0);
			if (ke == NULL)
				continue;
			ke->ke_state = KES_THREAD;
			kseq_runq_rem(steal, ke);
			kseq_load_rem(steal, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			sched_add_internal(ke->ke_thread, 0);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a KSE bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&kseq_idle, ksg->ksg_mask);
	return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		*(volatile struct kse **)&ke = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		ke->ke_flags &= ~KEF_ASSIGNED;
		SLOT_RELEASE(ke->ke_thread->td_ksegrp);
		sched_add_internal(ke->ke_thread, 0);
	}
}

static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;
	int prio;

	ke->ke_cpu = cpu;
	ke->ke_flags |= KEF_ASSIGNED;
	SLOT_USE(ke->ke_thread->td_ksegrp);
	prio = ke->ke_thread->td_priority;

	kseq = KSEQ_CPU(cpu);

	/*
	 * Place a KSE on another cpu's queue and force a resched.
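	 * The kse is pushed onto the remote kseq's ksq_assigned list (linked
	 * through ke_assign) with a compare-and-swap loop; the target cpu
	 * later drains that list in kseq_assign() and re-adds each kse
	 * locally.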
	 */
	do {
		*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke,
				    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
	struct kse *ke;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if (stealidle)
		return (runq_steal(&kseq->ksq_idle));
	return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
	struct kseq_group *ksg;
	int cpu;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	ksg = kseq->ksq_group;
	if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) {
		ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group;
		if (kseq_idle & ksg->ksg_mask) {
			cpu = ffs(ksg->ksg_idlemask);
			if (cpu)
				goto migrate;
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu)
			goto migrate;
	}
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	ksg = kseq->ksq_group;
	if (ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu)
			goto migrate;
	}
	/*
	 * No new CPU was found.
	 */
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ke->ke_runq = NULL;
	kseq_notify(ke, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
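 *
 * The two timeshare queues, ksq_curr and ksq_next, are swapped when the
 * current one empties, so threads queued on ksq_next only run after the
 * current queue drains.  Expired or overly nice kses are given a new slice
 * and requeued on ksq_next below.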
 */
static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct runq *swap;
	struct kse *ke;
	int nice;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH &&
		    ke->ke_proc->p_nice != 0)) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke, 0);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int balance_groups;
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the kseqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct kseq *ksq;

		ksq = &kseq_cpu[i];
		ksq->ksq_assigned = NULL;
		kseq_setup(&kseq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct kseq_group *ksg;
		struct kseq *ksq;

		for (i = 0; i < MAXCPU; i++) {
			ksq = &kseq_cpu[i];
			ksg = &kseq_groups[i];
			/*
			 * Setup a kseq group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
		}
	} else {
		struct kseq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &kseq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	if (ke->ke_flags & KEF_PRIOELEV) {
		ke->ke_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 *	KSEs in interactive ksegs get a minimal slice so that we
	 *	quickly notice if it abuses its advantage.
	 *
	 *	KSEs in non-interactive ksegs are assigned a slice that is
	 *	based on the kseg's nice value relative to the least nice kseg
	 *	on the run queue for this cpu.
	 *
	 *	If the KSE is less nice than all others it gets the maximum
	 *	slice and other KSEs will adjust their slice relative to
	 *	this when they first expire.
	 *
	 *	There is a 20 point window that starts relative to the least
	 *	nice kse on the run queue.  Slice size is determined by
	 *	the kse distance from the least nice ksegrp.
	 *
	 *	If the kse is outside of the window it will get no slice
	 *	and will be reevaluated each time it is selected on the
	 *	run queue.  The exception to this is nice 0 ksegs when
	 *	a nice -20 is running.  They are always granted a minimum
	 *	slice.
	 */
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_proc->p_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_proc->p_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_INTERACTIVE;

	CTR6(KTR_ULE,
	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
	    ke, ke->ke_slice, kg->kg_proc->p_nice, kseq->ksq_nicemin,
	    kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

	return;
}

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
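 *
 * For example (assuming hz == 1000), SCHED_SLP_RUN_MAX is (1000 * 5) << 10,
 * or 5120000; once kg_runtime + kg_slptime passes that, both are scaled
 * back by 4/5, or halved when the sum is more than 6/5 of the maximum.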
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	} if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);

}

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	ksegrp0.kg_sched = &kg_sched0;
	thread0.td_sched = &kse0;
	kse0.ke_thread = &thread0;
	kse0.ke_state = KES_THREAD;
	kg_sched0.skg_concurrency = 1;
	kg_sched0.skg_avail_opennings = 0; /* we are already running */
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
		/*
		 * Shift the tick count out so that the divide doesn't
		 * round away our results.
		 */
		ke->ke_ticks <<= 10;
		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
		    SCHED_CPU_TICKS;
		ke->ke_ticks >>= 10;
	} else
		ke->ke_ticks = 0;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;

	ke = td->td_kse;
	mtx_assert(&sched_lock, MA_OWNED);
	if (TD_ON_RUNQ(td)) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  We still call adjustrunqueue below in case kse
		 * needs to fix things up.
		 */
		if (prio < td->td_priority && ke->ke_runq != NULL &&
		    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
			runq_remove(ke->ke_runq, ke);
			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
			runq_add(ke->ke_runq, ke, 0);
		}
		if (prio < td->td_priority)
			ke->ke_flags |= KEF_PRIOELEV;
		/*
		 * Hold this kse on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		ke->ke_flags |= KEF_HOLD;
		adjustrunqueue(td, prio);
	} else
		td->td_priority = prio;
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_pflags &= ~TDP_OWEPREEMPT;

	/*
	 * If the KSE has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		if (td == PCPU_GET(idlethread)) {
			TD_SET_CAN_RUN(td);
		} else {
			/* We are ending our run so make our slot available again */
			SLOT_RELEASE(td->td_ksegrp);
			if (TD_IS_RUNNING(td)) {
				kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
				/*
				 * Don't allow the thread to migrate
				 * from a preemption.
				 */
				ke->ke_flags |= KEF_HOLD;
				setrunqueue(td, SRQ_OURSELF|SRQ_YIELDING);
			} else {
				if (ke->ke_runq) {
					kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
				} else if ((td->td_flags & TDF_IDLETD) == 0)
					kdb_backtrace();
				/*
				 * We will not be on the run queue.
				 * So we must be sleeping or similar.
				 * Don't use the slot if we will need it
				 * for newtd.
				 */
				if ((td->td_proc->p_flag & P_HADTHREADS) &&
				    (newtd == NULL ||
				    newtd->td_ksegrp != td->td_ksegrp))
					slot_fill(td->td_ksegrp);
			}
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread,
		 * then account for it as if it had been added to the
		 * run queue and then chosen.
		 */
		newtd->td_kse->ke_flags |= KEF_DIDRUN;
		SLOT_USE(newtd->td_ksegrp);
		TD_SET_RUNNING(newtd);
		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
	} else
		newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd);
	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_pri_class == PRI_TIMESHARE) {
			FOREACH_THREAD_IN_GROUP(kg, td) {
				ke = td->td_kse;
				if (ke->ke_runq == NULL)
					continue;
				kseq = KSEQ_CPU(ke->ke_cpu);
				kseq_nice_rem(kseq, p->p_nice);
				kseq_nice_add(kseq, nice);
			}
		}
	}
	p->p_nice = nice;
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_priority(kg);
		FOREACH_THREAD_IN_GROUP(kg, td)
			td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_base_pri = td->td_priority;

	CTR2(KTR_ULE, "sleep thread %p (tick: %d)",
	    td, td->td_slptime);
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		sched_slice(td->td_kse);
		CTR2(KTR_ULE, "wakeup thread %p (%d ticks)", td, hzticks);
		td->td_slptime = 0;
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *childtd)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(td, childtd->td_ksegrp);
	sched_fork_thread(td, childtd);
}

void
sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
	struct ksegrp *kg = td->td_ksegrp;
	mtx_assert(&sched_lock, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
	    kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime,
	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct kse *ke;
	struct kse *ke2;

	sched_newthread(child);
	ke = td->td_kse;
	ke2 = child->td_kse;
	ke2->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	ke2->ke_cpu = ke->ke_cpu;
	ke2->ke_runq = NULL;

	/* Grab our parent's cpu estimation information. */
	ke2->ke_ticks = ke->ke_ticks;
	ke2->ke_ltick = ke->ke_ltick;
	ke2->ke_ftick = ke->ke_ftick;
}

void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	struct thread *td;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		ke = td->td_kse;
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			if (KSE_CAN_MIGRATE(ke, oclass)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke, nclass)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_proc->p_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_proc->p_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 * Avoid using sched_exit_thread to avoid having to decide which
 * thread in the parent gets the honour since it isn't used.
 */
void
sched_exit(struct proc *p, struct thread *childtd)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd);
	kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *td)
{
	/* kg->kg_slptime += td->td_ksegrp->kg_slptime; */
	kg->kg_runtime += td->td_ksegrp->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *childtd)
{
	kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
	if (ticks == bal_tick)
		sched_balance();
	if (ticks == gbal_tick)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (kseq->ksq_assigned)
		kseq_assign(kseq);	/* Potentially sets NEEDRESCHED */
#endif
	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
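		 * For example, with hz == 1000 and stathz == 128 the
		 * increment computed above is 7; if stathz were larger
		 * than hz the division would truncate to 0, which the
		 * check below papers over.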
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;

	CTR4(KTR_ULE, "Tick thread %p (slice: %d, slptime: %d, runtime: %d)",
	    td, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (td->td_priority != kg->kg_user_pri ||
	    ke->ke_flags & KEF_PRIOELEV) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		if (ke->ke_flags & KEF_PRIOELEV) {
			ke->ke_flags &= ~KEF_PRIOELEV;
			sched_slice(ke);
			if (ke->ke_slice == 0)
				mi_switch(SW_INVOL, NULL);
		}
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run thread %p from %p (slice: %d, pri: %d)",
			    ke->ke_thread, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td, int flags)
{

	/* let jeff work out how to map the flags better */
	/* I'm open to suggestions */
	if (flags & SRQ_YIELDING)
		/*
		 * Preempting during switching can be bad JUJU
		 * especially for KSE processes
		 */
		sched_add_internal(td, 0);
	else
		sched_add_internal(td, 1);
}

static void
sched_add_internal(struct thread *td, int preemptive)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
#ifdef SMP
	int canmigrate;
#endif
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	if (ke->ke_flags & KEF_ASSIGNED) {
		if (ke->ke_flags & KEF_REMOVED) {
			SLOT_USE(ke->ke_ksegrp);
			ke->ke_flags &= ~KEF_REMOVED;
		}
		return;
	}
	kseq = KSEQ_SELF();
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));

	class = PRI_BASE(kg->kg_pri_class);
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority prop.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	/*
	 * Don't migrate running threads here.  Force the long term balancer
	 * to do it.
	 */
	canmigrate = KSE_CAN_MIGRATE(ke, class);
	if (ke->ke_flags & KEF_HOLD) {
		ke->ke_flags &= ~KEF_HOLD;
		canmigrate = 0;
	}
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid)) {
		ke->ke_runq = NULL;
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	} else if (kseq->ksq_load > 1 && canmigrate)
		if (kseq_transfer(kseq, ke, class))
			return;
	ke->ke_cpu = PCPU_GET(cpuid);
#endif
	/*
	 * XXX With preemption this is not necessary.
	 */
	if (td->td_priority < curthread->td_priority &&
	    ke->ke_runq == kseq->ksq_curr)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	SLOT_USE(td->td_ksegrp);
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke);
	kseq_load_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	/*
	 * It is safe to just return here because sched_rem() is only ever
	 * used in places where we're immediately going to add the
	 * kse back on again.  In that case it'll be added with the correct
	 * thread and priority when the caller drops the sched_lock.
	 */
	if (ke->ke_flags & KEF_ASSIGNED) {
		SLOT_RELEASE(td->td_ksegrp);
		ke->ke_flags |= KEF_REMOVED;
		return;
	}
	KASSERT((ke->ke_state == KES_ONRUNQ),
	    ("sched_rem: KSE not on run queue"));

	SLOT_RELEASE(td->td_ksegrp);
	ke->ke_state = KES_THREAD;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
		    ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rticks per second? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= ksg_maxid; i++)
		total += KSEQ_GROUP(i)->ksg_load;
	return (total);
#else
	return (KSEQ_SELF()->ksq_sysload);
#endif
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"