/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD,
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;	/* Tick count when we went to sleep. */
	int		ts_slice;	/* Ticks remaining in the time slice. */
	struct runq	*ts_runq;	/* Run queue we're assigned to. */
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_REMOVED	0x0008		/* Thread was removed while ASSIGNED */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */
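
/*
 * Editorial note (a sketch of the migration hand-off, not authoritative
 * documentation): ts_assign reuses the run queue linkage (tqe_next) to
 * chain td_scheds that have been pushed onto a remote cpu's tdq_assigned
 * list by tdq_notify().  While TSF_ASSIGNED is set the thread is in
 * transit; if it is removed from the run queue in that window it is only
 * marked TSF_REMOVED and the target cpu discards it when it drains the
 * list in tdq_assign().
 */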

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	(max((ts)->ts_ltick - (ts)->ts_ftick, hz))

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)

/*
 * These determine the interactivity of a process.  Interactivity differs from
 * cpu utilization in that it expresses the voluntary time slept vs time ran
 * while cpu utilization includes all time not running.  This more accurately
 * models the intent of the thread.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;
static int sched_rebalance = 1;
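
/*
 * Worked example (illustrative only, assuming hz = 1000 and stathz = 128;
 * both vary by platform): SCHED_TICK_TARG = hz * 10 = 10000, so cpu usage
 * is averaged over roughly the last ten seconds.  tickincr, computed later
 * in sched_initticks(), is (hz << SCHED_TICK_SHIFT) / realstathz =
 * (1000 << 10) / 128 = 8000, i.e. each stathz tick charges 8000 shifted
 * hz-domain units to ts_ticks.  sched_slice = realstathz / 7 = 18 stathz
 * ticks, or roughly 140ms of cpu time per slice.
 *
 * For SCHED_PRI_TICKS(), a thread that consumed the cpu for its entire
 * measurement window maps to the high (numerically larger, i.e. worse) end
 * of SCHED_PRI_RANGE, while one that used almost no cpu lands near
 * SCHED_PRI_MIN, before SCHED_PRI_NICE() adds the nice value.
 */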

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;	/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;	/* timeshare run queue. */
	struct runq	tdq_realtime;	/* real-time run queue. */
	int		tdq_idx;	/* Current insert index. */
	int		tdq_ridx;	/* Current removal index. */
	int		tdq_load;	/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;	/* Next in tdq group. */
	struct tdq_group *tdq_group;	/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;	/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int		tdg_cpus;	/* Count of CPUs in this tdq group. */
	cpumask_t	tdg_cpumask;	/* Mask of cpus in this group. */
	cpumask_t	tdg_idlemask;	/* Idle cpus in this group. */
	cpumask_t	tdg_mask;	/* Bit mask for first cpu. */
	int		tdg_load;	/* Total load of this group. */
	int		tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif

/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);
static inline void sched_pin_td(struct thread *td);
static inline void sched_unpin_td(struct thread *td);

/* Operations on per processor queues */
static struct td_sched *tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(void);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);

#define	THREAD_CAN_MIGRATE(td)						\
    ((td)->td_pinned == 0 && (td)->td_pri_class != PRI_ITHD)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static inline void
sched_pin_td(struct thread *td)
{
	td->td_pinned++;
}

static inline void
sched_unpin_td(struct thread *td)
{
	td->td_pinned--;
}

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm,
					    ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload: %d\n", tdq->tdq_load);
	printf("\ttimeshare idx: %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}
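
/*
 * Editorial sketch of the timeshare queue handling below (illustrative,
 * not authoritative documentation): tdq_runq_add() and tdq_runq_rem()
 * treat the timeshare runq as a circular calendar of RQ_NQS buckets.
 * Threads are inserted at an offset from tdq_idx proportional to how far
 * their priority is from the best timeshare priority, so worse priorities
 * land farther ahead of the removal point tdq_ridx and wait longer for the
 * calendar to rotate around to them; threads re-queued with SRQ_BORROWING
 * go straight to tdq_ridx.  sched_clock() advances tdq_idx each tick while
 * the two indices are equal, and tdq_ridx is only allowed to catch up once
 * its current bucket has drained, which provides nice- and cpu-usage-based
 * starvation avoidance without a separate aging pass.
 */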

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		int pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * timeshare.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(void)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (sched_rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			sched_pin_td(ts->ts_thread);
			sched_add(ts->ts_thread, SRQ_YIELDING);
			sched_unpin_td(ts->ts_thread);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread
	 * bounces back and forth between two idle cores on separate
	 * physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		sched_pin_td(ts->ts_thread);
		sched_add(ts->ts_thread, SRQ_YIELDING);
		sched_unpin_td(ts->ts_thread);
	}
}
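
/*
 * Editorial sketch of the cross-cpu hand-off used by tdq_notify() below:
 * the td_sched is pushed onto the target cpu's tdq_assigned list with a
 * compare-and-swap loop (a lock-free LIFO push, chaining through the
 * ts_assign alias of the run queue linkage), and an IPI is sent only for
 * kernel-or-better priorities that beat the remote cpu's current thread.
 * The target cpu later drains the list in tdq_assign(), either from
 * sched_smp_tick() or when it next looks for work, and re-adds each
 * thread with sched_add().
 */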

static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class != PRI_IDLE && class != PRI_ITHD)
	    && (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/* Only ipi for realtime/ithd priorities */
	if (ts->ts_thread->td_priority > PRI_MIN_KERN)
		return;
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */
static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz / 7);	/* 140ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz / 7);	/* ~140ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int score;
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * If the score is interactive we place the thread in the realtime
	 * queue with a priority that is less than kernel and interrupt
	 * priorities.  These threads are not subject to nice restrictions.
	 *
	 * Scores greater than this are placed on the normal timeshare queue
	 * where the priority is partially decided by the most recent cpu
	 * utilization and the rest is decided by nice value.
	 */
	score = sched_interact_score(td);
	if (score < sched_interact) {
		pri = PRI_MIN_REALTIME;
		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
		    * score;
		KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME,
		    ("sched_priority: invalid interactive priority %d", pri));
	} else {
		pri = SCHED_PRI_MIN;
		if (td->td_sched->ts_ticks)
			pri += SCHED_PRI_TICKS(td->td_sched);
		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
		if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) {
			static int once = 1;
			if (once) {
				printf("sched_priority: invalid priority %d",
				    pri);
				printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n",
				    td->td_proc->p_nice,
				    td->td_sched->ts_ticks,
				    td->td_sched->ts_ftick,
				    td->td_sched->ts_ltick,
				    SCHED_PRI_TICKS(td->td_sched));
				once = 0;
			}
			pri = min(max(pri, PRI_MIN_TIMESHARE),
			    PRI_MAX_TIMESHARE);
		}
	}
	sched_user_prio(td, pri);

	return;
}
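
/*
 * Worked example of the interactivity scoring used above (illustrative
 * only): a thread that voluntarily sleeps three times as long as it runs
 * has skg_slptime = 3 * skg_runtime, so sched_interact_score() returns
 * roughly runtime / (slptime / 50) ~= 16, which is below
 * SCHED_INTERACT_THRESH (30); the thread is scored interactive and placed
 * in the realtime range by sched_priority().  A cpu bound thread with
 * little voluntary sleep scores above 50, stays in the timeshare range,
 * and its exact priority then depends on recent cpu use (SCHED_PRI_TICKS)
 * and nice (SCHED_PRI_NICE).
 */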

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 */
static void
sched_interact_update(struct thread *td)
{
	struct td_sched *ts;
	int sum;

	ts = td->td_sched;
	sum = ts->skg_runtime + ts->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * This only happens from two places:
	 * 1) We have added an unusual amount of run time from fork_exit.
	 * 2) We have added an unusual amount of sleep time from sched_sleep().
	 */
	if (sum > SCHED_SLP_RUN_MAX * 2) {
		if (ts->skg_runtime > ts->skg_slptime) {
			ts->skg_runtime = SCHED_SLP_RUN_MAX;
			ts->skg_slptime = 1;
		} else {
			ts->skg_slptime = SCHED_SLP_RUN_MAX;
			ts->skg_runtime = 1;
		}
		return;
	}
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX].
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		ts->skg_runtime /= 2;
		ts->skg_slptime /= 2;
		return;
	}
	ts->skg_runtime = (ts->skg_runtime / 5) * 4;
	ts->skg_slptime = (ts->skg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct thread *td)
{
	int ratio;
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		td->td_sched->skg_runtime /= ratio;
		td->td_sched->skg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct thread *td)
{
	int div;

	if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) {
		div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div)));
	}
	if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) {
		div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF);
		return (td->td_sched->skg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}

/*
 * Called from proc0_init() to bootstrap the scheduler.
 */
void
schedinit(void)
{

	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL;	/* XXX */
	thread0.td_sched = &td_sched0;
	td_sched0.ts_ltick = ticks;
	td_sched0.ts_ftick = ticks;
	td_sched0.ts_thread = &thread0;
	td_sched0.ts_state = TSS_THREAD;
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most sched_slice stathz ticks.
 */
int
sched_rr_interval(void)
{

	/* Convert sched_slice to hz */
	return (hz / (realstathz / sched_slice));
}
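
/*
 * Editorial note (a sketch of the mechanism, not authoritative): %cpu is
 * derived from ts_ticks, which accumulates tickincr units per stathz tick
 * while the thread runs, over the window bounded by ts_ftick and ts_ltick.
 * sched_pctcpu_update() below rescales ts_ticks once the window grows past
 * SCHED_TICK_TARG (about ten seconds) so that old history decays, and
 * sched_pctcpu() later converts the windowed tick count into the fixed
 * point fraction reported by ps(1) and top(1).
 */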

static void
sched_pctcpu_update(struct td_sched *ts)
{

	if (ts->ts_ticks == 0)
		return;
	if (ticks - (hz / 10) < ts->ts_ltick &&
	    SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX)
		return;
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ts->ts_ltick > ticks - SCHED_TICK_TARG)
		ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) *
		    SCHED_TICK_TARG;
	else
		ts->ts_ticks = 0;
	ts->ts_ltick = ticks;
	ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG;
}

static void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct td_sched *ts;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ts = td->td_sched;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;

	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  This could be optimized to not re-add in some
		 * cases.
		 *
		 * Hold this td_sched on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		sched_pin_td(td);
		sched_rem(td);
		td->td_priority = prio;
		sched_add(td, SRQ_BORROWING);
		sched_unpin_td(td);
	} else
		td->td_priority = prio;
}
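
/*
 * Editorial example of the priority lending protocol implemented by the
 * functions below (illustrative only): when a thread holding a lock is
 * lent a better priority through turnstile propagation, sched_lend_prio()
 * marks it TDF_BORROWING and applies the lender's priority.  When the
 * propagation ends, sched_unlend_prio() is handed the minimum priority
 * still required by any remaining lender; if that is no better than the
 * thread's own base priority the borrow flag is cleared and the base
 * priority is restored, otherwise the thread keeps borrowing at the new
 * value.
 */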

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_base_user_pri = prio;
	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
		return;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_flags |= TDF_UBORROWING;

	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	base_pri = td->td_base_user_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
		sched_user_prio(td, base_pri);
	} else
		sched_lend_user_prio(td, prio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	tdq = TDQ_SELF();
	ts = td->td_sched;
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;
	/*
	 * If the thread has been assigned it may be in the process of
	 * switching to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ts->ts_flags & TSF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		tdq_load_rem(tdq, ts);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			sched_pin_td(td);
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			sched_unpin_td(td);
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread account for it as if it had been
		 * added to the run queue and then chosen.
		 */
		newtd->td_sched->ts_flags |= TSF_DIDRUN;
		TD_SET_RUNNING(newtd);
		tdq_load_add(TDQ_SELF(), newtd->td_sched);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif

		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);

	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		sched_priority(td);
		sched_prio(td, td->td_base_user_pri);
	}
}

void
sched_sleep(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);

	td->td_sched->ts_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	int slptime;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * If we slept for more than a tick update our interactivity and
	 * priority.
	 */
	slptime = td->td_sched->ts_slptime;
	td->td_sched->ts_slptime = 0;
	if (slptime && slptime != ticks) {
		int hzticks;

		hzticks = (ticks - slptime) << SCHED_TICK_SHIFT;
		td->td_sched->skg_slptime += hzticks;
		sched_interact_update(td);
		sched_pctcpu_update(td->td_sched);
		sched_priority(td);
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_fork_thread(td, child);
	/*
	 * Penalize the parent and child for forking.
	 */
	sched_interact_fork(child);
	sched_priority(child);
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct td_sched *ts;
	struct td_sched *ts2;

	/*
	 * Initialize child.
	 */
	sched_newthread(child);
	ts = td->td_sched;
	ts2 = child->td_sched;
	ts2->ts_cpu = ts->ts_cpu;
	ts2->ts_runq = NULL;
	/*
	 * Grab our parent's cpu estimation information and priority.
	 */
	ts2->ts_ticks = ts->ts_ticks;
	ts2->ts_ltick = ts->ts_ltick;
	ts2->ts_ftick = ts->ts_ftick;
	child->td_user_pri = td->td_user_pri;
	child->td_base_user_pri = td->td_base_user_pri;
	/*
	 * And update interactivity score.
	 */
	ts2->skg_slptime = ts->skg_slptime;
	ts2->skg_runtime = ts->skg_runtime;
	ts2->ts_slice = 1;	/* Attempt to quickly learn interactivity. */
}

void
sched_class(struct thread *td, int class)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_pri_class == class)
		return;

#ifdef SMP
	/*
	 * On SMP if we're on the RUNQ we must adjust the transferable
	 * count because we could be changing to or from an interrupt
	 * class.
	 */
	if (td->td_sched->ts_state == TSS_ONRUNQ) {
		struct tdq *tdq;

		tdq = TDQ_CPU(td->td_sched->ts_cpu);
		if (THREAD_CAN_MIGRATE(td)) {
			tdq->tdq_transferable--;
			tdq->tdq_group->tdg_transferable--;
		}
		td->td_pri_class = class;
		if (THREAD_CAN_MIGRATE(td)) {
			tdq->tdq_transferable++;
			tdq->tdq_group->tdg_transferable++;
		}
	}
#endif
	td->td_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *child)
{
	struct thread *td;

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	td = FIRST_THREAD_IN_PROC(p);
	sched_exit_thread(td, child);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{

	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched);
#ifdef KSE
	/*
	 * KSE forks and exits so often that this penalty causes short-lived
	 * threads to always be non-interactive.  This causes mozilla to
	 * crawl under load.
	 */
	if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc)
		return;
#endif
	/*
	 * Give the child's runtime to the parent without returning the
	 * sleep time as a penalty to the parent.  This causes shells that
	 * launch expensive things to mark their children as expensive.
	 */
	td->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_userret(struct thread *td)
{
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	if (td->td_priority != td->td_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = td->td_user_pri;
		td->td_base_pri = td->td_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}
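
/*
 * Editorial note on the per-tick accounting in sched_clock() below (a
 * sketch, with the ~140ms figure assuming the stock sched_slice of
 * realstathz / 7): each stathz tick rotates the timeshare calendar queue,
 * charges tickincr to the pctcpu counter (ts_ticks) and, for timeshare
 * threads, to the interactivity runtime (skg_runtime) while decrementing
 * the slice.  Only when the slice is exhausted, roughly every 140ms of cpu
 * time for a timeshare thread, is the priority recomputed and NEEDRESCHED
 * raised.
 */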

void
sched_clock(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	sched_smp_tick();
#endif
	tdq = TDQ_SELF();
	/*
	 * Advance the insert index once for each tick to ensure that all
	 * threads get a chance to run.
	 */
	if (tdq->tdq_idx == tdq->tdq_ridx) {
		tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
		if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
			tdq->tdq_ridx = tdq->tdq_idx;
	}
	/* Adjust ticks for pctcpu */
	ts = td->td_sched;
	ts->ts_ticks += tickincr;
	ts->ts_ltick = ticks;
	/*
	 * Update if we've exceeded our desired tick threshold by over one
	 * second.
	 */
	if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick)
		sched_pctcpu_update(ts);
	/*
	 * We only do slicing code for TIMESHARE threads.
	 */
	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the thread so that we can compute our
	 * interactivity.
	 */
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	/*
	 * We used up one time slice.
	 */
	if (--ts->ts_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	sched_priority(td);
	tdq_load_rem(tdq, ts);
	ts->ts_slice = sched_slice;
	tdq_load_add(tdq, ts);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct tdq *tdq;
	int load;

	load = 1;

	tdq = TDQ_SELF();
#ifdef SMP
	if (tdq->tdq_assigned) {
		mtx_lock_spin(&sched_lock);
		tdq_assign(tdq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (tdq->tdq_load > 0)
			goto out;
	} else
		if (tdq->tdq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

struct td_sched *
sched_choose(void)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
#ifdef SMP
restart:
	if (tdq->tdq_assigned)
		tdq_assign(tdq);
#endif
	ts = tdq_choose(tdq);
	if (ts) {
#ifdef SMP
		if (ts->ts_thread->td_priority > PRI_MIN_IDLE)
			if (tdq_idled(tdq) == 0)
				goto restart;
#endif
		tdq_runq_rem(tdq, ts);
		ts->ts_state = TSS_THREAD;
		return (ts);
	}
#ifdef SMP
	if (tdq_idled(tdq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int preemptive;
	int canmigrate;
	int class;

	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
	ts = td->td_sched;
	class = PRI_BASE(td->td_pri_class);
	preemptive = !(flags & SRQ_YIELDING);
	canmigrate = 1;
#ifdef SMP
	if (ts->ts_flags & TSF_ASSIGNED) {
		if (ts->ts_flags & TSF_REMOVED)
			ts->ts_flags &= ~TSF_REMOVED;
		return;
	}
	canmigrate = THREAD_CAN_MIGRATE(td);
#endif
	KASSERT(ts->ts_state != TSS_ONRUNQ,
	    ("sched_add: thread %p (%s) already in run queue", td,
	    td->td_proc->p_comm));
	KASSERT(td->td_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ts->ts_runq == NULL,
	    ("sched_add: thread %p is still assigned to a run queue", td));
	/*
	 * Set the slice and pick the run queue.
	 */
	if (ts->ts_slice == 0)
		ts->ts_slice = sched_slice;
	if (class == PRI_TIMESHARE)
		sched_priority(td);
	if (td->td_priority <= PRI_MAX_REALTIME) {
		ts->ts_runq = &tdq->tdq_realtime;
		/*
		 * If the thread is not artificially pinned and it's in
		 * the realtime queue we directly dispatch it on this cpu
		 * for minimum latency.  Interrupt handlers may also have
		 * to complete on the cpu that dispatched them.
		 */
		if (td->td_pinned == 0 && class == PRI_ITHD)
			ts->ts_cpu = PCPU_GET(cpuid);
	} else if (td->td_priority <= PRI_MAX_TIMESHARE)
		ts->ts_runq = &tdq->tdq_timeshare;
	else
		ts->ts_runq = &tdq->tdq_idle;

#ifdef SMP
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid)) {
		ts->ts_runq = NULL;
		tdq_notify(ts, ts->ts_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class != PRI_IDLE && class != PRI_ITHD) &&
	    (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (tdq->tdq_group->tdg_idlemask ==
		    tdq->tdq_group->tdg_cpumask)
			atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && tdq->tdq_load > 1)
		if (tdq_transfer(tdq, ts, class))
			return;
	ts->ts_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ts->ts_state = TSS_ONRUNQ;

	tdq_runq_add(tdq, ts, flags);
	tdq_load_add(tdq, ts);
}

void
sched_rem(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	if (ts->ts_flags & TSF_ASSIGNED) {
		ts->ts_flags |= TSF_REMOVED;
		return;
	}
	KASSERT((ts->ts_state == TSS_ONRUNQ),
	    ("sched_rem: thread not on run queue"));

	ts->ts_state = TSS_THREAD;
	tdq = TDQ_CPU(ts->ts_cpu);
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct td_sched *ts;

	pctcpu = 0;
	ts = td->td_sched;
	if (ts == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ts->ts_ticks) {
		int rtick;

		sched_pctcpu_update(ts);
		/* How many rtick per second ? */
		rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz);
		pctcpu = (FSCALE * ((FSCALE * rtick) / hz)) >> FSHIFT;
	}
	td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT((ts->ts_flags & TSF_BOUND) == 0,
	    ("sched_bind: thread %p already bound.", td));
	ts->ts_flags |= TSF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ts->ts_state = TSS_THREAD;
	tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts);
	tdq_notify(ts, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
	sched_pin();
#endif
}

void
sched_unbind(struct thread *td)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT(ts->ts_flags & TSF_BOUND,
	    ("sched_unbind: thread %p not bound.", td));
	mtx_assert(&sched_lock, MA_OWNED);
	ts->ts_flags &= ~TSF_BOUND;
#ifdef SMP
	sched_unpin();
#endif
}

int
sched_is_bound(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	return (td->td_sched->ts_flags & TSF_BOUND);
}

void
sched_relinquish(struct thread *td)
{
	mtx_lock_spin(&sched_lock);
	if (td->td_pri_class == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL, NULL);
	mtx_unlock_spin(&sched_lock);
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= tdg_maxid; i++)
		total += TDQ_GROUP(i)->tdg_load;
	return (total);
#else
	return (TDQ_SELF()->tdq_sysload);
#endif
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}

void
sched_tick(void)
{
}

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");
SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &sched_rebalance, 0, "");

/* ps compat */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

#define	KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"