/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        int error;
        int val;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        switch_preempt = 0;
        switch_owepreempt = 0;
        switch_turnstile = 0;
        switch_sleepq = 0;
        switch_sleepqtimo = 0;
        switch_relinquish = 0;
        switch_needresched = 0;

        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif
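
/*
 * Editorial usage note (not in the original file): when the kernel is built
 * with the SCHED_STATS option, the counters above can be inspected and
 * cleared from userland, e.g.:
 *
 *      sysctl kern.sched.stats
 *      sysctl kern.sched.stats.reset=1
 *
 * Writing any non-zero integer to the "reset" node runs
 * sysctl_stats_reset() above, which zeroes all of the switch_* counters.
 */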

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.    *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
        if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
                /* Shutting down, run idlethread on AP's */
                td = PCPU_GET(idlethread);
                CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
                TD_SET_RUNNING(td);
                return (td);
        }
#endif

retry:
        td = sched_choose();

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        SCHED_STAT_INC(switch_owepreempt);
                        mi_switch(SW_INVOL|SW_PREEMPT, NULL);
                        thread_unlock(td);
                }
        } else
#endif
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

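/*
 * Illustrative sketch (editorial addition): critical sections nest by
 * bumping td_critnest, so a typical caller looks like
 *
 *      critical_enter();
 *      ... per-CPU work that must not be preempted ...
 *      critical_exit();
 *
 * Only the outermost critical_exit() (the one that drops td_critnest to 0)
 * performs the deferred preemption recorded in td_owepreempt above.
 */
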
/*
 * This function is called when a thread is about to be put on run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines if the new thread should be immediately preempted to.  If so,
 * it switches to it and eventually returns true.  If not, it returns false
 * so that the caller may place the thread on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
        struct thread *ctd;
        int cpri, pri;
#endif

#ifdef PREEMPTION
        /*
         * The new thread should not preempt the current thread if any of the
         * following conditions are true:
         *
         *  - The kernel is in the throes of crashing (panicstr).
         *  - The current thread has a higher (numerically lower) or
         *    equivalent priority.  Note that this prevents curthread from
         *    trying to preempt to itself.
         *  - It is too early in the boot for context switches (cold is set).
         *  - The current thread has an inhibitor set or is in the process of
         *    exiting.  In this case, the current thread is about to switch
         *    out anyways, so there's no point in preempting.  If we did,
         *    the current thread would not be properly resumed as well, so
         *    just avoid that whole landmine.
         *  - If the new thread's priority is not a realtime priority and
         *    the current thread's priority is not an idle priority and
         *    FULL_PREEMPTION is disabled.
         *
         * If all of these conditions are false, but the current thread is in
         * a nested critical section, then we have to defer the preemption
         * until we exit the critical section.  Otherwise, switch immediately
         * to the new thread.
         */
        ctd = curthread;
        THREAD_LOCK_ASSERT(td, MA_OWNED);
        KASSERT((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd),
            ("thread has no (or wrong) sched-private part."));
        KASSERT((td->td_inhibitors == 0),
            ("maybe_preempt: trying to run inhibited thread"));
        pri = td->td_priority;
        cpri = ctd->td_priority;
        if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
            TD_IS_INHIBITED(ctd))
                return (0);
#ifndef FULL_PREEMPTION
        if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
                return (0);
#endif

        if (ctd->td_critnest > 1) {
                CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
                    ctd->td_critnest);
                ctd->td_owepreempt = 1;
                return (0);
        }
        /*
         * Thread is runnable but not yet put on system run queue.
         */
        MPASS(ctd->td_lock == td->td_lock);
        MPASS(TD_ON_RUNQ(td));
        TD_SET_RUNNING(td);
        CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
            td->td_proc->p_pid, td->td_proc->p_comm);
        SCHED_STAT_INC(switch_preempt);
        mi_switch(SW_INVOL|SW_PREEMPT, td);
        /*
         * td's lock pointer may have changed.  We have to return with it
         * locked.
         */
        spinlock_enter();
        thread_unlock(ctd);
        thread_lock(td);
        spinlock_exit();
        return (1);
#else
        return (0);
#endif
}

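/*
 * Editorial note restating the #ifndef FULL_PREEMPTION test above: without
 * FULL_PREEMPTION, preemption is skipped unless the new thread runs at an
 * interrupt-class priority (pri <= PRI_MAX_ITHD) or the current thread is
 * running at an idle-class priority (cpri >= PRI_MIN_IDLE).  In particular,
 * two ordinary timesharing threads never preempt one another here; the
 * higher-priority one simply runs at the next voluntary switch point.
 */
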
#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
        printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

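/*
 * Worked example (editorial addition; assumes the usual <sys/runq.h>
 * values RQ_PPQ = 4 and a 32-bit rqb_word_t, i.e. RQB_BPW = 32, which are
 * not defined in this file): a thread at td_priority 100 is hashed by
 * runq_add() below to run queue index 100 / RQ_PPQ = 25, which lives in
 * status word RQB_WORD(25) = 0 as bit RQB_BIT(25) = 1 << 25.  A later
 * runq_findbit() recovers index 25 from that word as
 * RQB_FFS(word) + (0 << RQB_L2BPW).
 */
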
/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = ts->ts_thread->td_priority / RQ_PPQ;
        ts->ts_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
        } else {
                TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
        }
}

void
runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        ts->ts_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_add_pri: td=%p ke=%p pri=%d idx=%d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
        } else {
                TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
        }
}
/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority process on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct td_sched *ts;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
                /* fuzz == 1 is normal.. 0 or less are ignored */
                if (runq_fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = runq_fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct td_sched *ts2;
                        ts2 = ts = TAILQ_FIRST(rqh);

                        while (count-- && ts2) {
                                if (ts2->ts_thread->td_lastcpu == cpu) {
                                        ts = ts2;
                                        break;
                                }
                                ts2 = TAILQ_NEXT(ts2, ts_procq);
                        }
                } else
#endif
                        ts = TAILQ_FIRST(rqh);
                KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
                return (ts);
        }
        CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

        return (NULL);
}

struct td_sched *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct td_sched *ts;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                ts = TAILQ_FIRST(rqh);
                KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d kse=%p idx=%d rqh=%p",
                    pri, ts, ts->ts_rqindex, rqh);
                return (ts);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

        return (NULL);
}
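
/*
 * Illustrative sketch (editorial addition, compiled out): one way a
 * scheduler could drive the runq primitives above.  The static queue and
 * the function name are hypothetical; the real consumers are the
 * schedulers themselves (e.g. sched_4bsd), which keep their own
 * struct runq instances.
 */
#if 0
static struct runq example_runq;

static struct td_sched *
example_pick_next(struct td_sched *ts)
{

        runq_init(&example_runq);               /* once, at set-up time */
        runq_add(&example_runq, ts, 0);         /* queue at its own priority */
        if (runq_check(&example_runq) == 0)     /* anything runnable? */
                return (NULL);
        return (runq_choose(&example_runq));    /* highest-priority entry */
}
#endif
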
/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

        runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
            ("runq_remove_idx: thread swapped out"));
        pri = ts->ts_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        {
                struct td_sched *nts;

                TAILQ_FOREACH(nts, rqh, ts_procq)
                        if (nts == ts)
                                break;
                if (ts != nts)
                        panic("runq_remove_idx: ts %p not on rqindex %d",
                            ts, pri);
        }
        TAILQ_REMOVE(rqh, ts, ts_procq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

/*
 * Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * Thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
        struct td_sched *ts;

        ts = (struct td_sched *) (td + 1);
        bzero(ts, sizeof(*ts));
        td->td_sched = ts;
        ts->ts_thread = td;
}

#endif /* KERN_SWITCH_INCLUDE */