/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
/* The counters are longs, so export them with SYSCTL_LONG, not SYSCTL_INT. */
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	int error;
	int val;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	switch_preempt = 0;
	switch_owepreempt = 0;
	switch_turnstile = 0;
	switch_sleepq = 0;
	switch_sleepqtimo = 0;
	switch_relinquish = 0;
	switch_needresched = 0;

	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif
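
/*
 * Illustrative user-space usage (not part of this file): both OIDs can be
 * driven with sysctlbyname(3).  A minimal sketch, assuming a standard libc
 * environment with <sys/sysctl.h> and <stdio.h>:
 *
 *	int v, one = 1;
 *	size_t len = sizeof(v);
 *
 *	if (sysctlbyname("kern.sched.preemption", &v, &len, NULL, 0) == 0)
 *		printf("preemption compiled in: %d\n", v);
 *	// Writing a non-zero int clears the counters; this OID exists
 *	// only in SCHED_STATS kernels.
 *	(void)sysctlbyname("kern.sched.stats.reset", NULL, NULL, &one,
 *	    sizeof(one));
 */
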
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

retry:
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));

	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			thread_lock(td);
			td->td_critnest--;
			SCHED_STAT_INC(switch_owepreempt);
			mi_switch(SW_INVOL|SW_PREEMPT, NULL);
			thread_unlock(td);
		}
	} else
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
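
/*
 * Example (illustrative only): a typical use of a critical section is to
 * keep a thread on its CPU while it manipulates per-CPU state.  With a
 * hypothetical pcpu member "foo_count" (not a real field), it would look
 * like:
 *
 *	critical_enter();
 *	PCPU_SET(foo_count, PCPU_GET(foo_count) + 1);
 *	critical_exit();
 *
 * The thread cannot be preempted, and therefore cannot migrate, between
 * the enter and the exit; any preemption requested in that window sets
 * td_owepreempt and is deferred until critical_exit() drops the last
 * nesting level, as implemented above.
 */
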
/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the current thread should be preempted immediately in
 * favor of the new thread.  If so, it switches to it and eventually returns
 * true.  If not, it returns false so that the caller may place the thread
 * on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyway, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - The new thread's priority is not a realtime priority, the
	 *    current thread's priority is not an idle priority, and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd),
	    ("thread has no (or wrong) sched-private part."));
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd))
		return (0);
#ifndef FULL_PREEMPTION
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_owepreempt = 1;
		return (0);
	}
	/*
	 * Thread is runnable but not yet put on system run queue.
	 */
	MPASS(ctd->td_lock == td->td_lock);
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_name);
	SCHED_STAT_INC(switch_preempt);
	mi_switch(SW_INVOL|SW_PREEMPT, td);
	/*
	 * td's lock pointer may have changed.  We have to return with it
	 * locked.
	 */
	spinlock_enter();
	thread_unlock(ctd);
	thread_lock(td);
	spinlock_exit();
	return (1);
#else
	return (0);
#endif
}
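
/*
 * Sketch of the caller-side contract (illustrative; the real callers live
 * in the schedulers' sched_add()/setrunqueue paths, and "flags" stands in
 * for whatever SRQ_* flags the caller holds):
 *
 *	if (!maybe_preempt(td))
 *		runq_add(rq, td->td_sched, flags);
 *
 * That is, a true return means maybe_preempt() has already switched to the
 * new thread, so the caller must not also enqueue it; a false return
 * leaves enqueueing to the caller.
 */
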
#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning);
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
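
/*
 * Worked example of the two-level bitmap (illustrative; the RQB_* macros
 * live in <sys/runq.h>).  Assuming 32-bit status words (RQB_BPW == 32,
 * RQB_L2BPW == 5) and RQ_NQS == 64 queues, queue index 45 maps to:
 *
 *	RQB_WORD(45) == 45 >> 5 == 1		(second status word)
 *	RQB_BIT(45)  == 1 << (45 & 31) == 1 << 13
 *
 * so runq_setbit() ORs bit 13 into rqb_bits[1], and runq_clrbit() clears
 * it again once the queue at that index drains.  The CTASSERT above
 * guarantees the words exactly cover the RQ_NQS queues.
 */
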
/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities
	 * before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}
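
/*
 * Worked trace of runq_findbit_from() (illustrative, again assuming
 * 32-bit status words): a search starting at pri == 40 first scans word
 * RQB_WORD(40) == 1 under the mask ~0 << (40 & 31) == ~0 << 8, i.e. it
 * considers indices 40..63 only.  If nothing is set there, the
 * "pri == 0" test fails (pri is still 40), pri is reset to 0, and the
 * whole bitmap is rescanned once from index 0, so an occupied queue at,
 * say, index 3 is still found.  If the second pass also finds nothing,
 * pri is 0 and the function returns -1.
 */
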
/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ts->ts_thread->td_priority / RQ_PPQ;
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}

void
runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add_pri: td=%p ts=%p pri=%d idx=%d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}
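
/*
 * Example of the priority-to-queue mapping (illustrative): with
 * RQ_PPQ == 4, a thread at td_priority 130 lands in queue 130 / 4 == 32,
 * sharing it with priorities 128-131.  Within a queue, SRQ_PREEMPTED
 * inserts at the head, so a thread that was involuntarily switched out
 * resumes ahead of peers of equal priority; ordinary wakeups go to the
 * tail and take their round-robin turn.
 */
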
/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority thread on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct td_sched *ts2;
			ts2 = ts = TAILQ_FIRST(rqh);

			while (count-- && ts2) {
				/*
				 * Test the iterator (ts2), not the head
				 * of the queue, so that entries beyond
				 * the first can actually be preferred.
				 */
				if (ts2->ts_thread->td_lastcpu == cpu) {
					ts = ts2;
					break;
				}
				ts2 = TAILQ_NEXT(ts2, ts_procq);
			}
		} else
#endif
			ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

struct td_sched *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose_from: no proc on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d ts=%p idx=%d rqh=%p",
		    pri, ts, ts->ts_rqindex, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

	return (NULL);
}
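
/*
 * Usage note (illustrative): runq_choose_from() and runq_remove_idx()
 * below cooperate to support a circular scan of the queues.  A caller
 * keeps a starting index, picks the first occupied queue at or after it,
 * and lets runq_remove_idx() advance the index past a queue that drains:
 *
 *	ts = runq_choose_from(rq, idx);
 *	if (ts != NULL)
 *		runq_remove_idx(rq, ts, &idx);
 *
 * This yields round-robin service among the queues rather than always
 * restarting the search at index 0.
 */
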
/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

	runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
	    ("runq_remove_idx: thread swapped out"));
	pri = ts->ts_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	{
		/* Sanity check: ts must really be on the queue it claims. */
		struct td_sched *nts;

		TAILQ_FOREACH(nts, rqh, ts_procq)
			if (nts == ts)
				break;
		if (ts != nts)
			panic("runq_remove_idx: ts %p not on rqindex %d",
			    ts, pri);
	}
	TAILQ_REMOVE(rqh, ts, ts_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>

/*
 * Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ts;

	/*
	 * The td_sched is allocated immediately after the struct thread
	 * in the same allocation, hence the (td + 1) pointer arithmetic.
	 */
	ts = (struct td_sched *)(td + 1);
	bzero(ts, sizeof(*ts));
	td->td_sched = ts;
	ts->ts_thread = td;
}

#endif /* KERN_SWITCH_INCLUDE */