/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down; run the idle thread on the APs. */
		td = PCPU_GET(idlethread);
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* Note that it is no longer on the run queue. */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			mtx_lock_spin(&sched_lock);
			td->td_critnest--;
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
	} else
#endif
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}
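
/*
 * A minimal usage sketch: code that must not be involuntarily switched
 * away from while it manipulates per-CPU state brackets the access in a
 * critical section:
 *
 *	critical_enter();
 *	... update per-CPU data ...
 *	critical_exit();
 *
 * Sections nest via td_critnest; a preemption requested while inside a
 * section is recorded in td_owepreempt and only acted upon by the
 * outermost critical_exit(), as implemented above.
 */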

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the current thread should be preempted in favor of the
 * new thread.  If so, it switches to the new thread and eventually returns
 * true.  If not, it returns false so that the caller may place the thread
 * on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyway, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - The new thread's priority is not a realtime priority, the
	 *    current thread's priority is not an idle priority, and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	KASSERT((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd),
	    ("thread has no (or wrong) sched-private part."));
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd))
		return (0);
#ifndef FULL_PREEMPTION
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_owepreempt = 1;
		return (0);
	}

	/*
	 * Thread is runnable but not yet put on the system run queue.
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL|SW_PREEMPT, td);
	return (1);
#else
	return (0);
#endif
}
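
/*
 * For example, under the default (non-FULL_PREEMPTION) policy checked
 * above: an interrupt thread (priority <= PRI_MAX_ITHD) that becomes
 * runnable preempts a running timeshare thread, but one timeshare
 * thread does not preempt another; the new thread is simply queued
 * until the next switch.
 */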

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
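
/*
 * A worked example of the status-bit math, assuming 32-bit rqb_bits
 * words (RQB_BPW = 32, RQB_L2BPW = 5): queue index 37 gives
 * RQB_WORD(37) = 37 >> 5 = 1 and RQB_BIT(37) = 1 << (37 & 31) = 1 << 5,
 * so runq_clrbit(rq, 37) clears bit 5 of rqb_bits[1].  The word width
 * is machine-dependent, so the split differs with 64-bit words.
 */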

/*
 * Find the index of the first (i.e., highest priority) non-empty run
 * queue.  This is done by scanning the status bits; a set bit indicates
 * a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Find the index of the first non-empty run queue at or after the queue
 * indexed by start, wrapping around to the beginning of the bitmap if
 * nothing is found by the end.
 */
static __inline int
runq_findbit_from(struct runq *rq, int start)
{
	struct rqbits *rqb;
	int bit;
	int pri;
	int i;

	rqb = &rq->rq_status;
	bit = start & (RQB_BPW - 1);
	pri = 0;
	CTR1(KTR_RUNQ, "runq_findbit_from: start %d", start);
again:
	for (i = RQB_WORD(start); i < RQB_LEN; i++) {
		CTR3(KTR_RUNQ, "runq_findbit_from: bits %d = %#x bit = %d",
		    i, rqb->rqb_bits[i], bit);
		if (rqb->rqb_bits[i]) {
			if (bit != 0) {
				for (pri = bit; pri < RQB_BPW; pri++)
					if (rqb->rqb_bits[i] & (1ul << pri))
						break;
				bit = 0;
				if (pri >= RQB_BPW)
					continue;
			} else
				pri = RQB_FFS(rqb->rqb_bits[i]);
			pri += (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}
		bit = 0;
	}
	if (start != 0) {
		CTR0(KTR_RUNQ, "runq_findbit_from: restarting");
		start = 0;
		goto again;
	}

	return (-1);
}
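
/*
 * A worked example of the wrap-around scan, again assuming 32-bit status
 * words: with only queues 3 and 40 non-empty, runq_findbit_from(rq, 12)
 * skips bit 3 (it is below the start index), finds nothing else in word
 * 0, and returns 40 from word 1.  With only queue 3 non-empty, the first
 * pass finds nothing, so the scan restarts at 0 and returns 3.
 */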

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.  Threads queued because they were preempted
 * (SRQ_PREEMPTED) are inserted at the head of their queue so that they
 * resume as soon as possible.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ts->ts_thread->td_priority / RQ_PPQ;
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}

/*
 * As runq_add(), but use the caller-supplied queue index pri instead of
 * deriving it from the thread's priority.
 */
void
runq_add_pri(struct runq *rq, struct td_sched *ts, int pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add_pri: td=%p ts=%p pri=%d idx=%d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}
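
/*
 * For example, with the stock <sys/runq.h> parameters (RQ_NQS = 64
 * queues, RQ_PPQ = 4 priorities per queue), a thread at td_priority 100
 * is placed on rq_queues[100 / 4] = rq_queues[25], and bit 25 is set in
 * the rq_status bitmap.
 */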

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority thread on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal; 0 or less is ignored. */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct td_sched *ts2;
			ts2 = ts = TAILQ_FIRST(rqh);

			while (count-- && ts2) {
				if (ts2->ts_thread->td_lastcpu == cpu) {
					ts = ts2;
					break;
				}
				ts2 = TAILQ_NEXT(ts2, ts_procq);
			}
		} else
#endif
			ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * As runq_choose(), but begin the bitmap scan at the queue indexed by
 * idx, wrapping around if necessary.
 */
struct td_sched *
runq_choose_from(struct runq *rq, int idx)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose_from: no proc on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d ts=%p idx=%d rqh=%p",
		    pri, ts, ts->ts_rqindex, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

	return (NULL);
}
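
/*
 * runq_choose_from() and runq_remove_idx() (below) exist to support
 * schedulers that treat the queue array as circular, selecting from a
 * rotating start index rather than always from the highest priority
 * queue; SCHED_ULE uses this for its timeshare range.
 */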

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The caller must set the thread's state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

	runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, int *idx)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ts->ts_thread->td_proc->p_sflag & PS_INMEM,
	    ("runq_remove_idx: process swapped out"));
	pri = ts->ts_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, ts, ts_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}

/****** Functions that are temporarily here. ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

/*
 * Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * A thread is either being created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor() (*may go away)
 *  thread_init() (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ts;

	ts = (struct td_sched *)(td + 1);
	bzero(ts, sizeof(*ts));
	td->td_sched = ts;
	ts->ts_thread = td;
}

/*
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct proc *p)
{
}

/*
 * Change the concurrency of an existing proc to N.
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct proc *p, int concurrency)
{
}

/*
 * Called from thread_exit() for all exiting threads.
 *
 * Not to be confused with sched_exit_thread(), which is called from
 * thread_exit() only for threads exiting without the rest of the process
 * exiting, because it is also called from sched_exit() and we would not
 * want to call it twice.
 * XXX This can probably be fixed.
 */
void
sched_thread_exit(struct thread *td)
{
}

#endif /* KERN_SWITCH_INCLUDE */