/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

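/*
 * Editor's note (illustrative, not part of the original file): each counter
 * below is a per-CPU long kept in the DPCPU area, exported as a child of the
 * kern.sched.stats node above.  Under the assumption that SCHED_STATS is
 * compiled in, a typical user-space workflow would be:
 *
 *	sysctl kern.sched.stats.reset=1		(zero every counter; see the
 *						 reset handler further down)
 *	sysctl kern.sched.stats			(dump the switch-reason counts)
 */
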
/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *p;
	uintptr_t counter;
	int error;
	int val;
	int i;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	/*
	 * Traverse the list of children of _kern_sched_stats and reset each
	 * to 0.  Skip the reset entry.
	 */
	SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
		if (p == oidp || p->oid_arg1 == NULL)
			continue;
		counter = (uintptr_t)p->oid_arg1;
		for (i = 0; i <= mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			*(long *)(dpcpu_off[i] + counter) = 0;
		}
	}
	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

retry:
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
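/*
 * Editor's note (illustrative sketch, not part of the original file): code
 * that must not be preempted is bracketed with a critical_enter()/
 * critical_exit() pair.  critical_enter() bumps td_critnest; when the
 * matching critical_exit() drops the count back to zero it performs any
 * context switch that was deferred while the section was active
 * (td_owepreempt):
 *
 *	critical_enter();
 *	... touch per-CPU state that must not be interleaved with a switch ...
 *	critical_exit();
 */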
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;
	int flags;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));

	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			thread_lock(td);
			td->td_critnest--;
			flags = SW_INVOL | SW_PREEMPT;
			if (TD_IS_IDLETHREAD(td))
				flags |= SWT_IDLE;
			else
				flags |= SWT_OWEPREEMPT;
			mi_switch(flags, NULL);
			thread_unlock(td);
		}
	} else
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

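/*
 * Editor's note, worked example (illustrative; assumes 64 run queues and
 * 32-bit status words, i.e. RQ_NQS == 64 and RQB_BPW == 32, consistent with
 * the CTASSERT above): queue index 37 lives in status word
 * RQB_WORD(37) == 1 and is tracked by bit RQB_BIT(37) == 1 << 5.
 * runq_setbit()/runq_clrbit() below maintain exactly this mapping, and
 * runq_findbit() scans the words in order to locate the lowest-numbered
 * (i.e. highest-priority) non-empty queue.
 */
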
/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities
	 * before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = td->td_priority / RQ_PPQ;
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}
/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		/* fuzz == 1 is normal; 0 or less are ignored */
		if (fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = fuzz;
			int cpu = PCPU_GET(cpuid);
			struct thread *td2;
			td2 = td = TAILQ_FIRST(rqh);

			while (count-- && td2) {
				if (td2->td_lastcpu == cpu) {
					td = td2;
					break;
				}
				td2 = TAILQ_NEXT(td2, td_runq);
			}
		} else
			td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Find the highest priority process on the run queue.
 */
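/*
 * (Editor's note for clarity: this is the fuzz-less path; its behaviour
 * matches runq_choose_fuzz() above with a fuzz of 1, i.e. it simply takes
 * the first thread on the highest-priority non-empty queue.)
 */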
struct thread *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

	return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
		    pri, td, td->td_rqindex, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

	return (NULL);
}
/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
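/*
 * (Editor's note: "set state" above refers to the TD_SET_*() thread-state
 * macros, e.g. TD_SET_RUNNING() as used in choosethread(); runq_remove()
 * itself only unlinks the thread and keeps the status bits consistent.)
 */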
void
runq_remove(struct runq *rq, struct thread *td)
{

	runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(td->td_flags & TDF_INMEM,
	    ("runq_remove_idx: thread swapped out"));
	pri = td->td_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, td, td_runq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}