19454b2d8SWarner Losh /*- 28a36da99SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 38a36da99SPedro F. Giffuni * 4d5a08a60SJake Burkholder * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> 5d5a08a60SJake Burkholder * All rights reserved. 6dba6c5a6SPeter Wemm * 7dba6c5a6SPeter Wemm * Redistribution and use in source and binary forms, with or without 8dba6c5a6SPeter Wemm * modification, are permitted provided that the following conditions 9dba6c5a6SPeter Wemm * are met: 10dba6c5a6SPeter Wemm * 1. Redistributions of source code must retain the above copyright 11dba6c5a6SPeter Wemm * notice, this list of conditions and the following disclaimer. 12dba6c5a6SPeter Wemm * 2. Redistributions in binary form must reproduce the above copyright 13dba6c5a6SPeter Wemm * notice, this list of conditions and the following disclaimer in the 14dba6c5a6SPeter Wemm * documentation and/or other materials provided with the distribution. 15dba6c5a6SPeter Wemm * 16dba6c5a6SPeter Wemm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17dba6c5a6SPeter Wemm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18dba6c5a6SPeter Wemm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19dba6c5a6SPeter Wemm * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20dba6c5a6SPeter Wemm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21dba6c5a6SPeter Wemm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22dba6c5a6SPeter Wemm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23dba6c5a6SPeter Wemm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24dba6c5a6SPeter Wemm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25dba6c5a6SPeter Wemm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26dba6c5a6SPeter Wemm * SUCH DAMAGE. 
27dba6c5a6SPeter Wemm */ 28dba6c5a6SPeter Wemm 29e602ba25SJulian Elischer 30677b542eSDavid E. O'Brien #include <sys/cdefs.h> 31677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 32e602ba25SJulian Elischer 336804a3abSJulian Elischer #include "opt_sched.h" 340c0b25aeSJohn Baldwin 35dba6c5a6SPeter Wemm #include <sys/param.h> 36dba6c5a6SPeter Wemm #include <sys/systm.h> 372d50560aSMarcel Moolenaar #include <sys/kdb.h> 38dba6c5a6SPeter Wemm #include <sys/kernel.h> 390384fff8SJason Evans #include <sys/ktr.h> 40f34fa851SJohn Baldwin #include <sys/lock.h> 4135e0e5b3SJohn Baldwin #include <sys/mutex.h> 42dba6c5a6SPeter Wemm #include <sys/proc.h> 43dba6c5a6SPeter Wemm #include <sys/queue.h> 44b43179fbSJeff Roberson #include <sys/sched.h> 45cc66ebe2SPeter Wemm #include <sys/smp.h> 469727e637SJeff Roberson #include <sys/sysctl.h> 476804a3abSJulian Elischer 487b20fb19SJeff Roberson #include <machine/cpu.h> 497b20fb19SJeff Roberson 501335c4dfSNate Lawson /* Uncomment this to enable logging of critical_enter/exit. */ 511335c4dfSNate Lawson #if 0 521335c4dfSNate Lawson #define KTR_CRITICAL KTR_SCHED 531335c4dfSNate Lawson #else 541335c4dfSNate Lawson #define KTR_CRITICAL 0 551335c4dfSNate Lawson #endif 561335c4dfSNate Lawson 579923b511SScott Long #ifdef FULL_PREEMPTION 589923b511SScott Long #ifndef PREEMPTION 599923b511SScott Long #error "The FULL_PREEMPTION option requires the PREEMPTION option" 609923b511SScott Long #endif 619923b511SScott Long #endif 62dba6c5a6SPeter Wemm 63d2ac2316SJake Burkholder CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 64d2ac2316SJake Burkholder 656220dcbaSRobert Watson /* 666220dcbaSRobert Watson * kern.sched.preemption allows user space to determine if preemption support 676220dcbaSRobert Watson * is compiled in or not. It is not currently a boot or runtime flag that 686220dcbaSRobert Watson * can be changed. 
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch().  One per-CPU counter per SWT_* reason. */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

/*
 * Sysctl handler for kern.sched.stats.reset.  Writing any non-zero value
 * zeroes every per-CPU counter registered under the stats node; writing
 * zero (or merely reading) is a no-op.
 */
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *p;
	uintptr_t counter;
	int error;
	int val;
	int i;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	/*
	 * Traverse the list of children of _kern_sched_stats and reset each
	 * to 0.  Skip the reset entry.
 */
	SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
		/* Skip the reset oid itself and any oid with no counter. */
		if (p == oidp || p->oid_arg1 == NULL)
			continue;
		counter = (uintptr_t)p->oid_arg1;
		/* oid_arg1 holds a DPCPU offset; zero it on every CPU. */
		CPU_FOREACH(i) {
			*(long *)(dpcpu_off[i] + counter) = 0;
		}
	}
	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */

/*
 * Slow path of choosethread(), taken only while panicking: keep asking
 * sched_choose() for candidates until one is either a system thread
 * (P_SYSTEM) or the thread that entered the panic (TDF_INPANIC).
 */
static __noinline struct thread *
choosethread_panic(struct thread *td)
{

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
retry:
	if (((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		td = sched_choose();
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Pick the next thread to run via sched_choose() and mark it running.
 * After a panic, defer to choosethread_panic() to filter the candidates.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

	td = sched_choose();

	if (__predict_false(panicstr != NULL))
		return (choosethread_panic(td));

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 *
 * It might seem a good idea to inline critical_enter() but, in order
 * to prevent instructions reordering by the compiler, a __compiler_membar()
 * would have to be used here (the same as sched_pin()).  The performance
 * penalty imposed by the membar could, then, produce slower code than
 * the function call itself, for most cases.
 */
/*
 * Enter a critical section by bumping the current thread's nesting
 * count (td_critnest).  Sections nest; preemption is deferred until the
 * matching critical_exit() drops the count back to zero.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/*
 * Slow path of critical_exit(): a preemption was deferred while we were
 * in the critical section (td_owepreempt), so perform the involuntary
 * context switch now.  Skipped while the debugger is active.
 */
static void __noinline
critical_exit_preempt(void)
{
	struct thread *td;
	int flags;

	/*
	 * If td_critnest is 0, it is possible that we are going to get
	 * preempted again before reaching the code below. This happens
	 * rarely and is harmless. However, this means td_owepreempt may
	 * now be unset.
	 */
	td = curthread;
	if (td->td_critnest != 0)
		return;
	if (kdb_active)
		return;

	/*
	 * Microoptimization: we committed to switch,
	 * disable preemption in interrupt handlers
	 * while spinning for the thread lock.
	 */
	td->td_critnest = 1;
	thread_lock(td);
	td->td_critnest--;
	flags = SW_INVOL | SW_PREEMPT;
	if (TD_IS_IDLETHREAD(td))
		flags |= SWT_IDLE;
	else
		flags |= SWT_OWEPREEMPT;
	mi_switch(flags, NULL);
	thread_unlock(td);
}

/*
 * Leave a critical section.  When the nesting count reaches zero and a
 * preemption is owed, switch out via critical_exit_preempt().  The
 * compiler barrier keeps the decrement from being reordered against the
 * td_owepreempt test.
 */
void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	td->td_critnest--;
	__compiler_membar();
	if (__predict_false(td->td_owepreempt))
		critical_exit_preempt();
	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	/* Zero the status bitmap and hook up every per-priority queue head. */
	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 * Returns -1 when every queue is empty.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * As runq_findbit(), but start the scan at priority 'pri' and wrap
 * around to the start of the bitmap at most once, so the whole range
 * is examined.  Returns -1 when every queue is empty.
 */
static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	/* pri == 0 here means the wrapped pass already ran: give up. */
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.  SRQ_PREEMPTED threads go to the head of
 * their queue, all others to the tail.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
	struct rqhead *rqh;
	int pri;

	/* Map the thread priority onto one of the RQ_NQS queues. */
	pri = td->td_priority / RQ_PPQ;
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}

/*
 * As runq_add(), but the caller supplies the queue index directly
 * instead of deriving it from td_priority.
 */
void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}
/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	/* A single set bit anywhere in the status bitmap means non-empty. */
	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = fuzz;
			int cpu = PCPU_GET(cpuid);
			struct thread *td2;
			td2 = td = TAILQ_FIRST(rqh);

			/* Prefer a thread that last ran on this CPU. */
			while (count-- && td2) {
				if (td2->td_lastcpu == cpu) {
					td = td2;
					break;
				}
				td2 = TAILQ_NEXT(td2, td_runq);
			}
		} else
			td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

	/* All queues empty. */
	return (NULL);
}

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

	/* All queues empty. */
	return (NULL);
}

/*
 * As runq_choose(), but start the search at queue index 'idx',
 * wrapping around once (see runq_findbit_from()).
 */
struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
		    pri, td, td->td_rqindex, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

	return (NULL);
}
/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

	runq_remove_idx(rq, td, NULL);
}

/*
 * As runq_remove(), but if 'idx' is non-NULL and pointed at the queue
 * that just became empty, advance it to the next queue index so a
 * subsequent runq_choose_from() resumes past the drained queue.
 */
void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(td->td_flags & TDF_INMEM,
	    ("runq_remove_idx: thread swapped out"));
	/* td_rqindex was recorded by runq_add()/runq_add_pri(). */
	pri = td->td_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, td, td_runq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}