/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#ifndef PREEMPTION
#error "SCHED_ULE requires options PREEMPTION"
#endif

/*
 * TODO:
 *	Pick idle from affinity group or self group first.
 *	Implement pick_score.
 */

#define	KTR_ULE	0x0		/* Enable for pickpri debugging. */

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */
#ifdef SMP
	int		ts_rltick;	/* Real last tick, for affinity. */
#endif

	/* originally from kg_sched */
	u_int		skg_slptime;	/* Number of ticks we vol. slept */
	u_int		skg_runtime;	/* Number of ticks we were running */
};
/* flags kept in ts_flags */
#define	TSF_BOUND	0x0001		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0002		/* Thread was added as transferable. */

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	(max((ts)->ts_ltick - (ts)->ts_ftick, hz))

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)
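
/*
 * Worked example of the cpu percentage bookkeeping above (illustrative
 * only; assumes hz = 1000 and a thread whose tick window spans the full
 * SCHED_TICK_TARG of 10 seconds):
 *
 *	A thread that was on cpu for roughly half of that window has
 *	accumulated ts_ticks of about (5 * hz) << SCHED_TICK_SHIFT, so
 *	SCHED_TICK_HZ() returns ~5 * hz while SCHED_TICK_TOTAL() returns
 *	10 * hz.  SCHED_PRI_TICKS() divides the two scaled by
 *	SCHED_PRI_RANGE, yielding roughly SCHED_PRI_RANGE / 2; a 50% cpu
 *	hog lands half way up the utilization dependent part of the
 *	timeshare priority range before nice is added.
 */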

/*
 * These determine the interactivity of a process.  Interactivity differs from
 * cpu utilization in that it expresses the voluntary time slept vs time ran
 * while cpu utilization includes all time not running.  This more accurately
 * models the intent of the thread.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)
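
/*
 * Illustrative example of how these limits relate to
 * sched_interact_score() below; the absolute numbers are arbitrary,
 * only the ratio of voluntary sleep time to run time matters:
 *
 *	slptime = 150, runtime = 50:  div = slptime / INTERACT_HALF = 3,
 *	score = runtime / div = 16, which is below INTERACT_THRESH (30)
 *	and therefore treated as interactive.
 *
 *	slptime = 50, runtime = 150:  div = runtime / INTERACT_HALF = 3,
 *	score = INTERACT_HALF + (INTERACT_HALF - slptime / div) = 84,
 *	well above the threshold, so the thread is treated as cpu bound.
 */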

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;	/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;	/* timeshare run queue. */
	struct runq	tdq_realtime;	/* real-time run queue. */
	u_char		tdq_idx;	/* Current insert index. */
	u_char		tdq_ridx;	/* Current removal index. */
	short		tdq_flags;	/* Thread queue flags */
	int		tdq_load;	/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;	/* Next in tdq group. */
	struct tdq_group *tdq_group;	/* Our processor group. */
#else
	int		tdq_sysload;	/* For loadavg, !ITHD load. */
#endif
};

#define	TDQF_BUSY	0x0001		/* Queue is marked as busy */

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int		tdg_cpus;	/* Count of CPUs in this tdq group. */
	cpumask_t	tdg_cpumask;	/* Mask of cpus in this group. */
	cpumask_t	tdg_idlemask;	/* Idle cpus in this group. */
	cpumask_t	tdg_mask;	/* Bit mask for first cpu. */
	int		tdg_load;	/* Total load of this group. */
	int		tdg_transferable; /* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};

#define	SCHED_AFFINITY_DEFAULT	(hz / 100)
#define	SCHED_AFFINITY(ts)	((ts)->ts_rltick > ticks - affinity)

/*
 * Run-time tunables.
 */
static int rebalance = 0;
static int pick_pri = 1;
static int affinity;
static int tryself = 1;
static int tryselfidle = 1;
static int ipi_ast = 0;
static int ipi_preempt = 1;
static int ipi_thresh = PRI_MIN_KERN;
static int steal_htt = 1;
static int steal_busy = 1;
static int busy_thresh = 4;
static int topology = 0;

/*
 * One thread queue per processor.
 */
static volatile cpumask_t tdq_idle;
static volatile cpumask_t tdq_busy;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);
static inline void sched_pin_td(struct thread *td);
static inline void sched_unpin_td(struct thread *td);

/* Operations on per processor queues */
static struct td_sched *tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_pickidle(struct tdq *, struct td_sched *);
static int tdq_pickpri(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(struct thread *);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *);
static struct td_sched *tdq_steal(struct tdq *, int);

#define	THREAD_CAN_MIGRATE(td)	((td)->td_pinned == 0)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static inline void
sched_pin_td(struct thread *td)
{
	td->td_pinned++;
}

static inline void
sched_unpin_td(struct thread *td)
{
	td->td_pinned--;
}

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread,
					    ts->ts_thread->td_proc->p_comm,
					    ts->ts_thread->td_priority,
					    ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload:           %d\n", tdq->tdq_load);
	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
		if (tdq->tdq_transferable >= busy_thresh &&
		    (tdq->tdq_flags & TDQF_BUSY) == 0) {
			tdq->tdq_flags |= TDQF_BUSY;
			atomic_set_int(&tdq_busy, 1 << TDQ_ID(tdq));
		}
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		u_char pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * realtime.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (unsigned char)(pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
		if (tdq->tdq_transferable < busy_thresh &&
		    (tdq->tdq_flags & TDQF_BUSY)) {
			atomic_clear_int(&tdq_busy, 1 << TDQ_ID(tdq));
			tdq->tdq_flags &= ~TDQF_BUSY;
		}
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}
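
/*
 * Illustrative sketch of the circular timeshare queue maintained by
 * tdq_runq_add() above and drained from tdq_ridx by tdq_choose()
 * (assumes the stock RQ_NQS of 64 run queues):
 *
 *	With tdq_idx = 10 and tdq_ridx = 8, a thread whose scaled
 *	timeshare priority maps to offset 62 would be inserted at
 *	(62 + 10) % 64 = 8.  Since that slot is the current removal
 *	index it is bumped back to queue 7, the last slot visited in the
 *	circular scan, so it cannot jump ahead of threads that are still
 *	draining from tdq_ridx.
 */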

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	tdq->tdq_load++;
	CTR2(KTR_SCHED, "cpu %jd load: %d", TDQ_ID(tdq), tdq->tdq_load);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(struct thread *td)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	td->td_sched->ts_rltick = ticks;
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
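
/*
 * Worked example of the imbalance calculation above (hypothetical
 * loads):  with high_load = 7 and low_load = 2 the difference is 5,
 * so move = 5 / 2 rounded up = 3 threads.  If only two threads are
 * transferable the move is clamped to 2, leaving the pair at loads of
 * roughly 5 and 4 rather than perfectly equal.
 */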

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	sched_rem(ts->ts_thread);
	ts->ts_cpu = cpu;
	sched_pin_td(ts->ts_thread);
	sched_add(ts->ts_thread, SRQ_YIELDING);
	sched_unpin_td(ts->ts_thread);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (steal_htt && tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts)
				goto steal;
		}
	}
	if (steal_busy) {
		while (tdq_busy) {
			int cpu;

			cpu = ffs(tdq_busy);
			if (cpu == 0)
				break;
			cpu--;
			steal = TDQ_CPU(cpu);
			if (steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 1);
			if (ts == NULL)
				continue;
			CTR5(KTR_ULE,
			    "tdq_idled: stealing td %p(%s) pri %d from %d busy 0x%X",
			    ts->ts_thread, ts->ts_thread->td_proc->p_comm,
			    ts->ts_thread->td_priority, cpu, tdq_busy);
			goto steal;
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask == tdg->tdg_cpumask)
		atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
steal:
	sched_rem(ts->ts_thread);
	ts->ts_cpu = PCPU_GET(cpuid);
	sched_pin_td(ts->ts_thread);
	sched_add(ts->ts_thread, SRQ_YIELDING);
	sched_unpin_td(ts->ts_thread);

	return (0);
}

static void
tdq_notify(struct td_sched *ts)
{
	struct thread *ctd;
	struct pcpu *pcpu;
	int cpri;
	int pri;
	int cpu;

	cpu = ts->ts_cpu;
	pri = ts->ts_thread->td_priority;
	pcpu = pcpu_find(cpu);
	ctd = pcpu->pc_curthread;
	cpri = ctd->td_priority;

	/*
	 * If our priority is not better than the current priority there is
	 * nothing to do.
	 */
	if (pri > cpri)
		return;
	/*
	 * Always IPI idle.
	 */
	if (cpri > PRI_MIN_IDLE)
		goto sendipi;
	/*
	 * If we're realtime or better and there is timeshare or worse running
	 * send an IPI.
	 */
	if (pri < PRI_MAX_REALTIME && cpri > PRI_MAX_REALTIME)
		goto sendipi;
	/*
	 * Otherwise only IPI if we exceed the threshold.
	 */
	if (pri > ipi_thresh)
		return;
sendipi:
	ctd->td_flags |= TDF_NEEDRESCHED;
	if (cpri < PRI_MIN_IDLE) {
		if (ipi_ast)
			ipi_selected(1 << cpu, IPI_AST);
		else if (ipi_preempt)
			ipi_selected(1 << cpu, IPI_PREEMPT);
	} else
		ipi_selected(1 << cpu, IPI_PREEMPT);
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_pickidle(struct tdq *tdq, struct td_sched *ts)
{
	struct tdq_group *tdg;
	int self;
	int cpu;

	self = PCPU_GET(cpuid);
	if (smp_started == 0)
		return (self);
	/*
	 * If the current CPU has idled, just run it here.
	 */
	if ((tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0)
		return (self);
	/*
	 * Try the last group we ran on.
	 */
	tdg = TDQ_CPU(ts->ts_cpu)->tdq_group;
	cpu = ffs(tdg->tdg_idlemask);
	if (cpu)
		return (cpu - 1);
	/*
	 * Search for an idle group.
	 */
	cpu = ffs(tdq_idle);
	if (cpu)
		return (cpu - 1);
	/*
	 * XXX If there are no idle groups, check for an idle core.
	 */
	/*
	 * No idle CPUs?
	 */
	return (self);
}
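
/*
 * Note on the affinity test used by tdq_pickpri() below, assuming
 * hz = 1000:  affinity defaults to SCHED_AFFINITY_DEFAULT = hz / 100 =
 * 10 ticks, so SCHED_AFFINITY(ts) is only true if ts_rltick shows the
 * thread ran on its last cpu within roughly the past 10ms.  Outside
 * that window any cache warmth is assumed to be gone and the affinity
 * preference is ignored.
 */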

static int
tdq_pickpri(struct tdq *tdq, struct td_sched *ts, int flags)
{
	struct pcpu *pcpu;
	int lowpri;
	int lowcpu;
	int lowload;
	int load;
	int self;
	int pri;
	int cpu;

	self = PCPU_GET(cpuid);
	if (smp_started == 0)
		return (self);

	pri = ts->ts_thread->td_priority;
	/*
	 * Regardless of affinity, if the last cpu is idle send it there.
	 */
	pcpu = pcpu_find(ts->ts_cpu);
	if (pcpu->pc_curthread->td_priority > PRI_MIN_IDLE) {
		CTR5(KTR_ULE,
		    "ts_cpu %d idle, ltick %d ticks %d pri %d curthread %d",
		    ts->ts_cpu, ts->ts_rltick, ticks, pri,
		    pcpu->pc_curthread->td_priority);
		return (ts->ts_cpu);
	}
	/*
	 * If we have affinity, try to place it on the cpu we last ran on.
	 */
	if (SCHED_AFFINITY(ts) && pcpu->pc_curthread->td_priority > pri) {
		CTR5(KTR_ULE,
		    "affinity for %d, ltick %d ticks %d pri %d curthread %d",
		    ts->ts_cpu, ts->ts_rltick, ticks, pri,
		    pcpu->pc_curthread->td_priority);
		return (ts->ts_cpu);
	}
	/*
	 * Try ourself first; if we're running something lower priority this
	 * may have some locality with the waking thread and execute faster
	 * here.
	 */
	if (tryself) {
		/*
		 * If we're being awoken by an interrupt thread or the waker
		 * is going right to sleep run here as well.
		 */
		if ((TDQ_SELF()->tdq_load == 1) && (flags & SRQ_YIELDING ||
		    curthread->td_pri_class == PRI_ITHD)) {
			CTR2(KTR_ULE, "tryself load %d flags %d",
			    TDQ_SELF()->tdq_load, flags);
			return (self);
		}
	}
	/*
	 * Look for an idle group.
	 */
	CTR1(KTR_ULE, "tdq_idle %X", tdq_idle);
	cpu = ffs(tdq_idle);
	if (cpu)
		return (cpu - 1);
	if (tryselfidle && pri < curthread->td_priority) {
		CTR1(KTR_ULE, "tryself %d",
		    curthread->td_priority);
		return (self);
	}
	/*
	 * Now search for the cpu running the lowest priority thread with
	 * the least load.
	 */
	lowload = 0;
	lowpri = lowcpu = 0;
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		pcpu = pcpu_find(cpu);
		pri = pcpu->pc_curthread->td_priority;
		CTR4(KTR_ULE,
		    "cpu %d pri %d lowcpu %d lowpri %d",
		    cpu, pri, lowcpu, lowpri);
		if (pri < lowpri)
			continue;
		load = TDQ_CPU(cpu)->tdq_load;
		if (lowpri && lowpri == pri && load > lowload)
			continue;
		lowpri = pri;
		lowcpu = cpu;
		lowload = load;
	}

	return (lowcpu);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */
static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value in
	 * case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz / 10);	/* ~100ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		topology = 1;
		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz / 10);	/* ~100ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
#ifdef SMP
	affinity = SCHED_AFFINITY_DEFAULT;
#endif
	mtx_unlock_spin(&sched_lock);
}
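
/*
 * Worked example for the tickincr calculation above, assuming
 * hz = 1000 and stathz = 128:  tickincr = (1000 << 10) / 128 = 8000,
 * i.e. each stathz tick is charged as 1000 / 128 = ~7.8 hz ticks kept
 * in the SCHED_TICK_SHIFT fixed point domain.  sched_slice becomes
 * 128 / 10 = 12 stathz ticks, which is the advertised ~100ms.
 */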
1123e7d50326SJeff Roberson */ 1124e7d50326SJeff Roberson score = sched_interact_score(td); 1125e7d50326SJeff Roberson if (score < sched_interact) { 1126e7d50326SJeff Roberson pri = PRI_MIN_REALTIME; 1127e7d50326SJeff Roberson pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact) 1128e7d50326SJeff Roberson * score; 1129e7d50326SJeff Roberson KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME, 11309a93305aSJeff Roberson ("sched_priority: invalid interactive priority %d score %d", 11319a93305aSJeff Roberson pri, score)); 1132e7d50326SJeff Roberson } else { 1133e7d50326SJeff Roberson pri = SCHED_PRI_MIN; 1134e7d50326SJeff Roberson if (td->td_sched->ts_ticks) 1135e7d50326SJeff Roberson pri += SCHED_PRI_TICKS(td->td_sched); 1136e7d50326SJeff Roberson pri += SCHED_PRI_NICE(td->td_proc->p_nice); 11378ab80cf0SJeff Roberson if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) { 11388ab80cf0SJeff Roberson static int once = 1; 11398ab80cf0SJeff Roberson if (once) { 11408ab80cf0SJeff Roberson printf("sched_priority: invalid priority %d", 11418ab80cf0SJeff Roberson pri); 11428ab80cf0SJeff Roberson printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n", 11438ab80cf0SJeff Roberson td->td_proc->p_nice, 11448ab80cf0SJeff Roberson td->td_sched->ts_ticks, 11458ab80cf0SJeff Roberson td->td_sched->ts_ftick, 11468ab80cf0SJeff Roberson td->td_sched->ts_ltick, 11478ab80cf0SJeff Roberson SCHED_PRI_TICKS(td->td_sched)); 11488ab80cf0SJeff Roberson once = 0; 11498ab80cf0SJeff Roberson } 11508ab80cf0SJeff Roberson pri = min(max(pri, PRI_MIN_TIMESHARE), 11518ab80cf0SJeff Roberson PRI_MAX_TIMESHARE); 11528ab80cf0SJeff Roberson } 1153e7d50326SJeff Roberson } 11548460a577SJohn Birrell sched_user_prio(td, pri); 115535e6168fSJeff Roberson 115615dc847eSJeff Roberson return; 115735e6168fSJeff Roberson } 115835e6168fSJeff Roberson 115935e6168fSJeff Roberson /* 1160d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1161d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1162d322132cSJeff Roberson */ 11634b60e324SJeff Roberson static void 11648460a577SJohn Birrell sched_interact_update(struct thread *td) 11654b60e324SJeff Roberson { 1166155b6ca1SJeff Roberson struct td_sched *ts; 11679a93305aSJeff Roberson u_int sum; 11683f741ca1SJeff Roberson 1169155b6ca1SJeff Roberson ts = td->td_sched; 1170155b6ca1SJeff Roberson sum = ts->skg_runtime + ts->skg_slptime; 1171d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1172d322132cSJeff Roberson return; 1173d322132cSJeff Roberson /* 1174155b6ca1SJeff Roberson * This only happens from two places: 1175155b6ca1SJeff Roberson * 1) We have added an unusual amount of run time from fork_exit. 1176155b6ca1SJeff Roberson * 2) We have added an unusual amount of sleep time from sched_sleep(). 1177155b6ca1SJeff Roberson */ 1178155b6ca1SJeff Roberson if (sum > SCHED_SLP_RUN_MAX * 2) { 1179155b6ca1SJeff Roberson if (ts->skg_runtime > ts->skg_slptime) { 1180155b6ca1SJeff Roberson ts->skg_runtime = SCHED_SLP_RUN_MAX; 1181155b6ca1SJeff Roberson ts->skg_slptime = 1; 1182155b6ca1SJeff Roberson } else { 1183155b6ca1SJeff Roberson ts->skg_slptime = SCHED_SLP_RUN_MAX; 1184155b6ca1SJeff Roberson ts->skg_runtime = 1; 1185155b6ca1SJeff Roberson } 1186155b6ca1SJeff Roberson return; 1187155b6ca1SJeff Roberson } 1188155b6ca1SJeff Roberson /* 1189d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1190d322132cSJeff Roberson * will not bring us back into range. 
Dividing by two here forces 11912454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1192d322132cSJeff Roberson */ 119337a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1194155b6ca1SJeff Roberson ts->skg_runtime /= 2; 1195155b6ca1SJeff Roberson ts->skg_slptime /= 2; 1196d322132cSJeff Roberson return; 1197d322132cSJeff Roberson } 1198155b6ca1SJeff Roberson ts->skg_runtime = (ts->skg_runtime / 5) * 4; 1199155b6ca1SJeff Roberson ts->skg_slptime = (ts->skg_slptime / 5) * 4; 1200d322132cSJeff Roberson } 1201d322132cSJeff Roberson 1202d322132cSJeff Roberson static void 12038460a577SJohn Birrell sched_interact_fork(struct thread *td) 1204d322132cSJeff Roberson { 1205d322132cSJeff Roberson int ratio; 1206d322132cSJeff Roberson int sum; 1207d322132cSJeff Roberson 12088460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1209d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1210d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 12118460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 12128460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 12134b60e324SJeff Roberson } 12144b60e324SJeff Roberson } 12154b60e324SJeff Roberson 1216e1f89c22SJeff Roberson static int 12178460a577SJohn Birrell sched_interact_score(struct thread *td) 1218e1f89c22SJeff Roberson { 1219210491d3SJeff Roberson int div; 1220e1f89c22SJeff Roberson 12218460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 12228460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1223210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 12248460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 122552bc574cSJeff Roberson } 122652bc574cSJeff Roberson if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 12278460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 12288460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1229e1f89c22SJeff Roberson } 123052bc574cSJeff Roberson /* runtime == slptime */ 123152bc574cSJeff Roberson if (td->td_sched->skg_runtime) 123252bc574cSJeff Roberson return (SCHED_INTERACT_HALF); 1233e1f89c22SJeff Roberson 1234210491d3SJeff Roberson /* 1235210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1236210491d3SJeff Roberson */ 1237210491d3SJeff Roberson return (0); 1238e1f89c22SJeff Roberson 1239e1f89c22SJeff Roberson } 1240e1f89c22SJeff Roberson 124115dc847eSJeff Roberson /* 1242e7d50326SJeff Roberson * Called from proc0_init() to bootstrap the scheduler. 1243ed062c8dSJulian Elischer */ 1244ed062c8dSJulian Elischer void 1245ed062c8dSJulian Elischer schedinit(void) 1246ed062c8dSJulian Elischer { 1247e7d50326SJeff Roberson 1248ed062c8dSJulian Elischer /* 1249ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 
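/*
 * Illustrative sketch (not part of the original source): the interactivity
 * score computed by sched_interact_score() above, modelled as a standalone
 * function.  INTERACT_HALF_SKETCH stands in for SCHED_INTERACT_HALF (half of
 * the score range); the real kernel constants are not reproduced here.
 * Mostly-sleeping threads score low (interactive), mostly-running threads
 * score high (batch).
 */
#include <stdio.h>

#define INTERACT_HALF_SKETCH	50

static int
interact_score_sketch(int runtime, int slptime)
{
	int div;

	if (runtime > slptime) {
		div = runtime / INTERACT_HALF_SKETCH;
		if (div < 1)
			div = 1;
		/* Scores above HALF: treated as non-interactive. */
		return (INTERACT_HALF_SKETCH +
		    (INTERACT_HALF_SKETCH - slptime / div));
	}
	if (slptime > runtime) {
		div = slptime / INTERACT_HALF_SKETCH;
		if (div < 1)
			div = 1;
		/* Scores below HALF: treated as interactive. */
		return (runtime / div);
	}
	return (runtime ? INTERACT_HALF_SKETCH : 0);
}

int
main(void)
{
	/* A thread that sleeps nine ticks for every tick it runs. */
	printf("mostly sleeping: %d\n", interact_score_sketch(100, 900));
	/* A thread that runs nine ticks for every tick it sleeps. */
	printf("mostly running:  %d\n", interact_score_sketch(900, 100));
	return (0);
}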
1250ed062c8dSJulian Elischer */ 1251ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1252ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 12537b20fb19SJeff Roberson thread0.td_lock = &sched_lock; 1254e7d50326SJeff Roberson td_sched0.ts_ltick = ticks; 12558ab80cf0SJeff Roberson td_sched0.ts_ftick = ticks; 1256ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1257ed062c8dSJulian Elischer } 1258ed062c8dSJulian Elischer 1259ed062c8dSJulian Elischer /* 126015dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 126115dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 1262e7d50326SJeff Roberson * at most sched_slice stathz ticks. 126315dc847eSJeff Roberson */ 126435e6168fSJeff Roberson int 126535e6168fSJeff Roberson sched_rr_interval(void) 126635e6168fSJeff Roberson { 1267e7d50326SJeff Roberson 1268e7d50326SJeff Roberson /* Convert sched_slice to hz */ 1269e7d50326SJeff Roberson return (hz/(realstathz/sched_slice)); 127035e6168fSJeff Roberson } 127135e6168fSJeff Roberson 127222bf7d9aSJeff Roberson static void 1273ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 127435e6168fSJeff Roberson { 1275e7d50326SJeff Roberson 1276e7d50326SJeff Roberson if (ts->ts_ticks == 0) 1277e7d50326SJeff Roberson return; 12788ab80cf0SJeff Roberson if (ticks - (hz / 10) < ts->ts_ltick && 12798ab80cf0SJeff Roberson SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX) 12808ab80cf0SJeff Roberson return; 128135e6168fSJeff Roberson /* 128235e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1283210491d3SJeff Roberson */ 1284e7d50326SJeff Roberson if (ts->ts_ltick > ticks - SCHED_TICK_TARG) 1285ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1286e7d50326SJeff Roberson SCHED_TICK_TARG; 1287e7d50326SJeff Roberson else 1288ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1289ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1290e7d50326SJeff Roberson ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG; 129135e6168fSJeff Roberson } 129235e6168fSJeff Roberson 1293e7d50326SJeff Roberson static void 1294f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 129535e6168fSJeff Roberson { 1296ad1e7d28SJulian Elischer struct td_sched *ts; 129735e6168fSJeff Roberson 129881d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 129981d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 130081d47d3fSJeff Roberson curthread->td_proc->p_comm); 1301ad1e7d28SJulian Elischer ts = td->td_sched; 13027b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1303f5c157d9SJohn Baldwin if (td->td_priority == prio) 1304f5c157d9SJohn Baldwin return; 1305e7d50326SJeff Roberson 13063f872f85SJeff Roberson if (TD_ON_RUNQ(td) && prio < td->td_priority) { 13073f741ca1SJeff Roberson /* 13083f741ca1SJeff Roberson * If the priority has been elevated due to priority 13093f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 1310e7d50326SJeff Roberson * queue. This could be optimized to not re-add in some 1311e7d50326SJeff Roberson * cases. 
1312f2b74cbfSJeff Roberson */ 13137b20fb19SJeff Roberson MPASS(td->td_lock == &sched_lock); 1314e7d50326SJeff Roberson sched_rem(td); 1315e7d50326SJeff Roberson td->td_priority = prio; 13167b20fb19SJeff Roberson sched_add(td, SRQ_BORROWING|SRQ_OURSELF); 13173f741ca1SJeff Roberson } else 13183f741ca1SJeff Roberson td->td_priority = prio; 131935e6168fSJeff Roberson } 132035e6168fSJeff Roberson 1321f5c157d9SJohn Baldwin /* 1322f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1323f5c157d9SJohn Baldwin * priority. 1324f5c157d9SJohn Baldwin */ 1325f5c157d9SJohn Baldwin void 1326f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1327f5c157d9SJohn Baldwin { 1328f5c157d9SJohn Baldwin 1329f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1330f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1331f5c157d9SJohn Baldwin } 1332f5c157d9SJohn Baldwin 1333f5c157d9SJohn Baldwin /* 1334f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1335f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1336f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1337f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1338f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1339f5c157d9SJohn Baldwin * of prio. 1340f5c157d9SJohn Baldwin */ 1341f5c157d9SJohn Baldwin void 1342f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1343f5c157d9SJohn Baldwin { 1344f5c157d9SJohn Baldwin u_char base_pri; 1345f5c157d9SJohn Baldwin 1346f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1347f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 13488460a577SJohn Birrell base_pri = td->td_user_pri; 1349f5c157d9SJohn Baldwin else 1350f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1351f5c157d9SJohn Baldwin if (prio >= base_pri) { 1352f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1353f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1354f5c157d9SJohn Baldwin } else 1355f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1356f5c157d9SJohn Baldwin } 1357f5c157d9SJohn Baldwin 1358f5c157d9SJohn Baldwin void 1359f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1360f5c157d9SJohn Baldwin { 1361f5c157d9SJohn Baldwin u_char oldprio; 1362f5c157d9SJohn Baldwin 1363f5c157d9SJohn Baldwin /* First, update the base priority. */ 1364f5c157d9SJohn Baldwin td->td_base_pri = prio; 1365f5c157d9SJohn Baldwin 1366f5c157d9SJohn Baldwin /* 136750aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1368f5c157d9SJohn Baldwin * ever lower the priority. 1369f5c157d9SJohn Baldwin */ 1370f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1371f5c157d9SJohn Baldwin return; 1372f5c157d9SJohn Baldwin 1373f5c157d9SJohn Baldwin /* Change the real priority. */ 1374f5c157d9SJohn Baldwin oldprio = td->td_priority; 1375f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1376f5c157d9SJohn Baldwin 1377f5c157d9SJohn Baldwin /* 1378f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1379f5c157d9SJohn Baldwin * its state. 
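/*
 * Illustrative sketch (not part of the original source): the borrow/restore
 * protocol used by sched_lend_prio()/sched_unlend_prio() above, modelled on a
 * small structure.  A numerically smaller priority is more important, and the
 * "prio" passed to unlend is the minimum boost still required by outstanding
 * lenders.  This is a userland model, not the kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

struct prio_sketch {
	unsigned char	base_pri;	/* priority we own */
	unsigned char	pri;		/* priority we currently run at */
	bool		borrowing;	/* running on a lent priority */
};

static void
lend_prio_sketch(struct prio_sketch *p, unsigned char prio)
{
	p->borrowing = true;
	p->pri = prio;
}

static void
unlend_prio_sketch(struct prio_sketch *p, unsigned char prio)
{
	if (prio >= p->base_pri) {
		/* No lender needs anything better than our own priority. */
		p->borrowing = false;
		p->pri = p->base_pri;
	} else {
		/* Some lender still needs the boost; keep borrowing. */
		lend_prio_sketch(p, prio);
	}
}

int
main(void)
{
	struct prio_sketch p = { .base_pri = 160, .pri = 160, .borrowing = false };

	lend_prio_sketch(&p, 100);	/* boosted on behalf of a waiter */
	unlend_prio_sketch(&p, 120);	/* another waiter still needs 120 */
	printf("pri %d borrowing %d\n", p.pri, p.borrowing);	/* 120, 1 */
	unlend_prio_sketch(&p, 200);	/* no boost needed any more */
	printf("pri %d borrowing %d\n", p.pri, p.borrowing);	/* 160, 0 */
	return (0);
}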
1380f5c157d9SJohn Baldwin */ 1381f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1382f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1383f5c157d9SJohn Baldwin } 1384f5c157d9SJohn Baldwin 138535e6168fSJeff Roberson void 13868460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13873db720fdSDavid Xu { 13883db720fdSDavid Xu u_char oldprio; 13893db720fdSDavid Xu 13908460a577SJohn Birrell td->td_base_user_pri = prio; 1391fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1392fc6c30f6SJulian Elischer return; 13938460a577SJohn Birrell oldprio = td->td_user_pri; 13948460a577SJohn Birrell td->td_user_pri = prio; 13953db720fdSDavid Xu 13963db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13973db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13983db720fdSDavid Xu } 13993db720fdSDavid Xu 14003db720fdSDavid Xu void 14013db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 14023db720fdSDavid Xu { 14033db720fdSDavid Xu u_char oldprio; 14043db720fdSDavid Xu 14053db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 14063db720fdSDavid Xu 1407f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 14088460a577SJohn Birrell td->td_user_pri = prio; 14093db720fdSDavid Xu 14103db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 14113db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 14123db720fdSDavid Xu } 14133db720fdSDavid Xu 14143db720fdSDavid Xu void 14153db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 14163db720fdSDavid Xu { 14173db720fdSDavid Xu u_char base_pri; 14183db720fdSDavid Xu 14198460a577SJohn Birrell base_pri = td->td_base_user_pri; 14203db720fdSDavid Xu if (prio >= base_pri) { 14213db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 14228460a577SJohn Birrell sched_user_prio(td, base_pri); 14233db720fdSDavid Xu } else 14243db720fdSDavid Xu sched_lend_user_prio(td, prio); 14253db720fdSDavid Xu } 14263db720fdSDavid Xu 14273db720fdSDavid Xu void 14283389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 142935e6168fSJeff Roberson { 1430c02bbb43SJeff Roberson struct tdq *tdq; 1431ad1e7d28SJulian Elischer struct td_sched *ts; 14327b8bfa0dSJeff Roberson int preempt; 143335e6168fSJeff Roberson 14347b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 143535e6168fSJeff Roberson 14367b8bfa0dSJeff Roberson preempt = flags & SW_PREEMPT; 1437c02bbb43SJeff Roberson tdq = TDQ_SELF(); 1438e7d50326SJeff Roberson ts = td->td_sched; 1439060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1440060563ecSJulian Elischer td->td_oncpu = NOCPU; 144152eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 144277918643SStephan Uphoff td->td_owepreempt = 0; 1443b11fdad0SJeff Roberson /* 1444ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1445b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1446b11fdad0SJeff Roberson */ 14477b20fb19SJeff Roberson /* 14487b20fb19SJeff Roberson * Switch to the sched lock to fix things up and pick 14497b20fb19SJeff Roberson * a new thread. 
14507b20fb19SJeff Roberson */ 14517b20fb19SJeff Roberson if (td->td_lock != &sched_lock) { 14527b20fb19SJeff Roberson mtx_lock_spin(&sched_lock); 14537b20fb19SJeff Roberson thread_unlock(td); 14547b20fb19SJeff Roberson } 1455486a9414SJulian Elischer if (TD_IS_IDLETHREAD(td)) { 14567b20fb19SJeff Roberson MPASS(td->td_lock == &sched_lock); 1457bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 14587b20fb19SJeff Roberson } else if (TD_IS_RUNNING(td)) { 1459f2b74cbfSJeff Roberson /* 1460ed062c8dSJulian Elischer * Don't allow the thread to migrate 1461ed062c8dSJulian Elischer * from a preemption. 1462f2b74cbfSJeff Roberson */ 14637b20fb19SJeff Roberson tdq_load_rem(tdq, ts); 14647b8bfa0dSJeff Roberson if (preempt) 14651e516cf5SJeff Roberson sched_pin_td(td); 14667a5e5e2aSJeff Roberson sched_add(td, preempt ? 1467598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1468598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 14697b8bfa0dSJeff Roberson if (preempt) 14701e516cf5SJeff Roberson sched_unpin_td(td); 14717b20fb19SJeff Roberson } else 14727b20fb19SJeff Roberson tdq_load_rem(tdq, ts); 14737b20fb19SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1474d39063f2SJulian Elischer if (newtd != NULL) { 1475c20c691bSJulian Elischer /* 14766680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 14776680bbd5SJeff Roberson * added to the run queue and then chosen. 1478c20c691bSJulian Elischer */ 1479c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1480ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1481d39063f2SJulian Elischer } else 14822454aaf5SJeff Roberson newtd = choosethread(); 1483ebccf1e3SJoseph Koshy if (td != newtd) { 1484ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1485ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1486ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1487ebccf1e3SJoseph Koshy #endif 14888460a577SJohn Birrell 14897b20fb19SJeff Roberson cpu_switch(td, newtd, td->td_lock); 1490ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1491ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1492ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1493ebccf1e3SJoseph Koshy #endif 1494ebccf1e3SJoseph Koshy } 1495ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1496060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 14977b20fb19SJeff Roberson MPASS(td->td_lock == &sched_lock); 149835e6168fSJeff Roberson } 149935e6168fSJeff Roberson 150035e6168fSJeff Roberson void 1501fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 150235e6168fSJeff Roberson { 150335e6168fSJeff Roberson struct thread *td; 150435e6168fSJeff Roberson 1505fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 15067b20fb19SJeff Roberson PROC_SLOCK_ASSERT(p, MA_OWNED); 1507e7d50326SJeff Roberson 1508fa885116SJulian Elischer p->p_nice = nice; 15098460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 15107b20fb19SJeff Roberson thread_lock(td); 15118460a577SJohn Birrell sched_priority(td); 1512e7d50326SJeff Roberson sched_prio(td, td->td_base_user_pri); 15137b20fb19SJeff Roberson thread_unlock(td); 151435e6168fSJeff Roberson } 1515fa885116SJulian Elischer } 151635e6168fSJeff Roberson 151735e6168fSJeff Roberson void 151844f3b092SJohn Baldwin sched_sleep(struct thread *td) 151935e6168fSJeff Roberson { 1520e7d50326SJeff Roberson 15217b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 152235e6168fSJeff Roberson 1523ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 152435e6168fSJeff Roberson } 152535e6168fSJeff 
Roberson 152635e6168fSJeff Roberson void 152735e6168fSJeff Roberson sched_wakeup(struct thread *td) 152835e6168fSJeff Roberson { 152914618990SJeff Roberson struct td_sched *ts; 1530e7d50326SJeff Roberson int slptime; 1531e7d50326SJeff Roberson 15327b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 153314618990SJeff Roberson ts = td->td_sched; 153435e6168fSJeff Roberson /* 1535e7d50326SJeff Roberson * If we slept for more than a tick update our interactivity and 1536e7d50326SJeff Roberson * priority. 153735e6168fSJeff Roberson */ 153814618990SJeff Roberson slptime = ts->ts_slptime; 153914618990SJeff Roberson ts->ts_slptime = 0; 1540e7d50326SJeff Roberson if (slptime && slptime != ticks) { 15419a93305aSJeff Roberson u_int hzticks; 1542f1e8dc4aSJeff Roberson 1543e7d50326SJeff Roberson hzticks = (ticks - slptime) << SCHED_TICK_SHIFT; 154414618990SJeff Roberson ts->skg_slptime += hzticks; 15458460a577SJohn Birrell sched_interact_update(td); 154614618990SJeff Roberson sched_pctcpu_update(ts); 15478460a577SJohn Birrell sched_priority(td); 1548f1e8dc4aSJeff Roberson } 154914618990SJeff Roberson /* Reset the slice value after we sleep. */ 155014618990SJeff Roberson ts->ts_slice = sched_slice; 15517a5e5e2aSJeff Roberson sched_add(td, SRQ_BORING); 155235e6168fSJeff Roberson } 155335e6168fSJeff Roberson 155435e6168fSJeff Roberson /* 155535e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 155635e6168fSJeff Roberson * priority. 155735e6168fSJeff Roberson */ 155835e6168fSJeff Roberson void 15598460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 156015dc847eSJeff Roberson { 15617b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1562ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1563e7d50326SJeff Roberson /* 1564e7d50326SJeff Roberson * Penalize the parent and child for forking. 1565e7d50326SJeff Roberson */ 1566e7d50326SJeff Roberson sched_interact_fork(child); 1567e7d50326SJeff Roberson sched_priority(child); 1568e7d50326SJeff Roberson td->td_sched->skg_runtime += tickincr; 1569e7d50326SJeff Roberson sched_interact_update(td); 1570e7d50326SJeff Roberson sched_priority(td); 1571ad1e7d28SJulian Elischer } 1572ad1e7d28SJulian Elischer 1573ad1e7d28SJulian Elischer void 1574ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1575ad1e7d28SJulian Elischer { 1576ad1e7d28SJulian Elischer struct td_sched *ts; 1577ad1e7d28SJulian Elischer struct td_sched *ts2; 15788460a577SJohn Birrell 1579e7d50326SJeff Roberson /* 1580e7d50326SJeff Roberson * Initialize child. 1581e7d50326SJeff Roberson */ 15827b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1583ed062c8dSJulian Elischer sched_newthread(child); 15847b20fb19SJeff Roberson child->td_lock = &sched_lock; 1585ad1e7d28SJulian Elischer ts = td->td_sched; 1586ad1e7d28SJulian Elischer ts2 = child->td_sched; 1587ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1588ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1589e7d50326SJeff Roberson /* 1590e7d50326SJeff Roberson * Grab our parents cpu estimation information and priority. 
1591e7d50326SJeff Roberson */ 1592ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1593ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1594ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 1595e7d50326SJeff Roberson child->td_user_pri = td->td_user_pri; 1596e7d50326SJeff Roberson child->td_base_user_pri = td->td_base_user_pri; 1597e7d50326SJeff Roberson /* 1598e7d50326SJeff Roberson * And update interactivity score. 1599e7d50326SJeff Roberson */ 1600e7d50326SJeff Roberson ts2->skg_slptime = ts->skg_slptime; 1601e7d50326SJeff Roberson ts2->skg_runtime = ts->skg_runtime; 1602e7d50326SJeff Roberson ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ 160315dc847eSJeff Roberson } 160415dc847eSJeff Roberson 160515dc847eSJeff Roberson void 16068460a577SJohn Birrell sched_class(struct thread *td, int class) 160715dc847eSJeff Roberson { 160815dc847eSJeff Roberson 16097b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 16108460a577SJohn Birrell if (td->td_pri_class == class) 161115dc847eSJeff Roberson return; 161215dc847eSJeff Roberson 1613ef1134c9SJeff Roberson #ifdef SMP 1614155b9987SJeff Roberson /* 1615155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1616155b9987SJeff Roberson * count because could be changing to or from an interrupt 1617155b9987SJeff Roberson * class. 1618155b9987SJeff Roberson */ 16197a5e5e2aSJeff Roberson if (TD_ON_RUNQ(td)) { 16201e516cf5SJeff Roberson struct tdq *tdq; 16211e516cf5SJeff Roberson 16221e516cf5SJeff Roberson tdq = TDQ_CPU(td->td_sched->ts_cpu); 16231e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1624d2ad694cSJeff Roberson tdq->tdq_transferable--; 1625d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 162680f86c9fSJeff Roberson } 16271e516cf5SJeff Roberson td->td_pri_class = class; 16281e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1629d2ad694cSJeff Roberson tdq->tdq_transferable++; 1630d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 163180f86c9fSJeff Roberson } 1632155b9987SJeff Roberson } 1633ef1134c9SJeff Roberson #endif 16348460a577SJohn Birrell td->td_pri_class = class; 163535e6168fSJeff Roberson } 163635e6168fSJeff Roberson 163735e6168fSJeff Roberson /* 163835e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 
163935e6168fSJeff Roberson */ 164035e6168fSJeff Roberson void 1641fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 164235e6168fSJeff Roberson { 1643e7d50326SJeff Roberson struct thread *td; 1644141ad61cSJeff Roberson 16458460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1646fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 16478460a577SJohn Birrell 16487b20fb19SJeff Roberson PROC_SLOCK_ASSERT(p, MA_OWNED); 1649e7d50326SJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1650e7d50326SJeff Roberson sched_exit_thread(td, child); 1651ad1e7d28SJulian Elischer } 1652ad1e7d28SJulian Elischer 1653ad1e7d28SJulian Elischer void 1654fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1655ad1e7d28SJulian Elischer { 1656fc6c30f6SJulian Elischer 1657e7d50326SJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1658e7d50326SJeff Roberson child, child->td_proc->p_comm, child->td_priority); 1659e7d50326SJeff Roberson 16607b20fb19SJeff Roberson thread_lock(child); 1661e7d50326SJeff Roberson tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 16627b20fb19SJeff Roberson thread_unlock(child); 1663e7d50326SJeff Roberson #ifdef KSE 1664e7d50326SJeff Roberson /* 1665e7d50326SJeff Roberson * KSE forks and exits so often that this penalty causes short-lived 1666e7d50326SJeff Roberson * threads to always be non-interactive. This causes mozilla to 1667e7d50326SJeff Roberson * crawl under load. 1668e7d50326SJeff Roberson */ 1669e7d50326SJeff Roberson if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc) 1670e7d50326SJeff Roberson return; 1671e7d50326SJeff Roberson #endif 1672e7d50326SJeff Roberson /* 1673e7d50326SJeff Roberson * Give the child's runtime to the parent without returning the 1674e7d50326SJeff Roberson * sleep time as a penalty to the parent. This causes shells that 1675e7d50326SJeff Roberson * launch expensive things to mark their children as expensive. 1676e7d50326SJeff Roberson */ 16777b20fb19SJeff Roberson thread_lock(td); 1678fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1679fc6c30f6SJulian Elischer sched_interact_update(td); 1680e7d50326SJeff Roberson sched_priority(td); 16817b20fb19SJeff Roberson thread_unlock(td); 1682ad1e7d28SJulian Elischer } 1683ad1e7d28SJulian Elischer 1684ad1e7d28SJulian Elischer void 1685ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1686ad1e7d28SJulian Elischer { 1687ad1e7d28SJulian Elischer /* 1688ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1689ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1690ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1691ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1692ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1693ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1694ad1e7d28SJulian Elischer * it perfectly here. 
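/*
 * Illustrative sketch (not part of the original source): what happens to the
 * parent's interactivity history when sched_exit_thread() above folds an
 * exiting child's run time back in.  SLP_RUN_MAX_SKETCH stands in for
 * SCHED_SLP_RUN_MAX (an assumed value in arbitrary tick units); the clamping
 * mirrors the sched_interact_update() logic shown earlier.
 */
#include <stdio.h>

#define SLP_RUN_MAX_SKETCH	500	/* assumed history limit */

struct hist_sketch {
	unsigned int runtime;
	unsigned int slptime;
};

static void
interact_update_sketch(struct hist_sketch *h)
{
	unsigned int sum = h->runtime + h->slptime;

	if (sum < SLP_RUN_MAX_SKETCH)
		return;
	if (sum > SLP_RUN_MAX_SKETCH * 2) {
		/* Way over the limit: keep the dominant side at the cap. */
		if (h->runtime > h->slptime) {
			h->runtime = SLP_RUN_MAX_SKETCH;
			h->slptime = 1;
		} else {
			h->slptime = SLP_RUN_MAX_SKETCH;
			h->runtime = 1;
		}
		return;
	}
	if (sum > (SLP_RUN_MAX_SKETCH / 5) * 6) {
		h->runtime /= 2;
		h->slptime /= 2;
		return;
	}
	h->runtime = (h->runtime / 5) * 4;
	h->slptime = (h->slptime / 5) * 4;
}

int
main(void)
{
	struct hist_sketch parent = { .runtime = 200, .slptime = 250 };

	/* Parent absorbs an exiting child's 300 ticks of run time. */
	parent.runtime += 300;
	interact_update_sketch(&parent);
	printf("runtime %u slptime %u\n", parent.runtime, parent.slptime);
	return (0);
}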
1695ad1e7d28SJulian Elischer */ 1696ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1697ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1698ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 16997b20fb19SJeff Roberson thread_lock(td); 1700ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1701ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 17027b20fb19SJeff Roberson thread_unlock(td); 1703ad1e7d28SJulian Elischer } 170435e6168fSJeff Roberson } 170535e6168fSJeff Roberson 170635e6168fSJeff Roberson void 17077cf90fb3SJeff Roberson sched_clock(struct thread *td) 170835e6168fSJeff Roberson { 1709ad1e7d28SJulian Elischer struct tdq *tdq; 1710ad1e7d28SJulian Elischer struct td_sched *ts; 171135e6168fSJeff Roberson 1712dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1713dc03363dSJeff Roberson #ifdef SMP 17147b8bfa0dSJeff Roberson sched_smp_tick(td); 1715dc03363dSJeff Roberson #endif 17163f872f85SJeff Roberson tdq = TDQ_SELF(); 17173f872f85SJeff Roberson /* 17183f872f85SJeff Roberson * Advance the insert index once for each tick to ensure that all 17193f872f85SJeff Roberson * threads get a chance to run. 17203f872f85SJeff Roberson */ 17213f872f85SJeff Roberson if (tdq->tdq_idx == tdq->tdq_ridx) { 17223f872f85SJeff Roberson tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS; 17233f872f85SJeff Roberson if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx])) 17243f872f85SJeff Roberson tdq->tdq_ridx = tdq->tdq_idx; 17253f872f85SJeff Roberson } 17263f872f85SJeff Roberson ts = td->td_sched; 17273f741ca1SJeff Roberson /* 17288460a577SJohn Birrell * We only do slicing code for TIMESHARE threads. 1729a8949de2SJeff Roberson */ 17308460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1731a8949de2SJeff Roberson return; 1732a8949de2SJeff Roberson /* 17333f872f85SJeff Roberson * We used a tick; charge it to the thread so that we can compute our 173415dc847eSJeff Roberson * interactivity. 173515dc847eSJeff Roberson */ 17368460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 17378460a577SJohn Birrell sched_interact_update(td); 173835e6168fSJeff Roberson /* 173935e6168fSJeff Roberson * We used up one time slice. 174035e6168fSJeff Roberson */ 1741ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 174215dc847eSJeff Roberson return; 174335e6168fSJeff Roberson /* 174415dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 
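/*
 * Illustrative sketch (not part of the original source): the circular
 * timeshare queue indices advanced in sched_clock() above.  The insert index
 * is where new timeshare threads are queued, the removal index is where they
 * are taken from; advancing the insert index once per tick ages the queue so
 * every bucket is eventually drained.  NQS_SKETCH stands in for RQ_NQS.
 */
#include <stdio.h>

#define NQS_SKETCH	64

struct circq_sketch {
	int count[NQS_SKETCH];	/* runnable threads per bucket */
	int idx;		/* insertion index (tdq_idx) */
	int ridx;		/* removal index (tdq_ridx) */
};

static void
circq_tick_sketch(struct circq_sketch *q)
{
	/* Mirror of the index handling in sched_clock() above. */
	if (q->idx == q->ridx) {
		q->idx = (q->idx + 1) % NQS_SKETCH;
		if (q->count[q->ridx] == 0)
			q->ridx = q->idx;
	}
}

int
main(void)
{
	struct circq_sketch q = { .idx = 0, .ridx = 0 };
	int tick;

	q.count[0] = 2;		/* two threads queued at the current bucket */
	for (tick = 0; tick < 3; tick++) {
		circq_tick_sketch(&q);
		printf("tick %d: idx %d ridx %d\n", tick, q.idx, q.ridx);
	}
	/* ridx stays at 0 until its bucket drains; idx has moved one ahead. */
	return (0);
}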
174535e6168fSJeff Roberson */ 17468460a577SJohn Birrell sched_priority(td); 17474a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 174835e6168fSJeff Roberson } 174935e6168fSJeff Roberson 175035e6168fSJeff Roberson int 175135e6168fSJeff Roberson sched_runnable(void) 175235e6168fSJeff Roberson { 1753ad1e7d28SJulian Elischer struct tdq *tdq; 1754b90816f1SJeff Roberson int load; 175535e6168fSJeff Roberson 1756b90816f1SJeff Roberson load = 1; 1757b90816f1SJeff Roberson 1758ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 175922bf7d9aSJeff Roberson #ifdef SMP 17607b8bfa0dSJeff Roberson if (tdq_busy) 17617b8bfa0dSJeff Roberson goto out; 176222bf7d9aSJeff Roberson #endif 17633f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1764d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 17653f741ca1SJeff Roberson goto out; 17663f741ca1SJeff Roberson } else 1767d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1768b90816f1SJeff Roberson goto out; 1769b90816f1SJeff Roberson load = 0; 1770b90816f1SJeff Roberson out: 1771b90816f1SJeff Roberson return (load); 177235e6168fSJeff Roberson } 177335e6168fSJeff Roberson 17747a5e5e2aSJeff Roberson struct thread * 1775c9f25d8fSJeff Roberson sched_choose(void) 1776c9f25d8fSJeff Roberson { 1777ad1e7d28SJulian Elischer struct tdq *tdq; 1778ad1e7d28SJulian Elischer struct td_sched *ts; 177915dc847eSJeff Roberson 1780b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1781ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 178215dc847eSJeff Roberson #ifdef SMP 178380f86c9fSJeff Roberson restart: 178415dc847eSJeff Roberson #endif 1785ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1786ad1e7d28SJulian Elischer if (ts) { 178722bf7d9aSJeff Roberson #ifdef SMP 1788155b6ca1SJeff Roberson if (ts->ts_thread->td_priority > PRI_MIN_IDLE) 1789ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 179080f86c9fSJeff Roberson goto restart; 179122bf7d9aSJeff Roberson #endif 1792ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 17937a5e5e2aSJeff Roberson return (ts->ts_thread); 179435e6168fSJeff Roberson } 1795c9f25d8fSJeff Roberson #ifdef SMP 1796ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 179780f86c9fSJeff Roberson goto restart; 1798c9f25d8fSJeff Roberson #endif 17997a5e5e2aSJeff Roberson return (PCPU_GET(idlethread)); 18007a5e5e2aSJeff Roberson } 18017a5e5e2aSJeff Roberson 18027a5e5e2aSJeff Roberson static int 18037a5e5e2aSJeff Roberson sched_preempt(struct thread *td) 18047a5e5e2aSJeff Roberson { 18057a5e5e2aSJeff Roberson struct thread *ctd; 18067a5e5e2aSJeff Roberson int cpri; 18077a5e5e2aSJeff Roberson int pri; 18087a5e5e2aSJeff Roberson 18097a5e5e2aSJeff Roberson ctd = curthread; 18107a5e5e2aSJeff Roberson pri = td->td_priority; 18117a5e5e2aSJeff Roberson cpri = ctd->td_priority; 18127a5e5e2aSJeff Roberson if (panicstr != NULL || pri >= cpri || cold || TD_IS_INHIBITED(ctd)) 18137a5e5e2aSJeff Roberson return (0); 18147a5e5e2aSJeff Roberson /* 18157a5e5e2aSJeff Roberson * Always preempt IDLE threads. Otherwise only if the preempting 18167a5e5e2aSJeff Roberson * thread is an ithread. 
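/*
 * Illustrative sketch (not part of the original source): the core of the
 * preemption policy checked in sched_preempt() above, reduced to a pure
 * predicate.  MAX_ITHD_SKETCH and MIN_IDLE_SKETCH are assumed stand-ins for
 * PRI_MAX_ITHD and PRI_MIN_IDLE, with smaller numbers more important.  The
 * panic/cold/inhibited and critical-section cases handled by the real code
 * are omitted here.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_ITHD_SKETCH		63	/* assumed top of the interrupt range */
#define MIN_IDLE_SKETCH		224	/* assumed start of the idle range */

static bool
should_preempt_sketch(int pri, int cpri)
{
	/* Never preempt for something no better than what is running. */
	if (pri >= cpri)
		return (false);
	/*
	 * Otherwise preempt only for interrupt-class threads, or when the
	 * currently running thread is an idle thread.
	 */
	if (pri > MAX_ITHD_SKETCH && cpri < MIN_IDLE_SKETCH)
		return (false);
	return (true);
}

int
main(void)
{
	printf("ithread vs timeshare:   %d\n", should_preempt_sketch(40, 150));
	printf("timeshare vs timeshare: %d\n", should_preempt_sketch(120, 150));
	printf("timeshare vs idle:      %d\n", should_preempt_sketch(120, 250));
	return (0);
}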
18177a5e5e2aSJeff Roberson */ 18187a5e5e2aSJeff Roberson if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE) 18197a5e5e2aSJeff Roberson return (0); 18207a5e5e2aSJeff Roberson if (ctd->td_critnest > 1) { 18217a5e5e2aSJeff Roberson CTR1(KTR_PROC, "sched_preempt: in critical section %d", 18227a5e5e2aSJeff Roberson ctd->td_critnest); 18237a5e5e2aSJeff Roberson ctd->td_owepreempt = 1; 18247a5e5e2aSJeff Roberson return (0); 18257a5e5e2aSJeff Roberson } 18267a5e5e2aSJeff Roberson /* 18277a5e5e2aSJeff Roberson * Thread is runnable but not yet put on system run queue. 18287a5e5e2aSJeff Roberson */ 18297a5e5e2aSJeff Roberson MPASS(TD_ON_RUNQ(td)); 18307a5e5e2aSJeff Roberson TD_SET_RUNNING(td); 18317b20fb19SJeff Roberson MPASS(ctd->td_lock == &sched_lock); 18327b20fb19SJeff Roberson MPASS(td->td_lock == &sched_lock); 18337a5e5e2aSJeff Roberson CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 18347a5e5e2aSJeff Roberson td->td_proc->p_pid, td->td_proc->p_comm); 18357b20fb19SJeff Roberson /* 18367b20fb19SJeff Roberson * We enter the switch with two runnable threads that both have 18377b20fb19SJeff Roberson * the same lock. When we return td may be sleeping so we need 18387b20fb19SJeff Roberson * to switch locks to make sure he's locked correctly. 18397b20fb19SJeff Roberson */ 18407b20fb19SJeff Roberson SCHED_STAT_INC(switch_preempt); 18417a5e5e2aSJeff Roberson mi_switch(SW_INVOL|SW_PREEMPT, td); 18427b20fb19SJeff Roberson spinlock_enter(); 18437b20fb19SJeff Roberson thread_unlock(ctd); 18447b20fb19SJeff Roberson thread_lock(td); 18457b20fb19SJeff Roberson spinlock_exit(); 18467b20fb19SJeff Roberson 18477a5e5e2aSJeff Roberson return (1); 184835e6168fSJeff Roberson } 184935e6168fSJeff Roberson 185035e6168fSJeff Roberson void 18512630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 185235e6168fSJeff Roberson { 1853ad1e7d28SJulian Elischer struct tdq *tdq; 1854ad1e7d28SJulian Elischer struct td_sched *ts; 1855598b368dSJeff Roberson int preemptive; 185622bf7d9aSJeff Roberson int class; 18577b8bfa0dSJeff Roberson #ifdef SMP 18587b8bfa0dSJeff Roberson int cpuid; 18597b8bfa0dSJeff Roberson int cpumask; 18607b8bfa0dSJeff Roberson #endif 18617a5e5e2aSJeff Roberson ts = td->td_sched; 1862c9f25d8fSJeff Roberson 18637b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 186481d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 186581d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 186681d47d3fSJeff Roberson curthread->td_proc->p_comm); 18677a5e5e2aSJeff Roberson KASSERT((td->td_inhibitors == 0), 18687a5e5e2aSJeff Roberson ("sched_add: trying to run inhibited thread")); 18697a5e5e2aSJeff Roberson KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), 18707a5e5e2aSJeff Roberson ("sched_add: bad thread state")); 18718460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 18725d7ef00cSJeff Roberson ("sched_add: process swapped out")); 18737b20fb19SJeff Roberson /* 18747b20fb19SJeff Roberson * Now that the thread is moving to the run-queue, set the lock 18757b20fb19SJeff Roberson * to the scheduler's lock. 
18767b20fb19SJeff Roberson */ 18777b20fb19SJeff Roberson if (td->td_lock != &sched_lock) { 18787b20fb19SJeff Roberson mtx_lock_spin(&sched_lock); 18797b20fb19SJeff Roberson thread_lock_set(td, &sched_lock); 18807b20fb19SJeff Roberson } 18817b20fb19SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 18827a5e5e2aSJeff Roberson TD_SET_RUNQ(td); 18837a5e5e2aSJeff Roberson tdq = TDQ_SELF(); 18847a5e5e2aSJeff Roberson class = PRI_BASE(td->td_pri_class); 18857a5e5e2aSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 188615dc847eSJeff Roberson /* 18877b8bfa0dSJeff Roberson * Recalculate the priority before we select the target cpu or 18887b8bfa0dSJeff Roberson * run-queue. 188915dc847eSJeff Roberson */ 18908ab80cf0SJeff Roberson if (class == PRI_TIMESHARE) 18918ab80cf0SJeff Roberson sched_priority(td); 18927a5e5e2aSJeff Roberson if (ts->ts_slice == 0) 18937a5e5e2aSJeff Roberson ts->ts_slice = sched_slice; 189422bf7d9aSJeff Roberson #ifdef SMP 18957b8bfa0dSJeff Roberson cpuid = PCPU_GET(cpuid); 18962454aaf5SJeff Roberson /* 18977b8bfa0dSJeff Roberson * Pick the destination cpu and if it isn't ours transfer to the 18987b8bfa0dSJeff Roberson * target cpu. 18992454aaf5SJeff Roberson */ 19007b8bfa0dSJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 19017b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_ITHD) { 190214618990SJeff Roberson CTR2(KTR_ULE, "ithd %d < %d", 190314618990SJeff Roberson td->td_priority, PRI_MAX_ITHD); 19047b8bfa0dSJeff Roberson ts->ts_cpu = cpuid; 1905fb1e3ccdSKip Macy } else if (pick_pri) 19067b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickpri(tdq, ts, flags); 19077b8bfa0dSJeff Roberson else 19087b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickidle(tdq, ts); 19097b8bfa0dSJeff Roberson } else 191014618990SJeff Roberson CTR1(KTR_ULE, "pinned %d", td->td_pinned); 19117b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) 19127b8bfa0dSJeff Roberson preemptive = 0; 19137b8bfa0dSJeff Roberson tdq = TDQ_CPU(ts->ts_cpu); 19147b8bfa0dSJeff Roberson cpumask = 1 << ts->ts_cpu; 191522bf7d9aSJeff Roberson /* 1916670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 19177b8bfa0dSJeff Roberson * the global bitmap. 191822bf7d9aSJeff Roberson */ 1919e7d50326SJeff Roberson if ((class != PRI_IDLE && class != PRI_ITHD) && 19207b8bfa0dSJeff Roberson (tdq->tdq_group->tdg_idlemask & cpumask) != 0) { 192180f86c9fSJeff Roberson /* 192280f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 192380f86c9fSJeff Roberson * from the global idle mask. 192480f86c9fSJeff Roberson */ 1925d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1926d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1927d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 192880f86c9fSJeff Roberson /* 192980f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 193080f86c9fSJeff Roberson */ 19317b8bfa0dSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~cpumask; 19327b8bfa0dSJeff Roberson } 193322bf7d9aSJeff Roberson #endif 19347b8bfa0dSJeff Roberson /* 19357a5e5e2aSJeff Roberson * Pick the run queue based on priority. 
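/*
 * Illustrative sketch (not part of the original source): the priority-to-
 * run-queue mapping that the code below applies.  MAX_REALTIME_SKETCH and
 * MAX_TIMESHARE_SKETCH are assumed stand-ins for PRI_MAX_REALTIME and
 * PRI_MAX_TIMESHARE; the actual kernel values are not reproduced here.
 */
#include <stdio.h>

#define MAX_REALTIME_SKETCH	119	/* assumed end of the realtime range */
#define MAX_TIMESHARE_SKETCH	223	/* assumed end of the timeshare range */

static const char *
runq_for_prio_sketch(int prio)
{
	if (prio <= MAX_REALTIME_SKETCH)
		return ("realtime");	/* interactive and boosted priorities */
	if (prio <= MAX_TIMESHARE_SKETCH)
		return ("timeshare");	/* circular queue aged by sched_clock() */
	return ("idle");		/* runs only when nothing else will */
}

int
main(void)
{
	int p;

	for (p = 100; p <= 240; p += 70)
		printf("prio %d -> %s queue\n", p, runq_for_prio_sketch(p));
	return (0);
}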
19367b8bfa0dSJeff Roberson */ 19377b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_REALTIME) 19387b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_realtime; 19397b8bfa0dSJeff Roberson else if (td->td_priority <= PRI_MAX_TIMESHARE) 19407b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_timeshare; 19417b8bfa0dSJeff Roberson else 19427b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 19437a5e5e2aSJeff Roberson if (preemptive && sched_preempt(td)) 19440c0b25aeSJohn Baldwin return; 1945ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1946ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 19477b8bfa0dSJeff Roberson #ifdef SMP 19487b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) { 19497b8bfa0dSJeff Roberson tdq_notify(ts); 19507b8bfa0dSJeff Roberson return; 19517b8bfa0dSJeff Roberson } 19527b8bfa0dSJeff Roberson #endif 19537b8bfa0dSJeff Roberson if (td->td_priority < curthread->td_priority) 19547b8bfa0dSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 195535e6168fSJeff Roberson } 195635e6168fSJeff Roberson 195735e6168fSJeff Roberson void 19587cf90fb3SJeff Roberson sched_rem(struct thread *td) 195935e6168fSJeff Roberson { 1960ad1e7d28SJulian Elischer struct tdq *tdq; 1961ad1e7d28SJulian Elischer struct td_sched *ts; 19627cf90fb3SJeff Roberson 196381d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 196481d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 196581d47d3fSJeff Roberson curthread->td_proc->p_comm); 19667b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 1967ad1e7d28SJulian Elischer ts = td->td_sched; 19687a5e5e2aSJeff Roberson KASSERT(TD_ON_RUNQ(td), 1969ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 197035e6168fSJeff Roberson 1971ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1972ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1973ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 19747a5e5e2aSJeff Roberson TD_SET_CAN_RUN(td); 197535e6168fSJeff Roberson } 197635e6168fSJeff Roberson 197735e6168fSJeff Roberson fixpt_t 19787cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 197935e6168fSJeff Roberson { 198035e6168fSJeff Roberson fixpt_t pctcpu; 1981ad1e7d28SJulian Elischer struct td_sched *ts; 198235e6168fSJeff Roberson 198335e6168fSJeff Roberson pctcpu = 0; 1984ad1e7d28SJulian Elischer ts = td->td_sched; 1985ad1e7d28SJulian Elischer if (ts == NULL) 1986484288deSJeff Roberson return (0); 198735e6168fSJeff Roberson 19887b20fb19SJeff Roberson thread_lock(td); 1989ad1e7d28SJulian Elischer if (ts->ts_ticks) { 199035e6168fSJeff Roberson int rtick; 199135e6168fSJeff Roberson 1992ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 199335e6168fSJeff Roberson /* How many rtick per second ? 
*/ 1994e7d50326SJeff Roberson rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz); 1995e7d50326SJeff Roberson pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; 199635e6168fSJeff Roberson } 1997ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 19987b20fb19SJeff Roberson thread_unlock(td); 199935e6168fSJeff Roberson 200035e6168fSJeff Roberson return (pctcpu); 200135e6168fSJeff Roberson } 200235e6168fSJeff Roberson 20039bacd788SJeff Roberson void 20049bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 20059bacd788SJeff Roberson { 2006ad1e7d28SJulian Elischer struct td_sched *ts; 20079bacd788SJeff Roberson 20087b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 2009ad1e7d28SJulian Elischer ts = td->td_sched; 20106b2f763fSJeff Roberson if (ts->ts_flags & TSF_BOUND) 2011c95d2db2SJeff Roberson sched_unbind(td); 2012ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 201380f86c9fSJeff Roberson #ifdef SMP 20146b2f763fSJeff Roberson sched_pin(); 201580f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 20169bacd788SJeff Roberson return; 20176b2f763fSJeff Roberson ts->ts_cpu = cpu; 20189bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. */ 2019279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 20209bacd788SJeff Roberson #endif 20219bacd788SJeff Roberson } 20229bacd788SJeff Roberson 20239bacd788SJeff Roberson void 20249bacd788SJeff Roberson sched_unbind(struct thread *td) 20259bacd788SJeff Roberson { 2026e7d50326SJeff Roberson struct td_sched *ts; 2027e7d50326SJeff Roberson 20287b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 2029e7d50326SJeff Roberson ts = td->td_sched; 20306b2f763fSJeff Roberson if ((ts->ts_flags & TSF_BOUND) == 0) 20316b2f763fSJeff Roberson return; 2032e7d50326SJeff Roberson ts->ts_flags &= ~TSF_BOUND; 2033e7d50326SJeff Roberson #ifdef SMP 2034e7d50326SJeff Roberson sched_unpin(); 2035e7d50326SJeff Roberson #endif 20369bacd788SJeff Roberson } 20379bacd788SJeff Roberson 203835e6168fSJeff Roberson int 2039ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 2040ebccf1e3SJoseph Koshy { 20417b20fb19SJeff Roberson THREAD_LOCK_ASSERT(td, MA_OWNED); 2042ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 2043ebccf1e3SJoseph Koshy } 2044ebccf1e3SJoseph Koshy 204536ec198bSDavid Xu void 204636ec198bSDavid Xu sched_relinquish(struct thread *td) 204736ec198bSDavid Xu { 20487b20fb19SJeff Roberson thread_lock(td); 20498460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 205036ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 20517b20fb19SJeff Roberson SCHED_STAT_INC(switch_relinquish); 205236ec198bSDavid Xu mi_switch(SW_VOL, NULL); 20537b20fb19SJeff Roberson thread_unlock(td); 205436ec198bSDavid Xu } 205536ec198bSDavid Xu 2056ebccf1e3SJoseph Koshy int 205733916c36SJeff Roberson sched_load(void) 205833916c36SJeff Roberson { 205933916c36SJeff Roberson #ifdef SMP 206033916c36SJeff Roberson int total; 206133916c36SJeff Roberson int i; 206233916c36SJeff Roberson 206333916c36SJeff Roberson total = 0; 2064d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 2065d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 206633916c36SJeff Roberson return (total); 206733916c36SJeff Roberson #else 2068d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 206933916c36SJeff Roberson #endif 207033916c36SJeff Roberson } 207133916c36SJeff Roberson 207233916c36SJeff Roberson int 207335e6168fSJeff Roberson sched_sizeof_proc(void) 207435e6168fSJeff Roberson { 207535e6168fSJeff Roberson return 
(sizeof(struct proc)); 207635e6168fSJeff Roberson } 207735e6168fSJeff Roberson 207835e6168fSJeff Roberson int 207935e6168fSJeff Roberson sched_sizeof_thread(void) 208035e6168fSJeff Roberson { 208135e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 208235e6168fSJeff Roberson } 2083b41f1452SDavid Xu 2084b41f1452SDavid Xu void 2085b41f1452SDavid Xu sched_tick(void) 2086b41f1452SDavid Xu { 20877a5e5e2aSJeff Roberson struct td_sched *ts; 20887a5e5e2aSJeff Roberson 20897a5e5e2aSJeff Roberson ts = curthread->td_sched; 20907a5e5e2aSJeff Roberson /* Adjust ticks for pctcpu */ 20917a5e5e2aSJeff Roberson ts->ts_ticks += 1 << SCHED_TICK_SHIFT; 20927a5e5e2aSJeff Roberson ts->ts_ltick = ticks; 20937a5e5e2aSJeff Roberson /* 20947a5e5e2aSJeff Roberson * Update if we've exceeded our desired tick threshold by over one 20957a5e5e2aSJeff Roberson * second. 20967a5e5e2aSJeff Roberson */ 20977a5e5e2aSJeff Roberson if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick) 20987a5e5e2aSJeff Roberson sched_pctcpu_update(ts); 20997a5e5e2aSJeff Roberson } 21007a5e5e2aSJeff Roberson 21017a5e5e2aSJeff Roberson /* 21027a5e5e2aSJeff Roberson * The actual idle process. 21037a5e5e2aSJeff Roberson */ 21047a5e5e2aSJeff Roberson void 21057a5e5e2aSJeff Roberson sched_idletd(void *dummy) 21067a5e5e2aSJeff Roberson { 21077a5e5e2aSJeff Roberson struct proc *p; 21087a5e5e2aSJeff Roberson struct thread *td; 21097a5e5e2aSJeff Roberson 21107a5e5e2aSJeff Roberson td = curthread; 21117a5e5e2aSJeff Roberson p = td->td_proc; 21127a5e5e2aSJeff Roberson mtx_assert(&Giant, MA_NOTOWNED); 21137a5e5e2aSJeff Roberson /* ULE relies on preemption for idle interruption. */ 21147a5e5e2aSJeff Roberson for (;;) 21157a5e5e2aSJeff Roberson cpu_idle(); 2116b41f1452SDavid Xu } 2117e7d50326SJeff Roberson 21187b20fb19SJeff Roberson /* 21197b20fb19SJeff Roberson * A CPU is entering for the first time or a thread is exiting. 21207b20fb19SJeff Roberson */ 21217b20fb19SJeff Roberson void 21227b20fb19SJeff Roberson sched_throw(struct thread *td) 21237b20fb19SJeff Roberson { 21247b20fb19SJeff Roberson /* 21257b20fb19SJeff Roberson * Correct spinlock nesting. The idle thread context that we are 21267b20fb19SJeff Roberson * borrowing was created so that it would start out with a single 21277b20fb19SJeff Roberson * spin lock (sched_lock) held in fork_trampoline(). Since we've 21287b20fb19SJeff Roberson * explicitly acquired locks in this function, the nesting count 21297b20fb19SJeff Roberson * is now 2 rather than 1. Since we are nested, calling 21307b20fb19SJeff Roberson * spinlock_exit() will simply adjust the counts without allowing 21317b20fb19SJeff Roberson * spin lock using code to interrupt us.
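/*
 * Illustrative sketch (not part of the original source): how the per-tick
 * accounting in sched_tick() above becomes the fixed-point %CPU figure
 * reported by sched_pctcpu().  TICK_SHIFT_SKETCH, TICK_SECS_SKETCH and
 * FSHIFT_SKETCH are assumed stand-ins for SCHED_TICK_SHIFT, SCHED_TICK_SECS
 * and FSHIFT; the values here are chosen only for the example.
 */
#include <stdio.h>

#define TICK_SHIFT_SKETCH	10
#define TICK_SECS_SKETCH	10	/* assumed length of the sample window */
#define FSHIFT_SKETCH		11
#define FSCALE_SKETCH		(1 << FSHIFT_SKETCH)

int
main(void)
{
	int hz = 1000;
	int ran = 2500;		/* ticks actually run in a 10 second window */
	int ts_ticks, rtick, pctcpu;

	/* sched_tick() adds 1 << shift for every tick the thread runs. */
	ts_ticks = ran << TICK_SHIFT_SKETCH;

	/* Average run ticks per second over the window, capped at hz. */
	rtick = (ts_ticks >> TICK_SHIFT_SKETCH) / TICK_SECS_SKETCH;
	if (rtick > hz)
		rtick = hz;

	/* Fixed-point fraction of one CPU, following the sched_pctcpu() form. */
	pctcpu = (FSCALE_SKETCH * ((FSCALE_SKETCH * rtick) / hz)) >> FSHIFT_SKETCH;
	printf("pctcpu = %d/%d (~%d%%)\n", pctcpu, FSCALE_SKETCH,
	    (pctcpu * 100) / FSCALE_SKETCH);
	return (0);
}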
21327b20fb19SJeff Roberson */ 21337b20fb19SJeff Roberson if (td == NULL) { 21347b20fb19SJeff Roberson mtx_lock_spin(&sched_lock); 21357b20fb19SJeff Roberson spinlock_exit(); 21367b20fb19SJeff Roberson } else { 21377b20fb19SJeff Roberson MPASS(td->td_lock == &sched_lock); 21387b20fb19SJeff Roberson } 21397b20fb19SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 21407b20fb19SJeff Roberson KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); 21417b20fb19SJeff Roberson PCPU_SET(switchtime, cpu_ticks()); 21427b20fb19SJeff Roberson PCPU_SET(switchticks, ticks); 21437b20fb19SJeff Roberson cpu_throw(td, choosethread()); /* doesn't return */ 21447b20fb19SJeff Roberson } 21457b20fb19SJeff Roberson 21467b20fb19SJeff Roberson void 21477b20fb19SJeff Roberson sched_fork_exit(struct thread *ctd) 21487b20fb19SJeff Roberson { 21497b20fb19SJeff Roberson struct thread *td; 21507b20fb19SJeff Roberson 21517b20fb19SJeff Roberson /* 21527b20fb19SJeff Roberson * Finish setting up thread glue so that it begins execution in a 21537b20fb19SJeff Roberson * non-nested critical section with sched_lock held but not recursed. 21547b20fb19SJeff Roberson */ 21557b20fb19SJeff Roberson ctd->td_oncpu = PCPU_GET(cpuid); 21567b20fb19SJeff Roberson sched_lock.mtx_lock = (uintptr_t)ctd; 21577b20fb19SJeff Roberson THREAD_LOCK_ASSERT(ctd, MA_OWNED | MA_NOTRECURSED); 21587b20fb19SJeff Roberson /* 21597b20fb19SJeff Roberson * Processes normally resume in mi_switch() after being 21607b20fb19SJeff Roberson * cpu_switch()'ed to, but when children start up they arrive here 21617b20fb19SJeff Roberson * instead, so we must do much the same things as mi_switch() would. 21627b20fb19SJeff Roberson */ 21637b20fb19SJeff Roberson if ((td = PCPU_GET(deadthread))) { 21647b20fb19SJeff Roberson PCPU_SET(deadthread, NULL); 21657b20fb19SJeff Roberson thread_stash(td); 21667b20fb19SJeff Roberson } 21677b20fb19SJeff Roberson thread_unlock(ctd); 21687b20fb19SJeff Roberson } 21697b20fb19SJeff Roberson 2170e7d50326SJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 2171e7d50326SJeff Roberson SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 2172e7d50326SJeff Roberson "Scheduler name"); 2173e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, ""); 2174e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, ""); 2175e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, ""); 2176e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, ""); 21777b8bfa0dSJeff Roberson #ifdef SMP 21787b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0, ""); 21797b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_affinity, CTLFLAG_RW, 21807b8bfa0dSJeff Roberson &affinity, 0, ""); 21817b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryself, CTLFLAG_RW, 21827b8bfa0dSJeff Roberson &tryself, 0, ""); 21837b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryselfidle, CTLFLAG_RW, 21847b8bfa0dSJeff Roberson &tryselfidle, 0, ""); 21857b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0, ""); 21867b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_preempt, CTLFLAG_RW, &ipi_preempt, 0, ""); 21877b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_ast, CTLFLAG_RW, &ipi_ast, 0, ""); 21887b8bfa0dSJeff Roberson 
SYSCTL_INT(_kern_sched, OID_AUTO, ipi_thresh, CTLFLAG_RW, &ipi_thresh, 0, ""); 21897b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, steal_htt, CTLFLAG_RW, &steal_htt, 0, ""); 21907b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, steal_busy, CTLFLAG_RW, &steal_busy, 0, ""); 21917b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, busy_thresh, CTLFLAG_RW, &busy_thresh, 0, ""); 21927b20fb19SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, topology, CTLFLAG_RD, &topology, 0, ""); 21937b8bfa0dSJeff Roberson #endif 2194e7d50326SJeff Roberson 2195e7d50326SJeff Roberson /* ps compat */ 2196e7d50326SJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 2197e7d50326SJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 2198e7d50326SJeff Roberson 2199e7d50326SJeff Roberson 2200ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 2201ed062c8dSJulian Elischer #include "kern/kern_switch.c" 2202