/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#ifndef PREEMPTION
#error "SCHED_ULE requires options PREEMPTION"
#endif

/*
 * TODO:
 *	Pick idle from affinity group or self group first.
 *	Implement pick_score.
 */

#define	KTR_ULE	0x0		/* Enable for pickpri debugging. */

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */
#ifdef SMP
	int		ts_rltick;	/* Real last tick, for affinity. */
#endif

	/* originally from kg_sched */
	u_int	skg_slptime;		/* Number of ticks we vol. slept */
	u_int	skg_runtime;		/* Number of ticks we were running */
};
/* flags kept in ts_flags */
#define	TSF_BOUND	0x0001		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0002		/* Thread was added as transferable. */

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	(max((ts)->ts_ltick - (ts)->ts_ftick, hz))

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)
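
/*
 * Illustrative worked example (editor's addition, not from the original
 * sources).  Assume hz = 1000 and the stock 64-entry timeshare range of
 * this era, so SCHED_PRI_RANGE works out to 24 once the two nice halves
 * are reserved.  A thread that ran for 5 of the last 10 seconds has
 * accumulated roughly 5000 << SCHED_TICK_SHIFT in ts_ticks, so
 * SCHED_TICK_HZ() is about 5000 while SCHED_TICK_TOTAL() is about 10000.
 * SCHED_PRI_TICKS() then evaluates to 5000 / (roundup(10000, 24) / 24) ==
 * 5000 / 417 == 11, i.e. roughly the middle of the utilization-dependent
 * range, before the nice offset from SCHED_PRI_NICE() is added.
 */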

/*
 * These determine the interactivity of a process.  Interactivity differs from
 * cpu utilization in that it expresses the voluntary time slept vs time ran
 * while cpu utilization includes all time not running.  This more accurately
 * models the intent of the thread.
 *
 * SLP_RUN_MAX:		Maximum amount of sleep time + run time we'll
 *			accumulate before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)
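
/*
 * Illustrative worked example (editor's addition).  The score computed by
 * sched_interact_score() later in this file is a value in
 * [0, SCHED_INTERACT_MAX) built from the ratio of voluntary sleep time to
 * run time.  A thread that slept three times as long as it ran scores
 * roughly runtime / (slptime / SCHED_INTERACT_HALF) == 50 / 3 == 16, which
 * is under SCHED_INTERACT_THRESH (30) and is therefore treated as
 * interactive.  A thread that ran three times as long as it slept scores
 * roughly 50 + (50 - 50 / 3) == 84 and is treated as a batch thread.
 */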

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;	/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;	/* timeshare run queue. */
	struct runq	tdq_realtime;	/* real-time run queue. */
	u_char		tdq_idx;	/* Current insert index. */
	u_char		tdq_ridx;	/* Current removal index. */
	short		tdq_flags;	/* Thread queue flags */
	int		tdq_load;	/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;	/* Next in tdq group. */
	struct tdq_group *tdq_group;	/* Our processor group. */
#else
	int		tdq_sysload;	/* For loadavg, !ITHD load. */
#endif
};

#define	TDQF_BUSY	0x0001		/* Queue is marked as busy */

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int	tdg_cpus;		/* Count of CPUs in this tdq group. */
	cpumask_t tdg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t tdg_idlemask;		/* Idle cpus in this group. */
	cpumask_t tdg_mask;		/* Bit mask for first cpu. */
	int	tdg_load;		/* Total load of this group. */
	int	tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};

#define	SCHED_AFFINITY_DEFAULT	(hz / 100)
#define	SCHED_AFFINITY(ts)	((ts)->ts_rltick > ticks - affinity)

/*
 * Run-time tunables.
 */
static int rebalance = 0;
static int pick_pri = 1;
static int affinity;
static int tryself = 1;
static int tryselfidle = 1;
static int ipi_ast = 0;
static int ipi_preempt = 1;
static int ipi_thresh = PRI_MIN_KERN;
static int steal_htt = 1;
static int steal_busy = 1;
static int busy_thresh = 4;

/*
 * One thread queue per processor.
 */
static volatile cpumask_t tdq_idle;
static volatile cpumask_t tdq_busy;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);
static inline void sched_pin_td(struct thread *td);
static inline void sched_unpin_td(struct thread *td);

/* Operations on per processor queues */
static struct td_sched * tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_pickidle(struct tdq *, struct td_sched *);
static int tdq_pickpri(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(struct thread *);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *);
static struct td_sched *tdq_steal(struct tdq *, int);

#define	THREAD_CAN_MIGRATE(td)	((td)->td_pinned == 0)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static inline void
sched_pin_td(struct thread *td)
{
	td->td_pinned++;
}

static inline void
sched_unpin_td(struct thread *td)
{
	td->td_pinned--;
}
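
/*
 * Usage note (editor's addition, illustrative): the SMP migration paths
 * below bump td_pinned around the re-queue of a stolen or moved thread so
 * that THREAD_CAN_MIGRATE() is false while it is being placed, e.g.:
 *
 *	ts->ts_cpu = cpu;
 *	sched_pin_td(ts->ts_thread);
 *	sched_add(ts->ts_thread, SRQ_YIELDING);
 *	sched_unpin_td(ts->ts_thread);
 *
 * This is the pattern used by tdq_move() and tdq_idled().
 */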

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm, ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload: %d\n", tdq->tdq_load);
	printf("\ttimeshare idx: %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
		if (tdq->tdq_transferable >= busy_thresh &&
		    (tdq->tdq_flags & TDQF_BUSY) == 0) {
			tdq->tdq_flags |= TDQF_BUSY;
			atomic_set_int(&tdq_busy, 1 << TDQ_ID(tdq));
		}
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		u_char pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * timeshare.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (unsigned char)(pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
		if (tdq->tdq_transferable < busy_thresh &&
		    (tdq->tdq_flags & TDQF_BUSY)) {
			atomic_clear_int(&tdq_busy, 1 << TDQ_ID(tdq));
			tdq->tdq_flags &= ~TDQF_BUSY;
		}
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}
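
#if 0
/*
 * Editor's sketch (illustrative only, not part of the original file): the
 * calendar-queue index computation performed by tdq_runq_add() above,
 * pulled out into a helper for clarity.  tdq_idx rotates forward over
 * time, so a newly queued timeshare thread is placed ahead of the current
 * insert index by its scaled priority, and never on the queue that
 * tdq_ridx is still draining.  For example, with tdq_idx == 10 a thread
 * whose scaled priority is 5 lands on queue (5 + 10) % RQ_NQS; if that
 * happens to equal tdq_ridx it is pushed back one slot so the oldest
 * batch empties first.
 */
static __inline u_char
ts_calendar_slot(struct tdq *tdq, u_char pri)
{

	pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
	pri = (pri + tdq->tdq_idx) % RQ_NQS;
	if (tdq->tdq_ridx != tdq->tdq_idx && pri == tdq->tdq_ridx)
		pri = (unsigned char)(pri - 1) % RQ_NQS;
	return (pri);
}
#endif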

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class != PRI_ITHD &&
	    (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(struct thread *td)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	td->td_sched->ts_rltick = ticks;
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
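
/*
 * Illustrative worked example (editor's addition): with high_load == 7 and
 * low_load == 2 the imbalance is 5, which rounds up to move == 3; if only
 * two threads are transferable the min() clamps move to 2.  The resulting
 * tdq_move() calls leave the pair at roughly 4 and 5 (or 5 and 4), i.e.
 * within one thread of each other, which is as close as a single pass of
 * this pairwise balancer can get.
 */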

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	sched_rem(ts->ts_thread);
	ts->ts_cpu = cpu;
	sched_pin_td(ts->ts_thread);
	sched_add(ts->ts_thread, SRQ_YIELDING);
	sched_unpin_td(ts->ts_thread);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (steal_htt && tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts)
				goto steal;
		}
	}
	if (steal_busy) {
		while (tdq_busy) {
			int cpu;

			cpu = ffs(tdq_busy);
			if (cpu == 0)
				break;
			cpu--;
			steal = TDQ_CPU(cpu);
			if (steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 1);
			if (ts == NULL)
				continue;
			CTR5(KTR_ULE,
			    "tdq_idled: stealing td %p(%s) pri %d from %d busy 0x%X",
			    ts->ts_thread, ts->ts_thread->td_proc->p_comm,
			    ts->ts_thread->td_priority, cpu, tdq_busy);
			goto steal;
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread
	 * bounces back and forth between two idle cores on separate
	 * physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask == tdg->tdg_cpumask)
		atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
steal:
	sched_rem(ts->ts_thread);
	ts->ts_cpu = PCPU_GET(cpuid);
	sched_pin_td(ts->ts_thread);
	sched_add(ts->ts_thread, SRQ_YIELDING);
	sched_unpin_td(ts->ts_thread);

	return (0);
}
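
/*
 * Editor's note (illustrative): tdq_busy is a cpumask, so the ffs() in the
 * loop above picks the lowest-numbered busy CPU first.  For example, with
 * tdq_busy == 0x0c (CPUs 2 and 3 over busy_thresh) ffs() returns 3 and the
 * decrement yields cpu == 2, so CPU 2's queue is raided before CPU 3's.
 * The mask is volatile and is updated with atomic_set_int() and
 * atomic_clear_int() from tdq_runq_add() and tdq_runq_rem().
 */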

static void
tdq_notify(struct td_sched *ts)
{
	struct thread *ctd;
	struct pcpu *pcpu;
	int cpri;
	int pri;
	int cpu;

	cpu = ts->ts_cpu;
	pri = ts->ts_thread->td_priority;
	pcpu = pcpu_find(cpu);
	ctd = pcpu->pc_curthread;
	cpri = ctd->td_priority;

	/*
	 * If our priority is not better than the current priority there is
	 * nothing to do.
	 */
	if (pri > cpri)
		return;
	/*
	 * Always IPI idle.
	 */
	if (cpri > PRI_MIN_IDLE)
		goto sendipi;
	/*
	 * If we're realtime or better and there is timeshare or worse running
	 * send an IPI.
	 */
	if (pri < PRI_MAX_REALTIME && cpri > PRI_MAX_REALTIME)
		goto sendipi;
	/*
	 * Otherwise only IPI if we exceed the threshold.
	 */
	if (pri > ipi_thresh)
		return;
sendipi:
	ctd->td_flags |= TDF_NEEDRESCHED;
	if (cpri < PRI_MIN_IDLE) {
		if (ipi_ast)
			ipi_selected(1 << cpu, IPI_AST);
		else if (ipi_preempt)
			ipi_selected(1 << cpu, IPI_PREEMPT);
	} else
		ipi_selected(1 << cpu, IPI_PREEMPT);
}
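
/*
 * Editor's summary (illustrative) of the decision above: provided the new
 * thread's priority is at least as good as the remote curthread's, the
 * remote CPU is notified when (a) it is running its idle thread
 * (cpri > PRI_MIN_IDLE), (b) the new thread is realtime-or-better while
 * the remote thread is timeshare-or-worse, or (c) the new thread's
 * priority is numerically at or below ipi_thresh (kernel priority or
 * better by default).  An idle target always gets IPI_PREEMPT; a busy
 * target gets IPI_AST or IPI_PREEMPT according to the ipi_ast/ipi_preempt
 * tunables (or no IPI at all if both are disabled), and in every notified
 * case TDF_NEEDRESCHED is set so the target reschedules soon.
 */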

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_pickidle(struct tdq *tdq, struct td_sched *ts)
{
	struct tdq_group *tdg;
	int self;
	int cpu;

	self = PCPU_GET(cpuid);
	if (smp_started == 0)
		return (self);
	/*
	 * If the current CPU has idled, just run it here.
	 */
	if ((tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0)
		return (self);
	/*
	 * Try the last group we ran on.
	 */
	tdg = TDQ_CPU(ts->ts_cpu)->tdq_group;
	cpu = ffs(tdg->tdg_idlemask);
	if (cpu)
		return (cpu - 1);
	/*
	 * Search for an idle group.
	 */
	cpu = ffs(tdq_idle);
	if (cpu)
		return (cpu - 1);
	/*
	 * XXX If there are no idle groups, check for an idle core.
	 */
	/*
	 * No idle CPUs?
	 */
	return (self);
}

static int
tdq_pickpri(struct tdq *tdq, struct td_sched *ts, int flags)
{
	struct pcpu *pcpu;
	int lowpri;
	int lowcpu;
	int lowload;
	int load;
	int self;
	int pri;
	int cpu;

	self = PCPU_GET(cpuid);
	if (smp_started == 0)
		return (self);

	pri = ts->ts_thread->td_priority;
	/*
	 * Regardless of affinity, if the last cpu is idle send it there.
	 */
	pcpu = pcpu_find(ts->ts_cpu);
	if (pcpu->pc_curthread->td_priority > PRI_MIN_IDLE) {
		CTR5(KTR_ULE,
		    "ts_cpu %d idle, ltick %d ticks %d pri %d curthread %d",
		    ts->ts_cpu, ts->ts_rltick, ticks, pri,
		    pcpu->pc_curthread->td_priority);
		return (ts->ts_cpu);
	}
	/*
	 * If we have affinity, try to place it on the cpu we last ran on.
	 */
	if (SCHED_AFFINITY(ts) && pcpu->pc_curthread->td_priority > pri) {
		CTR5(KTR_ULE,
		    "affinity for %d, ltick %d ticks %d pri %d curthread %d",
		    ts->ts_cpu, ts->ts_rltick, ticks, pri,
		    pcpu->pc_curthread->td_priority);
		return (ts->ts_cpu);
	}
	/*
	 * Try ourself first; if we're running something lower priority this
	 * may have some locality with the waking thread and execute faster
	 * here.
	 */
	if (tryself) {
		/*
		 * If we're being awoken by an interrupt thread or the waker
		 * is going right to sleep run here as well.
		 */
		if ((TDQ_SELF()->tdq_load == 1) && (flags & SRQ_YIELDING ||
		    curthread->td_pri_class == PRI_ITHD)) {
			CTR2(KTR_ULE, "tryself load %d flags %d",
			    TDQ_SELF()->tdq_load, flags);
			return (self);
		}
	}
	/*
	 * Look for an idle group.
	 */
	CTR1(KTR_ULE, "tdq_idle %X", tdq_idle);
	cpu = ffs(tdq_idle);
	if (cpu)
		return (cpu - 1);
	if (tryselfidle && pri < curthread->td_priority) {
		CTR1(KTR_ULE, "tryself %d",
		    curthread->td_priority);
		return (self);
	}
	/*
	 * Now search for the cpu running the lowest priority thread with
	 * the least load.
	 */
	lowload = 0;
	lowpri = lowcpu = 0;
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		pcpu = pcpu_find(cpu);
		pri = pcpu->pc_curthread->td_priority;
		CTR4(KTR_ULE,
		    "cpu %d pri %d lowcpu %d lowpri %d",
		    cpu, pri, lowcpu, lowpri);
		if (pri < lowpri)
			continue;
		load = TDQ_CPU(cpu)->tdq_load;
		if (lowpri && lowpri == pri && load > lowload)
			continue;
		lowpri = pri;
		lowcpu = cpu;
		lowload = load;
	}

	return (lowcpu);
}

#endif	/* SMP */
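
/*
 * Editor's summary (illustrative) of the CPU selection order implemented
 * by tdq_pickpri() above: 1) the thread's last CPU if that CPU is now
 * idle; 2) the last CPU if the affinity window (SCHED_AFFINITY) is still
 * open and its current thread is lower priority; 3) the current CPU when
 * it is nearly idle (load of 1) and the waker is an interrupt thread or is
 * about to yield; 4) any CPU in an idle group; 5) the current CPU if the
 * new thread beats curthread (tryselfidle); 6) otherwise the CPU running
 * the lowest-priority (numerically highest) thread, breaking ties by the
 * smallest run-queue load.
 */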

/*
 * Pick the highest priority task we have and return it.
 */
static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
}
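
#if 0
/*
 * Editor's sketch (hypothetical, not part of the original file): one way a
 * pick-and-dequeue path could consume tdq_choose(), pairing it with
 * tdq_runq_rem() the same way tdq_runq_add() is paired on the enqueue
 * side.  The helper name below is invented for illustration only.
 */
static struct td_sched *
tdq_pick_and_remove(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = tdq_choose(tdq);
	if (ts != NULL)
		tdq_runq_rem(tdq, ts);
	return (ts);
}
#endif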

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz/10);	/* ~100ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq_setup(&tdq_cpu[i]);
	}
	if (1) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz/10);	/* ~100ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
#ifdef SMP
	affinity = SCHED_AFFINITY_DEFAULT;
#endif
	mtx_unlock_spin(&sched_lock);
}
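
/*
 * Illustrative worked example (editor's addition): with hz = 1000 and a
 * typical stathz of 128, sched_slice becomes 12 stathz ticks (about 94ms,
 * matching the "~100ms" comment above) and tickincr becomes
 * (1000 << 10) / 128 == 8000, i.e. each stathz tick charges the running
 * thread 8000 units, which shifts back down to roughly 1000 / 128 hz
 * ticks once divided by 1 << SCHED_TICK_SHIFT.
 */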
1121e7d50326SJeff Roberson */ 1122e7d50326SJeff Roberson score = sched_interact_score(td); 1123e7d50326SJeff Roberson if (score < sched_interact) { 1124e7d50326SJeff Roberson pri = PRI_MIN_REALTIME; 1125e7d50326SJeff Roberson pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact) 1126e7d50326SJeff Roberson * score; 1127e7d50326SJeff Roberson KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME, 11289a93305aSJeff Roberson ("sched_priority: invalid interactive priority %d score %d", 11299a93305aSJeff Roberson pri, score)); 1130e7d50326SJeff Roberson } else { 1131e7d50326SJeff Roberson pri = SCHED_PRI_MIN; 1132e7d50326SJeff Roberson if (td->td_sched->ts_ticks) 1133e7d50326SJeff Roberson pri += SCHED_PRI_TICKS(td->td_sched); 1134e7d50326SJeff Roberson pri += SCHED_PRI_NICE(td->td_proc->p_nice); 11358ab80cf0SJeff Roberson if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) { 11368ab80cf0SJeff Roberson static int once = 1; 11378ab80cf0SJeff Roberson if (once) { 11388ab80cf0SJeff Roberson printf("sched_priority: invalid priority %d", 11398ab80cf0SJeff Roberson pri); 11408ab80cf0SJeff Roberson printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n", 11418ab80cf0SJeff Roberson td->td_proc->p_nice, 11428ab80cf0SJeff Roberson td->td_sched->ts_ticks, 11438ab80cf0SJeff Roberson td->td_sched->ts_ftick, 11448ab80cf0SJeff Roberson td->td_sched->ts_ltick, 11458ab80cf0SJeff Roberson SCHED_PRI_TICKS(td->td_sched)); 11468ab80cf0SJeff Roberson once = 0; 11478ab80cf0SJeff Roberson } 11488ab80cf0SJeff Roberson pri = min(max(pri, PRI_MIN_TIMESHARE), 11498ab80cf0SJeff Roberson PRI_MAX_TIMESHARE); 11508ab80cf0SJeff Roberson } 1151e7d50326SJeff Roberson } 11528460a577SJohn Birrell sched_user_prio(td, pri); 115335e6168fSJeff Roberson 115415dc847eSJeff Roberson return; 115535e6168fSJeff Roberson } 115635e6168fSJeff Roberson 115735e6168fSJeff Roberson /* 1158d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1159d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1160d322132cSJeff Roberson */ 11614b60e324SJeff Roberson static void 11628460a577SJohn Birrell sched_interact_update(struct thread *td) 11634b60e324SJeff Roberson { 1164155b6ca1SJeff Roberson struct td_sched *ts; 11659a93305aSJeff Roberson u_int sum; 11663f741ca1SJeff Roberson 1167155b6ca1SJeff Roberson ts = td->td_sched; 1168155b6ca1SJeff Roberson sum = ts->skg_runtime + ts->skg_slptime; 1169d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1170d322132cSJeff Roberson return; 1171d322132cSJeff Roberson /* 1172155b6ca1SJeff Roberson * This only happens from two places: 1173155b6ca1SJeff Roberson * 1) We have added an unusual amount of run time from fork_exit. 1174155b6ca1SJeff Roberson * 2) We have added an unusual amount of sleep time from sched_sleep(). 1175155b6ca1SJeff Roberson */ 1176155b6ca1SJeff Roberson if (sum > SCHED_SLP_RUN_MAX * 2) { 1177155b6ca1SJeff Roberson if (ts->skg_runtime > ts->skg_slptime) { 1178155b6ca1SJeff Roberson ts->skg_runtime = SCHED_SLP_RUN_MAX; 1179155b6ca1SJeff Roberson ts->skg_slptime = 1; 1180155b6ca1SJeff Roberson } else { 1181155b6ca1SJeff Roberson ts->skg_slptime = SCHED_SLP_RUN_MAX; 1182155b6ca1SJeff Roberson ts->skg_runtime = 1; 1183155b6ca1SJeff Roberson } 1184155b6ca1SJeff Roberson return; 1185155b6ca1SJeff Roberson } 1186155b6ca1SJeff Roberson /* 1187d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1188d322132cSJeff Roberson * will not bring us back into range. 
Dividing by two here forces 11892454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1190d322132cSJeff Roberson */ 119137a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1192155b6ca1SJeff Roberson ts->skg_runtime /= 2; 1193155b6ca1SJeff Roberson ts->skg_slptime /= 2; 1194d322132cSJeff Roberson return; 1195d322132cSJeff Roberson } 1196155b6ca1SJeff Roberson ts->skg_runtime = (ts->skg_runtime / 5) * 4; 1197155b6ca1SJeff Roberson ts->skg_slptime = (ts->skg_slptime / 5) * 4; 1198d322132cSJeff Roberson } 1199d322132cSJeff Roberson 1200d322132cSJeff Roberson static void 12018460a577SJohn Birrell sched_interact_fork(struct thread *td) 1202d322132cSJeff Roberson { 1203d322132cSJeff Roberson int ratio; 1204d322132cSJeff Roberson int sum; 1205d322132cSJeff Roberson 12068460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1207d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1208d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 12098460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 12108460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 12114b60e324SJeff Roberson } 12124b60e324SJeff Roberson } 12134b60e324SJeff Roberson 1214e1f89c22SJeff Roberson static int 12158460a577SJohn Birrell sched_interact_score(struct thread *td) 1216e1f89c22SJeff Roberson { 1217210491d3SJeff Roberson int div; 1218e1f89c22SJeff Roberson 12198460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 12208460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1221210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 12228460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 12238460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 12248460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 12258460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1226e1f89c22SJeff Roberson } 1227e1f89c22SJeff Roberson 1228210491d3SJeff Roberson /* 1229210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1230210491d3SJeff Roberson */ 1231210491d3SJeff Roberson return (0); 1232e1f89c22SJeff Roberson 1233e1f89c22SJeff Roberson } 1234e1f89c22SJeff Roberson 123515dc847eSJeff Roberson /* 1236e7d50326SJeff Roberson * Called from proc0_init() to bootstrap the scheduler. 1237ed062c8dSJulian Elischer */ 1238ed062c8dSJulian Elischer void 1239ed062c8dSJulian Elischer schedinit(void) 1240ed062c8dSJulian Elischer { 1241e7d50326SJeff Roberson 1242ed062c8dSJulian Elischer /* 1243ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1244ed062c8dSJulian Elischer */ 1245ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1246ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1247e7d50326SJeff Roberson td_sched0.ts_ltick = ticks; 12488ab80cf0SJeff Roberson td_sched0.ts_ftick = ticks; 1249ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1250ed062c8dSJulian Elischer } 1251ed062c8dSJulian Elischer 1252ed062c8dSJulian Elischer /* 125315dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 125415dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 1255e7d50326SJeff Roberson * at most sched_slice stathz ticks. 
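 *
 * [Editor's worked example -- not part of the original source, assuming
 *  the common hz = 1000 and stathz = 128 with SCHED_TICK_SHIFT = 10:
 *  sched_initticks() above sets sched_slice = 128 / 10 = 12 stathz
 *  ticks (about 94 ms) and tickincr = (1000 << 10) / 128 = 8000, and
 *  sched_rr_interval() below returns 1000 / (128 / 12) = 100 hz ticks,
 *  which matches the advertised ~100 ms round-robin interval.]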
125615dc847eSJeff Roberson */ 125735e6168fSJeff Roberson int 125835e6168fSJeff Roberson sched_rr_interval(void) 125935e6168fSJeff Roberson { 1260e7d50326SJeff Roberson 1261e7d50326SJeff Roberson /* Convert sched_slice to hz */ 1262e7d50326SJeff Roberson return (hz/(realstathz/sched_slice)); 126335e6168fSJeff Roberson } 126435e6168fSJeff Roberson 126522bf7d9aSJeff Roberson static void 1266ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 126735e6168fSJeff Roberson { 1268e7d50326SJeff Roberson 1269e7d50326SJeff Roberson if (ts->ts_ticks == 0) 1270e7d50326SJeff Roberson return; 12718ab80cf0SJeff Roberson if (ticks - (hz / 10) < ts->ts_ltick && 12728ab80cf0SJeff Roberson SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX) 12738ab80cf0SJeff Roberson return; 127435e6168fSJeff Roberson /* 127535e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1276210491d3SJeff Roberson */ 1277e7d50326SJeff Roberson if (ts->ts_ltick > ticks - SCHED_TICK_TARG) 1278ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1279e7d50326SJeff Roberson SCHED_TICK_TARG; 1280e7d50326SJeff Roberson else 1281ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1282ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1283e7d50326SJeff Roberson ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG; 128435e6168fSJeff Roberson } 128535e6168fSJeff Roberson 1286e7d50326SJeff Roberson static void 1287f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 128835e6168fSJeff Roberson { 1289ad1e7d28SJulian Elischer struct td_sched *ts; 129035e6168fSJeff Roberson 129181d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 129281d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 129381d47d3fSJeff Roberson curthread->td_proc->p_comm); 1294ad1e7d28SJulian Elischer ts = td->td_sched; 129535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1296f5c157d9SJohn Baldwin if (td->td_priority == prio) 1297f5c157d9SJohn Baldwin return; 1298e7d50326SJeff Roberson 12993f872f85SJeff Roberson if (TD_ON_RUNQ(td) && prio < td->td_priority) { 13003f741ca1SJeff Roberson /* 13013f741ca1SJeff Roberson * If the priority has been elevated due to priority 13023f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 1303e7d50326SJeff Roberson * queue. This could be optimized to not re-add in some 1304e7d50326SJeff Roberson * cases. 1305f2b74cbfSJeff Roberson */ 1306e7d50326SJeff Roberson sched_rem(td); 1307e7d50326SJeff Roberson td->td_priority = prio; 1308e7d50326SJeff Roberson sched_add(td, SRQ_BORROWING); 13093f741ca1SJeff Roberson } else 13103f741ca1SJeff Roberson td->td_priority = prio; 131135e6168fSJeff Roberson } 131235e6168fSJeff Roberson 1313f5c157d9SJohn Baldwin /* 1314f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1315f5c157d9SJohn Baldwin * priority. 1316f5c157d9SJohn Baldwin */ 1317f5c157d9SJohn Baldwin void 1318f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1319f5c157d9SJohn Baldwin { 1320f5c157d9SJohn Baldwin 1321f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1322f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1323f5c157d9SJohn Baldwin } 1324f5c157d9SJohn Baldwin 1325f5c157d9SJohn Baldwin /* 1326f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1327f5c157d9SJohn Baldwin * over. 
The prio argument is the minimum priority the thread 1328f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1329f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1330f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1331f5c157d9SJohn Baldwin * of prio. 1332f5c157d9SJohn Baldwin */ 1333f5c157d9SJohn Baldwin void 1334f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1335f5c157d9SJohn Baldwin { 1336f5c157d9SJohn Baldwin u_char base_pri; 1337f5c157d9SJohn Baldwin 1338f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1339f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 13408460a577SJohn Birrell base_pri = td->td_user_pri; 1341f5c157d9SJohn Baldwin else 1342f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1343f5c157d9SJohn Baldwin if (prio >= base_pri) { 1344f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1345f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1346f5c157d9SJohn Baldwin } else 1347f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1348f5c157d9SJohn Baldwin } 1349f5c157d9SJohn Baldwin 1350f5c157d9SJohn Baldwin void 1351f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1352f5c157d9SJohn Baldwin { 1353f5c157d9SJohn Baldwin u_char oldprio; 1354f5c157d9SJohn Baldwin 1355f5c157d9SJohn Baldwin /* First, update the base priority. */ 1356f5c157d9SJohn Baldwin td->td_base_pri = prio; 1357f5c157d9SJohn Baldwin 1358f5c157d9SJohn Baldwin /* 135950aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1360f5c157d9SJohn Baldwin * ever lower the priority. 1361f5c157d9SJohn Baldwin */ 1362f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1363f5c157d9SJohn Baldwin return; 1364f5c157d9SJohn Baldwin 1365f5c157d9SJohn Baldwin /* Change the real priority. */ 1366f5c157d9SJohn Baldwin oldprio = td->td_priority; 1367f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1368f5c157d9SJohn Baldwin 1369f5c157d9SJohn Baldwin /* 1370f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1371f5c157d9SJohn Baldwin * its state. 
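 *
 * [Editor's illustrative note -- not part of the original source; the
 *  priority numbers are hypothetical and lower numbers are better.  If
 *  a thread with base priority 160 currently holds a lent priority of
 *  100 (TDF_BORROWING set), then sched_prio(td, 180) only records the
 *  new base: the borrowing check above sees 100 < 180 and returns
 *  before touching the real priority, so the boost survives.  When the
 *  turnstile later hands sched_unlend_prio() the minimum priority still
 *  required and that value is no better than the base, TDF_BORROWING is
 *  cleared and the thread falls back to its own priority.]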
1372f5c157d9SJohn Baldwin */ 1373f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1374f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1375f5c157d9SJohn Baldwin } 1376f5c157d9SJohn Baldwin 137735e6168fSJeff Roberson void 13788460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13793db720fdSDavid Xu { 13803db720fdSDavid Xu u_char oldprio; 13813db720fdSDavid Xu 13828460a577SJohn Birrell td->td_base_user_pri = prio; 1383fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1384fc6c30f6SJulian Elischer return; 13858460a577SJohn Birrell oldprio = td->td_user_pri; 13868460a577SJohn Birrell td->td_user_pri = prio; 13873db720fdSDavid Xu 13883db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13893db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13903db720fdSDavid Xu } 13913db720fdSDavid Xu 13923db720fdSDavid Xu void 13933db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13943db720fdSDavid Xu { 13953db720fdSDavid Xu u_char oldprio; 13963db720fdSDavid Xu 13973db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13983db720fdSDavid Xu 1399f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 14008460a577SJohn Birrell td->td_user_pri = prio; 14013db720fdSDavid Xu 14023db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 14033db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 14043db720fdSDavid Xu } 14053db720fdSDavid Xu 14063db720fdSDavid Xu void 14073db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 14083db720fdSDavid Xu { 14093db720fdSDavid Xu u_char base_pri; 14103db720fdSDavid Xu 14118460a577SJohn Birrell base_pri = td->td_base_user_pri; 14123db720fdSDavid Xu if (prio >= base_pri) { 14133db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 14148460a577SJohn Birrell sched_user_prio(td, base_pri); 14153db720fdSDavid Xu } else 14163db720fdSDavid Xu sched_lend_user_prio(td, prio); 14173db720fdSDavid Xu } 14183db720fdSDavid Xu 14193db720fdSDavid Xu void 14203389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 142135e6168fSJeff Roberson { 1422c02bbb43SJeff Roberson struct tdq *tdq; 1423ad1e7d28SJulian Elischer struct td_sched *ts; 14247b8bfa0dSJeff Roberson int preempt; 142535e6168fSJeff Roberson 142635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 142735e6168fSJeff Roberson 14287b8bfa0dSJeff Roberson preempt = flags & SW_PREEMPT; 1429c02bbb43SJeff Roberson tdq = TDQ_SELF(); 1430e7d50326SJeff Roberson ts = td->td_sched; 1431060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1432060563ecSJulian Elischer td->td_oncpu = NOCPU; 143352eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 143477918643SStephan Uphoff td->td_owepreempt = 0; 1435b11fdad0SJeff Roberson /* 1436ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1437b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1438b11fdad0SJeff Roberson */ 1439486a9414SJulian Elischer if (TD_IS_IDLETHREAD(td)) { 1440bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 14417b8bfa0dSJeff Roberson } else { 1442c02bbb43SJeff Roberson tdq_load_rem(tdq, ts); 1443ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1444f2b74cbfSJeff Roberson /* 1445ed062c8dSJulian Elischer * Don't allow the thread to migrate 1446ed062c8dSJulian Elischer * from a preemption. 1447f2b74cbfSJeff Roberson */ 14487b8bfa0dSJeff Roberson if (preempt) 14491e516cf5SJeff Roberson sched_pin_td(td); 14507a5e5e2aSJeff Roberson sched_add(td, preempt ? 
1451598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1452598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 14537b8bfa0dSJeff Roberson if (preempt) 14541e516cf5SJeff Roberson sched_unpin_td(td); 14558460a577SJohn Birrell } 1456ed062c8dSJulian Elischer } 1457d39063f2SJulian Elischer if (newtd != NULL) { 1458c20c691bSJulian Elischer /* 14596680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 14606680bbd5SJeff Roberson * added to the run queue and then chosen. 1461c20c691bSJulian Elischer */ 1462c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1463ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1464d39063f2SJulian Elischer } else 14652454aaf5SJeff Roberson newtd = choosethread(); 1466ebccf1e3SJoseph Koshy if (td != newtd) { 1467ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1468ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1469ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1470ebccf1e3SJoseph Koshy #endif 14718460a577SJohn Birrell 1472ae53b483SJeff Roberson cpu_switch(td, newtd); 1473ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1474ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1475ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1476ebccf1e3SJoseph Koshy #endif 1477ebccf1e3SJoseph Koshy } 1478ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1479060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 148035e6168fSJeff Roberson } 148135e6168fSJeff Roberson 148235e6168fSJeff Roberson void 1483fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 148435e6168fSJeff Roberson { 148535e6168fSJeff Roberson struct thread *td; 148635e6168fSJeff Roberson 1487fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14880b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 1489e7d50326SJeff Roberson 1490fa885116SJulian Elischer p->p_nice = nice; 14918460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14928460a577SJohn Birrell sched_priority(td); 1493e7d50326SJeff Roberson sched_prio(td, td->td_base_user_pri); 149435e6168fSJeff Roberson } 1495fa885116SJulian Elischer } 149635e6168fSJeff Roberson 149735e6168fSJeff Roberson void 149844f3b092SJohn Baldwin sched_sleep(struct thread *td) 149935e6168fSJeff Roberson { 1500e7d50326SJeff Roberson 150135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 150235e6168fSJeff Roberson 1503ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 150435e6168fSJeff Roberson } 150535e6168fSJeff Roberson 150635e6168fSJeff Roberson void 150735e6168fSJeff Roberson sched_wakeup(struct thread *td) 150835e6168fSJeff Roberson { 150914618990SJeff Roberson struct td_sched *ts; 1510e7d50326SJeff Roberson int slptime; 1511e7d50326SJeff Roberson 151235e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 151314618990SJeff Roberson ts = td->td_sched; 151435e6168fSJeff Roberson /* 1515e7d50326SJeff Roberson * If we slept for more than a tick update our interactivity and 1516e7d50326SJeff Roberson * priority. 
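 *
 * [Editor's worked example -- not part of the original source:
 *  ts_slptime holds the value of ticks saved in sched_sleep(), so
 *  (ticks - slptime) below is the number of hz ticks spent asleep.
 *  Assuming hz = 1000 and SCHED_TICK_SHIFT = 10, a thread that slept
 *  250 ms credits 250 << 10 = 256000 to skg_slptime, which lowers its
 *  interactivity score and, through sched_priority(), can move it back
 *  into the interactive realtime queue.]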
151735e6168fSJeff Roberson */ 151814618990SJeff Roberson slptime = ts->ts_slptime; 151914618990SJeff Roberson ts->ts_slptime = 0; 1520e7d50326SJeff Roberson if (slptime && slptime != ticks) { 15219a93305aSJeff Roberson u_int hzticks; 1522f1e8dc4aSJeff Roberson 1523e7d50326SJeff Roberson hzticks = (ticks - slptime) << SCHED_TICK_SHIFT; 152414618990SJeff Roberson ts->skg_slptime += hzticks; 15258460a577SJohn Birrell sched_interact_update(td); 152614618990SJeff Roberson sched_pctcpu_update(ts); 15278460a577SJohn Birrell sched_priority(td); 1528f1e8dc4aSJeff Roberson } 152914618990SJeff Roberson /* Reset the slice value after we sleep. */ 153014618990SJeff Roberson ts->ts_slice = sched_slice; 15317a5e5e2aSJeff Roberson sched_add(td, SRQ_BORING); 153235e6168fSJeff Roberson } 153335e6168fSJeff Roberson 153435e6168fSJeff Roberson /* 153535e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 153635e6168fSJeff Roberson * priority. 153735e6168fSJeff Roberson */ 153835e6168fSJeff Roberson void 15398460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 154015dc847eSJeff Roberson { 15418460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1542ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1543e7d50326SJeff Roberson /* 1544e7d50326SJeff Roberson * Penalize the parent and child for forking. 1545e7d50326SJeff Roberson */ 1546e7d50326SJeff Roberson sched_interact_fork(child); 1547e7d50326SJeff Roberson sched_priority(child); 1548e7d50326SJeff Roberson td->td_sched->skg_runtime += tickincr; 1549e7d50326SJeff Roberson sched_interact_update(td); 1550e7d50326SJeff Roberson sched_priority(td); 1551ad1e7d28SJulian Elischer } 1552ad1e7d28SJulian Elischer 1553ad1e7d28SJulian Elischer void 1554ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1555ad1e7d28SJulian Elischer { 1556ad1e7d28SJulian Elischer struct td_sched *ts; 1557ad1e7d28SJulian Elischer struct td_sched *ts2; 15588460a577SJohn Birrell 1559e7d50326SJeff Roberson /* 1560e7d50326SJeff Roberson * Initialize child. 1561e7d50326SJeff Roberson */ 1562ed062c8dSJulian Elischer sched_newthread(child); 1563ad1e7d28SJulian Elischer ts = td->td_sched; 1564ad1e7d28SJulian Elischer ts2 = child->td_sched; 1565ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1566ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1567e7d50326SJeff Roberson /* 1568e7d50326SJeff Roberson * Grab our parents cpu estimation information and priority. 1569e7d50326SJeff Roberson */ 1570ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1571ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1572ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 1573e7d50326SJeff Roberson child->td_user_pri = td->td_user_pri; 1574e7d50326SJeff Roberson child->td_base_user_pri = td->td_base_user_pri; 1575e7d50326SJeff Roberson /* 1576e7d50326SJeff Roberson * And update interactivity score. 1577e7d50326SJeff Roberson */ 1578e7d50326SJeff Roberson ts2->skg_slptime = ts->skg_slptime; 1579e7d50326SJeff Roberson ts2->skg_runtime = ts->skg_runtime; 1580e7d50326SJeff Roberson ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. 
*/ 158115dc847eSJeff Roberson } 158215dc847eSJeff Roberson 158315dc847eSJeff Roberson void 15848460a577SJohn Birrell sched_class(struct thread *td, int class) 158515dc847eSJeff Roberson { 158615dc847eSJeff Roberson 15872056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15888460a577SJohn Birrell if (td->td_pri_class == class) 158915dc847eSJeff Roberson return; 159015dc847eSJeff Roberson 1591ef1134c9SJeff Roberson #ifdef SMP 1592155b9987SJeff Roberson /* 1593155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1594155b9987SJeff Roberson * count because could be changing to or from an interrupt 1595155b9987SJeff Roberson * class. 1596155b9987SJeff Roberson */ 15977a5e5e2aSJeff Roberson if (TD_ON_RUNQ(td)) { 15981e516cf5SJeff Roberson struct tdq *tdq; 15991e516cf5SJeff Roberson 16001e516cf5SJeff Roberson tdq = TDQ_CPU(td->td_sched->ts_cpu); 16011e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1602d2ad694cSJeff Roberson tdq->tdq_transferable--; 1603d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 160480f86c9fSJeff Roberson } 16051e516cf5SJeff Roberson td->td_pri_class = class; 16061e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1607d2ad694cSJeff Roberson tdq->tdq_transferable++; 1608d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 160980f86c9fSJeff Roberson } 1610155b9987SJeff Roberson } 1611ef1134c9SJeff Roberson #endif 16128460a577SJohn Birrell td->td_pri_class = class; 161335e6168fSJeff Roberson } 161435e6168fSJeff Roberson 161535e6168fSJeff Roberson /* 161635e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 161735e6168fSJeff Roberson */ 161835e6168fSJeff Roberson void 1619fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 162035e6168fSJeff Roberson { 1621e7d50326SJeff Roberson struct thread *td; 1622141ad61cSJeff Roberson 16238460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1624fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 16258460a577SJohn Birrell 1626e7d50326SJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1627e7d50326SJeff Roberson sched_exit_thread(td, child); 1628ad1e7d28SJulian Elischer } 1629ad1e7d28SJulian Elischer 1630ad1e7d28SJulian Elischer void 1631fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1632ad1e7d28SJulian Elischer { 1633fc6c30f6SJulian Elischer 1634e7d50326SJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1635e7d50326SJeff Roberson child, child->td_proc->p_comm, child->td_priority); 1636e7d50326SJeff Roberson 1637e7d50326SJeff Roberson tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1638e7d50326SJeff Roberson #ifdef KSE 1639e7d50326SJeff Roberson /* 1640e7d50326SJeff Roberson * KSE forks and exits so often that this penalty causes short-lived 1641e7d50326SJeff Roberson * threads to always be non-interactive. This causes mozilla to 1642e7d50326SJeff Roberson * crawl under load. 1643e7d50326SJeff Roberson */ 1644e7d50326SJeff Roberson if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc) 1645e7d50326SJeff Roberson return; 1646e7d50326SJeff Roberson #endif 1647e7d50326SJeff Roberson /* 1648e7d50326SJeff Roberson * Give the child's runtime to the parent without returning the 1649e7d50326SJeff Roberson * sleep time as a penalty to the parent. This causes shells that 1650e7d50326SJeff Roberson * launch expensive things to mark their children as expensive. 
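 *
 * [Editor's worked example -- not part of the original source, assuming
 *  SCHED_INTERACT_HALF = 50: a shell with skg_runtime = 50000 and
 *  skg_slptime = 400000 scores 50000 / (400000 / 50) = 6 and looks
 *  interactive.  If a CPU-bound child exits and donates 3000000 of
 *  runtime, the parent's history becomes 3050000 run / 400000 sleep and
 *  its score jumps to 50 + (50 - 400000 / (3050000 / 50)) = 94, so the
 *  shell is now treated as expensive as well; sched_interact_update()
 *  below then clamps the history if it has grown past
 *  SCHED_SLP_RUN_MAX.]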
1651e7d50326SJeff Roberson */ 1652fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1653fc6c30f6SJulian Elischer sched_interact_update(td); 1654e7d50326SJeff Roberson sched_priority(td); 1655ad1e7d28SJulian Elischer } 1656ad1e7d28SJulian Elischer 1657ad1e7d28SJulian Elischer void 1658ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1659ad1e7d28SJulian Elischer { 1660ad1e7d28SJulian Elischer /* 1661ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1662ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1663ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1664ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1665ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1666ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1667ad1e7d28SJulian Elischer * it perfectly here. 1668ad1e7d28SJulian Elischer */ 1669ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1670ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1671ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1672ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1673ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1674ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1675ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1676ad1e7d28SJulian Elischer } 167735e6168fSJeff Roberson } 167835e6168fSJeff Roberson 167935e6168fSJeff Roberson void 16807cf90fb3SJeff Roberson sched_clock(struct thread *td) 168135e6168fSJeff Roberson { 1682ad1e7d28SJulian Elischer struct tdq *tdq; 1683ad1e7d28SJulian Elischer struct td_sched *ts; 168435e6168fSJeff Roberson 1685dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1686dc03363dSJeff Roberson #ifdef SMP 16877b8bfa0dSJeff Roberson sched_smp_tick(td); 1688dc03363dSJeff Roberson #endif 16893f872f85SJeff Roberson tdq = TDQ_SELF(); 16903f872f85SJeff Roberson /* 16913f872f85SJeff Roberson * Advance the insert index once for each tick to ensure that all 16923f872f85SJeff Roberson * threads get a chance to run. 16933f872f85SJeff Roberson */ 16943f872f85SJeff Roberson if (tdq->tdq_idx == tdq->tdq_ridx) { 16953f872f85SJeff Roberson tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS; 16963f872f85SJeff Roberson if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx])) 16973f872f85SJeff Roberson tdq->tdq_ridx = tdq->tdq_idx; 16983f872f85SJeff Roberson } 16993f872f85SJeff Roberson ts = td->td_sched; 17003f741ca1SJeff Roberson /* 17018460a577SJohn Birrell * We only do slicing code for TIMESHARE threads. 1702a8949de2SJeff Roberson */ 17038460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1704a8949de2SJeff Roberson return; 1705a8949de2SJeff Roberson /* 17063f872f85SJeff Roberson * We used a tick; charge it to the thread so that we can compute our 170715dc847eSJeff Roberson * interactivity. 170815dc847eSJeff Roberson */ 17098460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 17108460a577SJohn Birrell sched_interact_update(td); 171135e6168fSJeff Roberson /* 171235e6168fSJeff Roberson * We used up one time slice. 171335e6168fSJeff Roberson */ 1714ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 171515dc847eSJeff Roberson return; 171635e6168fSJeff Roberson /* 171715dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 
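 *
 * [Editor's illustrative note -- not part of the original source: with
 *  the sched_slice of 12 stathz ticks computed in sched_initticks()
 *  (assuming stathz = 128), a timeshare thread reaches this point after
 *  roughly 94 ms of accumulated run time.  sched_priority() below
 *  recomputes its priority from the interactivity score and recent
 *  ticks, and TDF_NEEDRESCHED forces a switch, so a CPU hog gives up
 *  the CPU and waits its turn again in the timeshare queue whose insert
 *  index is rotated once per tick above.]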
171835e6168fSJeff Roberson */ 17198460a577SJohn Birrell sched_priority(td); 17204a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 172135e6168fSJeff Roberson } 172235e6168fSJeff Roberson 172335e6168fSJeff Roberson int 172435e6168fSJeff Roberson sched_runnable(void) 172535e6168fSJeff Roberson { 1726ad1e7d28SJulian Elischer struct tdq *tdq; 1727b90816f1SJeff Roberson int load; 172835e6168fSJeff Roberson 1729b90816f1SJeff Roberson load = 1; 1730b90816f1SJeff Roberson 1731ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 173222bf7d9aSJeff Roberson #ifdef SMP 17337b8bfa0dSJeff Roberson if (tdq_busy) 17347b8bfa0dSJeff Roberson goto out; 173522bf7d9aSJeff Roberson #endif 17363f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1737d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 17383f741ca1SJeff Roberson goto out; 17393f741ca1SJeff Roberson } else 1740d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1741b90816f1SJeff Roberson goto out; 1742b90816f1SJeff Roberson load = 0; 1743b90816f1SJeff Roberson out: 1744b90816f1SJeff Roberson return (load); 174535e6168fSJeff Roberson } 174635e6168fSJeff Roberson 17477a5e5e2aSJeff Roberson struct thread * 1748c9f25d8fSJeff Roberson sched_choose(void) 1749c9f25d8fSJeff Roberson { 1750ad1e7d28SJulian Elischer struct tdq *tdq; 1751ad1e7d28SJulian Elischer struct td_sched *ts; 175215dc847eSJeff Roberson 1753b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1754ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 175515dc847eSJeff Roberson #ifdef SMP 175680f86c9fSJeff Roberson restart: 175715dc847eSJeff Roberson #endif 1758ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1759ad1e7d28SJulian Elischer if (ts) { 176022bf7d9aSJeff Roberson #ifdef SMP 1761155b6ca1SJeff Roberson if (ts->ts_thread->td_priority > PRI_MIN_IDLE) 1762ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 176380f86c9fSJeff Roberson goto restart; 176422bf7d9aSJeff Roberson #endif 1765ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 17667a5e5e2aSJeff Roberson return (ts->ts_thread); 176735e6168fSJeff Roberson } 1768c9f25d8fSJeff Roberson #ifdef SMP 1769ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 177080f86c9fSJeff Roberson goto restart; 1771c9f25d8fSJeff Roberson #endif 17727a5e5e2aSJeff Roberson return (PCPU_GET(idlethread)); 17737a5e5e2aSJeff Roberson } 17747a5e5e2aSJeff Roberson 17757a5e5e2aSJeff Roberson static int 17767a5e5e2aSJeff Roberson sched_preempt(struct thread *td) 17777a5e5e2aSJeff Roberson { 17787a5e5e2aSJeff Roberson struct thread *ctd; 17797a5e5e2aSJeff Roberson int cpri; 17807a5e5e2aSJeff Roberson int pri; 17817a5e5e2aSJeff Roberson 17827a5e5e2aSJeff Roberson ctd = curthread; 17837a5e5e2aSJeff Roberson pri = td->td_priority; 17847a5e5e2aSJeff Roberson cpri = ctd->td_priority; 17857a5e5e2aSJeff Roberson if (panicstr != NULL || pri >= cpri || cold || TD_IS_INHIBITED(ctd)) 17867a5e5e2aSJeff Roberson return (0); 17877a5e5e2aSJeff Roberson /* 17887a5e5e2aSJeff Roberson * Always preempt IDLE threads. Otherwise only if the preempting 17897a5e5e2aSJeff Roberson * thread is an ithread. 
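 *
 * [Editor's illustrative note -- not part of the original source: the
 *  test below encodes that policy.  An interrupt thread (priority at or
 *  below PRI_MAX_ITHD) that is better than curthread falls through and
 *  preempts; a timeshare thread waking while another timeshare thread
 *  runs matches the test (pri > PRI_MAX_ITHD and cpri < PRI_MIN_IDLE)
 *  and is merely queued; and any runnable thread preempts the idle
 *  thread, whose cpri is at least PRI_MIN_IDLE.]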
17907a5e5e2aSJeff Roberson */ 17917a5e5e2aSJeff Roberson if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE) 17927a5e5e2aSJeff Roberson return (0); 17937a5e5e2aSJeff Roberson if (ctd->td_critnest > 1) { 17947a5e5e2aSJeff Roberson CTR1(KTR_PROC, "sched_preempt: in critical section %d", 17957a5e5e2aSJeff Roberson ctd->td_critnest); 17967a5e5e2aSJeff Roberson ctd->td_owepreempt = 1; 17977a5e5e2aSJeff Roberson return (0); 17987a5e5e2aSJeff Roberson } 17997a5e5e2aSJeff Roberson /* 18007a5e5e2aSJeff Roberson * Thread is runnable but not yet put on system run queue. 18017a5e5e2aSJeff Roberson */ 18027a5e5e2aSJeff Roberson MPASS(TD_ON_RUNQ(td)); 18037a5e5e2aSJeff Roberson TD_SET_RUNNING(td); 18047a5e5e2aSJeff Roberson CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 18057a5e5e2aSJeff Roberson td->td_proc->p_pid, td->td_proc->p_comm); 18067a5e5e2aSJeff Roberson mi_switch(SW_INVOL|SW_PREEMPT, td); 18077a5e5e2aSJeff Roberson return (1); 180835e6168fSJeff Roberson } 180935e6168fSJeff Roberson 181035e6168fSJeff Roberson void 18112630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 181235e6168fSJeff Roberson { 1813ad1e7d28SJulian Elischer struct tdq *tdq; 1814ad1e7d28SJulian Elischer struct td_sched *ts; 1815598b368dSJeff Roberson int preemptive; 181622bf7d9aSJeff Roberson int class; 18177b8bfa0dSJeff Roberson #ifdef SMP 18187b8bfa0dSJeff Roberson int cpuid; 18197b8bfa0dSJeff Roberson int cpumask; 18207b8bfa0dSJeff Roberson #endif 18217a5e5e2aSJeff Roberson ts = td->td_sched; 1822c9f25d8fSJeff Roberson 18237a5e5e2aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 182481d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 182581d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 182681d47d3fSJeff Roberson curthread->td_proc->p_comm); 18277a5e5e2aSJeff Roberson KASSERT((td->td_inhibitors == 0), 18287a5e5e2aSJeff Roberson ("sched_add: trying to run inhibited thread")); 18297a5e5e2aSJeff Roberson KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), 18307a5e5e2aSJeff Roberson ("sched_add: bad thread state")); 18318460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 18325d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1833ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1834ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 18357a5e5e2aSJeff Roberson TD_SET_RUNQ(td); 18367a5e5e2aSJeff Roberson tdq = TDQ_SELF(); 18377a5e5e2aSJeff Roberson class = PRI_BASE(td->td_pri_class); 18387a5e5e2aSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 183915dc847eSJeff Roberson /* 18407b8bfa0dSJeff Roberson * Recalculate the priority before we select the target cpu or 18417b8bfa0dSJeff Roberson * run-queue. 184215dc847eSJeff Roberson */ 18438ab80cf0SJeff Roberson if (class == PRI_TIMESHARE) 18448ab80cf0SJeff Roberson sched_priority(td); 18457a5e5e2aSJeff Roberson if (ts->ts_slice == 0) 18467a5e5e2aSJeff Roberson ts->ts_slice = sched_slice; 184722bf7d9aSJeff Roberson #ifdef SMP 18487b8bfa0dSJeff Roberson cpuid = PCPU_GET(cpuid); 18492454aaf5SJeff Roberson /* 18507b8bfa0dSJeff Roberson * Pick the destination cpu and if it isn't ours transfer to the 18517b8bfa0dSJeff Roberson * target cpu. 
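 *
 * [Editor's illustrative note -- not part of the original source: a
 *  pinned or bound thread keeps its current ts_cpu, otherwise
 *  tdq_pickpri() or tdq_pickidle() may choose another CPU.  When the
 *  choice is remote, preemptive is cleared and the thread is queued on
 *  that CPU's tdq; the bit 1 << ts_cpu is then taken out of the group's
 *  idle mask (and the group's bit out of the global mask if the whole
 *  group had been idle) so the CPU is no longer advertised as idle, and
 *  tdq_notify() later alerts the remote CPU, typically with an IPI,
 *  instead of setting NEEDRESCHED locally.]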
18522454aaf5SJeff Roberson */ 18537b8bfa0dSJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 18547b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_ITHD) { 185514618990SJeff Roberson CTR2(KTR_ULE, "ithd %d < %d", 185614618990SJeff Roberson td->td_priority, PRI_MAX_ITHD); 18577b8bfa0dSJeff Roberson ts->ts_cpu = cpuid; 185880f86c9fSJeff Roberson } 18597b8bfa0dSJeff Roberson if (pick_pri) 18607b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickpri(tdq, ts, flags); 18617b8bfa0dSJeff Roberson else 18627b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickidle(tdq, ts); 18637b8bfa0dSJeff Roberson } else 186414618990SJeff Roberson CTR1(KTR_ULE, "pinned %d", td->td_pinned); 18657b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) 18667b8bfa0dSJeff Roberson preemptive = 0; 18677b8bfa0dSJeff Roberson tdq = TDQ_CPU(ts->ts_cpu); 18687b8bfa0dSJeff Roberson cpumask = 1 << ts->ts_cpu; 186922bf7d9aSJeff Roberson /* 1870670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 18717b8bfa0dSJeff Roberson * the global bitmap. 187222bf7d9aSJeff Roberson */ 1873e7d50326SJeff Roberson if ((class != PRI_IDLE && class != PRI_ITHD) && 18747b8bfa0dSJeff Roberson (tdq->tdq_group->tdg_idlemask & cpumask) != 0) { 187580f86c9fSJeff Roberson /* 187680f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 187780f86c9fSJeff Roberson * from the global idle mask. 187880f86c9fSJeff Roberson */ 1879d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1880d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1881d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 188280f86c9fSJeff Roberson /* 188380f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 188480f86c9fSJeff Roberson */ 18857b8bfa0dSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~cpumask; 18867b8bfa0dSJeff Roberson } 188722bf7d9aSJeff Roberson #endif 18887b8bfa0dSJeff Roberson /* 18897a5e5e2aSJeff Roberson * Pick the run queue based on priority. 
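 *
 * [Editor's illustrative note -- not part of the original source: this
 *  three-way split mirrors sched_priority().  Interrupt and interactive
 *  threads (priority at or below PRI_MAX_REALTIME) land on
 *  tdq_realtime, batch timeshare threads on the circular tdq_timeshare
 *  queue whose indices sched_clock() rotates, and only idle-class
 *  priorities fall through to tdq_idle.]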
18907b8bfa0dSJeff Roberson */ 18917b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_REALTIME) 18927b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_realtime; 18937b8bfa0dSJeff Roberson else if (td->td_priority <= PRI_MAX_TIMESHARE) 18947b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_timeshare; 18957b8bfa0dSJeff Roberson else 18967b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 18977a5e5e2aSJeff Roberson if (preemptive && sched_preempt(td)) 18980c0b25aeSJohn Baldwin return; 1899ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1900ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 19017b8bfa0dSJeff Roberson #ifdef SMP 19027b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) { 19037b8bfa0dSJeff Roberson tdq_notify(ts); 19047b8bfa0dSJeff Roberson return; 19057b8bfa0dSJeff Roberson } 19067b8bfa0dSJeff Roberson #endif 19077b8bfa0dSJeff Roberson if (td->td_priority < curthread->td_priority) 19087b8bfa0dSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 190935e6168fSJeff Roberson } 191035e6168fSJeff Roberson 191135e6168fSJeff Roberson void 19127cf90fb3SJeff Roberson sched_rem(struct thread *td) 191335e6168fSJeff Roberson { 1914ad1e7d28SJulian Elischer struct tdq *tdq; 1915ad1e7d28SJulian Elischer struct td_sched *ts; 19167cf90fb3SJeff Roberson 191781d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 191881d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 191981d47d3fSJeff Roberson curthread->td_proc->p_comm); 1920598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1921ad1e7d28SJulian Elischer ts = td->td_sched; 19227a5e5e2aSJeff Roberson KASSERT(TD_ON_RUNQ(td), 1923ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 192435e6168fSJeff Roberson 1925ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1926ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1927ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 19287a5e5e2aSJeff Roberson TD_SET_CAN_RUN(td); 192935e6168fSJeff Roberson } 193035e6168fSJeff Roberson 193135e6168fSJeff Roberson fixpt_t 19327cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 193335e6168fSJeff Roberson { 193435e6168fSJeff Roberson fixpt_t pctcpu; 1935ad1e7d28SJulian Elischer struct td_sched *ts; 193635e6168fSJeff Roberson 193735e6168fSJeff Roberson pctcpu = 0; 1938ad1e7d28SJulian Elischer ts = td->td_sched; 1939ad1e7d28SJulian Elischer if (ts == NULL) 1940484288deSJeff Roberson return (0); 194135e6168fSJeff Roberson 1942b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1943ad1e7d28SJulian Elischer if (ts->ts_ticks) { 194435e6168fSJeff Roberson int rtick; 194535e6168fSJeff Roberson 1946ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 194735e6168fSJeff Roberson /* How many rtick per second ? 
*/ 1948e7d50326SJeff Roberson rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz); 1949e7d50326SJeff Roberson pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; 195035e6168fSJeff Roberson } 1951ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1952828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 195335e6168fSJeff Roberson 195435e6168fSJeff Roberson return (pctcpu); 195535e6168fSJeff Roberson } 195635e6168fSJeff Roberson 19579bacd788SJeff Roberson void 19589bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19599bacd788SJeff Roberson { 1960ad1e7d28SJulian Elischer struct td_sched *ts; 19619bacd788SJeff Roberson 19629bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1963ad1e7d28SJulian Elischer ts = td->td_sched; 19646b2f763fSJeff Roberson if (ts->ts_flags & TSF_BOUND) 1965c95d2db2SJeff Roberson sched_unbind(td); 1966ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 196780f86c9fSJeff Roberson #ifdef SMP 19686b2f763fSJeff Roberson sched_pin(); 196980f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 19709bacd788SJeff Roberson return; 19716b2f763fSJeff Roberson ts->ts_cpu = cpu; 19729bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. */ 1973279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19749bacd788SJeff Roberson #endif 19759bacd788SJeff Roberson } 19769bacd788SJeff Roberson 19779bacd788SJeff Roberson void 19789bacd788SJeff Roberson sched_unbind(struct thread *td) 19799bacd788SJeff Roberson { 1980e7d50326SJeff Roberson struct td_sched *ts; 1981e7d50326SJeff Roberson 19829bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1983e7d50326SJeff Roberson ts = td->td_sched; 19846b2f763fSJeff Roberson if ((ts->ts_flags & TSF_BOUND) == 0) 19856b2f763fSJeff Roberson return; 1986e7d50326SJeff Roberson ts->ts_flags &= ~TSF_BOUND; 1987e7d50326SJeff Roberson #ifdef SMP 1988e7d50326SJeff Roberson sched_unpin(); 1989e7d50326SJeff Roberson #endif 19909bacd788SJeff Roberson } 19919bacd788SJeff Roberson 199235e6168fSJeff Roberson int 1993ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1994ebccf1e3SJoseph Koshy { 1995ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1996ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1997ebccf1e3SJoseph Koshy } 1998ebccf1e3SJoseph Koshy 199936ec198bSDavid Xu void 200036ec198bSDavid Xu sched_relinquish(struct thread *td) 200136ec198bSDavid Xu { 200236ec198bSDavid Xu mtx_lock_spin(&sched_lock); 20038460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 200436ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 200536ec198bSDavid Xu mi_switch(SW_VOL, NULL); 200636ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 200736ec198bSDavid Xu } 200836ec198bSDavid Xu 2009ebccf1e3SJoseph Koshy int 201033916c36SJeff Roberson sched_load(void) 201133916c36SJeff Roberson { 201233916c36SJeff Roberson #ifdef SMP 201333916c36SJeff Roberson int total; 201433916c36SJeff Roberson int i; 201533916c36SJeff Roberson 201633916c36SJeff Roberson total = 0; 2017d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 2018d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 201933916c36SJeff Roberson return (total); 202033916c36SJeff Roberson #else 2021d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 202233916c36SJeff Roberson #endif 202333916c36SJeff Roberson } 202433916c36SJeff Roberson 202533916c36SJeff Roberson int 202635e6168fSJeff Roberson sched_sizeof_proc(void) 202735e6168fSJeff Roberson { 202835e6168fSJeff Roberson return (sizeof(struct proc)); 202935e6168fSJeff 
Roberson } 203035e6168fSJeff Roberson 203135e6168fSJeff Roberson int 203235e6168fSJeff Roberson sched_sizeof_thread(void) 203335e6168fSJeff Roberson { 203435e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 203535e6168fSJeff Roberson } 2036b41f1452SDavid Xu 2037b41f1452SDavid Xu void 2038b41f1452SDavid Xu sched_tick(void) 2039b41f1452SDavid Xu { 20407a5e5e2aSJeff Roberson struct td_sched *ts; 20417a5e5e2aSJeff Roberson 20427a5e5e2aSJeff Roberson ts = curthread->td_sched; 20437a5e5e2aSJeff Roberson /* Adjust ticks for pctcpu */ 20447a5e5e2aSJeff Roberson ts->ts_ticks += 1 << SCHED_TICK_SHIFT; 20457a5e5e2aSJeff Roberson ts->ts_ltick = ticks; 20467a5e5e2aSJeff Roberson /* 20477a5e5e2aSJeff Roberson * Update if we've exceeded our desired tick threshhold by over one 20487a5e5e2aSJeff Roberson * second. 20497a5e5e2aSJeff Roberson */ 20507a5e5e2aSJeff Roberson if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick) 20517a5e5e2aSJeff Roberson sched_pctcpu_update(ts); 20527a5e5e2aSJeff Roberson } 20537a5e5e2aSJeff Roberson 20547a5e5e2aSJeff Roberson /* 20557a5e5e2aSJeff Roberson * The actual idle process. 20567a5e5e2aSJeff Roberson */ 20577a5e5e2aSJeff Roberson void 20587a5e5e2aSJeff Roberson sched_idletd(void *dummy) 20597a5e5e2aSJeff Roberson { 20607a5e5e2aSJeff Roberson struct proc *p; 20617a5e5e2aSJeff Roberson struct thread *td; 20627a5e5e2aSJeff Roberson 20637a5e5e2aSJeff Roberson td = curthread; 20647a5e5e2aSJeff Roberson p = td->td_proc; 20657a5e5e2aSJeff Roberson mtx_assert(&Giant, MA_NOTOWNED); 20667a5e5e2aSJeff Roberson /* ULE Relies on preemption for idle interruption. */ 20677a5e5e2aSJeff Roberson for (;;) 20687a5e5e2aSJeff Roberson cpu_idle(); 2069b41f1452SDavid Xu } 2070e7d50326SJeff Roberson 2071e7d50326SJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 2072e7d50326SJeff Roberson SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 2073e7d50326SJeff Roberson "Scheduler name"); 2074e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, ""); 2075e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, ""); 2076e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, ""); 2077e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, ""); 20787b8bfa0dSJeff Roberson #ifdef SMP 20797b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0, ""); 20807b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_affinity, CTLFLAG_RW, 20817b8bfa0dSJeff Roberson &affinity, 0, ""); 20827b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryself, CTLFLAG_RW, 20837b8bfa0dSJeff Roberson &tryself, 0, ""); 20847b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryselfidle, CTLFLAG_RW, 20857b8bfa0dSJeff Roberson &tryselfidle, 0, ""); 20867b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0, ""); 20877b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_preempt, CTLFLAG_RW, &ipi_preempt, 0, ""); 20887b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_ast, CTLFLAG_RW, &ipi_ast, 0, ""); 20897b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_thresh, CTLFLAG_RW, &ipi_thresh, 0, ""); 20907b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, steal_htt, CTLFLAG_RW, &steal_htt, 0, ""); 20917b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, 
steal_busy, CTLFLAG_RW, &steal_busy, 0, ""); 20927b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, busy_thresh, CTLFLAG_RW, &busy_thresh, 0, ""); 20937b8bfa0dSJeff Roberson #endif 2094e7d50326SJeff Roberson 2095e7d50326SJeff Roberson /* ps compat */ 2096e7d50326SJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 2097e7d50326SJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 2098e7d50326SJeff Roberson 2099e7d50326SJeff Roberson 2100ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 2101ed062c8dSJulian Elischer #include "kern/kern_switch.c" 2102
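/*
 * [Editor's appendix -- an illustrative, self-contained sketch that is
 *  not part of the original file.  It redoes the fixed-point arithmetic
 *  of sched_pctcpu() above under assumed values: SCHED_TICK_SHIFT = 10,
 *  a 10 second averaging window, hz = 1000 and the historical FSHIFT of
 *  11.  The block is kept under #if 0 so it never reaches the kernel
 *  compiler, but extracted on its own it builds and runs as an ordinary
 *  userland program.]
 */
#if 0
#include <stdio.h>

#define	ED_TICK_SHIFT	10		/* assumed SCHED_TICK_SHIFT */
#define	ED_TICK_SECS	10		/* assumed averaging window, seconds */
#define	ED_HZ		1000		/* assumed hz */
#define	ED_FSHIFT	11		/* assumed FSHIFT; FSCALE = 1 << FSHIFT */
#define	ED_FSCALE	(1 << ED_FSHIFT)

static int
ed_pctcpu(int ts_ticks)
{
	int rtick;

	/* SCHED_TICK_HZ(): strip the shift to recover hz ticks actually run. */
	rtick = ts_ticks >> ED_TICK_SHIFT;
	/* Average over the window and cap at hz, as sched_pctcpu() does. */
	rtick = rtick / ED_TICK_SECS;
	if (rtick > ED_HZ)
		rtick = ED_HZ;
	return ((ED_FSCALE * ((ED_FSCALE * rtick) / ED_HZ)) >> ED_FSHIFT);
}

int
main(void)
{
	/* A thread that ran 5000 of the last 10000 hz ticks (50% busy). */
	int ticks_run = 5000 << ED_TICK_SHIFT;

	/* Prints 1024/2048, i.e. one half in FSCALE fixed point. */
	printf("pctcpu = %d/%d\n", ed_pctcpu(ticks_run), ED_FSCALE);
	return (0);
}
#endif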