135e6168fSJeff Roberson /*- 2e7d50326SJeff Roberson * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. 
O'Brien 304da0d332SPeter Wemm #include "opt_hwpmc_hooks.h" 314da0d332SPeter Wemm #include "opt_sched.h" 329923b511SScott Long 3335e6168fSJeff Roberson #include <sys/param.h> 3435e6168fSJeff Roberson #include <sys/systm.h> 352c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3635e6168fSJeff Roberson #include <sys/kernel.h> 3735e6168fSJeff Roberson #include <sys/ktr.h> 3835e6168fSJeff Roberson #include <sys/lock.h> 3935e6168fSJeff Roberson #include <sys/mutex.h> 4035e6168fSJeff Roberson #include <sys/proc.h> 41245f3abfSJeff Roberson #include <sys/resource.h> 429bacd788SJeff Roberson #include <sys/resourcevar.h> 4335e6168fSJeff Roberson #include <sys/sched.h> 4435e6168fSJeff Roberson #include <sys/smp.h> 4535e6168fSJeff Roberson #include <sys/sx.h> 4635e6168fSJeff Roberson #include <sys/sysctl.h> 4735e6168fSJeff Roberson #include <sys/sysproto.h> 48f5c157d9SJohn Baldwin #include <sys/turnstile.h> 493db720fdSDavid Xu #include <sys/umtx.h> 5035e6168fSJeff Roberson #include <sys/vmmeter.h> 5135e6168fSJeff Roberson #ifdef KTRACE 5235e6168fSJeff Roberson #include <sys/uio.h> 5335e6168fSJeff Roberson #include <sys/ktrace.h> 5435e6168fSJeff Roberson #endif 5535e6168fSJeff Roberson 56ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 57ebccf1e3SJoseph Koshy #include <sys/pmckern.h> 58ebccf1e3SJoseph Koshy #endif 59ebccf1e3SJoseph Koshy 6035e6168fSJeff Roberson #include <machine/cpu.h> 6122bf7d9aSJeff Roberson #include <machine/smp.h> 6235e6168fSJeff Roberson 6335e6168fSJeff Roberson /* 646b2f763fSJeff Roberson * TODO: 656b2f763fSJeff Roberson * Pick idle from affinity group or self group first. 666b2f763fSJeff Roberson * Implement pick_score. 676b2f763fSJeff Roberson */ 686b2f763fSJeff Roberson 696b2f763fSJeff Roberson /* 70ad1e7d28SJulian Elischer * Thread scheduler specific section. 71ed062c8dSJulian Elischer */ 72ad1e7d28SJulian Elischer struct td_sched { 73ad1e7d28SJulian Elischer TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */ 74ad1e7d28SJulian Elischer int ts_flags; /* (j) TSF_* flags. */ 75ad1e7d28SJulian Elischer struct thread *ts_thread; /* (*) Active associated thread. */ 76ad1e7d28SJulian Elischer u_char ts_rqindex; /* (j) Run queue index. */ 77ed062c8dSJulian Elischer enum { 78e7d50326SJeff Roberson TSS_THREAD, 79ad1e7d28SJulian Elischer TSS_ONRUNQ 80ad1e7d28SJulian Elischer } ts_state; /* (j) thread sched specific status. */ 81ad1e7d28SJulian Elischer int ts_slptime; 82ad1e7d28SJulian Elischer int ts_slice; 83ad1e7d28SJulian Elischer struct runq *ts_runq; 84ad1e7d28SJulian Elischer u_char ts_cpu; /* CPU that we have affinity for. */ 85ed062c8dSJulian Elischer /* The following variables are only used for pctcpu calculation */ 86ad1e7d28SJulian Elischer int ts_ltick; /* Last tick that we were running on */ 87ad1e7d28SJulian Elischer int ts_ftick; /* First tick that we were running on */ 88ad1e7d28SJulian Elischer int ts_ticks; /* Tick count */ 897b8bfa0dSJeff Roberson #ifdef SMP 907b8bfa0dSJeff Roberson int ts_rltick; /* Real last tick, for affinity. */ 917b8bfa0dSJeff Roberson #endif 92ed062c8dSJulian Elischer 938460a577SJohn Birrell /* originally from kg_sched */ 948460a577SJohn Birrell int skg_slptime; /* Number of ticks we vol. slept */ 958460a577SJohn Birrell int skg_runtime; /* Number of ticks we were running */ 96ed062c8dSJulian Elischer }; 97ad1e7d28SJulian Elischer /* flags kept in ts_flags */ 987b8bfa0dSJeff Roberson #define TSF_BOUND 0x0001 /* Thread can not migrate. */ 997b8bfa0dSJeff Roberson #define TSF_XFERABLE 0x0002 /* Thread was added as transferable. 
*/ 100d2ad694cSJeff Roberson #define TSF_DIDRUN 0x2000 /* Thread actually ran. */ 10135e6168fSJeff Roberson 102ad1e7d28SJulian Elischer static struct td_sched td_sched0; 10335e6168fSJeff Roberson 10435e6168fSJeff Roberson /* 105e7d50326SJeff Roberson * Cpu percentage computation macros and defines. 106e1f89c22SJeff Roberson * 107e7d50326SJeff Roberson * SCHED_TICK_SECS: Number of seconds to average the cpu usage across. 108e7d50326SJeff Roberson * SCHED_TICK_TARG: Number of hz ticks to average the cpu usage across. 1098ab80cf0SJeff Roberson * SCHED_TICK_MAX: Maximum number of ticks before scaling back. 110e7d50326SJeff Roberson * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results. 111e7d50326SJeff Roberson * SCHED_TICK_HZ: Compute the number of hz ticks for a given ticks count. 112e7d50326SJeff Roberson * SCHED_TICK_TOTAL: Gives the amount of time we've been recording ticks. 11335e6168fSJeff Roberson */ 114e7d50326SJeff Roberson #define SCHED_TICK_SECS 10 115e7d50326SJeff Roberson #define SCHED_TICK_TARG (hz * SCHED_TICK_SECS) 1168ab80cf0SJeff Roberson #define SCHED_TICK_MAX (SCHED_TICK_TARG + hz) 117e7d50326SJeff Roberson #define SCHED_TICK_SHIFT 10 118e7d50326SJeff Roberson #define SCHED_TICK_HZ(ts) ((ts)->ts_ticks >> SCHED_TICK_SHIFT) 119eddb4efaSJeff Roberson #define SCHED_TICK_TOTAL(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, hz)) 12035e6168fSJeff Roberson 12135e6168fSJeff Roberson /* 122e7d50326SJeff Roberson * These macros determine priorities for non-interactive threads. They are 123e7d50326SJeff Roberson * assigned a priority based on their recent cpu utilization as expressed 124e7d50326SJeff Roberson * by the ratio of ticks to the tick total. NHALF priorities at the start 125e7d50326SJeff Roberson * and end of the MIN to MAX timeshare range are only reachable with negative 126e7d50326SJeff Roberson * or positive nice respectively. 127e7d50326SJeff Roberson * 128e7d50326SJeff Roberson * PRI_RANGE: Priority range for utilization dependent priorities. 129e7d50326SJeff Roberson * PRI_NRESV: Number of nice values. 130e7d50326SJeff Roberson * PRI_TICKS: Compute a priority in PRI_RANGE from the ticks count and total. 131e7d50326SJeff Roberson * PRI_NICE: Determines the part of the priority inherited from nice. 132e7d50326SJeff Roberson */ 133e7d50326SJeff Roberson #define SCHED_PRI_NRESV (PRIO_MAX - PRIO_MIN) 134e7d50326SJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 135e7d50326SJeff Roberson #define SCHED_PRI_MIN (PRI_MIN_TIMESHARE + SCHED_PRI_NHALF) 136e7d50326SJeff Roberson #define SCHED_PRI_MAX (PRI_MAX_TIMESHARE - SCHED_PRI_NHALF) 137e7d50326SJeff Roberson #define SCHED_PRI_RANGE (SCHED_PRI_MAX - SCHED_PRI_MIN + 1) 138e7d50326SJeff Roberson #define SCHED_PRI_TICKS(ts) \ 139e7d50326SJeff Roberson (SCHED_TICK_HZ((ts)) / \ 1401e516cf5SJeff Roberson (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE)) 141e7d50326SJeff Roberson #define SCHED_PRI_NICE(nice) (nice) 142e7d50326SJeff Roberson 143e7d50326SJeff Roberson /* 144e7d50326SJeff Roberson * These determine the interactivity of a process. Interactivity differs from 145e7d50326SJeff Roberson * cpu utilization in that it expresses the voluntary time slept vs time ran 146e7d50326SJeff Roberson * while cpu utilization includes all time not running. This more accurately 147e7d50326SJeff Roberson * models the intent of the thread. 
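 *
 * As a rough, hypothetical illustration of the score derived from these
 * values (computed in sched_interact_score() below; the numbers here are
 * assumed for clarity only): with skg_slptime = 4000 and skg_runtime = 1000
 * the divisor is max(1, 4000 / SCHED_INTERACT_HALF) = 80, giving a score of
 * 1000 / 80 = 12, which is below SCHED_INTERACT_THRESH and so is treated as
 * interactive.  With the two values swapped the score works out to roughly
 * 88 and the thread is treated as a cpu hog.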
14835e6168fSJeff Roberson * 149407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 150407b0157SJeff Roberson * before throttling back. 151d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 152210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 153e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 15435e6168fSJeff Roberson */ 155e7d50326SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << SCHED_TICK_SHIFT) 156e7d50326SJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << SCHED_TICK_SHIFT) 157210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 158210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1594c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 160e1f89c22SJeff Roberson 16135e6168fSJeff Roberson /* 162e7d50326SJeff Roberson * tickincr: Converts a stathz tick into a hz domain scaled by 163e7d50326SJeff Roberson * the shift factor. Without the shift the error rate 164e7d50326SJeff Roberson * due to rounding would be unacceptably high. 165e7d50326SJeff Roberson * realstathz: stathz is sometimes 0 and run off of hz. 166e7d50326SJeff Roberson * sched_slice: Runtime of each thread before rescheduling. 16735e6168fSJeff Roberson */ 168e7d50326SJeff Roberson static int sched_interact = SCHED_INTERACT_THRESH; 169e7d50326SJeff Roberson static int realstathz; 170e7d50326SJeff Roberson static int tickincr; 171e7d50326SJeff Roberson static int sched_slice; 17235e6168fSJeff Roberson 17335e6168fSJeff Roberson /* 174ad1e7d28SJulian Elischer * tdq - per processor runqs and statistics. 17535e6168fSJeff Roberson */ 176ad1e7d28SJulian Elischer struct tdq { 177d2ad694cSJeff Roberson struct runq tdq_idle; /* Queue of IDLE threads. */ 178e7d50326SJeff Roberson struct runq tdq_timeshare; /* timeshare run queue. */ 179e7d50326SJeff Roberson struct runq tdq_realtime; /* real-time run queue. */ 1803f872f85SJeff Roberson int tdq_idx; /* Current insert index. */ 1813f872f85SJeff Roberson int tdq_ridx; /* Current removal index. */ 182d2ad694cSJeff Roberson int tdq_load; /* Aggregate load. */ 1837b8bfa0dSJeff Roberson int tdq_flags; /* Thread queue flags */ 1845d7ef00cSJeff Roberson #ifdef SMP 185d2ad694cSJeff Roberson int tdq_transferable; 186d2ad694cSJeff Roberson LIST_ENTRY(tdq) tdq_siblings; /* Next in tdq group. */ 187d2ad694cSJeff Roberson struct tdq_group *tdq_group; /* Our processor group. */ 18833916c36SJeff Roberson #else 189d2ad694cSJeff Roberson int tdq_sysload; /* For loadavg, !ITHD load. */ 1905d7ef00cSJeff Roberson #endif 19135e6168fSJeff Roberson }; 19235e6168fSJeff Roberson 1937b8bfa0dSJeff Roberson #define TDQF_BUSY 0x0001 /* Queue is marked as busy */ 1947b8bfa0dSJeff Roberson 19580f86c9fSJeff Roberson #ifdef SMP 19680f86c9fSJeff Roberson /* 197ad1e7d28SJulian Elischer * tdq groups are groups of processors which can cheaply share threads. When 19880f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other 19980f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt. 20080f86c9fSJeff Roberson * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 20180f86c9fSJeff Roberson * In a numa environment we'd want an idle bitmap per group and a two tiered 20280f86c9fSJeff Roberson * load balancer. 
20380f86c9fSJeff Roberson */ 204ad1e7d28SJulian Elischer struct tdq_group { 205d2ad694cSJeff Roberson int tdg_cpus; /* Count of CPUs in this tdq group. */ 206d2ad694cSJeff Roberson cpumask_t tdg_cpumask; /* Mask of cpus in this group. */ 207d2ad694cSJeff Roberson cpumask_t tdg_idlemask; /* Idle cpus in this group. */ 208d2ad694cSJeff Roberson cpumask_t tdg_mask; /* Bit mask for first cpu. */ 209d2ad694cSJeff Roberson int tdg_load; /* Total load of this group. */ 210d2ad694cSJeff Roberson int tdg_transferable; /* Transferable load of this group. */ 211d2ad694cSJeff Roberson LIST_HEAD(, tdq) tdg_members; /* Linked list of all members. */ 21280f86c9fSJeff Roberson }; 2137b8bfa0dSJeff Roberson 2147b8bfa0dSJeff Roberson #define SCHED_AFFINITY_DEFAULT (hz / 100) 2157b8bfa0dSJeff Roberson #define SCHED_AFFINITY(ts) ((ts)->ts_rltick > ticks - affinity) 2167b8bfa0dSJeff Roberson 2177b8bfa0dSJeff Roberson /* 2187b8bfa0dSJeff Roberson * Run-time tunables. 2197b8bfa0dSJeff Roberson */ 2207b8bfa0dSJeff Roberson static int rebalance = 1; 2217b8bfa0dSJeff Roberson static int pick_pri = 1; 2227b8bfa0dSJeff Roberson static int affinity; 2237b8bfa0dSJeff Roberson static int tryself = 1; 2247b8bfa0dSJeff Roberson static int tryselfidle = 1; 2257b8bfa0dSJeff Roberson static int ipi_ast = 0; 2267b8bfa0dSJeff Roberson static int ipi_preempt = 1; 2277b8bfa0dSJeff Roberson static int ipi_thresh = PRI_MIN_KERN; 2287b8bfa0dSJeff Roberson static int steal_htt = 1; 2297b8bfa0dSJeff Roberson static int steal_busy = 1; 2307b8bfa0dSJeff Roberson static int busy_thresh = 4; 23180f86c9fSJeff Roberson 23235e6168fSJeff Roberson /* 233d2ad694cSJeff Roberson * One thread queue per processor. 23435e6168fSJeff Roberson */ 2357b8bfa0dSJeff Roberson static volatile cpumask_t tdq_idle; 2367b8bfa0dSJeff Roberson static volatile cpumask_t tdq_busy; 237d2ad694cSJeff Roberson static int tdg_maxid; 238ad1e7d28SJulian Elischer static struct tdq tdq_cpu[MAXCPU]; 239ad1e7d28SJulian Elischer static struct tdq_group tdq_groups[MAXCPU]; 240dc03363dSJeff Roberson static int bal_tick; 241dc03363dSJeff Roberson static int gbal_tick; 242598b368dSJeff Roberson static int balance_groups; 243dc03363dSJeff Roberson 244ad1e7d28SJulian Elischer #define TDQ_SELF() (&tdq_cpu[PCPU_GET(cpuid)]) 245ad1e7d28SJulian Elischer #define TDQ_CPU(x) (&tdq_cpu[(x)]) 246ad1e7d28SJulian Elischer #define TDQ_ID(x) ((x) - tdq_cpu) 247ad1e7d28SJulian Elischer #define TDQ_GROUP(x) (&tdq_groups[(x)]) 24880f86c9fSJeff Roberson #else /* !SMP */ 249ad1e7d28SJulian Elischer static struct tdq tdq_cpu; 250dc03363dSJeff Roberson 251ad1e7d28SJulian Elischer #define TDQ_SELF() (&tdq_cpu) 252ad1e7d28SJulian Elischer #define TDQ_CPU(x) (&tdq_cpu) 2530a016a05SJeff Roberson #endif 25435e6168fSJeff Roberson 255ad1e7d28SJulian Elischer static struct td_sched *sched_choose(void); /* XXX Should be thread * */ 2568460a577SJohn Birrell static void sched_priority(struct thread *); 25721381d1bSJeff Roberson static void sched_thread_priority(struct thread *, u_char); 2588460a577SJohn Birrell static int sched_interact_score(struct thread *); 2598460a577SJohn Birrell static void sched_interact_update(struct thread *); 2608460a577SJohn Birrell static void sched_interact_fork(struct thread *); 261ad1e7d28SJulian Elischer static void sched_pctcpu_update(struct td_sched *); 2621e516cf5SJeff Roberson static inline void sched_pin_td(struct thread *td); 2631e516cf5SJeff Roberson static inline void sched_unpin_td(struct thread *td); 26435e6168fSJeff Roberson 2655d7ef00cSJeff Roberson /* Operations on 
per processor queues */ 266ad1e7d28SJulian Elischer static struct td_sched * tdq_choose(struct tdq *); 267ad1e7d28SJulian Elischer static void tdq_setup(struct tdq *); 268ad1e7d28SJulian Elischer static void tdq_load_add(struct tdq *, struct td_sched *); 269ad1e7d28SJulian Elischer static void tdq_load_rem(struct tdq *, struct td_sched *); 270ad1e7d28SJulian Elischer static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int); 271ad1e7d28SJulian Elischer static __inline void tdq_runq_rem(struct tdq *, struct td_sched *); 272ad1e7d28SJulian Elischer void tdq_print(int cpu); 273e7d50326SJeff Roberson static void runq_print(struct runq *rq); 2745d7ef00cSJeff Roberson #ifdef SMP 2757b8bfa0dSJeff Roberson static int tdq_pickidle(struct tdq *, struct td_sched *); 2767b8bfa0dSJeff Roberson static int tdq_pickpri(struct tdq *, struct td_sched *, int); 277ad1e7d28SJulian Elischer static struct td_sched *runq_steal(struct runq *); 278dc03363dSJeff Roberson static void sched_balance(void); 279dc03363dSJeff Roberson static void sched_balance_groups(void); 280ad1e7d28SJulian Elischer static void sched_balance_group(struct tdq_group *); 281ad1e7d28SJulian Elischer static void sched_balance_pair(struct tdq *, struct tdq *); 2827b8bfa0dSJeff Roberson static void sched_smp_tick(struct thread *); 283ad1e7d28SJulian Elischer static void tdq_move(struct tdq *, int); 284ad1e7d28SJulian Elischer static int tdq_idled(struct tdq *); 2857b8bfa0dSJeff Roberson static void tdq_notify(struct td_sched *); 286ad1e7d28SJulian Elischer static struct td_sched *tdq_steal(struct tdq *, int); 2871e516cf5SJeff Roberson 2887b8bfa0dSJeff Roberson #define THREAD_CAN_MIGRATE(td) ((td)->td_pinned == 0) 2895d7ef00cSJeff Roberson #endif 2905d7ef00cSJeff Roberson 291e7d50326SJeff Roberson static void sched_setup(void *dummy); 292e7d50326SJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 293e7d50326SJeff Roberson 294e7d50326SJeff Roberson static void sched_initticks(void *dummy); 295e7d50326SJeff Roberson SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL) 296e7d50326SJeff Roberson 2971e516cf5SJeff Roberson static inline void 2981e516cf5SJeff Roberson sched_pin_td(struct thread *td) 2991e516cf5SJeff Roberson { 3001e516cf5SJeff Roberson td->td_pinned++; 3011e516cf5SJeff Roberson } 3021e516cf5SJeff Roberson 3031e516cf5SJeff Roberson static inline void 3041e516cf5SJeff Roberson sched_unpin_td(struct thread *td) 3051e516cf5SJeff Roberson { 3061e516cf5SJeff Roberson td->td_pinned--; 3071e516cf5SJeff Roberson } 3081e516cf5SJeff Roberson 309e7d50326SJeff Roberson static void 310e7d50326SJeff Roberson runq_print(struct runq *rq) 311e7d50326SJeff Roberson { 312e7d50326SJeff Roberson struct rqhead *rqh; 313e7d50326SJeff Roberson struct td_sched *ts; 314e7d50326SJeff Roberson int pri; 315e7d50326SJeff Roberson int j; 316e7d50326SJeff Roberson int i; 317e7d50326SJeff Roberson 318e7d50326SJeff Roberson for (i = 0; i < RQB_LEN; i++) { 319e7d50326SJeff Roberson printf("\t\trunq bits %d 0x%zx\n", 320e7d50326SJeff Roberson i, rq->rq_status.rqb_bits[i]); 321e7d50326SJeff Roberson for (j = 0; j < RQB_BPW; j++) 322e7d50326SJeff Roberson if (rq->rq_status.rqb_bits[i] & (1ul << j)) { 323e7d50326SJeff Roberson pri = j + (i << RQB_L2BPW); 324e7d50326SJeff Roberson rqh = &rq->rq_queues[pri]; 325e7d50326SJeff Roberson TAILQ_FOREACH(ts, rqh, ts_procq) { 326e7d50326SJeff Roberson printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n", 327e7d50326SJeff Roberson ts->ts_thread, 
ts->ts_thread->td_proc->p_comm, ts->ts_thread->td_priority, ts->ts_rqindex, pri); 328e7d50326SJeff Roberson } 329e7d50326SJeff Roberson } 330e7d50326SJeff Roberson } 331e7d50326SJeff Roberson } 332e7d50326SJeff Roberson 33315dc847eSJeff Roberson void 334ad1e7d28SJulian Elischer tdq_print(int cpu) 33515dc847eSJeff Roberson { 336ad1e7d28SJulian Elischer struct tdq *tdq; 33715dc847eSJeff Roberson 338ad1e7d28SJulian Elischer tdq = TDQ_CPU(cpu); 33915dc847eSJeff Roberson 340ad1e7d28SJulian Elischer printf("tdq:\n"); 341d2ad694cSJeff Roberson printf("\tload: %d\n", tdq->tdq_load); 342e7d50326SJeff Roberson printf("\ttimeshare idx: %d\n", tdq->tdq_idx); 3433f872f85SJeff Roberson printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx); 344e7d50326SJeff Roberson printf("\trealtime runq:\n"); 345e7d50326SJeff Roberson runq_print(&tdq->tdq_realtime); 346e7d50326SJeff Roberson printf("\ttimeshare runq:\n"); 347e7d50326SJeff Roberson runq_print(&tdq->tdq_timeshare); 348e7d50326SJeff Roberson printf("\tidle runq:\n"); 349e7d50326SJeff Roberson runq_print(&tdq->tdq_idle); 350ef1134c9SJeff Roberson #ifdef SMP 351d2ad694cSJeff Roberson printf("\tload transferable: %d\n", tdq->tdq_transferable); 352ef1134c9SJeff Roberson #endif 35315dc847eSJeff Roberson } 35415dc847eSJeff Roberson 355155b9987SJeff Roberson static __inline void 356ad1e7d28SJulian Elischer tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags) 357155b9987SJeff Roberson { 358155b9987SJeff Roberson #ifdef SMP 359e7d50326SJeff Roberson if (THREAD_CAN_MIGRATE(ts->ts_thread)) { 360d2ad694cSJeff Roberson tdq->tdq_transferable++; 361d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 362ad1e7d28SJulian Elischer ts->ts_flags |= TSF_XFERABLE; 3637b8bfa0dSJeff Roberson if (tdq->tdq_transferable >= busy_thresh && 3647b8bfa0dSJeff Roberson (tdq->tdq_flags & TDQF_BUSY) == 0) { 3657b8bfa0dSJeff Roberson tdq->tdq_flags |= TDQF_BUSY; 3667b8bfa0dSJeff Roberson atomic_set_int(&tdq_busy, 1 << TDQ_ID(tdq)); 3677b8bfa0dSJeff Roberson } 36880f86c9fSJeff Roberson } 369155b9987SJeff Roberson #endif 370e7d50326SJeff Roberson if (ts->ts_runq == &tdq->tdq_timeshare) { 371e7d50326SJeff Roberson int pri; 372e7d50326SJeff Roberson 373e7d50326SJeff Roberson pri = ts->ts_thread->td_priority; 374e7d50326SJeff Roberson KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE, 375e7d50326SJeff Roberson ("Invalid priority %d on timeshare runq", pri)); 376e7d50326SJeff Roberson /* 377e7d50326SJeff Roberson * This queue contains only priorities between MIN and MAX 378e7d50326SJeff Roberson * realtime. Use the whole queue to represent these values. 379e7d50326SJeff Roberson */ 380e7d50326SJeff Roberson #define TS_RQ_PPQ (((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS) 381e7d50326SJeff Roberson if ((flags & SRQ_BORROWING) == 0) { 382e7d50326SJeff Roberson pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ; 383e7d50326SJeff Roberson pri = (pri + tdq->tdq_idx) % RQ_NQS; 3843f872f85SJeff Roberson /* 3853f872f85SJeff Roberson * This effectively shortens the queue by one so we 3863f872f85SJeff Roberson * can have a one slot difference between idx and 3873f872f85SJeff Roberson * ridx while we wait for threads to drain. 
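			 * As an assumed example (illustrative values only):
			 * with a timeshare range of 64 priorities and
			 * RQ_NQS == 64, TS_RQ_PPQ is 1, so a thread 10
			 * priorities above PRI_MIN_TIMESHARE with
			 * tdq_idx == 60 lands in queue (10 + 60) % 64 == 6.
			 * If that slot equals tdq_ridx while ridx != idx it
			 * is moved to the previous slot, which in the
			 * circular order is the last to be drained, so the
			 * new thread does not jump ahead of threads already
			 * waiting at the removal index.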
3883f872f85SJeff Roberson */ 3893f872f85SJeff Roberson if (tdq->tdq_ridx != tdq->tdq_idx && 3903f872f85SJeff Roberson pri == tdq->tdq_ridx) 3913f872f85SJeff Roberson pri = (pri - 1) % RQ_NQS; 392e7d50326SJeff Roberson } else 3933f872f85SJeff Roberson pri = tdq->tdq_ridx; 394e7d50326SJeff Roberson runq_add_pri(ts->ts_runq, ts, pri, flags); 395e7d50326SJeff Roberson } else 396ad1e7d28SJulian Elischer runq_add(ts->ts_runq, ts, flags); 397155b9987SJeff Roberson } 398155b9987SJeff Roberson 399155b9987SJeff Roberson static __inline void 400ad1e7d28SJulian Elischer tdq_runq_rem(struct tdq *tdq, struct td_sched *ts) 401155b9987SJeff Roberson { 402155b9987SJeff Roberson #ifdef SMP 403ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_XFERABLE) { 404d2ad694cSJeff Roberson tdq->tdq_transferable--; 405d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 406ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_XFERABLE; 4077b8bfa0dSJeff Roberson if (tdq->tdq_transferable < busy_thresh && 4087b8bfa0dSJeff Roberson (tdq->tdq_flags & TDQF_BUSY)) { 4097b8bfa0dSJeff Roberson atomic_clear_int(&tdq_busy, 1 << TDQ_ID(tdq)); 4107b8bfa0dSJeff Roberson tdq->tdq_flags &= ~TDQF_BUSY; 4117b8bfa0dSJeff Roberson } 41280f86c9fSJeff Roberson } 413155b9987SJeff Roberson #endif 4143f872f85SJeff Roberson if (ts->ts_runq == &tdq->tdq_timeshare) { 4153f872f85SJeff Roberson if (tdq->tdq_idx != tdq->tdq_ridx) 4163f872f85SJeff Roberson runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx); 417e7d50326SJeff Roberson else 4183f872f85SJeff Roberson runq_remove_idx(ts->ts_runq, ts, NULL); 4198ab80cf0SJeff Roberson /* 4208ab80cf0SJeff Roberson * For timeshare threads we update the priority here so 4218ab80cf0SJeff Roberson * the priority reflects the time we've been sleeping. 4228ab80cf0SJeff Roberson */ 4238ab80cf0SJeff Roberson ts->ts_ltick = ticks; 4248ab80cf0SJeff Roberson sched_pctcpu_update(ts); 4258ab80cf0SJeff Roberson sched_priority(ts->ts_thread); 4263f872f85SJeff Roberson } else 427ad1e7d28SJulian Elischer runq_remove(ts->ts_runq, ts); 428155b9987SJeff Roberson } 429155b9987SJeff Roberson 430a8949de2SJeff Roberson static void 431ad1e7d28SJulian Elischer tdq_load_add(struct tdq *tdq, struct td_sched *ts) 4325d7ef00cSJeff Roberson { 433ef1134c9SJeff Roberson int class; 434b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 435ad1e7d28SJulian Elischer class = PRI_BASE(ts->ts_thread->td_pri_class); 436d2ad694cSJeff Roberson tdq->tdq_load++; 437d2ad694cSJeff Roberson CTR1(KTR_SCHED, "load: %d", tdq->tdq_load); 4387b8bfa0dSJeff Roberson if (class != PRI_ITHD && 4397b8bfa0dSJeff Roberson (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 44033916c36SJeff Roberson #ifdef SMP 441d2ad694cSJeff Roberson tdq->tdq_group->tdg_load++; 44233916c36SJeff Roberson #else 443d2ad694cSJeff Roberson tdq->tdq_sysload++; 444cac77d04SJeff Roberson #endif 4455d7ef00cSJeff Roberson } 44615dc847eSJeff Roberson 447a8949de2SJeff Roberson static void 448ad1e7d28SJulian Elischer tdq_load_rem(struct tdq *tdq, struct td_sched *ts) 4495d7ef00cSJeff Roberson { 450ef1134c9SJeff Roberson int class; 451b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 452ad1e7d28SJulian Elischer class = PRI_BASE(ts->ts_thread->td_pri_class); 4537b8bfa0dSJeff Roberson if (class != PRI_ITHD && 4547b8bfa0dSJeff Roberson (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 45533916c36SJeff Roberson #ifdef SMP 456d2ad694cSJeff Roberson tdq->tdq_group->tdg_load--; 45733916c36SJeff Roberson #else 458d2ad694cSJeff Roberson tdq->tdq_sysload--; 459cac77d04SJeff Roberson #endif 
460d2ad694cSJeff Roberson tdq->tdq_load--; 461d2ad694cSJeff Roberson CTR1(KTR_SCHED, "load: %d", tdq->tdq_load); 462ad1e7d28SJulian Elischer ts->ts_runq = NULL; 46315dc847eSJeff Roberson } 46415dc847eSJeff Roberson 4655d7ef00cSJeff Roberson #ifdef SMP 4663f872f85SJeff Roberson static void 4677b8bfa0dSJeff Roberson sched_smp_tick(struct thread *td) 4683f872f85SJeff Roberson { 4693f872f85SJeff Roberson struct tdq *tdq; 4703f872f85SJeff Roberson 4713f872f85SJeff Roberson tdq = TDQ_SELF(); 4727b8bfa0dSJeff Roberson if (rebalance) { 4733f872f85SJeff Roberson if (ticks >= bal_tick) 4743f872f85SJeff Roberson sched_balance(); 4753f872f85SJeff Roberson if (ticks >= gbal_tick && balance_groups) 4763f872f85SJeff Roberson sched_balance_groups(); 477155b6ca1SJeff Roberson } 4787b8bfa0dSJeff Roberson td->td_sched->ts_rltick = ticks; 4793f872f85SJeff Roberson } 4803f872f85SJeff Roberson 481356500a3SJeff Roberson /* 482155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 483356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 484356500a3SJeff Roberson * by migrating some processes. 485356500a3SJeff Roberson * 486356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 487356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 488356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 489356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 490356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 491356500a3SJeff Roberson * 492356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 493356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 494356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 495356500a3SJeff Roberson * 496356500a3SJeff Roberson */ 49722bf7d9aSJeff Roberson static void 498dc03363dSJeff Roberson sched_balance(void) 499356500a3SJeff Roberson { 500ad1e7d28SJulian Elischer struct tdq_group *high; 501ad1e7d28SJulian Elischer struct tdq_group *low; 502d2ad694cSJeff Roberson struct tdq_group *tdg; 503cac77d04SJeff Roberson int cnt; 504356500a3SJeff Roberson int i; 505356500a3SJeff Roberson 506598b368dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 50786f8ae96SJeff Roberson if (smp_started == 0) 508598b368dSJeff Roberson return; 509cac77d04SJeff Roberson low = high = NULL; 510d2ad694cSJeff Roberson i = random() % (tdg_maxid + 1); 511d2ad694cSJeff Roberson for (cnt = 0; cnt <= tdg_maxid; cnt++) { 512d2ad694cSJeff Roberson tdg = TDQ_GROUP(i); 513cac77d04SJeff Roberson /* 514cac77d04SJeff Roberson * Find the CPU with the highest load that has some 515cac77d04SJeff Roberson * threads to transfer. 
516cac77d04SJeff Roberson */ 517d2ad694cSJeff Roberson if ((high == NULL || tdg->tdg_load > high->tdg_load) 518d2ad694cSJeff Roberson && tdg->tdg_transferable) 519d2ad694cSJeff Roberson high = tdg; 520d2ad694cSJeff Roberson if (low == NULL || tdg->tdg_load < low->tdg_load) 521d2ad694cSJeff Roberson low = tdg; 522d2ad694cSJeff Roberson if (++i > tdg_maxid) 523cac77d04SJeff Roberson i = 0; 524cac77d04SJeff Roberson } 525cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 526d2ad694cSJeff Roberson sched_balance_pair(LIST_FIRST(&high->tdg_members), 527d2ad694cSJeff Roberson LIST_FIRST(&low->tdg_members)); 528cac77d04SJeff Roberson } 52986f8ae96SJeff Roberson 530cac77d04SJeff Roberson static void 531dc03363dSJeff Roberson sched_balance_groups(void) 532cac77d04SJeff Roberson { 533cac77d04SJeff Roberson int i; 534cac77d04SJeff Roberson 535598b368dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 536dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 537cac77d04SJeff Roberson if (smp_started) 538d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 539ad1e7d28SJulian Elischer sched_balance_group(TDQ_GROUP(i)); 540356500a3SJeff Roberson } 541cac77d04SJeff Roberson 542cac77d04SJeff Roberson static void 543d2ad694cSJeff Roberson sched_balance_group(struct tdq_group *tdg) 544cac77d04SJeff Roberson { 545ad1e7d28SJulian Elischer struct tdq *tdq; 546ad1e7d28SJulian Elischer struct tdq *high; 547ad1e7d28SJulian Elischer struct tdq *low; 548cac77d04SJeff Roberson int load; 549cac77d04SJeff Roberson 550d2ad694cSJeff Roberson if (tdg->tdg_transferable == 0) 551cac77d04SJeff Roberson return; 552cac77d04SJeff Roberson low = NULL; 553cac77d04SJeff Roberson high = NULL; 554d2ad694cSJeff Roberson LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) { 555d2ad694cSJeff Roberson load = tdq->tdq_load; 556d2ad694cSJeff Roberson if (high == NULL || load > high->tdq_load) 557ad1e7d28SJulian Elischer high = tdq; 558d2ad694cSJeff Roberson if (low == NULL || load < low->tdq_load) 559ad1e7d28SJulian Elischer low = tdq; 560356500a3SJeff Roberson } 561cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 562cac77d04SJeff Roberson sched_balance_pair(high, low); 563356500a3SJeff Roberson } 564cac77d04SJeff Roberson 565cac77d04SJeff Roberson static void 566ad1e7d28SJulian Elischer sched_balance_pair(struct tdq *high, struct tdq *low) 567cac77d04SJeff Roberson { 568cac77d04SJeff Roberson int transferable; 569cac77d04SJeff Roberson int high_load; 570cac77d04SJeff Roberson int low_load; 571cac77d04SJeff Roberson int move; 572cac77d04SJeff Roberson int diff; 573cac77d04SJeff Roberson int i; 574cac77d04SJeff Roberson 57580f86c9fSJeff Roberson /* 57680f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 577ad1e7d28SJulian Elischer * tdq's transferable count, otherwise we can steal from other members 57880f86c9fSJeff Roberson * of the group. 
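	 *
	 * As a purely illustrative example of the transfer count computed
	 * below: with high_load == 7 and low_load == 2 the difference is 5,
	 * move rounds up to 3 and is then clamped to the transferable count
	 * before that many threads are pushed to the less loaded queue.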
57980f86c9fSJeff Roberson */ 580d2ad694cSJeff Roberson if (high->tdq_group == low->tdq_group) { 581d2ad694cSJeff Roberson transferable = high->tdq_transferable; 582d2ad694cSJeff Roberson high_load = high->tdq_load; 583d2ad694cSJeff Roberson low_load = low->tdq_load; 584cac77d04SJeff Roberson } else { 585d2ad694cSJeff Roberson transferable = high->tdq_group->tdg_transferable; 586d2ad694cSJeff Roberson high_load = high->tdq_group->tdg_load; 587d2ad694cSJeff Roberson low_load = low->tdq_group->tdg_load; 588cac77d04SJeff Roberson } 58980f86c9fSJeff Roberson if (transferable == 0) 590cac77d04SJeff Roberson return; 591155b9987SJeff Roberson /* 592155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 593d2ad694cSJeff Roberson * threads we actually have to give up (transferable). 594155b9987SJeff Roberson */ 595cac77d04SJeff Roberson diff = high_load - low_load; 596356500a3SJeff Roberson move = diff / 2; 597356500a3SJeff Roberson if (diff & 0x1) 598356500a3SJeff Roberson move++; 59980f86c9fSJeff Roberson move = min(move, transferable); 600356500a3SJeff Roberson for (i = 0; i < move; i++) 601ad1e7d28SJulian Elischer tdq_move(high, TDQ_ID(low)); 602356500a3SJeff Roberson return; 603356500a3SJeff Roberson } 604356500a3SJeff Roberson 60522bf7d9aSJeff Roberson static void 606ad1e7d28SJulian Elischer tdq_move(struct tdq *from, int cpu) 607356500a3SJeff Roberson { 608ad1e7d28SJulian Elischer struct tdq *tdq; 609ad1e7d28SJulian Elischer struct tdq *to; 610ad1e7d28SJulian Elischer struct td_sched *ts; 611356500a3SJeff Roberson 612ad1e7d28SJulian Elischer tdq = from; 613ad1e7d28SJulian Elischer to = TDQ_CPU(cpu); 614ad1e7d28SJulian Elischer ts = tdq_steal(tdq, 1); 615ad1e7d28SJulian Elischer if (ts == NULL) { 616d2ad694cSJeff Roberson struct tdq_group *tdg; 61780f86c9fSJeff Roberson 618d2ad694cSJeff Roberson tdg = tdq->tdq_group; 619d2ad694cSJeff Roberson LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) { 620d2ad694cSJeff Roberson if (tdq == from || tdq->tdq_transferable == 0) 62180f86c9fSJeff Roberson continue; 622ad1e7d28SJulian Elischer ts = tdq_steal(tdq, 1); 62380f86c9fSJeff Roberson break; 62480f86c9fSJeff Roberson } 625ad1e7d28SJulian Elischer if (ts == NULL) 626ad1e7d28SJulian Elischer panic("tdq_move: No threads available with a " 62780f86c9fSJeff Roberson "transferable count of %d\n", 628d2ad694cSJeff Roberson tdg->tdg_transferable); 62980f86c9fSJeff Roberson } 630ad1e7d28SJulian Elischer if (tdq == to) 63180f86c9fSJeff Roberson return; 6327b8bfa0dSJeff Roberson sched_rem(ts->ts_thread); 6337b8bfa0dSJeff Roberson ts->ts_cpu = cpu; 6347b8bfa0dSJeff Roberson sched_pin_td(ts->ts_thread); 6357b8bfa0dSJeff Roberson sched_add(ts->ts_thread, SRQ_YIELDING); 6367b8bfa0dSJeff Roberson sched_unpin_td(ts->ts_thread); 637356500a3SJeff Roberson } 63822bf7d9aSJeff Roberson 63980f86c9fSJeff Roberson static int 640ad1e7d28SJulian Elischer tdq_idled(struct tdq *tdq) 64122bf7d9aSJeff Roberson { 642d2ad694cSJeff Roberson struct tdq_group *tdg; 643ad1e7d28SJulian Elischer struct tdq *steal; 644ad1e7d28SJulian Elischer struct td_sched *ts; 64580f86c9fSJeff Roberson 646d2ad694cSJeff Roberson tdg = tdq->tdq_group; 64780f86c9fSJeff Roberson /* 648d2ad694cSJeff Roberson * If we're in a cpu group, try and steal threads from another cpu in 64980f86c9fSJeff Roberson * the group before idling. 
65080f86c9fSJeff Roberson */ 6517b8bfa0dSJeff Roberson if (steal_htt && tdg->tdg_cpus > 1 && tdg->tdg_transferable) { 652d2ad694cSJeff Roberson LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) { 653d2ad694cSJeff Roberson if (steal == tdq || steal->tdq_transferable == 0) 65480f86c9fSJeff Roberson continue; 655ad1e7d28SJulian Elischer ts = tdq_steal(steal, 0); 6567b8bfa0dSJeff Roberson if (ts) 6577b8bfa0dSJeff Roberson goto steal; 6587b8bfa0dSJeff Roberson } 6597b8bfa0dSJeff Roberson } 6607b8bfa0dSJeff Roberson if (steal_busy) { 6617b8bfa0dSJeff Roberson while (tdq_busy) { 6627b8bfa0dSJeff Roberson int cpu; 6637b8bfa0dSJeff Roberson 6647b8bfa0dSJeff Roberson cpu = ffs(tdq_busy); 6657b8bfa0dSJeff Roberson if (cpu == 0) 6667b8bfa0dSJeff Roberson break; 6677b8bfa0dSJeff Roberson cpu--; 6687b8bfa0dSJeff Roberson steal = TDQ_CPU(cpu); 6697b8bfa0dSJeff Roberson if (steal->tdq_transferable == 0) 6707b8bfa0dSJeff Roberson continue; 6717b8bfa0dSJeff Roberson ts = tdq_steal(steal, 1); 672ad1e7d28SJulian Elischer if (ts == NULL) 67380f86c9fSJeff Roberson continue; 6747b8bfa0dSJeff Roberson CTR5(KTR_SCHED, 6757b8bfa0dSJeff Roberson "tdq_idled: stealing td %p(%s) pri %d from %d busy 0x%X", 6767b8bfa0dSJeff Roberson ts->ts_thread, ts->ts_thread->td_proc->p_comm, 6777b8bfa0dSJeff Roberson ts->ts_thread->td_priority, cpu, tdq_busy); 6787b8bfa0dSJeff Roberson goto steal; 67980f86c9fSJeff Roberson } 68080f86c9fSJeff Roberson } 68180f86c9fSJeff Roberson /* 68280f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 683ad1e7d28SJulian Elischer * idle. Otherwise we could get into a situation where a thread bounces 68480f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 68580f86c9fSJeff Roberson */ 686d2ad694cSJeff Roberson tdg->tdg_idlemask |= PCPU_GET(cpumask); 6877b8bfa0dSJeff Roberson if (tdg->tdg_idlemask == tdg->tdg_cpumask) 688d2ad694cSJeff Roberson atomic_set_int(&tdq_idle, tdg->tdg_mask); 68980f86c9fSJeff Roberson return (1); 6907b8bfa0dSJeff Roberson steal: 6917b8bfa0dSJeff Roberson sched_rem(ts->ts_thread); 6927b8bfa0dSJeff Roberson ts->ts_cpu = PCPU_GET(cpuid); 6931e516cf5SJeff Roberson sched_pin_td(ts->ts_thread); 694ad1e7d28SJulian Elischer sched_add(ts->ts_thread, SRQ_YIELDING); 6951e516cf5SJeff Roberson sched_unpin_td(ts->ts_thread); 6967b8bfa0dSJeff Roberson 6977b8bfa0dSJeff Roberson return (0); 69822bf7d9aSJeff Roberson } 69922bf7d9aSJeff Roberson 70022bf7d9aSJeff Roberson static void 7017b8bfa0dSJeff Roberson tdq_notify(struct td_sched *ts) 70222bf7d9aSJeff Roberson { 70322bf7d9aSJeff Roberson struct thread *td; 70422bf7d9aSJeff Roberson struct pcpu *pcpu; 7052454aaf5SJeff Roberson int prio; 7067b8bfa0dSJeff Roberson int cpu; 70722bf7d9aSJeff Roberson 708ad1e7d28SJulian Elischer prio = ts->ts_thread->td_priority; 7097b8bfa0dSJeff Roberson cpu = ts->ts_cpu; 71022bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 71122bf7d9aSJeff Roberson td = pcpu->pc_curthread; 7126b2f763fSJeff Roberson 7136b2f763fSJeff Roberson /* 7146b2f763fSJeff Roberson * If our priority is not better than the current priority there is 7156b2f763fSJeff Roberson * nothing to do. 7166b2f763fSJeff Roberson */ 7176b2f763fSJeff Roberson if (prio > td->td_priority) 7186b2f763fSJeff Roberson return; 7196b2f763fSJeff Roberson /* Always set NEEDRESCHED. 
*/ 7206b2f763fSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 7217b8bfa0dSJeff Roberson /* 7227b8bfa0dSJeff Roberson * IPI if we exceed the threshold or if the target cpu is running an 7237b8bfa0dSJeff Roberson * idle thread. 7247b8bfa0dSJeff Roberson */ 7257b8bfa0dSJeff Roberson if (prio > ipi_thresh && td->td_priority < PRI_MIN_IDLE) 7267b8bfa0dSJeff Roberson return; 7276b2f763fSJeff Roberson if (ipi_ast) 72822bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 7296b2f763fSJeff Roberson else if (ipi_preempt) 7307b8bfa0dSJeff Roberson ipi_selected(1 << cpu, IPI_PREEMPT); 73122bf7d9aSJeff Roberson } 73222bf7d9aSJeff Roberson 733ad1e7d28SJulian Elischer static struct td_sched * 73422bf7d9aSJeff Roberson runq_steal(struct runq *rq) 73522bf7d9aSJeff Roberson { 73622bf7d9aSJeff Roberson struct rqhead *rqh; 73722bf7d9aSJeff Roberson struct rqbits *rqb; 738ad1e7d28SJulian Elischer struct td_sched *ts; 73922bf7d9aSJeff Roberson int word; 74022bf7d9aSJeff Roberson int bit; 74122bf7d9aSJeff Roberson 74222bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 74322bf7d9aSJeff Roberson rqb = &rq->rq_status; 74422bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 74522bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 74622bf7d9aSJeff Roberson continue; 74722bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 748a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 74922bf7d9aSJeff Roberson continue; 75022bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 751ad1e7d28SJulian Elischer TAILQ_FOREACH(ts, rqh, ts_procq) { 752e7d50326SJeff Roberson if (THREAD_CAN_MIGRATE(ts->ts_thread)) 753ad1e7d28SJulian Elischer return (ts); 75422bf7d9aSJeff Roberson } 75522bf7d9aSJeff Roberson } 75622bf7d9aSJeff Roberson } 75722bf7d9aSJeff Roberson return (NULL); 75822bf7d9aSJeff Roberson } 75922bf7d9aSJeff Roberson 760ad1e7d28SJulian Elischer static struct td_sched * 761ad1e7d28SJulian Elischer tdq_steal(struct tdq *tdq, int stealidle) 76222bf7d9aSJeff Roberson { 763ad1e7d28SJulian Elischer struct td_sched *ts; 76422bf7d9aSJeff Roberson 76580f86c9fSJeff Roberson /* 76680f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 76780f86c9fSJeff Roberson * may not have run for a while. 768e7d50326SJeff Roberson * XXX Need to effect steal order for timeshare threads. 76980f86c9fSJeff Roberson */ 770e7d50326SJeff Roberson if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL) 771ad1e7d28SJulian Elischer return (ts); 772e7d50326SJeff Roberson if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL) 773ad1e7d28SJulian Elischer return (ts); 77480f86c9fSJeff Roberson if (stealidle) 775d2ad694cSJeff Roberson return (runq_steal(&tdq->tdq_idle)); 77680f86c9fSJeff Roberson return (NULL); 77722bf7d9aSJeff Roberson } 77880f86c9fSJeff Roberson 77980f86c9fSJeff Roberson int 7807b8bfa0dSJeff Roberson tdq_pickidle(struct tdq *tdq, struct td_sched *ts) 78180f86c9fSJeff Roberson { 782d2ad694cSJeff Roberson struct tdq_group *tdg; 7837b8bfa0dSJeff Roberson int self; 78480f86c9fSJeff Roberson int cpu; 78580f86c9fSJeff Roberson 7867b8bfa0dSJeff Roberson self = PCPU_GET(cpuid); 787670c524fSJeff Roberson if (smp_started == 0) 7887b8bfa0dSJeff Roberson return (self); 78980f86c9fSJeff Roberson /* 7907b8bfa0dSJeff Roberson * If the current CPU has idled, just run it here. 
791670c524fSJeff Roberson */ 7927b8bfa0dSJeff Roberson if ((tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) 7937b8bfa0dSJeff Roberson return (self); 79480f86c9fSJeff Roberson /* 7957b8bfa0dSJeff Roberson * Try the last group we ran on. 7967b8bfa0dSJeff Roberson */ 7977b8bfa0dSJeff Roberson tdg = TDQ_CPU(ts->ts_cpu)->tdq_group; 7987b8bfa0dSJeff Roberson cpu = ffs(tdg->tdg_idlemask); 7997b8bfa0dSJeff Roberson if (cpu) 8007b8bfa0dSJeff Roberson return (cpu - 1); 8017b8bfa0dSJeff Roberson /* 8027b8bfa0dSJeff Roberson * Search for an idle group. 80380f86c9fSJeff Roberson */ 804ad1e7d28SJulian Elischer cpu = ffs(tdq_idle); 8057b8bfa0dSJeff Roberson if (cpu) 8067b8bfa0dSJeff Roberson return (cpu - 1); 807598b368dSJeff Roberson /* 8087b8bfa0dSJeff Roberson * XXX If there are no idle groups, check for an idle core. 809598b368dSJeff Roberson */ 81080f86c9fSJeff Roberson /* 8117b8bfa0dSJeff Roberson * No idle CPUs? 81280f86c9fSJeff Roberson */ 8137b8bfa0dSJeff Roberson return (self); 81480f86c9fSJeff Roberson } 8152454aaf5SJeff Roberson 8167b8bfa0dSJeff Roberson static int 8177b8bfa0dSJeff Roberson tdq_pickpri(struct tdq *tdq, struct td_sched *ts, int flags) 8187b8bfa0dSJeff Roberson { 8197b8bfa0dSJeff Roberson struct pcpu *pcpu; 8207b8bfa0dSJeff Roberson int lowpri; 8217b8bfa0dSJeff Roberson int lowcpu; 8227b8bfa0dSJeff Roberson int lowload; 8237b8bfa0dSJeff Roberson int load; 8247b8bfa0dSJeff Roberson int self; 8257b8bfa0dSJeff Roberson int pri; 8267b8bfa0dSJeff Roberson int cpu; 8277b8bfa0dSJeff Roberson 8287b8bfa0dSJeff Roberson self = PCPU_GET(cpuid); 8297b8bfa0dSJeff Roberson if (smp_started == 0) 8307b8bfa0dSJeff Roberson return (self); 8317b8bfa0dSJeff Roberson 8327b8bfa0dSJeff Roberson pri = ts->ts_thread->td_priority; 8337b8bfa0dSJeff Roberson /* 8347b8bfa0dSJeff Roberson * Regardless of affinity, if the last cpu is idle send it there. 8357b8bfa0dSJeff Roberson */ 8367b8bfa0dSJeff Roberson pcpu = pcpu_find(ts->ts_cpu); 8377b8bfa0dSJeff Roberson if (pcpu->pc_curthread->td_priority > PRI_MIN_IDLE) { 8387b8bfa0dSJeff Roberson CTR5(KTR_SCHED, 8397b8bfa0dSJeff Roberson "ts_cpu %d idle, ltick %d ticks %d pri %d curthread %d", 8407b8bfa0dSJeff Roberson ts->ts_cpu, ts->ts_rltick, ticks, pri, 8417b8bfa0dSJeff Roberson pcpu->pc_curthread->td_priority); 8427b8bfa0dSJeff Roberson return (ts->ts_cpu); 8437b8bfa0dSJeff Roberson } 8447b8bfa0dSJeff Roberson /* 8457b8bfa0dSJeff Roberson * If we have affinity, try to place it on the cpu we last ran on. 8467b8bfa0dSJeff Roberson */ 8477b8bfa0dSJeff Roberson if (SCHED_AFFINITY(ts) && pcpu->pc_curthread->td_priority > pri) { 8487b8bfa0dSJeff Roberson CTR5(KTR_SCHED, 8497b8bfa0dSJeff Roberson "affinity for %d, ltick %d ticks %d pri %d curthread %d", 8507b8bfa0dSJeff Roberson ts->ts_cpu, ts->ts_rltick, ticks, pri, 8517b8bfa0dSJeff Roberson pcpu->pc_curthread->td_priority); 8527b8bfa0dSJeff Roberson return (ts->ts_cpu); 8537b8bfa0dSJeff Roberson } 8547b8bfa0dSJeff Roberson /* 8557b8bfa0dSJeff Roberson * Try ourself first; If we're running something lower priority this 8567b8bfa0dSJeff Roberson * may have some locality with the waking thread and execute faster 8577b8bfa0dSJeff Roberson * here. 8587b8bfa0dSJeff Roberson */ 8597b8bfa0dSJeff Roberson if (tryself) { 8607b8bfa0dSJeff Roberson /* 8617b8bfa0dSJeff Roberson * If we're being awoken by an interrupt thread or the waker 8627b8bfa0dSJeff Roberson * is going right to sleep run here as well. 
8637b8bfa0dSJeff Roberson */ 8647b8bfa0dSJeff Roberson if ((TDQ_SELF()->tdq_load == 1) && (flags & SRQ_YIELDING || 8657b8bfa0dSJeff Roberson curthread->td_pri_class == PRI_ITHD)) { 8667b8bfa0dSJeff Roberson CTR2(KTR_SCHED, "tryself load %d flags %d", 8677b8bfa0dSJeff Roberson TDQ_SELF()->tdq_load, flags); 8687b8bfa0dSJeff Roberson return (self); 8697b8bfa0dSJeff Roberson } 8707b8bfa0dSJeff Roberson } 8717b8bfa0dSJeff Roberson /* 8727b8bfa0dSJeff Roberson * Look for an idle group. 8737b8bfa0dSJeff Roberson */ 8747b8bfa0dSJeff Roberson CTR1(KTR_SCHED, "tdq_idle %X", tdq_idle); 8757b8bfa0dSJeff Roberson cpu = ffs(tdq_idle); 8767b8bfa0dSJeff Roberson if (cpu) 8777b8bfa0dSJeff Roberson return (cpu - 1); 8787b8bfa0dSJeff Roberson if (tryselfidle && pri < curthread->td_priority) { 8797b8bfa0dSJeff Roberson CTR1(KTR_SCHED, "tryself %d", 8807b8bfa0dSJeff Roberson curthread->td_priority); 8817b8bfa0dSJeff Roberson return (self); 8827b8bfa0dSJeff Roberson } 8837b8bfa0dSJeff Roberson /* 8847b8bfa0dSJeff Roberson * Now search for the cpu running the lowest priority thread with 8857b8bfa0dSJeff Roberson * the least load. 8867b8bfa0dSJeff Roberson */ 8877b8bfa0dSJeff Roberson lowload = 0; 8887b8bfa0dSJeff Roberson lowpri = lowcpu = 0; 8897b8bfa0dSJeff Roberson for (cpu = 0; cpu <= mp_maxid; cpu++) { 8907b8bfa0dSJeff Roberson if (CPU_ABSENT(cpu)) 8917b8bfa0dSJeff Roberson continue; 8927b8bfa0dSJeff Roberson pcpu = pcpu_find(cpu); 8937b8bfa0dSJeff Roberson pri = pcpu->pc_curthread->td_priority; 8947b8bfa0dSJeff Roberson CTR4(KTR_SCHED, 8957b8bfa0dSJeff Roberson "cpu %d pri %d lowcpu %d lowpri %d", 8967b8bfa0dSJeff Roberson cpu, pri, lowcpu, lowpri); 8977b8bfa0dSJeff Roberson if (pri < lowpri) 8987b8bfa0dSJeff Roberson continue; 8997b8bfa0dSJeff Roberson load = TDQ_CPU(cpu)->tdq_load; 9007b8bfa0dSJeff Roberson if (lowpri && lowpri == pri && load > lowload) 9017b8bfa0dSJeff Roberson continue; 9027b8bfa0dSJeff Roberson lowpri = pri; 9037b8bfa0dSJeff Roberson lowcpu = cpu; 9047b8bfa0dSJeff Roberson lowload = load; 9057b8bfa0dSJeff Roberson } 9067b8bfa0dSJeff Roberson 9077b8bfa0dSJeff Roberson return (lowcpu); 90880f86c9fSJeff Roberson } 90980f86c9fSJeff Roberson 91022bf7d9aSJeff Roberson #endif /* SMP */ 91122bf7d9aSJeff Roberson 91222bf7d9aSJeff Roberson /* 91322bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 
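 * The realtime queue is always searched first, then the timeshare queue
 * starting from the current removal index (tdq_ridx), and finally the
 * idle queue.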
9140c0a98b2SJeff Roberson */ 9150c0a98b2SJeff Roberson 916ad1e7d28SJulian Elischer static struct td_sched * 917ad1e7d28SJulian Elischer tdq_choose(struct tdq *tdq) 9185d7ef00cSJeff Roberson { 919ad1e7d28SJulian Elischer struct td_sched *ts; 9205d7ef00cSJeff Roberson 921b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 922a8949de2SJeff Roberson 923e7d50326SJeff Roberson ts = runq_choose(&tdq->tdq_realtime); 924e7d50326SJeff Roberson if (ts != NULL) { 925e7d50326SJeff Roberson KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME, 926e7d50326SJeff Roberson ("tdq_choose: Invalid priority on realtime queue %d", 927e7d50326SJeff Roberson ts->ts_thread->td_priority)); 928e7d50326SJeff Roberson return (ts); 929a8949de2SJeff Roberson } 9303f872f85SJeff Roberson ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx); 931e7d50326SJeff Roberson if (ts != NULL) { 932e7d50326SJeff Roberson KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE && 933e7d50326SJeff Roberson ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE, 934e7d50326SJeff Roberson ("tdq_choose: Invalid priority on timeshare queue %d", 935e7d50326SJeff Roberson ts->ts_thread->td_priority)); 936ad1e7d28SJulian Elischer return (ts); 93715dc847eSJeff Roberson } 93815dc847eSJeff Roberson 939e7d50326SJeff Roberson ts = runq_choose(&tdq->tdq_idle); 940e7d50326SJeff Roberson if (ts != NULL) { 941e7d50326SJeff Roberson KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE, 942e7d50326SJeff Roberson ("tdq_choose: Invalid priority on idle queue %d", 943e7d50326SJeff Roberson ts->ts_thread->td_priority)); 944e7d50326SJeff Roberson return (ts); 945e7d50326SJeff Roberson } 946e7d50326SJeff Roberson 947e7d50326SJeff Roberson return (NULL); 948245f3abfSJeff Roberson } 9490a016a05SJeff Roberson 9500a016a05SJeff Roberson static void 951ad1e7d28SJulian Elischer tdq_setup(struct tdq *tdq) 9520a016a05SJeff Roberson { 953e7d50326SJeff Roberson runq_init(&tdq->tdq_realtime); 954e7d50326SJeff Roberson runq_init(&tdq->tdq_timeshare); 955d2ad694cSJeff Roberson runq_init(&tdq->tdq_idle); 956d2ad694cSJeff Roberson tdq->tdq_load = 0; 9570a016a05SJeff Roberson } 9580a016a05SJeff Roberson 95935e6168fSJeff Roberson static void 96035e6168fSJeff Roberson sched_setup(void *dummy) 96135e6168fSJeff Roberson { 9620ec896fdSJeff Roberson #ifdef SMP 96335e6168fSJeff Roberson int i; 9640ec896fdSJeff Roberson #endif 96535e6168fSJeff Roberson 966a1d4fe69SDavid Xu /* 967a1d4fe69SDavid Xu * To avoid divide-by-zero, we set realstathz a dummy value 968a1d4fe69SDavid Xu * in case which sched_clock() called before sched_initticks(). 969a1d4fe69SDavid Xu */ 970a1d4fe69SDavid Xu realstathz = hz; 971e7d50326SJeff Roberson sched_slice = (realstathz/7); /* 140ms */ 972e7d50326SJeff Roberson tickincr = 1 << SCHED_TICK_SHIFT; 973e1f89c22SJeff Roberson 974356500a3SJeff Roberson #ifdef SMP 975cac77d04SJeff Roberson balance_groups = 0; 97680f86c9fSJeff Roberson /* 977ad1e7d28SJulian Elischer * Initialize the tdqs. 
97880f86c9fSJeff Roberson */ 979749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 980c02bbb43SJeff Roberson struct tdq *tdq; 98180f86c9fSJeff Roberson 982c02bbb43SJeff Roberson tdq = &tdq_cpu[i]; 983ad1e7d28SJulian Elischer tdq_setup(&tdq_cpu[i]); 98480f86c9fSJeff Roberson } 98580f86c9fSJeff Roberson if (smp_topology == NULL) { 986d2ad694cSJeff Roberson struct tdq_group *tdg; 987c02bbb43SJeff Roberson struct tdq *tdq; 988598b368dSJeff Roberson int cpus; 98980f86c9fSJeff Roberson 990598b368dSJeff Roberson for (cpus = 0, i = 0; i < MAXCPU; i++) { 991598b368dSJeff Roberson if (CPU_ABSENT(i)) 992598b368dSJeff Roberson continue; 993c02bbb43SJeff Roberson tdq = &tdq_cpu[i]; 994d2ad694cSJeff Roberson tdg = &tdq_groups[cpus]; 99580f86c9fSJeff Roberson /* 996ad1e7d28SJulian Elischer * Setup a tdq group with one member. 99780f86c9fSJeff Roberson */ 998c02bbb43SJeff Roberson tdq->tdq_transferable = 0; 999c02bbb43SJeff Roberson tdq->tdq_group = tdg; 1000d2ad694cSJeff Roberson tdg->tdg_cpus = 1; 1001d2ad694cSJeff Roberson tdg->tdg_idlemask = 0; 1002d2ad694cSJeff Roberson tdg->tdg_cpumask = tdg->tdg_mask = 1 << i; 1003d2ad694cSJeff Roberson tdg->tdg_load = 0; 1004d2ad694cSJeff Roberson tdg->tdg_transferable = 0; 1005d2ad694cSJeff Roberson LIST_INIT(&tdg->tdg_members); 1006c02bbb43SJeff Roberson LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings); 1007598b368dSJeff Roberson cpus++; 1008749d01b0SJeff Roberson } 1009d2ad694cSJeff Roberson tdg_maxid = cpus - 1; 1010749d01b0SJeff Roberson } else { 1011d2ad694cSJeff Roberson struct tdq_group *tdg; 101280f86c9fSJeff Roberson struct cpu_group *cg; 1013749d01b0SJeff Roberson int j; 1014749d01b0SJeff Roberson 1015749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 1016749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 1017d2ad694cSJeff Roberson tdg = &tdq_groups[i]; 101880f86c9fSJeff Roberson /* 101980f86c9fSJeff Roberson * Initialize the group. 102080f86c9fSJeff Roberson */ 1021d2ad694cSJeff Roberson tdg->tdg_idlemask = 0; 1022d2ad694cSJeff Roberson tdg->tdg_load = 0; 1023d2ad694cSJeff Roberson tdg->tdg_transferable = 0; 1024d2ad694cSJeff Roberson tdg->tdg_cpus = cg->cg_count; 1025d2ad694cSJeff Roberson tdg->tdg_cpumask = cg->cg_mask; 1026d2ad694cSJeff Roberson LIST_INIT(&tdg->tdg_members); 102780f86c9fSJeff Roberson /* 102880f86c9fSJeff Roberson * Find all of the group members and add them. 102980f86c9fSJeff Roberson */ 103080f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 103180f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 1032d2ad694cSJeff Roberson if (tdg->tdg_mask == 0) 1033d2ad694cSJeff Roberson tdg->tdg_mask = 1 << j; 1034d2ad694cSJeff Roberson tdq_cpu[j].tdq_transferable = 0; 1035d2ad694cSJeff Roberson tdq_cpu[j].tdq_group = tdg; 1036d2ad694cSJeff Roberson LIST_INSERT_HEAD(&tdg->tdg_members, 1037d2ad694cSJeff Roberson &tdq_cpu[j], tdq_siblings); 103880f86c9fSJeff Roberson } 103980f86c9fSJeff Roberson } 1040d2ad694cSJeff Roberson if (tdg->tdg_cpus > 1) 1041cac77d04SJeff Roberson balance_groups = 1; 1042749d01b0SJeff Roberson } 1043d2ad694cSJeff Roberson tdg_maxid = smp_topology->ct_count - 1; 1044749d01b0SJeff Roberson } 1045cac77d04SJeff Roberson /* 1046cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 1047cac77d04SJeff Roberson * interfere with each other. 
1048cac77d04SJeff Roberson */ 1049dc03363dSJeff Roberson bal_tick = ticks + hz; 1050cac77d04SJeff Roberson if (balance_groups) 1051dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 1052749d01b0SJeff Roberson #else 1053ad1e7d28SJulian Elischer tdq_setup(TDQ_SELF()); 1054356500a3SJeff Roberson #endif 1055749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 1056ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), &td_sched0); 1057749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 105835e6168fSJeff Roberson } 105935e6168fSJeff Roberson 1060a1d4fe69SDavid Xu /* ARGSUSED */ 1061a1d4fe69SDavid Xu static void 1062a1d4fe69SDavid Xu sched_initticks(void *dummy) 1063a1d4fe69SDavid Xu { 1064a1d4fe69SDavid Xu mtx_lock_spin(&sched_lock); 1065a1d4fe69SDavid Xu realstathz = stathz ? stathz : hz; 1066e7d50326SJeff Roberson sched_slice = (realstathz/7); /* ~140ms */ 1067a1d4fe69SDavid Xu 1068a1d4fe69SDavid Xu /* 1069e7d50326SJeff Roberson * tickincr is shifted out by 10 to avoid rounding errors due to 10703f872f85SJeff Roberson * hz not being evenly divisible by stathz on all platforms. 1071e7d50326SJeff Roberson */ 1072e7d50326SJeff Roberson tickincr = (hz << SCHED_TICK_SHIFT) / realstathz; 1073e7d50326SJeff Roberson /* 1074e7d50326SJeff Roberson * This does not work for values of stathz that are more than 1075e7d50326SJeff Roberson * 1 << SCHED_TICK_SHIFT * hz. In practice this does not happen. 1076a1d4fe69SDavid Xu */ 1077a1d4fe69SDavid Xu if (tickincr == 0) 1078a1d4fe69SDavid Xu tickincr = 1; 10797b8bfa0dSJeff Roberson #ifdef SMP 10807b8bfa0dSJeff Roberson affinity = SCHED_AFFINITY_DEFAULT; 10817b8bfa0dSJeff Roberson #endif 1082a1d4fe69SDavid Xu mtx_unlock_spin(&sched_lock); 1083a1d4fe69SDavid Xu } 1084a1d4fe69SDavid Xu 1085a1d4fe69SDavid Xu 108635e6168fSJeff Roberson /* 108735e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 108835e6168fSJeff Roberson * process. 108935e6168fSJeff Roberson */ 109015dc847eSJeff Roberson static void 10918460a577SJohn Birrell sched_priority(struct thread *td) 109235e6168fSJeff Roberson { 1093e7d50326SJeff Roberson int score; 109435e6168fSJeff Roberson int pri; 109535e6168fSJeff Roberson 10968460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 109715dc847eSJeff Roberson return; 1098e7d50326SJeff Roberson /* 1099e7d50326SJeff Roberson * If the score is interactive we place the thread in the realtime 1100e7d50326SJeff Roberson * queue with a priority that is less than kernel and interrupt 1101e7d50326SJeff Roberson * priorities. These threads are not subject to nice restrictions. 1102e7d50326SJeff Roberson * 1103e7d50326SJeff Roberson * Scores greater than this are placed on the normal realtime queue 1104e7d50326SJeff Roberson * where the priority is partially decided by the most recent cpu 1105e7d50326SJeff Roberson * utilization and the rest is decided by nice value. 
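	 *
	 * As a hedged illustration with assumed numbers: a thread scoring 10
	 * against the default sched_interact of 30 receives PRI_MIN_REALTIME +
	 * ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / 30) * 10.  A non-interactive
	 * thread that ran for roughly half of its recorded ticks gets about
	 * SCHED_PRI_MIN + SCHED_PRI_RANGE / 2, further offset by its nice value.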
1106e7d50326SJeff Roberson */ 1107e7d50326SJeff Roberson score = sched_interact_score(td); 1108e7d50326SJeff Roberson if (score < sched_interact) { 1109e7d50326SJeff Roberson pri = PRI_MIN_REALTIME; 1110e7d50326SJeff Roberson pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact) 1111e7d50326SJeff Roberson * score; 1112e7d50326SJeff Roberson KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME, 1113e7d50326SJeff Roberson ("sched_priority: invalid interactive priority %d", pri)); 1114e7d50326SJeff Roberson } else { 1115e7d50326SJeff Roberson pri = SCHED_PRI_MIN; 1116e7d50326SJeff Roberson if (td->td_sched->ts_ticks) 1117e7d50326SJeff Roberson pri += SCHED_PRI_TICKS(td->td_sched); 1118e7d50326SJeff Roberson pri += SCHED_PRI_NICE(td->td_proc->p_nice); 11198ab80cf0SJeff Roberson if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) { 11208ab80cf0SJeff Roberson static int once = 1; 11218ab80cf0SJeff Roberson if (once) { 11228ab80cf0SJeff Roberson printf("sched_priority: invalid priority %d", 11238ab80cf0SJeff Roberson pri); 11248ab80cf0SJeff Roberson printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n", 11258ab80cf0SJeff Roberson td->td_proc->p_nice, 11268ab80cf0SJeff Roberson td->td_sched->ts_ticks, 11278ab80cf0SJeff Roberson td->td_sched->ts_ftick, 11288ab80cf0SJeff Roberson td->td_sched->ts_ltick, 11298ab80cf0SJeff Roberson SCHED_PRI_TICKS(td->td_sched)); 11308ab80cf0SJeff Roberson once = 0; 11318ab80cf0SJeff Roberson } 11328ab80cf0SJeff Roberson pri = min(max(pri, PRI_MIN_TIMESHARE), 11338ab80cf0SJeff Roberson PRI_MAX_TIMESHARE); 11348ab80cf0SJeff Roberson } 1135e7d50326SJeff Roberson } 11368460a577SJohn Birrell sched_user_prio(td, pri); 113735e6168fSJeff Roberson 113815dc847eSJeff Roberson return; 113935e6168fSJeff Roberson } 114035e6168fSJeff Roberson 114135e6168fSJeff Roberson /* 1142d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1143d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1144d322132cSJeff Roberson */ 11454b60e324SJeff Roberson static void 11468460a577SJohn Birrell sched_interact_update(struct thread *td) 11474b60e324SJeff Roberson { 1148155b6ca1SJeff Roberson struct td_sched *ts; 1149d322132cSJeff Roberson int sum; 11503f741ca1SJeff Roberson 1151155b6ca1SJeff Roberson ts = td->td_sched; 1152155b6ca1SJeff Roberson sum = ts->skg_runtime + ts->skg_slptime; 1153d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1154d322132cSJeff Roberson return; 1155d322132cSJeff Roberson /* 1156155b6ca1SJeff Roberson * This only happens from two places: 1157155b6ca1SJeff Roberson * 1) We have added an unusual amount of run time from fork_exit. 1158155b6ca1SJeff Roberson * 2) We have added an unusual amount of sleep time from sched_sleep(). 1159155b6ca1SJeff Roberson */ 1160155b6ca1SJeff Roberson if (sum > SCHED_SLP_RUN_MAX * 2) { 1161155b6ca1SJeff Roberson if (ts->skg_runtime > ts->skg_slptime) { 1162155b6ca1SJeff Roberson ts->skg_runtime = SCHED_SLP_RUN_MAX; 1163155b6ca1SJeff Roberson ts->skg_slptime = 1; 1164155b6ca1SJeff Roberson } else { 1165155b6ca1SJeff Roberson ts->skg_slptime = SCHED_SLP_RUN_MAX; 1166155b6ca1SJeff Roberson ts->skg_runtime = 1; 1167155b6ca1SJeff Roberson } 1168155b6ca1SJeff Roberson return; 1169155b6ca1SJeff Roberson } 1170155b6ca1SJeff Roberson /* 1171d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1172d322132cSJeff Roberson * will not bring us back into range. 
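 * (Illustrative arithmetic, added: a sum of 1.5 * SCHED_SLP_RUN_MAX
 * scaled by 4/5 would still sit at 1.2 * SCHED_SLP_RUN_MAX, above the
 * cap, whereas halving it lands below the cap.)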
Dividing by two here forces 11732454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1174d322132cSJeff Roberson */ 117537a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1176155b6ca1SJeff Roberson ts->skg_runtime /= 2; 1177155b6ca1SJeff Roberson ts->skg_slptime /= 2; 1178d322132cSJeff Roberson return; 1179d322132cSJeff Roberson } 1180155b6ca1SJeff Roberson ts->skg_runtime = (ts->skg_runtime / 5) * 4; 1181155b6ca1SJeff Roberson ts->skg_slptime = (ts->skg_slptime / 5) * 4; 1182d322132cSJeff Roberson } 1183d322132cSJeff Roberson 1184d322132cSJeff Roberson static void 11858460a577SJohn Birrell sched_interact_fork(struct thread *td) 1186d322132cSJeff Roberson { 1187d322132cSJeff Roberson int ratio; 1188d322132cSJeff Roberson int sum; 1189d322132cSJeff Roberson 11908460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1191d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1192d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 11938460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 11948460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 11954b60e324SJeff Roberson } 11964b60e324SJeff Roberson } 11974b60e324SJeff Roberson 1198e1f89c22SJeff Roberson static int 11998460a577SJohn Birrell sched_interact_score(struct thread *td) 1200e1f89c22SJeff Roberson { 1201210491d3SJeff Roberson int div; 1202e1f89c22SJeff Roberson 12038460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 12048460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1205210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 12068460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 12078460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 12088460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 12098460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1210e1f89c22SJeff Roberson } 1211e1f89c22SJeff Roberson 1212210491d3SJeff Roberson /* 1213210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1214210491d3SJeff Roberson */ 1215210491d3SJeff Roberson return (0); 1216e1f89c22SJeff Roberson 1217e1f89c22SJeff Roberson } 1218e1f89c22SJeff Roberson 121915dc847eSJeff Roberson /* 1220e7d50326SJeff Roberson * Called from proc0_init() to bootstrap the scheduler. 1221ed062c8dSJulian Elischer */ 1222ed062c8dSJulian Elischer void 1223ed062c8dSJulian Elischer schedinit(void) 1224ed062c8dSJulian Elischer { 1225e7d50326SJeff Roberson 1226ed062c8dSJulian Elischer /* 1227ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1228ed062c8dSJulian Elischer */ 1229ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1230ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1231e7d50326SJeff Roberson td_sched0.ts_ltick = ticks; 12328ab80cf0SJeff Roberson td_sched0.ts_ftick = ticks; 1233ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1234ad1e7d28SJulian Elischer td_sched0.ts_state = TSS_THREAD; 1235ed062c8dSJulian Elischer } 1236ed062c8dSJulian Elischer 1237ed062c8dSJulian Elischer /* 123815dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 123915dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 1240e7d50326SJeff Roberson * at most sched_slice stathz ticks. 
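 *
 * Illustrative example (added): with hz = 1000 and realstathz = 128,
 * sched_slice defaults to 128 / 7 = 18 stathz ticks, so the conversion
 * below yields 1000 / (128 / 18) = 1000 / 7, roughly 142 ms.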
124115dc847eSJeff Roberson */ 124235e6168fSJeff Roberson int 124335e6168fSJeff Roberson sched_rr_interval(void) 124435e6168fSJeff Roberson { 1245e7d50326SJeff Roberson 1246e7d50326SJeff Roberson /* Convert sched_slice to hz */ 1247e7d50326SJeff Roberson return (hz/(realstathz/sched_slice)); 124835e6168fSJeff Roberson } 124935e6168fSJeff Roberson 125022bf7d9aSJeff Roberson static void 1251ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 125235e6168fSJeff Roberson { 1253e7d50326SJeff Roberson 1254e7d50326SJeff Roberson if (ts->ts_ticks == 0) 1255e7d50326SJeff Roberson return; 12568ab80cf0SJeff Roberson if (ticks - (hz / 10) < ts->ts_ltick && 12578ab80cf0SJeff Roberson SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX) 12588ab80cf0SJeff Roberson return; 125935e6168fSJeff Roberson /* 126035e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1261210491d3SJeff Roberson */ 1262e7d50326SJeff Roberson if (ts->ts_ltick > ticks - SCHED_TICK_TARG) 1263ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1264e7d50326SJeff Roberson SCHED_TICK_TARG; 1265e7d50326SJeff Roberson else 1266ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1267ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1268e7d50326SJeff Roberson ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG; 126935e6168fSJeff Roberson } 127035e6168fSJeff Roberson 1271e7d50326SJeff Roberson static void 1272f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 127335e6168fSJeff Roberson { 1274ad1e7d28SJulian Elischer struct td_sched *ts; 127535e6168fSJeff Roberson 127681d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 127781d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 127881d47d3fSJeff Roberson curthread->td_proc->p_comm); 1279ad1e7d28SJulian Elischer ts = td->td_sched; 128035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1281f5c157d9SJohn Baldwin if (td->td_priority == prio) 1282f5c157d9SJohn Baldwin return; 1283e7d50326SJeff Roberson 12843f872f85SJeff Roberson if (TD_ON_RUNQ(td) && prio < td->td_priority) { 12853f741ca1SJeff Roberson /* 12863f741ca1SJeff Roberson * If the priority has been elevated due to priority 12873f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 1288e7d50326SJeff Roberson * queue. This could be optimized to not re-add in some 1289e7d50326SJeff Roberson * cases. 1290f2b74cbfSJeff Roberson */ 1291e7d50326SJeff Roberson sched_rem(td); 1292e7d50326SJeff Roberson td->td_priority = prio; 1293e7d50326SJeff Roberson sched_add(td, SRQ_BORROWING); 12943f741ca1SJeff Roberson } else 12953f741ca1SJeff Roberson td->td_priority = prio; 129635e6168fSJeff Roberson } 129735e6168fSJeff Roberson 1298f5c157d9SJohn Baldwin /* 1299f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1300f5c157d9SJohn Baldwin * priority. 1301f5c157d9SJohn Baldwin */ 1302f5c157d9SJohn Baldwin void 1303f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1304f5c157d9SJohn Baldwin { 1305f5c157d9SJohn Baldwin 1306f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1307f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1308f5c157d9SJohn Baldwin } 1309f5c157d9SJohn Baldwin 1310f5c157d9SJohn Baldwin /* 1311f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1312f5c157d9SJohn Baldwin * over. 
The prio argument is the minimum priority the thread 1313f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1314f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1315f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1316f5c157d9SJohn Baldwin * of prio. 1317f5c157d9SJohn Baldwin */ 1318f5c157d9SJohn Baldwin void 1319f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1320f5c157d9SJohn Baldwin { 1321f5c157d9SJohn Baldwin u_char base_pri; 1322f5c157d9SJohn Baldwin 1323f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1324f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 13258460a577SJohn Birrell base_pri = td->td_user_pri; 1326f5c157d9SJohn Baldwin else 1327f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1328f5c157d9SJohn Baldwin if (prio >= base_pri) { 1329f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1330f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1331f5c157d9SJohn Baldwin } else 1332f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1333f5c157d9SJohn Baldwin } 1334f5c157d9SJohn Baldwin 1335f5c157d9SJohn Baldwin void 1336f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1337f5c157d9SJohn Baldwin { 1338f5c157d9SJohn Baldwin u_char oldprio; 1339f5c157d9SJohn Baldwin 1340f5c157d9SJohn Baldwin /* First, update the base priority. */ 1341f5c157d9SJohn Baldwin td->td_base_pri = prio; 1342f5c157d9SJohn Baldwin 1343f5c157d9SJohn Baldwin /* 134450aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1345f5c157d9SJohn Baldwin * ever lower the priority. 1346f5c157d9SJohn Baldwin */ 1347f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1348f5c157d9SJohn Baldwin return; 1349f5c157d9SJohn Baldwin 1350f5c157d9SJohn Baldwin /* Change the real priority. */ 1351f5c157d9SJohn Baldwin oldprio = td->td_priority; 1352f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1353f5c157d9SJohn Baldwin 1354f5c157d9SJohn Baldwin /* 1355f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1356f5c157d9SJohn Baldwin * its state. 
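 * (Clarifying note, added: turnstile_adjust() re-sorts the thread on
 * the turnstile's wait queue so that priority propagation to the lock
 * holder continues with the new priority.)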
1357f5c157d9SJohn Baldwin */ 1358f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1359f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1360f5c157d9SJohn Baldwin } 1361f5c157d9SJohn Baldwin 136235e6168fSJeff Roberson void 13638460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13643db720fdSDavid Xu { 13653db720fdSDavid Xu u_char oldprio; 13663db720fdSDavid Xu 13678460a577SJohn Birrell td->td_base_user_pri = prio; 1368fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1369fc6c30f6SJulian Elischer return; 13708460a577SJohn Birrell oldprio = td->td_user_pri; 13718460a577SJohn Birrell td->td_user_pri = prio; 13723db720fdSDavid Xu 13733db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13743db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13753db720fdSDavid Xu } 13763db720fdSDavid Xu 13773db720fdSDavid Xu void 13783db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13793db720fdSDavid Xu { 13803db720fdSDavid Xu u_char oldprio; 13813db720fdSDavid Xu 13823db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13833db720fdSDavid Xu 1384f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 13858460a577SJohn Birrell td->td_user_pri = prio; 13863db720fdSDavid Xu 13873db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13883db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13893db720fdSDavid Xu } 13903db720fdSDavid Xu 13913db720fdSDavid Xu void 13923db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13933db720fdSDavid Xu { 13943db720fdSDavid Xu u_char base_pri; 13953db720fdSDavid Xu 13968460a577SJohn Birrell base_pri = td->td_base_user_pri; 13973db720fdSDavid Xu if (prio >= base_pri) { 13983db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13998460a577SJohn Birrell sched_user_prio(td, base_pri); 14003db720fdSDavid Xu } else 14013db720fdSDavid Xu sched_lend_user_prio(td, prio); 14023db720fdSDavid Xu } 14033db720fdSDavid Xu 14043db720fdSDavid Xu void 14053389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 140635e6168fSJeff Roberson { 1407c02bbb43SJeff Roberson struct tdq *tdq; 1408ad1e7d28SJulian Elischer struct td_sched *ts; 14097b8bfa0dSJeff Roberson int preempt; 141035e6168fSJeff Roberson 141135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 141235e6168fSJeff Roberson 14137b8bfa0dSJeff Roberson preempt = flags & SW_PREEMPT; 1414c02bbb43SJeff Roberson tdq = TDQ_SELF(); 1415e7d50326SJeff Roberson ts = td->td_sched; 1416060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1417060563ecSJulian Elischer td->td_oncpu = NOCPU; 141852eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 141977918643SStephan Uphoff td->td_owepreempt = 0; 1420b11fdad0SJeff Roberson /* 1421ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1422b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1423b11fdad0SJeff Roberson */ 14242454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1425bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 14267b8bfa0dSJeff Roberson } else { 1427c02bbb43SJeff Roberson tdq_load_rem(tdq, ts); 1428ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1429f2b74cbfSJeff Roberson /* 1430ed062c8dSJulian Elischer * Don't allow the thread to migrate 1431ed062c8dSJulian Elischer * from a preemption. 1432f2b74cbfSJeff Roberson */ 14337b8bfa0dSJeff Roberson if (preempt) 14341e516cf5SJeff Roberson sched_pin_td(td); 14357b8bfa0dSJeff Roberson setrunqueue(td, preempt ? 
1436598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1437598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 14387b8bfa0dSJeff Roberson if (preempt) 14391e516cf5SJeff Roberson sched_unpin_td(td); 14408460a577SJohn Birrell } 1441ed062c8dSJulian Elischer } 1442d39063f2SJulian Elischer if (newtd != NULL) { 1443c20c691bSJulian Elischer /* 14446680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 14456680bbd5SJeff Roberson * added to the run queue and then chosen. 1446c20c691bSJulian Elischer */ 1447ad1e7d28SJulian Elischer newtd->td_sched->ts_flags |= TSF_DIDRUN; 1448c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1449ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1450d39063f2SJulian Elischer } else 14512454aaf5SJeff Roberson newtd = choosethread(); 1452ebccf1e3SJoseph Koshy if (td != newtd) { 1453ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1454ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1455ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1456ebccf1e3SJoseph Koshy #endif 14578460a577SJohn Birrell 1458ae53b483SJeff Roberson cpu_switch(td, newtd); 1459ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1460ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1461ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1462ebccf1e3SJoseph Koshy #endif 1463ebccf1e3SJoseph Koshy } 1464ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1465060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 146635e6168fSJeff Roberson } 146735e6168fSJeff Roberson 146835e6168fSJeff Roberson void 1469fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 147035e6168fSJeff Roberson { 147135e6168fSJeff Roberson struct thread *td; 147235e6168fSJeff Roberson 1473fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14740b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 1475e7d50326SJeff Roberson 1476fa885116SJulian Elischer p->p_nice = nice; 14778460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14788460a577SJohn Birrell sched_priority(td); 1479e7d50326SJeff Roberson sched_prio(td, td->td_base_user_pri); 148035e6168fSJeff Roberson } 1481fa885116SJulian Elischer } 148235e6168fSJeff Roberson 148335e6168fSJeff Roberson void 148444f3b092SJohn Baldwin sched_sleep(struct thread *td) 148535e6168fSJeff Roberson { 1486e7d50326SJeff Roberson 148735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 148835e6168fSJeff Roberson 1489ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 149035e6168fSJeff Roberson } 149135e6168fSJeff Roberson 149235e6168fSJeff Roberson void 149335e6168fSJeff Roberson sched_wakeup(struct thread *td) 149435e6168fSJeff Roberson { 1495e7d50326SJeff Roberson int slptime; 1496e7d50326SJeff Roberson 149735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 149835e6168fSJeff Roberson 149935e6168fSJeff Roberson /* 1500e7d50326SJeff Roberson * If we slept for more than a tick update our interactivity and 1501e7d50326SJeff Roberson * priority. 
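 * For example (note added, assuming SCHED_TICK_SHIFT of 10 as in the
 * tickincr comment above): a thread that slept for 500 hz ticks is
 * credited 500 << 10 units of voluntary sleep before its interactivity
 * score and priority are recomputed.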
150235e6168fSJeff Roberson */ 1503e7d50326SJeff Roberson slptime = td->td_sched->ts_slptime; 1504e7d50326SJeff Roberson td->td_sched->ts_slptime = 0; 1505e7d50326SJeff Roberson if (slptime && slptime != ticks) { 150615dc847eSJeff Roberson int hzticks; 1507f1e8dc4aSJeff Roberson 1508e7d50326SJeff Roberson hzticks = (ticks - slptime) << SCHED_TICK_SHIFT; 15098460a577SJohn Birrell td->td_sched->skg_slptime += hzticks; 15108460a577SJohn Birrell sched_interact_update(td); 1511e7d50326SJeff Roberson sched_pctcpu_update(td->td_sched); 15128460a577SJohn Birrell sched_priority(td); 1513f1e8dc4aSJeff Roberson } 15142630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 151535e6168fSJeff Roberson } 151635e6168fSJeff Roberson 151735e6168fSJeff Roberson /* 151835e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 151935e6168fSJeff Roberson * priority. 152035e6168fSJeff Roberson */ 152135e6168fSJeff Roberson void 15228460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 152315dc847eSJeff Roberson { 15248460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1525ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1526e7d50326SJeff Roberson /* 1527e7d50326SJeff Roberson * Penalize the parent and child for forking. 1528e7d50326SJeff Roberson */ 1529e7d50326SJeff Roberson sched_interact_fork(child); 1530e7d50326SJeff Roberson sched_priority(child); 1531e7d50326SJeff Roberson td->td_sched->skg_runtime += tickincr; 1532e7d50326SJeff Roberson sched_interact_update(td); 1533e7d50326SJeff Roberson sched_priority(td); 1534ad1e7d28SJulian Elischer } 1535ad1e7d28SJulian Elischer 1536ad1e7d28SJulian Elischer void 1537ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1538ad1e7d28SJulian Elischer { 1539ad1e7d28SJulian Elischer struct td_sched *ts; 1540ad1e7d28SJulian Elischer struct td_sched *ts2; 15418460a577SJohn Birrell 1542e7d50326SJeff Roberson /* 1543e7d50326SJeff Roberson * Initialize child. 1544e7d50326SJeff Roberson */ 1545ed062c8dSJulian Elischer sched_newthread(child); 1546ad1e7d28SJulian Elischer ts = td->td_sched; 1547ad1e7d28SJulian Elischer ts2 = child->td_sched; 1548ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1549ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1550e7d50326SJeff Roberson /* 1551e7d50326SJeff Roberson * Grab our parents cpu estimation information and priority. 1552e7d50326SJeff Roberson */ 1553ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1554ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1555ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 1556e7d50326SJeff Roberson child->td_user_pri = td->td_user_pri; 1557e7d50326SJeff Roberson child->td_base_user_pri = td->td_base_user_pri; 1558e7d50326SJeff Roberson /* 1559e7d50326SJeff Roberson * And update interactivity score. 1560e7d50326SJeff Roberson */ 1561e7d50326SJeff Roberson ts2->skg_slptime = ts->skg_slptime; 1562e7d50326SJeff Roberson ts2->skg_runtime = ts->skg_runtime; 1563e7d50326SJeff Roberson ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. 
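 * A one-tick first slice sends the child through the
 * slice-expiry path in sched_clock() almost immediately, so
 * its priority is recomputed early (clarifying note, added).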
*/ 156415dc847eSJeff Roberson } 156515dc847eSJeff Roberson 156615dc847eSJeff Roberson void 15678460a577SJohn Birrell sched_class(struct thread *td, int class) 156815dc847eSJeff Roberson { 156915dc847eSJeff Roberson 15702056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15718460a577SJohn Birrell if (td->td_pri_class == class) 157215dc847eSJeff Roberson return; 157315dc847eSJeff Roberson 1574ef1134c9SJeff Roberson #ifdef SMP 1575155b9987SJeff Roberson /* 1576155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1577155b9987SJeff Roberson * count because could be changing to or from an interrupt 1578155b9987SJeff Roberson * class. 1579155b9987SJeff Roberson */ 15801e516cf5SJeff Roberson if (td->td_sched->ts_state == TSS_ONRUNQ) { 15811e516cf5SJeff Roberson struct tdq *tdq; 15821e516cf5SJeff Roberson 15831e516cf5SJeff Roberson tdq = TDQ_CPU(td->td_sched->ts_cpu); 15841e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1585d2ad694cSJeff Roberson tdq->tdq_transferable--; 1586d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 158780f86c9fSJeff Roberson } 15881e516cf5SJeff Roberson td->td_pri_class = class; 15891e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1590d2ad694cSJeff Roberson tdq->tdq_transferable++; 1591d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 159280f86c9fSJeff Roberson } 1593155b9987SJeff Roberson } 1594ef1134c9SJeff Roberson #endif 15958460a577SJohn Birrell td->td_pri_class = class; 159635e6168fSJeff Roberson } 159735e6168fSJeff Roberson 159835e6168fSJeff Roberson /* 159935e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 160035e6168fSJeff Roberson */ 160135e6168fSJeff Roberson void 1602fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 160335e6168fSJeff Roberson { 1604e7d50326SJeff Roberson struct thread *td; 1605141ad61cSJeff Roberson 16068460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1607fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 16088460a577SJohn Birrell 1609e7d50326SJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1610e7d50326SJeff Roberson sched_exit_thread(td, child); 1611ad1e7d28SJulian Elischer } 1612ad1e7d28SJulian Elischer 1613ad1e7d28SJulian Elischer void 1614fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1615ad1e7d28SJulian Elischer { 1616fc6c30f6SJulian Elischer 1617e7d50326SJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1618e7d50326SJeff Roberson child, child->td_proc->p_comm, child->td_priority); 1619e7d50326SJeff Roberson 1620e7d50326SJeff Roberson tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1621e7d50326SJeff Roberson #ifdef KSE 1622e7d50326SJeff Roberson /* 1623e7d50326SJeff Roberson * KSE forks and exits so often that this penalty causes short-lived 1624e7d50326SJeff Roberson * threads to always be non-interactive. This causes mozilla to 1625e7d50326SJeff Roberson * crawl under load. 1626e7d50326SJeff Roberson */ 1627e7d50326SJeff Roberson if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc) 1628e7d50326SJeff Roberson return; 1629e7d50326SJeff Roberson #endif 1630e7d50326SJeff Roberson /* 1631e7d50326SJeff Roberson * Give the child's runtime to the parent without returning the 1632e7d50326SJeff Roberson * sleep time as a penalty to the parent. This causes shells that 1633e7d50326SJeff Roberson * launch expensive things to mark their children as expensive. 
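 * (Illustrative example, added: a shell that forks an expensive
 * compile job absorbs that child's accumulated run time here, so the
 * shell's own interactivity score degrades accordingly.)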
1634e7d50326SJeff Roberson */ 1635fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1636fc6c30f6SJulian Elischer sched_interact_update(td); 1637e7d50326SJeff Roberson sched_priority(td); 1638ad1e7d28SJulian Elischer } 1639ad1e7d28SJulian Elischer 1640ad1e7d28SJulian Elischer void 1641ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1642ad1e7d28SJulian Elischer { 1643ad1e7d28SJulian Elischer /* 1644ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1645ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1646ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1647ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1648ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1649ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1650ad1e7d28SJulian Elischer * it perfectly here. 1651ad1e7d28SJulian Elischer */ 1652ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1653ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1654ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1655ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1656ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1657ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1658ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1659ad1e7d28SJulian Elischer } 166035e6168fSJeff Roberson } 166135e6168fSJeff Roberson 166235e6168fSJeff Roberson void 16637cf90fb3SJeff Roberson sched_clock(struct thread *td) 166435e6168fSJeff Roberson { 1665ad1e7d28SJulian Elischer struct tdq *tdq; 1666ad1e7d28SJulian Elischer struct td_sched *ts; 166735e6168fSJeff Roberson 1668dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1669dc03363dSJeff Roberson #ifdef SMP 16707b8bfa0dSJeff Roberson sched_smp_tick(td); 1671dc03363dSJeff Roberson #endif 16723f872f85SJeff Roberson tdq = TDQ_SELF(); 16733f872f85SJeff Roberson /* 16743f872f85SJeff Roberson * Advance the insert index once for each tick to ensure that all 16753f872f85SJeff Roberson * threads get a chance to run. 16763f872f85SJeff Roberson */ 16773f872f85SJeff Roberson if (tdq->tdq_idx == tdq->tdq_ridx) { 16783f872f85SJeff Roberson tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS; 16793f872f85SJeff Roberson if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx])) 16803f872f85SJeff Roberson tdq->tdq_ridx = tdq->tdq_idx; 16813f872f85SJeff Roberson } 16820a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 16833f872f85SJeff Roberson ts = td->td_sched; 1684e7d50326SJeff Roberson ts->ts_ticks += tickincr; 1685ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1686e7d50326SJeff Roberson /* 1687e7d50326SJeff Roberson * Update if we've exceeded our desired tick threshhold by over one 1688e7d50326SJeff Roberson * second. 1689e7d50326SJeff Roberson */ 16908ab80cf0SJeff Roberson if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick) 1691ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 16923f741ca1SJeff Roberson /* 16938460a577SJohn Birrell * We only do slicing code for TIMESHARE threads. 
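 * (Clarifying note, added: interrupt-, realtime- and idle-class
 * threads skip the slice and interactivity accounting below and are
 * never charged a slice by the clock tick.)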
1694a8949de2SJeff Roberson */ 16958460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1696a8949de2SJeff Roberson return; 1697a8949de2SJeff Roberson /* 16983f872f85SJeff Roberson * We used a tick; charge it to the thread so that we can compute our 169915dc847eSJeff Roberson * interactivity. 170015dc847eSJeff Roberson */ 17018460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 17028460a577SJohn Birrell sched_interact_update(td); 170335e6168fSJeff Roberson /* 170435e6168fSJeff Roberson * We used up one time slice. 170535e6168fSJeff Roberson */ 1706ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 170715dc847eSJeff Roberson return; 170835e6168fSJeff Roberson /* 170915dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 171035e6168fSJeff Roberson */ 17118460a577SJohn Birrell sched_priority(td); 17124a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 171335e6168fSJeff Roberson } 171435e6168fSJeff Roberson 171535e6168fSJeff Roberson int 171635e6168fSJeff Roberson sched_runnable(void) 171735e6168fSJeff Roberson { 1718ad1e7d28SJulian Elischer struct tdq *tdq; 1719b90816f1SJeff Roberson int load; 172035e6168fSJeff Roberson 1721b90816f1SJeff Roberson load = 1; 1722b90816f1SJeff Roberson 1723ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 172422bf7d9aSJeff Roberson #ifdef SMP 17257b8bfa0dSJeff Roberson if (tdq_busy) 17267b8bfa0dSJeff Roberson goto out; 172722bf7d9aSJeff Roberson #endif 17283f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1729d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 17303f741ca1SJeff Roberson goto out; 17313f741ca1SJeff Roberson } else 1732d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1733b90816f1SJeff Roberson goto out; 1734b90816f1SJeff Roberson load = 0; 1735b90816f1SJeff Roberson out: 1736b90816f1SJeff Roberson return (load); 173735e6168fSJeff Roberson } 173835e6168fSJeff Roberson 1739ad1e7d28SJulian Elischer struct td_sched * 1740c9f25d8fSJeff Roberson sched_choose(void) 1741c9f25d8fSJeff Roberson { 1742ad1e7d28SJulian Elischer struct tdq *tdq; 1743ad1e7d28SJulian Elischer struct td_sched *ts; 174415dc847eSJeff Roberson 1745b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1746ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 174715dc847eSJeff Roberson #ifdef SMP 174880f86c9fSJeff Roberson restart: 174915dc847eSJeff Roberson #endif 1750ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1751ad1e7d28SJulian Elischer if (ts) { 175222bf7d9aSJeff Roberson #ifdef SMP 1753155b6ca1SJeff Roberson if (ts->ts_thread->td_priority > PRI_MIN_IDLE) 1754ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 175580f86c9fSJeff Roberson goto restart; 175622bf7d9aSJeff Roberson #endif 1757ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1758ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1759ad1e7d28SJulian Elischer return (ts); 176035e6168fSJeff Roberson } 1761c9f25d8fSJeff Roberson #ifdef SMP 1762ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 176380f86c9fSJeff Roberson goto restart; 1764c9f25d8fSJeff Roberson #endif 176515dc847eSJeff Roberson return (NULL); 176635e6168fSJeff Roberson } 176735e6168fSJeff Roberson 176835e6168fSJeff Roberson void 17692630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 177035e6168fSJeff Roberson { 1771ad1e7d28SJulian Elischer struct tdq *tdq; 1772ad1e7d28SJulian Elischer struct td_sched *ts; 1773598b368dSJeff Roberson int preemptive; 177422bf7d9aSJeff Roberson int class; 17757b8bfa0dSJeff Roberson #ifdef SMP 17767b8bfa0dSJeff Roberson int cpuid; 17777b8bfa0dSJeff Roberson int cpumask; 
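	/*
	 * Comment added for clarity: on SMP, cpuid records the CPU running
	 * sched_add() and cpumask the bit of the CPU finally chosen for the
	 * thread; both feed the idle-mask bookkeeping below.
	 */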
17787b8bfa0dSJeff Roberson #endif 1779c9f25d8fSJeff Roberson 178081d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 178181d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 178281d47d3fSJeff Roberson curthread->td_proc->p_comm); 178322bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1784ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1785e7d50326SJeff Roberson ts = td->td_sched; 1786e7d50326SJeff Roberson class = PRI_BASE(td->td_pri_class); 1787e7d50326SJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1788ad1e7d28SJulian Elischer KASSERT(ts->ts_state != TSS_ONRUNQ, 1789ad1e7d28SJulian Elischer ("sched_add: thread %p (%s) already in run queue", td, 17908460a577SJohn Birrell td->td_proc->p_comm)); 17918460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 17925d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1793ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1794ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 179515dc847eSJeff Roberson /* 17967b8bfa0dSJeff Roberson * Recalculate the priority before we select the target cpu or 17977b8bfa0dSJeff Roberson * run-queue. 179815dc847eSJeff Roberson */ 17998ab80cf0SJeff Roberson if (class == PRI_TIMESHARE) 18008ab80cf0SJeff Roberson sched_priority(td); 180122bf7d9aSJeff Roberson #ifdef SMP 18027b8bfa0dSJeff Roberson cpuid = PCPU_GET(cpuid); 18032454aaf5SJeff Roberson /* 18047b8bfa0dSJeff Roberson * Pick the destination cpu and if it isn't ours transfer to the 18057b8bfa0dSJeff Roberson * target cpu. 18062454aaf5SJeff Roberson */ 18077b8bfa0dSJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 18087b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_ITHD) { 18097b8bfa0dSJeff Roberson CTR2(KTR_SCHED, "ithd %d < %d", td->td_priority, PRI_MAX_ITHD); 18107b8bfa0dSJeff Roberson ts->ts_cpu = cpuid; 181180f86c9fSJeff Roberson } 18127b8bfa0dSJeff Roberson if (pick_pri) 18137b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickpri(tdq, ts, flags); 18147b8bfa0dSJeff Roberson else 18157b8bfa0dSJeff Roberson ts->ts_cpu = tdq_pickidle(tdq, ts); 18167b8bfa0dSJeff Roberson } else 18177b8bfa0dSJeff Roberson CTR1(KTR_SCHED, "pinned %d", td->td_pinned); 18187b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) 18197b8bfa0dSJeff Roberson preemptive = 0; 18207b8bfa0dSJeff Roberson tdq = TDQ_CPU(ts->ts_cpu); 18217b8bfa0dSJeff Roberson cpumask = 1 << ts->ts_cpu; 182222bf7d9aSJeff Roberson /* 1823670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 18247b8bfa0dSJeff Roberson * the global bitmap. 182522bf7d9aSJeff Roberson */ 1826e7d50326SJeff Roberson if ((class != PRI_IDLE && class != PRI_ITHD) && 18277b8bfa0dSJeff Roberson (tdq->tdq_group->tdg_idlemask & cpumask) != 0) { 182880f86c9fSJeff Roberson /* 182980f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 183080f86c9fSJeff Roberson * from the global idle mask. 183180f86c9fSJeff Roberson */ 1832d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1833d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1834d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 183580f86c9fSJeff Roberson /* 183680f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 183780f86c9fSJeff Roberson */ 18387b8bfa0dSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~cpumask; 18397b8bfa0dSJeff Roberson } 184022bf7d9aSJeff Roberson #endif 18417b8bfa0dSJeff Roberson /* 18427b8bfa0dSJeff Roberson * Set the slice and pick the run queue. 
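 * Priorities up to PRI_MAX_REALTIME go on tdq_realtime, timesharing
 * priorities on the circular tdq_timeshare queue, and the rest on
 * tdq_idle; a thread whose slice has run out is also given a fresh
 * sched_slice here (clarifying note, added).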
18437b8bfa0dSJeff Roberson */ 18447b8bfa0dSJeff Roberson if (ts->ts_slice == 0) 18457b8bfa0dSJeff Roberson ts->ts_slice = sched_slice; 18467b8bfa0dSJeff Roberson if (td->td_priority <= PRI_MAX_REALTIME) 18477b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_realtime; 18487b8bfa0dSJeff Roberson else if (td->td_priority <= PRI_MAX_TIMESHARE) 18497b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_timeshare; 18507b8bfa0dSJeff Roberson else 18517b8bfa0dSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 185263fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18530c0b25aeSJohn Baldwin return; 1854ad1e7d28SJulian Elischer ts->ts_state = TSS_ONRUNQ; 185535e6168fSJeff Roberson 1856ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1857ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 18587b8bfa0dSJeff Roberson #ifdef SMP 18597b8bfa0dSJeff Roberson if (ts->ts_cpu != cpuid) { 18607b8bfa0dSJeff Roberson tdq_notify(ts); 18617b8bfa0dSJeff Roberson return; 18627b8bfa0dSJeff Roberson } 18637b8bfa0dSJeff Roberson #endif 18647b8bfa0dSJeff Roberson if (td->td_priority < curthread->td_priority) 18657b8bfa0dSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 186635e6168fSJeff Roberson } 186735e6168fSJeff Roberson 186835e6168fSJeff Roberson void 18697cf90fb3SJeff Roberson sched_rem(struct thread *td) 187035e6168fSJeff Roberson { 1871ad1e7d28SJulian Elischer struct tdq *tdq; 1872ad1e7d28SJulian Elischer struct td_sched *ts; 18737cf90fb3SJeff Roberson 187481d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 187581d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 187681d47d3fSJeff Roberson curthread->td_proc->p_comm); 1877598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1878ad1e7d28SJulian Elischer ts = td->td_sched; 1879ad1e7d28SJulian Elischer KASSERT((ts->ts_state == TSS_ONRUNQ), 1880ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 188135e6168fSJeff Roberson 1882ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1883ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1884ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1885ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 188635e6168fSJeff Roberson } 188735e6168fSJeff Roberson 188835e6168fSJeff Roberson fixpt_t 18897cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 189035e6168fSJeff Roberson { 189135e6168fSJeff Roberson fixpt_t pctcpu; 1892ad1e7d28SJulian Elischer struct td_sched *ts; 189335e6168fSJeff Roberson 189435e6168fSJeff Roberson pctcpu = 0; 1895ad1e7d28SJulian Elischer ts = td->td_sched; 1896ad1e7d28SJulian Elischer if (ts == NULL) 1897484288deSJeff Roberson return (0); 189835e6168fSJeff Roberson 1899b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1900ad1e7d28SJulian Elischer if (ts->ts_ticks) { 190135e6168fSJeff Roberson int rtick; 190235e6168fSJeff Roberson 1903ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 190435e6168fSJeff Roberson /* How many rtick per second ? 
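 * Illustrative example, added: a thread that ran for roughly half of
 * the sampling window gives rtick of about hz / 2, so pctcpu works out
 * to about FSCALE / 2, which ps(1) reports as ~50%.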
*/ 1905e7d50326SJeff Roberson rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz); 1906e7d50326SJeff Roberson pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; 190735e6168fSJeff Roberson } 1908ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1909828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 191035e6168fSJeff Roberson 191135e6168fSJeff Roberson return (pctcpu); 191235e6168fSJeff Roberson } 191335e6168fSJeff Roberson 19149bacd788SJeff Roberson void 19159bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19169bacd788SJeff Roberson { 1917ad1e7d28SJulian Elischer struct td_sched *ts; 19189bacd788SJeff Roberson 19199bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1920ad1e7d28SJulian Elischer ts = td->td_sched; 19216b2f763fSJeff Roberson if (ts->ts_flags & TSF_BOUND) 1922c95d2db2SJeff Roberson sched_unbind(td); 1923ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 192480f86c9fSJeff Roberson #ifdef SMP 19256b2f763fSJeff Roberson sched_pin(); 192680f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 19279bacd788SJeff Roberson return; 19286b2f763fSJeff Roberson ts->ts_cpu = cpu; 1929ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 19309bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. */ 1931279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19329bacd788SJeff Roberson #endif 19339bacd788SJeff Roberson } 19349bacd788SJeff Roberson 19359bacd788SJeff Roberson void 19369bacd788SJeff Roberson sched_unbind(struct thread *td) 19379bacd788SJeff Roberson { 1938e7d50326SJeff Roberson struct td_sched *ts; 1939e7d50326SJeff Roberson 19409bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1941e7d50326SJeff Roberson ts = td->td_sched; 19426b2f763fSJeff Roberson if ((ts->ts_flags & TSF_BOUND) == 0) 19436b2f763fSJeff Roberson return; 1944e7d50326SJeff Roberson ts->ts_flags &= ~TSF_BOUND; 1945e7d50326SJeff Roberson #ifdef SMP 1946e7d50326SJeff Roberson sched_unpin(); 1947e7d50326SJeff Roberson #endif 19489bacd788SJeff Roberson } 19499bacd788SJeff Roberson 195035e6168fSJeff Roberson int 1951ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1952ebccf1e3SJoseph Koshy { 1953ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1954ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1955ebccf1e3SJoseph Koshy } 1956ebccf1e3SJoseph Koshy 195736ec198bSDavid Xu void 195836ec198bSDavid Xu sched_relinquish(struct thread *td) 195936ec198bSDavid Xu { 196036ec198bSDavid Xu mtx_lock_spin(&sched_lock); 19618460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 196236ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 196336ec198bSDavid Xu mi_switch(SW_VOL, NULL); 196436ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 196536ec198bSDavid Xu } 196636ec198bSDavid Xu 1967ebccf1e3SJoseph Koshy int 196833916c36SJeff Roberson sched_load(void) 196933916c36SJeff Roberson { 197033916c36SJeff Roberson #ifdef SMP 197133916c36SJeff Roberson int total; 197233916c36SJeff Roberson int i; 197333916c36SJeff Roberson 197433916c36SJeff Roberson total = 0; 1975d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 1976d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 197733916c36SJeff Roberson return (total); 197833916c36SJeff Roberson #else 1979d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 198033916c36SJeff Roberson #endif 198133916c36SJeff Roberson } 198233916c36SJeff Roberson 198333916c36SJeff Roberson int 198435e6168fSJeff Roberson sched_sizeof_proc(void) 198535e6168fSJeff Roberson { 198635e6168fSJeff 
Roberson return (sizeof(struct proc)); 198735e6168fSJeff Roberson } 198835e6168fSJeff Roberson 198935e6168fSJeff Roberson int 199035e6168fSJeff Roberson sched_sizeof_thread(void) 199135e6168fSJeff Roberson { 199235e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 199335e6168fSJeff Roberson } 1994b41f1452SDavid Xu 1995b41f1452SDavid Xu void 1996b41f1452SDavid Xu sched_tick(void) 1997b41f1452SDavid Xu { 1998b41f1452SDavid Xu } 1999e7d50326SJeff Roberson 2000e7d50326SJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 2001e7d50326SJeff Roberson SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 2002e7d50326SJeff Roberson "Scheduler name"); 2003e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, ""); 2004e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, ""); 2005e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, ""); 2006e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, ""); 20077b8bfa0dSJeff Roberson #ifdef SMP 20087b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri, CTLFLAG_RW, &pick_pri, 0, ""); 20097b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_affinity, CTLFLAG_RW, 20107b8bfa0dSJeff Roberson &affinity, 0, ""); 20117b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryself, CTLFLAG_RW, 20127b8bfa0dSJeff Roberson &tryself, 0, ""); 20137b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, pick_pri_tryselfidle, CTLFLAG_RW, 20147b8bfa0dSJeff Roberson &tryselfidle, 0, ""); 20157b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0, ""); 20167b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_preempt, CTLFLAG_RW, &ipi_preempt, 0, ""); 20177b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_ast, CTLFLAG_RW, &ipi_ast, 0, ""); 20187b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, ipi_thresh, CTLFLAG_RW, &ipi_thresh, 0, ""); 20197b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, steal_htt, CTLFLAG_RW, &steal_htt, 0, ""); 20207b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, steal_busy, CTLFLAG_RW, &steal_busy, 0, ""); 20217b8bfa0dSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, busy_thresh, CTLFLAG_RW, &busy_thresh, 0, ""); 20227b8bfa0dSJeff Roberson #endif 2023e7d50326SJeff Roberson 2024e7d50326SJeff Roberson /* ps compat */ 2025e7d50326SJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 2026e7d50326SJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 2027e7d50326SJeff Roberson 2028e7d50326SJeff Roberson 2029ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 2030ed062c8dSJulian Elischer #include "kern/kern_switch.c" 2031
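/*
 * Illustrative worked example of sched_interact_score(), added as an
 * editor's note and not part of the original file.  Assuming
 * SCHED_INTERACT_MAX of 100 (so SCHED_INTERACT_HALF is 50): a thread
 * that has slept four times as long as it has run computes
 * div = skg_slptime / 50 and scores skg_runtime / div, about 12, well
 * on the interactive side of the threshold; with the ratio reversed it
 * scores 50 + (50 - 12) = 88, firmly non-interactive.
 */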