/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD,
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	TSF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	TSF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */
#define	TSF_EXIT	0x4000		/* Thread is being killed. */

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	((ts)->ts_ltick - (ts)->ts_ftick)

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (max(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)

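/*
 * Illustrative note (not from the original source): a worked example of the
 * macros above, assuming a hypothetical hz of 1000.  ts_ticks accumulates
 * hz-domain ticks scaled up by SCHED_TICK_SHIFT, so a thread that ran for 5
 * of the last 10 seconds of recorded history has roughly:
 *
 *	SCHED_TICK_TOTAL(ts) = ts_ltick - ts_ftick = 10000	(hz ticks seen)
 *	SCHED_TICK_HZ(ts)    = ts_ticks >> 10      = 5000	(hz ticks run)
 *	SCHED_PRI_TICKS(ts)  = 5000 / (10000 / SCHED_PRI_RANGE)
 *			     ~ SCHED_PRI_RANGE / 2	(integer arithmetic)
 *
 * i.e. 50% utilization lands the thread about halfway into the
 * utilization-driven part of the timeshare range, before the nice
 * adjustment from SCHED_PRI_NICE() is added.
 */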
/*
 * These determine the interactivity of a process.  Interactivity differs from
 * cpu utilization in that it expresses the voluntary time slept vs time ran
 * while cpu utilization includes all time not running.  This more accurately
 * models the intent of the thread.
 *
 * SLP_RUN_MAX:		Maximum amount of sleep time + run time we'll
 *			accumulate before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

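/*
 * Illustrative note (not from the original source): the sleep/run history is
 * kept in hz-domain ticks scaled by SCHED_TICK_SHIFT, so with a hypothetical
 * hz of 1000:
 *
 *	SCHED_SLP_RUN_MAX  = (1000 * 5) << 10	-> 5 seconds of combined
 *						   sleep + run history
 *	SCHED_SLP_RUN_FORK = (1000 / 2) << 10	-> 0.5 seconds inherited
 *						   by a child at fork
 *
 * A thread is treated as interactive while sched_interact_score() stays
 * below SCHED_INTERACT_THRESH (30 on a 0..100 scale, smaller is better).
 */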
/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;
static int sched_rebalance;

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;		/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;		/* timeshare run queue. */
	struct runq	tdq_realtime;		/* real-time run queue. */
	int		tdq_idx;		/* Current insert index. */
	int		tdq_ridx;		/* Current removal index. */
	int		tdq_load_timeshare;	/* Load for timeshare. */
	int		tdq_load;		/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;		/* Next in tdq group. */
	struct tdq_group *tdq_group;		/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two-tiered
 * load balancer.
 */
struct tdq_group {
	int		tdg_cpus;	/* Count of CPUs in this tdq group. */
	cpumask_t	tdg_cpumask;	/* Mask of cpus in this group. */
	cpumask_t	tdg_idlemask;	/* Idle cpus in this group. */
	cpumask_t	tdg_mask;	/* Bit mask for first cpu. */
	int		tdg_load;	/* Total load of this group. */
	int		tdg_transferable; /* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif

/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);

/* Operations on per processor queues */
static struct td_sched *tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(void);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);
#define	THREAD_CAN_MIGRATE(td)						\
    ((td)->td_pinned == 0 && (td)->td_pri_class != PRI_ITHD)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm, ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload: %d\n", tdq->tdq_load);
	printf("\tload TIMESHARE: %d\n", tdq->tdq_load_timeshare);
	printf("\ttimeshare idx: %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		int pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * timeshare.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}

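/*
 * Illustrative note (not from the original source): a worked example of the
 * circular timeshare insertion above.  TS_RQ_PPQ collapses the timeshare
 * priority span onto the RQ_NQS run-queue buckets.  Suppose a thread's
 * scaled priority offset comes out to 10 while tdq_idx is 30 (and RQ_NQS is
 * the stock 64): the thread is queued at (10 + 30) % 64 = 40, ten buckets
 * "behind" the current insertion point.  tdq_choose() drains buckets
 * starting at tdq_ridx, so lower priorities simply wait longer before their
 * bucket comes around; the idx/ridx pair sweeps like a clock hand instead of
 * the queue being resorted.
 */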
static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare++;
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare--;
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(void)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (sched_rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}

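/*
 * Illustrative note (not from the original source): if the chosen pair has
 * high_load = 7 and low_load = 2, then diff = 5 and move = 3 (half the
 * imbalance, rounded up), further capped by the transferable count.  Three
 * tdq_move() calls then shift threads toward the less loaded queue, so a
 * single balancing pass only narrows the gap rather than forcing the two
 * queues to be exactly equal.
 */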
static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
			sched_add(ts->ts_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread
	 * bounces back and forth between two idle cores on separate physical
	 * CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
		sched_add(ts->ts_thread, SRQ_YIELDING);
	}
}

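/*
 * Illustrative note (not from the original source): tdq_assigned is a
 * lock-free singly-linked list.  tdq_notify() below pushes a td_sched onto
 * it with a compare-and-swap loop (linking through ts_assign), and
 * tdq_assign() above detaches the whole list by swapping the head with NULL
 * before walking it.  This lets a remote CPU hand work to this queue without
 * taking its lock; the target picks the handoff up either from the IPI or,
 * for lower priorities, from the check in sched_smp_tick().
 */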
static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class != PRI_IDLE && class != PRI_ITHD)
	    && (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/* Only ipi for realtime/ithd priorities */
	if (ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE)
		return;
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
	tdq->tdq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz/7);	/* 140ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
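	/*
	 * Illustrative note (not from the original source): when no
	 * smp_topology is provided, every CPU above becomes a single-member
	 * group, so the cheap intra-group stealing in tdq_idled() never finds
	 * a sibling and all balancing falls to sched_balance() and
	 * tdq_transfer().  A hypothetical two-package HTT box that does
	 * provide a topology would instead get two 2-CPU groups, so
	 * tdg_cpus > 1 and balance_groups = 1, enabling the per-group
	 * balancer as well.
	 */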
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz/7);	/* ~140ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}

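/*
 * Illustrative note (not from the original source): with a hypothetical
 * hz = 1000 and stathz = 127, the computation above gives
 *
 *	tickincr = (1000 << 10) / 127 = 8062
 *
 * so each stathz tick charged by the clock adds about 7.87 hz ticks (scaled
 * by SCHED_TICK_SHIFT) to ts_ticks, whereas an unscaled hz/stathz would
 * truncate to 7 and systematically under-account cpu time.
 */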
/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int score;
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * If the score is interactive we place the thread in the realtime
	 * queue with a priority that is less than kernel and interrupt
	 * priorities.  These threads are not subject to nice restrictions.
	 *
	 * Scores greater than this are placed on the normal timeshare queue
	 * where the priority is partially decided by the most recent cpu
	 * utilization and the rest is decided by nice value.
	 */
	score = sched_interact_score(td);
	if (score < sched_interact) {
		pri = PRI_MIN_REALTIME;
		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
		    * score;
		KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME,
		    ("sched_priority: invalid interactive priority %d", pri));
	} else {
		pri = SCHED_PRI_MIN;
		if (td->td_sched->ts_ticks)
			pri += SCHED_PRI_TICKS(td->td_sched);
		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
		if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) {
			static int once = 1;
			if (once) {
				printf("sched_priority: invalid priority %d",
				    pri);
				printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n",
				    td->td_proc->p_nice,
				    td->td_sched->ts_ticks,
				    td->td_sched->ts_ftick,
				    td->td_sched->ts_ltick,
				    SCHED_PRI_TICKS(td->td_sched));
				once = 0;
			}
			pri = min(max(pri, PRI_MIN_TIMESHARE),
			    PRI_MAX_TIMESHARE);
		}
	}
	sched_user_prio(td, pri);

	return;
}

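/*
 * Illustrative note (not from the original source): with the default
 * sched_interact threshold of 30, an interactive thread's priority above is
 * a linear function of its score within the realtime user range; a score of
 * 15, for example, lands roughly halfway between PRI_MIN_REALTIME and
 * PRI_MAX_REALTIME (integer division aside).  A non-interactive thread
 * instead starts at SCHED_PRI_MIN and is pushed down by recent cpu usage
 * (SCHED_PRI_TICKS) plus a positive nice value, or pulled up by a negative
 * one.
 */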
Dividing by two here forces 11112454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1112d322132cSJeff Roberson */ 111337a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1114155b6ca1SJeff Roberson ts->skg_runtime /= 2; 1115155b6ca1SJeff Roberson ts->skg_slptime /= 2; 1116d322132cSJeff Roberson return; 1117d322132cSJeff Roberson } 1118155b6ca1SJeff Roberson ts->skg_runtime = (ts->skg_runtime / 5) * 4; 1119155b6ca1SJeff Roberson ts->skg_slptime = (ts->skg_slptime / 5) * 4; 1120d322132cSJeff Roberson } 1121d322132cSJeff Roberson 1122d322132cSJeff Roberson static void 11238460a577SJohn Birrell sched_interact_fork(struct thread *td) 1124d322132cSJeff Roberson { 1125d322132cSJeff Roberson int ratio; 1126d322132cSJeff Roberson int sum; 1127d322132cSJeff Roberson 11288460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1129d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1130d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 11318460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 11328460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 11334b60e324SJeff Roberson } 11344b60e324SJeff Roberson } 11354b60e324SJeff Roberson 1136e1f89c22SJeff Roberson static int 11378460a577SJohn Birrell sched_interact_score(struct thread *td) 1138e1f89c22SJeff Roberson { 1139210491d3SJeff Roberson int div; 1140e1f89c22SJeff Roberson 11418460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 11428460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1143210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 11448460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 11458460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 11468460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 11478460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1148e1f89c22SJeff Roberson } 1149e1f89c22SJeff Roberson 1150210491d3SJeff Roberson /* 1151210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1152210491d3SJeff Roberson */ 1153210491d3SJeff Roberson return (0); 1154e1f89c22SJeff Roberson 1155e1f89c22SJeff Roberson } 1156e1f89c22SJeff Roberson 115715dc847eSJeff Roberson /* 1158e7d50326SJeff Roberson * Called from proc0_init() to bootstrap the scheduler. 1159ed062c8dSJulian Elischer */ 1160ed062c8dSJulian Elischer void 1161ed062c8dSJulian Elischer schedinit(void) 1162ed062c8dSJulian Elischer { 1163e7d50326SJeff Roberson 1164ed062c8dSJulian Elischer /* 1165ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1166ed062c8dSJulian Elischer */ 1167ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1168ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1169e7d50326SJeff Roberson td_sched0.ts_ltick = ticks; 11708ab80cf0SJeff Roberson td_sched0.ts_ftick = ticks; 1171ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1172ad1e7d28SJulian Elischer td_sched0.ts_state = TSS_THREAD; 1173ed062c8dSJulian Elischer } 1174ed062c8dSJulian Elischer 1175ed062c8dSJulian Elischer /* 117615dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 117715dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 1178e7d50326SJeff Roberson * at most sched_slice stathz ticks. 
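 *
 * For example, assuming hz = 1000 and stathz = 128 (both values are
 * configuration dependent), sched_initticks() above picks
 * sched_slice = 128 / 7 = 18 stathz ticks, and the conversion below
 * returns hz / (realstathz / sched_slice) = 1000 / 7 = 142 hz ticks,
 * which matches the ~140ms slice noted there.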
117915dc847eSJeff Roberson */ 118035e6168fSJeff Roberson int 118135e6168fSJeff Roberson sched_rr_interval(void) 118235e6168fSJeff Roberson { 1183e7d50326SJeff Roberson 1184e7d50326SJeff Roberson /* Convert sched_slice to hz */ 1185e7d50326SJeff Roberson return (hz/(realstathz/sched_slice)); 118635e6168fSJeff Roberson } 118735e6168fSJeff Roberson 118822bf7d9aSJeff Roberson static void 1189ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 119035e6168fSJeff Roberson { 1191e7d50326SJeff Roberson 1192e7d50326SJeff Roberson if (ts->ts_ticks == 0) 1193e7d50326SJeff Roberson return; 11948ab80cf0SJeff Roberson if (ticks - (hz / 10) < ts->ts_ltick && 11958ab80cf0SJeff Roberson SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX) 11968ab80cf0SJeff Roberson return; 119735e6168fSJeff Roberson /* 119835e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1199210491d3SJeff Roberson */ 1200e7d50326SJeff Roberson if (ts->ts_ltick > ticks - SCHED_TICK_TARG) 1201ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1202e7d50326SJeff Roberson SCHED_TICK_TARG; 1203e7d50326SJeff Roberson else 1204ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1205ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1206e7d50326SJeff Roberson ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG; 120735e6168fSJeff Roberson } 120835e6168fSJeff Roberson 1209e7d50326SJeff Roberson static void 1210f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 121135e6168fSJeff Roberson { 1212ad1e7d28SJulian Elischer struct td_sched *ts; 121335e6168fSJeff Roberson 121481d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 121581d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 121681d47d3fSJeff Roberson curthread->td_proc->p_comm); 1217ad1e7d28SJulian Elischer ts = td->td_sched; 121835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1219f5c157d9SJohn Baldwin if (td->td_priority == prio) 1220f5c157d9SJohn Baldwin return; 1221e7d50326SJeff Roberson 12223f872f85SJeff Roberson if (TD_ON_RUNQ(td) && prio < td->td_priority) { 12233f741ca1SJeff Roberson /* 12243f741ca1SJeff Roberson * If the priority has been elevated due to priority 12253f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 1226e7d50326SJeff Roberson * queue. This could be optimized to not re-add in some 1227e7d50326SJeff Roberson * cases. 1228e7d50326SJeff Roberson * 1229ad1e7d28SJulian Elischer * Hold this td_sched on this cpu so that sched_prio() doesn't 1230f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1231f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1232f2b74cbfSJeff Roberson */ 1233ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 1234e7d50326SJeff Roberson sched_rem(td); 1235e7d50326SJeff Roberson td->td_priority = prio; 1236e7d50326SJeff Roberson sched_add(td, SRQ_BORROWING); 1237ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 12383f741ca1SJeff Roberson } else 12393f741ca1SJeff Roberson td->td_priority = prio; 124035e6168fSJeff Roberson } 124135e6168fSJeff Roberson 1242f5c157d9SJohn Baldwin /* 1243f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1244f5c157d9SJohn Baldwin * priority. 
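 *
 * A rough sketch of the expected pairing, typically driven from the
 * turnstile code: a waiter boosts the lock owner with
 * sched_lend_prio(owner, waiter_prio), and once no waiter still needs
 * the boost, sched_unlend_prio(owner, next_prio) below restores the
 * owner's own priority (owner, waiter_prio and next_prio are
 * illustrative names only).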
1245f5c157d9SJohn Baldwin */ 1246f5c157d9SJohn Baldwin void 1247f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1248f5c157d9SJohn Baldwin { 1249f5c157d9SJohn Baldwin 1250f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1251f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1252f5c157d9SJohn Baldwin } 1253f5c157d9SJohn Baldwin 1254f5c157d9SJohn Baldwin /* 1255f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1256f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1257f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1258f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1259f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1260f5c157d9SJohn Baldwin * of prio. 1261f5c157d9SJohn Baldwin */ 1262f5c157d9SJohn Baldwin void 1263f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1264f5c157d9SJohn Baldwin { 1265f5c157d9SJohn Baldwin u_char base_pri; 1266f5c157d9SJohn Baldwin 1267f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1268f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 12698460a577SJohn Birrell base_pri = td->td_user_pri; 1270f5c157d9SJohn Baldwin else 1271f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1272f5c157d9SJohn Baldwin if (prio >= base_pri) { 1273f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1274f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1275f5c157d9SJohn Baldwin } else 1276f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1277f5c157d9SJohn Baldwin } 1278f5c157d9SJohn Baldwin 1279f5c157d9SJohn Baldwin void 1280f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1281f5c157d9SJohn Baldwin { 1282f5c157d9SJohn Baldwin u_char oldprio; 1283f5c157d9SJohn Baldwin 1284f5c157d9SJohn Baldwin /* First, update the base priority. */ 1285f5c157d9SJohn Baldwin td->td_base_pri = prio; 1286f5c157d9SJohn Baldwin 1287f5c157d9SJohn Baldwin /* 128850aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1289f5c157d9SJohn Baldwin * ever lower the priority. 1290f5c157d9SJohn Baldwin */ 1291f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1292f5c157d9SJohn Baldwin return; 1293f5c157d9SJohn Baldwin 1294f5c157d9SJohn Baldwin /* Change the real priority. */ 1295f5c157d9SJohn Baldwin oldprio = td->td_priority; 1296f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1297f5c157d9SJohn Baldwin 1298f5c157d9SJohn Baldwin /* 1299f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1300f5c157d9SJohn Baldwin * its state. 
1301f5c157d9SJohn Baldwin */ 1302f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1303f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1304f5c157d9SJohn Baldwin } 1305f5c157d9SJohn Baldwin 130635e6168fSJeff Roberson void 13078460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13083db720fdSDavid Xu { 13093db720fdSDavid Xu u_char oldprio; 13103db720fdSDavid Xu 13118460a577SJohn Birrell td->td_base_user_pri = prio; 1312fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1313fc6c30f6SJulian Elischer return; 13148460a577SJohn Birrell oldprio = td->td_user_pri; 13158460a577SJohn Birrell td->td_user_pri = prio; 13163db720fdSDavid Xu 13173db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13183db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13193db720fdSDavid Xu } 13203db720fdSDavid Xu 13213db720fdSDavid Xu void 13223db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13233db720fdSDavid Xu { 13243db720fdSDavid Xu u_char oldprio; 13253db720fdSDavid Xu 13263db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13273db720fdSDavid Xu 1328f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 13298460a577SJohn Birrell td->td_user_pri = prio; 13303db720fdSDavid Xu 13313db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13323db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13333db720fdSDavid Xu } 13343db720fdSDavid Xu 13353db720fdSDavid Xu void 13363db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13373db720fdSDavid Xu { 13383db720fdSDavid Xu u_char base_pri; 13393db720fdSDavid Xu 13408460a577SJohn Birrell base_pri = td->td_base_user_pri; 13413db720fdSDavid Xu if (prio >= base_pri) { 13423db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13438460a577SJohn Birrell sched_user_prio(td, base_pri); 13443db720fdSDavid Xu } else 13453db720fdSDavid Xu sched_lend_user_prio(td, prio); 13463db720fdSDavid Xu } 13473db720fdSDavid Xu 13483db720fdSDavid Xu void 13493389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 135035e6168fSJeff Roberson { 1351c02bbb43SJeff Roberson struct tdq *tdq; 1352ad1e7d28SJulian Elischer struct td_sched *ts; 135335e6168fSJeff Roberson 135435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 135535e6168fSJeff Roberson 1356c02bbb43SJeff Roberson tdq = TDQ_SELF(); 1357e7d50326SJeff Roberson ts = td->td_sched; 1358060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1359060563ecSJulian Elischer td->td_oncpu = NOCPU; 136052eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 136177918643SStephan Uphoff td->td_owepreempt = 0; 1362b11fdad0SJeff Roberson /* 1363ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1364b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1365b11fdad0SJeff Roberson */ 13662454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1367bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1368ad1e7d28SJulian Elischer } else if ((ts->ts_flags & TSF_ASSIGNED) == 0) { 1369ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1370c02bbb43SJeff Roberson tdq_load_rem(tdq, ts); 1371ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1372f2b74cbfSJeff Roberson /* 1373ed062c8dSJulian Elischer * Don't allow the thread to migrate 1374ed062c8dSJulian Elischer * from a preemption. 1375f2b74cbfSJeff Roberson */ 1376ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 1377598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 
1378598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1379598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1380ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 13818460a577SJohn Birrell } 1382ed062c8dSJulian Elischer } 1383d39063f2SJulian Elischer if (newtd != NULL) { 1384c20c691bSJulian Elischer /* 13856680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 13866680bbd5SJeff Roberson * added to the run queue and then chosen. 1387c20c691bSJulian Elischer */ 1388ad1e7d28SJulian Elischer newtd->td_sched->ts_flags |= TSF_DIDRUN; 1389c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1390ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1391d39063f2SJulian Elischer } else 13922454aaf5SJeff Roberson newtd = choosethread(); 1393ebccf1e3SJoseph Koshy if (td != newtd) { 1394ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1395ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1396ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1397ebccf1e3SJoseph Koshy #endif 13988460a577SJohn Birrell 1399ae53b483SJeff Roberson cpu_switch(td, newtd); 1400ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1401ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1402ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1403ebccf1e3SJoseph Koshy #endif 1404ebccf1e3SJoseph Koshy } 1405ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1406060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 140735e6168fSJeff Roberson } 140835e6168fSJeff Roberson 140935e6168fSJeff Roberson void 1410fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 141135e6168fSJeff Roberson { 141235e6168fSJeff Roberson struct thread *td; 141335e6168fSJeff Roberson 1414fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14150b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 1416e7d50326SJeff Roberson 1417fa885116SJulian Elischer p->p_nice = nice; 14188460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14198460a577SJohn Birrell sched_priority(td); 1420e7d50326SJeff Roberson sched_prio(td, td->td_base_user_pri); 142135e6168fSJeff Roberson } 1422fa885116SJulian Elischer } 142335e6168fSJeff Roberson 142435e6168fSJeff Roberson void 142544f3b092SJohn Baldwin sched_sleep(struct thread *td) 142635e6168fSJeff Roberson { 1427e7d50326SJeff Roberson 142835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 142935e6168fSJeff Roberson 1430ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 143135e6168fSJeff Roberson } 143235e6168fSJeff Roberson 143335e6168fSJeff Roberson void 143435e6168fSJeff Roberson sched_wakeup(struct thread *td) 143535e6168fSJeff Roberson { 1436e7d50326SJeff Roberson int slptime; 1437e7d50326SJeff Roberson 143835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 143935e6168fSJeff Roberson 144035e6168fSJeff Roberson /* 1441e7d50326SJeff Roberson * If we slept for more than a tick update our interactivity and 1442e7d50326SJeff Roberson * priority. 
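 *
 * For example, a thread woken after sleeping for 200 ticks has
 * (200 << SCHED_TICK_SHIFT) added to its sleep history below before
 * its interactivity score, %cpu estimate and priority are recomputed.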
144335e6168fSJeff Roberson */ 1444e7d50326SJeff Roberson slptime = td->td_sched->ts_slptime; 1445e7d50326SJeff Roberson td->td_sched->ts_slptime = 0; 1446e7d50326SJeff Roberson if (slptime && slptime != ticks) { 144715dc847eSJeff Roberson int hzticks; 1448f1e8dc4aSJeff Roberson 1449e7d50326SJeff Roberson hzticks = (ticks - slptime) << SCHED_TICK_SHIFT; 14508460a577SJohn Birrell td->td_sched->skg_slptime += hzticks; 14518460a577SJohn Birrell sched_interact_update(td); 1452e7d50326SJeff Roberson sched_pctcpu_update(td->td_sched); 14538460a577SJohn Birrell sched_priority(td); 1454f1e8dc4aSJeff Roberson } 14552630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 145635e6168fSJeff Roberson } 145735e6168fSJeff Roberson 145835e6168fSJeff Roberson /* 145935e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 146035e6168fSJeff Roberson * priority. 146135e6168fSJeff Roberson */ 146235e6168fSJeff Roberson void 14638460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 146415dc847eSJeff Roberson { 14658460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1466ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1467e7d50326SJeff Roberson /* 1468e7d50326SJeff Roberson * Penalize the parent and child for forking. 1469e7d50326SJeff Roberson */ 1470e7d50326SJeff Roberson sched_interact_fork(child); 1471e7d50326SJeff Roberson sched_priority(child); 1472e7d50326SJeff Roberson td->td_sched->skg_runtime += tickincr; 1473e7d50326SJeff Roberson sched_interact_update(td); 1474e7d50326SJeff Roberson sched_priority(td); 1475ad1e7d28SJulian Elischer } 1476ad1e7d28SJulian Elischer 1477ad1e7d28SJulian Elischer void 1478ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1479ad1e7d28SJulian Elischer { 1480ad1e7d28SJulian Elischer struct td_sched *ts; 1481ad1e7d28SJulian Elischer struct td_sched *ts2; 14828460a577SJohn Birrell 1483e7d50326SJeff Roberson /* 1484e7d50326SJeff Roberson * Initialize child. 1485e7d50326SJeff Roberson */ 1486ed062c8dSJulian Elischer sched_newthread(child); 1487ad1e7d28SJulian Elischer ts = td->td_sched; 1488ad1e7d28SJulian Elischer ts2 = child->td_sched; 1489ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1490ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1491e7d50326SJeff Roberson /* 1492e7d50326SJeff Roberson * Grab our parents cpu estimation information and priority. 1493e7d50326SJeff Roberson */ 1494ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1495ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1496ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 1497e7d50326SJeff Roberson child->td_user_pri = td->td_user_pri; 1498e7d50326SJeff Roberson child->td_base_user_pri = td->td_base_user_pri; 1499e7d50326SJeff Roberson /* 1500e7d50326SJeff Roberson * And update interactivity score. 1501e7d50326SJeff Roberson */ 1502e7d50326SJeff Roberson ts2->skg_slptime = ts->skg_slptime; 1503e7d50326SJeff Roberson ts2->skg_runtime = ts->skg_runtime; 1504e7d50326SJeff Roberson ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. 
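 * A one tick first slice means sched_clock() re-evaluates the child
 * almost immediately, so its own behaviour, rather than the history
 * inherited here, quickly determines its priority.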
*/ 150515dc847eSJeff Roberson } 150615dc847eSJeff Roberson 150715dc847eSJeff Roberson void 15088460a577SJohn Birrell sched_class(struct thread *td, int class) 150915dc847eSJeff Roberson { 1510ad1e7d28SJulian Elischer struct tdq *tdq; 1511ad1e7d28SJulian Elischer struct td_sched *ts; 1512ef1134c9SJeff Roberson int nclass; 1513ef1134c9SJeff Roberson int oclass; 151415dc847eSJeff Roberson 15152056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15168460a577SJohn Birrell if (td->td_pri_class == class) 151715dc847eSJeff Roberson return; 151815dc847eSJeff Roberson 1519ef1134c9SJeff Roberson nclass = PRI_BASE(class); 15208460a577SJohn Birrell oclass = PRI_BASE(td->td_pri_class); 1521ad1e7d28SJulian Elischer ts = td->td_sched; 1522e7d50326SJeff Roberson if (ts->ts_state == TSS_ONRUNQ || td->td_state == TDS_RUNNING) { 1523ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1524ef1134c9SJeff Roberson #ifdef SMP 1525155b9987SJeff Roberson /* 1526155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1527155b9987SJeff Roberson * count because could be changing to or from an interrupt 1528155b9987SJeff Roberson * class. 1529155b9987SJeff Roberson */ 1530ad1e7d28SJulian Elischer if (ts->ts_state == TSS_ONRUNQ) { 1531e7d50326SJeff Roberson if (THREAD_CAN_MIGRATE(ts->ts_thread)) { 1532d2ad694cSJeff Roberson tdq->tdq_transferable--; 1533d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 153480f86c9fSJeff Roberson } 1535e7d50326SJeff Roberson if (THREAD_CAN_MIGRATE(ts->ts_thread)) { 1536d2ad694cSJeff Roberson tdq->tdq_transferable++; 1537d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 153880f86c9fSJeff Roberson } 1539155b9987SJeff Roberson } 1540ef1134c9SJeff Roberson #endif 1541e7d50326SJeff Roberson if (oclass == PRI_TIMESHARE) 1542d2ad694cSJeff Roberson tdq->tdq_load_timeshare--; 1543e7d50326SJeff Roberson if (nclass == PRI_TIMESHARE) 1544d2ad694cSJeff Roberson tdq->tdq_load_timeshare++; 1545155b9987SJeff Roberson } 154615dc847eSJeff Roberson 15478460a577SJohn Birrell td->td_pri_class = class; 154835e6168fSJeff Roberson } 154935e6168fSJeff Roberson 155035e6168fSJeff Roberson /* 155135e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 155235e6168fSJeff Roberson */ 155335e6168fSJeff Roberson void 1554fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 155535e6168fSJeff Roberson { 1556e7d50326SJeff Roberson struct thread *td; 1557141ad61cSJeff Roberson 15588460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1559fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 15608460a577SJohn Birrell 1561e7d50326SJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1562e7d50326SJeff Roberson sched_exit_thread(td, child); 1563ad1e7d28SJulian Elischer } 1564ad1e7d28SJulian Elischer 1565ad1e7d28SJulian Elischer void 1566fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1567ad1e7d28SJulian Elischer { 1568fc6c30f6SJulian Elischer 1569e7d50326SJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1570e7d50326SJeff Roberson child, child->td_proc->p_comm, child->td_priority); 1571e7d50326SJeff Roberson 1572e7d50326SJeff Roberson tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1573e7d50326SJeff Roberson #ifdef KSE 1574e7d50326SJeff Roberson /* 1575e7d50326SJeff Roberson * KSE forks and exits so often that this penalty causes short-lived 1576e7d50326SJeff Roberson * threads to always be non-interactive. 
This causes mozilla to 1577e7d50326SJeff Roberson * crawl under load. 1578e7d50326SJeff Roberson */ 1579e7d50326SJeff Roberson if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc) 1580e7d50326SJeff Roberson return; 1581e7d50326SJeff Roberson #endif 1582e7d50326SJeff Roberson /* 1583e7d50326SJeff Roberson * Give the child's runtime to the parent without returning the 1584e7d50326SJeff Roberson * sleep time as a penalty to the parent. This causes shells that 1585e7d50326SJeff Roberson * launch expensive things to mark their children as expensive. 1586e7d50326SJeff Roberson */ 1587fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1588fc6c30f6SJulian Elischer sched_interact_update(td); 1589e7d50326SJeff Roberson sched_priority(td); 1590ad1e7d28SJulian Elischer } 1591ad1e7d28SJulian Elischer 1592ad1e7d28SJulian Elischer void 1593ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1594ad1e7d28SJulian Elischer { 1595ad1e7d28SJulian Elischer /* 1596ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1597ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1598ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1599ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1600ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1601ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1602ad1e7d28SJulian Elischer * it perfectly here. 1603ad1e7d28SJulian Elischer */ 1604ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1605ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1606ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1607ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1608ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1609ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1610ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1611ad1e7d28SJulian Elischer } 161235e6168fSJeff Roberson } 161335e6168fSJeff Roberson 161435e6168fSJeff Roberson void 16157cf90fb3SJeff Roberson sched_clock(struct thread *td) 161635e6168fSJeff Roberson { 1617ad1e7d28SJulian Elischer struct tdq *tdq; 1618ad1e7d28SJulian Elischer struct td_sched *ts; 161935e6168fSJeff Roberson 1620dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1621dc03363dSJeff Roberson #ifdef SMP 16223f872f85SJeff Roberson sched_smp_tick(); 1623dc03363dSJeff Roberson #endif 16243f872f85SJeff Roberson tdq = TDQ_SELF(); 16253f872f85SJeff Roberson /* 16263f872f85SJeff Roberson * Advance the insert index once for each tick to ensure that all 16273f872f85SJeff Roberson * threads get a chance to run. 
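 *
 * The timeshare queue acts as a circular calendar: threads are
 * inserted at an offset from tdq_idx according to priority (see
 * tdq_runq_add()) while tdq_ridx marks the bucket currently being
 * drained, so bumping tdq_idx once per tick keeps every bucket
 * reachable even under constant re-queueing.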
16283f872f85SJeff Roberson */ 16293f872f85SJeff Roberson if (tdq->tdq_idx == tdq->tdq_ridx) { 16303f872f85SJeff Roberson tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS; 16313f872f85SJeff Roberson if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx])) 16323f872f85SJeff Roberson tdq->tdq_ridx = tdq->tdq_idx; 16333f872f85SJeff Roberson } 16340a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 16353f872f85SJeff Roberson ts = td->td_sched; 1636e7d50326SJeff Roberson ts->ts_ticks += tickincr; 1637ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1638e7d50326SJeff Roberson /* 1639e7d50326SJeff Roberson * Update if we've exceeded our desired tick threshhold by over one 1640e7d50326SJeff Roberson * second. 1641e7d50326SJeff Roberson */ 16428ab80cf0SJeff Roberson if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick) 1643ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 16443f741ca1SJeff Roberson /* 16458460a577SJohn Birrell * We only do slicing code for TIMESHARE threads. 1646a8949de2SJeff Roberson */ 16478460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1648a8949de2SJeff Roberson return; 1649a8949de2SJeff Roberson /* 16503f872f85SJeff Roberson * We used a tick; charge it to the thread so that we can compute our 165115dc847eSJeff Roberson * interactivity. 165215dc847eSJeff Roberson */ 16538460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 16548460a577SJohn Birrell sched_interact_update(td); 165535e6168fSJeff Roberson /* 165635e6168fSJeff Roberson * We used up one time slice. 165735e6168fSJeff Roberson */ 1658ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 165915dc847eSJeff Roberson return; 166035e6168fSJeff Roberson /* 166115dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 166235e6168fSJeff Roberson */ 16638460a577SJohn Birrell sched_priority(td); 16648ab80cf0SJeff Roberson tdq_load_rem(tdq, ts); 1665e7d50326SJeff Roberson ts->ts_slice = sched_slice; 1666ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 16674a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 166835e6168fSJeff Roberson } 166935e6168fSJeff Roberson 167035e6168fSJeff Roberson int 167135e6168fSJeff Roberson sched_runnable(void) 167235e6168fSJeff Roberson { 1673ad1e7d28SJulian Elischer struct tdq *tdq; 1674b90816f1SJeff Roberson int load; 167535e6168fSJeff Roberson 1676b90816f1SJeff Roberson load = 1; 1677b90816f1SJeff Roberson 1678ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 167922bf7d9aSJeff Roberson #ifdef SMP 1680d2ad694cSJeff Roberson if (tdq->tdq_assigned) { 168146f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 1682ad1e7d28SJulian Elischer tdq_assign(tdq); 168346f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 168446f8b265SJeff Roberson } 168522bf7d9aSJeff Roberson #endif 16863f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1687d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 16883f741ca1SJeff Roberson goto out; 16893f741ca1SJeff Roberson } else 1690d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1691b90816f1SJeff Roberson goto out; 1692b90816f1SJeff Roberson load = 0; 1693b90816f1SJeff Roberson out: 1694b90816f1SJeff Roberson return (load); 169535e6168fSJeff Roberson } 169635e6168fSJeff Roberson 1697ad1e7d28SJulian Elischer struct td_sched * 1698c9f25d8fSJeff Roberson sched_choose(void) 1699c9f25d8fSJeff Roberson { 1700ad1e7d28SJulian Elischer struct tdq *tdq; 1701ad1e7d28SJulian Elischer struct td_sched *ts; 170215dc847eSJeff Roberson 1703b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1704ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 170515dc847eSJeff 
Roberson #ifdef SMP 170680f86c9fSJeff Roberson restart: 1707d2ad694cSJeff Roberson if (tdq->tdq_assigned) 1708ad1e7d28SJulian Elischer tdq_assign(tdq); 170915dc847eSJeff Roberson #endif 1710ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1711ad1e7d28SJulian Elischer if (ts) { 171222bf7d9aSJeff Roberson #ifdef SMP 1713155b6ca1SJeff Roberson if (ts->ts_thread->td_priority > PRI_MIN_IDLE) 1714ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 171580f86c9fSJeff Roberson goto restart; 171622bf7d9aSJeff Roberson #endif 1717ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1718ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1719ad1e7d28SJulian Elischer return (ts); 172035e6168fSJeff Roberson } 1721c9f25d8fSJeff Roberson #ifdef SMP 1722ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 172380f86c9fSJeff Roberson goto restart; 1724c9f25d8fSJeff Roberson #endif 172515dc847eSJeff Roberson return (NULL); 172635e6168fSJeff Roberson } 172735e6168fSJeff Roberson 172835e6168fSJeff Roberson void 17292630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 173035e6168fSJeff Roberson { 1731ad1e7d28SJulian Elischer struct tdq *tdq; 1732ad1e7d28SJulian Elischer struct td_sched *ts; 1733598b368dSJeff Roberson int preemptive; 17342454aaf5SJeff Roberson int canmigrate; 173522bf7d9aSJeff Roberson int class; 1736c9f25d8fSJeff Roberson 173781d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 173881d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 173981d47d3fSJeff Roberson curthread->td_proc->p_comm); 174022bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1741ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1742e7d50326SJeff Roberson ts = td->td_sched; 1743ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_INTERNAL; 1744e7d50326SJeff Roberson class = PRI_BASE(td->td_pri_class); 1745e7d50326SJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1746e7d50326SJeff Roberson canmigrate = 1; 1747598b368dSJeff Roberson #ifdef SMP 1748ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1749ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_REMOVED) 1750ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_REMOVED; 175122bf7d9aSJeff Roberson return; 17522d59a44dSJeff Roberson } 1753e7d50326SJeff Roberson canmigrate = THREAD_CAN_MIGRATE(td); 1754f8ec133eSDavid Xu /* 1755f8ec133eSDavid Xu * Don't migrate running threads here. Force the long term balancer 1756f8ec133eSDavid Xu * to do it. 1757f8ec133eSDavid Xu */ 1758ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_HOLD) { 1759ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 1760f8ec133eSDavid Xu canmigrate = 0; 1761f8ec133eSDavid Xu } 1762598b368dSJeff Roberson #endif 1763ad1e7d28SJulian Elischer KASSERT(ts->ts_state != TSS_ONRUNQ, 1764ad1e7d28SJulian Elischer ("sched_add: thread %p (%s) already in run queue", td, 17658460a577SJohn Birrell td->td_proc->p_comm)); 17668460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 17675d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1768ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1769ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 177015dc847eSJeff Roberson /* 1771e7d50326SJeff Roberson * Set the slice and pick the run queue. 
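 * Realtime-range priorities are queued on tdq_realtime,
 * timeshare-range priorities on the circular tdq_timeshare queue and
 * anything below that on tdq_idle, as selected below.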
177215dc847eSJeff Roberson */ 1773e7d50326SJeff Roberson if (ts->ts_slice == 0) 1774e7d50326SJeff Roberson ts->ts_slice = sched_slice; 17758ab80cf0SJeff Roberson if (class == PRI_TIMESHARE) 17768ab80cf0SJeff Roberson sched_priority(td); 1777e7d50326SJeff Roberson if (td->td_priority <= PRI_MAX_REALTIME) { 1778e7d50326SJeff Roberson ts->ts_runq = &tdq->tdq_realtime; 1779e7d50326SJeff Roberson /* 1780e7d50326SJeff Roberson * If the thread is not artificially pinned and it's in 1781e7d50326SJeff Roberson * the realtime queue we directly dispatch it on this cpu 1782e7d50326SJeff Roberson * for minimum latency. Interrupt handlers may also have 1783e7d50326SJeff Roberson * to complete on the cpu that dispatched them. 1784e7d50326SJeff Roberson */ 1785155b6ca1SJeff Roberson if (td->td_pinned == 0 && class == PRI_ITHD) 1786e7d50326SJeff Roberson ts->ts_cpu = PCPU_GET(cpuid); 1787e7d50326SJeff Roberson } else if (td->td_priority <= PRI_MAX_TIMESHARE) 1788e7d50326SJeff Roberson ts->ts_runq = &tdq->tdq_timeshare; 178915dc847eSJeff Roberson else 1790d2ad694cSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 1791e7d50326SJeff Roberson 179222bf7d9aSJeff Roberson #ifdef SMP 17932454aaf5SJeff Roberson /* 17942454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 17952454aaf5SJeff Roberson */ 1796ad1e7d28SJulian Elischer if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid) ) { 1797ad1e7d28SJulian Elischer ts->ts_runq = NULL; 1798ad1e7d28SJulian Elischer tdq_notify(ts, ts->ts_cpu); 179980f86c9fSJeff Roberson return; 180080f86c9fSJeff Roberson } 180122bf7d9aSJeff Roberson /* 1802670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1803670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 180422bf7d9aSJeff Roberson */ 1805e7d50326SJeff Roberson if ((class != PRI_IDLE && class != PRI_ITHD) && 1806d2ad694cSJeff Roberson (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) { 180780f86c9fSJeff Roberson /* 180880f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 180980f86c9fSJeff Roberson * from the global idle mask. 181080f86c9fSJeff Roberson */ 1811d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1812d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1813d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 181480f86c9fSJeff Roberson /* 181580f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 
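 * The global tdq_idle mask only records groups whose cpus are all
 * idle, which is why the group's bit was cleared from it above before
 * our own cpu bit is cleared from the group mask here.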
181680f86c9fSJeff Roberson */ 1817d2ad694cSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask); 1818e7d50326SJeff Roberson } else if (canmigrate && tdq->tdq_load > 1) 1819ad1e7d28SJulian Elischer if (tdq_transfer(tdq, ts, class)) 1820670c524fSJeff Roberson return; 1821ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 182222bf7d9aSJeff Roberson #endif 1823e7d50326SJeff Roberson if (td->td_priority < curthread->td_priority) 182422bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 182563fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18260c0b25aeSJohn Baldwin return; 1827ad1e7d28SJulian Elischer ts->ts_state = TSS_ONRUNQ; 182835e6168fSJeff Roberson 1829ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1830ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 183135e6168fSJeff Roberson } 183235e6168fSJeff Roberson 183335e6168fSJeff Roberson void 18347cf90fb3SJeff Roberson sched_rem(struct thread *td) 183535e6168fSJeff Roberson { 1836ad1e7d28SJulian Elischer struct tdq *tdq; 1837ad1e7d28SJulian Elischer struct td_sched *ts; 18387cf90fb3SJeff Roberson 183981d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 184081d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 184181d47d3fSJeff Roberson curthread->td_proc->p_comm); 1842598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1843ad1e7d28SJulian Elischer ts = td->td_sched; 1844ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1845ad1e7d28SJulian Elischer ts->ts_flags |= TSF_REMOVED; 184622bf7d9aSJeff Roberson return; 18472d59a44dSJeff Roberson } 1848ad1e7d28SJulian Elischer KASSERT((ts->ts_state == TSS_ONRUNQ), 1849ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 185035e6168fSJeff Roberson 1851ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1852ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1853ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1854ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 185535e6168fSJeff Roberson } 185635e6168fSJeff Roberson 185735e6168fSJeff Roberson fixpt_t 18587cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 185935e6168fSJeff Roberson { 186035e6168fSJeff Roberson fixpt_t pctcpu; 1861ad1e7d28SJulian Elischer struct td_sched *ts; 186235e6168fSJeff Roberson 186335e6168fSJeff Roberson pctcpu = 0; 1864ad1e7d28SJulian Elischer ts = td->td_sched; 1865ad1e7d28SJulian Elischer if (ts == NULL) 1866484288deSJeff Roberson return (0); 186735e6168fSJeff Roberson 1868b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1869ad1e7d28SJulian Elischer if (ts->ts_ticks) { 187035e6168fSJeff Roberson int rtick; 187135e6168fSJeff Roberson 1872ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 187335e6168fSJeff Roberson /* How many rtick per second ? 
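 * Roughly pctcpu = FSCALE * rtick / hz, so a thread that ran
 * continuously over the sampling window reports rtick close to hz
 * and therefore a pctcpu near FSCALE, i.e. 100%.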
*/ 1874e7d50326SJeff Roberson rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz); 1875e7d50326SJeff Roberson pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; 187635e6168fSJeff Roberson } 1877ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1878828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 187935e6168fSJeff Roberson 188035e6168fSJeff Roberson return (pctcpu); 188135e6168fSJeff Roberson } 188235e6168fSJeff Roberson 18839bacd788SJeff Roberson void 18849bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 18859bacd788SJeff Roberson { 1886ad1e7d28SJulian Elischer struct td_sched *ts; 18879bacd788SJeff Roberson 18889bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1889ad1e7d28SJulian Elischer ts = td->td_sched; 1890e7d50326SJeff Roberson KASSERT((ts->ts_flags & TSF_BOUND) == 0, 1891e7d50326SJeff Roberson ("sched_bind: thread %p already bound.", td)); 1892ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 189380f86c9fSJeff Roberson #ifdef SMP 189480f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 18959bacd788SJeff Roberson return; 18969bacd788SJeff Roberson /* sched_rem without the runq_remove */ 1897ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1898ad1e7d28SJulian Elischer tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts); 1899ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 19009bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. */ 1901279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 1902e7d50326SJeff Roberson sched_pin(); 19039bacd788SJeff Roberson #endif 19049bacd788SJeff Roberson } 19059bacd788SJeff Roberson 19069bacd788SJeff Roberson void 19079bacd788SJeff Roberson sched_unbind(struct thread *td) 19089bacd788SJeff Roberson { 1909e7d50326SJeff Roberson struct td_sched *ts; 1910e7d50326SJeff Roberson 19119bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1912e7d50326SJeff Roberson ts = td->td_sched; 1913e7d50326SJeff Roberson KASSERT(ts->ts_flags & TSF_BOUND, 1914e7d50326SJeff Roberson ("sched_unbind: thread %p not bound.", td)); 1915e7d50326SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1916e7d50326SJeff Roberson ts->ts_flags &= ~TSF_BOUND; 1917e7d50326SJeff Roberson #ifdef SMP 1918e7d50326SJeff Roberson sched_unpin(); 1919e7d50326SJeff Roberson #endif 19209bacd788SJeff Roberson } 19219bacd788SJeff Roberson 192235e6168fSJeff Roberson int 1923ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1924ebccf1e3SJoseph Koshy { 1925ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1926ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1927ebccf1e3SJoseph Koshy } 1928ebccf1e3SJoseph Koshy 192936ec198bSDavid Xu void 193036ec198bSDavid Xu sched_relinquish(struct thread *td) 193136ec198bSDavid Xu { 193236ec198bSDavid Xu mtx_lock_spin(&sched_lock); 19338460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 193436ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 193536ec198bSDavid Xu mi_switch(SW_VOL, NULL); 193636ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 193736ec198bSDavid Xu } 193836ec198bSDavid Xu 1939ebccf1e3SJoseph Koshy int 194033916c36SJeff Roberson sched_load(void) 194133916c36SJeff Roberson { 194233916c36SJeff Roberson #ifdef SMP 194333916c36SJeff Roberson int total; 194433916c36SJeff Roberson int i; 194533916c36SJeff Roberson 194633916c36SJeff Roberson total = 0; 1947d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 1948d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 194933916c36SJeff Roberson return (total); 195033916c36SJeff Roberson #else 
1951d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 195233916c36SJeff Roberson #endif 195333916c36SJeff Roberson } 195433916c36SJeff Roberson 195533916c36SJeff Roberson int 195635e6168fSJeff Roberson sched_sizeof_proc(void) 195735e6168fSJeff Roberson { 195835e6168fSJeff Roberson return (sizeof(struct proc)); 195935e6168fSJeff Roberson } 196035e6168fSJeff Roberson 196135e6168fSJeff Roberson int 196235e6168fSJeff Roberson sched_sizeof_thread(void) 196335e6168fSJeff Roberson { 196435e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 196535e6168fSJeff Roberson } 1966b41f1452SDavid Xu 1967b41f1452SDavid Xu void 1968b41f1452SDavid Xu sched_tick(void) 1969b41f1452SDavid Xu { 1970b41f1452SDavid Xu } 1971e7d50326SJeff Roberson 1972e7d50326SJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 1973e7d50326SJeff Roberson SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 1974e7d50326SJeff Roberson "Scheduler name"); 1975e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, ""); 1976e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, ""); 1977e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, ""); 1978e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, ""); 1979155b6ca1SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RD, &sched_rebalance, 0, ""); 1980e7d50326SJeff Roberson 1981e7d50326SJeff Roberson /* ps compat */ 1982e7d50326SJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 1983e7d50326SJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 1984e7d50326SJeff Roberson 1985e7d50326SJeff Roberson 1986ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1987ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1988
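
/*
 * Illustrative sketch, not part of the original scheduler code: a
 * self-contained rendering of the interactivity score computed by
 * sched_interact_score() above.  The EXAMPLE_* constants and the
 * example_interact_score() name are invented for this sketch; the real
 * SCHED_INTERACT_* constants are defined earlier in this file and may
 * differ from the values assumed here.
 */
#define	EXAMPLE_INTERACT_MAX	100
#define	EXAMPLE_INTERACT_HALF	(EXAMPLE_INTERACT_MAX / 2)

static __inline int
example_interact_score(int runtime, int slptime)
{
	int div;

	/* More run than sleep history pushes the score toward the max. */
	if (runtime > slptime) {
		div = max(1, runtime / EXAMPLE_INTERACT_HALF);
		return (EXAMPLE_INTERACT_HALF +
		    (EXAMPLE_INTERACT_HALF - (slptime / div)));
	}
	/* More sleep than run history pulls the score toward zero. */
	if (slptime > runtime) {
		div = max(1, slptime / EXAMPLE_INTERACT_HALF);
		return (runtime / div);
	}
	/* Equal histories, including the fresh 0/0 case, score zero. */
	return (0);
}

/*
 * For instance, a 75/25 run/sleep history scores 75 while the mirror
 * 25/75 history scores 25; only a score below the sched_interact
 * threshold above is treated as interactive and placed on the
 * realtime queue by sched_priority().
 */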