/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD,
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_REMOVED	0x0008		/* Thread was removed while ASSIGNED */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	((ts)->ts_ltick - (ts)->ts_ftick)

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)
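
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): how the
 * macros above combine into a timeshare priority for a non-interactive
 * thread, mirroring the non-interactive branch of sched_priority() later in
 * this file.  The helper name is hypothetical.
 */
#if 0
static int
example_timeshare_pri(struct td_sched *ts, int nice)
{
	int pri;

	pri = SCHED_PRI_MIN;		/* Base of the nice-reachable range. */
	if (ts->ts_ticks)		/* Busier threads get a worse priority. */
		pri += SCHED_PRI_TICKS(ts);
	pri += SCHED_PRI_NICE(nice);	/* Nice shifts the result directly. */
	/* Clamp into the timeshare range. */
	return (min(max(pri, PRI_MIN_TIMESHARE), PRI_MAX_TIMESHARE));
}
#endif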

/*
 * These determine the interactivity of a process.  Interactivity differs
 * from cpu utilization in that it expresses the voluntary time slept vs.
 * time ran while cpu utilization includes all time not running.  This more
 * accurately models the intent of the thread.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)
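
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): the
 * interactivity score computed by sched_interact_score() later in this file
 * maps the sleep/run history into [0, SCHED_INTERACT_MAX], smaller being
 * more interactive.  With 100 ticks of run history and 400 of sleep the
 * score is 100 / 8 = 12, below SCHED_INTERACT_THRESH (30), so the thread is
 * treated as interactive; with the ratio reversed the score is
 * 50 + (50 - 12) = 88 and it is not.  The helper name is hypothetical.
 */
#if 0
static int
example_is_interactive(int slptime, int runtime)
{
	int score;

	if (runtime > slptime)
		score = SCHED_INTERACT_HALF + (SCHED_INTERACT_HALF -
		    (slptime / max(1, runtime / SCHED_INTERACT_HALF)));
	else if (slptime > runtime)
		score = runtime / max(1, slptime / SCHED_INTERACT_HALF);
	else
		score = 0;
	return (score < SCHED_INTERACT_THRESH);
}
#endif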

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;
static int sched_rebalance = 1;

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;	/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;	/* timeshare run queue. */
	struct runq	tdq_realtime;	/* real-time run queue. */
	int		tdq_idx;	/* Current insert index. */
	int		tdq_ridx;	/* Current removal index. */
	int		tdq_load;	/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;	/* Next in tdq group. */
	struct tdq_group *tdq_group;	/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;	/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a numa environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int		tdg_cpus;	/* Count of CPUs in this tdq group. */
	cpumask_t	tdg_cpumask;	/* Mask of cpus in this group. */
	cpumask_t	tdg_idlemask;	/* Idle cpus in this group. */
	cpumask_t	tdg_mask;	/* Bit mask for first cpu. */
	int		tdg_load;	/* Total load of this group. */
	int		tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif

/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);
static inline void sched_pin_td(struct thread *td);
static inline void sched_unpin_td(struct thread *td);

/* Operations on per processor queues */
static struct td_sched * tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(void);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);

#define	THREAD_CAN_MIGRATE(td)						\
    ((td)->td_pinned == 0 && (td)->td_pri_class != PRI_ITHD)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static inline void
sched_pin_td(struct thread *td)
{
	td->td_pinned++;
}

static inline void
sched_unpin_td(struct thread *td)
{
	td->td_pinned--;
}

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm, ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload:           %d\n", tdq->tdq_load);
	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}
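
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): how
 * runq_print() above decodes the run-queue status bitmap.  A set bit in
 * word i at position j names the non-empty queue at index
 * j + (i << RQB_L2BPW); scanning words low to high therefore visits the
 * queues in priority order.  The helper name is hypothetical.
 */
#if 0
static int
example_lowest_nonempty_queue(struct runq *rq)
{
	struct rqbits *rqb;
	int i;
	int j;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++) {
		if (rqb->rqb_bits[i] == 0)
			continue;
		for (j = 0; j < RQB_BPW; j++)
			if (rqb->rqb_bits[i] & (1ul << j))
				return (j + (i << RQB_L2BPW));
	}
	return (-1);		/* All queues empty. */
}
#endif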

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		int pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * timeshare.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}
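
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): the
 * timeshare insert logic above behaves like a calendar queue.  A priority is
 * scaled into one of RQ_NQS buckets and then rotated by the current insert
 * index, so lower priority threads land further "in the future" relative to
 * tdq_ridx, the bucket currently being drained.  The helper name is
 * hypothetical.
 */
#if 0
static int
example_timeshare_bucket(int pri, int idx, int ridx)
{
	int bucket;

	bucket = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;	/* Scale into RQ_NQS. */
	bucket = (bucket + idx) % RQ_NQS;		/* Rotate by insert index. */
	if (ridx != idx && bucket == ridx)		/* Keep one slot of slack. */
		bucket = (bucket - 1) % RQ_NQS;
	return (bucket);
}
#endif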

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(void)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (sched_rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm favors
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
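
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): the
 * imbalance-to-migration computation used by sched_balance_pair() above.
 * Half the load difference is moved, rounded up, and capped by how many
 * threads are actually transferable; e.g. loads of 7 and 2 give diff = 5
 * and move = 3, but only 2 if just two threads can migrate.  The helper
 * name is hypothetical.
 */
#if 0
static int
example_balance_move(int high_load, int low_load, int transferable)
{
	int diff;
	int move;

	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)			/* Round odd differences up. */
		move++;
	return (min(move, transferable));
}
#endif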

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try to steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			sched_pin_td(ts->ts_thread);
			sched_add(ts->ts_thread, SRQ_YIELDING);
			sched_unpin_td(ts->ts_thread);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		sched_pin_td(ts->ts_thread);
		sched_add(ts->ts_thread, SRQ_YIELDING);
		sched_unpin_td(ts->ts_thread);
	}
}
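
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): the
 * tdq_assigned list manipulated above and in tdq_notify() below is a
 * lock-free singly linked stack.  Producers push with a compare-and-set
 * loop; the consumer detaches the whole list with a single compare-and-set
 * and then walks it.  The helper names are hypothetical.
 */
#if 0
static void
example_assign_push(struct tdq *tdq, struct td_sched *ts)
{
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while (!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
}

static struct td_sched *
example_assign_take_all(struct tdq *tdq)
{
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while (!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	return (ts);
}
#endif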

static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class != PRI_IDLE && class != PRI_ITHD)
	    && (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/* Only ipi for realtime/ithd priorities */
	if (ts->ts_thread->td_priority > PRI_MIN_KERN)
		return;
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz/7);	/* 140ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Set up a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz/7);	/* ~140ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}
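
/*
 * Illustrative sketch (not part of the scheduler, kept under #if 0): the
 * fixed-point conversion computed by sched_initticks() above.  With the
 * common (but here assumed) values hz = 1000 and stathz = 128, tickincr
 * becomes (1000 << 10) / 128 = 8000, i.e. each stathz tick is charged as
 * 8000 / 1024 ~= 7.8 hz ticks; without the shift the integer division
 * would round 1000 / 128 down to 7.  The helper name is hypothetical.
 */
#if 0
static int
example_tickincr(int hz_val, int stathz_val)
{
	int incr;

	incr = (hz_val << SCHED_TICK_SHIFT) / stathz_val;
	if (incr == 0)		/* Guard against stathz much larger than hz. */
		incr = 1;
	return (incr);
}
#endif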

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int score;
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * If the score is interactive we place the thread in the realtime
	 * queue with a priority that is less than kernel and interrupt
	 * priorities.  These threads are not subject to nice restrictions.
	 *
	 * Scores greater than this are placed on the normal timeshare queue
	 * where the priority is partially decided by the most recent cpu
	 * utilization and the rest is decided by nice value.
	 */
	score = sched_interact_score(td);
	if (score < sched_interact) {
		pri = PRI_MIN_REALTIME;
		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
		    * score;
		KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME,
		    ("sched_priority: invalid interactive priority %d", pri));
	} else {
		pri = SCHED_PRI_MIN;
		if (td->td_sched->ts_ticks)
			pri += SCHED_PRI_TICKS(td->td_sched);
		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
		if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) {
			static int once = 1;
			if (once) {
				printf("sched_priority: invalid priority %d",
				    pri);
				printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n",
				    td->td_proc->p_nice,
				    td->td_sched->ts_ticks,
				    td->td_sched->ts_ftick,
				    td->td_sched->ts_ltick,
				    SCHED_PRI_TICKS(td->td_sched));
				once = 0;
			}
			pri = min(max(pri, PRI_MIN_TIMESHARE),
			    PRI_MAX_TIMESHARE);
		}
	}
	sched_user_prio(td, pri);

	return;
}

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 */
static void
sched_interact_update(struct thread *td)
{
	struct td_sched *ts;
	int sum;

	ts = td->td_sched;
	sum = ts->skg_runtime + ts->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * This only happens from two places:
	 * 1) We have added an unusual amount of run time from fork_exit.
	 * 2) We have added an unusual amount of sleep time from sched_sleep().
	 */
	if (sum > SCHED_SLP_RUN_MAX * 2) {
		if (ts->skg_runtime > ts->skg_slptime) {
			ts->skg_runtime = SCHED_SLP_RUN_MAX;
			ts->skg_slptime = 1;
		} else {
			ts->skg_slptime = SCHED_SLP_RUN_MAX;
			ts->skg_runtime = 1;
		}
		return;
	}
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
Dividing by two here forces 11172454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1118d322132cSJeff Roberson */ 111937a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1120155b6ca1SJeff Roberson ts->skg_runtime /= 2; 1121155b6ca1SJeff Roberson ts->skg_slptime /= 2; 1122d322132cSJeff Roberson return; 1123d322132cSJeff Roberson } 1124155b6ca1SJeff Roberson ts->skg_runtime = (ts->skg_runtime / 5) * 4; 1125155b6ca1SJeff Roberson ts->skg_slptime = (ts->skg_slptime / 5) * 4; 1126d322132cSJeff Roberson } 1127d322132cSJeff Roberson 1128d322132cSJeff Roberson static void 11298460a577SJohn Birrell sched_interact_fork(struct thread *td) 1130d322132cSJeff Roberson { 1131d322132cSJeff Roberson int ratio; 1132d322132cSJeff Roberson int sum; 1133d322132cSJeff Roberson 11348460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1135d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1136d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 11378460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 11388460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 11394b60e324SJeff Roberson } 11404b60e324SJeff Roberson } 11414b60e324SJeff Roberson 1142e1f89c22SJeff Roberson static int 11438460a577SJohn Birrell sched_interact_score(struct thread *td) 1144e1f89c22SJeff Roberson { 1145210491d3SJeff Roberson int div; 1146e1f89c22SJeff Roberson 11478460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 11488460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1149210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 11508460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 11518460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 11528460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 11538460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1154e1f89c22SJeff Roberson } 1155e1f89c22SJeff Roberson 1156210491d3SJeff Roberson /* 1157210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1158210491d3SJeff Roberson */ 1159210491d3SJeff Roberson return (0); 1160e1f89c22SJeff Roberson 1161e1f89c22SJeff Roberson } 1162e1f89c22SJeff Roberson 116315dc847eSJeff Roberson /* 1164e7d50326SJeff Roberson * Called from proc0_init() to bootstrap the scheduler. 1165ed062c8dSJulian Elischer */ 1166ed062c8dSJulian Elischer void 1167ed062c8dSJulian Elischer schedinit(void) 1168ed062c8dSJulian Elischer { 1169e7d50326SJeff Roberson 1170ed062c8dSJulian Elischer /* 1171ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1172ed062c8dSJulian Elischer */ 1173ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1174ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1175e7d50326SJeff Roberson td_sched0.ts_ltick = ticks; 11768ab80cf0SJeff Roberson td_sched0.ts_ftick = ticks; 1177ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1178ad1e7d28SJulian Elischer td_sched0.ts_state = TSS_THREAD; 1179ed062c8dSJulian Elischer } 1180ed062c8dSJulian Elischer 1181ed062c8dSJulian Elischer /* 118215dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 118315dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 1184e7d50326SJeff Roberson * at most sched_slice stathz ticks. 
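 * For example, with hz = 1000 and stathz = 128 (typical values that
 * vary by platform), sched_slice is 128 / 7 = 18 stathz ticks and the
 * conversion below returns 1000 / (128 / 18) = 142 hz ticks, matching
 * the intended ~140ms round-robin interval.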
118515dc847eSJeff Roberson */ 118635e6168fSJeff Roberson int 118735e6168fSJeff Roberson sched_rr_interval(void) 118835e6168fSJeff Roberson { 1189e7d50326SJeff Roberson 1190e7d50326SJeff Roberson /* Convert sched_slice to hz */ 1191e7d50326SJeff Roberson return (hz/(realstathz/sched_slice)); 119235e6168fSJeff Roberson } 119335e6168fSJeff Roberson 119422bf7d9aSJeff Roberson static void 1195ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 119635e6168fSJeff Roberson { 1197e7d50326SJeff Roberson 1198e7d50326SJeff Roberson if (ts->ts_ticks == 0) 1199e7d50326SJeff Roberson return; 12008ab80cf0SJeff Roberson if (ticks - (hz / 10) < ts->ts_ltick && 12018ab80cf0SJeff Roberson SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX) 12028ab80cf0SJeff Roberson return; 120335e6168fSJeff Roberson /* 120435e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1205210491d3SJeff Roberson */ 1206e7d50326SJeff Roberson if (ts->ts_ltick > ticks - SCHED_TICK_TARG) 1207ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1208e7d50326SJeff Roberson SCHED_TICK_TARG; 1209e7d50326SJeff Roberson else 1210ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1211ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1212e7d50326SJeff Roberson ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG; 121335e6168fSJeff Roberson } 121435e6168fSJeff Roberson 1215e7d50326SJeff Roberson static void 1216f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 121735e6168fSJeff Roberson { 1218ad1e7d28SJulian Elischer struct td_sched *ts; 121935e6168fSJeff Roberson 122081d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 122181d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 122281d47d3fSJeff Roberson curthread->td_proc->p_comm); 1223ad1e7d28SJulian Elischer ts = td->td_sched; 122435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1225f5c157d9SJohn Baldwin if (td->td_priority == prio) 1226f5c157d9SJohn Baldwin return; 1227e7d50326SJeff Roberson 12283f872f85SJeff Roberson if (TD_ON_RUNQ(td) && prio < td->td_priority) { 12293f741ca1SJeff Roberson /* 12303f741ca1SJeff Roberson * If the priority has been elevated due to priority 12313f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 1232e7d50326SJeff Roberson * queue. This could be optimized to not re-add in some 1233e7d50326SJeff Roberson * cases. 1234e7d50326SJeff Roberson * 1235ad1e7d28SJulian Elischer * Hold this td_sched on this cpu so that sched_prio() doesn't 1236f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1237f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1238f2b74cbfSJeff Roberson */ 12391e516cf5SJeff Roberson sched_pin_td(td); 1240e7d50326SJeff Roberson sched_rem(td); 1241e7d50326SJeff Roberson td->td_priority = prio; 1242e7d50326SJeff Roberson sched_add(td, SRQ_BORROWING); 12431e516cf5SJeff Roberson sched_unpin_td(td); 12443f741ca1SJeff Roberson } else 12453f741ca1SJeff Roberson td->td_priority = prio; 124635e6168fSJeff Roberson } 124735e6168fSJeff Roberson 1248f5c157d9SJohn Baldwin /* 1249f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1250f5c157d9SJohn Baldwin * priority. 
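 * For example, when a higher priority thread blocks on a turnstile
 * whose lock is held by this thread, the turnstile code lends the
 * blocker's priority here so the lock holder runs soon enough to
 * release the lock; sched_unlend_prio() undoes the boost afterwards.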
1251f5c157d9SJohn Baldwin */ 1252f5c157d9SJohn Baldwin void 1253f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1254f5c157d9SJohn Baldwin { 1255f5c157d9SJohn Baldwin 1256f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1257f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1258f5c157d9SJohn Baldwin } 1259f5c157d9SJohn Baldwin 1260f5c157d9SJohn Baldwin /* 1261f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1262f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1263f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1264f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1265f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1266f5c157d9SJohn Baldwin * of prio. 1267f5c157d9SJohn Baldwin */ 1268f5c157d9SJohn Baldwin void 1269f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1270f5c157d9SJohn Baldwin { 1271f5c157d9SJohn Baldwin u_char base_pri; 1272f5c157d9SJohn Baldwin 1273f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1274f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 12758460a577SJohn Birrell base_pri = td->td_user_pri; 1276f5c157d9SJohn Baldwin else 1277f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1278f5c157d9SJohn Baldwin if (prio >= base_pri) { 1279f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1280f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1281f5c157d9SJohn Baldwin } else 1282f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1283f5c157d9SJohn Baldwin } 1284f5c157d9SJohn Baldwin 1285f5c157d9SJohn Baldwin void 1286f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1287f5c157d9SJohn Baldwin { 1288f5c157d9SJohn Baldwin u_char oldprio; 1289f5c157d9SJohn Baldwin 1290f5c157d9SJohn Baldwin /* First, update the base priority. */ 1291f5c157d9SJohn Baldwin td->td_base_pri = prio; 1292f5c157d9SJohn Baldwin 1293f5c157d9SJohn Baldwin /* 129450aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1295f5c157d9SJohn Baldwin * ever lower the priority. 1296f5c157d9SJohn Baldwin */ 1297f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1298f5c157d9SJohn Baldwin return; 1299f5c157d9SJohn Baldwin 1300f5c157d9SJohn Baldwin /* Change the real priority. */ 1301f5c157d9SJohn Baldwin oldprio = td->td_priority; 1302f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1303f5c157d9SJohn Baldwin 1304f5c157d9SJohn Baldwin /* 1305f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1306f5c157d9SJohn Baldwin * its state. 
1307f5c157d9SJohn Baldwin */ 1308f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1309f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1310f5c157d9SJohn Baldwin } 1311f5c157d9SJohn Baldwin 131235e6168fSJeff Roberson void 13138460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13143db720fdSDavid Xu { 13153db720fdSDavid Xu u_char oldprio; 13163db720fdSDavid Xu 13178460a577SJohn Birrell td->td_base_user_pri = prio; 1318fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1319fc6c30f6SJulian Elischer return; 13208460a577SJohn Birrell oldprio = td->td_user_pri; 13218460a577SJohn Birrell td->td_user_pri = prio; 13223db720fdSDavid Xu 13233db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13243db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13253db720fdSDavid Xu } 13263db720fdSDavid Xu 13273db720fdSDavid Xu void 13283db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13293db720fdSDavid Xu { 13303db720fdSDavid Xu u_char oldprio; 13313db720fdSDavid Xu 13323db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13333db720fdSDavid Xu 1334f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 13358460a577SJohn Birrell td->td_user_pri = prio; 13363db720fdSDavid Xu 13373db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13383db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13393db720fdSDavid Xu } 13403db720fdSDavid Xu 13413db720fdSDavid Xu void 13423db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13433db720fdSDavid Xu { 13443db720fdSDavid Xu u_char base_pri; 13453db720fdSDavid Xu 13468460a577SJohn Birrell base_pri = td->td_base_user_pri; 13473db720fdSDavid Xu if (prio >= base_pri) { 13483db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13498460a577SJohn Birrell sched_user_prio(td, base_pri); 13503db720fdSDavid Xu } else 13513db720fdSDavid Xu sched_lend_user_prio(td, prio); 13523db720fdSDavid Xu } 13533db720fdSDavid Xu 13543db720fdSDavid Xu void 13553389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 135635e6168fSJeff Roberson { 1357c02bbb43SJeff Roberson struct tdq *tdq; 1358ad1e7d28SJulian Elischer struct td_sched *ts; 135935e6168fSJeff Roberson 136035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 136135e6168fSJeff Roberson 1362c02bbb43SJeff Roberson tdq = TDQ_SELF(); 1363e7d50326SJeff Roberson ts = td->td_sched; 1364060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1365060563ecSJulian Elischer td->td_oncpu = NOCPU; 136652eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 136777918643SStephan Uphoff td->td_owepreempt = 0; 1368b11fdad0SJeff Roberson /* 1369ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1370b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1371b11fdad0SJeff Roberson */ 13722454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1373bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1374ad1e7d28SJulian Elischer } else if ((ts->ts_flags & TSF_ASSIGNED) == 0) { 1375ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1376c02bbb43SJeff Roberson tdq_load_rem(tdq, ts); 1377ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1378f2b74cbfSJeff Roberson /* 1379ed062c8dSJulian Elischer * Don't allow the thread to migrate 1380ed062c8dSJulian Elischer * from a preemption. 1381f2b74cbfSJeff Roberson */ 13821e516cf5SJeff Roberson sched_pin_td(td); 1383598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 
1384598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1385598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 13861e516cf5SJeff Roberson sched_unpin_td(td); 13878460a577SJohn Birrell } 1388ed062c8dSJulian Elischer } 1389d39063f2SJulian Elischer if (newtd != NULL) { 1390c20c691bSJulian Elischer /* 13916680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 13926680bbd5SJeff Roberson * added to the run queue and then chosen. 1393c20c691bSJulian Elischer */ 1394ad1e7d28SJulian Elischer newtd->td_sched->ts_flags |= TSF_DIDRUN; 1395c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1396ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1397d39063f2SJulian Elischer } else 13982454aaf5SJeff Roberson newtd = choosethread(); 1399ebccf1e3SJoseph Koshy if (td != newtd) { 1400ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1401ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1402ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1403ebccf1e3SJoseph Koshy #endif 14048460a577SJohn Birrell 1405ae53b483SJeff Roberson cpu_switch(td, newtd); 1406ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1407ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1408ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1409ebccf1e3SJoseph Koshy #endif 1410ebccf1e3SJoseph Koshy } 1411ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1412060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 141335e6168fSJeff Roberson } 141435e6168fSJeff Roberson 141535e6168fSJeff Roberson void 1416fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 141735e6168fSJeff Roberson { 141835e6168fSJeff Roberson struct thread *td; 141935e6168fSJeff Roberson 1420fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14210b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 1422e7d50326SJeff Roberson 1423fa885116SJulian Elischer p->p_nice = nice; 14248460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14258460a577SJohn Birrell sched_priority(td); 1426e7d50326SJeff Roberson sched_prio(td, td->td_base_user_pri); 142735e6168fSJeff Roberson } 1428fa885116SJulian Elischer } 142935e6168fSJeff Roberson 143035e6168fSJeff Roberson void 143144f3b092SJohn Baldwin sched_sleep(struct thread *td) 143235e6168fSJeff Roberson { 1433e7d50326SJeff Roberson 143435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 143535e6168fSJeff Roberson 1436ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 143735e6168fSJeff Roberson } 143835e6168fSJeff Roberson 143935e6168fSJeff Roberson void 144035e6168fSJeff Roberson sched_wakeup(struct thread *td) 144135e6168fSJeff Roberson { 1442e7d50326SJeff Roberson int slptime; 1443e7d50326SJeff Roberson 144435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 144535e6168fSJeff Roberson 144635e6168fSJeff Roberson /* 1447e7d50326SJeff Roberson * If we slept for more than a tick update our interactivity and 1448e7d50326SJeff Roberson * priority. 
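 * The sleep interval is converted to the same fixed point scale used
 * for run time in sched_clock() (hz ticks shifted left by
 * SCHED_TICK_SHIFT), which keeps skg_slptime directly comparable with
 * skg_runtime in sched_interact_score().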
144935e6168fSJeff Roberson */ 1450e7d50326SJeff Roberson slptime = td->td_sched->ts_slptime; 1451e7d50326SJeff Roberson td->td_sched->ts_slptime = 0; 1452e7d50326SJeff Roberson if (slptime && slptime != ticks) { 145315dc847eSJeff Roberson int hzticks; 1454f1e8dc4aSJeff Roberson 1455e7d50326SJeff Roberson hzticks = (ticks - slptime) << SCHED_TICK_SHIFT; 14568460a577SJohn Birrell td->td_sched->skg_slptime += hzticks; 14578460a577SJohn Birrell sched_interact_update(td); 1458e7d50326SJeff Roberson sched_pctcpu_update(td->td_sched); 14598460a577SJohn Birrell sched_priority(td); 1460f1e8dc4aSJeff Roberson } 14612630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 146235e6168fSJeff Roberson } 146335e6168fSJeff Roberson 146435e6168fSJeff Roberson /* 146535e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 146635e6168fSJeff Roberson * priority. 146735e6168fSJeff Roberson */ 146835e6168fSJeff Roberson void 14698460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 147015dc847eSJeff Roberson { 14718460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1472ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1473e7d50326SJeff Roberson /* 1474e7d50326SJeff Roberson * Penalize the parent and child for forking. 1475e7d50326SJeff Roberson */ 1476e7d50326SJeff Roberson sched_interact_fork(child); 1477e7d50326SJeff Roberson sched_priority(child); 1478e7d50326SJeff Roberson td->td_sched->skg_runtime += tickincr; 1479e7d50326SJeff Roberson sched_interact_update(td); 1480e7d50326SJeff Roberson sched_priority(td); 1481ad1e7d28SJulian Elischer } 1482ad1e7d28SJulian Elischer 1483ad1e7d28SJulian Elischer void 1484ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1485ad1e7d28SJulian Elischer { 1486ad1e7d28SJulian Elischer struct td_sched *ts; 1487ad1e7d28SJulian Elischer struct td_sched *ts2; 14888460a577SJohn Birrell 1489e7d50326SJeff Roberson /* 1490e7d50326SJeff Roberson * Initialize child. 1491e7d50326SJeff Roberson */ 1492ed062c8dSJulian Elischer sched_newthread(child); 1493ad1e7d28SJulian Elischer ts = td->td_sched; 1494ad1e7d28SJulian Elischer ts2 = child->td_sched; 1495ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1496ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1497e7d50326SJeff Roberson /* 1498e7d50326SJeff Roberson * Grab our parents cpu estimation information and priority. 1499e7d50326SJeff Roberson */ 1500ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1501ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1502ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 1503e7d50326SJeff Roberson child->td_user_pri = td->td_user_pri; 1504e7d50326SJeff Roberson child->td_base_user_pri = td->td_base_user_pri; 1505e7d50326SJeff Roberson /* 1506e7d50326SJeff Roberson * And update interactivity score. 1507e7d50326SJeff Roberson */ 1508e7d50326SJeff Roberson ts2->skg_slptime = ts->skg_slptime; 1509e7d50326SJeff Roberson ts2->skg_runtime = ts->skg_runtime; 1510e7d50326SJeff Roberson ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. 
*/ 151115dc847eSJeff Roberson } 151215dc847eSJeff Roberson 151315dc847eSJeff Roberson void 15148460a577SJohn Birrell sched_class(struct thread *td, int class) 151515dc847eSJeff Roberson { 151615dc847eSJeff Roberson 15172056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15188460a577SJohn Birrell if (td->td_pri_class == class) 151915dc847eSJeff Roberson return; 152015dc847eSJeff Roberson 1521ef1134c9SJeff Roberson #ifdef SMP 1522155b9987SJeff Roberson /* 1523155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1524155b9987SJeff Roberson * count because could be changing to or from an interrupt 1525155b9987SJeff Roberson * class. 1526155b9987SJeff Roberson */ 15271e516cf5SJeff Roberson if (td->td_sched->ts_state == TSS_ONRUNQ) { 15281e516cf5SJeff Roberson struct tdq *tdq; 15291e516cf5SJeff Roberson 15301e516cf5SJeff Roberson tdq = TDQ_CPU(td->td_sched->ts_cpu); 15311e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1532d2ad694cSJeff Roberson tdq->tdq_transferable--; 1533d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 153480f86c9fSJeff Roberson } 15351e516cf5SJeff Roberson td->td_pri_class = class; 15361e516cf5SJeff Roberson if (THREAD_CAN_MIGRATE(td)) { 1537d2ad694cSJeff Roberson tdq->tdq_transferable++; 1538d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 153980f86c9fSJeff Roberson } 1540155b9987SJeff Roberson } 1541ef1134c9SJeff Roberson #endif 15428460a577SJohn Birrell td->td_pri_class = class; 154335e6168fSJeff Roberson } 154435e6168fSJeff Roberson 154535e6168fSJeff Roberson /* 154635e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 154735e6168fSJeff Roberson */ 154835e6168fSJeff Roberson void 1549fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 155035e6168fSJeff Roberson { 1551e7d50326SJeff Roberson struct thread *td; 1552141ad61cSJeff Roberson 15538460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1554fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 15558460a577SJohn Birrell 1556e7d50326SJeff Roberson td = FIRST_THREAD_IN_PROC(p); 1557e7d50326SJeff Roberson sched_exit_thread(td, child); 1558ad1e7d28SJulian Elischer } 1559ad1e7d28SJulian Elischer 1560ad1e7d28SJulian Elischer void 1561fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1562ad1e7d28SJulian Elischer { 1563fc6c30f6SJulian Elischer 1564e7d50326SJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1565e7d50326SJeff Roberson child, child->td_proc->p_comm, child->td_priority); 1566e7d50326SJeff Roberson 1567e7d50326SJeff Roberson tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1568e7d50326SJeff Roberson #ifdef KSE 1569e7d50326SJeff Roberson /* 1570e7d50326SJeff Roberson * KSE forks and exits so often that this penalty causes short-lived 1571e7d50326SJeff Roberson * threads to always be non-interactive. This causes mozilla to 1572e7d50326SJeff Roberson * crawl under load. 1573e7d50326SJeff Roberson */ 1574e7d50326SJeff Roberson if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc) 1575e7d50326SJeff Roberson return; 1576e7d50326SJeff Roberson #endif 1577e7d50326SJeff Roberson /* 1578e7d50326SJeff Roberson * Give the child's runtime to the parent without returning the 1579e7d50326SJeff Roberson * sleep time as a penalty to the parent. This causes shells that 1580e7d50326SJeff Roberson * launch expensive things to mark their children as expensive. 
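 * For instance, a shell that repeatedly spawns cpu-bound children
 * absorbs their accumulated run time here and so loses its own
 * interactive standing, while the children's sleep time is deliberately
 * not inherited.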
1581e7d50326SJeff Roberson */ 1582fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1583fc6c30f6SJulian Elischer sched_interact_update(td); 1584e7d50326SJeff Roberson sched_priority(td); 1585ad1e7d28SJulian Elischer } 1586ad1e7d28SJulian Elischer 1587ad1e7d28SJulian Elischer void 1588ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1589ad1e7d28SJulian Elischer { 1590ad1e7d28SJulian Elischer /* 1591ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1592ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1593ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1594ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1595ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1596ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1597ad1e7d28SJulian Elischer * it perfectly here. 1598ad1e7d28SJulian Elischer */ 1599ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1600ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1601ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1602ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1603ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1604ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1605ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1606ad1e7d28SJulian Elischer } 160735e6168fSJeff Roberson } 160835e6168fSJeff Roberson 160935e6168fSJeff Roberson void 16107cf90fb3SJeff Roberson sched_clock(struct thread *td) 161135e6168fSJeff Roberson { 1612ad1e7d28SJulian Elischer struct tdq *tdq; 1613ad1e7d28SJulian Elischer struct td_sched *ts; 161435e6168fSJeff Roberson 1615dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1616dc03363dSJeff Roberson #ifdef SMP 16173f872f85SJeff Roberson sched_smp_tick(); 1618dc03363dSJeff Roberson #endif 16193f872f85SJeff Roberson tdq = TDQ_SELF(); 16203f872f85SJeff Roberson /* 16213f872f85SJeff Roberson * Advance the insert index once for each tick to ensure that all 16223f872f85SJeff Roberson * threads get a chance to run. 16233f872f85SJeff Roberson */ 16243f872f85SJeff Roberson if (tdq->tdq_idx == tdq->tdq_ridx) { 16253f872f85SJeff Roberson tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS; 16263f872f85SJeff Roberson if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx])) 16273f872f85SJeff Roberson tdq->tdq_ridx = tdq->tdq_idx; 16283f872f85SJeff Roberson } 16290a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 16303f872f85SJeff Roberson ts = td->td_sched; 1631e7d50326SJeff Roberson ts->ts_ticks += tickincr; 1632ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1633e7d50326SJeff Roberson /* 1634e7d50326SJeff Roberson * Update if we've exceeded our desired tick threshhold by over one 1635e7d50326SJeff Roberson * second. 1636e7d50326SJeff Roberson */ 16378ab80cf0SJeff Roberson if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick) 1638ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 16393f741ca1SJeff Roberson /* 16408460a577SJohn Birrell * We only do slicing code for TIMESHARE threads. 
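 * Threads in the other classes (interrupt, realtime and idle) return
 * here and never have their interactivity charged or their slice
 * expired by this routine.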
1641a8949de2SJeff Roberson */ 16428460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1643a8949de2SJeff Roberson return; 1644a8949de2SJeff Roberson /* 16453f872f85SJeff Roberson * We used a tick; charge it to the thread so that we can compute our 164615dc847eSJeff Roberson * interactivity. 164715dc847eSJeff Roberson */ 16488460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 16498460a577SJohn Birrell sched_interact_update(td); 165035e6168fSJeff Roberson /* 165135e6168fSJeff Roberson * We used up one time slice. 165235e6168fSJeff Roberson */ 1653ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 165415dc847eSJeff Roberson return; 165535e6168fSJeff Roberson /* 165615dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 165735e6168fSJeff Roberson */ 16588460a577SJohn Birrell sched_priority(td); 16598ab80cf0SJeff Roberson tdq_load_rem(tdq, ts); 1660e7d50326SJeff Roberson ts->ts_slice = sched_slice; 1661ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 16624a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 166335e6168fSJeff Roberson } 166435e6168fSJeff Roberson 166535e6168fSJeff Roberson int 166635e6168fSJeff Roberson sched_runnable(void) 166735e6168fSJeff Roberson { 1668ad1e7d28SJulian Elischer struct tdq *tdq; 1669b90816f1SJeff Roberson int load; 167035e6168fSJeff Roberson 1671b90816f1SJeff Roberson load = 1; 1672b90816f1SJeff Roberson 1673ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 167422bf7d9aSJeff Roberson #ifdef SMP 1675d2ad694cSJeff Roberson if (tdq->tdq_assigned) { 167646f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 1677ad1e7d28SJulian Elischer tdq_assign(tdq); 167846f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 167946f8b265SJeff Roberson } 168022bf7d9aSJeff Roberson #endif 16813f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1682d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 16833f741ca1SJeff Roberson goto out; 16843f741ca1SJeff Roberson } else 1685d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1686b90816f1SJeff Roberson goto out; 1687b90816f1SJeff Roberson load = 0; 1688b90816f1SJeff Roberson out: 1689b90816f1SJeff Roberson return (load); 169035e6168fSJeff Roberson } 169135e6168fSJeff Roberson 1692ad1e7d28SJulian Elischer struct td_sched * 1693c9f25d8fSJeff Roberson sched_choose(void) 1694c9f25d8fSJeff Roberson { 1695ad1e7d28SJulian Elischer struct tdq *tdq; 1696ad1e7d28SJulian Elischer struct td_sched *ts; 169715dc847eSJeff Roberson 1698b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1699ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 170015dc847eSJeff Roberson #ifdef SMP 170180f86c9fSJeff Roberson restart: 1702d2ad694cSJeff Roberson if (tdq->tdq_assigned) 1703ad1e7d28SJulian Elischer tdq_assign(tdq); 170415dc847eSJeff Roberson #endif 1705ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1706ad1e7d28SJulian Elischer if (ts) { 170722bf7d9aSJeff Roberson #ifdef SMP 1708155b6ca1SJeff Roberson if (ts->ts_thread->td_priority > PRI_MIN_IDLE) 1709ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 171080f86c9fSJeff Roberson goto restart; 171122bf7d9aSJeff Roberson #endif 1712ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1713ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1714ad1e7d28SJulian Elischer return (ts); 171535e6168fSJeff Roberson } 1716c9f25d8fSJeff Roberson #ifdef SMP 1717ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 171880f86c9fSJeff Roberson goto restart; 1719c9f25d8fSJeff Roberson #endif 172015dc847eSJeff Roberson return (NULL); 172135e6168fSJeff Roberson } 172235e6168fSJeff Roberson 
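/*
 * A note on how threads arrive at sched_add() below: wakeups and
 * preempted threads reach it through setrunqueue() (see kern_switch.c,
 * included at the bottom of this file).  For timeshare threads it
 * refreshes the priority computed by sched_priority() and then selects
 * the realtime, timeshare or idle run queue purely from that priority,
 * possibly pushing the thread to another cpu via tdq_notify().
 */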
172335e6168fSJeff Roberson void 17242630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 172535e6168fSJeff Roberson { 1726ad1e7d28SJulian Elischer struct tdq *tdq; 1727ad1e7d28SJulian Elischer struct td_sched *ts; 1728598b368dSJeff Roberson int preemptive; 17292454aaf5SJeff Roberson int canmigrate; 173022bf7d9aSJeff Roberson int class; 1731c9f25d8fSJeff Roberson 173281d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 173381d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 173481d47d3fSJeff Roberson curthread->td_proc->p_comm); 173522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1736ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1737e7d50326SJeff Roberson ts = td->td_sched; 1738e7d50326SJeff Roberson class = PRI_BASE(td->td_pri_class); 1739e7d50326SJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1740e7d50326SJeff Roberson canmigrate = 1; 1741598b368dSJeff Roberson #ifdef SMP 1742ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1743ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_REMOVED) 1744ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_REMOVED; 174522bf7d9aSJeff Roberson return; 17462d59a44dSJeff Roberson } 1747e7d50326SJeff Roberson canmigrate = THREAD_CAN_MIGRATE(td); 1748598b368dSJeff Roberson #endif 1749ad1e7d28SJulian Elischer KASSERT(ts->ts_state != TSS_ONRUNQ, 1750ad1e7d28SJulian Elischer ("sched_add: thread %p (%s) already in run queue", td, 17518460a577SJohn Birrell td->td_proc->p_comm)); 17528460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 17535d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1754ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1755ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 175615dc847eSJeff Roberson /* 1757e7d50326SJeff Roberson * Set the slice and pick the run queue. 175815dc847eSJeff Roberson */ 1759e7d50326SJeff Roberson if (ts->ts_slice == 0) 1760e7d50326SJeff Roberson ts->ts_slice = sched_slice; 17618ab80cf0SJeff Roberson if (class == PRI_TIMESHARE) 17628ab80cf0SJeff Roberson sched_priority(td); 1763e7d50326SJeff Roberson if (td->td_priority <= PRI_MAX_REALTIME) { 1764e7d50326SJeff Roberson ts->ts_runq = &tdq->tdq_realtime; 1765e7d50326SJeff Roberson /* 1766e7d50326SJeff Roberson * If the thread is not artificially pinned and it's in 1767e7d50326SJeff Roberson * the realtime queue we directly dispatch it on this cpu 1768e7d50326SJeff Roberson * for minimum latency. Interrupt handlers may also have 1769e7d50326SJeff Roberson * to complete on the cpu that dispatched them. 1770e7d50326SJeff Roberson */ 1771155b6ca1SJeff Roberson if (td->td_pinned == 0 && class == PRI_ITHD) 1772e7d50326SJeff Roberson ts->ts_cpu = PCPU_GET(cpuid); 1773e7d50326SJeff Roberson } else if (td->td_priority <= PRI_MAX_TIMESHARE) 1774e7d50326SJeff Roberson ts->ts_runq = &tdq->tdq_timeshare; 177515dc847eSJeff Roberson else 1776d2ad694cSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 1777e7d50326SJeff Roberson 177822bf7d9aSJeff Roberson #ifdef SMP 17792454aaf5SJeff Roberson /* 17802454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 
17812454aaf5SJeff Roberson */ 1782ad1e7d28SJulian Elischer if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid) ) { 1783ad1e7d28SJulian Elischer ts->ts_runq = NULL; 1784ad1e7d28SJulian Elischer tdq_notify(ts, ts->ts_cpu); 178580f86c9fSJeff Roberson return; 178680f86c9fSJeff Roberson } 178722bf7d9aSJeff Roberson /* 1788670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1789670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 179022bf7d9aSJeff Roberson */ 1791e7d50326SJeff Roberson if ((class != PRI_IDLE && class != PRI_ITHD) && 1792d2ad694cSJeff Roberson (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) { 179380f86c9fSJeff Roberson /* 179480f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 179580f86c9fSJeff Roberson * from the global idle mask. 179680f86c9fSJeff Roberson */ 1797d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1798d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1799d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 180080f86c9fSJeff Roberson /* 180180f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 180280f86c9fSJeff Roberson */ 1803d2ad694cSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask); 1804e7d50326SJeff Roberson } else if (canmigrate && tdq->tdq_load > 1) 1805ad1e7d28SJulian Elischer if (tdq_transfer(tdq, ts, class)) 1806670c524fSJeff Roberson return; 1807ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 180822bf7d9aSJeff Roberson #endif 1809e7d50326SJeff Roberson if (td->td_priority < curthread->td_priority) 181022bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 181163fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18120c0b25aeSJohn Baldwin return; 1813ad1e7d28SJulian Elischer ts->ts_state = TSS_ONRUNQ; 181435e6168fSJeff Roberson 1815ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1816ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 181735e6168fSJeff Roberson } 181835e6168fSJeff Roberson 181935e6168fSJeff Roberson void 18207cf90fb3SJeff Roberson sched_rem(struct thread *td) 182135e6168fSJeff Roberson { 1822ad1e7d28SJulian Elischer struct tdq *tdq; 1823ad1e7d28SJulian Elischer struct td_sched *ts; 18247cf90fb3SJeff Roberson 182581d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 182681d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 182781d47d3fSJeff Roberson curthread->td_proc->p_comm); 1828598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1829ad1e7d28SJulian Elischer ts = td->td_sched; 1830ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1831ad1e7d28SJulian Elischer ts->ts_flags |= TSF_REMOVED; 183222bf7d9aSJeff Roberson return; 18332d59a44dSJeff Roberson } 1834ad1e7d28SJulian Elischer KASSERT((ts->ts_state == TSS_ONRUNQ), 1835ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 183635e6168fSJeff Roberson 1837ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1838ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1839ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1840ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 184135e6168fSJeff Roberson } 184235e6168fSJeff Roberson 184335e6168fSJeff Roberson fixpt_t 18447cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 184535e6168fSJeff Roberson { 184635e6168fSJeff Roberson fixpt_t pctcpu; 1847ad1e7d28SJulian Elischer struct td_sched *ts; 184835e6168fSJeff Roberson 184935e6168fSJeff Roberson pctcpu = 0; 
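	/*
	 * The estimate is derived from the ts_ticks sample window kept by
	 * sched_clock() and sched_pctcpu_update(): rtick below approximates
	 * the number of scheduler ticks charged per second of that window
	 * (capped at hz), and the FSCALE arithmetic simply encodes
	 * rtick / hz as the fixed point fraction that ps(1) and friends
	 * expect.
	 */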
1850ad1e7d28SJulian Elischer ts = td->td_sched; 1851ad1e7d28SJulian Elischer if (ts == NULL) 1852484288deSJeff Roberson return (0); 185335e6168fSJeff Roberson 1854b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1855ad1e7d28SJulian Elischer if (ts->ts_ticks) { 185635e6168fSJeff Roberson int rtick; 185735e6168fSJeff Roberson 1858ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 185935e6168fSJeff Roberson /* How many rtick per second ? */ 1860e7d50326SJeff Roberson rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz); 1861e7d50326SJeff Roberson pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT; 186235e6168fSJeff Roberson } 1863ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1864828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 186535e6168fSJeff Roberson 186635e6168fSJeff Roberson return (pctcpu); 186735e6168fSJeff Roberson } 186835e6168fSJeff Roberson 18699bacd788SJeff Roberson void 18709bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 18719bacd788SJeff Roberson { 1872ad1e7d28SJulian Elischer struct td_sched *ts; 18739bacd788SJeff Roberson 18749bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1875ad1e7d28SJulian Elischer ts = td->td_sched; 1876e7d50326SJeff Roberson KASSERT((ts->ts_flags & TSF_BOUND) == 0, 1877e7d50326SJeff Roberson ("sched_bind: thread %p already bound.", td)); 1878ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 187980f86c9fSJeff Roberson #ifdef SMP 188080f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 18819bacd788SJeff Roberson return; 18829bacd788SJeff Roberson /* sched_rem without the runq_remove */ 1883ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1884ad1e7d28SJulian Elischer tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts); 1885ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 18869bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
*/ 1887279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 1888e7d50326SJeff Roberson sched_pin(); 18899bacd788SJeff Roberson #endif 18909bacd788SJeff Roberson } 18919bacd788SJeff Roberson 18929bacd788SJeff Roberson void 18939bacd788SJeff Roberson sched_unbind(struct thread *td) 18949bacd788SJeff Roberson { 1895e7d50326SJeff Roberson struct td_sched *ts; 1896e7d50326SJeff Roberson 18979bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1898e7d50326SJeff Roberson ts = td->td_sched; 1899e7d50326SJeff Roberson KASSERT(ts->ts_flags & TSF_BOUND, 1900e7d50326SJeff Roberson ("sched_unbind: thread %p not bound.", td)); 1901e7d50326SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1902e7d50326SJeff Roberson ts->ts_flags &= ~TSF_BOUND; 1903e7d50326SJeff Roberson #ifdef SMP 1904e7d50326SJeff Roberson sched_unpin(); 1905e7d50326SJeff Roberson #endif 19069bacd788SJeff Roberson } 19079bacd788SJeff Roberson 190835e6168fSJeff Roberson int 1909ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1910ebccf1e3SJoseph Koshy { 1911ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1912ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1913ebccf1e3SJoseph Koshy } 1914ebccf1e3SJoseph Koshy 191536ec198bSDavid Xu void 191636ec198bSDavid Xu sched_relinquish(struct thread *td) 191736ec198bSDavid Xu { 191836ec198bSDavid Xu mtx_lock_spin(&sched_lock); 19198460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 192036ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 192136ec198bSDavid Xu mi_switch(SW_VOL, NULL); 192236ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 192336ec198bSDavid Xu } 192436ec198bSDavid Xu 1925ebccf1e3SJoseph Koshy int 192633916c36SJeff Roberson sched_load(void) 192733916c36SJeff Roberson { 192833916c36SJeff Roberson #ifdef SMP 192933916c36SJeff Roberson int total; 193033916c36SJeff Roberson int i; 193133916c36SJeff Roberson 193233916c36SJeff Roberson total = 0; 1933d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 1934d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 193533916c36SJeff Roberson return (total); 193633916c36SJeff Roberson #else 1937d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 193833916c36SJeff Roberson #endif 193933916c36SJeff Roberson } 194033916c36SJeff Roberson 194133916c36SJeff Roberson int 194235e6168fSJeff Roberson sched_sizeof_proc(void) 194335e6168fSJeff Roberson { 194435e6168fSJeff Roberson return (sizeof(struct proc)); 194535e6168fSJeff Roberson } 194635e6168fSJeff Roberson 194735e6168fSJeff Roberson int 194835e6168fSJeff Roberson sched_sizeof_thread(void) 194935e6168fSJeff Roberson { 195035e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 195135e6168fSJeff Roberson } 1952b41f1452SDavid Xu 1953b41f1452SDavid Xu void 1954b41f1452SDavid Xu sched_tick(void) 1955b41f1452SDavid Xu { 1956b41f1452SDavid Xu } 1957e7d50326SJeff Roberson 1958e7d50326SJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 1959e7d50326SJeff Roberson SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 1960e7d50326SJeff Roberson "Scheduler name"); 1961e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, ""); 1962e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, ""); 1963e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, ""); 1964e7d50326SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, ""); 
19651e516cf5SJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &sched_rebalance, 0, ""); 1966e7d50326SJeff Roberson 1967e7d50326SJeff Roberson /* ps compat */ 1968e7d50326SJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 1969e7d50326SJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 1970e7d50326SJeff Roberson 1971e7d50326SJeff Roberson 1972ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1973ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1974
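/*
 * Illustrative sketch, kept under #if 0 so it is never compiled: a
 * minimal userland model of sched_interact_score() above.  The
 * INTERACT_* constants and the sample histories are assumed values
 * chosen to mirror the default tuning (SCHED_INTERACT_MAX of 100 with
 * an interactivity threshold of 30); the authoritative numbers are the
 * SCHED_INTERACT_* macros earlier in this file and the
 * kern.sched.interact sysctl.
 */
#if 0
#include <stdio.h>

#define INTERACT_MAX	100			/* assumed default */
#define INTERACT_HALF	(INTERACT_MAX / 2)
#define INTERACT_THRESH	30			/* assumed default */

/* Mirrors the integer math in sched_interact_score(). */
static int
interact_score(int runtime, int slptime)
{
	int div;

	if (runtime > slptime) {
		div = runtime / INTERACT_HALF;
		if (div < 1)
			div = 1;
		return (INTERACT_HALF + (INTERACT_HALF - slptime / div));
	}
	if (slptime > runtime) {
		div = slptime / INTERACT_HALF;
		if (div < 1)
			div = 1;
		return (runtime / div);
	}
	return (0);
}

int
main(void)
{

	/* Sleeps twice as long as it runs: score 25, below the threshold. */
	printf("sleeper: %d\n", interact_score(1000, 2000));
	/* Runs twice as long as it sleeps: score 75, treated as batch. */
	printf("cpu hog: %d\n", interact_score(2000, 1000));
	printf("threshold: %d\n", INTERACT_THRESH);
	return (0);
}
#endif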