/*-
 * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1 << 10;

/*
 * The following data structures are allocated within their parent structure
 * but are scheduler specific.
 */
/*
 * Thread scheduler specific section.
 * Fields in the thread structure that are specific to this scheduler.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD = 0x0,	/* slaved to thread state */
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	ts_assign		ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	TSF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	TSF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	TSF_PREEMPTED	0x0040		/* Thread was preempted */
#define	TSF_DIDRUN	0x02000		/* Thread actually ran. */
#define	TSF_EXIT	0x04000		/* Thread is being killed. */

static struct td_sched td_sched0;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
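
/*
 * Worked example (assuming the usual 64-entry timeshare range, i.e.
 * SCHED_PRI_RANGE == 64): with SCHED_INTERACT_MAX == 100, a thread whose
 * interactivity score is 30 maps to SCHED_PRI_INTERACT(30) == 30 * 64 / 100
 * == 19 priority steps above SCHED_PRI_BASE before nice is added, while a
 * fully interactive score of 0 lands on SCHED_PRI_BASE itself.
 */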

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
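
/*
 * Worked example, using the compiled-in defaults above (slice_min == 1,
 * slice_max == 10) before sched_initticks() retunes them: SCHED_SLICE_RANGE
 * is 10 and SCHED_SLICE_NTHRESH is SCHED_PRI_NHALF - 1 == 19.  A thread
 * whose nice value is 5 above the least nice thread on its queue gets
 * SCHED_SLICE_NICE(5) == 10 - (5 * 10) / 19 == 8 ticks, and a thread a full
 * 19 steps away scales all the way down to 0.
 */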

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(td)						\
    (sched_interact_score(td) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(td, ts)						\
    ((ts->ts_thread->td_flags & TDF_BORROWING) ||			\
     (ts->ts_flags & TSF_PREEMPTED) || SCHED_INTERACTIVE(td))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* threads in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int			ksq_transferable;
	LIST_ENTRY(tdq)		ksq_siblings;	/* Next in tdq group. */
	struct tdq_group	*ksq_group;	/* Our processor group. */
	volatile struct td_sched *ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int	ksg_cpus;		/* Count of CPUs in this tdq group. */
	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) ksg_members;	/* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int ksg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_slice(struct td_sched *);
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);

/* Operations on per processor queues */
static struct td_sched *tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
static void tdq_nice_add(struct tdq *, int);
static void tdq_nice_rem(struct tdq *, int);
void tdq_print(int cpu);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);
#define	THREAD_CAN_MIGRATE(ts)						\
    ((ts)->ts_thread->td_pinned == 0 && ((ts)->ts_flags & TSF_BOUND) == 0)
#endif

void
tdq_print(int cpu)
{
	struct tdq *tdq;
	int i;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload: %d\n", tdq->ksq_load);
	printf("\tload TIMESHARE: %d\n", tdq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", tdq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (tdq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, tdq->ksq_nice[i]);
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts)) {
		tdq->ksq_transferable++;
		tdq->ksq_group->ksg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_flags & TSF_PREEMPTED)
		flags |= SRQ_PREEMPTED;
	runq_add(ts->ts_runq, ts, flags);
}

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->ksq_transferable--;
		tdq->ksq_group->ksg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->ksq_load_timeshare++;
	tdq->ksq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->ksq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->ksq_group->ksg_load++;
#else
		tdq->ksq_sysload++;
#endif
	if (ts->ts_thread->td_pri_class == PRI_TIMESHARE)
		tdq_nice_add(tdq, ts->ts_thread->td_proc->p_nice);
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->ksq_load_timeshare--;
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->ksq_group->ksg_load--;
#else
		tdq->ksq_sysload--;
#endif
	tdq->ksq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->ksq_load);
	ts->ts_runq = NULL;
	if (ts->ts_thread->td_pri_class == PRI_TIMESHARE)
		tdq_nice_rem(tdq, ts->ts_thread->td_proc->p_nice);
}

static void
tdq_nice_add(struct tdq *tdq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	tdq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < tdq->ksq_nicemin || tdq->ksq_load_timeshare == 1)
		tdq->ksq_nicemin = nice;
}

static void
tdq_nice_rem(struct tdq *tdq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	tdq->ksq_nice[n]--;
	KASSERT(tdq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != tdq->ksq_nicemin ||
	    tdq->ksq_nice[n] != 0 ||
	    tdq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (tdq->ksq_nice[n]) {
			tdq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 */
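/*
 * Sketch of one balancing pass, for illustration: starting from a random
 * group index, the loop below remembers the most loaded group that still has
 * transferable threads and the least loaded group overall.  With three groups
 * carrying loads of 4, 1 and 2, the pass pairs the load-4 group with the
 * load-1 group and leaves the third group alone until a later pass.
 */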
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *ksg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (ksg_maxid + 1);
	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
		ksg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || ksg->ksg_load > high->ksg_load)
		    && ksg->ksg_transferable)
			high = ksg;
		if (low == NULL || ksg->ksg_load < low->ksg_load)
			low = ksg;
		if (++i > ksg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->ksg_members),
		    LIST_FIRST(&low->ksg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= ksg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *ksg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (ksg->ksg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &ksg->ksg_members, ksq_siblings) {
		load = tdq->ksq_load;
		if (high == NULL || load > high->ksq_load)
			high = tdq;
		if (low == NULL || load < low->ksq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->ksq_group == low->ksq_group) {
		transferable = high->ksq_transferable;
		high_load = high->ksq_load;
		low_load = low->ksq_load;
	} else {
		transferable = high->ksq_group->ksg_transferable;
		high_load = high->ksq_group->ksg_load;
		low_load = low->ksq_group->ksg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
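
/*
 * Worked example for the transfer count above: with high_load == 5 and
 * low_load == 2 the difference is 3, so move rounds up to 2, and the result
 * is still clamped to the number of transferable threads, so at most that
 * many tdq_move() calls are made.
 */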

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *ksg;

		ksg = tdq->ksq_group;
		LIST_FOREACH(tdq, &ksg->ksg_members, ksq_siblings) {
			if (tdq == from || tdq->ksq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *ksg;
	struct tdq *steal;
	struct td_sched *ts;

	ksg = tdq->ksq_group;
	/*
	 * If we're in a cpu group, try and steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == tdq || steal->ksq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
			sched_add(ts->ts_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread
	 * bounces back and forth between two idle cores on separate
	 * physical CPUs.
	 */
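	/*
	 * For illustration, on a two-thread SMT group: the first logical CPU
	 * to idle only sets its own bit in ksg_idlemask below; tdq_idle is
	 * not updated until the second one idles as well and the two masks
	 * match.
	 */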
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, ksg->ksg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->ksq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->ksq_assigned,
		(uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->ksq_group->ksg_load--;
		tdq->ksq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
		sched_add(ts->ts_thread, SRQ_YIELDING);
	}
}

static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	/* XXX */
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (tdq_idle & tdq->ksq_group->ksg_mask))
		atomic_clear_int(&tdq_idle, tdq->ksq_group->ksg_mask);
	tdq->ksq_group->ksg_load++;
	tdq->ksq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->ksq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->ksq_assigned,
		(uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ts = runq_steal(tdq->ksq_next)) != NULL)
		return (ts);
	if ((ts = runq_steal(tdq->ksq_curr)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->ksq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *nksg;
	struct tdq_group *ksg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	nksg = old->ksq_group;
	ksg = tdq->ksq_group;
	if (tdq_idle) {
		if (tdq_idle & nksg->ksg_mask) {
			cpu = ffs(nksg->ksg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->ksq_load < tdq->ksq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= ksg_maxid; idx++) {
		nksg = TDQ_GROUP(idx);
		if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) {
			cpu = ffs(nksg->ksg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */
static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct runq *swap;
	struct td_sched *ts;
	int nice;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ts = runq_choose(tdq->ksq_curr);
		if (ts == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = tdq->ksq_curr;
			tdq->ksq_curr = tdq->ksq_next;
			tdq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the td_sched is in a
		 * TIMESHARE td_sched group and its nice was too far out
		 * of the range that receives slices.
		 */
		nice = ts->ts_thread->td_proc->p_nice + (0 - tdq->ksq_nicemin);
#if 0
		if (ts->ts_slice == 0 || (nice > SCHED_SLICE_NTHRESH &&
		    ts->ts_thread->td_proc->p_nice != 0)) {
			runq_remove(ts->ts_runq, ts);
			sched_slice(ts);
			ts->ts_runq = tdq->ksq_next;
			runq_add(ts->ts_runq, ts, 0);
			continue;
		}
#endif
		return (ts);
	}

	return (runq_choose(&tdq->ksq_idle));
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->ksq_timeshare[0]);
	runq_init(&tdq->ksq_timeshare[1]);
	runq_init(&tdq->ksq_idle);
	tdq->ksq_curr = &tdq->ksq_timeshare[0];
	tdq->ksq_next = &tdq->ksq_timeshare[1];
	tdq->ksq_load = 0;
	tdq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *ksq;

		ksq = &tdq_cpu[i];
		ksq->ksq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *ksg;
		struct tdq *ksq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			ksq = &tdq_cpu[i];
			ksg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
			cpus++;
		}
		ksg_maxid = cpus - 1;
	} else {
		struct tdq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					tdq_cpu[j].ksq_transferable = 0;
					tdq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &tdq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	slice_min = (realstathz/100);	/* 10ms */
	slice_max = (realstathz/7);	/* ~140ms */

	tickincr = (hz << 10) / realstathz;
	/*
	 * XXX This does not work for values of stathz that are much
	 * larger than hz.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(td));
	pri += SCHED_PRI_BASE;
	pri += td->td_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	sched_user_prio(td, pri);

	return;
}

/*
 * Calculate a time slice based on the properties of the process
 * and the runq that we're on.  This is only for PRI_TIMESHARE threads.
 */
static void
sched_slice(struct td_sched *ts)
{
	struct tdq *tdq;
	struct thread *td;

	td = ts->ts_thread;
	tdq = TDQ_CPU(ts->ts_cpu);

	if (td->td_flags & TDF_BORROWING) {
		ts->ts_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 * Threads in interactive procs get a minimal slice so that we
	 * quickly notice if they abuse their advantage.
	 *
	 * Threads in non-interactive procs are assigned a slice that is
	 * based on the proc's nice value relative to the least nice procs
	 * on the run queue for this cpu.
	 *
	 * If the thread is less nice than all others it gets the maximum
	 * slice and other threads will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice td_sched on the run queue.  Slice size is determined by
	 * the td_sched distance from the least nice thread.
	 *
	 * If the td_sched is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 procs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
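	/*
	 * For example (values chosen for illustration): if the least nice
	 * thread on this queue sits at nice -5 and this thread is at nice 0,
	 * its distance is 5 and it is granted SCHED_SLICE_NICE(5); at a
	 * distance beyond SCHED_SLICE_NTHRESH it instead falls through to
	 * the minimal-slice cases below.
	 */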
1087245f3abfSJeff Roberson */ 10888460a577SJohn Birrell if (!SCHED_INTERACTIVE(td)) { 1089245f3abfSJeff Roberson int nice; 1090245f3abfSJeff Roberson 1091ad1e7d28SJulian Elischer nice = td->td_proc->p_nice + (0 - tdq->ksq_nicemin); 1092ad1e7d28SJulian Elischer if (tdq->ksq_load_timeshare == 0 || 1093ad1e7d28SJulian Elischer td->td_proc->p_nice < tdq->ksq_nicemin) 1094ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MAX; 10957d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1096ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_NICE(nice); 10978460a577SJohn Birrell else if (td->td_proc->p_nice == 0) 1098ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; 1099245f3abfSJeff Roberson else 1100ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; /* 0 */ 1101245f3abfSJeff Roberson } else 1102ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_INTERACTIVE; 110335e6168fSJeff Roberson 1104245f3abfSJeff Roberson return; 110535e6168fSJeff Roberson } 110635e6168fSJeff Roberson 1107d322132cSJeff Roberson /* 1108d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1109d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1110d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1111d322132cSJeff Roberson * adjusted to more than double their maximum. 1112d322132cSJeff Roberson */ 11134b60e324SJeff Roberson static void 11148460a577SJohn Birrell sched_interact_update(struct thread *td) 11154b60e324SJeff Roberson { 1116d322132cSJeff Roberson int sum; 11173f741ca1SJeff Roberson 11188460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1119d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1120d322132cSJeff Roberson return; 1121d322132cSJeff Roberson /* 1122d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1123d322132cSJeff Roberson * will not bring us back into range. 
Dividing by two here forces 11242454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX] 1125d322132cSJeff Roberson */ 112637a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 11278460a577SJohn Birrell td->td_sched->skg_runtime /= 2; 11288460a577SJohn Birrell td->td_sched->skg_slptime /= 2; 1129d322132cSJeff Roberson return; 1130d322132cSJeff Roberson } 11318460a577SJohn Birrell td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4; 11328460a577SJohn Birrell td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4; 1133d322132cSJeff Roberson } 1134d322132cSJeff Roberson 1135d322132cSJeff Roberson static void 11368460a577SJohn Birrell sched_interact_fork(struct thread *td) 1137d322132cSJeff Roberson { 1138d322132cSJeff Roberson int ratio; 1139d322132cSJeff Roberson int sum; 1140d322132cSJeff Roberson 11418460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1142d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1143d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 11448460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 11458460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 11464b60e324SJeff Roberson } 11474b60e324SJeff Roberson } 11484b60e324SJeff Roberson 1149e1f89c22SJeff Roberson static int 11508460a577SJohn Birrell sched_interact_score(struct thread *td) 1151e1f89c22SJeff Roberson { 1152210491d3SJeff Roberson int div; 1153e1f89c22SJeff Roberson 11548460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 11558460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1156210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 11578460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 11588460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 11598460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 11608460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1161e1f89c22SJeff Roberson } 1162e1f89c22SJeff Roberson 1163210491d3SJeff Roberson /* 1164210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1165210491d3SJeff Roberson */ 1166210491d3SJeff Roberson return (0); 1167e1f89c22SJeff Roberson 1168e1f89c22SJeff Roberson } 1169e1f89c22SJeff Roberson 117015dc847eSJeff Roberson /* 1171ed062c8dSJulian Elischer * Very early in the boot some setup of scheduler-specific 1172ed062c8dSJulian Elischer * parts of proc0 and of some scheduler resources needs to be done. 1173ed062c8dSJulian Elischer * Called from: 1174ed062c8dSJulian Elischer * proc0_init() 1175ed062c8dSJulian Elischer */ 1176ed062c8dSJulian Elischer void 1177ed062c8dSJulian Elischer schedinit(void) 1178ed062c8dSJulian Elischer { 1179ed062c8dSJulian Elischer /* 1180ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1181ed062c8dSJulian Elischer */ 1182ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1183ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1184ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1185ad1e7d28SJulian Elischer td_sched0.ts_state = TSS_THREAD; 1186ed062c8dSJulian Elischer } 1187ed062c8dSJulian Elischer 1188ed062c8dSJulian Elischer /* 118915dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 119015dc847eSJeff Roberson * priority they will switch when their slices run out, which will be
119215dc847eSJeff Roberson */ 119335e6168fSJeff Roberson int 119435e6168fSJeff Roberson sched_rr_interval(void) 119535e6168fSJeff Roberson { 119635e6168fSJeff Roberson return (SCHED_SLICE_MAX); 119735e6168fSJeff Roberson } 119835e6168fSJeff Roberson 119922bf7d9aSJeff Roberson static void 1200ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 120135e6168fSJeff Roberson { 120235e6168fSJeff Roberson /* 120335e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1204210491d3SJeff Roberson */ 1205ad1e7d28SJulian Elischer if (ts->ts_ltick > ticks - SCHED_CPU_TICKS) { 1206210491d3SJeff Roberson /* 120781de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 120881de51bfSJeff Roberson * round away our results. 120965c8760dSJeff Roberson */ 1210ad1e7d28SJulian Elischer ts->ts_ticks <<= 10; 1211ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 121235e6168fSJeff Roberson SCHED_CPU_TICKS; 1213ad1e7d28SJulian Elischer ts->ts_ticks >>= 10; 121481de51bfSJeff Roberson } else 1215ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1216ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1217ad1e7d28SJulian Elischer ts->ts_ftick = ts->ts_ltick - SCHED_CPU_TICKS; 121835e6168fSJeff Roberson } 121935e6168fSJeff Roberson 122035e6168fSJeff Roberson void 1221f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 122235e6168fSJeff Roberson { 1223ad1e7d28SJulian Elischer struct td_sched *ts; 122435e6168fSJeff Roberson 122581d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 122681d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 122781d47d3fSJeff Roberson curthread->td_proc->p_comm); 1228ad1e7d28SJulian Elischer ts = td->td_sched; 122935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1230f5c157d9SJohn Baldwin if (td->td_priority == prio) 1231f5c157d9SJohn Baldwin return; 123235e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 12333f741ca1SJeff Roberson /* 12343f741ca1SJeff Roberson * If the priority has been elevated due to priority 12353f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 12363f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 12373f741ca1SJeff Roberson * needs to fix things up. 12383f741ca1SJeff Roberson */ 1239ad1e7d28SJulian Elischer if (prio < td->td_priority && ts->ts_runq != NULL && 1240ad1e7d28SJulian Elischer (ts->ts_flags & TSF_ASSIGNED) == 0 && 1241ad1e7d28SJulian Elischer ts->ts_runq != TDQ_CPU(ts->ts_cpu)->ksq_curr) { 1242ad1e7d28SJulian Elischer runq_remove(ts->ts_runq, ts); 1243ad1e7d28SJulian Elischer ts->ts_runq = TDQ_CPU(ts->ts_cpu)->ksq_curr; 1244ad1e7d28SJulian Elischer runq_add(ts->ts_runq, ts, 0); 124535e6168fSJeff Roberson } 1246f2b74cbfSJeff Roberson /* 1247ad1e7d28SJulian Elischer * Hold this td_sched on this cpu so that sched_prio() doesn't 1248f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1249f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1250f2b74cbfSJeff Roberson */ 1251ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 12523f741ca1SJeff Roberson adjustrunqueue(td, prio); 1253ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 12543f741ca1SJeff Roberson } else 12553f741ca1SJeff Roberson td->td_priority = prio; 125635e6168fSJeff Roberson } 125735e6168fSJeff Roberson 1258f5c157d9SJohn Baldwin /* 1259f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1260f5c157d9SJohn Baldwin * priority. 
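 *
 * TDF_BORROWING marks the borrowed priority so that sched_prio() and
 * sched_unlend_prio() below will refuse to lower the thread's priority
 * again until the lender's claim has been released.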
1261f5c157d9SJohn Baldwin */ 1262f5c157d9SJohn Baldwin void 1263f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1264f5c157d9SJohn Baldwin { 1265f5c157d9SJohn Baldwin 1266f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1267f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1268f5c157d9SJohn Baldwin } 1269f5c157d9SJohn Baldwin 1270f5c157d9SJohn Baldwin /* 1271f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1272f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1273f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1274f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1275f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1276f5c157d9SJohn Baldwin * of prio. 1277f5c157d9SJohn Baldwin */ 1278f5c157d9SJohn Baldwin void 1279f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1280f5c157d9SJohn Baldwin { 1281f5c157d9SJohn Baldwin u_char base_pri; 1282f5c157d9SJohn Baldwin 1283f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1284f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 12858460a577SJohn Birrell base_pri = td->td_user_pri; 1286f5c157d9SJohn Baldwin else 1287f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1288f5c157d9SJohn Baldwin if (prio >= base_pri) { 1289f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1290f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1291f5c157d9SJohn Baldwin } else 1292f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1293f5c157d9SJohn Baldwin } 1294f5c157d9SJohn Baldwin 1295f5c157d9SJohn Baldwin void 1296f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1297f5c157d9SJohn Baldwin { 1298f5c157d9SJohn Baldwin u_char oldprio; 1299f5c157d9SJohn Baldwin 1300f5c157d9SJohn Baldwin /* First, update the base priority. */ 1301f5c157d9SJohn Baldwin td->td_base_pri = prio; 1302f5c157d9SJohn Baldwin 1303f5c157d9SJohn Baldwin /* 130450aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1305f5c157d9SJohn Baldwin * ever lower the priority. 1306f5c157d9SJohn Baldwin */ 1307f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1308f5c157d9SJohn Baldwin return; 1309f5c157d9SJohn Baldwin 1310f5c157d9SJohn Baldwin /* Change the real priority. */ 1311f5c157d9SJohn Baldwin oldprio = td->td_priority; 1312f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1313f5c157d9SJohn Baldwin 1314f5c157d9SJohn Baldwin /* 1315f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1316f5c157d9SJohn Baldwin * its state. 
1317f5c157d9SJohn Baldwin */ 1318f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1319f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1320f5c157d9SJohn Baldwin } 1321f5c157d9SJohn Baldwin 132235e6168fSJeff Roberson void 13238460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13243db720fdSDavid Xu { 13253db720fdSDavid Xu u_char oldprio; 13263db720fdSDavid Xu 13278460a577SJohn Birrell td->td_base_user_pri = prio; 13288460a577SJohn Birrell 13298460a577SJohn Birrell oldprio = td->td_user_pri; 13308460a577SJohn Birrell td->td_user_pri = prio; 13313db720fdSDavid Xu 13323db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13333db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13343db720fdSDavid Xu } 13353db720fdSDavid Xu 13363db720fdSDavid Xu void 13373db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13383db720fdSDavid Xu { 13393db720fdSDavid Xu u_char oldprio; 13403db720fdSDavid Xu 13413db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13423db720fdSDavid Xu 1343f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 13448460a577SJohn Birrell td->td_user_pri = prio; 13453db720fdSDavid Xu 13463db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13473db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13483db720fdSDavid Xu } 13493db720fdSDavid Xu 13503db720fdSDavid Xu void 13513db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13523db720fdSDavid Xu { 13533db720fdSDavid Xu u_char base_pri; 13543db720fdSDavid Xu 13558460a577SJohn Birrell base_pri = td->td_base_user_pri; 13563db720fdSDavid Xu if (prio >= base_pri) { 13573db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13588460a577SJohn Birrell sched_user_prio(td, base_pri); 13593db720fdSDavid Xu } else 13603db720fdSDavid Xu sched_lend_user_prio(td, prio); 13613db720fdSDavid Xu } 13623db720fdSDavid Xu 13633db720fdSDavid Xu void 13643389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 136535e6168fSJeff Roberson { 1366ad1e7d28SJulian Elischer struct tdq *ksq; 1367ad1e7d28SJulian Elischer struct td_sched *ts; 136835e6168fSJeff Roberson 136935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 137035e6168fSJeff Roberson 1371ad1e7d28SJulian Elischer ts = td->td_sched; 1372ad1e7d28SJulian Elischer ksq = TDQ_SELF(); 137335e6168fSJeff Roberson 1374060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1375060563ecSJulian Elischer td->td_oncpu = NOCPU; 137652eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 137777918643SStephan Uphoff td->td_owepreempt = 0; 137835e6168fSJeff Roberson 1379b11fdad0SJeff Roberson /* 1380ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1381b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1382b11fdad0SJeff Roberson */ 13832454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1384bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1385ad1e7d28SJulian Elischer } else if ((ts->ts_flags & TSF_ASSIGNED) == 0) { 1386ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1387ad1e7d28SJulian Elischer tdq_load_rem(ksq, ts); 1388ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1389f2b74cbfSJeff Roberson /* 1390ed062c8dSJulian Elischer * Don't allow the thread to migrate 1391ed062c8dSJulian Elischer * from a preemption. 1392f2b74cbfSJeff Roberson */ 1393ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 1394598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 
1395598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1396598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1397ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 13988460a577SJohn Birrell } 1399ed062c8dSJulian Elischer } 1400d39063f2SJulian Elischer if (newtd != NULL) { 1401c20c691bSJulian Elischer /* 14026680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 14036680bbd5SJeff Roberson * added to the run queue and then chosen. 1404c20c691bSJulian Elischer */ 1405ad1e7d28SJulian Elischer newtd->td_sched->ts_flags |= TSF_DIDRUN; 1406ad1e7d28SJulian Elischer newtd->td_sched->ts_runq = ksq->ksq_curr; 1407c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1408ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1409d39063f2SJulian Elischer } else 14102454aaf5SJeff Roberson newtd = choosethread(); 1411ebccf1e3SJoseph Koshy if (td != newtd) { 1412ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1413ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1414ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1415ebccf1e3SJoseph Koshy #endif 14168460a577SJohn Birrell 1417ae53b483SJeff Roberson cpu_switch(td, newtd); 1418ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1419ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1420ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1421ebccf1e3SJoseph Koshy #endif 1422ebccf1e3SJoseph Koshy } 1423ebccf1e3SJoseph Koshy 1424ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 142535e6168fSJeff Roberson 1426060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 142735e6168fSJeff Roberson } 142835e6168fSJeff Roberson 142935e6168fSJeff Roberson void 1430fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 143135e6168fSJeff Roberson { 1432ad1e7d28SJulian Elischer struct td_sched *ts; 143335e6168fSJeff Roberson struct thread *td; 1434ad1e7d28SJulian Elischer struct tdq *tdq; 143535e6168fSJeff Roberson 1436fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14370b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 143815dc847eSJeff Roberson /* 1439ad1e7d28SJulian Elischer * We need to adjust the nice counts for running threads. 
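 *
 * The tdq_nice_rem()/tdq_nice_add() pairs below keep each run queue's
 * nice accounting (and with it ksq_nicemin) up to date so that
 * sched_slice() keeps sizing slices relative to the least nice thread;
 * afterwards every thread in the process is re-prioritized and marked
 * TDF_NEEDRESCHED.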
144015dc847eSJeff Roberson */ 14418460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14428460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) { 1443ad1e7d28SJulian Elischer ts = td->td_sched; 1444ad1e7d28SJulian Elischer if (ts->ts_runq == NULL) 144515dc847eSJeff Roberson continue; 1446ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1447ad1e7d28SJulian Elischer tdq_nice_rem(tdq, p->p_nice); 1448ad1e7d28SJulian Elischer tdq_nice_add(tdq, nice); 144915dc847eSJeff Roberson } 1450fa885116SJulian Elischer } 1451fa885116SJulian Elischer p->p_nice = nice; 14528460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14538460a577SJohn Birrell sched_priority(td); 14544a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 145535e6168fSJeff Roberson } 1456fa885116SJulian Elischer } 145735e6168fSJeff Roberson 145835e6168fSJeff Roberson void 145944f3b092SJohn Baldwin sched_sleep(struct thread *td) 146035e6168fSJeff Roberson { 146135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 146235e6168fSJeff Roberson 1463ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 146435e6168fSJeff Roberson } 146535e6168fSJeff Roberson 146635e6168fSJeff Roberson void 146735e6168fSJeff Roberson sched_wakeup(struct thread *td) 146835e6168fSJeff Roberson { 146935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 147035e6168fSJeff Roberson 147135e6168fSJeff Roberson /* 1472ad1e7d28SJulian Elischer * Let the procs know how long we slept for. This is because process 1473ad1e7d28SJulian Elischer * interactivity behavior is modeled in the procs. 147435e6168fSJeff Roberson */ 1475ad1e7d28SJulian Elischer if (td->td_sched->ts_slptime) { 147615dc847eSJeff Roberson int hzticks; 1477f1e8dc4aSJeff Roberson 1478ad1e7d28SJulian Elischer hzticks = (ticks - td->td_sched->ts_slptime) << 10; 1479d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 14808460a577SJohn Birrell td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX; 14818460a577SJohn Birrell td->td_sched->skg_runtime = 1; 1482d322132cSJeff Roberson } else { 14838460a577SJohn Birrell td->td_sched->skg_slptime += hzticks; 14848460a577SJohn Birrell sched_interact_update(td); 1485d322132cSJeff Roberson } 14868460a577SJohn Birrell sched_priority(td); 1487ad1e7d28SJulian Elischer sched_slice(td->td_sched); 1488ad1e7d28SJulian Elischer td->td_sched->ts_slptime = 0; 1489f1e8dc4aSJeff Roberson } 14902630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 149135e6168fSJeff Roberson } 149235e6168fSJeff Roberson 149335e6168fSJeff Roberson /* 149435e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 149535e6168fSJeff Roberson * priority. 
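 *
 * The child inherits the parent's skg_slptime and skg_runtime history,
 * which sched_interact_fork() scales back down when their sum exceeds
 * SCHED_SLP_RUN_FORK, and it starts with a single tick slice so its own
 * interactivity is learned quickly.  The parent is charged an extra
 * tickincr of run time as the cost of the fork.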
149635e6168fSJeff Roberson */ 149735e6168fSJeff Roberson void 14988460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 149915dc847eSJeff Roberson { 15008460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1501ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1502ad1e7d28SJulian Elischer } 1503ad1e7d28SJulian Elischer 1504ad1e7d28SJulian Elischer void 1505ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1506ad1e7d28SJulian Elischer { 1507ad1e7d28SJulian Elischer struct td_sched *ts; 1508ad1e7d28SJulian Elischer struct td_sched *ts2; 15098460a577SJohn Birrell 15108460a577SJohn Birrell child->td_sched->skg_slptime = td->td_sched->skg_slptime; 15118460a577SJohn Birrell child->td_sched->skg_runtime = td->td_sched->skg_runtime; 15128460a577SJohn Birrell child->td_user_pri = td->td_user_pri; 1513f645b5daSMaxim Konovalov child->td_base_user_pri = td->td_base_user_pri; 15148460a577SJohn Birrell sched_interact_fork(child); 15158460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 15168460a577SJohn Birrell sched_interact_update(td); 15178460a577SJohn Birrell 1518ed062c8dSJulian Elischer sched_newthread(child); 15198460a577SJohn Birrell 1520ad1e7d28SJulian Elischer ts = td->td_sched; 1521ad1e7d28SJulian Elischer ts2 = child->td_sched; 1522ad1e7d28SJulian Elischer ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ 1523ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1524ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1525ed062c8dSJulian Elischer 1526ed062c8dSJulian Elischer /* Grab our parent's cpu estimation information. */ 1527ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1528ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1529ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 153015dc847eSJeff Roberson } 153115dc847eSJeff Roberson 153215dc847eSJeff Roberson void 15338460a577SJohn Birrell sched_class(struct thread *td, int class) 153415dc847eSJeff Roberson { 1535ad1e7d28SJulian Elischer struct tdq *tdq; 1536ad1e7d28SJulian Elischer struct td_sched *ts; 1537ef1134c9SJeff Roberson int nclass; 1538ef1134c9SJeff Roberson int oclass; 153915dc847eSJeff Roberson 15402056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15418460a577SJohn Birrell if (td->td_pri_class == class) 154215dc847eSJeff Roberson return; 154315dc847eSJeff Roberson 1544ef1134c9SJeff Roberson nclass = PRI_BASE(class); 15458460a577SJohn Birrell oclass = PRI_BASE(td->td_pri_class); 1546ad1e7d28SJulian Elischer ts = td->td_sched; 1547ad1e7d28SJulian Elischer if (!((ts->ts_state != TSS_ONRUNQ && 1548ad1e7d28SJulian Elischer ts->ts_state != TSS_THREAD) || ts->ts_runq == NULL)) { 1549ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 155015dc847eSJeff Roberson 1551ef1134c9SJeff Roberson #ifdef SMP 1552155b9987SJeff Roberson /* 1553155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1554155b9987SJeff Roberson * count because we could be changing to or from an interrupt 1555155b9987SJeff Roberson * class.
1556155b9987SJeff Roberson */ 1557ad1e7d28SJulian Elischer if (ts->ts_state == TSS_ONRUNQ) { 1558ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) { 1559ad1e7d28SJulian Elischer tdq->ksq_transferable--; 1560ad1e7d28SJulian Elischer tdq->ksq_group->ksg_transferable--; 156180f86c9fSJeff Roberson } 1562ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) { 1563ad1e7d28SJulian Elischer tdq->ksq_transferable++; 1564ad1e7d28SJulian Elischer tdq->ksq_group->ksg_transferable++; 156580f86c9fSJeff Roberson } 1566155b9987SJeff Roberson } 1567ef1134c9SJeff Roberson #endif 1568155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1569ad1e7d28SJulian Elischer tdq->ksq_load_timeshare--; 1570ad1e7d28SJulian Elischer tdq_nice_rem(tdq, td->td_proc->p_nice); 1571155b9987SJeff Roberson } 1572155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1573ad1e7d28SJulian Elischer tdq->ksq_load_timeshare++; 1574ad1e7d28SJulian Elischer tdq_nice_add(tdq, td->td_proc->p_nice); 1575ad1e7d28SJulian Elischer } 1576155b9987SJeff Roberson } 157715dc847eSJeff Roberson 15788460a577SJohn Birrell td->td_pri_class = class; 157935e6168fSJeff Roberson } 158035e6168fSJeff Roberson 158135e6168fSJeff Roberson /* 158235e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 158335e6168fSJeff Roberson */ 158435e6168fSJeff Roberson void 1585ed062c8dSJulian Elischer sched_exit(struct proc *p, struct thread *childtd) 158635e6168fSJeff Roberson { 15878460a577SJohn Birrell struct thread *parent = FIRST_THREAD_IN_PROC(p); 158835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1589141ad61cSJeff Roberson 15908460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 159181d47d3fSJeff Roberson childtd, childtd->td_proc->p_comm, childtd->td_priority); 15928460a577SJohn Birrell 15938460a577SJohn Birrell /* parent->td_sched->skg_slptime += childtd->td_sched->skg_slptime; */ 15948460a577SJohn Birrell parent->td_sched->skg_runtime += childtd->td_sched->skg_runtime; 15958460a577SJohn Birrell sched_interact_update(parent); 15968460a577SJohn Birrell 1597ad1e7d28SJulian Elischer tdq_load_rem(TDQ_CPU(childtd->td_sched->ts_cpu), childtd->td_sched); 1598ad1e7d28SJulian Elischer } 1599ad1e7d28SJulian Elischer 1600ad1e7d28SJulian Elischer void 1601ad1e7d28SJulian Elischer sched_exit_thread(struct thread *td, struct thread *childtd) 1602ad1e7d28SJulian Elischer { 1603ad1e7d28SJulian Elischer } 1604ad1e7d28SJulian Elischer 1605ad1e7d28SJulian Elischer void 1606ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1607ad1e7d28SJulian Elischer { 1608ad1e7d28SJulian Elischer /* 1609ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1610ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1611ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1612ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1613ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1614ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1615ad1e7d28SJulian Elischer * it perfectly here. 
1616ad1e7d28SJulian Elischer */ 1617ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1618ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1619ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1620ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1621ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1622ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1623ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1624ad1e7d28SJulian Elischer } 162535e6168fSJeff Roberson } 162635e6168fSJeff Roberson 162735e6168fSJeff Roberson void 16287cf90fb3SJeff Roberson sched_clock(struct thread *td) 162935e6168fSJeff Roberson { 1630ad1e7d28SJulian Elischer struct tdq *tdq; 1631ad1e7d28SJulian Elischer struct td_sched *ts; 163235e6168fSJeff Roberson 1633dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1634ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1635dc03363dSJeff Roberson #ifdef SMP 1636598b368dSJeff Roberson if (ticks >= bal_tick) 1637dc03363dSJeff Roberson sched_balance(); 1638598b368dSJeff Roberson if (ticks >= gbal_tick && balance_groups) 1639dc03363dSJeff Roberson sched_balance_groups(); 16402454aaf5SJeff Roberson /* 16412454aaf5SJeff Roberson * We could have been assigned a non real-time thread without an 16422454aaf5SJeff Roberson * IPI. 16432454aaf5SJeff Roberson */ 1644ad1e7d28SJulian Elischer if (tdq->ksq_assigned) 1645ad1e7d28SJulian Elischer tdq_assign(tdq); /* Potentially sets NEEDRESCHED */ 1646dc03363dSJeff Roberson #endif 1647ad1e7d28SJulian Elischer ts = td->td_sched; 164835e6168fSJeff Roberson 16490a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 1650ad1e7d28SJulian Elischer ts->ts_ticks++; 1651ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1652a8949de2SJeff Roberson 1653d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1654ad1e7d28SJulian Elischer if (ts->ts_ftick + SCHED_CPU_TICKS + hz < ts->ts_ltick) 1655ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 1656d465fb95SJeff Roberson 165743fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 165835e6168fSJeff Roberson return; 16593f741ca1SJeff Roberson /* 16608460a577SJohn Birrell * We only run the slicing code for TIMESHARE threads. 1661a8949de2SJeff Roberson */ 16628460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1663a8949de2SJeff Roberson return; 1664a8949de2SJeff Roberson /* 16658460a577SJohn Birrell * We used a tick; charge it to the thread so that we can compute our 166615dc847eSJeff Roberson * interactivity. 166715dc847eSJeff Roberson */ 16688460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 16698460a577SJohn Birrell sched_interact_update(td); 1670407b0157SJeff Roberson 167135e6168fSJeff Roberson /* 167235e6168fSJeff Roberson * We used up one time slice. 167335e6168fSJeff Roberson */ 1674ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 167515dc847eSJeff Roberson return; 167635e6168fSJeff Roberson /* 167715dc847eSJeff Roberson * We're out of time, recompute priorities and requeue.
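 * The thread is pulled out of the load accounting, handed a fresh
 * priority and slice, and requeued on either ksq_curr or ksq_next
 * depending on SCHED_CURR(), with TDF_NEEDRESCHED set so that a switch
 * is requested soon afterwards.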
167835e6168fSJeff Roberson */ 1679ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 16808460a577SJohn Birrell sched_priority(td); 1681ad1e7d28SJulian Elischer sched_slice(ts); 1682ad1e7d28SJulian Elischer if (SCHED_CURR(td, ts)) 1683ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_curr; 168415dc847eSJeff Roberson else 1685ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_next; 1686ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 16874a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 168835e6168fSJeff Roberson } 168935e6168fSJeff Roberson 169035e6168fSJeff Roberson int 169135e6168fSJeff Roberson sched_runnable(void) 169235e6168fSJeff Roberson { 1693ad1e7d28SJulian Elischer struct tdq *tdq; 1694b90816f1SJeff Roberson int load; 169535e6168fSJeff Roberson 1696b90816f1SJeff Roberson load = 1; 1697b90816f1SJeff Roberson 1698ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 169922bf7d9aSJeff Roberson #ifdef SMP 1700ad1e7d28SJulian Elischer if (tdq->ksq_assigned) { 170146f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 1702ad1e7d28SJulian Elischer tdq_assign(tdq); 170346f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 170446f8b265SJeff Roberson } 170522bf7d9aSJeff Roberson #endif 17063f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1707ad1e7d28SJulian Elischer if (tdq->ksq_load > 0) 17083f741ca1SJeff Roberson goto out; 17093f741ca1SJeff Roberson } else 1710ad1e7d28SJulian Elischer if (tdq->ksq_load - 1 > 0) 1711b90816f1SJeff Roberson goto out; 1712b90816f1SJeff Roberson load = 0; 1713b90816f1SJeff Roberson out: 1714b90816f1SJeff Roberson return (load); 171535e6168fSJeff Roberson } 171635e6168fSJeff Roberson 1717ad1e7d28SJulian Elischer struct td_sched * 1718c9f25d8fSJeff Roberson sched_choose(void) 1719c9f25d8fSJeff Roberson { 1720ad1e7d28SJulian Elischer struct tdq *tdq; 1721ad1e7d28SJulian Elischer struct td_sched *ts; 172215dc847eSJeff Roberson 1723b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1724ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 172515dc847eSJeff Roberson #ifdef SMP 172680f86c9fSJeff Roberson restart: 1727ad1e7d28SJulian Elischer if (tdq->ksq_assigned) 1728ad1e7d28SJulian Elischer tdq_assign(tdq); 172915dc847eSJeff Roberson #endif 1730ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1731ad1e7d28SJulian Elischer if (ts) { 173222bf7d9aSJeff Roberson #ifdef SMP 1733ad1e7d28SJulian Elischer if (ts->ts_thread->td_pri_class == PRI_IDLE) 1734ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 173580f86c9fSJeff Roberson goto restart; 173622bf7d9aSJeff Roberson #endif 1737ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1738ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1739ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_PREEMPTED; 1740ad1e7d28SJulian Elischer return (ts); 174135e6168fSJeff Roberson } 1742c9f25d8fSJeff Roberson #ifdef SMP 1743ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 174480f86c9fSJeff Roberson goto restart; 1745c9f25d8fSJeff Roberson #endif 174615dc847eSJeff Roberson return (NULL); 174735e6168fSJeff Roberson } 174835e6168fSJeff Roberson 174935e6168fSJeff Roberson void 17502630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 175135e6168fSJeff Roberson { 1752ad1e7d28SJulian Elischer struct tdq *tdq; 1753ad1e7d28SJulian Elischer struct td_sched *ts; 1754598b368dSJeff Roberson int preemptive; 17552454aaf5SJeff Roberson int canmigrate; 175622bf7d9aSJeff Roberson int class; 1757c9f25d8fSJeff Roberson 175881d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 175981d47d3fSJeff Roberson td, td->td_proc->p_comm, 
td->td_priority, curthread, 176081d47d3fSJeff Roberson curthread->td_proc->p_comm); 176122bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1762ad1e7d28SJulian Elischer ts = td->td_sched; 1763598b368dSJeff Roberson canmigrate = 1; 1764598b368dSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 17658460a577SJohn Birrell class = PRI_BASE(td->td_pri_class); 1766ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1767ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_INTERNAL; 1768598b368dSJeff Roberson #ifdef SMP 1769ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1770ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_REMOVED) 1771ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_REMOVED; 177222bf7d9aSJeff Roberson return; 17732d59a44dSJeff Roberson } 1774ad1e7d28SJulian Elischer canmigrate = THREAD_CAN_MIGRATE(ts); 1775f8ec133eSDavid Xu /* 1776f8ec133eSDavid Xu * Don't migrate running threads here. Force the long term balancer 1777f8ec133eSDavid Xu * to do it. 1778f8ec133eSDavid Xu */ 1779ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_HOLD) { 1780ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 1781f8ec133eSDavid Xu canmigrate = 0; 1782f8ec133eSDavid Xu } 1783598b368dSJeff Roberson #endif 1784ad1e7d28SJulian Elischer KASSERT(ts->ts_state != TSS_ONRUNQ, 1785ad1e7d28SJulian Elischer ("sched_add: thread %p (%s) already in run queue", td, 17868460a577SJohn Birrell td->td_proc->p_comm)); 17878460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 17885d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1789ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1790ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 17911278181cSDavid Xu if (flags & SRQ_PREEMPTED) 1792ad1e7d28SJulian Elischer ts->ts_flags |= TSF_PREEMPTED; 179322bf7d9aSJeff Roberson switch (class) { 1794a8949de2SJeff Roberson case PRI_ITHD: 1795a8949de2SJeff Roberson case PRI_REALTIME: 1796ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_curr; 1797ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MAX; 1798598b368dSJeff Roberson if (canmigrate) 1799ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 1800a8949de2SJeff Roberson break; 1801a8949de2SJeff Roberson case PRI_TIMESHARE: 1802ad1e7d28SJulian Elischer if (SCHED_CURR(td, ts)) 1803ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_curr; 180415dc847eSJeff Roberson else 1805ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_next; 180615dc847eSJeff Roberson break; 180715dc847eSJeff Roberson case PRI_IDLE: 180815dc847eSJeff Roberson /* 180915dc847eSJeff Roberson * This is for priority prop. 181015dc847eSJeff Roberson */ 1811ad1e7d28SJulian Elischer if (ts->ts_thread->td_priority < PRI_MIN_IDLE) 1812ad1e7d28SJulian Elischer ts->ts_runq = tdq->ksq_curr; 181315dc847eSJeff Roberson else 1814ad1e7d28SJulian Elischer ts->ts_runq = &tdq->ksq_idle; 1815ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; 181615dc847eSJeff Roberson break; 181715dc847eSJeff Roberson default: 1818d322132cSJeff Roberson panic("Unknown pri class."); 1819a8949de2SJeff Roberson break; 1820a6ed4186SJeff Roberson } 182122bf7d9aSJeff Roberson #ifdef SMP 18222454aaf5SJeff Roberson /* 18232454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 
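 * tdq_notify() queues the td_sched for the remote cpu; the target later
 * folds it into its own run queue via tdq_assign(), as seen in
 * sched_clock() and sched_runnable() above.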
18242454aaf5SJeff Roberson */ 1825ad1e7d28SJulian Elischer if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid) ) { 1826ad1e7d28SJulian Elischer ts->ts_runq = NULL; 1827ad1e7d28SJulian Elischer tdq_notify(ts, ts->ts_cpu); 182880f86c9fSJeff Roberson return; 182980f86c9fSJeff Roberson } 183022bf7d9aSJeff Roberson /* 1831670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1832670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 183322bf7d9aSJeff Roberson */ 183480f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 1835ad1e7d28SJulian Elischer (tdq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 183680f86c9fSJeff Roberson /* 183780f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 183880f86c9fSJeff Roberson * from the global idle mask. 183980f86c9fSJeff Roberson */ 1840ad1e7d28SJulian Elischer if (tdq->ksq_group->ksg_idlemask == 1841ad1e7d28SJulian Elischer tdq->ksq_group->ksg_cpumask) 1842ad1e7d28SJulian Elischer atomic_clear_int(&tdq_idle, tdq->ksq_group->ksg_mask); 184380f86c9fSJeff Roberson /* 184480f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 184580f86c9fSJeff Roberson */ 1846ad1e7d28SJulian Elischer tdq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1847ad1e7d28SJulian Elischer } else if (canmigrate && tdq->ksq_load > 1 && class != PRI_ITHD) 1848ad1e7d28SJulian Elischer if (tdq_transfer(tdq, ts, class)) 1849670c524fSJeff Roberson return; 1850ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 185122bf7d9aSJeff Roberson #endif 1852f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1853ad1e7d28SJulian Elischer ts->ts_runq == tdq->ksq_curr) 185422bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 185563fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18560c0b25aeSJohn Baldwin return; 1857ad1e7d28SJulian Elischer ts->ts_state = TSS_ONRUNQ; 185835e6168fSJeff Roberson 1859ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1860ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 186135e6168fSJeff Roberson } 186235e6168fSJeff Roberson 186335e6168fSJeff Roberson void 18647cf90fb3SJeff Roberson sched_rem(struct thread *td) 186535e6168fSJeff Roberson { 1866ad1e7d28SJulian Elischer struct tdq *tdq; 1867ad1e7d28SJulian Elischer struct td_sched *ts; 18687cf90fb3SJeff Roberson 186981d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 187081d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 187181d47d3fSJeff Roberson curthread->td_proc->p_comm); 1872598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1873ad1e7d28SJulian Elischer ts = td->td_sched; 1874ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_PREEMPTED; 1875ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1876ad1e7d28SJulian Elischer ts->ts_flags |= TSF_REMOVED; 187722bf7d9aSJeff Roberson return; 18782d59a44dSJeff Roberson } 1879ad1e7d28SJulian Elischer KASSERT((ts->ts_state == TSS_ONRUNQ), 1880ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 188135e6168fSJeff Roberson 1882ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1883ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1884ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1885ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 188635e6168fSJeff Roberson } 188735e6168fSJeff Roberson 188835e6168fSJeff Roberson fixpt_t 18897cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 189035e6168fSJeff Roberson { 
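	/*
	 * The value computed below is roughly the share of recent stathz
	 * ticks this thread received, expressed in fixed point (FSCALE);
	 * the underlying tick window is maintained by sched_pctcpu_update().
	 */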
189135e6168fSJeff Roberson fixpt_t pctcpu; 1892ad1e7d28SJulian Elischer struct td_sched *ts; 189335e6168fSJeff Roberson 189435e6168fSJeff Roberson pctcpu = 0; 1895ad1e7d28SJulian Elischer ts = td->td_sched; 1896ad1e7d28SJulian Elischer if (ts == NULL) 1897484288deSJeff Roberson return (0); 189835e6168fSJeff Roberson 1899b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1900ad1e7d28SJulian Elischer if (ts->ts_ticks) { 190135e6168fSJeff Roberson int rtick; 190235e6168fSJeff Roberson 1903210491d3SJeff Roberson /* 1904210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1905210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1906210491d3SJeff Roberson * rounding errors. 1907210491d3SJeff Roberson */ 1908ad1e7d28SJulian Elischer if (ts->ts_ftick + SCHED_CPU_TICKS < ts->ts_ltick || 1909ad1e7d28SJulian Elischer ts->ts_ltick < (ticks - (hz / 2))) 1910ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 191135e6168fSJeff Roberson /* How many rtick per second ? */ 1912ad1e7d28SJulian Elischer rtick = min(ts->ts_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 19137121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 191435e6168fSJeff Roberson } 191535e6168fSJeff Roberson 1916ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1917828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 191835e6168fSJeff Roberson 191935e6168fSJeff Roberson return (pctcpu); 192035e6168fSJeff Roberson } 192135e6168fSJeff Roberson 19229bacd788SJeff Roberson void 19239bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19249bacd788SJeff Roberson { 1925ad1e7d28SJulian Elischer struct td_sched *ts; 19269bacd788SJeff Roberson 19279bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1928ad1e7d28SJulian Elischer ts = td->td_sched; 1929ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 193080f86c9fSJeff Roberson #ifdef SMP 193180f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 19329bacd788SJeff Roberson return; 19339bacd788SJeff Roberson /* sched_rem without the runq_remove */ 1934ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1935ad1e7d28SJulian Elischer tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts); 1936ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 19379bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
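	 * The code above already dropped us from the local queue's load with
	 * tdq_load_rem() and handed us to the target with tdq_notify(), so
	 * the voluntary switch below just lets the new cpu pick us up.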
*/ 1938279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19399bacd788SJeff Roberson #endif 19409bacd788SJeff Roberson } 19419bacd788SJeff Roberson 19429bacd788SJeff Roberson void 19439bacd788SJeff Roberson sched_unbind(struct thread *td) 19449bacd788SJeff Roberson { 19459bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1946ad1e7d28SJulian Elischer td->td_sched->ts_flags &= ~TSF_BOUND; 19479bacd788SJeff Roberson } 19489bacd788SJeff Roberson 194935e6168fSJeff Roberson int 1950ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1951ebccf1e3SJoseph Koshy { 1952ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1953ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1954ebccf1e3SJoseph Koshy } 1955ebccf1e3SJoseph Koshy 195636ec198bSDavid Xu void 195736ec198bSDavid Xu sched_relinquish(struct thread *td) 195836ec198bSDavid Xu { 195936ec198bSDavid Xu mtx_lock_spin(&sched_lock); 19608460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 196136ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 196236ec198bSDavid Xu mi_switch(SW_VOL, NULL); 196336ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 196436ec198bSDavid Xu } 196536ec198bSDavid Xu 1966ebccf1e3SJoseph Koshy int 196733916c36SJeff Roberson sched_load(void) 196833916c36SJeff Roberson { 196933916c36SJeff Roberson #ifdef SMP 197033916c36SJeff Roberson int total; 197133916c36SJeff Roberson int i; 197233916c36SJeff Roberson 197333916c36SJeff Roberson total = 0; 197433916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 1975ad1e7d28SJulian Elischer total += TDQ_GROUP(i)->ksg_load; 197633916c36SJeff Roberson return (total); 197733916c36SJeff Roberson #else 1978ad1e7d28SJulian Elischer return (TDQ_SELF()->ksq_sysload); 197933916c36SJeff Roberson #endif 198033916c36SJeff Roberson } 198133916c36SJeff Roberson 198233916c36SJeff Roberson int 198335e6168fSJeff Roberson sched_sizeof_proc(void) 198435e6168fSJeff Roberson { 198535e6168fSJeff Roberson return (sizeof(struct proc)); 198635e6168fSJeff Roberson } 198735e6168fSJeff Roberson 198835e6168fSJeff Roberson int 198935e6168fSJeff Roberson sched_sizeof_thread(void) 199035e6168fSJeff Roberson { 199135e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 199235e6168fSJeff Roberson } 1993b41f1452SDavid Xu 1994b41f1452SDavid Xu void 1995b41f1452SDavid Xu sched_tick(void) 1996b41f1452SDavid Xu { 1997b41f1452SDavid Xu } 1998ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1999ed062c8dSJulian Elischer #include "kern/kern_switch.c" 2000