135e6168fSJeff Roberson /*- 2d2ad694cSJeff Roberson * Copyright (c) 2002-2006, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. 
O'Brien 304da0d332SPeter Wemm #include "opt_hwpmc_hooks.h" 314da0d332SPeter Wemm #include "opt_sched.h" 329923b511SScott Long 3335e6168fSJeff Roberson #include <sys/param.h> 3435e6168fSJeff Roberson #include <sys/systm.h> 352c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3635e6168fSJeff Roberson #include <sys/kernel.h> 3735e6168fSJeff Roberson #include <sys/ktr.h> 3835e6168fSJeff Roberson #include <sys/lock.h> 3935e6168fSJeff Roberson #include <sys/mutex.h> 4035e6168fSJeff Roberson #include <sys/proc.h> 41245f3abfSJeff Roberson #include <sys/resource.h> 429bacd788SJeff Roberson #include <sys/resourcevar.h> 4335e6168fSJeff Roberson #include <sys/sched.h> 4435e6168fSJeff Roberson #include <sys/smp.h> 4535e6168fSJeff Roberson #include <sys/sx.h> 4635e6168fSJeff Roberson #include <sys/sysctl.h> 4735e6168fSJeff Roberson #include <sys/sysproto.h> 48f5c157d9SJohn Baldwin #include <sys/turnstile.h> 493db720fdSDavid Xu #include <sys/umtx.h> 5035e6168fSJeff Roberson #include <sys/vmmeter.h> 5135e6168fSJeff Roberson #ifdef KTRACE 5235e6168fSJeff Roberson #include <sys/uio.h> 5335e6168fSJeff Roberson #include <sys/ktrace.h> 5435e6168fSJeff Roberson #endif 5535e6168fSJeff Roberson 56ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 57ebccf1e3SJoseph Koshy #include <sys/pmckern.h> 58ebccf1e3SJoseph Koshy #endif 59ebccf1e3SJoseph Koshy 6035e6168fSJeff Roberson #include <machine/cpu.h> 6122bf7d9aSJeff Roberson #include <machine/smp.h> 6235e6168fSJeff Roberson 6335e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 6435e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 6535e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6635e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6735e6168fSJeff Roberson 6835e6168fSJeff Roberson static void sched_setup(void *dummy); 6935e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 7035e6168fSJeff Roberson 71a1d4fe69SDavid Xu static void sched_initticks(void *dummy); 72a1d4fe69SDavid Xu SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL) 73a1d4fe69SDavid Xu 74e038d354SScott Long static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 75e1f89c22SJeff Roberson 76e038d354SScott Long SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 77e038d354SScott Long "Scheduler name"); 78dc095794SScott Long 7915dc847eSJeff Roberson static int slice_min = 1; 8015dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 8115dc847eSJeff Roberson 82210491d3SJeff Roberson static int slice_max = 10; 8315dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 8415dc847eSJeff Roberson 8515dc847eSJeff Roberson int realstathz; 86a1d4fe69SDavid Xu int tickincr = 1 << 10; 87783caefbSJeff Roberson 8835e6168fSJeff Roberson /* 89ad1e7d28SJulian Elischer * Thread scheduler specific section. 90ed062c8dSJulian Elischer */ 91ad1e7d28SJulian Elischer struct td_sched { 92ad1e7d28SJulian Elischer TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */ 93ad1e7d28SJulian Elischer int ts_flags; /* (j) TSF_* flags. */ 94ad1e7d28SJulian Elischer struct thread *ts_thread; /* (*) Active associated thread. */ 95ad1e7d28SJulian Elischer fixpt_t ts_pctcpu; /* (j) %cpu during p_swtime. */ 96ad1e7d28SJulian Elischer u_char ts_rqindex; /* (j) Run queue index. 
*/ 97ed062c8dSJulian Elischer enum { 98ad1e7d28SJulian Elischer TSS_THREAD = 0x0, /* slaved to thread state */ 99ad1e7d28SJulian Elischer TSS_ONRUNQ 100ad1e7d28SJulian Elischer } ts_state; /* (j) thread sched specific status. */ 101ad1e7d28SJulian Elischer int ts_slptime; 102ad1e7d28SJulian Elischer int ts_slice; 103ad1e7d28SJulian Elischer struct runq *ts_runq; 104ad1e7d28SJulian Elischer u_char ts_cpu; /* CPU that we have affinity for. */ 105ed062c8dSJulian Elischer /* The following variables are only used for pctcpu calculation */ 106ad1e7d28SJulian Elischer int ts_ltick; /* Last tick that we were running on */ 107ad1e7d28SJulian Elischer int ts_ftick; /* First tick that we were running on */ 108ad1e7d28SJulian Elischer int ts_ticks; /* Tick count */ 109ed062c8dSJulian Elischer 1108460a577SJohn Birrell /* originally from kg_sched */ 1118460a577SJohn Birrell int skg_slptime; /* Number of ticks we vol. slept */ 1128460a577SJohn Birrell int skg_runtime; /* Number of ticks we were running */ 113ed062c8dSJulian Elischer }; 114ad1e7d28SJulian Elischer #define ts_assign ts_procq.tqe_next 115ad1e7d28SJulian Elischer /* flags kept in ts_flags */ 116ad1e7d28SJulian Elischer #define TSF_ASSIGNED 0x0001 /* Thread is being migrated. */ 117ad1e7d28SJulian Elischer #define TSF_BOUND 0x0002 /* Thread can not migrate. */ 118ad1e7d28SJulian Elischer #define TSF_XFERABLE 0x0004 /* Thread was added as transferable. */ 119ad1e7d28SJulian Elischer #define TSF_HOLD 0x0008 /* Thread is temporarily bound. */ 120ad1e7d28SJulian Elischer #define TSF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 121ad1e7d28SJulian Elischer #define TSF_INTERNAL 0x0020 /* Thread added due to migration. */ 122ad1e7d28SJulian Elischer #define TSF_PREEMPTED 0x0040 /* Thread was preempted */ 123d2ad694cSJeff Roberson #define TSF_DIDRUN 0x2000 /* Thread actually ran. */ 124d2ad694cSJeff Roberson #define TSF_EXIT 0x4000 /* Thread is being killed. */ 12535e6168fSJeff Roberson 126ad1e7d28SJulian Elischer static struct td_sched td_sched0; 12735e6168fSJeff Roberson 12835e6168fSJeff Roberson /* 129665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 130d2ad694cSJeff Roberson * give lower(better) priorities to threads that use less CPU. The nice 131665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 132665cb285SJeff Roberson * on latency. 133e1f89c22SJeff Roberson * 134e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 135665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 136e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 13735e6168fSJeff Roberson */ 138407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 139a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 140a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 141665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 14215dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 143665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 14435e6168fSJeff Roberson 14535e6168fSJeff Roberson /* 146e1f89c22SJeff Roberson * These determine the interactivity of a process. 14735e6168fSJeff Roberson * 148407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 149407b0157SJeff Roberson * before throttling back. 150d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 
151210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better.
152e1f89c22SJeff Roberson * INTERACT_THRESH: Threshold for placement on the current runq.
15335e6168fSJeff Roberson */
1544c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10)
155d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10)
156210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100)
157210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2)
1584c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30)
159e1f89c22SJeff Roberson
16035e6168fSJeff Roberson /*
16135e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is
16235e6168fSJeff Roberson * granted to each thread.
16335e6168fSJeff Roberson *
16435e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks.
16535e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted.
16635e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz.
167245f3abfSJeff Roberson * SLICE_SCALE: The number of slices granted per val in the range of [0, max].
168245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice.
1697d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment.
17035e6168fSJeff Roberson */
17115dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min)
17215dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max)
1730392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max)
1747d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1)
17535e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
17635e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max))
177245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \
1787d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
17935e6168fSJeff Roberson
18035e6168fSJeff Roberson /*
181ed062c8dSJulian Elischer * This macro determines whether or not the thread belongs on the current or
18235e6168fSJeff Roberson * next run queue.
18335e6168fSJeff Roberson */
1848460a577SJohn Birrell #define SCHED_INTERACTIVE(td) \
1858460a577SJohn Birrell (sched_interact_score(td) < SCHED_INTERACT_THRESH)
186ad1e7d28SJulian Elischer #define SCHED_CURR(td, ts) \
187ad1e7d28SJulian Elischer ((ts->ts_thread->td_flags & TDF_BORROWING) || \
188ad1e7d28SJulian Elischer (ts->ts_flags & TSF_PREEMPTED) || SCHED_INTERACTIVE(td))
18935e6168fSJeff Roberson
19035e6168fSJeff Roberson /*
19135e6168fSJeff Roberson * Cpu percentage computation macros and defines.
19235e6168fSJeff Roberson *
19335e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across.
19435e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across.
19535e6168fSJeff Roberson */
19635e6168fSJeff Roberson
1975053d272SJeff Roberson #define SCHED_CPU_TIME 10
19835e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)
19935e6168fSJeff Roberson
20035e6168fSJeff Roberson /*
201ad1e7d28SJulian Elischer * tdq - per processor runqs and statistics.
20235e6168fSJeff Roberson */
203ad1e7d28SJulian Elischer struct tdq {
204d2ad694cSJeff Roberson struct runq tdq_idle; /* Queue of IDLE threads. */
205d2ad694cSJeff Roberson struct runq tdq_timeshare[2]; /* Run queues for !IDLE. */
206d2ad694cSJeff Roberson struct runq *tdq_next; /* Next timeshare queue. */
207d2ad694cSJeff Roberson struct runq *tdq_curr; /* Current queue. */
208d2ad694cSJeff Roberson int tdq_load_timeshare; /* Load for timeshare. */
209d2ad694cSJeff Roberson int tdq_load; /* Aggregate load. */
210d2ad694cSJeff Roberson short tdq_nice[SCHED_PRI_NRESV]; /* threads in each nice bin. */
211d2ad694cSJeff Roberson short tdq_nicemin; /* Least nice. */
2125d7ef00cSJeff Roberson #ifdef SMP
213d2ad694cSJeff Roberson int tdq_transferable;
214d2ad694cSJeff Roberson LIST_ENTRY(tdq) tdq_siblings; /* Next in tdq group. */
215d2ad694cSJeff Roberson struct tdq_group *tdq_group; /* Our processor group. */
216d2ad694cSJeff Roberson volatile struct td_sched *tdq_assigned; /* assigned by another CPU. */
21733916c36SJeff Roberson #else
218d2ad694cSJeff Roberson int tdq_sysload; /* For loadavg, !ITHD load. */
2195d7ef00cSJeff Roberson #endif
22035e6168fSJeff Roberson };
22135e6168fSJeff Roberson
22280f86c9fSJeff Roberson #ifdef SMP
22380f86c9fSJeff Roberson /*
224ad1e7d28SJulian Elischer * tdq groups are groups of processors which can cheaply share threads. When
22580f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other
22680f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt.
22780f86c9fSJeff Roberson * These groups are suitable for SMT (Simultaneous Multi-Threading) and not NUMA.
22880f86c9fSJeff Roberson * In a NUMA environment we'd want an idle bitmap per group and a two-tiered
22980f86c9fSJeff Roberson * load balancer.
23080f86c9fSJeff Roberson */
231ad1e7d28SJulian Elischer struct tdq_group {
232d2ad694cSJeff Roberson int tdg_cpus; /* Count of CPUs in this tdq group. */
233d2ad694cSJeff Roberson cpumask_t tdg_cpumask; /* Mask of cpus in this group. */
234d2ad694cSJeff Roberson cpumask_t tdg_idlemask; /* Idle cpus in this group. */
235d2ad694cSJeff Roberson cpumask_t tdg_mask; /* Bit mask for first cpu. */
236d2ad694cSJeff Roberson int tdg_load; /* Total load of this group. */
237d2ad694cSJeff Roberson int tdg_transferable; /* Transferable load of this group. */
238d2ad694cSJeff Roberson LIST_HEAD(, tdq) tdg_members; /* Linked list of all members. */
23980f86c9fSJeff Roberson };
24080f86c9fSJeff Roberson #endif
24180f86c9fSJeff Roberson
24235e6168fSJeff Roberson /*
243d2ad694cSJeff Roberson * One thread queue per processor.
24435e6168fSJeff Roberson */ 2450a016a05SJeff Roberson #ifdef SMP 246ad1e7d28SJulian Elischer static cpumask_t tdq_idle; 247d2ad694cSJeff Roberson static int tdg_maxid; 248ad1e7d28SJulian Elischer static struct tdq tdq_cpu[MAXCPU]; 249ad1e7d28SJulian Elischer static struct tdq_group tdq_groups[MAXCPU]; 250dc03363dSJeff Roberson static int bal_tick; 251dc03363dSJeff Roberson static int gbal_tick; 252598b368dSJeff Roberson static int balance_groups; 253dc03363dSJeff Roberson 254ad1e7d28SJulian Elischer #define TDQ_SELF() (&tdq_cpu[PCPU_GET(cpuid)]) 255ad1e7d28SJulian Elischer #define TDQ_CPU(x) (&tdq_cpu[(x)]) 256ad1e7d28SJulian Elischer #define TDQ_ID(x) ((x) - tdq_cpu) 257ad1e7d28SJulian Elischer #define TDQ_GROUP(x) (&tdq_groups[(x)]) 25880f86c9fSJeff Roberson #else /* !SMP */ 259ad1e7d28SJulian Elischer static struct tdq tdq_cpu; 260dc03363dSJeff Roberson 261ad1e7d28SJulian Elischer #define TDQ_SELF() (&tdq_cpu) 262ad1e7d28SJulian Elischer #define TDQ_CPU(x) (&tdq_cpu) 2630a016a05SJeff Roberson #endif 26435e6168fSJeff Roberson 265ad1e7d28SJulian Elischer static struct td_sched *sched_choose(void); /* XXX Should be thread * */ 266ad1e7d28SJulian Elischer static void sched_slice(struct td_sched *); 2678460a577SJohn Birrell static void sched_priority(struct thread *); 26821381d1bSJeff Roberson static void sched_thread_priority(struct thread *, u_char); 2698460a577SJohn Birrell static int sched_interact_score(struct thread *); 2708460a577SJohn Birrell static void sched_interact_update(struct thread *); 2718460a577SJohn Birrell static void sched_interact_fork(struct thread *); 272ad1e7d28SJulian Elischer static void sched_pctcpu_update(struct td_sched *); 27335e6168fSJeff Roberson 2745d7ef00cSJeff Roberson /* Operations on per processor queues */ 275ad1e7d28SJulian Elischer static struct td_sched * tdq_choose(struct tdq *); 276ad1e7d28SJulian Elischer static void tdq_setup(struct tdq *); 277ad1e7d28SJulian Elischer static void tdq_load_add(struct tdq *, struct td_sched *); 278ad1e7d28SJulian Elischer static void tdq_load_rem(struct tdq *, struct td_sched *); 279ad1e7d28SJulian Elischer static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int); 280ad1e7d28SJulian Elischer static __inline void tdq_runq_rem(struct tdq *, struct td_sched *); 281ad1e7d28SJulian Elischer static void tdq_nice_add(struct tdq *, int); 282ad1e7d28SJulian Elischer static void tdq_nice_rem(struct tdq *, int); 283ad1e7d28SJulian Elischer void tdq_print(int cpu); 2845d7ef00cSJeff Roberson #ifdef SMP 285ad1e7d28SJulian Elischer static int tdq_transfer(struct tdq *, struct td_sched *, int); 286ad1e7d28SJulian Elischer static struct td_sched *runq_steal(struct runq *); 287dc03363dSJeff Roberson static void sched_balance(void); 288dc03363dSJeff Roberson static void sched_balance_groups(void); 289ad1e7d28SJulian Elischer static void sched_balance_group(struct tdq_group *); 290ad1e7d28SJulian Elischer static void sched_balance_pair(struct tdq *, struct tdq *); 291ad1e7d28SJulian Elischer static void tdq_move(struct tdq *, int); 292ad1e7d28SJulian Elischer static int tdq_idled(struct tdq *); 293ad1e7d28SJulian Elischer static void tdq_notify(struct td_sched *, int); 294ad1e7d28SJulian Elischer static void tdq_assign(struct tdq *); 295ad1e7d28SJulian Elischer static struct td_sched *tdq_steal(struct tdq *, int); 296ad1e7d28SJulian Elischer #define THREAD_CAN_MIGRATE(ts) \ 297ad1e7d28SJulian Elischer ((ts)->ts_thread->td_pinned == 0 && ((ts)->ts_flags & TSF_BOUND) == 0) 2985d7ef00cSJeff Roberson #endif 
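/*
 * Illustrative layout of the structures above, assuming a two CPU
 * machine (the CPU count and masks here are assumptions for the
 * example, not constants from this file): each CPU owns one tdq, and
 * tdqs that can cheaply share threads are collected into a tdq_group.
 * Without an smp_topology, sched_setup() builds one group per CPU, so
 * group 0 has tdg_cpumask 0x1 and group 1 has 0x2.  If the two CPUs
 * are instead an SMT pair in one package, they share a single group
 * with tdg_cpumask 0x3 and tdg_mask 0x1, and that tdg_mask bit is only
 * set in tdq_idle once tdg_idlemask covers the whole tdg_cpumask (see
 * tdq_idled()).
 */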
2995d7ef00cSJeff Roberson 30015dc847eSJeff Roberson void 301ad1e7d28SJulian Elischer tdq_print(int cpu) 30215dc847eSJeff Roberson { 303ad1e7d28SJulian Elischer struct tdq *tdq; 30415dc847eSJeff Roberson int i; 30515dc847eSJeff Roberson 306ad1e7d28SJulian Elischer tdq = TDQ_CPU(cpu); 30715dc847eSJeff Roberson 308ad1e7d28SJulian Elischer printf("tdq:\n"); 309d2ad694cSJeff Roberson printf("\tload: %d\n", tdq->tdq_load); 310d2ad694cSJeff Roberson printf("\tload TIMESHARE: %d\n", tdq->tdq_load_timeshare); 311ef1134c9SJeff Roberson #ifdef SMP 312d2ad694cSJeff Roberson printf("\tload transferable: %d\n", tdq->tdq_transferable); 313ef1134c9SJeff Roberson #endif 314d2ad694cSJeff Roberson printf("\tnicemin:\t%d\n", tdq->tdq_nicemin); 31515dc847eSJeff Roberson printf("\tnice counts:\n"); 316a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 317d2ad694cSJeff Roberson if (tdq->tdq_nice[i]) 31815dc847eSJeff Roberson printf("\t\t%d = %d\n", 319d2ad694cSJeff Roberson i - SCHED_PRI_NHALF, tdq->tdq_nice[i]); 32015dc847eSJeff Roberson } 32115dc847eSJeff Roberson 322155b9987SJeff Roberson static __inline void 323ad1e7d28SJulian Elischer tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags) 324155b9987SJeff Roberson { 325155b9987SJeff Roberson #ifdef SMP 326ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) { 327d2ad694cSJeff Roberson tdq->tdq_transferable++; 328d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 329ad1e7d28SJulian Elischer ts->ts_flags |= TSF_XFERABLE; 33080f86c9fSJeff Roberson } 331155b9987SJeff Roberson #endif 332ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_PREEMPTED) 3331278181cSDavid Xu flags |= SRQ_PREEMPTED; 334ad1e7d28SJulian Elischer runq_add(ts->ts_runq, ts, flags); 335155b9987SJeff Roberson } 336155b9987SJeff Roberson 337155b9987SJeff Roberson static __inline void 338ad1e7d28SJulian Elischer tdq_runq_rem(struct tdq *tdq, struct td_sched *ts) 339155b9987SJeff Roberson { 340155b9987SJeff Roberson #ifdef SMP 341ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_XFERABLE) { 342d2ad694cSJeff Roberson tdq->tdq_transferable--; 343d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 344ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_XFERABLE; 34580f86c9fSJeff Roberson } 346155b9987SJeff Roberson #endif 347ad1e7d28SJulian Elischer runq_remove(ts->ts_runq, ts); 348155b9987SJeff Roberson } 349155b9987SJeff Roberson 350a8949de2SJeff Roberson static void 351ad1e7d28SJulian Elischer tdq_load_add(struct tdq *tdq, struct td_sched *ts) 3525d7ef00cSJeff Roberson { 353ef1134c9SJeff Roberson int class; 354b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 355ad1e7d28SJulian Elischer class = PRI_BASE(ts->ts_thread->td_pri_class); 356ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 357d2ad694cSJeff Roberson tdq->tdq_load_timeshare++; 358d2ad694cSJeff Roberson tdq->tdq_load++; 359d2ad694cSJeff Roberson CTR1(KTR_SCHED, "load: %d", tdq->tdq_load); 360ad1e7d28SJulian Elischer if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 36133916c36SJeff Roberson #ifdef SMP 362d2ad694cSJeff Roberson tdq->tdq_group->tdg_load++; 36333916c36SJeff Roberson #else 364d2ad694cSJeff Roberson tdq->tdq_sysload++; 365cac77d04SJeff Roberson #endif 366ad1e7d28SJulian Elischer if (ts->ts_thread->td_pri_class == PRI_TIMESHARE) 367ad1e7d28SJulian Elischer tdq_nice_add(tdq, ts->ts_thread->td_proc->p_nice); 3685d7ef00cSJeff Roberson } 36915dc847eSJeff Roberson 370a8949de2SJeff Roberson static void 371ad1e7d28SJulian Elischer tdq_load_rem(struct tdq *tdq, struct td_sched *ts) 
3725d7ef00cSJeff Roberson { 373ef1134c9SJeff Roberson int class; 374b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 375ad1e7d28SJulian Elischer class = PRI_BASE(ts->ts_thread->td_pri_class); 376ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 377d2ad694cSJeff Roberson tdq->tdq_load_timeshare--; 378ad1e7d28SJulian Elischer if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 37933916c36SJeff Roberson #ifdef SMP 380d2ad694cSJeff Roberson tdq->tdq_group->tdg_load--; 38133916c36SJeff Roberson #else 382d2ad694cSJeff Roberson tdq->tdq_sysload--; 383cac77d04SJeff Roberson #endif 384d2ad694cSJeff Roberson tdq->tdq_load--; 385d2ad694cSJeff Roberson CTR1(KTR_SCHED, "load: %d", tdq->tdq_load); 386ad1e7d28SJulian Elischer ts->ts_runq = NULL; 387ad1e7d28SJulian Elischer if (ts->ts_thread->td_pri_class == PRI_TIMESHARE) 388ad1e7d28SJulian Elischer tdq_nice_rem(tdq, ts->ts_thread->td_proc->p_nice); 3895d7ef00cSJeff Roberson } 3905d7ef00cSJeff Roberson 39115dc847eSJeff Roberson static void 392ad1e7d28SJulian Elischer tdq_nice_add(struct tdq *tdq, int nice) 39315dc847eSJeff Roberson { 394b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 39515dc847eSJeff Roberson /* Normalize to zero. */ 396d2ad694cSJeff Roberson tdq->tdq_nice[nice + SCHED_PRI_NHALF]++; 397d2ad694cSJeff Roberson if (nice < tdq->tdq_nicemin || tdq->tdq_load_timeshare == 1) 398d2ad694cSJeff Roberson tdq->tdq_nicemin = nice; 39915dc847eSJeff Roberson } 40015dc847eSJeff Roberson 40115dc847eSJeff Roberson static void 402ad1e7d28SJulian Elischer tdq_nice_rem(struct tdq *tdq, int nice) 40315dc847eSJeff Roberson { 40415dc847eSJeff Roberson int n; 40515dc847eSJeff Roberson 406b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 40715dc847eSJeff Roberson /* Normalize to zero. */ 40815dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 409d2ad694cSJeff Roberson tdq->tdq_nice[n]--; 410d2ad694cSJeff Roberson KASSERT(tdq->tdq_nice[n] >= 0, ("Negative nice count.")); 41115dc847eSJeff Roberson 41215dc847eSJeff Roberson /* 41315dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 41415dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 41515dc847eSJeff Roberson * the smallest nice. 41615dc847eSJeff Roberson */ 417d2ad694cSJeff Roberson if (nice != tdq->tdq_nicemin || 418d2ad694cSJeff Roberson tdq->tdq_nice[n] != 0 || 419d2ad694cSJeff Roberson tdq->tdq_load_timeshare == 0) 42015dc847eSJeff Roberson return; 42115dc847eSJeff Roberson 422a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 423d2ad694cSJeff Roberson if (tdq->tdq_nice[n]) { 424d2ad694cSJeff Roberson tdq->tdq_nicemin = n - SCHED_PRI_NHALF; 42515dc847eSJeff Roberson return; 42615dc847eSJeff Roberson } 42715dc847eSJeff Roberson } 42815dc847eSJeff Roberson 4295d7ef00cSJeff Roberson #ifdef SMP 430356500a3SJeff Roberson /* 431155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 432356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 433356500a3SJeff Roberson * by migrating some processes. 434356500a3SJeff Roberson * 435356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 436356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 437356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 438356500a3SJeff Roberson * enough information to make perfect decisions. 
So this algorithm chooses 439356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 440356500a3SJeff Roberson * 441356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 442356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 443356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 444356500a3SJeff Roberson * 445356500a3SJeff Roberson */ 44622bf7d9aSJeff Roberson static void 447dc03363dSJeff Roberson sched_balance(void) 448356500a3SJeff Roberson { 449ad1e7d28SJulian Elischer struct tdq_group *high; 450ad1e7d28SJulian Elischer struct tdq_group *low; 451d2ad694cSJeff Roberson struct tdq_group *tdg; 452cac77d04SJeff Roberson int cnt; 453356500a3SJeff Roberson int i; 454356500a3SJeff Roberson 455598b368dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 45686f8ae96SJeff Roberson if (smp_started == 0) 457598b368dSJeff Roberson return; 458cac77d04SJeff Roberson low = high = NULL; 459d2ad694cSJeff Roberson i = random() % (tdg_maxid + 1); 460d2ad694cSJeff Roberson for (cnt = 0; cnt <= tdg_maxid; cnt++) { 461d2ad694cSJeff Roberson tdg = TDQ_GROUP(i); 462cac77d04SJeff Roberson /* 463cac77d04SJeff Roberson * Find the CPU with the highest load that has some 464cac77d04SJeff Roberson * threads to transfer. 465cac77d04SJeff Roberson */ 466d2ad694cSJeff Roberson if ((high == NULL || tdg->tdg_load > high->tdg_load) 467d2ad694cSJeff Roberson && tdg->tdg_transferable) 468d2ad694cSJeff Roberson high = tdg; 469d2ad694cSJeff Roberson if (low == NULL || tdg->tdg_load < low->tdg_load) 470d2ad694cSJeff Roberson low = tdg; 471d2ad694cSJeff Roberson if (++i > tdg_maxid) 472cac77d04SJeff Roberson i = 0; 473cac77d04SJeff Roberson } 474cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 475d2ad694cSJeff Roberson sched_balance_pair(LIST_FIRST(&high->tdg_members), 476d2ad694cSJeff Roberson LIST_FIRST(&low->tdg_members)); 477cac77d04SJeff Roberson } 47886f8ae96SJeff Roberson 479cac77d04SJeff Roberson static void 480dc03363dSJeff Roberson sched_balance_groups(void) 481cac77d04SJeff Roberson { 482cac77d04SJeff Roberson int i; 483cac77d04SJeff Roberson 484598b368dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 485dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 486cac77d04SJeff Roberson if (smp_started) 487d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 488ad1e7d28SJulian Elischer sched_balance_group(TDQ_GROUP(i)); 489356500a3SJeff Roberson } 490cac77d04SJeff Roberson 491cac77d04SJeff Roberson static void 492d2ad694cSJeff Roberson sched_balance_group(struct tdq_group *tdg) 493cac77d04SJeff Roberson { 494ad1e7d28SJulian Elischer struct tdq *tdq; 495ad1e7d28SJulian Elischer struct tdq *high; 496ad1e7d28SJulian Elischer struct tdq *low; 497cac77d04SJeff Roberson int load; 498cac77d04SJeff Roberson 499d2ad694cSJeff Roberson if (tdg->tdg_transferable == 0) 500cac77d04SJeff Roberson return; 501cac77d04SJeff Roberson low = NULL; 502cac77d04SJeff Roberson high = NULL; 503d2ad694cSJeff Roberson LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) { 504d2ad694cSJeff Roberson load = tdq->tdq_load; 505d2ad694cSJeff Roberson if (high == NULL || load > high->tdq_load) 506ad1e7d28SJulian Elischer high = tdq; 507d2ad694cSJeff Roberson if (low == NULL || load < low->tdq_load) 508ad1e7d28SJulian Elischer low = tdq; 509356500a3SJeff Roberson } 510cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 
511cac77d04SJeff Roberson sched_balance_pair(high, low); 512356500a3SJeff Roberson } 513cac77d04SJeff Roberson 514cac77d04SJeff Roberson static void 515ad1e7d28SJulian Elischer sched_balance_pair(struct tdq *high, struct tdq *low) 516cac77d04SJeff Roberson { 517cac77d04SJeff Roberson int transferable; 518cac77d04SJeff Roberson int high_load; 519cac77d04SJeff Roberson int low_load; 520cac77d04SJeff Roberson int move; 521cac77d04SJeff Roberson int diff; 522cac77d04SJeff Roberson int i; 523cac77d04SJeff Roberson 52480f86c9fSJeff Roberson /* 52580f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 526ad1e7d28SJulian Elischer * tdq's transferable count, otherwise we can steal from other members 52780f86c9fSJeff Roberson * of the group. 52880f86c9fSJeff Roberson */ 529d2ad694cSJeff Roberson if (high->tdq_group == low->tdq_group) { 530d2ad694cSJeff Roberson transferable = high->tdq_transferable; 531d2ad694cSJeff Roberson high_load = high->tdq_load; 532d2ad694cSJeff Roberson low_load = low->tdq_load; 533cac77d04SJeff Roberson } else { 534d2ad694cSJeff Roberson transferable = high->tdq_group->tdg_transferable; 535d2ad694cSJeff Roberson high_load = high->tdq_group->tdg_load; 536d2ad694cSJeff Roberson low_load = low->tdq_group->tdg_load; 537cac77d04SJeff Roberson } 53880f86c9fSJeff Roberson if (transferable == 0) 539cac77d04SJeff Roberson return; 540155b9987SJeff Roberson /* 541155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 542d2ad694cSJeff Roberson * threads we actually have to give up (transferable). 543155b9987SJeff Roberson */ 544cac77d04SJeff Roberson diff = high_load - low_load; 545356500a3SJeff Roberson move = diff / 2; 546356500a3SJeff Roberson if (diff & 0x1) 547356500a3SJeff Roberson move++; 54880f86c9fSJeff Roberson move = min(move, transferable); 549356500a3SJeff Roberson for (i = 0; i < move; i++) 550ad1e7d28SJulian Elischer tdq_move(high, TDQ_ID(low)); 551356500a3SJeff Roberson return; 552356500a3SJeff Roberson } 553356500a3SJeff Roberson 55422bf7d9aSJeff Roberson static void 555ad1e7d28SJulian Elischer tdq_move(struct tdq *from, int cpu) 556356500a3SJeff Roberson { 557ad1e7d28SJulian Elischer struct tdq *tdq; 558ad1e7d28SJulian Elischer struct tdq *to; 559ad1e7d28SJulian Elischer struct td_sched *ts; 560356500a3SJeff Roberson 561ad1e7d28SJulian Elischer tdq = from; 562ad1e7d28SJulian Elischer to = TDQ_CPU(cpu); 563ad1e7d28SJulian Elischer ts = tdq_steal(tdq, 1); 564ad1e7d28SJulian Elischer if (ts == NULL) { 565d2ad694cSJeff Roberson struct tdq_group *tdg; 56680f86c9fSJeff Roberson 567d2ad694cSJeff Roberson tdg = tdq->tdq_group; 568d2ad694cSJeff Roberson LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) { 569d2ad694cSJeff Roberson if (tdq == from || tdq->tdq_transferable == 0) 57080f86c9fSJeff Roberson continue; 571ad1e7d28SJulian Elischer ts = tdq_steal(tdq, 1); 57280f86c9fSJeff Roberson break; 57380f86c9fSJeff Roberson } 574ad1e7d28SJulian Elischer if (ts == NULL) 575ad1e7d28SJulian Elischer panic("tdq_move: No threads available with a " 57680f86c9fSJeff Roberson "transferable count of %d\n", 577d2ad694cSJeff Roberson tdg->tdg_transferable); 57880f86c9fSJeff Roberson } 579ad1e7d28SJulian Elischer if (tdq == to) 58080f86c9fSJeff Roberson return; 581ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 582ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 583ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 584ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 585356500a3SJeff Roberson } 58622bf7d9aSJeff Roberson 
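/*
 * Worked example for sched_balance_pair() and tdq_move() above (the
 * loads are assumed numbers, purely for illustration): with a high
 * load of 7 and a low load of 2, diff is 5 and move rounds up to 3;
 * if only 2 threads are transferable, move is clamped to 2 and
 * tdq_move() runs twice, each time stealing a thread from the loaded
 * queue and handing it to the less loaded CPU via tdq_notify().
 */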
58780f86c9fSJeff Roberson static int 588ad1e7d28SJulian Elischer tdq_idled(struct tdq *tdq) 58922bf7d9aSJeff Roberson { 590d2ad694cSJeff Roberson struct tdq_group *tdg; 591ad1e7d28SJulian Elischer struct tdq *steal; 592ad1e7d28SJulian Elischer struct td_sched *ts; 59380f86c9fSJeff Roberson 594d2ad694cSJeff Roberson tdg = tdq->tdq_group; 59580f86c9fSJeff Roberson /* 596d2ad694cSJeff Roberson * If we're in a cpu group, try and steal threads from another cpu in 59780f86c9fSJeff Roberson * the group before idling. 59880f86c9fSJeff Roberson */ 599d2ad694cSJeff Roberson if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) { 600d2ad694cSJeff Roberson LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) { 601d2ad694cSJeff Roberson if (steal == tdq || steal->tdq_transferable == 0) 60280f86c9fSJeff Roberson continue; 603ad1e7d28SJulian Elischer ts = tdq_steal(steal, 0); 604ad1e7d28SJulian Elischer if (ts == NULL) 60580f86c9fSJeff Roberson continue; 606ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 607ad1e7d28SJulian Elischer tdq_runq_rem(steal, ts); 608ad1e7d28SJulian Elischer tdq_load_rem(steal, ts); 609ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 610ad1e7d28SJulian Elischer ts->ts_flags |= TSF_INTERNAL | TSF_HOLD; 611ad1e7d28SJulian Elischer sched_add(ts->ts_thread, SRQ_YIELDING); 61280f86c9fSJeff Roberson return (0); 61380f86c9fSJeff Roberson } 61480f86c9fSJeff Roberson } 61580f86c9fSJeff Roberson /* 61680f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 617ad1e7d28SJulian Elischer * idle. Otherwise we could get into a situation where a thread bounces 61880f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 61980f86c9fSJeff Roberson */ 620d2ad694cSJeff Roberson tdg->tdg_idlemask |= PCPU_GET(cpumask); 621d2ad694cSJeff Roberson if (tdg->tdg_idlemask != tdg->tdg_cpumask) 62280f86c9fSJeff Roberson return (1); 623d2ad694cSJeff Roberson atomic_set_int(&tdq_idle, tdg->tdg_mask); 62480f86c9fSJeff Roberson return (1); 62522bf7d9aSJeff Roberson } 62622bf7d9aSJeff Roberson 62722bf7d9aSJeff Roberson static void 628ad1e7d28SJulian Elischer tdq_assign(struct tdq *tdq) 62922bf7d9aSJeff Roberson { 630ad1e7d28SJulian Elischer struct td_sched *nts; 631ad1e7d28SJulian Elischer struct td_sched *ts; 63222bf7d9aSJeff Roberson 63322bf7d9aSJeff Roberson do { 634d2ad694cSJeff Roberson *(volatile struct td_sched **)&ts = tdq->tdq_assigned; 635d2ad694cSJeff Roberson } while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned, 636ad1e7d28SJulian Elischer (uintptr_t)ts, (uintptr_t)NULL)); 637ad1e7d28SJulian Elischer for (; ts != NULL; ts = nts) { 638ad1e7d28SJulian Elischer nts = ts->ts_assign; 639d2ad694cSJeff Roberson tdq->tdq_group->tdg_load--; 640d2ad694cSJeff Roberson tdq->tdq_load--; 641ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_ASSIGNED; 642ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_REMOVED) { 643ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_REMOVED; 6443d16f519SDavid Xu continue; 6453d16f519SDavid Xu } 646ad1e7d28SJulian Elischer ts->ts_flags |= TSF_INTERNAL | TSF_HOLD; 647ad1e7d28SJulian Elischer sched_add(ts->ts_thread, SRQ_YIELDING); 64822bf7d9aSJeff Roberson } 64922bf7d9aSJeff Roberson } 65022bf7d9aSJeff Roberson 65122bf7d9aSJeff Roberson static void 652ad1e7d28SJulian Elischer tdq_notify(struct td_sched *ts, int cpu) 65322bf7d9aSJeff Roberson { 654ad1e7d28SJulian Elischer struct tdq *tdq; 65522bf7d9aSJeff Roberson struct thread *td; 65622bf7d9aSJeff Roberson struct pcpu *pcpu; 657598b368dSJeff Roberson int class; 
6582454aaf5SJeff Roberson int prio; 65922bf7d9aSJeff Roberson 660ad1e7d28SJulian Elischer tdq = TDQ_CPU(cpu); 661598b368dSJeff Roberson /* XXX */ 662ad1e7d28SJulian Elischer class = PRI_BASE(ts->ts_thread->td_pri_class); 663598b368dSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 664d2ad694cSJeff Roberson (tdq_idle & tdq->tdq_group->tdg_mask)) 665d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 666d2ad694cSJeff Roberson tdq->tdq_group->tdg_load++; 667d2ad694cSJeff Roberson tdq->tdq_load++; 668ad1e7d28SJulian Elischer ts->ts_cpu = cpu; 669ad1e7d28SJulian Elischer ts->ts_flags |= TSF_ASSIGNED; 670ad1e7d28SJulian Elischer prio = ts->ts_thread->td_priority; 67122bf7d9aSJeff Roberson 6720c0a98b2SJeff Roberson /* 673ad1e7d28SJulian Elischer * Place a thread on another cpu's queue and force a resched. 67422bf7d9aSJeff Roberson */ 67522bf7d9aSJeff Roberson do { 676d2ad694cSJeff Roberson *(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned; 677d2ad694cSJeff Roberson } while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned, 678ad1e7d28SJulian Elischer (uintptr_t)ts->ts_assign, (uintptr_t)ts)); 6792454aaf5SJeff Roberson /* 6802454aaf5SJeff Roberson * Without sched_lock we could lose a race where we set NEEDRESCHED 6812454aaf5SJeff Roberson * on a thread that is switched out before the IPI is delivered. This 6822454aaf5SJeff Roberson * would lead us to miss the resched. This will be a problem once 6832454aaf5SJeff Roberson * sched_lock is pushed down. 6842454aaf5SJeff Roberson */ 68522bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 68622bf7d9aSJeff Roberson td = pcpu->pc_curthread; 687ad1e7d28SJulian Elischer if (ts->ts_thread->td_priority < td->td_priority || 68822bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 68922bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 69022bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 69122bf7d9aSJeff Roberson } 69222bf7d9aSJeff Roberson } 69322bf7d9aSJeff Roberson 694ad1e7d28SJulian Elischer static struct td_sched * 69522bf7d9aSJeff Roberson runq_steal(struct runq *rq) 69622bf7d9aSJeff Roberson { 69722bf7d9aSJeff Roberson struct rqhead *rqh; 69822bf7d9aSJeff Roberson struct rqbits *rqb; 699ad1e7d28SJulian Elischer struct td_sched *ts; 70022bf7d9aSJeff Roberson int word; 70122bf7d9aSJeff Roberson int bit; 70222bf7d9aSJeff Roberson 70322bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 70422bf7d9aSJeff Roberson rqb = &rq->rq_status; 70522bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 70622bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 70722bf7d9aSJeff Roberson continue; 70822bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 709a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 71022bf7d9aSJeff Roberson continue; 71122bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 712ad1e7d28SJulian Elischer TAILQ_FOREACH(ts, rqh, ts_procq) { 713ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) 714ad1e7d28SJulian Elischer return (ts); 71522bf7d9aSJeff Roberson } 71622bf7d9aSJeff Roberson } 71722bf7d9aSJeff Roberson } 71822bf7d9aSJeff Roberson return (NULL); 71922bf7d9aSJeff Roberson } 72022bf7d9aSJeff Roberson 721ad1e7d28SJulian Elischer static struct td_sched * 722ad1e7d28SJulian Elischer tdq_steal(struct tdq *tdq, int stealidle) 72322bf7d9aSJeff Roberson { 724ad1e7d28SJulian Elischer struct td_sched *ts; 72522bf7d9aSJeff Roberson 72680f86c9fSJeff Roberson /* 72780f86c9fSJeff Roberson * Steal from next first to try to get a 
non-interactive task that 72880f86c9fSJeff Roberson * may not have run for a while. 72980f86c9fSJeff Roberson */ 730d2ad694cSJeff Roberson if ((ts = runq_steal(tdq->tdq_next)) != NULL) 731ad1e7d28SJulian Elischer return (ts); 732d2ad694cSJeff Roberson if ((ts = runq_steal(tdq->tdq_curr)) != NULL) 733ad1e7d28SJulian Elischer return (ts); 73480f86c9fSJeff Roberson if (stealidle) 735d2ad694cSJeff Roberson return (runq_steal(&tdq->tdq_idle)); 73680f86c9fSJeff Roberson return (NULL); 73722bf7d9aSJeff Roberson } 73880f86c9fSJeff Roberson 73980f86c9fSJeff Roberson int 740ad1e7d28SJulian Elischer tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class) 74180f86c9fSJeff Roberson { 742d2ad694cSJeff Roberson struct tdq_group *ntdg; 743d2ad694cSJeff Roberson struct tdq_group *tdg; 744ad1e7d28SJulian Elischer struct tdq *old; 74580f86c9fSJeff Roberson int cpu; 746598b368dSJeff Roberson int idx; 74780f86c9fSJeff Roberson 748670c524fSJeff Roberson if (smp_started == 0) 749670c524fSJeff Roberson return (0); 75080f86c9fSJeff Roberson cpu = 0; 75180f86c9fSJeff Roberson /* 7522454aaf5SJeff Roberson * If our load exceeds a certain threshold we should attempt to 7532454aaf5SJeff Roberson * reassign this thread. The first candidate is the cpu that 7542454aaf5SJeff Roberson * originally ran the thread. If it is idle, assign it there, 7552454aaf5SJeff Roberson * otherwise, pick an idle cpu. 7562454aaf5SJeff Roberson * 757d2ad694cSJeff Roberson * The threshold at which we start to reassign has a large impact 758670c524fSJeff Roberson * on the overall performance of the system. Tuned too high and 759670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 760d50c87deSOlivier Houchard * and context switches. 761670c524fSJeff Roberson */ 762ad1e7d28SJulian Elischer old = TDQ_CPU(ts->ts_cpu); 763d2ad694cSJeff Roberson ntdg = old->tdq_group; 764d2ad694cSJeff Roberson tdg = tdq->tdq_group; 765ad1e7d28SJulian Elischer if (tdq_idle) { 766d2ad694cSJeff Roberson if (tdq_idle & ntdg->tdg_mask) { 767d2ad694cSJeff Roberson cpu = ffs(ntdg->tdg_idlemask); 768598b368dSJeff Roberson if (cpu) { 769598b368dSJeff Roberson CTR2(KTR_SCHED, 770ad1e7d28SJulian Elischer "tdq_transfer: %p found old cpu %X " 771ad1e7d28SJulian Elischer "in idlemask.", ts, cpu); 7722454aaf5SJeff Roberson goto migrate; 7732454aaf5SJeff Roberson } 774598b368dSJeff Roberson } 77580f86c9fSJeff Roberson /* 77680f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 77780f86c9fSJeff Roberson * but the race shouldn't be terrible. 77880f86c9fSJeff Roberson */ 779ad1e7d28SJulian Elischer cpu = ffs(tdq_idle); 780598b368dSJeff Roberson if (cpu) { 781ad1e7d28SJulian Elischer CTR2(KTR_SCHED, "tdq_transfer: %p found %X " 782ad1e7d28SJulian Elischer "in idlemask.", ts, cpu); 7832454aaf5SJeff Roberson goto migrate; 78480f86c9fSJeff Roberson } 785598b368dSJeff Roberson } 786598b368dSJeff Roberson idx = 0; 787598b368dSJeff Roberson #if 0 788d2ad694cSJeff Roberson if (old->tdq_load < tdq->tdq_load) { 789ad1e7d28SJulian Elischer cpu = ts->ts_cpu + 1; 790ad1e7d28SJulian Elischer CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X " 791ad1e7d28SJulian Elischer "load less than ours.", ts, cpu); 792598b368dSJeff Roberson goto migrate; 793598b368dSJeff Roberson } 794598b368dSJeff Roberson /* 795598b368dSJeff Roberson * No new CPU was found, look for one with less load. 
796598b368dSJeff Roberson */ 797d2ad694cSJeff Roberson for (idx = 0; idx <= tdg_maxid; idx++) { 798d2ad694cSJeff Roberson ntdg = TDQ_GROUP(idx); 799d2ad694cSJeff Roberson if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) { 800d2ad694cSJeff Roberson cpu = ffs(ntdg->tdg_cpumask); 801ad1e7d28SJulian Elischer CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less " 802ad1e7d28SJulian Elischer "than ours.", ts, cpu); 803598b368dSJeff Roberson goto migrate; 804598b368dSJeff Roberson } 805598b368dSJeff Roberson } 806598b368dSJeff Roberson #endif 80780f86c9fSJeff Roberson /* 80880f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 80980f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 81080f86c9fSJeff Roberson */ 811d2ad694cSJeff Roberson if (tdg->tdg_idlemask) { 812d2ad694cSJeff Roberson cpu = ffs(tdg->tdg_idlemask); 813598b368dSJeff Roberson if (cpu) { 814ad1e7d28SJulian Elischer CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in " 815ad1e7d28SJulian Elischer "group.", ts, cpu); 8162454aaf5SJeff Roberson goto migrate; 81780f86c9fSJeff Roberson } 818598b368dSJeff Roberson } 8192454aaf5SJeff Roberson return (0); 8202454aaf5SJeff Roberson migrate: 8212454aaf5SJeff Roberson /* 82280f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 82380f86c9fSJeff Roberson */ 82480f86c9fSJeff Roberson cpu--; 825ad1e7d28SJulian Elischer ts->ts_runq = NULL; 826ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 8272454aaf5SJeff Roberson 82880f86c9fSJeff Roberson return (1); 82980f86c9fSJeff Roberson } 83080f86c9fSJeff Roberson 83122bf7d9aSJeff Roberson #endif /* SMP */ 83222bf7d9aSJeff Roberson 83322bf7d9aSJeff Roberson /* 83422bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 8350c0a98b2SJeff Roberson */ 8360c0a98b2SJeff Roberson 837ad1e7d28SJulian Elischer static struct td_sched * 838ad1e7d28SJulian Elischer tdq_choose(struct tdq *tdq) 8395d7ef00cSJeff Roberson { 8405d7ef00cSJeff Roberson struct runq *swap; 841ad1e7d28SJulian Elischer struct td_sched *ts; 8420516c8ddSJeff Roberson int nice; 8435d7ef00cSJeff Roberson 844b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 84515dc847eSJeff Roberson swap = NULL; 846a8949de2SJeff Roberson 84715dc847eSJeff Roberson for (;;) { 848d2ad694cSJeff Roberson ts = runq_choose(tdq->tdq_curr); 849ad1e7d28SJulian Elischer if (ts == NULL) { 85015dc847eSJeff Roberson /* 851bf0acc27SJohn Baldwin * We already swapped once and didn't get anywhere. 85215dc847eSJeff Roberson */ 85315dc847eSJeff Roberson if (swap) 85415dc847eSJeff Roberson break; 855d2ad694cSJeff Roberson swap = tdq->tdq_curr; 856d2ad694cSJeff Roberson tdq->tdq_curr = tdq->tdq_next; 857d2ad694cSJeff Roberson tdq->tdq_next = swap; 85815dc847eSJeff Roberson continue; 859a8949de2SJeff Roberson } 86015dc847eSJeff Roberson /* 861ad1e7d28SJulian Elischer * If we encounter a slice of 0 the td_sched is in a 862ad1e7d28SJulian Elischer * TIMESHARE td_sched group and its nice was too far out 86315dc847eSJeff Roberson * of the range that receives slices. 
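 * (Illustrative numbers, assuming hz of 1000 so that sched_setup()
 * picks slice_min 10 and slice_max 142: a thread at tdq_nicemin gets
 * the full 142 tick slice from sched_slice(), one 10 nice values above
 * it gets 142 - (10 * 133) / 19 = 72 ticks, and one more than
 * SCHED_SLICE_NTHRESH (19) above it falls outside the window entirely.)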
86415dc847eSJeff Roberson */ 865d2ad694cSJeff Roberson nice = ts->ts_thread->td_proc->p_nice + (0 - tdq->tdq_nicemin); 866a8615740SDavid Xu #if 0 867ad1e7d28SJulian Elischer if (ts->ts_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 868ad1e7d28SJulian Elischer ts->ts_thread->td_proc->p_nice != 0)) { 869ad1e7d28SJulian Elischer runq_remove(ts->ts_runq, ts); 870ad1e7d28SJulian Elischer sched_slice(ts); 871d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_next; 872ad1e7d28SJulian Elischer runq_add(ts->ts_runq, ts, 0); 87315dc847eSJeff Roberson continue; 87415dc847eSJeff Roberson } 875a8615740SDavid Xu #endif 876ad1e7d28SJulian Elischer return (ts); 87715dc847eSJeff Roberson } 87815dc847eSJeff Roberson 879d2ad694cSJeff Roberson return (runq_choose(&tdq->tdq_idle)); 880245f3abfSJeff Roberson } 8810a016a05SJeff Roberson 8820a016a05SJeff Roberson static void 883ad1e7d28SJulian Elischer tdq_setup(struct tdq *tdq) 8840a016a05SJeff Roberson { 885d2ad694cSJeff Roberson runq_init(&tdq->tdq_timeshare[0]); 886d2ad694cSJeff Roberson runq_init(&tdq->tdq_timeshare[1]); 887d2ad694cSJeff Roberson runq_init(&tdq->tdq_idle); 888d2ad694cSJeff Roberson tdq->tdq_curr = &tdq->tdq_timeshare[0]; 889d2ad694cSJeff Roberson tdq->tdq_next = &tdq->tdq_timeshare[1]; 890d2ad694cSJeff Roberson tdq->tdq_load = 0; 891d2ad694cSJeff Roberson tdq->tdq_load_timeshare = 0; 8920a016a05SJeff Roberson } 8930a016a05SJeff Roberson 89435e6168fSJeff Roberson static void 89535e6168fSJeff Roberson sched_setup(void *dummy) 89635e6168fSJeff Roberson { 8970ec896fdSJeff Roberson #ifdef SMP 89835e6168fSJeff Roberson int i; 8990ec896fdSJeff Roberson #endif 90035e6168fSJeff Roberson 901a1d4fe69SDavid Xu /* 902a1d4fe69SDavid Xu * To avoid divide-by-zero, we set realstathz a dummy value 903a1d4fe69SDavid Xu * in case which sched_clock() called before sched_initticks(). 904a1d4fe69SDavid Xu */ 905a1d4fe69SDavid Xu realstathz = hz; 906e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 907e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 908e1f89c22SJeff Roberson 909356500a3SJeff Roberson #ifdef SMP 910cac77d04SJeff Roberson balance_groups = 0; 91180f86c9fSJeff Roberson /* 912ad1e7d28SJulian Elischer * Initialize the tdqs. 91380f86c9fSJeff Roberson */ 914749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 915c02bbb43SJeff Roberson struct tdq *tdq; 91680f86c9fSJeff Roberson 917c02bbb43SJeff Roberson tdq = &tdq_cpu[i]; 918c02bbb43SJeff Roberson tdq->tdq_assigned = NULL; 919ad1e7d28SJulian Elischer tdq_setup(&tdq_cpu[i]); 92080f86c9fSJeff Roberson } 92180f86c9fSJeff Roberson if (smp_topology == NULL) { 922d2ad694cSJeff Roberson struct tdq_group *tdg; 923c02bbb43SJeff Roberson struct tdq *tdq; 924598b368dSJeff Roberson int cpus; 92580f86c9fSJeff Roberson 926598b368dSJeff Roberson for (cpus = 0, i = 0; i < MAXCPU; i++) { 927598b368dSJeff Roberson if (CPU_ABSENT(i)) 928598b368dSJeff Roberson continue; 929c02bbb43SJeff Roberson tdq = &tdq_cpu[i]; 930d2ad694cSJeff Roberson tdg = &tdq_groups[cpus]; 93180f86c9fSJeff Roberson /* 932ad1e7d28SJulian Elischer * Setup a tdq group with one member. 
93380f86c9fSJeff Roberson */ 934c02bbb43SJeff Roberson tdq->tdq_transferable = 0; 935c02bbb43SJeff Roberson tdq->tdq_group = tdg; 936d2ad694cSJeff Roberson tdg->tdg_cpus = 1; 937d2ad694cSJeff Roberson tdg->tdg_idlemask = 0; 938d2ad694cSJeff Roberson tdg->tdg_cpumask = tdg->tdg_mask = 1 << i; 939d2ad694cSJeff Roberson tdg->tdg_load = 0; 940d2ad694cSJeff Roberson tdg->tdg_transferable = 0; 941d2ad694cSJeff Roberson LIST_INIT(&tdg->tdg_members); 942c02bbb43SJeff Roberson LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings); 943598b368dSJeff Roberson cpus++; 944749d01b0SJeff Roberson } 945d2ad694cSJeff Roberson tdg_maxid = cpus - 1; 946749d01b0SJeff Roberson } else { 947d2ad694cSJeff Roberson struct tdq_group *tdg; 94880f86c9fSJeff Roberson struct cpu_group *cg; 949749d01b0SJeff Roberson int j; 950749d01b0SJeff Roberson 951749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 952749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 953d2ad694cSJeff Roberson tdg = &tdq_groups[i]; 95480f86c9fSJeff Roberson /* 95580f86c9fSJeff Roberson * Initialize the group. 95680f86c9fSJeff Roberson */ 957d2ad694cSJeff Roberson tdg->tdg_idlemask = 0; 958d2ad694cSJeff Roberson tdg->tdg_load = 0; 959d2ad694cSJeff Roberson tdg->tdg_transferable = 0; 960d2ad694cSJeff Roberson tdg->tdg_cpus = cg->cg_count; 961d2ad694cSJeff Roberson tdg->tdg_cpumask = cg->cg_mask; 962d2ad694cSJeff Roberson LIST_INIT(&tdg->tdg_members); 96380f86c9fSJeff Roberson /* 96480f86c9fSJeff Roberson * Find all of the group members and add them. 96580f86c9fSJeff Roberson */ 96680f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 96780f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 968d2ad694cSJeff Roberson if (tdg->tdg_mask == 0) 969d2ad694cSJeff Roberson tdg->tdg_mask = 1 << j; 970d2ad694cSJeff Roberson tdq_cpu[j].tdq_transferable = 0; 971d2ad694cSJeff Roberson tdq_cpu[j].tdq_group = tdg; 972d2ad694cSJeff Roberson LIST_INSERT_HEAD(&tdg->tdg_members, 973d2ad694cSJeff Roberson &tdq_cpu[j], tdq_siblings); 97480f86c9fSJeff Roberson } 97580f86c9fSJeff Roberson } 976d2ad694cSJeff Roberson if (tdg->tdg_cpus > 1) 977cac77d04SJeff Roberson balance_groups = 1; 978749d01b0SJeff Roberson } 979d2ad694cSJeff Roberson tdg_maxid = smp_topology->ct_count - 1; 980749d01b0SJeff Roberson } 981cac77d04SJeff Roberson /* 982cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 983cac77d04SJeff Roberson * interfere with each other. 984cac77d04SJeff Roberson */ 985dc03363dSJeff Roberson bal_tick = ticks + hz; 986cac77d04SJeff Roberson if (balance_groups) 987dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 988749d01b0SJeff Roberson #else 989ad1e7d28SJulian Elischer tdq_setup(TDQ_SELF()); 990356500a3SJeff Roberson #endif 991749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 992ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), &td_sched0); 993749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 99435e6168fSJeff Roberson } 99535e6168fSJeff Roberson 996a1d4fe69SDavid Xu /* ARGSUSED */ 997a1d4fe69SDavid Xu static void 998a1d4fe69SDavid Xu sched_initticks(void *dummy) 999a1d4fe69SDavid Xu { 1000a1d4fe69SDavid Xu mtx_lock_spin(&sched_lock); 1001a1d4fe69SDavid Xu realstathz = stathz ? 
stathz : hz; 1002a1d4fe69SDavid Xu slice_min = (realstathz/100); /* 10ms */ 1003a1d4fe69SDavid Xu slice_max = (realstathz/7); /* ~140ms */ 1004a1d4fe69SDavid Xu 1005a1d4fe69SDavid Xu tickincr = (hz << 10) / realstathz; 1006a1d4fe69SDavid Xu /* 1007a1d4fe69SDavid Xu * XXX This does not work for values of stathz that are much 1008a1d4fe69SDavid Xu * larger than hz. 1009a1d4fe69SDavid Xu */ 1010a1d4fe69SDavid Xu if (tickincr == 0) 1011a1d4fe69SDavid Xu tickincr = 1; 1012a1d4fe69SDavid Xu mtx_unlock_spin(&sched_lock); 1013a1d4fe69SDavid Xu } 1014a1d4fe69SDavid Xu 1015a1d4fe69SDavid Xu 101635e6168fSJeff Roberson /* 101735e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 101835e6168fSJeff Roberson * process. 101935e6168fSJeff Roberson */ 102015dc847eSJeff Roberson static void 10218460a577SJohn Birrell sched_priority(struct thread *td) 102235e6168fSJeff Roberson { 102335e6168fSJeff Roberson int pri; 102435e6168fSJeff Roberson 10258460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 102615dc847eSJeff Roberson return; 102735e6168fSJeff Roberson 10288460a577SJohn Birrell pri = SCHED_PRI_INTERACT(sched_interact_score(td)); 1029e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 10308460a577SJohn Birrell pri += td->td_proc->p_nice; 103135e6168fSJeff Roberson 103235e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 103335e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 103435e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 103535e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 103635e6168fSJeff Roberson 10378460a577SJohn Birrell sched_user_prio(td, pri); 103835e6168fSJeff Roberson 103915dc847eSJeff Roberson return; 104035e6168fSJeff Roberson } 104135e6168fSJeff Roberson 104235e6168fSJeff Roberson /* 1043ad1e7d28SJulian Elischer * Calculate a time slice based on the properties of the process 1044ad1e7d28SJulian Elischer * and the runq that we're on. This is only for PRI_TIMESHARE threads. 104535e6168fSJeff Roberson */ 1046245f3abfSJeff Roberson static void 1047ad1e7d28SJulian Elischer sched_slice(struct td_sched *ts) 104835e6168fSJeff Roberson { 1049ad1e7d28SJulian Elischer struct tdq *tdq; 10508460a577SJohn Birrell struct thread *td; 105135e6168fSJeff Roberson 1052ad1e7d28SJulian Elischer td = ts->ts_thread; 1053ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 105435e6168fSJeff Roberson 10558460a577SJohn Birrell if (td->td_flags & TDF_BORROWING) { 1056ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; 10578ffb8f55SJeff Roberson return; 10588ffb8f55SJeff Roberson } 10598ffb8f55SJeff Roberson 1060245f3abfSJeff Roberson /* 1061245f3abfSJeff Roberson * Rationale: 1062ad1e7d28SJulian Elischer * Threads in interactive procs get a minimal slice so that we 1063245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 1064245f3abfSJeff Roberson * 1065ad1e7d28SJulian Elischer * Threads in non-interactive procs are assigned a slice that is 1066ad1e7d28SJulian Elischer * based on the procs nice value relative to the least nice procs 1067245f3abfSJeff Roberson * on the run queue for this cpu. 1068245f3abfSJeff Roberson * 1069ad1e7d28SJulian Elischer * If the thread is less nice than all others it gets the maximum 1070ad1e7d28SJulian Elischer * slice and other threads will adjust their slice relative to 1071245f3abfSJeff Roberson * this when they first expire. 1072245f3abfSJeff Roberson * 1073245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 1074ad1e7d28SJulian Elischer * nice td_sched on the run queue. 
Slice size is determined by 1075ad1e7d28SJulian Elischer * the td_sched distance from the last nice thread. 1076245f3abfSJeff Roberson * 1077ad1e7d28SJulian Elischer * If the td_sched is outside of the window it will get no slice 10787d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 1079ad1e7d28SJulian Elischer * run queue. The exception to this is nice 0 procs when 10807d1a81b4SJeff Roberson * a nice -20 is running. They are always granted a minimum 10817d1a81b4SJeff Roberson * slice. 1082245f3abfSJeff Roberson */ 10838460a577SJohn Birrell if (!SCHED_INTERACTIVE(td)) { 1084245f3abfSJeff Roberson int nice; 1085245f3abfSJeff Roberson 1086d2ad694cSJeff Roberson nice = td->td_proc->p_nice + (0 - tdq->tdq_nicemin); 1087d2ad694cSJeff Roberson if (tdq->tdq_load_timeshare == 0 || 1088d2ad694cSJeff Roberson td->td_proc->p_nice < tdq->tdq_nicemin) 1089ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MAX; 10907d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1091ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_NICE(nice); 10928460a577SJohn Birrell else if (td->td_proc->p_nice == 0) 1093ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; 1094245f3abfSJeff Roberson else 1095ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; /* 0 */ 1096245f3abfSJeff Roberson } else 1097ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_INTERACTIVE; 109835e6168fSJeff Roberson 1099245f3abfSJeff Roberson return; 110035e6168fSJeff Roberson } 110135e6168fSJeff Roberson 1102d322132cSJeff Roberson /* 1103d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1104d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1105d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1106d322132cSJeff Roberson * adjusted to more than double their maximum. 1107d322132cSJeff Roberson */ 11084b60e324SJeff Roberson static void 11098460a577SJohn Birrell sched_interact_update(struct thread *td) 11104b60e324SJeff Roberson { 1111d322132cSJeff Roberson int sum; 11123f741ca1SJeff Roberson 11138460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1114d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1115d322132cSJeff Roberson return; 1116d322132cSJeff Roberson /* 1117d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1118d322132cSJeff Roberson * will not bring us back into range. 
Dividing by two here forces 11192454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1120d322132cSJeff Roberson */ 112137a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 11228460a577SJohn Birrell td->td_sched->skg_runtime /= 2; 11238460a577SJohn Birrell td->td_sched->skg_slptime /= 2; 1124d322132cSJeff Roberson return; 1125d322132cSJeff Roberson } 11268460a577SJohn Birrell td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4; 11278460a577SJohn Birrell td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4; 1128d322132cSJeff Roberson } 1129d322132cSJeff Roberson 1130d322132cSJeff Roberson static void 11318460a577SJohn Birrell sched_interact_fork(struct thread *td) 1132d322132cSJeff Roberson { 1133d322132cSJeff Roberson int ratio; 1134d322132cSJeff Roberson int sum; 1135d322132cSJeff Roberson 11368460a577SJohn Birrell sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1137d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1138d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 11398460a577SJohn Birrell td->td_sched->skg_runtime /= ratio; 11408460a577SJohn Birrell td->td_sched->skg_slptime /= ratio; 11414b60e324SJeff Roberson } 11424b60e324SJeff Roberson } 11434b60e324SJeff Roberson 1144e1f89c22SJeff Roberson static int 11458460a577SJohn Birrell sched_interact_score(struct thread *td) 1146e1f89c22SJeff Roberson { 1147210491d3SJeff Roberson int div; 1148e1f89c22SJeff Roberson 11498460a577SJohn Birrell if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 11508460a577SJohn Birrell div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1151210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 11528460a577SJohn Birrell (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 11538460a577SJohn Birrell } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 11548460a577SJohn Birrell div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 11558460a577SJohn Birrell return (td->td_sched->skg_runtime / div); 1156e1f89c22SJeff Roberson } 1157e1f89c22SJeff Roberson 1158210491d3SJeff Roberson /* 1159210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1160210491d3SJeff Roberson */ 1161210491d3SJeff Roberson return (0); 1162e1f89c22SJeff Roberson 1163e1f89c22SJeff Roberson } 1164e1f89c22SJeff Roberson 116515dc847eSJeff Roberson /* 1166ed062c8dSJulian Elischer * Very early in the boot some setup of scheduler-specific 1167ed062c8dSJulian Elischer * parts of proc0 and of some scheduler resources needs to be done. 1168ed062c8dSJulian Elischer * Called from: 1169ed062c8dSJulian Elischer * proc0_init() 1170ed062c8dSJulian Elischer */ 1171ed062c8dSJulian Elischer void 1172ed062c8dSJulian Elischer schedinit(void) 1173ed062c8dSJulian Elischer { 1174ed062c8dSJulian Elischer /* 1175ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1176ed062c8dSJulian Elischer */ 1177ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1178ad1e7d28SJulian Elischer thread0.td_sched = &td_sched0; 1179ad1e7d28SJulian Elischer td_sched0.ts_thread = &thread0; 1180ad1e7d28SJulian Elischer td_sched0.ts_state = TSS_THREAD; 1181ed062c8dSJulian Elischer } 1182ed062c8dSJulian Elischer 1183ed062c8dSJulian Elischer /* 118415dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 118515dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 118615dc847eSJeff Roberson * at most SCHED_SLICE_MAX.
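 *
 * Rough numbers, assuming SCHED_SLICE_MAX tracks the slice_max tunable
 * computed in sched_initticks() above: with realstathz = 128, slice_max
 * is 128 / 7 = 18 stat ticks, on the order of the ~140ms noted where
 * slice_max is assigned.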
118715dc847eSJeff Roberson */ 118835e6168fSJeff Roberson int 118935e6168fSJeff Roberson sched_rr_interval(void) 119035e6168fSJeff Roberson { 119135e6168fSJeff Roberson return (SCHED_SLICE_MAX); 119235e6168fSJeff Roberson } 119335e6168fSJeff Roberson 119422bf7d9aSJeff Roberson static void 1195ad1e7d28SJulian Elischer sched_pctcpu_update(struct td_sched *ts) 119635e6168fSJeff Roberson { 119735e6168fSJeff Roberson /* 119835e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1199210491d3SJeff Roberson */ 1200ad1e7d28SJulian Elischer if (ts->ts_ltick > ticks - SCHED_CPU_TICKS) { 1201210491d3SJeff Roberson /* 120281de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 120381de51bfSJeff Roberson * round away our results. 120465c8760dSJeff Roberson */ 1205ad1e7d28SJulian Elischer ts->ts_ticks <<= 10; 1206ad1e7d28SJulian Elischer ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 120735e6168fSJeff Roberson SCHED_CPU_TICKS; 1208ad1e7d28SJulian Elischer ts->ts_ticks >>= 10; 120981de51bfSJeff Roberson } else 1210ad1e7d28SJulian Elischer ts->ts_ticks = 0; 1211ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1212ad1e7d28SJulian Elischer ts->ts_ftick = ts->ts_ltick - SCHED_CPU_TICKS; 121335e6168fSJeff Roberson } 121435e6168fSJeff Roberson 121535e6168fSJeff Roberson void 1216f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 121735e6168fSJeff Roberson { 1218ad1e7d28SJulian Elischer struct td_sched *ts; 121935e6168fSJeff Roberson 122081d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 122181d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 122281d47d3fSJeff Roberson curthread->td_proc->p_comm); 1223ad1e7d28SJulian Elischer ts = td->td_sched; 122435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1225f5c157d9SJohn Baldwin if (td->td_priority == prio) 1226f5c157d9SJohn Baldwin return; 122735e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 12283f741ca1SJeff Roberson /* 12293f741ca1SJeff Roberson * If the priority has been elevated due to priority 12303f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 12313f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 12323f741ca1SJeff Roberson * needs to fix things up. 12333f741ca1SJeff Roberson */ 1234ad1e7d28SJulian Elischer if (prio < td->td_priority && ts->ts_runq != NULL && 1235ad1e7d28SJulian Elischer (ts->ts_flags & TSF_ASSIGNED) == 0 && 1236d2ad694cSJeff Roberson ts->ts_runq != TDQ_CPU(ts->ts_cpu)->tdq_curr) { 1237ad1e7d28SJulian Elischer runq_remove(ts->ts_runq, ts); 1238d2ad694cSJeff Roberson ts->ts_runq = TDQ_CPU(ts->ts_cpu)->tdq_curr; 1239ad1e7d28SJulian Elischer runq_add(ts->ts_runq, ts, 0); 124035e6168fSJeff Roberson } 1241f2b74cbfSJeff Roberson /* 1242ad1e7d28SJulian Elischer * Hold this td_sched on this cpu so that sched_prio() doesn't 1243f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1244f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1245f2b74cbfSJeff Roberson */ 1246ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 12473f741ca1SJeff Roberson adjustrunqueue(td, prio); 1248ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 12493f741ca1SJeff Roberson } else 12503f741ca1SJeff Roberson td->td_priority = prio; 125135e6168fSJeff Roberson } 125235e6168fSJeff Roberson 1253f5c157d9SJohn Baldwin /* 1254f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1255f5c157d9SJohn Baldwin * priority. 
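 *
 * Rough usage sketch of this pair ("owner" and "pri" below are just
 * placeholder names; priority propagation, e.g. from the turnstile
 * code, is the expected caller):
 *	sched_lend_prio(owner, curthread->td_priority);	(on contention)
 *	...
 *	sched_unlend_prio(owner, pri);	(once the boost is no longer needed)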
1256f5c157d9SJohn Baldwin */ 1257f5c157d9SJohn Baldwin void 1258f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1259f5c157d9SJohn Baldwin { 1260f5c157d9SJohn Baldwin 1261f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1262f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1263f5c157d9SJohn Baldwin } 1264f5c157d9SJohn Baldwin 1265f5c157d9SJohn Baldwin /* 1266f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1267f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1268f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1269f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1270f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1271f5c157d9SJohn Baldwin * of prio. 1272f5c157d9SJohn Baldwin */ 1273f5c157d9SJohn Baldwin void 1274f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1275f5c157d9SJohn Baldwin { 1276f5c157d9SJohn Baldwin u_char base_pri; 1277f5c157d9SJohn Baldwin 1278f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1279f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 12808460a577SJohn Birrell base_pri = td->td_user_pri; 1281f5c157d9SJohn Baldwin else 1282f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1283f5c157d9SJohn Baldwin if (prio >= base_pri) { 1284f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1285f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1286f5c157d9SJohn Baldwin } else 1287f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1288f5c157d9SJohn Baldwin } 1289f5c157d9SJohn Baldwin 1290f5c157d9SJohn Baldwin void 1291f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1292f5c157d9SJohn Baldwin { 1293f5c157d9SJohn Baldwin u_char oldprio; 1294f5c157d9SJohn Baldwin 1295f5c157d9SJohn Baldwin /* First, update the base priority. */ 1296f5c157d9SJohn Baldwin td->td_base_pri = prio; 1297f5c157d9SJohn Baldwin 1298f5c157d9SJohn Baldwin /* 129950aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1300f5c157d9SJohn Baldwin * ever lower the priority. 1301f5c157d9SJohn Baldwin */ 1302f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1303f5c157d9SJohn Baldwin return; 1304f5c157d9SJohn Baldwin 1305f5c157d9SJohn Baldwin /* Change the real priority. */ 1306f5c157d9SJohn Baldwin oldprio = td->td_priority; 1307f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1308f5c157d9SJohn Baldwin 1309f5c157d9SJohn Baldwin /* 1310f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1311f5c157d9SJohn Baldwin * its state. 
1312f5c157d9SJohn Baldwin */ 1313f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1314f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1315f5c157d9SJohn Baldwin } 1316f5c157d9SJohn Baldwin 131735e6168fSJeff Roberson void 13188460a577SJohn Birrell sched_user_prio(struct thread *td, u_char prio) 13193db720fdSDavid Xu { 13203db720fdSDavid Xu u_char oldprio; 13213db720fdSDavid Xu 13228460a577SJohn Birrell td->td_base_user_pri = prio; 1323fc6c30f6SJulian Elischer if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1324fc6c30f6SJulian Elischer return; 13258460a577SJohn Birrell oldprio = td->td_user_pri; 13268460a577SJohn Birrell td->td_user_pri = prio; 13273db720fdSDavid Xu 13283db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13293db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13303db720fdSDavid Xu } 13313db720fdSDavid Xu 13323db720fdSDavid Xu void 13333db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13343db720fdSDavid Xu { 13353db720fdSDavid Xu u_char oldprio; 13363db720fdSDavid Xu 13373db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13383db720fdSDavid Xu 1339f645b5daSMaxim Konovalov oldprio = td->td_user_pri; 13408460a577SJohn Birrell td->td_user_pri = prio; 13413db720fdSDavid Xu 13423db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13433db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13443db720fdSDavid Xu } 13453db720fdSDavid Xu 13463db720fdSDavid Xu void 13473db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13483db720fdSDavid Xu { 13493db720fdSDavid Xu u_char base_pri; 13503db720fdSDavid Xu 13518460a577SJohn Birrell base_pri = td->td_base_user_pri; 13523db720fdSDavid Xu if (prio >= base_pri) { 13533db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13548460a577SJohn Birrell sched_user_prio(td, base_pri); 13553db720fdSDavid Xu } else 13563db720fdSDavid Xu sched_lend_user_prio(td, prio); 13573db720fdSDavid Xu } 13583db720fdSDavid Xu 13593db720fdSDavid Xu void 13603389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 136135e6168fSJeff Roberson { 1362c02bbb43SJeff Roberson struct tdq *tdq; 1363ad1e7d28SJulian Elischer struct td_sched *ts; 136435e6168fSJeff Roberson 136535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 136635e6168fSJeff Roberson 1367ad1e7d28SJulian Elischer ts = td->td_sched; 1368c02bbb43SJeff Roberson tdq = TDQ_SELF(); 136935e6168fSJeff Roberson 1370060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1371060563ecSJulian Elischer td->td_oncpu = NOCPU; 137252eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 137377918643SStephan Uphoff td->td_owepreempt = 0; 137435e6168fSJeff Roberson 1375b11fdad0SJeff Roberson /* 1376ad1e7d28SJulian Elischer * If the thread has been assigned it may be in the process of switching 1377b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1378b11fdad0SJeff Roberson */ 13792454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1380bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1381ad1e7d28SJulian Elischer } else if ((ts->ts_flags & TSF_ASSIGNED) == 0) { 1382ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1383c02bbb43SJeff Roberson tdq_load_rem(tdq, ts); 1384ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1385f2b74cbfSJeff Roberson /* 1386ed062c8dSJulian Elischer * Don't allow the thread to migrate 1387ed062c8dSJulian Elischer * from a preemption. 
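 * A rough sketch of the mechanism: TSF_HOLD is set around the
 * setrunqueue() call below so that sched_add() keeps the thread on this
 * cpu, and SRQ_PREEMPTED makes sched_add() tag it TSF_PREEMPTED when it
 * is requeued.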
1388f2b74cbfSJeff Roberson */ 1389ad1e7d28SJulian Elischer ts->ts_flags |= TSF_HOLD; 1390598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 1391598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1392598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1393ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 13948460a577SJohn Birrell } 1395ed062c8dSJulian Elischer } 1396d39063f2SJulian Elischer if (newtd != NULL) { 1397c20c691bSJulian Elischer /* 13986680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 13996680bbd5SJeff Roberson * added to the run queue and then chosen. 1400c20c691bSJulian Elischer */ 1401ad1e7d28SJulian Elischer newtd->td_sched->ts_flags |= TSF_DIDRUN; 1402c02bbb43SJeff Roberson newtd->td_sched->ts_runq = tdq->tdq_curr; 1403c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1404ad1e7d28SJulian Elischer tdq_load_add(TDQ_SELF(), newtd->td_sched); 1405d39063f2SJulian Elischer } else 14062454aaf5SJeff Roberson newtd = choosethread(); 1407ebccf1e3SJoseph Koshy if (td != newtd) { 1408ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1409ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1410ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1411ebccf1e3SJoseph Koshy #endif 14128460a577SJohn Birrell 1413ae53b483SJeff Roberson cpu_switch(td, newtd); 1414ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1415ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1416ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1417ebccf1e3SJoseph Koshy #endif 1418ebccf1e3SJoseph Koshy } 1419ebccf1e3SJoseph Koshy 1420ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 142135e6168fSJeff Roberson 1422060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 142335e6168fSJeff Roberson } 142435e6168fSJeff Roberson 142535e6168fSJeff Roberson void 1426fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 142735e6168fSJeff Roberson { 1428ad1e7d28SJulian Elischer struct td_sched *ts; 142935e6168fSJeff Roberson struct thread *td; 1430ad1e7d28SJulian Elischer struct tdq *tdq; 143135e6168fSJeff Roberson 1432fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14330b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 143415dc847eSJeff Roberson /* 1435ad1e7d28SJulian Elischer * We need to adjust the nice counts for running threads. 
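 * (A sketch of what the loop below does: for each timesharing thread of
 * the proc that is already on a run queue, the old nice value is removed
 * from that queue's tally and the new one added, so tdq_nicemin stays
 * accurate for sched_slice().)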
143615dc847eSJeff Roberson */ 14378460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14388460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) { 1439ad1e7d28SJulian Elischer ts = td->td_sched; 1440ad1e7d28SJulian Elischer if (ts->ts_runq == NULL) 144115dc847eSJeff Roberson continue; 1442ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1443ad1e7d28SJulian Elischer tdq_nice_rem(tdq, p->p_nice); 1444ad1e7d28SJulian Elischer tdq_nice_add(tdq, nice); 144515dc847eSJeff Roberson } 1446fa885116SJulian Elischer } 1447fa885116SJulian Elischer p->p_nice = nice; 14488460a577SJohn Birrell FOREACH_THREAD_IN_PROC(p, td) { 14498460a577SJohn Birrell sched_priority(td); 14504a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 145135e6168fSJeff Roberson } 1452fa885116SJulian Elischer } 145335e6168fSJeff Roberson 145435e6168fSJeff Roberson void 145544f3b092SJohn Baldwin sched_sleep(struct thread *td) 145635e6168fSJeff Roberson { 145735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 145835e6168fSJeff Roberson 1459ad1e7d28SJulian Elischer td->td_sched->ts_slptime = ticks; 146035e6168fSJeff Roberson } 146135e6168fSJeff Roberson 146235e6168fSJeff Roberson void 146335e6168fSJeff Roberson sched_wakeup(struct thread *td) 146435e6168fSJeff Roberson { 146535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 146635e6168fSJeff Roberson 146735e6168fSJeff Roberson /* 1468ad1e7d28SJulian Elischer * Let the procs know how long we slept for. This is because process 1469ad1e7d28SJulian Elischer * interactivity behavior is modeled in the procs. 147035e6168fSJeff Roberson */ 1471ad1e7d28SJulian Elischer if (td->td_sched->ts_slptime) { 147215dc847eSJeff Roberson int hzticks; 1473f1e8dc4aSJeff Roberson 1474ad1e7d28SJulian Elischer hzticks = (ticks - td->td_sched->ts_slptime) << 10; 1475d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 14768460a577SJohn Birrell td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX; 14778460a577SJohn Birrell td->td_sched->skg_runtime = 1; 1478d322132cSJeff Roberson } else { 14798460a577SJohn Birrell td->td_sched->skg_slptime += hzticks; 14808460a577SJohn Birrell sched_interact_update(td); 1481d322132cSJeff Roberson } 14828460a577SJohn Birrell sched_priority(td); 1483ad1e7d28SJulian Elischer sched_slice(td->td_sched); 1484ad1e7d28SJulian Elischer td->td_sched->ts_slptime = 0; 1485f1e8dc4aSJeff Roberson } 14862630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 148735e6168fSJeff Roberson } 148835e6168fSJeff Roberson 148935e6168fSJeff Roberson /* 149035e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 149135e6168fSJeff Roberson * priority. 
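 * A rough sketch of how that plays out in sched_fork_thread() below: the
 * child inherits the parent's skg_slptime/skg_runtime history,
 * sched_interact_fork() scales both down if their sum exceeds
 * SCHED_SLP_RUN_FORK, the child starts with a one-tick slice so its own
 * interactivity is learned quickly, and the parent is charged one tick
 * of runtime (tickincr) before sched_interact_update() re-clamps its
 * history.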
149235e6168fSJeff Roberson */ 149335e6168fSJeff Roberson void 14948460a577SJohn Birrell sched_fork(struct thread *td, struct thread *child) 149515dc847eSJeff Roberson { 14968460a577SJohn Birrell mtx_assert(&sched_lock, MA_OWNED); 1497ad1e7d28SJulian Elischer sched_fork_thread(td, child); 1498ad1e7d28SJulian Elischer } 1499ad1e7d28SJulian Elischer 1500ad1e7d28SJulian Elischer void 1501ad1e7d28SJulian Elischer sched_fork_thread(struct thread *td, struct thread *child) 1502ad1e7d28SJulian Elischer { 1503ad1e7d28SJulian Elischer struct td_sched *ts; 1504ad1e7d28SJulian Elischer struct td_sched *ts2; 15058460a577SJohn Birrell 15068460a577SJohn Birrell child->td_sched->skg_slptime = td->td_sched->skg_slptime; 15078460a577SJohn Birrell child->td_sched->skg_runtime = td->td_sched->skg_runtime; 15088460a577SJohn Birrell child->td_user_pri = td->td_user_pri; 1509f645b5daSMaxim Konovalov child->td_base_user_pri = td->td_base_user_pri; 15108460a577SJohn Birrell sched_interact_fork(child); 15118460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 15128460a577SJohn Birrell sched_interact_update(td); 15138460a577SJohn Birrell 1514ed062c8dSJulian Elischer sched_newthread(child); 15158460a577SJohn Birrell 1516ad1e7d28SJulian Elischer ts = td->td_sched; 1517ad1e7d28SJulian Elischer ts2 = child->td_sched; 1518ad1e7d28SJulian Elischer ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ 1519ad1e7d28SJulian Elischer ts2->ts_cpu = ts->ts_cpu; 1520ad1e7d28SJulian Elischer ts2->ts_runq = NULL; 1521ed062c8dSJulian Elischer 1522ed062c8dSJulian Elischer /* Grab our parent's cpu estimation information. */ 1523ad1e7d28SJulian Elischer ts2->ts_ticks = ts->ts_ticks; 1524ad1e7d28SJulian Elischer ts2->ts_ltick = ts->ts_ltick; 1525ad1e7d28SJulian Elischer ts2->ts_ftick = ts->ts_ftick; 152615dc847eSJeff Roberson } 152715dc847eSJeff Roberson 152815dc847eSJeff Roberson void 15298460a577SJohn Birrell sched_class(struct thread *td, int class) 153015dc847eSJeff Roberson { 1531ad1e7d28SJulian Elischer struct tdq *tdq; 1532ad1e7d28SJulian Elischer struct td_sched *ts; 1533ef1134c9SJeff Roberson int nclass; 1534ef1134c9SJeff Roberson int oclass; 153515dc847eSJeff Roberson 15362056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 15378460a577SJohn Birrell if (td->td_pri_class == class) 153815dc847eSJeff Roberson return; 153915dc847eSJeff Roberson 1540ef1134c9SJeff Roberson nclass = PRI_BASE(class); 15418460a577SJohn Birrell oclass = PRI_BASE(td->td_pri_class); 1542ad1e7d28SJulian Elischer ts = td->td_sched; 1543ad1e7d28SJulian Elischer if (!((ts->ts_state != TSS_ONRUNQ && 1544ad1e7d28SJulian Elischer ts->ts_state != TSS_THREAD) || ts->ts_runq == NULL)) { 1545ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 154615dc847eSJeff Roberson 1547ef1134c9SJeff Roberson #ifdef SMP 1548155b9987SJeff Roberson /* 1549155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1550155b9987SJeff Roberson * count because we could be changing to or from an interrupt 1551155b9987SJeff Roberson * class.
1552155b9987SJeff Roberson */ 1553ad1e7d28SJulian Elischer if (ts->ts_state == TSS_ONRUNQ) { 1554ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) { 1555d2ad694cSJeff Roberson tdq->tdq_transferable--; 1556d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable--; 155780f86c9fSJeff Roberson } 1558ad1e7d28SJulian Elischer if (THREAD_CAN_MIGRATE(ts)) { 1559d2ad694cSJeff Roberson tdq->tdq_transferable++; 1560d2ad694cSJeff Roberson tdq->tdq_group->tdg_transferable++; 156180f86c9fSJeff Roberson } 1562155b9987SJeff Roberson } 1563ef1134c9SJeff Roberson #endif 1564155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1565d2ad694cSJeff Roberson tdq->tdq_load_timeshare--; 1566ad1e7d28SJulian Elischer tdq_nice_rem(tdq, td->td_proc->p_nice); 1567155b9987SJeff Roberson } 1568155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1569d2ad694cSJeff Roberson tdq->tdq_load_timeshare++; 1570ad1e7d28SJulian Elischer tdq_nice_add(tdq, td->td_proc->p_nice); 1571ad1e7d28SJulian Elischer } 1572155b9987SJeff Roberson } 157315dc847eSJeff Roberson 15748460a577SJohn Birrell td->td_pri_class = class; 157535e6168fSJeff Roberson } 157635e6168fSJeff Roberson 157735e6168fSJeff Roberson /* 157835e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 157935e6168fSJeff Roberson */ 158035e6168fSJeff Roberson void 1581fc6c30f6SJulian Elischer sched_exit(struct proc *p, struct thread *child) 158235e6168fSJeff Roberson { 1583141ad61cSJeff Roberson 15848460a577SJohn Birrell CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1585fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 15868460a577SJohn Birrell 1587fc6c30f6SJulian Elischer sched_exit_thread(FIRST_THREAD_IN_PROC(p), child); 1588ad1e7d28SJulian Elischer } 1589ad1e7d28SJulian Elischer 1590ad1e7d28SJulian Elischer void 1591fc6c30f6SJulian Elischer sched_exit_thread(struct thread *td, struct thread *child) 1592ad1e7d28SJulian Elischer { 1593fc6c30f6SJulian Elischer CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1594fc6c30f6SJulian Elischer child, child->td_proc->p_comm, child->td_priority); 1595fc6c30f6SJulian Elischer 1596fc6c30f6SJulian Elischer td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1597fc6c30f6SJulian Elischer sched_interact_update(td); 1598fc6c30f6SJulian Elischer tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1599ad1e7d28SJulian Elischer } 1600ad1e7d28SJulian Elischer 1601ad1e7d28SJulian Elischer void 1602ad1e7d28SJulian Elischer sched_userret(struct thread *td) 1603ad1e7d28SJulian Elischer { 1604ad1e7d28SJulian Elischer /* 1605ad1e7d28SJulian Elischer * XXX we cheat slightly on the locking here to avoid locking in 1606ad1e7d28SJulian Elischer * the usual case. Setting td_priority here is essentially an 1607ad1e7d28SJulian Elischer * incomplete workaround for not setting it properly elsewhere. 1608ad1e7d28SJulian Elischer * Now that some interrupt handlers are threads, not setting it 1609ad1e7d28SJulian Elischer * properly elsewhere can clobber it in the window between setting 1610ad1e7d28SJulian Elischer * it here and returning to user mode, so don't waste time setting 1611ad1e7d28SJulian Elischer * it perfectly here.
1612ad1e7d28SJulian Elischer */ 1613ad1e7d28SJulian Elischer KASSERT((td->td_flags & TDF_BORROWING) == 0, 1614ad1e7d28SJulian Elischer ("thread with borrowed priority returning to userland")); 1615ad1e7d28SJulian Elischer if (td->td_priority != td->td_user_pri) { 1616ad1e7d28SJulian Elischer mtx_lock_spin(&sched_lock); 1617ad1e7d28SJulian Elischer td->td_priority = td->td_user_pri; 1618ad1e7d28SJulian Elischer td->td_base_pri = td->td_user_pri; 1619ad1e7d28SJulian Elischer mtx_unlock_spin(&sched_lock); 1620ad1e7d28SJulian Elischer } 162135e6168fSJeff Roberson } 162235e6168fSJeff Roberson 162335e6168fSJeff Roberson void 16247cf90fb3SJeff Roberson sched_clock(struct thread *td) 162535e6168fSJeff Roberson { 1626ad1e7d28SJulian Elischer struct tdq *tdq; 1627ad1e7d28SJulian Elischer struct td_sched *ts; 162835e6168fSJeff Roberson 1629dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1630ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1631dc03363dSJeff Roberson #ifdef SMP 1632598b368dSJeff Roberson if (ticks >= bal_tick) 1633dc03363dSJeff Roberson sched_balance(); 1634598b368dSJeff Roberson if (ticks >= gbal_tick && balance_groups) 1635dc03363dSJeff Roberson sched_balance_groups(); 16362454aaf5SJeff Roberson /* 16372454aaf5SJeff Roberson * We could have been assigned a non real-time thread without an 16382454aaf5SJeff Roberson * IPI. 16392454aaf5SJeff Roberson */ 1640d2ad694cSJeff Roberson if (tdq->tdq_assigned) 1641ad1e7d28SJulian Elischer tdq_assign(tdq); /* Potentially sets NEEDRESCHED */ 1642dc03363dSJeff Roberson #endif 1643ad1e7d28SJulian Elischer ts = td->td_sched; 164435e6168fSJeff Roberson 16450a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 1646ad1e7d28SJulian Elischer ts->ts_ticks++; 1647ad1e7d28SJulian Elischer ts->ts_ltick = ticks; 1648a8949de2SJeff Roberson 1649d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1650ad1e7d28SJulian Elischer if (ts->ts_ftick + SCHED_CPU_TICKS + hz < ts->ts_ltick) 1651ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 1652d465fb95SJeff Roberson 165343fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 165435e6168fSJeff Roberson return; 16553f741ca1SJeff Roberson /* 16568460a577SJohn Birrell * We only run the slicing code for TIMESHARE threads. 1657a8949de2SJeff Roberson */ 16588460a577SJohn Birrell if (td->td_pri_class != PRI_TIMESHARE) 1659a8949de2SJeff Roberson return; 1660a8949de2SJeff Roberson /* 16618460a577SJohn Birrell * We used a tick; charge it to the thread so that we can compute our 166215dc847eSJeff Roberson * interactivity. 166315dc847eSJeff Roberson */ 16648460a577SJohn Birrell td->td_sched->skg_runtime += tickincr; 16658460a577SJohn Birrell sched_interact_update(td); 1666407b0157SJeff Roberson 166735e6168fSJeff Roberson /* 166835e6168fSJeff Roberson * We used up one time slice. 166935e6168fSJeff Roberson */ 1670ad1e7d28SJulian Elischer if (--ts->ts_slice > 0) 167115dc847eSJeff Roberson return; 167235e6168fSJeff Roberson /* 167315dc847eSJeff Roberson * We're out of time, recompute priorities and requeue.
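 * (A sketch of the requeue below: once sched_priority() and sched_slice()
 * have been refreshed, SCHED_CURR() decides whether the thread goes back
 * on tdq_curr or drops to tdq_next, so a thread that keeps consuming its
 * slice gradually yields the current queue to more interactive threads.)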
167435e6168fSJeff Roberson */ 1675ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 16768460a577SJohn Birrell sched_priority(td); 1677ad1e7d28SJulian Elischer sched_slice(ts); 1678ad1e7d28SJulian Elischer if (SCHED_CURR(td, ts)) 1679d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_curr; 168015dc847eSJeff Roberson else 1681d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_next; 1682ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 16834a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 168435e6168fSJeff Roberson } 168535e6168fSJeff Roberson 168635e6168fSJeff Roberson int 168735e6168fSJeff Roberson sched_runnable(void) 168835e6168fSJeff Roberson { 1689ad1e7d28SJulian Elischer struct tdq *tdq; 1690b90816f1SJeff Roberson int load; 169135e6168fSJeff Roberson 1692b90816f1SJeff Roberson load = 1; 1693b90816f1SJeff Roberson 1694ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 169522bf7d9aSJeff Roberson #ifdef SMP 1696d2ad694cSJeff Roberson if (tdq->tdq_assigned) { 169746f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 1698ad1e7d28SJulian Elischer tdq_assign(tdq); 169946f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 170046f8b265SJeff Roberson } 170122bf7d9aSJeff Roberson #endif 17023f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 1703d2ad694cSJeff Roberson if (tdq->tdq_load > 0) 17043f741ca1SJeff Roberson goto out; 17053f741ca1SJeff Roberson } else 1706d2ad694cSJeff Roberson if (tdq->tdq_load - 1 > 0) 1707b90816f1SJeff Roberson goto out; 1708b90816f1SJeff Roberson load = 0; 1709b90816f1SJeff Roberson out: 1710b90816f1SJeff Roberson return (load); 171135e6168fSJeff Roberson } 171235e6168fSJeff Roberson 1713ad1e7d28SJulian Elischer struct td_sched * 1714c9f25d8fSJeff Roberson sched_choose(void) 1715c9f25d8fSJeff Roberson { 1716ad1e7d28SJulian Elischer struct tdq *tdq; 1717ad1e7d28SJulian Elischer struct td_sched *ts; 171815dc847eSJeff Roberson 1719b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1720ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 172115dc847eSJeff Roberson #ifdef SMP 172280f86c9fSJeff Roberson restart: 1723d2ad694cSJeff Roberson if (tdq->tdq_assigned) 1724ad1e7d28SJulian Elischer tdq_assign(tdq); 172515dc847eSJeff Roberson #endif 1726ad1e7d28SJulian Elischer ts = tdq_choose(tdq); 1727ad1e7d28SJulian Elischer if (ts) { 172822bf7d9aSJeff Roberson #ifdef SMP 1729ad1e7d28SJulian Elischer if (ts->ts_thread->td_pri_class == PRI_IDLE) 1730ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 173180f86c9fSJeff Roberson goto restart; 173222bf7d9aSJeff Roberson #endif 1733ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1734ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1735ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_PREEMPTED; 1736ad1e7d28SJulian Elischer return (ts); 173735e6168fSJeff Roberson } 1738c9f25d8fSJeff Roberson #ifdef SMP 1739ad1e7d28SJulian Elischer if (tdq_idled(tdq) == 0) 174080f86c9fSJeff Roberson goto restart; 1741c9f25d8fSJeff Roberson #endif 174215dc847eSJeff Roberson return (NULL); 174335e6168fSJeff Roberson } 174435e6168fSJeff Roberson 174535e6168fSJeff Roberson void 17462630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 174735e6168fSJeff Roberson { 1748ad1e7d28SJulian Elischer struct tdq *tdq; 1749ad1e7d28SJulian Elischer struct td_sched *ts; 1750598b368dSJeff Roberson int preemptive; 17512454aaf5SJeff Roberson int canmigrate; 175222bf7d9aSJeff Roberson int class; 1753c9f25d8fSJeff Roberson 175481d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 175581d47d3fSJeff Roberson td, td->td_proc->p_comm, 
td->td_priority, curthread, 175681d47d3fSJeff Roberson curthread->td_proc->p_comm); 175722bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1758ad1e7d28SJulian Elischer ts = td->td_sched; 1759598b368dSJeff Roberson canmigrate = 1; 1760598b368dSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 17618460a577SJohn Birrell class = PRI_BASE(td->td_pri_class); 1762ad1e7d28SJulian Elischer tdq = TDQ_SELF(); 1763ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_INTERNAL; 1764598b368dSJeff Roberson #ifdef SMP 1765ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1766ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_REMOVED) 1767ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_REMOVED; 176822bf7d9aSJeff Roberson return; 17692d59a44dSJeff Roberson } 1770ad1e7d28SJulian Elischer canmigrate = THREAD_CAN_MIGRATE(ts); 1771f8ec133eSDavid Xu /* 1772f8ec133eSDavid Xu * Don't migrate running threads here. Force the long term balancer 1773f8ec133eSDavid Xu * to do it. 1774f8ec133eSDavid Xu */ 1775ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_HOLD) { 1776ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_HOLD; 1777f8ec133eSDavid Xu canmigrate = 0; 1778f8ec133eSDavid Xu } 1779598b368dSJeff Roberson #endif 1780ad1e7d28SJulian Elischer KASSERT(ts->ts_state != TSS_ONRUNQ, 1781ad1e7d28SJulian Elischer ("sched_add: thread %p (%s) already in run queue", td, 17828460a577SJohn Birrell td->td_proc->p_comm)); 17838460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 17845d7ef00cSJeff Roberson ("sched_add: process swapped out")); 1785ad1e7d28SJulian Elischer KASSERT(ts->ts_runq == NULL, 1786ad1e7d28SJulian Elischer ("sched_add: thread %p is still assigned to a run queue", td)); 17871278181cSDavid Xu if (flags & SRQ_PREEMPTED) 1788ad1e7d28SJulian Elischer ts->ts_flags |= TSF_PREEMPTED; 178922bf7d9aSJeff Roberson switch (class) { 1790a8949de2SJeff Roberson case PRI_ITHD: 1791a8949de2SJeff Roberson case PRI_REALTIME: 1792d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_curr; 1793ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MAX; 1794598b368dSJeff Roberson if (canmigrate) 1795ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 1796a8949de2SJeff Roberson break; 1797a8949de2SJeff Roberson case PRI_TIMESHARE: 1798ad1e7d28SJulian Elischer if (SCHED_CURR(td, ts)) 1799d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_curr; 180015dc847eSJeff Roberson else 1801d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_next; 180215dc847eSJeff Roberson break; 180315dc847eSJeff Roberson case PRI_IDLE: 180415dc847eSJeff Roberson /* 180515dc847eSJeff Roberson * This is for priority prop. 180615dc847eSJeff Roberson */ 1807ad1e7d28SJulian Elischer if (ts->ts_thread->td_priority < PRI_MIN_IDLE) 1808d2ad694cSJeff Roberson ts->ts_runq = tdq->tdq_curr; 180915dc847eSJeff Roberson else 1810d2ad694cSJeff Roberson ts->ts_runq = &tdq->tdq_idle; 1811ad1e7d28SJulian Elischer ts->ts_slice = SCHED_SLICE_MIN; 181215dc847eSJeff Roberson break; 181315dc847eSJeff Roberson default: 1814d322132cSJeff Roberson panic("Unknown pri class."); 1815a8949de2SJeff Roberson break; 1816a6ed4186SJeff Roberson } 181722bf7d9aSJeff Roberson #ifdef SMP 18182454aaf5SJeff Roberson /* 18192454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 
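 * Rough sketch of the hand-off, assuming tdq_notify() queues the
 * td_sched on the remote cpu's assigned list: that cpu later drains it
 * via tdq_assign(), as seen in sched_clock() and sched_runnable() above.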
18202454aaf5SJeff Roberson */ 1821ad1e7d28SJulian Elischer if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid) ) { 1822ad1e7d28SJulian Elischer ts->ts_runq = NULL; 1823ad1e7d28SJulian Elischer tdq_notify(ts, ts->ts_cpu); 182480f86c9fSJeff Roberson return; 182580f86c9fSJeff Roberson } 182622bf7d9aSJeff Roberson /* 1827670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1828670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 182922bf7d9aSJeff Roberson */ 183080f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 1831d2ad694cSJeff Roberson (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) { 183280f86c9fSJeff Roberson /* 183380f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 183480f86c9fSJeff Roberson * from the global idle mask. 183580f86c9fSJeff Roberson */ 1836d2ad694cSJeff Roberson if (tdq->tdq_group->tdg_idlemask == 1837d2ad694cSJeff Roberson tdq->tdq_group->tdg_cpumask) 1838d2ad694cSJeff Roberson atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask); 183980f86c9fSJeff Roberson /* 184080f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 184180f86c9fSJeff Roberson */ 1842d2ad694cSJeff Roberson tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask); 1843d2ad694cSJeff Roberson } else if (canmigrate && tdq->tdq_load > 1 && class != PRI_ITHD) 1844ad1e7d28SJulian Elischer if (tdq_transfer(tdq, ts, class)) 1845670c524fSJeff Roberson return; 1846ad1e7d28SJulian Elischer ts->ts_cpu = PCPU_GET(cpuid); 184722bf7d9aSJeff Roberson #endif 1848f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1849d2ad694cSJeff Roberson ts->ts_runq == tdq->tdq_curr) 185022bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 185163fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18520c0b25aeSJohn Baldwin return; 1853ad1e7d28SJulian Elischer ts->ts_state = TSS_ONRUNQ; 185435e6168fSJeff Roberson 1855ad1e7d28SJulian Elischer tdq_runq_add(tdq, ts, flags); 1856ad1e7d28SJulian Elischer tdq_load_add(tdq, ts); 185735e6168fSJeff Roberson } 185835e6168fSJeff Roberson 185935e6168fSJeff Roberson void 18607cf90fb3SJeff Roberson sched_rem(struct thread *td) 186135e6168fSJeff Roberson { 1862ad1e7d28SJulian Elischer struct tdq *tdq; 1863ad1e7d28SJulian Elischer struct td_sched *ts; 18647cf90fb3SJeff Roberson 186581d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 186681d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 186781d47d3fSJeff Roberson curthread->td_proc->p_comm); 1868598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1869ad1e7d28SJulian Elischer ts = td->td_sched; 1870ad1e7d28SJulian Elischer ts->ts_flags &= ~TSF_PREEMPTED; 1871ad1e7d28SJulian Elischer if (ts->ts_flags & TSF_ASSIGNED) { 1872ad1e7d28SJulian Elischer ts->ts_flags |= TSF_REMOVED; 187322bf7d9aSJeff Roberson return; 18742d59a44dSJeff Roberson } 1875ad1e7d28SJulian Elischer KASSERT((ts->ts_state == TSS_ONRUNQ), 1876ad1e7d28SJulian Elischer ("sched_rem: thread not on run queue")); 187735e6168fSJeff Roberson 1878ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1879ad1e7d28SJulian Elischer tdq = TDQ_CPU(ts->ts_cpu); 1880ad1e7d28SJulian Elischer tdq_runq_rem(tdq, ts); 1881ad1e7d28SJulian Elischer tdq_load_rem(tdq, ts); 188235e6168fSJeff Roberson } 188335e6168fSJeff Roberson 188435e6168fSJeff Roberson fixpt_t 18857cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 188635e6168fSJeff Roberson { 188735e6168fSJeff 
Roberson fixpt_t pctcpu; 1888ad1e7d28SJulian Elischer struct td_sched *ts; 188935e6168fSJeff Roberson 189035e6168fSJeff Roberson pctcpu = 0; 1891ad1e7d28SJulian Elischer ts = td->td_sched; 1892ad1e7d28SJulian Elischer if (ts == NULL) 1893484288deSJeff Roberson return (0); 189435e6168fSJeff Roberson 1895b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 1896ad1e7d28SJulian Elischer if (ts->ts_ticks) { 189735e6168fSJeff Roberson int rtick; 189835e6168fSJeff Roberson 1899210491d3SJeff Roberson /* 1900210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1901210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1902210491d3SJeff Roberson * rounding errors. 1903210491d3SJeff Roberson */ 1904ad1e7d28SJulian Elischer if (ts->ts_ftick + SCHED_CPU_TICKS < ts->ts_ltick || 1905ad1e7d28SJulian Elischer ts->ts_ltick < (ticks - (hz / 2))) 1906ad1e7d28SJulian Elischer sched_pctcpu_update(ts); 190735e6168fSJeff Roberson /* How many rtick per second ? */ 1908ad1e7d28SJulian Elischer rtick = min(ts->ts_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 19097121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 191035e6168fSJeff Roberson } 191135e6168fSJeff Roberson 1912ad1e7d28SJulian Elischer td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1913828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 191435e6168fSJeff Roberson 191535e6168fSJeff Roberson return (pctcpu); 191635e6168fSJeff Roberson } 191735e6168fSJeff Roberson 19189bacd788SJeff Roberson void 19199bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19209bacd788SJeff Roberson { 1921ad1e7d28SJulian Elischer struct td_sched *ts; 19229bacd788SJeff Roberson 19239bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1924ad1e7d28SJulian Elischer ts = td->td_sched; 1925ad1e7d28SJulian Elischer ts->ts_flags |= TSF_BOUND; 192680f86c9fSJeff Roberson #ifdef SMP 192780f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 19289bacd788SJeff Roberson return; 19299bacd788SJeff Roberson /* sched_rem without the runq_remove */ 1930ad1e7d28SJulian Elischer ts->ts_state = TSS_THREAD; 1931ad1e7d28SJulian Elischer tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts); 1932ad1e7d28SJulian Elischer tdq_notify(ts, cpu); 19339bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
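 *
 * A rough usage sketch for callers (a sketch only, not a required
 * pattern; "cpu" is whatever cpu the caller needs, and sched_lock must
 * be held across the calls as the asserts here require):
 *	mtx_lock_spin(&sched_lock);
 *	sched_bind(curthread, cpu);
 *	mtx_unlock_spin(&sched_lock);
 *	... do the per-cpu work ...
 *	mtx_lock_spin(&sched_lock);
 *	sched_unbind(curthread);
 *	mtx_unlock_spin(&sched_lock);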
*/ 1934279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19359bacd788SJeff Roberson #endif 19369bacd788SJeff Roberson } 19379bacd788SJeff Roberson 19389bacd788SJeff Roberson void 19399bacd788SJeff Roberson sched_unbind(struct thread *td) 19409bacd788SJeff Roberson { 19419bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1942ad1e7d28SJulian Elischer td->td_sched->ts_flags &= ~TSF_BOUND; 19439bacd788SJeff Roberson } 19449bacd788SJeff Roberson 194535e6168fSJeff Roberson int 1946ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1947ebccf1e3SJoseph Koshy { 1948ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1949ad1e7d28SJulian Elischer return (td->td_sched->ts_flags & TSF_BOUND); 1950ebccf1e3SJoseph Koshy } 1951ebccf1e3SJoseph Koshy 195236ec198bSDavid Xu void 195336ec198bSDavid Xu sched_relinquish(struct thread *td) 195436ec198bSDavid Xu { 195536ec198bSDavid Xu mtx_lock_spin(&sched_lock); 19568460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 195736ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 195836ec198bSDavid Xu mi_switch(SW_VOL, NULL); 195936ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 196036ec198bSDavid Xu } 196136ec198bSDavid Xu 1962ebccf1e3SJoseph Koshy int 196333916c36SJeff Roberson sched_load(void) 196433916c36SJeff Roberson { 196533916c36SJeff Roberson #ifdef SMP 196633916c36SJeff Roberson int total; 196733916c36SJeff Roberson int i; 196833916c36SJeff Roberson 196933916c36SJeff Roberson total = 0; 1970d2ad694cSJeff Roberson for (i = 0; i <= tdg_maxid; i++) 1971d2ad694cSJeff Roberson total += TDQ_GROUP(i)->tdg_load; 197233916c36SJeff Roberson return (total); 197333916c36SJeff Roberson #else 1974d2ad694cSJeff Roberson return (TDQ_SELF()->tdq_sysload); 197533916c36SJeff Roberson #endif 197633916c36SJeff Roberson } 197733916c36SJeff Roberson 197833916c36SJeff Roberson int 197935e6168fSJeff Roberson sched_sizeof_proc(void) 198035e6168fSJeff Roberson { 198135e6168fSJeff Roberson return (sizeof(struct proc)); 198235e6168fSJeff Roberson } 198335e6168fSJeff Roberson 198435e6168fSJeff Roberson int 198535e6168fSJeff Roberson sched_sizeof_thread(void) 198635e6168fSJeff Roberson { 198735e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 198835e6168fSJeff Roberson } 1989b41f1452SDavid Xu 1990b41f1452SDavid Xu void 1991b41f1452SDavid Xu sched_tick(void) 1992b41f1452SDavid Xu { 1993b41f1452SDavid Xu } 1994ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1995ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1996