135e6168fSJeff Roberson /*- 215dc847eSJeff Roberson * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. 
O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. O'Brien 309923b511SScott Long #include <opt_sched.h> 319923b511SScott Long 32ed062c8dSJulian Elischer #define kse td_sched 33ed062c8dSJulian Elischer 3435e6168fSJeff Roberson #include <sys/param.h> 3535e6168fSJeff Roberson #include <sys/systm.h> 362c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3735e6168fSJeff Roberson #include <sys/kernel.h> 3835e6168fSJeff Roberson #include <sys/ktr.h> 3935e6168fSJeff Roberson #include <sys/lock.h> 4035e6168fSJeff Roberson #include <sys/mutex.h> 4135e6168fSJeff Roberson #include <sys/proc.h> 42245f3abfSJeff Roberson #include <sys/resource.h> 439bacd788SJeff Roberson #include <sys/resourcevar.h> 4435e6168fSJeff Roberson #include <sys/sched.h> 4535e6168fSJeff Roberson #include <sys/smp.h> 4635e6168fSJeff Roberson #include <sys/sx.h> 4735e6168fSJeff Roberson #include <sys/sysctl.h> 4835e6168fSJeff Roberson #include <sys/sysproto.h> 49f5c157d9SJohn Baldwin #include <sys/turnstile.h> 5035e6168fSJeff Roberson #include <sys/vmmeter.h> 5135e6168fSJeff Roberson #ifdef KTRACE 5235e6168fSJeff Roberson #include <sys/uio.h> 5335e6168fSJeff Roberson #include <sys/ktrace.h> 5435e6168fSJeff Roberson #endif 5535e6168fSJeff Roberson 5635e6168fSJeff Roberson #include <machine/cpu.h> 5722bf7d9aSJeff Roberson #include <machine/smp.h> 5835e6168fSJeff Roberson 5935e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 6035e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 6135e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6235e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6335e6168fSJeff Roberson 6435e6168fSJeff Roberson static void sched_setup(void *dummy); 6535e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 6635e6168fSJeff Roberson 67e038d354SScott Long static 
SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 68e1f89c22SJeff Roberson 69e038d354SScott Long SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 70e038d354SScott Long "Scheduler name"); 71dc095794SScott Long 7215dc847eSJeff Roberson static int slice_min = 1; 7315dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 7415dc847eSJeff Roberson 75210491d3SJeff Roberson static int slice_max = 10; 7615dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 7715dc847eSJeff Roberson 7815dc847eSJeff Roberson int realstathz; 7915dc847eSJeff Roberson int tickincr = 1; 80783caefbSJeff Roberson 8135e6168fSJeff Roberson /* 82ed062c8dSJulian Elischer * The schedulable entity that can be given a context to run. 83ed062c8dSJulian Elischer * A process may have several of these. Probably one per processor 84ed062c8dSJulian Elischer * but posibly a few more. In this universe they are grouped 85ed062c8dSJulian Elischer * with a KSEG that contains the priority and niceness 86ed062c8dSJulian Elischer * for the group. 87ed062c8dSJulian Elischer */ 88ed062c8dSJulian Elischer struct kse { 89ed062c8dSJulian Elischer TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ 90ed062c8dSJulian Elischer int ke_flags; /* (j) KEF_* flags. */ 91ed062c8dSJulian Elischer struct thread *ke_thread; /* (*) Active associated thread. */ 92ed062c8dSJulian Elischer fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ 93ed062c8dSJulian Elischer char ke_rqindex; /* (j) Run queue index. */ 94ed062c8dSJulian Elischer enum { 95ed062c8dSJulian Elischer KES_THREAD = 0x0, /* slaved to thread state */ 96ed062c8dSJulian Elischer KES_ONRUNQ 97ed062c8dSJulian Elischer } ke_state; /* (j) thread sched specific status. 
*/ 98ed062c8dSJulian Elischer int ke_slptime; 99ed062c8dSJulian Elischer int ke_slice; 100ed062c8dSJulian Elischer struct runq *ke_runq; 101ed062c8dSJulian Elischer u_char ke_cpu; /* CPU that we have affinity for. */ 102ed062c8dSJulian Elischer /* The following variables are only used for pctcpu calculation */ 103ed062c8dSJulian Elischer int ke_ltick; /* Last tick that we were running on */ 104ed062c8dSJulian Elischer int ke_ftick; /* First tick that we were running on */ 105ed062c8dSJulian Elischer int ke_ticks; /* Tick count */ 106ed062c8dSJulian Elischer 107ed062c8dSJulian Elischer }; 108ed062c8dSJulian Elischer 109ed062c8dSJulian Elischer 110ed062c8dSJulian Elischer #define td_kse td_sched 111ed062c8dSJulian Elischer #define td_slptime td_kse->ke_slptime 112ed062c8dSJulian Elischer #define ke_proc ke_thread->td_proc 113ed062c8dSJulian Elischer #define ke_ksegrp ke_thread->td_ksegrp 114ed062c8dSJulian Elischer 115ed062c8dSJulian Elischer /* flags kept in ke_flags */ 116ed062c8dSJulian Elischer #define KEF_SCHED0 0x00001 /* For scheduler-specific use. */ 117ed062c8dSJulian Elischer #define KEF_SCHED1 0x00002 /* For scheduler-specific use. */ 118ed062c8dSJulian Elischer #define KEF_SCHED2 0x00004 /* For scheduler-specific use. */ 119ed062c8dSJulian Elischer #define KEF_SCHED3 0x00008 /* For scheduler-specific use. */ 1202d59a44dSJeff Roberson #define KEF_SCHED4 0x00010 1218ffb8f55SJeff Roberson #define KEF_SCHED5 0x00020 122ed062c8dSJulian Elischer #define KEF_DIDRUN 0x02000 /* Thread actually ran. */ 123ed062c8dSJulian Elischer #define KEF_EXIT 0x04000 /* Thread is being killed. */ 124ed062c8dSJulian Elischer 125ed062c8dSJulian Elischer /* 12635e6168fSJeff Roberson * These datastructures are allocated within their parent datastructure but 12735e6168fSJeff Roberson * are scheduler specific. 
12835e6168fSJeff Roberson */ 12935e6168fSJeff Roberson 13022bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 13122bf7d9aSJeff Roberson 132598b368dSJeff Roberson #define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ 133598b368dSJeff Roberson #define KEF_BOUND 0x0002 /* Thread can not migrate. */ 134598b368dSJeff Roberson #define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ 135598b368dSJeff Roberson #define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ 136598b368dSJeff Roberson #define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 137f5c157d9SJohn Baldwin #define KEF_INTERNAL 0x0020 13835e6168fSJeff Roberson 13935e6168fSJeff Roberson struct kg_sched { 140ed062c8dSJulian Elischer struct thread *skg_last_assigned; /* (j) Last thread assigned to */ 141ed062c8dSJulian Elischer /* the system scheduler */ 142407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 143407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 144ed062c8dSJulian Elischer int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ 145ed062c8dSJulian Elischer int skg_concurrency; /* (j) Num threads requested in group.*/ 14635e6168fSJeff Roberson }; 147ed062c8dSJulian Elischer #define kg_last_assigned kg_sched->skg_last_assigned 148ed062c8dSJulian Elischer #define kg_avail_opennings kg_sched->skg_avail_opennings 149ed062c8dSJulian Elischer #define kg_concurrency kg_sched->skg_concurrency 150407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 151ed062c8dSJulian Elischer #define kg_slptime kg_sched->skg_slptime 15235e6168fSJeff Roberson 153d39063f2SJulian Elischer #define SLOT_RELEASE(kg) \ 154d39063f2SJulian Elischer do { \ 155d39063f2SJulian Elischer kg->kg_avail_opennings++; \ 156d39063f2SJulian Elischer CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)", \ 157d39063f2SJulian Elischer kg, \ 158d39063f2SJulian Elischer kg->kg_concurrency, \ 159d39063f2SJulian Elischer kg->kg_avail_opennings); \ 
160d39063f2SJulian Elischer /*KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency), \ 161d39063f2SJulian Elischer ("slots out of whack")); */ \ 162d39063f2SJulian Elischer } while (0) 163d39063f2SJulian Elischer 164d39063f2SJulian Elischer #define SLOT_USE(kg) \ 165d39063f2SJulian Elischer do { \ 166d39063f2SJulian Elischer kg->kg_avail_opennings--; \ 167d39063f2SJulian Elischer CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)", \ 168d39063f2SJulian Elischer kg, \ 169d39063f2SJulian Elischer kg->kg_concurrency, \ 170d39063f2SJulian Elischer kg->kg_avail_opennings); \ 171d39063f2SJulian Elischer /*KASSERT((kg->kg_avail_opennings >= 0), \ 172d39063f2SJulian Elischer ("slots out of whack"));*/ \ 173d39063f2SJulian Elischer } while (0) 174d39063f2SJulian Elischer 175ed062c8dSJulian Elischer static struct kse kse0; 176ed062c8dSJulian Elischer static struct kg_sched kg_sched0; 17735e6168fSJeff Roberson 17835e6168fSJeff Roberson /* 179665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 180665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 181665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 182665cb285SJeff Roberson * on latency. 183e1f89c22SJeff Roberson * 184e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 185665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 186e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 
18735e6168fSJeff Roberson */ 188407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 189a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 190a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 191665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 19215dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 193665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 19435e6168fSJeff Roberson 19535e6168fSJeff Roberson /* 196e1f89c22SJeff Roberson * These determine the interactivity of a process. 19735e6168fSJeff Roberson * 198407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 199407b0157SJeff Roberson * before throttling back. 200d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 201210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 202e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 20335e6168fSJeff Roberson */ 2044c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 205d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 206210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 207210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 2084c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 209e1f89c22SJeff Roberson 21035e6168fSJeff Roberson /* 21135e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 21235e6168fSJeff Roberson * granted to each thread. 21335e6168fSJeff Roberson * 21435e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 21535e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 21635e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 217245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 
218245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 2197d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment. 22035e6168fSJeff Roberson */ 22115dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 22215dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 2230392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max) 2247d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 22535e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 22635e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 227245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 2287d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 22935e6168fSJeff Roberson 23035e6168fSJeff Roberson /* 231ed062c8dSJulian Elischer * This macro determines whether or not the thread belongs on the current or 23235e6168fSJeff Roberson * next run queue. 23335e6168fSJeff Roberson */ 23415dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 23515dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 236a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 237f5c157d9SJohn Baldwin ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg)) 23835e6168fSJeff Roberson 23935e6168fSJeff Roberson /* 24035e6168fSJeff Roberson * Cpu percentage computation macros and defines. 24135e6168fSJeff Roberson * 24235e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 24335e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 24435e6168fSJeff Roberson */ 24535e6168fSJeff Roberson 2465053d272SJeff Roberson #define SCHED_CPU_TIME 10 24735e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 24835e6168fSJeff Roberson 24935e6168fSJeff Roberson /* 25015dc847eSJeff Roberson * kseq - per processor runqs and statistics. 
25135e6168fSJeff Roberson */ 25235e6168fSJeff Roberson struct kseq { 253a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 25415dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 25515dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 25615dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 257ef1134c9SJeff Roberson int ksq_load_timeshare; /* Load for timeshare. */ 25815dc847eSJeff Roberson int ksq_load; /* Aggregate load. */ 259a0a931ceSJeff Roberson short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 26015dc847eSJeff Roberson short ksq_nicemin; /* Least nice. */ 2615d7ef00cSJeff Roberson #ifdef SMP 26280f86c9fSJeff Roberson int ksq_transferable; 26380f86c9fSJeff Roberson LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 26480f86c9fSJeff Roberson struct kseq_group *ksq_group; /* Our processor group. */ 265fa9c9717SJeff Roberson volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 26633916c36SJeff Roberson #else 26733916c36SJeff Roberson int ksq_sysload; /* For loadavg, !ITHD load. */ 2685d7ef00cSJeff Roberson #endif 26935e6168fSJeff Roberson }; 27035e6168fSJeff Roberson 27180f86c9fSJeff Roberson #ifdef SMP 27280f86c9fSJeff Roberson /* 27380f86c9fSJeff Roberson * kseq groups are groups of processors which can cheaply share threads. When 27480f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other 27580f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt. 27680f86c9fSJeff Roberson * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 27780f86c9fSJeff Roberson * In a numa environment we'd want an idle bitmap per group and a two tiered 27880f86c9fSJeff Roberson * load balancer. 27980f86c9fSJeff Roberson */ 28080f86c9fSJeff Roberson struct kseq_group { 28180f86c9fSJeff Roberson int ksg_cpus; /* Count of CPUs in this kseq group. 
*/ 282b2ae7ed7SMarcel Moolenaar cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 283b2ae7ed7SMarcel Moolenaar cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 284b2ae7ed7SMarcel Moolenaar cpumask_t ksg_mask; /* Bit mask for first cpu. */ 285cac77d04SJeff Roberson int ksg_load; /* Total load of this group. */ 28680f86c9fSJeff Roberson int ksg_transferable; /* Transferable load of this group. */ 28780f86c9fSJeff Roberson LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */ 28880f86c9fSJeff Roberson }; 28980f86c9fSJeff Roberson #endif 29080f86c9fSJeff Roberson 29135e6168fSJeff Roberson /* 29235e6168fSJeff Roberson * One kse queue per processor. 29335e6168fSJeff Roberson */ 2940a016a05SJeff Roberson #ifdef SMP 295b2ae7ed7SMarcel Moolenaar static cpumask_t kseq_idle; 296cac77d04SJeff Roberson static int ksg_maxid; 29722bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 29880f86c9fSJeff Roberson static struct kseq_group kseq_groups[MAXCPU]; 299dc03363dSJeff Roberson static int bal_tick; 300dc03363dSJeff Roberson static int gbal_tick; 301598b368dSJeff Roberson static int balance_groups; 302dc03363dSJeff Roberson 30380f86c9fSJeff Roberson #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 30480f86c9fSJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 305cac77d04SJeff Roberson #define KSEQ_ID(x) ((x) - kseq_cpu) 306cac77d04SJeff Roberson #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 30780f86c9fSJeff Roberson #else /* !SMP */ 30822bf7d9aSJeff Roberson static struct kseq kseq_cpu; 309dc03363dSJeff Roberson 3100a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 3110a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 3120a016a05SJeff Roberson #endif 31335e6168fSJeff Roberson 314ed062c8dSJulian Elischer static void slot_fill(struct ksegrp *kg); 315ed062c8dSJulian Elischer static struct kse *sched_choose(void); /* XXX Should be thread * */ 316245f3abfSJeff Roberson static void sched_slice(struct kse *ke); 31715dc847eSJeff Roberson static void 
sched_priority(struct ksegrp *kg); 318f5c157d9SJohn Baldwin static void sched_thread_priority(struct thread *td, u_char prio); 319e1f89c22SJeff Roberson static int sched_interact_score(struct ksegrp *kg); 3204b60e324SJeff Roberson static void sched_interact_update(struct ksegrp *kg); 321d322132cSJeff Roberson static void sched_interact_fork(struct ksegrp *kg); 32222bf7d9aSJeff Roberson static void sched_pctcpu_update(struct kse *ke); 32335e6168fSJeff Roberson 3245d7ef00cSJeff Roberson /* Operations on per processor queues */ 32522bf7d9aSJeff Roberson static struct kse * kseq_choose(struct kseq *kseq); 3260a016a05SJeff Roberson static void kseq_setup(struct kseq *kseq); 327155b9987SJeff Roberson static void kseq_load_add(struct kseq *kseq, struct kse *ke); 328155b9987SJeff Roberson static void kseq_load_rem(struct kseq *kseq, struct kse *ke); 329598b368dSJeff Roberson static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke, int); 330155b9987SJeff Roberson static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke); 33115dc847eSJeff Roberson static void kseq_nice_add(struct kseq *kseq, int nice); 33215dc847eSJeff Roberson static void kseq_nice_rem(struct kseq *kseq, int nice); 3337cd650a9SJeff Roberson void kseq_print(int cpu); 3345d7ef00cSJeff Roberson #ifdef SMP 33580f86c9fSJeff Roberson static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class); 33622bf7d9aSJeff Roberson static struct kse *runq_steal(struct runq *rq); 337dc03363dSJeff Roberson static void sched_balance(void); 338dc03363dSJeff Roberson static void sched_balance_groups(void); 339cac77d04SJeff Roberson static void sched_balance_group(struct kseq_group *ksg); 340cac77d04SJeff Roberson static void sched_balance_pair(struct kseq *high, struct kseq *low); 34122bf7d9aSJeff Roberson static void kseq_move(struct kseq *from, int cpu); 34280f86c9fSJeff Roberson static int kseq_idled(struct kseq *kseq); 34322bf7d9aSJeff Roberson static void kseq_notify(struct kse *ke, int 
cpu); 34422bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 34580f86c9fSJeff Roberson static struct kse *kseq_steal(struct kseq *kseq, int stealidle); 346598b368dSJeff Roberson #define KSE_CAN_MIGRATE(ke) \ 3471e7fad6bSScott Long ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 3485d7ef00cSJeff Roberson #endif 3495d7ef00cSJeff Roberson 35015dc847eSJeff Roberson void 3517cd650a9SJeff Roberson kseq_print(int cpu) 35215dc847eSJeff Roberson { 3537cd650a9SJeff Roberson struct kseq *kseq; 35415dc847eSJeff Roberson int i; 35515dc847eSJeff Roberson 3567cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 35715dc847eSJeff Roberson 35815dc847eSJeff Roberson printf("kseq:\n"); 35915dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 360155b9987SJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 361ef1134c9SJeff Roberson #ifdef SMP 36280f86c9fSJeff Roberson printf("\tload transferable: %d\n", kseq->ksq_transferable); 363ef1134c9SJeff Roberson #endif 36415dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 36515dc847eSJeff Roberson printf("\tnice counts:\n"); 366a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 36715dc847eSJeff Roberson if (kseq->ksq_nice[i]) 36815dc847eSJeff Roberson printf("\t\t%d = %d\n", 36915dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 37015dc847eSJeff Roberson } 37115dc847eSJeff Roberson 372155b9987SJeff Roberson static __inline void 373598b368dSJeff Roberson kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 374155b9987SJeff Roberson { 375155b9987SJeff Roberson #ifdef SMP 376598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 37780f86c9fSJeff Roberson kseq->ksq_transferable++; 37880f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 3792454aaf5SJeff Roberson ke->ke_flags |= KEF_XFERABLE; 38080f86c9fSJeff Roberson } 381155b9987SJeff Roberson #endif 382598b368dSJeff Roberson runq_add(ke->ke_runq, ke, flags); 383155b9987SJeff Roberson } 
384155b9987SJeff Roberson 385155b9987SJeff Roberson static __inline void 386155b9987SJeff Roberson kseq_runq_rem(struct kseq *kseq, struct kse *ke) 387155b9987SJeff Roberson { 388155b9987SJeff Roberson #ifdef SMP 3892454aaf5SJeff Roberson if (ke->ke_flags & KEF_XFERABLE) { 39080f86c9fSJeff Roberson kseq->ksq_transferable--; 39180f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 3922454aaf5SJeff Roberson ke->ke_flags &= ~KEF_XFERABLE; 39380f86c9fSJeff Roberson } 394155b9987SJeff Roberson #endif 395155b9987SJeff Roberson runq_remove(ke->ke_runq, ke); 396155b9987SJeff Roberson } 397155b9987SJeff Roberson 398a8949de2SJeff Roberson static void 399155b9987SJeff Roberson kseq_load_add(struct kseq *kseq, struct kse *ke) 4005d7ef00cSJeff Roberson { 401ef1134c9SJeff Roberson int class; 402b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 403ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 404ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 405ef1134c9SJeff Roberson kseq->ksq_load_timeshare++; 40615dc847eSJeff Roberson kseq->ksq_load++; 40781d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 408207a6c0dSDavid E. 
O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 40933916c36SJeff Roberson #ifdef SMP 410cac77d04SJeff Roberson kseq->ksq_group->ksg_load++; 41133916c36SJeff Roberson #else 41233916c36SJeff Roberson kseq->ksq_sysload++; 413cac77d04SJeff Roberson #endif 41415dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 415fa885116SJulian Elischer kseq_nice_add(kseq, ke->ke_proc->p_nice); 4165d7ef00cSJeff Roberson } 41715dc847eSJeff Roberson 418a8949de2SJeff Roberson static void 419155b9987SJeff Roberson kseq_load_rem(struct kseq *kseq, struct kse *ke) 4205d7ef00cSJeff Roberson { 421ef1134c9SJeff Roberson int class; 422b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 423ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 424ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 425ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 426207a6c0dSDavid E. O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 42733916c36SJeff Roberson #ifdef SMP 428cac77d04SJeff Roberson kseq->ksq_group->ksg_load--; 42933916c36SJeff Roberson #else 43033916c36SJeff Roberson kseq->ksq_sysload--; 431cac77d04SJeff Roberson #endif 43215dc847eSJeff Roberson kseq->ksq_load--; 43381d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 43415dc847eSJeff Roberson ke->ke_runq = NULL; 43515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 436fa885116SJulian Elischer kseq_nice_rem(kseq, ke->ke_proc->p_nice); 4375d7ef00cSJeff Roberson } 4385d7ef00cSJeff Roberson 43915dc847eSJeff Roberson static void 44015dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 44115dc847eSJeff Roberson { 442b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 44315dc847eSJeff Roberson /* Normalize to zero. 
*/ 44415dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 445ef1134c9SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 44615dc847eSJeff Roberson kseq->ksq_nicemin = nice; 44715dc847eSJeff Roberson } 44815dc847eSJeff Roberson 44915dc847eSJeff Roberson static void 45015dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 45115dc847eSJeff Roberson { 45215dc847eSJeff Roberson int n; 45315dc847eSJeff Roberson 454b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 45515dc847eSJeff Roberson /* Normalize to zero. */ 45615dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 45715dc847eSJeff Roberson kseq->ksq_nice[n]--; 45815dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 45915dc847eSJeff Roberson 46015dc847eSJeff Roberson /* 46115dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 46215dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 46315dc847eSJeff Roberson * the smallest nice. 46415dc847eSJeff Roberson */ 46515dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 46615dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 467ef1134c9SJeff Roberson kseq->ksq_load_timeshare == 0) 46815dc847eSJeff Roberson return; 46915dc847eSJeff Roberson 470a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 47115dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 47215dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 47315dc847eSJeff Roberson return; 47415dc847eSJeff Roberson } 47515dc847eSJeff Roberson } 47615dc847eSJeff Roberson 4775d7ef00cSJeff Roberson #ifdef SMP 478356500a3SJeff Roberson /* 479155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 480356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 481356500a3SJeff Roberson * by migrating some processes. 
482356500a3SJeff Roberson * 483356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 484356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 485356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 486356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 487356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 488356500a3SJeff Roberson * 489356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 490356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 491356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 492356500a3SJeff Roberson * 493356500a3SJeff Roberson */ 49422bf7d9aSJeff Roberson static void 495dc03363dSJeff Roberson sched_balance(void) 496356500a3SJeff Roberson { 497cac77d04SJeff Roberson struct kseq_group *high; 498cac77d04SJeff Roberson struct kseq_group *low; 499cac77d04SJeff Roberson struct kseq_group *ksg; 500cac77d04SJeff Roberson int cnt; 501356500a3SJeff Roberson int i; 502356500a3SJeff Roberson 503598b368dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 50486f8ae96SJeff Roberson if (smp_started == 0) 505598b368dSJeff Roberson return; 506cac77d04SJeff Roberson low = high = NULL; 507cac77d04SJeff Roberson i = random() % (ksg_maxid + 1); 508cac77d04SJeff Roberson for (cnt = 0; cnt <= ksg_maxid; cnt++) { 509cac77d04SJeff Roberson ksg = KSEQ_GROUP(i); 510cac77d04SJeff Roberson /* 511cac77d04SJeff Roberson * Find the CPU with the highest load that has some 512cac77d04SJeff Roberson * threads to transfer. 
513cac77d04SJeff Roberson */ 514cac77d04SJeff Roberson if ((high == NULL || ksg->ksg_load > high->ksg_load) 515cac77d04SJeff Roberson && ksg->ksg_transferable) 516cac77d04SJeff Roberson high = ksg; 517cac77d04SJeff Roberson if (low == NULL || ksg->ksg_load < low->ksg_load) 518cac77d04SJeff Roberson low = ksg; 519cac77d04SJeff Roberson if (++i > ksg_maxid) 520cac77d04SJeff Roberson i = 0; 521cac77d04SJeff Roberson } 522cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 523cac77d04SJeff Roberson sched_balance_pair(LIST_FIRST(&high->ksg_members), 524cac77d04SJeff Roberson LIST_FIRST(&low->ksg_members)); 525cac77d04SJeff Roberson } 52686f8ae96SJeff Roberson 527cac77d04SJeff Roberson static void 528dc03363dSJeff Roberson sched_balance_groups(void) 529cac77d04SJeff Roberson { 530cac77d04SJeff Roberson int i; 531cac77d04SJeff Roberson 532598b368dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 533dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 534cac77d04SJeff Roberson if (smp_started) 535cac77d04SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 536cac77d04SJeff Roberson sched_balance_group(KSEQ_GROUP(i)); 537356500a3SJeff Roberson } 538cac77d04SJeff Roberson 539cac77d04SJeff Roberson static void 540cac77d04SJeff Roberson sched_balance_group(struct kseq_group *ksg) 541cac77d04SJeff Roberson { 542cac77d04SJeff Roberson struct kseq *kseq; 543cac77d04SJeff Roberson struct kseq *high; 544cac77d04SJeff Roberson struct kseq *low; 545cac77d04SJeff Roberson int load; 546cac77d04SJeff Roberson 547cac77d04SJeff Roberson if (ksg->ksg_transferable == 0) 548cac77d04SJeff Roberson return; 549cac77d04SJeff Roberson low = NULL; 550cac77d04SJeff Roberson high = NULL; 551cac77d04SJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 552cac77d04SJeff Roberson load = kseq->ksq_load; 553cac77d04SJeff Roberson if (high == NULL || load > high->ksq_load) 554cac77d04SJeff Roberson high = kseq; 555cac77d04SJeff Roberson if (low == NULL || load < 
low->ksq_load) 556cac77d04SJeff Roberson low = kseq; 557356500a3SJeff Roberson } 558cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 559cac77d04SJeff Roberson sched_balance_pair(high, low); 560356500a3SJeff Roberson } 561cac77d04SJeff Roberson 562cac77d04SJeff Roberson static void 563cac77d04SJeff Roberson sched_balance_pair(struct kseq *high, struct kseq *low) 564cac77d04SJeff Roberson { 565cac77d04SJeff Roberson int transferable; 566cac77d04SJeff Roberson int high_load; 567cac77d04SJeff Roberson int low_load; 568cac77d04SJeff Roberson int move; 569cac77d04SJeff Roberson int diff; 570cac77d04SJeff Roberson int i; 571cac77d04SJeff Roberson 57280f86c9fSJeff Roberson /* 57380f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 57480f86c9fSJeff Roberson * kseq's transferable count, otherwise we can steal from other members 57580f86c9fSJeff Roberson * of the group. 57680f86c9fSJeff Roberson */ 577cac77d04SJeff Roberson if (high->ksq_group == low->ksq_group) { 578cac77d04SJeff Roberson transferable = high->ksq_transferable; 579cac77d04SJeff Roberson high_load = high->ksq_load; 580cac77d04SJeff Roberson low_load = low->ksq_load; 581cac77d04SJeff Roberson } else { 582cac77d04SJeff Roberson transferable = high->ksq_group->ksg_transferable; 583cac77d04SJeff Roberson high_load = high->ksq_group->ksg_load; 584cac77d04SJeff Roberson low_load = low->ksq_group->ksg_load; 585cac77d04SJeff Roberson } 58680f86c9fSJeff Roberson if (transferable == 0) 587cac77d04SJeff Roberson return; 588155b9987SJeff Roberson /* 589155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 59080f86c9fSJeff Roberson * kses we actually have to give up (transferable). 
591155b9987SJeff Roberson */ 592cac77d04SJeff Roberson diff = high_load - low_load; 593356500a3SJeff Roberson move = diff / 2; 594356500a3SJeff Roberson if (diff & 0x1) 595356500a3SJeff Roberson move++; 59680f86c9fSJeff Roberson move = min(move, transferable); 597356500a3SJeff Roberson for (i = 0; i < move; i++) 598cac77d04SJeff Roberson kseq_move(high, KSEQ_ID(low)); 599356500a3SJeff Roberson return; 600356500a3SJeff Roberson } 601356500a3SJeff Roberson 60222bf7d9aSJeff Roberson static void 603356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 604356500a3SJeff Roberson { 60580f86c9fSJeff Roberson struct kseq *kseq; 60680f86c9fSJeff Roberson struct kseq *to; 607356500a3SJeff Roberson struct kse *ke; 608356500a3SJeff Roberson 60980f86c9fSJeff Roberson kseq = from; 61080f86c9fSJeff Roberson to = KSEQ_CPU(cpu); 61180f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 61280f86c9fSJeff Roberson if (ke == NULL) { 61380f86c9fSJeff Roberson struct kseq_group *ksg; 61480f86c9fSJeff Roberson 61580f86c9fSJeff Roberson ksg = kseq->ksq_group; 61680f86c9fSJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 61780f86c9fSJeff Roberson if (kseq == from || kseq->ksq_transferable == 0) 61880f86c9fSJeff Roberson continue; 61980f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 62080f86c9fSJeff Roberson break; 62180f86c9fSJeff Roberson } 62280f86c9fSJeff Roberson if (ke == NULL) 62380f86c9fSJeff Roberson panic("kseq_move: No KSEs available with a " 62480f86c9fSJeff Roberson "transferable count of %d\n", 62580f86c9fSJeff Roberson ksg->ksg_transferable); 62680f86c9fSJeff Roberson } 62780f86c9fSJeff Roberson if (kseq == to) 62880f86c9fSJeff Roberson return; 629356500a3SJeff Roberson ke->ke_state = KES_THREAD; 63080f86c9fSJeff Roberson kseq_runq_rem(kseq, ke); 63180f86c9fSJeff Roberson kseq_load_rem(kseq, ke); 632112b6d3aSJeff Roberson kseq_notify(ke, cpu); 633356500a3SJeff Roberson } 63422bf7d9aSJeff Roberson 63580f86c9fSJeff Roberson static int 63680f86c9fSJeff Roberson 
kseq_idled(struct kseq *kseq) 63722bf7d9aSJeff Roberson { 63880f86c9fSJeff Roberson struct kseq_group *ksg; 63980f86c9fSJeff Roberson struct kseq *steal; 64080f86c9fSJeff Roberson struct kse *ke; 64180f86c9fSJeff Roberson 64280f86c9fSJeff Roberson ksg = kseq->ksq_group; 64380f86c9fSJeff Roberson /* 64480f86c9fSJeff Roberson * If we're in a cpu group, try and steal kses from another cpu in 64580f86c9fSJeff Roberson * the group before idling. 64680f86c9fSJeff Roberson */ 64780f86c9fSJeff Roberson if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 64880f86c9fSJeff Roberson LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 64980f86c9fSJeff Roberson if (steal == kseq || steal->ksq_transferable == 0) 65080f86c9fSJeff Roberson continue; 65180f86c9fSJeff Roberson ke = kseq_steal(steal, 0); 65280f86c9fSJeff Roberson if (ke == NULL) 65380f86c9fSJeff Roberson continue; 65480f86c9fSJeff Roberson ke->ke_state = KES_THREAD; 65580f86c9fSJeff Roberson kseq_runq_rem(steal, ke); 65680f86c9fSJeff Roberson kseq_load_rem(steal, ke); 65780f86c9fSJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 658598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 659598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 66080f86c9fSJeff Roberson return (0); 66180f86c9fSJeff Roberson } 66280f86c9fSJeff Roberson } 66380f86c9fSJeff Roberson /* 66480f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 66580f86c9fSJeff Roberson * idle. Otherwise we could get into a situation where a KSE bounces 66680f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 
66780f86c9fSJeff Roberson */ 66880f86c9fSJeff Roberson ksg->ksg_idlemask |= PCPU_GET(cpumask); 66980f86c9fSJeff Roberson if (ksg->ksg_idlemask != ksg->ksg_cpumask) 67080f86c9fSJeff Roberson return (1); 67180f86c9fSJeff Roberson atomic_set_int(&kseq_idle, ksg->ksg_mask); 67280f86c9fSJeff Roberson return (1); 67322bf7d9aSJeff Roberson } 67422bf7d9aSJeff Roberson 67522bf7d9aSJeff Roberson static void 67622bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 67722bf7d9aSJeff Roberson { 67822bf7d9aSJeff Roberson struct kse *nke; 67922bf7d9aSJeff Roberson struct kse *ke; 68022bf7d9aSJeff Roberson 68122bf7d9aSJeff Roberson do { 68200fbcda8SAlexander Kabaev *(volatile struct kse **)&ke = kseq->ksq_assigned; 68322bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 68422bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 68522bf7d9aSJeff Roberson nke = ke->ke_assign; 686598b368dSJeff Roberson kseq->ksq_group->ksg_load--; 687598b368dSJeff Roberson kseq->ksq_load--; 68822bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 689598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 690598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 69122bf7d9aSJeff Roberson } 69222bf7d9aSJeff Roberson } 69322bf7d9aSJeff Roberson 69422bf7d9aSJeff Roberson static void 69522bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 69622bf7d9aSJeff Roberson { 69722bf7d9aSJeff Roberson struct kseq *kseq; 69822bf7d9aSJeff Roberson struct thread *td; 69922bf7d9aSJeff Roberson struct pcpu *pcpu; 700598b368dSJeff Roberson int class; 7012454aaf5SJeff Roberson int prio; 70222bf7d9aSJeff Roberson 703598b368dSJeff Roberson kseq = KSEQ_CPU(cpu); 704598b368dSJeff Roberson /* XXX */ 705598b368dSJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 706598b368dSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 707598b368dSJeff Roberson (kseq_idle & kseq->ksq_group->ksg_mask)) 708598b368dSJeff Roberson atomic_clear_int(&kseq_idle, 
kseq->ksq_group->ksg_mask); 709598b368dSJeff Roberson kseq->ksq_group->ksg_load++; 710598b368dSJeff Roberson kseq->ksq_load++; 71186e1c22aSJeff Roberson ke->ke_cpu = cpu; 71222bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 7132454aaf5SJeff Roberson prio = ke->ke_thread->td_priority; 71422bf7d9aSJeff Roberson 7150c0a98b2SJeff Roberson /* 71622bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 71722bf7d9aSJeff Roberson */ 71822bf7d9aSJeff Roberson do { 71900fbcda8SAlexander Kabaev *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 72022bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 7212454aaf5SJeff Roberson /* 7222454aaf5SJeff Roberson * Without sched_lock we could lose a race where we set NEEDRESCHED 7232454aaf5SJeff Roberson * on a thread that is switched out before the IPI is delivered. This 7242454aaf5SJeff Roberson * would lead us to miss the resched. This will be a problem once 7252454aaf5SJeff Roberson * sched_lock is pushed down. 
7262454aaf5SJeff Roberson */ 72722bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 72822bf7d9aSJeff Roberson td = pcpu->pc_curthread; 72922bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 73022bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 73122bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 73222bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 73322bf7d9aSJeff Roberson } 73422bf7d9aSJeff Roberson } 73522bf7d9aSJeff Roberson 73622bf7d9aSJeff Roberson static struct kse * 73722bf7d9aSJeff Roberson runq_steal(struct runq *rq) 73822bf7d9aSJeff Roberson { 73922bf7d9aSJeff Roberson struct rqhead *rqh; 74022bf7d9aSJeff Roberson struct rqbits *rqb; 74122bf7d9aSJeff Roberson struct kse *ke; 74222bf7d9aSJeff Roberson int word; 74322bf7d9aSJeff Roberson int bit; 74422bf7d9aSJeff Roberson 74522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 74622bf7d9aSJeff Roberson rqb = &rq->rq_status; 74722bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 74822bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 74922bf7d9aSJeff Roberson continue; 75022bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 751a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 75222bf7d9aSJeff Roberson continue; 75322bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 75422bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, ke_procq) { 755598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) 75622bf7d9aSJeff Roberson return (ke); 75722bf7d9aSJeff Roberson } 75822bf7d9aSJeff Roberson } 75922bf7d9aSJeff Roberson } 76022bf7d9aSJeff Roberson return (NULL); 76122bf7d9aSJeff Roberson } 76222bf7d9aSJeff Roberson 76322bf7d9aSJeff Roberson static struct kse * 76480f86c9fSJeff Roberson kseq_steal(struct kseq *kseq, int stealidle) 76522bf7d9aSJeff Roberson { 76622bf7d9aSJeff Roberson struct kse *ke; 76722bf7d9aSJeff Roberson 76880f86c9fSJeff Roberson /* 76980f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 
77080f86c9fSJeff Roberson * may not have run for a while. 77180f86c9fSJeff Roberson */ 77222bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 77322bf7d9aSJeff Roberson return (ke); 77480f86c9fSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 77580f86c9fSJeff Roberson return (ke); 77680f86c9fSJeff Roberson if (stealidle) 77722bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 77880f86c9fSJeff Roberson return (NULL); 77922bf7d9aSJeff Roberson } 78080f86c9fSJeff Roberson 78180f86c9fSJeff Roberson int 78280f86c9fSJeff Roberson kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 78380f86c9fSJeff Roberson { 784598b368dSJeff Roberson struct kseq_group *nksg; 78580f86c9fSJeff Roberson struct kseq_group *ksg; 786598b368dSJeff Roberson struct kseq *old; 78780f86c9fSJeff Roberson int cpu; 788598b368dSJeff Roberson int idx; 78980f86c9fSJeff Roberson 790670c524fSJeff Roberson if (smp_started == 0) 791670c524fSJeff Roberson return (0); 79280f86c9fSJeff Roberson cpu = 0; 79380f86c9fSJeff Roberson /* 7942454aaf5SJeff Roberson * If our load exceeds a certain threshold we should attempt to 7952454aaf5SJeff Roberson * reassign this thread. The first candidate is the cpu that 7962454aaf5SJeff Roberson * originally ran the thread. If it is idle, assign it there, 7972454aaf5SJeff Roberson * otherwise, pick an idle cpu. 7982454aaf5SJeff Roberson * 7992454aaf5SJeff Roberson * The threshold at which we start to reassign kses has a large impact 800670c524fSJeff Roberson * on the overall performance of the system. Tuned too high and 801670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 802d50c87deSOlivier Houchard * and context switches. 
803670c524fSJeff Roberson */ 804598b368dSJeff Roberson old = KSEQ_CPU(ke->ke_cpu); 805598b368dSJeff Roberson nksg = old->ksq_group; 8062454aaf5SJeff Roberson ksg = kseq->ksq_group; 807598b368dSJeff Roberson if (kseq_idle) { 808598b368dSJeff Roberson if (kseq_idle & nksg->ksg_mask) { 809598b368dSJeff Roberson cpu = ffs(nksg->ksg_idlemask); 810598b368dSJeff Roberson if (cpu) { 811598b368dSJeff Roberson CTR2(KTR_SCHED, 812598b368dSJeff Roberson "kseq_transfer: %p found old cpu %X " 813598b368dSJeff Roberson "in idlemask.", ke, cpu); 8142454aaf5SJeff Roberson goto migrate; 8152454aaf5SJeff Roberson } 816598b368dSJeff Roberson } 81780f86c9fSJeff Roberson /* 81880f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 81980f86c9fSJeff Roberson * but the race shouldn't be terrible. 82080f86c9fSJeff Roberson */ 82180f86c9fSJeff Roberson cpu = ffs(kseq_idle); 822598b368dSJeff Roberson if (cpu) { 823598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 824598b368dSJeff Roberson "in idlemask.", ke, cpu); 8252454aaf5SJeff Roberson goto migrate; 82680f86c9fSJeff Roberson } 827598b368dSJeff Roberson } 828598b368dSJeff Roberson idx = 0; 829598b368dSJeff Roberson #if 0 830598b368dSJeff Roberson if (old->ksq_load < kseq->ksq_load) { 831598b368dSJeff Roberson cpu = ke->ke_cpu + 1; 832598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 833598b368dSJeff Roberson "load less than ours.", ke, cpu); 834598b368dSJeff Roberson goto migrate; 835598b368dSJeff Roberson } 836598b368dSJeff Roberson /* 837598b368dSJeff Roberson * No new CPU was found, look for one with less load. 
838598b368dSJeff Roberson */ 839598b368dSJeff Roberson for (idx = 0; idx <= ksg_maxid; idx++) { 840598b368dSJeff Roberson nksg = KSEQ_GROUP(idx); 841598b368dSJeff Roberson if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 842598b368dSJeff Roberson cpu = ffs(nksg->ksg_cpumask); 843598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 844598b368dSJeff Roberson "than ours.", ke, cpu); 845598b368dSJeff Roberson goto migrate; 846598b368dSJeff Roberson } 847598b368dSJeff Roberson } 848598b368dSJeff Roberson #endif 84980f86c9fSJeff Roberson /* 85080f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 85180f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 85280f86c9fSJeff Roberson */ 8532454aaf5SJeff Roberson if (ksg->ksg_idlemask) { 85480f86c9fSJeff Roberson cpu = ffs(ksg->ksg_idlemask); 855598b368dSJeff Roberson if (cpu) { 856598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 857598b368dSJeff Roberson "group.", ke, cpu); 8582454aaf5SJeff Roberson goto migrate; 85980f86c9fSJeff Roberson } 860598b368dSJeff Roberson } 8612454aaf5SJeff Roberson return (0); 8622454aaf5SJeff Roberson migrate: 8632454aaf5SJeff Roberson /* 86480f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 86580f86c9fSJeff Roberson */ 86680f86c9fSJeff Roberson cpu--; 86780f86c9fSJeff Roberson ke->ke_runq = NULL; 86880f86c9fSJeff Roberson kseq_notify(ke, cpu); 8692454aaf5SJeff Roberson 87080f86c9fSJeff Roberson return (1); 87180f86c9fSJeff Roberson } 87280f86c9fSJeff Roberson 87322bf7d9aSJeff Roberson #endif /* SMP */ 87422bf7d9aSJeff Roberson 87522bf7d9aSJeff Roberson /* 87622bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 
8770c0a98b2SJeff Roberson */ 8780c0a98b2SJeff Roberson 87922bf7d9aSJeff Roberson static struct kse * 88022bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 8815d7ef00cSJeff Roberson { 8825d7ef00cSJeff Roberson struct runq *swap; 8830516c8ddSJeff Roberson struct kse *ke; 8840516c8ddSJeff Roberson int nice; 8855d7ef00cSJeff Roberson 886b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 88715dc847eSJeff Roberson swap = NULL; 888a8949de2SJeff Roberson 88915dc847eSJeff Roberson for (;;) { 89015dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 89115dc847eSJeff Roberson if (ke == NULL) { 89215dc847eSJeff Roberson /* 893bf0acc27SJohn Baldwin * We already swapped once and didn't get anywhere. 89415dc847eSJeff Roberson */ 89515dc847eSJeff Roberson if (swap) 89615dc847eSJeff Roberson break; 8975d7ef00cSJeff Roberson swap = kseq->ksq_curr; 8985d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 8995d7ef00cSJeff Roberson kseq->ksq_next = swap; 90015dc847eSJeff Roberson continue; 901a8949de2SJeff Roberson } 90215dc847eSJeff Roberson /* 90315dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 90415dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 90515dc847eSJeff Roberson * of the range that receives slices. 
90615dc847eSJeff Roberson */ 9070516c8ddSJeff Roberson nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); 9088ffb8f55SJeff Roberson if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 9098ffb8f55SJeff Roberson ke->ke_proc->p_nice != 0)) { 91015dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 91115dc847eSJeff Roberson sched_slice(ke); 91215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 913c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 91415dc847eSJeff Roberson continue; 91515dc847eSJeff Roberson } 91615dc847eSJeff Roberson return (ke); 91715dc847eSJeff Roberson } 91815dc847eSJeff Roberson 919a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 920245f3abfSJeff Roberson } 9210a016a05SJeff Roberson 9220a016a05SJeff Roberson static void 9230a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 9240a016a05SJeff Roberson { 92515dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 92615dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 927a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 92815dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 92915dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 9307cd650a9SJeff Roberson kseq->ksq_load = 0; 931ef1134c9SJeff Roberson kseq->ksq_load_timeshare = 0; 9320a016a05SJeff Roberson } 9330a016a05SJeff Roberson 93435e6168fSJeff Roberson static void 93535e6168fSJeff Roberson sched_setup(void *dummy) 93635e6168fSJeff Roberson { 9370ec896fdSJeff Roberson #ifdef SMP 93835e6168fSJeff Roberson int i; 9390ec896fdSJeff Roberson #endif 94035e6168fSJeff Roberson 941e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 942e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 943e1f89c22SJeff Roberson 944356500a3SJeff Roberson #ifdef SMP 945cac77d04SJeff Roberson balance_groups = 0; 94680f86c9fSJeff Roberson /* 94780f86c9fSJeff Roberson * Initialize the kseqs. 
94880f86c9fSJeff Roberson */ 949749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 95080f86c9fSJeff Roberson struct kseq *ksq; 95180f86c9fSJeff Roberson 95280f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 95380f86c9fSJeff Roberson ksq->ksq_assigned = NULL; 954749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 95580f86c9fSJeff Roberson } 95680f86c9fSJeff Roberson if (smp_topology == NULL) { 95780f86c9fSJeff Roberson struct kseq_group *ksg; 95880f86c9fSJeff Roberson struct kseq *ksq; 959598b368dSJeff Roberson int cpus; 96080f86c9fSJeff Roberson 961598b368dSJeff Roberson for (cpus = 0, i = 0; i < MAXCPU; i++) { 962598b368dSJeff Roberson if (CPU_ABSENT(i)) 963598b368dSJeff Roberson continue; 964598b368dSJeff Roberson ksq = &kseq_cpu[cpus]; 965598b368dSJeff Roberson ksg = &kseq_groups[cpus]; 96680f86c9fSJeff Roberson /* 967dc03363dSJeff Roberson * Setup a kseq group with one member. 96880f86c9fSJeff Roberson */ 96980f86c9fSJeff Roberson ksq->ksq_transferable = 0; 97080f86c9fSJeff Roberson ksq->ksq_group = ksg; 97180f86c9fSJeff Roberson ksg->ksg_cpus = 1; 97280f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 97380f86c9fSJeff Roberson ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 974cac77d04SJeff Roberson ksg->ksg_load = 0; 97580f86c9fSJeff Roberson ksg->ksg_transferable = 0; 97680f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 97780f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 978598b368dSJeff Roberson cpus++; 979749d01b0SJeff Roberson } 980598b368dSJeff Roberson ksg_maxid = cpus - 1; 981749d01b0SJeff Roberson } else { 98280f86c9fSJeff Roberson struct kseq_group *ksg; 98380f86c9fSJeff Roberson struct cpu_group *cg; 984749d01b0SJeff Roberson int j; 985749d01b0SJeff Roberson 986749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 987749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 98880f86c9fSJeff Roberson ksg = &kseq_groups[i]; 98980f86c9fSJeff Roberson /* 99080f86c9fSJeff Roberson * Initialize the group. 
99180f86c9fSJeff Roberson */ 99280f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 993cac77d04SJeff Roberson ksg->ksg_load = 0; 99480f86c9fSJeff Roberson ksg->ksg_transferable = 0; 99580f86c9fSJeff Roberson ksg->ksg_cpus = cg->cg_count; 99680f86c9fSJeff Roberson ksg->ksg_cpumask = cg->cg_mask; 99780f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 99880f86c9fSJeff Roberson /* 99980f86c9fSJeff Roberson * Find all of the group members and add them. 100080f86c9fSJeff Roberson */ 100180f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 100280f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 100380f86c9fSJeff Roberson if (ksg->ksg_mask == 0) 100480f86c9fSJeff Roberson ksg->ksg_mask = 1 << j; 100580f86c9fSJeff Roberson kseq_cpu[j].ksq_transferable = 0; 100680f86c9fSJeff Roberson kseq_cpu[j].ksq_group = ksg; 100780f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, 100880f86c9fSJeff Roberson &kseq_cpu[j], ksq_siblings); 100980f86c9fSJeff Roberson } 101080f86c9fSJeff Roberson } 1011cac77d04SJeff Roberson if (ksg->ksg_cpus > 1) 1012cac77d04SJeff Roberson balance_groups = 1; 1013749d01b0SJeff Roberson } 1014cac77d04SJeff Roberson ksg_maxid = smp_topology->ct_count - 1; 1015749d01b0SJeff Roberson } 1016cac77d04SJeff Roberson /* 1017cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 1018cac77d04SJeff Roberson * interfere with each other. 
1019cac77d04SJeff Roberson */ 1020dc03363dSJeff Roberson bal_tick = ticks + hz; 1021cac77d04SJeff Roberson if (balance_groups) 1022dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 1023749d01b0SJeff Roberson #else 1024749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 1025356500a3SJeff Roberson #endif 1026749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 1027155b9987SJeff Roberson kseq_load_add(KSEQ_SELF(), &kse0); 1028749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 102935e6168fSJeff Roberson } 103035e6168fSJeff Roberson 103135e6168fSJeff Roberson /* 103235e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 103335e6168fSJeff Roberson * process. 103435e6168fSJeff Roberson */ 103515dc847eSJeff Roberson static void 103635e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 103735e6168fSJeff Roberson { 103835e6168fSJeff Roberson int pri; 103935e6168fSJeff Roberson 104035e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 104115dc847eSJeff Roberson return; 104235e6168fSJeff Roberson 104315dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 1044e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 1045fa885116SJulian Elischer pri += kg->kg_proc->p_nice; 104635e6168fSJeff Roberson 104735e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 104835e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 104935e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 105035e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 105135e6168fSJeff Roberson 105235e6168fSJeff Roberson kg->kg_user_pri = pri; 105335e6168fSJeff Roberson 105415dc847eSJeff Roberson return; 105535e6168fSJeff Roberson } 105635e6168fSJeff Roberson 105735e6168fSJeff Roberson /* 1058245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 1059a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 
106035e6168fSJeff Roberson */ 1061245f3abfSJeff Roberson static void 1062245f3abfSJeff Roberson sched_slice(struct kse *ke) 106335e6168fSJeff Roberson { 106415dc847eSJeff Roberson struct kseq *kseq; 1065245f3abfSJeff Roberson struct ksegrp *kg; 106635e6168fSJeff Roberson 1067245f3abfSJeff Roberson kg = ke->ke_ksegrp; 106815dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 106935e6168fSJeff Roberson 1070f5c157d9SJohn Baldwin if (ke->ke_thread->td_flags & TDF_BORROWING) { 10718ffb8f55SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 10728ffb8f55SJeff Roberson return; 10738ffb8f55SJeff Roberson } 10748ffb8f55SJeff Roberson 1075245f3abfSJeff Roberson /* 1076245f3abfSJeff Roberson * Rationale: 10772454aaf5SJeff Roberson * KSEs in interactive ksegs get a minimal slice so that we 1078245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 1079245f3abfSJeff Roberson * 1080245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 1081245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 1082245f3abfSJeff Roberson * on the run queue for this cpu. 1083245f3abfSJeff Roberson * 1084245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 1085245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 1086245f3abfSJeff Roberson * this when they first expire. 1087245f3abfSJeff Roberson * 1088245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 1089245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 1090245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 1091245f3abfSJeff Roberson * 10927d1a81b4SJeff Roberson * If the kse is outside of the window it will get no slice 10937d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 10947d1a81b4SJeff Roberson * run queue. The exception to this is nice 0 ksegs when 10957d1a81b4SJeff Roberson * a nice -20 is running. 
They are always granted a minimum 10967d1a81b4SJeff Roberson * slice. 1097245f3abfSJeff Roberson */ 109815dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 1099245f3abfSJeff Roberson int nice; 1100245f3abfSJeff Roberson 1101fa885116SJulian Elischer nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1102ef1134c9SJeff Roberson if (kseq->ksq_load_timeshare == 0 || 1103fa885116SJulian Elischer kg->kg_proc->p_nice < kseq->ksq_nicemin) 1104245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 11057d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1106245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 1107fa885116SJulian Elischer else if (kg->kg_proc->p_nice == 0) 11087d1a81b4SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 1109245f3abfSJeff Roberson else 1110245f3abfSJeff Roberson ke->ke_slice = 0; 1111245f3abfSJeff Roberson } else 11129b5f6f62SJeff Roberson ke->ke_slice = SCHED_SLICE_INTERACTIVE; 111335e6168fSJeff Roberson 1114245f3abfSJeff Roberson return; 111535e6168fSJeff Roberson } 111635e6168fSJeff Roberson 1117d322132cSJeff Roberson /* 1118d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1119d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1120d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1121d322132cSJeff Roberson * adjusted to more than double their maximum. 
1122d322132cSJeff Roberson */ 11234b60e324SJeff Roberson static void 11244b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 11254b60e324SJeff Roberson { 1126d322132cSJeff Roberson int sum; 11273f741ca1SJeff Roberson 1128d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1129d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1130d322132cSJeff Roberson return; 1131d322132cSJeff Roberson /* 1132d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1133d322132cSJeff Roberson * will not bring us back into range. Dividing by two here forces 11342454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1135d322132cSJeff Roberson */ 113637a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1137d322132cSJeff Roberson kg->kg_runtime /= 2; 1138d322132cSJeff Roberson kg->kg_slptime /= 2; 1139d322132cSJeff Roberson return; 1140d322132cSJeff Roberson } 1141d322132cSJeff Roberson kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1142d322132cSJeff Roberson kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1143d322132cSJeff Roberson } 1144d322132cSJeff Roberson 1145d322132cSJeff Roberson static void 1146d322132cSJeff Roberson sched_interact_fork(struct ksegrp *kg) 1147d322132cSJeff Roberson { 1148d322132cSJeff Roberson int ratio; 1149d322132cSJeff Roberson int sum; 1150d322132cSJeff Roberson 1151d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1152d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1153d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 1154d322132cSJeff Roberson kg->kg_runtime /= ratio; 1155d322132cSJeff Roberson kg->kg_slptime /= ratio; 11564b60e324SJeff Roberson } 11574b60e324SJeff Roberson } 11584b60e324SJeff Roberson 1159e1f89c22SJeff Roberson static int 1160e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 1161e1f89c22SJeff Roberson { 1162210491d3SJeff Roberson int div; 1163e1f89c22SJeff Roberson 1164e1f89c22SJeff Roberson if 
(kg->kg_runtime > kg->kg_slptime) { 1165210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1166210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 1167210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1168210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 1169210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1170210491d3SJeff Roberson return (kg->kg_runtime / div); 1171e1f89c22SJeff Roberson } 1172e1f89c22SJeff Roberson 1173210491d3SJeff Roberson /* 1174210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1175210491d3SJeff Roberson */ 1176210491d3SJeff Roberson return (0); 1177e1f89c22SJeff Roberson 1178e1f89c22SJeff Roberson } 1179e1f89c22SJeff Roberson 118015dc847eSJeff Roberson /* 1181ed062c8dSJulian Elischer * Very early in the boot some setup of scheduler-specific 1182ed062c8dSJulian Elischer * parts of proc0 and of soem scheduler resources needs to be done. 1183ed062c8dSJulian Elischer * Called from: 1184ed062c8dSJulian Elischer * proc0_init() 1185ed062c8dSJulian Elischer */ 1186ed062c8dSJulian Elischer void 1187ed062c8dSJulian Elischer schedinit(void) 1188ed062c8dSJulian Elischer { 1189ed062c8dSJulian Elischer /* 1190ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 
1191ed062c8dSJulian Elischer */ 1192ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1193d39063f2SJulian Elischer ksegrp0.kg_sched = &kg_sched0; 1194d39063f2SJulian Elischer thread0.td_sched = &kse0; 1195ed062c8dSJulian Elischer kse0.ke_thread = &thread0; 1196ed062c8dSJulian Elischer kse0.ke_state = KES_THREAD; 1197ed062c8dSJulian Elischer kg_sched0.skg_concurrency = 1; 1198ed062c8dSJulian Elischer kg_sched0.skg_avail_opennings = 0; /* we are already running */ 1199ed062c8dSJulian Elischer } 1200ed062c8dSJulian Elischer 1201ed062c8dSJulian Elischer /* 120215dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 120315dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 120415dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 120515dc847eSJeff Roberson */ 120635e6168fSJeff Roberson int 120735e6168fSJeff Roberson sched_rr_interval(void) 120835e6168fSJeff Roberson { 120935e6168fSJeff Roberson return (SCHED_SLICE_MAX); 121035e6168fSJeff Roberson } 121135e6168fSJeff Roberson 121222bf7d9aSJeff Roberson static void 121335e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 121435e6168fSJeff Roberson { 121535e6168fSJeff Roberson /* 121635e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1217210491d3SJeff Roberson */ 121881de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1219210491d3SJeff Roberson /* 122081de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 122181de51bfSJeff Roberson * round away our results. 
122265c8760dSJeff Roberson */ 122365c8760dSJeff Roberson ke->ke_ticks <<= 10; 122481de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 122535e6168fSJeff Roberson SCHED_CPU_TICKS; 122665c8760dSJeff Roberson ke->ke_ticks >>= 10; 122781de51bfSJeff Roberson } else 122881de51bfSJeff Roberson ke->ke_ticks = 0; 122935e6168fSJeff Roberson ke->ke_ltick = ticks; 123035e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 123135e6168fSJeff Roberson } 123235e6168fSJeff Roberson 123335e6168fSJeff Roberson void 1234f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 123535e6168fSJeff Roberson { 12363f741ca1SJeff Roberson struct kse *ke; 123735e6168fSJeff Roberson 123881d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 123981d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 124081d47d3fSJeff Roberson curthread->td_proc->p_comm); 12413f741ca1SJeff Roberson ke = td->td_kse; 124235e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1243f5c157d9SJohn Baldwin if (td->td_priority == prio) 1244f5c157d9SJohn Baldwin return; 124535e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 12463f741ca1SJeff Roberson /* 12473f741ca1SJeff Roberson * If the priority has been elevated due to priority 12483f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 12493f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 12503f741ca1SJeff Roberson * needs to fix things up. 
12513f741ca1SJeff Roberson */ 12528ffb8f55SJeff Roberson if (prio < td->td_priority && ke->ke_runq != NULL && 1253769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 125422bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 12553f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 12563f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1257c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 125835e6168fSJeff Roberson } 1259f2b74cbfSJeff Roberson /* 1260f2b74cbfSJeff Roberson * Hold this kse on this cpu so that sched_prio() doesn't 1261f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1262f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1263f2b74cbfSJeff Roberson */ 1264f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 12653f741ca1SJeff Roberson adjustrunqueue(td, prio); 1266598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 12673f741ca1SJeff Roberson } else 12683f741ca1SJeff Roberson td->td_priority = prio; 126935e6168fSJeff Roberson } 127035e6168fSJeff Roberson 1271f5c157d9SJohn Baldwin /* 1272f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1273f5c157d9SJohn Baldwin * priority. 1274f5c157d9SJohn Baldwin */ 1275f5c157d9SJohn Baldwin void 1276f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1277f5c157d9SJohn Baldwin { 1278f5c157d9SJohn Baldwin 1279f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1280f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1281f5c157d9SJohn Baldwin } 1282f5c157d9SJohn Baldwin 1283f5c157d9SJohn Baldwin /* 1284f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1285f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1286f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1287f5c157d9SJohn Baldwin * requests. 
If the thread's regular priority is less 1288f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1289f5c157d9SJohn Baldwin * of prio. 1290f5c157d9SJohn Baldwin */ 1291f5c157d9SJohn Baldwin void 1292f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1293f5c157d9SJohn Baldwin { 1294f5c157d9SJohn Baldwin u_char base_pri; 1295f5c157d9SJohn Baldwin 1296f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1297f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 1298f5c157d9SJohn Baldwin base_pri = td->td_ksegrp->kg_user_pri; 1299f5c157d9SJohn Baldwin else 1300f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1301f5c157d9SJohn Baldwin if (prio >= base_pri) { 1302f5c157d9SJohn Baldwin td->td_flags &= ~ TDF_BORROWING; 1303f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1304f5c157d9SJohn Baldwin } else 1305f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1306f5c157d9SJohn Baldwin } 1307f5c157d9SJohn Baldwin 1308f5c157d9SJohn Baldwin void 1309f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1310f5c157d9SJohn Baldwin { 1311f5c157d9SJohn Baldwin u_char oldprio; 1312f5c157d9SJohn Baldwin 1313f5c157d9SJohn Baldwin /* First, update the base priority. */ 1314f5c157d9SJohn Baldwin td->td_base_pri = prio; 1315f5c157d9SJohn Baldwin 1316f5c157d9SJohn Baldwin /* 1317f5c157d9SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1318f5c157d9SJohn Baldwin * ever lower the priority. 1319f5c157d9SJohn Baldwin */ 1320f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1321f5c157d9SJohn Baldwin return; 1322f5c157d9SJohn Baldwin 1323f5c157d9SJohn Baldwin /* Change the real priority.
*/ 1324f5c157d9SJohn Baldwin oldprio = td->td_priority; 1325f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1326f5c157d9SJohn Baldwin 1327f5c157d9SJohn Baldwin /* 1328f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1329f5c157d9SJohn Baldwin * its state. 1330f5c157d9SJohn Baldwin */ 1331f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1332f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1333f5c157d9SJohn Baldwin } 1334f5c157d9SJohn Baldwin 133535e6168fSJeff Roberson void 13363389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 133735e6168fSJeff Roberson { 1338598b368dSJeff Roberson struct kseq *ksq; 133935e6168fSJeff Roberson struct kse *ke; 134035e6168fSJeff Roberson 134135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 134235e6168fSJeff Roberson 134335e6168fSJeff Roberson ke = td->td_kse; 1344598b368dSJeff Roberson ksq = KSEQ_SELF(); 134535e6168fSJeff Roberson 1346060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1347060563ecSJulian Elischer td->td_oncpu = NOCPU; 134852eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 134952eb8464SJohn Baldwin td->td_pflags &= ~TDP_OWEPREEMPT; 135035e6168fSJeff Roberson 1351b11fdad0SJeff Roberson /* 1352b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1353b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 
1354b11fdad0SJeff Roberson */ 13552454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1356bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1357598b368dSJeff Roberson } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 1358ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1359d39063f2SJulian Elischer SLOT_RELEASE(td->td_ksegrp); 1360598b368dSJeff Roberson if (ke->ke_runq == NULL) 1361598b368dSJeff Roberson panic("Thread not on runq."); 1362598b368dSJeff Roberson kseq_load_rem(ksq, ke); 1363ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1364f2b74cbfSJeff Roberson /* 1365ed062c8dSJulian Elischer * Don't allow the thread to migrate 1366ed062c8dSJulian Elischer * from a preemption. 1367f2b74cbfSJeff Roberson */ 1368f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 1369598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 1370598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1371598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1372598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 1373598b368dSJeff Roberson } else if ((td->td_proc->p_flag & P_HADTHREADS) && 1374598b368dSJeff Roberson (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) 137535e6168fSJeff Roberson /* 1376ed062c8dSJulian Elischer * We will not be on the run queue. 1377ed062c8dSJulian Elischer * So we must be sleeping or similar. 1378c20c691bSJulian Elischer * Don't use the slot if we will need it 1379c20c691bSJulian Elischer * for newtd. 138035e6168fSJeff Roberson */ 1381ed062c8dSJulian Elischer slot_fill(td->td_ksegrp); 1382ed062c8dSJulian Elischer } 1383d39063f2SJulian Elischer if (newtd != NULL) { 1384c20c691bSJulian Elischer /* 1385c20c691bSJulian Elischer * If we bring in a thread, 1386c20c691bSJulian Elischer * then account for it as if it had been added to the 1387c20c691bSJulian Elischer * run queue and then chosen. 
1388c20c691bSJulian Elischer */ 1389c5c3fb33SJulian Elischer newtd->td_kse->ke_flags |= KEF_DIDRUN; 1390598b368dSJeff Roberson newtd->td_kse->ke_runq = ksq->ksq_curr; 1391d39063f2SJulian Elischer SLOT_USE(newtd->td_ksegrp); 1392c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1393bf0acc27SJohn Baldwin kseq_load_add(KSEQ_SELF(), newtd->td_kse); 1394d39063f2SJulian Elischer } else 13952454aaf5SJeff Roberson newtd = choosethread(); 1396ae53b483SJeff Roberson if (td != newtd) 1397ae53b483SJeff Roberson cpu_switch(td, newtd); 1398ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 139935e6168fSJeff Roberson 1400060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 140135e6168fSJeff Roberson } 140235e6168fSJeff Roberson 140335e6168fSJeff Roberson void 1404fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 140535e6168fSJeff Roberson { 1406fa885116SJulian Elischer struct ksegrp *kg; 140715dc847eSJeff Roberson struct kse *ke; 140835e6168fSJeff Roberson struct thread *td; 140915dc847eSJeff Roberson struct kseq *kseq; 141035e6168fSJeff Roberson 1411fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14120b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 141315dc847eSJeff Roberson /* 141415dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 
141515dc847eSJeff Roberson */ 1416fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 1417fa885116SJulian Elischer if (kg->kg_pri_class == PRI_TIMESHARE) { 1418ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1419ed062c8dSJulian Elischer ke = td->td_kse; 1420d07ac847SJeff Roberson if (ke->ke_runq == NULL) 142115dc847eSJeff Roberson continue; 142215dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1423fa885116SJulian Elischer kseq_nice_rem(kseq, p->p_nice); 142415dc847eSJeff Roberson kseq_nice_add(kseq, nice); 142515dc847eSJeff Roberson } 1426fa885116SJulian Elischer } 1427fa885116SJulian Elischer } 1428fa885116SJulian Elischer p->p_nice = nice; 1429fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 143035e6168fSJeff Roberson sched_priority(kg); 143115dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 14324a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 143335e6168fSJeff Roberson } 1434fa885116SJulian Elischer } 143535e6168fSJeff Roberson 143635e6168fSJeff Roberson void 143744f3b092SJohn Baldwin sched_sleep(struct thread *td) 143835e6168fSJeff Roberson { 143935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 144035e6168fSJeff Roberson 144135e6168fSJeff Roberson td->td_slptime = ticks; 144235e6168fSJeff Roberson } 144335e6168fSJeff Roberson 144435e6168fSJeff Roberson void 144535e6168fSJeff Roberson sched_wakeup(struct thread *td) 144635e6168fSJeff Roberson { 144735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 144835e6168fSJeff Roberson 144935e6168fSJeff Roberson /* 145035e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 145135e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 
145235e6168fSJeff Roberson */ 145335e6168fSJeff Roberson if (td->td_slptime) { 1454f1e8dc4aSJeff Roberson struct ksegrp *kg; 145515dc847eSJeff Roberson int hzticks; 1456f1e8dc4aSJeff Roberson 1457f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1458d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1459d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1460d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1461d322132cSJeff Roberson kg->kg_runtime = 1; 1462d322132cSJeff Roberson } else { 1463d322132cSJeff Roberson kg->kg_slptime += hzticks; 14644b60e324SJeff Roberson sched_interact_update(kg); 1465d322132cSJeff Roberson } 1466f1e8dc4aSJeff Roberson sched_priority(kg); 14674b60e324SJeff Roberson sched_slice(td->td_kse); 146835e6168fSJeff Roberson td->td_slptime = 0; 1469f1e8dc4aSJeff Roberson } 14702630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 147135e6168fSJeff Roberson } 147235e6168fSJeff Roberson 147335e6168fSJeff Roberson /* 147435e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 147535e6168fSJeff Roberson * priority. 
147635e6168fSJeff Roberson */ 147735e6168fSJeff Roberson void 1478ed062c8dSJulian Elischer sched_fork(struct thread *td, struct thread *childtd) 147935e6168fSJeff Roberson { 148035e6168fSJeff Roberson 148135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 148235e6168fSJeff Roberson 1483ed062c8dSJulian Elischer sched_fork_ksegrp(td, childtd->td_ksegrp); 1484ed062c8dSJulian Elischer sched_fork_thread(td, childtd); 148515dc847eSJeff Roberson } 148615dc847eSJeff Roberson 148715dc847eSJeff Roberson void 148855d44f79SJulian Elischer sched_fork_ksegrp(struct thread *td, struct ksegrp *child) 148915dc847eSJeff Roberson { 149055d44f79SJulian Elischer struct ksegrp *kg = td->td_ksegrp; 1491ed062c8dSJulian Elischer mtx_assert(&sched_lock, MA_OWNED); 1492210491d3SJeff Roberson 1493d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1494d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1495d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 1496d322132cSJeff Roberson sched_interact_fork(child); 14974b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 14984b60e324SJeff Roberson sched_interact_update(kg); 1499c9f25d8fSJeff Roberson } 1500c9f25d8fSJeff Roberson 150115dc847eSJeff Roberson void 150215dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 150315dc847eSJeff Roberson { 1504ed062c8dSJulian Elischer struct kse *ke; 1505ed062c8dSJulian Elischer struct kse *ke2; 1506ed062c8dSJulian Elischer 1507ed062c8dSJulian Elischer sched_newthread(child); 1508ed062c8dSJulian Elischer ke = td->td_kse; 1509ed062c8dSJulian Elischer ke2 = child->td_kse; 1510ed062c8dSJulian Elischer ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1511ed062c8dSJulian Elischer ke2->ke_cpu = ke->ke_cpu; 1512ed062c8dSJulian Elischer ke2->ke_runq = NULL; 1513ed062c8dSJulian Elischer 1514ed062c8dSJulian Elischer /* Grab our parent's cpu estimation information.
*/ 1515ed062c8dSJulian Elischer ke2->ke_ticks = ke->ke_ticks; 1516ed062c8dSJulian Elischer ke2->ke_ltick = ke->ke_ltick; 1517ed062c8dSJulian Elischer ke2->ke_ftick = ke->ke_ftick; 151815dc847eSJeff Roberson } 151915dc847eSJeff Roberson 152015dc847eSJeff Roberson void 152115dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 152215dc847eSJeff Roberson { 152315dc847eSJeff Roberson struct kseq *kseq; 152415dc847eSJeff Roberson struct kse *ke; 1525ed062c8dSJulian Elischer struct thread *td; 1526ef1134c9SJeff Roberson int nclass; 1527ef1134c9SJeff Roberson int oclass; 152815dc847eSJeff Roberson 15292056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 153015dc847eSJeff Roberson if (kg->kg_pri_class == class) 153115dc847eSJeff Roberson return; 153215dc847eSJeff Roberson 1533ef1134c9SJeff Roberson nclass = PRI_BASE(class); 1534ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 1535ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1536ed062c8dSJulian Elischer ke = td->td_kse; 153715dc847eSJeff Roberson if (ke->ke_state != KES_ONRUNQ && 153815dc847eSJeff Roberson ke->ke_state != KES_THREAD) 153915dc847eSJeff Roberson continue; 154015dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 154115dc847eSJeff Roberson 1542ef1134c9SJeff Roberson #ifdef SMP 1543155b9987SJeff Roberson /* 1544155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1545155b9987SJeff Roberson * count because we could be changing to or from an interrupt 1546155b9987SJeff Roberson * class.
1547155b9987SJeff Roberson */ 1548155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 1549598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 155080f86c9fSJeff Roberson kseq->ksq_transferable--; 155180f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 155280f86c9fSJeff Roberson } 1553598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 155480f86c9fSJeff Roberson kseq->ksq_transferable++; 155580f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 155680f86c9fSJeff Roberson } 1557155b9987SJeff Roberson } 1558ef1134c9SJeff Roberson #endif 1559155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1560ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 1561fa885116SJulian Elischer kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1562155b9987SJeff Roberson } 1563155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1564155b9987SJeff Roberson kseq->ksq_load_timeshare++; 1565fa885116SJulian Elischer kseq_nice_add(kseq, kg->kg_proc->p_nice); 156615dc847eSJeff Roberson } 1567155b9987SJeff Roberson } 156815dc847eSJeff Roberson 156915dc847eSJeff Roberson kg->kg_pri_class = class; 157035e6168fSJeff Roberson } 157135e6168fSJeff Roberson 157235e6168fSJeff Roberson /* 157335e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 
157435e6168fSJeff Roberson */ 157535e6168fSJeff Roberson void 1576ed062c8dSJulian Elischer sched_exit(struct proc *p, struct thread *childtd) 157735e6168fSJeff Roberson { 157835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1579ed062c8dSJulian Elischer sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd); 158081d47d3fSJeff Roberson sched_exit_thread(NULL, childtd); 1581141ad61cSJeff Roberson } 1582141ad61cSJeff Roberson 1583141ad61cSJeff Roberson void 158455d44f79SJulian Elischer sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1585141ad61cSJeff Roberson { 158655d44f79SJulian Elischer /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 158755d44f79SJulian Elischer kg->kg_runtime += td->td_ksegrp->kg_runtime; 15884b60e324SJeff Roberson sched_interact_update(kg); 1589141ad61cSJeff Roberson } 1590141ad61cSJeff Roberson 1591141ad61cSJeff Roberson void 1592ed062c8dSJulian Elischer sched_exit_thread(struct thread *td, struct thread *childtd) 1593141ad61cSJeff Roberson { 159481d47d3fSJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 159581d47d3fSJeff Roberson childtd, childtd->td_proc->p_comm, childtd->td_priority); 1596ed062c8dSJulian Elischer kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse); 159735e6168fSJeff Roberson } 159835e6168fSJeff Roberson 159935e6168fSJeff Roberson void 16007cf90fb3SJeff Roberson sched_clock(struct thread *td) 160135e6168fSJeff Roberson { 160235e6168fSJeff Roberson struct kseq *kseq; 16030a016a05SJeff Roberson struct ksegrp *kg; 16047cf90fb3SJeff Roberson struct kse *ke; 160535e6168fSJeff Roberson 1606dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 16072454aaf5SJeff Roberson kseq = KSEQ_SELF(); 1608dc03363dSJeff Roberson #ifdef SMP 1609598b368dSJeff Roberson if (ticks >= bal_tick) 1610dc03363dSJeff Roberson sched_balance(); 1611598b368dSJeff Roberson if (ticks >= gbal_tick && balance_groups) 1612dc03363dSJeff Roberson sched_balance_groups(); 16132454aaf5SJeff Roberson /* 16142454aaf5SJeff 
Roberson * We could have been assigned a non real-time thread without an 16152454aaf5SJeff Roberson * IPI. 16162454aaf5SJeff Roberson */ 16172454aaf5SJeff Roberson if (kseq->ksq_assigned) 16182454aaf5SJeff Roberson kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1619dc03363dSJeff Roberson #endif 162015dc847eSJeff Roberson /* 162115dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 162215dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 162315dc847eSJeff Roberson * calculating this here. 162415dc847eSJeff Roberson */ 162515dc847eSJeff Roberson if (realstathz == 0) { 162615dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 162715dc847eSJeff Roberson tickincr = hz / realstathz; 162815dc847eSJeff Roberson /* 162915dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 163015dc847eSJeff Roberson * larger than hz. 163115dc847eSJeff Roberson */ 163215dc847eSJeff Roberson if (tickincr == 0) 163315dc847eSJeff Roberson tickincr = 1; 163415dc847eSJeff Roberson } 163535e6168fSJeff Roberson 16367cf90fb3SJeff Roberson ke = td->td_kse; 163715dc847eSJeff Roberson kg = ke->ke_ksegrp; 163835e6168fSJeff Roberson 16390a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 164065c8760dSJeff Roberson ke->ke_ticks++; 1641d465fb95SJeff Roberson ke->ke_ltick = ticks; 1642a8949de2SJeff Roberson 1643d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1644d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1645d465fb95SJeff Roberson sched_pctcpu_update(ke); 1646d465fb95SJeff Roberson 164743fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 164835e6168fSJeff Roberson return; 16493f741ca1SJeff Roberson /* 1650a8949de2SJeff Roberson * We only do slicing code for TIMESHARE ksegrps. 
1651a8949de2SJeff Roberson */ 1652a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1653a8949de2SJeff Roberson return; 1654a8949de2SJeff Roberson /* 165515dc847eSJeff Roberson * We used a tick; charge it to the ksegrp so that we can compute our 165615dc847eSJeff Roberson * interactivity. 165715dc847eSJeff Roberson */ 165815dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 16594b60e324SJeff Roberson sched_interact_update(kg); 1660407b0157SJeff Roberson 166135e6168fSJeff Roberson /* 166235e6168fSJeff Roberson * We used up one time slice. 166335e6168fSJeff Roberson */ 1664093c05e3SJeff Roberson if (--ke->ke_slice > 0) 166515dc847eSJeff Roberson return; 166635e6168fSJeff Roberson /* 166715dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 166835e6168fSJeff Roberson */ 1669155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1670e1f89c22SJeff Roberson sched_priority(kg); 167115dc847eSJeff Roberson sched_slice(ke); 167215dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 167315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 167415dc847eSJeff Roberson else 167515dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1676155b9987SJeff Roberson kseq_load_add(kseq, ke); 16774a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 167835e6168fSJeff Roberson } 167935e6168fSJeff Roberson 168035e6168fSJeff Roberson int 168135e6168fSJeff Roberson sched_runnable(void) 168235e6168fSJeff Roberson { 168335e6168fSJeff Roberson struct kseq *kseq; 1684b90816f1SJeff Roberson int load; 168535e6168fSJeff Roberson 1686b90816f1SJeff Roberson load = 1; 1687b90816f1SJeff Roberson 16880a016a05SJeff Roberson kseq = KSEQ_SELF(); 168922bf7d9aSJeff Roberson #ifdef SMP 169046f8b265SJeff Roberson if (kseq->ksq_assigned) { 169146f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 169222bf7d9aSJeff Roberson kseq_assign(kseq); 169346f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 169446f8b265SJeff Roberson } 169522bf7d9aSJeff Roberson #endif 16963f741ca1SJeff Roberson if
((curthread->td_flags & TDF_IDLETD) != 0) { 16973f741ca1SJeff Roberson if (kseq->ksq_load > 0) 16983f741ca1SJeff Roberson goto out; 16993f741ca1SJeff Roberson } else 17003f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1701b90816f1SJeff Roberson goto out; 1702b90816f1SJeff Roberson load = 0; 1703b90816f1SJeff Roberson out: 1704b90816f1SJeff Roberson return (load); 170535e6168fSJeff Roberson } 170635e6168fSJeff Roberson 170735e6168fSJeff Roberson void 170835e6168fSJeff Roberson sched_userret(struct thread *td) 170935e6168fSJeff Roberson { 171035e6168fSJeff Roberson struct ksegrp *kg; 171135e6168fSJeff Roberson 1712f5c157d9SJohn Baldwin KASSERT((td->td_flags & TDF_BORROWING) == 0, 1713f5c157d9SJohn Baldwin ("thread with borrowed priority returning to userland")); 171435e6168fSJeff Roberson kg = td->td_ksegrp; 1715f5c157d9SJohn Baldwin if (td->td_priority != kg->kg_user_pri) { 171635e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 171735e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 1718f5c157d9SJohn Baldwin td->td_base_pri = kg->kg_user_pri; 171935e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 172035e6168fSJeff Roberson } 172135e6168fSJeff Roberson } 172235e6168fSJeff Roberson 1723c9f25d8fSJeff Roberson struct kse * 1724c9f25d8fSJeff Roberson sched_choose(void) 1725c9f25d8fSJeff Roberson { 17260a016a05SJeff Roberson struct kseq *kseq; 1727c9f25d8fSJeff Roberson struct kse *ke; 172815dc847eSJeff Roberson 1729b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 173022bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 173115dc847eSJeff Roberson #ifdef SMP 173280f86c9fSJeff Roberson restart: 173322bf7d9aSJeff Roberson if (kseq->ksq_assigned) 173422bf7d9aSJeff Roberson kseq_assign(kseq); 173515dc847eSJeff Roberson #endif 173622bf7d9aSJeff Roberson ke = kseq_choose(kseq); 173735e6168fSJeff Roberson if (ke) { 173822bf7d9aSJeff Roberson #ifdef SMP 173922bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 174080f86c9fSJeff Roberson if (kseq_idled(kseq) 
== 0) 174180f86c9fSJeff Roberson goto restart; 174222bf7d9aSJeff Roberson #endif 1743155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 174435e6168fSJeff Roberson ke->ke_state = KES_THREAD; 174515dc847eSJeff Roberson return (ke); 174635e6168fSJeff Roberson } 1747c9f25d8fSJeff Roberson #ifdef SMP 174880f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 174980f86c9fSJeff Roberson goto restart; 1750c9f25d8fSJeff Roberson #endif 175115dc847eSJeff Roberson return (NULL); 175235e6168fSJeff Roberson } 175335e6168fSJeff Roberson 175435e6168fSJeff Roberson void 17552630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 175635e6168fSJeff Roberson { 1757c9f25d8fSJeff Roberson struct kseq *kseq; 175815dc847eSJeff Roberson struct ksegrp *kg; 17597cf90fb3SJeff Roberson struct kse *ke; 1760598b368dSJeff Roberson int preemptive; 17612454aaf5SJeff Roberson int canmigrate; 176222bf7d9aSJeff Roberson int class; 1763c9f25d8fSJeff Roberson 176481d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 176581d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 176681d47d3fSJeff Roberson curthread->td_proc->p_comm); 176722bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17687cf90fb3SJeff Roberson ke = td->td_kse; 17697cf90fb3SJeff Roberson kg = td->td_ksegrp; 1770598b368dSJeff Roberson canmigrate = 1; 1771598b368dSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1772598b368dSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 1773598b368dSJeff Roberson kseq = KSEQ_SELF(); 1774598b368dSJeff Roberson if ((ke->ke_flags & KEF_INTERNAL) == 0) 1775598b368dSJeff Roberson SLOT_USE(td->td_ksegrp); 1776598b368dSJeff Roberson ke->ke_flags &= ~KEF_INTERNAL; 1777598b368dSJeff Roberson #ifdef SMP 17782d59a44dSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) { 1779598b368dSJeff Roberson if (ke->ke_flags & KEF_REMOVED) 17802d59a44dSJeff Roberson ke->ke_flags &= ~KEF_REMOVED; 178122bf7d9aSJeff Roberson return; 17822d59a44dSJeff Roberson } 
1783598b368dSJeff Roberson canmigrate = KSE_CAN_MIGRATE(ke); 1784598b368dSJeff Roberson #endif 17855d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 17865d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 17875d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 17885d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 17895d7ef00cSJeff Roberson ("sched_add: process swapped out")); 17909bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 17919bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 179222bf7d9aSJeff Roberson switch (class) { 1793a8949de2SJeff Roberson case PRI_ITHD: 1794a8949de2SJeff Roberson case PRI_REALTIME: 179515dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 179615dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 1797598b368dSJeff Roberson if (canmigrate) 17987cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1799a8949de2SJeff Roberson break; 1800a8949de2SJeff Roberson case PRI_TIMESHARE: 180115dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 180215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 180315dc847eSJeff Roberson else 180415dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 180515dc847eSJeff Roberson break; 180615dc847eSJeff Roberson case PRI_IDLE: 180715dc847eSJeff Roberson /* 180815dc847eSJeff Roberson * This is for priority prop. 180915dc847eSJeff Roberson */ 18103f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 181115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 181215dc847eSJeff Roberson else 181315dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 181415dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 181515dc847eSJeff Roberson break; 181615dc847eSJeff Roberson default: 1817d322132cSJeff Roberson panic("Unknown pri class."); 1818a8949de2SJeff Roberson break; 1819a6ed4186SJeff Roberson } 182022bf7d9aSJeff Roberson #ifdef SMP 18212454aaf5SJeff Roberson /* 18222454aaf5SJeff Roberson * Don't migrate running threads here. 
Force the long term balancer 18232454aaf5SJeff Roberson * to do it. 18242454aaf5SJeff Roberson */ 1825f2b74cbfSJeff Roberson if (ke->ke_flags & KEF_HOLD) { 1826f2b74cbfSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 18272454aaf5SJeff Roberson canmigrate = 0; 1828f2b74cbfSJeff Roberson } 18292454aaf5SJeff Roberson /* 18302454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 18312454aaf5SJeff Roberson */ 18322454aaf5SJeff Roberson if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 183386e1c22aSJeff Roberson ke->ke_runq = NULL; 183480f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 183580f86c9fSJeff Roberson return; 183680f86c9fSJeff Roberson } 183722bf7d9aSJeff Roberson /* 1838670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1839670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 184022bf7d9aSJeff Roberson */ 184180f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 184280f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 184380f86c9fSJeff Roberson /* 184480f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 184580f86c9fSJeff Roberson * from the global idle mask. 184680f86c9fSJeff Roberson */ 184780f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 184880f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 184980f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 185080f86c9fSJeff Roberson /* 185180f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 
185280f86c9fSJeff Roberson */ 185380f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1854598b368dSJeff Roberson } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD) 1855670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1856670c524fSJeff Roberson return; 18572454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 185822bf7d9aSJeff Roberson #endif 1859f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1860f2b74cbfSJeff Roberson ke->ke_runq == kseq->ksq_curr) 186122bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 186263fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18630c0b25aeSJohn Baldwin return; 186435e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 186535e6168fSJeff Roberson 1866598b368dSJeff Roberson kseq_runq_add(kseq, ke, flags); 1867155b9987SJeff Roberson kseq_load_add(kseq, ke); 186835e6168fSJeff Roberson } 186935e6168fSJeff Roberson 187035e6168fSJeff Roberson /* sched_rem: take the KSE of td off the run queue of the cpu recorded in ke_cpu. Asserts that sched_lock is owned. The KSE must be in state KES_ONRUNQ (checked with KASSERT) unless it is flagged KEF_ASSIGNED. On the normal path the state becomes KES_THREAD and both kseq_runq_rem() and kseq_load_rem() are applied to that cpu queue. */ void 18717cf90fb3SJeff Roberson sched_rem(struct thread *td) 187235e6168fSJeff Roberson { 187315dc847eSJeff Roberson struct kseq *kseq; 18747cf90fb3SJeff Roberson struct kse *ke; 18757cf90fb3SJeff Roberson 187681d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 187781d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 187881d47d3fSJeff Roberson curthread->td_proc->p_comm); 1879598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1880598b368dSJeff Roberson ke = td->td_kse; 18812d59a44dSJeff Roberson SLOT_RELEASE(td->td_ksegrp); 1882598b368dSJeff Roberson /* A KSE flagged KEF_ASSIGNED is only marked KEF_REMOVED here; no run queue or load counter is touched on this path. NOTE(review): presumably the assignment is still in flight to another cpu and that cpu honours KEF_REMOVED later - confirm against the code that consumes the flag. */ if (ke->ke_flags & KEF_ASSIGNED) { 18832d59a44dSJeff Roberson ke->ke_flags |= KEF_REMOVED; 188422bf7d9aSJeff Roberson return; 18852d59a44dSJeff Roberson } 1886c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1887c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 188835e6168fSJeff Roberson 18892d59a44dSJeff Roberson ke->ke_state = KES_THREAD; 189015dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 
1891155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1892155b9987SJeff Roberson kseq_load_rem(kseq, ke); 189335e6168fSJeff Roberson } 189435e6168fSJeff Roberson 189535e6168fSJeff Roberson /* sched_pctcpu: return a FSCALE-scaled cpu usage figure for td. Returns 0 when td has no KSE or when ke_ticks is zero. Takes and releases sched_lock as a spin mutex around the computation. Side effect: stores ke_ltick - ke_ftick into p_swtime of the owning proc on every call that reaches the locked section. */ fixpt_t 18967cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 189735e6168fSJeff Roberson { 189835e6168fSJeff Roberson fixpt_t pctcpu; 18997cf90fb3SJeff Roberson struct kse *ke; 190035e6168fSJeff Roberson 190135e6168fSJeff Roberson pctcpu = 0; 19027cf90fb3SJeff Roberson ke = td->td_kse; 1903484288deSJeff Roberson /* No KSE attached: report zero without taking sched_lock. */ if (ke == NULL) 1904484288deSJeff Roberson return (0); 190535e6168fSJeff Roberson 1906b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 190735e6168fSJeff Roberson if (ke->ke_ticks) { 190835e6168fSJeff Roberson int rtick; 190935e6168fSJeff Roberson 1910210491d3SJeff Roberson /* 1911210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1912210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1913210491d3SJeff Roberson * rounding errors. 1914210491d3SJeff Roberson */ 19152e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 19162e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 191735e6168fSJeff Roberson sched_pctcpu_update(ke); 191835e6168fSJeff Roberson /* How many rtick per second ? 
*/ 1919210491d3SJeff Roberson /* rtick is capped at SCHED_CPU_TICKS before being scaled by FSCALE and divided by realstathz. */ rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 19207121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 192135e6168fSJeff Roberson } 192235e6168fSJeff Roberson 192335e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1924828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 192535e6168fSJeff Roberson 192635e6168fSJeff Roberson return (pctcpu); 192735e6168fSJeff Roberson } 192835e6168fSJeff Roberson 19299bacd788SJeff Roberson /* sched_bind: flag the KSE of td as KEF_BOUND. Asserts that sched_lock is owned. Under SMP, when the current cpu differs from the requested cpu, the KSE is set to KES_THREAD, its load is removed from the queue of ke_cpu, it is handed to the requested cpu with kseq_notify(), and the caller switches away with mi_switch(). Without SMP only the flag is set. */ void 19309bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19319bacd788SJeff Roberson { 19329bacd788SJeff Roberson struct kse *ke; 19339bacd788SJeff Roberson 19349bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 19359bacd788SJeff Roberson ke = td->td_kse; 19369bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 193780f86c9fSJeff Roberson #ifdef SMP 193880f86c9fSJeff Roberson /* Already executing on the requested cpu: the KEF_BOUND flag set above is all that is needed. */ if (PCPU_GET(cpuid) == cpu) 19399bacd788SJeff Roberson return; 19409bacd788SJeff Roberson /* sched_rem without the runq_remove */ 19419bacd788SJeff Roberson ke->ke_state = KES_THREAD; 1942155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 19439bacd788SJeff Roberson kseq_notify(ke, cpu); 19449bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
*/ 1945279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19469bacd788SJeff Roberson #endif 19479bacd788SJeff Roberson } 19489bacd788SJeff Roberson 19499bacd788SJeff Roberson /* sched_unbind: clear the KEF_BOUND flag on the KSE of td. Asserts that sched_lock is owned. No migration or queue manipulation is performed here. */ void 19509bacd788SJeff Roberson sched_unbind(struct thread *td) 19519bacd788SJeff Roberson { 19529bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 19539bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 19549bacd788SJeff Roberson } 19559bacd788SJeff Roberson 195635e6168fSJeff Roberson /* sched_load: report the scheduler load as an int. Under SMP this is the sum of ksg_load over group ids 0 through ksg_maxid inclusive; otherwise it is ksq_sysload of the queue returned by KSEQ_SELF(). No lock is taken or asserted in this function. */ int 195733916c36SJeff Roberson sched_load(void) 195833916c36SJeff Roberson { 195933916c36SJeff Roberson #ifdef SMP 196033916c36SJeff Roberson int total; 196133916c36SJeff Roberson int i; 196233916c36SJeff Roberson 196333916c36SJeff Roberson total = 0; 196433916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 196533916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 196633916c36SJeff Roberson return (total); 196733916c36SJeff Roberson #else 196833916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 196933916c36SJeff Roberson #endif 197033916c36SJeff Roberson } 197133916c36SJeff Roberson 197233916c36SJeff Roberson /* sched_sizeof_ksegrp: size in bytes of struct ksegrp plus struct kg_sched. NOTE(review): presumably the caller allocates both in one chunk so the scheduler data trails the ksegrp - confirm against the allocator. */ int 197335e6168fSJeff Roberson sched_sizeof_ksegrp(void) 197435e6168fSJeff Roberson { 197535e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 197635e6168fSJeff Roberson } 197735e6168fSJeff Roberson 197835e6168fSJeff Roberson /* sched_sizeof_proc: size in bytes of struct proc; no scheduler-specific extension is added here. */ int 197935e6168fSJeff Roberson sched_sizeof_proc(void) 198035e6168fSJeff Roberson { 198135e6168fSJeff Roberson return (sizeof(struct proc)); 198235e6168fSJeff Roberson } 198335e6168fSJeff Roberson 198435e6168fSJeff Roberson /* sched_sizeof_thread: size in bytes of struct thread plus struct td_sched. NOTE(review): presumably the same combined-allocation convention as for the ksegrp size - confirm against the allocator. */ int 198535e6168fSJeff Roberson sched_sizeof_thread(void) 198635e6168fSJeff Roberson { 198735e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 198835e6168fSJeff Roberson } 1989ed062c8dSJulian Elischer /* kern/kern_switch.c is textually included below with KERN_SWITCH_INCLUDE defined, so its contents are compiled as part of this translation unit. */ #define KERN_SWITCH_INCLUDE 1 1990ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1991