135e6168fSJeff Roberson /*- 29fe02f7eSJeff Roberson * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. O'Brien 304da0d332SPeter Wemm #include "opt_hwpmc_hooks.h" 314da0d332SPeter Wemm #include "opt_sched.h" 329923b511SScott Long 33ed062c8dSJulian Elischer #define kse td_sched 34ed062c8dSJulian Elischer 3535e6168fSJeff Roberson #include <sys/param.h> 3635e6168fSJeff Roberson #include <sys/systm.h> 372c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3835e6168fSJeff Roberson #include <sys/kernel.h> 3935e6168fSJeff Roberson #include <sys/ktr.h> 4035e6168fSJeff Roberson #include <sys/lock.h> 4135e6168fSJeff Roberson #include <sys/mutex.h> 4235e6168fSJeff Roberson #include <sys/proc.h> 43245f3abfSJeff Roberson #include <sys/resource.h> 449bacd788SJeff Roberson #include <sys/resourcevar.h> 4535e6168fSJeff Roberson #include <sys/sched.h> 4635e6168fSJeff Roberson #include <sys/smp.h> 4735e6168fSJeff Roberson #include <sys/sx.h> 4835e6168fSJeff Roberson #include <sys/sysctl.h> 4935e6168fSJeff Roberson #include <sys/sysproto.h> 50f5c157d9SJohn Baldwin #include <sys/turnstile.h> 5135e6168fSJeff Roberson #include <sys/vmmeter.h> 5235e6168fSJeff Roberson #ifdef KTRACE 5335e6168fSJeff Roberson #include <sys/uio.h> 5435e6168fSJeff Roberson #include <sys/ktrace.h> 5535e6168fSJeff Roberson #endif 5635e6168fSJeff Roberson 57ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 58ebccf1e3SJoseph Koshy #include <sys/pmckern.h> 59ebccf1e3SJoseph Koshy #endif 60ebccf1e3SJoseph Koshy 6135e6168fSJeff Roberson #include <machine/cpu.h> 6222bf7d9aSJeff Roberson #include <machine/smp.h> 6335e6168fSJeff Roberson 6435e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 6535e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 6635e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6735e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6835e6168fSJeff Roberson 6935e6168fSJeff Roberson static void sched_setup(void *dummy); 7035e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 7135e6168fSJeff Roberson 72e038d354SScott Long static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 73e1f89c22SJeff Roberson 74e038d354SScott Long SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 75e038d354SScott Long "Scheduler name"); 76dc095794SScott Long 7715dc847eSJeff Roberson static int slice_min = 1; 7815dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 7915dc847eSJeff Roberson 80210491d3SJeff Roberson static int slice_max = 10; 8115dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 8215dc847eSJeff Roberson 8315dc847eSJeff Roberson int realstathz; 8415dc847eSJeff Roberson int tickincr = 1; 85783caefbSJeff Roberson 8635e6168fSJeff Roberson /* 8721381d1bSJeff Roberson * The following datastructures are allocated within their parent structure 8821381d1bSJeff Roberson * but are scheduler specific. 8921381d1bSJeff Roberson */ 9021381d1bSJeff Roberson /* 9121381d1bSJeff Roberson * The schedulable entity that can be given a context to run. A process may 9221381d1bSJeff Roberson * have several of these. 93ed062c8dSJulian Elischer */ 94ed062c8dSJulian Elischer struct kse { 95ed062c8dSJulian Elischer TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ 96ed062c8dSJulian Elischer int ke_flags; /* (j) KEF_* flags. */ 97ed062c8dSJulian Elischer struct thread *ke_thread; /* (*) Active associated thread. */ 98ed062c8dSJulian Elischer fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ 99ed062c8dSJulian Elischer char ke_rqindex; /* (j) Run queue index. */ 100ed062c8dSJulian Elischer enum { 101ed062c8dSJulian Elischer KES_THREAD = 0x0, /* slaved to thread state */ 102ed062c8dSJulian Elischer KES_ONRUNQ 103ed062c8dSJulian Elischer } ke_state; /* (j) thread sched specific status. */ 104ed062c8dSJulian Elischer int ke_slptime; 105ed062c8dSJulian Elischer int ke_slice; 106ed062c8dSJulian Elischer struct runq *ke_runq; 107ed062c8dSJulian Elischer u_char ke_cpu; /* CPU that we have affinity for. */ 108ed062c8dSJulian Elischer /* The following variables are only used for pctcpu calculation */ 109ed062c8dSJulian Elischer int ke_ltick; /* Last tick that we were running on */ 110ed062c8dSJulian Elischer int ke_ftick; /* First tick that we were running on */ 111ed062c8dSJulian Elischer int ke_ticks; /* Tick count */ 112ed062c8dSJulian Elischer 113ed062c8dSJulian Elischer }; 114ed062c8dSJulian Elischer #define td_kse td_sched 115ed062c8dSJulian Elischer #define td_slptime td_kse->ke_slptime 116ed062c8dSJulian Elischer #define ke_proc ke_thread->td_proc 117ed062c8dSJulian Elischer #define ke_ksegrp ke_thread->td_ksegrp 11822bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 11921381d1bSJeff Roberson /* flags kept in ke_flags */ 120598b368dSJeff Roberson #define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ 121598b368dSJeff Roberson #define KEF_BOUND 0x0002 /* Thread can not migrate. */ 122598b368dSJeff Roberson #define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ 123598b368dSJeff Roberson #define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ 124598b368dSJeff Roberson #define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 12521381d1bSJeff Roberson #define KEF_INTERNAL 0x0020 /* Thread added due to migration. */ 12621381d1bSJeff Roberson #define KEF_DIDRUN 0x02000 /* Thread actually ran. */ 12721381d1bSJeff Roberson #define KEF_EXIT 0x04000 /* Thread is being killed. */ 12835e6168fSJeff Roberson 12935e6168fSJeff Roberson struct kg_sched { 130ed062c8dSJulian Elischer struct thread *skg_last_assigned; /* (j) Last thread assigned to */ 131ed062c8dSJulian Elischer /* the system scheduler */ 132407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 133407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 134ed062c8dSJulian Elischer int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ 135ed062c8dSJulian Elischer int skg_concurrency; /* (j) Num threads requested in group.*/ 13635e6168fSJeff Roberson }; 137ed062c8dSJulian Elischer #define kg_last_assigned kg_sched->skg_last_assigned 138ed062c8dSJulian Elischer #define kg_avail_opennings kg_sched->skg_avail_opennings 139ed062c8dSJulian Elischer #define kg_concurrency kg_sched->skg_concurrency 140407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 141ed062c8dSJulian Elischer #define kg_slptime kg_sched->skg_slptime 14235e6168fSJeff Roberson 14321381d1bSJeff Roberson #define SLOT_RELEASE(kg) (kg)->kg_avail_opennings++ 14421381d1bSJeff Roberson #define SLOT_USE(kg) (kg)->kg_avail_opennings-- 145d39063f2SJulian Elischer 146ed062c8dSJulian Elischer static struct kse kse0; 147ed062c8dSJulian Elischer static struct kg_sched kg_sched0; 14835e6168fSJeff Roberson 14935e6168fSJeff Roberson /* 150665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 151665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 152665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 153665cb285SJeff Roberson * on latency. 154e1f89c22SJeff Roberson * 155e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 156665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 157e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 15835e6168fSJeff Roberson */ 159407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 160a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 161a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 162665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 16315dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 164665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 16535e6168fSJeff Roberson 16635e6168fSJeff Roberson /* 167e1f89c22SJeff Roberson * These determine the interactivity of a process. 16835e6168fSJeff Roberson * 169407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 170407b0157SJeff Roberson * before throttling back. 171d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 172210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 173e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 17435e6168fSJeff Roberson */ 1754c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 176d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 177210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 178210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1794c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 180e1f89c22SJeff Roberson 18135e6168fSJeff Roberson /* 18235e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 18335e6168fSJeff Roberson * granted to each thread. 18435e6168fSJeff Roberson * 18535e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 18635e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 18735e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 188245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 189245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 1907d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment. 19135e6168fSJeff Roberson */ 19215dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 19315dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 1940392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max) 1957d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 19635e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 19735e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 198245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 1997d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 20035e6168fSJeff Roberson 20135e6168fSJeff Roberson /* 202ed062c8dSJulian Elischer * This macro determines whether or not the thread belongs on the current or 20335e6168fSJeff Roberson * next run queue. 20435e6168fSJeff Roberson */ 20515dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 20615dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 207a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 208f5c157d9SJohn Baldwin ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg)) 20935e6168fSJeff Roberson 21035e6168fSJeff Roberson /* 21135e6168fSJeff Roberson * Cpu percentage computation macros and defines. 21235e6168fSJeff Roberson * 21335e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 21435e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 21535e6168fSJeff Roberson */ 21635e6168fSJeff Roberson 2175053d272SJeff Roberson #define SCHED_CPU_TIME 10 21835e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 21935e6168fSJeff Roberson 22035e6168fSJeff Roberson /* 22115dc847eSJeff Roberson * kseq - per processor runqs and statistics. 22235e6168fSJeff Roberson */ 22335e6168fSJeff Roberson struct kseq { 224a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 22515dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 22615dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 22715dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 228ef1134c9SJeff Roberson int ksq_load_timeshare; /* Load for timeshare. */ 22915dc847eSJeff Roberson int ksq_load; /* Aggregate load. */ 230a0a931ceSJeff Roberson short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 23115dc847eSJeff Roberson short ksq_nicemin; /* Least nice. */ 2325d7ef00cSJeff Roberson #ifdef SMP 23380f86c9fSJeff Roberson int ksq_transferable; 23480f86c9fSJeff Roberson LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 23580f86c9fSJeff Roberson struct kseq_group *ksq_group; /* Our processor group. */ 236fa9c9717SJeff Roberson volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 23733916c36SJeff Roberson #else 23833916c36SJeff Roberson int ksq_sysload; /* For loadavg, !ITHD load. */ 2395d7ef00cSJeff Roberson #endif 24035e6168fSJeff Roberson }; 24135e6168fSJeff Roberson 24280f86c9fSJeff Roberson #ifdef SMP 24380f86c9fSJeff Roberson /* 24480f86c9fSJeff Roberson * kseq groups are groups of processors which can cheaply share threads. When 24580f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other 24680f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt. 24780f86c9fSJeff Roberson * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 24880f86c9fSJeff Roberson * In a numa environment we'd want an idle bitmap per group and a two tiered 24980f86c9fSJeff Roberson * load balancer. 25080f86c9fSJeff Roberson */ 25180f86c9fSJeff Roberson struct kseq_group { 25280f86c9fSJeff Roberson int ksg_cpus; /* Count of CPUs in this kseq group. */ 253b2ae7ed7SMarcel Moolenaar cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 254b2ae7ed7SMarcel Moolenaar cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 255b2ae7ed7SMarcel Moolenaar cpumask_t ksg_mask; /* Bit mask for first cpu. */ 256cac77d04SJeff Roberson int ksg_load; /* Total load of this group. */ 25780f86c9fSJeff Roberson int ksg_transferable; /* Transferable load of this group. */ 25880f86c9fSJeff Roberson LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */ 25980f86c9fSJeff Roberson }; 26080f86c9fSJeff Roberson #endif 26180f86c9fSJeff Roberson 26235e6168fSJeff Roberson /* 26335e6168fSJeff Roberson * One kse queue per processor. 26435e6168fSJeff Roberson */ 2650a016a05SJeff Roberson #ifdef SMP 266b2ae7ed7SMarcel Moolenaar static cpumask_t kseq_idle; 267cac77d04SJeff Roberson static int ksg_maxid; 26822bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 26980f86c9fSJeff Roberson static struct kseq_group kseq_groups[MAXCPU]; 270dc03363dSJeff Roberson static int bal_tick; 271dc03363dSJeff Roberson static int gbal_tick; 272598b368dSJeff Roberson static int balance_groups; 273dc03363dSJeff Roberson 27480f86c9fSJeff Roberson #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 27580f86c9fSJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 276cac77d04SJeff Roberson #define KSEQ_ID(x) ((x) - kseq_cpu) 277cac77d04SJeff Roberson #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 27880f86c9fSJeff Roberson #else /* !SMP */ 27922bf7d9aSJeff Roberson static struct kseq kseq_cpu; 280dc03363dSJeff Roberson 2810a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 2820a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 2830a016a05SJeff Roberson #endif 28435e6168fSJeff Roberson 28521381d1bSJeff Roberson static void slot_fill(struct ksegrp *); 286ed062c8dSJulian Elischer static struct kse *sched_choose(void); /* XXX Should be thread * */ 28721381d1bSJeff Roberson static void sched_slice(struct kse *); 28821381d1bSJeff Roberson static void sched_priority(struct ksegrp *); 28921381d1bSJeff Roberson static void sched_thread_priority(struct thread *, u_char); 29021381d1bSJeff Roberson static int sched_interact_score(struct ksegrp *); 29121381d1bSJeff Roberson static void sched_interact_update(struct ksegrp *); 29221381d1bSJeff Roberson static void sched_interact_fork(struct ksegrp *); 29321381d1bSJeff Roberson static void sched_pctcpu_update(struct kse *); 29435e6168fSJeff Roberson 2955d7ef00cSJeff Roberson /* Operations on per processor queues */ 29621381d1bSJeff Roberson static struct kse * kseq_choose(struct kseq *); 29721381d1bSJeff Roberson static void kseq_setup(struct kseq *); 29821381d1bSJeff Roberson static void kseq_load_add(struct kseq *, struct kse *); 29921381d1bSJeff Roberson static void kseq_load_rem(struct kseq *, struct kse *); 30021381d1bSJeff Roberson static __inline void kseq_runq_add(struct kseq *, struct kse *, int); 30121381d1bSJeff Roberson static __inline void kseq_runq_rem(struct kseq *, struct kse *); 30221381d1bSJeff Roberson static void kseq_nice_add(struct kseq *, int); 30321381d1bSJeff Roberson static void kseq_nice_rem(struct kseq *, int); 3047cd650a9SJeff Roberson void kseq_print(int cpu); 3055d7ef00cSJeff Roberson #ifdef SMP 30621381d1bSJeff Roberson static int kseq_transfer(struct kseq *, struct kse *, int); 30721381d1bSJeff Roberson static struct kse *runq_steal(struct runq *); 308dc03363dSJeff Roberson static void sched_balance(void); 309dc03363dSJeff Roberson static void sched_balance_groups(void); 31021381d1bSJeff Roberson static void sched_balance_group(struct kseq_group *); 31121381d1bSJeff Roberson static void sched_balance_pair(struct kseq *, struct kseq *); 31221381d1bSJeff Roberson static void kseq_move(struct kseq *, int); 31321381d1bSJeff Roberson static int kseq_idled(struct kseq *); 31421381d1bSJeff Roberson static void kseq_notify(struct kse *, int); 31522bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 31621381d1bSJeff Roberson static struct kse *kseq_steal(struct kseq *, int); 317598b368dSJeff Roberson #define KSE_CAN_MIGRATE(ke) \ 3181e7fad6bSScott Long ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 3195d7ef00cSJeff Roberson #endif 3205d7ef00cSJeff Roberson 32115dc847eSJeff Roberson void 3227cd650a9SJeff Roberson kseq_print(int cpu) 32315dc847eSJeff Roberson { 3247cd650a9SJeff Roberson struct kseq *kseq; 32515dc847eSJeff Roberson int i; 32615dc847eSJeff Roberson 3277cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 32815dc847eSJeff Roberson 32915dc847eSJeff Roberson printf("kseq:\n"); 33015dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 331155b9987SJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 332ef1134c9SJeff Roberson #ifdef SMP 33380f86c9fSJeff Roberson printf("\tload transferable: %d\n", kseq->ksq_transferable); 334ef1134c9SJeff Roberson #endif 33515dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 33615dc847eSJeff Roberson printf("\tnice counts:\n"); 337a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 33815dc847eSJeff Roberson if (kseq->ksq_nice[i]) 33915dc847eSJeff Roberson printf("\t\t%d = %d\n", 34015dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 34115dc847eSJeff Roberson } 34215dc847eSJeff Roberson 343155b9987SJeff Roberson static __inline void 344598b368dSJeff Roberson kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 345155b9987SJeff Roberson { 346155b9987SJeff Roberson #ifdef SMP 347598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 34880f86c9fSJeff Roberson kseq->ksq_transferable++; 34980f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 3502454aaf5SJeff Roberson ke->ke_flags |= KEF_XFERABLE; 35180f86c9fSJeff Roberson } 352155b9987SJeff Roberson #endif 353598b368dSJeff Roberson runq_add(ke->ke_runq, ke, flags); 354155b9987SJeff Roberson } 355155b9987SJeff Roberson 356155b9987SJeff Roberson static __inline void 357155b9987SJeff Roberson kseq_runq_rem(struct kseq *kseq, struct kse *ke) 358155b9987SJeff Roberson { 359155b9987SJeff Roberson #ifdef SMP 3602454aaf5SJeff Roberson if (ke->ke_flags & KEF_XFERABLE) { 36180f86c9fSJeff Roberson kseq->ksq_transferable--; 36280f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 3632454aaf5SJeff Roberson ke->ke_flags &= ~KEF_XFERABLE; 36480f86c9fSJeff Roberson } 365155b9987SJeff Roberson #endif 366155b9987SJeff Roberson runq_remove(ke->ke_runq, ke); 367155b9987SJeff Roberson } 368155b9987SJeff Roberson 369a8949de2SJeff Roberson static void 370155b9987SJeff Roberson kseq_load_add(struct kseq *kseq, struct kse *ke) 3715d7ef00cSJeff Roberson { 372ef1134c9SJeff Roberson int class; 373b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 374ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 375ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 376ef1134c9SJeff Roberson kseq->ksq_load_timeshare++; 37715dc847eSJeff Roberson kseq->ksq_load++; 37881d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 379207a6c0dSDavid E. O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 38033916c36SJeff Roberson #ifdef SMP 381cac77d04SJeff Roberson kseq->ksq_group->ksg_load++; 38233916c36SJeff Roberson #else 38333916c36SJeff Roberson kseq->ksq_sysload++; 384cac77d04SJeff Roberson #endif 38515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 386fa885116SJulian Elischer kseq_nice_add(kseq, ke->ke_proc->p_nice); 3875d7ef00cSJeff Roberson } 38815dc847eSJeff Roberson 389a8949de2SJeff Roberson static void 390155b9987SJeff Roberson kseq_load_rem(struct kseq *kseq, struct kse *ke) 3915d7ef00cSJeff Roberson { 392ef1134c9SJeff Roberson int class; 393b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 394ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 395ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 396ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 397207a6c0dSDavid E. O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 39833916c36SJeff Roberson #ifdef SMP 399cac77d04SJeff Roberson kseq->ksq_group->ksg_load--; 40033916c36SJeff Roberson #else 40133916c36SJeff Roberson kseq->ksq_sysload--; 402cac77d04SJeff Roberson #endif 40315dc847eSJeff Roberson kseq->ksq_load--; 40481d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 40515dc847eSJeff Roberson ke->ke_runq = NULL; 40615dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 407fa885116SJulian Elischer kseq_nice_rem(kseq, ke->ke_proc->p_nice); 4085d7ef00cSJeff Roberson } 4095d7ef00cSJeff Roberson 41015dc847eSJeff Roberson static void 41115dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 41215dc847eSJeff Roberson { 413b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 41415dc847eSJeff Roberson /* Normalize to zero. */ 41515dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 416ef1134c9SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 41715dc847eSJeff Roberson kseq->ksq_nicemin = nice; 41815dc847eSJeff Roberson } 41915dc847eSJeff Roberson 42015dc847eSJeff Roberson static void 42115dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 42215dc847eSJeff Roberson { 42315dc847eSJeff Roberson int n; 42415dc847eSJeff Roberson 425b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 42615dc847eSJeff Roberson /* Normalize to zero. */ 42715dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 42815dc847eSJeff Roberson kseq->ksq_nice[n]--; 42915dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 43015dc847eSJeff Roberson 43115dc847eSJeff Roberson /* 43215dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 43315dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 43415dc847eSJeff Roberson * the smallest nice. 43515dc847eSJeff Roberson */ 43615dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 43715dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 438ef1134c9SJeff Roberson kseq->ksq_load_timeshare == 0) 43915dc847eSJeff Roberson return; 44015dc847eSJeff Roberson 441a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 44215dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 44315dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 44415dc847eSJeff Roberson return; 44515dc847eSJeff Roberson } 44615dc847eSJeff Roberson } 44715dc847eSJeff Roberson 4485d7ef00cSJeff Roberson #ifdef SMP 449356500a3SJeff Roberson /* 450155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 451356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 452356500a3SJeff Roberson * by migrating some processes. 453356500a3SJeff Roberson * 454356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 455356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 456356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 457356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 458356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 459356500a3SJeff Roberson * 460356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 461356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 462356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 463356500a3SJeff Roberson * 464356500a3SJeff Roberson */ 46522bf7d9aSJeff Roberson static void 466dc03363dSJeff Roberson sched_balance(void) 467356500a3SJeff Roberson { 468cac77d04SJeff Roberson struct kseq_group *high; 469cac77d04SJeff Roberson struct kseq_group *low; 470cac77d04SJeff Roberson struct kseq_group *ksg; 471cac77d04SJeff Roberson int cnt; 472356500a3SJeff Roberson int i; 473356500a3SJeff Roberson 474598b368dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 47586f8ae96SJeff Roberson if (smp_started == 0) 476598b368dSJeff Roberson return; 477cac77d04SJeff Roberson low = high = NULL; 478cac77d04SJeff Roberson i = random() % (ksg_maxid + 1); 479cac77d04SJeff Roberson for (cnt = 0; cnt <= ksg_maxid; cnt++) { 480cac77d04SJeff Roberson ksg = KSEQ_GROUP(i); 481cac77d04SJeff Roberson /* 482cac77d04SJeff Roberson * Find the CPU with the highest load that has some 483cac77d04SJeff Roberson * threads to transfer. 484cac77d04SJeff Roberson */ 485cac77d04SJeff Roberson if ((high == NULL || ksg->ksg_load > high->ksg_load) 486cac77d04SJeff Roberson && ksg->ksg_transferable) 487cac77d04SJeff Roberson high = ksg; 488cac77d04SJeff Roberson if (low == NULL || ksg->ksg_load < low->ksg_load) 489cac77d04SJeff Roberson low = ksg; 490cac77d04SJeff Roberson if (++i > ksg_maxid) 491cac77d04SJeff Roberson i = 0; 492cac77d04SJeff Roberson } 493cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 494cac77d04SJeff Roberson sched_balance_pair(LIST_FIRST(&high->ksg_members), 495cac77d04SJeff Roberson LIST_FIRST(&low->ksg_members)); 496cac77d04SJeff Roberson } 49786f8ae96SJeff Roberson 498cac77d04SJeff Roberson static void 499dc03363dSJeff Roberson sched_balance_groups(void) 500cac77d04SJeff Roberson { 501cac77d04SJeff Roberson int i; 502cac77d04SJeff Roberson 503598b368dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 504dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 505cac77d04SJeff Roberson if (smp_started) 506cac77d04SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 507cac77d04SJeff Roberson sched_balance_group(KSEQ_GROUP(i)); 508356500a3SJeff Roberson } 509cac77d04SJeff Roberson 510cac77d04SJeff Roberson static void 511cac77d04SJeff Roberson sched_balance_group(struct kseq_group *ksg) 512cac77d04SJeff Roberson { 513cac77d04SJeff Roberson struct kseq *kseq; 514cac77d04SJeff Roberson struct kseq *high; 515cac77d04SJeff Roberson struct kseq *low; 516cac77d04SJeff Roberson int load; 517cac77d04SJeff Roberson 518cac77d04SJeff Roberson if (ksg->ksg_transferable == 0) 519cac77d04SJeff Roberson return; 520cac77d04SJeff Roberson low = NULL; 521cac77d04SJeff Roberson high = NULL; 522cac77d04SJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 523cac77d04SJeff Roberson load = kseq->ksq_load; 524cac77d04SJeff Roberson if (high == NULL || load > high->ksq_load) 525cac77d04SJeff Roberson high = kseq; 526cac77d04SJeff Roberson if (low == NULL || load < low->ksq_load) 527cac77d04SJeff Roberson low = kseq; 528356500a3SJeff Roberson } 529cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 530cac77d04SJeff Roberson sched_balance_pair(high, low); 531356500a3SJeff Roberson } 532cac77d04SJeff Roberson 533cac77d04SJeff Roberson static void 534cac77d04SJeff Roberson sched_balance_pair(struct kseq *high, struct kseq *low) 535cac77d04SJeff Roberson { 536cac77d04SJeff Roberson int transferable; 537cac77d04SJeff Roberson int high_load; 538cac77d04SJeff Roberson int low_load; 539cac77d04SJeff Roberson int move; 540cac77d04SJeff Roberson int diff; 541cac77d04SJeff Roberson int i; 542cac77d04SJeff Roberson 54380f86c9fSJeff Roberson /* 54480f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 54580f86c9fSJeff Roberson * kseq's transferable count, otherwise we can steal from other members 54680f86c9fSJeff Roberson * of the group. 54780f86c9fSJeff Roberson */ 548cac77d04SJeff Roberson if (high->ksq_group == low->ksq_group) { 549cac77d04SJeff Roberson transferable = high->ksq_transferable; 550cac77d04SJeff Roberson high_load = high->ksq_load; 551cac77d04SJeff Roberson low_load = low->ksq_load; 552cac77d04SJeff Roberson } else { 553cac77d04SJeff Roberson transferable = high->ksq_group->ksg_transferable; 554cac77d04SJeff Roberson high_load = high->ksq_group->ksg_load; 555cac77d04SJeff Roberson low_load = low->ksq_group->ksg_load; 556cac77d04SJeff Roberson } 55780f86c9fSJeff Roberson if (transferable == 0) 558cac77d04SJeff Roberson return; 559155b9987SJeff Roberson /* 560155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 56180f86c9fSJeff Roberson * kses we actually have to give up (transferable). 562155b9987SJeff Roberson */ 563cac77d04SJeff Roberson diff = high_load - low_load; 564356500a3SJeff Roberson move = diff / 2; 565356500a3SJeff Roberson if (diff & 0x1) 566356500a3SJeff Roberson move++; 56780f86c9fSJeff Roberson move = min(move, transferable); 568356500a3SJeff Roberson for (i = 0; i < move; i++) 569cac77d04SJeff Roberson kseq_move(high, KSEQ_ID(low)); 570356500a3SJeff Roberson return; 571356500a3SJeff Roberson } 572356500a3SJeff Roberson 57322bf7d9aSJeff Roberson static void 574356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 575356500a3SJeff Roberson { 57680f86c9fSJeff Roberson struct kseq *kseq; 57780f86c9fSJeff Roberson struct kseq *to; 578356500a3SJeff Roberson struct kse *ke; 579356500a3SJeff Roberson 58080f86c9fSJeff Roberson kseq = from; 58180f86c9fSJeff Roberson to = KSEQ_CPU(cpu); 58280f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 58380f86c9fSJeff Roberson if (ke == NULL) { 58480f86c9fSJeff Roberson struct kseq_group *ksg; 58580f86c9fSJeff Roberson 58680f86c9fSJeff Roberson ksg = kseq->ksq_group; 58780f86c9fSJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 58880f86c9fSJeff Roberson if (kseq == from || kseq->ksq_transferable == 0) 58980f86c9fSJeff Roberson continue; 59080f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 59180f86c9fSJeff Roberson break; 59280f86c9fSJeff Roberson } 59380f86c9fSJeff Roberson if (ke == NULL) 59480f86c9fSJeff Roberson panic("kseq_move: No KSEs available with a " 59580f86c9fSJeff Roberson "transferable count of %d\n", 59680f86c9fSJeff Roberson ksg->ksg_transferable); 59780f86c9fSJeff Roberson } 59880f86c9fSJeff Roberson if (kseq == to) 59980f86c9fSJeff Roberson return; 600356500a3SJeff Roberson ke->ke_state = KES_THREAD; 60180f86c9fSJeff Roberson kseq_runq_rem(kseq, ke); 60280f86c9fSJeff Roberson kseq_load_rem(kseq, ke); 603112b6d3aSJeff Roberson kseq_notify(ke, cpu); 604356500a3SJeff Roberson } 60522bf7d9aSJeff Roberson 60680f86c9fSJeff Roberson static int 60780f86c9fSJeff Roberson kseq_idled(struct kseq *kseq) 60822bf7d9aSJeff Roberson { 60980f86c9fSJeff Roberson struct kseq_group *ksg; 61080f86c9fSJeff Roberson struct kseq *steal; 61180f86c9fSJeff Roberson struct kse *ke; 61280f86c9fSJeff Roberson 61380f86c9fSJeff Roberson ksg = kseq->ksq_group; 61480f86c9fSJeff Roberson /* 61580f86c9fSJeff Roberson * If we're in a cpu group, try and steal kses from another cpu in 61680f86c9fSJeff Roberson * the group before idling. 61780f86c9fSJeff Roberson */ 61880f86c9fSJeff Roberson if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 61980f86c9fSJeff Roberson LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 62080f86c9fSJeff Roberson if (steal == kseq || steal->ksq_transferable == 0) 62180f86c9fSJeff Roberson continue; 62280f86c9fSJeff Roberson ke = kseq_steal(steal, 0); 62380f86c9fSJeff Roberson if (ke == NULL) 62480f86c9fSJeff Roberson continue; 62580f86c9fSJeff Roberson ke->ke_state = KES_THREAD; 62680f86c9fSJeff Roberson kseq_runq_rem(steal, ke); 62780f86c9fSJeff Roberson kseq_load_rem(steal, ke); 62880f86c9fSJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 629598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 630598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 63180f86c9fSJeff Roberson return (0); 63280f86c9fSJeff Roberson } 63380f86c9fSJeff Roberson } 63480f86c9fSJeff Roberson /* 63580f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 63680f86c9fSJeff Roberson * idle. Otherwise we could get into a situation where a KSE bounces 63780f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 63880f86c9fSJeff Roberson */ 63980f86c9fSJeff Roberson ksg->ksg_idlemask |= PCPU_GET(cpumask); 64080f86c9fSJeff Roberson if (ksg->ksg_idlemask != ksg->ksg_cpumask) 64180f86c9fSJeff Roberson return (1); 64280f86c9fSJeff Roberson atomic_set_int(&kseq_idle, ksg->ksg_mask); 64380f86c9fSJeff Roberson return (1); 64422bf7d9aSJeff Roberson } 64522bf7d9aSJeff Roberson 64622bf7d9aSJeff Roberson static void 64722bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 64822bf7d9aSJeff Roberson { 64922bf7d9aSJeff Roberson struct kse *nke; 65022bf7d9aSJeff Roberson struct kse *ke; 65122bf7d9aSJeff Roberson 65222bf7d9aSJeff Roberson do { 65300fbcda8SAlexander Kabaev *(volatile struct kse **)&ke = kseq->ksq_assigned; 65422bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 65522bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 65622bf7d9aSJeff Roberson nke = ke->ke_assign; 657598b368dSJeff Roberson kseq->ksq_group->ksg_load--; 658598b368dSJeff Roberson kseq->ksq_load--; 65922bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 660598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 661598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 66222bf7d9aSJeff Roberson } 66322bf7d9aSJeff Roberson } 66422bf7d9aSJeff Roberson 66522bf7d9aSJeff Roberson static void 66622bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 66722bf7d9aSJeff Roberson { 66822bf7d9aSJeff Roberson struct kseq *kseq; 66922bf7d9aSJeff Roberson struct thread *td; 67022bf7d9aSJeff Roberson struct pcpu *pcpu; 671598b368dSJeff Roberson int class; 6722454aaf5SJeff Roberson int prio; 67322bf7d9aSJeff Roberson 674598b368dSJeff Roberson kseq = KSEQ_CPU(cpu); 675598b368dSJeff Roberson /* XXX */ 676598b368dSJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 677598b368dSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 678598b368dSJeff Roberson (kseq_idle & kseq->ksq_group->ksg_mask)) 679598b368dSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 680598b368dSJeff Roberson kseq->ksq_group->ksg_load++; 681598b368dSJeff Roberson kseq->ksq_load++; 68286e1c22aSJeff Roberson ke->ke_cpu = cpu; 68322bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 6842454aaf5SJeff Roberson prio = ke->ke_thread->td_priority; 68522bf7d9aSJeff Roberson 6860c0a98b2SJeff Roberson /* 68722bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 68822bf7d9aSJeff Roberson */ 68922bf7d9aSJeff Roberson do { 69000fbcda8SAlexander Kabaev *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 69122bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 6922454aaf5SJeff Roberson /* 6932454aaf5SJeff Roberson * Without sched_lock we could lose a race where we set NEEDRESCHED 6942454aaf5SJeff Roberson * on a thread that is switched out before the IPI is delivered. This 6952454aaf5SJeff Roberson * would lead us to miss the resched. This will be a problem once 6962454aaf5SJeff Roberson * sched_lock is pushed down. 6972454aaf5SJeff Roberson */ 69822bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 69922bf7d9aSJeff Roberson td = pcpu->pc_curthread; 70022bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 70122bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 70222bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 70322bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 70422bf7d9aSJeff Roberson } 70522bf7d9aSJeff Roberson } 70622bf7d9aSJeff Roberson 70722bf7d9aSJeff Roberson static struct kse * 70822bf7d9aSJeff Roberson runq_steal(struct runq *rq) 70922bf7d9aSJeff Roberson { 71022bf7d9aSJeff Roberson struct rqhead *rqh; 71122bf7d9aSJeff Roberson struct rqbits *rqb; 71222bf7d9aSJeff Roberson struct kse *ke; 71322bf7d9aSJeff Roberson int word; 71422bf7d9aSJeff Roberson int bit; 71522bf7d9aSJeff Roberson 71622bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 71722bf7d9aSJeff Roberson rqb = &rq->rq_status; 71822bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 71922bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 72022bf7d9aSJeff Roberson continue; 72122bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 722a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 72322bf7d9aSJeff Roberson continue; 72422bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 72522bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, ke_procq) { 726598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) 72722bf7d9aSJeff Roberson return (ke); 72822bf7d9aSJeff Roberson } 72922bf7d9aSJeff Roberson } 73022bf7d9aSJeff Roberson } 73122bf7d9aSJeff Roberson return (NULL); 73222bf7d9aSJeff Roberson } 73322bf7d9aSJeff Roberson 73422bf7d9aSJeff Roberson static struct kse * 73580f86c9fSJeff Roberson kseq_steal(struct kseq *kseq, int stealidle) 73622bf7d9aSJeff Roberson { 73722bf7d9aSJeff Roberson struct kse *ke; 73822bf7d9aSJeff Roberson 73980f86c9fSJeff Roberson /* 74080f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 74180f86c9fSJeff Roberson * may not have run for a while. 74280f86c9fSJeff Roberson */ 74322bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 74422bf7d9aSJeff Roberson return (ke); 74580f86c9fSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 74680f86c9fSJeff Roberson return (ke); 74780f86c9fSJeff Roberson if (stealidle) 74822bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 74980f86c9fSJeff Roberson return (NULL); 75022bf7d9aSJeff Roberson } 75180f86c9fSJeff Roberson 75280f86c9fSJeff Roberson int 75380f86c9fSJeff Roberson kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 75480f86c9fSJeff Roberson { 755598b368dSJeff Roberson struct kseq_group *nksg; 75680f86c9fSJeff Roberson struct kseq_group *ksg; 757598b368dSJeff Roberson struct kseq *old; 75880f86c9fSJeff Roberson int cpu; 759598b368dSJeff Roberson int idx; 76080f86c9fSJeff Roberson 761670c524fSJeff Roberson if (smp_started == 0) 762670c524fSJeff Roberson return (0); 76380f86c9fSJeff Roberson cpu = 0; 76480f86c9fSJeff Roberson /* 7652454aaf5SJeff Roberson * If our load exceeds a certain threshold we should attempt to 7662454aaf5SJeff Roberson * reassign this thread. The first candidate is the cpu that 7672454aaf5SJeff Roberson * originally ran the thread. If it is idle, assign it there, 7682454aaf5SJeff Roberson * otherwise, pick an idle cpu. 7692454aaf5SJeff Roberson * 7702454aaf5SJeff Roberson * The threshold at which we start to reassign kses has a large impact 771670c524fSJeff Roberson * on the overall performance of the system. Tuned too high and 772670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 773d50c87deSOlivier Houchard * and context switches. 774670c524fSJeff Roberson */ 775598b368dSJeff Roberson old = KSEQ_CPU(ke->ke_cpu); 776598b368dSJeff Roberson nksg = old->ksq_group; 7772454aaf5SJeff Roberson ksg = kseq->ksq_group; 778598b368dSJeff Roberson if (kseq_idle) { 779598b368dSJeff Roberson if (kseq_idle & nksg->ksg_mask) { 780598b368dSJeff Roberson cpu = ffs(nksg->ksg_idlemask); 781598b368dSJeff Roberson if (cpu) { 782598b368dSJeff Roberson CTR2(KTR_SCHED, 783598b368dSJeff Roberson "kseq_transfer: %p found old cpu %X " 784598b368dSJeff Roberson "in idlemask.", ke, cpu); 7852454aaf5SJeff Roberson goto migrate; 7862454aaf5SJeff Roberson } 787598b368dSJeff Roberson } 78880f86c9fSJeff Roberson /* 78980f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 79080f86c9fSJeff Roberson * but the race shouldn't be terrible. 79180f86c9fSJeff Roberson */ 79280f86c9fSJeff Roberson cpu = ffs(kseq_idle); 793598b368dSJeff Roberson if (cpu) { 794598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 795598b368dSJeff Roberson "in idlemask.", ke, cpu); 7962454aaf5SJeff Roberson goto migrate; 79780f86c9fSJeff Roberson } 798598b368dSJeff Roberson } 799598b368dSJeff Roberson idx = 0; 800598b368dSJeff Roberson #if 0 801598b368dSJeff Roberson if (old->ksq_load < kseq->ksq_load) { 802598b368dSJeff Roberson cpu = ke->ke_cpu + 1; 803598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 804598b368dSJeff Roberson "load less than ours.", ke, cpu); 805598b368dSJeff Roberson goto migrate; 806598b368dSJeff Roberson } 807598b368dSJeff Roberson /* 808598b368dSJeff Roberson * No new CPU was found, look for one with less load. 809598b368dSJeff Roberson */ 810598b368dSJeff Roberson for (idx = 0; idx <= ksg_maxid; idx++) { 811598b368dSJeff Roberson nksg = KSEQ_GROUP(idx); 812598b368dSJeff Roberson if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 813598b368dSJeff Roberson cpu = ffs(nksg->ksg_cpumask); 814598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 815598b368dSJeff Roberson "than ours.", ke, cpu); 816598b368dSJeff Roberson goto migrate; 817598b368dSJeff Roberson } 818598b368dSJeff Roberson } 819598b368dSJeff Roberson #endif 82080f86c9fSJeff Roberson /* 82180f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 82280f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 82380f86c9fSJeff Roberson */ 8242454aaf5SJeff Roberson if (ksg->ksg_idlemask) { 82580f86c9fSJeff Roberson cpu = ffs(ksg->ksg_idlemask); 826598b368dSJeff Roberson if (cpu) { 827598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 828598b368dSJeff Roberson "group.", ke, cpu); 8292454aaf5SJeff Roberson goto migrate; 83080f86c9fSJeff Roberson } 831598b368dSJeff Roberson } 8322454aaf5SJeff Roberson return (0); 8332454aaf5SJeff Roberson migrate: 8342454aaf5SJeff Roberson /* 83580f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 83680f86c9fSJeff Roberson */ 83780f86c9fSJeff Roberson cpu--; 83880f86c9fSJeff Roberson ke->ke_runq = NULL; 83980f86c9fSJeff Roberson kseq_notify(ke, cpu); 8402454aaf5SJeff Roberson 84180f86c9fSJeff Roberson return (1); 84280f86c9fSJeff Roberson } 84380f86c9fSJeff Roberson 84422bf7d9aSJeff Roberson #endif /* SMP */ 84522bf7d9aSJeff Roberson 84622bf7d9aSJeff Roberson /* 84722bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 8480c0a98b2SJeff Roberson */ 8490c0a98b2SJeff Roberson 85022bf7d9aSJeff Roberson static struct kse * 85122bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 8525d7ef00cSJeff Roberson { 8535d7ef00cSJeff Roberson struct runq *swap; 8540516c8ddSJeff Roberson struct kse *ke; 8550516c8ddSJeff Roberson int nice; 8565d7ef00cSJeff Roberson 857b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 85815dc847eSJeff Roberson swap = NULL; 859a8949de2SJeff Roberson 86015dc847eSJeff Roberson for (;;) { 86115dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 86215dc847eSJeff Roberson if (ke == NULL) { 86315dc847eSJeff Roberson /* 864bf0acc27SJohn Baldwin * We already swapped once and didn't get anywhere. 86515dc847eSJeff Roberson */ 86615dc847eSJeff Roberson if (swap) 86715dc847eSJeff Roberson break; 8685d7ef00cSJeff Roberson swap = kseq->ksq_curr; 8695d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 8705d7ef00cSJeff Roberson kseq->ksq_next = swap; 87115dc847eSJeff Roberson continue; 872a8949de2SJeff Roberson } 87315dc847eSJeff Roberson /* 87415dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 87515dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 87615dc847eSJeff Roberson * of the range that receives slices. 87715dc847eSJeff Roberson */ 8780516c8ddSJeff Roberson nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); 8798ffb8f55SJeff Roberson if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 8808ffb8f55SJeff Roberson ke->ke_proc->p_nice != 0)) { 88115dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 88215dc847eSJeff Roberson sched_slice(ke); 88315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 884c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 88515dc847eSJeff Roberson continue; 88615dc847eSJeff Roberson } 88715dc847eSJeff Roberson return (ke); 88815dc847eSJeff Roberson } 88915dc847eSJeff Roberson 890a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 891245f3abfSJeff Roberson } 8920a016a05SJeff Roberson 8930a016a05SJeff Roberson static void 8940a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 8950a016a05SJeff Roberson { 89615dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 89715dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 898a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 89915dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 90015dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 9017cd650a9SJeff Roberson kseq->ksq_load = 0; 902ef1134c9SJeff Roberson kseq->ksq_load_timeshare = 0; 9030a016a05SJeff Roberson } 9040a016a05SJeff Roberson 90535e6168fSJeff Roberson static void 90635e6168fSJeff Roberson sched_setup(void *dummy) 90735e6168fSJeff Roberson { 9080ec896fdSJeff Roberson #ifdef SMP 90935e6168fSJeff Roberson int i; 9100ec896fdSJeff Roberson #endif 91135e6168fSJeff Roberson 912e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 913e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 914e1f89c22SJeff Roberson 915356500a3SJeff Roberson #ifdef SMP 916cac77d04SJeff Roberson balance_groups = 0; 91780f86c9fSJeff Roberson /* 91880f86c9fSJeff Roberson * Initialize the kseqs. 91980f86c9fSJeff Roberson */ 920749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 92180f86c9fSJeff Roberson struct kseq *ksq; 92280f86c9fSJeff Roberson 92380f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 92480f86c9fSJeff Roberson ksq->ksq_assigned = NULL; 925749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 92680f86c9fSJeff Roberson } 92780f86c9fSJeff Roberson if (smp_topology == NULL) { 92880f86c9fSJeff Roberson struct kseq_group *ksg; 92980f86c9fSJeff Roberson struct kseq *ksq; 930598b368dSJeff Roberson int cpus; 93180f86c9fSJeff Roberson 932598b368dSJeff Roberson for (cpus = 0, i = 0; i < MAXCPU; i++) { 933598b368dSJeff Roberson if (CPU_ABSENT(i)) 934598b368dSJeff Roberson continue; 935598b368dSJeff Roberson ksq = &kseq_cpu[cpus]; 936598b368dSJeff Roberson ksg = &kseq_groups[cpus]; 93780f86c9fSJeff Roberson /* 938dc03363dSJeff Roberson * Setup a kseq group with one member. 93980f86c9fSJeff Roberson */ 94080f86c9fSJeff Roberson ksq->ksq_transferable = 0; 94180f86c9fSJeff Roberson ksq->ksq_group = ksg; 94280f86c9fSJeff Roberson ksg->ksg_cpus = 1; 94380f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 94480f86c9fSJeff Roberson ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 945cac77d04SJeff Roberson ksg->ksg_load = 0; 94680f86c9fSJeff Roberson ksg->ksg_transferable = 0; 94780f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 94880f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 949598b368dSJeff Roberson cpus++; 950749d01b0SJeff Roberson } 951598b368dSJeff Roberson ksg_maxid = cpus - 1; 952749d01b0SJeff Roberson } else { 95380f86c9fSJeff Roberson struct kseq_group *ksg; 95480f86c9fSJeff Roberson struct cpu_group *cg; 955749d01b0SJeff Roberson int j; 956749d01b0SJeff Roberson 957749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 958749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 95980f86c9fSJeff Roberson ksg = &kseq_groups[i]; 96080f86c9fSJeff Roberson /* 96180f86c9fSJeff Roberson * Initialize the group. 96280f86c9fSJeff Roberson */ 96380f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 964cac77d04SJeff Roberson ksg->ksg_load = 0; 96580f86c9fSJeff Roberson ksg->ksg_transferable = 0; 96680f86c9fSJeff Roberson ksg->ksg_cpus = cg->cg_count; 96780f86c9fSJeff Roberson ksg->ksg_cpumask = cg->cg_mask; 96880f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 96980f86c9fSJeff Roberson /* 97080f86c9fSJeff Roberson * Find all of the group members and add them. 97180f86c9fSJeff Roberson */ 97280f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 97380f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 97480f86c9fSJeff Roberson if (ksg->ksg_mask == 0) 97580f86c9fSJeff Roberson ksg->ksg_mask = 1 << j; 97680f86c9fSJeff Roberson kseq_cpu[j].ksq_transferable = 0; 97780f86c9fSJeff Roberson kseq_cpu[j].ksq_group = ksg; 97880f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, 97980f86c9fSJeff Roberson &kseq_cpu[j], ksq_siblings); 98080f86c9fSJeff Roberson } 98180f86c9fSJeff Roberson } 982cac77d04SJeff Roberson if (ksg->ksg_cpus > 1) 983cac77d04SJeff Roberson balance_groups = 1; 984749d01b0SJeff Roberson } 985cac77d04SJeff Roberson ksg_maxid = smp_topology->ct_count - 1; 986749d01b0SJeff Roberson } 987cac77d04SJeff Roberson /* 988cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 989cac77d04SJeff Roberson * interfere with each other. 990cac77d04SJeff Roberson */ 991dc03363dSJeff Roberson bal_tick = ticks + hz; 992cac77d04SJeff Roberson if (balance_groups) 993dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 994749d01b0SJeff Roberson #else 995749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 996356500a3SJeff Roberson #endif 997749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 998155b9987SJeff Roberson kseq_load_add(KSEQ_SELF(), &kse0); 999749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 100035e6168fSJeff Roberson } 100135e6168fSJeff Roberson 100235e6168fSJeff Roberson /* 100335e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 100435e6168fSJeff Roberson * process. 100535e6168fSJeff Roberson */ 100615dc847eSJeff Roberson static void 100735e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 100835e6168fSJeff Roberson { 100935e6168fSJeff Roberson int pri; 101035e6168fSJeff Roberson 101135e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 101215dc847eSJeff Roberson return; 101335e6168fSJeff Roberson 101415dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 1015e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 1016fa885116SJulian Elischer pri += kg->kg_proc->p_nice; 101735e6168fSJeff Roberson 101835e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 101935e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 102035e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 102135e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 102235e6168fSJeff Roberson 102335e6168fSJeff Roberson kg->kg_user_pri = pri; 102435e6168fSJeff Roberson 102515dc847eSJeff Roberson return; 102635e6168fSJeff Roberson } 102735e6168fSJeff Roberson 102835e6168fSJeff Roberson /* 1029245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 1030a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 103135e6168fSJeff Roberson */ 1032245f3abfSJeff Roberson static void 1033245f3abfSJeff Roberson sched_slice(struct kse *ke) 103435e6168fSJeff Roberson { 103515dc847eSJeff Roberson struct kseq *kseq; 1036245f3abfSJeff Roberson struct ksegrp *kg; 103735e6168fSJeff Roberson 1038245f3abfSJeff Roberson kg = ke->ke_ksegrp; 103915dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 104035e6168fSJeff Roberson 1041f5c157d9SJohn Baldwin if (ke->ke_thread->td_flags & TDF_BORROWING) { 10428ffb8f55SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 10438ffb8f55SJeff Roberson return; 10448ffb8f55SJeff Roberson } 10458ffb8f55SJeff Roberson 1046245f3abfSJeff Roberson /* 1047245f3abfSJeff Roberson * Rationale: 10482454aaf5SJeff Roberson * KSEs in interactive ksegs get a minimal slice so that we 1049245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 1050245f3abfSJeff Roberson * 1051245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 1052245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 1053245f3abfSJeff Roberson * on the run queue for this cpu. 1054245f3abfSJeff Roberson * 1055245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 1056245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 1057245f3abfSJeff Roberson * this when they first expire. 1058245f3abfSJeff Roberson * 1059245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 1060245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 1061245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 1062245f3abfSJeff Roberson * 10637d1a81b4SJeff Roberson * If the kse is outside of the window it will get no slice 10647d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 10657d1a81b4SJeff Roberson * run queue. The exception to this is nice 0 ksegs when 10667d1a81b4SJeff Roberson * a nice -20 is running. They are always granted a minimum 10677d1a81b4SJeff Roberson * slice. 1068245f3abfSJeff Roberson */ 106915dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 1070245f3abfSJeff Roberson int nice; 1071245f3abfSJeff Roberson 1072fa885116SJulian Elischer nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1073ef1134c9SJeff Roberson if (kseq->ksq_load_timeshare == 0 || 1074fa885116SJulian Elischer kg->kg_proc->p_nice < kseq->ksq_nicemin) 1075245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 10767d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1077245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 1078fa885116SJulian Elischer else if (kg->kg_proc->p_nice == 0) 10797d1a81b4SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 1080245f3abfSJeff Roberson else 1081245f3abfSJeff Roberson ke->ke_slice = 0; 1082245f3abfSJeff Roberson } else 10839b5f6f62SJeff Roberson ke->ke_slice = SCHED_SLICE_INTERACTIVE; 108435e6168fSJeff Roberson 1085245f3abfSJeff Roberson return; 108635e6168fSJeff Roberson } 108735e6168fSJeff Roberson 1088d322132cSJeff Roberson /* 1089d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1090d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1091d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1092d322132cSJeff Roberson * adjusted to more than double their maximum. 1093d322132cSJeff Roberson */ 10944b60e324SJeff Roberson static void 10954b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 10964b60e324SJeff Roberson { 1097d322132cSJeff Roberson int sum; 10983f741ca1SJeff Roberson 1099d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1100d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1101d322132cSJeff Roberson return; 1102d322132cSJeff Roberson /* 1103d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1104d322132cSJeff Roberson * will not bring us back into range. Dividing by two here forces 11052454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1106d322132cSJeff Roberson */ 110737a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1108d322132cSJeff Roberson kg->kg_runtime /= 2; 1109d322132cSJeff Roberson kg->kg_slptime /= 2; 1110d322132cSJeff Roberson return; 1111d322132cSJeff Roberson } 1112d322132cSJeff Roberson kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1113d322132cSJeff Roberson kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1114d322132cSJeff Roberson } 1115d322132cSJeff Roberson 1116d322132cSJeff Roberson static void 1117d322132cSJeff Roberson sched_interact_fork(struct ksegrp *kg) 1118d322132cSJeff Roberson { 1119d322132cSJeff Roberson int ratio; 1120d322132cSJeff Roberson int sum; 1121d322132cSJeff Roberson 1122d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1123d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1124d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 1125d322132cSJeff Roberson kg->kg_runtime /= ratio; 1126d322132cSJeff Roberson kg->kg_slptime /= ratio; 11274b60e324SJeff Roberson } 11284b60e324SJeff Roberson } 11294b60e324SJeff Roberson 1130e1f89c22SJeff Roberson static int 1131e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 1132e1f89c22SJeff Roberson { 1133210491d3SJeff Roberson int div; 1134e1f89c22SJeff Roberson 1135e1f89c22SJeff Roberson if (kg->kg_runtime > kg->kg_slptime) { 1136210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1137210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 1138210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1139210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 1140210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1141210491d3SJeff Roberson return (kg->kg_runtime / div); 1142e1f89c22SJeff Roberson } 1143e1f89c22SJeff Roberson 1144210491d3SJeff Roberson /* 1145210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1146210491d3SJeff Roberson */ 1147210491d3SJeff Roberson return (0); 1148e1f89c22SJeff Roberson 1149e1f89c22SJeff Roberson } 1150e1f89c22SJeff Roberson 115115dc847eSJeff Roberson /* 1152ed062c8dSJulian Elischer * Very early in the boot some setup of scheduler-specific 1153ed062c8dSJulian Elischer * parts of proc0 and of soem scheduler resources needs to be done. 1154ed062c8dSJulian Elischer * Called from: 1155ed062c8dSJulian Elischer * proc0_init() 1156ed062c8dSJulian Elischer */ 1157ed062c8dSJulian Elischer void 1158ed062c8dSJulian Elischer schedinit(void) 1159ed062c8dSJulian Elischer { 1160ed062c8dSJulian Elischer /* 1161ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 1162ed062c8dSJulian Elischer */ 1163ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1164d39063f2SJulian Elischer ksegrp0.kg_sched = &kg_sched0; 1165d39063f2SJulian Elischer thread0.td_sched = &kse0; 1166ed062c8dSJulian Elischer kse0.ke_thread = &thread0; 1167ed062c8dSJulian Elischer kse0.ke_state = KES_THREAD; 1168ed062c8dSJulian Elischer kg_sched0.skg_concurrency = 1; 1169ed062c8dSJulian Elischer kg_sched0.skg_avail_opennings = 0; /* we are already running */ 1170ed062c8dSJulian Elischer } 1171ed062c8dSJulian Elischer 1172ed062c8dSJulian Elischer /* 117315dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 117415dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 117515dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 117615dc847eSJeff Roberson */ 117735e6168fSJeff Roberson int 117835e6168fSJeff Roberson sched_rr_interval(void) 117935e6168fSJeff Roberson { 118035e6168fSJeff Roberson return (SCHED_SLICE_MAX); 118135e6168fSJeff Roberson } 118235e6168fSJeff Roberson 118322bf7d9aSJeff Roberson static void 118435e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 118535e6168fSJeff Roberson { 118635e6168fSJeff Roberson /* 118735e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1188210491d3SJeff Roberson */ 118981de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1190210491d3SJeff Roberson /* 119181de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 119281de51bfSJeff Roberson * round away our results. 119365c8760dSJeff Roberson */ 119465c8760dSJeff Roberson ke->ke_ticks <<= 10; 119581de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 119635e6168fSJeff Roberson SCHED_CPU_TICKS; 119765c8760dSJeff Roberson ke->ke_ticks >>= 10; 119881de51bfSJeff Roberson } else 119981de51bfSJeff Roberson ke->ke_ticks = 0; 120035e6168fSJeff Roberson ke->ke_ltick = ticks; 120135e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 120235e6168fSJeff Roberson } 120335e6168fSJeff Roberson 120435e6168fSJeff Roberson void 1205f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 120635e6168fSJeff Roberson { 12073f741ca1SJeff Roberson struct kse *ke; 120835e6168fSJeff Roberson 120981d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 121081d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 121181d47d3fSJeff Roberson curthread->td_proc->p_comm); 12123f741ca1SJeff Roberson ke = td->td_kse; 121335e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1214f5c157d9SJohn Baldwin if (td->td_priority == prio) 1215f5c157d9SJohn Baldwin return; 121635e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 12173f741ca1SJeff Roberson /* 12183f741ca1SJeff Roberson * If the priority has been elevated due to priority 12193f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 12203f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 12213f741ca1SJeff Roberson * needs to fix things up. 12223f741ca1SJeff Roberson */ 12238ffb8f55SJeff Roberson if (prio < td->td_priority && ke->ke_runq != NULL && 1224769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 122522bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 12263f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 12273f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1228c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 122935e6168fSJeff Roberson } 1230f2b74cbfSJeff Roberson /* 1231f2b74cbfSJeff Roberson * Hold this kse on this cpu so that sched_prio() doesn't 1232f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1233f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1234f2b74cbfSJeff Roberson */ 1235f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 12363f741ca1SJeff Roberson adjustrunqueue(td, prio); 1237598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 12383f741ca1SJeff Roberson } else 12393f741ca1SJeff Roberson td->td_priority = prio; 124035e6168fSJeff Roberson } 124135e6168fSJeff Roberson 1242f5c157d9SJohn Baldwin /* 1243f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1244f5c157d9SJohn Baldwin * priority. 1245f5c157d9SJohn Baldwin */ 1246f5c157d9SJohn Baldwin void 1247f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1248f5c157d9SJohn Baldwin { 1249f5c157d9SJohn Baldwin 1250f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1251f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1252f5c157d9SJohn Baldwin } 1253f5c157d9SJohn Baldwin 1254f5c157d9SJohn Baldwin /* 1255f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1256f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1257f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1258f5c157d9SJohn Baldwin * requests. If the thread's regular priority is less 1259f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1260f5c157d9SJohn Baldwin * of prio. 1261f5c157d9SJohn Baldwin */ 1262f5c157d9SJohn Baldwin void 1263f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1264f5c157d9SJohn Baldwin { 1265f5c157d9SJohn Baldwin u_char base_pri; 1266f5c157d9SJohn Baldwin 1267f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1268f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 1269f5c157d9SJohn Baldwin base_pri = td->td_ksegrp->kg_user_pri; 1270f5c157d9SJohn Baldwin else 1271f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1272f5c157d9SJohn Baldwin if (prio >= base_pri) { 1273f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1274f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1275f5c157d9SJohn Baldwin } else 1276f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1277f5c157d9SJohn Baldwin } 1278f5c157d9SJohn Baldwin 1279f5c157d9SJohn Baldwin void 1280f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1281f5c157d9SJohn Baldwin { 1282f5c157d9SJohn Baldwin u_char oldprio; 1283f5c157d9SJohn Baldwin 1284f5c157d9SJohn Baldwin /* First, update the base priority. */ 1285f5c157d9SJohn Baldwin td->td_base_pri = prio; 1286f5c157d9SJohn Baldwin 1287f5c157d9SJohn Baldwin /* 128850aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1289f5c157d9SJohn Baldwin * ever lower the priority. 1290f5c157d9SJohn Baldwin */ 1291f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1292f5c157d9SJohn Baldwin return; 1293f5c157d9SJohn Baldwin 1294f5c157d9SJohn Baldwin /* Change the real priority. */ 1295f5c157d9SJohn Baldwin oldprio = td->td_priority; 1296f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1297f5c157d9SJohn Baldwin 1298f5c157d9SJohn Baldwin /* 1299f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1300f5c157d9SJohn Baldwin * its state. 1301f5c157d9SJohn Baldwin */ 1302f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1303f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1304f5c157d9SJohn Baldwin } 1305f5c157d9SJohn Baldwin 130635e6168fSJeff Roberson void 13073389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 130835e6168fSJeff Roberson { 1309598b368dSJeff Roberson struct kseq *ksq; 131035e6168fSJeff Roberson struct kse *ke; 131135e6168fSJeff Roberson 131235e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 131335e6168fSJeff Roberson 131435e6168fSJeff Roberson ke = td->td_kse; 1315598b368dSJeff Roberson ksq = KSEQ_SELF(); 131635e6168fSJeff Roberson 1317060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1318060563ecSJulian Elischer td->td_oncpu = NOCPU; 131952eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 132077918643SStephan Uphoff td->td_owepreempt = 0; 132135e6168fSJeff Roberson 1322b11fdad0SJeff Roberson /* 1323b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1324b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1325b11fdad0SJeff Roberson */ 13262454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1327bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1328598b368dSJeff Roberson } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 1329ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1330d39063f2SJulian Elischer SLOT_RELEASE(td->td_ksegrp); 1331598b368dSJeff Roberson kseq_load_rem(ksq, ke); 1332ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1333f2b74cbfSJeff Roberson /* 1334ed062c8dSJulian Elischer * Don't allow the thread to migrate 1335ed062c8dSJulian Elischer * from a preemption. 1336f2b74cbfSJeff Roberson */ 1337f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 1338598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 1339598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1340598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1341598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 1342598b368dSJeff Roberson } else if ((td->td_proc->p_flag & P_HADTHREADS) && 1343598b368dSJeff Roberson (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) 134435e6168fSJeff Roberson /* 1345ed062c8dSJulian Elischer * We will not be on the run queue. 1346ed062c8dSJulian Elischer * So we must be sleeping or similar. 1347c20c691bSJulian Elischer * Don't use the slot if we will need it 1348c20c691bSJulian Elischer * for newtd. 134935e6168fSJeff Roberson */ 1350ed062c8dSJulian Elischer slot_fill(td->td_ksegrp); 1351ed062c8dSJulian Elischer } 1352d39063f2SJulian Elischer if (newtd != NULL) { 1353c20c691bSJulian Elischer /* 13546680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 13556680bbd5SJeff Roberson * added to the run queue and then chosen. 1356c20c691bSJulian Elischer */ 1357c5c3fb33SJulian Elischer newtd->td_kse->ke_flags |= KEF_DIDRUN; 1358598b368dSJeff Roberson newtd->td_kse->ke_runq = ksq->ksq_curr; 1359c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1360bf0acc27SJohn Baldwin kseq_load_add(KSEQ_SELF(), newtd->td_kse); 13616680bbd5SJeff Roberson /* 13626680bbd5SJeff Roberson * XXX When we preempt, we've already consumed a slot because 13636680bbd5SJeff Roberson * we got here through sched_add(). However, newtd can come 13646680bbd5SJeff Roberson * from thread_switchout() which can't SLOT_USE() because 13656680bbd5SJeff Roberson * the SLOT code is scheduler dependent. We must use the 13666680bbd5SJeff Roberson * slot here otherwise. 13676680bbd5SJeff Roberson */ 13686680bbd5SJeff Roberson if ((flags & SW_PREEMPT) == 0) 13696680bbd5SJeff Roberson SLOT_USE(newtd->td_ksegrp); 1370d39063f2SJulian Elischer } else 13712454aaf5SJeff Roberson newtd = choosethread(); 1372ebccf1e3SJoseph Koshy if (td != newtd) { 1373ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1374ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1375ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1376ebccf1e3SJoseph Koshy #endif 1377ae53b483SJeff Roberson cpu_switch(td, newtd); 1378ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1379ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1380ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1381ebccf1e3SJoseph Koshy #endif 1382ebccf1e3SJoseph Koshy } 1383ebccf1e3SJoseph Koshy 1384ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 138535e6168fSJeff Roberson 1386060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 138735e6168fSJeff Roberson } 138835e6168fSJeff Roberson 138935e6168fSJeff Roberson void 1390fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 139135e6168fSJeff Roberson { 1392fa885116SJulian Elischer struct ksegrp *kg; 139315dc847eSJeff Roberson struct kse *ke; 139435e6168fSJeff Roberson struct thread *td; 139515dc847eSJeff Roberson struct kseq *kseq; 139635e6168fSJeff Roberson 1397fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 13980b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 139915dc847eSJeff Roberson /* 140015dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 140115dc847eSJeff Roberson */ 1402fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 1403fa885116SJulian Elischer if (kg->kg_pri_class == PRI_TIMESHARE) { 1404ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1405ed062c8dSJulian Elischer ke = td->td_kse; 1406d07ac847SJeff Roberson if (ke->ke_runq == NULL) 140715dc847eSJeff Roberson continue; 140815dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1409fa885116SJulian Elischer kseq_nice_rem(kseq, p->p_nice); 141015dc847eSJeff Roberson kseq_nice_add(kseq, nice); 141115dc847eSJeff Roberson } 1412fa885116SJulian Elischer } 1413fa885116SJulian Elischer } 1414fa885116SJulian Elischer p->p_nice = nice; 1415fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 141635e6168fSJeff Roberson sched_priority(kg); 141715dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 14184a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 141935e6168fSJeff Roberson } 1420fa885116SJulian Elischer } 142135e6168fSJeff Roberson 142235e6168fSJeff Roberson void 142344f3b092SJohn Baldwin sched_sleep(struct thread *td) 142435e6168fSJeff Roberson { 142535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 142635e6168fSJeff Roberson 142735e6168fSJeff Roberson td->td_slptime = ticks; 142835e6168fSJeff Roberson } 142935e6168fSJeff Roberson 143035e6168fSJeff Roberson void 143135e6168fSJeff Roberson sched_wakeup(struct thread *td) 143235e6168fSJeff Roberson { 143335e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 143435e6168fSJeff Roberson 143535e6168fSJeff Roberson /* 143635e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 143735e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 143835e6168fSJeff Roberson */ 143935e6168fSJeff Roberson if (td->td_slptime) { 1440f1e8dc4aSJeff Roberson struct ksegrp *kg; 144115dc847eSJeff Roberson int hzticks; 1442f1e8dc4aSJeff Roberson 1443f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1444d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1445d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1446d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1447d322132cSJeff Roberson kg->kg_runtime = 1; 1448d322132cSJeff Roberson } else { 1449d322132cSJeff Roberson kg->kg_slptime += hzticks; 14504b60e324SJeff Roberson sched_interact_update(kg); 1451d322132cSJeff Roberson } 1452f1e8dc4aSJeff Roberson sched_priority(kg); 14534b60e324SJeff Roberson sched_slice(td->td_kse); 145435e6168fSJeff Roberson td->td_slptime = 0; 1455f1e8dc4aSJeff Roberson } 14562630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 145735e6168fSJeff Roberson } 145835e6168fSJeff Roberson 145935e6168fSJeff Roberson /* 146035e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 146135e6168fSJeff Roberson * priority. 146235e6168fSJeff Roberson */ 146335e6168fSJeff Roberson void 1464ed062c8dSJulian Elischer sched_fork(struct thread *td, struct thread *childtd) 146535e6168fSJeff Roberson { 146635e6168fSJeff Roberson 146735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 146835e6168fSJeff Roberson 1469ed062c8dSJulian Elischer sched_fork_ksegrp(td, childtd->td_ksegrp); 1470ed062c8dSJulian Elischer sched_fork_thread(td, childtd); 147115dc847eSJeff Roberson } 147215dc847eSJeff Roberson 147315dc847eSJeff Roberson void 147455d44f79SJulian Elischer sched_fork_ksegrp(struct thread *td, struct ksegrp *child) 147515dc847eSJeff Roberson { 147655d44f79SJulian Elischer struct ksegrp *kg = td->td_ksegrp; 1477ed062c8dSJulian Elischer mtx_assert(&sched_lock, MA_OWNED); 1478210491d3SJeff Roberson 1479d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1480d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1481d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 1482d322132cSJeff Roberson sched_interact_fork(child); 14834b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 14844b60e324SJeff Roberson sched_interact_update(kg); 1485c9f25d8fSJeff Roberson } 1486c9f25d8fSJeff Roberson 148715dc847eSJeff Roberson void 148815dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 148915dc847eSJeff Roberson { 1490ed062c8dSJulian Elischer struct kse *ke; 1491ed062c8dSJulian Elischer struct kse *ke2; 1492ed062c8dSJulian Elischer 1493ed062c8dSJulian Elischer sched_newthread(child); 1494ed062c8dSJulian Elischer ke = td->td_kse; 1495ed062c8dSJulian Elischer ke2 = child->td_kse; 1496ed062c8dSJulian Elischer ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1497ed062c8dSJulian Elischer ke2->ke_cpu = ke->ke_cpu; 1498ed062c8dSJulian Elischer ke2->ke_runq = NULL; 1499ed062c8dSJulian Elischer 1500ed062c8dSJulian Elischer /* Grab our parents cpu estimation information. */ 1501ed062c8dSJulian Elischer ke2->ke_ticks = ke->ke_ticks; 1502ed062c8dSJulian Elischer ke2->ke_ltick = ke->ke_ltick; 1503ed062c8dSJulian Elischer ke2->ke_ftick = ke->ke_ftick; 150415dc847eSJeff Roberson } 150515dc847eSJeff Roberson 150615dc847eSJeff Roberson void 150715dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 150815dc847eSJeff Roberson { 150915dc847eSJeff Roberson struct kseq *kseq; 151015dc847eSJeff Roberson struct kse *ke; 1511ed062c8dSJulian Elischer struct thread *td; 1512ef1134c9SJeff Roberson int nclass; 1513ef1134c9SJeff Roberson int oclass; 151415dc847eSJeff Roberson 15152056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 151615dc847eSJeff Roberson if (kg->kg_pri_class == class) 151715dc847eSJeff Roberson return; 151815dc847eSJeff Roberson 1519ef1134c9SJeff Roberson nclass = PRI_BASE(class); 1520ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 1521ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1522ed062c8dSJulian Elischer ke = td->td_kse; 152342a29039SJeff Roberson if ((ke->ke_state != KES_ONRUNQ && 152442a29039SJeff Roberson ke->ke_state != KES_THREAD) || ke->ke_runq == NULL) 152515dc847eSJeff Roberson continue; 152615dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 152715dc847eSJeff Roberson 1528ef1134c9SJeff Roberson #ifdef SMP 1529155b9987SJeff Roberson /* 1530155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1531155b9987SJeff Roberson * count because could be changing to or from an interrupt 1532155b9987SJeff Roberson * class. 1533155b9987SJeff Roberson */ 1534155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 1535598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 153680f86c9fSJeff Roberson kseq->ksq_transferable--; 153780f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 153880f86c9fSJeff Roberson } 1539598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 154080f86c9fSJeff Roberson kseq->ksq_transferable++; 154180f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 154280f86c9fSJeff Roberson } 1543155b9987SJeff Roberson } 1544ef1134c9SJeff Roberson #endif 1545155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1546ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 1547fa885116SJulian Elischer kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1548155b9987SJeff Roberson } 1549155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1550155b9987SJeff Roberson kseq->ksq_load_timeshare++; 1551fa885116SJulian Elischer kseq_nice_add(kseq, kg->kg_proc->p_nice); 155215dc847eSJeff Roberson } 1553155b9987SJeff Roberson } 155415dc847eSJeff Roberson 155515dc847eSJeff Roberson kg->kg_pri_class = class; 155635e6168fSJeff Roberson } 155735e6168fSJeff Roberson 155835e6168fSJeff Roberson /* 155935e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 156035e6168fSJeff Roberson */ 156135e6168fSJeff Roberson void 1562ed062c8dSJulian Elischer sched_exit(struct proc *p, struct thread *childtd) 156335e6168fSJeff Roberson { 156435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1565ed062c8dSJulian Elischer sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd); 156681d47d3fSJeff Roberson sched_exit_thread(NULL, childtd); 1567141ad61cSJeff Roberson } 1568141ad61cSJeff Roberson 1569141ad61cSJeff Roberson void 157055d44f79SJulian Elischer sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1571141ad61cSJeff Roberson { 157255d44f79SJulian Elischer /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 157355d44f79SJulian Elischer kg->kg_runtime += td->td_ksegrp->kg_runtime; 15744b60e324SJeff Roberson sched_interact_update(kg); 1575141ad61cSJeff Roberson } 1576141ad61cSJeff Roberson 1577141ad61cSJeff Roberson void 1578ed062c8dSJulian Elischer sched_exit_thread(struct thread *td, struct thread *childtd) 1579141ad61cSJeff Roberson { 158081d47d3fSJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 158181d47d3fSJeff Roberson childtd, childtd->td_proc->p_comm, childtd->td_priority); 1582ed062c8dSJulian Elischer kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse); 158335e6168fSJeff Roberson } 158435e6168fSJeff Roberson 158535e6168fSJeff Roberson void 15867cf90fb3SJeff Roberson sched_clock(struct thread *td) 158735e6168fSJeff Roberson { 158835e6168fSJeff Roberson struct kseq *kseq; 15890a016a05SJeff Roberson struct ksegrp *kg; 15907cf90fb3SJeff Roberson struct kse *ke; 159135e6168fSJeff Roberson 1592dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 15932454aaf5SJeff Roberson kseq = KSEQ_SELF(); 1594dc03363dSJeff Roberson #ifdef SMP 1595598b368dSJeff Roberson if (ticks >= bal_tick) 1596dc03363dSJeff Roberson sched_balance(); 1597598b368dSJeff Roberson if (ticks >= gbal_tick && balance_groups) 1598dc03363dSJeff Roberson sched_balance_groups(); 15992454aaf5SJeff Roberson /* 16002454aaf5SJeff Roberson * We could have been assigned a non real-time thread without an 16012454aaf5SJeff Roberson * IPI. 16022454aaf5SJeff Roberson */ 16032454aaf5SJeff Roberson if (kseq->ksq_assigned) 16042454aaf5SJeff Roberson kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1605dc03363dSJeff Roberson #endif 160615dc847eSJeff Roberson /* 160715dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 160815dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 160915dc847eSJeff Roberson * calculating this here. 161015dc847eSJeff Roberson */ 161115dc847eSJeff Roberson if (realstathz == 0) { 161215dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 161315dc847eSJeff Roberson tickincr = hz / realstathz; 161415dc847eSJeff Roberson /* 161515dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 161615dc847eSJeff Roberson * larger than hz. 161715dc847eSJeff Roberson */ 161815dc847eSJeff Roberson if (tickincr == 0) 161915dc847eSJeff Roberson tickincr = 1; 162015dc847eSJeff Roberson } 162135e6168fSJeff Roberson 16227cf90fb3SJeff Roberson ke = td->td_kse; 162315dc847eSJeff Roberson kg = ke->ke_ksegrp; 162435e6168fSJeff Roberson 16250a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 162665c8760dSJeff Roberson ke->ke_ticks++; 1627d465fb95SJeff Roberson ke->ke_ltick = ticks; 1628a8949de2SJeff Roberson 1629d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1630d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1631d465fb95SJeff Roberson sched_pctcpu_update(ke); 1632d465fb95SJeff Roberson 163343fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 163435e6168fSJeff Roberson return; 16353f741ca1SJeff Roberson /* 1636a8949de2SJeff Roberson * We only do slicing code for TIMESHARE ksegrps. 1637a8949de2SJeff Roberson */ 1638a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1639a8949de2SJeff Roberson return; 1640a8949de2SJeff Roberson /* 164115dc847eSJeff Roberson * We used a tick charge it to the ksegrp so that we can compute our 164215dc847eSJeff Roberson * interactivity. 164315dc847eSJeff Roberson */ 164415dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 16454b60e324SJeff Roberson sched_interact_update(kg); 1646407b0157SJeff Roberson 164735e6168fSJeff Roberson /* 164835e6168fSJeff Roberson * We used up one time slice. 164935e6168fSJeff Roberson */ 1650093c05e3SJeff Roberson if (--ke->ke_slice > 0) 165115dc847eSJeff Roberson return; 165235e6168fSJeff Roberson /* 165315dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 165435e6168fSJeff Roberson */ 1655155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1656e1f89c22SJeff Roberson sched_priority(kg); 165715dc847eSJeff Roberson sched_slice(ke); 165815dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 165915dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 166015dc847eSJeff Roberson else 166115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1662155b9987SJeff Roberson kseq_load_add(kseq, ke); 16634a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 166435e6168fSJeff Roberson } 166535e6168fSJeff Roberson 166635e6168fSJeff Roberson int 166735e6168fSJeff Roberson sched_runnable(void) 166835e6168fSJeff Roberson { 166935e6168fSJeff Roberson struct kseq *kseq; 1670b90816f1SJeff Roberson int load; 167135e6168fSJeff Roberson 1672b90816f1SJeff Roberson load = 1; 1673b90816f1SJeff Roberson 16740a016a05SJeff Roberson kseq = KSEQ_SELF(); 167522bf7d9aSJeff Roberson #ifdef SMP 167646f8b265SJeff Roberson if (kseq->ksq_assigned) { 167746f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 167822bf7d9aSJeff Roberson kseq_assign(kseq); 167946f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 168046f8b265SJeff Roberson } 168122bf7d9aSJeff Roberson #endif 16823f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 16833f741ca1SJeff Roberson if (kseq->ksq_load > 0) 16843f741ca1SJeff Roberson goto out; 16853f741ca1SJeff Roberson } else 16863f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1687b90816f1SJeff Roberson goto out; 1688b90816f1SJeff Roberson load = 0; 1689b90816f1SJeff Roberson out: 1690b90816f1SJeff Roberson return (load); 169135e6168fSJeff Roberson } 169235e6168fSJeff Roberson 169335e6168fSJeff Roberson void 169435e6168fSJeff Roberson sched_userret(struct thread *td) 169535e6168fSJeff Roberson { 169635e6168fSJeff Roberson struct ksegrp *kg; 169735e6168fSJeff Roberson 1698f5c157d9SJohn Baldwin KASSERT((td->td_flags & TDF_BORROWING) == 0, 1699f5c157d9SJohn Baldwin ("thread with borrowed priority returning to userland")); 170035e6168fSJeff Roberson kg = td->td_ksegrp; 1701f5c157d9SJohn Baldwin if (td->td_priority != kg->kg_user_pri) { 170235e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 170335e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 1704f5c157d9SJohn Baldwin td->td_base_pri = kg->kg_user_pri; 170535e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 170635e6168fSJeff Roberson } 170735e6168fSJeff Roberson } 170835e6168fSJeff Roberson 1709c9f25d8fSJeff Roberson struct kse * 1710c9f25d8fSJeff Roberson sched_choose(void) 1711c9f25d8fSJeff Roberson { 17120a016a05SJeff Roberson struct kseq *kseq; 1713c9f25d8fSJeff Roberson struct kse *ke; 171415dc847eSJeff Roberson 1715b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 171622bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 171715dc847eSJeff Roberson #ifdef SMP 171880f86c9fSJeff Roberson restart: 171922bf7d9aSJeff Roberson if (kseq->ksq_assigned) 172022bf7d9aSJeff Roberson kseq_assign(kseq); 172115dc847eSJeff Roberson #endif 172222bf7d9aSJeff Roberson ke = kseq_choose(kseq); 172335e6168fSJeff Roberson if (ke) { 172422bf7d9aSJeff Roberson #ifdef SMP 172522bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 172680f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 172780f86c9fSJeff Roberson goto restart; 172822bf7d9aSJeff Roberson #endif 1729155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 173035e6168fSJeff Roberson ke->ke_state = KES_THREAD; 173115dc847eSJeff Roberson return (ke); 173235e6168fSJeff Roberson } 1733c9f25d8fSJeff Roberson #ifdef SMP 173480f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 173580f86c9fSJeff Roberson goto restart; 1736c9f25d8fSJeff Roberson #endif 173715dc847eSJeff Roberson return (NULL); 173835e6168fSJeff Roberson } 173935e6168fSJeff Roberson 174035e6168fSJeff Roberson void 17412630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 174235e6168fSJeff Roberson { 1743c9f25d8fSJeff Roberson struct kseq *kseq; 174415dc847eSJeff Roberson struct ksegrp *kg; 17457cf90fb3SJeff Roberson struct kse *ke; 1746598b368dSJeff Roberson int preemptive; 17472454aaf5SJeff Roberson int canmigrate; 174822bf7d9aSJeff Roberson int class; 1749c9f25d8fSJeff Roberson 175081d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 175181d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 175281d47d3fSJeff Roberson curthread->td_proc->p_comm); 175322bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17547cf90fb3SJeff Roberson ke = td->td_kse; 17557cf90fb3SJeff Roberson kg = td->td_ksegrp; 1756598b368dSJeff Roberson canmigrate = 1; 1757598b368dSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1758598b368dSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 1759598b368dSJeff Roberson kseq = KSEQ_SELF(); 1760598b368dSJeff Roberson if ((ke->ke_flags & KEF_INTERNAL) == 0) 1761598b368dSJeff Roberson SLOT_USE(td->td_ksegrp); 1762598b368dSJeff Roberson ke->ke_flags &= ~KEF_INTERNAL; 1763598b368dSJeff Roberson #ifdef SMP 17642d59a44dSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) { 1765598b368dSJeff Roberson if (ke->ke_flags & KEF_REMOVED) 17662d59a44dSJeff Roberson ke->ke_flags &= ~KEF_REMOVED; 176722bf7d9aSJeff Roberson return; 17682d59a44dSJeff Roberson } 1769598b368dSJeff Roberson canmigrate = KSE_CAN_MIGRATE(ke); 1770598b368dSJeff Roberson #endif 17715d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 17725d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 17735d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 17745d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 17755d7ef00cSJeff Roberson ("sched_add: process swapped out")); 17769bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 17779bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 177822bf7d9aSJeff Roberson switch (class) { 1779a8949de2SJeff Roberson case PRI_ITHD: 1780a8949de2SJeff Roberson case PRI_REALTIME: 178115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 178215dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 1783598b368dSJeff Roberson if (canmigrate) 17847cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1785a8949de2SJeff Roberson break; 1786a8949de2SJeff Roberson case PRI_TIMESHARE: 178715dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 178815dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 178915dc847eSJeff Roberson else 179015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 179115dc847eSJeff Roberson break; 179215dc847eSJeff Roberson case PRI_IDLE: 179315dc847eSJeff Roberson /* 179415dc847eSJeff Roberson * This is for priority prop. 179515dc847eSJeff Roberson */ 17963f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 179715dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 179815dc847eSJeff Roberson else 179915dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 180015dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 180115dc847eSJeff Roberson break; 180215dc847eSJeff Roberson default: 1803d322132cSJeff Roberson panic("Unknown pri class."); 1804a8949de2SJeff Roberson break; 1805a6ed4186SJeff Roberson } 180622bf7d9aSJeff Roberson #ifdef SMP 18072454aaf5SJeff Roberson /* 18082454aaf5SJeff Roberson * Don't migrate running threads here. Force the long term balancer 18092454aaf5SJeff Roberson * to do it. 18102454aaf5SJeff Roberson */ 1811f2b74cbfSJeff Roberson if (ke->ke_flags & KEF_HOLD) { 1812f2b74cbfSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 18132454aaf5SJeff Roberson canmigrate = 0; 1814f2b74cbfSJeff Roberson } 18152454aaf5SJeff Roberson /* 18162454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 18172454aaf5SJeff Roberson */ 18182454aaf5SJeff Roberson if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 181986e1c22aSJeff Roberson ke->ke_runq = NULL; 182080f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 182180f86c9fSJeff Roberson return; 182280f86c9fSJeff Roberson } 182322bf7d9aSJeff Roberson /* 1824670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1825670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 182622bf7d9aSJeff Roberson */ 182780f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 182880f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 182980f86c9fSJeff Roberson /* 183080f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 183180f86c9fSJeff Roberson * from the global idle mask. 183280f86c9fSJeff Roberson */ 183380f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 183480f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 183580f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 183680f86c9fSJeff Roberson /* 183780f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 183880f86c9fSJeff Roberson */ 183980f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1840598b368dSJeff Roberson } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD) 1841670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1842670c524fSJeff Roberson return; 18432454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 184422bf7d9aSJeff Roberson #endif 1845f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1846f2b74cbfSJeff Roberson ke->ke_runq == kseq->ksq_curr) 184722bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 184863fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 18490c0b25aeSJohn Baldwin return; 185035e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 185135e6168fSJeff Roberson 1852598b368dSJeff Roberson kseq_runq_add(kseq, ke, flags); 1853155b9987SJeff Roberson kseq_load_add(kseq, ke); 185435e6168fSJeff Roberson } 185535e6168fSJeff Roberson 185635e6168fSJeff Roberson void 18577cf90fb3SJeff Roberson sched_rem(struct thread *td) 185835e6168fSJeff Roberson { 185915dc847eSJeff Roberson struct kseq *kseq; 18607cf90fb3SJeff Roberson struct kse *ke; 18617cf90fb3SJeff Roberson 186281d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 186381d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 186481d47d3fSJeff Roberson curthread->td_proc->p_comm); 1865598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1866598b368dSJeff Roberson ke = td->td_kse; 18672d59a44dSJeff Roberson SLOT_RELEASE(td->td_ksegrp); 1868598b368dSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) { 18692d59a44dSJeff Roberson ke->ke_flags |= KEF_REMOVED; 187022bf7d9aSJeff Roberson return; 18712d59a44dSJeff Roberson } 1872c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1873c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 187435e6168fSJeff Roberson 18752d59a44dSJeff Roberson ke->ke_state = KES_THREAD; 187615dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1877155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1878155b9987SJeff Roberson kseq_load_rem(kseq, ke); 187935e6168fSJeff Roberson } 188035e6168fSJeff Roberson 188135e6168fSJeff Roberson fixpt_t 18827cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 188335e6168fSJeff Roberson { 188435e6168fSJeff Roberson fixpt_t pctcpu; 18857cf90fb3SJeff Roberson struct kse *ke; 188635e6168fSJeff Roberson 188735e6168fSJeff Roberson pctcpu = 0; 18887cf90fb3SJeff Roberson ke = td->td_kse; 1889484288deSJeff Roberson if (ke == NULL) 1890484288deSJeff Roberson return (0); 189135e6168fSJeff Roberson 1892b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 189335e6168fSJeff Roberson if (ke->ke_ticks) { 189435e6168fSJeff Roberson int rtick; 189535e6168fSJeff Roberson 1896210491d3SJeff Roberson /* 1897210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1898210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1899210491d3SJeff Roberson * rounding errors. 1900210491d3SJeff Roberson */ 19012e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 19022e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 190335e6168fSJeff Roberson sched_pctcpu_update(ke); 190435e6168fSJeff Roberson /* How many rtick per second ? */ 1905210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 19067121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 190735e6168fSJeff Roberson } 190835e6168fSJeff Roberson 190935e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1910828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 191135e6168fSJeff Roberson 191235e6168fSJeff Roberson return (pctcpu); 191335e6168fSJeff Roberson } 191435e6168fSJeff Roberson 19159bacd788SJeff Roberson void 19169bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 19179bacd788SJeff Roberson { 19189bacd788SJeff Roberson struct kse *ke; 19199bacd788SJeff Roberson 19209bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 19219bacd788SJeff Roberson ke = td->td_kse; 19229bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 192380f86c9fSJeff Roberson #ifdef SMP 192480f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 19259bacd788SJeff Roberson return; 19269bacd788SJeff Roberson /* sched_rem without the runq_remove */ 19279bacd788SJeff Roberson ke->ke_state = KES_THREAD; 1928155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 19299bacd788SJeff Roberson kseq_notify(ke, cpu); 19309bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. */ 1931279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 19329bacd788SJeff Roberson #endif 19339bacd788SJeff Roberson } 19349bacd788SJeff Roberson 19359bacd788SJeff Roberson void 19369bacd788SJeff Roberson sched_unbind(struct thread *td) 19379bacd788SJeff Roberson { 19389bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 19399bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 19409bacd788SJeff Roberson } 19419bacd788SJeff Roberson 194235e6168fSJeff Roberson int 1943ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1944ebccf1e3SJoseph Koshy { 1945ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1946ebccf1e3SJoseph Koshy return (td->td_kse->ke_flags & KEF_BOUND); 1947ebccf1e3SJoseph Koshy } 1948ebccf1e3SJoseph Koshy 1949ebccf1e3SJoseph Koshy int 195033916c36SJeff Roberson sched_load(void) 195133916c36SJeff Roberson { 195233916c36SJeff Roberson #ifdef SMP 195333916c36SJeff Roberson int total; 195433916c36SJeff Roberson int i; 195533916c36SJeff Roberson 195633916c36SJeff Roberson total = 0; 195733916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 195833916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 195933916c36SJeff Roberson return (total); 196033916c36SJeff Roberson #else 196133916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 196233916c36SJeff Roberson #endif 196333916c36SJeff Roberson } 196433916c36SJeff Roberson 196533916c36SJeff Roberson int 196635e6168fSJeff Roberson sched_sizeof_ksegrp(void) 196735e6168fSJeff Roberson { 196835e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 196935e6168fSJeff Roberson } 197035e6168fSJeff Roberson 197135e6168fSJeff Roberson int 197235e6168fSJeff Roberson sched_sizeof_proc(void) 197335e6168fSJeff Roberson { 197435e6168fSJeff Roberson return (sizeof(struct proc)); 197535e6168fSJeff Roberson } 197635e6168fSJeff Roberson 197735e6168fSJeff Roberson int 197835e6168fSJeff Roberson sched_sizeof_thread(void) 197935e6168fSJeff Roberson { 198035e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 198135e6168fSJeff Roberson } 1982ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1983ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1984