/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. O'Brien 3035e6168fSJeff Roberson #include <sys/param.h> 3135e6168fSJeff Roberson #include <sys/systm.h> 322c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3335e6168fSJeff Roberson #include <sys/kernel.h> 3435e6168fSJeff Roberson #include <sys/ktr.h> 3535e6168fSJeff Roberson #include <sys/lock.h> 3635e6168fSJeff Roberson #include <sys/mutex.h> 3735e6168fSJeff Roberson #include <sys/proc.h> 38245f3abfSJeff Roberson #include <sys/resource.h> 399bacd788SJeff Roberson #include <sys/resourcevar.h> 4035e6168fSJeff Roberson #include <sys/sched.h> 4135e6168fSJeff Roberson #include <sys/smp.h> 4235e6168fSJeff Roberson #include <sys/sx.h> 4335e6168fSJeff Roberson #include <sys/sysctl.h> 4435e6168fSJeff Roberson #include <sys/sysproto.h> 4535e6168fSJeff Roberson #include <sys/vmmeter.h> 4635e6168fSJeff Roberson #ifdef KTRACE 4735e6168fSJeff Roberson #include <sys/uio.h> 4835e6168fSJeff Roberson #include <sys/ktrace.h> 4935e6168fSJeff Roberson #endif 5035e6168fSJeff Roberson 5135e6168fSJeff Roberson #include <machine/cpu.h> 5222bf7d9aSJeff Roberson #include <machine/smp.h> 5335e6168fSJeff Roberson 5415dc847eSJeff Roberson #define KTR_ULE KTR_NFS 5515dc847eSJeff Roberson 5635e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 5735e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 5835e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 5935e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6035e6168fSJeff Roberson 6135e6168fSJeff Roberson static void sched_setup(void *dummy); 6235e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 6335e6168fSJeff Roberson 64e038d354SScott Long static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 65e1f89c22SJeff Roberson 66e038d354SScott Long 
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 67e038d354SScott Long "Scheduler name"); 68dc095794SScott Long 6915dc847eSJeff Roberson static int slice_min = 1; 7015dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 7115dc847eSJeff Roberson 72210491d3SJeff Roberson static int slice_max = 10; 7315dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 7415dc847eSJeff Roberson 7515dc847eSJeff Roberson int realstathz; 7615dc847eSJeff Roberson int tickincr = 1; 77783caefbSJeff Roberson 7835e6168fSJeff Roberson /* 7935e6168fSJeff Roberson * These datastructures are allocated within their parent datastructure but 8035e6168fSJeff Roberson * are scheduler specific. 8135e6168fSJeff Roberson */ 8235e6168fSJeff Roberson 8335e6168fSJeff Roberson struct ke_sched { 8435e6168fSJeff Roberson int ske_slice; 8535e6168fSJeff Roberson struct runq *ske_runq; 8635e6168fSJeff Roberson /* The following variables are only used for pctcpu calculation */ 8735e6168fSJeff Roberson int ske_ltick; /* Last tick that we were running on */ 8835e6168fSJeff Roberson int ske_ftick; /* First tick that we were running on */ 8935e6168fSJeff Roberson int ske_ticks; /* Tick count */ 9015dc847eSJeff Roberson /* CPU that we have affinity for. */ 91cd6e33dfSJeff Roberson u_char ske_cpu; 9235e6168fSJeff Roberson }; 9335e6168fSJeff Roberson #define ke_slice ke_sched->ske_slice 9435e6168fSJeff Roberson #define ke_runq ke_sched->ske_runq 9535e6168fSJeff Roberson #define ke_ltick ke_sched->ske_ltick 9635e6168fSJeff Roberson #define ke_ftick ke_sched->ske_ftick 9735e6168fSJeff Roberson #define ke_ticks ke_sched->ske_ticks 98cd6e33dfSJeff Roberson #define ke_cpu ke_sched->ske_cpu 9922bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 10022bf7d9aSJeff Roberson 10122bf7d9aSJeff Roberson #define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. 
*/ 102a70d729bSJeff Roberson #define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */ 1032454aaf5SJeff Roberson #define KEF_XFERABLE KEF_SCHED2 /* KSE was added as transferable. */ 10435e6168fSJeff Roberson 10535e6168fSJeff Roberson struct kg_sched { 106407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 107407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 10835e6168fSJeff Roberson }; 10935e6168fSJeff Roberson #define kg_slptime kg_sched->skg_slptime 110407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 11135e6168fSJeff Roberson 11235e6168fSJeff Roberson struct td_sched { 11335e6168fSJeff Roberson int std_slptime; 11435e6168fSJeff Roberson }; 11535e6168fSJeff Roberson #define td_slptime td_sched->std_slptime 11635e6168fSJeff Roberson 1175d7ef00cSJeff Roberson struct td_sched td_sched; 11835e6168fSJeff Roberson struct ke_sched ke_sched; 11935e6168fSJeff Roberson struct kg_sched kg_sched; 12035e6168fSJeff Roberson 12135e6168fSJeff Roberson struct ke_sched *kse0_sched = &ke_sched; 12235e6168fSJeff Roberson struct kg_sched *ksegrp0_sched = &kg_sched; 12335e6168fSJeff Roberson struct p_sched *proc0_sched = NULL; 12435e6168fSJeff Roberson struct td_sched *thread0_sched = &td_sched; 12535e6168fSJeff Roberson 12635e6168fSJeff Roberson /* 127665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 128665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 129665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 130665cb285SJeff Roberson * on latency. 131e1f89c22SJeff Roberson * 132e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 133665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 134e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 
13535e6168fSJeff Roberson */ 136407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 137a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 138a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 139665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 14015dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 141665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 14235e6168fSJeff Roberson 14335e6168fSJeff Roberson /* 144e1f89c22SJeff Roberson * These determine the interactivity of a process. 14535e6168fSJeff Roberson * 146407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 147407b0157SJeff Roberson * before throttling back. 148d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 149210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 150e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 15135e6168fSJeff Roberson */ 1524c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 153d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 154210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 155210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1564c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 157e1f89c22SJeff Roberson 15835e6168fSJeff Roberson /* 15935e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 16035e6168fSJeff Roberson * granted to each thread. 16135e6168fSJeff Roberson * 16235e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 16335e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 16435e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 165245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 
166245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 1677d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment. 16835e6168fSJeff Roberson */ 16915dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 17015dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 1710392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max) 1727d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 17335e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 17435e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 175245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 1767d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 17735e6168fSJeff Roberson 17835e6168fSJeff Roberson /* 17935e6168fSJeff Roberson * This macro determines whether or not the kse belongs on the current or 18035e6168fSJeff Roberson * next run queue. 18135e6168fSJeff Roberson */ 18215dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 18315dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 184a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 185b003da79SDavid E. O'Brien (ke->ke_thread->td_priority < kg->kg_user_pri || \ 18608fd6713SJeff Roberson SCHED_INTERACTIVE(kg)) 18735e6168fSJeff Roberson 18835e6168fSJeff Roberson /* 18935e6168fSJeff Roberson * Cpu percentage computation macros and defines. 19035e6168fSJeff Roberson * 19135e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 19235e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 
19335e6168fSJeff Roberson */ 19435e6168fSJeff Roberson 1955053d272SJeff Roberson #define SCHED_CPU_TIME 10 19635e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 19735e6168fSJeff Roberson 19835e6168fSJeff Roberson /* 19915dc847eSJeff Roberson * kseq - per processor runqs and statistics. 20035e6168fSJeff Roberson */ 20135e6168fSJeff Roberson struct kseq { 202a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 20315dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 20415dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 20515dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 206ef1134c9SJeff Roberson int ksq_load_timeshare; /* Load for timeshare. */ 20715dc847eSJeff Roberson int ksq_load; /* Aggregate load. */ 208a0a931ceSJeff Roberson short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 20915dc847eSJeff Roberson short ksq_nicemin; /* Least nice. */ 2105d7ef00cSJeff Roberson #ifdef SMP 21180f86c9fSJeff Roberson int ksq_transferable; 21280f86c9fSJeff Roberson LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 21380f86c9fSJeff Roberson struct kseq_group *ksq_group; /* Our processor group. */ 214fa9c9717SJeff Roberson volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 21533916c36SJeff Roberson #else 21633916c36SJeff Roberson int ksq_sysload; /* For loadavg, !ITHD load. */ 2175d7ef00cSJeff Roberson #endif 21835e6168fSJeff Roberson }; 21935e6168fSJeff Roberson 22080f86c9fSJeff Roberson #ifdef SMP 22180f86c9fSJeff Roberson /* 22280f86c9fSJeff Roberson * kseq groups are groups of processors which can cheaply share threads. When 22380f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other 22480f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt. 22580f86c9fSJeff Roberson * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 
22680f86c9fSJeff Roberson * In a numa environment we'd want an idle bitmap per group and a two tiered 22780f86c9fSJeff Roberson * load balancer. 22880f86c9fSJeff Roberson */ 22980f86c9fSJeff Roberson struct kseq_group { 23080f86c9fSJeff Roberson int ksg_cpus; /* Count of CPUs in this kseq group. */ 231b2ae7ed7SMarcel Moolenaar cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 232b2ae7ed7SMarcel Moolenaar cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 233b2ae7ed7SMarcel Moolenaar cpumask_t ksg_mask; /* Bit mask for first cpu. */ 234cac77d04SJeff Roberson int ksg_load; /* Total load of this group. */ 23580f86c9fSJeff Roberson int ksg_transferable; /* Transferable load of this group. */ 23680f86c9fSJeff Roberson LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */ 23780f86c9fSJeff Roberson }; 23880f86c9fSJeff Roberson #endif 23980f86c9fSJeff Roberson 24035e6168fSJeff Roberson /* 24135e6168fSJeff Roberson * One kse queue per processor. 24235e6168fSJeff Roberson */ 2430a016a05SJeff Roberson #ifdef SMP 244b2ae7ed7SMarcel Moolenaar static cpumask_t kseq_idle; 245cac77d04SJeff Roberson static int ksg_maxid; 24622bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 24780f86c9fSJeff Roberson static struct kseq_group kseq_groups[MAXCPU]; 248dc03363dSJeff Roberson static int bal_tick; 249dc03363dSJeff Roberson static int gbal_tick; 250dc03363dSJeff Roberson 25180f86c9fSJeff Roberson #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 25280f86c9fSJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 253cac77d04SJeff Roberson #define KSEQ_ID(x) ((x) - kseq_cpu) 254cac77d04SJeff Roberson #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 25580f86c9fSJeff Roberson #else /* !SMP */ 25622bf7d9aSJeff Roberson static struct kseq kseq_cpu; 257dc03363dSJeff Roberson 2580a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 2590a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 2600a016a05SJeff Roberson #endif 26135e6168fSJeff Roberson 26263fcce68SJohn Baldwin static 
void sched_add_internal(struct thread *td, int preemptive); 263245f3abfSJeff Roberson static void sched_slice(struct kse *ke); 26415dc847eSJeff Roberson static void sched_priority(struct ksegrp *kg); 265e1f89c22SJeff Roberson static int sched_interact_score(struct ksegrp *kg); 2664b60e324SJeff Roberson static void sched_interact_update(struct ksegrp *kg); 267d322132cSJeff Roberson static void sched_interact_fork(struct ksegrp *kg); 26822bf7d9aSJeff Roberson static void sched_pctcpu_update(struct kse *ke); 26935e6168fSJeff Roberson 2705d7ef00cSJeff Roberson /* Operations on per processor queues */ 27122bf7d9aSJeff Roberson static struct kse * kseq_choose(struct kseq *kseq); 2720a016a05SJeff Roberson static void kseq_setup(struct kseq *kseq); 273155b9987SJeff Roberson static void kseq_load_add(struct kseq *kseq, struct kse *ke); 274155b9987SJeff Roberson static void kseq_load_rem(struct kseq *kseq, struct kse *ke); 275155b9987SJeff Roberson static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke); 276155b9987SJeff Roberson static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke); 27715dc847eSJeff Roberson static void kseq_nice_add(struct kseq *kseq, int nice); 27815dc847eSJeff Roberson static void kseq_nice_rem(struct kseq *kseq, int nice); 2797cd650a9SJeff Roberson void kseq_print(int cpu); 2805d7ef00cSJeff Roberson #ifdef SMP 28180f86c9fSJeff Roberson static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class); 28222bf7d9aSJeff Roberson static struct kse *runq_steal(struct runq *rq); 283dc03363dSJeff Roberson static void sched_balance(void); 284dc03363dSJeff Roberson static void sched_balance_groups(void); 285cac77d04SJeff Roberson static void sched_balance_group(struct kseq_group *ksg); 286cac77d04SJeff Roberson static void sched_balance_pair(struct kseq *high, struct kseq *low); 28722bf7d9aSJeff Roberson static void kseq_move(struct kseq *from, int cpu); 28880f86c9fSJeff Roberson static int kseq_idled(struct kseq *kseq); 
28922bf7d9aSJeff Roberson static void kseq_notify(struct kse *ke, int cpu); 29022bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 29180f86c9fSJeff Roberson static struct kse *kseq_steal(struct kseq *kseq, int stealidle); 292e7a976f4SJeff Roberson /* 293e7a976f4SJeff Roberson * On P4 Xeons the round-robin interrupt delivery is broken. As a result of 294e7a976f4SJeff Roberson * this, we can't pin interrupts to the cpu that they were delivered to, 295e7a976f4SJeff Roberson * otherwise all ithreads only run on CPU 0. 296e7a976f4SJeff Roberson */ 297e7a976f4SJeff Roberson #ifdef __i386__ 298e7a976f4SJeff Roberson #define KSE_CAN_MIGRATE(ke, class) \ 299e7a976f4SJeff Roberson ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 300e7a976f4SJeff Roberson #else /* !__i386__ */ 3019bacd788SJeff Roberson #define KSE_CAN_MIGRATE(ke, class) \ 302a70d729bSJeff Roberson ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 && \ 303f28b3340SJeff Roberson ((ke)->ke_flags & KEF_BOUND) == 0) 304e7a976f4SJeff Roberson #endif /* !__i386__ */ 3055d7ef00cSJeff Roberson #endif 3065d7ef00cSJeff Roberson 30715dc847eSJeff Roberson void 3087cd650a9SJeff Roberson kseq_print(int cpu) 30915dc847eSJeff Roberson { 3107cd650a9SJeff Roberson struct kseq *kseq; 31115dc847eSJeff Roberson int i; 31215dc847eSJeff Roberson 3137cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 31415dc847eSJeff Roberson 31515dc847eSJeff Roberson printf("kseq:\n"); 31615dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 317155b9987SJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 318ef1134c9SJeff Roberson #ifdef SMP 31980f86c9fSJeff Roberson printf("\tload transferable: %d\n", kseq->ksq_transferable); 320ef1134c9SJeff Roberson #endif 32115dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 32215dc847eSJeff Roberson printf("\tnice counts:\n"); 323a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 32415dc847eSJeff Roberson if 
(kseq->ksq_nice[i]) 32515dc847eSJeff Roberson printf("\t\t%d = %d\n", 32615dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 32715dc847eSJeff Roberson } 32815dc847eSJeff Roberson 329155b9987SJeff Roberson static __inline void 330155b9987SJeff Roberson kseq_runq_add(struct kseq *kseq, struct kse *ke) 331155b9987SJeff Roberson { 332155b9987SJeff Roberson #ifdef SMP 33380f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) { 33480f86c9fSJeff Roberson kseq->ksq_transferable++; 33580f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 3362454aaf5SJeff Roberson ke->ke_flags |= KEF_XFERABLE; 33780f86c9fSJeff Roberson } 338155b9987SJeff Roberson #endif 339155b9987SJeff Roberson runq_add(ke->ke_runq, ke); 340155b9987SJeff Roberson } 341155b9987SJeff Roberson 342155b9987SJeff Roberson static __inline void 343155b9987SJeff Roberson kseq_runq_rem(struct kseq *kseq, struct kse *ke) 344155b9987SJeff Roberson { 345155b9987SJeff Roberson #ifdef SMP 3462454aaf5SJeff Roberson if (ke->ke_flags & KEF_XFERABLE) { 34780f86c9fSJeff Roberson kseq->ksq_transferable--; 34880f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 3492454aaf5SJeff Roberson ke->ke_flags &= ~KEF_XFERABLE; 35080f86c9fSJeff Roberson } 351155b9987SJeff Roberson #endif 352155b9987SJeff Roberson runq_remove(ke->ke_runq, ke); 353155b9987SJeff Roberson } 354155b9987SJeff Roberson 355a8949de2SJeff Roberson static void 356155b9987SJeff Roberson kseq_load_add(struct kseq *kseq, struct kse *ke) 3575d7ef00cSJeff Roberson { 358ef1134c9SJeff Roberson int class; 359b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 360ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 361ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 362ef1134c9SJeff Roberson kseq->ksq_load_timeshare++; 36315dc847eSJeff Roberson kseq->ksq_load++; 364207a6c0dSDavid E. 
O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 36533916c36SJeff Roberson #ifdef SMP 366cac77d04SJeff Roberson kseq->ksq_group->ksg_load++; 36733916c36SJeff Roberson #else 36833916c36SJeff Roberson kseq->ksq_sysload++; 369cac77d04SJeff Roberson #endif 37015dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 371155b9987SJeff Roberson CTR6(KTR_ULE, 372155b9987SJeff Roberson "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 37315dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 374fa885116SJulian Elischer ke->ke_proc->p_nice, kseq->ksq_nicemin); 37515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 376fa885116SJulian Elischer kseq_nice_add(kseq, ke->ke_proc->p_nice); 3775d7ef00cSJeff Roberson } 37815dc847eSJeff Roberson 379a8949de2SJeff Roberson static void 380155b9987SJeff Roberson kseq_load_rem(struct kseq *kseq, struct kse *ke) 3815d7ef00cSJeff Roberson { 382ef1134c9SJeff Roberson int class; 383b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 384ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 385ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 386ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 387207a6c0dSDavid E. 
O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 38833916c36SJeff Roberson #ifdef SMP 389cac77d04SJeff Roberson kseq->ksq_group->ksg_load--; 39033916c36SJeff Roberson #else 39133916c36SJeff Roberson kseq->ksq_sysload--; 392cac77d04SJeff Roberson #endif 39315dc847eSJeff Roberson kseq->ksq_load--; 39415dc847eSJeff Roberson ke->ke_runq = NULL; 39515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 396fa885116SJulian Elischer kseq_nice_rem(kseq, ke->ke_proc->p_nice); 3975d7ef00cSJeff Roberson } 3985d7ef00cSJeff Roberson 39915dc847eSJeff Roberson static void 40015dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 40115dc847eSJeff Roberson { 402b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 40315dc847eSJeff Roberson /* Normalize to zero. */ 40415dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 405ef1134c9SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 40615dc847eSJeff Roberson kseq->ksq_nicemin = nice; 40715dc847eSJeff Roberson } 40815dc847eSJeff Roberson 40915dc847eSJeff Roberson static void 41015dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 41115dc847eSJeff Roberson { 41215dc847eSJeff Roberson int n; 41315dc847eSJeff Roberson 414b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 41515dc847eSJeff Roberson /* Normalize to zero. */ 41615dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 41715dc847eSJeff Roberson kseq->ksq_nice[n]--; 41815dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 41915dc847eSJeff Roberson 42015dc847eSJeff Roberson /* 42115dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 42215dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 42315dc847eSJeff Roberson * the smallest nice. 
42415dc847eSJeff Roberson */ 42515dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 42615dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 427ef1134c9SJeff Roberson kseq->ksq_load_timeshare == 0) 42815dc847eSJeff Roberson return; 42915dc847eSJeff Roberson 430a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 43115dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 43215dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 43315dc847eSJeff Roberson return; 43415dc847eSJeff Roberson } 43515dc847eSJeff Roberson } 43615dc847eSJeff Roberson 4375d7ef00cSJeff Roberson #ifdef SMP 438356500a3SJeff Roberson /* 439155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 440356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 441356500a3SJeff Roberson * by migrating some processes. 442356500a3SJeff Roberson * 443356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 444356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 445356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 446356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 447356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 448356500a3SJeff Roberson * 449356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 450356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 451356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 
452356500a3SJeff Roberson * 453356500a3SJeff Roberson */ 45422bf7d9aSJeff Roberson static void 455dc03363dSJeff Roberson sched_balance(void) 456356500a3SJeff Roberson { 457cac77d04SJeff Roberson struct kseq_group *high; 458cac77d04SJeff Roberson struct kseq_group *low; 459cac77d04SJeff Roberson struct kseq_group *ksg; 460cac77d04SJeff Roberson int cnt; 461356500a3SJeff Roberson int i; 462356500a3SJeff Roberson 46386f8ae96SJeff Roberson if (smp_started == 0) 46486f8ae96SJeff Roberson goto out; 465cac77d04SJeff Roberson low = high = NULL; 466cac77d04SJeff Roberson i = random() % (ksg_maxid + 1); 467cac77d04SJeff Roberson for (cnt = 0; cnt <= ksg_maxid; cnt++) { 468cac77d04SJeff Roberson ksg = KSEQ_GROUP(i); 469cac77d04SJeff Roberson /* 470cac77d04SJeff Roberson * Find the CPU with the highest load that has some 471cac77d04SJeff Roberson * threads to transfer. 472cac77d04SJeff Roberson */ 473cac77d04SJeff Roberson if ((high == NULL || ksg->ksg_load > high->ksg_load) 474cac77d04SJeff Roberson && ksg->ksg_transferable) 475cac77d04SJeff Roberson high = ksg; 476cac77d04SJeff Roberson if (low == NULL || ksg->ksg_load < low->ksg_load) 477cac77d04SJeff Roberson low = ksg; 478cac77d04SJeff Roberson if (++i > ksg_maxid) 479cac77d04SJeff Roberson i = 0; 480cac77d04SJeff Roberson } 481cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 482cac77d04SJeff Roberson sched_balance_pair(LIST_FIRST(&high->ksg_members), 483cac77d04SJeff Roberson LIST_FIRST(&low->ksg_members)); 484cac77d04SJeff Roberson out: 485dc03363dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 486cac77d04SJeff Roberson } 48786f8ae96SJeff Roberson 488cac77d04SJeff Roberson static void 489dc03363dSJeff Roberson sched_balance_groups(void) 490cac77d04SJeff Roberson { 491cac77d04SJeff Roberson int i; 492cac77d04SJeff Roberson 493dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 494cac77d04SJeff Roberson if (smp_started) 495cac77d04SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 
496cac77d04SJeff Roberson sched_balance_group(KSEQ_GROUP(i)); 497dc03363dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 498356500a3SJeff Roberson } 499cac77d04SJeff Roberson 500cac77d04SJeff Roberson static void 501cac77d04SJeff Roberson sched_balance_group(struct kseq_group *ksg) 502cac77d04SJeff Roberson { 503cac77d04SJeff Roberson struct kseq *kseq; 504cac77d04SJeff Roberson struct kseq *high; 505cac77d04SJeff Roberson struct kseq *low; 506cac77d04SJeff Roberson int load; 507cac77d04SJeff Roberson 508cac77d04SJeff Roberson if (ksg->ksg_transferable == 0) 509cac77d04SJeff Roberson return; 510cac77d04SJeff Roberson low = NULL; 511cac77d04SJeff Roberson high = NULL; 512cac77d04SJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 513cac77d04SJeff Roberson load = kseq->ksq_load; 514cac77d04SJeff Roberson if (high == NULL || load > high->ksq_load) 515cac77d04SJeff Roberson high = kseq; 516cac77d04SJeff Roberson if (low == NULL || load < low->ksq_load) 517cac77d04SJeff Roberson low = kseq; 518356500a3SJeff Roberson } 519cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 520cac77d04SJeff Roberson sched_balance_pair(high, low); 521356500a3SJeff Roberson } 522cac77d04SJeff Roberson 523cac77d04SJeff Roberson static void 524cac77d04SJeff Roberson sched_balance_pair(struct kseq *high, struct kseq *low) 525cac77d04SJeff Roberson { 526cac77d04SJeff Roberson int transferable; 527cac77d04SJeff Roberson int high_load; 528cac77d04SJeff Roberson int low_load; 529cac77d04SJeff Roberson int move; 530cac77d04SJeff Roberson int diff; 531cac77d04SJeff Roberson int i; 532cac77d04SJeff Roberson 53380f86c9fSJeff Roberson /* 53480f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 53580f86c9fSJeff Roberson * kseq's transferable count, otherwise we can steal from other members 53680f86c9fSJeff Roberson * of the group. 
53780f86c9fSJeff Roberson */ 538cac77d04SJeff Roberson if (high->ksq_group == low->ksq_group) { 539cac77d04SJeff Roberson transferable = high->ksq_transferable; 540cac77d04SJeff Roberson high_load = high->ksq_load; 541cac77d04SJeff Roberson low_load = low->ksq_load; 542cac77d04SJeff Roberson } else { 543cac77d04SJeff Roberson transferable = high->ksq_group->ksg_transferable; 544cac77d04SJeff Roberson high_load = high->ksq_group->ksg_load; 545cac77d04SJeff Roberson low_load = low->ksq_group->ksg_load; 546cac77d04SJeff Roberson } 54780f86c9fSJeff Roberson if (transferable == 0) 548cac77d04SJeff Roberson return; 549155b9987SJeff Roberson /* 550155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 55180f86c9fSJeff Roberson * kses we actually have to give up (transferable). 552155b9987SJeff Roberson */ 553cac77d04SJeff Roberson diff = high_load - low_load; 554356500a3SJeff Roberson move = diff / 2; 555356500a3SJeff Roberson if (diff & 0x1) 556356500a3SJeff Roberson move++; 55780f86c9fSJeff Roberson move = min(move, transferable); 558356500a3SJeff Roberson for (i = 0; i < move; i++) 559cac77d04SJeff Roberson kseq_move(high, KSEQ_ID(low)); 560356500a3SJeff Roberson return; 561356500a3SJeff Roberson } 562356500a3SJeff Roberson 56322bf7d9aSJeff Roberson static void 564356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 565356500a3SJeff Roberson { 56680f86c9fSJeff Roberson struct kseq *kseq; 56780f86c9fSJeff Roberson struct kseq *to; 568356500a3SJeff Roberson struct kse *ke; 569356500a3SJeff Roberson 57080f86c9fSJeff Roberson kseq = from; 57180f86c9fSJeff Roberson to = KSEQ_CPU(cpu); 57280f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 57380f86c9fSJeff Roberson if (ke == NULL) { 57480f86c9fSJeff Roberson struct kseq_group *ksg; 57580f86c9fSJeff Roberson 57680f86c9fSJeff Roberson ksg = kseq->ksq_group; 57780f86c9fSJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 57880f86c9fSJeff Roberson if (kseq == from || 
kseq->ksq_transferable == 0) 57980f86c9fSJeff Roberson continue; 58080f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 58180f86c9fSJeff Roberson break; 58280f86c9fSJeff Roberson } 58380f86c9fSJeff Roberson if (ke == NULL) 58480f86c9fSJeff Roberson panic("kseq_move: No KSEs available with a " 58580f86c9fSJeff Roberson "transferable count of %d\n", 58680f86c9fSJeff Roberson ksg->ksg_transferable); 58780f86c9fSJeff Roberson } 58880f86c9fSJeff Roberson if (kseq == to) 58980f86c9fSJeff Roberson return; 590356500a3SJeff Roberson ke->ke_state = KES_THREAD; 59180f86c9fSJeff Roberson kseq_runq_rem(kseq, ke); 59280f86c9fSJeff Roberson kseq_load_rem(kseq, ke); 593112b6d3aSJeff Roberson kseq_notify(ke, cpu); 594356500a3SJeff Roberson } 59522bf7d9aSJeff Roberson 59680f86c9fSJeff Roberson static int 59780f86c9fSJeff Roberson kseq_idled(struct kseq *kseq) 59822bf7d9aSJeff Roberson { 59980f86c9fSJeff Roberson struct kseq_group *ksg; 60080f86c9fSJeff Roberson struct kseq *steal; 60180f86c9fSJeff Roberson struct kse *ke; 60280f86c9fSJeff Roberson 60380f86c9fSJeff Roberson ksg = kseq->ksq_group; 60480f86c9fSJeff Roberson /* 60580f86c9fSJeff Roberson * If we're in a cpu group, try and steal kses from another cpu in 60680f86c9fSJeff Roberson * the group before idling. 
60780f86c9fSJeff Roberson */ 60880f86c9fSJeff Roberson if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 60980f86c9fSJeff Roberson LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 61080f86c9fSJeff Roberson if (steal == kseq || steal->ksq_transferable == 0) 61180f86c9fSJeff Roberson continue; 61280f86c9fSJeff Roberson ke = kseq_steal(steal, 0); 61380f86c9fSJeff Roberson if (ke == NULL) 61480f86c9fSJeff Roberson continue; 61580f86c9fSJeff Roberson ke->ke_state = KES_THREAD; 61680f86c9fSJeff Roberson kseq_runq_rem(steal, ke); 61780f86c9fSJeff Roberson kseq_load_rem(steal, ke); 61880f86c9fSJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 61963fcce68SJohn Baldwin sched_add_internal(ke->ke_thread, 0); 62080f86c9fSJeff Roberson return (0); 62180f86c9fSJeff Roberson } 62280f86c9fSJeff Roberson } 62380f86c9fSJeff Roberson /* 62480f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 62580f86c9fSJeff Roberson * idle. Otherwise we could get into a situation where a KSE bounces 62680f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 
62780f86c9fSJeff Roberson */ 62880f86c9fSJeff Roberson ksg->ksg_idlemask |= PCPU_GET(cpumask); 62980f86c9fSJeff Roberson if (ksg->ksg_idlemask != ksg->ksg_cpumask) 63080f86c9fSJeff Roberson return (1); 63180f86c9fSJeff Roberson atomic_set_int(&kseq_idle, ksg->ksg_mask); 63280f86c9fSJeff Roberson return (1); 63322bf7d9aSJeff Roberson } 63422bf7d9aSJeff Roberson 63522bf7d9aSJeff Roberson static void 63622bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 63722bf7d9aSJeff Roberson { 63822bf7d9aSJeff Roberson struct kse *nke; 63922bf7d9aSJeff Roberson struct kse *ke; 64022bf7d9aSJeff Roberson 64122bf7d9aSJeff Roberson do { 64200fbcda8SAlexander Kabaev *(volatile struct kse **)&ke = kseq->ksq_assigned; 64322bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 64422bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 64522bf7d9aSJeff Roberson nke = ke->ke_assign; 64622bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 64763fcce68SJohn Baldwin sched_add_internal(ke->ke_thread, 0); 64822bf7d9aSJeff Roberson } 64922bf7d9aSJeff Roberson } 65022bf7d9aSJeff Roberson 65122bf7d9aSJeff Roberson static void 65222bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 65322bf7d9aSJeff Roberson { 65422bf7d9aSJeff Roberson struct kseq *kseq; 65522bf7d9aSJeff Roberson struct thread *td; 65622bf7d9aSJeff Roberson struct pcpu *pcpu; 6572454aaf5SJeff Roberson int prio; 65822bf7d9aSJeff Roberson 65986e1c22aSJeff Roberson ke->ke_cpu = cpu; 66022bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 6612454aaf5SJeff Roberson prio = ke->ke_thread->td_priority; 66222bf7d9aSJeff Roberson 66322bf7d9aSJeff Roberson kseq = KSEQ_CPU(cpu); 6645d7ef00cSJeff Roberson 6650c0a98b2SJeff Roberson /* 66622bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 
66722bf7d9aSJeff Roberson */ 66822bf7d9aSJeff Roberson do { 66900fbcda8SAlexander Kabaev *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 67022bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 6712454aaf5SJeff Roberson /* 6722454aaf5SJeff Roberson * Without sched_lock we could lose a race where we set NEEDRESCHED 6732454aaf5SJeff Roberson * on a thread that is switched out before the IPI is delivered. This 6742454aaf5SJeff Roberson * would lead us to miss the resched. This will be a problem once 6752454aaf5SJeff Roberson * sched_lock is pushed down. 6762454aaf5SJeff Roberson */ 67722bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 67822bf7d9aSJeff Roberson td = pcpu->pc_curthread; 67922bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 68022bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 68122bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 68222bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 68322bf7d9aSJeff Roberson } 68422bf7d9aSJeff Roberson } 68522bf7d9aSJeff Roberson 68622bf7d9aSJeff Roberson static struct kse * 68722bf7d9aSJeff Roberson runq_steal(struct runq *rq) 68822bf7d9aSJeff Roberson { 68922bf7d9aSJeff Roberson struct rqhead *rqh; 69022bf7d9aSJeff Roberson struct rqbits *rqb; 69122bf7d9aSJeff Roberson struct kse *ke; 69222bf7d9aSJeff Roberson int word; 69322bf7d9aSJeff Roberson int bit; 69422bf7d9aSJeff Roberson 69522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 69622bf7d9aSJeff Roberson rqb = &rq->rq_status; 69722bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 69822bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 69922bf7d9aSJeff Roberson continue; 70022bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 701a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 70222bf7d9aSJeff Roberson continue; 70322bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 70422bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, 
ke_procq) { 705ef1134c9SJeff Roberson if (KSE_CAN_MIGRATE(ke, 706ef1134c9SJeff Roberson PRI_BASE(ke->ke_ksegrp->kg_pri_class))) 70722bf7d9aSJeff Roberson return (ke); 70822bf7d9aSJeff Roberson } 70922bf7d9aSJeff Roberson } 71022bf7d9aSJeff Roberson } 71122bf7d9aSJeff Roberson return (NULL); 71222bf7d9aSJeff Roberson } 71322bf7d9aSJeff Roberson 71422bf7d9aSJeff Roberson static struct kse * 71580f86c9fSJeff Roberson kseq_steal(struct kseq *kseq, int stealidle) 71622bf7d9aSJeff Roberson { 71722bf7d9aSJeff Roberson struct kse *ke; 71822bf7d9aSJeff Roberson 71980f86c9fSJeff Roberson /* 72080f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 72180f86c9fSJeff Roberson * may not have run for a while. 72280f86c9fSJeff Roberson */ 72322bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 72422bf7d9aSJeff Roberson return (ke); 72580f86c9fSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 72680f86c9fSJeff Roberson return (ke); 72780f86c9fSJeff Roberson if (stealidle) 72822bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 72980f86c9fSJeff Roberson return (NULL); 73022bf7d9aSJeff Roberson } 73180f86c9fSJeff Roberson 73280f86c9fSJeff Roberson int 73380f86c9fSJeff Roberson kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 73480f86c9fSJeff Roberson { 73580f86c9fSJeff Roberson struct kseq_group *ksg; 73680f86c9fSJeff Roberson int cpu; 73780f86c9fSJeff Roberson 738670c524fSJeff Roberson if (smp_started == 0) 739670c524fSJeff Roberson return (0); 74080f86c9fSJeff Roberson cpu = 0; 74180f86c9fSJeff Roberson /* 7422454aaf5SJeff Roberson * If our load exceeds a certain threshold we should attempt to 7432454aaf5SJeff Roberson * reassign this thread. The first candidate is the cpu that 7442454aaf5SJeff Roberson * originally ran the thread. If it is idle, assign it there, 7452454aaf5SJeff Roberson * otherwise, pick an idle cpu. 
7462454aaf5SJeff Roberson * 7472454aaf5SJeff Roberson * The threshold at which we start to reassign kses has a large impact 748670c524fSJeff Roberson * on the overall performance of the system. Tuned too high and 749670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 750d50c87deSOlivier Houchard * and context switches. 751670c524fSJeff Roberson */ 7522454aaf5SJeff Roberson ksg = kseq->ksq_group; 7532454aaf5SJeff Roberson if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) { 7542454aaf5SJeff Roberson ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group; 7552454aaf5SJeff Roberson if (kseq_idle & ksg->ksg_mask) { 7562454aaf5SJeff Roberson cpu = ffs(ksg->ksg_idlemask); 7572454aaf5SJeff Roberson if (cpu) 7582454aaf5SJeff Roberson goto migrate; 7592454aaf5SJeff Roberson } 76080f86c9fSJeff Roberson /* 76180f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 76280f86c9fSJeff Roberson * but the race shouldn't be terrible. 76380f86c9fSJeff Roberson */ 76480f86c9fSJeff Roberson cpu = ffs(kseq_idle); 76580f86c9fSJeff Roberson if (cpu) 7662454aaf5SJeff Roberson goto migrate; 76780f86c9fSJeff Roberson } 76880f86c9fSJeff Roberson /* 76980f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 77080f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 77180f86c9fSJeff Roberson */ 7722454aaf5SJeff Roberson ksg = kseq->ksq_group; 7732454aaf5SJeff Roberson if (ksg->ksg_idlemask) { 77480f86c9fSJeff Roberson cpu = ffs(ksg->ksg_idlemask); 77580f86c9fSJeff Roberson if (cpu) 7762454aaf5SJeff Roberson goto migrate; 77780f86c9fSJeff Roberson } 77880f86c9fSJeff Roberson /* 7792454aaf5SJeff Roberson * No new CPU was found. 7802454aaf5SJeff Roberson */ 7812454aaf5SJeff Roberson return (0); 7822454aaf5SJeff Roberson migrate: 7832454aaf5SJeff Roberson /* 78480f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 
78580f86c9fSJeff Roberson */ 78680f86c9fSJeff Roberson cpu--; 78780f86c9fSJeff Roberson ke->ke_runq = NULL; 78880f86c9fSJeff Roberson kseq_notify(ke, cpu); 7892454aaf5SJeff Roberson 79080f86c9fSJeff Roberson return (1); 79180f86c9fSJeff Roberson } 79280f86c9fSJeff Roberson 79322bf7d9aSJeff Roberson #endif /* SMP */ 79422bf7d9aSJeff Roberson 79522bf7d9aSJeff Roberson /* 79622bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 7970c0a98b2SJeff Roberson */ 7980c0a98b2SJeff Roberson 79922bf7d9aSJeff Roberson static struct kse * 80022bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 8015d7ef00cSJeff Roberson { 8025d7ef00cSJeff Roberson struct kse *ke; 8035d7ef00cSJeff Roberson struct runq *swap; 8045d7ef00cSJeff Roberson 805b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 80615dc847eSJeff Roberson swap = NULL; 807a8949de2SJeff Roberson 80815dc847eSJeff Roberson for (;;) { 80915dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 81015dc847eSJeff Roberson if (ke == NULL) { 81115dc847eSJeff Roberson /* 812bf0acc27SJohn Baldwin * We already swapped once and didn't get anywhere. 81315dc847eSJeff Roberson */ 81415dc847eSJeff Roberson if (swap) 81515dc847eSJeff Roberson break; 8165d7ef00cSJeff Roberson swap = kseq->ksq_curr; 8175d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 8185d7ef00cSJeff Roberson kseq->ksq_next = swap; 81915dc847eSJeff Roberson continue; 820a8949de2SJeff Roberson } 82115dc847eSJeff Roberson /* 82215dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 82315dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 82415dc847eSJeff Roberson * of the range that receives slices. 
82515dc847eSJeff Roberson */ 82622bf7d9aSJeff Roberson if (ke->ke_slice == 0) { 82715dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 82815dc847eSJeff Roberson sched_slice(ke); 82915dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 83015dc847eSJeff Roberson runq_add(ke->ke_runq, ke); 83115dc847eSJeff Roberson continue; 83215dc847eSJeff Roberson } 83315dc847eSJeff Roberson return (ke); 83415dc847eSJeff Roberson } 83515dc847eSJeff Roberson 836a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 837245f3abfSJeff Roberson } 8380a016a05SJeff Roberson 8390a016a05SJeff Roberson static void 8400a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 8410a016a05SJeff Roberson { 84215dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 84315dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 844a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 84515dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 84615dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 8477cd650a9SJeff Roberson kseq->ksq_load = 0; 848ef1134c9SJeff Roberson kseq->ksq_load_timeshare = 0; 8490a016a05SJeff Roberson } 8500a016a05SJeff Roberson 85135e6168fSJeff Roberson static void 85235e6168fSJeff Roberson sched_setup(void *dummy) 85335e6168fSJeff Roberson { 8540ec896fdSJeff Roberson #ifdef SMP 855cac77d04SJeff Roberson int balance_groups; 85635e6168fSJeff Roberson int i; 8570ec896fdSJeff Roberson #endif 85835e6168fSJeff Roberson 859e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 860e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 861e1f89c22SJeff Roberson 862356500a3SJeff Roberson #ifdef SMP 863cac77d04SJeff Roberson balance_groups = 0; 86480f86c9fSJeff Roberson /* 86580f86c9fSJeff Roberson * Initialize the kseqs. 
86680f86c9fSJeff Roberson */ 867749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 86880f86c9fSJeff Roberson struct kseq *ksq; 86980f86c9fSJeff Roberson 87080f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 87180f86c9fSJeff Roberson ksq->ksq_assigned = NULL; 872749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 87380f86c9fSJeff Roberson } 87480f86c9fSJeff Roberson if (smp_topology == NULL) { 87580f86c9fSJeff Roberson struct kseq_group *ksg; 87680f86c9fSJeff Roberson struct kseq *ksq; 87780f86c9fSJeff Roberson 87880f86c9fSJeff Roberson for (i = 0; i < MAXCPU; i++) { 87980f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 88080f86c9fSJeff Roberson ksg = &kseq_groups[i]; 88180f86c9fSJeff Roberson /* 882dc03363dSJeff Roberson * Setup a kseq group with one member. 88380f86c9fSJeff Roberson */ 88480f86c9fSJeff Roberson ksq->ksq_transferable = 0; 88580f86c9fSJeff Roberson ksq->ksq_group = ksg; 88680f86c9fSJeff Roberson ksg->ksg_cpus = 1; 88780f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 88880f86c9fSJeff Roberson ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 889cac77d04SJeff Roberson ksg->ksg_load = 0; 89080f86c9fSJeff Roberson ksg->ksg_transferable = 0; 89180f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 89280f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 893749d01b0SJeff Roberson } 894749d01b0SJeff Roberson } else { 89580f86c9fSJeff Roberson struct kseq_group *ksg; 89680f86c9fSJeff Roberson struct cpu_group *cg; 897749d01b0SJeff Roberson int j; 898749d01b0SJeff Roberson 899749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 900749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 90180f86c9fSJeff Roberson ksg = &kseq_groups[i]; 90280f86c9fSJeff Roberson /* 90380f86c9fSJeff Roberson * Initialize the group. 
90480f86c9fSJeff Roberson */ 90580f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 906cac77d04SJeff Roberson ksg->ksg_load = 0; 90780f86c9fSJeff Roberson ksg->ksg_transferable = 0; 90880f86c9fSJeff Roberson ksg->ksg_cpus = cg->cg_count; 90980f86c9fSJeff Roberson ksg->ksg_cpumask = cg->cg_mask; 91080f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 91180f86c9fSJeff Roberson /* 91280f86c9fSJeff Roberson * Find all of the group members and add them. 91380f86c9fSJeff Roberson */ 91480f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 91580f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 91680f86c9fSJeff Roberson if (ksg->ksg_mask == 0) 91780f86c9fSJeff Roberson ksg->ksg_mask = 1 << j; 91880f86c9fSJeff Roberson kseq_cpu[j].ksq_transferable = 0; 91980f86c9fSJeff Roberson kseq_cpu[j].ksq_group = ksg; 92080f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, 92180f86c9fSJeff Roberson &kseq_cpu[j], ksq_siblings); 92280f86c9fSJeff Roberson } 92380f86c9fSJeff Roberson } 924cac77d04SJeff Roberson if (ksg->ksg_cpus > 1) 925cac77d04SJeff Roberson balance_groups = 1; 926749d01b0SJeff Roberson } 927cac77d04SJeff Roberson ksg_maxid = smp_topology->ct_count - 1; 928749d01b0SJeff Roberson } 929cac77d04SJeff Roberson /* 930cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 931cac77d04SJeff Roberson * interfere with each other. 
932cac77d04SJeff Roberson */ 933dc03363dSJeff Roberson bal_tick = ticks + hz; 934cac77d04SJeff Roberson if (balance_groups) 935dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 936749d01b0SJeff Roberson #else 937749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 938356500a3SJeff Roberson #endif 939749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 940155b9987SJeff Roberson kseq_load_add(KSEQ_SELF(), &kse0); 941749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 94235e6168fSJeff Roberson } 94335e6168fSJeff Roberson 94435e6168fSJeff Roberson /* 94535e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 94635e6168fSJeff Roberson * process. 94735e6168fSJeff Roberson */ 94815dc847eSJeff Roberson static void 94935e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 95035e6168fSJeff Roberson { 95135e6168fSJeff Roberson int pri; 95235e6168fSJeff Roberson 95335e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 95415dc847eSJeff Roberson return; 95535e6168fSJeff Roberson 95615dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 957e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 958fa885116SJulian Elischer pri += kg->kg_proc->p_nice; 95935e6168fSJeff Roberson 96035e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 96135e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 96235e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 96335e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 96435e6168fSJeff Roberson 96535e6168fSJeff Roberson kg->kg_user_pri = pri; 96635e6168fSJeff Roberson 96715dc847eSJeff Roberson return; 96835e6168fSJeff Roberson } 96935e6168fSJeff Roberson 97035e6168fSJeff Roberson /* 971245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 972a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 
97335e6168fSJeff Roberson */ 974245f3abfSJeff Roberson static void 975245f3abfSJeff Roberson sched_slice(struct kse *ke) 97635e6168fSJeff Roberson { 97715dc847eSJeff Roberson struct kseq *kseq; 978245f3abfSJeff Roberson struct ksegrp *kg; 97935e6168fSJeff Roberson 980245f3abfSJeff Roberson kg = ke->ke_ksegrp; 98115dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 98235e6168fSJeff Roberson 983245f3abfSJeff Roberson /* 984245f3abfSJeff Roberson * Rationale: 9852454aaf5SJeff Roberson * KSEs in interactive ksegs get a minimal slice so that we 986245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 987245f3abfSJeff Roberson * 988245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 989245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 990245f3abfSJeff Roberson * on the run queue for this cpu. 991245f3abfSJeff Roberson * 992245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 993245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 994245f3abfSJeff Roberson * this when they first expire. 995245f3abfSJeff Roberson * 996245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 997245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 998245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 999245f3abfSJeff Roberson * 10007d1a81b4SJeff Roberson * If the kse is outside of the window it will get no slice 10017d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 10027d1a81b4SJeff Roberson * run queue. The exception to this is nice 0 ksegs when 10037d1a81b4SJeff Roberson * a nice -20 is running. They are always granted a minimum 10047d1a81b4SJeff Roberson * slice. 
1005245f3abfSJeff Roberson */ 100615dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 1007245f3abfSJeff Roberson int nice; 1008245f3abfSJeff Roberson 1009fa885116SJulian Elischer nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1010ef1134c9SJeff Roberson if (kseq->ksq_load_timeshare == 0 || 1011fa885116SJulian Elischer kg->kg_proc->p_nice < kseq->ksq_nicemin) 1012245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 10137d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1014245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 1015fa885116SJulian Elischer else if (kg->kg_proc->p_nice == 0) 10167d1a81b4SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 1017245f3abfSJeff Roberson else 1018245f3abfSJeff Roberson ke->ke_slice = 0; 1019245f3abfSJeff Roberson } else 10209b5f6f62SJeff Roberson ke->ke_slice = SCHED_SLICE_INTERACTIVE; 102135e6168fSJeff Roberson 102215dc847eSJeff Roberson CTR6(KTR_ULE, 102315dc847eSJeff Roberson "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)", 1024fa885116SJulian Elischer ke, ke->ke_slice, kg->kg_proc->p_nice, kseq->ksq_nicemin, 1025ef1134c9SJeff Roberson kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg)); 102615dc847eSJeff Roberson 1027245f3abfSJeff Roberson return; 102835e6168fSJeff Roberson } 102935e6168fSJeff Roberson 1030d322132cSJeff Roberson /* 1031d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1032d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1033d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1034d322132cSJeff Roberson * adjusted to more than double their maximum. 
1035d322132cSJeff Roberson */ 10364b60e324SJeff Roberson static void 10374b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 10384b60e324SJeff Roberson { 1039d322132cSJeff Roberson int sum; 10403f741ca1SJeff Roberson 1041d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1042d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1043d322132cSJeff Roberson return; 1044d322132cSJeff Roberson /* 1045d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1046d322132cSJeff Roberson * will not bring us back into range. Dividing by two here forces 10472454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1048d322132cSJeff Roberson */ 104937a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1050d322132cSJeff Roberson kg->kg_runtime /= 2; 1051d322132cSJeff Roberson kg->kg_slptime /= 2; 1052d322132cSJeff Roberson return; 1053d322132cSJeff Roberson } 1054d322132cSJeff Roberson kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1055d322132cSJeff Roberson kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1056d322132cSJeff Roberson } 1057d322132cSJeff Roberson 1058d322132cSJeff Roberson static void 1059d322132cSJeff Roberson sched_interact_fork(struct ksegrp *kg) 1060d322132cSJeff Roberson { 1061d322132cSJeff Roberson int ratio; 1062d322132cSJeff Roberson int sum; 1063d322132cSJeff Roberson 1064d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1065d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1066d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 1067d322132cSJeff Roberson kg->kg_runtime /= ratio; 1068d322132cSJeff Roberson kg->kg_slptime /= ratio; 10694b60e324SJeff Roberson } 10704b60e324SJeff Roberson } 10714b60e324SJeff Roberson 1072e1f89c22SJeff Roberson static int 1073e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 1074e1f89c22SJeff Roberson { 1075210491d3SJeff Roberson int div; 1076e1f89c22SJeff Roberson 1077e1f89c22SJeff Roberson if 
(kg->kg_runtime > kg->kg_slptime) { 1078210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1079210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 1080210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1081210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 1082210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1083210491d3SJeff Roberson return (kg->kg_runtime / div); 1084e1f89c22SJeff Roberson } 1085e1f89c22SJeff Roberson 1086210491d3SJeff Roberson /* 1087210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1088210491d3SJeff Roberson */ 1089210491d3SJeff Roberson return (0); 1090e1f89c22SJeff Roberson 1091e1f89c22SJeff Roberson } 1092e1f89c22SJeff Roberson 109315dc847eSJeff Roberson /* 109415dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 109515dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 109615dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 109715dc847eSJeff Roberson */ 109835e6168fSJeff Roberson int 109935e6168fSJeff Roberson sched_rr_interval(void) 110035e6168fSJeff Roberson { 110135e6168fSJeff Roberson return (SCHED_SLICE_MAX); 110235e6168fSJeff Roberson } 110335e6168fSJeff Roberson 110422bf7d9aSJeff Roberson static void 110535e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 110635e6168fSJeff Roberson { 110735e6168fSJeff Roberson /* 110835e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1109210491d3SJeff Roberson */ 111081de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1111210491d3SJeff Roberson /* 111281de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 111381de51bfSJeff Roberson * round away our results. 
111465c8760dSJeff Roberson */ 111565c8760dSJeff Roberson ke->ke_ticks <<= 10; 111681de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 111735e6168fSJeff Roberson SCHED_CPU_TICKS; 111865c8760dSJeff Roberson ke->ke_ticks >>= 10; 111981de51bfSJeff Roberson } else 112081de51bfSJeff Roberson ke->ke_ticks = 0; 112135e6168fSJeff Roberson ke->ke_ltick = ticks; 112235e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 112335e6168fSJeff Roberson } 112435e6168fSJeff Roberson 112535e6168fSJeff Roberson void 112635e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio) 112735e6168fSJeff Roberson { 11283f741ca1SJeff Roberson struct kse *ke; 112935e6168fSJeff Roberson 11303f741ca1SJeff Roberson ke = td->td_kse; 113135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 113235e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 11333f741ca1SJeff Roberson /* 11343f741ca1SJeff Roberson * If the priority has been elevated due to priority 11353f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 11363f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 11373f741ca1SJeff Roberson * needs to fix things up. 
11383f741ca1SJeff Roberson */ 1139769a3635SJeff Roberson if (prio < td->td_priority && ke && 1140769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 114122bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 11423f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 11433f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 11443f741ca1SJeff Roberson runq_add(ke->ke_runq, ke); 114535e6168fSJeff Roberson } 11463f741ca1SJeff Roberson adjustrunqueue(td, prio); 11473f741ca1SJeff Roberson } else 11483f741ca1SJeff Roberson td->td_priority = prio; 114935e6168fSJeff Roberson } 115035e6168fSJeff Roberson 115135e6168fSJeff Roberson void 1152bf0acc27SJohn Baldwin sched_switch(struct thread *td, struct thread *newtd) 115335e6168fSJeff Roberson { 115435e6168fSJeff Roberson struct kse *ke; 115535e6168fSJeff Roberson 115635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 115735e6168fSJeff Roberson 115835e6168fSJeff Roberson ke = td->td_kse; 115935e6168fSJeff Roberson 116035e6168fSJeff Roberson td->td_last_kse = ke; 1161060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1162060563ecSJulian Elischer td->td_oncpu = NOCPU; 116352eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 116452eb8464SJohn Baldwin td->td_pflags &= ~TDP_OWEPREEMPT; 116535e6168fSJeff Roberson 1166b11fdad0SJeff Roberson /* 1167b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1168b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 
1169b11fdad0SJeff Roberson */ 1170b11fdad0SJeff Roberson if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 11712454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1172bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 11732454aaf5SJeff Roberson } else if (TD_IS_RUNNING(td)) { 1174155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1175ab2baa72SDavid Xu setrunqueue(td); 11760e0f6266SJeff Roberson } else { 117733916c36SJeff Roberson if (ke->ke_runq) { 1178155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 117933916c36SJeff Roberson } else if ((td->td_flags & TDF_IDLETD) == 0) 11802c3490b1SMarcel Moolenaar kdb_backtrace(); 118135e6168fSJeff Roberson /* 118235e6168fSJeff Roberson * We will not be on the run queue. So we must be 118335e6168fSJeff Roberson * sleeping or similar. 118435e6168fSJeff Roberson */ 11850e2a4d3aSDavid Xu if (td->td_proc->p_flag & P_SA) 118635e6168fSJeff Roberson kse_reassign(ke); 11870e0f6266SJeff Roberson } 1188b11fdad0SJeff Roberson } 11892454aaf5SJeff Roberson if (newtd != NULL) { 1190bf0acc27SJohn Baldwin kseq_load_add(KSEQ_SELF(), newtd->td_kse); 11912454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 11922454aaf5SJeff Roberson ke->ke_runq = KSEQ_SELF()->ksq_curr; 11932454aaf5SJeff Roberson } else 11942454aaf5SJeff Roberson newtd = choosethread(); 1195ae53b483SJeff Roberson if (td != newtd) 1196ae53b483SJeff Roberson cpu_switch(td, newtd); 1197ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 119835e6168fSJeff Roberson 1199060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 120035e6168fSJeff Roberson } 120135e6168fSJeff Roberson 120235e6168fSJeff Roberson void 1203fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 120435e6168fSJeff Roberson { 1205fa885116SJulian Elischer struct ksegrp *kg; 120615dc847eSJeff Roberson struct kse *ke; 120735e6168fSJeff Roberson struct thread *td; 120815dc847eSJeff Roberson struct kseq *kseq; 120935e6168fSJeff Roberson 1210fa885116SJulian Elischer PROC_LOCK_ASSERT(p, 
MA_OWNED); 12110b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 121215dc847eSJeff Roberson /* 121315dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 121415dc847eSJeff Roberson */ 1215fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 1216fa885116SJulian Elischer if (kg->kg_pri_class == PRI_TIMESHARE) { 121715dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 1218d07ac847SJeff Roberson if (ke->ke_runq == NULL) 121915dc847eSJeff Roberson continue; 122015dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1221fa885116SJulian Elischer kseq_nice_rem(kseq, p->p_nice); 122215dc847eSJeff Roberson kseq_nice_add(kseq, nice); 122315dc847eSJeff Roberson } 1224fa885116SJulian Elischer } 1225fa885116SJulian Elischer } 1226fa885116SJulian Elischer p->p_nice = nice; 1227fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 122835e6168fSJeff Roberson sched_priority(kg); 122915dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 12304a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 123135e6168fSJeff Roberson } 1232fa885116SJulian Elischer } 123335e6168fSJeff Roberson 123435e6168fSJeff Roberson void 123544f3b092SJohn Baldwin sched_sleep(struct thread *td) 123635e6168fSJeff Roberson { 123735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 123835e6168fSJeff Roberson 123935e6168fSJeff Roberson td->td_slptime = ticks; 124044f3b092SJohn Baldwin td->td_base_pri = td->td_priority; 124135e6168fSJeff Roberson 124215dc847eSJeff Roberson CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 124315dc847eSJeff Roberson td->td_kse, td->td_slptime); 124435e6168fSJeff Roberson } 124535e6168fSJeff Roberson 124635e6168fSJeff Roberson void 124735e6168fSJeff Roberson sched_wakeup(struct thread *td) 124835e6168fSJeff Roberson { 124935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 125035e6168fSJeff Roberson 125135e6168fSJeff Roberson /* 125235e6168fSJeff Roberson * Let the kseg know how long we slept for. 
This is because process 125335e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 125435e6168fSJeff Roberson */ 125535e6168fSJeff Roberson if (td->td_slptime) { 1256f1e8dc4aSJeff Roberson struct ksegrp *kg; 125715dc847eSJeff Roberson int hzticks; 1258f1e8dc4aSJeff Roberson 1259f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1260d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1261d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1262d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1263d322132cSJeff Roberson kg->kg_runtime = 1; 1264d322132cSJeff Roberson } else { 1265d322132cSJeff Roberson kg->kg_slptime += hzticks; 12664b60e324SJeff Roberson sched_interact_update(kg); 1267d322132cSJeff Roberson } 1268f1e8dc4aSJeff Roberson sched_priority(kg); 12694b60e324SJeff Roberson if (td->td_kse) 12704b60e324SJeff Roberson sched_slice(td->td_kse); 127115dc847eSJeff Roberson CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 127215dc847eSJeff Roberson td->td_kse, hzticks); 127335e6168fSJeff Roberson td->td_slptime = 0; 1274f1e8dc4aSJeff Roberson } 127535e6168fSJeff Roberson setrunqueue(td); 127635e6168fSJeff Roberson } 127735e6168fSJeff Roberson 127835e6168fSJeff Roberson /* 127935e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 128035e6168fSJeff Roberson * priority. 
128135e6168fSJeff Roberson */ 128235e6168fSJeff Roberson void 128355d44f79SJulian Elischer sched_fork(struct thread *td, struct proc *p1) 128435e6168fSJeff Roberson { 128535e6168fSJeff Roberson 128635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 128735e6168fSJeff Roberson 128855d44f79SJulian Elischer p1->p_nice = td->td_proc->p_nice; 128955d44f79SJulian Elischer sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(p1)); 129055d44f79SJulian Elischer sched_fork_kse(td, FIRST_KSE_IN_PROC(p1)); 129155d44f79SJulian Elischer sched_fork_thread(td, FIRST_THREAD_IN_PROC(p1)); 129215dc847eSJeff Roberson } 129315dc847eSJeff Roberson 129415dc847eSJeff Roberson void 129555d44f79SJulian Elischer sched_fork_kse(struct thread *td, struct kse *child) 129615dc847eSJeff Roberson { 12972056d0a1SJohn Baldwin 129855d44f79SJulian Elischer struct kse *ke = td->td_kse; 129955d44f79SJulian Elischer 1300210491d3SJeff Roberson child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1301093c05e3SJeff Roberson child->ke_cpu = ke->ke_cpu; 130215dc847eSJeff Roberson child->ke_runq = NULL; 130315dc847eSJeff Roberson 1304736c97c7SJeff Roberson /* Grab our parents cpu estimation information. 
*/ 1305736c97c7SJeff Roberson child->ke_ticks = ke->ke_ticks; 1306736c97c7SJeff Roberson child->ke_ltick = ke->ke_ltick; 1307736c97c7SJeff Roberson child->ke_ftick = ke->ke_ftick; 130815dc847eSJeff Roberson } 130915dc847eSJeff Roberson 131015dc847eSJeff Roberson void 131155d44f79SJulian Elischer sched_fork_ksegrp(struct thread *td, struct ksegrp *child) 131215dc847eSJeff Roberson { 131355d44f79SJulian Elischer struct ksegrp *kg = td->td_ksegrp; 13142056d0a1SJohn Baldwin PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); 1315210491d3SJeff Roberson 1316d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1317d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1318d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 1319d322132cSJeff Roberson sched_interact_fork(child); 13204b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 13214b60e324SJeff Roberson sched_interact_update(kg); 132215dc847eSJeff Roberson 1323d322132cSJeff Roberson CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)", 1324d322132cSJeff Roberson kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime, 1325d322132cSJeff Roberson child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime); 1326c9f25d8fSJeff Roberson } 1327c9f25d8fSJeff Roberson 132815dc847eSJeff Roberson void 132915dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 133015dc847eSJeff Roberson { 133115dc847eSJeff Roberson } 133215dc847eSJeff Roberson 133315dc847eSJeff Roberson void 133415dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 133515dc847eSJeff Roberson { 133615dc847eSJeff Roberson struct kseq *kseq; 133715dc847eSJeff Roberson struct kse *ke; 1338ef1134c9SJeff Roberson int nclass; 1339ef1134c9SJeff Roberson int oclass; 134015dc847eSJeff Roberson 13412056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 134215dc847eSJeff Roberson if (kg->kg_pri_class == class) 134315dc847eSJeff Roberson return; 134415dc847eSJeff Roberson 1345ef1134c9SJeff Roberson nclass = 
PRI_BASE(class); 1346ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 134715dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 134815dc847eSJeff Roberson if (ke->ke_state != KES_ONRUNQ && 134915dc847eSJeff Roberson ke->ke_state != KES_THREAD) 135015dc847eSJeff Roberson continue; 135115dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 135215dc847eSJeff Roberson 1353ef1134c9SJeff Roberson #ifdef SMP 1354155b9987SJeff Roberson /* 1355155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1356155b9987SJeff Roberson * count because could be changing to or from an interrupt 1357155b9987SJeff Roberson * class. 1358155b9987SJeff Roberson */ 1359155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 136080f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, oclass)) { 136180f86c9fSJeff Roberson kseq->ksq_transferable--; 136280f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 136380f86c9fSJeff Roberson } 136480f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, nclass)) { 136580f86c9fSJeff Roberson kseq->ksq_transferable++; 136680f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 136780f86c9fSJeff Roberson } 1368155b9987SJeff Roberson } 1369ef1134c9SJeff Roberson #endif 1370155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1371ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 1372fa885116SJulian Elischer kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1373155b9987SJeff Roberson } 1374155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1375155b9987SJeff Roberson kseq->ksq_load_timeshare++; 1376fa885116SJulian Elischer kseq_nice_add(kseq, kg->kg_proc->p_nice); 137715dc847eSJeff Roberson } 1378155b9987SJeff Roberson } 137915dc847eSJeff Roberson 138015dc847eSJeff Roberson kg->kg_pri_class = class; 138135e6168fSJeff Roberson } 138235e6168fSJeff Roberson 138335e6168fSJeff Roberson /* 138435e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 
138535e6168fSJeff Roberson */ 138635e6168fSJeff Roberson void 138755d44f79SJulian Elischer sched_exit(struct proc *p, struct thread *td) 138835e6168fSJeff Roberson { 138935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 139055d44f79SJulian Elischer sched_exit_kse(FIRST_KSE_IN_PROC(p), td); 139155d44f79SJulian Elischer sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td); 1392141ad61cSJeff Roberson } 1393141ad61cSJeff Roberson 1394141ad61cSJeff Roberson void 139555d44f79SJulian Elischer sched_exit_kse(struct kse *ke, struct thread *td) 1396141ad61cSJeff Roberson { 139755d44f79SJulian Elischer kseq_load_rem(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 1398141ad61cSJeff Roberson } 1399141ad61cSJeff Roberson 1400141ad61cSJeff Roberson void 140155d44f79SJulian Elischer sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1402141ad61cSJeff Roberson { 140355d44f79SJulian Elischer /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 140455d44f79SJulian Elischer kg->kg_runtime += td->td_ksegrp->kg_runtime; 14054b60e324SJeff Roberson sched_interact_update(kg); 1406141ad61cSJeff Roberson } 1407141ad61cSJeff Roberson 1408141ad61cSJeff Roberson void 1409141ad61cSJeff Roberson sched_exit_thread(struct thread *td, struct thread *child) 1410141ad61cSJeff Roberson { 141135e6168fSJeff Roberson } 141235e6168fSJeff Roberson 141335e6168fSJeff Roberson void 14147cf90fb3SJeff Roberson sched_clock(struct thread *td) 141535e6168fSJeff Roberson { 141635e6168fSJeff Roberson struct kseq *kseq; 14170a016a05SJeff Roberson struct ksegrp *kg; 14187cf90fb3SJeff Roberson struct kse *ke; 141935e6168fSJeff Roberson 1420dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 14212454aaf5SJeff Roberson kseq = KSEQ_SELF(); 1422dc03363dSJeff Roberson #ifdef SMP 1423dc03363dSJeff Roberson if (ticks == bal_tick) 1424dc03363dSJeff Roberson sched_balance(); 1425dc03363dSJeff Roberson if (ticks == gbal_tick) 1426dc03363dSJeff Roberson sched_balance_groups(); 14272454aaf5SJeff Roberson /* 14282454aaf5SJeff 
Roberson * We could have been assigned a non real-time thread without an 14292454aaf5SJeff Roberson * IPI. 14302454aaf5SJeff Roberson */ 14312454aaf5SJeff Roberson if (kseq->ksq_assigned) 14322454aaf5SJeff Roberson kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1433dc03363dSJeff Roberson #endif 143415dc847eSJeff Roberson /* 143515dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 143615dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 143715dc847eSJeff Roberson * calculating this here. 143815dc847eSJeff Roberson */ 143915dc847eSJeff Roberson if (realstathz == 0) { 144015dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 144115dc847eSJeff Roberson tickincr = hz / realstathz; 144215dc847eSJeff Roberson /* 144315dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 144415dc847eSJeff Roberson * larger than hz. 144515dc847eSJeff Roberson */ 144615dc847eSJeff Roberson if (tickincr == 0) 144715dc847eSJeff Roberson tickincr = 1; 144815dc847eSJeff Roberson } 144935e6168fSJeff Roberson 14507cf90fb3SJeff Roberson ke = td->td_kse; 145115dc847eSJeff Roberson kg = ke->ke_ksegrp; 145235e6168fSJeff Roberson 14530a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 145465c8760dSJeff Roberson ke->ke_ticks++; 1455d465fb95SJeff Roberson ke->ke_ltick = ticks; 1456a8949de2SJeff Roberson 1457d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1458d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1459d465fb95SJeff Roberson sched_pctcpu_update(ke); 1460d465fb95SJeff Roberson 146143fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 146235e6168fSJeff Roberson return; 14630a016a05SJeff Roberson 146415dc847eSJeff Roberson CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)", 146515dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10); 14663f741ca1SJeff Roberson /* 
1467a8949de2SJeff Roberson * We only do slicing code for TIMESHARE ksegrps. 1468a8949de2SJeff Roberson */ 1469a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1470a8949de2SJeff Roberson return; 1471a8949de2SJeff Roberson /* 147215dc847eSJeff Roberson * We used a tick charge it to the ksegrp so that we can compute our 147315dc847eSJeff Roberson * interactivity. 147415dc847eSJeff Roberson */ 147515dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 14764b60e324SJeff Roberson sched_interact_update(kg); 1477407b0157SJeff Roberson 147835e6168fSJeff Roberson /* 147935e6168fSJeff Roberson * We used up one time slice. 148035e6168fSJeff Roberson */ 1481093c05e3SJeff Roberson if (--ke->ke_slice > 0) 148215dc847eSJeff Roberson return; 148335e6168fSJeff Roberson /* 148415dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 148535e6168fSJeff Roberson */ 1486155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1487e1f89c22SJeff Roberson sched_priority(kg); 148815dc847eSJeff Roberson sched_slice(ke); 148915dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 149015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 149115dc847eSJeff Roberson else 149215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1493155b9987SJeff Roberson kseq_load_add(kseq, ke); 14944a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 149535e6168fSJeff Roberson } 149635e6168fSJeff Roberson 149735e6168fSJeff Roberson int 149835e6168fSJeff Roberson sched_runnable(void) 149935e6168fSJeff Roberson { 150035e6168fSJeff Roberson struct kseq *kseq; 1501b90816f1SJeff Roberson int load; 150235e6168fSJeff Roberson 1503b90816f1SJeff Roberson load = 1; 1504b90816f1SJeff Roberson 15050a016a05SJeff Roberson kseq = KSEQ_SELF(); 150622bf7d9aSJeff Roberson #ifdef SMP 150746f8b265SJeff Roberson if (kseq->ksq_assigned) { 150846f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 150922bf7d9aSJeff Roberson kseq_assign(kseq); 151046f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 151146f8b265SJeff 
Roberson } 151222bf7d9aSJeff Roberson #endif 15133f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 15143f741ca1SJeff Roberson if (kseq->ksq_load > 0) 15153f741ca1SJeff Roberson goto out; 15163f741ca1SJeff Roberson } else 15173f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1518b90816f1SJeff Roberson goto out; 1519b90816f1SJeff Roberson load = 0; 1520b90816f1SJeff Roberson out: 1521b90816f1SJeff Roberson return (load); 152235e6168fSJeff Roberson } 152335e6168fSJeff Roberson 152435e6168fSJeff Roberson void 152535e6168fSJeff Roberson sched_userret(struct thread *td) 152635e6168fSJeff Roberson { 152735e6168fSJeff Roberson struct ksegrp *kg; 152835e6168fSJeff Roberson 152935e6168fSJeff Roberson kg = td->td_ksegrp; 153035e6168fSJeff Roberson 153135e6168fSJeff Roberson if (td->td_priority != kg->kg_user_pri) { 153235e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 153335e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 153435e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 153535e6168fSJeff Roberson } 153635e6168fSJeff Roberson } 153735e6168fSJeff Roberson 1538c9f25d8fSJeff Roberson struct kse * 1539c9f25d8fSJeff Roberson sched_choose(void) 1540c9f25d8fSJeff Roberson { 15410a016a05SJeff Roberson struct kseq *kseq; 1542c9f25d8fSJeff Roberson struct kse *ke; 154315dc847eSJeff Roberson 1544b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 154522bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 154615dc847eSJeff Roberson #ifdef SMP 154780f86c9fSJeff Roberson restart: 154822bf7d9aSJeff Roberson if (kseq->ksq_assigned) 154922bf7d9aSJeff Roberson kseq_assign(kseq); 155015dc847eSJeff Roberson #endif 155122bf7d9aSJeff Roberson ke = kseq_choose(kseq); 155235e6168fSJeff Roberson if (ke) { 155322bf7d9aSJeff Roberson #ifdef SMP 155422bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 155580f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 155680f86c9fSJeff Roberson goto restart; 155722bf7d9aSJeff Roberson #endif 1558155b9987SJeff Roberson 
kseq_runq_rem(kseq, ke); 155935e6168fSJeff Roberson ke->ke_state = KES_THREAD; 1560245f3abfSJeff Roberson 156115dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 156215dc847eSJeff Roberson CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)", 156315dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, 156415dc847eSJeff Roberson ke->ke_thread->td_priority); 1565245f3abfSJeff Roberson } 156615dc847eSJeff Roberson return (ke); 156735e6168fSJeff Roberson } 1568c9f25d8fSJeff Roberson #ifdef SMP 156980f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 157080f86c9fSJeff Roberson goto restart; 1571c9f25d8fSJeff Roberson #endif 157215dc847eSJeff Roberson return (NULL); 157335e6168fSJeff Roberson } 157435e6168fSJeff Roberson 157535e6168fSJeff Roberson void 15767cf90fb3SJeff Roberson sched_add(struct thread *td) 157735e6168fSJeff Roberson { 157863fcce68SJohn Baldwin 157963fcce68SJohn Baldwin sched_add_internal(td, 1); 158063fcce68SJohn Baldwin } 158163fcce68SJohn Baldwin 158263fcce68SJohn Baldwin static void 158363fcce68SJohn Baldwin sched_add_internal(struct thread *td, int preemptive) 158463fcce68SJohn Baldwin { 1585c9f25d8fSJeff Roberson struct kseq *kseq; 158615dc847eSJeff Roberson struct ksegrp *kg; 15877cf90fb3SJeff Roberson struct kse *ke; 15882454aaf5SJeff Roberson #ifdef SMP 15892454aaf5SJeff Roberson int canmigrate; 15902454aaf5SJeff Roberson #endif 159122bf7d9aSJeff Roberson int class; 1592c9f25d8fSJeff Roberson 159322bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 15947cf90fb3SJeff Roberson ke = td->td_kse; 15957cf90fb3SJeff Roberson kg = td->td_ksegrp; 159622bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 159722bf7d9aSJeff Roberson return; 159822bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 1599c494ddc8SJeff Roberson KASSERT((ke->ke_thread != NULL), 1600c494ddc8SJeff Roberson ("sched_add: No thread on KSE")); 16015d7ef00cSJeff Roberson KASSERT((ke->ke_thread->td_kse != NULL), 16025d7ef00cSJeff Roberson ("sched_add: No KSE on 
thread")); 16035d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 16045d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 16055d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 16065d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 16075d7ef00cSJeff Roberson ("sched_add: process swapped out")); 16089bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 16099bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 16105d7ef00cSJeff Roberson 161122bf7d9aSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 161222bf7d9aSJeff Roberson switch (class) { 1613a8949de2SJeff Roberson case PRI_ITHD: 1614a8949de2SJeff Roberson case PRI_REALTIME: 161515dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 161615dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 16177cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1618a8949de2SJeff Roberson break; 1619a8949de2SJeff Roberson case PRI_TIMESHARE: 162015dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 162115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 162215dc847eSJeff Roberson else 162315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 162415dc847eSJeff Roberson break; 162515dc847eSJeff Roberson case PRI_IDLE: 162615dc847eSJeff Roberson /* 162715dc847eSJeff Roberson * This is for priority prop. 162815dc847eSJeff Roberson */ 16293f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 163015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 163115dc847eSJeff Roberson else 163215dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 163315dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 163415dc847eSJeff Roberson break; 163515dc847eSJeff Roberson default: 1636d322132cSJeff Roberson panic("Unknown pri class."); 1637a8949de2SJeff Roberson break; 1638a6ed4186SJeff Roberson } 163922bf7d9aSJeff Roberson #ifdef SMP 16402454aaf5SJeff Roberson /* 16412454aaf5SJeff Roberson * Don't migrate running threads here. 
Force the long term balancer 16422454aaf5SJeff Roberson * to do it. 16432454aaf5SJeff Roberson */ 16442454aaf5SJeff Roberson canmigrate = KSE_CAN_MIGRATE(ke, class); 16452454aaf5SJeff Roberson if (TD_IS_RUNNING(td)) 16462454aaf5SJeff Roberson canmigrate = 0; 16472454aaf5SJeff Roberson 16482454aaf5SJeff Roberson /* 16492454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 16502454aaf5SJeff Roberson */ 16512454aaf5SJeff Roberson if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 165286e1c22aSJeff Roberson ke->ke_runq = NULL; 165380f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 165480f86c9fSJeff Roberson return; 165580f86c9fSJeff Roberson } 165622bf7d9aSJeff Roberson /* 1657670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1658670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 165922bf7d9aSJeff Roberson */ 166080f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 166180f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 166280f86c9fSJeff Roberson /* 166380f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 166480f86c9fSJeff Roberson * from the global idle mask. 166580f86c9fSJeff Roberson */ 166680f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 166780f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 166880f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 166980f86c9fSJeff Roberson /* 167080f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 
167180f86c9fSJeff Roberson */ 167280f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 16732454aaf5SJeff Roberson } else if (kseq->ksq_load > 1 && canmigrate) 1674670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1675670c524fSJeff Roberson return; 16762454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 167722bf7d9aSJeff Roberson #endif 16782454aaf5SJeff Roberson /* 16792454aaf5SJeff Roberson * XXX With preemption this is not necessary. 16802454aaf5SJeff Roberson */ 168122bf7d9aSJeff Roberson if (td->td_priority < curthread->td_priority) 168222bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 168363fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 16840c0b25aeSJohn Baldwin return; 168535e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses++; 168635e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 168735e6168fSJeff Roberson 1688155b9987SJeff Roberson kseq_runq_add(kseq, ke); 1689155b9987SJeff Roberson kseq_load_add(kseq, ke); 169035e6168fSJeff Roberson } 169135e6168fSJeff Roberson 169235e6168fSJeff Roberson void 16937cf90fb3SJeff Roberson sched_rem(struct thread *td) 169435e6168fSJeff Roberson { 169515dc847eSJeff Roberson struct kseq *kseq; 16967cf90fb3SJeff Roberson struct kse *ke; 16977cf90fb3SJeff Roberson 16987cf90fb3SJeff Roberson ke = td->td_kse; 169922bf7d9aSJeff Roberson /* 170022bf7d9aSJeff Roberson * It is safe to just return here because sched_rem() is only ever 170122bf7d9aSJeff Roberson * used in places where we're immediately going to add the 170222bf7d9aSJeff Roberson * kse back on again. In that case it'll be added with the correct 170322bf7d9aSJeff Roberson * thread and priority when the caller drops the sched_lock. 
170422bf7d9aSJeff Roberson */ 170522bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 170622bf7d9aSJeff Roberson return; 170735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1708c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1709c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 171035e6168fSJeff Roberson 171135e6168fSJeff Roberson ke->ke_state = KES_THREAD; 171235e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 171315dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1714155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1715155b9987SJeff Roberson kseq_load_rem(kseq, ke); 171635e6168fSJeff Roberson } 171735e6168fSJeff Roberson 171835e6168fSJeff Roberson fixpt_t 17197cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 172035e6168fSJeff Roberson { 172135e6168fSJeff Roberson fixpt_t pctcpu; 17227cf90fb3SJeff Roberson struct kse *ke; 172335e6168fSJeff Roberson 172435e6168fSJeff Roberson pctcpu = 0; 17257cf90fb3SJeff Roberson ke = td->td_kse; 1726484288deSJeff Roberson if (ke == NULL) 1727484288deSJeff Roberson return (0); 172835e6168fSJeff Roberson 1729b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 173035e6168fSJeff Roberson if (ke->ke_ticks) { 173135e6168fSJeff Roberson int rtick; 173235e6168fSJeff Roberson 1733210491d3SJeff Roberson /* 1734210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1735210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1736210491d3SJeff Roberson * rounding errors. 1737210491d3SJeff Roberson */ 17382e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 17392e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 174035e6168fSJeff Roberson sched_pctcpu_update(ke); 174135e6168fSJeff Roberson /* How many rtick per second ? 
*/ 1742210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 17437121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 174435e6168fSJeff Roberson } 174535e6168fSJeff Roberson 174635e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1747828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 174835e6168fSJeff Roberson 174935e6168fSJeff Roberson return (pctcpu); 175035e6168fSJeff Roberson } 175135e6168fSJeff Roberson 17529bacd788SJeff Roberson void 17539bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 17549bacd788SJeff Roberson { 17559bacd788SJeff Roberson struct kse *ke; 17569bacd788SJeff Roberson 17579bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17589bacd788SJeff Roberson ke = td->td_kse; 17599bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 176080f86c9fSJeff Roberson #ifdef SMP 176180f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 17629bacd788SJeff Roberson return; 17639bacd788SJeff Roberson /* sched_rem without the runq_remove */ 17649bacd788SJeff Roberson ke->ke_state = KES_THREAD; 17659bacd788SJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 1766155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 17679bacd788SJeff Roberson kseq_notify(ke, cpu); 17689bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
*/ 1769279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 17709bacd788SJeff Roberson #endif 17719bacd788SJeff Roberson } 17729bacd788SJeff Roberson 17739bacd788SJeff Roberson void 17749bacd788SJeff Roberson sched_unbind(struct thread *td) 17759bacd788SJeff Roberson { 17769bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17779bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 17789bacd788SJeff Roberson } 17799bacd788SJeff Roberson 178035e6168fSJeff Roberson int 178133916c36SJeff Roberson sched_load(void) 178233916c36SJeff Roberson { 178333916c36SJeff Roberson #ifdef SMP 178433916c36SJeff Roberson int total; 178533916c36SJeff Roberson int i; 178633916c36SJeff Roberson 178733916c36SJeff Roberson total = 0; 178833916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 178933916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 179033916c36SJeff Roberson return (total); 179133916c36SJeff Roberson #else 179233916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 179333916c36SJeff Roberson #endif 179433916c36SJeff Roberson } 179533916c36SJeff Roberson 179633916c36SJeff Roberson int 179735e6168fSJeff Roberson sched_sizeof_kse(void) 179835e6168fSJeff Roberson { 179935e6168fSJeff Roberson return (sizeof(struct kse) + sizeof(struct ke_sched)); 180035e6168fSJeff Roberson } 180135e6168fSJeff Roberson 180235e6168fSJeff Roberson int 180335e6168fSJeff Roberson sched_sizeof_ksegrp(void) 180435e6168fSJeff Roberson { 180535e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 180635e6168fSJeff Roberson } 180735e6168fSJeff Roberson 180835e6168fSJeff Roberson int 180935e6168fSJeff Roberson sched_sizeof_proc(void) 181035e6168fSJeff Roberson { 181135e6168fSJeff Roberson return (sizeof(struct proc)); 181235e6168fSJeff Roberson } 181335e6168fSJeff Roberson 181435e6168fSJeff Roberson int 181535e6168fSJeff Roberson sched_sizeof_thread(void) 181635e6168fSJeff Roberson { 181735e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct 
td_sched)); 181835e6168fSJeff Roberson } 1819