/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define	KTR_ULE	KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

/*
 * These data structures are allocated within their parent data structure but
 * are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	/* CPU that we have affinity for. */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu
#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
#define	KEF_BOUND	KEF_SCHED1	/* KSE can not migrate. */
#define	KEF_XFERABLE	KEF_SCHED2	/* KSE was added as transferable. */
#define	KEF_HOLD	KEF_SCHED3	/* KSE is temporarily bound. */

struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
};
#define	td_slptime	td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    (ke->ke_thread->td_priority < kg->kg_user_pri ||			\
    SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

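/*
 * A rough worked example of the macros above, assuming hz = 1000 (so
 * sched_setup() picks slice_min = 10 and slice_max = 142) and a 64-entry
 * timeshare priority range; the exact figures depend on those values:
 *
 *	SCHED_PRI_NRESV = 41, SCHED_PRI_NHALF = 20, SCHED_SLICE_NTHRESH = 19.
 *	SCHED_PRI_INTERACT(0) = 0 and SCHED_PRI_INTERACT(30) = 19, so a
 *	ksegrp sitting right at SCHED_INTERACT_THRESH starts 19 slots above
 *	PRI_MIN_TIMESHARE before its nice value is added in sched_priority().
 *	SCHED_SLICE_RANGE = 133 and SCHED_SLICE_NICE(10) = 142 - 70 = 72, so
 *	a kseg 10 nice steps above the least nice kseg on its queue receives
 *	roughly half of the maximum slice.
 */
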
/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int			ksq_transferable;
	LIST_ENTRY(kseq)	ksq_siblings;	/* Next in kseq group. */
	struct kseq_group	*ksq_group;	/* Our processor group. */
	volatile struct kse	*ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Simultaneous Multi-Threading) and not
 * NUMA.  In a NUMA environment we'd want an idle bitmap per group and a
 * two-tiered load balancer.
 */
struct kseq_group {
	int		ksg_cpus;	/* Count of CPUs in this kseq group. */
	cpumask_t	ksg_cpumask;	/* Mask of cpus in this group. */
	cpumask_t	ksg_idlemask;	/* Idle cpus in this group. */
	cpumask_t	ksg_mask;	/* Bit mask for first cpu. */
	int		ksg_load;	/* Total load of this group. */
	int		ksg_transferable; /* Transferable load of this group. */
	LIST_HEAD(, kseq) ksg_members;	/* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t kseq_idle;
static int ksg_maxid;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;

#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#define	KSEQ_ID(x)	((x) - kseq_cpu)
#define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
#else	/* !SMP */
static struct kseq	kseq_cpu;

#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_add_internal(struct thread *td, int preemptive);
static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
/*
 * On P4 Xeons the round-robin interrupt delivery is broken.  As a result of
 * this, we can't pin interrupts to the cpu that they were delivered to,
 * otherwise all ithreads only run on CPU 0.
 */
#ifdef __i386__
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
#else /* !__i386__ */
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&		\
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif /* !__i386__ */
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
		kseq->ksq_transferable++;
		kseq->ksq_group->ksg_transferable++;
		ke->ke_flags |= KEF_XFERABLE;
	}
#endif
	runq_add(ke->ke_runq, ke);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (ke->ke_flags & KEF_XFERABLE) {
		kseq->ksq_transferable--;
		kseq->ksq_group->ksg_transferable--;
		ke->ke_flags &= ~KEF_XFERABLE;
	}
#endif
	runq_remove(ke->ke_runq, ke);
}

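/*
 * A note on the bookkeeping above: kseq_runq_add() counts a KSE as
 * transferable (in both the kseq and its kseq_group) only when
 * KSE_CAN_MIGRATE() is true at enqueue time, and tags it KEF_XFERABLE.
 * kseq_runq_rem() keys off that flag rather than re-evaluating
 * KSE_CAN_MIGRATE(), presumably so that the counters stay balanced even if
 * the KSE is bound or pinned while it sits on the run queue.
 */
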
static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load++;
#else
		kseq->ksq_sysload++;
#endif
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		CTR6(KTR_ULE,
		    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
		    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
		    ke->ke_proc->p_nice, kseq->ksq_nicemin);
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_add(kseq, ke->ke_proc->p_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load--;
#else
		kseq->ksq_sysload--;
#endif
	kseq->ksq_load--;
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_rem(kseq, ke->ke_proc->p_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
		kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	kseq->ksq_nice[n]--;
	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != kseq->ksq_nicemin ||
	    kseq->ksq_nice[n] != 0 ||
	    kseq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (kseq->ksq_nice[n]) {
			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm favors
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct kseq_group *high;
	struct kseq_group *low;
	struct kseq_group *ksg;
	int cnt;
	int i;

	if (smp_started == 0)
		goto out;
	low = high = NULL;
	i = random() % (ksg_maxid + 1);
	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
		ksg = KSEQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || ksg->ksg_load > high->ksg_load)
		    && ksg->ksg_transferable)
			high = ksg;
		if (low == NULL || ksg->ksg_load < low->ksg_load)
			low = ksg;
		if (++i > ksg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->ksg_members),
		    LIST_FIRST(&low->ksg_members));
out:
	bal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_groups(void)
{
	int i;

	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= ksg_maxid; i++)
			sched_balance_group(KSEQ_GROUP(i));
	gbal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_group(struct kseq_group *ksg)
{
	struct kseq *kseq;
	struct kseq *high;
	struct kseq *low;
	int load;

	if (ksg->ksg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
		load = kseq->ksq_load;
		if (high == NULL || load > high->ksq_load)
			high = kseq;
		if (low == NULL || load < low->ksq_load)
			low = kseq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->ksq_group == low->ksq_group) {
		transferable = high->ksq_transferable;
		high_load = high->ksq_load;
		low_load = low->ksq_load;
	} else {
		transferable = high->ksq_group->ksg_transferable;
		high_load = high->ksq_group->ksg_load;
		low_load = low->ksq_group->ksg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		kseq_move(high, KSEQ_ID(low));
	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kseq *kseq;
	struct kseq *to;
	struct kse *ke;

	kseq = from;
	to = KSEQ_CPU(cpu);
	ke = kseq_steal(kseq, 1);
	if (ke == NULL) {
		struct kseq_group *ksg;

		ksg = kseq->ksq_group;
		LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
			if (kseq == from || kseq->ksq_transferable == 0)
				continue;
			ke = kseq_steal(kseq, 1);
			break;
		}
		if (ke == NULL)
			panic("kseq_move: No KSEs available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (kseq == to)
		return;
	ke->ke_state = KES_THREAD;
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
	kseq_notify(ke, cpu);
}

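/*
 * A quick worked example of the sched_balance_pair() arithmetic above, with
 * made-up loads: high_load = 7 and low_load = 2 give diff = 5, so
 * move = 5 / 2 + 1 = 3 (odd imbalances round up) and at most three KSEs are
 * handed to the less loaded kseq, limited by the transferable count; the
 * resulting loads would be 4 and 5.
 */
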
static int
kseq_idled(struct kseq *kseq)
{
	struct kseq_group *ksg;
	struct kseq *steal;
	struct kse *ke;

	ksg = kseq->ksq_group;
	/*
	 * If we're in a cpu group, try to steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == kseq || steal->ksq_transferable == 0)
				continue;
			ke = kseq_steal(steal, 0);
			if (ke == NULL)
				continue;
			ke->ke_state = KES_THREAD;
			kseq_runq_rem(steal, ke);
			kseq_load_rem(steal, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			sched_add_internal(ke->ke_thread, 0);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a KSE bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&kseq_idle, ksg->ksg_mask);
	return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		*(volatile struct kse **)&ke = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		ke->ke_flags &= ~KEF_ASSIGNED;
		sched_add_internal(ke->ke_thread, 0);
	}
}

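/*
 * The ksq_assigned list consumed above is, in effect, a small lock-free
 * stack: remote CPUs push KSEs onto it in kseq_notify() below with
 * atomic_cmpset_ptr(), chaining them through ke_assign (which aliases
 * ke_procq.tqe_next), and the consuming CPU detaches the whole chain at once
 * by swapping the head with NULL before re-adding each KSE locally.
 */
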
static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;
	int prio;

	ke->ke_cpu = cpu;
	ke->ke_flags |= KEF_ASSIGNED;
	prio = ke->ke_thread->td_priority;

	kseq = KSEQ_CPU(cpu);

	/*
	 * Place a KSE on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke,
				    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
	struct kse *ke;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if (stealidle)
		return (runq_steal(&kseq->ksq_idle));
	return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
	struct kseq_group *ksg;
	int cpu;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	ksg = kseq->ksq_group;
	if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) {
		ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group;
		if (kseq_idle & ksg->ksg_mask) {
			cpu = ffs(ksg->ksg_idlemask);
			if (cpu)
				goto migrate;
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu)
			goto migrate;
	}
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to it after checking to see if there are idled groups.
	 */
	ksg = kseq->ksq_group;
	if (ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu)
			goto migrate;
	}
	/*
	 * No new CPU was found.
	 */
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ke->ke_runq = NULL;
	kseq_notify(ke, cpu);

	return (1);
}

#endif	/* SMP */

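/*
 * A brief sketch of how the two timeshare run queues are consumed by
 * kseq_choose() below: ksq_curr is drained first; once it is empty the curr
 * and next pointers are swapped and the search is retried, and only if both
 * are empty does the idle queue get looked at.  A KSE found with a slice of
 * zero is given a fresh slice and parked on ksq_next, so it will not run
 * again until the rest of the current queue has been serviced.
 */
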
/*
 * Pick the highest priority task we have and return it.
 */

static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		if (ke->ke_slice == 0) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int balance_groups;
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the kseqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct kseq *ksq;

		ksq = &kseq_cpu[i];
		ksq->ksq_assigned = NULL;
		kseq_setup(&kseq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct kseq_group *ksg;
		struct kseq *ksq;

		for (i = 0; i < MAXCPU; i++) {
			ksq = &kseq_cpu[i];
			ksg = &kseq_groups[i];
			/*
			 * Setup a kseq group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
		}
	} else {
		struct kseq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &kseq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get a minimal slice so that we
	 * quickly notice if one abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse's distance from the least nice ksegrp.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_proc->p_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_proc->p_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_INTERACTIVE;

	CTR6(KTR_ULE,
	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
	    ke, ke->ke_slice, kg->kg_proc->p_nice, kseq->ksq_nicemin,
	    kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

	return;
}

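/*
 * To put rough numbers on the nice window above, assume hz = 1000 so that
 * sched_setup() picks slice_min = 10 and slice_max = 142 ticks: a
 * non-interactive kseg sitting at the queue's ksq_nicemin gets the full 142
 * ticks, one 10 nice steps above it gets SCHED_SLICE_NICE(10) = 72 ticks,
 * one at the edge of the window (19 steps) gets 9 ticks, and anything beyond
 * the window gets no slice at all, except that nice 0 ksegs still receive
 * the 10 tick minimum.
 */
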
/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX].
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	} else if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

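/*
 * To get a feel for the score, assume a ksegrp whose recorded sleep time is
 * three times its run time: the second branch of sched_interact_score()
 * returns roughly 50 * runtime / slptime, i.e. about 16, comfortably under
 * SCHED_INTERACT_THRESH (30), so the ksegrp is treated as interactive.
 * Reverse the ratio and the first branch returns about 100 - 16 = 84, well
 * into batch territory.  sched_interact_update() caps runtime + slptime near
 * SCHED_SLP_RUN_MAX (about five seconds of history, kept as ticks << 10), so
 * the score tracks recent behavior rather than the whole lifetime.
 */
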
1110210491d3SJeff Roberson */ 111181de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1112210491d3SJeff Roberson /* 111381de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 111481de51bfSJeff Roberson * round away our results. 111565c8760dSJeff Roberson */ 111665c8760dSJeff Roberson ke->ke_ticks <<= 10; 111781de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 111835e6168fSJeff Roberson SCHED_CPU_TICKS; 111965c8760dSJeff Roberson ke->ke_ticks >>= 10; 112081de51bfSJeff Roberson } else 112181de51bfSJeff Roberson ke->ke_ticks = 0; 112235e6168fSJeff Roberson ke->ke_ltick = ticks; 112335e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 112435e6168fSJeff Roberson } 112535e6168fSJeff Roberson 112635e6168fSJeff Roberson void 112735e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio) 112835e6168fSJeff Roberson { 11293f741ca1SJeff Roberson struct kse *ke; 113035e6168fSJeff Roberson 11313f741ca1SJeff Roberson ke = td->td_kse; 113235e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 113335e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 11343f741ca1SJeff Roberson /* 11353f741ca1SJeff Roberson * If the priority has been elevated due to priority 11363f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 11373f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 11383f741ca1SJeff Roberson * needs to fix things up. 11393f741ca1SJeff Roberson */ 1140769a3635SJeff Roberson if (prio < td->td_priority && ke && 1141769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 114222bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 11433f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 11443f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 11453f741ca1SJeff Roberson runq_add(ke->ke_runq, ke); 114635e6168fSJeff Roberson } 1147f2b74cbfSJeff Roberson /* 1148f2b74cbfSJeff Roberson * Hold this kse on this cpu so that sched_prio() doesn't 1149f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1150f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1151f2b74cbfSJeff Roberson */ 1152f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 11533f741ca1SJeff Roberson adjustrunqueue(td, prio); 11543f741ca1SJeff Roberson } else 11553f741ca1SJeff Roberson td->td_priority = prio; 115635e6168fSJeff Roberson } 115735e6168fSJeff Roberson 115835e6168fSJeff Roberson void 1159bf0acc27SJohn Baldwin sched_switch(struct thread *td, struct thread *newtd) 116035e6168fSJeff Roberson { 116135e6168fSJeff Roberson struct kse *ke; 116235e6168fSJeff Roberson 116335e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 116435e6168fSJeff Roberson 116535e6168fSJeff Roberson ke = td->td_kse; 116635e6168fSJeff Roberson 116735e6168fSJeff Roberson td->td_last_kse = ke; 1168060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1169060563ecSJulian Elischer td->td_oncpu = NOCPU; 117052eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 117152eb8464SJohn Baldwin td->td_pflags &= ~TDP_OWEPREEMPT; 117235e6168fSJeff Roberson 1173b11fdad0SJeff Roberson /* 1174b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1175b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 
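 * In that case the run queue and load manipulation below is skipped
 * entirely; the cpu the kse was assigned to is responsible for it now.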
1176b11fdad0SJeff Roberson */ 1177b11fdad0SJeff Roberson if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 11782454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1179bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 11802454aaf5SJeff Roberson } else if (TD_IS_RUNNING(td)) { 1181155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1182f2b74cbfSJeff Roberson /* 1183f2b74cbfSJeff Roberson * Don't allow the kse to migrate from a preemption. 1184f2b74cbfSJeff Roberson */ 1185f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 1186ab2baa72SDavid Xu setrunqueue(td); 11870e0f6266SJeff Roberson } else { 118833916c36SJeff Roberson if (ke->ke_runq) { 1189155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 119033916c36SJeff Roberson } else if ((td->td_flags & TDF_IDLETD) == 0) 11912c3490b1SMarcel Moolenaar kdb_backtrace(); 119235e6168fSJeff Roberson /* 119335e6168fSJeff Roberson * We will not be on the run queue. So we must be 119435e6168fSJeff Roberson * sleeping or similar. 119535e6168fSJeff Roberson */ 11960e2a4d3aSDavid Xu if (td->td_proc->p_flag & P_SA) 119735e6168fSJeff Roberson kse_reassign(ke); 11980e0f6266SJeff Roberson } 1199b11fdad0SJeff Roberson } 12002454aaf5SJeff Roberson if (newtd != NULL) { 1201bf0acc27SJohn Baldwin kseq_load_add(KSEQ_SELF(), newtd->td_kse); 12022454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 12032454aaf5SJeff Roberson ke->ke_runq = KSEQ_SELF()->ksq_curr; 12042454aaf5SJeff Roberson } else 12052454aaf5SJeff Roberson newtd = choosethread(); 1206ae53b483SJeff Roberson if (td != newtd) 1207ae53b483SJeff Roberson cpu_switch(td, newtd); 1208ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 120935e6168fSJeff Roberson 1210060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 121135e6168fSJeff Roberson } 121235e6168fSJeff Roberson 121335e6168fSJeff Roberson void 1214fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 121535e6168fSJeff Roberson { 1216fa885116SJulian Elischer struct ksegrp *kg; 121715dc847eSJeff Roberson struct kse *ke; 121835e6168fSJeff Roberson struct thread *td; 121915dc847eSJeff Roberson struct kseq *kseq; 122035e6168fSJeff Roberson 1221fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 12220b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 122315dc847eSJeff Roberson /* 122415dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 
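 * The kseq_nice_rem()/kseq_nice_add() calls below keep the per-cpu nice
 * bookkeeping (and thus ksq_nicemin) current, which sched_slice() relies
 * on when sizing slices for timeshare ksegrps.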
122515dc847eSJeff Roberson */ 1226fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 1227fa885116SJulian Elischer if (kg->kg_pri_class == PRI_TIMESHARE) { 122815dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 1229d07ac847SJeff Roberson if (ke->ke_runq == NULL) 123015dc847eSJeff Roberson continue; 123115dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1232fa885116SJulian Elischer kseq_nice_rem(kseq, p->p_nice); 123315dc847eSJeff Roberson kseq_nice_add(kseq, nice); 123415dc847eSJeff Roberson } 1235fa885116SJulian Elischer } 1236fa885116SJulian Elischer } 1237fa885116SJulian Elischer p->p_nice = nice; 1238fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 123935e6168fSJeff Roberson sched_priority(kg); 124015dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 12414a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 124235e6168fSJeff Roberson } 1243fa885116SJulian Elischer } 124435e6168fSJeff Roberson 124535e6168fSJeff Roberson void 124644f3b092SJohn Baldwin sched_sleep(struct thread *td) 124735e6168fSJeff Roberson { 124835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 124935e6168fSJeff Roberson 125035e6168fSJeff Roberson td->td_slptime = ticks; 125144f3b092SJohn Baldwin td->td_base_pri = td->td_priority; 125235e6168fSJeff Roberson 125315dc847eSJeff Roberson CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 125415dc847eSJeff Roberson td->td_kse, td->td_slptime); 125535e6168fSJeff Roberson } 125635e6168fSJeff Roberson 125735e6168fSJeff Roberson void 125835e6168fSJeff Roberson sched_wakeup(struct thread *td) 125935e6168fSJeff Roberson { 126035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 126135e6168fSJeff Roberson 126235e6168fSJeff Roberson /* 126335e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 126435e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 126535e6168fSJeff Roberson */ 126635e6168fSJeff Roberson if (td->td_slptime) { 1267f1e8dc4aSJeff Roberson struct ksegrp *kg; 126815dc847eSJeff Roberson int hzticks; 1269f1e8dc4aSJeff Roberson 1270f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1271d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1272d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1273d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1274d322132cSJeff Roberson kg->kg_runtime = 1; 1275d322132cSJeff Roberson } else { 1276d322132cSJeff Roberson kg->kg_slptime += hzticks; 12774b60e324SJeff Roberson sched_interact_update(kg); 1278d322132cSJeff Roberson } 1279f1e8dc4aSJeff Roberson sched_priority(kg); 12804b60e324SJeff Roberson if (td->td_kse) 12814b60e324SJeff Roberson sched_slice(td->td_kse); 128215dc847eSJeff Roberson CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 128315dc847eSJeff Roberson td->td_kse, hzticks); 128435e6168fSJeff Roberson td->td_slptime = 0; 1285f1e8dc4aSJeff Roberson } 128635e6168fSJeff Roberson setrunqueue(td); 128735e6168fSJeff Roberson } 128835e6168fSJeff Roberson 128935e6168fSJeff Roberson /* 129035e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 129135e6168fSJeff Roberson * priority. 
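 * The penalty itself is applied in sched_fork_ksegrp(): the parent is
 * charged an extra tick of kg_runtime and the child's history is scaled
 * down by sched_interact_fork().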
129235e6168fSJeff Roberson */ 129335e6168fSJeff Roberson void 129455d44f79SJulian Elischer sched_fork(struct thread *td, struct proc *p1) 129535e6168fSJeff Roberson { 129635e6168fSJeff Roberson 129735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 129835e6168fSJeff Roberson 129955d44f79SJulian Elischer p1->p_nice = td->td_proc->p_nice; 130055d44f79SJulian Elischer sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(p1)); 130155d44f79SJulian Elischer sched_fork_kse(td, FIRST_KSE_IN_PROC(p1)); 130255d44f79SJulian Elischer sched_fork_thread(td, FIRST_THREAD_IN_PROC(p1)); 130315dc847eSJeff Roberson } 130415dc847eSJeff Roberson 130515dc847eSJeff Roberson void 130655d44f79SJulian Elischer sched_fork_kse(struct thread *td, struct kse *child) 130715dc847eSJeff Roberson { 130855d44f79SJulian Elischer struct kse *ke = td->td_kse; 130955d44f79SJulian Elischer 1310210491d3SJeff Roberson child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1311093c05e3SJeff Roberson child->ke_cpu = ke->ke_cpu; 131215dc847eSJeff Roberson child->ke_runq = NULL; 131315dc847eSJeff Roberson 1314736c97c7SJeff Roberson /* Grab our parents cpu estimation information. */ 1315736c97c7SJeff Roberson child->ke_ticks = ke->ke_ticks; 1316736c97c7SJeff Roberson child->ke_ltick = ke->ke_ltick; 1317736c97c7SJeff Roberson child->ke_ftick = ke->ke_ftick; 131815dc847eSJeff Roberson } 131915dc847eSJeff Roberson 132015dc847eSJeff Roberson void 132155d44f79SJulian Elischer sched_fork_ksegrp(struct thread *td, struct ksegrp *child) 132215dc847eSJeff Roberson { 132355d44f79SJulian Elischer struct ksegrp *kg = td->td_ksegrp; 13242056d0a1SJohn Baldwin PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); 1325210491d3SJeff Roberson 1326d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1327d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1328d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 1329d322132cSJeff Roberson sched_interact_fork(child); 13304b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 13314b60e324SJeff Roberson sched_interact_update(kg); 133215dc847eSJeff Roberson 1333d322132cSJeff Roberson CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)", 1334d322132cSJeff Roberson kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime, 1335d322132cSJeff Roberson child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime); 1336c9f25d8fSJeff Roberson } 1337c9f25d8fSJeff Roberson 133815dc847eSJeff Roberson void 133915dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 134015dc847eSJeff Roberson { 134115dc847eSJeff Roberson } 134215dc847eSJeff Roberson 134315dc847eSJeff Roberson void 134415dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 134515dc847eSJeff Roberson { 134615dc847eSJeff Roberson struct kseq *kseq; 134715dc847eSJeff Roberson struct kse *ke; 1348ef1134c9SJeff Roberson int nclass; 1349ef1134c9SJeff Roberson int oclass; 135015dc847eSJeff Roberson 13512056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 135215dc847eSJeff Roberson if (kg->kg_pri_class == class) 135315dc847eSJeff Roberson return; 135415dc847eSJeff Roberson 1355ef1134c9SJeff Roberson nclass = PRI_BASE(class); 1356ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 135715dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 135815dc847eSJeff Roberson if (ke->ke_state != KES_ONRUNQ && 135915dc847eSJeff Roberson ke->ke_state != KES_THREAD) 136015dc847eSJeff Roberson continue; 136115dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 136215dc847eSJeff Roberson 1363ef1134c9SJeff Roberson 
#ifdef SMP 1364155b9987SJeff Roberson /* 1365155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1366155b9987SJeff Roberson * count because we could be changing to or from an interrupt 1367155b9987SJeff Roberson * class. 1368155b9987SJeff Roberson */ 1369155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 137080f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, oclass)) { 137180f86c9fSJeff Roberson kseq->ksq_transferable--; 137280f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 137380f86c9fSJeff Roberson } 137480f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, nclass)) { 137580f86c9fSJeff Roberson kseq->ksq_transferable++; 137680f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 137780f86c9fSJeff Roberson } 1378155b9987SJeff Roberson } 1379ef1134c9SJeff Roberson #endif 1380155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1381ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 1382fa885116SJulian Elischer kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1383155b9987SJeff Roberson } 1384155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1385155b9987SJeff Roberson kseq->ksq_load_timeshare++; 1386fa885116SJulian Elischer kseq_nice_add(kseq, kg->kg_proc->p_nice); 138715dc847eSJeff Roberson } 1388155b9987SJeff Roberson } 138915dc847eSJeff Roberson 139015dc847eSJeff Roberson kg->kg_pri_class = class; 139135e6168fSJeff Roberson } 139235e6168fSJeff Roberson 139335e6168fSJeff Roberson /* 139435e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 139535e6168fSJeff Roberson */ 139635e6168fSJeff Roberson void 139755d44f79SJulian Elischer sched_exit(struct proc *p, struct thread *td) 139835e6168fSJeff Roberson { 139935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 140055d44f79SJulian Elischer sched_exit_kse(FIRST_KSE_IN_PROC(p), td); 140155d44f79SJulian Elischer sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td); 1402141ad61cSJeff Roberson } 1403141ad61cSJeff Roberson 1404141ad61cSJeff Roberson void 140555d44f79SJulian Elischer sched_exit_kse(struct kse *ke, struct thread *td) 1406141ad61cSJeff Roberson { 140755d44f79SJulian Elischer kseq_load_rem(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 1408141ad61cSJeff Roberson } 1409141ad61cSJeff Roberson 1410141ad61cSJeff Roberson void 141155d44f79SJulian Elischer sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1412141ad61cSJeff Roberson { 141355d44f79SJulian Elischer /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 141455d44f79SJulian Elischer kg->kg_runtime += td->td_ksegrp->kg_runtime; 14154b60e324SJeff Roberson sched_interact_update(kg); 1416141ad61cSJeff Roberson } 1417141ad61cSJeff Roberson 1418141ad61cSJeff Roberson void 1419141ad61cSJeff Roberson sched_exit_thread(struct thread *td, struct thread *child) 1420141ad61cSJeff Roberson { 142135e6168fSJeff Roberson } 142235e6168fSJeff Roberson 142335e6168fSJeff Roberson void 14247cf90fb3SJeff Roberson sched_clock(struct thread *td) 142535e6168fSJeff Roberson { 142635e6168fSJeff Roberson struct kseq *kseq; 14270a016a05SJeff Roberson struct ksegrp *kg; 14287cf90fb3SJeff Roberson struct kse *ke; 142935e6168fSJeff Roberson 1430dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 14312454aaf5SJeff Roberson kseq = KSEQ_SELF(); 1432dc03363dSJeff Roberson #ifdef SMP 1433dc03363dSJeff Roberson if (ticks == bal_tick) 1434dc03363dSJeff Roberson sched_balance(); 1435dc03363dSJeff Roberson if (ticks == gbal_tick) 1436dc03363dSJeff Roberson sched_balance_groups(); 14372454aaf5SJeff Roberson /* 14382454aaf5SJeff Roberson * We could have been assigned a non-real-time thread without an 14392454aaf5SJeff Roberson * IPI. 14402454aaf5SJeff Roberson */ 14412454aaf5SJeff Roberson if (kseq->ksq_assigned) 14422454aaf5SJeff Roberson kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1443dc03363dSJeff Roberson #endif 144415dc847eSJeff Roberson /* 144515dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 144615dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 144715dc847eSJeff Roberson * calculating this here. 144815dc847eSJeff Roberson */ 144915dc847eSJeff Roberson if (realstathz == 0) { 145015dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 145115dc847eSJeff Roberson tickincr = hz / realstathz; 145215dc847eSJeff Roberson /* 145315dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 145415dc847eSJeff Roberson * larger than hz. 145515dc847eSJeff Roberson */ 145615dc847eSJeff Roberson if (tickincr == 0) 145715dc847eSJeff Roberson tickincr = 1; 145815dc847eSJeff Roberson } 145935e6168fSJeff Roberson 14607cf90fb3SJeff Roberson ke = td->td_kse; 146115dc847eSJeff Roberson kg = ke->ke_ksegrp; 146235e6168fSJeff Roberson 14630a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 146465c8760dSJeff Roberson ke->ke_ticks++; 1465d465fb95SJeff Roberson ke->ke_ltick = ticks; 1466a8949de2SJeff Roberson 1467d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1468d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1469d465fb95SJeff Roberson sched_pctcpu_update(ke); 1470d465fb95SJeff Roberson 147143fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 147235e6168fSJeff Roberson return; 14730a016a05SJeff Roberson 147415dc847eSJeff Roberson CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)", 147515dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10); 14763f741ca1SJeff Roberson /* 1477a8949de2SJeff Roberson * We only run the slicing code for TIMESHARE ksegrps. 1478a8949de2SJeff Roberson */ 1479a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1480a8949de2SJeff Roberson return; 1481a8949de2SJeff Roberson /* 148215dc847eSJeff Roberson * We used a tick; charge it to the ksegrp so that we can compute our 148315dc847eSJeff Roberson * interactivity. 148415dc847eSJeff Roberson */ 148515dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 14864b60e324SJeff Roberson sched_interact_update(kg); 1487407b0157SJeff Roberson 148835e6168fSJeff Roberson /* 148935e6168fSJeff Roberson * We used up one time slice. 149035e6168fSJeff Roberson */ 1491093c05e3SJeff Roberson if (--ke->ke_slice > 0) 149215dc847eSJeff Roberson return; 149335e6168fSJeff Roberson /* 149415dc847eSJeff Roberson * We're out of time, recompute priorities and requeue.
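 * Depending on whether SCHED_CURR() still considers the ksegrp current,
 * the kse goes back on either ksq_curr or ksq_next below.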
149535e6168fSJeff Roberson */ 1496155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1497e1f89c22SJeff Roberson sched_priority(kg); 149815dc847eSJeff Roberson sched_slice(ke); 149915dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 150015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 150115dc847eSJeff Roberson else 150215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1503155b9987SJeff Roberson kseq_load_add(kseq, ke); 15044a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 150535e6168fSJeff Roberson } 150635e6168fSJeff Roberson 150735e6168fSJeff Roberson int 150835e6168fSJeff Roberson sched_runnable(void) 150935e6168fSJeff Roberson { 151035e6168fSJeff Roberson struct kseq *kseq; 1511b90816f1SJeff Roberson int load; 151235e6168fSJeff Roberson 1513b90816f1SJeff Roberson load = 1; 1514b90816f1SJeff Roberson 15150a016a05SJeff Roberson kseq = KSEQ_SELF(); 151622bf7d9aSJeff Roberson #ifdef SMP 151746f8b265SJeff Roberson if (kseq->ksq_assigned) { 151846f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 151922bf7d9aSJeff Roberson kseq_assign(kseq); 152046f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 152146f8b265SJeff Roberson } 152222bf7d9aSJeff Roberson #endif 15233f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 15243f741ca1SJeff Roberson if (kseq->ksq_load > 0) 15253f741ca1SJeff Roberson goto out; 15263f741ca1SJeff Roberson } else 15273f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1528b90816f1SJeff Roberson goto out; 1529b90816f1SJeff Roberson load = 0; 1530b90816f1SJeff Roberson out: 1531b90816f1SJeff Roberson return (load); 153235e6168fSJeff Roberson } 153335e6168fSJeff Roberson 153435e6168fSJeff Roberson void 153535e6168fSJeff Roberson sched_userret(struct thread *td) 153635e6168fSJeff Roberson { 153735e6168fSJeff Roberson struct ksegrp *kg; 153835e6168fSJeff Roberson 153935e6168fSJeff Roberson kg = td->td_ksegrp; 154035e6168fSJeff Roberson 154135e6168fSJeff Roberson if (td->td_priority != kg->kg_user_pri) { 154235e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 154335e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 154435e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 154535e6168fSJeff Roberson } 154635e6168fSJeff Roberson } 154735e6168fSJeff Roberson 1548c9f25d8fSJeff Roberson struct kse * 1549c9f25d8fSJeff Roberson sched_choose(void) 1550c9f25d8fSJeff Roberson { 15510a016a05SJeff Roberson struct kseq *kseq; 1552c9f25d8fSJeff Roberson struct kse *ke; 155315dc847eSJeff Roberson 1554b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 155522bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 155615dc847eSJeff Roberson #ifdef SMP 155780f86c9fSJeff Roberson restart: 155822bf7d9aSJeff Roberson if (kseq->ksq_assigned) 155922bf7d9aSJeff Roberson kseq_assign(kseq); 156015dc847eSJeff Roberson #endif 156122bf7d9aSJeff Roberson ke = kseq_choose(kseq); 156235e6168fSJeff Roberson if (ke) { 156322bf7d9aSJeff Roberson #ifdef SMP 156422bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 156580f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 156680f86c9fSJeff Roberson goto restart; 156722bf7d9aSJeff Roberson #endif 1568155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 156935e6168fSJeff Roberson ke->ke_state = KES_THREAD; 1570245f3abfSJeff Roberson 157115dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 157215dc847eSJeff Roberson CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)", 157315dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, 157415dc847eSJeff Roberson ke->ke_thread->td_priority); 1575245f3abfSJeff Roberson } 
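		/*
		 * The kse was removed from its run queue and marked
		 * KES_THREAD above; hand it to the caller to run.
		 */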
157615dc847eSJeff Roberson return (ke); 157735e6168fSJeff Roberson } 1578c9f25d8fSJeff Roberson #ifdef SMP 157980f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 158080f86c9fSJeff Roberson goto restart; 1581c9f25d8fSJeff Roberson #endif 158215dc847eSJeff Roberson return (NULL); 158335e6168fSJeff Roberson } 158435e6168fSJeff Roberson 158535e6168fSJeff Roberson void 15867cf90fb3SJeff Roberson sched_add(struct thread *td) 158735e6168fSJeff Roberson { 158863fcce68SJohn Baldwin 158963fcce68SJohn Baldwin sched_add_internal(td, 1); 159063fcce68SJohn Baldwin } 159163fcce68SJohn Baldwin 159263fcce68SJohn Baldwin static void 159363fcce68SJohn Baldwin sched_add_internal(struct thread *td, int preemptive) 159463fcce68SJohn Baldwin { 1595c9f25d8fSJeff Roberson struct kseq *kseq; 159615dc847eSJeff Roberson struct ksegrp *kg; 15977cf90fb3SJeff Roberson struct kse *ke; 15982454aaf5SJeff Roberson #ifdef SMP 15992454aaf5SJeff Roberson int canmigrate; 16002454aaf5SJeff Roberson #endif 160122bf7d9aSJeff Roberson int class; 1602c9f25d8fSJeff Roberson 160322bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 16047cf90fb3SJeff Roberson ke = td->td_kse; 16057cf90fb3SJeff Roberson kg = td->td_ksegrp; 160622bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 160722bf7d9aSJeff Roberson return; 160822bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 1609c494ddc8SJeff Roberson KASSERT((ke->ke_thread != NULL), 1610c494ddc8SJeff Roberson ("sched_add: No thread on KSE")); 16115d7ef00cSJeff Roberson KASSERT((ke->ke_thread->td_kse != NULL), 16125d7ef00cSJeff Roberson ("sched_add: No KSE on thread")); 16135d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 16145d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 16155d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 16165d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 16175d7ef00cSJeff Roberson ("sched_add: process swapped out")); 16189bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 16199bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 16205d7ef00cSJeff Roberson 162122bf7d9aSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 162222bf7d9aSJeff Roberson switch (class) { 1623a8949de2SJeff Roberson case PRI_ITHD: 1624a8949de2SJeff Roberson case PRI_REALTIME: 162515dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 162615dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 16277cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1628a8949de2SJeff Roberson break; 1629a8949de2SJeff Roberson case PRI_TIMESHARE: 163015dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 163115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 163215dc847eSJeff Roberson else 163315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 163415dc847eSJeff Roberson break; 163515dc847eSJeff Roberson case PRI_IDLE: 163615dc847eSJeff Roberson /* 163715dc847eSJeff Roberson * This is for priority prop. 163815dc847eSJeff Roberson */ 16393f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 164015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 164115dc847eSJeff Roberson else 164215dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 164315dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 164415dc847eSJeff Roberson break; 164515dc847eSJeff Roberson default: 1646d322132cSJeff Roberson panic("Unknown pri class."); 1647a8949de2SJeff Roberson break; 1648a6ed4186SJeff Roberson } 164922bf7d9aSJeff Roberson #ifdef SMP 16502454aaf5SJeff Roberson /* 16512454aaf5SJeff Roberson * Don't migrate running threads here. 
Force the long term balancer 16522454aaf5SJeff Roberson * to do it. 16532454aaf5SJeff Roberson */ 16542454aaf5SJeff Roberson canmigrate = KSE_CAN_MIGRATE(ke, class); 1655f2b74cbfSJeff Roberson if (ke->ke_flags & KEF_HOLD) { 1656f2b74cbfSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 16572454aaf5SJeff Roberson canmigrate = 0; 1658f2b74cbfSJeff Roberson } 16592454aaf5SJeff Roberson /* 16602454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 16612454aaf5SJeff Roberson */ 16622454aaf5SJeff Roberson if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 166386e1c22aSJeff Roberson ke->ke_runq = NULL; 166480f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 166580f86c9fSJeff Roberson return; 166680f86c9fSJeff Roberson } 166722bf7d9aSJeff Roberson /* 1668670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1669670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 167022bf7d9aSJeff Roberson */ 167180f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 167280f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 167380f86c9fSJeff Roberson /* 167480f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 167580f86c9fSJeff Roberson * from the global idle mask. 167680f86c9fSJeff Roberson */ 167780f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 167880f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 167980f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 168080f86c9fSJeff Roberson /* 168180f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 168280f86c9fSJeff Roberson */ 168380f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 16842454aaf5SJeff Roberson } else if (kseq->ksq_load > 1 && canmigrate) 1685670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1686670c524fSJeff Roberson return; 16872454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 168822bf7d9aSJeff Roberson #endif 16892454aaf5SJeff Roberson /* 16902454aaf5SJeff Roberson * XXX With preemption this is not necessary. 16912454aaf5SJeff Roberson */ 1692f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1693f2b74cbfSJeff Roberson ke->ke_runq == kseq->ksq_curr) 169422bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 169563fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 16960c0b25aeSJohn Baldwin return; 169735e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses++; 169835e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 169935e6168fSJeff Roberson 1700155b9987SJeff Roberson kseq_runq_add(kseq, ke); 1701155b9987SJeff Roberson kseq_load_add(kseq, ke); 170235e6168fSJeff Roberson } 170335e6168fSJeff Roberson 170435e6168fSJeff Roberson void 17057cf90fb3SJeff Roberson sched_rem(struct thread *td) 170635e6168fSJeff Roberson { 170715dc847eSJeff Roberson struct kseq *kseq; 17087cf90fb3SJeff Roberson struct kse *ke; 17097cf90fb3SJeff Roberson 17107cf90fb3SJeff Roberson ke = td->td_kse; 171122bf7d9aSJeff Roberson /* 171222bf7d9aSJeff Roberson * It is safe to just return here because sched_rem() is only ever 171322bf7d9aSJeff Roberson * used in places where we're immediately going to add the 171422bf7d9aSJeff Roberson * kse back on again. In that case it'll be added with the correct 171522bf7d9aSJeff Roberson * thread and priority when the caller drops the sched_lock. 
171622bf7d9aSJeff Roberson */ 171722bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 171822bf7d9aSJeff Roberson return; 171935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1720c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1721c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 172235e6168fSJeff Roberson 172335e6168fSJeff Roberson ke->ke_state = KES_THREAD; 172435e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 172515dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1726155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1727155b9987SJeff Roberson kseq_load_rem(kseq, ke); 172835e6168fSJeff Roberson } 172935e6168fSJeff Roberson 173035e6168fSJeff Roberson fixpt_t 17317cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 173235e6168fSJeff Roberson { 173335e6168fSJeff Roberson fixpt_t pctcpu; 17347cf90fb3SJeff Roberson struct kse *ke; 173535e6168fSJeff Roberson 173635e6168fSJeff Roberson pctcpu = 0; 17377cf90fb3SJeff Roberson ke = td->td_kse; 1738484288deSJeff Roberson if (ke == NULL) 1739484288deSJeff Roberson return (0); 174035e6168fSJeff Roberson 1741b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 174235e6168fSJeff Roberson if (ke->ke_ticks) { 174335e6168fSJeff Roberson int rtick; 174435e6168fSJeff Roberson 1745210491d3SJeff Roberson /* 1746210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1747210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1748210491d3SJeff Roberson * rounding errors. 1749210491d3SJeff Roberson */ 17502e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 17512e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 175235e6168fSJeff Roberson sched_pctcpu_update(ke); 175335e6168fSJeff Roberson /* How many rtick per second ? */ 1754210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 17557121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 175635e6168fSJeff Roberson } 175735e6168fSJeff Roberson 175835e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1759828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 176035e6168fSJeff Roberson 176135e6168fSJeff Roberson return (pctcpu); 176235e6168fSJeff Roberson } 176335e6168fSJeff Roberson 17649bacd788SJeff Roberson void 17659bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 17669bacd788SJeff Roberson { 17679bacd788SJeff Roberson struct kse *ke; 17689bacd788SJeff Roberson 17699bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17709bacd788SJeff Roberson ke = td->td_kse; 17719bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 177280f86c9fSJeff Roberson #ifdef SMP 177380f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 17749bacd788SJeff Roberson return; 17759bacd788SJeff Roberson /* sched_rem without the runq_remove */ 17769bacd788SJeff Roberson ke->ke_state = KES_THREAD; 17779bacd788SJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 1778155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 17799bacd788SJeff Roberson kseq_notify(ke, cpu); 17809bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
*/ 1781279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 17829bacd788SJeff Roberson #endif 17839bacd788SJeff Roberson } 17849bacd788SJeff Roberson 17859bacd788SJeff Roberson void 17869bacd788SJeff Roberson sched_unbind(struct thread *td) 17879bacd788SJeff Roberson { 17889bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17899bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 17909bacd788SJeff Roberson } 17919bacd788SJeff Roberson 179235e6168fSJeff Roberson int 179333916c36SJeff Roberson sched_load(void) 179433916c36SJeff Roberson { 179533916c36SJeff Roberson #ifdef SMP 179633916c36SJeff Roberson int total; 179733916c36SJeff Roberson int i; 179833916c36SJeff Roberson 179933916c36SJeff Roberson total = 0; 180033916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 180133916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 180233916c36SJeff Roberson return (total); 180333916c36SJeff Roberson #else 180433916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 180533916c36SJeff Roberson #endif 180633916c36SJeff Roberson } 180733916c36SJeff Roberson 180833916c36SJeff Roberson int 180935e6168fSJeff Roberson sched_sizeof_kse(void) 181035e6168fSJeff Roberson { 181135e6168fSJeff Roberson return (sizeof(struct kse) + sizeof(struct ke_sched)); 181235e6168fSJeff Roberson } 181335e6168fSJeff Roberson 181435e6168fSJeff Roberson int 181535e6168fSJeff Roberson sched_sizeof_ksegrp(void) 181635e6168fSJeff Roberson { 181735e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 181835e6168fSJeff Roberson } 181935e6168fSJeff Roberson 182035e6168fSJeff Roberson int 182135e6168fSJeff Roberson sched_sizeof_proc(void) 182235e6168fSJeff Roberson { 182335e6168fSJeff Roberson return (sizeof(struct proc)); 182435e6168fSJeff Roberson } 182535e6168fSJeff Roberson 182635e6168fSJeff Roberson int 182735e6168fSJeff Roberson sched_sizeof_thread(void) 182835e6168fSJeff Roberson { 182935e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 183035e6168fSJeff Roberson } 1831
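#if 0
/*
 * Illustrative sketch only; this is not kernel code and is not compiled
 * (hence the #if 0).  It mirrors the shape of sched_interact_score()
 * above so the interactivity math can be experimented with from userland.
 * INTERACT_HALF is a stand-in value, not the kernel's SCHED_INTERACT_HALF,
 * and ule_interact_score() is a made-up name.
 */
#include <stdio.h>

#define	INTERACT_HALF	50		/* assumed midpoint of the score range */

static int
ule_interact_score(int runtime, int slptime)
{
	int div;

	if (runtime > slptime) {
		div = runtime / INTERACT_HALF;
		if (div < 1)
			div = 1;
		/* Mostly running: the score lands in the upper half. */
		return (INTERACT_HALF + (INTERACT_HALF - slptime / div));
	}
	if (slptime > runtime) {
		div = slptime / INTERACT_HALF;
		if (div < 1)
			div = 1;
		/* Mostly sleeping: the score lands in the lower half. */
		return (runtime / div);
	}
	return (0);			/* No history yet. */
}

int
main(void)
{
	/* A sleeper scores low (interactive), a cpu hog scores high. */
	printf("sleeper: %d, hog: %d\n",
	    ule_interact_score(10, 90), ule_interact_score(90, 10));
	return (0);
}
#endif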