/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define KTR_ULE         KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");

#define ULE_NAME        "ule"
#define ULE_NAME_LEN    3
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, ULE_NAME, ULE_NAME_LEN,
    "System is using the ULE scheduler");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

/*
 * These data structures are allocated within their parent data structure but
 * are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        /* CPU that we have affinity for. */
        u_char          ske_cpu;
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu
#define ke_assign       ke_procq.tqe_next

#define KEF_ASSIGNED    KEF_SCHED0      /* KSE is being migrated. */
#define KEF_BOUND       KEF_SCHED1      /* KSE can not migrate. */

struct kg_sched {
        int     skg_slptime;            /* Number of ticks we vol. slept */
        int     skg_runtime;            /* Number of ticks we were running */
};
#define kg_slptime      kg_sched->skg_slptime
#define kg_runtime      kg_sched->skg_runtime

struct td_sched {
        int     std_slptime;
};
#define td_slptime      td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:   Total priority range for timeshare threads.
 * PRI_NRESV:   Number of nice values.
 * PRI_BASE:    The start of the dynamic range.
 */
#define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define SCHED_PRI_NRESV         ((PRIO_MAX - PRIO_MIN) + 1)
#define SCHED_PRI_NHALF         (SCHED_PRI_NRESV / 2)
#define SCHED_PRI_BASE          (PRI_MIN_TIMESHARE)
#define SCHED_PRI_INTERACT(score) \
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
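
/*
 * Illustrative example (not part of the original source; assumes the stock
 * priority ranges where PRI_MIN_TIMESHARE is 160 and PRI_MAX_TIMESHARE is
 * 223, so SCHED_PRI_RANGE is 64):  a ksegrp with an interactivity score of
 * 50 maps to SCHED_PRI_INTERACT(50) == 50 * 64 / 100 == 32, so
 * sched_priority() would compute 160 + 32 + p_nice before clamping the
 * result to the timeshare range.
 */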

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate
 *              before throttling back.
 * SLP_RUN_FORK:        Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:        Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:     Threshold for placement on the current runq.
 */
#define SCHED_SLP_RUN_MAX       ((hz * 5) << 10)
#define SCHED_SLP_RUN_FORK      ((hz / 2) << 10)
#define SCHED_INTERACT_MAX      (100)
#define SCHED_INTERACT_HALF     (SCHED_INTERACT_MAX / 2)
#define SCHED_INTERACT_THRESH   (30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:  Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:       The nice cutoff point for slice assignment.
 */
#define SCHED_SLICE_MIN                 (slice_min)
#define SCHED_SLICE_MAX                 (slice_max)
#define SCHED_SLICE_INTERACTIVE         (slice_max)
#define SCHED_SLICE_NTHRESH             (SCHED_PRI_NHALF - 1)
#define SCHED_SLICE_RANGE               (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_SLICE_NICE(nice) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
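
/*
 * Worked example (illustrative, not original text; assumes hz == 1000, so
 * after sched_setup() slice_min == 10 and slice_max == 142, giving
 * SCHED_SLICE_RANGE == 133, with SCHED_SLICE_NTHRESH == 19):  a
 * non-interactive kse whose nice value sits 9 steps above ksq_nicemin gets
 *      SCHED_SLICE_NICE(9) == 142 - (9 * 133) / 19 == 142 - 63 == 79 ticks,
 * while the least nice kse on the queue receives the full 142-tick slice.
 */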

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define SCHED_INTERACTIVE(kg) \
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define SCHED_CURR(kg, ke) \
    (ke->ke_thread->td_priority < kg->kg_user_pri || \
    SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  10
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
        struct runq     ksq_idle;               /* Queue of IDLE threads. */
        struct runq     ksq_timeshare[2];       /* Run queues for !IDLE. */
        struct runq     *ksq_next;              /* Next timeshare queue. */
        struct runq     *ksq_curr;              /* Current queue. */
        int             ksq_load_timeshare;     /* Load for timeshare. */
        int             ksq_load;               /* Aggregate load. */
        short           ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
        short           ksq_nicemin;            /* Least nice. */
#ifdef SMP
        int                     ksq_transferable;
        LIST_ENTRY(kseq)        ksq_siblings;   /* Next in kseq group. */
        struct kseq_group       *ksq_group;     /* Our processor group. */
        volatile struct kse     *ksq_assigned;  /* assigned by another CPU. */
#else
        int             ksq_sysload;            /* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
        int     ksg_cpus;               /* Count of CPUs in this kseq group. */
        cpumask_t ksg_cpumask;          /* Mask of cpus in this group. */
        cpumask_t ksg_idlemask;         /* Idle cpus in this group. */
        cpumask_t ksg_mask;             /* Bit mask for first cpu. */
        int     ksg_load;               /* Total load of this group. */
        int     ksg_transferable;       /* Transferable load of this group. */
        LIST_HEAD(, kseq)       ksg_members; /* Linked list of all members. */
};
#endif
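
/*
 * Example layout (illustrative, not from the original source):  if the SMP
 * topology reports a two-way HTT package whose logical CPUs are 0 and 1 as
 * one cpu group, sched_setup() builds a single kseq_group with
 * ksg_cpus == 2, ksg_cpumask == 0x3 and ksg_mask == 0x1 (the first cpu's
 * bit), and both kseq_cpu[0] and kseq_cpu[1] are linked onto ksg_members.
 */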

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t kseq_idle;
static int ksg_maxid;
static struct kseq      kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;

#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#define KSEQ_ID(x)      ((x) - kseq_cpu)
#define KSEQ_GROUP(x)   (&kseq_groups[(x)])
#else   /* !SMP */
static struct kseq      kseq_cpu;

#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static void sched_add_internal(struct thread *td, int preemptive);
static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
/*
 * On P4 Xeons the round-robin interrupt delivery is broken.  As a result of
 * this, we can't pin interrupts to the cpu that they were delivered to,
 * otherwise all ithreads only run on CPU 0.
 */
#ifdef __i386__
#define KSE_CAN_MIGRATE(ke, class) \
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
#else /* !__i386__ */
#define KSE_CAN_MIGRATE(ke, class) \
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 && \
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif /* !__i386__ */
#endif

void
kseq_print(int cpu)
{
        struct kseq *kseq;
        int i;

        kseq = KSEQ_CPU(cpu);

        printf("kseq:\n");
        printf("\tload: %d\n", kseq->ksq_load);
        printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
        printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
        printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
        printf("\tnice counts:\n");
        for (i = 0; i < SCHED_PRI_NRESV; i++)
                if (kseq->ksq_nice[i])
                        printf("\t\t%d = %d\n",
                            i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
        if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
                kseq->ksq_transferable++;
                kseq->ksq_group->ksg_transferable++;
        }
#endif
        runq_add(ke->ke_runq, ke);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
        if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
                kseq->ksq_transferable--;
                kseq->ksq_group->ksg_transferable--;
        }
#endif
        runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
        int class;

        mtx_assert(&sched_lock, MA_OWNED);
        class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
        if (class == PRI_TIMESHARE)
                kseq->ksq_load_timeshare++;
        kseq->ksq_load++;
        if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
                kseq->ksq_group->ksg_load++;
#else
                kseq->ksq_sysload++;
#endif
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                CTR6(KTR_ULE,
                    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
                    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
                    ke->ke_proc->p_nice, kseq->ksq_nicemin);
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                kseq_nice_add(kseq, ke->ke_proc->p_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
        int class;

        mtx_assert(&sched_lock, MA_OWNED);
        class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
        if (class == PRI_TIMESHARE)
                kseq->ksq_load_timeshare--;
        if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
                kseq->ksq_group->ksg_load--;
#else
                kseq->ksq_sysload--;
#endif
        kseq->ksq_load--;
        ke->ke_runq = NULL;
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                kseq_nice_rem(kseq, ke->ke_proc->p_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* Normalize to zero. */
        kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
        if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
                kseq->ksq_nicemin = nice;
}
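
/*
 * Illustrative note (added, not original text):  with PRIO_MIN == -20 and
 * PRIO_MAX == 20 the ksq_nice[] array has SCHED_PRI_NRESV == 41 buckets and
 * SCHED_PRI_NHALF == 20, so the "normalize to zero" step maps nice -20 to
 * bucket 0, nice 0 to bucket 20 and nice +20 to bucket 40.
 */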
static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
        int n;

        mtx_assert(&sched_lock, MA_OWNED);
        /* Normalize to zero. */
        n = nice + SCHED_PRI_NHALF;
        kseq->ksq_nice[n]--;
        KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

        /*
         * If this wasn't the smallest nice value or there are more in
         * this bucket we can just return.  Otherwise we have to recalculate
         * the smallest nice.
         */
        if (nice != kseq->ksq_nicemin ||
            kseq->ksq_nice[n] != 0 ||
            kseq->ksq_load_timeshare == 0)
                return;

        for (; n < SCHED_PRI_NRESV; n++)
                if (kseq->ksq_nice[n]) {
                        kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
                        return;
                }
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
        struct kseq_group *high;
        struct kseq_group *low;
        struct kseq_group *ksg;
        int cnt;
        int i;

        if (smp_started == 0)
                goto out;
        low = high = NULL;
        i = random() % (ksg_maxid + 1);
        for (cnt = 0; cnt <= ksg_maxid; cnt++) {
                ksg = KSEQ_GROUP(i);
                /*
                 * Find the CPU with the highest load that has some
                 * threads to transfer.
                 */
                if ((high == NULL || ksg->ksg_load > high->ksg_load)
                    && ksg->ksg_transferable)
                        high = ksg;
                if (low == NULL || ksg->ksg_load < low->ksg_load)
                        low = ksg;
                if (++i > ksg_maxid)
                        i = 0;
        }
        if (low != NULL && high != NULL && high != low)
                sched_balance_pair(LIST_FIRST(&high->ksg_members),
                    LIST_FIRST(&low->ksg_members));
out:
        bal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_groups(void)
{
        int i;

        mtx_assert(&sched_lock, MA_OWNED);
        if (smp_started)
                for (i = 0; i <= ksg_maxid; i++)
                        sched_balance_group(KSEQ_GROUP(i));
        gbal_tick = ticks + (random() % (hz * 2));
}

static void
sched_balance_group(struct kseq_group *ksg)
{
        struct kseq *kseq;
        struct kseq *high;
        struct kseq *low;
        int load;

        if (ksg->ksg_transferable == 0)
                return;
        low = NULL;
        high = NULL;
        LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
                load = kseq->ksq_load;
                if (high == NULL || load > high->ksq_load)
                        high = kseq;
                if (low == NULL || load < low->ksq_load)
                        low = kseq;
        }
        if (high != NULL && low != NULL && high != low)
                sched_balance_pair(high, low);
}
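
/*
 * Worked example for the pair balancer below (illustrative, not original
 * text):  with a high load of 7 and a low load of 2 the imbalance is 5,
 * which rounds up to a target move of 3 kses; the number actually moved is
 * then capped by the transferable count before kseq_move() is called that
 * many times.
 */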
static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
        int transferable;
        int high_load;
        int low_load;
        int move;
        int diff;
        int i;

        /*
         * If we're transferring within a group we have to use this specific
         * kseq's transferable count, otherwise we can steal from other members
         * of the group.
         */
        if (high->ksq_group == low->ksq_group) {
                transferable = high->ksq_transferable;
                high_load = high->ksq_load;
                low_load = low->ksq_load;
        } else {
                transferable = high->ksq_group->ksg_transferable;
                high_load = high->ksq_group->ksg_load;
                low_load = low->ksq_group->ksg_load;
        }
        if (transferable == 0)
                return;
        /*
         * Determine what the imbalance is and then adjust that to how many
         * kses we actually have to give up (transferable).
         */
        diff = high_load - low_load;
        move = diff / 2;
        if (diff & 0x1)
                move++;
        move = min(move, transferable);
        for (i = 0; i < move; i++)
                kseq_move(high, KSEQ_ID(low));
        return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
        struct kseq *kseq;
        struct kseq *to;
        struct kse *ke;

        kseq = from;
        to = KSEQ_CPU(cpu);
        ke = kseq_steal(kseq, 1);
        if (ke == NULL) {
                struct kseq_group *ksg;

                ksg = kseq->ksq_group;
                LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
                        if (kseq == from || kseq->ksq_transferable == 0)
                                continue;
                        ke = kseq_steal(kseq, 1);
                        break;
                }
                if (ke == NULL)
                        panic("kseq_move: No KSEs available with a "
                            "transferable count of %d\n",
                            ksg->ksg_transferable);
        }
        if (kseq == to)
                return;
        ke->ke_state = KES_THREAD;
        kseq_runq_rem(kseq, ke);
        kseq_load_rem(kseq, ke);
        kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
        struct kseq_group *ksg;
        struct kseq *steal;
        struct kse *ke;

        ksg = kseq->ksq_group;
        /*
         * If we're in a cpu group, try and steal kses from another cpu in
         * the group before idling.
         */
        if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
                LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
                        if (steal == kseq || steal->ksq_transferable == 0)
                                continue;
                        ke = kseq_steal(steal, 0);
                        if (ke == NULL)
                                continue;
                        ke->ke_state = KES_THREAD;
                        kseq_runq_rem(steal, ke);
                        kseq_load_rem(steal, ke);
                        ke->ke_cpu = PCPU_GET(cpuid);
                        sched_add_internal(ke->ke_thread, 0);
                        return (0);
                }
        }
        /*
         * We only set the idled bit when all of the cpus in the group are
         * idle.  Otherwise we could get into a situation where a KSE bounces
         * back and forth between two idle cores on separate physical CPUs.
         */
        ksg->ksg_idlemask |= PCPU_GET(cpumask);
        if (ksg->ksg_idlemask != ksg->ksg_cpumask)
                return (1);
        atomic_set_int(&kseq_idle, ksg->ksg_mask);
        return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
        struct kse *nke;
        struct kse *ke;

        do {
                (volatile struct kse *)ke = kseq->ksq_assigned;
        } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
        for (; ke != NULL; ke = nke) {
                nke = ke->ke_assign;
                ke->ke_flags &= ~KEF_ASSIGNED;
                sched_add_internal(ke->ke_thread, 0);
        }
}
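
/*
 * Note on the handoff (summary added for clarity, not original text):
 * kseq_notify() below pushes a kse onto the target cpu's ksq_assigned list
 * with an atomic_cmpset_ptr() loop, while kseq_assign() above atomically
 * detaches the whole list by swapping the head with NULL and re-queues each
 * kse locally via sched_add_internal().  An IPI_AST is only sent when the
 * queued kse should preempt the remote cpu's current thread or that cpu is
 * idle.
 */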
static void
kseq_notify(struct kse *ke, int cpu)
{
        struct kseq *kseq;
        struct thread *td;
        struct pcpu *pcpu;

        ke->ke_cpu = cpu;
        ke->ke_flags |= KEF_ASSIGNED;

        kseq = KSEQ_CPU(cpu);

        /*
         * Place a KSE on another cpu's queue and force a resched.
         */
        do {
                (volatile struct kse *)ke->ke_assign = kseq->ksq_assigned;
        } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
        pcpu = pcpu_find(cpu);
        td = pcpu->pc_curthread;
        if (ke->ke_thread->td_priority < td->td_priority ||
            td == pcpu->pc_idlethread) {
                td->td_flags |= TDF_NEEDRESCHED;
                ipi_selected(1 << cpu, IPI_AST);
        }
}

static struct kse *
runq_steal(struct runq *rq)
{
        struct rqhead *rqh;
        struct rqbits *rqb;
        struct kse *ke;
        int word;
        int bit;

        mtx_assert(&sched_lock, MA_OWNED);
        rqb = &rq->rq_status;
        for (word = 0; word < RQB_LEN; word++) {
                if (rqb->rqb_bits[word] == 0)
                        continue;
                for (bit = 0; bit < RQB_BPW; bit++) {
                        if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
                                continue;
                        rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
                        TAILQ_FOREACH(ke, rqh, ke_procq) {
                                if (KSE_CAN_MIGRATE(ke,
                                    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
                                        return (ke);
                        }
                }
        }
        return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
        struct kse *ke;

        /*
         * Steal from next first to try to get a non-interactive task that
         * may not have run for a while.
         */
        if ((ke = runq_steal(kseq->ksq_next)) != NULL)
                return (ke);
        if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
                return (ke);
        if (stealidle)
                return (runq_steal(&kseq->ksq_idle));
        return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
        struct kseq_group *ksg;
        int cpu;

        if (smp_started == 0)
                return (0);
        cpu = 0;
        ksg = kseq->ksq_group;

        /*
         * If there are any idle groups, give them our extra load.  The
         * threshold at which we start to reassign kses has a large impact
         * on the overall performance of the system.  Tuned too high and
         * some CPUs may idle.  Too low and there will be excess migration
         * and context switches.
         */
        if (ksg->ksg_load > (ksg->ksg_cpus * 2) && kseq_idle) {
                /*
                 * Multiple cpus could find this bit simultaneously
                 * but the race shouldn't be terrible.
                 */
                cpu = ffs(kseq_idle);
                if (cpu)
                        atomic_clear_int(&kseq_idle, 1 << (cpu - 1));
        }
        /*
         * If another cpu in this group has idled, assign a thread over
         * to them after checking to see if there are idled groups.
         */
        if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) {
                cpu = ffs(ksg->ksg_idlemask);
                if (cpu)
                        ksg->ksg_idlemask &= ~(1 << (cpu - 1));
        }
        /*
         * Now that we've found an idle CPU, migrate the thread.
         */
        if (cpu) {
                cpu--;
                ke->ke_runq = NULL;
                kseq_notify(ke, cpu);
                return (1);
        }
        return (0);
}

#endif  /* SMP */
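
/*
 * Note (added summary, not original text):  kseq_choose() below always
 * serves ksq_curr first; only when that queue is empty are ksq_curr and
 * ksq_next swapped (at most once per call) before falling back to ksq_idle.
 * Any kse found with a zero slice is re-sliced and deferred to the next
 * queue, so badly niced kses cannot starve the current batch.
 */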

/*
 * Pick the highest priority task we have and return it.
 */
static struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        mtx_assert(&sched_lock, MA_OWNED);
        swap = NULL;

        for (;;) {
                ke = runq_choose(kseq->ksq_curr);
                if (ke == NULL) {
                        /*
                         * We already swapped once and didn't get anywhere.
                         */
                        if (swap)
                                break;
                        swap = kseq->ksq_curr;
                        kseq->ksq_curr = kseq->ksq_next;
                        kseq->ksq_next = swap;
                        continue;
                }
                /*
                 * If we encounter a slice of 0 the kse is in a
                 * TIMESHARE kse group and its nice was too far out
                 * of the range that receives slices.
                 */
                if (ke->ke_slice == 0) {
                        runq_remove(ke->ke_runq, ke);
                        sched_slice(ke);
                        ke->ke_runq = kseq->ksq_next;
                        runq_add(ke->ke_runq, ke);
                        continue;
                }
                return (ke);
        }

        return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
        runq_init(&kseq->ksq_timeshare[0]);
        runq_init(&kseq->ksq_timeshare[1]);
        runq_init(&kseq->ksq_idle);
        kseq->ksq_curr = &kseq->ksq_timeshare[0];
        kseq->ksq_next = &kseq->ksq_timeshare[1];
        kseq->ksq_load = 0;
        kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
        int balance_groups;
        int i;
#endif

        slice_min = (hz/100);   /* 10ms */
        slice_max = (hz/7);     /* ~140ms */

#ifdef SMP
        balance_groups = 0;
        /*
         * Initialize the kseqs.
         */
        for (i = 0; i < MAXCPU; i++) {
                struct kseq *ksq;

                ksq = &kseq_cpu[i];
                ksq->ksq_assigned = NULL;
                kseq_setup(&kseq_cpu[i]);
        }
        if (smp_topology == NULL) {
                struct kseq_group *ksg;
                struct kseq *ksq;

                for (i = 0; i < MAXCPU; i++) {
                        ksq = &kseq_cpu[i];
                        ksg = &kseq_groups[i];
                        /*
                         * Setup a kseq group with one member.
                         */
                        ksq->ksq_transferable = 0;
                        ksq->ksq_group = ksg;
                        ksg->ksg_cpus = 1;
                        ksg->ksg_idlemask = 0;
                        ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
                        ksg->ksg_load = 0;
                        ksg->ksg_transferable = 0;
                        LIST_INIT(&ksg->ksg_members);
                        LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
                }
        } else {
                struct kseq_group *ksg;
                struct cpu_group *cg;
                int j;

                for (i = 0; i < smp_topology->ct_count; i++) {
                        cg = &smp_topology->ct_group[i];
                        ksg = &kseq_groups[i];
                        /*
                         * Initialize the group.
                         */
                        ksg->ksg_idlemask = 0;
                        ksg->ksg_load = 0;
                        ksg->ksg_transferable = 0;
                        ksg->ksg_cpus = cg->cg_count;
                        ksg->ksg_cpumask = cg->cg_mask;
                        LIST_INIT(&ksg->ksg_members);
                        /*
                         * Find all of the group members and add them.
                         */
                        for (j = 0; j < MAXCPU; j++) {
                                if ((cg->cg_mask & (1 << j)) != 0) {
                                        if (ksg->ksg_mask == 0)
                                                ksg->ksg_mask = 1 << j;
                                        kseq_cpu[j].ksq_transferable = 0;
                                        kseq_cpu[j].ksq_group = ksg;
                                        LIST_INSERT_HEAD(&ksg->ksg_members,
                                            &kseq_cpu[j], ksq_siblings);
                                }
                        }
                        if (ksg->ksg_cpus > 1)
                                balance_groups = 1;
                }
                ksg_maxid = smp_topology->ct_count - 1;
        }
        /*
         * Stagger the group and global load balancer so they do not
         * interfere with each other.
         */
        bal_tick = ticks + hz;
        if (balance_groups)
                gbal_tick = ticks + (hz / 2);
#else
        kseq_setup(KSEQ_SELF());
#endif
        mtx_lock_spin(&sched_lock);
        kseq_load_add(KSEQ_SELF(), &kse0);
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return;

        pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
        pri += SCHED_PRI_BASE;
        pri += kg->kg_proc->p_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
        struct kseq *kseq;
        struct ksegrp *kg;

        kg = ke->ke_ksegrp;
        kseq = KSEQ_CPU(ke->ke_cpu);

        /*
         * Rationale:
         * KSEs in interactive ksegs get the minimum slice so that we
         * quickly notice if one abuses its advantage.
         *
         * KSEs in non-interactive ksegs are assigned a slice that is
         * based on the kseg's nice value relative to the least nice kseg
         * on the run queue for this cpu.
         *
         * If the KSE is less nice than all others it gets the maximum
         * slice and other KSEs will adjust their slice relative to
         * this when they first expire.
         *
         * There is a 20 point window that starts relative to the least
         * nice kse on the run queue.  Slice size is determined by
         * the kse distance from the last nice ksegrp.
         *
         * If the kse is outside of the window it will get no slice
         * and will be reevaluated each time it is selected on the
         * run queue.  The exception to this is nice 0 ksegs when
         * a nice -20 is running.  They are always granted a minimum
         * slice.
         */
        if (!SCHED_INTERACTIVE(kg)) {
                int nice;

                nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin);
                if (kseq->ksq_load_timeshare == 0 ||
                    kg->kg_proc->p_nice < kseq->ksq_nicemin)
                        ke->ke_slice = SCHED_SLICE_MAX;
                else if (nice <= SCHED_SLICE_NTHRESH)
                        ke->ke_slice = SCHED_SLICE_NICE(nice);
                else if (kg->kg_proc->p_nice == 0)
                        ke->ke_slice = SCHED_SLICE_MIN;
                else
                        ke->ke_slice = 0;
        } else
                ke->ke_slice = SCHED_SLICE_INTERACTIVE;

        CTR6(KTR_ULE,
            "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
            ke, ke->ke_slice, kg->kg_proc->p_nice, kseq->ksq_nicemin,
            kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

        return;
}
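
/*
 * Worked example for the window logic above (illustrative, not original
 * text):  if the least nice timeshare kse on this cpu has p_nice == -5
 * (ksq_nicemin == -5), a non-interactive kse with p_nice == 0 is 5 steps
 * into the window, within SCHED_SLICE_NTHRESH (19), so it is granted
 * SCHED_SLICE_NICE(5).  A kse with p_nice == 20 would be 25 steps out,
 * beyond the window, and would get a slice of 0 until it is reevaluated.
 */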

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
        int sum;

        sum = kg->kg_runtime + kg->kg_slptime;
        if (sum < SCHED_SLP_RUN_MAX)
                return;
        /*
         * If we have exceeded by more than 1/5th then the algorithm below
         * will not bring us back into range.  Dividing by two here forces
         * us into the range of [3/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX].
         */
        if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
                kg->kg_runtime /= 2;
                kg->kg_slptime /= 2;
                return;
        }
        kg->kg_runtime = (kg->kg_runtime / 5) * 4;
        kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
        int ratio;
        int sum;

        sum = kg->kg_runtime + kg->kg_slptime;
        if (sum > SCHED_SLP_RUN_FORK) {
                ratio = sum / SCHED_SLP_RUN_FORK;
                kg->kg_runtime /= ratio;
                kg->kg_slptime /= ratio;
        }
}

static int
sched_interact_score(struct ksegrp *kg)
{
        int div;

        if (kg->kg_runtime > kg->kg_slptime) {
                div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
                return (SCHED_INTERACT_HALF +
                    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
        } if (kg->kg_slptime > kg->kg_runtime) {
                div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
                return (kg->kg_runtime / div);
        }

        /*
         * This can happen if slptime and runtime are 0.
         */
        return (0);
}
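
/*
 * Worked example (illustrative, not original text):  SCHED_INTERACT_HALF
 * is 50, so a ksegrp that slept for 30000 ticks and ran for 10000 gets
 *      div == max(1, 30000 / 50) == 600,  score == 10000 / 600 == 16,
 * which is below SCHED_INTERACT_THRESH (30) and therefore interactive.
 * With the ratio reversed (runtime 30000, slptime 10000) the score is
 *      50 + (50 - 10000 / 600) == 84,
 * well into the non-interactive range.
 */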

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
                /*
                 * Shift the tick count out so that the divide doesn't
                 * round away our results.
                 */
                ke->ke_ticks <<= 10;
                ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
                    SCHED_CPU_TICKS;
                ke->ke_ticks >>= 10;
        } else
                ke->ke_ticks = 0;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;

        ke = td->td_kse;
        mtx_assert(&sched_lock, MA_OWNED);
        if (TD_ON_RUNQ(td)) {
                /*
                 * If the priority has been elevated due to priority
                 * propagation, we may have to move ourselves to a new
                 * queue.  We still call adjustrunqueue below in case kse
                 * needs to fix things up.
                 */
                if (prio < td->td_priority && ke &&
                    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
                    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
                        runq_remove(ke->ke_runq, ke);
                        ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
                        runq_add(ke->ke_runq, ke);
                }
                adjustrunqueue(td, prio);
        } else
                td->td_priority = prio;
}

void
sched_switch(struct thread *td, struct thread *newtd)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = td->td_oncpu;
        td->td_oncpu = NOCPU;
        td->td_flags &= ~(TDF_NEEDRESCHED | TDF_OWEPREEMPT);

        /*
         * If the KSE has been assigned it may be in the process of switching
         * to the new cpu.  This is the case in sched_bind().
         */
void
sched_switch(struct thread *td, struct thread *newtd)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~(TDF_NEEDRESCHED | TDF_OWEPREEMPT);

	/*
	 * If the KSE has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		if (td == PCPU_GET(idlethread))
			TD_SET_CAN_RUN(td);
		else if (TD_IS_RUNNING(td)) {
			kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
			setrunqueue(td);
		} else {
			if (ke->ke_runq) {
				kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
			} else if ((td->td_flags & TDF_IDLETD) == 0)
				kdb_backtrace();
			/*
			 * We will not be on the run queue.  So we must be
			 * sleeping or similar.
			 */
			if (td->td_proc->p_flag & P_SA)
				kse_reassign(ke);
		}
	}
	if (newtd == NULL)
		newtd = choosethread();
	else
		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
	if (td != newtd)
		cpu_switch(td, newtd);
	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}
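
/*
 * Editor's note, not in the original source: on the way out of
 * sched_switch() a thread that is still runnable has its load dropped
 * from the per-cpu kseq and is handed straight back to setrunqueue(),
 * a thread that is blocking simply has its load dropped, and the idle
 * thread is only marked as able to run again.  All of this is skipped
 * for a KEF_ASSIGNED kse, which is already in transit to another cpu.
 */
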
void
sched_nice(struct proc *p, int nice)
{
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_pri_class == PRI_TIMESHARE) {
			FOREACH_KSE_IN_GROUP(kg, ke) {
				if (ke->ke_runq == NULL)
					continue;
				kseq = KSEQ_CPU(ke->ke_cpu);
				kseq_nice_rem(kseq, p->p_nice);
				kseq_nice_add(kseq, nice);
			}
		}
	}
	p->p_nice = nice;
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_priority(kg);
		FOREACH_THREAD_IN_GROUP(kg, td)
			td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_base_pri = td->td_priority;

	CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
	    td->td_kse, td->td_slptime);
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		if (td->td_kse)
			sched_slice(td->td_kse);
		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
		    td->td_kse, hzticks);
		td->td_slptime = 0;
	}
	setrunqueue(td);
}
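
/*
 * Editor's note, not in the original source: kg_slptime and kg_runtime
 * are kept in a fixed-point form, ticks shifted left by 10, which is why
 * the sleep interval above is scaled with "<< 10" and why the KTR trace
 * in sched_clock() prints these fields shifted back down by 10.  A sleep
 * long enough to hit SCHED_SLP_RUN_MAX simply resets the group to
 * "maximally interactive": full sleep credit and one unit of run time.
 */
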
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct proc *p, struct proc *p1)
{

	mtx_assert(&sched_lock, MA_OWNED);

	p1->p_nice = p->p_nice;
	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
}

void
sched_fork_kse(struct kse *ke, struct kse *child)
{

	child->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	child->ke_cpu = ke->ke_cpu;
	child->ke_runq = NULL;

	/* Grab our parent's cpu estimation information. */
	child->ke_ticks = ke->ke_ticks;
	child->ke_ltick = ke->ke_ltick;
	child->ke_ftick = ke->ke_ftick;
}

void
sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
	    kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime,
	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
}
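
/*
 * Editor's note, not in the original source: at fork time the child
 * inherits the parent's sleep/run history, and sched_interact_fork()
 * scales both values down whenever their sum exceeds SCHED_SLP_RUN_FORK,
 * so a long-lived parent cannot hand a new child an arbitrarily large
 * history.  The parent itself is charged one extra tick of run time,
 * which is the "penalty" referred to in the comment above sched_fork().
 */
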
void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_KSE_IN_GROUP(kg, ke) {
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			if (KSE_CAN_MIGRATE(ke, oclass)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke, nclass)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_proc->p_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_proc->p_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct proc *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
}

void
sched_exit_kse(struct kse *ke, struct kse *child)
{
	kseq_load_rem(KSEQ_CPU(child->ke_cpu), child);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	/* kg->kg_slptime += child->kg_slptime; */
	kg->kg_runtime += child->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
}
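
/*
 * Editor's note, not in the original source: sched_clock() below is the
 * per-tick hook.  In rough order it resolves the stathz-dependent
 * constants on first use, updates the pctcpu bookkeeping for the running
 * kse, on SMP kicks off the periodic load balancing, charges one tick of
 * run time to timeshare ksegrps, and finally, when the slice is used up,
 * recomputes priority and slice and moves the kse to the appropriate
 * queue.
 */
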
void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	if (ticks == bal_tick)
		sched_balance();
	if (ticks == gbal_tick)
		sched_balance_groups();
#endif
	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;

	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	kseq = KSEQ_SELF();
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}
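
/*
 * Editor's note, not in the original source: sched_choose() below hands
 * the next kse to run back to the machine-independent code.  On SMP it
 * first drains any kses other cpus have pushed at us via kseq_assign(),
 * and when it finds only idle-class work, or nothing at all, it calls
 * kseq_idled(), which appears to look for work to steal from another
 * cpu before this one goes idle.
 */
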
struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
			    ke, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td)
{

	sched_add_internal(td, 1);
}
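
/*
 * Editor's note, not in the original source: sched_add_internal() below
 * places a newly runnable kse on a run queue.  Interrupt and realtime
 * threads always go on the current queue with a full slice, timeshare
 * threads go on the current or next queue depending on their
 * interactivity (SCHED_CURR), and idle-class threads normally land on
 * the separate idle queue.  On SMP the kse may instead be forwarded to
 * another cpu with kseq_notify() or handed off via kseq_transfer().
 */
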
static void
sched_add_internal(struct thread *td, int preemptive)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	kseq = KSEQ_SELF();
	KASSERT((ke->ke_thread != NULL),
	    ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));

	class = PRI_BASE(kg->kg_pri_class);
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority prop.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	if (ke->ke_cpu != PCPU_GET(cpuid)) {
		ke->ke_runq = NULL;
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	} else if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class))
		if (kseq_transfer(kseq, ke, class))
			return;
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;

#ifdef SMP
	/*
	 * Only try to preempt if the thread is unpinned or pinned to the
	 * current CPU.
	 */
	if (KSE_CAN_MIGRATE(ke, class) || ke->ke_cpu == PCPU_GET(cpuid))
#endif
	if (preemptive && maybe_preempt(td))
		return;
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke);
	kseq_load_add(kseq, ke);
}
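
/*
 * Editor's note, not in the original source: the preemption test at the
 * end of sched_add_internal() is worth reading carefully; under SMP the
 * unbraced "if (KSE_CAN_MIGRATE...)" makes the following
 * "if (preemptive && maybe_preempt(td))" its body, so preemption is only
 * attempted for threads that are unpinned or already on this cpu,
 * exactly as the comment there says.
 */
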
void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	ke = td->td_kse;
	/*
	 * It is safe to just return here because sched_rem() is only ever
	 * used in places where we're immediately going to add the
	 * kse back on again.  In that case it'll be added with the correct
	 * thread and priority when the caller drops the sched_lock.
	 */
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_state == KES_ONRUNQ),
	    ("sched_rem: KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
		    ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rtick per second ? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}
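
/*
 * Editor's note, not in the original source: sched_bind() below pins the
 * current thread to a particular cpu.  On SMP, if we are not already
 * there, the kse is pushed to the target cpu with kseq_notify() and the
 * thread voluntarily switches out; by the time mi_switch() returns it is
 * running on the requested cpu, as the original comment notes.
 */
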
void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= ksg_maxid; i++)
		total += KSEQ_GROUP(i)->ksg_load;
	return (total);
#else
	return (KSEQ_SELF()->ksq_sysload);
#endif
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}