/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define KTR_ULE KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");

static int sched_strict;
SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

#ifdef SMP
/* Callouts to handle load balancing SMP systems. */
static struct callout kseq_lb_callout;
static struct callout kseq_group_callout;
#endif

/*
 * These data structures are allocated within their parent data structure but
 * are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        /* CPU that we have affinity for. */
        u_char          ske_cpu;
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu
#define ke_assign       ke_procq.tqe_next

#define KEF_ASSIGNED    KEF_SCHED0      /* KSE is being migrated. */
#define KEF_BOUND       KEF_SCHED1      /* KSE can not migrate. */

struct kg_sched {
        int     skg_slptime;            /* Number of ticks we vol. slept */
        int     skg_runtime;            /* Number of ticks we were running */
};
#define kg_slptime      kg_sched->skg_slptime
#define kg_runtime      kg_sched->skg_runtime

struct td_sched {
        int     std_slptime;
};
#define td_slptime      td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower(better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:   Total priority range for timeshare threads.
 * PRI_NRESV:   Number of nice values.
 * PRI_BASE:    The start of the dynamic range.
 */
#define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define SCHED_PRI_NRESV         ((PRIO_MAX - PRIO_MIN) + 1)
#define SCHED_PRI_NHALF         (SCHED_PRI_NRESV / 2)
#define SCHED_PRI_BASE          (PRI_MIN_TIMESHARE)
#define SCHED_PRI_INTERACT(score)                                       \
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate
 *              before throttling back.
 * SLP_RUN_FORK:        Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:        Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:     Threshold for placement on the current runq.
 */
#define SCHED_SLP_RUN_MAX       ((hz * 5) << 10)
#define SCHED_SLP_RUN_FORK      ((hz / 2) << 10)
#define SCHED_INTERACT_MAX      (100)
#define SCHED_INTERACT_HALF     (SCHED_INTERACT_MAX / 2)
#define SCHED_INTERACT_THRESH   (30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:  Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:       The nice cutoff point for slice assignment.
 */
#define SCHED_SLICE_MIN                 (slice_min)
#define SCHED_SLICE_MAX                 (slice_max)
#define SCHED_SLICE_NTHRESH             (SCHED_PRI_NHALF - 1)
#define SCHED_SLICE_RANGE               (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_SLICE_NICE(nice)                                          \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define SCHED_INTERACTIVE(kg)                                           \
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define SCHED_CURR(kg, ke)                                              \
    (ke->ke_thread->td_priority != kg->kg_user_pri ||                   \
    SCHED_INTERACTIVE(kg))

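/*
 * Illustrative example, not part of the original source: assuming the usual
 * 64-priority timeshare range and SCHED_INTERACT_MAX of 100, an interactivity
 * score of 50 maps to SCHED_PRI_INTERACT(50) = 50 * 64 / 100 = 32 steps above
 * PRI_MIN_TIMESHARE before the nice value is added and the result is clamped
 * back into the timeshare range.  The exact figures depend on the ranges
 * defined in <sys/priority.h>.
 */
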
/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  10
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
        struct runq     ksq_idle;               /* Queue of IDLE threads. */
        struct runq     ksq_timeshare[2];       /* Run queues for !IDLE. */
        struct runq     *ksq_next;              /* Next timeshare queue. */
        struct runq     *ksq_curr;              /* Current queue. */
        int             ksq_load_timeshare;     /* Load for timeshare. */
        int             ksq_load;               /* Aggregate load. */
        short           ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
        short           ksq_nicemin;            /* Least nice. */
#ifdef SMP
        int                     ksq_transferable;
        LIST_ENTRY(kseq)        ksq_siblings;   /* Next in kseq group. */
        struct kseq_group       *ksq_group;     /* Our processor group. */
        volatile struct kse     *ksq_assigned;  /* assigned by another CPU. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
        int     ksg_cpus;               /* Count of CPUs in this kseq group. */
        int     ksg_cpumask;            /* Mask of cpus in this group. */
        int     ksg_idlemask;           /* Idle cpus in this group. */
        int     ksg_mask;               /* Bit mask for first cpu. */
        int     ksg_load;               /* Total load of this group. */
        int     ksg_transferable;       /* Transferable load of this group. */
        LIST_HEAD(, kseq)       ksg_members; /* Linked list of all members. */
};
#endif

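/*
 * Illustrative example, not part of the original source: on a hypothetical
 * dual-package HTT machine exposing logical cpus 0-3, the topology would
 * typically yield two groups.  The group holding cpus 0 and 1 would have
 * ksg_cpus = 2, ksg_cpumask = 0x3 and ksg_mask = 0x1, so a single bit in the
 * global idle mask stands for the whole physical package.
 */
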
/*
 * One kse queue per processor.
 */
#ifdef SMP
static int kseq_idle;
static int ksg_maxid;
static struct kseq      kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#define KSEQ_ID(x)      ((x) - kseq_cpu)
#define KSEQ_GROUP(x)   (&kseq_groups[(x)])
#else   /* !SMP */
static struct kseq      kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void *arg);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
#define KSE_CAN_MIGRATE(ke, class)                                      \
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&          \
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif

void
kseq_print(int cpu)
{
        struct kseq *kseq;
        int i;

        kseq = KSEQ_CPU(cpu);

        printf("kseq:\n");
        printf("\tload: %d\n", kseq->ksq_load);
        printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
        printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
        printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
        printf("\tnice counts:\n");
        for (i = 0; i < SCHED_PRI_NRESV; i++)
                if (kseq->ksq_nice[i])
                        printf("\t\t%d = %d\n",
                            i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
        if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
                kseq->ksq_transferable++;
                kseq->ksq_group->ksg_transferable++;
        }
#endif
        runq_add(ke->ke_runq, ke);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
        if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
                kseq->ksq_transferable--;
                kseq->ksq_group->ksg_transferable--;
        }
#endif
        runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
        int class;
        mtx_assert(&sched_lock, MA_OWNED);
        class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
        if (class == PRI_TIMESHARE)
                kseq->ksq_load_timeshare++;
        kseq->ksq_load++;
#ifdef SMP
        if (class != PRI_ITHD)
                kseq->ksq_group->ksg_load++;
#endif
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                CTR6(KTR_ULE,
                    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
                    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
                    ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
        int class;
        mtx_assert(&sched_lock, MA_OWNED);
        class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
        if (class == PRI_TIMESHARE)
                kseq->ksq_load_timeshare--;
#ifdef SMP
        if (class != PRI_ITHD)
                kseq->ksq_group->ksg_load--;
#endif
        kseq->ksq_load--;
        ke->ke_runq = NULL;
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
                kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* Normalize to zero. */
        kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
        if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
                kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
        int n;

        mtx_assert(&sched_lock, MA_OWNED);
        /* Normalize to zero. */
        n = nice + SCHED_PRI_NHALF;
        kseq->ksq_nice[n]--;
        KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

        /*
         * If this wasn't the smallest nice value or there are more in
         * this bucket we can just return.  Otherwise we have to recalculate
         * the smallest nice.
         */
        if (nice != kseq->ksq_nicemin ||
            kseq->ksq_nice[n] != 0 ||
            kseq->ksq_load_timeshare == 0)
                return;

        for (; n < SCHED_PRI_NRESV; n++)
                if (kseq->ksq_nice[n]) {
                        kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
                        return;
                }
}

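/*
 * Illustrative note, not part of the original source: the ksq_nice array is
 * indexed by the nice value shifted to be non-negative.  With PRIO_MIN of -20
 * and PRIO_MAX of 20, SCHED_PRI_NHALF is 20, so nice -20 lands in bucket 0,
 * nice 0 in bucket 20 and nice +20 in bucket 40, while ksq_nicemin tracks the
 * least nice value currently queued.
 */
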
#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void *arg)
{
        struct kseq_group *high;
        struct kseq_group *low;
        struct kseq_group *ksg;
        int timo;
        int cnt;
        int i;

        mtx_lock_spin(&sched_lock);
        if (smp_started == 0)
                goto out;
        low = high = NULL;
        i = random() % (ksg_maxid + 1);
        for (cnt = 0; cnt <= ksg_maxid; cnt++) {
                ksg = KSEQ_GROUP(i);
                /*
                 * Find the CPU with the highest load that has some
                 * threads to transfer.
                 */
                if ((high == NULL || ksg->ksg_load > high->ksg_load)
                    && ksg->ksg_transferable)
                        high = ksg;
                if (low == NULL || ksg->ksg_load < low->ksg_load)
                        low = ksg;
                if (++i > ksg_maxid)
                        i = 0;
        }
        if (low != NULL && high != NULL && high != low)
                sched_balance_pair(LIST_FIRST(&high->ksg_members),
                    LIST_FIRST(&low->ksg_members));
out:
        mtx_unlock_spin(&sched_lock);
        timo = random() % (hz * 2);
        callout_reset(&kseq_lb_callout, timo, sched_balance, NULL);
}

static void
sched_balance_groups(void *arg)
{
        int timo;
        int i;

        mtx_lock_spin(&sched_lock);
        if (smp_started)
                for (i = 0; i <= ksg_maxid; i++)
                        sched_balance_group(KSEQ_GROUP(i));
        mtx_unlock_spin(&sched_lock);
        timo = random() % (hz * 2);
        callout_reset(&kseq_group_callout, timo, sched_balance_groups, NULL);
}

static void
sched_balance_group(struct kseq_group *ksg)
{
        struct kseq *kseq;
        struct kseq *high;
        struct kseq *low;
        int load;

        if (ksg->ksg_transferable == 0)
                return;
        low = NULL;
        high = NULL;
        LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
                load = kseq->ksq_load;
                if (kseq == KSEQ_CPU(0))
                        load--;
                if (high == NULL || load > high->ksq_load)
                        high = kseq;
                if (low == NULL || load < low->ksq_load)
                        low = kseq;
        }
        if (high != NULL && low != NULL && high != low)
                sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
        int transferable;
        int high_load;
        int low_load;
        int move;
        int diff;
        int i;

        /*
         * If we're transferring within a group we have to use this specific
         * kseq's transferable count, otherwise we can steal from other members
         * of the group.
         */
        if (high->ksq_group == low->ksq_group) {
                transferable = high->ksq_transferable;
                high_load = high->ksq_load;
                low_load = low->ksq_load;
                /*
                 * XXX If we encounter cpu 0 we must remember to reduce its
                 * load by 1 to reflect the swi that is running the callout.
                 * At some point we should really fix load balancing of the
                 * swi and then this won't matter.
                 */
                if (high == KSEQ_CPU(0))
                        high_load--;
                if (low == KSEQ_CPU(0))
                        low_load--;
        } else {
                transferable = high->ksq_group->ksg_transferable;
                high_load = high->ksq_group->ksg_load;
                low_load = low->ksq_group->ksg_load;
        }
        if (transferable == 0)
                return;
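        /*
         * Illustrative example, not part of the original source: with a busy
         * queue load of 7 and an idle queue load of 2 the imbalance computed
         * below is 5, so ceil(5 / 2) = 3 kses are moved, clipped to the
         * transferable count determined above.
         */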
        /*
         * Determine what the imbalance is and then adjust that to how many
         * kses we actually have to give up (transferable).
         */
        diff = high_load - low_load;
        move = diff / 2;
        if (diff & 0x1)
                move++;
        move = min(move, transferable);
        for (i = 0; i < move; i++)
                kseq_move(high, KSEQ_ID(low));
        return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
        struct kseq *kseq;
        struct kseq *to;
        struct kse *ke;

        kseq = from;
        to = KSEQ_CPU(cpu);
        ke = kseq_steal(kseq, 1);
        if (ke == NULL) {
                struct kseq_group *ksg;

                ksg = kseq->ksq_group;
                LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
                        if (kseq == from || kseq->ksq_transferable == 0)
                                continue;
                        ke = kseq_steal(kseq, 1);
                        break;
                }
                if (ke == NULL)
                        panic("kseq_move: No KSEs available with a "
                            "transferable count of %d\n",
                            ksg->ksg_transferable);
        }
        if (kseq == to)
                return;
        ke->ke_state = KES_THREAD;
        kseq_runq_rem(kseq, ke);
        kseq_load_rem(kseq, ke);

        ke->ke_cpu = cpu;
        kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
        struct kseq_group *ksg;
        struct kseq *steal;
        struct kse *ke;

        ksg = kseq->ksq_group;
        /*
         * If we're in a cpu group, try and steal kses from another cpu in
         * the group before idling.
         */
        if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
                LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
                        if (steal == kseq || steal->ksq_transferable == 0)
                                continue;
                        ke = kseq_steal(steal, 0);
                        if (ke == NULL)
                                continue;
                        ke->ke_state = KES_THREAD;
                        kseq_runq_rem(steal, ke);
                        kseq_load_rem(steal, ke);
                        ke->ke_cpu = PCPU_GET(cpuid);
                        sched_add(ke->ke_thread);
                        return (0);
                }
        }
        /*
         * We only set the idled bit when all of the cpus in the group are
         * idle.  Otherwise we could get into a situation where a KSE bounces
         * back and forth between two idle cores on separate physical CPUs.
         */
        ksg->ksg_idlemask |= PCPU_GET(cpumask);
        if (ksg->ksg_idlemask != ksg->ksg_cpumask)
                return (1);
        atomic_set_int(&kseq_idle, ksg->ksg_mask);
        return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
        struct kse *nke;
        struct kse *ke;

        do {
                (volatile struct kse *)ke = kseq->ksq_assigned;
        } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
        for (; ke != NULL; ke = nke) {
                nke = ke->ke_assign;
                ke->ke_flags &= ~KEF_ASSIGNED;
                sched_add(ke->ke_thread);
        }
}

static void
kseq_notify(struct kse *ke, int cpu)
{
        struct kseq *kseq;
        struct thread *td;
        struct pcpu *pcpu;

        ke->ke_flags |= KEF_ASSIGNED;

        kseq = KSEQ_CPU(cpu);

        /*
         * Place a KSE on another cpu's queue and force a resched.
         */
        do {
                (volatile struct kse *)ke->ke_assign = kseq->ksq_assigned;
        } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
        pcpu = pcpu_find(cpu);
        td = pcpu->pc_curthread;
        if (ke->ke_thread->td_priority < td->td_priority ||
            td == pcpu->pc_idlethread) {
                td->td_flags |= TDF_NEEDRESCHED;
                ipi_selected(1 << cpu, IPI_AST);
        }
}

static struct kse *
runq_steal(struct runq *rq)
{
        struct rqhead *rqh;
        struct rqbits *rqb;
        struct kse *ke;
        int word;
        int bit;

        mtx_assert(&sched_lock, MA_OWNED);
        rqb = &rq->rq_status;
        for (word = 0; word < RQB_LEN; word++) {
                if (rqb->rqb_bits[word] == 0)
                        continue;
                for (bit = 0; bit < RQB_BPW; bit++) {
                        if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
                                continue;
                        rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
                        TAILQ_FOREACH(ke, rqh, ke_procq) {
                                if (KSE_CAN_MIGRATE(ke,
                                    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
                                        return (ke);
                        }
                }
        }
        return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
        struct kse *ke;

        /*
         * Steal from next first to try to get a non-interactive task that
         * may not have run for a while.
         */
        if ((ke = runq_steal(kseq->ksq_next)) != NULL)
                return (ke);
        if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
                return (ke);
        if (stealidle)
                return (runq_steal(&kseq->ksq_idle));
        return (NULL);
}

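/*
 * Illustrative note, not part of the original source: ffs() returns a
 * one-based bit index.  If kseq_transfer() below finds the bit for cpu 2 set
 * in kseq_idle, ffs() yields 3, the code clears (1 << 2), and the later
 * decrement hands logical cpu 2 to kseq_notify().
 */
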
int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
        struct kseq_group *ksg;
        int cpu;

        cpu = 0;
        ksg = kseq->ksq_group;

        /*
         * XXX This ksg_transferable might work better if we were checking
         * against a global group load.  As it is now, this prevents us from
         * transferring a thread from a group that is potentially bogged down
         * with non transferable load.
         */
        if (ksg->ksg_transferable > ksg->ksg_cpus && kseq_idle) {
                /*
                 * Multiple cpus could find this bit simultaneously
                 * but the race shouldn't be terrible.
                 */
                cpu = ffs(kseq_idle);
                if (cpu)
                        atomic_clear_int(&kseq_idle, 1 << (cpu - 1));
        }
        /*
         * If another cpu in this group has idled, assign a thread over
         * to them after checking to see if there are idled groups.
         */
        if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) {
                cpu = ffs(ksg->ksg_idlemask);
                if (cpu)
                        ksg->ksg_idlemask &= ~(1 << (cpu - 1));
        }
        /*
         * Now that we've found an idle CPU, migrate the thread.
         */
        if (cpu) {
                cpu--;
                ke->ke_cpu = cpu;
                ke->ke_runq = NULL;
                kseq_notify(ke, cpu);
                return (1);
        }
        return (0);
}

#endif  /* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        mtx_assert(&sched_lock, MA_OWNED);
        swap = NULL;

        for (;;) {
                ke = runq_choose(kseq->ksq_curr);
                if (ke == NULL) {
                        /*
                         * We already swapped once and didn't get anywhere.
                         */
                        if (swap)
                                break;
                        swap = kseq->ksq_curr;
                        kseq->ksq_curr = kseq->ksq_next;
                        kseq->ksq_next = swap;
                        continue;
                }
                /*
                 * If we encounter a slice of 0 the kse is in a
                 * TIMESHARE kse group and its nice was too far out
                 * of the range that receives slices.
                 */
                if (ke->ke_slice == 0) {
                        runq_remove(ke->ke_runq, ke);
                        sched_slice(ke);
                        ke->ke_runq = kseq->ksq_next;
                        runq_add(ke->ke_runq, ke);
                        continue;
                }
                return (ke);
        }

        return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
        runq_init(&kseq->ksq_timeshare[0]);
        runq_init(&kseq->ksq_timeshare[1]);
        runq_init(&kseq->ksq_idle);
        kseq->ksq_curr = &kseq->ksq_timeshare[0];
        kseq->ksq_next = &kseq->ksq_timeshare[1];
        kseq->ksq_load = 0;
        kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
        int balance_groups;
        int i;
#endif

        slice_min = (hz/100);   /* 10ms */
        slice_max = (hz/7);     /* ~140ms */

#ifdef SMP
        balance_groups = 0;
        /*
         * Initialize the kseqs.
         */
        for (i = 0; i < MAXCPU; i++) {
                struct kseq *ksq;

                ksq = &kseq_cpu[i];
                ksq->ksq_assigned = NULL;
                kseq_setup(&kseq_cpu[i]);
        }
        if (smp_topology == NULL) {
                struct kseq_group *ksg;
                struct kseq *ksq;

                for (i = 0; i < MAXCPU; i++) {
                        ksq = &kseq_cpu[i];
                        ksg = &kseq_groups[i];
                        /*
                         * Setup a kse group with one member.
                         */
                        ksq->ksq_transferable = 0;
                        ksq->ksq_group = ksg;
                        ksg->ksg_cpus = 1;
                        ksg->ksg_idlemask = 0;
                        ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
                        ksg->ksg_load = 0;
                        ksg->ksg_transferable = 0;
                        LIST_INIT(&ksg->ksg_members);
                        LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
                }
        } else {
                struct kseq_group *ksg;
                struct cpu_group *cg;
                int j;

                for (i = 0; i < smp_topology->ct_count; i++) {
                        cg = &smp_topology->ct_group[i];
                        ksg = &kseq_groups[i];
                        /*
                         * Initialize the group.
                         */
                        ksg->ksg_idlemask = 0;
                        ksg->ksg_load = 0;
                        ksg->ksg_transferable = 0;
                        ksg->ksg_cpus = cg->cg_count;
                        ksg->ksg_cpumask = cg->cg_mask;
                        LIST_INIT(&ksg->ksg_members);
                        /*
                         * Find all of the group members and add them.
                         */
                        for (j = 0; j < MAXCPU; j++) {
                                if ((cg->cg_mask & (1 << j)) != 0) {
                                        if (ksg->ksg_mask == 0)
                                                ksg->ksg_mask = 1 << j;
                                        kseq_cpu[j].ksq_transferable = 0;
                                        kseq_cpu[j].ksq_group = ksg;
                                        LIST_INSERT_HEAD(&ksg->ksg_members,
                                            &kseq_cpu[j], ksq_siblings);
                                }
                        }
                        if (ksg->ksg_cpus > 1)
                                balance_groups = 1;
                }
                ksg_maxid = smp_topology->ct_count - 1;
        }
        callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
        callout_init(&kseq_group_callout, CALLOUT_MPSAFE);
        sched_balance(NULL);
        /*
         * Stagger the group and global load balancer so they do not
         * interfere with each other.
         */
        if (balance_groups)
                callout_reset(&kseq_group_callout, hz / 2,
                    sched_balance_groups, NULL);
#else
        kseq_setup(KSEQ_SELF());
#endif
        mtx_lock_spin(&sched_lock);
        kseq_load_add(KSEQ_SELF(), &kse0);
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return;

        pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
        pri += SCHED_PRI_BASE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
        struct kseq *kseq;
        struct ksegrp *kg;

        kg = ke->ke_ksegrp;
        kseq = KSEQ_CPU(ke->ke_cpu);

        /*
         * Rationale:
         * KSEs in interactive ksegs get the minimum slice so that we
         * quickly notice if it abuses its advantage.
         *
         * KSEs in non-interactive ksegs are assigned a slice that is
         * based on the kseg's nice value relative to the least nice kseg
         * on the run queue for this cpu.
         *
         * If the KSE is less nice than all others it gets the maximum
         * slice and other KSEs will adjust their slice relative to
         * this when they first expire.
         *
         * There is a 20 point window that starts relative to the least
         * nice kse on the run queue.  Slice size is determined by
         * the kse distance from the least nice ksegrp.
         *
         * If the kse is outside of the window it will get no slice
         * and will be reevaluated each time it is selected on the
         * run queue.  The exception to this is nice 0 ksegs when
         * a nice -20 is running.  They are always granted a minimum
         * slice.
         */
        if (!SCHED_INTERACTIVE(kg)) {
                int nice;

                nice = kg->kg_nice + (0 - kseq->ksq_nicemin);
                if (kseq->ksq_load_timeshare == 0 ||
                    kg->kg_nice < kseq->ksq_nicemin)
                        ke->ke_slice = SCHED_SLICE_MAX;
                else if (nice <= SCHED_SLICE_NTHRESH)
                        ke->ke_slice = SCHED_SLICE_NICE(nice);
                else if (kg->kg_nice == 0)
                        ke->ke_slice = SCHED_SLICE_MIN;
                else
                        ke->ke_slice = 0;
        } else
                ke->ke_slice = SCHED_SLICE_MIN;

        CTR6(KTR_ULE,
            "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
            ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin,
            kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

        return;
}

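/*
 * Illustrative example, not part of the original source: assume hz is 100,
 * so sched_setup() has set slice_min to 1 and slice_max to 14 ticks and
 * SCHED_SLICE_NTHRESH is 19.  A non-interactive kseg at ksq_nicemin gets the
 * full 14 ticks, one that is 10 nice steps away gets 14 - (10 * 14 / 19) = 7
 * ticks, and one more than 19 steps away receives no slice unless it is a
 * nice 0 kseg competing against nice -20, which still gets the one-tick
 * minimum.
 */
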
/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
        int sum;

        sum = kg->kg_runtime + kg->kg_slptime;
        if (sum < SCHED_SLP_RUN_MAX)
                return;
        /*
         * If we have exceeded by more than 1/5th then the algorithm below
         * will not bring us back into range.  Dividing by two here forces
         * us into the range of [3/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX]
         */
        if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
                kg->kg_runtime /= 2;
                kg->kg_slptime /= 2;
                return;
        }
        kg->kg_runtime = (kg->kg_runtime / 5) * 4;
        kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
        int ratio;
        int sum;

        sum = kg->kg_runtime + kg->kg_slptime;
        if (sum > SCHED_SLP_RUN_FORK) {
                ratio = sum / SCHED_SLP_RUN_FORK;
                kg->kg_runtime /= ratio;
                kg->kg_slptime /= ratio;
        }
}

static int
sched_interact_score(struct ksegrp *kg)
{
        int div;

        if (kg->kg_runtime > kg->kg_slptime) {
                div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
                return (SCHED_INTERACT_HALF +
                    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
        } else if (kg->kg_slptime > kg->kg_runtime) {
                div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
                return (kg->kg_runtime / div);
        }

        /*
         * This can happen if slptime and runtime are 0.
         */
        return (0);

}

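/*
 * Illustrative example, not part of the original source: the score runs from
 * 0 (most interactive) to SCHED_INTERACT_MAX (100).  A kseg that has slept
 * four times as long as it has run scores about 50 / 4 = 12, well under
 * SCHED_INTERACT_THRESH (30), so it is treated as interactive; one that has
 * run four times as long as it has slept scores about 100 - 50 / 4 = 87 and
 * is not.
 */
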
/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
                /*
                 * Shift the tick count out so that the divide doesn't
                 * round away our results.
                 */
                ke->ke_ticks <<= 10;
                ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
                    SCHED_CPU_TICKS;
                ke->ke_ticks >>= 10;
        } else
                ke->ke_ticks = 0;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;

        ke = td->td_kse;
        mtx_assert(&sched_lock, MA_OWNED);
        if (TD_ON_RUNQ(td)) {
                /*
                 * If the priority has been elevated due to priority
                 * propagation, we may have to move ourselves to a new
                 * queue.  We still call adjustrunqueue below in case kse
                 * needs to fix things up.
                 */
                if (prio < td->td_priority && ke &&
                    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
                    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
                        runq_remove(ke->ke_runq, ke);
                        ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
                        runq_add(ke->ke_runq, ke);
                }
                adjustrunqueue(td, prio);
        } else
                td->td_priority = prio;
}

void
sched_switch(struct thread *td)
{
        struct thread *newtd;
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = td->td_oncpu;
        td->td_oncpu = NOCPU;
        td->td_flags &= ~TDF_NEEDRESCHED;

        /*
         * If the KSE has been assigned it may be in the process of switching
         * to the new cpu.  This is the case in sched_bind().
	 */
	if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		if (TD_IS_RUNNING(td)) {
			if (td->td_proc->p_flag & P_SA) {
				kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
				setrunqueue(td);
			} else
				kseq_runq_add(KSEQ_SELF(), ke);
		} else {
			if (ke->ke_runq)
				kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
			/*
			 * We will not be on the run queue.  So we must be
			 * sleeping or similar.
			 */
			if (td->td_proc->p_flag & P_SA)
				kse_reassign(ke);
		}
	}
	newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd);
	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	if (kg->kg_pri_class == PRI_TIMESHARE)
		FOREACH_KSE_IN_GROUP(kg, ke) {
			if (ke->ke_runq == NULL)
				continue;
			kseq = KSEQ_CPU(ke->ke_cpu);
			kseq_nice_rem(kseq, kg->kg_nice);
			kseq_nice_add(kseq, nice);
		}
	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td)
		td->td_flags |= TDF_NEEDRESCHED;
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

	CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
	    td->td_kse, td->td_slptime);
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
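	 * The slept time is converted to the <<10 fixed point form used by
	 * kg_slptime/kg_runtime and clamped to SCHED_SLP_RUN_MAX so that the
	 * sleep history stays within the window the interactivity scorer
	 * expects.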
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		if (td->td_kse)
			sched_slice(td->td_kse);
		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
		    td->td_kse, hzticks);
		td->td_slptime = 0;
	}
	setrunqueue(td);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct proc *p, struct proc *p1)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
}

void
sched_fork_kse(struct kse *ke, struct kse *child)
{

	child->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	child->ke_cpu = ke->ke_cpu;
	child->ke_runq = NULL;

	/*
	 * Grab our parent's cpu estimation information.
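	 * The child inherits the parent's tick window so its %cpu estimate
	 * starts from a plausible value rather than zero.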
	 */
	child->ke_ticks = ke->ke_ticks;
	child->ke_ltick = ke->ke_ltick;
	child->ke_ftick = ke->ke_ftick;
}

void
sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	child->kg_nice = kg->kg_nice;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
	    kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime,
	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
}

void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_KSE_IN_GROUP(kg, ke) {
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
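		 * KSE_CAN_MIGRATE() takes the class into account (interrupt
		 * threads are never migratable), so drop the old class's
		 * contribution and add the new one.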
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			if (KSE_CAN_MIGRATE(ke, oclass)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke, nclass)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct proc *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
}

void
sched_exit_kse(struct kse *ke, struct kse *child)
{
	kseq_load_rem(KSEQ_CPU(child->ke_cpu), child);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	/* kg->kg_slptime += child->kg_slptime; */
	kg->kg_runtime += child->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
}

void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
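		 * In that case the integer division above truncates to zero,
		 * so tickincr is clamped to 1 below and the time charged per
		 * stat tick is somewhat inflated.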
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;

	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
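	 * The kse is pulled out of the load accounting, given a fresh
	 * priority and slice, and queued on ksq_curr or ksq_next depending
	 * on whether its ksegrp still scores as interactive.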
	 */
	kseq = KSEQ_SELF();
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
			    ke, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	kseq = KSEQ_SELF();
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));

	class = PRI_BASE(kg->kg_pri_class);
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority prop.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	if (ke->ke_cpu != PCPU_GET(cpuid)) {
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If there are any idle groups, give them our extra load.  The
	 * threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class))
		if (kseq_transfer(kseq, ke, class))
			return;
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	}
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke);
	kseq_load_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	ke = td->td_kse;
	/*
	 * It is safe to just return here because sched_rem() is only ever
	 * used in places where we're immediately going to add the
	 * kse back on again.  In that case it'll be added with the correct
	 * thread and priority when the caller drops the sched_lock.
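	 * (A KEF_ASSIGNED kse is parked on another cpu's assign list by
	 * kseq_notify() rather than on a local run queue, so there is
	 * nothing to remove here.)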
	 */
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
		    ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rtick per second ? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	ke->ke_cpu = cpu;
	kseq_notify(ke, cpu);
	/*
	 * When we return from mi_switch we'll be on the correct cpu.
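	 * kseq_notify() above handed the kse to the target cpu; the
	 * mi_switch() below gives up this cpu so the thread resumes on the
	 * one it is now bound to.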
	 */
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}