135e6168fSJeff Roberson /*- 215dc847eSJeff Roberson * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. O'Brien 3035e6168fSJeff Roberson #include <sys/param.h> 3135e6168fSJeff Roberson #include <sys/systm.h> 3235e6168fSJeff Roberson #include <sys/kernel.h> 3335e6168fSJeff Roberson #include <sys/ktr.h> 3435e6168fSJeff Roberson #include <sys/lock.h> 3535e6168fSJeff Roberson #include <sys/mutex.h> 3635e6168fSJeff Roberson #include <sys/proc.h> 37245f3abfSJeff Roberson #include <sys/resource.h> 3835e6168fSJeff Roberson #include <sys/sched.h> 3935e6168fSJeff Roberson #include <sys/smp.h> 4035e6168fSJeff Roberson #include <sys/sx.h> 4135e6168fSJeff Roberson #include <sys/sysctl.h> 4235e6168fSJeff Roberson #include <sys/sysproto.h> 4335e6168fSJeff Roberson #include <sys/vmmeter.h> 4435e6168fSJeff Roberson #ifdef DDB 4535e6168fSJeff Roberson #include <ddb/ddb.h> 4635e6168fSJeff Roberson #endif 4735e6168fSJeff Roberson #ifdef KTRACE 4835e6168fSJeff Roberson #include <sys/uio.h> 4935e6168fSJeff Roberson #include <sys/ktrace.h> 5035e6168fSJeff Roberson #endif 5135e6168fSJeff Roberson 5235e6168fSJeff Roberson #include <machine/cpu.h> 5322bf7d9aSJeff Roberson #include <machine/smp.h> 5435e6168fSJeff Roberson 5515dc847eSJeff Roberson #define KTR_ULE KTR_NFS 5615dc847eSJeff Roberson 5735e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 5835e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 5935e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6035e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6135e6168fSJeff Roberson 6235e6168fSJeff Roberson static void sched_setup(void *dummy); 6335e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 6435e6168fSJeff Roberson 6515dc847eSJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED"); 66e1f89c22SJeff Roberson 6715dc847eSJeff Roberson static int sched_strict; 6815dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, ""); 6915dc847eSJeff Roberson 7015dc847eSJeff Roberson static int slice_min = 1; 7115dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 7215dc847eSJeff Roberson 73210491d3SJeff Roberson static int slice_max = 10; 7415dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 7515dc847eSJeff Roberson 7615dc847eSJeff Roberson int realstathz; 7715dc847eSJeff Roberson int tickincr = 1; 78783caefbSJeff Roberson 79356500a3SJeff Roberson #ifdef SMP 80356500a3SJeff Roberson /* Callout to handle load balancing SMP systems. */ 81356500a3SJeff Roberson static struct callout kseq_lb_callout; 82356500a3SJeff Roberson #endif 83356500a3SJeff Roberson 8435e6168fSJeff Roberson /* 8535e6168fSJeff Roberson * These datastructures are allocated within their parent datastructure but 8635e6168fSJeff Roberson * are scheduler specific. 8735e6168fSJeff Roberson */ 8835e6168fSJeff Roberson 8935e6168fSJeff Roberson struct ke_sched { 9035e6168fSJeff Roberson int ske_slice; 9135e6168fSJeff Roberson struct runq *ske_runq; 9235e6168fSJeff Roberson /* The following variables are only used for pctcpu calculation */ 9335e6168fSJeff Roberson int ske_ltick; /* Last tick that we were running on */ 9435e6168fSJeff Roberson int ske_ftick; /* First tick that we were running on */ 9535e6168fSJeff Roberson int ske_ticks; /* Tick count */ 9615dc847eSJeff Roberson /* CPU that we have affinity for. */ 97cd6e33dfSJeff Roberson u_char ske_cpu; 9835e6168fSJeff Roberson }; 9935e6168fSJeff Roberson #define ke_slice ke_sched->ske_slice 10035e6168fSJeff Roberson #define ke_runq ke_sched->ske_runq 10135e6168fSJeff Roberson #define ke_ltick ke_sched->ske_ltick 10235e6168fSJeff Roberson #define ke_ftick ke_sched->ske_ftick 10335e6168fSJeff Roberson #define ke_ticks ke_sched->ske_ticks 104cd6e33dfSJeff Roberson #define ke_cpu ke_sched->ske_cpu 10522bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 10622bf7d9aSJeff Roberson 10722bf7d9aSJeff Roberson #define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */ 10835e6168fSJeff Roberson 10935e6168fSJeff Roberson struct kg_sched { 110407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 111407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 11235e6168fSJeff Roberson }; 11335e6168fSJeff Roberson #define kg_slptime kg_sched->skg_slptime 114407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 11535e6168fSJeff Roberson 11635e6168fSJeff Roberson struct td_sched { 11735e6168fSJeff Roberson int std_slptime; 11835e6168fSJeff Roberson }; 11935e6168fSJeff Roberson #define td_slptime td_sched->std_slptime 12035e6168fSJeff Roberson 1215d7ef00cSJeff Roberson struct td_sched td_sched; 12235e6168fSJeff Roberson struct ke_sched ke_sched; 12335e6168fSJeff Roberson struct kg_sched kg_sched; 12435e6168fSJeff Roberson 12535e6168fSJeff Roberson struct ke_sched *kse0_sched = &ke_sched; 12635e6168fSJeff Roberson struct kg_sched *ksegrp0_sched = &kg_sched; 12735e6168fSJeff Roberson struct p_sched *proc0_sched = NULL; 12835e6168fSJeff Roberson struct td_sched *thread0_sched = &td_sched; 12935e6168fSJeff Roberson 13035e6168fSJeff Roberson /* 131665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 132665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 133665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 134665cb285SJeff Roberson * on latency. 135e1f89c22SJeff Roberson * 136e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 137665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 138e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 13935e6168fSJeff Roberson */ 140407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 141245f3abfSJeff Roberson #define SCHED_PRI_NRESV PRIO_TOTAL 14298c9b132SJeff Roberson #define SCHED_PRI_NHALF (PRIO_TOTAL / 2) 14315dc847eSJeff Roberson #define SCHED_PRI_NTHRESH (SCHED_PRI_NHALF - 1) 144665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 14515dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 146665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 14735e6168fSJeff Roberson 14835e6168fSJeff Roberson /* 149e1f89c22SJeff Roberson * These determine the interactivity of a process. 15035e6168fSJeff Roberson * 151407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 152407b0157SJeff Roberson * before throttling back. 153a91172adSJeff Roberson * SLP_RUN_THROTTLE: Divisor for reducing slp/run time at fork time. 154210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 155e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 15635e6168fSJeff Roberson */ 1574c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 158a91172adSJeff Roberson #define SCHED_SLP_RUN_THROTTLE (100) 159210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 160210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1614c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 162e1f89c22SJeff Roberson 16335e6168fSJeff Roberson /* 16435e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 16535e6168fSJeff Roberson * granted to each thread. 16635e6168fSJeff Roberson * 16735e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 16835e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 16935e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 170245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 171245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 17235e6168fSJeff Roberson */ 17315dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 17415dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 17535e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 17635e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 177245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 17815dc847eSJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NTHRESH)) 17935e6168fSJeff Roberson 18035e6168fSJeff Roberson /* 18135e6168fSJeff Roberson * This macro determines whether or not the kse belongs on the current or 18235e6168fSJeff Roberson * next run queue. 183407b0157SJeff Roberson * 184407b0157SJeff Roberson * XXX nice value should effect how interactive a kg is. 18535e6168fSJeff Roberson */ 18615dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 18715dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 188a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 18908fd6713SJeff Roberson (ke->ke_thread->td_priority != kg->kg_user_pri || \ 19008fd6713SJeff Roberson SCHED_INTERACTIVE(kg)) 19135e6168fSJeff Roberson 19235e6168fSJeff Roberson /* 19335e6168fSJeff Roberson * Cpu percentage computation macros and defines. 19435e6168fSJeff Roberson * 19535e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 19635e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 19735e6168fSJeff Roberson */ 19835e6168fSJeff Roberson 1995053d272SJeff Roberson #define SCHED_CPU_TIME 10 20035e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 20135e6168fSJeff Roberson 20235e6168fSJeff Roberson /* 20315dc847eSJeff Roberson * kseq - per processor runqs and statistics. 20435e6168fSJeff Roberson */ 20535e6168fSJeff Roberson 20615dc847eSJeff Roberson #define KSEQ_NCLASS (PRI_IDLE + 1) /* Number of run classes. */ 20715dc847eSJeff Roberson 20835e6168fSJeff Roberson struct kseq { 209a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 21015dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 21115dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 21215dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 21315dc847eSJeff Roberson int ksq_loads[KSEQ_NCLASS]; /* Load for each class */ 21415dc847eSJeff Roberson int ksq_load; /* Aggregate load. */ 21515dc847eSJeff Roberson short ksq_nice[PRIO_TOTAL + 1]; /* KSEs in each nice bin. */ 21615dc847eSJeff Roberson short ksq_nicemin; /* Least nice. */ 2175d7ef00cSJeff Roberson #ifdef SMP 2185d7ef00cSJeff Roberson unsigned int ksq_rslices; /* Slices on run queue */ 21922bf7d9aSJeff Roberson int ksq_cpus; /* Count of CPUs in this kseq. */ 22022bf7d9aSJeff Roberson struct kse *ksq_assigned; /* KSEs assigned by another CPU. */ 2215d7ef00cSJeff Roberson #endif 22235e6168fSJeff Roberson }; 22335e6168fSJeff Roberson 22435e6168fSJeff Roberson /* 22535e6168fSJeff Roberson * One kse queue per processor. 22635e6168fSJeff Roberson */ 2270a016a05SJeff Roberson #ifdef SMP 22822bf7d9aSJeff Roberson static int kseq_idle; 22922bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 23022bf7d9aSJeff Roberson static struct kseq *kseq_idmap[MAXCPU]; 231749d01b0SJeff Roberson #define KSEQ_SELF() (kseq_idmap[PCPU_GET(cpuid)]) 232749d01b0SJeff Roberson #define KSEQ_CPU(x) (kseq_idmap[(x)]) 2330a016a05SJeff Roberson #else 23422bf7d9aSJeff Roberson static struct kseq kseq_cpu; 2350a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 2360a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 2370a016a05SJeff Roberson #endif 23835e6168fSJeff Roberson 239245f3abfSJeff Roberson static void sched_slice(struct kse *ke); 24015dc847eSJeff Roberson static void sched_priority(struct ksegrp *kg); 241e1f89c22SJeff Roberson static int sched_interact_score(struct ksegrp *kg); 2424b60e324SJeff Roberson static void sched_interact_update(struct ksegrp *kg); 24322bf7d9aSJeff Roberson static void sched_pctcpu_update(struct kse *ke); 24435e6168fSJeff Roberson 2455d7ef00cSJeff Roberson /* Operations on per processor queues */ 24622bf7d9aSJeff Roberson static struct kse * kseq_choose(struct kseq *kseq); 2470a016a05SJeff Roberson static void kseq_setup(struct kseq *kseq); 248a8949de2SJeff Roberson static void kseq_add(struct kseq *kseq, struct kse *ke); 24915dc847eSJeff Roberson static void kseq_rem(struct kseq *kseq, struct kse *ke); 25015dc847eSJeff Roberson static void kseq_nice_add(struct kseq *kseq, int nice); 25115dc847eSJeff Roberson static void kseq_nice_rem(struct kseq *kseq, int nice); 2527cd650a9SJeff Roberson void kseq_print(int cpu); 2535d7ef00cSJeff Roberson #ifdef SMP 25422bf7d9aSJeff Roberson #if 0 25522bf7d9aSJeff Roberson static int sched_pickcpu(void); 25622bf7d9aSJeff Roberson #endif 25722bf7d9aSJeff Roberson static struct kse *runq_steal(struct runq *rq); 25822bf7d9aSJeff Roberson static struct kseq *kseq_load_highest(void); 25922bf7d9aSJeff Roberson static void kseq_balance(void *arg); 26022bf7d9aSJeff Roberson static void kseq_move(struct kseq *from, int cpu); 26122bf7d9aSJeff Roberson static int kseq_find(void); 26222bf7d9aSJeff Roberson static void kseq_notify(struct kse *ke, int cpu); 26322bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 26422bf7d9aSJeff Roberson static struct kse *kseq_steal(struct kseq *kseq); 2655d7ef00cSJeff Roberson #endif 2665d7ef00cSJeff Roberson 26715dc847eSJeff Roberson void 2687cd650a9SJeff Roberson kseq_print(int cpu) 26915dc847eSJeff Roberson { 2707cd650a9SJeff Roberson struct kseq *kseq; 27115dc847eSJeff Roberson int i; 27215dc847eSJeff Roberson 2737cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 27415dc847eSJeff Roberson 27515dc847eSJeff Roberson printf("kseq:\n"); 27615dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 27715dc847eSJeff Roberson printf("\tload ITHD: %d\n", kseq->ksq_loads[PRI_ITHD]); 27815dc847eSJeff Roberson printf("\tload REALTIME: %d\n", kseq->ksq_loads[PRI_REALTIME]); 27915dc847eSJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_loads[PRI_TIMESHARE]); 28015dc847eSJeff Roberson printf("\tload IDLE: %d\n", kseq->ksq_loads[PRI_IDLE]); 28115dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 28215dc847eSJeff Roberson printf("\tnice counts:\n"); 28315dc847eSJeff Roberson for (i = 0; i < PRIO_TOTAL + 1; i++) 28415dc847eSJeff Roberson if (kseq->ksq_nice[i]) 28515dc847eSJeff Roberson printf("\t\t%d = %d\n", 28615dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 28715dc847eSJeff Roberson } 28815dc847eSJeff Roberson 289a8949de2SJeff Roberson static void 2905d7ef00cSJeff Roberson kseq_add(struct kseq *kseq, struct kse *ke) 2915d7ef00cSJeff Roberson { 292b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 293b5c4c4a7SJeff Roberson kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]++; 29415dc847eSJeff Roberson kseq->ksq_load++; 29515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 29615dc847eSJeff Roberson CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 29715dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 29815dc847eSJeff Roberson ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin); 29915dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 30015dc847eSJeff Roberson kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice); 3015d7ef00cSJeff Roberson #ifdef SMP 3025d7ef00cSJeff Roberson kseq->ksq_rslices += ke->ke_slice; 3035d7ef00cSJeff Roberson #endif 3045d7ef00cSJeff Roberson } 30515dc847eSJeff Roberson 306a8949de2SJeff Roberson static void 3075d7ef00cSJeff Roberson kseq_rem(struct kseq *kseq, struct kse *ke) 3085d7ef00cSJeff Roberson { 309b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 310b5c4c4a7SJeff Roberson kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]--; 31115dc847eSJeff Roberson kseq->ksq_load--; 31215dc847eSJeff Roberson ke->ke_runq = NULL; 31315dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 31415dc847eSJeff Roberson kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice); 3155d7ef00cSJeff Roberson #ifdef SMP 3165d7ef00cSJeff Roberson kseq->ksq_rslices -= ke->ke_slice; 3175d7ef00cSJeff Roberson #endif 3185d7ef00cSJeff Roberson } 3195d7ef00cSJeff Roberson 32015dc847eSJeff Roberson static void 32115dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 32215dc847eSJeff Roberson { 323b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 32415dc847eSJeff Roberson /* Normalize to zero. */ 32515dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 326b90816f1SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_loads[PRI_TIMESHARE] == 1) 32715dc847eSJeff Roberson kseq->ksq_nicemin = nice; 32815dc847eSJeff Roberson } 32915dc847eSJeff Roberson 33015dc847eSJeff Roberson static void 33115dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 33215dc847eSJeff Roberson { 33315dc847eSJeff Roberson int n; 33415dc847eSJeff Roberson 335b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 33615dc847eSJeff Roberson /* Normalize to zero. */ 33715dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 33815dc847eSJeff Roberson kseq->ksq_nice[n]--; 33915dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 34015dc847eSJeff Roberson 34115dc847eSJeff Roberson /* 34215dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 34315dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 34415dc847eSJeff Roberson * the smallest nice. 34515dc847eSJeff Roberson */ 34615dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 34715dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 34815dc847eSJeff Roberson kseq->ksq_loads[PRI_TIMESHARE] == 0) 34915dc847eSJeff Roberson return; 35015dc847eSJeff Roberson 35115dc847eSJeff Roberson for (; n < SCHED_PRI_NRESV + 1; n++) 35215dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 35315dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 35415dc847eSJeff Roberson return; 35515dc847eSJeff Roberson } 35615dc847eSJeff Roberson } 35715dc847eSJeff Roberson 3585d7ef00cSJeff Roberson #ifdef SMP 359356500a3SJeff Roberson /* 360356500a3SJeff Roberson * kseq_balance is a simple CPU load balancing algorithm. It operates by 361356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 362356500a3SJeff Roberson * by migrating some processes. 363356500a3SJeff Roberson * 364356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 365356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 366356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 367356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 368356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 369356500a3SJeff Roberson * 370356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 371356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 372356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 373356500a3SJeff Roberson * 374356500a3SJeff Roberson */ 37522bf7d9aSJeff Roberson static void 376356500a3SJeff Roberson kseq_balance(void *arg) 377356500a3SJeff Roberson { 378356500a3SJeff Roberson struct kseq *kseq; 379356500a3SJeff Roberson int high_load; 380356500a3SJeff Roberson int low_load; 381356500a3SJeff Roberson int high_cpu; 382356500a3SJeff Roberson int low_cpu; 383356500a3SJeff Roberson int move; 384356500a3SJeff Roberson int diff; 385356500a3SJeff Roberson int i; 386356500a3SJeff Roberson 387356500a3SJeff Roberson high_cpu = 0; 388356500a3SJeff Roberson low_cpu = 0; 389356500a3SJeff Roberson high_load = 0; 390356500a3SJeff Roberson low_load = -1; 391356500a3SJeff Roberson 392356500a3SJeff Roberson mtx_lock_spin(&sched_lock); 39386f8ae96SJeff Roberson if (smp_started == 0) 39486f8ae96SJeff Roberson goto out; 39586f8ae96SJeff Roberson 396356500a3SJeff Roberson for (i = 0; i < mp_maxid; i++) { 3977a20304fSJeff Roberson if (CPU_ABSENT(i) || (i & stopped_cpus) != 0) 398356500a3SJeff Roberson continue; 399356500a3SJeff Roberson kseq = KSEQ_CPU(i); 400356500a3SJeff Roberson if (kseq->ksq_load > high_load) { 401356500a3SJeff Roberson high_load = kseq->ksq_load; 402356500a3SJeff Roberson high_cpu = i; 403356500a3SJeff Roberson } 404356500a3SJeff Roberson if (low_load == -1 || kseq->ksq_load < low_load) { 405356500a3SJeff Roberson low_load = kseq->ksq_load; 406356500a3SJeff Roberson low_cpu = i; 407356500a3SJeff Roberson } 408356500a3SJeff Roberson } 409356500a3SJeff Roberson 410749d01b0SJeff Roberson kseq = KSEQ_CPU(high_cpu); 411749d01b0SJeff Roberson 41222bf7d9aSJeff Roberson high_load = kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] + 41322bf7d9aSJeff Roberson kseq->ksq_loads[PRI_REALTIME]; 414356500a3SJeff Roberson /* 415356500a3SJeff Roberson * Nothing to do. 416356500a3SJeff Roberson */ 417749d01b0SJeff Roberson if (high_load < kseq->ksq_cpus + 1) 418749d01b0SJeff Roberson goto out; 419749d01b0SJeff Roberson 420749d01b0SJeff Roberson high_load -= kseq->ksq_cpus; 421749d01b0SJeff Roberson 422749d01b0SJeff Roberson if (low_load >= high_load) 423356500a3SJeff Roberson goto out; 424356500a3SJeff Roberson 425356500a3SJeff Roberson diff = high_load - low_load; 426356500a3SJeff Roberson move = diff / 2; 427356500a3SJeff Roberson if (diff & 0x1) 428356500a3SJeff Roberson move++; 429356500a3SJeff Roberson 430356500a3SJeff Roberson for (i = 0; i < move; i++) 431749d01b0SJeff Roberson kseq_move(kseq, low_cpu); 432356500a3SJeff Roberson 433356500a3SJeff Roberson out: 434356500a3SJeff Roberson mtx_unlock_spin(&sched_lock); 435356500a3SJeff Roberson callout_reset(&kseq_lb_callout, hz, kseq_balance, NULL); 436356500a3SJeff Roberson 437356500a3SJeff Roberson return; 438356500a3SJeff Roberson } 439356500a3SJeff Roberson 44022bf7d9aSJeff Roberson static struct kseq * 4415d7ef00cSJeff Roberson kseq_load_highest(void) 4425d7ef00cSJeff Roberson { 4435d7ef00cSJeff Roberson struct kseq *kseq; 4445d7ef00cSJeff Roberson int load; 4455d7ef00cSJeff Roberson int cpu; 4465d7ef00cSJeff Roberson int i; 4475d7ef00cSJeff Roberson 448b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 4495d7ef00cSJeff Roberson cpu = 0; 4505d7ef00cSJeff Roberson load = 0; 4515d7ef00cSJeff Roberson 4525d7ef00cSJeff Roberson for (i = 0; i < mp_maxid; i++) { 4537a20304fSJeff Roberson if (CPU_ABSENT(i) || (i & stopped_cpus) != 0) 4545d7ef00cSJeff Roberson continue; 4555d7ef00cSJeff Roberson kseq = KSEQ_CPU(i); 45615dc847eSJeff Roberson if (kseq->ksq_load > load) { 45715dc847eSJeff Roberson load = kseq->ksq_load; 4585d7ef00cSJeff Roberson cpu = i; 4595d7ef00cSJeff Roberson } 4605d7ef00cSJeff Roberson } 461749d01b0SJeff Roberson kseq = KSEQ_CPU(cpu); 462749d01b0SJeff Roberson 46322bf7d9aSJeff Roberson if ((kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] + 46422bf7d9aSJeff Roberson kseq->ksq_loads[PRI_REALTIME]) > kseq->ksq_cpus) 465749d01b0SJeff Roberson return (kseq); 4665d7ef00cSJeff Roberson 4675d7ef00cSJeff Roberson return (NULL); 4685d7ef00cSJeff Roberson } 469356500a3SJeff Roberson 47022bf7d9aSJeff Roberson static void 471356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 472356500a3SJeff Roberson { 473356500a3SJeff Roberson struct kse *ke; 474356500a3SJeff Roberson 47522bf7d9aSJeff Roberson ke = kseq_steal(from); 476356500a3SJeff Roberson runq_remove(ke->ke_runq, ke); 477356500a3SJeff Roberson ke->ke_state = KES_THREAD; 478356500a3SJeff Roberson kseq_rem(from, ke); 479356500a3SJeff Roberson 480356500a3SJeff Roberson ke->ke_cpu = cpu; 4810c7da3a4SJeff Roberson sched_add(ke->ke_thread); 482356500a3SJeff Roberson } 48322bf7d9aSJeff Roberson 48422bf7d9aSJeff Roberson static int 48522bf7d9aSJeff Roberson kseq_find(void) 48622bf7d9aSJeff Roberson { 48722bf7d9aSJeff Roberson struct kseq *high; 48822bf7d9aSJeff Roberson 48922bf7d9aSJeff Roberson if (!smp_started) 49022bf7d9aSJeff Roberson return (0); 49122bf7d9aSJeff Roberson if (kseq_idle & PCPU_GET(cpumask)) 49222bf7d9aSJeff Roberson return (0); 49322bf7d9aSJeff Roberson /* 49422bf7d9aSJeff Roberson * Find the cpu with the highest load and steal one proc. 49522bf7d9aSJeff Roberson */ 49622bf7d9aSJeff Roberson if ((high = kseq_load_highest()) == NULL || 49722bf7d9aSJeff Roberson high == KSEQ_SELF()) { 49822bf7d9aSJeff Roberson /* 49922bf7d9aSJeff Roberson * If we couldn't find one, set ourselves in the 50022bf7d9aSJeff Roberson * idle map. 50122bf7d9aSJeff Roberson */ 50222bf7d9aSJeff Roberson atomic_set_int(&kseq_idle, PCPU_GET(cpumask)); 50322bf7d9aSJeff Roberson return (0); 50422bf7d9aSJeff Roberson } 50522bf7d9aSJeff Roberson /* 50622bf7d9aSJeff Roberson * Remove this kse from this kseq and runq and then requeue 50722bf7d9aSJeff Roberson * on the current processor. We now have a load of one! 50822bf7d9aSJeff Roberson */ 50922bf7d9aSJeff Roberson kseq_move(high, PCPU_GET(cpuid)); 51022bf7d9aSJeff Roberson 51122bf7d9aSJeff Roberson return (1); 51222bf7d9aSJeff Roberson } 51322bf7d9aSJeff Roberson 51422bf7d9aSJeff Roberson static void 51522bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 51622bf7d9aSJeff Roberson { 51722bf7d9aSJeff Roberson struct kse *nke; 51822bf7d9aSJeff Roberson struct kse *ke; 51922bf7d9aSJeff Roberson 52022bf7d9aSJeff Roberson do { 52122bf7d9aSJeff Roberson ke = kseq->ksq_assigned; 52222bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 52322bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 52422bf7d9aSJeff Roberson nke = ke->ke_assign; 52522bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 52622bf7d9aSJeff Roberson sched_add(ke->ke_thread); 52722bf7d9aSJeff Roberson } 52822bf7d9aSJeff Roberson } 52922bf7d9aSJeff Roberson 53022bf7d9aSJeff Roberson static void 53122bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 53222bf7d9aSJeff Roberson { 53322bf7d9aSJeff Roberson struct kseq *kseq; 53422bf7d9aSJeff Roberson struct thread *td; 53522bf7d9aSJeff Roberson struct pcpu *pcpu; 53622bf7d9aSJeff Roberson 53722bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 53822bf7d9aSJeff Roberson 53922bf7d9aSJeff Roberson kseq = KSEQ_CPU(cpu); 5405d7ef00cSJeff Roberson 5410c0a98b2SJeff Roberson /* 54222bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 54322bf7d9aSJeff Roberson */ 54422bf7d9aSJeff Roberson do { 54522bf7d9aSJeff Roberson ke->ke_assign = kseq->ksq_assigned; 54622bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 54722bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 54822bf7d9aSJeff Roberson td = pcpu->pc_curthread; 54922bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 55022bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 55122bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 55222bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 55322bf7d9aSJeff Roberson } 55422bf7d9aSJeff Roberson } 55522bf7d9aSJeff Roberson 55622bf7d9aSJeff Roberson static struct kse * 55722bf7d9aSJeff Roberson runq_steal(struct runq *rq) 55822bf7d9aSJeff Roberson { 55922bf7d9aSJeff Roberson struct rqhead *rqh; 56022bf7d9aSJeff Roberson struct rqbits *rqb; 56122bf7d9aSJeff Roberson struct kse *ke; 56222bf7d9aSJeff Roberson int word; 56322bf7d9aSJeff Roberson int bit; 56422bf7d9aSJeff Roberson 56522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 56622bf7d9aSJeff Roberson rqb = &rq->rq_status; 56722bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 56822bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 56922bf7d9aSJeff Roberson continue; 57022bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 57122bf7d9aSJeff Roberson if ((rqb->rqb_bits[word] & (1 << bit)) == 0) 57222bf7d9aSJeff Roberson continue; 57322bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 57422bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, ke_procq) { 57522bf7d9aSJeff Roberson if (PRI_BASE(ke->ke_ksegrp->kg_pri_class) != 57622bf7d9aSJeff Roberson PRI_ITHD) 57722bf7d9aSJeff Roberson return (ke); 57822bf7d9aSJeff Roberson } 57922bf7d9aSJeff Roberson } 58022bf7d9aSJeff Roberson } 58122bf7d9aSJeff Roberson return (NULL); 58222bf7d9aSJeff Roberson } 58322bf7d9aSJeff Roberson 58422bf7d9aSJeff Roberson static struct kse * 58522bf7d9aSJeff Roberson kseq_steal(struct kseq *kseq) 58622bf7d9aSJeff Roberson { 58722bf7d9aSJeff Roberson struct kse *ke; 58822bf7d9aSJeff Roberson 58922bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 59022bf7d9aSJeff Roberson return (ke); 59122bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 59222bf7d9aSJeff Roberson return (ke); 59322bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 59422bf7d9aSJeff Roberson } 59522bf7d9aSJeff Roberson #endif /* SMP */ 59622bf7d9aSJeff Roberson 59722bf7d9aSJeff Roberson /* 59822bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 5990c0a98b2SJeff Roberson */ 6000c0a98b2SJeff Roberson 60122bf7d9aSJeff Roberson static struct kse * 60222bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 6035d7ef00cSJeff Roberson { 6045d7ef00cSJeff Roberson struct kse *ke; 6055d7ef00cSJeff Roberson struct runq *swap; 6065d7ef00cSJeff Roberson 607b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 60815dc847eSJeff Roberson swap = NULL; 609a8949de2SJeff Roberson 61015dc847eSJeff Roberson for (;;) { 61115dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 61215dc847eSJeff Roberson if (ke == NULL) { 61315dc847eSJeff Roberson /* 61415dc847eSJeff Roberson * We already swaped once and didn't get anywhere. 61515dc847eSJeff Roberson */ 61615dc847eSJeff Roberson if (swap) 61715dc847eSJeff Roberson break; 6185d7ef00cSJeff Roberson swap = kseq->ksq_curr; 6195d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 6205d7ef00cSJeff Roberson kseq->ksq_next = swap; 62115dc847eSJeff Roberson continue; 622a8949de2SJeff Roberson } 62315dc847eSJeff Roberson /* 62415dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 62515dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 62615dc847eSJeff Roberson * of the range that receives slices. 62715dc847eSJeff Roberson */ 62822bf7d9aSJeff Roberson if (ke->ke_slice == 0) { 62915dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 63015dc847eSJeff Roberson sched_slice(ke); 63115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 63215dc847eSJeff Roberson runq_add(ke->ke_runq, ke); 63315dc847eSJeff Roberson continue; 63415dc847eSJeff Roberson } 63515dc847eSJeff Roberson return (ke); 63615dc847eSJeff Roberson } 63715dc847eSJeff Roberson 638a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 639245f3abfSJeff Roberson } 6400a016a05SJeff Roberson 6410a016a05SJeff Roberson static void 6420a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 6430a016a05SJeff Roberson { 64415dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 64515dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 646a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 64715dc847eSJeff Roberson 64815dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 64915dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 65015dc847eSJeff Roberson 65115dc847eSJeff Roberson kseq->ksq_loads[PRI_ITHD] = 0; 65215dc847eSJeff Roberson kseq->ksq_loads[PRI_REALTIME] = 0; 65315dc847eSJeff Roberson kseq->ksq_loads[PRI_TIMESHARE] = 0; 65415dc847eSJeff Roberson kseq->ksq_loads[PRI_IDLE] = 0; 6557cd650a9SJeff Roberson kseq->ksq_load = 0; 6565d7ef00cSJeff Roberson #ifdef SMP 6575d7ef00cSJeff Roberson kseq->ksq_rslices = 0; 65822bf7d9aSJeff Roberson kseq->ksq_assigned = NULL; 6595d7ef00cSJeff Roberson #endif 6600a016a05SJeff Roberson } 6610a016a05SJeff Roberson 66235e6168fSJeff Roberson static void 66335e6168fSJeff Roberson sched_setup(void *dummy) 66435e6168fSJeff Roberson { 6650ec896fdSJeff Roberson #ifdef SMP 66635e6168fSJeff Roberson int i; 6670ec896fdSJeff Roberson #endif 66835e6168fSJeff Roberson 669e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 670e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 671e1f89c22SJeff Roberson 672356500a3SJeff Roberson #ifdef SMP 673749d01b0SJeff Roberson /* init kseqs */ 674749d01b0SJeff Roberson /* Create the idmap. */ 675749d01b0SJeff Roberson #ifdef ULE_HTT_EXPERIMENTAL 676749d01b0SJeff Roberson if (smp_topology == NULL) { 677749d01b0SJeff Roberson #else 678749d01b0SJeff Roberson if (1) { 679749d01b0SJeff Roberson #endif 680749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 681749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 682749d01b0SJeff Roberson kseq_idmap[i] = &kseq_cpu[i]; 683749d01b0SJeff Roberson kseq_cpu[i].ksq_cpus = 1; 684749d01b0SJeff Roberson } 685749d01b0SJeff Roberson } else { 686749d01b0SJeff Roberson int j; 687749d01b0SJeff Roberson 688749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 689749d01b0SJeff Roberson struct cpu_group *cg; 690749d01b0SJeff Roberson 691749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 692749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 693749d01b0SJeff Roberson 694749d01b0SJeff Roberson for (j = 0; j < MAXCPU; j++) 695749d01b0SJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) 696749d01b0SJeff Roberson kseq_idmap[j] = &kseq_cpu[i]; 697749d01b0SJeff Roberson kseq_cpu[i].ksq_cpus = cg->cg_count; 698749d01b0SJeff Roberson } 699749d01b0SJeff Roberson } 700c06eb4e2SSam Leffler callout_init(&kseq_lb_callout, CALLOUT_MPSAFE); 701356500a3SJeff Roberson kseq_balance(NULL); 702749d01b0SJeff Roberson #else 703749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 704356500a3SJeff Roberson #endif 705749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 706749d01b0SJeff Roberson kseq_add(KSEQ_SELF(), &kse0); 707749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 70835e6168fSJeff Roberson } 70935e6168fSJeff Roberson 71035e6168fSJeff Roberson /* 71135e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 71235e6168fSJeff Roberson * process. 71335e6168fSJeff Roberson */ 71415dc847eSJeff Roberson static void 71535e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 71635e6168fSJeff Roberson { 71735e6168fSJeff Roberson int pri; 71835e6168fSJeff Roberson 71935e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 72015dc847eSJeff Roberson return; 72135e6168fSJeff Roberson 72215dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 723e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 72435e6168fSJeff Roberson pri += kg->kg_nice; 72535e6168fSJeff Roberson 72635e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 72735e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 72835e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 72935e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 73035e6168fSJeff Roberson 73135e6168fSJeff Roberson kg->kg_user_pri = pri; 73235e6168fSJeff Roberson 73315dc847eSJeff Roberson return; 73435e6168fSJeff Roberson } 73535e6168fSJeff Roberson 73635e6168fSJeff Roberson /* 737245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 738a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 73935e6168fSJeff Roberson */ 740245f3abfSJeff Roberson static void 741245f3abfSJeff Roberson sched_slice(struct kse *ke) 74235e6168fSJeff Roberson { 74315dc847eSJeff Roberson struct kseq *kseq; 744245f3abfSJeff Roberson struct ksegrp *kg; 74535e6168fSJeff Roberson 746245f3abfSJeff Roberson kg = ke->ke_ksegrp; 74715dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 74835e6168fSJeff Roberson 749245f3abfSJeff Roberson /* 750245f3abfSJeff Roberson * Rationale: 751245f3abfSJeff Roberson * KSEs in interactive ksegs get the minimum slice so that we 752245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 753245f3abfSJeff Roberson * 754245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 755245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 756245f3abfSJeff Roberson * on the run queue for this cpu. 757245f3abfSJeff Roberson * 758245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 759245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 760245f3abfSJeff Roberson * this when they first expire. 761245f3abfSJeff Roberson * 762245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 763245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 764245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 765245f3abfSJeff Roberson * 766245f3abfSJeff Roberson * If you are outside of the window you will get no slice and 767245f3abfSJeff Roberson * you will be reevaluated each time you are selected on the 768245f3abfSJeff Roberson * run queue. 769245f3abfSJeff Roberson * 770245f3abfSJeff Roberson */ 771245f3abfSJeff Roberson 77215dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 773245f3abfSJeff Roberson int nice; 774245f3abfSJeff Roberson 77515dc847eSJeff Roberson nice = kg->kg_nice + (0 - kseq->ksq_nicemin); 77615dc847eSJeff Roberson if (kseq->ksq_loads[PRI_TIMESHARE] == 0 || 77715dc847eSJeff Roberson kg->kg_nice < kseq->ksq_nicemin) 778245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 77915dc847eSJeff Roberson else if (nice <= SCHED_PRI_NTHRESH) 780245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 781245f3abfSJeff Roberson else 782245f3abfSJeff Roberson ke->ke_slice = 0; 783245f3abfSJeff Roberson } else 784245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 78535e6168fSJeff Roberson 78615dc847eSJeff Roberson CTR6(KTR_ULE, 78715dc847eSJeff Roberson "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)", 78815dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin, 78915dc847eSJeff Roberson kseq->ksq_loads[PRI_TIMESHARE], SCHED_INTERACTIVE(kg)); 79015dc847eSJeff Roberson 791407b0157SJeff Roberson /* 792a8949de2SJeff Roberson * Check to see if we need to scale back the slp and run time 793a8949de2SJeff Roberson * in the kg. This will cause us to forget old interactivity 794a8949de2SJeff Roberson * while maintaining the current ratio. 795407b0157SJeff Roberson */ 7964b60e324SJeff Roberson sched_interact_update(kg); 797407b0157SJeff Roberson 798245f3abfSJeff Roberson return; 79935e6168fSJeff Roberson } 80035e6168fSJeff Roberson 8014b60e324SJeff Roberson static void 8024b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 8034b60e324SJeff Roberson { 8043f741ca1SJeff Roberson int ratio; 8053f741ca1SJeff Roberson 8063f741ca1SJeff Roberson if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) { 8073f741ca1SJeff Roberson ratio = ((SCHED_SLP_RUN_MAX * 15) / (kg->kg_runtime + 8083f741ca1SJeff Roberson kg->kg_slptime )); 8093f741ca1SJeff Roberson kg->kg_runtime = (kg->kg_runtime * ratio) / 16; 8103f741ca1SJeff Roberson kg->kg_slptime = (kg->kg_slptime * ratio) / 16; 8114b60e324SJeff Roberson } 8124b60e324SJeff Roberson } 8134b60e324SJeff Roberson 814e1f89c22SJeff Roberson static int 815e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 816e1f89c22SJeff Roberson { 817210491d3SJeff Roberson int div; 818e1f89c22SJeff Roberson 819e1f89c22SJeff Roberson if (kg->kg_runtime > kg->kg_slptime) { 820210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 821210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 822210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 823210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 824210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 825210491d3SJeff Roberson return (kg->kg_runtime / div); 826e1f89c22SJeff Roberson } 827e1f89c22SJeff Roberson 828210491d3SJeff Roberson /* 829210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 830210491d3SJeff Roberson */ 831210491d3SJeff Roberson return (0); 832e1f89c22SJeff Roberson 833e1f89c22SJeff Roberson } 834e1f89c22SJeff Roberson 83515dc847eSJeff Roberson /* 83615dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 83715dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 83815dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 83915dc847eSJeff Roberson */ 84035e6168fSJeff Roberson int 84135e6168fSJeff Roberson sched_rr_interval(void) 84235e6168fSJeff Roberson { 84335e6168fSJeff Roberson return (SCHED_SLICE_MAX); 84435e6168fSJeff Roberson } 84535e6168fSJeff Roberson 84622bf7d9aSJeff Roberson static void 84735e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 84835e6168fSJeff Roberson { 84935e6168fSJeff Roberson /* 85035e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 851210491d3SJeff Roberson */ 85281de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 853210491d3SJeff Roberson /* 85481de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 85581de51bfSJeff Roberson * round away our results. 85665c8760dSJeff Roberson */ 85765c8760dSJeff Roberson ke->ke_ticks <<= 10; 85881de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 85935e6168fSJeff Roberson SCHED_CPU_TICKS; 86065c8760dSJeff Roberson ke->ke_ticks >>= 10; 86181de51bfSJeff Roberson } else 86281de51bfSJeff Roberson ke->ke_ticks = 0; 86335e6168fSJeff Roberson ke->ke_ltick = ticks; 86435e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 86535e6168fSJeff Roberson } 86635e6168fSJeff Roberson 86722bf7d9aSJeff Roberson #if 0 8685d7ef00cSJeff Roberson /* XXX Should be changed to kseq_load_lowest() */ 86935e6168fSJeff Roberson int 87035e6168fSJeff Roberson sched_pickcpu(void) 87135e6168fSJeff Roberson { 8720a016a05SJeff Roberson struct kseq *kseq; 87335e6168fSJeff Roberson int load; 8740a016a05SJeff Roberson int cpu; 87535e6168fSJeff Roberson int i; 87635e6168fSJeff Roberson 877b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 87835e6168fSJeff Roberson if (!smp_started) 87935e6168fSJeff Roberson return (0); 88035e6168fSJeff Roberson 8810a016a05SJeff Roberson load = 0; 8820a016a05SJeff Roberson cpu = 0; 88335e6168fSJeff Roberson 88435e6168fSJeff Roberson for (i = 0; i < mp_maxid; i++) { 8857a20304fSJeff Roberson if (CPU_ABSENT(i) || (i & stopped_cpus) != 0) 88635e6168fSJeff Roberson continue; 8870a016a05SJeff Roberson kseq = KSEQ_CPU(i); 88815dc847eSJeff Roberson if (kseq->ksq_load < load) { 88935e6168fSJeff Roberson cpu = i; 89015dc847eSJeff Roberson load = kseq->ksq_load; 89135e6168fSJeff Roberson } 89235e6168fSJeff Roberson } 89335e6168fSJeff Roberson 89435e6168fSJeff Roberson CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 89535e6168fSJeff Roberson return (cpu); 89635e6168fSJeff Roberson } 89735e6168fSJeff Roberson #endif 89835e6168fSJeff Roberson 89935e6168fSJeff Roberson void 90035e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio) 90135e6168fSJeff Roberson { 9023f741ca1SJeff Roberson struct kse *ke; 90335e6168fSJeff Roberson 9043f741ca1SJeff Roberson ke = td->td_kse; 90535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 90635e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 9073f741ca1SJeff Roberson /* 9083f741ca1SJeff Roberson * If the priority has been elevated due to priority 9093f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 9103f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 9113f741ca1SJeff Roberson * needs to fix things up. 9123f741ca1SJeff Roberson */ 91322bf7d9aSJeff Roberson if (ke && (ke->ke_flags & KEF_ASSIGNED) == 0 && 91422bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 9153f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 9163f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 9173f741ca1SJeff Roberson runq_add(ke->ke_runq, ke); 91835e6168fSJeff Roberson } 9193f741ca1SJeff Roberson adjustrunqueue(td, prio); 9203f741ca1SJeff Roberson } else 9213f741ca1SJeff Roberson td->td_priority = prio; 92235e6168fSJeff Roberson } 92335e6168fSJeff Roberson 92435e6168fSJeff Roberson void 925ae53b483SJeff Roberson sched_switch(struct thread *td) 92635e6168fSJeff Roberson { 927ae53b483SJeff Roberson struct thread *newtd; 92835e6168fSJeff Roberson struct kse *ke; 92935e6168fSJeff Roberson 93035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 93135e6168fSJeff Roberson 93235e6168fSJeff Roberson ke = td->td_kse; 93335e6168fSJeff Roberson 93435e6168fSJeff Roberson td->td_last_kse = ke; 935060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 936060563ecSJulian Elischer td->td_oncpu = NOCPU; 9374a338afdSJulian Elischer td->td_flags &= ~TDF_NEEDRESCHED; 93835e6168fSJeff Roberson 93935e6168fSJeff Roberson if (TD_IS_RUNNING(td)) { 940ab2baa72SDavid Xu if (td->td_proc->p_flag & P_SA) { 941ab2baa72SDavid Xu kseq_rem(KSEQ_CPU(ke->ke_cpu), ke); 942ab2baa72SDavid Xu setrunqueue(td); 943ab2baa72SDavid Xu } else { 944210491d3SJeff Roberson /* 9453f741ca1SJeff Roberson * This queue is always correct except for idle threads 9463f741ca1SJeff Roberson * which have a higher priority due to priority 9473f741ca1SJeff Roberson * propagation. 948210491d3SJeff Roberson */ 9493f741ca1SJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) { 9503f741ca1SJeff Roberson if (td->td_priority < PRI_MIN_IDLE) 951210491d3SJeff Roberson ke->ke_runq = KSEQ_SELF()->ksq_curr; 9523f741ca1SJeff Roberson else 9533f741ca1SJeff Roberson ke->ke_runq = &KSEQ_SELF()->ksq_idle; 9543f741ca1SJeff Roberson } 95515dc847eSJeff Roberson runq_add(ke->ke_runq, ke); 95615dc847eSJeff Roberson /* setrunqueue(td); */ 957ab2baa72SDavid Xu } 9580e0f6266SJeff Roberson } else { 9590e0f6266SJeff Roberson if (ke->ke_runq) 96015dc847eSJeff Roberson kseq_rem(KSEQ_CPU(ke->ke_cpu), ke); 96135e6168fSJeff Roberson /* 96235e6168fSJeff Roberson * We will not be on the run queue. So we must be 96335e6168fSJeff Roberson * sleeping or similar. 96435e6168fSJeff Roberson */ 9650e2a4d3aSDavid Xu if (td->td_proc->p_flag & P_SA) 96635e6168fSJeff Roberson kse_reassign(ke); 9670e0f6266SJeff Roberson } 968ae53b483SJeff Roberson newtd = choosethread(); 969ae53b483SJeff Roberson if (td != newtd) 970ae53b483SJeff Roberson cpu_switch(td, newtd); 971ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 97235e6168fSJeff Roberson 973060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 97435e6168fSJeff Roberson } 97535e6168fSJeff Roberson 97635e6168fSJeff Roberson void 97735e6168fSJeff Roberson sched_nice(struct ksegrp *kg, int nice) 97835e6168fSJeff Roberson { 97915dc847eSJeff Roberson struct kse *ke; 98035e6168fSJeff Roberson struct thread *td; 98115dc847eSJeff Roberson struct kseq *kseq; 98235e6168fSJeff Roberson 9830b5318c8SJohn Baldwin PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED); 9840b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 98515dc847eSJeff Roberson /* 98615dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 98715dc847eSJeff Roberson */ 98815dc847eSJeff Roberson if (kg->kg_pri_class == PRI_TIMESHARE) 98915dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 990d07ac847SJeff Roberson if (ke->ke_runq == NULL) 99115dc847eSJeff Roberson continue; 99215dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 99315dc847eSJeff Roberson kseq_nice_rem(kseq, kg->kg_nice); 99415dc847eSJeff Roberson kseq_nice_add(kseq, nice); 99515dc847eSJeff Roberson } 99635e6168fSJeff Roberson kg->kg_nice = nice; 99735e6168fSJeff Roberson sched_priority(kg); 99815dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 9994a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 100035e6168fSJeff Roberson } 100135e6168fSJeff Roberson 100235e6168fSJeff Roberson void 100335e6168fSJeff Roberson sched_sleep(struct thread *td, u_char prio) 100435e6168fSJeff Roberson { 100535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 100635e6168fSJeff Roberson 100735e6168fSJeff Roberson td->td_slptime = ticks; 100835e6168fSJeff Roberson td->td_priority = prio; 100935e6168fSJeff Roberson 101015dc847eSJeff Roberson CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 101115dc847eSJeff Roberson td->td_kse, td->td_slptime); 101235e6168fSJeff Roberson } 101335e6168fSJeff Roberson 101435e6168fSJeff Roberson void 101535e6168fSJeff Roberson sched_wakeup(struct thread *td) 101635e6168fSJeff Roberson { 101735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 101835e6168fSJeff Roberson 101935e6168fSJeff Roberson /* 102035e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 102135e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 102235e6168fSJeff Roberson */ 102335e6168fSJeff Roberson if (td->td_slptime) { 1024f1e8dc4aSJeff Roberson struct ksegrp *kg; 102515dc847eSJeff Roberson int hzticks; 1026f1e8dc4aSJeff Roberson 1027f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 102815dc847eSJeff Roberson hzticks = ticks - td->td_slptime; 102915dc847eSJeff Roberson kg->kg_slptime += hzticks << 10; 10304b60e324SJeff Roberson sched_interact_update(kg); 1031f1e8dc4aSJeff Roberson sched_priority(kg); 10324b60e324SJeff Roberson if (td->td_kse) 10334b60e324SJeff Roberson sched_slice(td->td_kse); 103415dc847eSJeff Roberson CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 103515dc847eSJeff Roberson td->td_kse, hzticks); 103635e6168fSJeff Roberson td->td_slptime = 0; 1037f1e8dc4aSJeff Roberson } 103835e6168fSJeff Roberson setrunqueue(td); 103935e6168fSJeff Roberson } 104035e6168fSJeff Roberson 104135e6168fSJeff Roberson /* 104235e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 104335e6168fSJeff Roberson * priority. 104435e6168fSJeff Roberson */ 104535e6168fSJeff Roberson void 104615dc847eSJeff Roberson sched_fork(struct proc *p, struct proc *p1) 104735e6168fSJeff Roberson { 104835e6168fSJeff Roberson 104935e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 105035e6168fSJeff Roberson 105115dc847eSJeff Roberson sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); 105215dc847eSJeff Roberson sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); 105315dc847eSJeff Roberson sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); 105415dc847eSJeff Roberson } 105515dc847eSJeff Roberson 105615dc847eSJeff Roberson void 105715dc847eSJeff Roberson sched_fork_kse(struct kse *ke, struct kse *child) 105815dc847eSJeff Roberson { 10592056d0a1SJohn Baldwin 1060210491d3SJeff Roberson child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 106115dc847eSJeff Roberson child->ke_cpu = ke->ke_cpu; /* sched_pickcpu(); */ 106215dc847eSJeff Roberson child->ke_runq = NULL; 106315dc847eSJeff Roberson 1064736c97c7SJeff Roberson /* Grab our parents cpu estimation information. */ 1065736c97c7SJeff Roberson child->ke_ticks = ke->ke_ticks; 1066736c97c7SJeff Roberson child->ke_ltick = ke->ke_ltick; 1067736c97c7SJeff Roberson child->ke_ftick = ke->ke_ftick; 106815dc847eSJeff Roberson } 106915dc847eSJeff Roberson 107015dc847eSJeff Roberson void 107115dc847eSJeff Roberson sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) 107215dc847eSJeff Roberson { 10732056d0a1SJohn Baldwin 10742056d0a1SJohn Baldwin PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); 107535e6168fSJeff Roberson /* XXX Need something better here */ 1076210491d3SJeff Roberson 1077a91172adSJeff Roberson child->kg_slptime = kg->kg_slptime / SCHED_SLP_RUN_THROTTLE; 1078a91172adSJeff Roberson child->kg_runtime = kg->kg_runtime / SCHED_SLP_RUN_THROTTLE; 10794b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 10804b60e324SJeff Roberson sched_interact_update(kg); 108115dc847eSJeff Roberson 108235e6168fSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 108315dc847eSJeff Roberson child->kg_nice = kg->kg_nice; 1084c9f25d8fSJeff Roberson } 1085c9f25d8fSJeff Roberson 108615dc847eSJeff Roberson void 108715dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 108815dc847eSJeff Roberson { 108915dc847eSJeff Roberson } 109015dc847eSJeff Roberson 109115dc847eSJeff Roberson void 109215dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 109315dc847eSJeff Roberson { 109415dc847eSJeff Roberson struct kseq *kseq; 109515dc847eSJeff Roberson struct kse *ke; 109615dc847eSJeff Roberson 10972056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 109815dc847eSJeff Roberson if (kg->kg_pri_class == class) 109915dc847eSJeff Roberson return; 110015dc847eSJeff Roberson 110115dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 110215dc847eSJeff Roberson if (ke->ke_state != KES_ONRUNQ && 110315dc847eSJeff Roberson ke->ke_state != KES_THREAD) 110415dc847eSJeff Roberson continue; 110515dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 110615dc847eSJeff Roberson 1107b5c4c4a7SJeff Roberson kseq->ksq_loads[PRI_BASE(kg->kg_pri_class)]--; 1108b5c4c4a7SJeff Roberson kseq->ksq_loads[PRI_BASE(class)]++; 110915dc847eSJeff Roberson 111015dc847eSJeff Roberson if (kg->kg_pri_class == PRI_TIMESHARE) 111115dc847eSJeff Roberson kseq_nice_rem(kseq, kg->kg_nice); 111215dc847eSJeff Roberson else if (class == PRI_TIMESHARE) 111315dc847eSJeff Roberson kseq_nice_add(kseq, kg->kg_nice); 111415dc847eSJeff Roberson } 111515dc847eSJeff Roberson 111615dc847eSJeff Roberson kg->kg_pri_class = class; 111735e6168fSJeff Roberson } 111835e6168fSJeff Roberson 111935e6168fSJeff Roberson /* 112035e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 112135e6168fSJeff Roberson */ 112235e6168fSJeff Roberson void 112315dc847eSJeff Roberson sched_exit(struct proc *p, struct proc *child) 112435e6168fSJeff Roberson { 112535e6168fSJeff Roberson /* XXX Need something better here */ 112635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1127141ad61cSJeff Roberson sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child)); 1128210491d3SJeff Roberson sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child)); 1129141ad61cSJeff Roberson } 1130141ad61cSJeff Roberson 1131141ad61cSJeff Roberson void 1132141ad61cSJeff Roberson sched_exit_kse(struct kse *ke, struct kse *child) 1133141ad61cSJeff Roberson { 1134141ad61cSJeff Roberson kseq_rem(KSEQ_CPU(child->ke_cpu), child); 1135141ad61cSJeff Roberson } 1136141ad61cSJeff Roberson 1137141ad61cSJeff Roberson void 1138141ad61cSJeff Roberson sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) 1139141ad61cSJeff Roberson { 11404b60e324SJeff Roberson /* kg->kg_slptime += child->kg_slptime; */ 1141210491d3SJeff Roberson kg->kg_runtime += child->kg_runtime; 11424b60e324SJeff Roberson sched_interact_update(kg); 1143141ad61cSJeff Roberson } 1144141ad61cSJeff Roberson 1145141ad61cSJeff Roberson void 1146141ad61cSJeff Roberson sched_exit_thread(struct thread *td, struct thread *child) 1147141ad61cSJeff Roberson { 114835e6168fSJeff Roberson } 114935e6168fSJeff Roberson 115035e6168fSJeff Roberson void 11517cf90fb3SJeff Roberson sched_clock(struct thread *td) 115235e6168fSJeff Roberson { 115335e6168fSJeff Roberson struct kseq *kseq; 11540a016a05SJeff Roberson struct ksegrp *kg; 11557cf90fb3SJeff Roberson struct kse *ke; 115635e6168fSJeff Roberson 115715dc847eSJeff Roberson /* 115815dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 115915dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 116015dc847eSJeff Roberson * calculating this here. 116115dc847eSJeff Roberson */ 116215dc847eSJeff Roberson if (realstathz == 0) { 116315dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 116415dc847eSJeff Roberson tickincr = hz / realstathz; 116515dc847eSJeff Roberson /* 116615dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 116715dc847eSJeff Roberson * larger than hz. 116815dc847eSJeff Roberson */ 116915dc847eSJeff Roberson if (tickincr == 0) 117015dc847eSJeff Roberson tickincr = 1; 117115dc847eSJeff Roberson } 117235e6168fSJeff Roberson 11737cf90fb3SJeff Roberson ke = td->td_kse; 117415dc847eSJeff Roberson kg = ke->ke_ksegrp; 117535e6168fSJeff Roberson 11760a016a05SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 11770a016a05SJeff Roberson KASSERT((td != NULL), ("schedclock: null thread pointer")); 11780a016a05SJeff Roberson 11790a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 118065c8760dSJeff Roberson ke->ke_ticks++; 1181d465fb95SJeff Roberson ke->ke_ltick = ticks; 1182a8949de2SJeff Roberson 1183d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1184d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1185d465fb95SJeff Roberson sched_pctcpu_update(ke); 1186d465fb95SJeff Roberson 118743fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 118835e6168fSJeff Roberson return; 11890a016a05SJeff Roberson 119015dc847eSJeff Roberson CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)", 119115dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10); 11923f741ca1SJeff Roberson /* 1193a8949de2SJeff Roberson * We only do slicing code for TIMESHARE ksegrps. 1194a8949de2SJeff Roberson */ 1195a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1196a8949de2SJeff Roberson return; 1197a8949de2SJeff Roberson /* 119815dc847eSJeff Roberson * We used a tick charge it to the ksegrp so that we can compute our 119915dc847eSJeff Roberson * interactivity. 120015dc847eSJeff Roberson */ 120115dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 12024b60e324SJeff Roberson sched_interact_update(kg); 1203407b0157SJeff Roberson 120435e6168fSJeff Roberson /* 120535e6168fSJeff Roberson * We used up one time slice. 120635e6168fSJeff Roberson */ 120735e6168fSJeff Roberson ke->ke_slice--; 12083f741ca1SJeff Roberson kseq = KSEQ_SELF(); 120915dc847eSJeff Roberson #ifdef SMP 1210c36ccfa2SJeff Roberson kseq->ksq_rslices--; 121115dc847eSJeff Roberson #endif 121215dc847eSJeff Roberson 121315dc847eSJeff Roberson if (ke->ke_slice > 0) 121415dc847eSJeff Roberson return; 121535e6168fSJeff Roberson /* 121615dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 121735e6168fSJeff Roberson */ 121815dc847eSJeff Roberson kseq_rem(kseq, ke); 1219e1f89c22SJeff Roberson sched_priority(kg); 122015dc847eSJeff Roberson sched_slice(ke); 122115dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 122215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 122315dc847eSJeff Roberson else 122415dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 122515dc847eSJeff Roberson kseq_add(kseq, ke); 12264a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 122735e6168fSJeff Roberson } 122835e6168fSJeff Roberson 122935e6168fSJeff Roberson int 123035e6168fSJeff Roberson sched_runnable(void) 123135e6168fSJeff Roberson { 123235e6168fSJeff Roberson struct kseq *kseq; 1233b90816f1SJeff Roberson int load; 123435e6168fSJeff Roberson 1235b90816f1SJeff Roberson load = 1; 1236b90816f1SJeff Roberson 1237b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 12380a016a05SJeff Roberson kseq = KSEQ_SELF(); 123922bf7d9aSJeff Roberson #ifdef SMP 124022bf7d9aSJeff Roberson if (kseq->ksq_assigned) 124122bf7d9aSJeff Roberson kseq_assign(kseq); 124222bf7d9aSJeff Roberson #endif 12433f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 12443f741ca1SJeff Roberson if (kseq->ksq_load > 0) 12453f741ca1SJeff Roberson goto out; 12463f741ca1SJeff Roberson } else 12473f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1248b90816f1SJeff Roberson goto out; 1249b90816f1SJeff Roberson load = 0; 1250b90816f1SJeff Roberson out: 1251b90816f1SJeff Roberson mtx_unlock_spin(&sched_lock); 1252b90816f1SJeff Roberson return (load); 125335e6168fSJeff Roberson } 125435e6168fSJeff Roberson 125535e6168fSJeff Roberson void 125635e6168fSJeff Roberson sched_userret(struct thread *td) 125735e6168fSJeff Roberson { 125835e6168fSJeff Roberson struct ksegrp *kg; 125935e6168fSJeff Roberson 126035e6168fSJeff Roberson kg = td->td_ksegrp; 126135e6168fSJeff Roberson 126235e6168fSJeff Roberson if (td->td_priority != kg->kg_user_pri) { 126335e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 126435e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 126535e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 126635e6168fSJeff Roberson } 126735e6168fSJeff Roberson } 126835e6168fSJeff Roberson 1269c9f25d8fSJeff Roberson struct kse * 1270c9f25d8fSJeff Roberson sched_choose(void) 1271c9f25d8fSJeff Roberson { 12720a016a05SJeff Roberson struct kseq *kseq; 1273c9f25d8fSJeff Roberson struct kse *ke; 127415dc847eSJeff Roberson 1275b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 127622bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 127715dc847eSJeff Roberson #ifdef SMP 1278245f3abfSJeff Roberson retry: 127922bf7d9aSJeff Roberson if (kseq->ksq_assigned) 128022bf7d9aSJeff Roberson kseq_assign(kseq); 128115dc847eSJeff Roberson #endif 128222bf7d9aSJeff Roberson ke = kseq_choose(kseq); 128335e6168fSJeff Roberson if (ke) { 128422bf7d9aSJeff Roberson #ifdef SMP 128522bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 128622bf7d9aSJeff Roberson if (kseq_find()) 128722bf7d9aSJeff Roberson goto retry; 128822bf7d9aSJeff Roberson #endif 128915dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 129035e6168fSJeff Roberson ke->ke_state = KES_THREAD; 1291245f3abfSJeff Roberson 129215dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 129315dc847eSJeff Roberson CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)", 129415dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, 129515dc847eSJeff Roberson ke->ke_thread->td_priority); 1296245f3abfSJeff Roberson } 129715dc847eSJeff Roberson return (ke); 129835e6168fSJeff Roberson } 1299c9f25d8fSJeff Roberson #ifdef SMP 130022bf7d9aSJeff Roberson if (kseq_find()) 130115dc847eSJeff Roberson goto retry; 1302c9f25d8fSJeff Roberson #endif 130315dc847eSJeff Roberson 130415dc847eSJeff Roberson return (NULL); 130535e6168fSJeff Roberson } 130635e6168fSJeff Roberson 130735e6168fSJeff Roberson void 13087cf90fb3SJeff Roberson sched_add(struct thread *td) 130935e6168fSJeff Roberson { 1310c9f25d8fSJeff Roberson struct kseq *kseq; 131115dc847eSJeff Roberson struct ksegrp *kg; 13127cf90fb3SJeff Roberson struct kse *ke; 131322bf7d9aSJeff Roberson int class; 1314c9f25d8fSJeff Roberson 131522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 13167cf90fb3SJeff Roberson ke = td->td_kse; 13177cf90fb3SJeff Roberson kg = td->td_ksegrp; 131822bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 131922bf7d9aSJeff Roberson return; 132022bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 13215d7ef00cSJeff Roberson KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE")); 13225d7ef00cSJeff Roberson KASSERT((ke->ke_thread->td_kse != NULL), 13235d7ef00cSJeff Roberson ("sched_add: No KSE on thread")); 13245d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 13255d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 13265d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 13275d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 13285d7ef00cSJeff Roberson ("sched_add: process swapped out")); 13299bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 13309bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 13315d7ef00cSJeff Roberson 133222bf7d9aSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 133322bf7d9aSJeff Roberson switch (class) { 1334a8949de2SJeff Roberson case PRI_ITHD: 1335a8949de2SJeff Roberson case PRI_REALTIME: 133615dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 133715dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 13387cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1339a8949de2SJeff Roberson break; 1340a8949de2SJeff Roberson case PRI_TIMESHARE: 134122bf7d9aSJeff Roberson #ifdef SMP 134222bf7d9aSJeff Roberson if (ke->ke_cpu != PCPU_GET(cpuid)) { 134322bf7d9aSJeff Roberson kseq_notify(ke, ke->ke_cpu); 134422bf7d9aSJeff Roberson return; 134522bf7d9aSJeff Roberson } 134622bf7d9aSJeff Roberson #endif 134715dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 134815dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 134915dc847eSJeff Roberson else 135015dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 135115dc847eSJeff Roberson break; 135215dc847eSJeff Roberson case PRI_IDLE: 135322bf7d9aSJeff Roberson #ifdef SMP 135422bf7d9aSJeff Roberson if (ke->ke_cpu != PCPU_GET(cpuid)) { 135522bf7d9aSJeff Roberson kseq_notify(ke, ke->ke_cpu); 135622bf7d9aSJeff Roberson return; 135722bf7d9aSJeff Roberson } 135822bf7d9aSJeff Roberson #endif 135915dc847eSJeff Roberson /* 136015dc847eSJeff Roberson * This is for priority prop. 136115dc847eSJeff Roberson */ 13623f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 136315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 136415dc847eSJeff Roberson else 136515dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 136615dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 136715dc847eSJeff Roberson break; 136815dc847eSJeff Roberson default: 136915dc847eSJeff Roberson panic("Unknown pri class.\n"); 1370a8949de2SJeff Roberson break; 1371a6ed4186SJeff Roberson } 137222bf7d9aSJeff Roberson #ifdef SMP 137322bf7d9aSJeff Roberson /* 137422bf7d9aSJeff Roberson * If there are any idle processors, give them our extra load. 137522bf7d9aSJeff Roberson */ 137622bf7d9aSJeff Roberson if (kseq_idle && class != PRI_ITHD && 137722bf7d9aSJeff Roberson (kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] + 137822bf7d9aSJeff Roberson kseq->ksq_loads[PRI_REALTIME]) >= kseq->ksq_cpus) { 137922bf7d9aSJeff Roberson int cpu; 138022bf7d9aSJeff Roberson 138122bf7d9aSJeff Roberson /* 138222bf7d9aSJeff Roberson * Multiple cpus could find this bit simultaneously but the 138322bf7d9aSJeff Roberson * race shouldn't be terrible. 138422bf7d9aSJeff Roberson */ 138522bf7d9aSJeff Roberson cpu = ffs(kseq_idle); 138622bf7d9aSJeff Roberson if (cpu) { 138722bf7d9aSJeff Roberson cpu--; 138822bf7d9aSJeff Roberson atomic_clear_int(&kseq_idle, 1 << cpu); 138922bf7d9aSJeff Roberson ke->ke_cpu = cpu; 139022bf7d9aSJeff Roberson ke->ke_runq = NULL; 139122bf7d9aSJeff Roberson kseq_notify(ke, cpu); 139222bf7d9aSJeff Roberson return; 139322bf7d9aSJeff Roberson } 139422bf7d9aSJeff Roberson } 139522bf7d9aSJeff Roberson if (class == PRI_TIMESHARE || class == PRI_REALTIME) 139622bf7d9aSJeff Roberson atomic_clear_int(&kseq_idle, PCPU_GET(cpumask)); 139722bf7d9aSJeff Roberson #endif 139822bf7d9aSJeff Roberson if (td->td_priority < curthread->td_priority) 139922bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 1400a8949de2SJeff Roberson 140135e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses++; 140235e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 140335e6168fSJeff Roberson 140415dc847eSJeff Roberson runq_add(ke->ke_runq, ke); 14059bca28a7SJeff Roberson kseq_add(kseq, ke); 140635e6168fSJeff Roberson } 140735e6168fSJeff Roberson 140835e6168fSJeff Roberson void 14097cf90fb3SJeff Roberson sched_rem(struct thread *td) 141035e6168fSJeff Roberson { 141115dc847eSJeff Roberson struct kseq *kseq; 14127cf90fb3SJeff Roberson struct kse *ke; 14137cf90fb3SJeff Roberson 14147cf90fb3SJeff Roberson ke = td->td_kse; 141522bf7d9aSJeff Roberson /* 141622bf7d9aSJeff Roberson * It is safe to just return here because sched_rem() is only ever 141722bf7d9aSJeff Roberson * used in places where we're immediately going to add the 141822bf7d9aSJeff Roberson * kse back on again. In that case it'll be added with the correct 141922bf7d9aSJeff Roberson * thread and priority when the caller drops the sched_lock. 142022bf7d9aSJeff Roberson */ 142122bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 142222bf7d9aSJeff Roberson return; 142335e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 14249bca28a7SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); 142535e6168fSJeff Roberson 142635e6168fSJeff Roberson ke->ke_state = KES_THREAD; 142735e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 142815dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 142915dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 143015dc847eSJeff Roberson kseq_rem(kseq, ke); 143135e6168fSJeff Roberson } 143235e6168fSJeff Roberson 143335e6168fSJeff Roberson fixpt_t 14347cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 143535e6168fSJeff Roberson { 143635e6168fSJeff Roberson fixpt_t pctcpu; 14377cf90fb3SJeff Roberson struct kse *ke; 143835e6168fSJeff Roberson 143935e6168fSJeff Roberson pctcpu = 0; 14407cf90fb3SJeff Roberson ke = td->td_kse; 1441484288deSJeff Roberson if (ke == NULL) 1442484288deSJeff Roberson return (0); 144335e6168fSJeff Roberson 1444b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 144535e6168fSJeff Roberson if (ke->ke_ticks) { 144635e6168fSJeff Roberson int rtick; 144735e6168fSJeff Roberson 1448210491d3SJeff Roberson /* 1449210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1450210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1451210491d3SJeff Roberson * rounding errors. 1452210491d3SJeff Roberson */ 1453210491d3SJeff Roberson if (ke->ke_ltick < (ticks - (hz / 2))) 145435e6168fSJeff Roberson sched_pctcpu_update(ke); 145535e6168fSJeff Roberson /* How many rtick per second ? */ 1456210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 14577121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 145835e6168fSJeff Roberson } 145935e6168fSJeff Roberson 146035e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1461828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 146235e6168fSJeff Roberson 146335e6168fSJeff Roberson return (pctcpu); 146435e6168fSJeff Roberson } 146535e6168fSJeff Roberson 146635e6168fSJeff Roberson int 146735e6168fSJeff Roberson sched_sizeof_kse(void) 146835e6168fSJeff Roberson { 146935e6168fSJeff Roberson return (sizeof(struct kse) + sizeof(struct ke_sched)); 147035e6168fSJeff Roberson } 147135e6168fSJeff Roberson 147235e6168fSJeff Roberson int 147335e6168fSJeff Roberson sched_sizeof_ksegrp(void) 147435e6168fSJeff Roberson { 147535e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 147635e6168fSJeff Roberson } 147735e6168fSJeff Roberson 147835e6168fSJeff Roberson int 147935e6168fSJeff Roberson sched_sizeof_proc(void) 148035e6168fSJeff Roberson { 148135e6168fSJeff Roberson return (sizeof(struct proc)); 148235e6168fSJeff Roberson } 148335e6168fSJeff Roberson 148435e6168fSJeff Roberson int 148535e6168fSJeff Roberson sched_sizeof_thread(void) 148635e6168fSJeff Roberson { 148735e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 148835e6168fSJeff Roberson } 1489