135e6168fSJeff Roberson /*- 215dc847eSJeff Roberson * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> 335e6168fSJeff Roberson * All rights reserved. 435e6168fSJeff Roberson * 535e6168fSJeff Roberson * Redistribution and use in source and binary forms, with or without 635e6168fSJeff Roberson * modification, are permitted provided that the following conditions 735e6168fSJeff Roberson * are met: 835e6168fSJeff Roberson * 1. Redistributions of source code must retain the above copyright 935e6168fSJeff Roberson * notice unmodified, this list of conditions, and the following 1035e6168fSJeff Roberson * disclaimer. 1135e6168fSJeff Roberson * 2. Redistributions in binary form must reproduce the above copyright 1235e6168fSJeff Roberson * notice, this list of conditions and the following disclaimer in the 1335e6168fSJeff Roberson * documentation and/or other materials provided with the distribution. 1435e6168fSJeff Roberson * 1535e6168fSJeff Roberson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 1635e6168fSJeff Roberson * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1735e6168fSJeff Roberson * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1835e6168fSJeff Roberson * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 1935e6168fSJeff Roberson * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 2035e6168fSJeff Roberson * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2135e6168fSJeff Roberson * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2235e6168fSJeff Roberson * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2335e6168fSJeff Roberson * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2435e6168fSJeff Roberson * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2535e6168fSJeff Roberson */ 2635e6168fSJeff Roberson 27677b542eSDavid E. O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. 
O'Brien 3035e6168fSJeff Roberson #include <sys/param.h> 3135e6168fSJeff Roberson #include <sys/systm.h> 3235e6168fSJeff Roberson #include <sys/kernel.h> 3335e6168fSJeff Roberson #include <sys/ktr.h> 3435e6168fSJeff Roberson #include <sys/lock.h> 3535e6168fSJeff Roberson #include <sys/mutex.h> 3635e6168fSJeff Roberson #include <sys/proc.h> 37245f3abfSJeff Roberson #include <sys/resource.h> 389bacd788SJeff Roberson #include <sys/resourcevar.h> 3935e6168fSJeff Roberson #include <sys/sched.h> 4035e6168fSJeff Roberson #include <sys/smp.h> 4135e6168fSJeff Roberson #include <sys/sx.h> 4235e6168fSJeff Roberson #include <sys/sysctl.h> 4335e6168fSJeff Roberson #include <sys/sysproto.h> 4435e6168fSJeff Roberson #include <sys/vmmeter.h> 4535e6168fSJeff Roberson #ifdef DDB 4635e6168fSJeff Roberson #include <ddb/ddb.h> 4735e6168fSJeff Roberson #endif 4835e6168fSJeff Roberson #ifdef KTRACE 4935e6168fSJeff Roberson #include <sys/uio.h> 5035e6168fSJeff Roberson #include <sys/ktrace.h> 5135e6168fSJeff Roberson #endif 5235e6168fSJeff Roberson 5335e6168fSJeff Roberson #include <machine/cpu.h> 5422bf7d9aSJeff Roberson #include <machine/smp.h> 5535e6168fSJeff Roberson 5615dc847eSJeff Roberson #define KTR_ULE KTR_NFS 5715dc847eSJeff Roberson 5835e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 5935e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 6035e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6135e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 6235e6168fSJeff Roberson 6335e6168fSJeff Roberson static void sched_setup(void *dummy); 6435e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 6535e6168fSJeff Roberson 6615dc847eSJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED"); 67e1f89c22SJeff Roberson 6815dc847eSJeff Roberson static int slice_min = 1; 6915dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 7015dc847eSJeff Roberson 71210491d3SJeff Roberson static int slice_max = 10; 7215dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 7315dc847eSJeff Roberson 7415dc847eSJeff Roberson int realstathz; 7515dc847eSJeff Roberson int tickincr = 1; 76783caefbSJeff Roberson 7735e6168fSJeff Roberson /* 7835e6168fSJeff Roberson * These datastructures are allocated within their parent datastructure but 7935e6168fSJeff Roberson * are scheduler specific. 8035e6168fSJeff Roberson */ 8135e6168fSJeff Roberson 8235e6168fSJeff Roberson struct ke_sched { 8335e6168fSJeff Roberson int ske_slice; 8435e6168fSJeff Roberson struct runq *ske_runq; 8535e6168fSJeff Roberson /* The following variables are only used for pctcpu calculation */ 8635e6168fSJeff Roberson int ske_ltick; /* Last tick that we were running on */ 8735e6168fSJeff Roberson int ske_ftick; /* First tick that we were running on */ 8835e6168fSJeff Roberson int ske_ticks; /* Tick count */ 8915dc847eSJeff Roberson /* CPU that we have affinity for. 
*/ 90cd6e33dfSJeff Roberson u_char ske_cpu; 9135e6168fSJeff Roberson }; 9235e6168fSJeff Roberson #define ke_slice ke_sched->ske_slice 9335e6168fSJeff Roberson #define ke_runq ke_sched->ske_runq 9435e6168fSJeff Roberson #define ke_ltick ke_sched->ske_ltick 9535e6168fSJeff Roberson #define ke_ftick ke_sched->ske_ftick 9635e6168fSJeff Roberson #define ke_ticks ke_sched->ske_ticks 97cd6e33dfSJeff Roberson #define ke_cpu ke_sched->ske_cpu 9822bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 9922bf7d9aSJeff Roberson 10022bf7d9aSJeff Roberson #define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */ 101a70d729bSJeff Roberson #define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */ 10235e6168fSJeff Roberson 10335e6168fSJeff Roberson struct kg_sched { 104407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 105407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 10635e6168fSJeff Roberson }; 10735e6168fSJeff Roberson #define kg_slptime kg_sched->skg_slptime 108407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 10935e6168fSJeff Roberson 11035e6168fSJeff Roberson struct td_sched { 11135e6168fSJeff Roberson int std_slptime; 11235e6168fSJeff Roberson }; 11335e6168fSJeff Roberson #define td_slptime td_sched->std_slptime 11435e6168fSJeff Roberson 1155d7ef00cSJeff Roberson struct td_sched td_sched; 11635e6168fSJeff Roberson struct ke_sched ke_sched; 11735e6168fSJeff Roberson struct kg_sched kg_sched; 11835e6168fSJeff Roberson 11935e6168fSJeff Roberson struct ke_sched *kse0_sched = &ke_sched; 12035e6168fSJeff Roberson struct kg_sched *ksegrp0_sched = &kg_sched; 12135e6168fSJeff Roberson struct p_sched *proc0_sched = NULL; 12235e6168fSJeff Roberson struct td_sched *thread0_sched = &td_sched; 12335e6168fSJeff Roberson 12435e6168fSJeff Roberson /* 125665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 126665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 127665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 128665cb285SJeff Roberson * on latency. 129e1f89c22SJeff Roberson * 130e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 131665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 132e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 13335e6168fSJeff Roberson */ 134407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 135a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 136a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 137665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 13815dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 139665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 14035e6168fSJeff Roberson 14135e6168fSJeff Roberson /* 142e1f89c22SJeff Roberson * These determine the interactivity of a process. 14335e6168fSJeff Roberson * 144407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 145407b0157SJeff Roberson * before throttling back. 146d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 147210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 148e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 
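 * As a worked illustration (assuming hz = 1000): SCHED_SLP_RUN_MAX below is
 * (1000 * 5) << 10, i.e. five seconds of combined sleep + run history kept
 * in ticks scaled by 2^10, and SCHED_SLP_RUN_FORK caps the history a child
 * inherits at half a second.  Scores fall in [0, SCHED_INTERACT_MAX]; a
 * ksegrp scoring below SCHED_INTERACT_THRESH (30) is considered interactive
 * and is placed on the current queue (see SCHED_INTERACTIVE/SCHED_CURR
 * below).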
14935e6168fSJeff Roberson */ 1504c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 151d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 152210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 153210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1544c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 155e1f89c22SJeff Roberson 15635e6168fSJeff Roberson /* 15735e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 15835e6168fSJeff Roberson * granted to each thread. 15935e6168fSJeff Roberson * 16035e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 16135e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 16235e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 163245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 164245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 1657d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment. 16635e6168fSJeff Roberson */ 16715dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 16815dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 1690392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max) 1707d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 17135e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 17235e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 173245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 1747d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 17535e6168fSJeff Roberson 17635e6168fSJeff Roberson /* 17735e6168fSJeff Roberson * This macro determines whether or not the kse belongs on the current or 17835e6168fSJeff Roberson * next run queue. 17935e6168fSJeff Roberson */ 18015dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 18115dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 182a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 183b003da79SDavid E. O'Brien (ke->ke_thread->td_priority < kg->kg_user_pri || \ 18408fd6713SJeff Roberson SCHED_INTERACTIVE(kg)) 18535e6168fSJeff Roberson 18635e6168fSJeff Roberson /* 18735e6168fSJeff Roberson * Cpu percentage computation macros and defines. 18835e6168fSJeff Roberson * 18935e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 19035e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 19135e6168fSJeff Roberson */ 19235e6168fSJeff Roberson 1935053d272SJeff Roberson #define SCHED_CPU_TIME 10 19435e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 19535e6168fSJeff Roberson 19635e6168fSJeff Roberson /* 19715dc847eSJeff Roberson * kseq - per processor runqs and statistics. 19835e6168fSJeff Roberson */ 19935e6168fSJeff Roberson struct kseq { 200a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 20115dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 20215dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 20315dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 204ef1134c9SJeff Roberson int ksq_load_timeshare; /* Load for timeshare. */ 20515dc847eSJeff Roberson int ksq_load; /* Aggregate load. 
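 * kseq_load_add() increments ksq_load once for every KSE added regardless
 * of class, while ksq_load_timeshare above counts only PRI_TIMESHARE KSEs;
 * sched_slice() consults the timeshare count while the balancer works from
 * the aggregate (and group) load.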
*/
206a0a931ceSJeff Roberson 	short	ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
20715dc847eSJeff Roberson 	short	ksq_nicemin;		/* Least nice. */
2085d7ef00cSJeff Roberson #ifdef SMP
20980f86c9fSJeff Roberson 	int	ksq_transferable;
21080f86c9fSJeff Roberson 	LIST_ENTRY(kseq) ksq_siblings;	/* Next in kseq group. */
21180f86c9fSJeff Roberson 	struct kseq_group *ksq_group;	/* Our processor group. */
212fa9c9717SJeff Roberson 	volatile struct kse *ksq_assigned;	/* assigned by another CPU. */
21333916c36SJeff Roberson #else
21433916c36SJeff Roberson 	int	ksq_sysload;		/* For loadavg, !ITHD load. */
2155d7ef00cSJeff Roberson #endif
21635e6168fSJeff Roberson };
21735e6168fSJeff Roberson 
21880f86c9fSJeff Roberson #ifdef SMP
21980f86c9fSJeff Roberson /*
22080f86c9fSJeff Roberson  * kseq groups are groups of processors which can cheaply share threads. When
22180f86c9fSJeff Roberson  * one processor in the group goes idle it will check the runqs of the other
22280f86c9fSJeff Roberson  * processors in its group prior to halting and waiting for an interrupt.
22380f86c9fSJeff Roberson  * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
22480f86c9fSJeff Roberson  * In a NUMA environment we'd want an idle bitmap per group and a two-tiered
22580f86c9fSJeff Roberson  * load balancer.
22680f86c9fSJeff Roberson  */
22780f86c9fSJeff Roberson struct kseq_group {
22880f86c9fSJeff Roberson 	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
229b2ae7ed7SMarcel Moolenaar 	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
230b2ae7ed7SMarcel Moolenaar 	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
231b2ae7ed7SMarcel Moolenaar 	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
232cac77d04SJeff Roberson 	int	ksg_load;		/* Total load of this group. */
23380f86c9fSJeff Roberson 	int	ksg_transferable;	/* Transferable load of this group. */
23480f86c9fSJeff Roberson 	LIST_HEAD(, kseq) ksg_members;	/* Linked list of all members. */
23580f86c9fSJeff Roberson };
23680f86c9fSJeff Roberson #endif
23780f86c9fSJeff Roberson 
23835e6168fSJeff Roberson /*
23935e6168fSJeff Roberson  * One kse queue per processor.
24035e6168fSJeff Roberson */ 2410a016a05SJeff Roberson #ifdef SMP 242b2ae7ed7SMarcel Moolenaar static cpumask_t kseq_idle; 243cac77d04SJeff Roberson static int ksg_maxid; 24422bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 24580f86c9fSJeff Roberson static struct kseq_group kseq_groups[MAXCPU]; 246dc03363dSJeff Roberson static int bal_tick; 247dc03363dSJeff Roberson static int gbal_tick; 248dc03363dSJeff Roberson 24980f86c9fSJeff Roberson #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 25080f86c9fSJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 251cac77d04SJeff Roberson #define KSEQ_ID(x) ((x) - kseq_cpu) 252cac77d04SJeff Roberson #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 25380f86c9fSJeff Roberson #else /* !SMP */ 25422bf7d9aSJeff Roberson static struct kseq kseq_cpu; 255dc03363dSJeff Roberson 2560a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 2570a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 2580a016a05SJeff Roberson #endif 25935e6168fSJeff Roberson 260245f3abfSJeff Roberson static void sched_slice(struct kse *ke); 26115dc847eSJeff Roberson static void sched_priority(struct ksegrp *kg); 262e1f89c22SJeff Roberson static int sched_interact_score(struct ksegrp *kg); 2634b60e324SJeff Roberson static void sched_interact_update(struct ksegrp *kg); 264d322132cSJeff Roberson static void sched_interact_fork(struct ksegrp *kg); 26522bf7d9aSJeff Roberson static void sched_pctcpu_update(struct kse *ke); 26635e6168fSJeff Roberson 2675d7ef00cSJeff Roberson /* Operations on per processor queues */ 26822bf7d9aSJeff Roberson static struct kse * kseq_choose(struct kseq *kseq); 2690a016a05SJeff Roberson static void kseq_setup(struct kseq *kseq); 270155b9987SJeff Roberson static void kseq_load_add(struct kseq *kseq, struct kse *ke); 271155b9987SJeff Roberson static void kseq_load_rem(struct kseq *kseq, struct kse *ke); 272155b9987SJeff Roberson static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke); 273155b9987SJeff Roberson static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke); 27415dc847eSJeff Roberson static void kseq_nice_add(struct kseq *kseq, int nice); 27515dc847eSJeff Roberson static void kseq_nice_rem(struct kseq *kseq, int nice); 2767cd650a9SJeff Roberson void kseq_print(int cpu); 2775d7ef00cSJeff Roberson #ifdef SMP 27880f86c9fSJeff Roberson static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class); 27922bf7d9aSJeff Roberson static struct kse *runq_steal(struct runq *rq); 280dc03363dSJeff Roberson static void sched_balance(void); 281dc03363dSJeff Roberson static void sched_balance_groups(void); 282cac77d04SJeff Roberson static void sched_balance_group(struct kseq_group *ksg); 283cac77d04SJeff Roberson static void sched_balance_pair(struct kseq *high, struct kseq *low); 28422bf7d9aSJeff Roberson static void kseq_move(struct kseq *from, int cpu); 28580f86c9fSJeff Roberson static int kseq_idled(struct kseq *kseq); 28622bf7d9aSJeff Roberson static void kseq_notify(struct kse *ke, int cpu); 28722bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 28880f86c9fSJeff Roberson static struct kse *kseq_steal(struct kseq *kseq, int stealidle); 289e7a976f4SJeff Roberson /* 290e7a976f4SJeff Roberson * On P4 Xeons the round-robin interrupt delivery is broken. As a result of 291e7a976f4SJeff Roberson * this, we can't pin interrupts to the cpu that they were delivered to, 292e7a976f4SJeff Roberson * otherwise all ithreads only run on CPU 0. 
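 * The __i386__ definition below therefore drops the PRI_ITHD exclusion and
 * lets interrupt-class KSEs migrate like any other class; on other
 * architectures PRI_ITHD KSEs are never considered transferable.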
293e7a976f4SJeff Roberson */ 294e7a976f4SJeff Roberson #ifdef __i386__ 295e7a976f4SJeff Roberson #define KSE_CAN_MIGRATE(ke, class) \ 296e7a976f4SJeff Roberson ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 297e7a976f4SJeff Roberson #else /* !__i386__ */ 2989bacd788SJeff Roberson #define KSE_CAN_MIGRATE(ke, class) \ 299a70d729bSJeff Roberson ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 && \ 300f28b3340SJeff Roberson ((ke)->ke_flags & KEF_BOUND) == 0) 301e7a976f4SJeff Roberson #endif /* !__i386__ */ 3025d7ef00cSJeff Roberson #endif 3035d7ef00cSJeff Roberson 30415dc847eSJeff Roberson void 3057cd650a9SJeff Roberson kseq_print(int cpu) 30615dc847eSJeff Roberson { 3077cd650a9SJeff Roberson struct kseq *kseq; 30815dc847eSJeff Roberson int i; 30915dc847eSJeff Roberson 3107cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 31115dc847eSJeff Roberson 31215dc847eSJeff Roberson printf("kseq:\n"); 31315dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 314155b9987SJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 315ef1134c9SJeff Roberson #ifdef SMP 31680f86c9fSJeff Roberson printf("\tload transferable: %d\n", kseq->ksq_transferable); 317ef1134c9SJeff Roberson #endif 31815dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 31915dc847eSJeff Roberson printf("\tnice counts:\n"); 320a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 32115dc847eSJeff Roberson if (kseq->ksq_nice[i]) 32215dc847eSJeff Roberson printf("\t\t%d = %d\n", 32315dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 32415dc847eSJeff Roberson } 32515dc847eSJeff Roberson 326155b9987SJeff Roberson static __inline void 327155b9987SJeff Roberson kseq_runq_add(struct kseq *kseq, struct kse *ke) 328155b9987SJeff Roberson { 329155b9987SJeff Roberson #ifdef SMP 33080f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) { 33180f86c9fSJeff Roberson kseq->ksq_transferable++; 33280f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 33380f86c9fSJeff Roberson } 334155b9987SJeff Roberson #endif 335155b9987SJeff Roberson runq_add(ke->ke_runq, ke); 336155b9987SJeff Roberson } 337155b9987SJeff Roberson 338155b9987SJeff Roberson static __inline void 339155b9987SJeff Roberson kseq_runq_rem(struct kseq *kseq, struct kse *ke) 340155b9987SJeff Roberson { 341155b9987SJeff Roberson #ifdef SMP 34280f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) { 34380f86c9fSJeff Roberson kseq->ksq_transferable--; 34480f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 34580f86c9fSJeff Roberson } 346155b9987SJeff Roberson #endif 347155b9987SJeff Roberson runq_remove(ke->ke_runq, ke); 348155b9987SJeff Roberson } 349155b9987SJeff Roberson 350a8949de2SJeff Roberson static void 351155b9987SJeff Roberson kseq_load_add(struct kseq *kseq, struct kse *ke) 3525d7ef00cSJeff Roberson { 353ef1134c9SJeff Roberson int class; 354b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 355ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 356ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 357ef1134c9SJeff Roberson kseq->ksq_load_timeshare++; 35815dc847eSJeff Roberson kseq->ksq_load++; 359207a6c0dSDavid E. 
O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 36033916c36SJeff Roberson #ifdef SMP 361cac77d04SJeff Roberson kseq->ksq_group->ksg_load++; 36233916c36SJeff Roberson #else 36333916c36SJeff Roberson kseq->ksq_sysload++; 364cac77d04SJeff Roberson #endif 36515dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 366155b9987SJeff Roberson CTR6(KTR_ULE, 367155b9987SJeff Roberson "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 36815dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 36915dc847eSJeff Roberson ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin); 37015dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 37115dc847eSJeff Roberson kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice); 3725d7ef00cSJeff Roberson } 37315dc847eSJeff Roberson 374a8949de2SJeff Roberson static void 375155b9987SJeff Roberson kseq_load_rem(struct kseq *kseq, struct kse *ke) 3765d7ef00cSJeff Roberson { 377ef1134c9SJeff Roberson int class; 378b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 379ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 380ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 381ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 382207a6c0dSDavid E. O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 38333916c36SJeff Roberson #ifdef SMP 384cac77d04SJeff Roberson kseq->ksq_group->ksg_load--; 38533916c36SJeff Roberson #else 38633916c36SJeff Roberson kseq->ksq_sysload--; 387cac77d04SJeff Roberson #endif 38815dc847eSJeff Roberson kseq->ksq_load--; 38915dc847eSJeff Roberson ke->ke_runq = NULL; 39015dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 39115dc847eSJeff Roberson kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice); 3925d7ef00cSJeff Roberson } 3935d7ef00cSJeff Roberson 39415dc847eSJeff Roberson static void 39515dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 39615dc847eSJeff Roberson { 397b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 39815dc847eSJeff Roberson /* Normalize to zero. */ 39915dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 400ef1134c9SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 40115dc847eSJeff Roberson kseq->ksq_nicemin = nice; 40215dc847eSJeff Roberson } 40315dc847eSJeff Roberson 40415dc847eSJeff Roberson static void 40515dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 40615dc847eSJeff Roberson { 40715dc847eSJeff Roberson int n; 40815dc847eSJeff Roberson 409b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 41015dc847eSJeff Roberson /* Normalize to zero. */ 41115dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 41215dc847eSJeff Roberson kseq->ksq_nice[n]--; 41315dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 41415dc847eSJeff Roberson 41515dc847eSJeff Roberson /* 41615dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 41715dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 41815dc847eSJeff Roberson * the smallest nice. 
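 * For example, if the last nice -5 KSE leaves while KSEs remain at nice 0
 * and nice 4, the loop below walks the buckets upward from the vacated
 * slot and resets ksq_nicemin to 0.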
41915dc847eSJeff Roberson */ 42015dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 42115dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 422ef1134c9SJeff Roberson kseq->ksq_load_timeshare == 0) 42315dc847eSJeff Roberson return; 42415dc847eSJeff Roberson 425a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 42615dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 42715dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 42815dc847eSJeff Roberson return; 42915dc847eSJeff Roberson } 43015dc847eSJeff Roberson } 43115dc847eSJeff Roberson 4325d7ef00cSJeff Roberson #ifdef SMP 433356500a3SJeff Roberson /* 434155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 435356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 436356500a3SJeff Roberson * by migrating some processes. 437356500a3SJeff Roberson * 438356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 439356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 440356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 441356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 442356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 443356500a3SJeff Roberson * 444356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 445356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 446356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 447356500a3SJeff Roberson * 448356500a3SJeff Roberson */ 44922bf7d9aSJeff Roberson static void 450dc03363dSJeff Roberson sched_balance(void) 451356500a3SJeff Roberson { 452cac77d04SJeff Roberson struct kseq_group *high; 453cac77d04SJeff Roberson struct kseq_group *low; 454cac77d04SJeff Roberson struct kseq_group *ksg; 455cac77d04SJeff Roberson int cnt; 456356500a3SJeff Roberson int i; 457356500a3SJeff Roberson 45886f8ae96SJeff Roberson if (smp_started == 0) 45986f8ae96SJeff Roberson goto out; 460cac77d04SJeff Roberson low = high = NULL; 461cac77d04SJeff Roberson i = random() % (ksg_maxid + 1); 462cac77d04SJeff Roberson for (cnt = 0; cnt <= ksg_maxid; cnt++) { 463cac77d04SJeff Roberson ksg = KSEQ_GROUP(i); 464cac77d04SJeff Roberson /* 465cac77d04SJeff Roberson * Find the CPU with the highest load that has some 466cac77d04SJeff Roberson * threads to transfer. 
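 * The low side needs no such check; it only receives threads, so any
 * group, however lightly loaded, is an acceptable destination.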
467cac77d04SJeff Roberson */ 468cac77d04SJeff Roberson if ((high == NULL || ksg->ksg_load > high->ksg_load) 469cac77d04SJeff Roberson && ksg->ksg_transferable) 470cac77d04SJeff Roberson high = ksg; 471cac77d04SJeff Roberson if (low == NULL || ksg->ksg_load < low->ksg_load) 472cac77d04SJeff Roberson low = ksg; 473cac77d04SJeff Roberson if (++i > ksg_maxid) 474cac77d04SJeff Roberson i = 0; 475cac77d04SJeff Roberson } 476cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 477cac77d04SJeff Roberson sched_balance_pair(LIST_FIRST(&high->ksg_members), 478cac77d04SJeff Roberson LIST_FIRST(&low->ksg_members)); 479cac77d04SJeff Roberson out: 480dc03363dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 481cac77d04SJeff Roberson } 48286f8ae96SJeff Roberson 483cac77d04SJeff Roberson static void 484dc03363dSJeff Roberson sched_balance_groups(void) 485cac77d04SJeff Roberson { 486cac77d04SJeff Roberson int i; 487cac77d04SJeff Roberson 488dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 489cac77d04SJeff Roberson if (smp_started) 490cac77d04SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 491cac77d04SJeff Roberson sched_balance_group(KSEQ_GROUP(i)); 492dc03363dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 493356500a3SJeff Roberson } 494cac77d04SJeff Roberson 495cac77d04SJeff Roberson static void 496cac77d04SJeff Roberson sched_balance_group(struct kseq_group *ksg) 497cac77d04SJeff Roberson { 498cac77d04SJeff Roberson struct kseq *kseq; 499cac77d04SJeff Roberson struct kseq *high; 500cac77d04SJeff Roberson struct kseq *low; 501cac77d04SJeff Roberson int load; 502cac77d04SJeff Roberson 503cac77d04SJeff Roberson if (ksg->ksg_transferable == 0) 504cac77d04SJeff Roberson return; 505cac77d04SJeff Roberson low = NULL; 506cac77d04SJeff Roberson high = NULL; 507cac77d04SJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 508cac77d04SJeff Roberson load = kseq->ksq_load; 509cac77d04SJeff Roberson if (high == NULL || load > high->ksq_load) 510cac77d04SJeff Roberson high = kseq; 511cac77d04SJeff Roberson if (low == NULL || load < low->ksq_load) 512cac77d04SJeff Roberson low = kseq; 513356500a3SJeff Roberson } 514cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 515cac77d04SJeff Roberson sched_balance_pair(high, low); 516356500a3SJeff Roberson } 517cac77d04SJeff Roberson 518cac77d04SJeff Roberson static void 519cac77d04SJeff Roberson sched_balance_pair(struct kseq *high, struct kseq *low) 520cac77d04SJeff Roberson { 521cac77d04SJeff Roberson int transferable; 522cac77d04SJeff Roberson int high_load; 523cac77d04SJeff Roberson int low_load; 524cac77d04SJeff Roberson int move; 525cac77d04SJeff Roberson int diff; 526cac77d04SJeff Roberson int i; 527cac77d04SJeff Roberson 52880f86c9fSJeff Roberson /* 52980f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 53080f86c9fSJeff Roberson * kseq's transferable count, otherwise we can steal from other members 53180f86c9fSJeff Roberson * of the group. 
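 * Worked example: with high_load 7 and low_load 2 the imbalance computed
 * below is 5, move rounds up to 3, and the final min() caps that at the
 * number of KSEs that are actually allowed to migrate (the transferable
 * count).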
53280f86c9fSJeff Roberson */ 533cac77d04SJeff Roberson if (high->ksq_group == low->ksq_group) { 534cac77d04SJeff Roberson transferable = high->ksq_transferable; 535cac77d04SJeff Roberson high_load = high->ksq_load; 536cac77d04SJeff Roberson low_load = low->ksq_load; 537cac77d04SJeff Roberson } else { 538cac77d04SJeff Roberson transferable = high->ksq_group->ksg_transferable; 539cac77d04SJeff Roberson high_load = high->ksq_group->ksg_load; 540cac77d04SJeff Roberson low_load = low->ksq_group->ksg_load; 541cac77d04SJeff Roberson } 54280f86c9fSJeff Roberson if (transferable == 0) 543cac77d04SJeff Roberson return; 544155b9987SJeff Roberson /* 545155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 54680f86c9fSJeff Roberson * kses we actually have to give up (transferable). 547155b9987SJeff Roberson */ 548cac77d04SJeff Roberson diff = high_load - low_load; 549356500a3SJeff Roberson move = diff / 2; 550356500a3SJeff Roberson if (diff & 0x1) 551356500a3SJeff Roberson move++; 55280f86c9fSJeff Roberson move = min(move, transferable); 553356500a3SJeff Roberson for (i = 0; i < move; i++) 554cac77d04SJeff Roberson kseq_move(high, KSEQ_ID(low)); 555356500a3SJeff Roberson return; 556356500a3SJeff Roberson } 557356500a3SJeff Roberson 55822bf7d9aSJeff Roberson static void 559356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 560356500a3SJeff Roberson { 56180f86c9fSJeff Roberson struct kseq *kseq; 56280f86c9fSJeff Roberson struct kseq *to; 563356500a3SJeff Roberson struct kse *ke; 564356500a3SJeff Roberson 56580f86c9fSJeff Roberson kseq = from; 56680f86c9fSJeff Roberson to = KSEQ_CPU(cpu); 56780f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 56880f86c9fSJeff Roberson if (ke == NULL) { 56980f86c9fSJeff Roberson struct kseq_group *ksg; 57080f86c9fSJeff Roberson 57180f86c9fSJeff Roberson ksg = kseq->ksq_group; 57280f86c9fSJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 57380f86c9fSJeff Roberson if (kseq == from || kseq->ksq_transferable == 0) 57480f86c9fSJeff Roberson continue; 57580f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 57680f86c9fSJeff Roberson break; 57780f86c9fSJeff Roberson } 57880f86c9fSJeff Roberson if (ke == NULL) 57980f86c9fSJeff Roberson panic("kseq_move: No KSEs available with a " 58080f86c9fSJeff Roberson "transferable count of %d\n", 58180f86c9fSJeff Roberson ksg->ksg_transferable); 58280f86c9fSJeff Roberson } 58380f86c9fSJeff Roberson if (kseq == to) 58480f86c9fSJeff Roberson return; 585356500a3SJeff Roberson ke->ke_state = KES_THREAD; 58680f86c9fSJeff Roberson kseq_runq_rem(kseq, ke); 58780f86c9fSJeff Roberson kseq_load_rem(kseq, ke); 588112b6d3aSJeff Roberson kseq_notify(ke, cpu); 589356500a3SJeff Roberson } 59022bf7d9aSJeff Roberson 59180f86c9fSJeff Roberson static int 59280f86c9fSJeff Roberson kseq_idled(struct kseq *kseq) 59322bf7d9aSJeff Roberson { 59480f86c9fSJeff Roberson struct kseq_group *ksg; 59580f86c9fSJeff Roberson struct kseq *steal; 59680f86c9fSJeff Roberson struct kse *ke; 59780f86c9fSJeff Roberson 59880f86c9fSJeff Roberson ksg = kseq->ksq_group; 59980f86c9fSJeff Roberson /* 60080f86c9fSJeff Roberson * If we're in a cpu group, try and steal kses from another cpu in 60180f86c9fSJeff Roberson * the group before idling. 
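 * Note the stealidle argument of 0 in the loop below: within the group we
 * only take work from ksq_next/ksq_curr and leave the ksq_idle queue
 * alone.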
60280f86c9fSJeff Roberson */ 60380f86c9fSJeff Roberson if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 60480f86c9fSJeff Roberson LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 60580f86c9fSJeff Roberson if (steal == kseq || steal->ksq_transferable == 0) 60680f86c9fSJeff Roberson continue; 60780f86c9fSJeff Roberson ke = kseq_steal(steal, 0); 60880f86c9fSJeff Roberson if (ke == NULL) 60980f86c9fSJeff Roberson continue; 61080f86c9fSJeff Roberson ke->ke_state = KES_THREAD; 61180f86c9fSJeff Roberson kseq_runq_rem(steal, ke); 61280f86c9fSJeff Roberson kseq_load_rem(steal, ke); 61380f86c9fSJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 61480f86c9fSJeff Roberson sched_add(ke->ke_thread); 61580f86c9fSJeff Roberson return (0); 61680f86c9fSJeff Roberson } 61780f86c9fSJeff Roberson } 61880f86c9fSJeff Roberson /* 61980f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 62080f86c9fSJeff Roberson * idle. Otherwise we could get into a situation where a KSE bounces 62180f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 62280f86c9fSJeff Roberson */ 62380f86c9fSJeff Roberson ksg->ksg_idlemask |= PCPU_GET(cpumask); 62480f86c9fSJeff Roberson if (ksg->ksg_idlemask != ksg->ksg_cpumask) 62580f86c9fSJeff Roberson return (1); 62680f86c9fSJeff Roberson atomic_set_int(&kseq_idle, ksg->ksg_mask); 62780f86c9fSJeff Roberson return (1); 62822bf7d9aSJeff Roberson } 62922bf7d9aSJeff Roberson 63022bf7d9aSJeff Roberson static void 63122bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 63222bf7d9aSJeff Roberson { 63322bf7d9aSJeff Roberson struct kse *nke; 63422bf7d9aSJeff Roberson struct kse *ke; 63522bf7d9aSJeff Roberson 63622bf7d9aSJeff Roberson do { 637fa9c9717SJeff Roberson (volatile struct kse *)ke = kseq->ksq_assigned; 63822bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 63922bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 64022bf7d9aSJeff Roberson nke = ke->ke_assign; 64122bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 64222bf7d9aSJeff Roberson sched_add(ke->ke_thread); 64322bf7d9aSJeff Roberson } 64422bf7d9aSJeff Roberson } 64522bf7d9aSJeff Roberson 64622bf7d9aSJeff Roberson static void 64722bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 64822bf7d9aSJeff Roberson { 64922bf7d9aSJeff Roberson struct kseq *kseq; 65022bf7d9aSJeff Roberson struct thread *td; 65122bf7d9aSJeff Roberson struct pcpu *pcpu; 65222bf7d9aSJeff Roberson 65386e1c22aSJeff Roberson ke->ke_cpu = cpu; 65422bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 65522bf7d9aSJeff Roberson 65622bf7d9aSJeff Roberson kseq = KSEQ_CPU(cpu); 6575d7ef00cSJeff Roberson 6580c0a98b2SJeff Roberson /* 65922bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 
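 * The KSE is pushed onto the target's ksq_assigned list with a lock-free
 * compare-and-swap loop; TDF_NEEDRESCHED and the IPI_AST are only raised
 * when the new KSE would preempt the remote thread or the remote CPU is
 * running its idle thread.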
66022bf7d9aSJeff Roberson */ 66122bf7d9aSJeff Roberson do { 662fa9c9717SJeff Roberson (volatile struct kse *)ke->ke_assign = kseq->ksq_assigned; 66322bf7d9aSJeff Roberson } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 66422bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 66522bf7d9aSJeff Roberson td = pcpu->pc_curthread; 66622bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 66722bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 66822bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 66922bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 67022bf7d9aSJeff Roberson } 67122bf7d9aSJeff Roberson } 67222bf7d9aSJeff Roberson 67322bf7d9aSJeff Roberson static struct kse * 67422bf7d9aSJeff Roberson runq_steal(struct runq *rq) 67522bf7d9aSJeff Roberson { 67622bf7d9aSJeff Roberson struct rqhead *rqh; 67722bf7d9aSJeff Roberson struct rqbits *rqb; 67822bf7d9aSJeff Roberson struct kse *ke; 67922bf7d9aSJeff Roberson int word; 68022bf7d9aSJeff Roberson int bit; 68122bf7d9aSJeff Roberson 68222bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 68322bf7d9aSJeff Roberson rqb = &rq->rq_status; 68422bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 68522bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 68622bf7d9aSJeff Roberson continue; 68722bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 688a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 68922bf7d9aSJeff Roberson continue; 69022bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 69122bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, ke_procq) { 692ef1134c9SJeff Roberson if (KSE_CAN_MIGRATE(ke, 693ef1134c9SJeff Roberson PRI_BASE(ke->ke_ksegrp->kg_pri_class))) 69422bf7d9aSJeff Roberson return (ke); 69522bf7d9aSJeff Roberson } 69622bf7d9aSJeff Roberson } 69722bf7d9aSJeff Roberson } 69822bf7d9aSJeff Roberson return (NULL); 69922bf7d9aSJeff Roberson } 70022bf7d9aSJeff Roberson 70122bf7d9aSJeff Roberson static struct kse * 70280f86c9fSJeff Roberson kseq_steal(struct kseq *kseq, int stealidle) 70322bf7d9aSJeff Roberson { 70422bf7d9aSJeff Roberson struct kse *ke; 70522bf7d9aSJeff Roberson 70680f86c9fSJeff Roberson /* 70780f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 70880f86c9fSJeff Roberson * may not have run for a while. 70980f86c9fSJeff Roberson */ 71022bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 71122bf7d9aSJeff Roberson return (ke); 71280f86c9fSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 71380f86c9fSJeff Roberson return (ke); 71480f86c9fSJeff Roberson if (stealidle) 71522bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 71680f86c9fSJeff Roberson return (NULL); 71722bf7d9aSJeff Roberson } 71880f86c9fSJeff Roberson 71980f86c9fSJeff Roberson int 72080f86c9fSJeff Roberson kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 72180f86c9fSJeff Roberson { 72280f86c9fSJeff Roberson struct kseq_group *ksg; 72380f86c9fSJeff Roberson int cpu; 72480f86c9fSJeff Roberson 725670c524fSJeff Roberson if (smp_started == 0) 726670c524fSJeff Roberson return (0); 72780f86c9fSJeff Roberson cpu = 0; 72880f86c9fSJeff Roberson ksg = kseq->ksq_group; 72980f86c9fSJeff Roberson 73080f86c9fSJeff Roberson /* 731670c524fSJeff Roberson * If there are any idle groups, give them our extra load. The 732670c524fSJeff Roberson * threshold at which we start to reassign kses has a large impact 733670c524fSJeff Roberson * on the overall performance of the system. 
Tuned too high and 734670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 735d50c87deSOlivier Houchard * and context switches. 736670c524fSJeff Roberson */ 737249e0beaSJeff Roberson if (ksg->ksg_load > (ksg->ksg_cpus * 2) && kseq_idle) { 73880f86c9fSJeff Roberson /* 73980f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 74080f86c9fSJeff Roberson * but the race shouldn't be terrible. 74180f86c9fSJeff Roberson */ 74280f86c9fSJeff Roberson cpu = ffs(kseq_idle); 74380f86c9fSJeff Roberson if (cpu) 74480f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, 1 << (cpu - 1)); 74580f86c9fSJeff Roberson } 74680f86c9fSJeff Roberson /* 74780f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 74880f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 74980f86c9fSJeff Roberson */ 75080f86c9fSJeff Roberson if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) { 75180f86c9fSJeff Roberson cpu = ffs(ksg->ksg_idlemask); 75280f86c9fSJeff Roberson if (cpu) 75380f86c9fSJeff Roberson ksg->ksg_idlemask &= ~(1 << (cpu - 1)); 75480f86c9fSJeff Roberson } 75580f86c9fSJeff Roberson /* 75680f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 75780f86c9fSJeff Roberson */ 75880f86c9fSJeff Roberson if (cpu) { 75980f86c9fSJeff Roberson cpu--; 76080f86c9fSJeff Roberson ke->ke_runq = NULL; 76180f86c9fSJeff Roberson kseq_notify(ke, cpu); 76280f86c9fSJeff Roberson return (1); 76380f86c9fSJeff Roberson } 76480f86c9fSJeff Roberson return (0); 76580f86c9fSJeff Roberson } 76680f86c9fSJeff Roberson 76722bf7d9aSJeff Roberson #endif /* SMP */ 76822bf7d9aSJeff Roberson 76922bf7d9aSJeff Roberson /* 77022bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 7710c0a98b2SJeff Roberson */ 7720c0a98b2SJeff Roberson 77322bf7d9aSJeff Roberson static struct kse * 77422bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 7755d7ef00cSJeff Roberson { 7765d7ef00cSJeff Roberson struct kse *ke; 7775d7ef00cSJeff Roberson struct runq *swap; 7785d7ef00cSJeff Roberson 779b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 78015dc847eSJeff Roberson swap = NULL; 781a8949de2SJeff Roberson 78215dc847eSJeff Roberson for (;;) { 78315dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 78415dc847eSJeff Roberson if (ke == NULL) { 78515dc847eSJeff Roberson /* 78615dc847eSJeff Roberson * We already swaped once and didn't get anywhere. 78715dc847eSJeff Roberson */ 78815dc847eSJeff Roberson if (swap) 78915dc847eSJeff Roberson break; 7905d7ef00cSJeff Roberson swap = kseq->ksq_curr; 7915d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 7925d7ef00cSJeff Roberson kseq->ksq_next = swap; 79315dc847eSJeff Roberson continue; 794a8949de2SJeff Roberson } 79515dc847eSJeff Roberson /* 79615dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 79715dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 79815dc847eSJeff Roberson * of the range that receives slices. 
79915dc847eSJeff Roberson */ 80022bf7d9aSJeff Roberson if (ke->ke_slice == 0) { 80115dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 80215dc847eSJeff Roberson sched_slice(ke); 80315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 80415dc847eSJeff Roberson runq_add(ke->ke_runq, ke); 80515dc847eSJeff Roberson continue; 80615dc847eSJeff Roberson } 80715dc847eSJeff Roberson return (ke); 80815dc847eSJeff Roberson } 80915dc847eSJeff Roberson 810a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 811245f3abfSJeff Roberson } 8120a016a05SJeff Roberson 8130a016a05SJeff Roberson static void 8140a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 8150a016a05SJeff Roberson { 81615dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 81715dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 818a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 81915dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 82015dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 8217cd650a9SJeff Roberson kseq->ksq_load = 0; 822ef1134c9SJeff Roberson kseq->ksq_load_timeshare = 0; 8230a016a05SJeff Roberson } 8240a016a05SJeff Roberson 82535e6168fSJeff Roberson static void 82635e6168fSJeff Roberson sched_setup(void *dummy) 82735e6168fSJeff Roberson { 8280ec896fdSJeff Roberson #ifdef SMP 829cac77d04SJeff Roberson int balance_groups; 83035e6168fSJeff Roberson int i; 8310ec896fdSJeff Roberson #endif 83235e6168fSJeff Roberson 833e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 834e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 835e1f89c22SJeff Roberson 836356500a3SJeff Roberson #ifdef SMP 837cac77d04SJeff Roberson balance_groups = 0; 83880f86c9fSJeff Roberson /* 83980f86c9fSJeff Roberson * Initialize the kseqs. 84080f86c9fSJeff Roberson */ 841749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 84280f86c9fSJeff Roberson struct kseq *ksq; 84380f86c9fSJeff Roberson 84480f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 84580f86c9fSJeff Roberson ksq->ksq_assigned = NULL; 846749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 84780f86c9fSJeff Roberson } 84880f86c9fSJeff Roberson if (smp_topology == NULL) { 84980f86c9fSJeff Roberson struct kseq_group *ksg; 85080f86c9fSJeff Roberson struct kseq *ksq; 85180f86c9fSJeff Roberson 85280f86c9fSJeff Roberson for (i = 0; i < MAXCPU; i++) { 85380f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 85480f86c9fSJeff Roberson ksg = &kseq_groups[i]; 85580f86c9fSJeff Roberson /* 856dc03363dSJeff Roberson * Setup a kseq group with one member. 85780f86c9fSJeff Roberson */ 85880f86c9fSJeff Roberson ksq->ksq_transferable = 0; 85980f86c9fSJeff Roberson ksq->ksq_group = ksg; 86080f86c9fSJeff Roberson ksg->ksg_cpus = 1; 86180f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 86280f86c9fSJeff Roberson ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 863cac77d04SJeff Roberson ksg->ksg_load = 0; 86480f86c9fSJeff Roberson ksg->ksg_transferable = 0; 86580f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 86680f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 867749d01b0SJeff Roberson } 868749d01b0SJeff Roberson } else { 86980f86c9fSJeff Roberson struct kseq_group *ksg; 87080f86c9fSJeff Roberson struct cpu_group *cg; 871749d01b0SJeff Roberson int j; 872749d01b0SJeff Roberson 873749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 874749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 87580f86c9fSJeff Roberson ksg = &kseq_groups[i]; 87680f86c9fSJeff Roberson /* 87780f86c9fSJeff Roberson * Initialize the group. 
87880f86c9fSJeff Roberson */ 87980f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 880cac77d04SJeff Roberson ksg->ksg_load = 0; 88180f86c9fSJeff Roberson ksg->ksg_transferable = 0; 88280f86c9fSJeff Roberson ksg->ksg_cpus = cg->cg_count; 88380f86c9fSJeff Roberson ksg->ksg_cpumask = cg->cg_mask; 88480f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 88580f86c9fSJeff Roberson /* 88680f86c9fSJeff Roberson * Find all of the group members and add them. 88780f86c9fSJeff Roberson */ 88880f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 88980f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 89080f86c9fSJeff Roberson if (ksg->ksg_mask == 0) 89180f86c9fSJeff Roberson ksg->ksg_mask = 1 << j; 89280f86c9fSJeff Roberson kseq_cpu[j].ksq_transferable = 0; 89380f86c9fSJeff Roberson kseq_cpu[j].ksq_group = ksg; 89480f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, 89580f86c9fSJeff Roberson &kseq_cpu[j], ksq_siblings); 89680f86c9fSJeff Roberson } 89780f86c9fSJeff Roberson } 898cac77d04SJeff Roberson if (ksg->ksg_cpus > 1) 899cac77d04SJeff Roberson balance_groups = 1; 900749d01b0SJeff Roberson } 901cac77d04SJeff Roberson ksg_maxid = smp_topology->ct_count - 1; 902749d01b0SJeff Roberson } 903cac77d04SJeff Roberson /* 904cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 905cac77d04SJeff Roberson * interfere with each other. 906cac77d04SJeff Roberson */ 907dc03363dSJeff Roberson bal_tick = ticks + hz; 908cac77d04SJeff Roberson if (balance_groups) 909dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 910749d01b0SJeff Roberson #else 911749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 912356500a3SJeff Roberson #endif 913749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 914155b9987SJeff Roberson kseq_load_add(KSEQ_SELF(), &kse0); 915749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 91635e6168fSJeff Roberson } 91735e6168fSJeff Roberson 91835e6168fSJeff Roberson /* 91935e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 92035e6168fSJeff Roberson * process. 92135e6168fSJeff Roberson */ 92215dc847eSJeff Roberson static void 92335e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 92435e6168fSJeff Roberson { 92535e6168fSJeff Roberson int pri; 92635e6168fSJeff Roberson 92735e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 92815dc847eSJeff Roberson return; 92935e6168fSJeff Roberson 93015dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 931e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 93235e6168fSJeff Roberson pri += kg->kg_nice; 93335e6168fSJeff Roberson 93435e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 93535e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 93635e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 93735e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 93835e6168fSJeff Roberson 93935e6168fSJeff Roberson kg->kg_user_pri = pri; 94035e6168fSJeff Roberson 94115dc847eSJeff Roberson return; 94235e6168fSJeff Roberson } 94335e6168fSJeff Roberson 94435e6168fSJeff Roberson /* 945245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 946a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 
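 * A worked example, assuming hz = 1000 so that sched_setup() picks
 * slice_min = 10 and slice_max = 142 ticks: a KSE whose nice equals the
 * queue's ksq_nicemin (offset 0) gets SCHED_SLICE_NICE(0) = 142; an offset
 * of 5 yields 142 - (5 * 133) / 19 = 107; offsets beyond
 * SCHED_SLICE_NTHRESH (19) receive no slice at all unless the ksegrp is at
 * nice 0, which is always granted SCHED_SLICE_MIN.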
94735e6168fSJeff Roberson */ 948245f3abfSJeff Roberson static void 949245f3abfSJeff Roberson sched_slice(struct kse *ke) 95035e6168fSJeff Roberson { 95115dc847eSJeff Roberson struct kseq *kseq; 952245f3abfSJeff Roberson struct ksegrp *kg; 95335e6168fSJeff Roberson 954245f3abfSJeff Roberson kg = ke->ke_ksegrp; 95515dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 95635e6168fSJeff Roberson 957245f3abfSJeff Roberson /* 958245f3abfSJeff Roberson * Rationale: 959245f3abfSJeff Roberson * KSEs in interactive ksegs get the minimum slice so that we 960245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 961245f3abfSJeff Roberson * 962245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 963245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 964245f3abfSJeff Roberson * on the run queue for this cpu. 965245f3abfSJeff Roberson * 966245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 967245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 968245f3abfSJeff Roberson * this when they first expire. 969245f3abfSJeff Roberson * 970245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 971245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 972245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 973245f3abfSJeff Roberson * 9747d1a81b4SJeff Roberson * If the kse is outside of the window it will get no slice 9757d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 9767d1a81b4SJeff Roberson * run queue. The exception to this is nice 0 ksegs when 9777d1a81b4SJeff Roberson * a nice -20 is running. They are always granted a minimum 9787d1a81b4SJeff Roberson * slice. 979245f3abfSJeff Roberson */ 98015dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 981245f3abfSJeff Roberson int nice; 982245f3abfSJeff Roberson 98315dc847eSJeff Roberson nice = kg->kg_nice + (0 - kseq->ksq_nicemin); 984ef1134c9SJeff Roberson if (kseq->ksq_load_timeshare == 0 || 98515dc847eSJeff Roberson kg->kg_nice < kseq->ksq_nicemin) 986245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 9877d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 988245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 9897d1a81b4SJeff Roberson else if (kg->kg_nice == 0) 9907d1a81b4SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 991245f3abfSJeff Roberson else 992245f3abfSJeff Roberson ke->ke_slice = 0; 993245f3abfSJeff Roberson } else 9949b5f6f62SJeff Roberson ke->ke_slice = SCHED_SLICE_INTERACTIVE; 99535e6168fSJeff Roberson 99615dc847eSJeff Roberson CTR6(KTR_ULE, 99715dc847eSJeff Roberson "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)", 99815dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin, 999ef1134c9SJeff Roberson kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg)); 100015dc847eSJeff Roberson 1001245f3abfSJeff Roberson return; 100235e6168fSJeff Roberson } 100335e6168fSJeff Roberson 1004d322132cSJeff Roberson /* 1005d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1006d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1007d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1008d322132cSJeff Roberson * adjusted to more than double their maximum. 
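 * Concretely, with hz = 1000 the cap is (1000 * 5) << 10 = 5120000.  Once
 * kg_runtime + kg_slptime reaches that, both are scaled by 4/5, which
 * preserves their ratio (and so the interactivity score) while pulling the
 * sum back under the cap; the divide-by-two path is only needed when the
 * sum has overshot by more than a fifth, e.g. after the bulk sleep credit
 * applied in sched_wakeup().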
1009d322132cSJeff Roberson */ 10104b60e324SJeff Roberson static void 10114b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 10124b60e324SJeff Roberson { 1013d322132cSJeff Roberson int sum; 10143f741ca1SJeff Roberson 1015d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1016d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1017d322132cSJeff Roberson return; 1018d322132cSJeff Roberson /* 1019d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1020d322132cSJeff Roberson * will not bring us back into range. Dividing by two here forces 1021d322132cSJeff Roberson * us into the range of [3/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1022d322132cSJeff Roberson */ 102337a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1024d322132cSJeff Roberson kg->kg_runtime /= 2; 1025d322132cSJeff Roberson kg->kg_slptime /= 2; 1026d322132cSJeff Roberson return; 1027d322132cSJeff Roberson } 1028d322132cSJeff Roberson kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1029d322132cSJeff Roberson kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1030d322132cSJeff Roberson } 1031d322132cSJeff Roberson 1032d322132cSJeff Roberson static void 1033d322132cSJeff Roberson sched_interact_fork(struct ksegrp *kg) 1034d322132cSJeff Roberson { 1035d322132cSJeff Roberson int ratio; 1036d322132cSJeff Roberson int sum; 1037d322132cSJeff Roberson 1038d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1039d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1040d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 1041d322132cSJeff Roberson kg->kg_runtime /= ratio; 1042d322132cSJeff Roberson kg->kg_slptime /= ratio; 10434b60e324SJeff Roberson } 10444b60e324SJeff Roberson } 10454b60e324SJeff Roberson 1046e1f89c22SJeff Roberson static int 1047e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 1048e1f89c22SJeff Roberson { 1049210491d3SJeff Roberson int div; 1050e1f89c22SJeff Roberson 1051e1f89c22SJeff Roberson if (kg->kg_runtime > kg->kg_slptime) { 1052210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1053210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 1054210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1055210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 1056210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1057210491d3SJeff Roberson return (kg->kg_runtime / div); 1058e1f89c22SJeff Roberson } 1059e1f89c22SJeff Roberson 1060210491d3SJeff Roberson /* 1061210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1062210491d3SJeff Roberson */ 1063210491d3SJeff Roberson return (0); 1064e1f89c22SJeff Roberson 1065e1f89c22SJeff Roberson } 1066e1f89c22SJeff Roberson 106715dc847eSJeff Roberson /* 106815dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 106915dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 107015dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 107115dc847eSJeff Roberson */ 107235e6168fSJeff Roberson int 107335e6168fSJeff Roberson sched_rr_interval(void) 107435e6168fSJeff Roberson { 107535e6168fSJeff Roberson return (SCHED_SLICE_MAX); 107635e6168fSJeff Roberson } 107735e6168fSJeff Roberson 107822bf7d9aSJeff Roberson static void 107935e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 108035e6168fSJeff Roberson { 108135e6168fSJeff Roberson /* 108235e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 
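 * The shift by 10 below preserves precision through the integer divide
 * while ke_ticks is rescaled from the window actually measured
 * (ticks - ke_ftick) to the nominal SCHED_CPU_TICKS window; a KSE that has
 * not run at all within the last SCHED_CPU_TICKS simply has its count
 * zeroed.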
1083210491d3SJeff Roberson */ 108481de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1085210491d3SJeff Roberson /* 108681de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 108781de51bfSJeff Roberson * round away our results. 108865c8760dSJeff Roberson */ 108965c8760dSJeff Roberson ke->ke_ticks <<= 10; 109081de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 109135e6168fSJeff Roberson SCHED_CPU_TICKS; 109265c8760dSJeff Roberson ke->ke_ticks >>= 10; 109381de51bfSJeff Roberson } else 109481de51bfSJeff Roberson ke->ke_ticks = 0; 109535e6168fSJeff Roberson ke->ke_ltick = ticks; 109635e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 109735e6168fSJeff Roberson } 109835e6168fSJeff Roberson 109935e6168fSJeff Roberson void 110035e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio) 110135e6168fSJeff Roberson { 11023f741ca1SJeff Roberson struct kse *ke; 110335e6168fSJeff Roberson 11043f741ca1SJeff Roberson ke = td->td_kse; 110535e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 110635e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 11073f741ca1SJeff Roberson /* 11083f741ca1SJeff Roberson * If the priority has been elevated due to priority 11093f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 11103f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 11113f741ca1SJeff Roberson * needs to fix things up. 11123f741ca1SJeff Roberson */ 1113769a3635SJeff Roberson if (prio < td->td_priority && ke && 1114769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 111522bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 11163f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 11173f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 11183f741ca1SJeff Roberson runq_add(ke->ke_runq, ke); 111935e6168fSJeff Roberson } 11203f741ca1SJeff Roberson adjustrunqueue(td, prio); 11213f741ca1SJeff Roberson } else 11223f741ca1SJeff Roberson td->td_priority = prio; 112335e6168fSJeff Roberson } 112435e6168fSJeff Roberson 112535e6168fSJeff Roberson void 1126ae53b483SJeff Roberson sched_switch(struct thread *td) 112735e6168fSJeff Roberson { 1128ae53b483SJeff Roberson struct thread *newtd; 112935e6168fSJeff Roberson struct kse *ke; 113035e6168fSJeff Roberson 113135e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 113235e6168fSJeff Roberson 113335e6168fSJeff Roberson ke = td->td_kse; 113435e6168fSJeff Roberson 113535e6168fSJeff Roberson td->td_last_kse = ke; 1136060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1137060563ecSJulian Elischer td->td_oncpu = NOCPU; 11384a338afdSJulian Elischer td->td_flags &= ~TDF_NEEDRESCHED; 113935e6168fSJeff Roberson 1140b11fdad0SJeff Roberson /* 1141b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1142b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 
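 * In that case the KSE is left untouched here; the destination CPU will
 * find it on its ksq_assigned list, clear KEF_ASSIGNED in kseq_assign()
 * and sched_add() it from there.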
1143b11fdad0SJeff Roberson */ 1144b11fdad0SJeff Roberson if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 114535e6168fSJeff Roberson if (TD_IS_RUNNING(td)) { 1146155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1147ab2baa72SDavid Xu setrunqueue(td); 11480e0f6266SJeff Roberson } else { 114933916c36SJeff Roberson if (ke->ke_runq) { 1150155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 115133916c36SJeff Roberson } else if ((td->td_flags & TDF_IDLETD) == 0) 115233916c36SJeff Roberson backtrace(); 115335e6168fSJeff Roberson /* 115435e6168fSJeff Roberson * We will not be on the run queue. So we must be 115535e6168fSJeff Roberson * sleeping or similar. 115635e6168fSJeff Roberson */ 11570e2a4d3aSDavid Xu if (td->td_proc->p_flag & P_SA) 115835e6168fSJeff Roberson kse_reassign(ke); 11590e0f6266SJeff Roberson } 1160b11fdad0SJeff Roberson } 1161ae53b483SJeff Roberson newtd = choosethread(); 1162ae53b483SJeff Roberson if (td != newtd) 1163ae53b483SJeff Roberson cpu_switch(td, newtd); 1164ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 116535e6168fSJeff Roberson 1166060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 116735e6168fSJeff Roberson } 116835e6168fSJeff Roberson 116935e6168fSJeff Roberson void 117035e6168fSJeff Roberson sched_nice(struct ksegrp *kg, int nice) 117135e6168fSJeff Roberson { 117215dc847eSJeff Roberson struct kse *ke; 117335e6168fSJeff Roberson struct thread *td; 117415dc847eSJeff Roberson struct kseq *kseq; 117535e6168fSJeff Roberson 11760b5318c8SJohn Baldwin PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED); 11770b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 117815dc847eSJeff Roberson /* 117915dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 118015dc847eSJeff Roberson */ 118115dc847eSJeff Roberson if (kg->kg_pri_class == PRI_TIMESHARE) 118215dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 1183d07ac847SJeff Roberson if (ke->ke_runq == NULL) 118415dc847eSJeff Roberson continue; 118515dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 118615dc847eSJeff Roberson kseq_nice_rem(kseq, kg->kg_nice); 118715dc847eSJeff Roberson kseq_nice_add(kseq, nice); 118815dc847eSJeff Roberson } 118935e6168fSJeff Roberson kg->kg_nice = nice; 119035e6168fSJeff Roberson sched_priority(kg); 119115dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 11924a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 119335e6168fSJeff Roberson } 119435e6168fSJeff Roberson 119535e6168fSJeff Roberson void 119644f3b092SJohn Baldwin sched_sleep(struct thread *td) 119735e6168fSJeff Roberson { 119835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 119935e6168fSJeff Roberson 120035e6168fSJeff Roberson td->td_slptime = ticks; 120144f3b092SJohn Baldwin td->td_base_pri = td->td_priority; 120235e6168fSJeff Roberson 120315dc847eSJeff Roberson CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 120415dc847eSJeff Roberson td->td_kse, td->td_slptime); 120535e6168fSJeff Roberson } 120635e6168fSJeff Roberson 120735e6168fSJeff Roberson void 120835e6168fSJeff Roberson sched_wakeup(struct thread *td) 120935e6168fSJeff Roberson { 121035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 121135e6168fSJeff Roberson 121235e6168fSJeff Roberson /* 121335e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 121435e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 
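 */

/*
 * Illustrative sketch, not part of the original file: sched_nice() above
 * moves a group's contribution from its old nice level to the new one on
 * every per-CPU queue that holds one of its KSEs.  A minimal user-space
 * model of that bookkeeping might look like the block below; the ex_*
 * names are placeholders for the kseq_nice_add()/kseq_nice_rem() helpers
 * used above, and the -20..20 nice range is only assumed.  Never compiled
 * (#if 0).
 */
#if 0
#define EX_NICE_MIN     (-20)
#define EX_NICE_MAX     20
#define EX_NICE_SPAN    (EX_NICE_MAX - EX_NICE_MIN + 1)

struct ex_queue {
        int     nice_cnt[EX_NICE_SPAN]; /* KSEs queued per nice level */
        int     nice_min;               /* lowest nice level present */
};

static void
ex_nice_add(struct ex_queue *q, int nice)
{
        q->nice_cnt[nice - EX_NICE_MIN]++;
        if (nice < q->nice_min)
                q->nice_min = nice;
}

static void
ex_nice_rem(struct ex_queue *q, int nice)
{
        int i;

        q->nice_cnt[nice - EX_NICE_MIN]--;
        /* Rescan for the new minimum when the last KSE at that level left. */
        if (nice == q->nice_min && q->nice_cnt[nice - EX_NICE_MIN] == 0) {
                q->nice_min = EX_NICE_MAX;
                for (i = 0; i < EX_NICE_SPAN; i++) {
                        if (q->nice_cnt[i] != 0) {
                                q->nice_min = i + EX_NICE_MIN;
                                break;
                        }
                }
        }
}

/* Renicing a queued KSE is then ex_nice_rem(q, old) + ex_nice_add(q, new). */
#endif

/*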
121535e6168fSJeff Roberson */ 121635e6168fSJeff Roberson if (td->td_slptime) { 1217f1e8dc4aSJeff Roberson struct ksegrp *kg; 121815dc847eSJeff Roberson int hzticks; 1219f1e8dc4aSJeff Roberson 1220f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1221d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1222d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1223d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1224d322132cSJeff Roberson kg->kg_runtime = 1; 1225d322132cSJeff Roberson } else { 1226d322132cSJeff Roberson kg->kg_slptime += hzticks; 12274b60e324SJeff Roberson sched_interact_update(kg); 1228d322132cSJeff Roberson } 1229f1e8dc4aSJeff Roberson sched_priority(kg); 12304b60e324SJeff Roberson if (td->td_kse) 12314b60e324SJeff Roberson sched_slice(td->td_kse); 123215dc847eSJeff Roberson CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 123315dc847eSJeff Roberson td->td_kse, hzticks); 123435e6168fSJeff Roberson td->td_slptime = 0; 1235f1e8dc4aSJeff Roberson } 123635e6168fSJeff Roberson setrunqueue(td); 123735e6168fSJeff Roberson } 123835e6168fSJeff Roberson 123935e6168fSJeff Roberson /* 124035e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 124135e6168fSJeff Roberson * priority. 124235e6168fSJeff Roberson */ 124335e6168fSJeff Roberson void 124415dc847eSJeff Roberson sched_fork(struct proc *p, struct proc *p1) 124535e6168fSJeff Roberson { 124635e6168fSJeff Roberson 124735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 124835e6168fSJeff Roberson 124915dc847eSJeff Roberson sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); 125015dc847eSJeff Roberson sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); 125115dc847eSJeff Roberson sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); 125215dc847eSJeff Roberson } 125315dc847eSJeff Roberson 125415dc847eSJeff Roberson void 125515dc847eSJeff Roberson sched_fork_kse(struct kse *ke, struct kse *child) 125615dc847eSJeff Roberson { 12572056d0a1SJohn Baldwin 1258210491d3SJeff Roberson child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1259093c05e3SJeff Roberson child->ke_cpu = ke->ke_cpu; 126015dc847eSJeff Roberson child->ke_runq = NULL; 126115dc847eSJeff Roberson 1262736c97c7SJeff Roberson /* Grab our parents cpu estimation information. 
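 */

/*
 * Illustrative sketch, not part of the original file: the sleep credit taken
 * in sched_wakeup() above, with placeholder ex_* names.  Slept ticks are
 * promoted into the same <<10 fixed-point scale as kg_runtime before being
 * added, and a very long sleep saturates the history instead of growing it
 * without bound.  Never compiled (#if 0).
 */
#if 0
struct ex_interact {
        int     runtime;        /* fixed point, <<10, like kg_runtime */
        int     slptime;        /* fixed point, <<10, like kg_slptime */
};

static void
ex_wakeup_credit(struct ex_interact *ki, int slept_ticks, int slp_run_max)
{
        int hzticks;

        hzticks = slept_ticks << 10;
        if (hzticks >= slp_run_max) {
                /* Saturate: treat the thread as having slept "forever". */
                ki->slptime = slp_run_max;
                ki->runtime = 1;
                return;
        }
        ki->slptime += hzticks;
        /*
         * The kernel then calls sched_interact_update() to pull the
         * runtime + slptime sum back under its maximum.
         */
}
#endif

/*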
*/ 1263736c97c7SJeff Roberson child->ke_ticks = ke->ke_ticks; 1264736c97c7SJeff Roberson child->ke_ltick = ke->ke_ltick; 1265736c97c7SJeff Roberson child->ke_ftick = ke->ke_ftick; 126615dc847eSJeff Roberson } 126715dc847eSJeff Roberson 126815dc847eSJeff Roberson void 126915dc847eSJeff Roberson sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) 127015dc847eSJeff Roberson { 12712056d0a1SJohn Baldwin PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); 1272210491d3SJeff Roberson 1273d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1274d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1275d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 1276d322132cSJeff Roberson child->kg_nice = kg->kg_nice; 1277d322132cSJeff Roberson sched_interact_fork(child); 12784b60e324SJeff Roberson kg->kg_runtime += tickincr << 10; 12794b60e324SJeff Roberson sched_interact_update(kg); 128015dc847eSJeff Roberson 1281d322132cSJeff Roberson CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)", 1282d322132cSJeff Roberson kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime, 1283d322132cSJeff Roberson child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime); 1284c9f25d8fSJeff Roberson } 1285c9f25d8fSJeff Roberson 128615dc847eSJeff Roberson void 128715dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 128815dc847eSJeff Roberson { 128915dc847eSJeff Roberson } 129015dc847eSJeff Roberson 129115dc847eSJeff Roberson void 129215dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 129315dc847eSJeff Roberson { 129415dc847eSJeff Roberson struct kseq *kseq; 129515dc847eSJeff Roberson struct kse *ke; 1296ef1134c9SJeff Roberson int nclass; 1297ef1134c9SJeff Roberson int oclass; 129815dc847eSJeff Roberson 12992056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 130015dc847eSJeff Roberson if (kg->kg_pri_class == class) 130115dc847eSJeff Roberson return; 130215dc847eSJeff Roberson 1303ef1134c9SJeff Roberson nclass = PRI_BASE(class); 1304ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 130515dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 130615dc847eSJeff Roberson if (ke->ke_state != KES_ONRUNQ && 130715dc847eSJeff Roberson ke->ke_state != KES_THREAD) 130815dc847eSJeff Roberson continue; 130915dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 131015dc847eSJeff Roberson 1311ef1134c9SJeff Roberson #ifdef SMP 1312155b9987SJeff Roberson /* 1313155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1314155b9987SJeff Roberson * count because we could be changing to or from an interrupt 1315155b9987SJeff Roberson * class.
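 */

/*
 * Illustrative sketch, not part of the original file: the history
 * compression sched_interact_fork() applies to the child initialized in
 * sched_fork_ksegrp() above.  Dividing both halves by the same ratio keeps
 * the runtime:slptime proportion (and so the interactivity score) roughly
 * intact while giving the child less history to outgrow.  Placeholder ex_*
 * name, never compiled (#if 0).
 */
#if 0
static void
ex_fork_compress(int *runtime, int *slptime, int slp_run_fork)
{
        int ratio, sum;

        sum = *runtime + *slptime;
        if (sum > slp_run_fork) {
                ratio = sum / slp_run_fork;
                *runtime /= ratio;
                *slptime /= ratio;
        }
}

/*
 * Example: runtime = 600, slptime = 300 against a limit of 300 gives
 * ratio = 3, leaving 200 and 100 -- the same 2:1 mix at one third the
 * weight.
 */
#endif

/*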
1316155b9987SJeff Roberson */ 1317155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 131880f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, oclass)) { 131980f86c9fSJeff Roberson kseq->ksq_transferable--; 132080f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 132180f86c9fSJeff Roberson } 132280f86c9fSJeff Roberson if (KSE_CAN_MIGRATE(ke, nclass)) { 132380f86c9fSJeff Roberson kseq->ksq_transferable++; 132480f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 132580f86c9fSJeff Roberson } 1326155b9987SJeff Roberson } 1327ef1134c9SJeff Roberson #endif 1328155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1329ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 133015dc847eSJeff Roberson kseq_nice_rem(kseq, kg->kg_nice); 1331155b9987SJeff Roberson } 1332155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1333155b9987SJeff Roberson kseq->ksq_load_timeshare++; 133415dc847eSJeff Roberson kseq_nice_add(kseq, kg->kg_nice); 133515dc847eSJeff Roberson } 1336155b9987SJeff Roberson } 133715dc847eSJeff Roberson 133815dc847eSJeff Roberson kg->kg_pri_class = class; 133935e6168fSJeff Roberson } 134035e6168fSJeff Roberson 134135e6168fSJeff Roberson /* 134235e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 134335e6168fSJeff Roberson */ 134435e6168fSJeff Roberson void 134515dc847eSJeff Roberson sched_exit(struct proc *p, struct proc *child) 134635e6168fSJeff Roberson { 134735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1348141ad61cSJeff Roberson sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child)); 1349210491d3SJeff Roberson sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child)); 1350141ad61cSJeff Roberson } 1351141ad61cSJeff Roberson 1352141ad61cSJeff Roberson void 1353141ad61cSJeff Roberson sched_exit_kse(struct kse *ke, struct kse *child) 1354141ad61cSJeff Roberson { 1355155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(child->ke_cpu), child); 1356141ad61cSJeff Roberson } 1357141ad61cSJeff Roberson 1358141ad61cSJeff Roberson void 1359141ad61cSJeff Roberson sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) 1360141ad61cSJeff Roberson { 13614b60e324SJeff Roberson /* kg->kg_slptime += child->kg_slptime; */ 1362210491d3SJeff Roberson kg->kg_runtime += child->kg_runtime; 13634b60e324SJeff Roberson sched_interact_update(kg); 1364141ad61cSJeff Roberson } 1365141ad61cSJeff Roberson 1366141ad61cSJeff Roberson void 1367141ad61cSJeff Roberson sched_exit_thread(struct thread *td, struct thread *child) 1368141ad61cSJeff Roberson { 136935e6168fSJeff Roberson } 137035e6168fSJeff Roberson 137135e6168fSJeff Roberson void 13727cf90fb3SJeff Roberson sched_clock(struct thread *td) 137335e6168fSJeff Roberson { 137435e6168fSJeff Roberson struct kseq *kseq; 13750a016a05SJeff Roberson struct ksegrp *kg; 13767cf90fb3SJeff Roberson struct kse *ke; 137735e6168fSJeff Roberson 1378dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1379dc03363dSJeff Roberson #ifdef SMP 1380dc03363dSJeff Roberson if (ticks == bal_tick) 1381dc03363dSJeff Roberson sched_balance(); 1382dc03363dSJeff Roberson if (ticks == gbal_tick) 1383dc03363dSJeff Roberson sched_balance_groups(); 1384dc03363dSJeff Roberson #endif 138515dc847eSJeff Roberson /* 138615dc847eSJeff Roberson * sched_setup() apparently happens prior to stathz being set. We 138715dc847eSJeff Roberson * need to resolve the timers earlier in the boot so we can avoid 138815dc847eSJeff Roberson * calculating this here. 
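 */

/*
 * Illustrative sketch, not part of the original file: the tickincr
 * computation performed lazily above, as a stand-alone function with
 * placeholder ex_* names.  It also shows the limitation flagged by the XXX
 * comment: once stathz exceeds hz the integer divide truncates to zero and
 * the clamp to one over-credits every stat tick.  Never compiled (#if 0).
 */
#if 0
#include <stdio.h>

static int
ex_tickincr(int hz, int stathz)
{
        int realstathz, incr;

        realstathz = stathz ? stathz : hz;
        incr = hz / realstathz;
        if (incr == 0)
                incr = 1;
        return (incr);
}

int
main(void)
{
        printf("%d\n", ex_tickincr(1000, 128));  /* 7 */
        printf("%d\n", ex_tickincr(100, 128));   /* 0, clamped to 1 */
        return (0);
}
#endif

/*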
138915dc847eSJeff Roberson */ 139015dc847eSJeff Roberson if (realstathz == 0) { 139115dc847eSJeff Roberson realstathz = stathz ? stathz : hz; 139215dc847eSJeff Roberson tickincr = hz / realstathz; 139315dc847eSJeff Roberson /* 139415dc847eSJeff Roberson * XXX This does not work for values of stathz that are much 139515dc847eSJeff Roberson * larger than hz. 139615dc847eSJeff Roberson */ 139715dc847eSJeff Roberson if (tickincr == 0) 139815dc847eSJeff Roberson tickincr = 1; 139915dc847eSJeff Roberson } 140035e6168fSJeff Roberson 14017cf90fb3SJeff Roberson ke = td->td_kse; 140215dc847eSJeff Roberson kg = ke->ke_ksegrp; 140335e6168fSJeff Roberson 14040a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 140565c8760dSJeff Roberson ke->ke_ticks++; 1406d465fb95SJeff Roberson ke->ke_ltick = ticks; 1407a8949de2SJeff Roberson 1408d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1409d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1410d465fb95SJeff Roberson sched_pctcpu_update(ke); 1411d465fb95SJeff Roberson 141243fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 141335e6168fSJeff Roberson return; 14140a016a05SJeff Roberson 141515dc847eSJeff Roberson CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)", 141615dc847eSJeff Roberson ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10); 14173f741ca1SJeff Roberson /* 1418a8949de2SJeff Roberson * We only do slice accounting for TIMESHARE ksegrps. 1419a8949de2SJeff Roberson */ 1420a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1421a8949de2SJeff Roberson return; 1422a8949de2SJeff Roberson /* 142315dc847eSJeff Roberson * We used a tick; charge it to the ksegrp so that we can compute our 142415dc847eSJeff Roberson * interactivity. 142515dc847eSJeff Roberson */ 142615dc847eSJeff Roberson kg->kg_runtime += tickincr << 10; 14274b60e324SJeff Roberson sched_interact_update(kg); 1428407b0157SJeff Roberson 142935e6168fSJeff Roberson /* 143035e6168fSJeff Roberson * We used up one time slice. 143135e6168fSJeff Roberson */ 1432093c05e3SJeff Roberson if (--ke->ke_slice > 0) 143315dc847eSJeff Roberson return; 143435e6168fSJeff Roberson /* 143515dc847eSJeff Roberson * We're out of time, recompute priorities and requeue.
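 */

/*
 * Illustrative sketch, not part of the original file: the per-stat-tick
 * accounting at the heart of sched_clock() above, reduced to two fields.
 * The caller is expected to recompute the priority and slice and requeue
 * the thread (onto the current queue if it is still interactive, otherwise
 * onto the next queue) whenever this returns non-zero.  Placeholder ex_*
 * names, never compiled (#if 0).
 */
#if 0
struct ex_ticker {
        int     slice;          /* stat ticks left in the current slice */
        int     runtime;        /* fixed point, <<10, like kg_runtime */
};

/* Returns 1 when the slice is used up and a requeue is due. */
static int
ex_stat_tick(struct ex_ticker *t, int tickincr)
{
        t->runtime += tickincr << 10;
        if (--t->slice > 0)
                return (0);
        return (1);
}
#endif

/*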
143635e6168fSJeff Roberson */ 1437093c05e3SJeff Roberson kseq = KSEQ_SELF(); 1438155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1439e1f89c22SJeff Roberson sched_priority(kg); 144015dc847eSJeff Roberson sched_slice(ke); 144115dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 144215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 144315dc847eSJeff Roberson else 144415dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1445155b9987SJeff Roberson kseq_load_add(kseq, ke); 14464a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 144735e6168fSJeff Roberson } 144835e6168fSJeff Roberson 144935e6168fSJeff Roberson int 145035e6168fSJeff Roberson sched_runnable(void) 145135e6168fSJeff Roberson { 145235e6168fSJeff Roberson struct kseq *kseq; 1453b90816f1SJeff Roberson int load; 145435e6168fSJeff Roberson 1455b90816f1SJeff Roberson load = 1; 1456b90816f1SJeff Roberson 14570a016a05SJeff Roberson kseq = KSEQ_SELF(); 145822bf7d9aSJeff Roberson #ifdef SMP 145946f8b265SJeff Roberson if (kseq->ksq_assigned) { 146046f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 146122bf7d9aSJeff Roberson kseq_assign(kseq); 146246f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 146346f8b265SJeff Roberson } 146422bf7d9aSJeff Roberson #endif 14653f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 14663f741ca1SJeff Roberson if (kseq->ksq_load > 0) 14673f741ca1SJeff Roberson goto out; 14683f741ca1SJeff Roberson } else 14693f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1470b90816f1SJeff Roberson goto out; 1471b90816f1SJeff Roberson load = 0; 1472b90816f1SJeff Roberson out: 1473b90816f1SJeff Roberson return (load); 147435e6168fSJeff Roberson } 147535e6168fSJeff Roberson 147635e6168fSJeff Roberson void 147735e6168fSJeff Roberson sched_userret(struct thread *td) 147835e6168fSJeff Roberson { 147935e6168fSJeff Roberson struct ksegrp *kg; 148035e6168fSJeff Roberson 148135e6168fSJeff Roberson kg = td->td_ksegrp; 148235e6168fSJeff Roberson 148335e6168fSJeff Roberson if (td->td_priority != kg->kg_user_pri) { 148435e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 148535e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 148635e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 148735e6168fSJeff Roberson } 148835e6168fSJeff Roberson } 148935e6168fSJeff Roberson 1490c9f25d8fSJeff Roberson struct kse * 1491c9f25d8fSJeff Roberson sched_choose(void) 1492c9f25d8fSJeff Roberson { 14930a016a05SJeff Roberson struct kseq *kseq; 1494c9f25d8fSJeff Roberson struct kse *ke; 149515dc847eSJeff Roberson 1496b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 149722bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 149815dc847eSJeff Roberson #ifdef SMP 149980f86c9fSJeff Roberson restart: 150022bf7d9aSJeff Roberson if (kseq->ksq_assigned) 150122bf7d9aSJeff Roberson kseq_assign(kseq); 150215dc847eSJeff Roberson #endif 150322bf7d9aSJeff Roberson ke = kseq_choose(kseq); 150435e6168fSJeff Roberson if (ke) { 150522bf7d9aSJeff Roberson #ifdef SMP 150622bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 150780f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 150880f86c9fSJeff Roberson goto restart; 150922bf7d9aSJeff Roberson #endif 1510155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 151135e6168fSJeff Roberson ke->ke_state = KES_THREAD; 1512245f3abfSJeff Roberson 151315dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 151415dc847eSJeff Roberson CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)", 151515dc847eSJeff Roberson ke, ke->ke_runq, ke->ke_slice, 151615dc847eSJeff Roberson 
ke->ke_thread->td_priority); 1517245f3abfSJeff Roberson } 151815dc847eSJeff Roberson return (ke); 151935e6168fSJeff Roberson } 1520c9f25d8fSJeff Roberson #ifdef SMP 152180f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 152280f86c9fSJeff Roberson goto restart; 1523c9f25d8fSJeff Roberson #endif 152415dc847eSJeff Roberson return (NULL); 152535e6168fSJeff Roberson } 152635e6168fSJeff Roberson 152735e6168fSJeff Roberson void 15287cf90fb3SJeff Roberson sched_add(struct thread *td) 152935e6168fSJeff Roberson { 1530c9f25d8fSJeff Roberson struct kseq *kseq; 153115dc847eSJeff Roberson struct ksegrp *kg; 15327cf90fb3SJeff Roberson struct kse *ke; 153322bf7d9aSJeff Roberson int class; 1534c9f25d8fSJeff Roberson 153522bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 15367cf90fb3SJeff Roberson ke = td->td_kse; 15377cf90fb3SJeff Roberson kg = td->td_ksegrp; 153822bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 153922bf7d9aSJeff Roberson return; 154022bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 1541c494ddc8SJeff Roberson KASSERT((ke->ke_thread != NULL), 1542c494ddc8SJeff Roberson ("sched_add: No thread on KSE")); 15435d7ef00cSJeff Roberson KASSERT((ke->ke_thread->td_kse != NULL), 15445d7ef00cSJeff Roberson ("sched_add: No KSE on thread")); 15455d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 15465d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 15475d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 15485d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 15495d7ef00cSJeff Roberson ("sched_add: process swapped out")); 15509bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 15519bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 15525d7ef00cSJeff Roberson 155322bf7d9aSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 155422bf7d9aSJeff Roberson switch (class) { 1555a8949de2SJeff Roberson case PRI_ITHD: 1556a8949de2SJeff Roberson case PRI_REALTIME: 155715dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 155815dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 15597cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1560a8949de2SJeff Roberson break; 1561a8949de2SJeff Roberson case PRI_TIMESHARE: 156215dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 156315dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 156415dc847eSJeff Roberson else 156515dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 156615dc847eSJeff Roberson break; 156715dc847eSJeff Roberson case PRI_IDLE: 156815dc847eSJeff Roberson /* 156915dc847eSJeff Roberson * This is for priority prop. 157015dc847eSJeff Roberson */ 15713f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 157215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 157315dc847eSJeff Roberson else 157415dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 157515dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 157615dc847eSJeff Roberson break; 157715dc847eSJeff Roberson default: 1578d322132cSJeff Roberson panic("Unknown pri class."); 1579a8949de2SJeff Roberson break; 1580a6ed4186SJeff Roberson } 158122bf7d9aSJeff Roberson #ifdef SMP 158280f86c9fSJeff Roberson if (ke->ke_cpu != PCPU_GET(cpuid)) { 158386e1c22aSJeff Roberson ke->ke_runq = NULL; 158480f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 158580f86c9fSJeff Roberson return; 158680f86c9fSJeff Roberson } 158722bf7d9aSJeff Roberson /* 1588670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1589670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 
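 */

/*
 * Illustrative sketch, not part of the original file: the per-class queue
 * and slice selection done by the switch in sched_add() above.  The ex_*
 * names and slice values are placeholders standing in for SCHED_SLICE_MIN
 * and SCHED_SLICE_MAX; interrupt and realtime threads always go on the
 * current queue with a full slice, timesharing threads go on current or
 * next depending on interactivity, and idle-class threads only reach the
 * current queue when priority propagation has lent them a real priority.
 * Never compiled (#if 0).
 */
#if 0
#define EX_SLICE_MIN    1       /* assumed placeholder value */
#define EX_SLICE_MAX    10      /* assumed placeholder value */

struct ex_runq;

enum ex_class { EX_ITHD, EX_REALTIME, EX_TIMESHARE, EX_IDLE };

static struct ex_runq *
ex_pick_queue(enum ex_class class, int interactive, int prio_elevated,
    struct ex_runq *curr, struct ex_runq *next, struct ex_runq *idle,
    int *slice)
{
        switch (class) {
        case EX_ITHD:
        case EX_REALTIME:
                *slice = EX_SLICE_MAX;
                return (curr);
        case EX_TIMESHARE:
                /* The timeshare slice is computed from nice/interactivity. */
                return (interactive ? curr : next);
        case EX_IDLE:
                *slice = EX_SLICE_MIN;
                return (prio_elevated ? curr : idle);
        }
        return (curr);
}
#endif

/*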
159022bf7d9aSJeff Roberson */ 159180f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 159280f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 159380f86c9fSJeff Roberson /* 159480f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 159580f86c9fSJeff Roberson * from the global idle mask. 159680f86c9fSJeff Roberson */ 159780f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 159880f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 159980f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 160080f86c9fSJeff Roberson /* 160180f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 160280f86c9fSJeff Roberson */ 160380f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1604670c524fSJeff Roberson } else if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class)) 1605670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1606670c524fSJeff Roberson return; 160722bf7d9aSJeff Roberson #endif 160822bf7d9aSJeff Roberson if (td->td_priority < curthread->td_priority) 160922bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 1610a8949de2SJeff Roberson 161135e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses++; 161235e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 161335e6168fSJeff Roberson 1614155b9987SJeff Roberson kseq_runq_add(kseq, ke); 1615155b9987SJeff Roberson kseq_load_add(kseq, ke); 161635e6168fSJeff Roberson } 161735e6168fSJeff Roberson 161835e6168fSJeff Roberson void 16197cf90fb3SJeff Roberson sched_rem(struct thread *td) 162035e6168fSJeff Roberson { 162115dc847eSJeff Roberson struct kseq *kseq; 16227cf90fb3SJeff Roberson struct kse *ke; 16237cf90fb3SJeff Roberson 16247cf90fb3SJeff Roberson ke = td->td_kse; 162522bf7d9aSJeff Roberson /* 162622bf7d9aSJeff Roberson * It is safe to just return here because sched_rem() is only ever 162722bf7d9aSJeff Roberson * used in places where we're immediately going to add the 162822bf7d9aSJeff Roberson * kse back on again. In that case it'll be added with the correct 162922bf7d9aSJeff Roberson * thread and priority when the caller drops the sched_lock. 
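 */

/*
 * Illustrative sketch, not part of the original file: the idle-bitmask
 * bookkeeping in the SMP branch of sched_add() above, using plain unsigned
 * masks and placeholder ex_* names.  The group's bit only lives in the
 * global mask while every CPU in the group is idle, so it is cleared first,
 * and then this CPU's bit is cleared from the group's own idle mask.
 * Never compiled (#if 0).
 */
#if 0
static void
ex_clear_idle(unsigned int *global_idle, unsigned int group_bit,
    unsigned int *group_idlemask, unsigned int group_cpumask,
    unsigned int cpu_bit)
{
        if (*group_idlemask == group_cpumask)
                *global_idle &= ~group_bit;
        *group_idlemask &= ~cpu_bit;
}
#endif

/*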
163022bf7d9aSJeff Roberson */ 163122bf7d9aSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) 163222bf7d9aSJeff Roberson return; 163335e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1634c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1635c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 163635e6168fSJeff Roberson 163735e6168fSJeff Roberson ke->ke_state = KES_THREAD; 163835e6168fSJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 163915dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1640155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1641155b9987SJeff Roberson kseq_load_rem(kseq, ke); 164235e6168fSJeff Roberson } 164335e6168fSJeff Roberson 164435e6168fSJeff Roberson fixpt_t 16457cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 164635e6168fSJeff Roberson { 164735e6168fSJeff Roberson fixpt_t pctcpu; 16487cf90fb3SJeff Roberson struct kse *ke; 164935e6168fSJeff Roberson 165035e6168fSJeff Roberson pctcpu = 0; 16517cf90fb3SJeff Roberson ke = td->td_kse; 1652484288deSJeff Roberson if (ke == NULL) 1653484288deSJeff Roberson return (0); 165435e6168fSJeff Roberson 1655b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 165635e6168fSJeff Roberson if (ke->ke_ticks) { 165735e6168fSJeff Roberson int rtick; 165835e6168fSJeff Roberson 1659210491d3SJeff Roberson /* 1660210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1661210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1662210491d3SJeff Roberson * rounding errors. 1663210491d3SJeff Roberson */ 16642e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 16652e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 166635e6168fSJeff Roberson sched_pctcpu_update(ke); 166735e6168fSJeff Roberson /* How many rtick per second ? */ 1668210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 16697121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 167035e6168fSJeff Roberson } 167135e6168fSJeff Roberson 167235e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1673828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 167435e6168fSJeff Roberson 167535e6168fSJeff Roberson return (pctcpu); 167635e6168fSJeff Roberson } 167735e6168fSJeff Roberson 16789bacd788SJeff Roberson void 16799bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 16809bacd788SJeff Roberson { 16819bacd788SJeff Roberson struct kse *ke; 16829bacd788SJeff Roberson 16839bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 16849bacd788SJeff Roberson ke = td->td_kse; 16859bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 168680f86c9fSJeff Roberson #ifdef SMP 168780f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 16889bacd788SJeff Roberson return; 16899bacd788SJeff Roberson /* sched_rem without the runq_remove */ 16909bacd788SJeff Roberson ke->ke_state = KES_THREAD; 16919bacd788SJeff Roberson ke->ke_ksegrp->kg_runq_kses--; 1692155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 16939bacd788SJeff Roberson kseq_notify(ke, cpu); 16949bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
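 */

/*
 * Illustrative sketch, not part of the original file: the fixed-point %cpu
 * conversion used by sched_pctcpu() above.  The FSHIFT/FSCALE values below
 * are only assumed and the ex_* names are placeholders.  Never compiled
 * (#if 0).
 */
#if 0
#include <stdio.h>

#define EX_FSHIFT       11
#define EX_FSCALE       (1 << EX_FSHIFT)

/* rtick is "stat ticks actually run per second" out of realstathz. */
static unsigned int
ex_pctcpu(int rtick, int realstathz)
{
        return ((EX_FSCALE * ((EX_FSCALE * rtick) / realstathz)) >> EX_FSHIFT);
}

int
main(void)
{
        /* 64 of 128 stat ticks used: half the CPU, i.e. EX_FSCALE / 2. */
        printf("%u of %u\n", ex_pctcpu(64, 128), EX_FSCALE);
        return (0);
}
#endif

/*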
*/ 169529bcc451SJeff Roberson mi_switch(SW_VOL); 16969bacd788SJeff Roberson #endif 16979bacd788SJeff Roberson } 16989bacd788SJeff Roberson 16999bacd788SJeff Roberson void 17009bacd788SJeff Roberson sched_unbind(struct thread *td) 17019bacd788SJeff Roberson { 17029bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 17039bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 17049bacd788SJeff Roberson } 17059bacd788SJeff Roberson 170635e6168fSJeff Roberson int 170733916c36SJeff Roberson sched_load(void) 170833916c36SJeff Roberson { 170933916c36SJeff Roberson #ifdef SMP 171033916c36SJeff Roberson int total; 171133916c36SJeff Roberson int i; 171233916c36SJeff Roberson 171333916c36SJeff Roberson total = 0; 171433916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 171533916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 171633916c36SJeff Roberson return (total); 171733916c36SJeff Roberson #else 171833916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 171933916c36SJeff Roberson #endif 172033916c36SJeff Roberson } 172133916c36SJeff Roberson 172233916c36SJeff Roberson int 172335e6168fSJeff Roberson sched_sizeof_kse(void) 172435e6168fSJeff Roberson { 172535e6168fSJeff Roberson return (sizeof(struct kse) + sizeof(struct ke_sched)); 172635e6168fSJeff Roberson } 172735e6168fSJeff Roberson 172835e6168fSJeff Roberson int 172935e6168fSJeff Roberson sched_sizeof_ksegrp(void) 173035e6168fSJeff Roberson { 173135e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 173235e6168fSJeff Roberson } 173335e6168fSJeff Roberson 173435e6168fSJeff Roberson int 173535e6168fSJeff Roberson sched_sizeof_proc(void) 173635e6168fSJeff Roberson { 173735e6168fSJeff Roberson return (sizeof(struct proc)); 173835e6168fSJeff Roberson } 173935e6168fSJeff Roberson 174035e6168fSJeff Roberson int 174135e6168fSJeff Roberson sched_sizeof_thread(void) 174235e6168fSJeff Roberson { 174335e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 174435e6168fSJeff Roberson } 1745
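
/*
 * Illustrative sketch, not part of the original file: one way the
 * sched_sizeof_*() values above can be consumed by an allocator, with the
 * scheduler-private area placed directly behind the shared structure.  The
 * ex_* types are placeholders and this is only an assumption about usage,
 * not the kernel's actual allocation path.  Never compiled (#if 0).
 */
#if 0
#include <stdlib.h>

struct ex_thread { int ex_priority; };          /* shared part */
struct ex_td_sched { int ex_slice; };           /* scheduler-private part */

static int
ex_sizeof_thread(void)
{
        return (sizeof(struct ex_thread) + sizeof(struct ex_td_sched));
}

static struct ex_td_sched *
ex_td_sched(struct ex_thread *td)
{
        /* The private area begins immediately after the shared structure. */
        return ((struct ex_td_sched *)(td + 1));
}

int
main(void)
{
        struct ex_thread *td;

        td = calloc(1, ex_sizeof_thread());
        if (td != NULL) {
                ex_td_sched(td)->ex_slice = 1;
                free(td);
        }
        return (0);
}
#endif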