/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define	KTR_ULE	KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");

static int sched_strict;
SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

#ifdef SMP
/* Callout to handle load balancing SMP systems. */
static struct callout kseq_lb_callout;
#endif

/*
 * These data structures are allocated within their parent data structure but
 * are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	/* CPU that we have affinity for. */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu
#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
#define	KEF_BOUND	KEF_SCHED1	/* KSE can not migrate. */

struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
};
#define	td_slptime	td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
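/*
 * For example, with the usual layout where the timeshare band spans 64
 * priorities, SCHED_PRI_RANGE is 64 and SCHED_INTERACT_MAX is 100, so an
 * interactivity score of 50 maps to SCHED_PRI_INTERACT(50) = 50 * 64 / 100
 * = 32 priorities above SCHED_PRI_BASE before nice is added in
 * sched_priority().
 */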
/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
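/*
 * For example, sched_setup() sets slice_min to hz/100 and slice_max to
 * hz/7, so with hz = 100 slices range over 1..14 ticks and, with the usual
 * nice range of -20..20, SCHED_SLICE_NTHRESH is 19.  A kse group even with
 * the least nice group (scaled nice 0) gets 14 ticks, one 10 nice values
 * worse gets 14 - (10 * 14) / 19 = 7 ticks, and one 19 values worse gets 0
 * and is handled by the slice == 0 case in kseq_choose().
 */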
/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    (ke->ke_thread->td_priority != kg->kg_user_pri ||			\
    SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int		ksq_transferable;
	LIST_ENTRY(kseq) ksq_siblings;		/* Next in kseq group. */
	struct kseq_group *ksq_group;		/* Our processor group. */
	volatile struct kse *ksq_assigned;	/* assigned by another CPU. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
	int	ksg_cpumask;		/* Mask of cpus in this group. */
	int	ksg_idlemask;		/* Idle cpus in this group. */
	int	ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq) ksg_members;	/* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static int kseq_idle;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else	/* !SMP */
static struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void *arg);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&		\
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload:           %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
		kseq->ksq_transferable++;
		kseq->ksq_group->ksg_transferable++;
	}
#endif
	runq_add(ke->ke_runq, ke);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
		kseq->ksq_transferable--;
		kseq->ksq_group->ksg_transferable--;
	}
#endif
	runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		CTR6(KTR_ULE,
		    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
		    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
		    ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
	kseq->ksq_load--;
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice);
}
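/*
 * The ksq_nice[] histogram below is indexed by nice + SCHED_PRI_NHALF, so
 * with the usual nice range of -20..20 a nice of -20 lands in bin 0, nice 0
 * in bin 20 and nice 20 in bin 40.  ksq_nicemin caches the least nice value
 * currently queued so that sched_slice() can scale slices without scanning
 * the whole array.
 */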
static void
kseq_nice_add(struct kseq *kseq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
		kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	kseq->ksq_nice[n]--;
	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != kseq->ksq_nicemin ||
	    kseq->ksq_nice[n] != 0 ||
	    kseq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (kseq->ksq_nice[n]) {
			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 */
static void
sched_balance(void *arg)
{
	struct kseq *kseq;
	int transferable;
	int high_load;
	int low_load;
	int high_cpu;
	int low_cpu;
	int move;
	int diff;
	int i;

	high_cpu = 0;
	low_cpu = 0;
	high_load = 0;
	low_load = -1;

	mtx_lock_spin(&sched_lock);
	if (smp_started == 0)
		goto out;

	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
			continue;
		kseq = KSEQ_CPU(i);
		/*
		 * Find the CPU with the highest load that has some threads
		 * to transfer.
		 */
		if (kseq->ksq_load > high_load &&
		    kseq->ksq_group->ksg_transferable) {
			high_load = kseq->ksq_load;
			high_cpu = i;
		}
		if (low_load == -1 || kseq->ksq_load < low_load) {
			low_load = kseq->ksq_load;
			low_cpu = i;
		}
	}
	kseq = KSEQ_CPU(high_cpu);
	/*
	 * Nothing to do.
	 */
	if (low_load >= high_load)
		goto out;
	/*
	 * If we're transferring within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (kseq->ksq_group == KSEQ_CPU(low_cpu)->ksq_group)
		transferable = kseq->ksq_transferable;
	else
		transferable = kseq->ksq_group->ksg_transferable;
	if (transferable == 0)
		goto out;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
	diff = kseq->ksq_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		kseq_move(kseq, low_cpu);
out:
	mtx_unlock_spin(&sched_lock);
	callout_reset(&kseq_lb_callout, hz, sched_balance, NULL);

	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kseq *kseq;
	struct kseq *to;
	struct kse *ke;

	kseq = from;
	to = KSEQ_CPU(cpu);
	ke = kseq_steal(kseq, 1);
	if (ke == NULL) {
		struct kseq_group *ksg;

		ksg = kseq->ksq_group;
		LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
			if (kseq == from || kseq->ksq_transferable == 0)
				continue;
			ke = kseq_steal(kseq, 1);
			break;
		}
		if (ke == NULL)
			panic("kseq_move: No KSEs available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (kseq == to)
		return;
	ke->ke_state = KES_THREAD;
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);

	ke->ke_cpu = cpu;
	kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
	struct kseq_group *ksg;
	struct kseq *steal;
	struct kse *ke;

	ksg = kseq->ksq_group;
	/*
	 * If we're in a cpu group, try and steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == kseq || steal->ksq_transferable == 0)
				continue;
			ke = kseq_steal(steal, 0);
			if (ke == NULL)
				continue;
			ke->ke_state = KES_THREAD;
			kseq_runq_rem(steal, ke);
			kseq_load_rem(steal, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			sched_add(ke->ke_thread);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a KSE bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&kseq_idle, ksg->ksg_mask);
	return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		(volatile struct kse *)ke = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		ke->ke_flags &= ~KEF_ASSIGNED;
		sched_add(ke->ke_thread);
	}
}
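/*
 * ksq_assigned is a lock-free singly linked list:  kseq_notify() below
 * pushes a kse onto the remote cpu's list with an atomic_cmpset_ptr() loop,
 * and the remote cpu eventually calls kseq_assign(), which detaches the
 * whole list in one swap and then hands each kse to sched_add().
 */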
static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;

	ke->ke_flags |= KEF_ASSIGNED;

	kseq = KSEQ_CPU(cpu);

	/*
	 * Place a KSE on another cpu's queue and force a resched.
	 */
	do {
		(volatile struct kse *)ke->ke_assign = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke,
				    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
	struct kse *ke;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if (stealidle)
		return (runq_steal(&kseq->ksq_idle));
	return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
	struct kseq_group *ksg;
	int cpu;

	cpu = 0;
	ksg = kseq->ksq_group;

	/*
	 * XXX This ksg_transferable might work better if we were checking
	 * against a global group load.  As it is now, this prevents us from
	 * transferring a thread from a group that is potentially bogged down
	 * with non transferable load.
	 */
	if (ksg->ksg_transferable > ksg->ksg_cpus && kseq_idle) {
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu)
			atomic_clear_int(&kseq_idle, 1 << (cpu - 1));
	}
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu)
			ksg->ksg_idlemask &= ~(1 << (cpu - 1));
	}
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	if (cpu) {
		cpu--;
		ke->ke_cpu = cpu;
		ke->ke_runq = NULL;
		kseq_notify(ke, cpu);
		return (1);
	}
	return (0);
}
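/*
 * Note that ffs() above returns a 1-based bit index (0 when no bit is set),
 * which is why kseq_transfer() clears bit (cpu - 1) and decrements cpu
 * before using it as a cpu number.
 */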
#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */
static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		if (ke->ke_slice == 0) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
}
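/*
 * The two ksq_timeshare[] runqs form a split timeshare queue:  ksq_curr is
 * what kseq_choose() selects from and ksq_next collects threads that must
 * wait for the next round (for example the slice == 0 case above).  When
 * ksq_curr empties, the two pointers are simply swapped.
 */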
static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	/*
	 * Initialize the kseqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct kseq *ksq;

		ksq = &kseq_cpu[i];
		ksq->ksq_assigned = NULL;
		kseq_setup(&kseq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct kseq_group *ksg;
		struct kseq *ksq;

		for (i = 0; i < MAXCPU; i++) {
			ksq = &kseq_cpu[i];
			ksg = &kseq_groups[i];
			/*
			 * Setup a kse group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
		}
	} else {
		struct kseq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &kseq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
		}
	}
	callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
	sched_balance(NULL);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if one abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse distance from the least nice ksegrp.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	CTR6(KTR_ULE,
	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
	    ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin,
	    kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

	return;
}

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [3/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX].
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	} if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}
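/*
 * For example, a kse group that has slept roughly three times as long as it
 * has run scores about 50 / 3 = 16, under SCHED_INTERACT_THRESH (30), and is
 * considered interactive; one that has run three times as long as it has
 * slept scores about 100 - 50 / 3 = 83 and is not.
 */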
101365c8760dSJeff Roberson */ 101465c8760dSJeff Roberson ke->ke_ticks <<= 10; 101581de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 101635e6168fSJeff Roberson SCHED_CPU_TICKS; 101765c8760dSJeff Roberson ke->ke_ticks >>= 10; 101881de51bfSJeff Roberson } else 101981de51bfSJeff Roberson ke->ke_ticks = 0; 102035e6168fSJeff Roberson ke->ke_ltick = ticks; 102135e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 102235e6168fSJeff Roberson } 102335e6168fSJeff Roberson 102435e6168fSJeff Roberson void 102535e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio) 102635e6168fSJeff Roberson { 10273f741ca1SJeff Roberson struct kse *ke; 102835e6168fSJeff Roberson 10293f741ca1SJeff Roberson ke = td->td_kse; 103035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 103135e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 10323f741ca1SJeff Roberson /* 10333f741ca1SJeff Roberson * If the priority has been elevated due to priority 10343f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 10353f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 10363f741ca1SJeff Roberson * needs to fix things up. 10373f741ca1SJeff Roberson */ 1038769a3635SJeff Roberson if (prio < td->td_priority && ke && 1039769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 104022bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 10413f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 10423f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 10433f741ca1SJeff Roberson runq_add(ke->ke_runq, ke); 104435e6168fSJeff Roberson } 10453f741ca1SJeff Roberson adjustrunqueue(td, prio); 10463f741ca1SJeff Roberson } else 10473f741ca1SJeff Roberson td->td_priority = prio; 104835e6168fSJeff Roberson } 104935e6168fSJeff Roberson 105035e6168fSJeff Roberson void 1051ae53b483SJeff Roberson sched_switch(struct thread *td) 105235e6168fSJeff Roberson { 1053ae53b483SJeff Roberson struct thread *newtd; 105435e6168fSJeff Roberson struct kse *ke; 105535e6168fSJeff Roberson 105635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 105735e6168fSJeff Roberson 105835e6168fSJeff Roberson ke = td->td_kse; 105935e6168fSJeff Roberson 106035e6168fSJeff Roberson td->td_last_kse = ke; 1061060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1062060563ecSJulian Elischer td->td_oncpu = NOCPU; 10634a338afdSJulian Elischer td->td_flags &= ~TDF_NEEDRESCHED; 106435e6168fSJeff Roberson 1065b11fdad0SJeff Roberson /* 1066b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1067b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 1068b11fdad0SJeff Roberson */ 1069b11fdad0SJeff Roberson if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 107035e6168fSJeff Roberson if (TD_IS_RUNNING(td)) { 1071ab2baa72SDavid Xu if (td->td_proc->p_flag & P_SA) { 1072155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1073ab2baa72SDavid Xu setrunqueue(td); 107480f86c9fSJeff Roberson } else 1075155b9987SJeff Roberson kseq_runq_add(KSEQ_SELF(), ke); 10760e0f6266SJeff Roberson } else { 10770e0f6266SJeff Roberson if (ke->ke_runq) 1078155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 107935e6168fSJeff Roberson /* 108035e6168fSJeff Roberson * We will not be on the run queue. So we must be 108135e6168fSJeff Roberson * sleeping or similar. 
108235e6168fSJeff Roberson */ 10830e2a4d3aSDavid Xu if (td->td_proc->p_flag & P_SA) 108435e6168fSJeff Roberson kse_reassign(ke); 10850e0f6266SJeff Roberson } 1086b11fdad0SJeff Roberson } 1087ae53b483SJeff Roberson newtd = choosethread(); 1088ae53b483SJeff Roberson if (td != newtd) 1089ae53b483SJeff Roberson cpu_switch(td, newtd); 1090ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 109135e6168fSJeff Roberson 1092060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 109335e6168fSJeff Roberson } 109435e6168fSJeff Roberson 109535e6168fSJeff Roberson void 109635e6168fSJeff Roberson sched_nice(struct ksegrp *kg, int nice) 109735e6168fSJeff Roberson { 109815dc847eSJeff Roberson struct kse *ke; 109935e6168fSJeff Roberson struct thread *td; 110015dc847eSJeff Roberson struct kseq *kseq; 110135e6168fSJeff Roberson 11020b5318c8SJohn Baldwin PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED); 11030b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 110415dc847eSJeff Roberson /* 110515dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 110615dc847eSJeff Roberson */ 110715dc847eSJeff Roberson if (kg->kg_pri_class == PRI_TIMESHARE) 110815dc847eSJeff Roberson FOREACH_KSE_IN_GROUP(kg, ke) { 1109d07ac847SJeff Roberson if (ke->ke_runq == NULL) 111015dc847eSJeff Roberson continue; 111115dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 111215dc847eSJeff Roberson kseq_nice_rem(kseq, kg->kg_nice); 111315dc847eSJeff Roberson kseq_nice_add(kseq, nice); 111415dc847eSJeff Roberson } 111535e6168fSJeff Roberson kg->kg_nice = nice; 111635e6168fSJeff Roberson sched_priority(kg); 111715dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 11184a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 111935e6168fSJeff Roberson } 112035e6168fSJeff Roberson 112135e6168fSJeff Roberson void 112235e6168fSJeff Roberson sched_sleep(struct thread *td, u_char prio) 112335e6168fSJeff Roberson { 112435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 112535e6168fSJeff Roberson 112635e6168fSJeff Roberson td->td_slptime = ticks; 112735e6168fSJeff Roberson td->td_priority = prio; 112835e6168fSJeff Roberson 112915dc847eSJeff Roberson CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 113015dc847eSJeff Roberson td->td_kse, td->td_slptime); 113135e6168fSJeff Roberson } 113235e6168fSJeff Roberson 113335e6168fSJeff Roberson void 113435e6168fSJeff Roberson sched_wakeup(struct thread *td) 113535e6168fSJeff Roberson { 113635e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 113735e6168fSJeff Roberson 113835e6168fSJeff Roberson /* 113935e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 114035e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		if (td->td_kse)
			sched_slice(td->td_kse);
		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
		    td->td_kse, hzticks);
		td->td_slptime = 0;
	}
	setrunqueue(td);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct proc *p, struct proc *p1)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
}

void
sched_fork_kse(struct kse *ke, struct kse *child)
{

	child->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	child->ke_cpu = ke->ke_cpu;
	child->ke_runq = NULL;

	/* Grab our parent's cpu estimation information.
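	 * Copying ke_ticks/ke_ltick/ke_ftick means the child starts out
	 * with the parent's %CPU estimate instead of an empty history.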
	 */
	child->ke_ticks = ke->ke_ticks;
	child->ke_ltick = ke->ke_ltick;
	child->ke_ftick = ke->ke_ftick;
}

void
sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	child->kg_nice = kg->kg_nice;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
	    kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime,
	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
}

void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_KSE_IN_GROUP(kg, ke) {
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
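		 * KSE_CAN_MIGRATE() is class-dependent, so the old class is
		 * counted out and the new class is counted back in below.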
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			if (KSE_CAN_MIGRATE(ke, oclass)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke, nclass)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct proc *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
}

void
sched_exit_kse(struct kse *ke, struct kse *child)
{
	kseq_load_rem(KSEQ_CPU(child->ke_cpu), child);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	/* kg->kg_slptime += child->kg_slptime; */
	kg->kg_runtime += child->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
}

void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
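		 * In that case hz / realstathz truncates to zero; the clamp
		 * below keeps tickincr at 1, which over-charges runtime a
		 * little rather than not charging it at all.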
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;

	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
	/*
	 * We only do slice accounting for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute
	 * our interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
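	 * The kse is pulled out of the load statistics, its priority and
	 * slice are recomputed from the new interactivity score, and it is
	 * queued on either ksq_curr or ksq_next depending on SCHED_CURR().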
	 */
	kseq = KSEQ_SELF();
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
			    ke, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

void
sched_add(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	kseq = KSEQ_SELF();
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));

	class = PRI_BASE(kg->kg_pri_class);
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority propagation.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	if (ke->ke_cpu != PCPU_GET(cpuid)) {
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If there are any idle groups, give them our extra load.  The
	 * threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.
	 * Tuned too high and some CPUs may idle.  Too low and there will
	 * be excess migration and context switches.
	 */
	if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class))
		if (kseq_transfer(kseq, ke, class))
			return;
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group-specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	}
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke);
	kseq_load_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	ke = td->td_kse;
	/*
	 * It is safe to just return here because sched_rem() is only ever
	 * used in places where we're immediately going to add the
	 * kse back on again.  In that case it'll be added with the correct
	 * thread and priority when the caller drops the sched_lock.
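	 * An assigned kse should be waiting on the target CPU's assigned
	 * list rather than on a run queue, so there is presumably nothing
	 * to dequeue here anyway.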
	 */
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rticks per second? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	ke->ke_cpu = cpu;
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu.
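	 * The kse was handed to the target CPU with kseq_notify() above, so
	 * forcing a context switch here is what actually moves us off this
	 * CPU before the bound thread continues.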
	 */
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}