/*-
 * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

O'Brien #include <sys/cdefs.h> 28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 29677b542eSDavid E. O'Brien 304da0d332SPeter Wemm #include "opt_hwpmc_hooks.h" 314da0d332SPeter Wemm #include "opt_sched.h" 329923b511SScott Long 33ed062c8dSJulian Elischer #define kse td_sched 34ed062c8dSJulian Elischer 3535e6168fSJeff Roberson #include <sys/param.h> 3635e6168fSJeff Roberson #include <sys/systm.h> 372c3490b1SMarcel Moolenaar #include <sys/kdb.h> 3835e6168fSJeff Roberson #include <sys/kernel.h> 3935e6168fSJeff Roberson #include <sys/ktr.h> 4035e6168fSJeff Roberson #include <sys/lock.h> 4135e6168fSJeff Roberson #include <sys/mutex.h> 4235e6168fSJeff Roberson #include <sys/proc.h> 43245f3abfSJeff Roberson #include <sys/resource.h> 449bacd788SJeff Roberson #include <sys/resourcevar.h> 4535e6168fSJeff Roberson #include <sys/sched.h> 4635e6168fSJeff Roberson #include <sys/smp.h> 4735e6168fSJeff Roberson #include <sys/sx.h> 4835e6168fSJeff Roberson #include <sys/sysctl.h> 4935e6168fSJeff Roberson #include <sys/sysproto.h> 50f5c157d9SJohn Baldwin #include <sys/turnstile.h> 513db720fdSDavid Xu #include <sys/umtx.h> 5235e6168fSJeff Roberson #include <sys/vmmeter.h> 5335e6168fSJeff Roberson #ifdef KTRACE 5435e6168fSJeff Roberson #include <sys/uio.h> 5535e6168fSJeff Roberson #include <sys/ktrace.h> 5635e6168fSJeff Roberson #endif 5735e6168fSJeff Roberson 58ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 59ebccf1e3SJoseph Koshy #include <sys/pmckern.h> 60ebccf1e3SJoseph Koshy #endif 61ebccf1e3SJoseph Koshy 6235e6168fSJeff Roberson #include <machine/cpu.h> 6322bf7d9aSJeff Roberson #include <machine/smp.h> 6435e6168fSJeff Roberson 6535e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 6635e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */ 6735e6168fSJeff Roberson static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 6835e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 
6935e6168fSJeff Roberson 7035e6168fSJeff Roberson static void sched_setup(void *dummy); 7135e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 7235e6168fSJeff Roberson 73a1d4fe69SDavid Xu static void sched_initticks(void *dummy); 74a1d4fe69SDavid Xu SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL) 75a1d4fe69SDavid Xu 76e038d354SScott Long static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 77e1f89c22SJeff Roberson 78e038d354SScott Long SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 79e038d354SScott Long "Scheduler name"); 80dc095794SScott Long 8115dc847eSJeff Roberson static int slice_min = 1; 8215dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 8315dc847eSJeff Roberson 84210491d3SJeff Roberson static int slice_max = 10; 8515dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 8615dc847eSJeff Roberson 8715dc847eSJeff Roberson int realstathz; 88a1d4fe69SDavid Xu int tickincr = 1 << 10; 89783caefbSJeff Roberson 9035e6168fSJeff Roberson /* 9121381d1bSJeff Roberson * The following datastructures are allocated within their parent structure 9221381d1bSJeff Roberson * but are scheduler specific. 9321381d1bSJeff Roberson */ 9421381d1bSJeff Roberson /* 9521381d1bSJeff Roberson * The schedulable entity that can be given a context to run. A process may 9621381d1bSJeff Roberson * have several of these. 97ed062c8dSJulian Elischer */ 98ed062c8dSJulian Elischer struct kse { 99ed062c8dSJulian Elischer TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ 100ed062c8dSJulian Elischer int ke_flags; /* (j) KEF_* flags. */ 101ed062c8dSJulian Elischer struct thread *ke_thread; /* (*) Active associated thread. */ 102ed062c8dSJulian Elischer fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ 1030ae716e5SDavid Xu u_char ke_rqindex; /* (j) Run queue index. 
*/ 104ed062c8dSJulian Elischer enum { 105ed062c8dSJulian Elischer KES_THREAD = 0x0, /* slaved to thread state */ 106ed062c8dSJulian Elischer KES_ONRUNQ 107ed062c8dSJulian Elischer } ke_state; /* (j) thread sched specific status. */ 108ed062c8dSJulian Elischer int ke_slptime; 109ed062c8dSJulian Elischer int ke_slice; 110ed062c8dSJulian Elischer struct runq *ke_runq; 111ed062c8dSJulian Elischer u_char ke_cpu; /* CPU that we have affinity for. */ 112ed062c8dSJulian Elischer /* The following variables are only used for pctcpu calculation */ 113ed062c8dSJulian Elischer int ke_ltick; /* Last tick that we were running on */ 114ed062c8dSJulian Elischer int ke_ftick; /* First tick that we were running on */ 115ed062c8dSJulian Elischer int ke_ticks; /* Tick count */ 116ed062c8dSJulian Elischer 117ed062c8dSJulian Elischer }; 118ed062c8dSJulian Elischer #define td_kse td_sched 119ed062c8dSJulian Elischer #define td_slptime td_kse->ke_slptime 120ed062c8dSJulian Elischer #define ke_proc ke_thread->td_proc 121ed062c8dSJulian Elischer #define ke_ksegrp ke_thread->td_ksegrp 12222bf7d9aSJeff Roberson #define ke_assign ke_procq.tqe_next 12321381d1bSJeff Roberson /* flags kept in ke_flags */ 124598b368dSJeff Roberson #define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ 125598b368dSJeff Roberson #define KEF_BOUND 0x0002 /* Thread can not migrate. */ 126598b368dSJeff Roberson #define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ 127598b368dSJeff Roberson #define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ 128598b368dSJeff Roberson #define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 12921381d1bSJeff Roberson #define KEF_INTERNAL 0x0020 /* Thread added due to migration. */ 1301278181cSDavid Xu #define KEF_PREEMPTED 0x0040 /* Thread was preempted */ 13121381d1bSJeff Roberson #define KEF_DIDRUN 0x02000 /* Thread actually ran. */ 13221381d1bSJeff Roberson #define KEF_EXIT 0x04000 /* Thread is being killed. 
*/ 13335e6168fSJeff Roberson 13435e6168fSJeff Roberson struct kg_sched { 135ed062c8dSJulian Elischer struct thread *skg_last_assigned; /* (j) Last thread assigned to */ 136ed062c8dSJulian Elischer /* the system scheduler */ 137407b0157SJeff Roberson int skg_slptime; /* Number of ticks we vol. slept */ 138407b0157SJeff Roberson int skg_runtime; /* Number of ticks we were running */ 139ed062c8dSJulian Elischer int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ 140ed062c8dSJulian Elischer int skg_concurrency; /* (j) Num threads requested in group.*/ 14135e6168fSJeff Roberson }; 142ed062c8dSJulian Elischer #define kg_last_assigned kg_sched->skg_last_assigned 143ed062c8dSJulian Elischer #define kg_avail_opennings kg_sched->skg_avail_opennings 144ed062c8dSJulian Elischer #define kg_concurrency kg_sched->skg_concurrency 145407b0157SJeff Roberson #define kg_runtime kg_sched->skg_runtime 146ed062c8dSJulian Elischer #define kg_slptime kg_sched->skg_slptime 14735e6168fSJeff Roberson 14821381d1bSJeff Roberson #define SLOT_RELEASE(kg) (kg)->kg_avail_opennings++ 14921381d1bSJeff Roberson #define SLOT_USE(kg) (kg)->kg_avail_opennings-- 150d39063f2SJulian Elischer 151ed062c8dSJulian Elischer static struct kse kse0; 152ed062c8dSJulian Elischer static struct kg_sched kg_sched0; 15335e6168fSJeff Roberson 15435e6168fSJeff Roberson /* 155665cb285SJeff Roberson * The priority is primarily determined by the interactivity score. Thus, we 156665cb285SJeff Roberson * give lower(better) priorities to kse groups that use less CPU. The nice 157665cb285SJeff Roberson * value is then directly added to this to allow nice to have some effect 158665cb285SJeff Roberson * on latency. 159e1f89c22SJeff Roberson * 160e1f89c22SJeff Roberson * PRI_RANGE: Total priority range for timeshare threads. 161665cb285SJeff Roberson * PRI_NRESV: Number of nice values. 162e1f89c22SJeff Roberson * PRI_BASE: The start of the dynamic range. 
16335e6168fSJeff Roberson */ 164407b0157SJeff Roberson #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 165a0a931ceSJeff Roberson #define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 166a0a931ceSJeff Roberson #define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 167665cb285SJeff Roberson #define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 16815dc847eSJeff Roberson #define SCHED_PRI_INTERACT(score) \ 169665cb285SJeff Roberson ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 17035e6168fSJeff Roberson 17135e6168fSJeff Roberson /* 172e1f89c22SJeff Roberson * These determine the interactivity of a process. 17335e6168fSJeff Roberson * 174407b0157SJeff Roberson * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 175407b0157SJeff Roberson * before throttling back. 176d322132cSJeff Roberson * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 177210491d3SJeff Roberson * INTERACT_MAX: Maximum interactivity value. Smaller is better. 178e1f89c22SJeff Roberson * INTERACT_THRESH: Threshhold for placement on the current runq. 17935e6168fSJeff Roberson */ 1804c9612c6SJeff Roberson #define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 181d322132cSJeff Roberson #define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 182210491d3SJeff Roberson #define SCHED_INTERACT_MAX (100) 183210491d3SJeff Roberson #define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 1844c9612c6SJeff Roberson #define SCHED_INTERACT_THRESH (30) 185e1f89c22SJeff Roberson 18635e6168fSJeff Roberson /* 18735e6168fSJeff Roberson * These parameters and macros determine the size of the time slice that is 18835e6168fSJeff Roberson * granted to each thread. 18935e6168fSJeff Roberson * 19035e6168fSJeff Roberson * SLICE_MIN: Minimum time slice granted, in units of ticks. 19135e6168fSJeff Roberson * SLICE_MAX: Maximum time slice granted. 19235e6168fSJeff Roberson * SLICE_RANGE: Range of available time slices scaled by hz. 193245f3abfSJeff Roberson * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 
194245f3abfSJeff Roberson * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 1957d1a81b4SJeff Roberson * SLICE_NTHRESH: The nice cutoff point for slice assignment. 19635e6168fSJeff Roberson */ 19715dc847eSJeff Roberson #define SCHED_SLICE_MIN (slice_min) 19815dc847eSJeff Roberson #define SCHED_SLICE_MAX (slice_max) 1990392e39dSJeff Roberson #define SCHED_SLICE_INTERACTIVE (slice_max) 2007d1a81b4SJeff Roberson #define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 20135e6168fSJeff Roberson #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 20235e6168fSJeff Roberson #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 203245f3abfSJeff Roberson #define SCHED_SLICE_NICE(nice) \ 2047d1a81b4SJeff Roberson (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 20535e6168fSJeff Roberson 20635e6168fSJeff Roberson /* 207ed062c8dSJulian Elischer * This macro determines whether or not the thread belongs on the current or 20835e6168fSJeff Roberson * next run queue. 20935e6168fSJeff Roberson */ 21015dc847eSJeff Roberson #define SCHED_INTERACTIVE(kg) \ 21115dc847eSJeff Roberson (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 212a5f099d0SJeff Roberson #define SCHED_CURR(kg, ke) \ 2131278181cSDavid Xu ((ke->ke_thread->td_flags & TDF_BORROWING) || \ 2141278181cSDavid Xu (ke->ke_flags & KEF_PREEMPTED) || SCHED_INTERACTIVE(kg)) 21535e6168fSJeff Roberson 21635e6168fSJeff Roberson /* 21735e6168fSJeff Roberson * Cpu percentage computation macros and defines. 21835e6168fSJeff Roberson * 21935e6168fSJeff Roberson * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 22035e6168fSJeff Roberson * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 
22135e6168fSJeff Roberson */ 22235e6168fSJeff Roberson 2235053d272SJeff Roberson #define SCHED_CPU_TIME 10 22435e6168fSJeff Roberson #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 22535e6168fSJeff Roberson 22635e6168fSJeff Roberson /* 22715dc847eSJeff Roberson * kseq - per processor runqs and statistics. 22835e6168fSJeff Roberson */ 22935e6168fSJeff Roberson struct kseq { 230a8949de2SJeff Roberson struct runq ksq_idle; /* Queue of IDLE threads. */ 23115dc847eSJeff Roberson struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 23215dc847eSJeff Roberson struct runq *ksq_next; /* Next timeshare queue. */ 23315dc847eSJeff Roberson struct runq *ksq_curr; /* Current queue. */ 234ef1134c9SJeff Roberson int ksq_load_timeshare; /* Load for timeshare. */ 23515dc847eSJeff Roberson int ksq_load; /* Aggregate load. */ 236a0a931ceSJeff Roberson short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 23715dc847eSJeff Roberson short ksq_nicemin; /* Least nice. */ 2385d7ef00cSJeff Roberson #ifdef SMP 23980f86c9fSJeff Roberson int ksq_transferable; 24080f86c9fSJeff Roberson LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 24180f86c9fSJeff Roberson struct kseq_group *ksq_group; /* Our processor group. */ 242fa9c9717SJeff Roberson volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 24333916c36SJeff Roberson #else 24433916c36SJeff Roberson int ksq_sysload; /* For loadavg, !ITHD load. */ 2455d7ef00cSJeff Roberson #endif 24635e6168fSJeff Roberson }; 24735e6168fSJeff Roberson 24880f86c9fSJeff Roberson #ifdef SMP 24980f86c9fSJeff Roberson /* 25080f86c9fSJeff Roberson * kseq groups are groups of processors which can cheaply share threads. When 25180f86c9fSJeff Roberson * one processor in the group goes idle it will check the runqs of the other 25280f86c9fSJeff Roberson * processors in its group prior to halting and waiting for an interrupt. 25380f86c9fSJeff Roberson * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 
25480f86c9fSJeff Roberson * In a numa environment we'd want an idle bitmap per group and a two tiered 25580f86c9fSJeff Roberson * load balancer. 25680f86c9fSJeff Roberson */ 25780f86c9fSJeff Roberson struct kseq_group { 25880f86c9fSJeff Roberson int ksg_cpus; /* Count of CPUs in this kseq group. */ 259b2ae7ed7SMarcel Moolenaar cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 260b2ae7ed7SMarcel Moolenaar cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 261b2ae7ed7SMarcel Moolenaar cpumask_t ksg_mask; /* Bit mask for first cpu. */ 262cac77d04SJeff Roberson int ksg_load; /* Total load of this group. */ 26380f86c9fSJeff Roberson int ksg_transferable; /* Transferable load of this group. */ 26480f86c9fSJeff Roberson LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */ 26580f86c9fSJeff Roberson }; 26680f86c9fSJeff Roberson #endif 26780f86c9fSJeff Roberson 26835e6168fSJeff Roberson /* 26935e6168fSJeff Roberson * One kse queue per processor. 27035e6168fSJeff Roberson */ 2710a016a05SJeff Roberson #ifdef SMP 272b2ae7ed7SMarcel Moolenaar static cpumask_t kseq_idle; 273cac77d04SJeff Roberson static int ksg_maxid; 27422bf7d9aSJeff Roberson static struct kseq kseq_cpu[MAXCPU]; 27580f86c9fSJeff Roberson static struct kseq_group kseq_groups[MAXCPU]; 276dc03363dSJeff Roberson static int bal_tick; 277dc03363dSJeff Roberson static int gbal_tick; 278598b368dSJeff Roberson static int balance_groups; 279dc03363dSJeff Roberson 28080f86c9fSJeff Roberson #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 28180f86c9fSJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 282cac77d04SJeff Roberson #define KSEQ_ID(x) ((x) - kseq_cpu) 283cac77d04SJeff Roberson #define KSEQ_GROUP(x) (&kseq_groups[(x)]) 28480f86c9fSJeff Roberson #else /* !SMP */ 28522bf7d9aSJeff Roberson static struct kseq kseq_cpu; 286dc03363dSJeff Roberson 2870a016a05SJeff Roberson #define KSEQ_SELF() (&kseq_cpu) 2880a016a05SJeff Roberson #define KSEQ_CPU(x) (&kseq_cpu) 2890a016a05SJeff Roberson #endif 
29035e6168fSJeff Roberson 29121381d1bSJeff Roberson static void slot_fill(struct ksegrp *); 292ed062c8dSJulian Elischer static struct kse *sched_choose(void); /* XXX Should be thread * */ 29321381d1bSJeff Roberson static void sched_slice(struct kse *); 29421381d1bSJeff Roberson static void sched_priority(struct ksegrp *); 29521381d1bSJeff Roberson static void sched_thread_priority(struct thread *, u_char); 29621381d1bSJeff Roberson static int sched_interact_score(struct ksegrp *); 29721381d1bSJeff Roberson static void sched_interact_update(struct ksegrp *); 29821381d1bSJeff Roberson static void sched_interact_fork(struct ksegrp *); 29921381d1bSJeff Roberson static void sched_pctcpu_update(struct kse *); 30035e6168fSJeff Roberson 3015d7ef00cSJeff Roberson /* Operations on per processor queues */ 30221381d1bSJeff Roberson static struct kse * kseq_choose(struct kseq *); 30321381d1bSJeff Roberson static void kseq_setup(struct kseq *); 30421381d1bSJeff Roberson static void kseq_load_add(struct kseq *, struct kse *); 30521381d1bSJeff Roberson static void kseq_load_rem(struct kseq *, struct kse *); 30621381d1bSJeff Roberson static __inline void kseq_runq_add(struct kseq *, struct kse *, int); 30721381d1bSJeff Roberson static __inline void kseq_runq_rem(struct kseq *, struct kse *); 30821381d1bSJeff Roberson static void kseq_nice_add(struct kseq *, int); 30921381d1bSJeff Roberson static void kseq_nice_rem(struct kseq *, int); 3107cd650a9SJeff Roberson void kseq_print(int cpu); 3115d7ef00cSJeff Roberson #ifdef SMP 31221381d1bSJeff Roberson static int kseq_transfer(struct kseq *, struct kse *, int); 31321381d1bSJeff Roberson static struct kse *runq_steal(struct runq *); 314dc03363dSJeff Roberson static void sched_balance(void); 315dc03363dSJeff Roberson static void sched_balance_groups(void); 31621381d1bSJeff Roberson static void sched_balance_group(struct kseq_group *); 31721381d1bSJeff Roberson static void sched_balance_pair(struct kseq *, struct kseq *); 31821381d1bSJeff 
Roberson static void kseq_move(struct kseq *, int); 31921381d1bSJeff Roberson static int kseq_idled(struct kseq *); 32021381d1bSJeff Roberson static void kseq_notify(struct kse *, int); 32122bf7d9aSJeff Roberson static void kseq_assign(struct kseq *); 32221381d1bSJeff Roberson static struct kse *kseq_steal(struct kseq *, int); 323598b368dSJeff Roberson #define KSE_CAN_MIGRATE(ke) \ 3241e7fad6bSScott Long ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 3255d7ef00cSJeff Roberson #endif 3265d7ef00cSJeff Roberson 32715dc847eSJeff Roberson void 3287cd650a9SJeff Roberson kseq_print(int cpu) 32915dc847eSJeff Roberson { 3307cd650a9SJeff Roberson struct kseq *kseq; 33115dc847eSJeff Roberson int i; 33215dc847eSJeff Roberson 3337cd650a9SJeff Roberson kseq = KSEQ_CPU(cpu); 33415dc847eSJeff Roberson 33515dc847eSJeff Roberson printf("kseq:\n"); 33615dc847eSJeff Roberson printf("\tload: %d\n", kseq->ksq_load); 337155b9987SJeff Roberson printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 338ef1134c9SJeff Roberson #ifdef SMP 33980f86c9fSJeff Roberson printf("\tload transferable: %d\n", kseq->ksq_transferable); 340ef1134c9SJeff Roberson #endif 34115dc847eSJeff Roberson printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 34215dc847eSJeff Roberson printf("\tnice counts:\n"); 343a0a931ceSJeff Roberson for (i = 0; i < SCHED_PRI_NRESV; i++) 34415dc847eSJeff Roberson if (kseq->ksq_nice[i]) 34515dc847eSJeff Roberson printf("\t\t%d = %d\n", 34615dc847eSJeff Roberson i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 34715dc847eSJeff Roberson } 34815dc847eSJeff Roberson 349155b9987SJeff Roberson static __inline void 350598b368dSJeff Roberson kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 351155b9987SJeff Roberson { 352155b9987SJeff Roberson #ifdef SMP 353598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 35480f86c9fSJeff Roberson kseq->ksq_transferable++; 35580f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 3562454aaf5SJeff Roberson ke->ke_flags |= 
KEF_XFERABLE; 35780f86c9fSJeff Roberson } 358155b9987SJeff Roberson #endif 3591278181cSDavid Xu if (ke->ke_flags & KEF_PREEMPTED) 3601278181cSDavid Xu flags |= SRQ_PREEMPTED; 361598b368dSJeff Roberson runq_add(ke->ke_runq, ke, flags); 362155b9987SJeff Roberson } 363155b9987SJeff Roberson 364155b9987SJeff Roberson static __inline void 365155b9987SJeff Roberson kseq_runq_rem(struct kseq *kseq, struct kse *ke) 366155b9987SJeff Roberson { 367155b9987SJeff Roberson #ifdef SMP 3682454aaf5SJeff Roberson if (ke->ke_flags & KEF_XFERABLE) { 36980f86c9fSJeff Roberson kseq->ksq_transferable--; 37080f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 3712454aaf5SJeff Roberson ke->ke_flags &= ~KEF_XFERABLE; 37280f86c9fSJeff Roberson } 373155b9987SJeff Roberson #endif 374155b9987SJeff Roberson runq_remove(ke->ke_runq, ke); 375155b9987SJeff Roberson } 376155b9987SJeff Roberson 377a8949de2SJeff Roberson static void 378155b9987SJeff Roberson kseq_load_add(struct kseq *kseq, struct kse *ke) 3795d7ef00cSJeff Roberson { 380ef1134c9SJeff Roberson int class; 381b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 382ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 383ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 384ef1134c9SJeff Roberson kseq->ksq_load_timeshare++; 38515dc847eSJeff Roberson kseq->ksq_load++; 38681d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 387207a6c0dSDavid E. 
O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 38833916c36SJeff Roberson #ifdef SMP 389cac77d04SJeff Roberson kseq->ksq_group->ksg_load++; 39033916c36SJeff Roberson #else 39133916c36SJeff Roberson kseq->ksq_sysload++; 392cac77d04SJeff Roberson #endif 39315dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 394fa885116SJulian Elischer kseq_nice_add(kseq, ke->ke_proc->p_nice); 3955d7ef00cSJeff Roberson } 39615dc847eSJeff Roberson 397a8949de2SJeff Roberson static void 398155b9987SJeff Roberson kseq_load_rem(struct kseq *kseq, struct kse *ke) 3995d7ef00cSJeff Roberson { 400ef1134c9SJeff Roberson int class; 401b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 402ef1134c9SJeff Roberson class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 403ef1134c9SJeff Roberson if (class == PRI_TIMESHARE) 404ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 405207a6c0dSDavid E. O'Brien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 40633916c36SJeff Roberson #ifdef SMP 407cac77d04SJeff Roberson kseq->ksq_group->ksg_load--; 40833916c36SJeff Roberson #else 40933916c36SJeff Roberson kseq->ksq_sysload--; 410cac77d04SJeff Roberson #endif 41115dc847eSJeff Roberson kseq->ksq_load--; 41281d47d3fSJeff Roberson CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 41315dc847eSJeff Roberson ke->ke_runq = NULL; 41415dc847eSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 415fa885116SJulian Elischer kseq_nice_rem(kseq, ke->ke_proc->p_nice); 4165d7ef00cSJeff Roberson } 4175d7ef00cSJeff Roberson 41815dc847eSJeff Roberson static void 41915dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice) 42015dc847eSJeff Roberson { 421b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 42215dc847eSJeff Roberson /* Normalize to zero. 
*/ 42315dc847eSJeff Roberson kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 424ef1134c9SJeff Roberson if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 42515dc847eSJeff Roberson kseq->ksq_nicemin = nice; 42615dc847eSJeff Roberson } 42715dc847eSJeff Roberson 42815dc847eSJeff Roberson static void 42915dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice) 43015dc847eSJeff Roberson { 43115dc847eSJeff Roberson int n; 43215dc847eSJeff Roberson 433b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 43415dc847eSJeff Roberson /* Normalize to zero. */ 43515dc847eSJeff Roberson n = nice + SCHED_PRI_NHALF; 43615dc847eSJeff Roberson kseq->ksq_nice[n]--; 43715dc847eSJeff Roberson KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 43815dc847eSJeff Roberson 43915dc847eSJeff Roberson /* 44015dc847eSJeff Roberson * If this wasn't the smallest nice value or there are more in 44115dc847eSJeff Roberson * this bucket we can just return. Otherwise we have to recalculate 44215dc847eSJeff Roberson * the smallest nice. 44315dc847eSJeff Roberson */ 44415dc847eSJeff Roberson if (nice != kseq->ksq_nicemin || 44515dc847eSJeff Roberson kseq->ksq_nice[n] != 0 || 446ef1134c9SJeff Roberson kseq->ksq_load_timeshare == 0) 44715dc847eSJeff Roberson return; 44815dc847eSJeff Roberson 449a0a931ceSJeff Roberson for (; n < SCHED_PRI_NRESV; n++) 45015dc847eSJeff Roberson if (kseq->ksq_nice[n]) { 45115dc847eSJeff Roberson kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 45215dc847eSJeff Roberson return; 45315dc847eSJeff Roberson } 45415dc847eSJeff Roberson } 45515dc847eSJeff Roberson 4565d7ef00cSJeff Roberson #ifdef SMP 457356500a3SJeff Roberson /* 458155b9987SJeff Roberson * sched_balance is a simple CPU load balancing algorithm. It operates by 459356500a3SJeff Roberson * finding the least loaded and most loaded cpu and equalizing their load 460356500a3SJeff Roberson * by migrating some processes. 
461356500a3SJeff Roberson * 462356500a3SJeff Roberson * Dealing only with two CPUs at a time has two advantages. Firstly, most 463356500a3SJeff Roberson * installations will only have 2 cpus. Secondly, load balancing too much at 464356500a3SJeff Roberson * once can have an unpleasant effect on the system. The scheduler rarely has 465356500a3SJeff Roberson * enough information to make perfect decisions. So this algorithm chooses 466356500a3SJeff Roberson * algorithm simplicity and more gradual effects on load in larger systems. 467356500a3SJeff Roberson * 468356500a3SJeff Roberson * It could be improved by considering the priorities and slices assigned to 469356500a3SJeff Roberson * each task prior to balancing them. There are many pathological cases with 470356500a3SJeff Roberson * any approach and so the semi random algorithm below may work as well as any. 471356500a3SJeff Roberson * 472356500a3SJeff Roberson */ 47322bf7d9aSJeff Roberson static void 474dc03363dSJeff Roberson sched_balance(void) 475356500a3SJeff Roberson { 476cac77d04SJeff Roberson struct kseq_group *high; 477cac77d04SJeff Roberson struct kseq_group *low; 478cac77d04SJeff Roberson struct kseq_group *ksg; 479cac77d04SJeff Roberson int cnt; 480356500a3SJeff Roberson int i; 481356500a3SJeff Roberson 482598b368dSJeff Roberson bal_tick = ticks + (random() % (hz * 2)); 48386f8ae96SJeff Roberson if (smp_started == 0) 484598b368dSJeff Roberson return; 485cac77d04SJeff Roberson low = high = NULL; 486cac77d04SJeff Roberson i = random() % (ksg_maxid + 1); 487cac77d04SJeff Roberson for (cnt = 0; cnt <= ksg_maxid; cnt++) { 488cac77d04SJeff Roberson ksg = KSEQ_GROUP(i); 489cac77d04SJeff Roberson /* 490cac77d04SJeff Roberson * Find the CPU with the highest load that has some 491cac77d04SJeff Roberson * threads to transfer. 
492cac77d04SJeff Roberson */ 493cac77d04SJeff Roberson if ((high == NULL || ksg->ksg_load > high->ksg_load) 494cac77d04SJeff Roberson && ksg->ksg_transferable) 495cac77d04SJeff Roberson high = ksg; 496cac77d04SJeff Roberson if (low == NULL || ksg->ksg_load < low->ksg_load) 497cac77d04SJeff Roberson low = ksg; 498cac77d04SJeff Roberson if (++i > ksg_maxid) 499cac77d04SJeff Roberson i = 0; 500cac77d04SJeff Roberson } 501cac77d04SJeff Roberson if (low != NULL && high != NULL && high != low) 502cac77d04SJeff Roberson sched_balance_pair(LIST_FIRST(&high->ksg_members), 503cac77d04SJeff Roberson LIST_FIRST(&low->ksg_members)); 504cac77d04SJeff Roberson } 50586f8ae96SJeff Roberson 506cac77d04SJeff Roberson static void 507dc03363dSJeff Roberson sched_balance_groups(void) 508cac77d04SJeff Roberson { 509cac77d04SJeff Roberson int i; 510cac77d04SJeff Roberson 511598b368dSJeff Roberson gbal_tick = ticks + (random() % (hz * 2)); 512dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 513cac77d04SJeff Roberson if (smp_started) 514cac77d04SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 515cac77d04SJeff Roberson sched_balance_group(KSEQ_GROUP(i)); 516356500a3SJeff Roberson } 517cac77d04SJeff Roberson 518cac77d04SJeff Roberson static void 519cac77d04SJeff Roberson sched_balance_group(struct kseq_group *ksg) 520cac77d04SJeff Roberson { 521cac77d04SJeff Roberson struct kseq *kseq; 522cac77d04SJeff Roberson struct kseq *high; 523cac77d04SJeff Roberson struct kseq *low; 524cac77d04SJeff Roberson int load; 525cac77d04SJeff Roberson 526cac77d04SJeff Roberson if (ksg->ksg_transferable == 0) 527cac77d04SJeff Roberson return; 528cac77d04SJeff Roberson low = NULL; 529cac77d04SJeff Roberson high = NULL; 530cac77d04SJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 531cac77d04SJeff Roberson load = kseq->ksq_load; 532cac77d04SJeff Roberson if (high == NULL || load > high->ksq_load) 533cac77d04SJeff Roberson high = kseq; 534cac77d04SJeff Roberson if (low == NULL || load < 
low->ksq_load) 535cac77d04SJeff Roberson low = kseq; 536356500a3SJeff Roberson } 537cac77d04SJeff Roberson if (high != NULL && low != NULL && high != low) 538cac77d04SJeff Roberson sched_balance_pair(high, low); 539356500a3SJeff Roberson } 540cac77d04SJeff Roberson 541cac77d04SJeff Roberson static void 542cac77d04SJeff Roberson sched_balance_pair(struct kseq *high, struct kseq *low) 543cac77d04SJeff Roberson { 544cac77d04SJeff Roberson int transferable; 545cac77d04SJeff Roberson int high_load; 546cac77d04SJeff Roberson int low_load; 547cac77d04SJeff Roberson int move; 548cac77d04SJeff Roberson int diff; 549cac77d04SJeff Roberson int i; 550cac77d04SJeff Roberson 55180f86c9fSJeff Roberson /* 55280f86c9fSJeff Roberson * If we're transfering within a group we have to use this specific 55380f86c9fSJeff Roberson * kseq's transferable count, otherwise we can steal from other members 55480f86c9fSJeff Roberson * of the group. 55580f86c9fSJeff Roberson */ 556cac77d04SJeff Roberson if (high->ksq_group == low->ksq_group) { 557cac77d04SJeff Roberson transferable = high->ksq_transferable; 558cac77d04SJeff Roberson high_load = high->ksq_load; 559cac77d04SJeff Roberson low_load = low->ksq_load; 560cac77d04SJeff Roberson } else { 561cac77d04SJeff Roberson transferable = high->ksq_group->ksg_transferable; 562cac77d04SJeff Roberson high_load = high->ksq_group->ksg_load; 563cac77d04SJeff Roberson low_load = low->ksq_group->ksg_load; 564cac77d04SJeff Roberson } 56580f86c9fSJeff Roberson if (transferable == 0) 566cac77d04SJeff Roberson return; 567155b9987SJeff Roberson /* 568155b9987SJeff Roberson * Determine what the imbalance is and then adjust that to how many 56980f86c9fSJeff Roberson * kses we actually have to give up (transferable). 
570155b9987SJeff Roberson */ 571cac77d04SJeff Roberson diff = high_load - low_load; 572356500a3SJeff Roberson move = diff / 2; 573356500a3SJeff Roberson if (diff & 0x1) 574356500a3SJeff Roberson move++; 57580f86c9fSJeff Roberson move = min(move, transferable); 576356500a3SJeff Roberson for (i = 0; i < move; i++) 577cac77d04SJeff Roberson kseq_move(high, KSEQ_ID(low)); 578356500a3SJeff Roberson return; 579356500a3SJeff Roberson } 580356500a3SJeff Roberson 58122bf7d9aSJeff Roberson static void 582356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu) 583356500a3SJeff Roberson { 58480f86c9fSJeff Roberson struct kseq *kseq; 58580f86c9fSJeff Roberson struct kseq *to; 586356500a3SJeff Roberson struct kse *ke; 587356500a3SJeff Roberson 58880f86c9fSJeff Roberson kseq = from; 58980f86c9fSJeff Roberson to = KSEQ_CPU(cpu); 59080f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 59180f86c9fSJeff Roberson if (ke == NULL) { 59280f86c9fSJeff Roberson struct kseq_group *ksg; 59380f86c9fSJeff Roberson 59480f86c9fSJeff Roberson ksg = kseq->ksq_group; 59580f86c9fSJeff Roberson LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 59680f86c9fSJeff Roberson if (kseq == from || kseq->ksq_transferable == 0) 59780f86c9fSJeff Roberson continue; 59880f86c9fSJeff Roberson ke = kseq_steal(kseq, 1); 59980f86c9fSJeff Roberson break; 60080f86c9fSJeff Roberson } 60180f86c9fSJeff Roberson if (ke == NULL) 60280f86c9fSJeff Roberson panic("kseq_move: No KSEs available with a " 60380f86c9fSJeff Roberson "transferable count of %d\n", 60480f86c9fSJeff Roberson ksg->ksg_transferable); 60580f86c9fSJeff Roberson } 60680f86c9fSJeff Roberson if (kseq == to) 60780f86c9fSJeff Roberson return; 608356500a3SJeff Roberson ke->ke_state = KES_THREAD; 60980f86c9fSJeff Roberson kseq_runq_rem(kseq, ke); 61080f86c9fSJeff Roberson kseq_load_rem(kseq, ke); 611112b6d3aSJeff Roberson kseq_notify(ke, cpu); 612356500a3SJeff Roberson } 61322bf7d9aSJeff Roberson 61480f86c9fSJeff Roberson static int 61580f86c9fSJeff Roberson 
kseq_idled(struct kseq *kseq) 61622bf7d9aSJeff Roberson { 61780f86c9fSJeff Roberson struct kseq_group *ksg; 61880f86c9fSJeff Roberson struct kseq *steal; 61980f86c9fSJeff Roberson struct kse *ke; 62080f86c9fSJeff Roberson 62180f86c9fSJeff Roberson ksg = kseq->ksq_group; 62280f86c9fSJeff Roberson /* 62380f86c9fSJeff Roberson * If we're in a cpu group, try and steal kses from another cpu in 62480f86c9fSJeff Roberson * the group before idling. 62580f86c9fSJeff Roberson */ 62680f86c9fSJeff Roberson if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 62780f86c9fSJeff Roberson LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 62880f86c9fSJeff Roberson if (steal == kseq || steal->ksq_transferable == 0) 62980f86c9fSJeff Roberson continue; 63080f86c9fSJeff Roberson ke = kseq_steal(steal, 0); 63180f86c9fSJeff Roberson if (ke == NULL) 63280f86c9fSJeff Roberson continue; 63380f86c9fSJeff Roberson ke->ke_state = KES_THREAD; 63480f86c9fSJeff Roberson kseq_runq_rem(steal, ke); 63580f86c9fSJeff Roberson kseq_load_rem(steal, ke); 63680f86c9fSJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 637598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 638598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 63980f86c9fSJeff Roberson return (0); 64080f86c9fSJeff Roberson } 64180f86c9fSJeff Roberson } 64280f86c9fSJeff Roberson /* 64380f86c9fSJeff Roberson * We only set the idled bit when all of the cpus in the group are 64480f86c9fSJeff Roberson * idle. Otherwise we could get into a situation where a KSE bounces 64580f86c9fSJeff Roberson * back and forth between two idle cores on seperate physical CPUs. 
64680f86c9fSJeff Roberson */ 64780f86c9fSJeff Roberson ksg->ksg_idlemask |= PCPU_GET(cpumask); 64880f86c9fSJeff Roberson if (ksg->ksg_idlemask != ksg->ksg_cpumask) 64980f86c9fSJeff Roberson return (1); 65080f86c9fSJeff Roberson atomic_set_int(&kseq_idle, ksg->ksg_mask); 65180f86c9fSJeff Roberson return (1); 65222bf7d9aSJeff Roberson } 65322bf7d9aSJeff Roberson 65422bf7d9aSJeff Roberson static void 65522bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq) 65622bf7d9aSJeff Roberson { 65722bf7d9aSJeff Roberson struct kse *nke; 65822bf7d9aSJeff Roberson struct kse *ke; 65922bf7d9aSJeff Roberson 66022bf7d9aSJeff Roberson do { 66100fbcda8SAlexander Kabaev *(volatile struct kse **)&ke = kseq->ksq_assigned; 66205a6b7adSXin LI } while(!atomic_cmpset_ptr((volatile uintptr_t *)&kseq->ksq_assigned, 66305a6b7adSXin LI (uintptr_t)ke, (uintptr_t)NULL)); 66422bf7d9aSJeff Roberson for (; ke != NULL; ke = nke) { 66522bf7d9aSJeff Roberson nke = ke->ke_assign; 666598b368dSJeff Roberson kseq->ksq_group->ksg_load--; 667598b368dSJeff Roberson kseq->ksq_load--; 66822bf7d9aSJeff Roberson ke->ke_flags &= ~KEF_ASSIGNED; 6693d16f519SDavid Xu if (ke->ke_flags & KEF_REMOVED) { 6703d16f519SDavid Xu ke->ke_flags &= ~KEF_REMOVED; 6713d16f519SDavid Xu continue; 6723d16f519SDavid Xu } 673598b368dSJeff Roberson ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 674598b368dSJeff Roberson sched_add(ke->ke_thread, SRQ_YIELDING); 67522bf7d9aSJeff Roberson } 67622bf7d9aSJeff Roberson } 67722bf7d9aSJeff Roberson 67822bf7d9aSJeff Roberson static void 67922bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu) 68022bf7d9aSJeff Roberson { 68122bf7d9aSJeff Roberson struct kseq *kseq; 68222bf7d9aSJeff Roberson struct thread *td; 68322bf7d9aSJeff Roberson struct pcpu *pcpu; 684598b368dSJeff Roberson int class; 6852454aaf5SJeff Roberson int prio; 68622bf7d9aSJeff Roberson 687598b368dSJeff Roberson kseq = KSEQ_CPU(cpu); 688598b368dSJeff Roberson /* XXX */ 689598b368dSJeff Roberson class = 
PRI_BASE(ke->ke_ksegrp->kg_pri_class); 690598b368dSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 691598b368dSJeff Roberson (kseq_idle & kseq->ksq_group->ksg_mask)) 692598b368dSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 693598b368dSJeff Roberson kseq->ksq_group->ksg_load++; 694598b368dSJeff Roberson kseq->ksq_load++; 69586e1c22aSJeff Roberson ke->ke_cpu = cpu; 69622bf7d9aSJeff Roberson ke->ke_flags |= KEF_ASSIGNED; 6972454aaf5SJeff Roberson prio = ke->ke_thread->td_priority; 69822bf7d9aSJeff Roberson 6990c0a98b2SJeff Roberson /* 70022bf7d9aSJeff Roberson * Place a KSE on another cpu's queue and force a resched. 70122bf7d9aSJeff Roberson */ 70222bf7d9aSJeff Roberson do { 70300fbcda8SAlexander Kabaev *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 70405a6b7adSXin LI } while(!atomic_cmpset_ptr((volatile uintptr_t *)&kseq->ksq_assigned, 70505a6b7adSXin LI (uintptr_t)ke->ke_assign, (uintptr_t)ke)); 7062454aaf5SJeff Roberson /* 7072454aaf5SJeff Roberson * Without sched_lock we could lose a race where we set NEEDRESCHED 7082454aaf5SJeff Roberson * on a thread that is switched out before the IPI is delivered. This 7092454aaf5SJeff Roberson * would lead us to miss the resched. This will be a problem once 7102454aaf5SJeff Roberson * sched_lock is pushed down. 
7112454aaf5SJeff Roberson */ 71222bf7d9aSJeff Roberson pcpu = pcpu_find(cpu); 71322bf7d9aSJeff Roberson td = pcpu->pc_curthread; 71422bf7d9aSJeff Roberson if (ke->ke_thread->td_priority < td->td_priority || 71522bf7d9aSJeff Roberson td == pcpu->pc_idlethread) { 71622bf7d9aSJeff Roberson td->td_flags |= TDF_NEEDRESCHED; 71722bf7d9aSJeff Roberson ipi_selected(1 << cpu, IPI_AST); 71822bf7d9aSJeff Roberson } 71922bf7d9aSJeff Roberson } 72022bf7d9aSJeff Roberson 72122bf7d9aSJeff Roberson static struct kse * 72222bf7d9aSJeff Roberson runq_steal(struct runq *rq) 72322bf7d9aSJeff Roberson { 72422bf7d9aSJeff Roberson struct rqhead *rqh; 72522bf7d9aSJeff Roberson struct rqbits *rqb; 72622bf7d9aSJeff Roberson struct kse *ke; 72722bf7d9aSJeff Roberson int word; 72822bf7d9aSJeff Roberson int bit; 72922bf7d9aSJeff Roberson 73022bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 73122bf7d9aSJeff Roberson rqb = &rq->rq_status; 73222bf7d9aSJeff Roberson for (word = 0; word < RQB_LEN; word++) { 73322bf7d9aSJeff Roberson if (rqb->rqb_bits[word] == 0) 73422bf7d9aSJeff Roberson continue; 73522bf7d9aSJeff Roberson for (bit = 0; bit < RQB_BPW; bit++) { 736a2640c9bSPeter Wemm if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 73722bf7d9aSJeff Roberson continue; 73822bf7d9aSJeff Roberson rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 73922bf7d9aSJeff Roberson TAILQ_FOREACH(ke, rqh, ke_procq) { 740598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) 74122bf7d9aSJeff Roberson return (ke); 74222bf7d9aSJeff Roberson } 74322bf7d9aSJeff Roberson } 74422bf7d9aSJeff Roberson } 74522bf7d9aSJeff Roberson return (NULL); 74622bf7d9aSJeff Roberson } 74722bf7d9aSJeff Roberson 74822bf7d9aSJeff Roberson static struct kse * 74980f86c9fSJeff Roberson kseq_steal(struct kseq *kseq, int stealidle) 75022bf7d9aSJeff Roberson { 75122bf7d9aSJeff Roberson struct kse *ke; 75222bf7d9aSJeff Roberson 75380f86c9fSJeff Roberson /* 75480f86c9fSJeff Roberson * Steal from next first to try to get a non-interactive task that 
75580f86c9fSJeff Roberson * may not have run for a while. 75680f86c9fSJeff Roberson */ 75722bf7d9aSJeff Roberson if ((ke = runq_steal(kseq->ksq_next)) != NULL) 75822bf7d9aSJeff Roberson return (ke); 75980f86c9fSJeff Roberson if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 76080f86c9fSJeff Roberson return (ke); 76180f86c9fSJeff Roberson if (stealidle) 76222bf7d9aSJeff Roberson return (runq_steal(&kseq->ksq_idle)); 76380f86c9fSJeff Roberson return (NULL); 76422bf7d9aSJeff Roberson } 76580f86c9fSJeff Roberson 76680f86c9fSJeff Roberson int 76780f86c9fSJeff Roberson kseq_transfer(struct kseq *kseq, struct kse *ke, int class) 76880f86c9fSJeff Roberson { 769598b368dSJeff Roberson struct kseq_group *nksg; 77080f86c9fSJeff Roberson struct kseq_group *ksg; 771598b368dSJeff Roberson struct kseq *old; 77280f86c9fSJeff Roberson int cpu; 773598b368dSJeff Roberson int idx; 77480f86c9fSJeff Roberson 775670c524fSJeff Roberson if (smp_started == 0) 776670c524fSJeff Roberson return (0); 77780f86c9fSJeff Roberson cpu = 0; 77880f86c9fSJeff Roberson /* 7792454aaf5SJeff Roberson * If our load exceeds a certain threshold we should attempt to 7802454aaf5SJeff Roberson * reassign this thread. The first candidate is the cpu that 7812454aaf5SJeff Roberson * originally ran the thread. If it is idle, assign it there, 7822454aaf5SJeff Roberson * otherwise, pick an idle cpu. 7832454aaf5SJeff Roberson * 7842454aaf5SJeff Roberson * The threshold at which we start to reassign kses has a large impact 785670c524fSJeff Roberson * on the overall performance of the system. Tuned too high and 786670c524fSJeff Roberson * some CPUs may idle. Too low and there will be excess migration 787d50c87deSOlivier Houchard * and context switches. 
788670c524fSJeff Roberson */ 789598b368dSJeff Roberson old = KSEQ_CPU(ke->ke_cpu); 790598b368dSJeff Roberson nksg = old->ksq_group; 7912454aaf5SJeff Roberson ksg = kseq->ksq_group; 792598b368dSJeff Roberson if (kseq_idle) { 793598b368dSJeff Roberson if (kseq_idle & nksg->ksg_mask) { 794598b368dSJeff Roberson cpu = ffs(nksg->ksg_idlemask); 795598b368dSJeff Roberson if (cpu) { 796598b368dSJeff Roberson CTR2(KTR_SCHED, 797598b368dSJeff Roberson "kseq_transfer: %p found old cpu %X " 798598b368dSJeff Roberson "in idlemask.", ke, cpu); 7992454aaf5SJeff Roberson goto migrate; 8002454aaf5SJeff Roberson } 801598b368dSJeff Roberson } 80280f86c9fSJeff Roberson /* 80380f86c9fSJeff Roberson * Multiple cpus could find this bit simultaneously 80480f86c9fSJeff Roberson * but the race shouldn't be terrible. 80580f86c9fSJeff Roberson */ 80680f86c9fSJeff Roberson cpu = ffs(kseq_idle); 807598b368dSJeff Roberson if (cpu) { 808598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 809598b368dSJeff Roberson "in idlemask.", ke, cpu); 8102454aaf5SJeff Roberson goto migrate; 81180f86c9fSJeff Roberson } 812598b368dSJeff Roberson } 813598b368dSJeff Roberson idx = 0; 814598b368dSJeff Roberson #if 0 815598b368dSJeff Roberson if (old->ksq_load < kseq->ksq_load) { 816598b368dSJeff Roberson cpu = ke->ke_cpu + 1; 817598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 818598b368dSJeff Roberson "load less than ours.", ke, cpu); 819598b368dSJeff Roberson goto migrate; 820598b368dSJeff Roberson } 821598b368dSJeff Roberson /* 822598b368dSJeff Roberson * No new CPU was found, look for one with less load. 
823598b368dSJeff Roberson */ 824598b368dSJeff Roberson for (idx = 0; idx <= ksg_maxid; idx++) { 825598b368dSJeff Roberson nksg = KSEQ_GROUP(idx); 826598b368dSJeff Roberson if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 827598b368dSJeff Roberson cpu = ffs(nksg->ksg_cpumask); 828598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 829598b368dSJeff Roberson "than ours.", ke, cpu); 830598b368dSJeff Roberson goto migrate; 831598b368dSJeff Roberson } 832598b368dSJeff Roberson } 833598b368dSJeff Roberson #endif 83480f86c9fSJeff Roberson /* 83580f86c9fSJeff Roberson * If another cpu in this group has idled, assign a thread over 83680f86c9fSJeff Roberson * to them after checking to see if there are idled groups. 83780f86c9fSJeff Roberson */ 8382454aaf5SJeff Roberson if (ksg->ksg_idlemask) { 83980f86c9fSJeff Roberson cpu = ffs(ksg->ksg_idlemask); 840598b368dSJeff Roberson if (cpu) { 841598b368dSJeff Roberson CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 842598b368dSJeff Roberson "group.", ke, cpu); 8432454aaf5SJeff Roberson goto migrate; 84480f86c9fSJeff Roberson } 845598b368dSJeff Roberson } 8462454aaf5SJeff Roberson return (0); 8472454aaf5SJeff Roberson migrate: 8482454aaf5SJeff Roberson /* 84980f86c9fSJeff Roberson * Now that we've found an idle CPU, migrate the thread. 85080f86c9fSJeff Roberson */ 85180f86c9fSJeff Roberson cpu--; 85280f86c9fSJeff Roberson ke->ke_runq = NULL; 85380f86c9fSJeff Roberson kseq_notify(ke, cpu); 8542454aaf5SJeff Roberson 85580f86c9fSJeff Roberson return (1); 85680f86c9fSJeff Roberson } 85780f86c9fSJeff Roberson 85822bf7d9aSJeff Roberson #endif /* SMP */ 85922bf7d9aSJeff Roberson 86022bf7d9aSJeff Roberson /* 86122bf7d9aSJeff Roberson * Pick the highest priority task we have and return it. 
8620c0a98b2SJeff Roberson */ 8630c0a98b2SJeff Roberson 86422bf7d9aSJeff Roberson static struct kse * 86522bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq) 8665d7ef00cSJeff Roberson { 8675d7ef00cSJeff Roberson struct runq *swap; 8680516c8ddSJeff Roberson struct kse *ke; 8690516c8ddSJeff Roberson int nice; 8705d7ef00cSJeff Roberson 871b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 87215dc847eSJeff Roberson swap = NULL; 873a8949de2SJeff Roberson 87415dc847eSJeff Roberson for (;;) { 87515dc847eSJeff Roberson ke = runq_choose(kseq->ksq_curr); 87615dc847eSJeff Roberson if (ke == NULL) { 87715dc847eSJeff Roberson /* 878bf0acc27SJohn Baldwin * We already swapped once and didn't get anywhere. 87915dc847eSJeff Roberson */ 88015dc847eSJeff Roberson if (swap) 88115dc847eSJeff Roberson break; 8825d7ef00cSJeff Roberson swap = kseq->ksq_curr; 8835d7ef00cSJeff Roberson kseq->ksq_curr = kseq->ksq_next; 8845d7ef00cSJeff Roberson kseq->ksq_next = swap; 88515dc847eSJeff Roberson continue; 886a8949de2SJeff Roberson } 88715dc847eSJeff Roberson /* 88815dc847eSJeff Roberson * If we encounter a slice of 0 the kse is in a 88915dc847eSJeff Roberson * TIMESHARE kse group and its nice was too far out 89015dc847eSJeff Roberson * of the range that receives slices. 
89115dc847eSJeff Roberson */ 8920516c8ddSJeff Roberson nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); 893a8615740SDavid Xu #if 0 8948ffb8f55SJeff Roberson if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 8958ffb8f55SJeff Roberson ke->ke_proc->p_nice != 0)) { 89615dc847eSJeff Roberson runq_remove(ke->ke_runq, ke); 89715dc847eSJeff Roberson sched_slice(ke); 89815dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 899c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 90015dc847eSJeff Roberson continue; 90115dc847eSJeff Roberson } 902a8615740SDavid Xu #endif 90315dc847eSJeff Roberson return (ke); 90415dc847eSJeff Roberson } 90515dc847eSJeff Roberson 906a8949de2SJeff Roberson return (runq_choose(&kseq->ksq_idle)); 907245f3abfSJeff Roberson } 9080a016a05SJeff Roberson 9090a016a05SJeff Roberson static void 9100a016a05SJeff Roberson kseq_setup(struct kseq *kseq) 9110a016a05SJeff Roberson { 91215dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[0]); 91315dc847eSJeff Roberson runq_init(&kseq->ksq_timeshare[1]); 914a8949de2SJeff Roberson runq_init(&kseq->ksq_idle); 91515dc847eSJeff Roberson kseq->ksq_curr = &kseq->ksq_timeshare[0]; 91615dc847eSJeff Roberson kseq->ksq_next = &kseq->ksq_timeshare[1]; 9177cd650a9SJeff Roberson kseq->ksq_load = 0; 918ef1134c9SJeff Roberson kseq->ksq_load_timeshare = 0; 9190a016a05SJeff Roberson } 9200a016a05SJeff Roberson 92135e6168fSJeff Roberson static void 92235e6168fSJeff Roberson sched_setup(void *dummy) 92335e6168fSJeff Roberson { 9240ec896fdSJeff Roberson #ifdef SMP 92535e6168fSJeff Roberson int i; 9260ec896fdSJeff Roberson #endif 92735e6168fSJeff Roberson 928a1d4fe69SDavid Xu /* 929a1d4fe69SDavid Xu * To avoid divide-by-zero, we set realstathz a dummy value 930a1d4fe69SDavid Xu * in case which sched_clock() called before sched_initticks(). 
931a1d4fe69SDavid Xu */ 932a1d4fe69SDavid Xu realstathz = hz; 933e493a5d9SJeff Roberson slice_min = (hz/100); /* 10ms */ 934e493a5d9SJeff Roberson slice_max = (hz/7); /* ~140ms */ 935e1f89c22SJeff Roberson 936356500a3SJeff Roberson #ifdef SMP 937cac77d04SJeff Roberson balance_groups = 0; 93880f86c9fSJeff Roberson /* 93980f86c9fSJeff Roberson * Initialize the kseqs. 94080f86c9fSJeff Roberson */ 941749d01b0SJeff Roberson for (i = 0; i < MAXCPU; i++) { 94280f86c9fSJeff Roberson struct kseq *ksq; 94380f86c9fSJeff Roberson 94480f86c9fSJeff Roberson ksq = &kseq_cpu[i]; 94580f86c9fSJeff Roberson ksq->ksq_assigned = NULL; 946749d01b0SJeff Roberson kseq_setup(&kseq_cpu[i]); 94780f86c9fSJeff Roberson } 94880f86c9fSJeff Roberson if (smp_topology == NULL) { 94980f86c9fSJeff Roberson struct kseq_group *ksg; 95080f86c9fSJeff Roberson struct kseq *ksq; 951598b368dSJeff Roberson int cpus; 95280f86c9fSJeff Roberson 953598b368dSJeff Roberson for (cpus = 0, i = 0; i < MAXCPU; i++) { 954598b368dSJeff Roberson if (CPU_ABSENT(i)) 955598b368dSJeff Roberson continue; 9569f8eb3cbSDavid Xu ksq = &kseq_cpu[i]; 957598b368dSJeff Roberson ksg = &kseq_groups[cpus]; 95880f86c9fSJeff Roberson /* 959dc03363dSJeff Roberson * Setup a kseq group with one member. 
96080f86c9fSJeff Roberson */ 96180f86c9fSJeff Roberson ksq->ksq_transferable = 0; 96280f86c9fSJeff Roberson ksq->ksq_group = ksg; 96380f86c9fSJeff Roberson ksg->ksg_cpus = 1; 96480f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 96580f86c9fSJeff Roberson ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 966cac77d04SJeff Roberson ksg->ksg_load = 0; 96780f86c9fSJeff Roberson ksg->ksg_transferable = 0; 96880f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 96980f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 970598b368dSJeff Roberson cpus++; 971749d01b0SJeff Roberson } 972598b368dSJeff Roberson ksg_maxid = cpus - 1; 973749d01b0SJeff Roberson } else { 97480f86c9fSJeff Roberson struct kseq_group *ksg; 97580f86c9fSJeff Roberson struct cpu_group *cg; 976749d01b0SJeff Roberson int j; 977749d01b0SJeff Roberson 978749d01b0SJeff Roberson for (i = 0; i < smp_topology->ct_count; i++) { 979749d01b0SJeff Roberson cg = &smp_topology->ct_group[i]; 98080f86c9fSJeff Roberson ksg = &kseq_groups[i]; 98180f86c9fSJeff Roberson /* 98280f86c9fSJeff Roberson * Initialize the group. 98380f86c9fSJeff Roberson */ 98480f86c9fSJeff Roberson ksg->ksg_idlemask = 0; 985cac77d04SJeff Roberson ksg->ksg_load = 0; 98680f86c9fSJeff Roberson ksg->ksg_transferable = 0; 98780f86c9fSJeff Roberson ksg->ksg_cpus = cg->cg_count; 98880f86c9fSJeff Roberson ksg->ksg_cpumask = cg->cg_mask; 98980f86c9fSJeff Roberson LIST_INIT(&ksg->ksg_members); 99080f86c9fSJeff Roberson /* 99180f86c9fSJeff Roberson * Find all of the group members and add them. 
99280f86c9fSJeff Roberson */ 99380f86c9fSJeff Roberson for (j = 0; j < MAXCPU; j++) { 99480f86c9fSJeff Roberson if ((cg->cg_mask & (1 << j)) != 0) { 99580f86c9fSJeff Roberson if (ksg->ksg_mask == 0) 99680f86c9fSJeff Roberson ksg->ksg_mask = 1 << j; 99780f86c9fSJeff Roberson kseq_cpu[j].ksq_transferable = 0; 99880f86c9fSJeff Roberson kseq_cpu[j].ksq_group = ksg; 99980f86c9fSJeff Roberson LIST_INSERT_HEAD(&ksg->ksg_members, 100080f86c9fSJeff Roberson &kseq_cpu[j], ksq_siblings); 100180f86c9fSJeff Roberson } 100280f86c9fSJeff Roberson } 1003cac77d04SJeff Roberson if (ksg->ksg_cpus > 1) 1004cac77d04SJeff Roberson balance_groups = 1; 1005749d01b0SJeff Roberson } 1006cac77d04SJeff Roberson ksg_maxid = smp_topology->ct_count - 1; 1007749d01b0SJeff Roberson } 1008cac77d04SJeff Roberson /* 1009cac77d04SJeff Roberson * Stagger the group and global load balancer so they do not 1010cac77d04SJeff Roberson * interfere with each other. 1011cac77d04SJeff Roberson */ 1012dc03363dSJeff Roberson bal_tick = ticks + hz; 1013cac77d04SJeff Roberson if (balance_groups) 1014dc03363dSJeff Roberson gbal_tick = ticks + (hz / 2); 1015749d01b0SJeff Roberson #else 1016749d01b0SJeff Roberson kseq_setup(KSEQ_SELF()); 1017356500a3SJeff Roberson #endif 1018749d01b0SJeff Roberson mtx_lock_spin(&sched_lock); 1019155b9987SJeff Roberson kseq_load_add(KSEQ_SELF(), &kse0); 1020749d01b0SJeff Roberson mtx_unlock_spin(&sched_lock); 102135e6168fSJeff Roberson } 102235e6168fSJeff Roberson 1023a1d4fe69SDavid Xu /* ARGSUSED */ 1024a1d4fe69SDavid Xu static void 1025a1d4fe69SDavid Xu sched_initticks(void *dummy) 1026a1d4fe69SDavid Xu { 1027a1d4fe69SDavid Xu mtx_lock_spin(&sched_lock); 1028a1d4fe69SDavid Xu realstathz = stathz ? 
stathz : hz; 1029a1d4fe69SDavid Xu slice_min = (realstathz/100); /* 10ms */ 1030a1d4fe69SDavid Xu slice_max = (realstathz/7); /* ~140ms */ 1031a1d4fe69SDavid Xu 1032a1d4fe69SDavid Xu tickincr = (hz << 10) / realstathz; 1033a1d4fe69SDavid Xu /* 1034a1d4fe69SDavid Xu * XXX This does not work for values of stathz that are much 1035a1d4fe69SDavid Xu * larger than hz. 1036a1d4fe69SDavid Xu */ 1037a1d4fe69SDavid Xu if (tickincr == 0) 1038a1d4fe69SDavid Xu tickincr = 1; 1039a1d4fe69SDavid Xu mtx_unlock_spin(&sched_lock); 1040a1d4fe69SDavid Xu } 1041a1d4fe69SDavid Xu 1042a1d4fe69SDavid Xu 104335e6168fSJeff Roberson /* 104435e6168fSJeff Roberson * Scale the scheduling priority according to the "interactivity" of this 104535e6168fSJeff Roberson * process. 104635e6168fSJeff Roberson */ 104715dc847eSJeff Roberson static void 104835e6168fSJeff Roberson sched_priority(struct ksegrp *kg) 104935e6168fSJeff Roberson { 105035e6168fSJeff Roberson int pri; 105135e6168fSJeff Roberson 105235e6168fSJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 105315dc847eSJeff Roberson return; 105435e6168fSJeff Roberson 105515dc847eSJeff Roberson pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 1056e1f89c22SJeff Roberson pri += SCHED_PRI_BASE; 1057fa885116SJulian Elischer pri += kg->kg_proc->p_nice; 105835e6168fSJeff Roberson 105935e6168fSJeff Roberson if (pri > PRI_MAX_TIMESHARE) 106035e6168fSJeff Roberson pri = PRI_MAX_TIMESHARE; 106135e6168fSJeff Roberson else if (pri < PRI_MIN_TIMESHARE) 106235e6168fSJeff Roberson pri = PRI_MIN_TIMESHARE; 106335e6168fSJeff Roberson 10643db720fdSDavid Xu sched_user_prio(kg, pri); 106535e6168fSJeff Roberson 106615dc847eSJeff Roberson return; 106735e6168fSJeff Roberson } 106835e6168fSJeff Roberson 106935e6168fSJeff Roberson /* 1070245f3abfSJeff Roberson * Calculate a time slice based on the properties of the kseg and the runq 1071a8949de2SJeff Roberson * that we're on. This is only for PRI_TIMESHARE ksegrps. 
107235e6168fSJeff Roberson */ 1073245f3abfSJeff Roberson static void 1074245f3abfSJeff Roberson sched_slice(struct kse *ke) 107535e6168fSJeff Roberson { 107615dc847eSJeff Roberson struct kseq *kseq; 1077245f3abfSJeff Roberson struct ksegrp *kg; 107835e6168fSJeff Roberson 1079245f3abfSJeff Roberson kg = ke->ke_ksegrp; 108015dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 108135e6168fSJeff Roberson 1082f5c157d9SJohn Baldwin if (ke->ke_thread->td_flags & TDF_BORROWING) { 10838ffb8f55SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 10848ffb8f55SJeff Roberson return; 10858ffb8f55SJeff Roberson } 10868ffb8f55SJeff Roberson 1087245f3abfSJeff Roberson /* 1088245f3abfSJeff Roberson * Rationale: 10892454aaf5SJeff Roberson * KSEs in interactive ksegs get a minimal slice so that we 1090245f3abfSJeff Roberson * quickly notice if it abuses its advantage. 1091245f3abfSJeff Roberson * 1092245f3abfSJeff Roberson * KSEs in non-interactive ksegs are assigned a slice that is 1093245f3abfSJeff Roberson * based on the ksegs nice value relative to the least nice kseg 1094245f3abfSJeff Roberson * on the run queue for this cpu. 1095245f3abfSJeff Roberson * 1096245f3abfSJeff Roberson * If the KSE is less nice than all others it gets the maximum 1097245f3abfSJeff Roberson * slice and other KSEs will adjust their slice relative to 1098245f3abfSJeff Roberson * this when they first expire. 1099245f3abfSJeff Roberson * 1100245f3abfSJeff Roberson * There is 20 point window that starts relative to the least 1101245f3abfSJeff Roberson * nice kse on the run queue. Slice size is determined by 1102245f3abfSJeff Roberson * the kse distance from the last nice ksegrp. 1103245f3abfSJeff Roberson * 11047d1a81b4SJeff Roberson * If the kse is outside of the window it will get no slice 11057d1a81b4SJeff Roberson * and will be reevaluated each time it is selected on the 11067d1a81b4SJeff Roberson * run queue. The exception to this is nice 0 ksegs when 11077d1a81b4SJeff Roberson * a nice -20 is running. 
They are always granted a minimum 11087d1a81b4SJeff Roberson * slice. 1109245f3abfSJeff Roberson */ 111015dc847eSJeff Roberson if (!SCHED_INTERACTIVE(kg)) { 1111245f3abfSJeff Roberson int nice; 1112245f3abfSJeff Roberson 1113fa885116SJulian Elischer nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1114ef1134c9SJeff Roberson if (kseq->ksq_load_timeshare == 0 || 1115fa885116SJulian Elischer kg->kg_proc->p_nice < kseq->ksq_nicemin) 1116245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 11177d1a81b4SJeff Roberson else if (nice <= SCHED_SLICE_NTHRESH) 1118245f3abfSJeff Roberson ke->ke_slice = SCHED_SLICE_NICE(nice); 1119fa885116SJulian Elischer else if (kg->kg_proc->p_nice == 0) 11207d1a81b4SJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 1121245f3abfSJeff Roberson else 1122a8615740SDavid Xu ke->ke_slice = SCHED_SLICE_MIN; /* 0 */ 1123245f3abfSJeff Roberson } else 11249b5f6f62SJeff Roberson ke->ke_slice = SCHED_SLICE_INTERACTIVE; 112535e6168fSJeff Roberson 1126245f3abfSJeff Roberson return; 112735e6168fSJeff Roberson } 112835e6168fSJeff Roberson 1129d322132cSJeff Roberson /* 1130d322132cSJeff Roberson * This routine enforces a maximum limit on the amount of scheduling history 1131d322132cSJeff Roberson * kept. It is called after either the slptime or runtime is adjusted. 1132d322132cSJeff Roberson * This routine will not operate correctly when slp or run times have been 1133d322132cSJeff Roberson * adjusted to more than double their maximum. 
1134d322132cSJeff Roberson */ 11354b60e324SJeff Roberson static void 11364b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg) 11374b60e324SJeff Roberson { 1138d322132cSJeff Roberson int sum; 11393f741ca1SJeff Roberson 1140d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1141d322132cSJeff Roberson if (sum < SCHED_SLP_RUN_MAX) 1142d322132cSJeff Roberson return; 1143d322132cSJeff Roberson /* 1144d322132cSJeff Roberson * If we have exceeded by more than 1/5th then the algorithm below 1145d322132cSJeff Roberson * will not bring us back into range. Dividing by two here forces 11462454aaf5SJeff Roberson * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1147d322132cSJeff Roberson */ 114837a35e4aSJeff Roberson if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1149d322132cSJeff Roberson kg->kg_runtime /= 2; 1150d322132cSJeff Roberson kg->kg_slptime /= 2; 1151d322132cSJeff Roberson return; 1152d322132cSJeff Roberson } 1153d322132cSJeff Roberson kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1154d322132cSJeff Roberson kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1155d322132cSJeff Roberson } 1156d322132cSJeff Roberson 1157d322132cSJeff Roberson static void 1158d322132cSJeff Roberson sched_interact_fork(struct ksegrp *kg) 1159d322132cSJeff Roberson { 1160d322132cSJeff Roberson int ratio; 1161d322132cSJeff Roberson int sum; 1162d322132cSJeff Roberson 1163d322132cSJeff Roberson sum = kg->kg_runtime + kg->kg_slptime; 1164d322132cSJeff Roberson if (sum > SCHED_SLP_RUN_FORK) { 1165d322132cSJeff Roberson ratio = sum / SCHED_SLP_RUN_FORK; 1166d322132cSJeff Roberson kg->kg_runtime /= ratio; 1167d322132cSJeff Roberson kg->kg_slptime /= ratio; 11684b60e324SJeff Roberson } 11694b60e324SJeff Roberson } 11704b60e324SJeff Roberson 1171e1f89c22SJeff Roberson static int 1172e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg) 1173e1f89c22SJeff Roberson { 1174210491d3SJeff Roberson int div; 1175e1f89c22SJeff Roberson 1176e1f89c22SJeff Roberson if 
(kg->kg_runtime > kg->kg_slptime) { 1177210491d3SJeff Roberson div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1178210491d3SJeff Roberson return (SCHED_INTERACT_HALF + 1179210491d3SJeff Roberson (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1180210491d3SJeff Roberson } if (kg->kg_slptime > kg->kg_runtime) { 1181210491d3SJeff Roberson div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1182210491d3SJeff Roberson return (kg->kg_runtime / div); 1183e1f89c22SJeff Roberson } 1184e1f89c22SJeff Roberson 1185210491d3SJeff Roberson /* 1186210491d3SJeff Roberson * This can happen if slptime and runtime are 0. 1187210491d3SJeff Roberson */ 1188210491d3SJeff Roberson return (0); 1189e1f89c22SJeff Roberson 1190e1f89c22SJeff Roberson } 1191e1f89c22SJeff Roberson 119215dc847eSJeff Roberson /* 1193ed062c8dSJulian Elischer * Very early in the boot some setup of scheduler-specific 1194ed062c8dSJulian Elischer * parts of proc0 and of soem scheduler resources needs to be done. 1195ed062c8dSJulian Elischer * Called from: 1196ed062c8dSJulian Elischer * proc0_init() 1197ed062c8dSJulian Elischer */ 1198ed062c8dSJulian Elischer void 1199ed062c8dSJulian Elischer schedinit(void) 1200ed062c8dSJulian Elischer { 1201ed062c8dSJulian Elischer /* 1202ed062c8dSJulian Elischer * Set up the scheduler specific parts of proc0. 
1203ed062c8dSJulian Elischer */ 1204ed062c8dSJulian Elischer proc0.p_sched = NULL; /* XXX */ 1205d39063f2SJulian Elischer ksegrp0.kg_sched = &kg_sched0; 1206d39063f2SJulian Elischer thread0.td_sched = &kse0; 1207ed062c8dSJulian Elischer kse0.ke_thread = &thread0; 1208ed062c8dSJulian Elischer kse0.ke_state = KES_THREAD; 1209ed062c8dSJulian Elischer kg_sched0.skg_concurrency = 1; 1210ed062c8dSJulian Elischer kg_sched0.skg_avail_opennings = 0; /* we are already running */ 1211ed062c8dSJulian Elischer } 1212ed062c8dSJulian Elischer 1213ed062c8dSJulian Elischer /* 121415dc847eSJeff Roberson * This is only somewhat accurate since given many processes of the same 121515dc847eSJeff Roberson * priority they will switch when their slices run out, which will be 121615dc847eSJeff Roberson * at most SCHED_SLICE_MAX. 121715dc847eSJeff Roberson */ 121835e6168fSJeff Roberson int 121935e6168fSJeff Roberson sched_rr_interval(void) 122035e6168fSJeff Roberson { 122135e6168fSJeff Roberson return (SCHED_SLICE_MAX); 122235e6168fSJeff Roberson } 122335e6168fSJeff Roberson 122422bf7d9aSJeff Roberson static void 122535e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke) 122635e6168fSJeff Roberson { 122735e6168fSJeff Roberson /* 122835e6168fSJeff Roberson * Adjust counters and watermark for pctcpu calc. 1229210491d3SJeff Roberson */ 123081de51bfSJeff Roberson if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1231210491d3SJeff Roberson /* 123281de51bfSJeff Roberson * Shift the tick count out so that the divide doesn't 123381de51bfSJeff Roberson * round away our results. 
123465c8760dSJeff Roberson */ 123565c8760dSJeff Roberson ke->ke_ticks <<= 10; 123681de51bfSJeff Roberson ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 123735e6168fSJeff Roberson SCHED_CPU_TICKS; 123865c8760dSJeff Roberson ke->ke_ticks >>= 10; 123981de51bfSJeff Roberson } else 124081de51bfSJeff Roberson ke->ke_ticks = 0; 124135e6168fSJeff Roberson ke->ke_ltick = ticks; 124235e6168fSJeff Roberson ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 124335e6168fSJeff Roberson } 124435e6168fSJeff Roberson 124535e6168fSJeff Roberson void 1246f5c157d9SJohn Baldwin sched_thread_priority(struct thread *td, u_char prio) 124735e6168fSJeff Roberson { 12483f741ca1SJeff Roberson struct kse *ke; 124935e6168fSJeff Roberson 125081d47d3fSJeff Roberson CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 125181d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, prio, curthread, 125281d47d3fSJeff Roberson curthread->td_proc->p_comm); 12533f741ca1SJeff Roberson ke = td->td_kse; 125435e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1255f5c157d9SJohn Baldwin if (td->td_priority == prio) 1256f5c157d9SJohn Baldwin return; 125735e6168fSJeff Roberson if (TD_ON_RUNQ(td)) { 12583f741ca1SJeff Roberson /* 12593f741ca1SJeff Roberson * If the priority has been elevated due to priority 12603f741ca1SJeff Roberson * propagation, we may have to move ourselves to a new 12613f741ca1SJeff Roberson * queue. We still call adjustrunqueue below in case kse 12623f741ca1SJeff Roberson * needs to fix things up. 
12633f741ca1SJeff Roberson */ 12648ffb8f55SJeff Roberson if (prio < td->td_priority && ke->ke_runq != NULL && 1265769a3635SJeff Roberson (ke->ke_flags & KEF_ASSIGNED) == 0 && 126622bf7d9aSJeff Roberson ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 12673f741ca1SJeff Roberson runq_remove(ke->ke_runq, ke); 12683f741ca1SJeff Roberson ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1269c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, 0); 127035e6168fSJeff Roberson } 1271f2b74cbfSJeff Roberson /* 1272f2b74cbfSJeff Roberson * Hold this kse on this cpu so that sched_prio() doesn't 1273f2b74cbfSJeff Roberson * cause excessive migration. We only want migration to 1274f2b74cbfSJeff Roberson * happen as the result of a wakeup. 1275f2b74cbfSJeff Roberson */ 1276f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 12773f741ca1SJeff Roberson adjustrunqueue(td, prio); 1278598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 12793f741ca1SJeff Roberson } else 12803f741ca1SJeff Roberson td->td_priority = prio; 128135e6168fSJeff Roberson } 128235e6168fSJeff Roberson 1283f5c157d9SJohn Baldwin /* 1284f5c157d9SJohn Baldwin * Update a thread's priority when it is lent another thread's 1285f5c157d9SJohn Baldwin * priority. 1286f5c157d9SJohn Baldwin */ 1287f5c157d9SJohn Baldwin void 1288f5c157d9SJohn Baldwin sched_lend_prio(struct thread *td, u_char prio) 1289f5c157d9SJohn Baldwin { 1290f5c157d9SJohn Baldwin 1291f5c157d9SJohn Baldwin td->td_flags |= TDF_BORROWING; 1292f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1293f5c157d9SJohn Baldwin } 1294f5c157d9SJohn Baldwin 1295f5c157d9SJohn Baldwin /* 1296f5c157d9SJohn Baldwin * Restore a thread's priority when priority propagation is 1297f5c157d9SJohn Baldwin * over. The prio argument is the minimum priority the thread 1298f5c157d9SJohn Baldwin * needs to have to satisfy other possible priority lending 1299f5c157d9SJohn Baldwin * requests. 
If the thread's regular priority is less 1300f5c157d9SJohn Baldwin * important than prio, the thread will keep a priority boost 1301f5c157d9SJohn Baldwin * of prio. 1302f5c157d9SJohn Baldwin */ 1303f5c157d9SJohn Baldwin void 1304f5c157d9SJohn Baldwin sched_unlend_prio(struct thread *td, u_char prio) 1305f5c157d9SJohn Baldwin { 1306f5c157d9SJohn Baldwin u_char base_pri; 1307f5c157d9SJohn Baldwin 1308f5c157d9SJohn Baldwin if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1309f5c157d9SJohn Baldwin td->td_base_pri <= PRI_MAX_TIMESHARE) 1310f5c157d9SJohn Baldwin base_pri = td->td_ksegrp->kg_user_pri; 1311f5c157d9SJohn Baldwin else 1312f5c157d9SJohn Baldwin base_pri = td->td_base_pri; 1313f5c157d9SJohn Baldwin if (prio >= base_pri) { 1314f5c157d9SJohn Baldwin td->td_flags &= ~TDF_BORROWING; 1315f5c157d9SJohn Baldwin sched_thread_priority(td, base_pri); 1316f5c157d9SJohn Baldwin } else 1317f5c157d9SJohn Baldwin sched_lend_prio(td, prio); 1318f5c157d9SJohn Baldwin } 1319f5c157d9SJohn Baldwin 1320f5c157d9SJohn Baldwin void 1321f5c157d9SJohn Baldwin sched_prio(struct thread *td, u_char prio) 1322f5c157d9SJohn Baldwin { 1323f5c157d9SJohn Baldwin u_char oldprio; 1324f5c157d9SJohn Baldwin 1325f5c157d9SJohn Baldwin /* First, update the base priority. */ 1326f5c157d9SJohn Baldwin td->td_base_pri = prio; 1327f5c157d9SJohn Baldwin 1328f5c157d9SJohn Baldwin /* 132950aaa791SJohn Baldwin * If the thread is borrowing another thread's priority, don't 1330f5c157d9SJohn Baldwin * ever lower the priority. 1331f5c157d9SJohn Baldwin */ 1332f5c157d9SJohn Baldwin if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1333f5c157d9SJohn Baldwin return; 1334f5c157d9SJohn Baldwin 1335f5c157d9SJohn Baldwin /* Change the real priority. 
*/ 1336f5c157d9SJohn Baldwin oldprio = td->td_priority; 1337f5c157d9SJohn Baldwin sched_thread_priority(td, prio); 1338f5c157d9SJohn Baldwin 1339f5c157d9SJohn Baldwin /* 1340f5c157d9SJohn Baldwin * If the thread is on a turnstile, then let the turnstile update 1341f5c157d9SJohn Baldwin * its state. 1342f5c157d9SJohn Baldwin */ 1343f5c157d9SJohn Baldwin if (TD_ON_LOCK(td) && oldprio != prio) 1344f5c157d9SJohn Baldwin turnstile_adjust(td, oldprio); 1345f5c157d9SJohn Baldwin } 1346f5c157d9SJohn Baldwin 134735e6168fSJeff Roberson void 13483db720fdSDavid Xu sched_user_prio(struct ksegrp *kg, u_char prio) 13493db720fdSDavid Xu { 13503db720fdSDavid Xu struct thread *td; 13513db720fdSDavid Xu u_char oldprio; 13523db720fdSDavid Xu 13533db720fdSDavid Xu kg->kg_base_user_pri = prio; 13543db720fdSDavid Xu 13553db720fdSDavid Xu /* XXXKSE only for 1:1 */ 13563db720fdSDavid Xu 13573db720fdSDavid Xu td = TAILQ_FIRST(&kg->kg_threads); 13583db720fdSDavid Xu if (td == NULL) { 13593db720fdSDavid Xu kg->kg_user_pri = prio; 13603db720fdSDavid Xu return; 13613db720fdSDavid Xu } 13623db720fdSDavid Xu 13633db720fdSDavid Xu if (td->td_flags & TDF_UBORROWING && kg->kg_user_pri <= prio) 13643db720fdSDavid Xu return; 13653db720fdSDavid Xu 13663db720fdSDavid Xu oldprio = kg->kg_user_pri; 13673db720fdSDavid Xu kg->kg_user_pri = prio; 13683db720fdSDavid Xu 13693db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13703db720fdSDavid Xu umtx_pi_adjust(td, oldprio); 13713db720fdSDavid Xu } 13723db720fdSDavid Xu 13733db720fdSDavid Xu void 13743db720fdSDavid Xu sched_lend_user_prio(struct thread *td, u_char prio) 13753db720fdSDavid Xu { 13763db720fdSDavid Xu u_char oldprio; 13773db720fdSDavid Xu 13783db720fdSDavid Xu td->td_flags |= TDF_UBORROWING; 13793db720fdSDavid Xu 13803db720fdSDavid Xu oldprio = td->td_ksegrp->kg_user_pri; 13813db720fdSDavid Xu td->td_ksegrp->kg_user_pri = prio; 13823db720fdSDavid Xu 13833db720fdSDavid Xu if (TD_ON_UPILOCK(td) && oldprio != prio) 13843db720fdSDavid Xu 
umtx_pi_adjust(td, oldprio); 13853db720fdSDavid Xu } 13863db720fdSDavid Xu 13873db720fdSDavid Xu void 13883db720fdSDavid Xu sched_unlend_user_prio(struct thread *td, u_char prio) 13893db720fdSDavid Xu { 13903db720fdSDavid Xu struct ksegrp *kg = td->td_ksegrp; 13913db720fdSDavid Xu u_char base_pri; 13923db720fdSDavid Xu 13933db720fdSDavid Xu base_pri = kg->kg_base_user_pri; 13943db720fdSDavid Xu if (prio >= base_pri) { 13953db720fdSDavid Xu td->td_flags &= ~TDF_UBORROWING; 13963db720fdSDavid Xu sched_user_prio(kg, base_pri); 13973db720fdSDavid Xu } else 13983db720fdSDavid Xu sched_lend_user_prio(td, prio); 13993db720fdSDavid Xu } 14003db720fdSDavid Xu 14013db720fdSDavid Xu void 14023389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 140335e6168fSJeff Roberson { 1404598b368dSJeff Roberson struct kseq *ksq; 140535e6168fSJeff Roberson struct kse *ke; 140635e6168fSJeff Roberson 140735e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 140835e6168fSJeff Roberson 140935e6168fSJeff Roberson ke = td->td_kse; 1410598b368dSJeff Roberson ksq = KSEQ_SELF(); 141135e6168fSJeff Roberson 1412060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 1413060563ecSJulian Elischer td->td_oncpu = NOCPU; 141452eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 141577918643SStephan Uphoff td->td_owepreempt = 0; 141635e6168fSJeff Roberson 1417b11fdad0SJeff Roberson /* 1418b11fdad0SJeff Roberson * If the KSE has been assigned it may be in the process of switching 1419b11fdad0SJeff Roberson * to the new cpu. This is the case in sched_bind(). 
1420b11fdad0SJeff Roberson */ 14212454aaf5SJeff Roberson if (td == PCPU_GET(idlethread)) { 1422bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1423598b368dSJeff Roberson } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 1424ed062c8dSJulian Elischer /* We are ending our run so make our slot available again */ 1425d39063f2SJulian Elischer SLOT_RELEASE(td->td_ksegrp); 1426598b368dSJeff Roberson kseq_load_rem(ksq, ke); 1427ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1428f2b74cbfSJeff Roberson /* 1429ed062c8dSJulian Elischer * Don't allow the thread to migrate 1430ed062c8dSJulian Elischer * from a preemption. 1431f2b74cbfSJeff Roberson */ 1432f2b74cbfSJeff Roberson ke->ke_flags |= KEF_HOLD; 1433598b368dSJeff Roberson setrunqueue(td, (flags & SW_PREEMPT) ? 1434598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1435598b368dSJeff Roberson SRQ_OURSELF|SRQ_YIELDING); 1436598b368dSJeff Roberson ke->ke_flags &= ~KEF_HOLD; 1437598b368dSJeff Roberson } else if ((td->td_proc->p_flag & P_HADTHREADS) && 1438598b368dSJeff Roberson (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) 143935e6168fSJeff Roberson /* 1440ed062c8dSJulian Elischer * We will not be on the run queue. 1441ed062c8dSJulian Elischer * So we must be sleeping or similar. 1442c20c691bSJulian Elischer * Don't use the slot if we will need it 1443c20c691bSJulian Elischer * for newtd. 144435e6168fSJeff Roberson */ 1445ed062c8dSJulian Elischer slot_fill(td->td_ksegrp); 1446ed062c8dSJulian Elischer } 1447d39063f2SJulian Elischer if (newtd != NULL) { 1448c20c691bSJulian Elischer /* 14496680bbd5SJeff Roberson * If we bring in a thread account for it as if it had been 14506680bbd5SJeff Roberson * added to the run queue and then chosen. 
1451c20c691bSJulian Elischer */ 1452c5c3fb33SJulian Elischer newtd->td_kse->ke_flags |= KEF_DIDRUN; 1453598b368dSJeff Roberson newtd->td_kse->ke_runq = ksq->ksq_curr; 1454c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1455bf0acc27SJohn Baldwin kseq_load_add(KSEQ_SELF(), newtd->td_kse); 14566680bbd5SJeff Roberson /* 14576680bbd5SJeff Roberson * XXX When we preempt, we've already consumed a slot because 14586680bbd5SJeff Roberson * we got here through sched_add(). However, newtd can come 14596680bbd5SJeff Roberson * from thread_switchout() which can't SLOT_USE() because 14606680bbd5SJeff Roberson * the SLOT code is scheduler dependent. We must use the 14616680bbd5SJeff Roberson * slot here otherwise. 14626680bbd5SJeff Roberson */ 14636680bbd5SJeff Roberson if ((flags & SW_PREEMPT) == 0) 14646680bbd5SJeff Roberson SLOT_USE(newtd->td_ksegrp); 1465d39063f2SJulian Elischer } else 14662454aaf5SJeff Roberson newtd = choosethread(); 1467ebccf1e3SJoseph Koshy if (td != newtd) { 1468ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1469ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1470ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1471ebccf1e3SJoseph Koshy #endif 1472ae53b483SJeff Roberson cpu_switch(td, newtd); 1473ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1474ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1475ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1476ebccf1e3SJoseph Koshy #endif 1477ebccf1e3SJoseph Koshy } 1478ebccf1e3SJoseph Koshy 1479ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 148035e6168fSJeff Roberson 1481060563ecSJulian Elischer td->td_oncpu = PCPU_GET(cpuid); 148235e6168fSJeff Roberson } 148335e6168fSJeff Roberson 148435e6168fSJeff Roberson void 1485fa885116SJulian Elischer sched_nice(struct proc *p, int nice) 148635e6168fSJeff Roberson { 1487fa885116SJulian Elischer struct ksegrp *kg; 148815dc847eSJeff Roberson struct kse *ke; 148935e6168fSJeff Roberson struct thread *td; 149015dc847eSJeff 
Roberson struct kseq *kseq; 149135e6168fSJeff Roberson 1492fa885116SJulian Elischer PROC_LOCK_ASSERT(p, MA_OWNED); 14930b5318c8SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 149415dc847eSJeff Roberson /* 149515dc847eSJeff Roberson * We need to adjust the nice counts for running KSEs. 149615dc847eSJeff Roberson */ 1497fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 1498fa885116SJulian Elischer if (kg->kg_pri_class == PRI_TIMESHARE) { 1499ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1500ed062c8dSJulian Elischer ke = td->td_kse; 1501d07ac847SJeff Roberson if (ke->ke_runq == NULL) 150215dc847eSJeff Roberson continue; 150315dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1504fa885116SJulian Elischer kseq_nice_rem(kseq, p->p_nice); 150515dc847eSJeff Roberson kseq_nice_add(kseq, nice); 150615dc847eSJeff Roberson } 1507fa885116SJulian Elischer } 1508fa885116SJulian Elischer } 1509fa885116SJulian Elischer p->p_nice = nice; 1510fa885116SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 151135e6168fSJeff Roberson sched_priority(kg); 151215dc847eSJeff Roberson FOREACH_THREAD_IN_GROUP(kg, td) 15134a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 151435e6168fSJeff Roberson } 1515fa885116SJulian Elischer } 151635e6168fSJeff Roberson 151735e6168fSJeff Roberson void 151844f3b092SJohn Baldwin sched_sleep(struct thread *td) 151935e6168fSJeff Roberson { 152035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 152135e6168fSJeff Roberson 152235e6168fSJeff Roberson td->td_slptime = ticks; 152335e6168fSJeff Roberson } 152435e6168fSJeff Roberson 152535e6168fSJeff Roberson void 152635e6168fSJeff Roberson sched_wakeup(struct thread *td) 152735e6168fSJeff Roberson { 152835e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 152935e6168fSJeff Roberson 153035e6168fSJeff Roberson /* 153135e6168fSJeff Roberson * Let the kseg know how long we slept for. This is because process 153235e6168fSJeff Roberson * interactivity behavior is modeled in the kseg. 
153335e6168fSJeff Roberson */ 153435e6168fSJeff Roberson if (td->td_slptime) { 1535f1e8dc4aSJeff Roberson struct ksegrp *kg; 153615dc847eSJeff Roberson int hzticks; 1537f1e8dc4aSJeff Roberson 1538f1e8dc4aSJeff Roberson kg = td->td_ksegrp; 1539d322132cSJeff Roberson hzticks = (ticks - td->td_slptime) << 10; 1540d322132cSJeff Roberson if (hzticks >= SCHED_SLP_RUN_MAX) { 1541d322132cSJeff Roberson kg->kg_slptime = SCHED_SLP_RUN_MAX; 1542d322132cSJeff Roberson kg->kg_runtime = 1; 1543d322132cSJeff Roberson } else { 1544d322132cSJeff Roberson kg->kg_slptime += hzticks; 15454b60e324SJeff Roberson sched_interact_update(kg); 1546d322132cSJeff Roberson } 1547f1e8dc4aSJeff Roberson sched_priority(kg); 15484b60e324SJeff Roberson sched_slice(td->td_kse); 154935e6168fSJeff Roberson td->td_slptime = 0; 1550f1e8dc4aSJeff Roberson } 15512630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 155235e6168fSJeff Roberson } 155335e6168fSJeff Roberson 155435e6168fSJeff Roberson /* 155535e6168fSJeff Roberson * Penalize the parent for creating a new child and initialize the child's 155635e6168fSJeff Roberson * priority. 
155735e6168fSJeff Roberson */ 155835e6168fSJeff Roberson void 1559ed062c8dSJulian Elischer sched_fork(struct thread *td, struct thread *childtd) 156035e6168fSJeff Roberson { 156135e6168fSJeff Roberson 156235e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 156335e6168fSJeff Roberson 1564ed062c8dSJulian Elischer sched_fork_ksegrp(td, childtd->td_ksegrp); 1565ed062c8dSJulian Elischer sched_fork_thread(td, childtd); 156615dc847eSJeff Roberson } 156715dc847eSJeff Roberson 156815dc847eSJeff Roberson void 156955d44f79SJulian Elischer sched_fork_ksegrp(struct thread *td, struct ksegrp *child) 157015dc847eSJeff Roberson { 157155d44f79SJulian Elischer struct ksegrp *kg = td->td_ksegrp; 1572ed062c8dSJulian Elischer mtx_assert(&sched_lock, MA_OWNED); 1573210491d3SJeff Roberson 1574d322132cSJeff Roberson child->kg_slptime = kg->kg_slptime; 1575d322132cSJeff Roberson child->kg_runtime = kg->kg_runtime; 1576d322132cSJeff Roberson child->kg_user_pri = kg->kg_user_pri; 15773db720fdSDavid Xu child->kg_base_user_pri = kg->kg_base_user_pri; 1578d322132cSJeff Roberson sched_interact_fork(child); 1579a1d4fe69SDavid Xu kg->kg_runtime += tickincr; 15804b60e324SJeff Roberson sched_interact_update(kg); 1581c9f25d8fSJeff Roberson } 1582c9f25d8fSJeff Roberson 158315dc847eSJeff Roberson void 158415dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child) 158515dc847eSJeff Roberson { 1586ed062c8dSJulian Elischer struct kse *ke; 1587ed062c8dSJulian Elischer struct kse *ke2; 1588ed062c8dSJulian Elischer 1589ed062c8dSJulian Elischer sched_newthread(child); 1590ed062c8dSJulian Elischer ke = td->td_kse; 1591ed062c8dSJulian Elischer ke2 = child->td_kse; 1592ed062c8dSJulian Elischer ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1593ed062c8dSJulian Elischer ke2->ke_cpu = ke->ke_cpu; 1594ed062c8dSJulian Elischer ke2->ke_runq = NULL; 1595ed062c8dSJulian Elischer 1596ed062c8dSJulian Elischer /* Grab our parents cpu estimation information. 
*/ 1597ed062c8dSJulian Elischer ke2->ke_ticks = ke->ke_ticks; 1598ed062c8dSJulian Elischer ke2->ke_ltick = ke->ke_ltick; 1599ed062c8dSJulian Elischer ke2->ke_ftick = ke->ke_ftick; 160015dc847eSJeff Roberson } 160115dc847eSJeff Roberson 160215dc847eSJeff Roberson void 160315dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class) 160415dc847eSJeff Roberson { 160515dc847eSJeff Roberson struct kseq *kseq; 160615dc847eSJeff Roberson struct kse *ke; 1607ed062c8dSJulian Elischer struct thread *td; 1608ef1134c9SJeff Roberson int nclass; 1609ef1134c9SJeff Roberson int oclass; 161015dc847eSJeff Roberson 16112056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 161215dc847eSJeff Roberson if (kg->kg_pri_class == class) 161315dc847eSJeff Roberson return; 161415dc847eSJeff Roberson 1615ef1134c9SJeff Roberson nclass = PRI_BASE(class); 1616ef1134c9SJeff Roberson oclass = PRI_BASE(kg->kg_pri_class); 1617ed062c8dSJulian Elischer FOREACH_THREAD_IN_GROUP(kg, td) { 1618ed062c8dSJulian Elischer ke = td->td_kse; 161942a29039SJeff Roberson if ((ke->ke_state != KES_ONRUNQ && 162042a29039SJeff Roberson ke->ke_state != KES_THREAD) || ke->ke_runq == NULL) 162115dc847eSJeff Roberson continue; 162215dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 162315dc847eSJeff Roberson 1624ef1134c9SJeff Roberson #ifdef SMP 1625155b9987SJeff Roberson /* 1626155b9987SJeff Roberson * On SMP if we're on the RUNQ we must adjust the transferable 1627155b9987SJeff Roberson * count because could be changing to or from an interrupt 1628155b9987SJeff Roberson * class. 
1629155b9987SJeff Roberson */ 1630155b9987SJeff Roberson if (ke->ke_state == KES_ONRUNQ) { 1631598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 163280f86c9fSJeff Roberson kseq->ksq_transferable--; 163380f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable--; 163480f86c9fSJeff Roberson } 1635598b368dSJeff Roberson if (KSE_CAN_MIGRATE(ke)) { 163680f86c9fSJeff Roberson kseq->ksq_transferable++; 163780f86c9fSJeff Roberson kseq->ksq_group->ksg_transferable++; 163880f86c9fSJeff Roberson } 1639155b9987SJeff Roberson } 1640ef1134c9SJeff Roberson #endif 1641155b9987SJeff Roberson if (oclass == PRI_TIMESHARE) { 1642ef1134c9SJeff Roberson kseq->ksq_load_timeshare--; 1643fa885116SJulian Elischer kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1644155b9987SJeff Roberson } 1645155b9987SJeff Roberson if (nclass == PRI_TIMESHARE) { 1646155b9987SJeff Roberson kseq->ksq_load_timeshare++; 1647fa885116SJulian Elischer kseq_nice_add(kseq, kg->kg_proc->p_nice); 164815dc847eSJeff Roberson } 1649155b9987SJeff Roberson } 165015dc847eSJeff Roberson 165115dc847eSJeff Roberson kg->kg_pri_class = class; 165235e6168fSJeff Roberson } 165335e6168fSJeff Roberson 165435e6168fSJeff Roberson /* 165535e6168fSJeff Roberson * Return some of the child's priority and interactivity to the parent. 
165635e6168fSJeff Roberson */ 165735e6168fSJeff Roberson void 1658ed062c8dSJulian Elischer sched_exit(struct proc *p, struct thread *childtd) 165935e6168fSJeff Roberson { 166035e6168fSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1661ed062c8dSJulian Elischer sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd); 166281d47d3fSJeff Roberson sched_exit_thread(NULL, childtd); 1663141ad61cSJeff Roberson } 1664141ad61cSJeff Roberson 1665141ad61cSJeff Roberson void 166655d44f79SJulian Elischer sched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1667141ad61cSJeff Roberson { 166855d44f79SJulian Elischer /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 166955d44f79SJulian Elischer kg->kg_runtime += td->td_ksegrp->kg_runtime; 16704b60e324SJeff Roberson sched_interact_update(kg); 1671141ad61cSJeff Roberson } 1672141ad61cSJeff Roberson 1673141ad61cSJeff Roberson void 1674ed062c8dSJulian Elischer sched_exit_thread(struct thread *td, struct thread *childtd) 1675141ad61cSJeff Roberson { 167681d47d3fSJeff Roberson CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 167781d47d3fSJeff Roberson childtd, childtd->td_proc->p_comm, childtd->td_priority); 1678ed062c8dSJulian Elischer kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse); 167935e6168fSJeff Roberson } 168035e6168fSJeff Roberson 168135e6168fSJeff Roberson void 16827cf90fb3SJeff Roberson sched_clock(struct thread *td) 168335e6168fSJeff Roberson { 168435e6168fSJeff Roberson struct kseq *kseq; 16850a016a05SJeff Roberson struct ksegrp *kg; 16867cf90fb3SJeff Roberson struct kse *ke; 168735e6168fSJeff Roberson 1688dc03363dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 16892454aaf5SJeff Roberson kseq = KSEQ_SELF(); 1690dc03363dSJeff Roberson #ifdef SMP 1691598b368dSJeff Roberson if (ticks >= bal_tick) 1692dc03363dSJeff Roberson sched_balance(); 1693598b368dSJeff Roberson if (ticks >= gbal_tick && balance_groups) 1694dc03363dSJeff Roberson sched_balance_groups(); 16952454aaf5SJeff Roberson /* 16962454aaf5SJeff 
Roberson * We could have been assigned a non real-time thread without an 16972454aaf5SJeff Roberson * IPI. 16982454aaf5SJeff Roberson */ 16992454aaf5SJeff Roberson if (kseq->ksq_assigned) 17002454aaf5SJeff Roberson kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1701dc03363dSJeff Roberson #endif 17027cf90fb3SJeff Roberson ke = td->td_kse; 170315dc847eSJeff Roberson kg = ke->ke_ksegrp; 170435e6168fSJeff Roberson 17050a016a05SJeff Roberson /* Adjust ticks for pctcpu */ 170665c8760dSJeff Roberson ke->ke_ticks++; 1707d465fb95SJeff Roberson ke->ke_ltick = ticks; 1708a8949de2SJeff Roberson 1709d465fb95SJeff Roberson /* Go up to one second beyond our max and then trim back down */ 1710d465fb95SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1711d465fb95SJeff Roberson sched_pctcpu_update(ke); 1712d465fb95SJeff Roberson 171343fdafb1SJulian Elischer if (td->td_flags & TDF_IDLETD) 171435e6168fSJeff Roberson return; 17153f741ca1SJeff Roberson /* 1716a8949de2SJeff Roberson * We only do slicing code for TIMESHARE ksegrps. 1717a8949de2SJeff Roberson */ 1718a8949de2SJeff Roberson if (kg->kg_pri_class != PRI_TIMESHARE) 1719a8949de2SJeff Roberson return; 1720a8949de2SJeff Roberson /* 172115dc847eSJeff Roberson * We used a tick charge it to the ksegrp so that we can compute our 172215dc847eSJeff Roberson * interactivity. 172315dc847eSJeff Roberson */ 1724a1d4fe69SDavid Xu kg->kg_runtime += tickincr; 17254b60e324SJeff Roberson sched_interact_update(kg); 1726407b0157SJeff Roberson 172735e6168fSJeff Roberson /* 172835e6168fSJeff Roberson * We used up one time slice. 172935e6168fSJeff Roberson */ 1730093c05e3SJeff Roberson if (--ke->ke_slice > 0) 173115dc847eSJeff Roberson return; 173235e6168fSJeff Roberson /* 173315dc847eSJeff Roberson * We're out of time, recompute priorities and requeue. 
173435e6168fSJeff Roberson */ 1735155b9987SJeff Roberson kseq_load_rem(kseq, ke); 1736e1f89c22SJeff Roberson sched_priority(kg); 173715dc847eSJeff Roberson sched_slice(ke); 173815dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 173915dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 174015dc847eSJeff Roberson else 174115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 1742155b9987SJeff Roberson kseq_load_add(kseq, ke); 17434a338afdSJulian Elischer td->td_flags |= TDF_NEEDRESCHED; 174435e6168fSJeff Roberson } 174535e6168fSJeff Roberson 174635e6168fSJeff Roberson int 174735e6168fSJeff Roberson sched_runnable(void) 174835e6168fSJeff Roberson { 174935e6168fSJeff Roberson struct kseq *kseq; 1750b90816f1SJeff Roberson int load; 175135e6168fSJeff Roberson 1752b90816f1SJeff Roberson load = 1; 1753b90816f1SJeff Roberson 17540a016a05SJeff Roberson kseq = KSEQ_SELF(); 175522bf7d9aSJeff Roberson #ifdef SMP 175646f8b265SJeff Roberson if (kseq->ksq_assigned) { 175746f8b265SJeff Roberson mtx_lock_spin(&sched_lock); 175822bf7d9aSJeff Roberson kseq_assign(kseq); 175946f8b265SJeff Roberson mtx_unlock_spin(&sched_lock); 176046f8b265SJeff Roberson } 176122bf7d9aSJeff Roberson #endif 17623f741ca1SJeff Roberson if ((curthread->td_flags & TDF_IDLETD) != 0) { 17633f741ca1SJeff Roberson if (kseq->ksq_load > 0) 17643f741ca1SJeff Roberson goto out; 17653f741ca1SJeff Roberson } else 17663f741ca1SJeff Roberson if (kseq->ksq_load - 1 > 0) 1767b90816f1SJeff Roberson goto out; 1768b90816f1SJeff Roberson load = 0; 1769b90816f1SJeff Roberson out: 1770b90816f1SJeff Roberson return (load); 177135e6168fSJeff Roberson } 177235e6168fSJeff Roberson 177335e6168fSJeff Roberson void 177435e6168fSJeff Roberson sched_userret(struct thread *td) 177535e6168fSJeff Roberson { 177635e6168fSJeff Roberson struct ksegrp *kg; 177735e6168fSJeff Roberson 1778f5c157d9SJohn Baldwin KASSERT((td->td_flags & TDF_BORROWING) == 0, 1779f5c157d9SJohn Baldwin ("thread with borrowed priority returning to userland")); 
178035e6168fSJeff Roberson kg = td->td_ksegrp; 1781f5c157d9SJohn Baldwin if (td->td_priority != kg->kg_user_pri) { 178235e6168fSJeff Roberson mtx_lock_spin(&sched_lock); 178335e6168fSJeff Roberson td->td_priority = kg->kg_user_pri; 1784f5c157d9SJohn Baldwin td->td_base_pri = kg->kg_user_pri; 178535e6168fSJeff Roberson mtx_unlock_spin(&sched_lock); 178635e6168fSJeff Roberson } 178735e6168fSJeff Roberson } 178835e6168fSJeff Roberson 1789c9f25d8fSJeff Roberson struct kse * 1790c9f25d8fSJeff Roberson sched_choose(void) 1791c9f25d8fSJeff Roberson { 17920a016a05SJeff Roberson struct kseq *kseq; 1793c9f25d8fSJeff Roberson struct kse *ke; 179415dc847eSJeff Roberson 1795b90816f1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 179622bf7d9aSJeff Roberson kseq = KSEQ_SELF(); 179715dc847eSJeff Roberson #ifdef SMP 179880f86c9fSJeff Roberson restart: 179922bf7d9aSJeff Roberson if (kseq->ksq_assigned) 180022bf7d9aSJeff Roberson kseq_assign(kseq); 180115dc847eSJeff Roberson #endif 180222bf7d9aSJeff Roberson ke = kseq_choose(kseq); 180335e6168fSJeff Roberson if (ke) { 180422bf7d9aSJeff Roberson #ifdef SMP 180522bf7d9aSJeff Roberson if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 180680f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 180780f86c9fSJeff Roberson goto restart; 180822bf7d9aSJeff Roberson #endif 1809155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 181035e6168fSJeff Roberson ke->ke_state = KES_THREAD; 18111278181cSDavid Xu ke->ke_flags &= ~KEF_PREEMPTED; 181215dc847eSJeff Roberson return (ke); 181335e6168fSJeff Roberson } 1814c9f25d8fSJeff Roberson #ifdef SMP 181580f86c9fSJeff Roberson if (kseq_idled(kseq) == 0) 181680f86c9fSJeff Roberson goto restart; 1817c9f25d8fSJeff Roberson #endif 181815dc847eSJeff Roberson return (NULL); 181935e6168fSJeff Roberson } 182035e6168fSJeff Roberson 182135e6168fSJeff Roberson void 18222630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 182335e6168fSJeff Roberson { 1824c9f25d8fSJeff Roberson struct kseq *kseq; 182515dc847eSJeff 
Roberson struct ksegrp *kg; 18267cf90fb3SJeff Roberson struct kse *ke; 1827598b368dSJeff Roberson int preemptive; 18282454aaf5SJeff Roberson int canmigrate; 182922bf7d9aSJeff Roberson int class; 1830c9f25d8fSJeff Roberson 183181d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 183281d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 183381d47d3fSJeff Roberson curthread->td_proc->p_comm); 183422bf7d9aSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 18357cf90fb3SJeff Roberson ke = td->td_kse; 18367cf90fb3SJeff Roberson kg = td->td_ksegrp; 1837598b368dSJeff Roberson canmigrate = 1; 1838598b368dSJeff Roberson preemptive = !(flags & SRQ_YIELDING); 1839598b368dSJeff Roberson class = PRI_BASE(kg->kg_pri_class); 1840598b368dSJeff Roberson kseq = KSEQ_SELF(); 1841598b368dSJeff Roberson if ((ke->ke_flags & KEF_INTERNAL) == 0) 1842598b368dSJeff Roberson SLOT_USE(td->td_ksegrp); 1843598b368dSJeff Roberson ke->ke_flags &= ~KEF_INTERNAL; 1844598b368dSJeff Roberson #ifdef SMP 18452d59a44dSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) { 1846598b368dSJeff Roberson if (ke->ke_flags & KEF_REMOVED) 18472d59a44dSJeff Roberson ke->ke_flags &= ~KEF_REMOVED; 184822bf7d9aSJeff Roberson return; 18492d59a44dSJeff Roberson } 1850598b368dSJeff Roberson canmigrate = KSE_CAN_MIGRATE(ke); 1851f8ec133eSDavid Xu /* 1852f8ec133eSDavid Xu * Don't migrate running threads here. Force the long term balancer 1853f8ec133eSDavid Xu * to do it. 
1854f8ec133eSDavid Xu */ 1855f8ec133eSDavid Xu if (ke->ke_flags & KEF_HOLD) { 1856f8ec133eSDavid Xu ke->ke_flags &= ~KEF_HOLD; 1857f8ec133eSDavid Xu canmigrate = 0; 1858f8ec133eSDavid Xu } 1859598b368dSJeff Roberson #endif 18605d7ef00cSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 18615d7ef00cSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 18625d7ef00cSJeff Roberson ke->ke_proc->p_comm)); 18635d7ef00cSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 18645d7ef00cSJeff Roberson ("sched_add: process swapped out")); 18659bca28a7SJeff Roberson KASSERT(ke->ke_runq == NULL, 18669bca28a7SJeff Roberson ("sched_add: KSE %p is still assigned to a run queue", ke)); 18671278181cSDavid Xu if (flags & SRQ_PREEMPTED) 18681278181cSDavid Xu ke->ke_flags |= KEF_PREEMPTED; 186922bf7d9aSJeff Roberson switch (class) { 1870a8949de2SJeff Roberson case PRI_ITHD: 1871a8949de2SJeff Roberson case PRI_REALTIME: 187215dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 187315dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MAX; 1874598b368dSJeff Roberson if (canmigrate) 18757cd650a9SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 1876a8949de2SJeff Roberson break; 1877a8949de2SJeff Roberson case PRI_TIMESHARE: 187815dc847eSJeff Roberson if (SCHED_CURR(kg, ke)) 187915dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 188015dc847eSJeff Roberson else 188115dc847eSJeff Roberson ke->ke_runq = kseq->ksq_next; 188215dc847eSJeff Roberson break; 188315dc847eSJeff Roberson case PRI_IDLE: 188415dc847eSJeff Roberson /* 188515dc847eSJeff Roberson * This is for priority prop. 
188615dc847eSJeff Roberson */ 18873f741ca1SJeff Roberson if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 188815dc847eSJeff Roberson ke->ke_runq = kseq->ksq_curr; 188915dc847eSJeff Roberson else 189015dc847eSJeff Roberson ke->ke_runq = &kseq->ksq_idle; 189115dc847eSJeff Roberson ke->ke_slice = SCHED_SLICE_MIN; 189215dc847eSJeff Roberson break; 189315dc847eSJeff Roberson default: 1894d322132cSJeff Roberson panic("Unknown pri class."); 1895a8949de2SJeff Roberson break; 1896a6ed4186SJeff Roberson } 189722bf7d9aSJeff Roberson #ifdef SMP 18982454aaf5SJeff Roberson /* 18992454aaf5SJeff Roberson * If this thread is pinned or bound, notify the target cpu. 19002454aaf5SJeff Roberson */ 19012454aaf5SJeff Roberson if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 190286e1c22aSJeff Roberson ke->ke_runq = NULL; 190380f86c9fSJeff Roberson kseq_notify(ke, ke->ke_cpu); 190480f86c9fSJeff Roberson return; 190580f86c9fSJeff Roberson } 190622bf7d9aSJeff Roberson /* 1907670c524fSJeff Roberson * If we had been idle, clear our bit in the group and potentially 1908670c524fSJeff Roberson * the global bitmap. If not, see if we should transfer this thread. 190922bf7d9aSJeff Roberson */ 191080f86c9fSJeff Roberson if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 191180f86c9fSJeff Roberson (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 191280f86c9fSJeff Roberson /* 191380f86c9fSJeff Roberson * Check to see if our group is unidling, and if so, remove it 191480f86c9fSJeff Roberson * from the global idle mask. 191580f86c9fSJeff Roberson */ 191680f86c9fSJeff Roberson if (kseq->ksq_group->ksg_idlemask == 191780f86c9fSJeff Roberson kseq->ksq_group->ksg_cpumask) 191880f86c9fSJeff Roberson atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 191980f86c9fSJeff Roberson /* 192080f86c9fSJeff Roberson * Now remove ourselves from the group specific idle mask. 
192180f86c9fSJeff Roberson */ 192280f86c9fSJeff Roberson kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1923598b368dSJeff Roberson } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD) 1924670c524fSJeff Roberson if (kseq_transfer(kseq, ke, class)) 1925670c524fSJeff Roberson return; 19262454aaf5SJeff Roberson ke->ke_cpu = PCPU_GET(cpuid); 192722bf7d9aSJeff Roberson #endif 1928f2b74cbfSJeff Roberson if (td->td_priority < curthread->td_priority && 1929f2b74cbfSJeff Roberson ke->ke_runq == kseq->ksq_curr) 193022bf7d9aSJeff Roberson curthread->td_flags |= TDF_NEEDRESCHED; 193163fcce68SJohn Baldwin if (preemptive && maybe_preempt(td)) 19320c0b25aeSJohn Baldwin return; 193335e6168fSJeff Roberson ke->ke_state = KES_ONRUNQ; 193435e6168fSJeff Roberson 1935598b368dSJeff Roberson kseq_runq_add(kseq, ke, flags); 1936155b9987SJeff Roberson kseq_load_add(kseq, ke); 193735e6168fSJeff Roberson } 193835e6168fSJeff Roberson 193935e6168fSJeff Roberson void 19407cf90fb3SJeff Roberson sched_rem(struct thread *td) 194135e6168fSJeff Roberson { 194215dc847eSJeff Roberson struct kseq *kseq; 19437cf90fb3SJeff Roberson struct kse *ke; 19447cf90fb3SJeff Roberson 194581d47d3fSJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 194681d47d3fSJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 194781d47d3fSJeff Roberson curthread->td_proc->p_comm); 1948598b368dSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1949598b368dSJeff Roberson ke = td->td_kse; 19502d59a44dSJeff Roberson SLOT_RELEASE(td->td_ksegrp); 19511278181cSDavid Xu ke->ke_flags &= ~KEF_PREEMPTED; 1952598b368dSJeff Roberson if (ke->ke_flags & KEF_ASSIGNED) { 19532d59a44dSJeff Roberson ke->ke_flags |= KEF_REMOVED; 195422bf7d9aSJeff Roberson return; 19552d59a44dSJeff Roberson } 1956c494ddc8SJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 1957c494ddc8SJeff Roberson ("sched_rem: KSE not on run queue")); 195835e6168fSJeff Roberson 19592d59a44dSJeff Roberson ke->ke_state = KES_THREAD; 
196015dc847eSJeff Roberson kseq = KSEQ_CPU(ke->ke_cpu); 1961155b9987SJeff Roberson kseq_runq_rem(kseq, ke); 1962155b9987SJeff Roberson kseq_load_rem(kseq, ke); 196335e6168fSJeff Roberson } 196435e6168fSJeff Roberson 196535e6168fSJeff Roberson fixpt_t 19667cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 196735e6168fSJeff Roberson { 196835e6168fSJeff Roberson fixpt_t pctcpu; 19697cf90fb3SJeff Roberson struct kse *ke; 197035e6168fSJeff Roberson 197135e6168fSJeff Roberson pctcpu = 0; 19727cf90fb3SJeff Roberson ke = td->td_kse; 1973484288deSJeff Roberson if (ke == NULL) 1974484288deSJeff Roberson return (0); 197535e6168fSJeff Roberson 1976b90816f1SJeff Roberson mtx_lock_spin(&sched_lock); 197735e6168fSJeff Roberson if (ke->ke_ticks) { 197835e6168fSJeff Roberson int rtick; 197935e6168fSJeff Roberson 1980210491d3SJeff Roberson /* 1981210491d3SJeff Roberson * Don't update more frequently than twice a second. Allowing 1982210491d3SJeff Roberson * this causes the cpu usage to decay away too quickly due to 1983210491d3SJeff Roberson * rounding errors. 1984210491d3SJeff Roberson */ 19852e227f04SJeff Roberson if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 19862e227f04SJeff Roberson ke->ke_ltick < (ticks - (hz / 2))) 198735e6168fSJeff Roberson sched_pctcpu_update(ke); 198835e6168fSJeff Roberson /* How many rtick per second ? 
*/ 1989210491d3SJeff Roberson rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 19907121cce5SScott Long pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 199135e6168fSJeff Roberson } 199235e6168fSJeff Roberson 199335e6168fSJeff Roberson ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1994828e7683SJohn Baldwin mtx_unlock_spin(&sched_lock); 199535e6168fSJeff Roberson 199635e6168fSJeff Roberson return (pctcpu); 199735e6168fSJeff Roberson } 199835e6168fSJeff Roberson 19999bacd788SJeff Roberson void 20009bacd788SJeff Roberson sched_bind(struct thread *td, int cpu) 20019bacd788SJeff Roberson { 20029bacd788SJeff Roberson struct kse *ke; 20039bacd788SJeff Roberson 20049bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 20059bacd788SJeff Roberson ke = td->td_kse; 20069bacd788SJeff Roberson ke->ke_flags |= KEF_BOUND; 200780f86c9fSJeff Roberson #ifdef SMP 200880f86c9fSJeff Roberson if (PCPU_GET(cpuid) == cpu) 20099bacd788SJeff Roberson return; 20109bacd788SJeff Roberson /* sched_rem without the runq_remove */ 20119bacd788SJeff Roberson ke->ke_state = KES_THREAD; 2012155b9987SJeff Roberson kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 20139bacd788SJeff Roberson kseq_notify(ke, cpu); 20149bacd788SJeff Roberson /* When we return from mi_switch we'll be on the correct cpu. 
*/ 2015279f949eSPoul-Henning Kamp mi_switch(SW_VOL, NULL); 20169bacd788SJeff Roberson #endif 20179bacd788SJeff Roberson } 20189bacd788SJeff Roberson 20199bacd788SJeff Roberson void 20209bacd788SJeff Roberson sched_unbind(struct thread *td) 20219bacd788SJeff Roberson { 20229bacd788SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 20239bacd788SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 20249bacd788SJeff Roberson } 20259bacd788SJeff Roberson 202635e6168fSJeff Roberson int 2027ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 2028ebccf1e3SJoseph Koshy { 2029ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 2030ebccf1e3SJoseph Koshy return (td->td_kse->ke_flags & KEF_BOUND); 2031ebccf1e3SJoseph Koshy } 2032ebccf1e3SJoseph Koshy 203336ec198bSDavid Xu void 203436ec198bSDavid Xu sched_relinquish(struct thread *td) 203536ec198bSDavid Xu { 203636ec198bSDavid Xu struct ksegrp *kg; 203736ec198bSDavid Xu 203836ec198bSDavid Xu kg = td->td_ksegrp; 203936ec198bSDavid Xu mtx_lock_spin(&sched_lock); 204036ec198bSDavid Xu if (kg->kg_pri_class == PRI_TIMESHARE) 204136ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 204236ec198bSDavid Xu mi_switch(SW_VOL, NULL); 204336ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 204436ec198bSDavid Xu } 204536ec198bSDavid Xu 2046ebccf1e3SJoseph Koshy int 204733916c36SJeff Roberson sched_load(void) 204833916c36SJeff Roberson { 204933916c36SJeff Roberson #ifdef SMP 205033916c36SJeff Roberson int total; 205133916c36SJeff Roberson int i; 205233916c36SJeff Roberson 205333916c36SJeff Roberson total = 0; 205433916c36SJeff Roberson for (i = 0; i <= ksg_maxid; i++) 205533916c36SJeff Roberson total += KSEQ_GROUP(i)->ksg_load; 205633916c36SJeff Roberson return (total); 205733916c36SJeff Roberson #else 205833916c36SJeff Roberson return (KSEQ_SELF()->ksq_sysload); 205933916c36SJeff Roberson #endif 206033916c36SJeff Roberson } 206133916c36SJeff Roberson 206233916c36SJeff Roberson int 206335e6168fSJeff Roberson sched_sizeof_ksegrp(void) 
206435e6168fSJeff Roberson { 206535e6168fSJeff Roberson return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 206635e6168fSJeff Roberson } 206735e6168fSJeff Roberson 206835e6168fSJeff Roberson int 206935e6168fSJeff Roberson sched_sizeof_proc(void) 207035e6168fSJeff Roberson { 207135e6168fSJeff Roberson return (sizeof(struct proc)); 207235e6168fSJeff Roberson } 207335e6168fSJeff Roberson 207435e6168fSJeff Roberson int 207535e6168fSJeff Roberson sched_sizeof_thread(void) 207635e6168fSJeff Roberson { 207735e6168fSJeff Roberson return (sizeof(struct thread) + sizeof(struct td_sched)); 207835e6168fSJeff Roberson } 2079b41f1452SDavid Xu 2080b41f1452SDavid Xu void 2081b41f1452SDavid Xu sched_tick(void) 2082b41f1452SDavid Xu { 2083b41f1452SDavid Xu } 2084ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 2085ed062c8dSJulian Elischer #include "kern/kern_switch.c" 2086