/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * These data structures are allocated within their parent data structure but
 * are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks

struct kg_sched {
        int     skg_slptime;
};
#define kg_slptime      kg_sched->skg_slptime

struct td_sched {
        int     std_slptime;
};
#define td_slptime      td_sched->std_slptime

struct ke_sched ke_sched;
struct kg_sched kg_sched;
struct td_sched td_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define SCHED_PRI_NRESV 40
#define SCHED_PRI_RANGE ((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) - \
    SCHED_PRI_NRESV)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_MAX:     Maximum amount of accrued sleep time.
 * SLP_SCALE:   Scale the number of ticks slept across the dynamic priority
 *              range.
 * SLP_TOPRI:   Convert a number of ticks slept into a priority value.
 * SLP_DECAY:   Reduce the sleep time to 50% for every granted slice.
 */
#define SCHED_SLP_MAX   (hz * 2)
#define SCHED_SLP_SCALE(slp)    (((slp) * SCHED_PRI_RANGE) / SCHED_SLP_MAX)
#define SCHED_SLP_TOPRI(slp)    (SCHED_PRI_RANGE - SCHED_SLP_SCALE((slp)) + \
    SCHED_PRI_NRESV / 2)
#define SCHED_SLP_DECAY(slp)    ((slp) / 2)     /* XXX Multiple kses break */
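
/*
 * Worked example of the sleep-to-priority mapping (illustrative only;
 * assumes hz = 1000 and a 64-priority timeshare range, so
 * SCHED_PRI_RANGE = 64 - 40 = 24 and SCHED_SLP_MAX = 2000 ticks):
 *
 *      slp = 0    -> SLP_TOPRI = 24 - 0  + 20 = 44     (cpu bound)
 *      slp = 1000 -> SLP_TOPRI = 24 - 12 + 20 = 32
 *      slp = 2000 -> SLP_TOPRI = 24 - 24 + 20 = 20     (interactive)
 *
 * sched_priority() adds PRI_MIN_TIMESHARE and kg_nice to this value, so
 * only nice can push a priority into the 20 reserved slots on each end.
 */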
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE: Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE: Compute a slice size that is inversely proportional to the
 *              amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the priority.
 * SLP_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the sleep time.
 */
#define SCHED_SLICE_MIN         (hz / 100)
#define SCHED_SLICE_MAX         (hz / 10)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_PRI_TOSLICE(pri) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define SCHED_SLP_TOSLICE(slp) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_SLP_MAX))
#define SCHED_SLP_COMP(slice)   (((slice) / 5) * 3)     /* 60% */
#define SCHED_PRI_COMP(slice)   (((slice) / 5) * 2)     /* 40% */
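
/*
 * Worked example of the slice computation done in sched_slice()
 * (illustrative only; assumes hz = 1000, so SLICE_MIN = 10,
 * SLICE_MAX = 100 and SLICE_RANGE = 91):
 *
 *      pri offset 0,  slp = 0:
 *              pslice = sslice = 100
 *              slice  = (100 / 5) * 3 + (100 / 5) * 2 = 100 ticks
 *      pri offset 24, slp = 2000:
 *              pslice = 100 - (24 * 91) / 24 = 9, sslice = 100 - 91 = 9
 *              slice  = (9 / 5) * 3 + (9 / 5) * 2 = 5, clamped to 10
 *
 * Long sleepers (interactive ksegs) thus receive the short slices while
 * cpu bound ksegs receive the long ones.
 */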
/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define SCHED_CURR(kg)  ((kg)->kg_slptime > (hz / 4) || \
    (kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  60
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
        struct runq     ksq_runqs[2];
        struct runq     *ksq_curr;
        struct runq     *ksq_next;
        int             ksq_load;       /* Total runnable */
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq     kseq_cpu[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#else
struct kseq     kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);

static void
kseq_setup(struct kseq *kseq)
{
        kseq->ksq_load = 0;
        kseq->ksq_curr = &kseq->ksq_runqs[0];
        kseq->ksq_next = &kseq->ksq_runqs[1];
        runq_init(kseq->ksq_curr);
        runq_init(kseq->ksq_next);
}

static void
sched_setup(void *dummy)
{
        int i;

        mtx_lock_spin(&sched_lock);
        /* init kseqs */
        for (i = 0; i < MAXCPU; i++)
                kseq_setup(KSEQ_CPU(i));
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return (kg->kg_user_pri);

        pri = SCHED_SLP_TOPRI(kg->kg_slptime);
        CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
            kg->kg_slptime, pri);

        pri += PRI_MIN_TIMESHARE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
        int pslice;
        int sslice;
        int slice;
        int pri;

        pri = kg->kg_user_pri;
        pri -= PRI_MIN_TIMESHARE;
        pslice = SCHED_PRI_TOSLICE(pri);
        sslice = SCHED_SLP_TOSLICE(kg->kg_slptime);
        slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);
        kg->kg_slptime = SCHED_SLP_DECAY(kg->kg_slptime);

        CTR4(KTR_RUNQ,
            "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
            pri, sslice, pslice, slice);

        if (slice < SCHED_SLICE_MIN)
                slice = SCHED_SLICE_MIN;
        else if (slice > SCHED_SLICE_MAX)
                slice = SCHED_SLICE_MAX;

        return (slice);
}

int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
            SCHED_CPU_TICKS;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
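
/*
 * Example of the rescaling above (illustrative only; assumes hz = 1000,
 * so SCHED_CPU_TICKS = 60000): if the counters were last reset 120000
 * ticks ago (two full averaging windows), ke_ticks is scaled by
 * 60000 / 120000, i.e. halved, and the window is slid forward so that
 * ke_ftick == ke_ltick - SCHED_CPU_TICKS once again.
 */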
#ifdef SMP
int
sched_pickcpu(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        if (!smp_started)
                return (0);

        /*
         * XXX As initialized, no ksq_load is ever < 0, so this always
         * returns cpu 0.
         */
        load = 0;
        cpu = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load < load) {
                        cpu = i;
                        load = kseq->ksq_load;
                }
        }

        CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
        return (cpu);
}
#else
int
sched_pickcpu(void)
{
        return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;
        struct runq *rq;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        td->td_priority = prio;

        if (TD_ON_RUNQ(td)) {
                rq = ke->ke_runq;

                runq_remove(rq, ke);
                runq_add(rq, ke);
        }
}

void
sched_switchout(struct thread *td)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = ke->ke_oncpu;
        ke->ke_flags &= ~KEF_NEEDRESCHED;

        if (TD_IS_RUNNING(td)) {
                setrunqueue(td);
                return;
        } else
                td->td_kse->ke_runq = NULL;

        /*
         * We will not be on the run queue.  So we must be
         * sleeping or similar.
         */
        if (td->td_proc->p_flag & P_KSES)
                kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
        /* struct kse *ke = td->td_kse; */
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_kse->ke_oncpu = PCPU_GET(cpuid);         /* XXX */
        if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
            td->td_priority != td->td_ksegrp->kg_user_pri)
                curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
}

void
sched_nice(struct ksegrp *kg, int nice)
{
        struct thread *td;

        kg->kg_nice = nice;
        sched_priority(kg);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                td->td_kse->ke_flags |= KEF_NEEDRESCHED;
        }
}

void
sched_sleep(struct thread *td, u_char prio)
{
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_slptime = ticks;
        td->td_priority = prio;

        /*
         * If this is an interactive task clear its queue so it moves back
         * onto curr when it wakes up.  Otherwise let it stay on the queue
         * that it was assigned to.
         */
        if (SCHED_CURR(td->td_kse->ke_ksegrp))
                td->td_kse->ke_runq = NULL;
#if 0
        if (td->td_priority < PZERO)
                kseq_cpu[td->td_kse->ke_oncpu].ksq_load++;
#endif
}

void
sched_wakeup(struct thread *td)
{
        struct ksegrp *kg;

        mtx_assert(&sched_lock, MA_OWNED);

        /*
         * Let the kseg know how long we slept for.  This is because process
         * interactivity behavior is modeled in the kseg.
         */
        kg = td->td_ksegrp;

        if (td->td_slptime) {
                kg->kg_slptime += ticks - td->td_slptime;
                if (kg->kg_slptime > SCHED_SLP_MAX)
                        kg->kg_slptime = SCHED_SLP_MAX;
                td->td_priority = sched_priority(kg);
        }
        td->td_slptime = 0;
#if 0
        if (td->td_priority < PZERO)
                kseq_cpu[td->td_kse->ke_oncpu].ksq_load--;
#endif
        setrunqueue(td);
        if (td->td_priority < curthread->td_priority)
                curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
}
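
/*
 * Example of the interactivity bookkeeping above (illustrative only):
 * a thread that sleeps for 500 ticks and is then woken gains
 * kg_slptime += 500 here, loses one tick of kg_slptime per
 * sched_clock() while it runs, and has the total halved by
 * SCHED_SLP_DECAY each time sched_slice() grants it a new slice.
 */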
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
        struct kse *ckse;
        struct kse *pkse;

        mtx_assert(&sched_lock, MA_OWNED);
        ckse = FIRST_KSE_IN_KSEGRP(child);
        pkse = FIRST_KSE_IN_KSEGRP(kg);

        /* XXX Need something better here */
        child->kg_slptime = kg->kg_slptime;
        child->kg_user_pri = kg->kg_user_pri;

        if (pkse->ke_oncpu != PCPU_GET(cpuid)) {
                printf("pkse->ke_oncpu = %d\n", pkse->ke_oncpu);
                printf("cpuid = %d", PCPU_GET(cpuid));
                Debugger("stop");
        }

        ckse->ke_slice = pkse->ke_slice;
        ckse->ke_oncpu = pkse->ke_oncpu;        /* sched_pickcpu(); */
        ckse->ke_runq = NULL;
        /*
         * Claim that we've been running for one second for statistical
         * purposes.
         */
        ckse->ke_ticks = 0;
        ckse->ke_ltick = ticks;
        ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
        /* XXX Need something better here */
        mtx_assert(&sched_lock, MA_OWNED);
        kg->kg_slptime = child->kg_slptime;
        sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
        struct kse *ke;
        struct kse *nke;
        struct kseq *kseq;
        struct ksegrp *kg;

        ke = td->td_kse;
        kg = td->td_ksegrp;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td != NULL), ("schedclock: null thread pointer"));

        /* Adjust ticks for pctcpu */
        ke->ke_ticks += 10000;
        ke->ke_ltick = ticks;
        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_kse->ke_flags & KEF_IDLEKSE)
                return;

        /*
         * Check for a higher priority task on the run queue.  This can happen
         * on SMP if another processor woke up a process on our runq.
         */
        kseq = KSEQ_SELF();
        nke = runq_choose(kseq->ksq_curr);

        if (nke && nke->ke_thread &&
            nke->ke_thread->td_priority < td->td_priority)
                ke->ke_flags |= KEF_NEEDRESCHED;
        /*
         * We used a tick, decrease our total sleep time.  This decreases our
         * "interactivity".
         */
        if (kg->kg_slptime)
                kg->kg_slptime--;
        /*
         * We used up one time slice.
         */
        ke->ke_slice--;
        /*
         * We're out of time, recompute priorities and requeue.
         */
        if (ke->ke_slice == 0) {
                td->td_priority = sched_priority(kg);
                ke->ke_slice = sched_slice(kg);
                ke->ke_flags |= KEF_NEEDRESCHED;
                ke->ke_runq = NULL;
        }
}

int
sched_runnable(void)
{
        struct kseq *kseq;

        kseq = KSEQ_SELF();

        if (kseq->ksq_load)
                return (1);
#ifdef SMP
        /*
         * For SMP we may steal other processors' KSEs.  Just search until we
         * verify that at least one other cpu has a runnable task.
         */
        if (smp_started) {
                int i;

                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_load)
                                return (1);
                }
        }
#endif
        return (0);
}

void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        kg = td->td_ksegrp;

        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}

struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
                swap = kseq->ksq_curr;
                kseq->ksq_curr = kseq->ksq_next;
                kseq->ksq_next = swap;
                ke = runq_choose(kseq->ksq_curr);
        }

        return (ke);
}

struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        kseq = KSEQ_SELF();
        ke = kseq_choose(kseq);

        if (ke) {
                runq_remove(ke->ke_runq, ke);
                kseq->ksq_load--;
                ke->ke_state = KES_THREAD;
        }

#ifdef SMP
        if (ke == NULL && smp_started) {
                int load;
                int cpu;
                int i;

                load = 0;
                cpu = 0;

                /*
                 * Find the cpu with the highest load and steal one proc.
                 */
                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_load > load) {
                                load = kseq->ksq_load;
                                cpu = i;
                        }
                }
                if (load) {
                        kseq = KSEQ_CPU(cpu);
                        ke = kseq_choose(kseq);
                        kseq->ksq_load--;
                        ke->ke_state = KES_THREAD;
                        runq_remove(ke->ke_runq, ke);
                        ke->ke_runq = NULL;
                        ke->ke_oncpu = PCPU_GET(cpuid);
                }
        }
#endif
        return (ke);
}

void
sched_add(struct kse *ke)
{

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
        KASSERT((ke->ke_thread->td_kse != NULL),
            ("runq_add: No KSE on thread"));
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("runq_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("runq_add: process swapped out"));

        if (ke->ke_runq == NULL) {
                struct kseq *kseq;

                kseq = KSEQ_CPU(ke->ke_oncpu);
                if (SCHED_CURR(ke->ke_ksegrp))
                        ke->ke_runq = kseq->ksq_curr;
                else
                        ke->ke_runq = kseq->ksq_next;
        }
        ke->ke_ksegrp->kg_runq_kses++;
        ke->ke_state = KES_ONRUNQ;

        runq_add(ke->ke_runq, ke);
        KSEQ_CPU(ke->ke_oncpu)->ksq_load++;
}

void
sched_rem(struct kse *ke)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

        runq_remove(ke->ke_runq, ke);
        ke->ke_runq = NULL;
        ke->ke_state = KES_THREAD;
        ke->ke_ksegrp->kg_runq_kses--;
        KSEQ_CPU(ke->ke_oncpu)->ksq_load--;
}
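
/*
 * A sketch of the pctcpu arithmetic used below (illustrative only;
 * assumes sched_clock() deposits its 10000 once per statclock tick):
 *
 *      rtick  = ke_ticks / (SCHED_CPU_TIME * 10000)    ticks run per second
 *      pctcpu = (FSCALE * rtick) / stathz              FSCALE == 100%
 *
 * A kse that ran on every tick of the 60 second window accumulates
 * 60 * stathz * 10000, giving rtick = stathz and pctcpu = FSCALE.
 */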

fixpt_t
sched_pctcpu(struct kse *ke)
{
        fixpt_t pctcpu;

        pctcpu = 0;

        if (ke->ke_ticks) {
                int rtick;

                /* Update to account for time potentially spent sleeping */
                ke->ke_ltick = ticks;
                sched_pctcpu_update(ke);

                /* How many rticks per second? */
                rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
                pctcpu = (FSCALE * ((FSCALE * rtick) / stathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

        return (pctcpu);
}

int
sched_sizeof_kse(void)
{
        return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}