/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

int realstathz;

#define	SCHED_STRICT_RESCHED	1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int	ske_slice;
	struct	runq *ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int	ske_ltick;	/* Last tick that we were running on */
	int	ske_ftick;	/* First tick that we were running on */
	int	ske_ticks;	/* Tick count */
	u_char	ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

struct kg_sched {
	int	skg_slptime;	/* Number of ticks we voluntarily slept */
	int	skg_runtime;	/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
	int	std_schedflag;
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001	/*
				 * thread was counted as being in short
				 * term sleep.
				 */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Reserved priorities for nice.
 * PRI_NHALF:	Half of the priorities reserved for nice.
 * PRI_BASE:	The start of the dynamic range.
 * DYN_RANGE:	Number of priorities that are available in the dynamic
 *		priority range.
 * DYN_HALF:	Half of DYN_RANGE for convenience elsewhere.
 * PRI_DYN:	The dynamic priority which is derived from the number of ticks
 *		running vs the total number of ticks.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		PRIO_TOTAL
#define	SCHED_PRI_NHALF		(PRIO_TOTAL / 2)
#define	SCHED_PRI_BASE		((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
#define	SCHED_DYN_RANGE		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_DYN_HALF		(SCHED_DYN_RANGE / 2)
#define	SCHED_PRI_DYN(run, total)	(((run) * SCHED_DYN_RANGE) / (total))
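
/*
 * Worked example (an editor's illustration, assuming the stock values
 * PRI_MIN_TIMESHARE == 160, PRI_MAX_TIMESHARE == 223 and PRIO_TOTAL == 40):
 * PRI_RANGE is 64, PRI_NRESV is 40, DYN_RANGE is 64 - 40 == 24 and PRI_BASE
 * starts the dynamic range at 180.  A ksegrp that ran for 1 of every 4
 * ticks maps SCHED_PRI_DYN(1, 4) == 24 / 4 == 6 priorities into that range.
 */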

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * INTERACT_RANGE:	Range of interactivity values.  Smaller is better.
 * INTERACT_HALF:	Convenience define, half of the interactivity range.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 2) << 10)
#define	SCHED_SLP_RUN_THROTTLE	(10)
#define	SCHED_INTERACT_RANGE	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_RANGE / 2)
#define	SCHED_INTERACT_THRESH	(10)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of
 *		[0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 10)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)	\
	(SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NHALF))
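
/*
 * Worked example (an editor's illustration, assuming hz == 100 and
 * PRIO_TOTAL == 40, so SCHED_PRI_NHALF == 20): SLICE_MIN is 1 tick (10ms),
 * SLICE_MAX is 10 ticks (100ms) and SLICE_RANGE is 10.  A kse sitting 5
 * nice points above the least nice kse on its run queue receives
 * SCHED_SLICE_NICE(5) == 10 - (5 * 10) / 20 == 8 ticks.
 */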

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(sched_interact_score(kg) < SCHED_INTERACT_THRESH)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct	runq ksq_ithd;		/* Queue of ITHD and REALTIME tds. */
	struct	runq ksq_idle;		/* Queue of IDLE threads. */
	struct	runq ksq_runqs[2];	/* Run queues for TIMESHARE. */
	struct	runq *ksq_curr;
	struct	runq *ksq_next;
	int	ksq_itload;		/* Total runnable for ITHD. */
	int	ksq_tsload;		/* Total runnable for TIMESHARE. */
	int	ksq_idload;		/* Total runnable for IDLE. */
#ifdef SMP
	unsigned int ksq_rslices;	/* Slices on run queue */
	unsigned int ksq_bload;		/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static int sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static int kseq_nice_min(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Figure out what run queue we should go on and assign a slice.
	 */
	switch (kg->kg_pri_class) {
	/*
	 * If we're a real-time or interrupt thread place us on the curr
	 * queue for the current processor.  Hopefully this will yield the
	 * lowest latency response.
	 */
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = &kseq->ksq_ithd;
		ke->ke_slice = SCHED_SLICE_MAX;
		kseq->ksq_itload++;
		break;
	/*
	 * Timeshare threads get placed on the appropriate queue on their
	 * bound cpu.
	 */
	case PRI_TIMESHARE:
		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(kg))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
		}
		if (ke->ke_slice == 0)
			sched_slice(ke);
		kseq->ksq_tsload++;
		break;
	/*
	 * Only grant PRI_IDLE processes a slice if there is nothing else
	 * running.
	 */
	case PRI_IDLE:
		ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		kseq->ksq_idload++;
		break;
	default:
		panic("Unknown priority class.\n");
		break;
	}

	runq_add(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

static void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * XXX Consider making the load an array.
	 */
	switch (kg->kg_pri_class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		kseq->ksq_itload--;
		break;
	case PRI_TIMESHARE:
		kseq->ksq_tsload--;
		break;
	case PRI_IDLE:
		kseq->ksq_idload--;
		break;
	}
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}
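
/*
 * A minimal sketch of the array approach the XXX in kseq_rem() suggests
 * (an editor's illustration, not part of this scheduler, assuming PRI_IDLE
 * is the largest priority class value):
 *
 *	int	ksq_load[PRI_IDLE + 1];
 *
 *	kseq->ksq_load[kg->kg_pri_class]++;	(in kseq_add())
 *	kseq->ksq_load[kg->kg_pri_class]--;	(in kseq_rem())
 *
 * Both switch statements would collapse into single increments, though
 * ITHD and REALTIME would then be counted separately instead of sharing
 * ksq_itload.
 */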

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_tsload > load) {
			load = kseq->ksq_tsload;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if (kseq->ksq_itload)
		return (runq_choose(&kseq->ksq_ithd));

	if (kseq->ksq_tsload) {
		if ((ke = runq_choose(kseq->ksq_curr)) != NULL)
			return (ke);

		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;

		return (runq_choose(kseq->ksq_curr));
	}
	if (kseq->ksq_idload)
		return (runq_choose(&kseq->ksq_idle));

	return (NULL);
}
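
/*
 * Editor's note on the swap in kseq_choose() above: ksq_curr and ksq_next
 * behave like active/expired queue pairs.  Expired timeshare kses are
 * requeued on ksq_next (see sched_choose()), so once ksq_curr drains every
 * runnable kse has been serviced; swapping the pointers then restarts the
 * cycle with the accumulated expired kses, which bounds how long a
 * timeshare kse can be starved by its higher priority timeshare peers.
 */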

static int
kseq_nice_min(struct kseq *kseq)
{
	struct kse *ke0;
	struct kse *ke1;

	if (kseq->ksq_tsload == 0)
		return (0);

	ke0 = runq_choose(kseq->ksq_curr);
	ke1 = runq_choose(kseq->ksq_next);

	if (ke0 == NULL)
		return (ke1->ke_ksegrp->kg_nice);

	if (ke1 == NULL)
		return (ke0->ke_ksegrp->kg_nice);

	return (min(ke0->ke_ksegrp->kg_nice, ke1->ke_ksegrp->kg_nice));
}

static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(&kseq->ksq_ithd);
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_itload = 0;
	kseq->ksq_tsload = 0;
	kseq->ksq_idload = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
	int i;

	realstathz = stathz ? stathz : hz;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE;
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}
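
/*
 * Worked example for sched_priority() (an editor's illustration, using the
 * same assumed constants as above): an interactivity score of 50 with nice
 * 0 yields pri = 50 * 24 / 100 + 180 + 0 = 192, mid-range; a score of 0
 * with nice -20 yields 160, exactly PRI_MIN_TIMESHARE, so the clamps only
 * matter at the extremes.
 */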

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse's distance from the least nice ksegrp.
	 *
	 * If you are outside of the window you will get no slice and
	 * you will be reevaluated each time you are selected on the
	 * run queue.
	 */
	if (!SCHED_CURR(kg)) {
		struct kseq *kseq;
		int nice_base;
		int nice;

		kseq = KSEQ_CPU(ke->ke_cpu);
		nice_base = kseq_nice_min(kseq);
		nice = kg->kg_nice + (0 - nice_base);

		if (kseq->ksq_tsload == 0 || kg->kg_nice < nice_base)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_PRI_NHALF)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	/*
	 * Check to see if we need to scale back the slp and run time
	 * in the kg.  This will cause us to forget old interactivity
	 * while maintaining the current ratio.
	 */
	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
	}

	return;
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int big;
	int small;
	int base;

	if (kg->kg_runtime > kg->kg_slptime) {
		big = kg->kg_runtime;
		small = kg->kg_slptime;
		base = SCHED_INTERACT_HALF;
	} else {
		big = kg->kg_slptime;
		small = kg->kg_runtime;
		base = 0;
	}

	big /= SCHED_INTERACT_HALF;
	if (big != 0)
		small /= big;
	else
		small = 0;

	small += base;
	/* XXX Factor in nice */
	return (small);
}
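
/*
 * Worked example (an editor's illustration, assuming hz == 100): slptime
 * and runtime are kept in ticks << 10, so a kseg that slept 9 seconds and
 * ran 1 second has slptime 921600 and runtime 102400, and scores
 * 102400 / (921600 / 50) == 5.  That is under SCHED_INTERACT_THRESH, so
 * SCHED_CURR() is true.  Any kseg that runs more than it sleeps starts
 * from base 50 and can never score as interactive.
 */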

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	/*
	 * Shift the tick count out so that the divide doesn't round away
	 * our results.
	 */
	ke->ke_ticks <<= 10;
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
		    SCHED_CPU_TICKS;
	ke->ke_ticks >>= 10;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
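
/*
 * Worked example (an editor's illustration, assuming hz == 100, so
 * SCHED_CPU_TICKS == 1000): a kse that accumulated 500 ticks over a 2000
 * tick window becomes ((500 << 10) / 2000) * 1000 >> 10 == 250 ticks,
 * i.e. the same 25% cpu share renormalized to the 1000 tick window.
 * Without the << 10 scaling, 500 / 2000 would truncate to 0.
 */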

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = -1;	/* Sentinel: no candidate cpu seen yet. */
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (load == -1 || kseq->ksq_tsload < load) {
			cpu = i;
			load = kseq->ksq_tsload;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	}
	td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_THREADED)
		kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}
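
/*
 * Editor's note: ksq_bload counts threads that went to sleep at a priority
 * below PZERO, which in practice means short-term, I/O-bound sleeps.
 * TD_SCHED_BLOAD remembers that this thread was counted so sched_wakeup()
 * decrements the counter exactly once.  The disabled ksq_bload checks in
 * sched_runnable() and sched_choose() suggest it was meant to stop a cpu
 * from stealing remote work while its own I/O-bound threads are about to
 * wake up.
 */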

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		kg->kg_slptime += (ticks - td->td_slptime) << 10;
		sched_priority(kg);
		td->td_slptime = 0;
	}
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	if (kg->kg_slptime > kg->kg_runtime) {
		child->kg_slptime = SCHED_DYN_RANGE;
		child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE;
	} else {
		child->kg_runtime = SCHED_DYN_RANGE;
		child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE;
	}
#if 0
	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
#endif
	child->kg_user_pri = kg->kg_user_pri;

#if 0
	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
		printf("cpuid = %d", PCPU_GET(cpuid));
		Debugger("stop");
	}
#endif

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_cpu = pkse->ke_cpu;	/* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}
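
/*
 * Editor's note on the inheritance above: the child does not copy the
 * parent's raw counters; whichever counter dominated in the parent becomes
 * the child's smaller counter (SCHED_DYN_RANGE, 24 with the constants
 * assumed earlier), scaled against the parent's magnitude.  E.g. a parent
 * with slptime 102400 and runtime 10240 yields a child with slptime 24 and
 * runtime 102400 / 24 == 4266, so the child starts with a non-interactive
 * score and must sleep to earn interactivity rather than inheriting it.
 */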

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
#if 0
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
#endif
}

void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;

	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		td->td_flags |= TDF_NEEDRESCHED;
#endif
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1 << 10;
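
	/*
	 * Editor's note: kg_runtime and kg_slptime are kept in a 10-bit
	 * fixed-point format (ticks << 10), so the 1 << 10 above is exactly
	 * one tick.  The same scale appears in sched_wakeup()'s
	 * (ticks - td_slptime) << 10 and in SCHED_SLP_RUN_MAX's
	 * (hz * 2) << 10 cap, i.e. two seconds of combined history.
	 */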

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue.  We'll get a
	 * new slice when we're put back on the run queue.
	 */
	if (ke->ke_slice <= 0) {
		sched_priority(kg);
		td->td_flags |= TDF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}

int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_tsload || kseq->ksq_idload || kseq->ksq_itload)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_tsload)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
retry:
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);

		/*
		 * If we dequeue a kse with a slice of zero it was below the
		 * nice threshold to acquire a slice.  Force it on to the
		 * next run queue and let kseq_add() pick a new slice.
		 *
		 * XXX This code should live in a TIMESHARE specific section.
		 */
		if (ke->ke_slice == 0) {
			ke->ke_runq = kseq->ksq_next;
			kseq_add(kseq, ke);
			goto retry;
		}
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		/*
		 * XXX Do we want to migrate interrupt or realtime threads?
		 * Currently we'll only try to steal if there is a TIMESHARE
		 * thread available but we will steal a REALTIME or interrupt
		 * thread if one happens to be first in the queue.
		 */
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	switch (ke->ke_ksegrp->kg_pri_class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		kseq = KSEQ_SELF();
		break;
	case PRI_TIMESHARE:
	case PRI_IDLE:
	default:
		kseq = KSEQ_CPU(ke->ke_cpu);
		break;
	}

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}