/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"

#define kse td_sched

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

/*
 * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
 * the range 100-256 Hz (approximately).
 */
#define ESTCPULIM(e) \
    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
    RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#ifdef SMP
#define INVERSE_ESTCPU_WEIGHT   (8 * smp_cpus)
#else
#define INVERSE_ESTCPU_WEIGHT   8       /* 1 / (priorities per estcpu level). */
#endif
#define NICE_WEIGHT             1       /* Priorities per nice level. */
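
/*
 * A worked instance of the clamp above (illustrative only; the specific
 * numbers assume the uniprocessor defaults INVERSE_ESTCPU_WEIGHT == 8 and
 * NICE_WEIGHT == 1, plus the usual PRIO_MAX - PRIO_MIN == 40 and
 * RQ_PPQ == 4):
 *
 *      ESTCPULIM(e) = min(e, 8 * (1 * 40 - 4) + 8 - 1) = min(e, 295)
 *
 * so the kg_estcpu / INVERSE_ESTCPU_WEIGHT term that resetpriority() adds
 * to PUSER contributes at most 295 / 8 = 36 priority levels, keeping the
 * computed user priority inside the timesharing range.
 */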
#ifdef KSE
/*
 * The schedulable entity that can be given a context to run.
 * A process may have several of these. Probably one per processor
 * but possibly a few more. In this universe they are grouped
 * with a KSEG that contains the priority and niceness
 * for the group.
 */
#else
/*
 * The schedulable entity that runs a context.
 * A process may have several of these. Probably one per processor
 * but possibly a few more.
 */
#endif
struct kse {
        TAILQ_ENTRY(kse) ke_procq;      /* (j/z) Run queue. */
        struct thread   *ke_thread;     /* (*) Active associated thread. */
        fixpt_t         ke_pctcpu;      /* (j) %cpu during p_swtime. */
        u_char          ke_rqindex;     /* (j) Run queue index. */
        enum {
                KES_THREAD = 0x0,       /* slaved to thread state */
                KES_ONRUNQ
        } ke_state;                     /* (j) KSE status. */
        int             ke_cpticks;     /* (j) Ticks of cpu time. */
        struct runq     *ke_runq;       /* runq the kse is currently on */
};

#ifdef KSE
#define ke_proc         ke_thread->td_proc
#define ke_ksegrp       ke_thread->td_ksegrp
#endif

#define td_kse td_sched

/* flags kept in td_flags */
#define TDF_DIDRUN      TDF_SCHED0      /* KSE actually ran. */
#define TDF_EXIT        TDF_SCHED1      /* KSE is being killed. */
#define TDF_BOUND       TDF_SCHED2

#define ke_flags        ke_thread->td_flags
#define KEF_DIDRUN      TDF_DIDRUN      /* KSE actually ran. */
#define KEF_EXIT        TDF_EXIT        /* KSE is being killed. */
#define KEF_BOUND       TDF_BOUND       /* stuck to one CPU */

#define SKE_RUNQ_PCPU(ke)                                               \
    ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)

#ifdef KSE
struct kg_sched {
        struct thread   *skg_last_assigned; /* (j) Last thread assigned to */
                                            /* the system scheduler. */
        int     skg_avail_opennings;    /* (j) Num unfilled slots in group. */
        int     skg_concurrency;        /* (j) Num KSEs requested in group. */
};
#define kg_last_assigned        kg_sched->skg_last_assigned
#define kg_avail_opennings      kg_sched->skg_avail_opennings
#define kg_concurrency          kg_sched->skg_concurrency

#define SLOT_RELEASE(kg)                                                \
do {                                                                    \
        kg->kg_avail_opennings++;                                       \
        CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)",                \
            kg,                                                         \
            kg->kg_concurrency,                                         \
            kg->kg_avail_opennings);                                    \
        /* KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency),      \
            ("slots out of whack"));*/                                  \
} while (0)

#define SLOT_USE(kg)                                                    \
do {                                                                    \
        kg->kg_avail_opennings--;                                       \
        CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)",                    \
            kg,                                                         \
            kg->kg_concurrency,                                         \
            kg->kg_avail_opennings);                                    \
        /* KASSERT((kg->kg_avail_opennings >= 0),                       \
            ("slots out of whack"));*/                                  \
} while (0)
#endif

/*
 * KSE_CAN_MIGRATE macro returns true if the kse can migrate between
 * cpus.
 */
#define KSE_CAN_MIGRATE(ke)                                             \
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)

static struct kse kse0;
#ifdef KSE
static struct kg_sched kg_sched0;
#endif

static int      sched_tdcnt;    /* Total runnable threads in the system. */
static int      sched_quantum;  /* Roundrobin scheduling quantum in ticks. */
#define SCHED_QUANTUM   (hz / 10)       /* Default sched quantum */

static struct callout roundrobin_callout;

#ifdef KSE
static void     slot_fill(struct ksegrp *kg);
static struct kse *sched_choose(void);  /* XXX Should be thread * */
#else
static struct thread *sched_choose(void);
#endif

static void     setup_runqs(void);
static void     roundrobin(void *arg);
static void     schedcpu(void);
static void     schedcpu_thread(void);
static void     sched_priority(struct thread *td, u_char prio);
static void     sched_setup(void *dummy);
static void     maybe_resched(struct thread *td);
#ifdef KSE
static void     updatepri(struct ksegrp *kg);
static void     resetpriority(struct ksegrp *kg);
static void     resetpriority_thread(struct thread *td, struct ksegrp *kg);
#else
static void     updatepri(struct thread *td);
static void     resetpriority(struct thread *td);
static void     resetpriority_thread(struct thread *td);
#endif
#ifdef SMP
static int      forward_wakeup(int cpunum);
#endif

static struct kproc_desc sched_kp = {
        "schedcpu",
        schedcpu_thread,
        NULL
};
SYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &sched_kp)
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * Global run queue.
 */
static struct runq runq;

#ifdef SMP
/*
 * Per-CPU run queues
 */
static struct runq runq_pcpu[MAXCPU];
#endif

static void
setup_runqs(void)
{
#ifdef SMP
        int i;

        for (i = 0; i < MAXCPU; ++i)
                runq_init(&runq_pcpu[i]);
#endif

        runq_init(&runq);
}

static int
sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
{
        int error, new_val;

        new_val = sched_quantum * tick;
        error = sysctl_handle_int(oidp, &new_val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (new_val < tick)
                return (EINVAL);
        sched_quantum = new_val / tick;
        hogticks = 2 * sched_quantum;
        return (0);
}

SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
    "Scheduler name");

SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
    "Roundrobin scheduling quantum in microseconds");
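
/*
 * A worked example of the quantum arithmetic above (illustrative only,
 * assuming hz = 1000, so tick = 1000 us): kern.sched.quantum is exported in
 * microseconds but stored internally in ticks.  The default quantum of
 * hz / 10 = 100 ticks reads back as 100000 us; a write smaller than one
 * tick is rejected with EINVAL, and hogticks always tracks two quanta.
 */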
#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");

static int forward_wakeup_enabled = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
    &forward_wakeup_enabled, 0,
    "Forwarding of wakeup to idle CPUs");

static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
    &forward_wakeups_requested, 0,
    "Requests for Forwarding of wakeup to idle CPUs");

static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
    &forward_wakeups_delivered, 0,
    "Completed Forwarding of wakeup to idle CPUs");

static int forward_wakeup_use_mask = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
    &forward_wakeup_use_mask, 0,
    "Use the mask of idle cpus");

static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
    &forward_wakeup_use_loop, 0,
    "Use a loop to find idle cpus");

static int forward_wakeup_use_single = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
    &forward_wakeup_use_single, 0,
    "Only signal one idle cpu");

static int forward_wakeup_use_htt = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
    &forward_wakeup_use_htt, 0,
    "account for htt");

#endif
#ifdef KSE
static int sched_followon = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
    &sched_followon, 0,
    "allow threads to share a quantum");

static int sched_pfollowons = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, pfollowons, CTLFLAG_RD,
    &sched_pfollowons, 0,
    "number of followons done to a different ksegrp");

static int sched_kgfollowons = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, kgfollowons, CTLFLAG_RD,
    &sched_kgfollowons, 0,
    "number of followons done in a ksegrp");
#endif

static __inline void
sched_load_add(void)
{
        sched_tdcnt++;
        CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
}

static __inline void
sched_load_rem(void)
{
        sched_tdcnt--;
        CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
}
/*
 * Arrange to reschedule if necessary, taking the priorities and
 * schedulers into account.
 */
static void
maybe_resched(struct thread *td)
{

        mtx_assert(&sched_lock, MA_OWNED);
        if (td->td_priority < curthread->td_priority)
                curthread->td_flags |= TDF_NEEDRESCHED;
}

/*
 * Force switch among equal priority processes every 100ms.
 * We don't actually need to force a context switch of the current process.
 * The act of firing the event triggers a context switch to softclock() and
 * then switching back out again which is equivalent to a preemption, thus
 * no further work is needed on the local CPU.
 */
/* ARGSUSED */
static void
roundrobin(void *arg)
{

#ifdef SMP
        mtx_lock_spin(&sched_lock);
        forward_roundrobin();
        mtx_unlock_spin(&sched_lock);
#endif

        callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL);
}

/*
 * Constants for digital decay and forget:
 * ifdef KSE
 *      90% of (kg_estcpu) usage in 5 * loadav time
 * else
 *      90% of (td_estcpu) usage in 5 * loadav time
 * endif
 *      95% of (ke_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * ifdef KSE
 * Note that schedclock() updates kg_estcpu and p_cpticks asynchronously.
 * else
 * Note that schedclock() updates td_estcpu and p_cpticks asynchronously.
 * endif
 *
 * ifdef KSE
 * We wish to decay away 90% of kg_estcpu in (5 * loadavg) seconds.
 * else
 * We wish to decay away 90% of td_estcpu in (5 * loadavg) seconds.
 * endif
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *      for (i = 0; i < (5 * loadavg); i++)
 * ifdef KSE
 *              kg_estcpu *= decay;
 * else
 *              td_estcpu *= decay;
 * endif
 * will compute
 * ifdef KSE
 *      kg_estcpu *= 0.1;
 * else
 *      td_estcpu *= 0.1;
 * endif
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *      decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *      decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *      decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *      b = 2 * loadavg
 * then
 *      decay = b / (b + 1)
 *
 * We now need to prove two things:
 *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *      For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *      For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *      ln(.1) =~ -2.30
 *
 * Proof of (1):
 *      Solve (factor)**(power) =~ .1 given power (5*loadav):
 *      solving for factor,
 *      ln(factor) =~ (-2.30/5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *      Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *      solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define loadfactor(loadav)      (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE;        /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define CCPU_SHIFT      11
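
/*
 * A worked instance of decay_cpu() (illustrative only, assuming the usual
 * FSHIFT == 11, i.e. FSCALE == 2048): with a load average of 2.0,
 * ldavg[0] == 2 * FSCALE == 4096 and loadfactor() yields 8192, so each pass
 * multiplies estcpu by 8192 / (8192 + 2048) = 0.8.  After roughly ten of the
 * once-a-second schedcpu() passes, 0.8**10 ~= 0.107, i.e. about 90% of the
 * usage has been forgotten, matching the "power: 10.32" entry for loadav 2
 * in the table above.
 */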
/*
 * Recompute process priorities, every hz ticks.
 * MP-safe, called without the Giant mutex.
 */
/* ARGSUSED */
static void
schedcpu(void)
{
        register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
        struct thread *td;
        struct proc *p;
        struct kse *ke;
#ifdef KSE
        struct ksegrp *kg;
#endif
        int awake, realstathz;

        realstathz = stathz ? stathz : hz;
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                /*
                 * Prevent state changes and protect run queue.
                 */
                mtx_lock_spin(&sched_lock);
                /*
                 * Increment time in/out of memory.  We ignore overflow; with
                 * 16-bit int's (remember them?) overflow takes 45 days.
                 */
                p->p_swtime++;
#ifdef KSE
                FOREACH_KSEGRP_IN_PROC(p, kg) {
#else
                FOREACH_THREAD_IN_PROC(p, td) {
#endif
                        awake = 0;
#ifdef KSE
                        FOREACH_THREAD_IN_GROUP(kg, td) {
                                ke = td->td_kse;
                                /*
                                 * Increment sleep time (if sleeping).  We
                                 * ignore overflow, as above.
                                 */
                                /*
                                 * The kse slptimes are not touched in wakeup
                                 * because the thread may not HAVE a KSE.
                                 */
                                if (ke->ke_state == KES_ONRUNQ) {
                                        awake = 1;
                                        ke->ke_flags &= ~KEF_DIDRUN;
                                } else if ((ke->ke_state == KES_THREAD) &&
                                    (TD_IS_RUNNING(td))) {
                                        awake = 1;
                                        /* Do not clear KEF_DIDRUN */
                                } else if (ke->ke_flags & KEF_DIDRUN) {
                                        awake = 1;
                                        ke->ke_flags &= ~KEF_DIDRUN;
                                }

                                /*
                                 * ke_pctcpu is only for ps and ttyinfo().
                                 * Do it per kse, and add them up at the end?
                                 * XXXKSE
                                 */
                                ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >>
                                    FSHIFT;
                                /*
                                 * If the kse has been idle the entire second,
                                 * stop recalculating its priority until
                                 * it wakes up.
                                 */
                                if (ke->ke_cpticks == 0)
                                        continue;
#if (FSHIFT >= CCPU_SHIFT)
                                ke->ke_pctcpu += (realstathz == 100)
                                    ? ((fixpt_t) ke->ke_cpticks) <<
                                    (FSHIFT - CCPU_SHIFT) :
                                    100 * (((fixpt_t) ke->ke_cpticks)
                                    << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
                                ke->ke_pctcpu += ((FSCALE - ccpu) *
                                    (ke->ke_cpticks *
                                    FSCALE / realstathz)) >> FSHIFT;
#endif
                                ke->ke_cpticks = 0;
                        } /* end of kse loop */
#else
                        ke = td->td_kse;
                        /*
                         * Increment sleep time (if sleeping).  We
                         * ignore overflow, as above.
                         */
                        /*
                         * The kse slptimes are not touched in wakeup
                         * because the thread may not HAVE a KSE.
                         */
                        if (ke->ke_state == KES_ONRUNQ) {
                                awake = 1;
                                ke->ke_flags &= ~KEF_DIDRUN;
                        } else if ((ke->ke_state == KES_THREAD) &&
                            (TD_IS_RUNNING(td))) {
                                awake = 1;
                                /* Do not clear KEF_DIDRUN */
                        } else if (ke->ke_flags & KEF_DIDRUN) {
                                awake = 1;
                                ke->ke_flags &= ~KEF_DIDRUN;
                        }

                        /*
                         * ke_pctcpu is only for ps and ttyinfo().
                         * Do it per kse, and add them up at the end?
                         * XXXKSE
                         */
                        ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >>
                            FSHIFT;
                        /*
                         * If the kse has been idle the entire second,
                         * stop recalculating its priority until
                         * it wakes up.
                         */
                        if (ke->ke_cpticks == 0)
                                continue;
#if (FSHIFT >= CCPU_SHIFT)
                        ke->ke_pctcpu += (realstathz == 100)
                            ? ((fixpt_t) ke->ke_cpticks) <<
                            (FSHIFT - CCPU_SHIFT) :
                            100 * (((fixpt_t) ke->ke_cpticks)
                            << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
                        ke->ke_pctcpu += ((FSCALE - ccpu) *
                            (ke->ke_cpticks *
                            FSCALE / realstathz)) >> FSHIFT;
#endif
                        ke->ke_cpticks = 0;
#endif

                        /*
                         * ifdef KSE
                         * If there are ANY running threads in this KSEGRP,
                         * else
                         * If there are ANY running threads in this process,
                         * endif
                         * then don't count it as sleeping.
                         */
                        if (awake) {
#ifdef KSE
                                if (kg->kg_slptime > 1) {
#else
                                if (td->td_slptime > 1) {
#endif
                                        /*
                                         * In an ideal world, this should not
                                         * happen, because whoever woke us
                                         * up from the long sleep should have
                                         * unwound the slptime and reset our
                                         * priority before we run at the stale
                                         * priority.  Should KASSERT at some
                                         * point when all the cases are fixed.
                                         */
#ifdef KSE
                                        updatepri(kg);
                                }
                                kg->kg_slptime = 0;
                        } else
                                kg->kg_slptime++;
                        if (kg->kg_slptime > 1)
                                continue;
                        kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
                        resetpriority(kg);
                        FOREACH_THREAD_IN_GROUP(kg, td) {
                                resetpriority_thread(td, kg);
                        }
                } /* end of ksegrp loop */
#else
                                        updatepri(td);
                                }
                                td->td_slptime = 0;
                        } else
                                td->td_slptime++;
                        if (td->td_slptime > 1)
                                continue;
                        td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
                        resetpriority(td);
                        resetpriority_thread(td);
                } /* end of thread loop */
#endif
                mtx_unlock_spin(&sched_lock);
        } /* end of process loop */
        sx_sunlock(&allproc_lock);
}

/*
 * Main loop for a kthread that executes schedcpu once a second.
 */
static void
schedcpu_thread(void)
{
        int nowake;

        for (;;) {
                schedcpu();
                tsleep(&nowake, 0, "-", hz);
        }
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * ifdef KSE
 * For all load averages >= 1 and max kg_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay kg_estcpu to zero.
 * else
 * For all load averages >= 1 and max td_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay td_estcpu to zero.
 * endif
 */
static void
#ifdef KSE
updatepri(struct ksegrp *kg)
#else
updatepri(struct thread *td)
#endif
{
        register fixpt_t loadfac;
        register unsigned int newcpu;

        loadfac = loadfactor(averunnable.ldavg[0]);
#ifdef KSE
        if (kg->kg_slptime > 5 * loadfac)
                kg->kg_estcpu = 0;
#else
        if (td->td_slptime > 5 * loadfac)
                td->td_estcpu = 0;
#endif
        else {
#ifdef KSE
                newcpu = kg->kg_estcpu;
                kg->kg_slptime--;       /* was incremented in schedcpu() */
                while (newcpu && --kg->kg_slptime)
#else
                newcpu = td->td_estcpu;
                td->td_slptime--;       /* was incremented in schedcpu() */
                while (newcpu && --td->td_slptime)
#endif
                        newcpu = decay_cpu(loadfac, newcpu);
#ifdef KSE
                kg->kg_estcpu = newcpu;
#else
                td->td_estcpu = newcpu;
#endif
        }
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
static void
#ifdef KSE
resetpriority(struct ksegrp *kg)
#else
resetpriority(struct thread *td)
#endif
{
        register unsigned int newpriority;

#ifdef KSE
        if (kg->kg_pri_class == PRI_TIMESHARE) {
                newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
                    NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
#else
        if (td->td_pri_class == PRI_TIMESHARE) {
                newpriority = PUSER + td->td_estcpu / INVERSE_ESTCPU_WEIGHT +
                    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
#endif
                newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
                    PRI_MAX_TIMESHARE);
#ifdef KSE
                sched_user_prio(kg, newpriority);
#else
                sched_user_prio(td, newpriority);
#endif
        }
}

/*
 * Update the thread's priority when the associated ksegroup's user
 * priority changes.
 */
static void
#ifdef KSE
resetpriority_thread(struct thread *td, struct ksegrp *kg)
#else
resetpriority_thread(struct thread *td)
#endif
{

        /* Only change threads with a time sharing user priority. */
        if (td->td_priority < PRI_MIN_TIMESHARE ||
            td->td_priority > PRI_MAX_TIMESHARE)
                return;

        /* XXX the whole needresched thing is broken, but not silly. */
        maybe_resched(td);

#ifdef KSE
        sched_prio(td, kg->kg_user_pri);
#else
        sched_prio(td, td->td_user_pri);
#endif
}

/* ARGSUSED */
static void
sched_setup(void *dummy)
{
        setup_runqs();

        if (sched_quantum == 0)
                sched_quantum = SCHED_QUANTUM;
        hogticks = 2 * sched_quantum;

        callout_init(&roundrobin_callout, CALLOUT_MPSAFE);

        /* Kick off timeout driven events by calling first time. */
        roundrobin(NULL);

        /* Account for thread0. */
        sched_load_add();
}

/* External interfaces start here */
/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
        /*
         * Set up the scheduler specific parts of proc0.
         */
        proc0.p_sched = NULL; /* XXX */
#ifdef KSE
        ksegrp0.kg_sched = &kg_sched0;
#endif
        thread0.td_sched = &kse0;
        kse0.ke_thread = &thread0;
        kse0.ke_state = KES_THREAD;
#ifdef KSE
        kg_sched0.skg_concurrency = 1;
        kg_sched0.skg_avail_opennings = 0; /* we are already running */
#endif
}

int
sched_runnable(void)
{
#ifdef SMP
        return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
#else
        return runq_check(&runq);
#endif
}

int
sched_rr_interval(void)
{
        if (sched_quantum == 0)
                sched_quantum = SCHED_QUANTUM;
        return (sched_quantum);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * ifdef KSE
 * estimator (kg_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time kg_estcpu increases by
 * else
 * estimator (td_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time td_estcpu increases by
 * endif
 * INVERSE_ESTCPU_WEIGHT
 * (until MAXPRI is reached).  The cpu usage estimator ramps up
 * quite quickly when the process is running (linearly), and decays
 * away exponentially, at a rate which is proportionally slower when
 * the system is busy.  The basic principle is that the system will
 * 90% forget that the process used a lot of CPU time in 5 * loadav
 * seconds.  This causes the system to favor processes which haven't
 * run much recently, and to round-robin among other processes.
 */
void
sched_clock(struct thread *td)
{
#ifdef KSE
        struct ksegrp *kg;
#endif
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
#ifdef KSE
        kg = td->td_ksegrp;
#endif
        ke = td->td_kse;

        ke->ke_cpticks++;
#ifdef KSE
        kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
        if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
                resetpriority(kg);
                resetpriority_thread(td, kg);
#else
        td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
        if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
                resetpriority(td);
                resetpriority_thread(td);
#endif
        }
}
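
/*
 * To put numbers on the comment above (illustrative only, assuming
 * stathz = 128 and the uniprocessor INVERSE_ESTCPU_WEIGHT of 8):
 * sched_clock() runs once per statclock tick, so a thread that stays on
 * the CPU gains one estcpu per tick and resetpriority() reconsiders its
 * user priority about every 8 ticks (roughly 62 ms), worsening it by one
 * priority level each time until ESTCPULIM() caps further growth.
 */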
#ifdef KSE
/*
 * charge child's scheduling cpu usage to parent.
 *
 * XXXKSE assume only one thread & kse & ksegrp keep estcpu in each ksegrp.
 * Charge it to the ksegrp that did the wait since process estcpu is sum of
 * all ksegrps, this is strictly as expected.  Assume that the child process
 * aggregated all the estcpu into the 'built-in' ksegrp.
 */
#else
/*
 * charge child's scheduling cpu usage to parent.
 */
#endif
void
sched_exit(struct proc *p, struct thread *td)
{
#ifdef KSE
        sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
        sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
#else
        struct thread *parent = FIRST_THREAD_IN_PROC(p);

        CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
            td, td->td_proc->p_comm, td->td_priority);

        parent->td_estcpu = ESTCPULIM(parent->td_estcpu + td->td_estcpu);
        if ((td->td_proc->p_flag & P_NOLOAD) == 0)
                sched_load_rem();
#endif
}

#ifdef KSE
void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd)
{

        mtx_assert(&sched_lock, MA_OWNED);
        kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + childtd->td_ksegrp->kg_estcpu);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
        CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
            child, child->td_proc->p_comm, child->td_priority);
        if ((child->td_proc->p_flag & P_NOLOAD) == 0)
                sched_load_rem();
}
#endif

void
sched_fork(struct thread *td, struct thread *childtd)
{
#ifdef KSE
        sched_fork_ksegrp(td, childtd->td_ksegrp);
        sched_fork_thread(td, childtd);
#else
        childtd->td_estcpu = td->td_estcpu;
        sched_newthread(childtd);
#endif
}

#ifdef KSE
void
sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
        mtx_assert(&sched_lock, MA_OWNED);
        child->kg_estcpu = td->td_ksegrp->kg_estcpu;
}

void
sched_fork_thread(struct thread *td, struct thread *childtd)
{
        sched_newthread(childtd);
}
#endif

void
sched_nice(struct proc *p, int nice)
{
#ifdef KSE
        struct ksegrp *kg;
#endif
        struct thread *td;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        mtx_assert(&sched_lock, MA_OWNED);
        p->p_nice = nice;
#ifdef KSE
        FOREACH_KSEGRP_IN_PROC(p, kg) {
                resetpriority(kg);
                FOREACH_THREAD_IN_GROUP(kg, td) {
                        resetpriority_thread(td, kg);
                }
        }
#else
        FOREACH_THREAD_IN_PROC(p, td) {
                resetpriority(td);
                resetpriority_thread(td);
        }
#endif
}

void
#ifdef KSE
sched_class(struct ksegrp *kg, int class)
#else
sched_class(struct thread *td, int class)
#endif
{
        mtx_assert(&sched_lock, MA_OWNED);
#ifdef KSE
        kg->kg_pri_class = class;
#else
        td->td_pri_class = class;
#endif
}

#ifdef KSE
/*
 * Adjust the priority of a thread.
 * This may include moving the thread within the KSEGRP,
 * changing the assignment of a kse to the thread,
 * and moving a KSE in the system run queue.
 */
#else
/*
 * Adjust the priority of a thread.
 */
#endif
static void
sched_priority(struct thread *td, u_char prio)
{
        CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
            td, td->td_proc->p_comm, td->td_priority, prio, curthread,
            curthread->td_proc->p_comm);

        mtx_assert(&sched_lock, MA_OWNED);
        if (td->td_priority == prio)
                return;
        if (TD_ON_RUNQ(td)) {
                adjustrunqueue(td, prio);
        } else {
                td->td_priority = prio;
        }
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

        td->td_flags |= TDF_BORROWING;
        sched_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
        u_char base_pri;

        if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
            td->td_base_pri <= PRI_MAX_TIMESHARE)
#ifdef KSE
                base_pri = td->td_ksegrp->kg_user_pri;
#else
                base_pri = td->td_user_pri;
#endif
        else
                base_pri = td->td_base_pri;
        if (prio >= base_pri) {
                td->td_flags &= ~TDF_BORROWING;
                sched_prio(td, base_pri);
        } else
                sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
        u_char oldprio;

        /* First, update the base priority. */
        td->td_base_pri = prio;

        /*
         * If the thread is borrowing another thread's priority, don't ever
         * lower the priority.
         */
        if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
                return;

        /* Change the real priority. */
        oldprio = td->td_priority;
        sched_priority(td, prio);

        /*
         * If the thread is on a turnstile, then let the turnstile update
         * its state.
         */
        if (TD_ON_LOCK(td) && oldprio != prio)
                turnstile_adjust(td, oldprio);
}

void
#ifdef KSE
sched_user_prio(struct ksegrp *kg, u_char prio)
#else
sched_user_prio(struct thread *td, u_char prio)
#endif
{
#ifdef KSE
        struct thread *td;
#endif
        u_char oldprio;

#ifdef KSE
        kg->kg_base_user_pri = prio;

        /* XXXKSE only for 1:1 */

        td = TAILQ_FIRST(&kg->kg_threads);
        if (td == NULL) {
                kg->kg_user_pri = prio;
                return;
        }

        if (td->td_flags & TDF_UBORROWING && kg->kg_user_pri <= prio)
                return;

        oldprio = kg->kg_user_pri;
        kg->kg_user_pri = prio;
#else
        td->td_base_user_pri = prio;
        if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
                return;
        oldprio = td->td_user_pri;
        td->td_user_pri = prio;
#endif

        if (TD_ON_UPILOCK(td) && oldprio != prio)
                umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
        u_char oldprio;

        td->td_flags |= TDF_UBORROWING;

#ifdef KSE
        oldprio = td->td_ksegrp->kg_user_pri;
        td->td_ksegrp->kg_user_pri = prio;
#else
        oldprio = td->td_user_pri;
        td->td_user_pri = prio;
#endif

        if (TD_ON_UPILOCK(td) && oldprio != prio)
                umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
#ifdef KSE
        struct ksegrp *kg = td->td_ksegrp;
#endif
        u_char base_pri;

#ifdef KSE
        base_pri = kg->kg_base_user_pri;
#else
        base_pri = td->td_base_user_pri;
#endif
        if (prio >= base_pri) {
                td->td_flags &= ~TDF_UBORROWING;
#ifdef KSE
                sched_user_prio(kg, base_pri);
#else
                sched_user_prio(td, base_pri);
#endif
        } else
                sched_lend_user_prio(td, prio);
}
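
/*
 * A sketch of how the lend/unlend pairs above are used (illustrative only;
 * the exact call sites live in the turnstile and umtx code): when a thread
 * holding a lock blocks a more important thread, priority propagation calls
 * sched_lend_prio() with the lender's (numerically smaller) priority, which
 * sets TDF_BORROWING and changes the running priority without touching
 * td_base_pri.  When the lock is released, sched_unlend_prio() is called
 * with the best priority still owed to remaining lenders; only if that is
 * no better than the thread's own base priority is TDF_BORROWING cleared
 * and the base priority restored.
 */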
11933db720fdSDavid Xu 11943db720fdSDavid Xu void 119544f3b092SJohn Baldwin sched_sleep(struct thread *td) 1196b43179fbSJeff Roberson { 11972056d0a1SJohn Baldwin 11982056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 11998460a577SJohn Birrell #ifdef KSE 1200b43179fbSJeff Roberson td->td_ksegrp->kg_slptime = 0; 12018460a577SJohn Birrell #else 12028460a577SJohn Birrell td->td_slptime = 0; 12038460a577SJohn Birrell #endif 1204b43179fbSJeff Roberson } 1205b43179fbSJeff Roberson 12068460a577SJohn Birrell #ifdef KSE 12073389af30SJulian Elischer static void remrunqueue(struct thread *td); 12088460a577SJohn Birrell #endif 12093389af30SJulian Elischer 1210b43179fbSJeff Roberson void 12113389af30SJulian Elischer sched_switch(struct thread *td, struct thread *newtd, int flags) 1212b43179fbSJeff Roberson { 1213b43179fbSJeff Roberson struct kse *ke; 12148460a577SJohn Birrell #ifdef KSE 12153389af30SJulian Elischer struct ksegrp *kg; 12168460a577SJohn Birrell #endif 1217b43179fbSJeff Roberson struct proc *p; 1218b43179fbSJeff Roberson 1219b43179fbSJeff Roberson ke = td->td_kse; 1220b43179fbSJeff Roberson p = td->td_proc; 1221b43179fbSJeff Roberson 12222056d0a1SJohn Baldwin mtx_assert(&sched_lock, MA_OWNED); 1223b43179fbSJeff Roberson 1224f2f51f8aSJeff Roberson if ((p->p_flag & P_NOLOAD) == 0) 1225907bdbc2SJeff Roberson sched_load_rem(); 12268460a577SJohn Birrell #ifdef KSE 12273389af30SJulian Elischer /* 12283389af30SJulian Elischer * We are volunteering to switch out, so we get to nominate 12293389af30SJulian Elischer * a successor for the rest of our quantum. 12303389af30SJulian Elischer * First try another thread in our ksegrp, and then look for 12313389af30SJulian Elischer * other ksegrps in our process. 12323389af30SJulian Elischer */ 12333389af30SJulian Elischer if (sched_followon && 12343389af30SJulian Elischer (p->p_flag & P_HADTHREADS) && 12353389af30SJulian Elischer (flags & SW_VOL) && 12363389af30SJulian Elischer newtd == NULL) { 12373389af30SJulian Elischer /* Let's schedule another thread from this process. */ 12383389af30SJulian Elischer kg = td->td_ksegrp; 12393389af30SJulian Elischer if ((newtd = TAILQ_FIRST(&kg->kg_runq))) { 12403389af30SJulian Elischer remrunqueue(newtd); 12413389af30SJulian Elischer sched_kgfollowons++; 12423389af30SJulian Elischer } else { 12433389af30SJulian Elischer FOREACH_KSEGRP_IN_PROC(p, kg) { 12443389af30SJulian Elischer if ((newtd = TAILQ_FIRST(&kg->kg_runq))) { 12453389af30SJulian Elischer sched_pfollowons++; 12463389af30SJulian Elischer remrunqueue(newtd); 12473389af30SJulian Elischer break; 12483389af30SJulian Elischer } 12493389af30SJulian Elischer } 12503389af30SJulian Elischer } 12513389af30SJulian Elischer } 12528460a577SJohn Birrell #endif 12533389af30SJulian Elischer 125456564741SStephan Uphoff if (newtd) 125556564741SStephan Uphoff newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED); 125656564741SStephan Uphoff 1257060563ecSJulian Elischer td->td_lastcpu = td->td_oncpu; 125852eb8464SJohn Baldwin td->td_flags &= ~TDF_NEEDRESCHED; 125977918643SStephan Uphoff td->td_owepreempt = 0; 1260ca59f152SJeff Roberson td->td_oncpu = NOCPU; 1261b43179fbSJeff Roberson /* 1262b43179fbSJeff Roberson * At the last moment, if this thread is still marked RUNNING, 1263b43179fbSJeff Roberson * then put it back on the run queue as it has not been suspended 1264bf0acc27SJohn Baldwin * or stopped or anything else similar. We never put the idle 1265bf0acc27SJohn Baldwin * threads on the run queue, however.
1266b43179fbSJeff Roberson */ 1267bf0acc27SJohn Baldwin if (td == PCPU_GET(idlethread)) 1268bf0acc27SJohn Baldwin TD_SET_CAN_RUN(td); 1269ed062c8dSJulian Elischer else { 12708460a577SJohn Birrell #ifdef KSE 1271d39063f2SJulian Elischer SLOT_RELEASE(td->td_ksegrp); 12728460a577SJohn Birrell #endif 1273ed062c8dSJulian Elischer if (TD_IS_RUNNING(td)) { 1274b43179fbSJeff Roberson /* Put us back on the run queue (kse and all). */ 1275c20c691bSJulian Elischer setrunqueue(td, (flags & SW_PREEMPT) ? 1276c20c691bSJulian Elischer SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1277c20c691bSJulian Elischer SRQ_OURSELF|SRQ_YIELDING); 12788460a577SJohn Birrell #ifdef KSE 1279ed062c8dSJulian Elischer } else if (p->p_flag & P_HADTHREADS) { 1280b43179fbSJeff Roberson /* 1281b43179fbSJeff Roberson * We will not be on the run queue. So we must be 1282b43179fbSJeff Roberson * sleeping or similar. As it's available, 1283b43179fbSJeff Roberson * someone else can use the KSE if they need it. 1284c20c691bSJulian Elischer * It's NOT available if we are about to need it. 1285b43179fbSJeff Roberson */ 1286c20c691bSJulian Elischer if (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp) 1287ed062c8dSJulian Elischer slot_fill(td->td_ksegrp); 12888460a577SJohn Birrell #endif 1289ed062c8dSJulian Elischer } 1290b43179fbSJeff Roberson } 1291c20c691bSJulian Elischer if (newtd) { 1292c20c691bSJulian Elischer /* 1293c20c691bSJulian Elischer * The thread we are about to run needs to be counted 1294c20c691bSJulian Elischer * as if it had been added to the run queue and selected. 1295c20c691bSJulian Elischer * It came from: 1296c20c691bSJulian Elischer * * A preemption 12978460a577SJohn Birrell * ifdef KSE 1298c20c691bSJulian Elischer * * An upcall 12998460a577SJohn Birrell * endif 1300c20c691bSJulian Elischer * * A followon 1301c20c691bSJulian Elischer */ 1302c20c691bSJulian Elischer KASSERT((newtd->td_inhibitors == 0), 1303c20c691bSJulian Elischer ("trying to run inhibited thread")); 13048460a577SJohn Birrell #ifdef KSE 1305c20c691bSJulian Elischer SLOT_USE(newtd->td_ksegrp); 13068460a577SJohn Birrell #endif 1307c20c691bSJulian Elischer newtd->td_kse->ke_flags |= KEF_DIDRUN; 1308c20c691bSJulian Elischer TD_SET_RUNNING(newtd); 1309c20c691bSJulian Elischer if ((newtd->td_proc->p_flag & P_NOLOAD) == 0) 1310907bdbc2SJeff Roberson sched_load_add(); 1311c20c691bSJulian Elischer } else { 1312ae53b483SJeff Roberson newtd = choosethread(); 1313c20c691bSJulian Elischer } 1314c20c691bSJulian Elischer 1315ebccf1e3SJoseph Koshy if (td != newtd) { 1316ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1317ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1318ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1319ebccf1e3SJoseph Koshy #endif 13208460a577SJohn Birrell 1321ae53b483SJeff Roberson cpu_switch(td, newtd); 1322ebccf1e3SJoseph Koshy #ifdef HWPMC_HOOKS 1323ebccf1e3SJoseph Koshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1324ebccf1e3SJoseph Koshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1325ebccf1e3SJoseph Koshy #endif 1326ebccf1e3SJoseph Koshy } 1327ebccf1e3SJoseph Koshy 1328ae53b483SJeff Roberson sched_lock.mtx_lock = (uintptr_t)td; 1329ae53b483SJeff Roberson td->td_oncpu = PCPU_GET(cpuid); 1330b43179fbSJeff Roberson } 1331b43179fbSJeff Roberson 1332b43179fbSJeff Roberson void 1333b43179fbSJeff Roberson sched_wakeup(struct thread *td) 1334b43179fbSJeff Roberson { 13358460a577SJohn Birrell #ifdef KSE 1336b43179fbSJeff Roberson struct ksegrp *kg; 13378460a577SJohn Birrell #endif 1338b43179fbSJeff Roberson 13392056d0a1SJohn Baldwin
mtx_assert(&sched_lock, MA_OWNED); 13408460a577SJohn Birrell #ifdef KSE 1341b43179fbSJeff Roberson kg = td->td_ksegrp; 1342f5c157d9SJohn Baldwin if (kg->kg_slptime > 1) { 1343b43179fbSJeff Roberson updatepri(kg); 1344f5c157d9SJohn Baldwin resetpriority(kg); 1345f5c157d9SJohn Baldwin } 1346b43179fbSJeff Roberson kg->kg_slptime = 0; 13478460a577SJohn Birrell #else 13488460a577SJohn Birrell if (td->td_slptime > 1) { 13498460a577SJohn Birrell updatepri(td); 13508460a577SJohn Birrell resetpriority(td); 13518460a577SJohn Birrell } 13528460a577SJohn Birrell td->td_slptime = 0; 13538460a577SJohn Birrell #endif 13542630e4c9SJulian Elischer setrunqueue(td, SRQ_BORING); 1355b43179fbSJeff Roberson } 1356b43179fbSJeff Roberson 135737c28a02SJulian Elischer #ifdef SMP 135882a1dfc1SJulian Elischer /* Enable HTT_2 if you have a 2-way HTT CPU. */ 135982a1dfc1SJulian Elischer static int 136082a1dfc1SJulian Elischer forward_wakeup(int cpunum) 136182a1dfc1SJulian Elischer { 136282a1dfc1SJulian Elischer cpumask_t map, me, dontuse; 136382a1dfc1SJulian Elischer cpumask_t map2; 136482a1dfc1SJulian Elischer struct pcpu *pc; 136582a1dfc1SJulian Elischer cpumask_t id, map3; 136682a1dfc1SJulian Elischer 136782a1dfc1SJulian Elischer mtx_assert(&sched_lock, MA_OWNED); 136882a1dfc1SJulian Elischer 1369ed062c8dSJulian Elischer CTR0(KTR_RUNQ, "forward_wakeup()"); 137082a1dfc1SJulian Elischer 137182a1dfc1SJulian Elischer if ((!forward_wakeup_enabled) || 137282a1dfc1SJulian Elischer (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0)) 137382a1dfc1SJulian Elischer return (0); 137482a1dfc1SJulian Elischer if (!smp_started || cold || panicstr) 137582a1dfc1SJulian Elischer return (0); 137682a1dfc1SJulian Elischer 137782a1dfc1SJulian Elischer forward_wakeups_requested++; 137882a1dfc1SJulian Elischer 137982a1dfc1SJulian Elischer /* 138082a1dfc1SJulian Elischer * Check the idle mask we received against what we calculated before 138182a1dfc1SJulian Elischer * in the old version. 138282a1dfc1SJulian Elischer */ 138382a1dfc1SJulian Elischer me = PCPU_GET(cpumask); 138482a1dfc1SJulian Elischer /* 138582a1dfc1SJulian Elischer * Don't bother if we should be doing it ourselves.
138682a1dfc1SJulian Elischer */ 138782a1dfc1SJulian Elischer if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum))) 138882a1dfc1SJulian Elischer return (0); 138982a1dfc1SJulian Elischer 139082a1dfc1SJulian Elischer dontuse = me | stopped_cpus | hlt_cpus_mask; 139182a1dfc1SJulian Elischer map3 = 0; 139282a1dfc1SJulian Elischer if (forward_wakeup_use_loop) { 139382a1dfc1SJulian Elischer SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { 139482a1dfc1SJulian Elischer id = pc->pc_cpumask; 139582a1dfc1SJulian Elischer if ( (id & dontuse) == 0 && 139682a1dfc1SJulian Elischer pc->pc_curthread == pc->pc_idlethread) { 139782a1dfc1SJulian Elischer map3 |= id; 139882a1dfc1SJulian Elischer } 139982a1dfc1SJulian Elischer } 140082a1dfc1SJulian Elischer } 140182a1dfc1SJulian Elischer 140282a1dfc1SJulian Elischer if (forward_wakeup_use_mask) { 140382a1dfc1SJulian Elischer map = 0; 140482a1dfc1SJulian Elischer map = idle_cpus_mask & ~dontuse; 140582a1dfc1SJulian Elischer 140682a1dfc1SJulian Elischer /* If both are on, compare them and use the loop result if they differ. */ 140782a1dfc1SJulian Elischer if (forward_wakeup_use_loop) { 140882a1dfc1SJulian Elischer if (map != map3) { 140982a1dfc1SJulian Elischer printf("map (%02X) != map3 (%02X)\n", 141082a1dfc1SJulian Elischer map, map3); 141182a1dfc1SJulian Elischer map = map3; 141282a1dfc1SJulian Elischer } 141382a1dfc1SJulian Elischer } 141482a1dfc1SJulian Elischer } else { 141582a1dfc1SJulian Elischer map = map3; 141682a1dfc1SJulian Elischer } 141782a1dfc1SJulian Elischer /* If we only allow a specific CPU, then mask off all the others. */ 141882a1dfc1SJulian Elischer if (cpunum != NOCPU) { 141982a1dfc1SJulian Elischer KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); 142082a1dfc1SJulian Elischer map &= (1 << cpunum); 142182a1dfc1SJulian Elischer } else { 142282a1dfc1SJulian Elischer /* Try to choose an idle die; the mask tricks below are demonstrated in a worked example at the end of this file.
*/ 142382a1dfc1SJulian Elischer if (forward_wakeup_use_htt) { 142482a1dfc1SJulian Elischer map2 = (map & (map >> 1)) & 0x5555; 142582a1dfc1SJulian Elischer if (map2) { 142682a1dfc1SJulian Elischer map = map2; 142782a1dfc1SJulian Elischer } 142882a1dfc1SJulian Elischer } 142982a1dfc1SJulian Elischer 143082a1dfc1SJulian Elischer /* set only one bit */ 143182a1dfc1SJulian Elischer if (forward_wakeup_use_single) { 143282a1dfc1SJulian Elischer map = map & ((~map) + 1); 143382a1dfc1SJulian Elischer } 143482a1dfc1SJulian Elischer } 143582a1dfc1SJulian Elischer if (map) { 143682a1dfc1SJulian Elischer forward_wakeups_delivered++; 143782a1dfc1SJulian Elischer ipi_selected(map, IPI_AST); 143882a1dfc1SJulian Elischer return (1); 143982a1dfc1SJulian Elischer } 144082a1dfc1SJulian Elischer if (cpunum == NOCPU) 144182a1dfc1SJulian Elischer printf("forward_wakeup: Idle processor not found\n"); 144282a1dfc1SJulian Elischer return (0); 144382a1dfc1SJulian Elischer } 144437c28a02SJulian Elischer #endif 144582a1dfc1SJulian Elischer 1446f3a0f873SStephan Uphoff #ifdef SMP 1447a3f2d842SStephan Uphoff static void kick_other_cpu(int pri,int cpuid); 1448f3a0f873SStephan Uphoff 1449f3a0f873SStephan Uphoff static void 1450f3a0f873SStephan Uphoff kick_other_cpu(int pri,int cpuid) 1451f3a0f873SStephan Uphoff { 1452f3a0f873SStephan Uphoff struct pcpu * pcpu = pcpu_find(cpuid); 1453f3a0f873SStephan Uphoff int cpri = pcpu->pc_curthread->td_priority; 1454f3a0f873SStephan Uphoff 1455f3a0f873SStephan Uphoff if (idle_cpus_mask & pcpu->pc_cpumask) { 1456f3a0f873SStephan Uphoff forward_wakeups_delivered++; 1457f3a0f873SStephan Uphoff ipi_selected(pcpu->pc_cpumask, IPI_AST); 1458f3a0f873SStephan Uphoff return; 1459f3a0f873SStephan Uphoff } 1460f3a0f873SStephan Uphoff 1461f3a0f873SStephan Uphoff if (pri >= cpri) 1462f3a0f873SStephan Uphoff return; 1463f3a0f873SStephan Uphoff 1464f3a0f873SStephan Uphoff #if defined(IPI_PREEMPTION) && defined(PREEMPTION) 1465f3a0f873SStephan Uphoff #if !defined(FULL_PREEMPTION) 1466f3a0f873SStephan Uphoff if (pri <= PRI_MAX_ITHD) 1467f3a0f873SStephan Uphoff #endif /* ! 
FULL_PREEMPTION */ 1468f3a0f873SStephan Uphoff { 1469f3a0f873SStephan Uphoff ipi_selected(pcpu->pc_cpumask, IPI_PREEMPT); 1470f3a0f873SStephan Uphoff return; 1471f3a0f873SStephan Uphoff } 1472f3a0f873SStephan Uphoff #endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */ 1473f3a0f873SStephan Uphoff 1474f3a0f873SStephan Uphoff pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED; 1475f3a0f873SStephan Uphoff ipi_selected( pcpu->pc_cpumask , IPI_AST); 1476f3a0f873SStephan Uphoff return; 1477f3a0f873SStephan Uphoff } 1478f3a0f873SStephan Uphoff #endif /* SMP */ 1479f3a0f873SStephan Uphoff 1480b43179fbSJeff Roberson void 14812630e4c9SJulian Elischer sched_add(struct thread *td, int flags) 14826804a3abSJulian Elischer #ifdef SMP 1483f3a0f873SStephan Uphoff { 1484f3a0f873SStephan Uphoff struct kse *ke; 14856804a3abSJulian Elischer int forwarded = 0; 14866804a3abSJulian Elischer int cpu; 1487f3a0f873SStephan Uphoff int single_cpu = 0; 14887cf90fb3SJeff Roberson 14897cf90fb3SJeff Roberson ke = td->td_kse; 1490b43179fbSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1491b43179fbSJeff Roberson KASSERT(ke->ke_state != KES_ONRUNQ, 14925a2b158dSJeff Roberson ("sched_add: kse %p (%s) already in run queue", ke, 14938460a577SJohn Birrell #ifdef KSE 1494b43179fbSJeff Roberson ke->ke_proc->p_comm)); 1495b43179fbSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 14968460a577SJohn Birrell #else 14978460a577SJohn Birrell td->td_proc->p_comm)); 14988460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 14998460a577SJohn Birrell #endif 15005a2b158dSJeff Roberson ("sched_add: process swapped out")); 1501907bdbc2SJeff Roberson CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 1502907bdbc2SJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 1503907bdbc2SJeff Roberson curthread->td_proc->p_comm); 15040c0b25aeSJohn Baldwin 1505f3a0f873SStephan Uphoff 1506f3a0f873SStephan Uphoff if (td->td_pinned != 0) { 1507f3a0f873SStephan Uphoff cpu = td->td_lastcpu; 1508f3a0f873SStephan Uphoff ke->ke_runq = &runq_pcpu[cpu]; 1509f3a0f873SStephan Uphoff single_cpu = 1; 1510f3a0f873SStephan Uphoff CTR3(KTR_RUNQ, 1511f3a0f873SStephan Uphoff "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu); 1512f3a0f873SStephan Uphoff } else if ((ke)->ke_flags & KEF_BOUND) { 1513f3a0f873SStephan Uphoff /* Find CPU from bound runq */ 1514f3a0f873SStephan Uphoff KASSERT(SKE_RUNQ_PCPU(ke),("sched_add: bound kse not on cpu runq")); 1515f3a0f873SStephan Uphoff cpu = ke->ke_runq - &runq_pcpu[0]; 1516f3a0f873SStephan Uphoff single_cpu = 1; 1517f3a0f873SStephan Uphoff CTR3(KTR_RUNQ, 1518f3a0f873SStephan Uphoff "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu); 1519f3a0f873SStephan Uphoff } else { 15206804a3abSJulian Elischer CTR2(KTR_RUNQ, 15216804a3abSJulian Elischer "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td); 15226804a3abSJulian Elischer cpu = NOCPU; 1523e17c57b1SJeff Roberson ke->ke_runq = &runq; 1524e17c57b1SJeff Roberson } 1525f3a0f873SStephan Uphoff 1526a3f2d842SStephan Uphoff if (single_cpu && (cpu != PCPU_GET(cpuid))) { 1527f3a0f873SStephan Uphoff kick_other_cpu(td->td_priority,cpu); 1528f3a0f873SStephan Uphoff } else { 1529f3a0f873SStephan Uphoff 1530f3a0f873SStephan Uphoff if (!single_cpu) { 1531f3a0f873SStephan Uphoff cpumask_t me = PCPU_GET(cpumask); 1532f3a0f873SStephan Uphoff int idle = idle_cpus_mask & me; 1533f3a0f873SStephan Uphoff 1534f3a0f873SStephan Uphoff if (!idle && ((flags & SRQ_INTR) == 0) && 1535f3a0f873SStephan Uphoff (idle_cpus_mask & ~(hlt_cpus_mask | me))) 
1536f3a0f873SStephan Uphoff forwarded = forward_wakeup(cpu); 1537f3a0f873SStephan Uphoff } 1538f3a0f873SStephan Uphoff 1539f3a0f873SStephan Uphoff if (!forwarded) { 1540a3f2d842SStephan Uphoff if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td)) 1541f3a0f873SStephan Uphoff return; 1542f3a0f873SStephan Uphoff else 1543f3a0f873SStephan Uphoff maybe_resched(td); 1544f3a0f873SStephan Uphoff } 1545f3a0f873SStephan Uphoff } 1546f3a0f873SStephan Uphoff 1547f3a0f873SStephan Uphoff if ((td->td_proc->p_flag & P_NOLOAD) == 0) 1548f3a0f873SStephan Uphoff sched_load_add(); 15498460a577SJohn Birrell #ifdef KSE 1550f3a0f873SStephan Uphoff SLOT_USE(td->td_ksegrp); 15518460a577SJohn Birrell #endif 1552f3a0f873SStephan Uphoff runq_add(ke->ke_runq, ke, flags); 1553f3a0f873SStephan Uphoff ke->ke_state = KES_ONRUNQ; 1554f3a0f873SStephan Uphoff } 1555f3a0f873SStephan Uphoff #else /* SMP */ 1556f3a0f873SStephan Uphoff { 1557f3a0f873SStephan Uphoff struct kse *ke; 1558f3a0f873SStephan Uphoff ke = td->td_kse; 1559f3a0f873SStephan Uphoff mtx_assert(&sched_lock, MA_OWNED); 1560f3a0f873SStephan Uphoff KASSERT(ke->ke_state != KES_ONRUNQ, 1561f3a0f873SStephan Uphoff ("sched_add: kse %p (%s) already in run queue", ke, 15628460a577SJohn Birrell #ifdef KSE 1563f3a0f873SStephan Uphoff ke->ke_proc->p_comm)); 1564f3a0f873SStephan Uphoff KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 15658460a577SJohn Birrell #else 15668460a577SJohn Birrell td->td_proc->p_comm)); 15678460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 15688460a577SJohn Birrell #endif 1569f3a0f873SStephan Uphoff ("sched_add: process swapped out")); 1570f3a0f873SStephan Uphoff CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 1571f3a0f873SStephan Uphoff td, td->td_proc->p_comm, td->td_priority, curthread, 1572f3a0f873SStephan Uphoff curthread->td_proc->p_comm); 1573732d9528SJulian Elischer CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td); 1574e17c57b1SJeff Roberson ke->ke_runq = &runq; 15756804a3abSJulian Elischer 15766804a3abSJulian Elischer /* 15776804a3abSJulian Elischer * If we are yielding (on the way out anyhow) 15786804a3abSJulian Elischer * or the thread being saved is US, 15796804a3abSJulian Elischer * then don't try to be smart about preemption 15806804a3abSJulian Elischer * or kicking off another CPU 15816804a3abSJulian Elischer * as it won't help and may hinder. 15826804a3abSJulian Elischer * In the YIELDING case, we are about to run whoever is 15836804a3abSJulian Elischer * being put in the queue anyhow, and in the 15846804a3abSJulian Elischer * OURSELF case, we are putting ourselves on the run queue 15856804a3abSJulian Elischer * which also only happens when we are about to yield.
15866804a3abSJulian Elischer */ 15876804a3abSJulian Elischer if((flags & SRQ_YIELDING) == 0) { 15886804a3abSJulian Elischer if (maybe_preempt(td)) 15896804a3abSJulian Elischer return; 15906804a3abSJulian Elischer } 1591f2f51f8aSJeff Roberson if ((td->td_proc->p_flag & P_NOLOAD) == 0) 1592907bdbc2SJeff Roberson sched_load_add(); 15938460a577SJohn Birrell #ifdef KSE 1594d39063f2SJulian Elischer SLOT_USE(td->td_ksegrp); 15958460a577SJohn Birrell #endif 1596c20c691bSJulian Elischer runq_add(ke->ke_runq, ke, flags); 15970f54f482SJulian Elischer ke->ke_state = KES_ONRUNQ; 15986942d433SJohn Baldwin maybe_resched(td); 1599b43179fbSJeff Roberson } 1600f3a0f873SStephan Uphoff #endif /* SMP */ 1601f3a0f873SStephan Uphoff 1602b43179fbSJeff Roberson void 16037cf90fb3SJeff Roberson sched_rem(struct thread *td) 1604b43179fbSJeff Roberson { 16057cf90fb3SJeff Roberson struct kse *ke; 16067cf90fb3SJeff Roberson 16077cf90fb3SJeff Roberson ke = td->td_kse; 16088460a577SJohn Birrell #ifdef KSE 1609b43179fbSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 16108460a577SJohn Birrell #else 16118460a577SJohn Birrell KASSERT(td->td_proc->p_sflag & PS_INMEM, 16128460a577SJohn Birrell #endif 16135a2b158dSJeff Roberson ("sched_rem: process swapped out")); 16145a2b158dSJeff Roberson KASSERT((ke->ke_state == KES_ONRUNQ), 16155a2b158dSJeff Roberson ("sched_rem: KSE not on run queue")); 1616b43179fbSJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1617907bdbc2SJeff Roberson CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 1618907bdbc2SJeff Roberson td, td->td_proc->p_comm, td->td_priority, curthread, 1619907bdbc2SJeff Roberson curthread->td_proc->p_comm); 1620b43179fbSJeff Roberson 1621f2f51f8aSJeff Roberson if ((td->td_proc->p_flag & P_NOLOAD) == 0) 1622907bdbc2SJeff Roberson sched_load_rem(); 16238460a577SJohn Birrell #ifdef KSE 1624d39063f2SJulian Elischer SLOT_RELEASE(td->td_ksegrp); 16258460a577SJohn Birrell #endif 1626ad59c36bSJulian Elischer runq_remove(ke->ke_runq, ke); 1627e17c57b1SJeff Roberson 1628b43179fbSJeff Roberson ke->ke_state = KES_THREAD; 1629b43179fbSJeff Roberson } 1630b43179fbSJeff Roberson 163114f0e2e9SJulian Elischer /* 163214f0e2e9SJulian Elischer * Select threads to run. 163314f0e2e9SJulian Elischer * Notice that the running threads still consume a slot. 
163414f0e2e9SJulian Elischer */ 16358460a577SJohn Birrell #ifdef KSE 1636b43179fbSJeff Roberson struct kse * 16378460a577SJohn Birrell #else 16388460a577SJohn Birrell struct thread * 16398460a577SJohn Birrell #endif 1640b43179fbSJeff Roberson sched_choose(void) 1641b43179fbSJeff Roberson { 1642b43179fbSJeff Roberson struct kse *ke; 1643e17c57b1SJeff Roberson struct runq *rq; 1644b43179fbSJeff Roberson 1645e17c57b1SJeff Roberson #ifdef SMP 1646e17c57b1SJeff Roberson struct kse *kecpu; 1647e17c57b1SJeff Roberson 1648e17c57b1SJeff Roberson rq = &runq; 1649b43179fbSJeff Roberson ke = runq_choose(&runq); 1650e17c57b1SJeff Roberson kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]); 1651e17c57b1SJeff Roberson 1652e17c57b1SJeff Roberson if (ke == NULL || 1653e17c57b1SJeff Roberson (kecpu != NULL && 1654e17c57b1SJeff Roberson kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) { 1655732d9528SJulian Elischer CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu, 1656e17c57b1SJeff Roberson PCPU_GET(cpuid)); 1657e17c57b1SJeff Roberson ke = kecpu; 1658e17c57b1SJeff Roberson rq = &runq_pcpu[PCPU_GET(cpuid)]; 1659e17c57b1SJeff Roberson } else { 1660732d9528SJulian Elischer CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke); 1661e17c57b1SJeff Roberson } 1662e17c57b1SJeff Roberson 1663e17c57b1SJeff Roberson #else 1664e17c57b1SJeff Roberson rq = &runq; 1665e17c57b1SJeff Roberson ke = runq_choose(&runq); 1666e17c57b1SJeff Roberson #endif 1667b43179fbSJeff Roberson 16688460a577SJohn Birrell #ifdef KSE 1669b43179fbSJeff Roberson if (ke != NULL) { 16708460a577SJohn Birrell #else 16718460a577SJohn Birrell if (ke) { 16728460a577SJohn Birrell #endif 1673e17c57b1SJeff Roberson runq_remove(rq, ke); 1674b43179fbSJeff Roberson ke->ke_state = KES_THREAD; 1675b43179fbSJeff Roberson 16768460a577SJohn Birrell #ifdef KSE 1677b43179fbSJeff Roberson KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 16785a2b158dSJeff Roberson ("sched_choose: process swapped out")); 16798460a577SJohn Birrell #else 16808460a577SJohn Birrell KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM, 16818460a577SJohn Birrell ("sched_choose: process swapped out")); 16828460a577SJohn Birrell return (ke->ke_thread); 16838460a577SJohn Birrell #endif 1684b43179fbSJeff Roberson } 16858460a577SJohn Birrell #ifdef KSE 1686b43179fbSJeff Roberson return (ke); 16878460a577SJohn Birrell #else 16888460a577SJohn Birrell return (NULL); 16898460a577SJohn Birrell #endif 1690b43179fbSJeff Roberson } 1691b43179fbSJeff Roberson 1692b43179fbSJeff Roberson void 1693b43179fbSJeff Roberson sched_userret(struct thread *td) 1694b43179fbSJeff Roberson { 16958460a577SJohn Birrell #ifdef KSE 1696b43179fbSJeff Roberson struct ksegrp *kg; 16978460a577SJohn Birrell #endif 1698b43179fbSJeff Roberson /* 1699b43179fbSJeff Roberson * XXX we cheat slightly on the locking here to avoid locking in 1700b43179fbSJeff Roberson * the usual case. Setting td_priority here is essentially an 1701b43179fbSJeff Roberson * incomplete workaround for not setting it properly elsewhere. 1702b43179fbSJeff Roberson * Now that some interrupt handlers are threads, not setting it 1703b43179fbSJeff Roberson * properly elsewhere can clobber it in the window between setting 1704b43179fbSJeff Roberson * it here and returning to user mode, so don't waste time setting 1705b43179fbSJeff Roberson * it perfectly here. 
1706b43179fbSJeff Roberson */ 1707f5c157d9SJohn Baldwin KASSERT((td->td_flags & TDF_BORROWING) == 0, 1708f5c157d9SJohn Baldwin ("thread with borrowed priority returning to userland")); 17098460a577SJohn Birrell #ifdef KSE 1710b43179fbSJeff Roberson kg = td->td_ksegrp; 1711b43179fbSJeff Roberson if (td->td_priority != kg->kg_user_pri) { 1712b43179fbSJeff Roberson mtx_lock_spin(&sched_lock); 1713b43179fbSJeff Roberson td->td_priority = kg->kg_user_pri; 1714f5c157d9SJohn Baldwin td->td_base_pri = kg->kg_user_pri; 1715b43179fbSJeff Roberson mtx_unlock_spin(&sched_lock); 1716b43179fbSJeff Roberson } 17178460a577SJohn Birrell #else 17188460a577SJohn Birrell if (td->td_priority != td->td_user_pri) { 17198460a577SJohn Birrell mtx_lock_spin(&sched_lock); 17208460a577SJohn Birrell td->td_priority = td->td_user_pri; 17218460a577SJohn Birrell td->td_base_pri = td->td_user_pri; 17228460a577SJohn Birrell mtx_unlock_spin(&sched_lock); 17238460a577SJohn Birrell } 17248460a577SJohn Birrell #endif 1725b43179fbSJeff Roberson } 1726de028f5aSJeff Roberson 1727e17c57b1SJeff Roberson void 1728e17c57b1SJeff Roberson sched_bind(struct thread *td, int cpu) 1729e17c57b1SJeff Roberson { 1730e17c57b1SJeff Roberson struct kse *ke; 1731e17c57b1SJeff Roberson 1732e17c57b1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1733e17c57b1SJeff Roberson KASSERT(TD_IS_RUNNING(td), 1734e17c57b1SJeff Roberson ("sched_bind: cannot bind non-running thread")); 1735e17c57b1SJeff Roberson 1736e17c57b1SJeff Roberson ke = td->td_kse; 1737e17c57b1SJeff Roberson 1738e17c57b1SJeff Roberson ke->ke_flags |= KEF_BOUND; 1739e17c57b1SJeff Roberson #ifdef SMP 1740e17c57b1SJeff Roberson ke->ke_runq = &runq_pcpu[cpu]; 1741e17c57b1SJeff Roberson if (PCPU_GET(cpuid) == cpu) 1742e17c57b1SJeff Roberson return; 1743e17c57b1SJeff Roberson 1744e17c57b1SJeff Roberson ke->ke_state = KES_THREAD; 1745e17c57b1SJeff Roberson 1746bf0acc27SJohn Baldwin mi_switch(SW_VOL, NULL); 1747e17c57b1SJeff Roberson #endif 1748e17c57b1SJeff Roberson } 1749e17c57b1SJeff Roberson 1750e17c57b1SJeff Roberson void 1751e17c57b1SJeff Roberson sched_unbind(struct thread* td) 1752e17c57b1SJeff Roberson { 1753e17c57b1SJeff Roberson mtx_assert(&sched_lock, MA_OWNED); 1754e17c57b1SJeff Roberson td->td_kse->ke_flags &= ~KEF_BOUND; 1755e17c57b1SJeff Roberson } 1756e17c57b1SJeff Roberson 1757de028f5aSJeff Roberson int 1758ebccf1e3SJoseph Koshy sched_is_bound(struct thread *td) 1759ebccf1e3SJoseph Koshy { 1760ebccf1e3SJoseph Koshy mtx_assert(&sched_lock, MA_OWNED); 1761ebccf1e3SJoseph Koshy return (td->td_kse->ke_flags & KEF_BOUND); 1762ebccf1e3SJoseph Koshy } 1763ebccf1e3SJoseph Koshy 176436ec198bSDavid Xu void 176536ec198bSDavid Xu sched_relinquish(struct thread *td) 176636ec198bSDavid Xu { 17678460a577SJohn Birrell #ifdef KSE 176836ec198bSDavid Xu struct ksegrp *kg; 176936ec198bSDavid Xu 177036ec198bSDavid Xu kg = td->td_ksegrp; 17718460a577SJohn Birrell #endif 177236ec198bSDavid Xu mtx_lock_spin(&sched_lock); 17738460a577SJohn Birrell #ifdef KSE 177436ec198bSDavid Xu if (kg->kg_pri_class == PRI_TIMESHARE) 17758460a577SJohn Birrell #else 17768460a577SJohn Birrell if (td->td_pri_class == PRI_TIMESHARE) 17778460a577SJohn Birrell #endif 177836ec198bSDavid Xu sched_prio(td, PRI_MAX_TIMESHARE); 177936ec198bSDavid Xu mi_switch(SW_VOL, NULL); 178036ec198bSDavid Xu mtx_unlock_spin(&sched_lock); 178136ec198bSDavid Xu } 178236ec198bSDavid Xu 1783ebccf1e3SJoseph Koshy int 1784ca59f152SJeff Roberson sched_load(void) 1785ca59f152SJeff Roberson { 1786ca59f152SJeff Roberson return (sched_tdcnt); 
1787ca59f152SJeff Roberson } 1788ca59f152SJeff Roberson 17898460a577SJohn Birrell #ifdef KSE 1790ca59f152SJeff Roberson int 1791de028f5aSJeff Roberson sched_sizeof_ksegrp(void) 1792de028f5aSJeff Roberson { 1793ed062c8dSJulian Elischer return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 1794de028f5aSJeff Roberson } 17958460a577SJohn Birrell #endif 179636ec198bSDavid Xu 1797de028f5aSJeff Roberson int 1798de028f5aSJeff Roberson sched_sizeof_proc(void) 1799de028f5aSJeff Roberson { 1800de028f5aSJeff Roberson return (sizeof(struct proc)); 1801de028f5aSJeff Roberson } 180236ec198bSDavid Xu 1803de028f5aSJeff Roberson int 1804de028f5aSJeff Roberson sched_sizeof_thread(void) 1805de028f5aSJeff Roberson { 1806ed062c8dSJulian Elischer return (sizeof(struct thread) + sizeof(struct kse)); 1807de028f5aSJeff Roberson } 180879acfc49SJeff Roberson 180979acfc49SJeff Roberson fixpt_t 18107cf90fb3SJeff Roberson sched_pctcpu(struct thread *td) 181179acfc49SJeff Roberson { 181255f2099aSJeff Roberson struct kse *ke; 181355f2099aSJeff Roberson 181455f2099aSJeff Roberson ke = td->td_kse; 181555f2099aSJeff Roberson return (ke->ke_pctcpu); 181679acfc49SJeff Roberson } 1817b41f1452SDavid Xu 1818b41f1452SDavid Xu void 1819b41f1452SDavid Xu sched_tick(void) 1820b41f1452SDavid Xu { 1821b41f1452SDavid Xu } 1822ed062c8dSJulian Elischer #define KERN_SWITCH_INCLUDE 1 1823ed062c8dSJulian Elischer #include "kern/kern_switch.c" 1824
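/*
 * Illustrative user-space demonstration (not part of the original file)
 * of the two bitmask idioms used by forward_wakeup() above: picking CPU
 * pairs whose two hyperthreads are both idle with (map & (map >> 1)) &
 * 0x5555 (this assumes sibling threads occupy adjacent mask bits), and
 * isolating a single CPU with map & (~map + 1), the two's-complement
 * lowest-set-bit trick.  The sample "idle" value is made up; the kernel
 * derives it from idle_cpus_mask.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int idle = 0x3c;	/* CPUs 2-5 idle: binary 0011 1100. */
	unsigned int pairs, one;

	/*
	 * A bit survives only if the next higher bit (its sibling) is also
	 * set; masking with 0x5555 then keeps the even bit of each pair,
	 * so each remaining bit names a core with both threads idle.
	 */
	pairs = (idle & (idle >> 1)) & 0x5555;

	/* Keep only the lowest set bit so exactly one CPU is kicked. */
	one = pairs & (~pairs + 1);

	printf("idle  = 0x%02x\n", idle);	/* Prints 0x3c. */
	printf("pairs = 0x%02x\n", pairs);	/* Prints 0x14: cores at CPUs 2 and 4. */
	printf("one   = 0x%02x\n", one);	/* Prints 0x04: CPU 2 is chosen. */
	return (0);
}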