xref: /freebsd/sys/kern/sched_ule.c (revision f1e8dc4a3b3a1e7653be2620a7b2891b07162ed5)
135e6168fSJeff Roberson /*-
235e6168fSJeff Roberson  * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
335e6168fSJeff Roberson  * All rights reserved.
435e6168fSJeff Roberson  *
535e6168fSJeff Roberson  * Redistribution and use in source and binary forms, with or without
635e6168fSJeff Roberson  * modification, are permitted provided that the following conditions
735e6168fSJeff Roberson  * are met:
835e6168fSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
935e6168fSJeff Roberson  *    notice unmodified, this list of conditions, and the following
1035e6168fSJeff Roberson  *    disclaimer.
1135e6168fSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
1235e6168fSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
1335e6168fSJeff Roberson  *    documentation and/or other materials provided with the distribution.
1435e6168fSJeff Roberson  *
1535e6168fSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1635e6168fSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1735e6168fSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1835e6168fSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
1935e6168fSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2035e6168fSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2135e6168fSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2235e6168fSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2335e6168fSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2435e6168fSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2535e6168fSJeff Roberson  *
2635e6168fSJeff Roberson  * $FreeBSD$
2735e6168fSJeff Roberson  */
2835e6168fSJeff Roberson 
2935e6168fSJeff Roberson #include <sys/param.h>
3035e6168fSJeff Roberson #include <sys/systm.h>
3135e6168fSJeff Roberson #include <sys/kernel.h>
3235e6168fSJeff Roberson #include <sys/ktr.h>
3335e6168fSJeff Roberson #include <sys/lock.h>
3435e6168fSJeff Roberson #include <sys/mutex.h>
3535e6168fSJeff Roberson #include <sys/proc.h>
3635e6168fSJeff Roberson #include <sys/sched.h>
3735e6168fSJeff Roberson #include <sys/smp.h>
3835e6168fSJeff Roberson #include <sys/sx.h>
3935e6168fSJeff Roberson #include <sys/sysctl.h>
4035e6168fSJeff Roberson #include <sys/sysproto.h>
4135e6168fSJeff Roberson #include <sys/vmmeter.h>
4235e6168fSJeff Roberson #ifdef DDB
4335e6168fSJeff Roberson #include <ddb/ddb.h>
4435e6168fSJeff Roberson #endif
4535e6168fSJeff Roberson #ifdef KTRACE
4635e6168fSJeff Roberson #include <sys/uio.h>
4735e6168fSJeff Roberson #include <sys/ktrace.h>
4835e6168fSJeff Roberson #endif
4935e6168fSJeff Roberson 
5035e6168fSJeff Roberson #include <machine/cpu.h>
5135e6168fSJeff Roberson 
5235e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
5335e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */
5435e6168fSJeff Roberson static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
5535e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
5635e6168fSJeff Roberson 
5735e6168fSJeff Roberson static void sched_setup(void *dummy);
5835e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
5935e6168fSJeff Roberson 
60783caefbSJeff Roberson #define	SCHED_STRICT_RESCHED 1
61783caefbSJeff Roberson 
6235e6168fSJeff Roberson /*
6335e6168fSJeff Roberson  * These datastructures are allocated within their parent datastructure but
6435e6168fSJeff Roberson  * are scheduler specific.
6535e6168fSJeff Roberson  */
6635e6168fSJeff Roberson 
6735e6168fSJeff Roberson struct ke_sched {
6835e6168fSJeff Roberson 	int		ske_slice;
6935e6168fSJeff Roberson 	struct runq	*ske_runq;
7035e6168fSJeff Roberson 	/* The following variables are only used for pctcpu calculation */
7135e6168fSJeff Roberson 	int		ske_ltick;	/* Last tick that we were running on */
7235e6168fSJeff Roberson 	int		ske_ftick;	/* First tick that we were running on */
7335e6168fSJeff Roberson 	int		ske_ticks;	/* Tick count */
74cd6e33dfSJeff Roberson 	u_char		ske_cpu;
7535e6168fSJeff Roberson };
7635e6168fSJeff Roberson #define	ke_slice	ke_sched->ske_slice
7735e6168fSJeff Roberson #define	ke_runq		ke_sched->ske_runq
7835e6168fSJeff Roberson #define	ke_ltick	ke_sched->ske_ltick
7935e6168fSJeff Roberson #define	ke_ftick	ke_sched->ske_ftick
8035e6168fSJeff Roberson #define	ke_ticks	ke_sched->ske_ticks
81cd6e33dfSJeff Roberson #define	ke_cpu		ke_sched->ske_cpu
8235e6168fSJeff Roberson 
/*
 * Scheduler private per-ksegrp state.  sched_wakeup() adds slept ticks to
 * skg_slptime multiplied by 1024.
 */
struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};

/* Accessors: written as kg->kg_xxx, expand to kg->kg_sched->skg_xxx. */
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime
8935e6168fSJeff Roberson 
/*
 * Scheduler private per-thread state.
 */
struct td_sched {
	int	std_slptime;	/* `ticks' at sched_sleep(); 0 after wakeup */
	int	std_schedflag;	/* TD_SCHED_* flags */
};

/* Accessors: written as td->td_xxx, expand to td->td_sched->std_xxx. */
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

/*
 * std_schedflag bits.
 *
 * TD_SCHED_BLOAD:	thread was counted as being in short term sleep
 *			(i.e. it contributed to a kseq's ksq_bload).
 */
#define	TD_SCHED_BLOAD	0x0001
1015d7ef00cSJeff Roberson struct td_sched td_sched;
10235e6168fSJeff Roberson struct ke_sched ke_sched;
10335e6168fSJeff Roberson struct kg_sched kg_sched;
10435e6168fSJeff Roberson 
10535e6168fSJeff Roberson struct ke_sched *kse0_sched = &ke_sched;
10635e6168fSJeff Roberson struct kg_sched *ksegrp0_sched = &kg_sched;
10735e6168fSJeff Roberson struct p_sched *proc0_sched = NULL;
10835e6168fSJeff Roberson struct td_sched *thread0_sched = &td_sched;
10935e6168fSJeff Roberson 
11035e6168fSJeff Roberson /*
11135e6168fSJeff Roberson  * This priority range has 20 priorities on either end that are reachable
11235e6168fSJeff Roberson  * only through nice values.
11335e6168fSJeff Roberson  */
/*
 * PRI_RANGE:	Number of priorities in the timeshare class.
 * PRI_NRESV:	Priorities reserved for nice (half at each end of the range).
 * PRI_BASE:	Offset of the first dynamically assigned priority.
 * PRI_DYN:	Number of priorities assigned from the sleep/run ratio.
 * PRI_DYN_HALF: Half of PRI_DYN; the unit used by sched_slp_ratio().
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		40
#define	SCHED_PRI_BASE		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_DYN		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_PRI_DYN_HALF	(SCHED_PRI_DYN / 2)
11935e6168fSJeff Roberson 
/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * SLP_RATIO:	Compute a bounded ratio of slp time vs run time.
 * SLP_TOPRI:	Convert a number of ticks slept and ticks ran into a priority
 */
/* 30 seconds worth of ticks, in the same x1024 scale as kg_slptime. */
#define	SCHED_SLP_RUN_MAX	((hz * 30) * 1024)
/* Divisor applied to kg_runtime and kg_slptime once the maximum is passed. */
#define	SCHED_SLP_RUN_THROTTLE	(10)
131407b0157SJeff Roberson static __inline int
132407b0157SJeff Roberson sched_slp_ratio(int b, int s)
133407b0157SJeff Roberson {
134407b0157SJeff Roberson 	b /= SCHED_PRI_DYN_HALF;
135407b0157SJeff Roberson 	if (b == 0)
136407b0157SJeff Roberson 		return (0);
137407b0157SJeff Roberson 	s /= b;
138407b0157SJeff Roberson 	return (s);
139407b0157SJeff Roberson }
/*
 * Map sleep and run time to a priority offset.  When slp > run the result
 * is ratio(slp, run); otherwise it is SCHED_PRI_DYN minus ratio(run, slp).
 * SCHED_PRI_NRESV / 2 is added in both cases.  Note that both arguments are
 * evaluated more than once.
 */
#define	SCHED_SLP_TOPRI(slp, run)					\
    (SCHED_PRI_NRESV / 2 +						\
    (((slp) > (run)) ?							\
    sched_slp_ratio((slp), (run)) :					\
    SCHED_PRI_DYN_HALF +						\
    (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp)))))
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what component of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
/* Slice bounds, in ticks: 1/100th and 1/4 of a second respectively. */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 4)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)

/* Scale `val', taken from the range [0, max], onto the slice range. */
#define	SCHED_SLICE_SCALE(val, max)					\
    (((val) * SCHED_SLICE_RANGE) / (max))

/* Larger pri/slp inputs produce smaller slices. */
#define	SCHED_PRI_TOSLICE(pri)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define	SCHED_SLP_TOSLICE(slp)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN))

/* Weights used by sched_slice(); integer division happens first. */
#define	SCHED_SLP_COMP(slice)	(((slice) / 5) * 3)	/* 60% */
#define	SCHED_PRI_COMP(slice)	(((slice) / 5) * 2)	/* 40% */
17135e6168fSJeff Roberson 
/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
/*
 * True for every non-timeshare ksegrp, and for timeshare ksegrps that have
 * slept more than they ran with sched_slp_ratio(slptime, runtime) above 4.
 */
#define	SCHED_CURR(kg)							\
	(((kg)->kg_slptime > (kg)->kg_runtime &&			\
	sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4) ||	\
	(kg)->kg_pri_class != PRI_TIMESHARE)
18135e6168fSJeff Roberson 
18235e6168fSJeff Roberson /*
18335e6168fSJeff Roberson  * Cpu percentage computation macros and defines.
18435e6168fSJeff Roberson  *
18535e6168fSJeff Roberson  * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
18635e6168fSJeff Roberson  * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
18735e6168fSJeff Roberson  */
18835e6168fSJeff Roberson 
#define	SCHED_CPU_TIME		60	/* seconds */
#define	SCHED_CPU_TICKS		(hz * SCHED_CPU_TIME)	/* same, in ticks */
19135e6168fSJeff Roberson 
19235e6168fSJeff Roberson /*
19335e6168fSJeff Roberson  * kseq - pair of runqs per processor
19435e6168fSJeff Roberson  */
19535e6168fSJeff Roberson 
19635e6168fSJeff Roberson struct kseq {
19735e6168fSJeff Roberson 	struct runq	ksq_runqs[2];
19835e6168fSJeff Roberson 	struct runq	*ksq_curr;
19935e6168fSJeff Roberson 	struct runq	*ksq_next;
20035e6168fSJeff Roberson 	int		ksq_load;	/* Total runnable */
2015d7ef00cSJeff Roberson #ifdef SMP
2025d7ef00cSJeff Roberson 	unsigned int	ksq_rslices;	/* Slices on run queue */
2035d7ef00cSJeff Roberson 	unsigned int	ksq_bload;	/* Threads waiting on IO */
2045d7ef00cSJeff Roberson #endif
20535e6168fSJeff Roberson };
20635e6168fSJeff Roberson 
20735e6168fSJeff Roberson /*
20835e6168fSJeff Roberson  * One kse queue per processor.
20935e6168fSJeff Roberson  */
2100a016a05SJeff Roberson #ifdef SMP
21135e6168fSJeff Roberson struct kseq	kseq_cpu[MAXCPU];
2120a016a05SJeff Roberson #define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
2130a016a05SJeff Roberson #define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
2140a016a05SJeff Roberson #else
2150a016a05SJeff Roberson struct kseq	kseq_cpu;
2160a016a05SJeff Roberson #define	KSEQ_SELF()	(&kseq_cpu)
2170a016a05SJeff Roberson #define	KSEQ_CPU(x)	(&kseq_cpu)
2180a016a05SJeff Roberson #endif
21935e6168fSJeff Roberson 
/* Priority, slice and cpu-usage helpers */
static int	sched_slice(struct ksegrp *kg);
static int	sched_priority(struct ksegrp *kg);
void		sched_pctcpu_update(struct kse *ke);
int		sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void	kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif
2355d7ef00cSJeff Roberson 
2365d7ef00cSJeff Roberson static __inline void
2375d7ef00cSJeff Roberson kseq_add(struct kseq *kseq, struct kse *ke)
2385d7ef00cSJeff Roberson {
2395d7ef00cSJeff Roberson 	runq_add(ke->ke_runq, ke);
2405d7ef00cSJeff Roberson 	kseq->ksq_load++;
2415d7ef00cSJeff Roberson #ifdef SMP
2425d7ef00cSJeff Roberson 	kseq->ksq_rslices += ke->ke_slice;
2435d7ef00cSJeff Roberson #endif
2445d7ef00cSJeff Roberson }
2455d7ef00cSJeff Roberson static __inline void
2465d7ef00cSJeff Roberson kseq_rem(struct kseq *kseq, struct kse *ke)
2475d7ef00cSJeff Roberson {
2485d7ef00cSJeff Roberson 	kseq->ksq_load--;
2495d7ef00cSJeff Roberson 	runq_remove(ke->ke_runq, ke);
2505d7ef00cSJeff Roberson #ifdef SMP
2515d7ef00cSJeff Roberson 	kseq->ksq_rslices -= ke->ke_slice;
2525d7ef00cSJeff Roberson #endif
2535d7ef00cSJeff Roberson }
2545d7ef00cSJeff Roberson 
2555d7ef00cSJeff Roberson #ifdef SMP
2565d7ef00cSJeff Roberson static __inline void
2575d7ef00cSJeff Roberson kseq_sleep(struct kseq *kseq, struct kse *ke)
2585d7ef00cSJeff Roberson {
2595d7ef00cSJeff Roberson 	kseq->ksq_bload++;
2605d7ef00cSJeff Roberson }
2615d7ef00cSJeff Roberson 
2625d7ef00cSJeff Roberson static __inline void
2635d7ef00cSJeff Roberson kseq_wakeup(struct kseq *kseq, struct kse *ke)
2645d7ef00cSJeff Roberson {
2655d7ef00cSJeff Roberson 	kseq->ksq_bload--;
2665d7ef00cSJeff Roberson }
2675d7ef00cSJeff Roberson 
2685d7ef00cSJeff Roberson struct kseq *
2695d7ef00cSJeff Roberson kseq_load_highest(void)
2705d7ef00cSJeff Roberson {
2715d7ef00cSJeff Roberson 	struct kseq *kseq;
2725d7ef00cSJeff Roberson 	int load;
2735d7ef00cSJeff Roberson 	int cpu;
2745d7ef00cSJeff Roberson 	int i;
2755d7ef00cSJeff Roberson 
2765d7ef00cSJeff Roberson 	cpu = 0;
2775d7ef00cSJeff Roberson 	load = 0;
2785d7ef00cSJeff Roberson 
2795d7ef00cSJeff Roberson 	for (i = 0; i < mp_maxid; i++) {
2805d7ef00cSJeff Roberson 		if (CPU_ABSENT(i))
2815d7ef00cSJeff Roberson 			continue;
2825d7ef00cSJeff Roberson 		kseq = KSEQ_CPU(i);
2835d7ef00cSJeff Roberson 		if (kseq->ksq_load > load) {
2845d7ef00cSJeff Roberson 			load = kseq->ksq_load;
2855d7ef00cSJeff Roberson 			cpu = i;
2865d7ef00cSJeff Roberson 		}
2875d7ef00cSJeff Roberson 	}
2885d7ef00cSJeff Roberson 	if (load)
2895d7ef00cSJeff Roberson 		return (KSEQ_CPU(cpu));
2905d7ef00cSJeff Roberson 
2915d7ef00cSJeff Roberson 	return (NULL);
2925d7ef00cSJeff Roberson }
2935d7ef00cSJeff Roberson #endif
2945d7ef00cSJeff Roberson 
2955d7ef00cSJeff Roberson struct kse *
2965d7ef00cSJeff Roberson kseq_choose(struct kseq *kseq)
2975d7ef00cSJeff Roberson {
2985d7ef00cSJeff Roberson 	struct kse *ke;
2995d7ef00cSJeff Roberson 	struct runq *swap;
3005d7ef00cSJeff Roberson 
3015d7ef00cSJeff Roberson 	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
3025d7ef00cSJeff Roberson 		swap = kseq->ksq_curr;
3035d7ef00cSJeff Roberson 		kseq->ksq_curr = kseq->ksq_next;
3045d7ef00cSJeff Roberson 		kseq->ksq_next = swap;
3055d7ef00cSJeff Roberson 		ke = runq_choose(kseq->ksq_curr);
3065d7ef00cSJeff Roberson 	}
3075d7ef00cSJeff Roberson 
3085d7ef00cSJeff Roberson 	return (ke);
3095d7ef00cSJeff Roberson }
3105d7ef00cSJeff Roberson 
3110a016a05SJeff Roberson 
3120a016a05SJeff Roberson static void
3130a016a05SJeff Roberson kseq_setup(struct kseq *kseq)
3140a016a05SJeff Roberson {
3150a016a05SJeff Roberson 	kseq->ksq_curr = &kseq->ksq_runqs[0];
3160a016a05SJeff Roberson 	kseq->ksq_next = &kseq->ksq_runqs[1];
3170a016a05SJeff Roberson 	runq_init(kseq->ksq_curr);
3180a016a05SJeff Roberson 	runq_init(kseq->ksq_next);
3195d7ef00cSJeff Roberson 	kseq->ksq_load = 0;
3205d7ef00cSJeff Roberson #ifdef SMP
3215d7ef00cSJeff Roberson 	kseq->ksq_rslices = 0;
3225d7ef00cSJeff Roberson 	kseq->ksq_bload = 0;
3235d7ef00cSJeff Roberson #endif
3240a016a05SJeff Roberson }
3250a016a05SJeff Roberson 
32635e6168fSJeff Roberson static void
32735e6168fSJeff Roberson sched_setup(void *dummy)
32835e6168fSJeff Roberson {
32935e6168fSJeff Roberson 	int i;
33035e6168fSJeff Roberson 
33135e6168fSJeff Roberson 	mtx_lock_spin(&sched_lock);
33235e6168fSJeff Roberson 	/* init kseqs */
3330a016a05SJeff Roberson 	for (i = 0; i < MAXCPU; i++)
3340a016a05SJeff Roberson 		kseq_setup(KSEQ_CPU(i));
33535e6168fSJeff Roberson 	mtx_unlock_spin(&sched_lock);
33635e6168fSJeff Roberson }
33735e6168fSJeff Roberson 
33835e6168fSJeff Roberson /*
33935e6168fSJeff Roberson  * Scale the scheduling priority according to the "interactivity" of this
34035e6168fSJeff Roberson  * process.
34135e6168fSJeff Roberson  */
34235e6168fSJeff Roberson static int
34335e6168fSJeff Roberson sched_priority(struct ksegrp *kg)
34435e6168fSJeff Roberson {
34535e6168fSJeff Roberson 	int pri;
34635e6168fSJeff Roberson 
34735e6168fSJeff Roberson 	if (kg->kg_pri_class != PRI_TIMESHARE)
34835e6168fSJeff Roberson 		return (kg->kg_user_pri);
34935e6168fSJeff Roberson 
350407b0157SJeff Roberson 	pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime);
35135e6168fSJeff Roberson 	CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
35235e6168fSJeff Roberson 	    kg->kg_slptime, pri);
35335e6168fSJeff Roberson 
35435e6168fSJeff Roberson 	pri += PRI_MIN_TIMESHARE;
35535e6168fSJeff Roberson 	pri += kg->kg_nice;
35635e6168fSJeff Roberson 
35735e6168fSJeff Roberson 	if (pri > PRI_MAX_TIMESHARE)
35835e6168fSJeff Roberson 		pri = PRI_MAX_TIMESHARE;
35935e6168fSJeff Roberson 	else if (pri < PRI_MIN_TIMESHARE)
36035e6168fSJeff Roberson 		pri = PRI_MIN_TIMESHARE;
36135e6168fSJeff Roberson 
36235e6168fSJeff Roberson 	kg->kg_user_pri = pri;
36335e6168fSJeff Roberson 
36435e6168fSJeff Roberson 	return (kg->kg_user_pri);
36535e6168fSJeff Roberson }
36635e6168fSJeff Roberson 
36735e6168fSJeff Roberson /*
36835e6168fSJeff Roberson  * Calculate a time slice based on the process priority.
36935e6168fSJeff Roberson  */
37035e6168fSJeff Roberson static int
37135e6168fSJeff Roberson sched_slice(struct ksegrp *kg)
37235e6168fSJeff Roberson {
37335e6168fSJeff Roberson 	int pslice;
37435e6168fSJeff Roberson 	int sslice;
37535e6168fSJeff Roberson 	int slice;
37635e6168fSJeff Roberson 	int pri;
37735e6168fSJeff Roberson 
37835e6168fSJeff Roberson 	pri = kg->kg_user_pri;
37935e6168fSJeff Roberson 	pri -= PRI_MIN_TIMESHARE;
38035e6168fSJeff Roberson 	pslice = SCHED_PRI_TOSLICE(pri);
381407b0157SJeff Roberson 	sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime));
382407b0157SJeff Roberson /*
383407b0157SJeff Roberson SCHED_SLP_TOSLICE(SCHED_SLP_RATIO(
384407b0157SJeff Roberson 	    kg->kg_slptime, kg->kg_runtime));
385407b0157SJeff Roberson */
38635e6168fSJeff Roberson 	slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);
38735e6168fSJeff Roberson 
38835e6168fSJeff Roberson 	CTR4(KTR_RUNQ,
38935e6168fSJeff Roberson 	    "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
39035e6168fSJeff Roberson 	    pri, sslice, pslice, slice);
39135e6168fSJeff Roberson 
39235e6168fSJeff Roberson 	if (slice < SCHED_SLICE_MIN)
39335e6168fSJeff Roberson 		slice = SCHED_SLICE_MIN;
39435e6168fSJeff Roberson 	else if (slice > SCHED_SLICE_MAX)
39535e6168fSJeff Roberson 		slice = SCHED_SLICE_MAX;
39635e6168fSJeff Roberson 
397407b0157SJeff Roberson 	/*
398407b0157SJeff Roberson 	 * Every time we grant a new slice check to see if we need to scale
399407b0157SJeff Roberson 	 * back the slp and run time in the kg.  This will cause us to forget
400407b0157SJeff Roberson 	 * old interactivity while maintaining the current ratio.
401407b0157SJeff Roberson 	 */
402407b0157SJeff Roberson 	if ((kg->kg_runtime + kg->kg_slptime) >  SCHED_SLP_RUN_MAX) {
403407b0157SJeff Roberson 		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
404407b0157SJeff Roberson 		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
405407b0157SJeff Roberson 	}
406407b0157SJeff Roberson 
40735e6168fSJeff Roberson 	return (slice);
40835e6168fSJeff Roberson }
40935e6168fSJeff Roberson 
41035e6168fSJeff Roberson int
41135e6168fSJeff Roberson sched_rr_interval(void)
41235e6168fSJeff Roberson {
41335e6168fSJeff Roberson 	return (SCHED_SLICE_MAX);
41435e6168fSJeff Roberson }
41535e6168fSJeff Roberson 
41635e6168fSJeff Roberson void
41735e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke)
41835e6168fSJeff Roberson {
41935e6168fSJeff Roberson 	/*
42035e6168fSJeff Roberson 	 * Adjust counters and watermark for pctcpu calc.
42135e6168fSJeff Roberson 	 */
42235e6168fSJeff Roberson 	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
42335e6168fSJeff Roberson 		    SCHED_CPU_TICKS;
42435e6168fSJeff Roberson 	ke->ke_ltick = ticks;
42535e6168fSJeff Roberson 	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
42635e6168fSJeff Roberson }
42735e6168fSJeff Roberson 
#ifdef SMP
/*
 * Pick the present CPU whose kseq currently has the smallest ksq_load.
 *
 * Returns 0 until SMP has been started, and 0 if no scanned CPU is present.
 * On a tie the lowest numbered CPU wins.
 *
 * The search is seeded with the first present CPU.  The previous version
 * seeded the minimum with a load of 0 and compared with `<'; assuming
 * ksq_load never goes below zero (kseq_add/kseq_rem only increment and
 * decrement it), no CPU could ever be selected and CPU 0 was always
 * returned.
 *
 * XXX Should be changed to kseq_load_lowest()
 */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = 0;
	cpu = -1;		/* -1: no candidate found yet */

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (cpu == -1 || kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	/* Keep the historical answer when nothing was scanned. */
	if (cpu == -1)
		cpu = 0;

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
/*
 * Uniprocessor kernels have a single kseq; always answer CPU 0.
 */
int
sched_pickcpu(void)
{
	return (0);
}
#endif
46435e6168fSJeff Roberson 
46535e6168fSJeff Roberson void
46635e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio)
46735e6168fSJeff Roberson {
46835e6168fSJeff Roberson 	struct kse *ke;
46935e6168fSJeff Roberson 	struct runq *rq;
47035e6168fSJeff Roberson 
47135e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
47235e6168fSJeff Roberson 	ke = td->td_kse;
47335e6168fSJeff Roberson 	td->td_priority = prio;
47435e6168fSJeff Roberson 
47535e6168fSJeff Roberson 	if (TD_ON_RUNQ(td)) {
47635e6168fSJeff Roberson 		rq = ke->ke_runq;
47735e6168fSJeff Roberson 
47835e6168fSJeff Roberson 		runq_remove(rq, ke);
47935e6168fSJeff Roberson 		runq_add(rq, ke);
48035e6168fSJeff Roberson 	}
48135e6168fSJeff Roberson }
48235e6168fSJeff Roberson 
48335e6168fSJeff Roberson void
48435e6168fSJeff Roberson sched_switchout(struct thread *td)
48535e6168fSJeff Roberson {
48635e6168fSJeff Roberson 	struct kse *ke;
48735e6168fSJeff Roberson 
48835e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
48935e6168fSJeff Roberson 
49035e6168fSJeff Roberson 	ke = td->td_kse;
49135e6168fSJeff Roberson 
49235e6168fSJeff Roberson 	td->td_last_kse = ke;
49335e6168fSJeff Roberson         td->td_lastcpu = ke->ke_oncpu;
494cd6e33dfSJeff Roberson 	ke->ke_oncpu = NOCPU;
4954a338afdSJulian Elischer         td->td_flags &= ~TDF_NEEDRESCHED;
49635e6168fSJeff Roberson 
49735e6168fSJeff Roberson 	if (TD_IS_RUNNING(td)) {
49835e6168fSJeff Roberson 		setrunqueue(td);
49935e6168fSJeff Roberson 		return;
50035e6168fSJeff Roberson 	} else
50135e6168fSJeff Roberson 		td->td_kse->ke_runq = NULL;
50235e6168fSJeff Roberson 
50335e6168fSJeff Roberson 	/*
50435e6168fSJeff Roberson 	 * We will not be on the run queue. So we must be
50535e6168fSJeff Roberson 	 * sleeping or similar.
50635e6168fSJeff Roberson 	 */
507ac2e4153SJulian Elischer 	if (td->td_proc->p_flag & P_THREADED)
50835e6168fSJeff Roberson 		kse_reassign(ke);
50935e6168fSJeff Roberson }
51035e6168fSJeff Roberson 
51135e6168fSJeff Roberson void
51235e6168fSJeff Roberson sched_switchin(struct thread *td)
51335e6168fSJeff Roberson {
51435e6168fSJeff Roberson 	/* struct kse *ke = td->td_kse; */
51535e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
51635e6168fSJeff Roberson 
517cd6e33dfSJeff Roberson 	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
5185d7ef00cSJeff Roberson #if SCHED_STRICT_RESCHED
51935e6168fSJeff Roberson 	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
52035e6168fSJeff Roberson 	    td->td_priority != td->td_ksegrp->kg_user_pri)
5214a338afdSJulian Elischer 		curthread->td_flags |= TDF_NEEDRESCHED;
5225d7ef00cSJeff Roberson #endif
52335e6168fSJeff Roberson }
52435e6168fSJeff Roberson 
52535e6168fSJeff Roberson void
52635e6168fSJeff Roberson sched_nice(struct ksegrp *kg, int nice)
52735e6168fSJeff Roberson {
52835e6168fSJeff Roberson 	struct thread *td;
52935e6168fSJeff Roberson 
53035e6168fSJeff Roberson 	kg->kg_nice = nice;
53135e6168fSJeff Roberson 	sched_priority(kg);
53235e6168fSJeff Roberson 	FOREACH_THREAD_IN_GROUP(kg, td) {
5334a338afdSJulian Elischer 		td->td_flags |= TDF_NEEDRESCHED;
53435e6168fSJeff Roberson 	}
53535e6168fSJeff Roberson }
53635e6168fSJeff Roberson 
53735e6168fSJeff Roberson void
53835e6168fSJeff Roberson sched_sleep(struct thread *td, u_char prio)
53935e6168fSJeff Roberson {
54035e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
54135e6168fSJeff Roberson 
54235e6168fSJeff Roberson 	td->td_slptime = ticks;
54335e6168fSJeff Roberson 	td->td_priority = prio;
54435e6168fSJeff Roberson 
54535e6168fSJeff Roberson 	/*
54635e6168fSJeff Roberson 	 * If this is an interactive task clear its queue so it moves back
54735e6168fSJeff Roberson 	 * on to curr when it wakes up.  Otherwise let it stay on the queue
54835e6168fSJeff Roberson 	 * that it was assigned to.
54935e6168fSJeff Roberson 	 */
55035e6168fSJeff Roberson 	if (SCHED_CURR(td->td_kse->ke_ksegrp))
55135e6168fSJeff Roberson 		td->td_kse->ke_runq = NULL;
5525d7ef00cSJeff Roberson #ifdef SMP
5535d7ef00cSJeff Roberson 	if (td->td_priority < PZERO) {
5545d7ef00cSJeff Roberson 		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
5555d7ef00cSJeff Roberson 		td->td_schedflag |= TD_SCHED_BLOAD;
5565d7ef00cSJeff Roberson 	}
5570a016a05SJeff Roberson #endif
55835e6168fSJeff Roberson }
55935e6168fSJeff Roberson 
56035e6168fSJeff Roberson void
56135e6168fSJeff Roberson sched_wakeup(struct thread *td)
56235e6168fSJeff Roberson {
56335e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
56435e6168fSJeff Roberson 
56535e6168fSJeff Roberson 	/*
56635e6168fSJeff Roberson 	 * Let the kseg know how long we slept for.  This is because process
56735e6168fSJeff Roberson 	 * interactivity behavior is modeled in the kseg.
56835e6168fSJeff Roberson 	 */
56935e6168fSJeff Roberson 	if (td->td_slptime) {
570f1e8dc4aSJeff Roberson 		struct ksegrp *kg;
571f1e8dc4aSJeff Roberson 
572f1e8dc4aSJeff Roberson 		kg = td->td_ksegrp;
573407b0157SJeff Roberson 		kg->kg_slptime += (ticks - td->td_slptime) * 1024;
574f1e8dc4aSJeff Roberson 		sched_priority(kg);
57535e6168fSJeff Roberson 		td->td_slptime = 0;
576f1e8dc4aSJeff Roberson 	}
5775d7ef00cSJeff Roberson #ifdef SMP
5785d7ef00cSJeff Roberson 	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
5795d7ef00cSJeff Roberson 		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
5805d7ef00cSJeff Roberson 		td->td_schedflag &= ~TD_SCHED_BLOAD;
5815d7ef00cSJeff Roberson 	}
5820a016a05SJeff Roberson #endif
58335e6168fSJeff Roberson 	setrunqueue(td);
5845d7ef00cSJeff Roberson #if SCHED_STRICT_RESCHED
58535e6168fSJeff Roberson         if (td->td_priority < curthread->td_priority)
5864a338afdSJulian Elischer                 curthread->td_flags |= TDF_NEEDRESCHED;
5875d7ef00cSJeff Roberson #endif
58835e6168fSJeff Roberson }
58935e6168fSJeff Roberson 
59035e6168fSJeff Roberson /*
59135e6168fSJeff Roberson  * Penalize the parent for creating a new child and initialize the child's
59235e6168fSJeff Roberson  * priority.
59335e6168fSJeff Roberson  */
59435e6168fSJeff Roberson void
59535e6168fSJeff Roberson sched_fork(struct ksegrp *kg, struct ksegrp *child)
59635e6168fSJeff Roberson {
59735e6168fSJeff Roberson 	struct kse *ckse;
59835e6168fSJeff Roberson 	struct kse *pkse;
59935e6168fSJeff Roberson 
60035e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
60135e6168fSJeff Roberson 	ckse = FIRST_KSE_IN_KSEGRP(child);
60235e6168fSJeff Roberson 	pkse = FIRST_KSE_IN_KSEGRP(kg);
60335e6168fSJeff Roberson 
60435e6168fSJeff Roberson 	/* XXX Need something better here */
605407b0157SJeff Roberson 	if (kg->kg_slptime > kg->kg_runtime) {
606407b0157SJeff Roberson 		child->kg_slptime = SCHED_PRI_DYN;
607407b0157SJeff Roberson 		child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN;
608407b0157SJeff Roberson 	} else {
609407b0157SJeff Roberson 		child->kg_runtime = SCHED_PRI_DYN;
610407b0157SJeff Roberson 		child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN;
611407b0157SJeff Roberson 	}
612407b0157SJeff Roberson #if 0
61335e6168fSJeff Roberson 	child->kg_slptime = kg->kg_slptime;
614407b0157SJeff Roberson 	child->kg_runtime = kg->kg_runtime;
615407b0157SJeff Roberson #endif
61635e6168fSJeff Roberson 	child->kg_user_pri = kg->kg_user_pri;
61735e6168fSJeff Roberson 
618407b0157SJeff Roberson #if 0
619cd6e33dfSJeff Roberson 	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
620cd6e33dfSJeff Roberson 		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
621c9f25d8fSJeff Roberson 		printf("cpuid = %d", PCPU_GET(cpuid));
622c9f25d8fSJeff Roberson 		Debugger("stop");
623c9f25d8fSJeff Roberson 	}
624407b0157SJeff Roberson #endif
625c9f25d8fSJeff Roberson 
62635e6168fSJeff Roberson 	ckse->ke_slice = pkse->ke_slice;
627cd6e33dfSJeff Roberson 	ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */
62835e6168fSJeff Roberson 	ckse->ke_runq = NULL;
62935e6168fSJeff Roberson 	/*
63035e6168fSJeff Roberson 	 * Claim that we've been running for one second for statistical
63135e6168fSJeff Roberson 	 * purposes.
63235e6168fSJeff Roberson 	 */
63335e6168fSJeff Roberson 	ckse->ke_ticks = 0;
63435e6168fSJeff Roberson 	ckse->ke_ltick = ticks;
63535e6168fSJeff Roberson 	ckse->ke_ftick = ticks - hz;
63635e6168fSJeff Roberson }
63735e6168fSJeff Roberson 
63835e6168fSJeff Roberson /*
63935e6168fSJeff Roberson  * Return some of the child's priority and interactivity to the parent.
64035e6168fSJeff Roberson  */
64135e6168fSJeff Roberson void
64235e6168fSJeff Roberson sched_exit(struct ksegrp *kg, struct ksegrp *child)
64335e6168fSJeff Roberson {
64435e6168fSJeff Roberson 	/* XXX Need something better here */
64535e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
64635e6168fSJeff Roberson 	kg->kg_slptime = child->kg_slptime;
647407b0157SJeff Roberson 	kg->kg_runtime = child->kg_runtime;
64835e6168fSJeff Roberson 	sched_priority(kg);
64935e6168fSJeff Roberson }
65035e6168fSJeff Roberson 
65135e6168fSJeff Roberson void
65235e6168fSJeff Roberson sched_clock(struct thread *td)
65335e6168fSJeff Roberson {
65435e6168fSJeff Roberson 	struct kse *ke;
6555d7ef00cSJeff Roberson #if SCHED_STRICT_RESCHED
65635e6168fSJeff Roberson 	struct kse *nke;
65735e6168fSJeff Roberson 	struct kseq *kseq;
6585d7ef00cSJeff Roberson #endif
6590a016a05SJeff Roberson 	struct ksegrp *kg;
66035e6168fSJeff Roberson 
66135e6168fSJeff Roberson 
66235e6168fSJeff Roberson 	ke = td->td_kse;
66335e6168fSJeff Roberson 	kg = td->td_ksegrp;
66435e6168fSJeff Roberson 
6650a016a05SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
6660a016a05SJeff Roberson 	KASSERT((td != NULL), ("schedclock: null thread pointer"));
6670a016a05SJeff Roberson 
6680a016a05SJeff Roberson 	/* Adjust ticks for pctcpu */
669d465fb95SJeff Roberson 	ke->ke_ticks += 10000;
670d465fb95SJeff Roberson 	ke->ke_ltick = ticks;
671d465fb95SJeff Roberson 	/* Go up to one second beyond our max and then trim back down */
672d465fb95SJeff Roberson 	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
673d465fb95SJeff Roberson 		sched_pctcpu_update(ke);
674d465fb95SJeff Roberson 
6750a016a05SJeff Roberson 	if (td->td_kse->ke_flags & KEF_IDLEKSE)
67635e6168fSJeff Roberson 		return;
6770a016a05SJeff Roberson 
6780a016a05SJeff Roberson 	/*
6790a016a05SJeff Roberson 	 * Check for a higher priority task on the run queue.  This can happen
6800a016a05SJeff Roberson 	 * on SMP if another processor woke up a process on our runq.
6810a016a05SJeff Roberson 	 */
6825d7ef00cSJeff Roberson #if SCHED_STRICT_RESCHED
6830a016a05SJeff Roberson 	kseq = KSEQ_SELF();
684c9f25d8fSJeff Roberson 	nke = runq_choose(kseq->ksq_curr);
685c9f25d8fSJeff Roberson 
68635e6168fSJeff Roberson 	if (nke && nke->ke_thread &&
6870a016a05SJeff Roberson 	    nke->ke_thread->td_priority < td->td_priority)
6884a338afdSJulian Elischer 		td->td_flags |= TDF_NEEDRESCHED;
6895d7ef00cSJeff Roberson #endif
69035e6168fSJeff Roberson 	/*
691407b0157SJeff Roberson 	 * We used a tick charge it to the ksegrp so that we can compute our
69235e6168fSJeff Roberson 	 * "interactivity".
69335e6168fSJeff Roberson 	 */
694407b0157SJeff Roberson 	kg->kg_runtime += 1024;
695407b0157SJeff Roberson 
69635e6168fSJeff Roberson 	/*
69735e6168fSJeff Roberson 	 * We used up one time slice.
69835e6168fSJeff Roberson 	 */
69935e6168fSJeff Roberson 	ke->ke_slice--;
70035e6168fSJeff Roberson 	/*
70135e6168fSJeff Roberson 	 * We're out of time, recompute priorities and requeue
70235e6168fSJeff Roberson 	 */
70335e6168fSJeff Roberson 	if (ke->ke_slice == 0) {
70435e6168fSJeff Roberson 		td->td_priority = sched_priority(kg);
70535e6168fSJeff Roberson 		ke->ke_slice = sched_slice(kg);
7064a338afdSJulian Elischer 		td->td_flags |= TDF_NEEDRESCHED;
70735e6168fSJeff Roberson 		ke->ke_runq = NULL;
70835e6168fSJeff Roberson 	}
70935e6168fSJeff Roberson }
71035e6168fSJeff Roberson 
71135e6168fSJeff Roberson int
71235e6168fSJeff Roberson sched_runnable(void)
71335e6168fSJeff Roberson {
71435e6168fSJeff Roberson 	struct kseq *kseq;
71535e6168fSJeff Roberson 
7160a016a05SJeff Roberson 	kseq = KSEQ_SELF();
71735e6168fSJeff Roberson 
7180a016a05SJeff Roberson 	if (kseq->ksq_load)
719c9f25d8fSJeff Roberson 		return (1);
720c9f25d8fSJeff Roberson #ifdef SMP
7210a016a05SJeff Roberson 	/*
7220a016a05SJeff Roberson 	 * For SMP we may steal other processor's KSEs.  Just search until we
7230a016a05SJeff Roberson 	 * verify that at least on other cpu has a runnable task.
7240a016a05SJeff Roberson 	 */
725c9f25d8fSJeff Roberson 	if (smp_started) {
726c9f25d8fSJeff Roberson 		int i;
727c9f25d8fSJeff Roberson 
7285d7ef00cSJeff Roberson #if 0
7295d7ef00cSJeff Roberson 		if (kseq->ksq_bload)
7305d7ef00cSJeff Roberson 			return (0);
7315d7ef00cSJeff Roberson #endif
7325d7ef00cSJeff Roberson 
733c9f25d8fSJeff Roberson 		for (i = 0; i < mp_maxid; i++) {
734c9f25d8fSJeff Roberson 			if (CPU_ABSENT(i))
735c9f25d8fSJeff Roberson 				continue;
7360a016a05SJeff Roberson 			kseq = KSEQ_CPU(i);
7370a016a05SJeff Roberson 			if (kseq->ksq_load)
738c9f25d8fSJeff Roberson 				return (1);
739c9f25d8fSJeff Roberson 		}
740c9f25d8fSJeff Roberson 	}
741c9f25d8fSJeff Roberson #endif
742c9f25d8fSJeff Roberson 	return (0);
74335e6168fSJeff Roberson }
74435e6168fSJeff Roberson 
74535e6168fSJeff Roberson void
74635e6168fSJeff Roberson sched_userret(struct thread *td)
74735e6168fSJeff Roberson {
74835e6168fSJeff Roberson 	struct ksegrp *kg;
74935e6168fSJeff Roberson 
75035e6168fSJeff Roberson 	kg = td->td_ksegrp;
75135e6168fSJeff Roberson 
75235e6168fSJeff Roberson 	if (td->td_priority != kg->kg_user_pri) {
75335e6168fSJeff Roberson 		mtx_lock_spin(&sched_lock);
75435e6168fSJeff Roberson 		td->td_priority = kg->kg_user_pri;
75535e6168fSJeff Roberson 		mtx_unlock_spin(&sched_lock);
75635e6168fSJeff Roberson 	}
75735e6168fSJeff Roberson }
75835e6168fSJeff Roberson 
759c9f25d8fSJeff Roberson struct kse *
760c9f25d8fSJeff Roberson sched_choose(void)
761c9f25d8fSJeff Roberson {
7620a016a05SJeff Roberson 	struct kseq *kseq;
763c9f25d8fSJeff Roberson 	struct kse *ke;
764c9f25d8fSJeff Roberson 
7650a016a05SJeff Roberson 	kseq = KSEQ_SELF();
7660a016a05SJeff Roberson 	ke = kseq_choose(kseq);
767c9f25d8fSJeff Roberson 
76835e6168fSJeff Roberson 	if (ke) {
76935e6168fSJeff Roberson 		ke->ke_state = KES_THREAD;
7705d7ef00cSJeff Roberson 		kseq_rem(kseq, ke);
77135e6168fSJeff Roberson 	}
77235e6168fSJeff Roberson 
773c9f25d8fSJeff Roberson #ifdef SMP
774c9f25d8fSJeff Roberson 	if (ke == NULL && smp_started) {
7755d7ef00cSJeff Roberson #if 0
7765d7ef00cSJeff Roberson 		if (kseq->ksq_bload)
7775d7ef00cSJeff Roberson 			return (NULL);
7785d7ef00cSJeff Roberson #endif
779c9f25d8fSJeff Roberson 		/*
780c9f25d8fSJeff Roberson 		 * Find the cpu with the highest load and steal one proc.
781c9f25d8fSJeff Roberson 		 */
7825d7ef00cSJeff Roberson 		kseq = kseq_load_highest();
7835d7ef00cSJeff Roberson 		if (kseq == NULL)
7845d7ef00cSJeff Roberson 			return (NULL);
7850a016a05SJeff Roberson 		ke = kseq_choose(kseq);
7865d7ef00cSJeff Roberson 		kseq_rem(kseq, ke);
7875d7ef00cSJeff Roberson 
788c9f25d8fSJeff Roberson 		ke->ke_state = KES_THREAD;
789c9f25d8fSJeff Roberson 		ke->ke_runq = NULL;
790cd6e33dfSJeff Roberson 		ke->ke_cpu = PCPU_GET(cpuid);
791c9f25d8fSJeff Roberson 	}
792c9f25d8fSJeff Roberson #endif
79335e6168fSJeff Roberson 	return (ke);
79435e6168fSJeff Roberson }
79535e6168fSJeff Roberson 
79635e6168fSJeff Roberson void
79735e6168fSJeff Roberson sched_add(struct kse *ke)
79835e6168fSJeff Roberson {
799c9f25d8fSJeff Roberson 	struct kseq *kseq;
800c9f25d8fSJeff Roberson 
8015d7ef00cSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
8025d7ef00cSJeff Roberson 	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
8035d7ef00cSJeff Roberson 	KASSERT((ke->ke_thread->td_kse != NULL),
8045d7ef00cSJeff Roberson 	    ("sched_add: No KSE on thread"));
8055d7ef00cSJeff Roberson 	KASSERT(ke->ke_state != KES_ONRUNQ,
8065d7ef00cSJeff Roberson 	    ("sched_add: kse %p (%s) already in run queue", ke,
8075d7ef00cSJeff Roberson 	    ke->ke_proc->p_comm));
8085d7ef00cSJeff Roberson 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
8095d7ef00cSJeff Roberson 	    ("sched_add: process swapped out"));
8105d7ef00cSJeff Roberson 
811cd6e33dfSJeff Roberson 	kseq = KSEQ_CPU(ke->ke_cpu);
8125d7ef00cSJeff Roberson 
8135d7ef00cSJeff Roberson 	if (ke->ke_runq == NULL) {
81435e6168fSJeff Roberson 		if (SCHED_CURR(ke->ke_ksegrp))
81535e6168fSJeff Roberson 			ke->ke_runq = kseq->ksq_curr;
81635e6168fSJeff Roberson 		else
81735e6168fSJeff Roberson 			ke->ke_runq = kseq->ksq_next;
81835e6168fSJeff Roberson 	}
81935e6168fSJeff Roberson 	ke->ke_ksegrp->kg_runq_kses++;
82035e6168fSJeff Roberson 	ke->ke_state = KES_ONRUNQ;
82135e6168fSJeff Roberson 
8225d7ef00cSJeff Roberson 	kseq_add(kseq, ke);
82335e6168fSJeff Roberson }
82435e6168fSJeff Roberson 
82535e6168fSJeff Roberson void
82635e6168fSJeff Roberson sched_rem(struct kse *ke)
82735e6168fSJeff Roberson {
82835e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
82935e6168fSJeff Roberson 	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */
83035e6168fSJeff Roberson 
83135e6168fSJeff Roberson 	ke->ke_runq = NULL;
83235e6168fSJeff Roberson 	ke->ke_state = KES_THREAD;
83335e6168fSJeff Roberson 	ke->ke_ksegrp->kg_runq_kses--;
8345d7ef00cSJeff Roberson 
8355d7ef00cSJeff Roberson 	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
83635e6168fSJeff Roberson }
83735e6168fSJeff Roberson 
83835e6168fSJeff Roberson fixpt_t
83935e6168fSJeff Roberson sched_pctcpu(struct kse *ke)
84035e6168fSJeff Roberson {
84135e6168fSJeff Roberson 	fixpt_t pctcpu;
8427121cce5SScott Long 	int realstathz;
84335e6168fSJeff Roberson 
84435e6168fSJeff Roberson 	pctcpu = 0;
8457121cce5SScott Long 	realstathz = stathz ? stathz : hz;
84635e6168fSJeff Roberson 
84735e6168fSJeff Roberson 	if (ke->ke_ticks) {
84835e6168fSJeff Roberson 		int rtick;
84935e6168fSJeff Roberson 
85035e6168fSJeff Roberson 		/* Update to account for time potentially spent sleeping */
85135e6168fSJeff Roberson 		ke->ke_ltick = ticks;
85235e6168fSJeff Roberson 		sched_pctcpu_update(ke);
85335e6168fSJeff Roberson 
85435e6168fSJeff Roberson 		/* How many rtick per second ? */
85535e6168fSJeff Roberson 		rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
8567121cce5SScott Long 		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
85735e6168fSJeff Roberson 	}
85835e6168fSJeff Roberson 
85935e6168fSJeff Roberson 	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
86035e6168fSJeff Roberson 
86135e6168fSJeff Roberson 	return (pctcpu);
86235e6168fSJeff Roberson }
86335e6168fSJeff Roberson 
86435e6168fSJeff Roberson int
86535e6168fSJeff Roberson sched_sizeof_kse(void)
86635e6168fSJeff Roberson {
86735e6168fSJeff Roberson 	return (sizeof(struct kse) + sizeof(struct ke_sched));
86835e6168fSJeff Roberson }
86935e6168fSJeff Roberson 
87035e6168fSJeff Roberson int
87135e6168fSJeff Roberson sched_sizeof_ksegrp(void)
87235e6168fSJeff Roberson {
87335e6168fSJeff Roberson 	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
87435e6168fSJeff Roberson }
87535e6168fSJeff Roberson 
87635e6168fSJeff Roberson int
87735e6168fSJeff Roberson sched_sizeof_proc(void)
87835e6168fSJeff Roberson {
87935e6168fSJeff Roberson 	return (sizeof(struct proc));
88035e6168fSJeff Roberson }
88135e6168fSJeff Roberson 
88235e6168fSJeff Roberson int
88335e6168fSJeff Roberson sched_sizeof_thread(void)
88435e6168fSJeff Roberson {
88535e6168fSJeff Roberson 	return (sizeof(struct thread) + sizeof(struct td_sched));
88635e6168fSJeff Roberson }
887