xref: /freebsd/sys/kern/sched_ule.c (revision a8949de20eec94dce0e9bc107a5270dd3a8d03b4)
/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

int realstathz;

#define	SCHED_STRICT_RESCHED 1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

struct kg_sched {
	int	skg_slptime;		/* Number of ticks we voluntarily slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
	int	std_schedflag;
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001		/*
					 * thread was counted as being in short
					 * term sleep.
					 */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Reserved priorities for nice.
 * PRI_BASE:	The start of the dynamic range.
 * DYN_RANGE:	Number of priorities that are available in the dynamic
 *		priority range.
 * DYN_HALF:	Half of DYN_RANGE for convenience elsewhere.
 * PRI_DYN:	The dynamic priority which is derived from the number of ticks
 *		running vs the total number of ticks.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		PRIO_TOTAL
#define	SCHED_PRI_NHALF		(PRIO_TOTAL / 2)
#define	SCHED_PRI_BASE		((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
#define	SCHED_DYN_RANGE		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_DYN_HALF		(SCHED_DYN_RANGE / 2)
#define	SCHED_PRI_DYN(run, total)	(((run) * SCHED_DYN_RANGE) / (total))
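/*
 * Worked example (assuming the stock priority.h values, where the
 * timeshare range is 160..223 and PRIO_TOTAL is 40): PRI_RANGE = 64,
 * PRI_NRESV = 40, PRI_NHALF = 20, PRI_BASE = 180, DYN_RANGE = 24 and
 * DYN_HALF = 12.  A kseg that ran for 1 of every 4 ticks would get
 * SCHED_PRI_DYN(1, 4) = (1 * 24) / 4 = 6 priorities into the dynamic
 * range.
 */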

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * INTERACT_RANGE:	Range of interactivity values.  Smaller is better.
 * INTERACT_HALF:	Convenience define, half of the interactivity range.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 2) << 10)
#define	SCHED_SLP_RUN_THROTTLE	(10)
#define	SCHED_INTERACT_RANGE	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_RANGE / 2)
#define	SCHED_INTERACT_THRESH	(10)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 */
#define	SCHED_SLICE_MIN			(hz / 100)
#define	SCHED_SLICE_MAX			(hz / 10)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NHALF))
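/*
 * Worked example (assuming hz = 1000): SLICE_MIN = 10 ticks, SLICE_MAX =
 * 100 ticks and SLICE_RANGE = 91.  A kse with a scaled nice of 0 gets
 * SCHED_SLICE_NICE(0) = 100 ticks, a scaled nice of 10 gets
 * 100 - (10 * 91) / 20 = 55 ticks, and a scaled nice of 20 gets
 * 100 - 91 = 9 ticks.
 */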

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(sched_interact_score(kg) < SCHED_INTERACT_THRESH)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct runq	ksq_ithd;	/* Queue of ITHD and REALTIME tds. */
	struct runq	ksq_idle;	/* Queue of IDLE threads. */
	struct runq	ksq_runqs[2];	/* Run queues for TIMESHARE. */
	struct runq	*ksq_curr;
	struct runq	*ksq_next;
	int		ksq_itload;	/* Total runnable for ITHD. */
	int		ksq_tsload;	/* Total runnable for TIMESHARE. */
	int		ksq_idload;	/* Total runnable for IDLE. */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static int sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static int kseq_nice_min(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Figure out what run queue we should go on and assign a slice.
	 */
	switch (kg->kg_pri_class) {
	/*
	 * If we're a real-time or interrupt thread, place us on the curr
	 * queue for the current processor.  Hopefully this will yield the
	 * lowest latency response.
	 */
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = &kseq->ksq_ithd;
		ke->ke_slice = SCHED_SLICE_MAX;
		kseq->ksq_itload++;
		break;
	/*
	 * Timeshare threads get placed on the appropriate queue on their
	 * bound cpu.
	 */
	case PRI_TIMESHARE:
		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(kg))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
		}
		if (ke->ke_slice == 0)
			sched_slice(ke);
		kseq->ksq_tsload++;
		break;
	/*
	 * Only grant PRI_IDLE processes a slice if there is nothing else
	 * running.
	 */
	case PRI_IDLE:
		ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		kseq->ksq_idload++;
		break;
	default:
		panic("Unknown priority class.\n");
		break;
	}

	runq_add(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

static void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * XXX Consider making the load an array.
	 */
	switch (kg->kg_pri_class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		kseq->ksq_itload--;
		break;
	case PRI_TIMESHARE:
		kseq->ksq_tsload--;
		break;
	case PRI_IDLE:
		kseq->ksq_idload--;
		break;
	}
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_tsload > load) {
			load = kseq->ksq_tsload;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if (kseq->ksq_itload)
		return (runq_choose(&kseq->ksq_ithd));

	if (kseq->ksq_tsload) {
		if ((ke = runq_choose(kseq->ksq_curr)) != NULL)
			return (ke);

		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;

		return (runq_choose(kseq->ksq_curr));
	}
	if (kseq->ksq_idload)
		return (runq_choose(&kseq->ksq_idle));

	return (NULL);
}

static int
kseq_nice_min(struct kseq *kseq)
{
	struct kse *ke0;
	struct kse *ke1;

	if (kseq->ksq_tsload == 0)
		return (0);

	ke0 = runq_choose(kseq->ksq_curr);
	ke1 = runq_choose(kseq->ksq_next);

	if (ke0 == NULL)
		return (ke1->ke_ksegrp->kg_nice);

	if (ke1 == NULL)
		return (ke0->ke_ksegrp->kg_nice);

	return (min(ke0->ke_ksegrp->kg_nice, ke1->ke_ksegrp->kg_nice));
}

static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(&kseq->ksq_ithd);
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_itload = 0;
	kseq->ksq_tsload = 0;
	kseq->ksq_idload = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
	int i;

	realstathz = stathz ? stathz : hz;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
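/*
 * Illustration (assuming the stock priority.h layout, which makes
 * SCHED_PRI_BASE = 180, SCHED_DYN_RANGE = 24 and SCHED_INTERACT_RANGE =
 * 100): an interactivity score of 50 yields 180 + (50 * 24) / 100 = 192
 * before the nice value is added and the result is clamped to the
 * timeshare range.
 */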
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE;
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if they abuse their advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse's distance from the least nice ksegrp.
	 *
	 * If you are outside of the window you will get no slice and
	 * you will be reevaluated each time you are selected on the
	 * run queue.
	 */
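	/*
	 * Example (assuming hz = 1000): if the least nice kseg on the
	 * queue has nice -5 and this kseg has nice 0, the scaled nice
	 * below is 5 and the slice is SCHED_SLICE_NICE(5) =
	 * 100 - (5 * 91) / 20 = 78 ticks; a scaled nice above
	 * SCHED_PRI_NHALF (20) gets no slice at all.
	 */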

	if (!SCHED_CURR(kg)) {
		struct kseq *kseq;
		int nice_base;
		int nice;

		kseq = KSEQ_CPU(ke->ke_cpu);
		nice_base = kseq_nice_min(kseq);
		nice = kg->kg_nice + (0 - nice_base);

		if (kseq->ksq_tsload == 0 || kg->kg_nice < nice_base)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_PRI_NHALF)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	/*
	 * Check to see if we need to scale back the slp and run time
	 * in the kg.  This will cause us to forget old interactivity
	 * while maintaining the current ratio.
	 */
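	/*
	 * Worked example (assuming hz = 100, so SLP_RUN_MAX = 200 << 10):
	 * with kg_runtime = 150 << 10 and kg_slptime = 60 << 10 the sum
	 * exceeds the cap, so both are divided by 10, shrinking the
	 * history while preserving the 5:2 run/sleep ratio.
	 */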
	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
	}

	return;
}

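/*
 * Interactivity scoring, roughly: the score is near 0 when sleep time
 * dwarfs run time, around SCHED_INTERACT_HALF when they are equal, and
 * approaches SCHED_INTERACT_RANGE when run time dominates.  Ignoring
 * integer truncation, a kseg that sleeps r times as long as it runs
 * scores about 50 / r, so it must sleep roughly five times as long as
 * it runs to land under SCHED_INTERACT_THRESH (10).
 */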
static int
sched_interact_score(struct ksegrp *kg)
{
	int big;
	int small;
	int base;

	if (kg->kg_runtime > kg->kg_slptime) {
		big = kg->kg_runtime;
		small = kg->kg_slptime;
		base = SCHED_INTERACT_HALF;
	} else {
		big = kg->kg_slptime;
		small = kg->kg_runtime;
		base = 0;
	}

	big /= SCHED_INTERACT_HALF;
	if (big != 0)
		small /= big;
	else
		small = 0;

	small += base;
	/* XXX Factor in nice */
	return (small);
}

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	/*
	 * Shift the tick count out so that the divide doesn't round away
	 * our results.
	 */
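	/*
	 * Worked example (assuming hz = 1000, so SCHED_CPU_TICKS = 10000):
	 * 5000 ticks over an 11000 tick window becomes
	 * (((5000 << 10) / 11000) * 10000) >> 10 = 4541, close to the
	 * exact 5000 * 10000 / 11000 = 4545; without the shift the first
	 * divide would truncate to zero.
	 */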
	ke->ke_ticks <<= 10;
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
		    SCHED_CPU_TICKS;
	ke->ke_ticks >>= 10;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	/*
	 * Seed load with -1 so the first present cpu is taken; seeding
	 * with 0 would keep the test below from ever firing.
	 */
	load = -1;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (load == -1 || kseq->ksq_tsload < load) {
			cpu = i;
			load = kseq->ksq_tsload;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	}
	td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue. So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_THREADED)
		kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
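	/*
	 * kg_slptime is kept in the same 10 bit fixed point units that
	 * sched_clock() uses for kg_runtime, so sleeping for 100 ticks
	 * adds 100 << 10 below.
	 */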
	if (td->td_slptime) {
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		kg->kg_slptime += (ticks - td->td_slptime) << 10;
		sched_priority(kg);
		td->td_slptime = 0;
	}
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	if (kg->kg_slptime > kg->kg_runtime) {
		child->kg_slptime = SCHED_DYN_RANGE;
		child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE;
	} else {
		child->kg_runtime = SCHED_DYN_RANGE;
		child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE;
	}
#if 0
	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
#endif
	child->kg_user_pri = kg->kg_user_pri;

#if 0
	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
		printf("cpuid = %d", PCPU_GET(cpuid));
		Debugger("stop");
	}
#endif

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
#if 0
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
#endif
}

void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;

	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		td->td_flags |= TDF_NEEDRESCHED;
#endif
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1 << 10;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue.  We'll get a
	 * new slice when we're put back on the run queue.
	 */
	if (ke->ke_slice <= 0) {
		sched_priority(kg);
		td->td_flags |= TDF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}

int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_tsload || kseq->ksq_idload || kseq->ksq_itload)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_tsload)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
retry:
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);

		/*
		 * If we dequeue a kse with a slice of zero it was below the
		 * nice threshold required to acquire a slice.  Force it on
		 * to the next run queue and let kseq_add() pick a new slice.
		 *
		 * XXX This code should live in a TIMESHARE specific section.
		 */
		if (ke->ke_slice == 0) {
			ke->ke_runq = kseq->ksq_next;
			kseq_add(kseq, ke);
			goto retry;
		}
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		/*
		 * XXX Do we want to migrate interrupt or realtime threads?
		 * Currently we'll only try to steal if there is a TIMESHARE
		 * thread available, but we will steal a REALTIME or interrupt
		 * thread if kseq_choose() returns one first.
		 */
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	switch (ke->ke_ksegrp->kg_pri_class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		kseq = KSEQ_SELF();
		break;
	case PRI_TIMESHARE:
	case PRI_IDLE:
	default:
		kseq = KSEQ_CPU(ke->ke_cpu);
		break;
	}

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

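/*
 * Worked example (assuming hz = 1000 and stathz = 128, values for
 * illustration only): a kse that accumulated ke_ticks = 640 over the
 * 10 second averaging window yields rtick = 64, so pctcpu =
 * (FSCALE * ((FSCALE * 64) / 128)) >> FSHIFT = FSCALE / 2, which ps
 * reports as 50%.
 */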
fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}