xref: /freebsd/sys/kern/sched_ule.c (revision 22bf7d9a0e1569a01bd34b8a1cb19ef5474e58d9)
135e6168fSJeff Roberson /*-
215dc847eSJeff Roberson  * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
335e6168fSJeff Roberson  * All rights reserved.
435e6168fSJeff Roberson  *
535e6168fSJeff Roberson  * Redistribution and use in source and binary forms, with or without
635e6168fSJeff Roberson  * modification, are permitted provided that the following conditions
735e6168fSJeff Roberson  * are met:
835e6168fSJeff Roberson  * 1. Redistributions of source code must retain the above copyright
935e6168fSJeff Roberson  *    notice unmodified, this list of conditions, and the following
1035e6168fSJeff Roberson  *    disclaimer.
1135e6168fSJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
1235e6168fSJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
1335e6168fSJeff Roberson  *    documentation and/or other materials provided with the distribution.
1435e6168fSJeff Roberson  *
1535e6168fSJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1635e6168fSJeff Roberson  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1735e6168fSJeff Roberson  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1835e6168fSJeff Roberson  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
1935e6168fSJeff Roberson  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2035e6168fSJeff Roberson  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2135e6168fSJeff Roberson  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2235e6168fSJeff Roberson  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2335e6168fSJeff Roberson  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2435e6168fSJeff Roberson  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2535e6168fSJeff Roberson  */
2635e6168fSJeff Roberson 
27677b542eSDavid E. O'Brien #include <sys/cdefs.h>
28677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
29677b542eSDavid E. O'Brien 
3035e6168fSJeff Roberson #include <sys/param.h>
3135e6168fSJeff Roberson #include <sys/systm.h>
3235e6168fSJeff Roberson #include <sys/kernel.h>
3335e6168fSJeff Roberson #include <sys/ktr.h>
3435e6168fSJeff Roberson #include <sys/lock.h>
3535e6168fSJeff Roberson #include <sys/mutex.h>
3635e6168fSJeff Roberson #include <sys/proc.h>
37245f3abfSJeff Roberson #include <sys/resource.h>
3835e6168fSJeff Roberson #include <sys/sched.h>
3935e6168fSJeff Roberson #include <sys/smp.h>
4035e6168fSJeff Roberson #include <sys/sx.h>
4135e6168fSJeff Roberson #include <sys/sysctl.h>
4235e6168fSJeff Roberson #include <sys/sysproto.h>
4335e6168fSJeff Roberson #include <sys/vmmeter.h>
4435e6168fSJeff Roberson #ifdef DDB
4535e6168fSJeff Roberson #include <ddb/ddb.h>
4635e6168fSJeff Roberson #endif
4735e6168fSJeff Roberson #ifdef KTRACE
4835e6168fSJeff Roberson #include <sys/uio.h>
4935e6168fSJeff Roberson #include <sys/ktrace.h>
5035e6168fSJeff Roberson #endif
5135e6168fSJeff Roberson 
5235e6168fSJeff Roberson #include <machine/cpu.h>
5322bf7d9aSJeff Roberson #include <machine/smp.h>
5435e6168fSJeff Roberson 
5515dc847eSJeff Roberson #define KTR_ULE         KTR_NFS
5615dc847eSJeff Roberson 
5735e6168fSJeff Roberson /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
5835e6168fSJeff Roberson /* XXX This is bogus compatability crap for ps */
5935e6168fSJeff Roberson static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
6035e6168fSJeff Roberson SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
6135e6168fSJeff Roberson 
6235e6168fSJeff Roberson static void sched_setup(void *dummy);
6335e6168fSJeff Roberson SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
6435e6168fSJeff Roberson 
6515dc847eSJeff Roberson static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");
66e1f89c22SJeff Roberson 
6715dc847eSJeff Roberson static int sched_strict;
6815dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");
6915dc847eSJeff Roberson 
7015dc847eSJeff Roberson static int slice_min = 1;
7115dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");
7215dc847eSJeff Roberson 
73210491d3SJeff Roberson static int slice_max = 10;
7415dc847eSJeff Roberson SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");
7515dc847eSJeff Roberson 
7615dc847eSJeff Roberson int realstathz;
7715dc847eSJeff Roberson int tickincr = 1;
78783caefbSJeff Roberson 
79356500a3SJeff Roberson #ifdef SMP
80356500a3SJeff Roberson /* Callout to handle load balancing SMP systems. */
81356500a3SJeff Roberson static struct callout kseq_lb_callout;
82356500a3SJeff Roberson #endif
83356500a3SJeff Roberson 
8435e6168fSJeff Roberson /*
8535e6168fSJeff Roberson  * These datastructures are allocated within their parent datastructure but
8635e6168fSJeff Roberson  * are scheduler specific.
8735e6168fSJeff Roberson  */
8835e6168fSJeff Roberson 
8935e6168fSJeff Roberson struct ke_sched {
9035e6168fSJeff Roberson 	int		ske_slice;
9135e6168fSJeff Roberson 	struct runq	*ske_runq;
9235e6168fSJeff Roberson 	/* The following variables are only used for pctcpu calculation */
9335e6168fSJeff Roberson 	int		ske_ltick;	/* Last tick that we were running on */
9435e6168fSJeff Roberson 	int		ske_ftick;	/* First tick that we were running on */
9535e6168fSJeff Roberson 	int		ske_ticks;	/* Tick count */
9615dc847eSJeff Roberson 	/* CPU that we have affinity for. */
97cd6e33dfSJeff Roberson 	u_char		ske_cpu;
9835e6168fSJeff Roberson };
9935e6168fSJeff Roberson #define	ke_slice	ke_sched->ske_slice
10035e6168fSJeff Roberson #define	ke_runq		ke_sched->ske_runq
10135e6168fSJeff Roberson #define	ke_ltick	ke_sched->ske_ltick
10235e6168fSJeff Roberson #define	ke_ftick	ke_sched->ske_ftick
10335e6168fSJeff Roberson #define	ke_ticks	ke_sched->ske_ticks
104cd6e33dfSJeff Roberson #define	ke_cpu		ke_sched->ske_cpu
10522bf7d9aSJeff Roberson #define	ke_assign	ke_procq.tqe_next
10622bf7d9aSJeff Roberson 
10722bf7d9aSJeff Roberson #define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
10835e6168fSJeff Roberson 
10935e6168fSJeff Roberson struct kg_sched {
110407b0157SJeff Roberson 	int	skg_slptime;		/* Number of ticks we vol. slept */
111407b0157SJeff Roberson 	int	skg_runtime;		/* Number of ticks we were running */
11235e6168fSJeff Roberson };
11335e6168fSJeff Roberson #define	kg_slptime	kg_sched->skg_slptime
114407b0157SJeff Roberson #define	kg_runtime	kg_sched->skg_runtime
11535e6168fSJeff Roberson 
11635e6168fSJeff Roberson struct td_sched {
11735e6168fSJeff Roberson 	int	std_slptime;
11835e6168fSJeff Roberson };
11935e6168fSJeff Roberson #define	td_slptime	td_sched->std_slptime
12035e6168fSJeff Roberson 
1215d7ef00cSJeff Roberson struct td_sched td_sched;
12235e6168fSJeff Roberson struct ke_sched ke_sched;
12335e6168fSJeff Roberson struct kg_sched kg_sched;
12435e6168fSJeff Roberson 
12535e6168fSJeff Roberson struct ke_sched *kse0_sched = &ke_sched;
12635e6168fSJeff Roberson struct kg_sched *ksegrp0_sched = &kg_sched;
12735e6168fSJeff Roberson struct p_sched *proc0_sched = NULL;
12835e6168fSJeff Roberson struct td_sched *thread0_sched = &td_sched;
12935e6168fSJeff Roberson 
13035e6168fSJeff Roberson /*
131665cb285SJeff Roberson  * The priority is primarily determined by the interactivity score.  Thus, we
132665cb285SJeff Roberson  * give lower(better) priorities to kse groups that use less CPU.  The nice
133665cb285SJeff Roberson  * value is then directly added to this to allow nice to have some effect
134665cb285SJeff Roberson  * on latency.
135e1f89c22SJeff Roberson  *
136e1f89c22SJeff Roberson  * PRI_RANGE:	Total priority range for timeshare threads.
137665cb285SJeff Roberson  * PRI_NRESV:	Number of nice values.
138e1f89c22SJeff Roberson  * PRI_BASE:	The start of the dynamic range.
13935e6168fSJeff Roberson  */
140407b0157SJeff Roberson #define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
141245f3abfSJeff Roberson #define	SCHED_PRI_NRESV		PRIO_TOTAL
14298c9b132SJeff Roberson #define	SCHED_PRI_NHALF		(PRIO_TOTAL / 2)
14315dc847eSJeff Roberson #define	SCHED_PRI_NTHRESH	(SCHED_PRI_NHALF - 1)
144665cb285SJeff Roberson #define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
14515dc847eSJeff Roberson #define	SCHED_PRI_INTERACT(score)					\
146665cb285SJeff Roberson     ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
14735e6168fSJeff Roberson 
14835e6168fSJeff Roberson /*
149e1f89c22SJeff Roberson  * These determine the interactivity of a process.
15035e6168fSJeff Roberson  *
151407b0157SJeff Roberson  * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
152407b0157SJeff Roberson  *		before throttling back.
153a91172adSJeff Roberson  * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time at fork time.
154210491d3SJeff Roberson  * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
155e1f89c22SJeff Roberson  * INTERACT_THRESH:	Threshhold for placement on the current runq.
15635e6168fSJeff Roberson  */
1574c9612c6SJeff Roberson #define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
158a91172adSJeff Roberson #define	SCHED_SLP_RUN_THROTTLE	(100)
159210491d3SJeff Roberson #define	SCHED_INTERACT_MAX	(100)
160210491d3SJeff Roberson #define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
1614c9612c6SJeff Roberson #define	SCHED_INTERACT_THRESH	(30)
162e1f89c22SJeff Roberson 
16335e6168fSJeff Roberson /*
16435e6168fSJeff Roberson  * These parameters and macros determine the size of the time slice that is
16535e6168fSJeff Roberson  * granted to each thread.
16635e6168fSJeff Roberson  *
16735e6168fSJeff Roberson  * SLICE_MIN:	Minimum time slice granted, in units of ticks.
16835e6168fSJeff Roberson  * SLICE_MAX:	Maximum time slice granted.
16935e6168fSJeff Roberson  * SLICE_RANGE:	Range of available time slices scaled by hz.
170245f3abfSJeff Roberson  * SLICE_SCALE:	The number slices granted per val in the range of [0, max].
171245f3abfSJeff Roberson  * SLICE_NICE:  Determine the amount of slice granted to a scaled nice.
17235e6168fSJeff Roberson  */
17315dc847eSJeff Roberson #define	SCHED_SLICE_MIN			(slice_min)
17415dc847eSJeff Roberson #define	SCHED_SLICE_MAX			(slice_max)
17535e6168fSJeff Roberson #define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
17635e6168fSJeff Roberson #define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
177245f3abfSJeff Roberson #define	SCHED_SLICE_NICE(nice)						\
17815dc847eSJeff Roberson     (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NTHRESH))
17935e6168fSJeff Roberson 
18035e6168fSJeff Roberson /*
18135e6168fSJeff Roberson  * This macro determines whether or not the kse belongs on the current or
18235e6168fSJeff Roberson  * next run queue.
183407b0157SJeff Roberson  *
184407b0157SJeff Roberson  * XXX nice value should effect how interactive a kg is.
18535e6168fSJeff Roberson  */
18615dc847eSJeff Roberson #define	SCHED_INTERACTIVE(kg)						\
18715dc847eSJeff Roberson     (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
188a5f099d0SJeff Roberson #define	SCHED_CURR(kg, ke)						\
18908fd6713SJeff Roberson     (ke->ke_thread->td_priority != kg->kg_user_pri ||			\
19008fd6713SJeff Roberson     SCHED_INTERACTIVE(kg))
19135e6168fSJeff Roberson 
19235e6168fSJeff Roberson /*
19335e6168fSJeff Roberson  * Cpu percentage computation macros and defines.
19435e6168fSJeff Roberson  *
19535e6168fSJeff Roberson  * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
19635e6168fSJeff Roberson  * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
19735e6168fSJeff Roberson  */
19835e6168fSJeff Roberson 
1995053d272SJeff Roberson #define	SCHED_CPU_TIME	10
20035e6168fSJeff Roberson #define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)
20135e6168fSJeff Roberson 
20235e6168fSJeff Roberson /*
20315dc847eSJeff Roberson  * kseq - per processor runqs and statistics.
20435e6168fSJeff Roberson  */
20535e6168fSJeff Roberson 
20615dc847eSJeff Roberson #define	KSEQ_NCLASS	(PRI_IDLE + 1)	/* Number of run classes. */
20715dc847eSJeff Roberson 
20835e6168fSJeff Roberson struct kseq {
209a8949de2SJeff Roberson 	struct runq	ksq_idle;		/* Queue of IDLE threads. */
21015dc847eSJeff Roberson 	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
21115dc847eSJeff Roberson 	struct runq	*ksq_next;		/* Next timeshare queue. */
21215dc847eSJeff Roberson 	struct runq	*ksq_curr;		/* Current queue. */
21315dc847eSJeff Roberson 	int		ksq_loads[KSEQ_NCLASS];	/* Load for each class */
21415dc847eSJeff Roberson 	int		ksq_load;		/* Aggregate load. */
21515dc847eSJeff Roberson 	short		ksq_nice[PRIO_TOTAL + 1]; /* KSEs in each nice bin. */
21615dc847eSJeff Roberson 	short		ksq_nicemin;		/* Least nice. */
2175d7ef00cSJeff Roberson #ifdef SMP
2185d7ef00cSJeff Roberson 	unsigned int	ksq_rslices;	/* Slices on run queue */
21922bf7d9aSJeff Roberson 	int		ksq_cpus;	/* Count of CPUs in this kseq. */
22022bf7d9aSJeff Roberson 	struct kse 	*ksq_assigned;	/* KSEs assigned by another CPU. */
2215d7ef00cSJeff Roberson #endif
22235e6168fSJeff Roberson };
22335e6168fSJeff Roberson 
22435e6168fSJeff Roberson /*
22535e6168fSJeff Roberson  * One kse queue per processor.
22635e6168fSJeff Roberson  */
2270a016a05SJeff Roberson #ifdef SMP
22822bf7d9aSJeff Roberson static int kseq_idle;
22922bf7d9aSJeff Roberson static struct kseq	kseq_cpu[MAXCPU];
23022bf7d9aSJeff Roberson static struct kseq	*kseq_idmap[MAXCPU];
231749d01b0SJeff Roberson #define	KSEQ_SELF()	(kseq_idmap[PCPU_GET(cpuid)])
232749d01b0SJeff Roberson #define	KSEQ_CPU(x)	(kseq_idmap[(x)])
2330a016a05SJeff Roberson #else
23422bf7d9aSJeff Roberson static struct kseq	kseq_cpu;
2350a016a05SJeff Roberson #define	KSEQ_SELF()	(&kseq_cpu)
2360a016a05SJeff Roberson #define	KSEQ_CPU(x)	(&kseq_cpu)
2370a016a05SJeff Roberson #endif
23835e6168fSJeff Roberson 
239245f3abfSJeff Roberson static void sched_slice(struct kse *ke);
24015dc847eSJeff Roberson static void sched_priority(struct ksegrp *kg);
241e1f89c22SJeff Roberson static int sched_interact_score(struct ksegrp *kg);
2424b60e324SJeff Roberson static void sched_interact_update(struct ksegrp *kg);
24322bf7d9aSJeff Roberson static void sched_pctcpu_update(struct kse *ke);
24435e6168fSJeff Roberson 
2455d7ef00cSJeff Roberson /* Operations on per processor queues */
24622bf7d9aSJeff Roberson static struct kse * kseq_choose(struct kseq *kseq);
2470a016a05SJeff Roberson static void kseq_setup(struct kseq *kseq);
248a8949de2SJeff Roberson static void kseq_add(struct kseq *kseq, struct kse *ke);
24915dc847eSJeff Roberson static void kseq_rem(struct kseq *kseq, struct kse *ke);
25015dc847eSJeff Roberson static void kseq_nice_add(struct kseq *kseq, int nice);
25115dc847eSJeff Roberson static void kseq_nice_rem(struct kseq *kseq, int nice);
2527cd650a9SJeff Roberson void kseq_print(int cpu);
2535d7ef00cSJeff Roberson #ifdef SMP
25422bf7d9aSJeff Roberson #if 0
25522bf7d9aSJeff Roberson static int sched_pickcpu(void);
25622bf7d9aSJeff Roberson #endif
25722bf7d9aSJeff Roberson static struct kse *runq_steal(struct runq *rq);
25822bf7d9aSJeff Roberson static struct kseq *kseq_load_highest(void);
25922bf7d9aSJeff Roberson static void kseq_balance(void *arg);
26022bf7d9aSJeff Roberson static void kseq_move(struct kseq *from, int cpu);
26122bf7d9aSJeff Roberson static int kseq_find(void);
26222bf7d9aSJeff Roberson static void kseq_notify(struct kse *ke, int cpu);
26322bf7d9aSJeff Roberson static void kseq_assign(struct kseq *);
26422bf7d9aSJeff Roberson static struct kse *kseq_steal(struct kseq *kseq);
2655d7ef00cSJeff Roberson #endif
2665d7ef00cSJeff Roberson 
26715dc847eSJeff Roberson void
2687cd650a9SJeff Roberson kseq_print(int cpu)
26915dc847eSJeff Roberson {
2707cd650a9SJeff Roberson 	struct kseq *kseq;
27115dc847eSJeff Roberson 	int i;
27215dc847eSJeff Roberson 
2737cd650a9SJeff Roberson 	kseq = KSEQ_CPU(cpu);
27415dc847eSJeff Roberson 
27515dc847eSJeff Roberson 	printf("kseq:\n");
27615dc847eSJeff Roberson 	printf("\tload:           %d\n", kseq->ksq_load);
27715dc847eSJeff Roberson 	printf("\tload ITHD:      %d\n", kseq->ksq_loads[PRI_ITHD]);
27815dc847eSJeff Roberson 	printf("\tload REALTIME:  %d\n", kseq->ksq_loads[PRI_REALTIME]);
27915dc847eSJeff Roberson 	printf("\tload TIMESHARE: %d\n", kseq->ksq_loads[PRI_TIMESHARE]);
28015dc847eSJeff Roberson 	printf("\tload IDLE:      %d\n", kseq->ksq_loads[PRI_IDLE]);
28115dc847eSJeff Roberson 	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
28215dc847eSJeff Roberson 	printf("\tnice counts:\n");
28315dc847eSJeff Roberson 	for (i = 0; i < PRIO_TOTAL + 1; i++)
28415dc847eSJeff Roberson 		if (kseq->ksq_nice[i])
28515dc847eSJeff Roberson 			printf("\t\t%d = %d\n",
28615dc847eSJeff Roberson 			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
28715dc847eSJeff Roberson }
28815dc847eSJeff Roberson 
289a8949de2SJeff Roberson static void
2905d7ef00cSJeff Roberson kseq_add(struct kseq *kseq, struct kse *ke)
2915d7ef00cSJeff Roberson {
292b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
293b5c4c4a7SJeff Roberson 	kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]++;
29415dc847eSJeff Roberson 	kseq->ksq_load++;
29515dc847eSJeff Roberson 	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
29615dc847eSJeff Roberson 	CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
29715dc847eSJeff Roberson 	    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
29815dc847eSJeff Roberson 	    ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin);
29915dc847eSJeff Roberson 	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
30015dc847eSJeff Roberson 		kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice);
3015d7ef00cSJeff Roberson #ifdef SMP
3025d7ef00cSJeff Roberson 	kseq->ksq_rslices += ke->ke_slice;
3035d7ef00cSJeff Roberson #endif
3045d7ef00cSJeff Roberson }
30515dc847eSJeff Roberson 
306a8949de2SJeff Roberson static void
3075d7ef00cSJeff Roberson kseq_rem(struct kseq *kseq, struct kse *ke)
3085d7ef00cSJeff Roberson {
309b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
310b5c4c4a7SJeff Roberson 	kseq->ksq_loads[PRI_BASE(ke->ke_ksegrp->kg_pri_class)]--;
31115dc847eSJeff Roberson 	kseq->ksq_load--;
31215dc847eSJeff Roberson 	ke->ke_runq = NULL;
31315dc847eSJeff Roberson 	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
31415dc847eSJeff Roberson 		kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice);
3155d7ef00cSJeff Roberson #ifdef SMP
3165d7ef00cSJeff Roberson 	kseq->ksq_rslices -= ke->ke_slice;
3175d7ef00cSJeff Roberson #endif
3185d7ef00cSJeff Roberson }
3195d7ef00cSJeff Roberson 
32015dc847eSJeff Roberson static void
32115dc847eSJeff Roberson kseq_nice_add(struct kseq *kseq, int nice)
32215dc847eSJeff Roberson {
323b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
32415dc847eSJeff Roberson 	/* Normalize to zero. */
32515dc847eSJeff Roberson 	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
326b90816f1SJeff Roberson 	if (nice < kseq->ksq_nicemin || kseq->ksq_loads[PRI_TIMESHARE] == 1)
32715dc847eSJeff Roberson 		kseq->ksq_nicemin = nice;
32815dc847eSJeff Roberson }
32915dc847eSJeff Roberson 
33015dc847eSJeff Roberson static void
33115dc847eSJeff Roberson kseq_nice_rem(struct kseq *kseq, int nice)
33215dc847eSJeff Roberson {
33315dc847eSJeff Roberson 	int n;
33415dc847eSJeff Roberson 
335b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
33615dc847eSJeff Roberson 	/* Normalize to zero. */
33715dc847eSJeff Roberson 	n = nice + SCHED_PRI_NHALF;
33815dc847eSJeff Roberson 	kseq->ksq_nice[n]--;
33915dc847eSJeff Roberson 	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));
34015dc847eSJeff Roberson 
34115dc847eSJeff Roberson 	/*
34215dc847eSJeff Roberson 	 * If this wasn't the smallest nice value or there are more in
34315dc847eSJeff Roberson 	 * this bucket we can just return.  Otherwise we have to recalculate
34415dc847eSJeff Roberson 	 * the smallest nice.
34515dc847eSJeff Roberson 	 */
34615dc847eSJeff Roberson 	if (nice != kseq->ksq_nicemin ||
34715dc847eSJeff Roberson 	    kseq->ksq_nice[n] != 0 ||
34815dc847eSJeff Roberson 	    kseq->ksq_loads[PRI_TIMESHARE] == 0)
34915dc847eSJeff Roberson 		return;
35015dc847eSJeff Roberson 
35115dc847eSJeff Roberson 	for (; n < SCHED_PRI_NRESV + 1; n++)
35215dc847eSJeff Roberson 		if (kseq->ksq_nice[n]) {
35315dc847eSJeff Roberson 			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
35415dc847eSJeff Roberson 			return;
35515dc847eSJeff Roberson 		}
35615dc847eSJeff Roberson }
35715dc847eSJeff Roberson 
3585d7ef00cSJeff Roberson #ifdef SMP
359356500a3SJeff Roberson /*
360356500a3SJeff Roberson  * kseq_balance is a simple CPU load balancing algorithm.  It operates by
361356500a3SJeff Roberson  * finding the least loaded and most loaded cpu and equalizing their load
362356500a3SJeff Roberson  * by migrating some processes.
363356500a3SJeff Roberson  *
364356500a3SJeff Roberson  * Dealing only with two CPUs at a time has two advantages.  Firstly, most
365356500a3SJeff Roberson  * installations will only have 2 cpus.  Secondly, load balancing too much at
366356500a3SJeff Roberson  * once can have an unpleasant effect on the system.  The scheduler rarely has
367356500a3SJeff Roberson  * enough information to make perfect decisions.  So this algorithm chooses
368356500a3SJeff Roberson  * algorithm simplicity and more gradual effects on load in larger systems.
369356500a3SJeff Roberson  *
370356500a3SJeff Roberson  * It could be improved by considering the priorities and slices assigned to
371356500a3SJeff Roberson  * each task prior to balancing them.  There are many pathological cases with
372356500a3SJeff Roberson  * any approach and so the semi random algorithm below may work as well as any.
373356500a3SJeff Roberson  *
374356500a3SJeff Roberson  */
37522bf7d9aSJeff Roberson static void
376356500a3SJeff Roberson kseq_balance(void *arg)
377356500a3SJeff Roberson {
378356500a3SJeff Roberson 	struct kseq *kseq;
379356500a3SJeff Roberson 	int high_load;
380356500a3SJeff Roberson 	int low_load;
381356500a3SJeff Roberson 	int high_cpu;
382356500a3SJeff Roberson 	int low_cpu;
383356500a3SJeff Roberson 	int move;
384356500a3SJeff Roberson 	int diff;
385356500a3SJeff Roberson 	int i;
386356500a3SJeff Roberson 
387356500a3SJeff Roberson 	high_cpu = 0;
388356500a3SJeff Roberson 	low_cpu = 0;
389356500a3SJeff Roberson 	high_load = 0;
390356500a3SJeff Roberson 	low_load = -1;
391356500a3SJeff Roberson 
392356500a3SJeff Roberson 	mtx_lock_spin(&sched_lock);
39386f8ae96SJeff Roberson 	if (smp_started == 0)
39486f8ae96SJeff Roberson 		goto out;
39586f8ae96SJeff Roberson 
396356500a3SJeff Roberson 	for (i = 0; i < mp_maxid; i++) {
3977a20304fSJeff Roberson 		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
398356500a3SJeff Roberson 			continue;
399356500a3SJeff Roberson 		kseq = KSEQ_CPU(i);
400356500a3SJeff Roberson 		if (kseq->ksq_load > high_load) {
401356500a3SJeff Roberson 			high_load = kseq->ksq_load;
402356500a3SJeff Roberson 			high_cpu = i;
403356500a3SJeff Roberson 		}
404356500a3SJeff Roberson 		if (low_load == -1 || kseq->ksq_load < low_load) {
405356500a3SJeff Roberson 			low_load = kseq->ksq_load;
406356500a3SJeff Roberson 			low_cpu = i;
407356500a3SJeff Roberson 		}
408356500a3SJeff Roberson 	}
409356500a3SJeff Roberson 
410749d01b0SJeff Roberson 	kseq = KSEQ_CPU(high_cpu);
411749d01b0SJeff Roberson 
41222bf7d9aSJeff Roberson 	high_load = kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] +
41322bf7d9aSJeff Roberson 	    kseq->ksq_loads[PRI_REALTIME];
414356500a3SJeff Roberson 	/*
415356500a3SJeff Roberson 	 * Nothing to do.
416356500a3SJeff Roberson 	 */
417749d01b0SJeff Roberson 	if (high_load < kseq->ksq_cpus + 1)
418749d01b0SJeff Roberson 		goto out;
419749d01b0SJeff Roberson 
420749d01b0SJeff Roberson 	high_load -= kseq->ksq_cpus;
421749d01b0SJeff Roberson 
422749d01b0SJeff Roberson 	if (low_load >= high_load)
423356500a3SJeff Roberson 		goto out;
424356500a3SJeff Roberson 
425356500a3SJeff Roberson 	diff = high_load - low_load;
426356500a3SJeff Roberson 	move = diff / 2;
427356500a3SJeff Roberson 	if (diff & 0x1)
428356500a3SJeff Roberson 		move++;
429356500a3SJeff Roberson 
430356500a3SJeff Roberson 	for (i = 0; i < move; i++)
431749d01b0SJeff Roberson 		kseq_move(kseq, low_cpu);
432356500a3SJeff Roberson 
433356500a3SJeff Roberson out:
434356500a3SJeff Roberson 	mtx_unlock_spin(&sched_lock);
435356500a3SJeff Roberson 	callout_reset(&kseq_lb_callout, hz, kseq_balance, NULL);
436356500a3SJeff Roberson 
437356500a3SJeff Roberson 	return;
438356500a3SJeff Roberson }
439356500a3SJeff Roberson 
44022bf7d9aSJeff Roberson static struct kseq *
4415d7ef00cSJeff Roberson kseq_load_highest(void)
4425d7ef00cSJeff Roberson {
4435d7ef00cSJeff Roberson 	struct kseq *kseq;
4445d7ef00cSJeff Roberson 	int load;
4455d7ef00cSJeff Roberson 	int cpu;
4465d7ef00cSJeff Roberson 	int i;
4475d7ef00cSJeff Roberson 
448b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
4495d7ef00cSJeff Roberson 	cpu = 0;
4505d7ef00cSJeff Roberson 	load = 0;
4515d7ef00cSJeff Roberson 
4525d7ef00cSJeff Roberson 	for (i = 0; i < mp_maxid; i++) {
4537a20304fSJeff Roberson 		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
4545d7ef00cSJeff Roberson 			continue;
4555d7ef00cSJeff Roberson 		kseq = KSEQ_CPU(i);
45615dc847eSJeff Roberson 		if (kseq->ksq_load > load) {
45715dc847eSJeff Roberson 			load = kseq->ksq_load;
4585d7ef00cSJeff Roberson 			cpu = i;
4595d7ef00cSJeff Roberson 		}
4605d7ef00cSJeff Roberson 	}
461749d01b0SJeff Roberson 	kseq = KSEQ_CPU(cpu);
462749d01b0SJeff Roberson 
46322bf7d9aSJeff Roberson 	if ((kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] +
46422bf7d9aSJeff Roberson 	    kseq->ksq_loads[PRI_REALTIME]) > kseq->ksq_cpus)
465749d01b0SJeff Roberson 		return (kseq);
4665d7ef00cSJeff Roberson 
4675d7ef00cSJeff Roberson 	return (NULL);
4685d7ef00cSJeff Roberson }
469356500a3SJeff Roberson 
47022bf7d9aSJeff Roberson static void
471356500a3SJeff Roberson kseq_move(struct kseq *from, int cpu)
472356500a3SJeff Roberson {
473356500a3SJeff Roberson 	struct kse *ke;
474356500a3SJeff Roberson 
47522bf7d9aSJeff Roberson 	ke = kseq_steal(from);
476356500a3SJeff Roberson 	runq_remove(ke->ke_runq, ke);
477356500a3SJeff Roberson 	ke->ke_state = KES_THREAD;
478356500a3SJeff Roberson 	kseq_rem(from, ke);
479356500a3SJeff Roberson 
480356500a3SJeff Roberson 	ke->ke_cpu = cpu;
4810c7da3a4SJeff Roberson 	sched_add(ke->ke_thread);
482356500a3SJeff Roberson }
48322bf7d9aSJeff Roberson 
48422bf7d9aSJeff Roberson static int
48522bf7d9aSJeff Roberson kseq_find(void)
48622bf7d9aSJeff Roberson {
48722bf7d9aSJeff Roberson 	struct kseq *high;
48822bf7d9aSJeff Roberson 
48922bf7d9aSJeff Roberson 	if (!smp_started)
49022bf7d9aSJeff Roberson 		return (0);
49122bf7d9aSJeff Roberson 	if (kseq_idle & PCPU_GET(cpumask))
49222bf7d9aSJeff Roberson 		return (0);
49322bf7d9aSJeff Roberson 	/*
49422bf7d9aSJeff Roberson 	 * Find the cpu with the highest load and steal one proc.
49522bf7d9aSJeff Roberson 	 */
49622bf7d9aSJeff Roberson 	if ((high = kseq_load_highest()) == NULL ||
49722bf7d9aSJeff Roberson 	    high == KSEQ_SELF()) {
49822bf7d9aSJeff Roberson 		/*
49922bf7d9aSJeff Roberson 		 * If we couldn't find one, set ourselves in the
50022bf7d9aSJeff Roberson 		 * idle map.
50122bf7d9aSJeff Roberson 		 */
50222bf7d9aSJeff Roberson 		atomic_set_int(&kseq_idle, PCPU_GET(cpumask));
50322bf7d9aSJeff Roberson 		return (0);
50422bf7d9aSJeff Roberson 	}
50522bf7d9aSJeff Roberson 	/*
50622bf7d9aSJeff Roberson 	 * Remove this kse from this kseq and runq and then requeue
50722bf7d9aSJeff Roberson 	 * on the current processor.  We now have a load of one!
50822bf7d9aSJeff Roberson 	 */
50922bf7d9aSJeff Roberson 	kseq_move(high, PCPU_GET(cpuid));
51022bf7d9aSJeff Roberson 
51122bf7d9aSJeff Roberson 	return (1);
51222bf7d9aSJeff Roberson }
51322bf7d9aSJeff Roberson 
51422bf7d9aSJeff Roberson static void
51522bf7d9aSJeff Roberson kseq_assign(struct kseq *kseq)
51622bf7d9aSJeff Roberson {
51722bf7d9aSJeff Roberson 	struct kse *nke;
51822bf7d9aSJeff Roberson 	struct kse *ke;
51922bf7d9aSJeff Roberson 
52022bf7d9aSJeff Roberson 	do {
52122bf7d9aSJeff Roberson 		ke = kseq->ksq_assigned;
52222bf7d9aSJeff Roberson 	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
52322bf7d9aSJeff Roberson 	for (; ke != NULL; ke = nke) {
52422bf7d9aSJeff Roberson 		nke = ke->ke_assign;
52522bf7d9aSJeff Roberson 		ke->ke_flags &= ~KEF_ASSIGNED;
52622bf7d9aSJeff Roberson 		sched_add(ke->ke_thread);
52722bf7d9aSJeff Roberson 	}
52822bf7d9aSJeff Roberson }
52922bf7d9aSJeff Roberson 
53022bf7d9aSJeff Roberson static void
53122bf7d9aSJeff Roberson kseq_notify(struct kse *ke, int cpu)
53222bf7d9aSJeff Roberson {
53322bf7d9aSJeff Roberson 	struct kseq *kseq;
53422bf7d9aSJeff Roberson 	struct thread *td;
53522bf7d9aSJeff Roberson 	struct pcpu *pcpu;
53622bf7d9aSJeff Roberson 
53722bf7d9aSJeff Roberson 	ke->ke_flags |= KEF_ASSIGNED;
53822bf7d9aSJeff Roberson 
53922bf7d9aSJeff Roberson 	kseq = KSEQ_CPU(cpu);
5405d7ef00cSJeff Roberson 
5410c0a98b2SJeff Roberson 	/*
54222bf7d9aSJeff Roberson 	 * Place a KSE on another cpu's queue and force a resched.
54322bf7d9aSJeff Roberson 	 */
54422bf7d9aSJeff Roberson 	do {
54522bf7d9aSJeff Roberson 		ke->ke_assign = kseq->ksq_assigned;
54622bf7d9aSJeff Roberson 	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
54722bf7d9aSJeff Roberson 	pcpu = pcpu_find(cpu);
54822bf7d9aSJeff Roberson 	td = pcpu->pc_curthread;
54922bf7d9aSJeff Roberson 	if (ke->ke_thread->td_priority < td->td_priority ||
55022bf7d9aSJeff Roberson 	    td == pcpu->pc_idlethread) {
55122bf7d9aSJeff Roberson 		td->td_flags |= TDF_NEEDRESCHED;
55222bf7d9aSJeff Roberson 		ipi_selected(1 << cpu, IPI_AST);
55322bf7d9aSJeff Roberson 	}
55422bf7d9aSJeff Roberson }
55522bf7d9aSJeff Roberson 
55622bf7d9aSJeff Roberson static struct kse *
55722bf7d9aSJeff Roberson runq_steal(struct runq *rq)
55822bf7d9aSJeff Roberson {
55922bf7d9aSJeff Roberson 	struct rqhead *rqh;
56022bf7d9aSJeff Roberson 	struct rqbits *rqb;
56122bf7d9aSJeff Roberson 	struct kse *ke;
56222bf7d9aSJeff Roberson 	int word;
56322bf7d9aSJeff Roberson 	int bit;
56422bf7d9aSJeff Roberson 
56522bf7d9aSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
56622bf7d9aSJeff Roberson 	rqb = &rq->rq_status;
56722bf7d9aSJeff Roberson 	for (word = 0; word < RQB_LEN; word++) {
56822bf7d9aSJeff Roberson 		if (rqb->rqb_bits[word] == 0)
56922bf7d9aSJeff Roberson 			continue;
57022bf7d9aSJeff Roberson 		for (bit = 0; bit < RQB_BPW; bit++) {
57122bf7d9aSJeff Roberson 			if ((rqb->rqb_bits[word] & (1 << bit)) == 0)
57222bf7d9aSJeff Roberson 				continue;
57322bf7d9aSJeff Roberson 			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
57422bf7d9aSJeff Roberson 			TAILQ_FOREACH(ke, rqh, ke_procq) {
57522bf7d9aSJeff Roberson 				if (PRI_BASE(ke->ke_ksegrp->kg_pri_class) !=
57622bf7d9aSJeff Roberson 				    PRI_ITHD)
57722bf7d9aSJeff Roberson 					return (ke);
57822bf7d9aSJeff Roberson 			}
57922bf7d9aSJeff Roberson 		}
58022bf7d9aSJeff Roberson 	}
58122bf7d9aSJeff Roberson 	return (NULL);
58222bf7d9aSJeff Roberson }
58322bf7d9aSJeff Roberson 
58422bf7d9aSJeff Roberson static struct kse *
58522bf7d9aSJeff Roberson kseq_steal(struct kseq *kseq)
58622bf7d9aSJeff Roberson {
58722bf7d9aSJeff Roberson 	struct kse *ke;
58822bf7d9aSJeff Roberson 
58922bf7d9aSJeff Roberson 	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
59022bf7d9aSJeff Roberson 		return (ke);
59122bf7d9aSJeff Roberson 	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
59222bf7d9aSJeff Roberson 		return (ke);
59322bf7d9aSJeff Roberson 	return (runq_steal(&kseq->ksq_idle));
59422bf7d9aSJeff Roberson }
59522bf7d9aSJeff Roberson #endif	/* SMP */
59622bf7d9aSJeff Roberson 
59722bf7d9aSJeff Roberson /*
59822bf7d9aSJeff Roberson  * Pick the highest priority task we have and return it.
5990c0a98b2SJeff Roberson  */
6000c0a98b2SJeff Roberson 
60122bf7d9aSJeff Roberson static struct kse *
60222bf7d9aSJeff Roberson kseq_choose(struct kseq *kseq)
6035d7ef00cSJeff Roberson {
6045d7ef00cSJeff Roberson 	struct kse *ke;
6055d7ef00cSJeff Roberson 	struct runq *swap;
6065d7ef00cSJeff Roberson 
607b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
60815dc847eSJeff Roberson 	swap = NULL;
609a8949de2SJeff Roberson 
61015dc847eSJeff Roberson 	for (;;) {
61115dc847eSJeff Roberson 		ke = runq_choose(kseq->ksq_curr);
61215dc847eSJeff Roberson 		if (ke == NULL) {
61315dc847eSJeff Roberson 			/*
61415dc847eSJeff Roberson 			 * We already swaped once and didn't get anywhere.
61515dc847eSJeff Roberson 			 */
61615dc847eSJeff Roberson 			if (swap)
61715dc847eSJeff Roberson 				break;
6185d7ef00cSJeff Roberson 			swap = kseq->ksq_curr;
6195d7ef00cSJeff Roberson 			kseq->ksq_curr = kseq->ksq_next;
6205d7ef00cSJeff Roberson 			kseq->ksq_next = swap;
62115dc847eSJeff Roberson 			continue;
622a8949de2SJeff Roberson 		}
62315dc847eSJeff Roberson 		/*
62415dc847eSJeff Roberson 		 * If we encounter a slice of 0 the kse is in a
62515dc847eSJeff Roberson 		 * TIMESHARE kse group and its nice was too far out
62615dc847eSJeff Roberson 		 * of the range that receives slices.
62715dc847eSJeff Roberson 		 */
62822bf7d9aSJeff Roberson 		if (ke->ke_slice == 0) {
62915dc847eSJeff Roberson 			runq_remove(ke->ke_runq, ke);
63015dc847eSJeff Roberson 			sched_slice(ke);
63115dc847eSJeff Roberson 			ke->ke_runq = kseq->ksq_next;
63215dc847eSJeff Roberson 			runq_add(ke->ke_runq, ke);
63315dc847eSJeff Roberson 			continue;
63415dc847eSJeff Roberson 		}
63515dc847eSJeff Roberson 		return (ke);
63615dc847eSJeff Roberson 	}
63715dc847eSJeff Roberson 
638a8949de2SJeff Roberson 	return (runq_choose(&kseq->ksq_idle));
639245f3abfSJeff Roberson }
6400a016a05SJeff Roberson 
6410a016a05SJeff Roberson static void
6420a016a05SJeff Roberson kseq_setup(struct kseq *kseq)
6430a016a05SJeff Roberson {
64415dc847eSJeff Roberson 	runq_init(&kseq->ksq_timeshare[0]);
64515dc847eSJeff Roberson 	runq_init(&kseq->ksq_timeshare[1]);
646a8949de2SJeff Roberson 	runq_init(&kseq->ksq_idle);
64715dc847eSJeff Roberson 
64815dc847eSJeff Roberson 	kseq->ksq_curr = &kseq->ksq_timeshare[0];
64915dc847eSJeff Roberson 	kseq->ksq_next = &kseq->ksq_timeshare[1];
65015dc847eSJeff Roberson 
65115dc847eSJeff Roberson 	kseq->ksq_loads[PRI_ITHD] = 0;
65215dc847eSJeff Roberson 	kseq->ksq_loads[PRI_REALTIME] = 0;
65315dc847eSJeff Roberson 	kseq->ksq_loads[PRI_TIMESHARE] = 0;
65415dc847eSJeff Roberson 	kseq->ksq_loads[PRI_IDLE] = 0;
6557cd650a9SJeff Roberson 	kseq->ksq_load = 0;
6565d7ef00cSJeff Roberson #ifdef SMP
6575d7ef00cSJeff Roberson 	kseq->ksq_rslices = 0;
65822bf7d9aSJeff Roberson 	kseq->ksq_assigned = NULL;
6595d7ef00cSJeff Roberson #endif
6600a016a05SJeff Roberson }
6610a016a05SJeff Roberson 
66235e6168fSJeff Roberson static void
66335e6168fSJeff Roberson sched_setup(void *dummy)
66435e6168fSJeff Roberson {
6650ec896fdSJeff Roberson #ifdef SMP
66635e6168fSJeff Roberson 	int i;
6670ec896fdSJeff Roberson #endif
66835e6168fSJeff Roberson 
669e493a5d9SJeff Roberson 	slice_min = (hz/100);	/* 10ms */
670e493a5d9SJeff Roberson 	slice_max = (hz/7);	/* ~140ms */
671e1f89c22SJeff Roberson 
672356500a3SJeff Roberson #ifdef SMP
673749d01b0SJeff Roberson 	/* init kseqs */
674749d01b0SJeff Roberson 	/* Create the idmap. */
675749d01b0SJeff Roberson #ifdef ULE_HTT_EXPERIMENTAL
676749d01b0SJeff Roberson 	if (smp_topology == NULL) {
677749d01b0SJeff Roberson #else
678749d01b0SJeff Roberson 	if (1) {
679749d01b0SJeff Roberson #endif
680749d01b0SJeff Roberson 		for (i = 0; i < MAXCPU; i++) {
681749d01b0SJeff Roberson 			kseq_setup(&kseq_cpu[i]);
682749d01b0SJeff Roberson 			kseq_idmap[i] = &kseq_cpu[i];
683749d01b0SJeff Roberson 			kseq_cpu[i].ksq_cpus = 1;
684749d01b0SJeff Roberson 		}
685749d01b0SJeff Roberson 	} else {
686749d01b0SJeff Roberson 		int j;
687749d01b0SJeff Roberson 
688749d01b0SJeff Roberson 		for (i = 0; i < smp_topology->ct_count; i++) {
689749d01b0SJeff Roberson 			struct cpu_group *cg;
690749d01b0SJeff Roberson 
691749d01b0SJeff Roberson 			cg = &smp_topology->ct_group[i];
692749d01b0SJeff Roberson 			kseq_setup(&kseq_cpu[i]);
693749d01b0SJeff Roberson 
694749d01b0SJeff Roberson 			for (j = 0; j < MAXCPU; j++)
695749d01b0SJeff Roberson 				if ((cg->cg_mask & (1 << j)) != 0)
696749d01b0SJeff Roberson 					kseq_idmap[j] = &kseq_cpu[i];
697749d01b0SJeff Roberson 			kseq_cpu[i].ksq_cpus = cg->cg_count;
698749d01b0SJeff Roberson 		}
699749d01b0SJeff Roberson 	}
700c06eb4e2SSam Leffler 	callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
701356500a3SJeff Roberson 	kseq_balance(NULL);
702749d01b0SJeff Roberson #else
703749d01b0SJeff Roberson 	kseq_setup(KSEQ_SELF());
704356500a3SJeff Roberson #endif
705749d01b0SJeff Roberson 	mtx_lock_spin(&sched_lock);
706749d01b0SJeff Roberson 	kseq_add(KSEQ_SELF(), &kse0);
707749d01b0SJeff Roberson 	mtx_unlock_spin(&sched_lock);
70835e6168fSJeff Roberson }
70935e6168fSJeff Roberson 
71035e6168fSJeff Roberson /*
71135e6168fSJeff Roberson  * Scale the scheduling priority according to the "interactivity" of this
71235e6168fSJeff Roberson  * process.
71335e6168fSJeff Roberson  */
71415dc847eSJeff Roberson static void
71535e6168fSJeff Roberson sched_priority(struct ksegrp *kg)
71635e6168fSJeff Roberson {
71735e6168fSJeff Roberson 	int pri;
71835e6168fSJeff Roberson 
71935e6168fSJeff Roberson 	if (kg->kg_pri_class != PRI_TIMESHARE)
72015dc847eSJeff Roberson 		return;
72135e6168fSJeff Roberson 
72215dc847eSJeff Roberson 	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
723e1f89c22SJeff Roberson 	pri += SCHED_PRI_BASE;
72435e6168fSJeff Roberson 	pri += kg->kg_nice;
72535e6168fSJeff Roberson 
72635e6168fSJeff Roberson 	if (pri > PRI_MAX_TIMESHARE)
72735e6168fSJeff Roberson 		pri = PRI_MAX_TIMESHARE;
72835e6168fSJeff Roberson 	else if (pri < PRI_MIN_TIMESHARE)
72935e6168fSJeff Roberson 		pri = PRI_MIN_TIMESHARE;
73035e6168fSJeff Roberson 
73135e6168fSJeff Roberson 	kg->kg_user_pri = pri;
73235e6168fSJeff Roberson 
73315dc847eSJeff Roberson 	return;
73435e6168fSJeff Roberson }
73535e6168fSJeff Roberson 
73635e6168fSJeff Roberson /*
737245f3abfSJeff Roberson  * Calculate a time slice based on the properties of the kseg and the runq
738a8949de2SJeff Roberson  * that we're on.  This is only for PRI_TIMESHARE ksegrps.
73935e6168fSJeff Roberson  */
740245f3abfSJeff Roberson static void
741245f3abfSJeff Roberson sched_slice(struct kse *ke)
74235e6168fSJeff Roberson {
74315dc847eSJeff Roberson 	struct kseq *kseq;
744245f3abfSJeff Roberson 	struct ksegrp *kg;
74535e6168fSJeff Roberson 
746245f3abfSJeff Roberson 	kg = ke->ke_ksegrp;
74715dc847eSJeff Roberson 	kseq = KSEQ_CPU(ke->ke_cpu);
74835e6168fSJeff Roberson 
749245f3abfSJeff Roberson 	/*
750245f3abfSJeff Roberson 	 * Rationale:
751245f3abfSJeff Roberson 	 * KSEs in interactive ksegs get the minimum slice so that we
752245f3abfSJeff Roberson 	 * quickly notice if it abuses its advantage.
753245f3abfSJeff Roberson 	 *
754245f3abfSJeff Roberson 	 * KSEs in non-interactive ksegs are assigned a slice that is
755245f3abfSJeff Roberson 	 * based on the ksegs nice value relative to the least nice kseg
756245f3abfSJeff Roberson 	 * on the run queue for this cpu.
757245f3abfSJeff Roberson 	 *
758245f3abfSJeff Roberson 	 * If the KSE is less nice than all others it gets the maximum
759245f3abfSJeff Roberson 	 * slice and other KSEs will adjust their slice relative to
760245f3abfSJeff Roberson 	 * this when they first expire.
761245f3abfSJeff Roberson 	 *
762245f3abfSJeff Roberson 	 * There is 20 point window that starts relative to the least
763245f3abfSJeff Roberson 	 * nice kse on the run queue.  Slice size is determined by
764245f3abfSJeff Roberson 	 * the kse distance from the last nice ksegrp.
765245f3abfSJeff Roberson 	 *
766245f3abfSJeff Roberson 	 * If you are outside of the window you will get no slice and
767245f3abfSJeff Roberson 	 * you will be reevaluated each time you are selected on the
768245f3abfSJeff Roberson 	 * run queue.
769245f3abfSJeff Roberson 	 *
770245f3abfSJeff Roberson 	 */
771245f3abfSJeff Roberson 
77215dc847eSJeff Roberson 	if (!SCHED_INTERACTIVE(kg)) {
773245f3abfSJeff Roberson 		int nice;
774245f3abfSJeff Roberson 
77515dc847eSJeff Roberson 		nice = kg->kg_nice + (0 - kseq->ksq_nicemin);
77615dc847eSJeff Roberson 		if (kseq->ksq_loads[PRI_TIMESHARE] == 0 ||
77715dc847eSJeff Roberson 		    kg->kg_nice < kseq->ksq_nicemin)
778245f3abfSJeff Roberson 			ke->ke_slice = SCHED_SLICE_MAX;
77915dc847eSJeff Roberson 		else if (nice <= SCHED_PRI_NTHRESH)
780245f3abfSJeff Roberson 			ke->ke_slice = SCHED_SLICE_NICE(nice);
781245f3abfSJeff Roberson 		else
782245f3abfSJeff Roberson 			ke->ke_slice = 0;
783245f3abfSJeff Roberson 	} else
784245f3abfSJeff Roberson 		ke->ke_slice = SCHED_SLICE_MIN;
78535e6168fSJeff Roberson 
78615dc847eSJeff Roberson 	CTR6(KTR_ULE,
78715dc847eSJeff Roberson 	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
78815dc847eSJeff Roberson 	    ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin,
78915dc847eSJeff Roberson 	    kseq->ksq_loads[PRI_TIMESHARE], SCHED_INTERACTIVE(kg));
79015dc847eSJeff Roberson 
791407b0157SJeff Roberson 	/*
792a8949de2SJeff Roberson 	 * Check to see if we need to scale back the slp and run time
793a8949de2SJeff Roberson 	 * in the kg.  This will cause us to forget old interactivity
794a8949de2SJeff Roberson 	 * while maintaining the current ratio.
795407b0157SJeff Roberson 	 */
7964b60e324SJeff Roberson 	sched_interact_update(kg);
797407b0157SJeff Roberson 
798245f3abfSJeff Roberson 	return;
79935e6168fSJeff Roberson }
80035e6168fSJeff Roberson 
8014b60e324SJeff Roberson static void
8024b60e324SJeff Roberson sched_interact_update(struct ksegrp *kg)
8034b60e324SJeff Roberson {
8043f741ca1SJeff Roberson         int ratio;
8053f741ca1SJeff Roberson 
8063f741ca1SJeff Roberson 	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
8073f741ca1SJeff Roberson 		ratio = ((SCHED_SLP_RUN_MAX * 15) / (kg->kg_runtime +
8083f741ca1SJeff Roberson 		    kg->kg_slptime ));
8093f741ca1SJeff Roberson 		kg->kg_runtime = (kg->kg_runtime * ratio) / 16;
8103f741ca1SJeff Roberson 		kg->kg_slptime = (kg->kg_slptime * ratio) / 16;
8114b60e324SJeff Roberson 	}
8124b60e324SJeff Roberson }
8134b60e324SJeff Roberson 
814e1f89c22SJeff Roberson static int
815e1f89c22SJeff Roberson sched_interact_score(struct ksegrp *kg)
816e1f89c22SJeff Roberson {
817210491d3SJeff Roberson 	int div;
818e1f89c22SJeff Roberson 
819e1f89c22SJeff Roberson 	if (kg->kg_runtime > kg->kg_slptime) {
820210491d3SJeff Roberson 		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
821210491d3SJeff Roberson 		return (SCHED_INTERACT_HALF +
822210491d3SJeff Roberson 		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
823210491d3SJeff Roberson 	} if (kg->kg_slptime > kg->kg_runtime) {
824210491d3SJeff Roberson 		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
825210491d3SJeff Roberson 		return (kg->kg_runtime / div);
826e1f89c22SJeff Roberson 	}
827e1f89c22SJeff Roberson 
828210491d3SJeff Roberson 	/*
829210491d3SJeff Roberson 	 * This can happen if slptime and runtime are 0.
830210491d3SJeff Roberson 	 */
831210491d3SJeff Roberson 	return (0);
832e1f89c22SJeff Roberson 
833e1f89c22SJeff Roberson }
834e1f89c22SJeff Roberson 
83515dc847eSJeff Roberson /*
83615dc847eSJeff Roberson  * This is only somewhat accurate since given many processes of the same
83715dc847eSJeff Roberson  * priority they will switch when their slices run out, which will be
83815dc847eSJeff Roberson  * at most SCHED_SLICE_MAX.
83915dc847eSJeff Roberson  */
84035e6168fSJeff Roberson int
84135e6168fSJeff Roberson sched_rr_interval(void)
84235e6168fSJeff Roberson {
84335e6168fSJeff Roberson 	return (SCHED_SLICE_MAX);
84435e6168fSJeff Roberson }
84535e6168fSJeff Roberson 
84622bf7d9aSJeff Roberson static void
84735e6168fSJeff Roberson sched_pctcpu_update(struct kse *ke)
84835e6168fSJeff Roberson {
84935e6168fSJeff Roberson 	/*
85035e6168fSJeff Roberson 	 * Adjust counters and watermark for pctcpu calc.
851210491d3SJeff Roberson 	 */
85281de51bfSJeff Roberson 	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
853210491d3SJeff Roberson 		/*
85481de51bfSJeff Roberson 		 * Shift the tick count out so that the divide doesn't
85581de51bfSJeff Roberson 		 * round away our results.
85665c8760dSJeff Roberson 		 */
85765c8760dSJeff Roberson 		ke->ke_ticks <<= 10;
85881de51bfSJeff Roberson 		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
85935e6168fSJeff Roberson 			    SCHED_CPU_TICKS;
86065c8760dSJeff Roberson 		ke->ke_ticks >>= 10;
86181de51bfSJeff Roberson 	} else
86281de51bfSJeff Roberson 		ke->ke_ticks = 0;
86335e6168fSJeff Roberson 	ke->ke_ltick = ticks;
86435e6168fSJeff Roberson 	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
86535e6168fSJeff Roberson }
86635e6168fSJeff Roberson 
86722bf7d9aSJeff Roberson #if 0
8685d7ef00cSJeff Roberson /* XXX Should be changed to kseq_load_lowest() */
86935e6168fSJeff Roberson int
87035e6168fSJeff Roberson sched_pickcpu(void)
87135e6168fSJeff Roberson {
8720a016a05SJeff Roberson 	struct kseq *kseq;
87335e6168fSJeff Roberson 	int load;
8740a016a05SJeff Roberson 	int cpu;
87535e6168fSJeff Roberson 	int i;
87635e6168fSJeff Roberson 
877b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
87835e6168fSJeff Roberson 	if (!smp_started)
87935e6168fSJeff Roberson 		return (0);
88035e6168fSJeff Roberson 
8810a016a05SJeff Roberson 	load = 0;
8820a016a05SJeff Roberson 	cpu = 0;
88335e6168fSJeff Roberson 
88435e6168fSJeff Roberson 	for (i = 0; i < mp_maxid; i++) {
8857a20304fSJeff Roberson 		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
88635e6168fSJeff Roberson 			continue;
8870a016a05SJeff Roberson 		kseq = KSEQ_CPU(i);
88815dc847eSJeff Roberson 		if (kseq->ksq_load < load) {
88935e6168fSJeff Roberson 			cpu = i;
89015dc847eSJeff Roberson 			load = kseq->ksq_load;
89135e6168fSJeff Roberson 		}
89235e6168fSJeff Roberson 	}
89335e6168fSJeff Roberson 
89435e6168fSJeff Roberson 	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
89535e6168fSJeff Roberson 	return (cpu);
89635e6168fSJeff Roberson }
89735e6168fSJeff Roberson #endif
89835e6168fSJeff Roberson 
89935e6168fSJeff Roberson void
90035e6168fSJeff Roberson sched_prio(struct thread *td, u_char prio)
90135e6168fSJeff Roberson {
9023f741ca1SJeff Roberson 	struct kse *ke;
90335e6168fSJeff Roberson 
9043f741ca1SJeff Roberson 	ke = td->td_kse;
90535e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
90635e6168fSJeff Roberson 	if (TD_ON_RUNQ(td)) {
9073f741ca1SJeff Roberson 		/*
9083f741ca1SJeff Roberson 		 * If the priority has been elevated due to priority
9093f741ca1SJeff Roberson 		 * propagation, we may have to move ourselves to a new
9103f741ca1SJeff Roberson 		 * queue.  We still call adjustrunqueue below in case kse
9113f741ca1SJeff Roberson 		 * needs to fix things up.
9123f741ca1SJeff Roberson 		 */
91322bf7d9aSJeff Roberson 		if (ke && (ke->ke_flags & KEF_ASSIGNED) == 0 &&
91422bf7d9aSJeff Roberson 		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
9153f741ca1SJeff Roberson 			runq_remove(ke->ke_runq, ke);
9163f741ca1SJeff Roberson 			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
9173f741ca1SJeff Roberson 			runq_add(ke->ke_runq, ke);
91835e6168fSJeff Roberson 		}
9193f741ca1SJeff Roberson 		adjustrunqueue(td, prio);
9203f741ca1SJeff Roberson 	} else
9213f741ca1SJeff Roberson 		td->td_priority = prio;
92235e6168fSJeff Roberson }
92335e6168fSJeff Roberson 
92435e6168fSJeff Roberson void
925ae53b483SJeff Roberson sched_switch(struct thread *td)
92635e6168fSJeff Roberson {
927ae53b483SJeff Roberson 	struct thread *newtd;
92835e6168fSJeff Roberson 	struct kse *ke;
92935e6168fSJeff Roberson 
93035e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
93135e6168fSJeff Roberson 
93235e6168fSJeff Roberson 	ke = td->td_kse;
93335e6168fSJeff Roberson 
93435e6168fSJeff Roberson 	td->td_last_kse = ke;
935060563ecSJulian Elischer         td->td_lastcpu = td->td_oncpu;
936060563ecSJulian Elischer 	td->td_oncpu = NOCPU;
9374a338afdSJulian Elischer         td->td_flags &= ~TDF_NEEDRESCHED;
93835e6168fSJeff Roberson 
93935e6168fSJeff Roberson 	if (TD_IS_RUNNING(td)) {
940ab2baa72SDavid Xu 		if (td->td_proc->p_flag & P_SA) {
941ab2baa72SDavid Xu 			kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
942ab2baa72SDavid Xu 			setrunqueue(td);
943ab2baa72SDavid Xu 		} else {
944210491d3SJeff Roberson 			/*
9453f741ca1SJeff Roberson 			 * This queue is always correct except for idle threads
9463f741ca1SJeff Roberson 			 * which have a higher priority due to priority
9473f741ca1SJeff Roberson 			 * propagation.
948210491d3SJeff Roberson 			 */
9493f741ca1SJeff Roberson 			if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) {
9503f741ca1SJeff Roberson 				if (td->td_priority < PRI_MIN_IDLE)
951210491d3SJeff Roberson 					ke->ke_runq = KSEQ_SELF()->ksq_curr;
9523f741ca1SJeff Roberson 				else
9533f741ca1SJeff Roberson 					ke->ke_runq = &KSEQ_SELF()->ksq_idle;
9543f741ca1SJeff Roberson 			}
95515dc847eSJeff Roberson 			runq_add(ke->ke_runq, ke);
95615dc847eSJeff Roberson 			/* setrunqueue(td); */
957ab2baa72SDavid Xu 		}
9580e0f6266SJeff Roberson 	} else {
9590e0f6266SJeff Roberson 		if (ke->ke_runq)
96015dc847eSJeff Roberson 			kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
96135e6168fSJeff Roberson 		/*
96235e6168fSJeff Roberson 		 * We will not be on the run queue. So we must be
96335e6168fSJeff Roberson 		 * sleeping or similar.
96435e6168fSJeff Roberson 		 */
9650e2a4d3aSDavid Xu 		if (td->td_proc->p_flag & P_SA)
96635e6168fSJeff Roberson 			kse_reassign(ke);
9670e0f6266SJeff Roberson 	}
968ae53b483SJeff Roberson 	newtd = choosethread();
969ae53b483SJeff Roberson 	if (td != newtd)
970ae53b483SJeff Roberson 		cpu_switch(td, newtd);
971ae53b483SJeff Roberson 	sched_lock.mtx_lock = (uintptr_t)td;
97235e6168fSJeff Roberson 
973060563ecSJulian Elischer 	td->td_oncpu = PCPU_GET(cpuid);
97435e6168fSJeff Roberson }
97535e6168fSJeff Roberson 
97635e6168fSJeff Roberson void
97735e6168fSJeff Roberson sched_nice(struct ksegrp *kg, int nice)
97835e6168fSJeff Roberson {
97915dc847eSJeff Roberson 	struct kse *ke;
98035e6168fSJeff Roberson 	struct thread *td;
98115dc847eSJeff Roberson 	struct kseq *kseq;
98235e6168fSJeff Roberson 
9830b5318c8SJohn Baldwin 	PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED);
9840b5318c8SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
98515dc847eSJeff Roberson 	/*
98615dc847eSJeff Roberson 	 * We need to adjust the nice counts for running KSEs.
98715dc847eSJeff Roberson 	 */
98815dc847eSJeff Roberson 	if (kg->kg_pri_class == PRI_TIMESHARE)
98915dc847eSJeff Roberson 		FOREACH_KSE_IN_GROUP(kg, ke) {
990d07ac847SJeff Roberson 			if (ke->ke_runq == NULL)
99115dc847eSJeff Roberson 				continue;
99215dc847eSJeff Roberson 			kseq = KSEQ_CPU(ke->ke_cpu);
99315dc847eSJeff Roberson 			kseq_nice_rem(kseq, kg->kg_nice);
99415dc847eSJeff Roberson 			kseq_nice_add(kseq, nice);
99515dc847eSJeff Roberson 		}
99635e6168fSJeff Roberson 	kg->kg_nice = nice;
99735e6168fSJeff Roberson 	sched_priority(kg);
99815dc847eSJeff Roberson 	FOREACH_THREAD_IN_GROUP(kg, td)
9994a338afdSJulian Elischer 		td->td_flags |= TDF_NEEDRESCHED;
100035e6168fSJeff Roberson }
100135e6168fSJeff Roberson 
100235e6168fSJeff Roberson void
100335e6168fSJeff Roberson sched_sleep(struct thread *td, u_char prio)
100435e6168fSJeff Roberson {
100535e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
100635e6168fSJeff Roberson 
100735e6168fSJeff Roberson 	td->td_slptime = ticks;
100835e6168fSJeff Roberson 	td->td_priority = prio;
100935e6168fSJeff Roberson 
101015dc847eSJeff Roberson 	CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
101115dc847eSJeff Roberson 	    td->td_kse, td->td_slptime);
101235e6168fSJeff Roberson }
101335e6168fSJeff Roberson 
101435e6168fSJeff Roberson void
101535e6168fSJeff Roberson sched_wakeup(struct thread *td)
101635e6168fSJeff Roberson {
101735e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
101835e6168fSJeff Roberson 
101935e6168fSJeff Roberson 	/*
102035e6168fSJeff Roberson 	 * Let the kseg know how long we slept for.  This is because process
102135e6168fSJeff Roberson 	 * interactivity behavior is modeled in the kseg.
102235e6168fSJeff Roberson 	 */
102335e6168fSJeff Roberson 	if (td->td_slptime) {
1024f1e8dc4aSJeff Roberson 		struct ksegrp *kg;
102515dc847eSJeff Roberson 		int hzticks;
1026f1e8dc4aSJeff Roberson 
1027f1e8dc4aSJeff Roberson 		kg = td->td_ksegrp;
102815dc847eSJeff Roberson 		hzticks = ticks - td->td_slptime;
102915dc847eSJeff Roberson 		kg->kg_slptime += hzticks << 10;
10304b60e324SJeff Roberson 		sched_interact_update(kg);
1031f1e8dc4aSJeff Roberson 		sched_priority(kg);
10324b60e324SJeff Roberson 		if (td->td_kse)
10334b60e324SJeff Roberson 			sched_slice(td->td_kse);
103415dc847eSJeff Roberson 		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
103515dc847eSJeff Roberson 		    td->td_kse, hzticks);
103635e6168fSJeff Roberson 		td->td_slptime = 0;
1037f1e8dc4aSJeff Roberson 	}
103835e6168fSJeff Roberson 	setrunqueue(td);
103935e6168fSJeff Roberson }
104035e6168fSJeff Roberson 
104135e6168fSJeff Roberson /*
104235e6168fSJeff Roberson  * Penalize the parent for creating a new child and initialize the child's
104335e6168fSJeff Roberson  * priority.
104435e6168fSJeff Roberson  */
104535e6168fSJeff Roberson void
104615dc847eSJeff Roberson sched_fork(struct proc *p, struct proc *p1)
104735e6168fSJeff Roberson {
104835e6168fSJeff Roberson 
104935e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
105035e6168fSJeff Roberson 
105115dc847eSJeff Roberson 	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
105215dc847eSJeff Roberson 	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
105315dc847eSJeff Roberson 	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
105415dc847eSJeff Roberson }
105515dc847eSJeff Roberson 
105615dc847eSJeff Roberson void
105715dc847eSJeff Roberson sched_fork_kse(struct kse *ke, struct kse *child)
105815dc847eSJeff Roberson {
10592056d0a1SJohn Baldwin 
1060210491d3SJeff Roberson 	child->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
106115dc847eSJeff Roberson 	child->ke_cpu = ke->ke_cpu; /* sched_pickcpu(); */
106215dc847eSJeff Roberson 	child->ke_runq = NULL;
106315dc847eSJeff Roberson 
1064736c97c7SJeff Roberson 	/* Grab our parents cpu estimation information. */
1065736c97c7SJeff Roberson 	child->ke_ticks = ke->ke_ticks;
1066736c97c7SJeff Roberson 	child->ke_ltick = ke->ke_ltick;
1067736c97c7SJeff Roberson 	child->ke_ftick = ke->ke_ftick;
106815dc847eSJeff Roberson }
106915dc847eSJeff Roberson 
107015dc847eSJeff Roberson void
107115dc847eSJeff Roberson sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
107215dc847eSJeff Roberson {
10732056d0a1SJohn Baldwin 
10742056d0a1SJohn Baldwin 	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);
107535e6168fSJeff Roberson 	/* XXX Need something better here */
1076210491d3SJeff Roberson 
1077a91172adSJeff Roberson 	child->kg_slptime = kg->kg_slptime / SCHED_SLP_RUN_THROTTLE;
1078a91172adSJeff Roberson 	child->kg_runtime = kg->kg_runtime / SCHED_SLP_RUN_THROTTLE;
10794b60e324SJeff Roberson 	kg->kg_runtime += tickincr << 10;
10804b60e324SJeff Roberson 	sched_interact_update(kg);
108115dc847eSJeff Roberson 
108235e6168fSJeff Roberson 	child->kg_user_pri = kg->kg_user_pri;
108315dc847eSJeff Roberson 	child->kg_nice = kg->kg_nice;
1084c9f25d8fSJeff Roberson }
1085c9f25d8fSJeff Roberson 
108615dc847eSJeff Roberson void
108715dc847eSJeff Roberson sched_fork_thread(struct thread *td, struct thread *child)
108815dc847eSJeff Roberson {
108915dc847eSJeff Roberson }
109015dc847eSJeff Roberson 
109115dc847eSJeff Roberson void
109215dc847eSJeff Roberson sched_class(struct ksegrp *kg, int class)
109315dc847eSJeff Roberson {
109415dc847eSJeff Roberson 	struct kseq *kseq;
109515dc847eSJeff Roberson 	struct kse *ke;
109615dc847eSJeff Roberson 
10972056d0a1SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
109815dc847eSJeff Roberson 	if (kg->kg_pri_class == class)
109915dc847eSJeff Roberson 		return;
110015dc847eSJeff Roberson 
110115dc847eSJeff Roberson 	FOREACH_KSE_IN_GROUP(kg, ke) {
110215dc847eSJeff Roberson 		if (ke->ke_state != KES_ONRUNQ &&
110315dc847eSJeff Roberson 		    ke->ke_state != KES_THREAD)
110415dc847eSJeff Roberson 			continue;
110515dc847eSJeff Roberson 		kseq = KSEQ_CPU(ke->ke_cpu);
110615dc847eSJeff Roberson 
1107b5c4c4a7SJeff Roberson 		kseq->ksq_loads[PRI_BASE(kg->kg_pri_class)]--;
1108b5c4c4a7SJeff Roberson 		kseq->ksq_loads[PRI_BASE(class)]++;
110915dc847eSJeff Roberson 
111015dc847eSJeff Roberson 		if (kg->kg_pri_class == PRI_TIMESHARE)
111115dc847eSJeff Roberson 			kseq_nice_rem(kseq, kg->kg_nice);
111215dc847eSJeff Roberson 		else if (class == PRI_TIMESHARE)
111315dc847eSJeff Roberson 			kseq_nice_add(kseq, kg->kg_nice);
111415dc847eSJeff Roberson 	}
111515dc847eSJeff Roberson 
111615dc847eSJeff Roberson 	kg->kg_pri_class = class;
111735e6168fSJeff Roberson }
111835e6168fSJeff Roberson 
111935e6168fSJeff Roberson /*
112035e6168fSJeff Roberson  * Return some of the child's priority and interactivity to the parent.
112135e6168fSJeff Roberson  */
112235e6168fSJeff Roberson void
112315dc847eSJeff Roberson sched_exit(struct proc *p, struct proc *child)
112435e6168fSJeff Roberson {
112535e6168fSJeff Roberson 	/* XXX Need something better here */
112635e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
1127141ad61cSJeff Roberson 	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
1128210491d3SJeff Roberson 	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
1129141ad61cSJeff Roberson }
1130141ad61cSJeff Roberson 
1131141ad61cSJeff Roberson void
1132141ad61cSJeff Roberson sched_exit_kse(struct kse *ke, struct kse *child)
1133141ad61cSJeff Roberson {
1134141ad61cSJeff Roberson 	kseq_rem(KSEQ_CPU(child->ke_cpu), child);
1135141ad61cSJeff Roberson }
1136141ad61cSJeff Roberson 
1137141ad61cSJeff Roberson void
1138141ad61cSJeff Roberson sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
1139141ad61cSJeff Roberson {
11404b60e324SJeff Roberson 	/* kg->kg_slptime += child->kg_slptime; */
1141210491d3SJeff Roberson 	kg->kg_runtime += child->kg_runtime;
11424b60e324SJeff Roberson 	sched_interact_update(kg);
1143141ad61cSJeff Roberson }
1144141ad61cSJeff Roberson 
1145141ad61cSJeff Roberson void
1146141ad61cSJeff Roberson sched_exit_thread(struct thread *td, struct thread *child)
1147141ad61cSJeff Roberson {
114835e6168fSJeff Roberson }
114935e6168fSJeff Roberson 
115035e6168fSJeff Roberson void
11517cf90fb3SJeff Roberson sched_clock(struct thread *td)
115235e6168fSJeff Roberson {
115335e6168fSJeff Roberson 	struct kseq *kseq;
11540a016a05SJeff Roberson 	struct ksegrp *kg;
11557cf90fb3SJeff Roberson 	struct kse *ke;
115635e6168fSJeff Roberson 
115715dc847eSJeff Roberson 	/*
115815dc847eSJeff Roberson 	 * sched_setup() apparently happens prior to stathz being set.  We
115915dc847eSJeff Roberson 	 * need to resolve the timers earlier in the boot so we can avoid
116015dc847eSJeff Roberson 	 * calculating this here.
116115dc847eSJeff Roberson 	 */
116215dc847eSJeff Roberson 	if (realstathz == 0) {
116315dc847eSJeff Roberson 		realstathz = stathz ? stathz : hz;
116415dc847eSJeff Roberson 		tickincr = hz / realstathz;
116515dc847eSJeff Roberson 		/*
116615dc847eSJeff Roberson 		 * XXX This does not work for values of stathz that are much
116715dc847eSJeff Roberson 		 * larger than hz.
116815dc847eSJeff Roberson 		 */
116915dc847eSJeff Roberson 		if (tickincr == 0)
117015dc847eSJeff Roberson 			tickincr = 1;
117115dc847eSJeff Roberson 	}
117235e6168fSJeff Roberson 
11737cf90fb3SJeff Roberson 	ke = td->td_kse;
117415dc847eSJeff Roberson 	kg = ke->ke_ksegrp;
117535e6168fSJeff Roberson 
11760a016a05SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
11770a016a05SJeff Roberson 	KASSERT((td != NULL), ("schedclock: null thread pointer"));
11780a016a05SJeff Roberson 
11790a016a05SJeff Roberson 	/* Adjust ticks for pctcpu */
118065c8760dSJeff Roberson 	ke->ke_ticks++;
1181d465fb95SJeff Roberson 	ke->ke_ltick = ticks;
1182a8949de2SJeff Roberson 
1183d465fb95SJeff Roberson 	/* Go up to one second beyond our max and then trim back down */
1184d465fb95SJeff Roberson 	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
1185d465fb95SJeff Roberson 		sched_pctcpu_update(ke);
1186d465fb95SJeff Roberson 
118743fdafb1SJulian Elischer 	if (td->td_flags & TDF_IDLETD)
118835e6168fSJeff Roberson 		return;
11890a016a05SJeff Roberson 
119015dc847eSJeff Roberson 	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
119115dc847eSJeff Roberson 	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
11923f741ca1SJeff Roberson 	/*
1193a8949de2SJeff Roberson 	 * We only do slicing code for TIMESHARE ksegrps.
1194a8949de2SJeff Roberson 	 */
1195a8949de2SJeff Roberson 	if (kg->kg_pri_class != PRI_TIMESHARE)
1196a8949de2SJeff Roberson 		return;
1197a8949de2SJeff Roberson 	/*
119815dc847eSJeff Roberson 	 * We used a tick charge it to the ksegrp so that we can compute our
119915dc847eSJeff Roberson 	 * interactivity.
120015dc847eSJeff Roberson 	 */
120115dc847eSJeff Roberson 	kg->kg_runtime += tickincr << 10;
12024b60e324SJeff Roberson 	sched_interact_update(kg);
1203407b0157SJeff Roberson 
120435e6168fSJeff Roberson 	/*
120535e6168fSJeff Roberson 	 * We used up one time slice.
120635e6168fSJeff Roberson 	 */
120735e6168fSJeff Roberson 	ke->ke_slice--;
12083f741ca1SJeff Roberson 	kseq = KSEQ_SELF();
120915dc847eSJeff Roberson #ifdef SMP
1210c36ccfa2SJeff Roberson 	kseq->ksq_rslices--;
121115dc847eSJeff Roberson #endif
121215dc847eSJeff Roberson 
121315dc847eSJeff Roberson 	if (ke->ke_slice > 0)
121415dc847eSJeff Roberson 		return;
121535e6168fSJeff Roberson 	/*
121615dc847eSJeff Roberson 	 * We're out of time, recompute priorities and requeue.
121735e6168fSJeff Roberson 	 */
121815dc847eSJeff Roberson 	kseq_rem(kseq, ke);
1219e1f89c22SJeff Roberson 	sched_priority(kg);
122015dc847eSJeff Roberson 	sched_slice(ke);
122115dc847eSJeff Roberson 	if (SCHED_CURR(kg, ke))
122215dc847eSJeff Roberson 		ke->ke_runq = kseq->ksq_curr;
122315dc847eSJeff Roberson 	else
122415dc847eSJeff Roberson 		ke->ke_runq = kseq->ksq_next;
122515dc847eSJeff Roberson 	kseq_add(kseq, ke);
12264a338afdSJulian Elischer 	td->td_flags |= TDF_NEEDRESCHED;
122735e6168fSJeff Roberson }
122835e6168fSJeff Roberson 
122935e6168fSJeff Roberson int
123035e6168fSJeff Roberson sched_runnable(void)
123135e6168fSJeff Roberson {
123235e6168fSJeff Roberson 	struct kseq *kseq;
1233b90816f1SJeff Roberson 	int load;
123435e6168fSJeff Roberson 
1235b90816f1SJeff Roberson 	load = 1;
1236b90816f1SJeff Roberson 
1237b90816f1SJeff Roberson 	mtx_lock_spin(&sched_lock);
12380a016a05SJeff Roberson 	kseq = KSEQ_SELF();
123922bf7d9aSJeff Roberson #ifdef SMP
124022bf7d9aSJeff Roberson 	if (kseq->ksq_assigned)
124122bf7d9aSJeff Roberson 		kseq_assign(kseq);
124222bf7d9aSJeff Roberson #endif
12433f741ca1SJeff Roberson 	if ((curthread->td_flags & TDF_IDLETD) != 0) {
12443f741ca1SJeff Roberson 		if (kseq->ksq_load > 0)
12453f741ca1SJeff Roberson 			goto out;
12463f741ca1SJeff Roberson 	} else
12473f741ca1SJeff Roberson 		if (kseq->ksq_load - 1 > 0)
1248b90816f1SJeff Roberson 			goto out;
1249b90816f1SJeff Roberson 	load = 0;
1250b90816f1SJeff Roberson out:
1251b90816f1SJeff Roberson 	mtx_unlock_spin(&sched_lock);
1252b90816f1SJeff Roberson 	return (load);
125335e6168fSJeff Roberson }
125435e6168fSJeff Roberson 
125535e6168fSJeff Roberson void
125635e6168fSJeff Roberson sched_userret(struct thread *td)
125735e6168fSJeff Roberson {
125835e6168fSJeff Roberson 	struct ksegrp *kg;
125935e6168fSJeff Roberson 
126035e6168fSJeff Roberson 	kg = td->td_ksegrp;
126135e6168fSJeff Roberson 
126235e6168fSJeff Roberson 	if (td->td_priority != kg->kg_user_pri) {
126335e6168fSJeff Roberson 		mtx_lock_spin(&sched_lock);
126435e6168fSJeff Roberson 		td->td_priority = kg->kg_user_pri;
126535e6168fSJeff Roberson 		mtx_unlock_spin(&sched_lock);
126635e6168fSJeff Roberson 	}
126735e6168fSJeff Roberson }
126835e6168fSJeff Roberson 
1269c9f25d8fSJeff Roberson struct kse *
1270c9f25d8fSJeff Roberson sched_choose(void)
1271c9f25d8fSJeff Roberson {
12720a016a05SJeff Roberson 	struct kseq *kseq;
1273c9f25d8fSJeff Roberson 	struct kse *ke;
127415dc847eSJeff Roberson 
1275b90816f1SJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
127622bf7d9aSJeff Roberson 	kseq = KSEQ_SELF();
127715dc847eSJeff Roberson #ifdef SMP
1278245f3abfSJeff Roberson retry:
127922bf7d9aSJeff Roberson 	if (kseq->ksq_assigned)
128022bf7d9aSJeff Roberson 		kseq_assign(kseq);
128115dc847eSJeff Roberson #endif
128222bf7d9aSJeff Roberson 	ke = kseq_choose(kseq);
128335e6168fSJeff Roberson 	if (ke) {
128422bf7d9aSJeff Roberson #ifdef SMP
128522bf7d9aSJeff Roberson 		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
128622bf7d9aSJeff Roberson 			if (kseq_find())
128722bf7d9aSJeff Roberson 				goto retry;
128822bf7d9aSJeff Roberson #endif
128915dc847eSJeff Roberson 		runq_remove(ke->ke_runq, ke);
129035e6168fSJeff Roberson 		ke->ke_state = KES_THREAD;
1291245f3abfSJeff Roberson 
129215dc847eSJeff Roberson 		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
129315dc847eSJeff Roberson 			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
129415dc847eSJeff Roberson 			    ke, ke->ke_runq, ke->ke_slice,
129515dc847eSJeff Roberson 			    ke->ke_thread->td_priority);
1296245f3abfSJeff Roberson 		}
129715dc847eSJeff Roberson 		return (ke);
129835e6168fSJeff Roberson 	}
1299c9f25d8fSJeff Roberson #ifdef SMP
130022bf7d9aSJeff Roberson 	if (kseq_find())
130115dc847eSJeff Roberson 		goto retry;
1302c9f25d8fSJeff Roberson #endif
130315dc847eSJeff Roberson 
130415dc847eSJeff Roberson 	return (NULL);
130535e6168fSJeff Roberson }
130635e6168fSJeff Roberson 
130735e6168fSJeff Roberson void
13087cf90fb3SJeff Roberson sched_add(struct thread *td)
130935e6168fSJeff Roberson {
1310c9f25d8fSJeff Roberson 	struct kseq *kseq;
131115dc847eSJeff Roberson 	struct ksegrp *kg;
13127cf90fb3SJeff Roberson 	struct kse *ke;
131322bf7d9aSJeff Roberson 	int class;
1314c9f25d8fSJeff Roberson 
131522bf7d9aSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
13167cf90fb3SJeff Roberson 	ke = td->td_kse;
13177cf90fb3SJeff Roberson 	kg = td->td_ksegrp;
131822bf7d9aSJeff Roberson 	if (ke->ke_flags & KEF_ASSIGNED)
131922bf7d9aSJeff Roberson 		return;
132022bf7d9aSJeff Roberson 	kseq = KSEQ_SELF();
13215d7ef00cSJeff Roberson 	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
13225d7ef00cSJeff Roberson 	KASSERT((ke->ke_thread->td_kse != NULL),
13235d7ef00cSJeff Roberson 	    ("sched_add: No KSE on thread"));
13245d7ef00cSJeff Roberson 	KASSERT(ke->ke_state != KES_ONRUNQ,
13255d7ef00cSJeff Roberson 	    ("sched_add: kse %p (%s) already in run queue", ke,
13265d7ef00cSJeff Roberson 	    ke->ke_proc->p_comm));
13275d7ef00cSJeff Roberson 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
13285d7ef00cSJeff Roberson 	    ("sched_add: process swapped out"));
13299bca28a7SJeff Roberson 	KASSERT(ke->ke_runq == NULL,
13309bca28a7SJeff Roberson 	    ("sched_add: KSE %p is still assigned to a run queue", ke));
13315d7ef00cSJeff Roberson 
133222bf7d9aSJeff Roberson 	class = PRI_BASE(kg->kg_pri_class);
133322bf7d9aSJeff Roberson 	switch (class) {
1334a8949de2SJeff Roberson 	case PRI_ITHD:
1335a8949de2SJeff Roberson 	case PRI_REALTIME:
133615dc847eSJeff Roberson 		ke->ke_runq = kseq->ksq_curr;
133715dc847eSJeff Roberson 		ke->ke_slice = SCHED_SLICE_MAX;
13387cd650a9SJeff Roberson 		ke->ke_cpu = PCPU_GET(cpuid);
1339a8949de2SJeff Roberson 		break;
1340a8949de2SJeff Roberson 	case PRI_TIMESHARE:
134122bf7d9aSJeff Roberson #ifdef SMP
134222bf7d9aSJeff Roberson 		if (ke->ke_cpu != PCPU_GET(cpuid)) {
134322bf7d9aSJeff Roberson 			kseq_notify(ke, ke->ke_cpu);
134422bf7d9aSJeff Roberson 			return;
134522bf7d9aSJeff Roberson 		}
134622bf7d9aSJeff Roberson #endif
134715dc847eSJeff Roberson 		if (SCHED_CURR(kg, ke))
134815dc847eSJeff Roberson 			ke->ke_runq = kseq->ksq_curr;
134915dc847eSJeff Roberson 		else
135015dc847eSJeff Roberson 			ke->ke_runq = kseq->ksq_next;
135115dc847eSJeff Roberson 		break;
135215dc847eSJeff Roberson 	case PRI_IDLE:
135322bf7d9aSJeff Roberson #ifdef SMP
135422bf7d9aSJeff Roberson 		if (ke->ke_cpu != PCPU_GET(cpuid)) {
135522bf7d9aSJeff Roberson 			kseq_notify(ke, ke->ke_cpu);
135622bf7d9aSJeff Roberson 			return;
135722bf7d9aSJeff Roberson 		}
135822bf7d9aSJeff Roberson #endif
135915dc847eSJeff Roberson 		/*
136015dc847eSJeff Roberson 		 * This is for priority prop.
136115dc847eSJeff Roberson 		 */
13623f741ca1SJeff Roberson 		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
136315dc847eSJeff Roberson 			ke->ke_runq = kseq->ksq_curr;
136415dc847eSJeff Roberson 		else
136515dc847eSJeff Roberson 			ke->ke_runq = &kseq->ksq_idle;
136615dc847eSJeff Roberson 		ke->ke_slice = SCHED_SLICE_MIN;
136715dc847eSJeff Roberson 		break;
136815dc847eSJeff Roberson 	default:
136915dc847eSJeff Roberson 		panic("Unknown pri class.\n");
1370a8949de2SJeff Roberson 		break;
1371a6ed4186SJeff Roberson 	}
137222bf7d9aSJeff Roberson #ifdef SMP
137322bf7d9aSJeff Roberson 	/*
137422bf7d9aSJeff Roberson 	 * If there are any idle processors, give them our extra load.
137522bf7d9aSJeff Roberson 	 */
137622bf7d9aSJeff Roberson 	if (kseq_idle && class != PRI_ITHD &&
137722bf7d9aSJeff Roberson 	    (kseq->ksq_loads[PRI_IDLE] + kseq->ksq_loads[PRI_TIMESHARE] +
137822bf7d9aSJeff Roberson 	    kseq->ksq_loads[PRI_REALTIME]) >= kseq->ksq_cpus) {
137922bf7d9aSJeff Roberson 		int cpu;
138022bf7d9aSJeff Roberson 
138122bf7d9aSJeff Roberson 		/*
138222bf7d9aSJeff Roberson 		 * Multiple cpus could find this bit simultaneously but the
138322bf7d9aSJeff Roberson 		 * race shouldn't be terrible.
138422bf7d9aSJeff Roberson 		 */
138522bf7d9aSJeff Roberson 		cpu = ffs(kseq_idle);
138622bf7d9aSJeff Roberson 		if (cpu) {
138722bf7d9aSJeff Roberson 			cpu--;
138822bf7d9aSJeff Roberson 			atomic_clear_int(&kseq_idle, 1 << cpu);
138922bf7d9aSJeff Roberson 			ke->ke_cpu = cpu;
139022bf7d9aSJeff Roberson 			ke->ke_runq = NULL;
139122bf7d9aSJeff Roberson 			kseq_notify(ke, cpu);
139222bf7d9aSJeff Roberson 			return;
139322bf7d9aSJeff Roberson 		}
139422bf7d9aSJeff Roberson 	}
139522bf7d9aSJeff Roberson 	if (class == PRI_TIMESHARE || class == PRI_REALTIME)
139622bf7d9aSJeff Roberson 		atomic_clear_int(&kseq_idle, PCPU_GET(cpumask));
139722bf7d9aSJeff Roberson #endif
139822bf7d9aSJeff Roberson         if (td->td_priority < curthread->td_priority)
139922bf7d9aSJeff Roberson                 curthread->td_flags |= TDF_NEEDRESCHED;
1400a8949de2SJeff Roberson 
140135e6168fSJeff Roberson 	ke->ke_ksegrp->kg_runq_kses++;
140235e6168fSJeff Roberson 	ke->ke_state = KES_ONRUNQ;
140335e6168fSJeff Roberson 
140415dc847eSJeff Roberson 	runq_add(ke->ke_runq, ke);
14059bca28a7SJeff Roberson 	kseq_add(kseq, ke);
140635e6168fSJeff Roberson }
140735e6168fSJeff Roberson 
140835e6168fSJeff Roberson void
14097cf90fb3SJeff Roberson sched_rem(struct thread *td)
141035e6168fSJeff Roberson {
141115dc847eSJeff Roberson 	struct kseq *kseq;
14127cf90fb3SJeff Roberson 	struct kse *ke;
14137cf90fb3SJeff Roberson 
14147cf90fb3SJeff Roberson 	ke = td->td_kse;
141522bf7d9aSJeff Roberson 	/*
141622bf7d9aSJeff Roberson 	 * It is safe to just return here because sched_rem() is only ever
141722bf7d9aSJeff Roberson 	 * used in places where we're immediately going to add the
141822bf7d9aSJeff Roberson 	 * kse back on again.  In that case it'll be added with the correct
141922bf7d9aSJeff Roberson 	 * thread and priority when the caller drops the sched_lock.
142022bf7d9aSJeff Roberson 	 */
142122bf7d9aSJeff Roberson 	if (ke->ke_flags & KEF_ASSIGNED)
142222bf7d9aSJeff Roberson 		return;
142335e6168fSJeff Roberson 	mtx_assert(&sched_lock, MA_OWNED);
14249bca28a7SJeff Roberson 	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
142535e6168fSJeff Roberson 
142635e6168fSJeff Roberson 	ke->ke_state = KES_THREAD;
142735e6168fSJeff Roberson 	ke->ke_ksegrp->kg_runq_kses--;
142815dc847eSJeff Roberson 	kseq = KSEQ_CPU(ke->ke_cpu);
142915dc847eSJeff Roberson 	runq_remove(ke->ke_runq, ke);
143015dc847eSJeff Roberson 	kseq_rem(kseq, ke);
143135e6168fSJeff Roberson }
143235e6168fSJeff Roberson 
143335e6168fSJeff Roberson fixpt_t
14347cf90fb3SJeff Roberson sched_pctcpu(struct thread *td)
143535e6168fSJeff Roberson {
143635e6168fSJeff Roberson 	fixpt_t pctcpu;
14377cf90fb3SJeff Roberson 	struct kse *ke;
143835e6168fSJeff Roberson 
143935e6168fSJeff Roberson 	pctcpu = 0;
14407cf90fb3SJeff Roberson 	ke = td->td_kse;
1441484288deSJeff Roberson 	if (ke == NULL)
1442484288deSJeff Roberson 		return (0);
144335e6168fSJeff Roberson 
1444b90816f1SJeff Roberson 	mtx_lock_spin(&sched_lock);
144535e6168fSJeff Roberson 	if (ke->ke_ticks) {
144635e6168fSJeff Roberson 		int rtick;
144735e6168fSJeff Roberson 
1448210491d3SJeff Roberson 		/*
1449210491d3SJeff Roberson 		 * Don't update more frequently than twice a second.  Allowing
1450210491d3SJeff Roberson 		 * this causes the cpu usage to decay away too quickly due to
1451210491d3SJeff Roberson 		 * rounding errors.
1452210491d3SJeff Roberson 		 */
1453210491d3SJeff Roberson 		if (ke->ke_ltick < (ticks - (hz / 2)))
145435e6168fSJeff Roberson 			sched_pctcpu_update(ke);
145535e6168fSJeff Roberson 		/* How many rtick per second ? */
1456210491d3SJeff Roberson 		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
14577121cce5SScott Long 		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
145835e6168fSJeff Roberson 	}
145935e6168fSJeff Roberson 
146035e6168fSJeff Roberson 	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
1461828e7683SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
146235e6168fSJeff Roberson 
146335e6168fSJeff Roberson 	return (pctcpu);
146435e6168fSJeff Roberson }
146535e6168fSJeff Roberson 
146635e6168fSJeff Roberson int
146735e6168fSJeff Roberson sched_sizeof_kse(void)
146835e6168fSJeff Roberson {
146935e6168fSJeff Roberson 	return (sizeof(struct kse) + sizeof(struct ke_sched));
147035e6168fSJeff Roberson }
147135e6168fSJeff Roberson 
147235e6168fSJeff Roberson int
147335e6168fSJeff Roberson sched_sizeof_ksegrp(void)
147435e6168fSJeff Roberson {
147535e6168fSJeff Roberson 	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
147635e6168fSJeff Roberson }
147735e6168fSJeff Roberson 
147835e6168fSJeff Roberson int
147935e6168fSJeff Roberson sched_sizeof_proc(void)
148035e6168fSJeff Roberson {
148135e6168fSJeff Roberson 	return (sizeof(struct proc));
148235e6168fSJeff Roberson }
148335e6168fSJeff Roberson 
148435e6168fSJeff Roberson int
148535e6168fSJeff Roberson sched_sizeof_thread(void)
148635e6168fSJeff Roberson {
148735e6168fSJeff Roberson 	return (sizeof(struct thread) + sizeof(struct td_sched));
148835e6168fSJeff Roberson }
1489