xref: /freebsd/sys/kern/kern_switch.c (revision c20c691bed1c9e151e06d276a3f053298dd0abf8)
1dba6c5a6SPeter Wemm /*
2d5a08a60SJake Burkholder  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3d5a08a60SJake Burkholder  * All rights reserved.
4dba6c5a6SPeter Wemm  *
5dba6c5a6SPeter Wemm  * Redistribution and use in source and binary forms, with or without
6dba6c5a6SPeter Wemm  * modification, are permitted provided that the following conditions
7dba6c5a6SPeter Wemm  * are met:
8dba6c5a6SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
9dba6c5a6SPeter Wemm  *    notice, this list of conditions and the following disclaimer.
10dba6c5a6SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
11dba6c5a6SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
12dba6c5a6SPeter Wemm  *    documentation and/or other materials provided with the distribution.
13dba6c5a6SPeter Wemm  *
14dba6c5a6SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15dba6c5a6SPeter Wemm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16dba6c5a6SPeter Wemm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17dba6c5a6SPeter Wemm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18dba6c5a6SPeter Wemm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19dba6c5a6SPeter Wemm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20dba6c5a6SPeter Wemm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21dba6c5a6SPeter Wemm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22dba6c5a6SPeter Wemm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23dba6c5a6SPeter Wemm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24dba6c5a6SPeter Wemm  * SUCH DAMAGE.
25dba6c5a6SPeter Wemm  */
26dba6c5a6SPeter Wemm 
27e602ba25SJulian Elischer /***
28e602ba25SJulian Elischer Here is the logic:
29e602ba25SJulian Elischer 
30e602ba25SJulian Elischer If there are N processors, then there are at most N KSEs (kernel
31e602ba25SJulian Elischer schedulable entities) working to process threads that belong to a
3209a4a69cSRobert Watson KSEGROUP (kg). If there are X of these KSEs actually running at the
33e602ba25SJulian Elischer moment in question, then there are at most M (= N - X) of these KSEs on
34e602ba25SJulian Elischer the run queue, as running KSEs are not on the queue.
35e602ba25SJulian Elischer 
36e602ba25SJulian Elischer Runnable threads are queued off the KSEGROUP in priority order.
37e602ba25SJulian Elischer If there are M or more threads runnable, the top M threads
38e602ba25SJulian Elischer (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
39e602ba25SJulian Elischer their priority from those threads and are put on the run queue.
40e602ba25SJulian Elischer 
41e602ba25SJulian Elischer The last thread that had a priority high enough to have a KSE associated
42e602ba25SJulian Elischer with it, AND IS ON THE RUN QUEUE, is pointed to by
43e602ba25SJulian Elischer kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
44e602ba25SJulian Elischer assigned, either because all the available KSEs are actively running or because there
45e602ba25SJulian Elischer are no threads queued, that pointer is NULL.
46e602ba25SJulian Elischer 
47e602ba25SJulian Elischer When a KSE is removed from the run queue in order to run, we know
48e602ba25SJulian Elischer it was associated with the highest priority thread in the queue (at the head
49e602ba25SJulian Elischer of the queue). If it is also the last assigned, we know M was 1 and must
50e602ba25SJulian Elischer now be 0. Since the thread is no longer queued, that pointer must no
51e602ba25SJulian Elischer longer refer to it. Since we know there were no more KSEs available
52e602ba25SJulian Elischer (M was 1 and is now 0), and since we are not FREEING our KSE
53e602ba25SJulian Elischer but using it, we know there are STILL no more KSEs available, so we can prove
54e602ba25SJulian Elischer that the next thread in the ksegrp list will not have a KSE to assign to
55e602ba25SJulian Elischer it, and so the pointer must be made 'invalid' (NULL).
56e602ba25SJulian Elischer 
57e602ba25SJulian Elischer The pointer exists so that when a new thread is made runnable, it can
58e602ba25SJulian Elischer have its priority compared with the last assigned thread to see if
59e602ba25SJulian Elischer it should 'steal' its KSE or not, i.e. is it 'earlier'
60e602ba25SJulian Elischer on the list than that thread or later. If it's earlier, then the KSE is
61e602ba25SJulian Elischer removed from the last assigned (which is now not assigned a KSE)
62e602ba25SJulian Elischer and reassigned to the new thread, which is placed earlier in the list.
63e602ba25SJulian Elischer The pointer is then backed up to the previous thread (which may or may not
64e602ba25SJulian Elischer be the new thread).
65e602ba25SJulian Elischer 
66e602ba25SJulian Elischer When a thread sleeps or is removed, the KSE becomes available and if there
67e602ba25SJulian Elischer are queued threads that are not assigned KSEs, the highest priority one of
68e602ba25SJulian Elischer them is assigned the KSE, which is then placed back on the run queue at
69e602ba25SJulian Elischer the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
70e602ba25SJulian Elischer to point to it.
71e602ba25SJulian Elischer 
72e602ba25SJulian Elischer The following diagram shows 2 KSEs and 3 threads from a single process.
73e602ba25SJulian Elischer 
74e602ba25SJulian Elischer  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
75e602ba25SJulian Elischer               \    \____
76e602ba25SJulian Elischer                \        \
77e602ba25SJulian Elischer     KSEGROUP---thread--thread--thread    (queued in priority order)
78e602ba25SJulian Elischer         \                 /
79e602ba25SJulian Elischer          \_______________/
80e602ba25SJulian Elischer           (last_assigned)
81e602ba25SJulian Elischer 
82e602ba25SJulian Elischer The result of this scheme is that the M available KSEs are always
83e602ba25SJulian Elischer queued at the priorities they have inherited from the M highest priority
84e602ba25SJulian Elischer threads for that KSEGROUP. If this situation changes, the KSEs are
85e602ba25SJulian Elischer reassigned to keep this true.
86677b542eSDavid E. O'Brien ***/
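
/*
 * An illustrative example of the scheme above (the numbers are made up):
 * suppose N = 2 processors and the ksegrp has 3 runnable threads
 * T1, T2, T3 in decreasing priority order.  If one KSE is already
 * running T1 (X = 1), then M = 1 KSE sits on the run queue; it has
 * inherited T2's priority and kg->kg_last_assigned points at T2,
 * while T3 has no KSE.  If a new thread with a better priority than
 * T2 becomes runnable, it steals T2's KSE and kg_last_assigned is
 * backed up to the new thread; if its priority is worse than T2's it
 * is simply queued after T2 and waits for a KSE to become available.
 */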
87e602ba25SJulian Elischer 
88677b542eSDavid E. O'Brien #include <sys/cdefs.h>
89677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
90e602ba25SJulian Elischer 
916804a3abSJulian Elischer #include "opt_sched.h"
920c0b25aeSJohn Baldwin 
93ed062c8dSJulian Elischer #ifndef KERN_SWITCH_INCLUDE
94dba6c5a6SPeter Wemm #include <sys/param.h>
95dba6c5a6SPeter Wemm #include <sys/systm.h>
962d50560aSMarcel Moolenaar #include <sys/kdb.h>
97dba6c5a6SPeter Wemm #include <sys/kernel.h>
980384fff8SJason Evans #include <sys/ktr.h>
99f34fa851SJohn Baldwin #include <sys/lock.h>
10035e0e5b3SJohn Baldwin #include <sys/mutex.h>
101dba6c5a6SPeter Wemm #include <sys/proc.h>
102dba6c5a6SPeter Wemm #include <sys/queue.h>
103b43179fbSJeff Roberson #include <sys/sched.h>
104ed062c8dSJulian Elischer #else  /* KERN_SWITCH_INCLUDE */
1050d2a2989SPeter Wemm #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
106cc66ebe2SPeter Wemm #include <sys/smp.h>
107cc66ebe2SPeter Wemm #endif
108182da820SMatthew Dillon #include <machine/critical.h>
1096804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
1106804a3abSJulian Elischer #include <sys/sysctl.h>
1116804a3abSJulian Elischer #endif
1126804a3abSJulian Elischer 
1139923b511SScott Long #ifdef FULL_PREEMPTION
1149923b511SScott Long #ifndef PREEMPTION
1159923b511SScott Long #error "The FULL_PREEMPTION option requires the PREEMPTION option"
1169923b511SScott Long #endif
1179923b511SScott Long #endif
118dba6c5a6SPeter Wemm 
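/*
 * The run queue status bitmap (RQB_LEN words of RQB_BPW bits each,
 * see runq_setbit() and friends below) must provide exactly one bit
 * per run queue.
 */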
119d2ac2316SJake Burkholder CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
120d2ac2316SJake Burkholder 
121ed062c8dSJulian Elischer #define td_kse td_sched
122ed062c8dSJulian Elischer 
123e602ba25SJulian Elischer /************************************************************************
124e602ba25SJulian Elischer  * Functions that manipulate runnability from a thread perspective.	*
125e602ba25SJulian Elischer  ************************************************************************/
126e602ba25SJulian Elischer /*
1275215b187SJeff Roberson  * Select the KSE that will be run next.  From that find the thread, and
128e602ba25SJulian Elischer  * remove it from the KSEGRP's run queue.  If there is thread clustering,
129e602ba25SJulian Elischer  * this will be what does it.
130e602ba25SJulian Elischer  * this is the function that would do it.
131b40ce416SJulian Elischer struct thread *
132b40ce416SJulian Elischer choosethread(void)
133dba6c5a6SPeter Wemm {
134e602ba25SJulian Elischer 	struct kse *ke;
135e602ba25SJulian Elischer 	struct thread *td;
136e602ba25SJulian Elischer 	struct ksegrp *kg;
137e602ba25SJulian Elischer 
1380d2a2989SPeter Wemm #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
139cc66ebe2SPeter Wemm 	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
140cc66ebe2SPeter Wemm 		/* Shutting down, run idlethread on APs */
141cc66ebe2SPeter Wemm 		td = PCPU_GET(idlethread);
142cc66ebe2SPeter Wemm 		ke = td->td_kse;
143cc66ebe2SPeter Wemm 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
144cc66ebe2SPeter Wemm 		ke->ke_flags |= KEF_DIDRUN;
145cc66ebe2SPeter Wemm 		TD_SET_RUNNING(td);
146cc66ebe2SPeter Wemm 		return (td);
147cc66ebe2SPeter Wemm 	}
148cc66ebe2SPeter Wemm #endif
149cc66ebe2SPeter Wemm 
150fe799533SAndrew Gallatin retry:
151cc66ebe2SPeter Wemm 	ke = sched_choose();
152cc66ebe2SPeter Wemm 	if (ke) {
153e602ba25SJulian Elischer 		td = ke->ke_thread;
154e602ba25SJulian Elischer 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
155e602ba25SJulian Elischer 		kg = ke->ke_ksegrp;
156ed062c8dSJulian Elischer 		if (td->td_proc->p_flag & P_HADTHREADS) {
15733c06e1dSJulian Elischer 			if (kg->kg_last_assigned == td) {
158e602ba25SJulian Elischer 				kg->kg_last_assigned = TAILQ_PREV(td,
159e602ba25SJulian Elischer 				    threadqueue, td_runq);
16033c06e1dSJulian Elischer 			}
161d03c79eeSDavid Xu 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
162e602ba25SJulian Elischer 			kg->kg_runnable--;
1631a5cd27bSJulian Elischer 		}
164e602ba25SJulian Elischer 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
165e602ba25SJulian Elischer 		    td, td->td_priority);
166e602ba25SJulian Elischer 	} else {
16740e55026SJulian Elischer 		/* Simulate runq_choose() having returned the idle thread */
168e602ba25SJulian Elischer 		td = PCPU_GET(idlethread);
169472be958SJulian Elischer 		ke = td->td_kse;
170e602ba25SJulian Elischer 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
171e602ba25SJulian Elischer 	}
172472be958SJulian Elischer 	ke->ke_flags |= KEF_DIDRUN;
17393a7aa79SJulian Elischer 
17493a7aa79SJulian Elischer 	/*
175faaa20f6SJulian Elischer 	 * If we are in a panic, only allow system threads,
176faaa20f6SJulian Elischer 	 * plus the one we are running in, to be run.
17793a7aa79SJulian Elischer 	 */
178fe799533SAndrew Gallatin 	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
179faaa20f6SJulian Elischer 	    (td->td_flags & TDF_INPANIC) == 0)) {
180faaa20f6SJulian Elischer 		/* note that it is no longer on the run queue */
181faaa20f6SJulian Elischer 		TD_SET_CAN_RUN(td);
182fe799533SAndrew Gallatin 		goto retry;
183faaa20f6SJulian Elischer 	}
18493a7aa79SJulian Elischer 
18571fad9fdSJulian Elischer 	TD_SET_RUNNING(td);
186e602ba25SJulian Elischer 	return (td);
187e602ba25SJulian Elischer }
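
/*
 * Note that choosethread() runs with sched_lock held and never
 * returns NULL: if sched_choose() finds nothing runnable, the per-CPU
 * idle thread is used instead, and while panicking any thread that is
 * neither a system thread nor the panicking thread itself is skipped
 * via the retry loop above.
 */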
188e602ba25SJulian Elischer 
189e602ba25SJulian Elischer /*
190ed062c8dSJulian Elischer  * Given a surplus system slot, try to assign a new runnable thread to it.
191ed062c8dSJulian Elischer  * Called from:
192ed062c8dSJulian Elischer  *  sched_thread_exit()  (local)
193ed062c8dSJulian Elischer  *  sched_switch()  (local)
19514f0e2e9SJulian Elischer  *  remrunqueue()  (local)  (not at the moment)
196e602ba25SJulian Elischer  */
197ed062c8dSJulian Elischer static void
198ed062c8dSJulian Elischer slot_fill(struct ksegrp *kg)
199e602ba25SJulian Elischer {
200e602ba25SJulian Elischer 	struct thread *td;
201e602ba25SJulian Elischer 
20233c06e1dSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
203ed062c8dSJulian Elischer 	while (kg->kg_avail_opennings > 0) {
204e602ba25SJulian Elischer 		/*
2056f8132a8SJulian Elischer 		 * Find the first unassigned thread
2066f8132a8SJulian Elischer 		 */
2075215b187SJeff Roberson 		if ((td = kg->kg_last_assigned) != NULL)
2086f8132a8SJulian Elischer 			td = TAILQ_NEXT(td, td_runq);
2095215b187SJeff Roberson 		else
2106f8132a8SJulian Elischer 			td = TAILQ_FIRST(&kg->kg_runq);
2116f8132a8SJulian Elischer 
2126f8132a8SJulian Elischer 		/*
213ed062c8dSJulian Elischer 		 * If we found one, send it to the system scheduler.
214e602ba25SJulian Elischer 		 */
215e602ba25SJulian Elischer 		if (td) {
216e602ba25SJulian Elischer 			kg->kg_last_assigned = td;
2172630e4c9SJulian Elischer 			sched_add(td, SRQ_BORING);
218ed062c8dSJulian Elischer 			CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
219ed062c8dSJulian Elischer 		} else {
220ed062c8dSJulian Elischer 			/* no threads to use up the slots. quit now */
221ed062c8dSJulian Elischer 			break;
22248bfcdddSJulian Elischer 		}
223ed062c8dSJulian Elischer 	}
224d5a08a60SJake Burkholder }
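
/*
 * For example, when a thread in a threaded process exits,
 * sched_thread_exit() (below) releases its slot and calls slot_fill(),
 * which hands the freed slot to the first unassigned thread (the one
 * after kg_last_assigned, or the head of kg_runq if nothing is
 * assigned) by passing it to sched_add().
 */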
225d5a08a60SJake Burkholder 
226e8807f22SJulian Elischer #ifdef	SCHED_4BSD
227e602ba25SJulian Elischer /*
228e602ba25SJulian Elischer  * Remove a thread from its KSEGRP's run queue.
229e602ba25SJulian Elischer  * This in turn may remove it from a KSE if it was already assigned
230e602ba25SJulian Elischer  * to one, possibly causing a new thread to be assigned to the KSE
2315215b187SJeff Roberson  * and the KSE being given a new priority.
232e602ba25SJulian Elischer  */
2331f955e2dSJulian Elischer static void
234b40ce416SJulian Elischer remrunqueue(struct thread *td)
235d5a08a60SJake Burkholder {
23648bfcdddSJulian Elischer 	struct thread *td2, *td3;
237e602ba25SJulian Elischer 	struct ksegrp *kg;
238e602ba25SJulian Elischer 	struct kse *ke;
239e602ba25SJulian Elischer 
240e602ba25SJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
24171fad9fdSJulian Elischer 	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
242e602ba25SJulian Elischer 	kg = td->td_ksegrp;
243e602ba25SJulian Elischer 	ke = td->td_kse;
244e602ba25SJulian Elischer 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
24571fad9fdSJulian Elischer 	TD_SET_CAN_RUN(td);
2465215b187SJeff Roberson 	/*
2475215b187SJeff Roberson 	 * If it is not a threaded process, take the shortcut.
2485215b187SJeff Roberson 	 */
249ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
2503389af30SJulian Elischer 		/* remove from the system run queue and free up a slot */
2517cf90fb3SJeff Roberson 		sched_rem(td);
252c3b98db0SJulian Elischer 		ke->ke_state = KES_THREAD;
253e602ba25SJulian Elischer 		return;
254d5a08a60SJake Burkholder 	}
25548bfcdddSJulian Elischer    	td3 = TAILQ_PREV(td, threadqueue, td_runq);
25648bfcdddSJulian Elischer 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
2571a5cd27bSJulian Elischer 	kg->kg_runnable--;
258ed062c8dSJulian Elischer 	if (ke->ke_state == KES_ONRUNQ) {
259e602ba25SJulian Elischer 		/*
2603389af30SJulian Elischer 		 * This thread has been assigned to the system run queue.
261e602ba25SJulian Elischer 		 * We need to dissociate it and try to assign the
262e602ba25SJulian Elischer 		 * KSE to the next available thread. Then, we should
263e602ba25SJulian Elischer 		 * see if we need to move the KSE in the run queues.
264e602ba25SJulian Elischer 		 */
2657cf90fb3SJeff Roberson 		sched_rem(td);
26693a7aa79SJulian Elischer 		ke->ke_state = KES_THREAD;
267e602ba25SJulian Elischer 		td2 = kg->kg_last_assigned;
268e602ba25SJulian Elischer 		KASSERT((td2 != NULL), ("last assigned has wrong value"));
26948bfcdddSJulian Elischer 		if (td2 == td)
270e602ba25SJulian Elischer 			kg->kg_last_assigned = td3;
2713389af30SJulian Elischer 		/* slot_fill(kg); */ /* will replace it with another */
272e602ba25SJulian Elischer 	}
273e602ba25SJulian Elischer }
274e8807f22SJulian Elischer #endif
2751f955e2dSJulian Elischer 
2761f955e2dSJulian Elischer /*
2771f955e2dSJulian Elischer  * Change the priority of a thread that is on the run queue.
2781f955e2dSJulian Elischer  */
2791f955e2dSJulian Elischer void
2801f955e2dSJulian Elischer adjustrunqueue(struct thread *td, int newpri)
2811f955e2dSJulian Elischer {
2821f955e2dSJulian Elischer 	struct ksegrp *kg;
2831f955e2dSJulian Elischer 	struct kse *ke;
2841f955e2dSJulian Elischer 
2851f955e2dSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
2861f955e2dSJulian Elischer 	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
2875215b187SJeff Roberson 
2881f955e2dSJulian Elischer 	ke = td->td_kse;
2891f955e2dSJulian Elischer 	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
2905215b187SJeff Roberson 	/*
2915215b187SJeff Roberson 	 * If it is not a threaded process, take the shortcut.
2925215b187SJeff Roberson 	 */
293ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
2941f955e2dSJulian Elischer 		/* We only care about the kse in the run queue. */
29524c5baaeSJulian Elischer 		td->td_priority = newpri;
2961f955e2dSJulian Elischer 		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
2977cf90fb3SJeff Roberson 			sched_rem(td);
2982630e4c9SJulian Elischer 			sched_add(td, SRQ_BORING);
2991f955e2dSJulian Elischer 		}
3001f955e2dSJulian Elischer 		return;
3011f955e2dSJulian Elischer 	}
3025215b187SJeff Roberson 
3035215b187SJeff Roberson 	/* It is a threaded process */
3041f955e2dSJulian Elischer 	kg = td->td_ksegrp;
305ed062c8dSJulian Elischer 	if (ke->ke_state == KES_ONRUNQ) {
3061f955e2dSJulian Elischer 		if (kg->kg_last_assigned == td) {
3071f955e2dSJulian Elischer 			kg->kg_last_assigned =
3081f955e2dSJulian Elischer 			    TAILQ_PREV(td, threadqueue, td_runq);
3091f955e2dSJulian Elischer 		}
3107cf90fb3SJeff Roberson 		sched_rem(td);
3111f955e2dSJulian Elischer 	}
3121f955e2dSJulian Elischer 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
3131a5cd27bSJulian Elischer 	kg->kg_runnable--;
31414f0e2e9SJulian Elischer 	TD_SET_CAN_RUN(td);
3151f955e2dSJulian Elischer 	td->td_priority = newpri;
3162630e4c9SJulian Elischer 	setrunqueue(td, SRQ_BORING);
3171f955e2dSJulian Elischer }
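
/*
 * Note that in the unthreaded fast path above, the thread only moves
 * between system run queues when the new priority maps to a different
 * queue index (newpri / RQ_PPQ); otherwise updating td_priority is
 * enough.
 */
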
318ed062c8dSJulian Elischer int limitcount;
319d5a08a60SJake Burkholder void
3202630e4c9SJulian Elischer setrunqueue(struct thread *td, int flags)
321d5a08a60SJake Burkholder {
322e602ba25SJulian Elischer 	struct ksegrp *kg;
323e602ba25SJulian Elischer 	struct thread *td2;
324e602ba25SJulian Elischer 	struct thread *tda;
325e602ba25SJulian Elischer 
326ed062c8dSJulian Elischer 	CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
327ed062c8dSJulian Elischer 	    td, td->td_ksegrp, td->td_proc->p_pid);
328e602ba25SJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
329b2578c6cSJulian Elischer 	KASSERT((td->td_inhibitors == 0),
330b2578c6cSJulian Elischer 			("setrunqueue: trying to run inhibited thread"));
33171fad9fdSJulian Elischer 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
33271fad9fdSJulian Elischer 	    ("setrunqueue: bad thread state"));
33371fad9fdSJulian Elischer 	TD_SET_RUNQ(td);
334e602ba25SJulian Elischer 	kg = td->td_ksegrp;
335ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
33648bfcdddSJulian Elischer 		/*
33748bfcdddSJulian Elischer 		 * Common path optimisation: Only one of everything
33848bfcdddSJulian Elischer 		 * and the KSE is always already attached.
33948bfcdddSJulian Elischer 		 * Totally ignore the ksegrp run queue.
34048bfcdddSJulian Elischer 		 */
341ed062c8dSJulian Elischer 		if (kg->kg_avail_opennings != 1) {
34254983505SJulian Elischer 			if (limitcount < 1) {
343ed062c8dSJulian Elischer 				limitcount++;
34454983505SJulian Elischer 				printf("pid %d: corrected slot count (%d->1)\n",
345ed062c8dSJulian Elischer 				    td->td_proc->p_pid, kg->kg_avail_opennings);
346ed062c8dSJulian Elischer 
347ed062c8dSJulian Elischer 			}
348ed062c8dSJulian Elischer 			kg->kg_avail_opennings = 1;
349ed062c8dSJulian Elischer 		}
3502630e4c9SJulian Elischer 		sched_add(td, flags);
35148bfcdddSJulian Elischer 		return;
35248bfcdddSJulian Elischer 	}
35348bfcdddSJulian Elischer 
35414f0e2e9SJulian Elischer 	/*
35514f0e2e9SJulian Elischer 	 * If the concurrency has been reduced, and we would go in the
35614f0e2e9SJulian Elischer 	 * assigned section, then keep removing entries from the
35714f0e2e9SJulian Elischer 	 * system run queue, until we are not in that section
35814f0e2e9SJulian Elischer 	 * or there is room for us to be put in that section.
35914f0e2e9SJulian Elischer 	 * What we MUST avoid is the case where there are threads of lower
36014f0e2e9SJulian Elischer 	 * priority than the new one already scheduled, while the new one cannot
36114f0e2e9SJulian Elischer 	 * be scheduled itself. That would lead to a non-contiguous set
36214f0e2e9SJulian Elischer 	 * of scheduled threads, and everything would break.
36314f0e2e9SJulian Elischer 	 */
364e602ba25SJulian Elischer 	tda = kg->kg_last_assigned;
36514f0e2e9SJulian Elischer 	while ((kg->kg_avail_opennings <= 0) &&
366ed062c8dSJulian Elischer 	    (tda && (tda->td_priority > td->td_priority))) {
367e602ba25SJulian Elischer 		/*
368e602ba25SJulian Elischer 		 * None free, but there is one we can commandeer.
369e602ba25SJulian Elischer 		 */
370ed062c8dSJulian Elischer 		CTR2(KTR_RUNQ,
371ed062c8dSJulian Elischer 		    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
37294816f6dSJeff Roberson 		sched_rem(tda);
373e602ba25SJulian Elischer 		tda = kg->kg_last_assigned =
374e602ba25SJulian Elischer 		    TAILQ_PREV(tda, threadqueue, td_runq);
375d39063f2SJulian Elischer 		SLOT_RELEASE(kg);
376d5a08a60SJake Burkholder 	}
377d5a08a60SJake Burkholder 
378e602ba25SJulian Elischer 	/*
379e602ba25SJulian Elischer 	 * Add the thread to the ksegrp's run queue at
380e602ba25SJulian Elischer 	 * the appropriate place.
381e602ba25SJulian Elischer 	 */
382e602ba25SJulian Elischer 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
383e602ba25SJulian Elischer 		if (td2->td_priority > td->td_priority) {
3841a5cd27bSJulian Elischer 			kg->kg_runnable++;
385e602ba25SJulian Elischer 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
386e602ba25SJulian Elischer 			break;
387e602ba25SJulian Elischer 		}
388e602ba25SJulian Elischer 	}
389e602ba25SJulian Elischer 	if (td2 == NULL) {
390e602ba25SJulian Elischer 		/* We ran off the end of the TAILQ or it was empty. */
3911a5cd27bSJulian Elischer 		kg->kg_runnable++;
392e602ba25SJulian Elischer 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
393e602ba25SJulian Elischer 	}
394e602ba25SJulian Elischer 
395e602ba25SJulian Elischer 	/*
396ed062c8dSJulian Elischer 	 * If we have a slot to use, then put the thread on the system
397ed062c8dSJulian Elischer 	 * run queue and, if needed, readjust the last_assigned pointer.
39814f0e2e9SJulian Elischer 	 * It may be that we need to schedule something anyhow,
39914f0e2e9SJulian Elischer 	 * even if the available slots are negative, so that
40014f0e2e9SJulian Elischer 	 * all the items < last_assigned are scheduled.
401e602ba25SJulian Elischer 	 */
402ed062c8dSJulian Elischer 	if (kg->kg_avail_opennings > 0) {
403e602ba25SJulian Elischer 		if (tda == NULL) {
404e602ba25SJulian Elischer 			/*
405e602ba25SJulian Elischer 			 * No pre-existing last assigned so whoever is first
40614f0e2e9SJulian Elischer 			 * gets the slot (maybe us).
407e602ba25SJulian Elischer 			 */
408e602ba25SJulian Elischer 			td2 = TAILQ_FIRST(&kg->kg_runq);
409e602ba25SJulian Elischer 			kg->kg_last_assigned = td2;
410e602ba25SJulian Elischer 		} else if (tda->td_priority > td->td_priority) {
411ed062c8dSJulian Elischer 			td2 = td;
412e602ba25SJulian Elischer 		} else {
413e602ba25SJulian Elischer 			/*
414e602ba25SJulian Elischer 			 * We are past last_assigned, so
41514f0e2e9SJulian Elischer 			 * give the next slot to whatever is next,
416e602ba25SJulian Elischer 			 * which may or may not be us.
417e602ba25SJulian Elischer 			 */
418e602ba25SJulian Elischer 			td2 = TAILQ_NEXT(tda, td_runq);
419e602ba25SJulian Elischer 			kg->kg_last_assigned = td2;
420e602ba25SJulian Elischer 		}
421ed062c8dSJulian Elischer 		sched_add(td2, flags);
422732d9528SJulian Elischer 	} else {
423732d9528SJulian Elischer 		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
424732d9528SJulian Elischer 			td, td->td_ksegrp, td->td_proc->p_pid);
425e602ba25SJulian Elischer 	}
426e602ba25SJulian Elischer }
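
/*
 * To summarise the threaded case above: if no slot is free and the
 * new thread outranks kg_last_assigned, a slot is first reclaimed
 * from the latter; the thread is then inserted into kg_runq in
 * priority order; and finally, if a slot is available, sched_add()
 * is called for whichever thread is now entitled to it (the new
 * thread, or the first thread past kg_last_assigned) and
 * kg_last_assigned is updated accordingly.
 */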
427e602ba25SJulian Elischer 
4280c0b25aeSJohn Baldwin /*
4290c0b25aeSJohn Baldwin  * Kernel thread preemption implementation.  Critical sections mark
4300c0b25aeSJohn Baldwin  * regions of code in which preemptions are not allowed.
4310c0b25aeSJohn Baldwin  */
4327e1f6dfeSJohn Baldwin void
4337e1f6dfeSJohn Baldwin critical_enter(void)
4347e1f6dfeSJohn Baldwin {
4357e1f6dfeSJohn Baldwin 	struct thread *td;
4367e1f6dfeSJohn Baldwin 
4377e1f6dfeSJohn Baldwin 	td = curthread;
4387e1f6dfeSJohn Baldwin 	if (td->td_critnest == 0)
4391a8cfbc4SRobert Watson 		cpu_critical_enter(td);
4407e1f6dfeSJohn Baldwin 	td->td_critnest++;
4417e1f6dfeSJohn Baldwin }
4427e1f6dfeSJohn Baldwin 
4437e1f6dfeSJohn Baldwin void
4447e1f6dfeSJohn Baldwin critical_exit(void)
4457e1f6dfeSJohn Baldwin {
4467e1f6dfeSJohn Baldwin 	struct thread *td;
4477e1f6dfeSJohn Baldwin 
4487e1f6dfeSJohn Baldwin 	td = curthread;
449b209e5e3SJeff Roberson 	KASSERT(td->td_critnest != 0,
450b209e5e3SJeff Roberson 	    ("critical_exit: td_critnest == 0"));
4517e1f6dfeSJohn Baldwin 	if (td->td_critnest == 1) {
4520c0b25aeSJohn Baldwin #ifdef PREEMPTION
45352eb8464SJohn Baldwin 		mtx_assert(&sched_lock, MA_NOTOWNED);
45452eb8464SJohn Baldwin 		if (td->td_pflags & TDP_OWEPREEMPT) {
4550c0b25aeSJohn Baldwin 			mtx_lock_spin(&sched_lock);
4560c0b25aeSJohn Baldwin 			mi_switch(SW_INVOL, NULL);
4570c0b25aeSJohn Baldwin 			mtx_unlock_spin(&sched_lock);
4580c0b25aeSJohn Baldwin 		}
4590c0b25aeSJohn Baldwin #endif
4607e1f6dfeSJohn Baldwin 		td->td_critnest = 0;
4611a8cfbc4SRobert Watson 		cpu_critical_exit(td);
462d74ac681SMatthew Dillon 	} else {
4637e1f6dfeSJohn Baldwin 		td->td_critnest--;
4647e1f6dfeSJohn Baldwin 	}
465d74ac681SMatthew Dillon }
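
/*
 * A minimal usage sketch (illustrative only):
 *
 *	critical_enter();
 *	... access per-CPU state; no preemption can occur here ...
 *	critical_exit();
 *
 * With PREEMPTION configured, a preemption requested while the thread
 * was inside a critical section is recorded as TDP_OWEPREEMPT and is
 * performed by the outermost critical_exit(), as seen above.
 */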
4667e1f6dfeSJohn Baldwin 
4670c0b25aeSJohn Baldwin /*
4680c0b25aeSJohn Baldwin  * This function is called when a thread is about to be put on a run queue
4690c0b25aeSJohn Baldwin  * because it has been made runnable or its priority has been adjusted.  It
4700c0b25aeSJohn Baldwin  * determines whether we should immediately preempt to the new thread.  If so,
4710c0b25aeSJohn Baldwin  * it switches to it and eventually returns true.  If not, it returns false
4720c0b25aeSJohn Baldwin  * so that the caller may place the thread on an appropriate run queue.
4730c0b25aeSJohn Baldwin  */
4740c0b25aeSJohn Baldwin int
4750c0b25aeSJohn Baldwin maybe_preempt(struct thread *td)
4760c0b25aeSJohn Baldwin {
4778b44a2e2SMarcel Moolenaar #ifdef PREEMPTION
4780c0b25aeSJohn Baldwin 	struct thread *ctd;
4790c0b25aeSJohn Baldwin 	int cpri, pri;
4808b44a2e2SMarcel Moolenaar #endif
4810c0b25aeSJohn Baldwin 
4820c0b25aeSJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
4830c0b25aeSJohn Baldwin #ifdef PREEMPTION
4840c0b25aeSJohn Baldwin 	/*
4850c0b25aeSJohn Baldwin 	 * The new thread should not preempt the current thread if any of the
4860c0b25aeSJohn Baldwin 	 * following conditions are true:
4870c0b25aeSJohn Baldwin 	 *
48852eb8464SJohn Baldwin 	 *  - The current thread has a higher (numerically lower) or
48952eb8464SJohn Baldwin 	 *    equivalent priority.  Note that this prevents curthread from
49052eb8464SJohn Baldwin 	 *    trying to preempt to itself.
4910c0b25aeSJohn Baldwin 	 *  - It is too early in the boot for context switches (cold is set).
4920c0b25aeSJohn Baldwin 	 *  - The current thread has an inhibitor set or is in the process of
4930c0b25aeSJohn Baldwin 	 *    exiting.  In this case, the current thread is about to switch
4940c0b25aeSJohn Baldwin 	 *    out anyways, so there's no point in preempting.  If we did,
4950c0b25aeSJohn Baldwin 	 *    the current thread would not be properly resumed as well, so
4960c0b25aeSJohn Baldwin 	 *    just avoid that whole landmine.
4970c0b25aeSJohn Baldwin 	 *  - If the new thread's priority is not a realtime priority and
4980c0b25aeSJohn Baldwin 	 *    the current thread's priority is not an idle priority and
4990c0b25aeSJohn Baldwin 	 *    FULL_PREEMPTION is disabled.
5000c0b25aeSJohn Baldwin 	 *
5010c0b25aeSJohn Baldwin 	 * If all of these conditions are false, but the current thread is in
5020c0b25aeSJohn Baldwin 	 * a nested critical section, then we have to defer the preemption
5030c0b25aeSJohn Baldwin 	 * until we exit the critical section.  Otherwise, switch immediately
5040c0b25aeSJohn Baldwin 	 * to the new thread.
5050c0b25aeSJohn Baldwin 	 */
5060c0b25aeSJohn Baldwin 	ctd = curthread;
5076a574b2aSJulian Elischer 	KASSERT ((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
5086a574b2aSJulian Elischer 	  ("thread has no (or wrong) sched-private part."));
509b2578c6cSJulian Elischer 	KASSERT((td->td_inhibitors == 0),
510b2578c6cSJulian Elischer 			("maybe_preempt: trying to run inhibited thread"));
5110c0b25aeSJohn Baldwin 	pri = td->td_priority;
5120c0b25aeSJohn Baldwin 	cpri = ctd->td_priority;
5130c0b25aeSJohn Baldwin 	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
5140c0b25aeSJohn Baldwin 	    td->td_kse->ke_state != KES_THREAD)
5150c0b25aeSJohn Baldwin 		return (0);
5160c0b25aeSJohn Baldwin #ifndef FULL_PREEMPTION
5170c0b25aeSJohn Baldwin 	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
5180c0b25aeSJohn Baldwin 	    !(cpri >= PRI_MIN_IDLE))
5190c0b25aeSJohn Baldwin 		return (0);
5200c0b25aeSJohn Baldwin #endif
5210c0b25aeSJohn Baldwin 	if (ctd->td_critnest > 1) {
5220c0b25aeSJohn Baldwin 		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
5230c0b25aeSJohn Baldwin 		    ctd->td_critnest);
52452eb8464SJohn Baldwin 		ctd->td_pflags |= TDP_OWEPREEMPT;
5250c0b25aeSJohn Baldwin 		return (0);
5260c0b25aeSJohn Baldwin 	}
5270c0b25aeSJohn Baldwin 
5280c0b25aeSJohn Baldwin 	/*
529c20c691bSJulian Elischer 	 * Thread is runnable but not yet put on the system run queue.
5300c0b25aeSJohn Baldwin 	 */
5310c0b25aeSJohn Baldwin 	MPASS(TD_ON_RUNQ(td));
5321f9f5df6SJulian Elischer 	MPASS(td->td_sched->ke_state != KES_ONRUNQ);
5331f9f5df6SJulian Elischer 	if (td->td_proc->p_flag & P_HADTHREADS) {
5341f9f5df6SJulian Elischer 		/*
5351f9f5df6SJulian Elischer 		 * If this is a threaded process we actually ARE on the
5361f9f5df6SJulian Elischer 		 * ksegrp run queue so take it off that first.
5379da3e923SJulian Elischer 		 * Also undo any damage done to the last_assigned pointer.
5389da3e923SJulian Elischer 		 * XXX Fix setrunqueue so this isn't needed
5391f9f5df6SJulian Elischer 		 */
5409da3e923SJulian Elischer 		struct ksegrp *kg;
5419da3e923SJulian Elischer 
5429da3e923SJulian Elischer 		kg = td->td_ksegrp;
5439da3e923SJulian Elischer 		if (kg->kg_last_assigned == td)
5449da3e923SJulian Elischer 			kg->kg_last_assigned =
5459da3e923SJulian Elischer 			    TAILQ_PREV(td, threadqueue, td_runq);
5469da3e923SJulian Elischer 		TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
5471f9f5df6SJulian Elischer 	}
5481f9f5df6SJulian Elischer 
5490c0b25aeSJohn Baldwin 	TD_SET_RUNNING(td);
5500c0b25aeSJohn Baldwin 	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
5510c0b25aeSJohn Baldwin 	    td->td_proc->p_pid, td->td_proc->p_comm);
552c20c691bSJulian Elischer 	mi_switch(SW_INVOL|SW_PREEMPT, td);
5530c0b25aeSJohn Baldwin 	return (1);
5540c0b25aeSJohn Baldwin #else
5550c0b25aeSJohn Baldwin 	return (0);
5560c0b25aeSJohn Baldwin #endif
5570c0b25aeSJohn Baldwin }
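
/*
 * Note that a return value of 1 means the switch to the new thread
 * has already happened; 0 means the caller should queue the thread as
 * usual.  Without FULL_PREEMPTION, only a thread in the interrupt
 * thread priority range causes a preemption here, unless the current
 * thread is running at an idle priority.
 */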
5580c0b25aeSJohn Baldwin 
55944fe3c1fSJohn Baldwin #if 0
5600c0b25aeSJohn Baldwin #ifndef PREEMPTION
5610c0b25aeSJohn Baldwin /* XXX: There should be a non-static version of this. */
5620c0b25aeSJohn Baldwin static void
5630c0b25aeSJohn Baldwin printf_caddr_t(void *data)
5640c0b25aeSJohn Baldwin {
5650c0b25aeSJohn Baldwin 	printf("%s", (char *)data);
5660c0b25aeSJohn Baldwin }
5670c0b25aeSJohn Baldwin static char preempt_warning[] =
5680c0b25aeSJohn Baldwin     "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
5690c0b25aeSJohn Baldwin SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
5700c0b25aeSJohn Baldwin     preempt_warning)
5710c0b25aeSJohn Baldwin #endif
57244fe3c1fSJohn Baldwin #endif
573e602ba25SJulian Elischer 
574e602ba25SJulian Elischer /************************************************************************
575e602ba25SJulian Elischer  * SYSTEM RUN QUEUE manipulations and tests				*
576e602ba25SJulian Elischer  ************************************************************************/
577e602ba25SJulian Elischer /*
578e602ba25SJulian Elischer  * Initialize a run structure.
579e602ba25SJulian Elischer  */
580e602ba25SJulian Elischer void
581e602ba25SJulian Elischer runq_init(struct runq *rq)
582e602ba25SJulian Elischer {
583e602ba25SJulian Elischer 	int i;
584e602ba25SJulian Elischer 
585e602ba25SJulian Elischer 	bzero(rq, sizeof *rq);
586e602ba25SJulian Elischer 	for (i = 0; i < RQ_NQS; i++)
587e602ba25SJulian Elischer 		TAILQ_INIT(&rq->rq_queues[i]);
588e602ba25SJulian Elischer }
589e602ba25SJulian Elischer 
590d5a08a60SJake Burkholder /*
591d5a08a60SJake Burkholder  * Clear the status bit of the queue corresponding to priority level pri,
592d5a08a60SJake Burkholder  * indicating that it is empty.
593d5a08a60SJake Burkholder  */
594d5a08a60SJake Burkholder static __inline void
595d5a08a60SJake Burkholder runq_clrbit(struct runq *rq, int pri)
596d5a08a60SJake Burkholder {
597d5a08a60SJake Burkholder 	struct rqbits *rqb;
598d5a08a60SJake Burkholder 
599d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
600d5a08a60SJake Burkholder 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
601d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)],
602d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
603d5a08a60SJake Burkholder 	    RQB_BIT(pri), RQB_WORD(pri));
604d5a08a60SJake Burkholder 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
605d5a08a60SJake Burkholder }
606d5a08a60SJake Burkholder 
607d5a08a60SJake Burkholder /*
608d5a08a60SJake Burkholder  * Find the index of the first non-empty run queue.  This is done by
609d5a08a60SJake Burkholder  * scanning the status bits, a set bit indicates a non-empty queue.
610d5a08a60SJake Burkholder  */
611d5a08a60SJake Burkholder static __inline int
612d5a08a60SJake Burkholder runq_findbit(struct runq *rq)
613d5a08a60SJake Burkholder {
614d5a08a60SJake Burkholder 	struct rqbits *rqb;
615d5a08a60SJake Burkholder 	int pri;
616d5a08a60SJake Burkholder 	int i;
617d5a08a60SJake Burkholder 
618d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
619d5a08a60SJake Burkholder 	for (i = 0; i < RQB_LEN; i++)
620d5a08a60SJake Burkholder 		if (rqb->rqb_bits[i]) {
6212f9267ecSPeter Wemm 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
622d5a08a60SJake Burkholder 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
623d5a08a60SJake Burkholder 			    rqb->rqb_bits[i], i, pri);
624d5a08a60SJake Burkholder 			return (pri);
625d5a08a60SJake Burkholder 		}
626d5a08a60SJake Burkholder 
627d5a08a60SJake Burkholder 	return (-1);
628d5a08a60SJake Burkholder }
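
/*
 * The index returned above is that of the lowest numbered non-empty
 * queue, which corresponds to the best (numerically lowest) priority
 * band; -1 means every queue is empty.
 */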
629d5a08a60SJake Burkholder 
630d5a08a60SJake Burkholder /*
631d5a08a60SJake Burkholder  * Set the status bit of the queue corresponding to priority level pri,
632d5a08a60SJake Burkholder  * indicating that it is non-empty.
633d5a08a60SJake Burkholder  */
634d5a08a60SJake Burkholder static __inline void
635d5a08a60SJake Burkholder runq_setbit(struct runq *rq, int pri)
636d5a08a60SJake Burkholder {
637d5a08a60SJake Burkholder 	struct rqbits *rqb;
638d5a08a60SJake Burkholder 
639d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
640d5a08a60SJake Burkholder 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
641d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)],
642d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
643d5a08a60SJake Burkholder 	    RQB_BIT(pri), RQB_WORD(pri));
644d5a08a60SJake Burkholder 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
645d5a08a60SJake Burkholder }
646d5a08a60SJake Burkholder 
647d5a08a60SJake Burkholder /*
648e602ba25SJulian Elischer  * Add the KSE to the queue specified by its priority, and set the
649d5a08a60SJake Burkholder  * corresponding status bit.
650d5a08a60SJake Burkholder  */
651d5a08a60SJake Burkholder void
652c20c691bSJulian Elischer runq_add(struct runq *rq, struct kse *ke, int flags)
653d5a08a60SJake Burkholder {
654d5a08a60SJake Burkholder 	struct rqhead *rqh;
655d5a08a60SJake Burkholder 	int pri;
656dba6c5a6SPeter Wemm 
6572c100766SJulian Elischer 	pri = ke->ke_thread->td_priority / RQ_PPQ;
658b40ce416SJulian Elischer 	ke->ke_rqindex = pri;
659d5a08a60SJake Burkholder 	runq_setbit(rq, pri);
660d5a08a60SJake Burkholder 	rqh = &rq->rq_queues[pri];
661732d9528SJulian Elischer 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
662732d9528SJulian Elischer 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
663c20c691bSJulian Elischer 	if (flags & SRQ_PREEMPTED) {
664c20c691bSJulian Elischer 		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
665c20c691bSJulian Elischer 	} else {
666b40ce416SJulian Elischer 		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
667dba6c5a6SPeter Wemm 	}
668c20c691bSJulian Elischer }
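
/*
 * The index computed above folds RQ_PPQ consecutive priority values
 * into each queue (e.g. with an RQ_PPQ of 4, priorities 0-3 share
 * queue 0).  Inserting at the head for SRQ_PREEMPTED means a
 * preempted KSE is chosen again before others of equal priority.
 */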
669d5a08a60SJake Burkholder 
670d5a08a60SJake Burkholder /*
671d5a08a60SJake Burkholder  * Return true if there are runnable processes of any priority on the run
672d5a08a60SJake Burkholder  * queue, false otherwise.  Has no side effects, does not modify the run
673d5a08a60SJake Burkholder  * queue structure.
674d5a08a60SJake Burkholder  */
675d5a08a60SJake Burkholder int
676d5a08a60SJake Burkholder runq_check(struct runq *rq)
677d5a08a60SJake Burkholder {
678d5a08a60SJake Burkholder 	struct rqbits *rqb;
679d5a08a60SJake Burkholder 	int i;
680d5a08a60SJake Burkholder 
681d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
682d5a08a60SJake Burkholder 	for (i = 0; i < RQB_LEN; i++)
683d5a08a60SJake Burkholder 		if (rqb->rqb_bits[i]) {
684d5a08a60SJake Burkholder 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
685d5a08a60SJake Burkholder 			    rqb->rqb_bits[i], i);
686d5a08a60SJake Burkholder 			return (1);
687dba6c5a6SPeter Wemm 		}
688d5a08a60SJake Burkholder 	CTR0(KTR_RUNQ, "runq_check: empty");
689d5a08a60SJake Burkholder 
690d5a08a60SJake Burkholder 	return (0);
691dba6c5a6SPeter Wemm }
692d5a08a60SJake Burkholder 
6936804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
6946804a3abSJulian Elischer int runq_fuzz = 1;
6956804a3abSJulian Elischer SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
6966804a3abSJulian Elischer #endif
6976804a3abSJulian Elischer 
698d5a08a60SJake Burkholder /*
699b43179fbSJeff Roberson  * Find the highest priority KSE on the run queue.
700d5a08a60SJake Burkholder  */
701b40ce416SJulian Elischer struct kse *
702d5a08a60SJake Burkholder runq_choose(struct runq *rq)
703d5a08a60SJake Burkholder {
704d5a08a60SJake Burkholder 	struct rqhead *rqh;
705b40ce416SJulian Elischer 	struct kse *ke;
706d5a08a60SJake Burkholder 	int pri;
707d5a08a60SJake Burkholder 
708d5a08a60SJake Burkholder 	mtx_assert(&sched_lock, MA_OWNED);
709e602ba25SJulian Elischer 	while ((pri = runq_findbit(rq)) != -1) {
710d5a08a60SJake Burkholder 		rqh = &rq->rq_queues[pri];
7116804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
7126804a3abSJulian Elischer 		/* fuzz == 1 is normal; 0 or less is ignored */
7136804a3abSJulian Elischer 		if (runq_fuzz > 1) {
7146804a3abSJulian Elischer 			/*
7156804a3abSJulian Elischer 			 * In the first couple of entries, check if
7166804a3abSJulian Elischer 			 * there is one for our CPU as a preference.
7176804a3abSJulian Elischer 			 */
7186804a3abSJulian Elischer 			int count = runq_fuzz;
7196804a3abSJulian Elischer 			int cpu = PCPU_GET(cpuid);
7206804a3abSJulian Elischer 			struct kse *ke2;
7216804a3abSJulian Elischer 			ke2 = ke = TAILQ_FIRST(rqh);
7226804a3abSJulian Elischer 
7236804a3abSJulian Elischer 			while (count-- && ke2) {
7246804a3abSJulian Elischer 				if (ke2->ke_thread->td_lastcpu == cpu) {
7256804a3abSJulian Elischer 					ke = ke2;
7266804a3abSJulian Elischer 					break;
7276804a3abSJulian Elischer 				}
7286804a3abSJulian Elischer 				ke2 = TAILQ_NEXT(ke2, ke_procq);
7296804a3abSJulian Elischer 			}
7306804a3abSJulian Elischer 		} else
7316804a3abSJulian Elischer #endif
732b40ce416SJulian Elischer 			ke = TAILQ_FIRST(rqh);
733b40ce416SJulian Elischer 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
734e602ba25SJulian Elischer 		CTR3(KTR_RUNQ,
735e602ba25SJulian Elischer 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
736b40ce416SJulian Elischer 		return (ke);
737d5a08a60SJake Burkholder 	}
738d5a08a60SJake Burkholder 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
739d5a08a60SJake Burkholder 
740e602ba25SJulian Elischer 	return (NULL);
741d5a08a60SJake Burkholder }
742d5a08a60SJake Burkholder 
743d5a08a60SJake Burkholder /*
744e602ba25SJulian Elischer  * Remove the KSE from the queue specified by its priority, and clear the
745d5a08a60SJake Burkholder  * corresponding status bit if the queue becomes empty.
746e602ba25SJulian Elischer  * Caller must set ke->ke_state afterwards.
747d5a08a60SJake Burkholder  */
748d5a08a60SJake Burkholder void
749b40ce416SJulian Elischer runq_remove(struct runq *rq, struct kse *ke)
750d5a08a60SJake Burkholder {
751d5a08a60SJake Burkholder 	struct rqhead *rqh;
752d5a08a60SJake Burkholder 	int pri;
753d5a08a60SJake Burkholder 
7549eb881f8SSeigo Tanimura 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
7559eb881f8SSeigo Tanimura 		("runq_remove: process swapped out"));
756b40ce416SJulian Elischer 	pri = ke->ke_rqindex;
757d5a08a60SJake Burkholder 	rqh = &rq->rq_queues[pri];
758732d9528SJulian Elischer 	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
759732d9528SJulian Elischer 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
760b40ce416SJulian Elischer 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
761b40ce416SJulian Elischer 	TAILQ_REMOVE(rqh, ke, ke_procq);
762d5a08a60SJake Burkholder 	if (TAILQ_EMPTY(rqh)) {
763d5a08a60SJake Burkholder 		CTR0(KTR_RUNQ, "runq_remove: empty");
764d5a08a60SJake Burkholder 		runq_clrbit(rq, pri);
765d5a08a60SJake Burkholder 	}
766dba6c5a6SPeter Wemm }
767e602ba25SJulian Elischer 
768ed062c8dSJulian Elischer /****** functions that are temporarily here ***********/
769ed062c8dSJulian Elischer #include <vm/uma.h>
770ed062c8dSJulian Elischer #define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
771ed062c8dSJulian Elischer extern struct mtx kse_zombie_lock;
772ed062c8dSJulian Elischer 
773ed062c8dSJulian Elischer /*
774ed062c8dSJulian Elischer  *  Allocate scheduler specific per-process resources.
775ed062c8dSJulian Elischer  * The thread and ksegrp have already been linked in.
776ed062c8dSJulian Elischer  * In this case just set the default concurrency value.
777ed062c8dSJulian Elischer  *
778ed062c8dSJulian Elischer  * Called from:
779ed062c8dSJulian Elischer  *  proc_init() (UMA init method)
780ed062c8dSJulian Elischer  */
781ed062c8dSJulian Elischer void
782ed062c8dSJulian Elischer sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
783ed062c8dSJulian Elischer {
784ed062c8dSJulian Elischer 
785ed062c8dSJulian Elischer 	/* This can go in sched_fork */
786ed062c8dSJulian Elischer 	sched_init_concurrency(kg);
787ed062c8dSJulian Elischer }
788ed062c8dSJulian Elischer 
790ed062c8dSJulian Elischer /*
791ed062c8dSJulian Elischer  * A thread is either being created or recycled.
792ed062c8dSJulian Elischer  * Fix up the per-scheduler resources associated with it.
793ed062c8dSJulian Elischer  * Called from:
794ed062c8dSJulian Elischer  *  sched_fork_thread()
795ed062c8dSJulian Elischer  *  thread_dtor()  (*may go away)
796ed062c8dSJulian Elischer  *  thread_init()  (*may go away)
797ed062c8dSJulian Elischer  */
798ed062c8dSJulian Elischer void
799ed062c8dSJulian Elischer sched_newthread(struct thread *td)
800ed062c8dSJulian Elischer {
801ed062c8dSJulian Elischer 	struct td_sched *ke;
802ed062c8dSJulian Elischer 
803ed062c8dSJulian Elischer 	ke = (struct td_sched *) (td + 1);
804ed062c8dSJulian Elischer 	bzero(ke, sizeof(*ke));
805ed062c8dSJulian Elischer 	td->td_sched     = ke;
806ed062c8dSJulian Elischer 	ke->ke_thread	= td;
807ed062c8dSJulian Elischer 	ke->ke_oncpu	= NOCPU;
808ed062c8dSJulian Elischer 	ke->ke_state	= KES_THREAD;
809ed062c8dSJulian Elischer }
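
/*
 * The scheduler-private data is not a separate allocation: it lives
 * immediately after struct thread in the same memory (hence the
 * "(td + 1)" above), so nothing extra has to be freed when the thread
 * is destroyed.
 */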
810ed062c8dSJulian Elischer 
811ed062c8dSJulian Elischer /*
812ed062c8dSJulian Elischer  * Set up an initial concurrency of 1
813ed062c8dSJulian Elischer  * and set the given thread (if given) to be using that
814ed062c8dSJulian Elischer  * concurrency slot.
815ed062c8dSJulian Elischer  * May be used "offline", i.e. before the ksegrp is attached to the world,
816ed062c8dSJulian Elischer  * and thus wouldn't need schedlock in that case.
817ed062c8dSJulian Elischer  * Called from:
818ed062c8dSJulian Elischer  *  thr_create()
819ed062c8dSJulian Elischer  *  proc_init() (UMA) via sched_newproc()
820ed062c8dSJulian Elischer  */
821ed062c8dSJulian Elischer void
822ed062c8dSJulian Elischer sched_init_concurrency(struct ksegrp *kg)
823ed062c8dSJulian Elischer {
824ed062c8dSJulian Elischer 
825d39063f2SJulian Elischer 	CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg);
826ed062c8dSJulian Elischer 	kg->kg_concurrency = 1;
827ed062c8dSJulian Elischer 	kg->kg_avail_opennings = 1;
828ed062c8dSJulian Elischer }
829ed062c8dSJulian Elischer 
830ed062c8dSJulian Elischer /*
831ed062c8dSJulian Elischer  * Change the concurrency of an existing ksegrp to N
832ed062c8dSJulian Elischer  * Called from:
833ed062c8dSJulian Elischer  *  kse_create()
834ed062c8dSJulian Elischer  *  kse_exit()
835ed062c8dSJulian Elischer  *  thread_exit()
836ed062c8dSJulian Elischer  *  thread_single()
837ed062c8dSJulian Elischer  */
838ed062c8dSJulian Elischer void
839ed062c8dSJulian Elischer sched_set_concurrency(struct ksegrp *kg, int concurrency)
840ed062c8dSJulian Elischer {
841ed062c8dSJulian Elischer 
842d39063f2SJulian Elischer 	CTR4(KTR_RUNQ,"kg %p set concurrency to %d, slots %d -> %d",
843d39063f2SJulian Elischer 	    kg,
844d39063f2SJulian Elischer 	    concurrency,
845d39063f2SJulian Elischer 	    kg->kg_avail_opennings,
846d39063f2SJulian Elischer 	    kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
847ed062c8dSJulian Elischer 	kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
848ed062c8dSJulian Elischer 	kg->kg_concurrency = concurrency;
849ed062c8dSJulian Elischer }
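
/*
 * Only the change in concurrency is applied to kg_avail_opennings,
 * so slots currently in use remain accounted for.  For example,
 * raising the concurrency from 1 to 2 adds exactly one available
 * opening, regardless of how many threads are running at that moment.
 */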
850ed062c8dSJulian Elischer 
851ed062c8dSJulian Elischer /*
852ed062c8dSJulian Elischer  * Called from thread_exit() for all exiting threads.
853ed062c8dSJulian Elischer  *
854ed062c8dSJulian Elischer  * Not to be confused with sched_exit_thread()
855ed062c8dSJulian Elischer  * that is only called from thread_exit() for threads exiting
856ed062c8dSJulian Elischer  * without the rest of the process exiting, because it is also called from
857ed062c8dSJulian Elischer  * sched_exit() and we wouldn't want to call it twice.
858ed062c8dSJulian Elischer  * XXX This can probably be fixed.
859ed062c8dSJulian Elischer  */
860ed062c8dSJulian Elischer void
861ed062c8dSJulian Elischer sched_thread_exit(struct thread *td)
862ed062c8dSJulian Elischer {
863ed062c8dSJulian Elischer 
864d39063f2SJulian Elischer 	SLOT_RELEASE(td->td_ksegrp);
865ed062c8dSJulian Elischer 	slot_fill(td->td_ksegrp);
866ed062c8dSJulian Elischer }
867ed062c8dSJulian Elischer 
868ed062c8dSJulian Elischer #endif /* KERN_SWITCH_INCLUDE */
869