/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If X of these KSEs are actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is taken off the run queue to be run, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned, we know M was 1 and must
now be 0. Since the thread is no longer queued, the pointer must be
cleared. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available; we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, its
priority can be compared with that of the last assigned thread to see if
it should 'steal' its KSE or not, i.e. is it 'earlier'
on the list than that thread or later? If it's earlier, then the KSE is
removed from the last assigned thread (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
***/
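
/*
 * Illustrative walkthrough (added commentary, not from the original file):
 * assume M = 2 assignable KSEs and runnable threads A, B and C queued off
 * the ksegrp with priorities 10, 20 and 30 (lower is better).  A and B
 * each hold a KSE and kg_last_assigned points at B; C waits unassigned.
 * If a new thread D with priority 15 is made runnable, D is queued
 * between A and B, steals the KSE from B (the last assigned thread), and
 * kg_last_assigned ends up pointing at D.  The KSEs on the system run
 * queue now carry priorities 10 and 15, preserving the invariant that
 * the M queued KSEs reflect the M highest priority runnable threads.
 */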

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#include <machine/critical.h>
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

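/*
 * Descriptive note (added): the run queue status words must together
 * provide exactly one bit per run queue, i.e. RQB_LEN words of RQB_BPW
 * bits covering all RQ_NQS queues; the assertion below enforces this.
 */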
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If thread clustering is ever
 * implemented, this is where it will be done.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on APs */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

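	/*
	 * Descriptive note (added): we come back to this label from below
	 * if a panic is in progress and the chosen thread may not run; in
	 * that case another candidate is picked until an eligible one is
	 * found.
	 */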
retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_SA) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			kg->kg_runnable--;
		}
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus KSE, either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes that the original thread is not runnable.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *original;

	mtx_assert(&sched_lock, MA_OWNED);
	original = ke->ke_thread;
	KASSERT(original == NULL || TD_IS_INHIBITED(original),
	    ("reassigning KSE with runnable thread"));
	kg = ke->ke_ksegrp;
	if (original)
		original->td_kse = NULL;

	/*
	 * Find the first unassigned thread.
	 */
	if ((td = kg->kg_last_assigned) != NULL)
		td = TAILQ_NEXT(td, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		sched_add(td, SRQ_BORING);
		return;
	}

	ke->ke_state = KES_IDLE;
	ke->ke_thread = NULL;
	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
	kg->kg_idle_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
	return;
}

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* Bring its kse with it, leave the thread attached */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	kg->kg_runnable--;
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td, SRQ_BORING);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	kg->kg_runnable--;
	td->td_priority = newpri;
	setrunqueue(td, SRQ_BORING);
}

void
setrunqueue(struct thread *td, int flags)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;
	int count;

	CTR4(KTR_RUNQ, "setrunqueue: td:%p ke:%p kg:%p pid:%d",
	    td, td->td_kse, td->td_ksegrp, td->td_proc->p_pid);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td, flags);
		return;
	}

	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking.
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			CTR2(KTR_RUNQ, "setrunqueue: kg:%p: Use free ke:%p",
			    kg, ke);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			CTR3(KTR_RUNQ,
			    "setrunqueue: kg:%p: take ke:%p from td: %p",
			    kg, ke, tda);
			sched_rem(tda);
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	count = 0;
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			kg->kg_runnable++;
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
		/* XXX Debugging hack */
		if (++count > 10000) {
			printf("setrunqueue(): corrupt kg_runq, td= %p\n", td);
			panic("deadlock in setrunqueue");
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		kg->kg_runnable++;
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke->ke_thread, flags);
	} else {
		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
			td, td->td_ksegrp, td->td_proc->p_pid);
	}
}
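
/*
 * Usage sketch (illustrative addition, not from the original file):
 * callers make a thread runnable while holding sched_lock, e.g.
 *
 *	mtx_lock_spin(&sched_lock);
 *	TD_CLR_SLEEPING(td);		(hypothetical wakeup path)
 *	setrunqueue(td, SRQ_BORING);
 *	mtx_unlock_spin(&sched_lock);
 *
 * SRQ_BORING indicates that no special treatment is requested.
 */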

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter(td);
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	if (td->td_critnest == 1) {
#ifdef PREEMPTION
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_pflags & TDP_OWEPREEMPT) {
			mtx_lock_spin(&sched_lock);
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
#endif
		td->td_critnest = 0;
		cpu_critical_exit(td);
	} else {
		td->td_critnest--;
	}
}
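
/*
 * Usage sketch (illustrative addition, not from the original file):
 * critical sections nest, and a preemption deferred via TDP_OWEPREEMPT
 * is only acted on when the outermost section exits.
 *
 *	critical_enter();
 *	critical_enter();	(nested, td_critnest == 2)
 *	critical_exit();	(td_critnest back to 1, no switch yet)
 *	critical_exit();	(deferred preemption, if any, happens here)
 */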

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the new thread should preempt the current thread
 * immediately.  If so, it switches to it and eventually returns true.  If
 * not, it returns false so that the caller may place the thread on an
 * appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - If the new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	if (ctd->td_kse == NULL || ctd->td_kse->ke_thread != ctd)
		return (0);
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
	    td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
	    !(cpri >= PRI_MIN_IDLE))
		return (0);
#endif
	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_pflags |= TDP_OWEPREEMPT;
		return (0);
	}

	/*
	 * Our thread state says that we are already on a run queue, so
	 * update our state as if we had been dequeued by choosethread().
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL, td);
	return (1);
#else
	return (0);
#endif
}
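
/*
 * Worked example (illustrative addition): FreeBSD priorities are "lower
 * is better", so with curthread at priority 100 and td at priority 80,
 * pri (80) < cpri (100) and td is a candidate to preempt.  Without
 * FULL_PREEMPTION the switch additionally requires td to have an
 * interrupt-thread priority (PRI_MIN_ITHD..PRI_MAX_ITHD) or curthread
 * to be running at an idle priority (>= PRI_MIN_IDLE).
 */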

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
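
/*
 * Worked example (illustrative addition, assuming RQB_BPW == 32): a
 * queue at priority level 37 sets bit 5 of word 1 (37 >> RQB_L2BPW == 1).
 * runq_findbit() scans the words in order, so the first set bit found
 * maps back to the lowest-numbered (highest-priority) non-empty queue:
 * pri = RQB_FFS(word) + (i << RQB_L2BPW) = 5 + 32 = 37.
 */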

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}
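
/*
 * Illustrative note (added): RQ_PPQ priorities share each queue, so
 * with the usual RQ_PPQ of 4 the 256 thread priorities fold into
 * RQ_NQS (64) queues; e.g. priorities 128..131 all land in
 * rq_queues[32].  FIFO order within a queue provides round-robin
 * scheduling among equal priorities.
 */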

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_DECL(_kern_sched);
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority KSE on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct kse *ke2;
			ke2 = ke = TAILQ_FIRST(rqh);

			while (count-- && ke2) {
				if (ke2->ke_thread->td_lastcpu == cpu) {
					ke = ke2;
					break;
				}
				ke2 = TAILQ_NEXT(ke2, ke_procq);
			}
		} else
#endif
			ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	kdb_enter(string2);
}

void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_SA) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
#if 0
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#endif
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif