/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is taken off the run queue to be run, we know it was
associated with the highest priority thread on the queue (at the head
of the queue). If it is also the last assigned, we know M was 1 and must
now be 0. Since the thread is no longer queued, the pointer must be
moved off it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so the
next thread in the ksegrp list will not have a KSE to assign to it,
and the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not, i.e. whether it is 'earlier'
on the list than that thread or later. If it is earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
***/
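
/*
 * A minimal sketch (not compiled in, and using only fields already
 * referenced in this file) of the invariant described above: every
 * thread up to and including kg_last_assigned on the ksegrp run queue
 * has a KSE, and every thread after it has none.
 */
#if 0
static void
kg_check_last_assigned(struct ksegrp *kg)
{
	struct thread *td;
	int assigned = (kg->kg_last_assigned != NULL);

	TAILQ_FOREACH(td, &kg->kg_runq, td_runq) {
		if (assigned)
			KASSERT(td->td_kse != NULL,
			    ("thread at or before last_assigned has no KSE"));
		else
			KASSERT(td->td_kse == NULL,
			    ("thread after last_assigned has a KSE"));
		if (td == kg->kg_last_assigned)
			assigned = 0;	/* The rest must be unassigned. */
	}
}
#endif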

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_full_preemption.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#include <machine/critical.h>
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
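
/*
 * Worked example of the assertion above, assuming the definitions in
 * <sys/runq.h>: with RQ_NQS == 64 run queues and 32-bit status words
 * (RQB_BPW == 32), RQB_LEN must be 2 so that the status bitmap covers
 * every queue exactly.
 */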

void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this is where it will be done.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run the idle thread on the APs. */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_SA) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			kg->kg_runnable--;
		}
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread. */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* Note that it is no longer on the run queue. */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus KSE, either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes that the original thread is not runnable.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *original;

	mtx_assert(&sched_lock, MA_OWNED);
	original = ke->ke_thread;
	KASSERT(original == NULL || TD_IS_INHIBITED(original),
	    ("reassigning KSE with runnable thread"));
	kg = ke->ke_ksegrp;
	if (original)
		original->td_kse = NULL;

	/*
	 * Find the first unassigned thread.
	 */
	if ((td = kg->kg_last_assigned) != NULL)
		td = TAILQ_NEXT(td, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		sched_add(td, SRQ_BORING);
		return;
	}

	ke->ke_state = KES_IDLE;
	ke->ke_thread = NULL;
	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
	kg->kg_idle_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
	return;
}

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* Bring its kse with it, leave the thread attached. */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	kg->kg_runnable--;
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td, SRQ_BORING);
		}
		return;
	}

	/* It is a threaded process. */
	kg = td->td_ksegrp;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	kg->kg_runnable--;
	td->td_priority = newpri;
	setrunqueue(td, SRQ_BORING);
}

void
setrunqueue(struct thread *td, int flags)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;
	int count;

	CTR4(KTR_RUNQ, "setrunqueue: td:%p ke:%p kg:%p pid:%d",
	    td, td->td_kse, td->td_ksegrp, td->td_proc->p_pid);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td, flags);
		return;
	}

	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			CTR2(KTR_RUNQ, "setrunqueue: kg:%p: Use free ke:%p",
			    kg, ke);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			CTR3(KTR_RUNQ,
			    "setrunqueue: kg:%p: take ke:%p from td: %p",
			    kg, ke, tda);
			sched_rem(tda);
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	count = 0;
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			kg->kg_runnable++;
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
		/* XXX Debugging hack */
		if (++count > 10000) {
			printf("setrunqueue(): corrupt kg_runq, td= %p\n", td);
			panic("deadlock in setrunqueue");
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		kg->kg_runnable++;
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke->ke_thread, flags);
	} else {
		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
		    td, td->td_ksegrp, td->td_proc->p_pid);
	}
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter(td);
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	if (td->td_critnest == 1) {
#ifdef PREEMPTION
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_pflags & TDP_OWEPREEMPT) {
			mtx_lock_spin(&sched_lock);
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
#endif
		td->td_critnest = 0;
		cpu_critical_exit(td);
	} else {
		td->td_critnest--;
	}
}
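
/*
 * A minimal usage sketch (not compiled in): critical sections nest, so
 * only the outermost critical_exit() leaves the critical region (and,
 * with PREEMPTION, runs any switch deferred via TDP_OWEPREEMPT).
 */
#if 0
static void
example_critical_section(void)
{
	critical_enter();	/* td_critnest: 0 -> 1 */
	critical_enter();	/* td_critnest: 1 -> 2 (nested) */
	/* ... code that must not be preempted ... */
	critical_exit();	/* td_critnest: 2 -> 1, still critical */
	critical_exit();	/* td_critnest: 1 -> 0, may switch here */
}
#endif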

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the new thread should immediately preempt the current
 * thread.  If so, it switches to the new thread and eventually returns true.
 * If not, it returns false so that the caller may place the thread on an
 * appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - The new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	if (ctd->td_kse == NULL || ctd->td_kse->ke_thread != ctd)
		return (0);
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
	    td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
	    !(cpri >= PRI_MIN_IDLE))
		return (0);
#endif
	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_pflags |= TDP_OWEPREEMPT;
		return (0);
	}

	/*
	 * Our thread state says that we are already on a run queue, so
	 * update our state as if we had been dequeued by choosethread().
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL, td);
	return (1);
#else
	return (0);
#endif
}

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicating a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
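
/*
 * Worked example of the status-bit arithmetic above, assuming 32-bit
 * status words (RQB_BPW == 32, RQB_L2BPW == 5) as defined in
 * <sys/runq.h>: for pri == 37, RQB_WORD(37) == 37 >> 5 == 1 and
 * RQB_BIT(37) == 1 << (37 & 31) == 1 << 5, so queue 37 is tracked by
 * bit 5 of rqb_bits[1].  RQB_FFS() finds the lowest set bit in a word,
 * which corresponds to the best (numerically lowest) priority queue
 * covered by that word.
 */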

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_DECL(_kern_sched);
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority KSE on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal; 0 or less are ignored. */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct kse *ke2;
			ke2 = ke = TAILQ_FIRST(rqh);

			while (count-- && ke2) {
				if (ke2->ke_thread->td_lastcpu == cpu) {
					ke = ke2;
					break;
				}
				ke2 = TAILQ_NEXT(ke2, ke_procq);
			}
		} else
#endif
			ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}
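
/*
 * A minimal sketch, not compiled in, of how a scheduler drives the
 * runq API: initialize once, queue a KSE at its priority, pick the
 * best candidate, and dequeue it before running it.  Assumes
 * sched_lock is held across the choose/remove pair, as runq_choose()
 * asserts above.
 */
#if 0
static struct kse *
example_runq_cycle(struct runq *rq, struct kse *ke)
{
	struct kse *best;

	runq_init(rq);			/* Empty queues, clear status bits. */
	runq_add(rq, ke);		/* Queue at td_priority / RQ_PPQ. */
	best = runq_choose(rq);		/* Highest priority queued KSE. */
	if (best != NULL)
		runq_remove(rq, best);	/* Caller sets ke_state afterwards. */
	return (best);
}
#endif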

#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	kdb_enter(string2);
}

void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_SA) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
#if 0
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#endif
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif